Thursday, January 4, 2024

 

These are some code snippets to summarize text:

1.       Using gensim

from gensim.summarization import summarize

def shrinktext(request):
    """Summarize the ``text`` field of a POSTed form using gensim's ``summarize``.

    Args:
        request: Object exposing a ``POST`` mapping with a ``get`` method
            (presumably a Django ``HttpRequest`` -- confirm against the caller).

    Returns:
        The summary with duplicate lines removed. If ``summarize`` raises
        ``TypeError`` the first line of the input is returned instead; for
        any other exception the exception message is returned.

    NOTE(review): ``gensim.summarization`` was removed in Gensim 4.0, so this
    snippet needs gensim < 4.0.
    """
    raw_text = request.POST.get('text', '')

    # Put each '.'-delimited fragment on its own line before summarizing.
    text = '\n'.join(raw_text.split('.'))

    try:
        summary = summarize(text)
        # dict.fromkeys drops repeated lines while keeping first-seen order.
        summary = '\n'.join(dict.fromkeys(summary.splitlines()))
    except TypeError:
        # Fall back to the first line of the input text.
        summary = ''.join(text.splitlines()[0:1])
    except Exception as e:
        # Best effort: surface the failure message instead of crashing.
        summary = str(e)

    # Hand the result back so the caller can use it (e.g. wrap it in a response).
    return summary

2.       Using langchain

!pip install openai tiktoken chromadb langchain

 

# Set env var OPENAI_API_KEY or load from a .env file

# import dotenv

 

# dotenv.load_dotenv()

from langchain.chains.summarize import load_summarize_chain

from langchain.chat_models import ChatOpenAI

from langchain.document_loaders import WebBaseLoader

 

# Load the blog post at this URL into documents for summarization.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")

docs = loader.load()

 

# temperature=0 requests the least random output from the chat model.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-1106")

# chain_type="stuff": presumably all documents are placed into a single prompt
# (see the LangChain summarization docs) -- confirm for the installed version.
chain = load_summarize_chain(llm, chain_type="stuff")

 

# The return value is not printed or stored here; in a notebook (see the
# `!pip` line above) the cell output shows it. Wrap in print() for a script.
chain.run(docs)

 

OR with documents in a single prompt:

from langchain.chains.combine_documents.stuff import StuffDocumentsChain

from langchain.chains.llm import LLMChain

from langchain.prompts import PromptTemplate

 

# Define prompt
# "{text}" is the placeholder that receives the document content; its name
# must match document_variable_name passed to StuffDocumentsChain below.

prompt_template = """Write a concise summary of the following:

"{text}"

CONCISE SUMMARY:"""

prompt = PromptTemplate.from_template(prompt_template)

 

# Define LLM chain
# Rebinds `llm` from the earlier snippet to the "gpt-3.5-turbo-16k" model.

llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")

llm_chain = LLMChain(llm=llm, prompt=prompt)

 

# Define StuffDocumentsChain
# The documents are substituted into the prompt variable named "text".

stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

 

# Reuses `loader` created in the previous snippet to load the documents again.
docs = loader.load()

print(stuff_chain.run(docs))

 

3.       Using cloud apis

setx LANGUAGE_KEY your-key

setx LANGUAGE_ENDPOINT your-endpoint

pip install azure-ai-textanalytics==5.3.0

# This example requires environment variables named "LANGUAGE_KEY" and "LANGUAGE_ENDPOINT"
# `os` must be imported before the environment can be read.
import os

# os.environ.get returns None when a variable is unset, so a missing setting
# only surfaces later when the client is constructed.
key = os.environ.get('LANGUAGE_KEY')

endpoint = os.environ.get('LANGUAGE_ENDPOINT')

 

from azure.ai.textanalytics import TextAnalyticsClient

from azure.core.credentials import AzureKeyCredential

 

# Authenticate the client using your key and endpoint

def authenticate_client():
    """Return a TextAnalyticsClient built from the module-level key and endpoint."""
    return TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

 

# Build the client once at module level; it is passed to the summarization call further down.
client = authenticate_client()

 

# Example method for summarizing text

def sample_extractive_summarization(client, document, max_sentence_count=4):
    """Run an extractive-summarization action and return its results.

    Args:
        client: Object providing ``begin_analyze_actions`` (the
            ``TextAnalyticsClient`` built by ``authenticate_client``).
        document: The text to summarize, either a single string or a list of
            documents. A single string is wrapped in a list because the
            service call takes a batch of documents.
        max_sentence_count: Upper bound on sentences per summary; defaults
            to 4.

    Returns:
        Whatever ``poller.result()`` yields for the submitted actions
        (presumably an iterable of per-document action results -- confirm
        against the SDK reference).
    """
    # Imported locally because the file-level imports do not include it.
    from azure.ai.textanalytics import ExtractiveSummaryAction

    documents = [document] if isinstance(document, str) else document

    poller = client.begin_analyze_actions(
        documents,
        actions=[
            ExtractiveSummaryAction(max_sentence_count=max_sentence_count)
        ],
    )

    # result() waits for the started operation and hands back its outcome.
    return poller.result()

 

# The function needs the text to summarize as its second argument.
# Replace the placeholder with your own text.
document = ["your-text-to-summarize"]

sample_extractive_summarization(client, document)

 

 

Variations are possible in the LLM context window, in the choice between a keyword-based and a latent-semantic model, or in the pipeline, but each of the approaches above produces a readable summary.

Wednesday, January 3, 2024

 

Partition to K Equal Sum Subsets


Given an integer array nums and an integer k, return true if it is possible to divide this array into k non-empty subsets whose sums are all equal.

 

Example 1:

Input: nums = [4,3,2,3,5,2,1], k = 4

Output: true

Explanation: It is possible to divide it into 4 subsets (5), (1, 4), (2,3), (2,3) with equal sums.

Example 2:

Input: nums = [1,2,3,4], k = 3

Output: false

 

Constraints:

  • 1 <= k <= nums.length <= 16
  • 1 <= nums[i] <= 10^4
  • The frequency of each element is in the range [1, 4].

 

class Solution {

    public boolean canPartitionKSubsets(int[] nums, int k) {

        int sum = 0;

        for (int i = 0; i < nums.length; i++){

            sum += nums[i];

        }

 

        for (int i = 1; i <= sum/k; i++) {

            List<List<Integer>> subsets = new ArrayList<>();

            for (int j = 0; j < k; j++){

                subsets.add(new ArrayList<Integer>());

            }

            Arrays.sort(nums);

            if (insertRecursively(subsets, nums, i, 0))

                return true;

            }

        }

        return false;

    }

 

    public boolean insertRecursively(List<List<Integer>> subsets, int[] nums, int sum, int index) {

        if (index == nums.length &&

            valid(subsets, sum)) {

                return true;

            }

        }

        for (int i = 0; i < subsets.size(); i++){

            int subsetSum = 0;

            for (int j = 0; j < subsets.get(i).size(); j++){

                subsetSum += subsets.get(i).get(j);

            }

            if (subsetSum + nums[index] <= sum){

                subsets.get(i).add(nums[index]);

                if (insertRecursively(subsets, nums, sum, index+1)) {

                    return true;

                }

                subset.get(i).remove(subset.get(i).size()-1);

            }

        }

        return false;

    }

 

    public boolean valid(List<List<Integer>> subsets, int sum){

        for (int i = 0; i < subsets.size(); i++){

            int subsetSum = 0;

            for (int j = 0; j < subsets.get(i).size(); j++){

                subsetSum += subsets.get(i).get(j);

            }

            if (subsetSum != sum){

                return false;

            }

        }

        return true;

    }

}