Cluster computing

Thursday, January 4, 2024

These are some code snippets to summarize text:

1. Using gensim

from gensim.summarization import summarize

def shrinktext(request):
    """Summarize the ``text`` field of a POSTed form using gensim.

    Args:
        request: Object exposing ``POST.get``; presumably a Django
            ``HttpRequest`` -- TODO confirm against the caller.

    Returns:
        The summary with repeated lines removed (first occurrence kept).
        If summarizing raises ``TypeError``, the first line of the
        pre-processed input is returned instead; for any other exception
        the exception message is returned.
    """
    text = request.POST.get('text', '')

    # Put every '.'-separated fragment on its own line before summarizing.
    text = '\n'.join(text.split('.'))

    try:
        summary = summarize(text)
        # dict.fromkeys drops repeated lines while keeping first-seen order.
        summary = '\n'.join(dict.fromkeys(summary.splitlines()))
    except TypeError:
        # Fall back to the first line of the input (empty string if none).
        summary = ''.join(text.splitlines()[0:1])
    except Exception as e:
        # Best effort: surface the error text rather than failing the request.
        summary = str(e)

    # The original snippet computed the summary but never handed it back.
    return summary

2. Using langchain

!pip install openai tiktoken chromadb langchain

# Set env var OPENAI_API_KEY or load from a .env file

# import dotenv

# dotenv.load_dotenv()

from langchain.chains.summarize import load_summarize_chain

from langchain.chat_models import ChatOpenAI

from langchain.document_loaders import WebBaseLoader

# Load the web page to summarize; `loader` is reused by the second example further down.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")

docs = loader.load()

# Chat model that produces the summary.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-1106")

# Summarization chain of type "stuff" built around the model.
chain = load_summarize_chain(llm, chain_type="stuff")

# The result is not stored; presumably it is shown as notebook cell output -- TODO confirm.
chain.run(docs)

OR with documents in a single prompt:

from langchain.chains.combine_documents.stuff import StuffDocumentsChain

from langchain.chains.llm import LLMChain

from langchain.prompts import PromptTemplate

# Prompt template with a single {text} placeholder for the document contents.

prompt_template = """Write a concise summary of the following:

"{text}"

CONCISE SUMMARY:"""

prompt = PromptTemplate.from_template(prompt_template)

# LLM chain pairing the chat model with the prompt.
# ChatOpenAI is imported in the previous example.

llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")

llm_chain = LLMChain(llm=llm, prompt=prompt)

# StuffDocumentsChain; document_variable_name matches the {text} placeholder in the prompt.

stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

# `loader` is the WebBaseLoader created in the previous example.
docs = loader.load()

print(stuff_chain.run(docs))

3. Using cloud APIs

setx LANGUAGE_KEY your-key

setx LANGUAGE_ENDPOINT your-endpoint

pip install azure-ai-textanalytics==5.3.0

# This example requires environment variables named "LANGUAGE_KEY" and "LANGUAGE_ENDPOINT"
import os  # needed for os.environ below; the original snippet never imported it

# os.environ.get returns None when a variable is not set.
key = os.environ.get('LANGUAGE_KEY')
endpoint = os.environ.get('LANGUAGE_ENDPOINT')

from azure.ai.textanalytics import TextAnalyticsClient

from azure.core.credentials import AzureKeyCredential

# Authenticate the client using your key and endpoint
def authenticate_client():
    """Return a TextAnalyticsClient built from the module-level ``key`` and ``endpoint``."""
    credential = AzureKeyCredential(key)
    return TextAnalyticsClient(endpoint=endpoint, credential=credential)

# Module-level client; it is passed to the summarization example further down.
client = authenticate_client()

# Example method for summarizing text
def sample_extractive_summarization(client, document):
    """Run an extractive-summarization action on ``document``.

    Args:
        client: Object exposing ``begin_analyze_actions``, e.g. the
            ``TextAnalyticsClient`` returned by ``authenticate_client``.
        document: Passed unchanged as the first argument of
            ``begin_analyze_actions``.
            NOTE(review): the Azure SDK appears to expect a list of
            documents here -- confirm.

    Returns:
        The value of ``poller.result()`` for the submitted action.
    """
    # Imported locally, as in the original example.
    from azure.ai.textanalytics import ExtractiveSummaryAction

    poller = client.begin_analyze_actions(
        document,
        actions=[
            # Ask for at most four sentences in the summary.
            ExtractiveSummaryAction(max_sentence_count=4),
        ],  # the original snippet never closed this list (SyntaxError)
    )

    return poller.result()

# The function takes (client, document); the original call omitted the
# document and would raise TypeError. Replace the placeholder with the text
# to summarize.
document = ["Replace this placeholder with the text to summarize."]
sample_extractive_summarization(client, document)

There are variations possible with the LLM context window, the choice of a keyword-based versus latent-semantic model, or the pipeline, but each of the approaches above provides a readable summary.

Wednesday, January 3, 2024

Partition to K Equal Sum Subsets

Given an integer array nums and an integer k, return true if it is possible to divide this array into k non-empty subsets whose sums are all equal.

Example 1:

Input: nums = [4,3,2,3,5,2,1], k = 4

Output: true

Explanation: It is possible to divide it into 4 subsets (5), (1, 4), (2,3), (2,3) with equal sums.

Example 2:

Input: nums = [1,2,3,4], k = 3

Output: false

Constraints:

1 <= k <= nums.length <= 16
1 <= nums[i] <= 10⁴
The frequency of each element is in the range [1, 4].

/**
 * Backtracking solution to "Partition to K Equal Sum Subsets": decides whether
 * an integer array can be divided into k non-empty subsets whose sums are all
 * equal.
 *
 * Uses java.util.ArrayList, java.util.Arrays and java.util.List.
 */
class Solution {

    /**
     * Returns true if {@code nums} can be split into {@code k} non-empty
     * subsets with equal sums.
     *
     * Every element must be placed, so the only feasible per-subset sum is
     * total / k; the search therefore runs once for that target instead of
     * once per candidate sum. The caller's array is not modified: a sorted
     * copy is searched from the largest value down so that placements that
     * cannot fit are rejected early.
     *
     * @param nums values to distribute; the pruning assumes non-negative values
     * @param k    number of subsets
     * @return true when such a partition exists; false otherwise, including
     *         for a null or empty array, k <= 0, or k > nums.length
     */
    public boolean canPartitionKSubsets(int[] nums, int k) {
        if (nums == null || k <= 0 || k > nums.length) {
            return false;
        }

        int total = 0;
        for (int value : nums) {
            total += value;
        }
        if (total % k != 0) {
            return false;
        }

        int target = total / k;
        if (target < 1) {
            return false;
        }

        int[] ordered = nums.clone();
        Arrays.sort(ordered);
        // A single value larger than the target can never be placed.
        if (ordered[ordered.length - 1] > target) {
            return false;
        }
        // Reverse to descending order so the largest values are placed first.
        for (int lo = 0, hi = ordered.length - 1; lo < hi; lo++, hi--) {
            int swap = ordered[lo];
            ordered[lo] = ordered[hi];
            ordered[hi] = swap;
        }

        List<List<Integer>> subsets = new ArrayList<>();
        for (int j = 0; j < k; j++) {
            subsets.add(new ArrayList<Integer>());
        }
        return insertRecursively(subsets, ordered, target, 0);
    }

    /**
     * Tries to place {@code nums[index..]} into {@code subsets} so that every
     * subset ends up summing to {@code sum}.
     *
     * On success the subsets hold the assignment that was found; on failure
     * they are restored to the state they had on entry.
     *
     * @param subsets subsets filled so far (mutated during the search)
     * @param nums    values to place
     * @param sum     required sum of every subset
     * @param index   position in {@code nums} of the next value to place
     * @return true if a complete valid assignment was found
     */
    public boolean insertRecursively(List<List<Integer>> subsets, int[] nums, int sum, int index) {
        // All values placed: the outcome is decided by the subset sums alone.
        // Returning here also keeps nums[index] below from running out of bounds.
        if (index == nums.length) {
            return valid(subsets, sum);
        }

        int value = nums[index];

        // Subset totals are unchanged between iterations because every failed
        // placement is undone before the next subset is tried.
        int[] totals = new int[subsets.size()];
        for (int i = 0; i < totals.length; i++) {
            totals[i] = sumOf(subsets.get(i));
        }

        for (int i = 0; i < totals.length; i++) {
            if (totals[i] + value > sum) {
                continue;
            }
            // The search depends only on subset totals, so a subset whose total
            // equals an earlier one would just repeat an attempt that failed.
            if (repeatsEarlierTotal(totals, i)) {
                continue;
            }

            List<Integer> subset = subsets.get(i);
            subset.add(value);
            if (insertRecursively(subsets, nums, sum, index + 1)) {
                return true;
            }
            // Backtrack: remove by position (int overload), not by value.
            subset.remove(subset.size() - 1);
        }

        return false;
    }

    /**
     * Checks that every subset sums to {@code sum}.
     *
     * @param subsets subsets to check
     * @param sum     required sum of every subset
     * @return true if all subsets have exactly that sum
     */
    public boolean valid(List<List<Integer>> subsets, int sum) {
        for (List<Integer> subset : subsets) {
            if (sumOf(subset) != sum) {
                return false;
            }
        }
        return true;
    }

    /** Returns the sum of the values in {@code subset}. */
    private static int sumOf(List<Integer> subset) {
        int total = 0;
        for (int value : subset) {
            total += value;
        }
        return total;
    }

    /** Returns true if an entry before {@code position} equals {@code totals[position]}. */
    private static boolean repeatsEarlierTotal(int[] totals, int position) {
        for (int j = 0; j < position; j++) {
            if (totals[j] == totals[position]) {
                return true;
            }
        }
        return false;
    }
}