Thursday, December 28, 2023

Summarizer code snippets

 

These are some code snippets to summarize text:

1.       Using gensim

from gensim.summarization import summarize

def shrinktext(request):
    """Summarize the POSTed 'text' field with gensim's extractive summarizer.

    The text is re-split on '.' and rejoined with newlines because
    gensim's summarize() detects sentence boundaries more reliably with
    one sentence per line.  Duplicate lines in the summary are removed
    while preserving their first-seen order.

    NOTE(review): the gensim.summarization module was removed in gensim
    4.0 — this view requires gensim < 4.0.
    NOTE(review): the original never returns or renders `summary`;
    presumably the enclosing view code does — confirm against caller.
    """
    text = request.POST.get('text', '')
    text = '\n'.join(text.split('.'))
    try:
        summary = summarize(text)
        # dict.fromkeys keeps insertion order (Python 3.7+) and gives
        # O(n) de-duplication instead of the O(n^2) list-membership loop.
        summary = '\n'.join(dict.fromkeys(summary.splitlines()))
    except TypeError:
        # summarize() raises TypeError when the input has too few
        # sentences; fall back to the first line of the input (or '').
        summary = ''.join(text.splitlines()[0:1])
    except Exception as e:
        # Any other failure: surface the error message as the "summary",
        # matching the original snippet's best-effort behavior.
        summary = str(e)

2.       Using langchain

!pip install openai tiktoken chromadb langchain

 

# Set env var OPENAI_API_KEY or load from a .env file

# import dotenv

 

# dotenv.load_dotenv()

from langchain.chains.summarize import load_summarize_chain

from langchain.chat_models import ChatOpenAI

from langchain.document_loaders import WebBaseLoader


# Fetch the web page and wrap its contents as LangChain Document objects.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")

docs = loader.load()


# temperature=0 for deterministic output.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-1106")

# chain_type="stuff" concatenates all documents into one prompt, so the
# whole page must fit inside the model's context window.
chain = load_summarize_chain(llm, chain_type="stuff")


# Calls the OpenAI API; requires OPENAI_API_KEY in the environment
# (see the commented dotenv lines above).
chain.run(docs)

 

Or, equivalently, by stuffing the documents into a single prompt yourself:

from langchain.chains.combine_documents.stuff import StuffDocumentsChain

from langchain.chains.llm import LLMChain

from langchain.prompts import PromptTemplate


# Define prompt: {text} is the placeholder the documents get stuffed into.
prompt_template = """Write a concise summary of the following:

"{text}"

CONCISE SUMMARY:"""

prompt = PromptTemplate.from_template(prompt_template)


# Define LLM chain; the 16k-context model leaves room for long pages.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")

llm_chain = LLMChain(llm=llm, prompt=prompt)


# Define StuffDocumentsChain.  document_variable_name must match the
# {text} placeholder in the prompt template above.
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")


docs = loader.load()  # `loader` is the WebBaseLoader created in the previous snippet

print(stuff_chain.run(docs))

 

3.       Using cloud APIs

setx LANGUAGE_KEY your-key

setx LANGUAGE_ENDPOINT your-endpoint

pip install azure-ai-textanalytics==5.3.0

# This example requires environment variables named "LANGUAGE_KEY" and "LANGUAGE_ENDPOINT"

import os  # NOTE(review): `os` was never imported in the original snippet — required for os.environ

# Read the Azure Language resource credentials from the environment
# (set beforehand with `setx LANGUAGE_KEY ...` / `setx LANGUAGE_ENDPOINT ...`).
# .get() returns None rather than raising if a variable is missing.
key = os.environ.get('LANGUAGE_KEY')

endpoint = os.environ.get('LANGUAGE_ENDPOINT')

 

from azure.ai.textanalytics import TextAnalyticsClient

from azure.core.credentials import AzureKeyCredential

 

# Authenticate the client using your key and endpoint

# Authenticate the client using your key and endpoint
def authenticate_client():
    """Return a TextAnalyticsClient built from the module-level key and endpoint."""
    return TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

 

# Create one shared client for all subsequent calls.
client = authenticate_client()

 

# Example method for summarizing text

def sample_extractive_summarization(client, document):
    """Run Azure extractive summarization over `document`.

    Parameters:
        client: an authenticated TextAnalyticsClient.
        document: the documents to analyze — a list of strings (or dicts),
            as accepted by begin_analyze_actions.

    Returns:
        The paged results from poller.result(); iterate them to read the
        extracted summary sentences.
    """
    # Only ExtractiveSummaryAction is needed here.  The original also
    # re-imported TextAnalyticsClient and AzureKeyCredential inside the
    # function, but neither was used in the body.
    from azure.ai.textanalytics import ExtractiveSummaryAction

    # Submit a long-running analyze operation with a single extractive-
    # summary action, capped at 4 sentences per document.
    poller = client.begin_analyze_actions(
        document,
        actions=[
            ExtractiveSummaryAction(max_sentence_count=4)
        ],
    )

    # Block until the service finishes and return the results.
    return poller.result()

 

# The original call omitted the required `document` argument and would
# raise TypeError before reaching the service.  Pass the text to
# summarize as a list of documents.
example_documents = [
    "The extractive summarization feature uses natural language processing "
    "to select the key sentences of a document and return them as a summary."
]
sample_extractive_summarization(client, example_documents)

 

 

There are possible variations — the LLM context window, keyword versus latent-semantic models, or the pipeline structure — but the approaches above produce a readable summary.

No comments:

Post a Comment