Thursday, December 28, 2023

Summarizer code snippets

 

These are some code snippets to summarize text:

1.       Using gensim

from gensim.summarization import summarize

def shrinktext(request):
    """Summarize the POSTed 'text' field with gensim's extractive summarizer.

    The text is re-split on '.' and rejoined with newlines because
    gensim's summarize() detects sentence boundaries more reliably with
    one sentence per line.  Duplicate lines in the summary are removed
    while preserving their first-seen order.

    NOTE(review): the gensim.summarization module was removed in gensim
    4.0 — this view requires gensim < 4.0.
    NOTE(review): the original never returns or renders `summary`;
    presumably the enclosing view code does — confirm against caller.
    """
    text = request.POST.get('text', '')
    text = '\n'.join(text.split('.'))
    try:
        summary = summarize(text)
        # dict.fromkeys keeps insertion order (Python 3.7+) and gives
        # O(n) de-duplication instead of the O(n^2) list-membership loop.
        summary = '\n'.join(dict.fromkeys(summary.splitlines()))
    except TypeError:
        # summarize() raises TypeError when the input has too few
        # sentences; fall back to the first line of the input (or '').
        summary = ''.join(text.splitlines()[0:1])
    except Exception as e:
        # Any other failure: surface the error message as the "summary",
        # matching the original snippet's best-effort behavior.
        summary = str(e)

2.       Using langchain

!pip install openai tiktoken chromadb langchain

 

# Set env var OPENAI_API_KEY or load from a .env file

# import dotenv

 

# dotenv.load_dotenv()

from langchain.chains.summarize import load_summarize_chain

from langchain.chat_models import ChatOpenAI

from langchain.document_loaders import WebBaseLoader


# Fetch the web page and wrap its contents as LangChain Document objects.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")

docs = loader.load()


# temperature=0 for deterministic output.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-1106")

# chain_type="stuff" concatenates all documents into one prompt, so the
# whole page must fit inside the model's context window.
chain = load_summarize_chain(llm, chain_type="stuff")


# Calls the OpenAI API; requires OPENAI_API_KEY in the environment
# (see the commented dotenv lines above).
chain.run(docs)

 

Or, equivalently, by stuffing the documents into a single prompt yourself:

from langchain.chains.combine_documents.stuff import StuffDocumentsChain

from langchain.chains.llm import LLMChain

from langchain.prompts import PromptTemplate


# Define prompt: {text} is the placeholder the documents get stuffed into.
prompt_template = """Write a concise summary of the following:

"{text}"

CONCISE SUMMARY:"""

prompt = PromptTemplate.from_template(prompt_template)


# Define LLM chain; the 16k-context model leaves room for long pages.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")

llm_chain = LLMChain(llm=llm, prompt=prompt)


# Define StuffDocumentsChain.  document_variable_name must match the
# {text} placeholder in the prompt template above.
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")


docs = loader.load()  # `loader` is the WebBaseLoader created in the previous snippet

print(stuff_chain.run(docs))

 

3.       Using cloud APIs

setx LANGUAGE_KEY your-key

setx LANGUAGE_ENDPOINT your-endpoint

pip install azure-ai-textanalytics==5.3.0

# This example requires environment variables named "LANGUAGE_KEY" and "LANGUAGE_ENDPOINT"

import os  # NOTE(review): `os` was never imported in the original snippet — required for os.environ

# Read the Azure Language resource credentials from the environment
# (set beforehand with `setx LANGUAGE_KEY ...` / `setx LANGUAGE_ENDPOINT ...`).
# .get() returns None rather than raising if a variable is missing.
key = os.environ.get('LANGUAGE_KEY')

endpoint = os.environ.get('LANGUAGE_ENDPOINT')

 

from azure.ai.textanalytics import TextAnalyticsClient

from azure.core.credentials import AzureKeyCredential

 

# Authenticate the client using your key and endpoint

# Authenticate the client using your key and endpoint
def authenticate_client():
    """Return a TextAnalyticsClient built from the module-level key and endpoint."""
    return TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

 

# Create one shared client for all subsequent calls.
client = authenticate_client()

 

# Example method for summarizing text

def sample_extractive_summarization(client, document):
    """Run Azure extractive summarization over `document`.

    Parameters:
        client: an authenticated TextAnalyticsClient.
        document: the documents to analyze — a list of strings (or dicts),
            as accepted by begin_analyze_actions.

    Returns:
        The paged results from poller.result(); iterate them to read the
        extracted summary sentences.
    """
    # Only ExtractiveSummaryAction is needed here.  The original also
    # re-imported TextAnalyticsClient and AzureKeyCredential inside the
    # function, but neither was used in the body.
    from azure.ai.textanalytics import ExtractiveSummaryAction

    # Submit a long-running analyze operation with a single extractive-
    # summary action, capped at 4 sentences per document.
    poller = client.begin_analyze_actions(
        document,
        actions=[
            ExtractiveSummaryAction(max_sentence_count=4)
        ],
    )

    # Block until the service finishes and return the results.
    return poller.result()

 

# The original call omitted the required `document` argument and would
# raise TypeError before reaching the service.  Pass the text to
# summarize as a list of documents.
example_documents = [
    "The extractive summarization feature uses natural language processing "
    "to select the key sentences of a document and return them as a summary."
]
sample_extractive_summarization(client, example_documents)

 

 

There are possible variations — the LLM context window, keyword versus latent-semantic models, or the pipeline structure — but the approaches above produce a readable summary.

No comments:

Post a Comment