Here are some code snippets for summarizing text:
1.
Using gensim
# NOTE: gensim.summarization was removed in gensim 4.0; this requires gensim < 4.
from gensim.summarization import summarize


def shrinktext(request):
    """Summarize the POSTed 'text' field with gensim's TextRank summarizer.

    Reads ``text`` from the request's POST data (empty string if absent),
    summarizes it, and de-duplicates repeated lines in the summary.

    Parameters:
        request: an HTTP request object exposing ``POST.get`` (e.g. Django).

    Returns:
        str: the de-duplicated summary; on failure, the error message, or —
        for a TypeError — the first line of the input as a fallback.
    """
    text = request.POST.get('text', '')
    # gensim's summarizer works best with one sentence per line.
    text = '\n'.join(text.split('.'))
    try:
        summary = summarize(text)
        # De-duplicate lines while preserving first-seen order
        # (dict.fromkeys is ordered on Python 3.7+ and avoids the
        # O(n^2) "if line not in list" membership scan).
        summary = '\n'.join(dict.fromkeys(summary.splitlines()))
    except Exception as e:  # summarize raises ValueError/TypeError on bad input
        summary = str(e)
        if isinstance(e, TypeError):
            # Fall back to the first line of the original input.
            summary = ''.join(text.splitlines()[0:1])
    # NOTE(review): the original snippet computed `summary` but never used it —
    # presumably truncated before a response-rendering step; return it so the
    # result is not silently discarded.
    return summary
2.
Using LangChain
# Install dependencies first (shell command, not Python — the original
# embedded a notebook "!pip" magic directly in the source):
#   pip install openai tiktoken chromadb langchain

# Set env var OPENAI_API_KEY or load from a .env file:
# import dotenv
# dotenv.load_dotenv()

from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader

# Load the web page whose content we want to summarize.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
docs = loader.load()

# "stuff" chain type: all documents are stuffed into a single prompt,
# so the combined text must fit in the model's context window.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-1106")
chain = load_summarize_chain(llm, chain_type="stuff")
chain.run(docs)
OR with documents in a single
prompt:
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.chat_models import ChatOpenAI  # added: original relied on an earlier snippet's import
from langchain.document_loaders import WebBaseLoader
from langchain.prompts import PromptTemplate

# Define the summarization prompt; {text} is filled with the stuffed documents.
prompt_template = """Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)

# Define the LLM chain (16k-context model so larger documents fit).
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
llm_chain = LLMChain(llm=llm, prompt=prompt)

# StuffDocumentsChain concatenates all docs into the prompt's {text} slot.
stuff_chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_variable_name="text",
)

# Load the page and print its summary.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
docs = loader.load()
print(stuff_chain.run(docs))
3.
Using cloud APIs
# Shell setup (the original listed these as bare lines in the Python source):
#   setx LANGUAGE_KEY your-key
#   setx LANGUAGE_ENDPOINT your-endpoint
#   pip install azure-ai-textanalytics==5.3.0

# This example requires environment variables named
# "LANGUAGE_KEY" and "LANGUAGE_ENDPOINT".
import os

from azure.ai.textanalytics import ExtractiveSummaryAction, TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

key = os.environ.get('LANGUAGE_KEY')
endpoint = os.environ.get('LANGUAGE_ENDPOINT')


def authenticate_client():
    """Build a TextAnalyticsClient from the env-configured key and endpoint."""
    ta_credential = AzureKeyCredential(key)
    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=ta_credential)
    return text_analytics_client


client = authenticate_client()


def sample_extractive_summarization(client, document):
    """Run extractive summarization on the given documents.

    Parameters:
        client: an authenticated TextAnalyticsClient.
        document: list[str] — the input documents to summarize
            (begin_analyze_actions expects a list of documents).

    Returns:
        The paged action results from the long-running operation.
    """
    poller = client.begin_analyze_actions(
        document,
        actions=[
            # Keep at most 4 extracted sentences per document.
            ExtractiveSummaryAction(max_sentence_count=4)
        ],
    )
    document_results = poller.result()
    return document_results


# BUG in original: the call omitted the required `document` argument
# (sample_extractive_summarization(client) would raise TypeError).
sample_extractive_summarization(client, ["<text to summarize goes here>"])
Variations are possible — in the LLM context-window strategy, in the choice between keyword-based and latent-semantic models, and in the pipeline itself — but the snippets above produce a readable summary.
No comments:
Post a Comment