The following implementation follows up on an article about contextual embeddings for UAV swarm camera image analytics.
import the
import os

from azure.ai.textanalytics import TextAnalyticsClient
from azure.ai.vision import VisionClient, AzureKeyCredential, AnalyzeImageOptions
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
# Azure credentials and endpoints.
# Values are taken from the environment when present so real secrets never
# have to be committed; the original placeholder strings remain the defaults,
# so behaviour is unchanged when the variables are not set.
# NOTE(review): the environment variable names below are a local convention
# chosen here, not something mandated by the Azure SDK - rename as needed.
vision_key = os.environ.get("AZURE_VISION_KEY", "YOUR_VISION_API_KEY")
vision_endpoint = os.environ.get("AZURE_VISION_ENDPOINT", "YOUR_VISION_API_ENDPOINT")
text_analytics_key = os.environ.get(
    "AZURE_TEXT_ANALYTICS_KEY", "YOUR_TEXT_ANALYTICS_API_KEY"
)
text_analytics_endpoint = os.environ.get(
    "AZURE_TEXT_ANALYTICS_ENDPOINT", "YOUR_TEXT_ANALYTICS_API_ENDPOINT"
)

# Initialize Azure Vision and Text Analytics clients
vision_client = VisionClient(
    credential=AzureKeyCredential(vision_key),
    endpoint=vision_endpoint,
)
text_analytics_client = TextAnalyticsClient(
    endpoint=text_analytics_endpoint,
    credential=AzureKeyCredential(text_analytics_key),
)
# Function to analyze image and extract tags and metadata
def extract_tags_and_metadata(image_url):
    """Analyze one image and return its caption and tag names.

    Args:
        image_url: Image location, passed unchanged to
            ``vision_client.analyze_image``.

    Returns:
        A dict with two keys:
        ``"description"`` - text of the first caption, or ``""`` when the
        result carries no caption;
        ``"tags"`` - list of the ``name`` attribute of every returned tag.
    """
    analyze_options = AnalyzeImageOptions(features=["objects", "tags", "description"])
    analysis_result = vision_client.analyze_image(image_url, analyze_options)

    # The original only guarded against an empty caption list. Also tolerate a
    # result whose ``tags`` or ``description`` is missing or None so that one
    # sparse result does not abort the whole batch.
    # NOTE(review): attribute names follow the original code - confirm them
    # against the installed Vision SDK version.
    raw_tags = getattr(analysis_result, "tags", None) or []
    tags = [tag.name for tag in raw_tags]

    description_part = getattr(analysis_result, "description", None)
    captions = getattr(description_part, "captions", None)

    return {
        "description": captions[0].text if captions else "",
        "tags": tags,
    }
# Function to generate a description using tags and metadata
def generate_description(tags, metadata):
    """Return a one-sentence description for an image.

    Args:
        tags: Iterable of tag-name strings; may be empty or None.
        metadata: Mapping that may contain a ``"description"`` caption.

    Returns:
        The caption from ``metadata`` when it is non-empty; otherwise a
        sentence built from ``tags``; otherwise a fixed sentence stating that
        nothing was recognized.
    """
    # .get() instead of [] so metadata without a caption key falls through to
    # the tag-based sentence rather than raising KeyError.
    description = metadata.get("description")
    if description:
        return description

    # With no tags the original produced the malformed "This image contains ."
    if not tags:
        return "This image contains no recognized objects."

    return f"This image contains {', '.join(tags)}."
# Sample image locations to run through the pipeline (placeholders)
image_collection = [
    "URL_OF_IMAGE_1",
    "URL_OF_IMAGE_2",
    "URL_OF_IMAGE_3",
]
# Extract tags, metadata, and generate descriptions.
# Builds one {"url", "description"} record per entry of image_collection, in
# the same order, by calling extract_tags_and_metadata and then
# generate_description for each URL.
image_data = []
for image_url in image_collection:
    metadata = extract_tags_and_metadata(image_url)
    description = generate_description(metadata["tags"], metadata)
    image_data.append({
        "url": image_url,
        "description": description,
    })
# Function to match a query to the best description using text embeddings
def match_query_to_best_description(query, image_data):
    """Return the stored description most similar to ``query``.

    Args:
        query: Free-text search string.
        image_data: Iterable of dicts; the ``"description"`` value of each
            entry is indexed. Entries with a missing or empty description are
            skipped.

    Returns:
        The text of the closest description, or ``None`` when there is nothing
        to search or the search yields no hit.
    """
    texts = [item["description"] for item in image_data if item.get("description")]
    # Nothing to index: return early instead of building an empty vector store.
    if not texts:
        return None

    embedding = OpenAIEmbeddings()
    # The third positional parameter of FAISS.from_texts is ``metadatas``; the
    # original passed a text splitter there. The descriptions are single
    # sentences, so they are indexed as-is without splitting.
    vectorstore = FAISS.from_texts(texts, embedding)

    # similarity_search takes the raw query text and embeds it itself; the
    # original handed it a precomputed embedding vector.
    best_match = vectorstore.similarity_search(query, k=1)

    # Hits are Document objects: the text is the ``page_content`` attribute,
    # not a mapping key.
    return best_match[0].page_content if best_match else None
# Sample search text used to demonstrate the lookup
query = "A scenic view of mountains with a lake in the foreground"

# Look up the closest stored description and show it under a heading line
best_description = match_query_to_best_description(query, image_data)
print("Best Matching Description:", best_description, sep="\n")
#codingexercise: CodingExercise-12-01-2024.docx
No comments:
Post a Comment