Sunday, December 1, 2024

The following implementation accompanies an article on using contextual embeddings for analytics over UAV swarm camera imagery: images are tagged and described with Azure AI Vision, and the descriptions are embedded and indexed so that a natural-language query can be matched to the best image.

# Imports: Azure AI Vision / Text Analytics clients and LangChain embedding utilities.

from azure.ai.vision import VisionClient, AzureKeyCredential, AnalyzeImageOptions

from azure.ai.textanalytics import TextAnalyticsClient

from langchain.embeddings import OpenAIEmbeddings

from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.vectorstores import FAISS

# Replace with your Azure credentials and endpoints

vision_key = "YOUR_VISION_API_KEY"

vision_endpoint = "YOUR_VISION_API_ENDPOINT"

text_analytics_key = "YOUR_TEXT_ANALYTICS_API_KEY"

text_analytics_endpoint = "YOUR_TEXT_ANALYTICS_API_ENDPOINT"

# Initialize Azure Vision and Text Analytics clients

vision_client = VisionClient(credential=AzureKeyCredential(vision_key), endpoint=vision_endpoint)

text_analytics_client = TextAnalyticsClient(endpoint=text_analytics_endpoint, credential=AzureKeyCredential(text_analytics_key))

# Function to analyze image and extract tags and metadata

def extract_tags_and_metadata(image_url):
    """Analyze a remote image and return its tags plus a caption.

    Args:
        image_url: Publicly reachable URL of the image to analyze.

    Returns:
        dict with:
            "description": first caption text from the Vision analysis,
                or "" when the service returns no captions.
            "tags": list of tag name strings detected in the image.
    """
    options = AnalyzeImageOptions(features=["objects", "tags", "description"])
    result = vision_client.analyze_image(image_url, options)

    captions = result.description.captions
    return {
        # Only the top-ranked caption is kept; fall back to an empty string.
        "description": captions[0].text if captions else "",
        "tags": [detected.name for detected in result.tags],
    }

# Function to generate a description using tags and metadata

def generate_description(tags, metadata):
    """Return a human-readable description for an image.

    Prefers the caption already present in ``metadata["description"]``;
    when that caption is empty, synthesizes one from the tag list.

    Args:
        tags: list of tag name strings for the image.
        metadata: dict containing at least a "description" key.

    Returns:
        A non-empty description string (caption or tag-based fallback).
    """
    caption = metadata["description"]
    if caption:
        return caption
    # No caption from the Vision service: fall back to listing the tags.
    return f"This image contains {', '.join(tags)}."

# Example image collection

# Example image collection to index.
image_collection = [
    "URL_OF_IMAGE_1",
    "URL_OF_IMAGE_2",
    "URL_OF_IMAGE_3",
]

# Analyze each image and pair its URL with a generated description.
image_data = []
for url in image_collection:
    meta = extract_tags_and_metadata(url)
    image_data.append(
        {
            "url": url,
            "description": generate_description(meta["tags"], meta),
        }
    )

# Function to match a query to the best description using text embeddings

def match_query_to_best_description(query, image_data):
    """Find the image description most similar to a natural-language query.

    Embeds all image descriptions into a FAISS vector store, then performs a
    similarity search with the query text.

    Args:
        query: Natural-language query string.
        image_data: list of dicts, each with a "description" key.

    Returns:
        The best-matching description string, or None when image_data is
        empty or the search yields no results.
    """
    # Guard: FAISS.from_texts raises on an empty text list.
    if not image_data:
        return None

    texts = [item["description"] for item in image_data]
    embedding = OpenAIEmbeddings()
    # Bug fixes vs. the original:
    # - FAISS.from_texts's third positional argument is `metadatas`, not a
    #   text splitter; the RecursiveCharacterTextSplitter was misused and
    #   is unnecessary here (descriptions are short), so it is dropped.
    vectorstore = FAISS.from_texts(texts, embedding)

    # - similarity_search expects the raw query string and embeds it
    #   internally; the original passed a precomputed embedding vector.
    matches = vectorstore.similarity_search(query, k=1)

    # - Results are Document objects: page_content is an attribute, not a
    #   subscriptable key.
    return matches[0].page_content if matches else None

# Example query to run against the indexed descriptions.
query = "A scenic view of mountains with a lake in the foreground"

# Retrieve and report the best-matching description.
best_description = match_query_to_best_description(query, image_data)
print("Best Matching Description:", best_description, sep="\n")

#codingexercise: CodingExercise-12-01-2024.docx

No comments:

Post a Comment