Multimodal image search
The following code snippet shows how multimodal search can be used to search for images. The images are indexed and retrieved based on vector embeddings, while the query itself is plain text; the snippet also shows how a query vector can be built from an image.
import http.client
import json
import os
import urllib.parse

import requests
from dotenv import load_dotenv
from tenacity import retry, stop_after_attempt, wait_fixed
from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.models import (
    RawVectorQuery,
)
from azure.search.documents.indexes.models import (
    ExhaustiveKnnParameters,
    ExhaustiveKnnVectorSearchAlgorithmConfiguration,
    HnswParameters,
    HnswVectorSearchAlgorithmConfiguration,
    SimpleField,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
)
from IPython.display import Image, display
load_dotenv()
service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")
api_version = os.getenv("AZURE_SEARCH_API_VERSION")
key = os.getenv("AZURE_SEARCH_ADMIN_KEY")
aiVisionApiKey = os.getenv("AZURE_AI_VISION_API_KEY")
aiVisionRegion = os.getenv("AZURE_AI_VISION_REGION")
aiVisionEndpoint = os.getenv("AZURE_AI_VISION_ENDPOINT")
credential = AzureKeyCredential(key)
search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=credential)
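# The snippet assumes the search index already exists with a key field, a "description"
# field holding the image file name, and an "image_vector" field. The sketch below shows
# how such an index could be created with the classes imported above; it is an assumption,
# not the original index definition: the names "myHnsw" / "myHnswProfile", the
# 1024-dimension size of Azure AI Vision image embeddings, and the exact keyword arguments
# (which vary across the preview SDK releases that expose RawVectorQuery) are all assumed.
index_client = SearchIndexClient(endpoint=service_endpoint, credential=credential)
fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True),
    SearchField(name="description", type=SearchFieldDataType.String, searchable=True),
    SearchField(name="image_vector",
                type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                searchable=True,
                vector_search_dimensions=1024,
                vector_search_profile="myHnswProfile"),
]
vector_search = VectorSearch(
    algorithms=[
        HnswVectorSearchAlgorithmConfiguration(
            name="myHnsw",
            kind=VectorSearchAlgorithmKind.HNSW,
            parameters=HnswParameters(m=4, ef_construction=400, ef_search=500, metric="cosine"),
        )
    ],
    profiles=[VectorSearchProfile(name="myHnswProfile", algorithm="myHnsw")],
)
index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)
# index_client.create_or_update_index(index)  # uncomment to actually create/update the index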
DIR_PATH = os.getcwd()  # assumed: base folder that contains the images/ directory
query_image_path = "images/PIC01.jpeg"
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1))
def get_image_vector(image_path, key, region):
    # Call the Azure AI Vision image retrieval API to get the embedding of a local image
    # file or of an image URL; retried on transient failures (up to five attempts).
    headers = {
        'Ocp-Apim-Subscription-Key': key,
    }
    params = urllib.parse.urlencode({
        'model-version': '2023-04-15',
    })

    try:
        if image_path.startswith(('http://', 'https://')):
            headers['Content-Type'] = 'application/json'
            body = json.dumps({"url": image_path})
        else:
            headers['Content-Type'] = 'application/octet-stream'
            with open(image_path, "rb") as filehandler:
                image_data = filehandler.read()
                body = image_data

        conn = http.client.HTTPSConnection(f'{region}.api.cognitive.microsoft.com', timeout=3)
        conn.request("POST", "/computervision/retrieval:vectorizeImage?api-version=2023-04-01-preview&%s" % params, body, headers)
        response = conn.getresponse()
        data = json.load(response)
        conn.close()

        if response.status != 200:
            raise Exception(f"Error processing image {image_path}: {data.get('message', '')}")

        return data.get("vector")

    except (TimeoutError, http.client.HTTPException):
        print(f"Timeout/Error for {image_path}. Retrying...")
        raise
# Image-based query: build a vector query from the embedding of a local query image
vector_query = RawVectorQuery(vector=get_image_vector(query_image_path,
                                                      aiVisionApiKey,
                                                      aiVisionRegion),
                              k=3,
                              fields="image_vector")
def generate_embeddings(text, aiVisionEndpoint, aiVisionApiKey):
    # Call the Azure AI Vision text retrieval API to embed a text query into the same
    # vector space as the image embeddings.
    url = f"{aiVisionEndpoint}/computervision/retrieval:vectorizeText"
    params = {
        "api-version": "2023-02-01-preview"
    }
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": aiVisionApiKey
    }
    data = {
        "text": text
    }

    response = requests.post(url, params=params, headers=headers, json=data)

    if response.status_code == 200:
        embeddings = response.json()["vector"]
        return embeddings
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return None
query = "farm"
vector_text = generate_embeddings(query, aiVisionEndpoint, aiVisionApiKey)
vector_query = RawVectorQuery(vector=vector_text,
k=3,
fields="image_vector")
# Perform the search: the plain-text query and the vector query run against the index,
# and the matching image file names (stored in "description") are printed and displayed
results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["description"]
)

for result in results:
    print(f"{result['description']}")
    display(Image(DIR_PATH + "/images/" + result["description"]))
    print("\n")