# Sample: indexing images in Azure AI Search for lexical and vector search.
#! /usr/bin/python
#from azure.ai.vision import VisionClient
from azure.core.credentials import AzureKeyCredential
from azure.core.rest import HttpRequest, HttpResponse
from azure.core.exceptions import HttpResponseError
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from tenacity import retry, stop_after_attempt, wait_fixed
from dotenv import load_dotenv
import json
import requests
import http.client, urllib.parse
import os
# Load configuration from a local .env file into the process environment.
load_dotenv()
# Azure AI Search settings: service endpoint, target index, REST API version,
# and admin key (admin key currently unused — auth goes through AAD below).
search_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")
search_api_version = os.getenv("AZURE_SEARCH_API_VERSION")
search_api_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")
# Azure AI Vision settings: key-based auth plus endpoint/region/API version.
vision_api_key = os.getenv("AZURE_AI_VISION_API_KEY")
vision_api_version = os.getenv("AZURE_AI_VISION_API_VERSION")
vision_region = os.getenv("AZURE_AI_VISION_REGION")
vision_endpoint = os.getenv("AZURE_AI_VISION_ENDPOINT")
# Search uses AAD (DefaultAzureCredential); Vision uses its API key.
credential = DefaultAzureCredential()
#search_credential = AzureKeyCredential(search_api_key)
vision_credential = AzureKeyCredential(vision_api_key)
# Initialize Azure clients
#vision_client = VisionClient(endpoint=vision_endpoint, credential=AzureKeyCredential(vision_api_key))
search_client = SearchClient(endpoint=search_endpoint, index_name=index_name, credential=credential)
analysis_client = ImageAnalysisClient(vision_endpoint, vision_credential)
# Define SAS URL template
# NOTE(review): this SAS token has a fixed expiry window — regenerate before reuse.
sas_template = "https://saravinoteblogs.blob.core.windows.net/playground/vision/main/main/{file}.jpg?sp=rle&st=2025-05-11T00:36:41Z&se=2025-05-11T08:36:41Z&spr=https&sv=2024-11-04&sr=d&sig=vjCrqWLo3LbmkXwCyIKWtAtFnYO2uBSxEWNgGKbeS00%3D&sdd=3"
# Process images in batches of 100
batch_size = 100
total_images = 2 # 17853 # Adjust this as needed
def get_description(id, image_url):
    """Analyze the image at *image_url* and build a description dict for indexing.

    Parameters
    ----------
    id : str
        Document key stored under the "id" field.  (Shadows the builtin
        ``id``; kept for backward compatibility with existing callers.)
    image_url : str
        Publicly reachable (SAS) URL of the image to analyze.

    Returns
    -------
    dict
        Always contains "id"; optionally "caption", "caption_confidence",
        "tags" (list of ``{"name", "confidence"}`` dicts) and "objects"
        (list of ``{"name", "confidence", "bounding_box"}`` dicts),
        depending on what the analysis returned.
    """
    # BUG FIX: the original referenced an undefined name `client`; use the
    # module-level ImageAnalysisClient instead.
    result = analyze_image_from_sdk(analysis_client, image_url)
    description = {}
    description["id"] = id
    # Caption (single best caption plus its confidence score).
    if result.caption:
        print(f"Caption: {result.caption.text}")
        print(f"Caption Confidence: {result.caption.confidence}")
        description["caption"] = f"{result.caption.text}"
        description["caption_confidence"] = result.caption.confidence
    # Tags: flatten each SDK tag object into a plain dict.
    if result.tags:
        print("Tags:")
        tags = []
        for tag in result.tags:
            # BUG FIX: the original rebound `tag = {}` inside the loop,
            # clobbering the SDK tag object before .name/.confidence were read.
            tag_item = {}
            print(f" {tag.name}: {tag.confidence}")
            tag_item["name"] = f"{tag.name}"
            tag_item["confidence"] = f"{tag.confidence}"
            tags.append(tag_item)
        description["tags"] = tags
    # Detected objects, including bounding boxes when present.
    if result.objects:
        print("Objects:")
        objectItems = []
        for obj in result.objects:
            objectItem = {}
            print(f" {obj.name}: {obj.confidence}")
            objectItem["name"] = f"{obj.name}"
            objectItem["confidence"] = obj.confidence
            if obj.bounding_box:
                print(f" Bounding Box: {obj.bounding_box}")
                objectItem["bounding_box"] = f"{obj.bounding_box}"
            objectItems.append(objectItem)
        description["objects"] = objectItems
    return description
#@retry(stop=stop_after_attempt(5), wait=wait_fixed(1))
def vectorize_image(client, blob_url):
    """Request an embedding vector for *blob_url* via the raw REST endpoint.

    Sends a POST to ``/retrieval:vectorizeImage`` through the SDK client's
    transport (``send_request``), authenticating with the Vision API key.

    Parameters
    ----------
    client : ImageAnalysisClient
        Client whose pipeline is used to send the raw request.
    blob_url : str
        SAS URL of the image blob to vectorize.

    Returns
    -------
    dict | None
        The parsed JSON response on success, ``None`` on HTTP error.
    """
    headers = {
        'Ocp-Apim-Subscription-Key': vision_api_key,
        'Content-Type': 'application/json',
    }
    params = {
        'model-version': '2023-04-15',
        'language': 'en'
    }
    request = HttpRequest(
        method="POST",
        url=f"/retrieval:vectorizeImage?api-version={vision_api_version}",
        json={"url": blob_url},
        params=params,
        headers=headers
    )
    response = client.send_request(request)
    try:
        print(repr(response))
        response.raise_for_status()
        print(f"vectorize returned {response.json()}")
        return response.json()
    # BUG FIX: the original handler printed `e` without binding it
    # (`except HttpResponseError:`), raising NameError on any HTTP failure.
    except HttpResponseError as e:
        print(str(e))
        return None
#@retry(stop=stop_after_attempt(5), wait=wait_fixed(1))
def get_image_vector(image_path, key, region):
    """Vectorize an image (URL or local file) via a direct HTTPS call.

    Parameters
    ----------
    image_path : str
        Either an http(s) URL (sent as JSON) or a local file path
        (sent as a raw octet-stream body).
    key : str
        Vision API subscription key.
    region : str
        Unused; kept for backward compatibility with existing callers.

    Returns
    -------
    list | None
        The "vector" field of the service response.

    Raises
    ------
    Exception
        If the service returns a non-200 status.
    """
    headers = {
        'Ocp-Apim-Subscription-Key': key,
    }
    params = urllib.parse.urlencode({
        'model-version': 'latest',
    })
    try:
        if image_path.startswith(('http://', 'https://')):
            headers['Content-Type'] = 'application/json'
            body = json.dumps({"url": image_path})
        else:
            headers['Content-Type'] = 'application/octet-stream'
            with open(image_path, "rb") as filehandler:
                body = filehandler.read()
        # NOTE(review): host is hardcoded rather than derived from
        # AZURE_AI_VISION_ENDPOINT — confirm this matches the resource in use.
        conn = http.client.HTTPSConnection("img01.cognitiveservices.azure.com", timeout=3)
        try:
            conn.request("POST", "/retrieval:vectorizeImage?api-version=2023-04-01-preview&%s" % params, body, headers)
            response = conn.getresponse()
            print(repr(response))
            data = json.load(response)
            print(repr(data))
        finally:
            # Close the connection even if reading the response fails.
            conn.close()
        if response.status != 200:
            raise Exception(f"Error processing {image_path}: {data.get('message', '')}")
        return data.get("vector")
    # BUG FIX: the original caught requests.exceptions.Timeout, but this
    # function uses http.client, which raises TimeoutError (socket.timeout)
    # or http.client.HTTPException — the requests exception could never fire.
    except (TimeoutError, http.client.HTTPException) as e:
        print(f"Timeout/Error for {image_path}. Retrying...")
        raise
#@retry(stop=stop_after_attempt(5), wait=wait_fixed(1))
def analyze_image(client, blob_url):
    """Run image analysis on *blob_url* via the raw REST endpoint.

    Sends a POST to ``/computervision/imageanalysis:analyze`` through the
    SDK client's transport, authenticating with the Vision API key.

    Parameters
    ----------
    client : ImageAnalysisClient
        Client whose pipeline is used to send the raw request.
    blob_url : str
        SAS URL of the image blob to analyze.

    Returns
    -------
    dict | None
        The parsed JSON analysis result on success, ``None`` on HTTP error.
    """
    headers = {
        # BUG FIX: the original sent the Search admin key; this is a Vision
        # endpoint and must use the Vision API key (as vectorize_image does).
        'Ocp-Apim-Subscription-Key': vision_api_key,
        'Content-Type': 'application/json',
    }
    params = {
        'model-version': '2023-04-15',
        'language': 'en'
    }
    request = HttpRequest(
        method="POST",
        url=f"/computervision/imageanalysis:analyze?api-version={vision_api_version}",
        json={"url": blob_url},
        params=params,
        headers=headers
    )
    response = client.send_request(request)
    try:
        response.raise_for_status()
        print(f"analyze returned {response.json()}")
        return response.json()
    # BUG FIX: the original handler printed `e` without binding it
    # (`except HttpResponseError:`), raising NameError on any HTTP failure.
    except HttpResponseError as e:
        print(str(e))
        return None
def analyze_image_from_sdk(client, blob_url):
    """Analyze the image at *blob_url* with the ImageAnalysisClient SDK.

    Requests the full set of visual features (tags, objects, captions,
    dense captions, OCR text, smart crops, people) and returns the SDK's
    result object unchanged.
    """
    # At least one visual feature is mandatory for the analyze call.
    requested_features = [
        VisualFeatures.TAGS,
        VisualFeatures.OBJECTS,
        VisualFeatures.CAPTION,
        VisualFeatures.DENSE_CAPTIONS,
        VisualFeatures.READ,
        VisualFeatures.SMART_CROPS,
        VisualFeatures.PEOPLE,
    ]
    return client.analyze(
        image_url=blob_url,
        visual_features=requested_features,
        smart_crops_aspect_ratios=[0.9, 1.33],  # only used with SMART_CROPS
        gender_neutral_caption=True,            # only used with (DENSE_)CAPTION
        language="en",                          # see https://aka.ms/cv-languages
        model_version="latest",
    )
def vectorize_image_from_sdk(client, blob_url):
    """Request an embedding vector for *blob_url* through the SDK client.

    NOTE(review): ``ImageAnalysisClient`` may not expose a public
    ``vectorize`` method in all SDK versions — confirm against the
    installed package before relying on this path.
    """
    vector_result = client.vectorize(
        image_url=blob_url,
        language="en",            # see https://aka.ms/cv-languages
        model_version="latest",   # analysis model version; defaults to "latest"
    )
    return vector_result
# ---------------------------------------------------------------------------
# Driver: vectorize images in batches and (optionally) upload the resulting
# documents to the Azure AI Search index.
# ---------------------------------------------------------------------------
for batch_start in range(1, total_images + 1, batch_size):
    vectorized_images = {}
    documents = []
    # Vectorize up to batch_size images at a time.
    batch_end = min(batch_start + batch_size, total_images + 1)
    for i in range(batch_start, batch_end):
        file_name = f"{i:06}"  # blobs are named with zero-padded 6-digit ids
        blob_url = sas_template.format(file=file_name)
        try:
            #response = get_image_vector(blob_url, vision_api_key, "eastus")
            response = vectorize_image(analysis_client, blob_url)
            print(repr(response))
            if response:
                vectorized_images[file_name] = response
                # FIX: reuse blob_url instead of re-formatting the SAS
                # template a second time with the same file name.
                documents += [
                    {"id": file_name, "description": repr(get_description(file_name, blob_url)), "vector": response}
                ]
        except Exception as e:
            # Best-effort: log the failure and continue with the next image.
            print(f"Error processing {file_name}.jpg: {e}")
    print(f"Vectorization complete for images {batch_start} to {min(batch_start + batch_size - 1, total_images)}")
    # Upload batch to Azure AI Search
    if len(documents) > 0:
        # NOTE(review): the upload call is commented out, so the log line
        # below overstates what happened — re-enable before trusting it.
        # search_client.upload_documents(documents)
        print(f"Uploaded {len(documents)} images {batch_start} to {batch_end} to {index_name}.")
print(f"Vectorized images successfully added to {index_name}!")
# (blog-page boilerplate removed)