Saturday, May 31, 2025

 In continuation of the previous three articles on 1. vectorizing and analyzing drone images, 2. performing semantic retrieval from the vector store for drone-sensing applications, and 3. errors encountered and their resolutions, this article takes a step forward from one-shot RAG-based responses to automatic query decomposition and multiple query executions using agentic retrieval, which has shown up to a 40% improvement in F-score.

from azure.search.documents.indexes import SearchIndexClient

from azure.search.documents.indexes.models import (

    AzureOpenAIVectorizerParameters,

    KnowledgeAgent,

    KnowledgeAgentAzureOpenAIModel,

    KnowledgeAgentRequestLimits,

    KnowledgeAgentTargetIndex

)

from dotenv import load_dotenv

from azure.identity import DefaultAzureCredential, get_bearer_token_provider

import os

load_dotenv(override=True)

answer_model = os.getenv("ANSWER_MODEL", "gpt-4o")

endpoint = os.environ["AZURE_SEARCH_ENDPOINT"]

credential = DefaultAzureCredential()

token_provider = get_bearer_token_provider(credential, "https://search.azure.com/.default")

index_name = os.getenv("AZURE_SEARCH_INDEX", "index01")

azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]

azure_openai_gpt_deployment = os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT", "gpt-4o")

azure_openai_gpt_model = os.getenv("AZURE_OPENAI_GPT_MODEL", "gpt-4o")

azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2025-03-01-preview")

azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-large")

azure_openai_embedding_model = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-3-large")

agent_name = os.getenv("AZURE_SEARCH_AGENT_NAME", "image-search-agent")

# Upper bound on the size of the agent's grounding response (the env var name here is illustrative)
agent_max_output_tokens = int(os.getenv("AZURE_SEARCH_AGENT_MAX_OUTPUT_TOKENS", "10000"))

api_version = "2025-05-01-Preview"

Sample .env entries referenced above (placeholders in angle brackets):

AZURE_OPENAI_ENDPOINT=https://<openai-resource-name>.openai.azure.com

AZURE_OPENAI_GPT_DEPLOYMENT=gpt-4o-mini

AZURE_SEARCH_ENDPOINT=https://<search-resource-name>.search.windows.net

AZURE_SEARCH_INDEX_NAME=agentic-retrieval-drone-images
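# Define a knowledge agent that targets the drone-image index and uses the Azure OpenAI deployment for query planning and reranked retrieval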

agent=KnowledgeAgent(

    name=agent_name,

    target_indexes=[

        KnowledgeAgentTargetIndex(

            index_name=index_name, default_include_reference_source_data=True,

            default_reranker_threshold=2.5

        )

    ],

    models=[

        KnowledgeAgentAzureOpenAIModel(

            azure_open_ai_parameters=AzureOpenAIVectorizerParameters(

                resource_url=azure_openai_endpoint,

                deployment_name=azure_openai_gpt_deployment,

                model_name=azure_openai_gpt_model,

            )

        )

    ],

    request_limits=KnowledgeAgentRequestLimits(

        max_output_size=agent_max_output_tokens

    )

)
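# Create or update the knowledge agent on the Azure AI Search service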

index_client = SearchIndexClient(endpoint=endpoint, credential=credential)

index_client.create_or_update_agent(agent)

instructions = """

A Q&A agent that can answer questions about the drone images stored in Azure AI Search.

Sources have a JSON description and vector format with a ref_id that must be cited in the answer.

If you do not have the answer, respond with "I don't know".

"""

messages = [

    {

        "role": "system",

        "content": instructions

    }

]

from azure.search.documents.agent import KnowledgeAgentRetrievalClient

from azure.search.documents.agent.models import KnowledgeAgentRetrievalRequest, KnowledgeAgentMessage, KnowledgeAgentMessageTextContent, KnowledgeAgentIndexParams
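# Client for sending retrieval requests to the knowledge agent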

agent_client = KnowledgeAgentRetrievalClient(endpoint=endpoint, agent_name=agent_name, credential=credential)

messages.append({

  "role": "user",

  "content":

"""

How many red cars could be found?

"""

})
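# The knowledge agent decomposes the conversation into subqueries, executes them against the index, and returns a merged, reranked grounding response.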


retrieval_result = agent_client.retrieve(

    retrieval_request=KnowledgeAgentRetrievalRequest(

        messages=[KnowledgeAgentMessage(role=msg["role"], content=[KnowledgeAgentMessageTextContent(text=msg["content"])]) for msg in messages if msg["role"] != "system"],

        target_index_params=[KnowledgeAgentIndexParams(index_name=index_name, reranker_threshold=2.5)]

    )

)

messages.append({

   "role": "assistant",

   "content": retrieval_result.response[0].content[0].text

})

print(messages)

import textwrap

print("Response")

print(textwrap.fill(retrieval_result.response[0].content[0].text, width=120))

import json

print("Activity")

print(json.dumps([a.as_dict() for a in retrieval_result.activity], indent=2))

print("Results")

print(json.dumps([r.as_dict() for r in retrieval_result.references], indent=2))


Friday, May 30, 2025

 In continuation of the previous two posts on vectorizing and analyzing drone images and on performing semantic retrieval for a wide variety of drone-sensing applications, this article lists some of the errors encountered while setting up the infrastructure on Azure, along with possible resolutions:

1. The index size appears as 0 even though the index correctly returns entries, and semantic queries fail. The exact error message is:

400: An error occurred when calling Azure Cognitive Search: Azure Search Error: 400, message='Server responded with status 400. Error message: {"error":{"code":"","message":"This index must have valid semantic configurations defined before using the 'semanticConfiguration' query parameter.\r\nParameter name: semanticConfiguration"}}', url='https://srch-vision-01.search.windows.net/indexes/index00/docs/search?api-version=2024-03-01-preview'

Semantic search with Azure Search failed with configuration=default.

Please ensure semantic search is enabled and you have semantic search quota available on your instance.

  This error can be resolved by going to the vector search settings of the Azure AI Search resource and ensuring that the semantic search configuration is created or updated followed by resetting and restarting the indexer. Semantic configurations describe the title, content, and keywords fields that will be used for semantic ranking, captions, highlights, and answers.
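The same fix can also be applied programmatically with the azure-search-documents SDK. The following is a minimal sketch, assuming an index named index00 that exposes a description field and the configuration name my-semantic-config used elsewhere in these posts:

import os
from azure.identity import DefaultAzureCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch
)

index_client = SearchIndexClient(endpoint=os.environ["AZURE_SEARCH_ENDPOINT"], credential=DefaultAzureCredential())

# Fetch the existing index definition and attach a semantic configuration to it
index = index_client.get_index("index00")
index.semantic_search = SemanticSearch(
    configurations=[
        SemanticConfiguration(
            name="my-semantic-config",
            prioritized_fields=SemanticPrioritizedFields(
                content_fields=[SemanticField(field_name="description")]
            )
        )
    ],
    default_configuration_name="my-semantic-config"
)
index_client.create_or_update_index(index)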

2. A SubscriptionIsOverQuotaForSku error occurs when compute cannot be provisioned to deploy a chat user-interface web application. The exact error message is:

This region has quota of 0 instances for your subscription. Try selecting different region or SKU.

The resolution for this error is to request an increase to the limits defined on the My quotas page for the subscription, or to select a different region or SKU.

3. An error is encountered when adding an embedding model to the vector store. The exact error message is:

Error: Request failed with status code 400

    at ht (https://ai.azure.com/assets/manualChunk_data-fetch-84262679.js:14:85613)

    at dt (https://ai.azure.com/assets/manualChunk_data-fetch-84262679.js:14:85796)

    at XMLHttpRequest.S (https://ai.azure.com/assets/manualChunk_data-fetch-84262679.js:15:1655)

azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2

Failed to updated Index "index00", error: "The request is invalid. Details: definition: Error with vectorizer 'vectorizer-1748496106073': 'dimensions' parameter is out of range for the 'modelName' value of 'text-embedding-ada-002'. Value was '1024', Expected value to be 1536."

The dimensions configured for the vectors in the vector store was 1024 in this case, but the embedding models operate on vectors with the following dimensions:

• text-embedding-ada-002 (1536 dimensions)

• text-embedding-3-small (1536 dimensions)

• text-embedding-3-large (3072 dimensions)

Increasing the dimensions of the vectors in the store from 1024 to the value corresponding to your choice of embedding model resolves this error. This must be followed by resetting and rerunning the indexer, especially if the vectorizer is added to a vector profile that uses the Hnsw or ExhaustiveKnn vector algorithms. This can be done with the following REST calls:

• POST /indexers/[indexer name]/reset?api-version=[api-version]

• POST /indexers/[indexer name]/run?api-version=[api-version]

• GET /indexers/[indexer name]/status?api-version=[api-version]

For example:

POST {endpoint}/indexers('{indexerName}')/search.run?api-version=2024-07-01
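The same reset, run, and status operations are also available through the azure-search-documents Python SDK; a minimal sketch, assuming a hypothetical indexer name indexer00:

import os
from azure.identity import DefaultAzureCredential
from azure.search.documents.indexes import SearchIndexerClient

indexer_name = "indexer00"  # hypothetical indexer name
indexer_client = SearchIndexerClient(endpoint=os.environ["AZURE_SEARCH_ENDPOINT"], credential=DefaultAzureCredential())
indexer_client.reset_indexer(indexer_name)   # clear change-tracking state so documents are re-vectorized
indexer_client.run_indexer(indexer_name)     # start a fresh indexer run
status = indexer_client.get_indexer_status(indexer_name)
print(status.last_result.status if status.last_result else status.status)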

#Codingexercise: https://1drv.ms/w/c/d609fb70e39b65c8/Echlm-Nw-wkggNYlIwEAAAABD8nSsN--hM7kfA-W_mzuWw?e=jr5Kc9 

Thursday, May 29, 2025

 The following code sample demonstrates searching the image vector store for matches to include in RAG-based query responses.

# Import libraries

import json

import os

import requests

from azure.core.credentials import AzureKeyCredential

from azure.identity import DefaultAzureCredential

from azure.identity import get_bearer_token_provider

from azure.search.documents import SearchClient

from azure.search.documents.indexes import SearchIndexClient

from azure.search.documents.models import (

    RawVectorQuery,

    VectorizableTextQuery

)

from openai import AzureOpenAI

search_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")

index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")

api_version = os.getenv("AZURE_SEARCH_API_VERSION")

search_api_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")

vision_api_key = os.getenv("AZURE_AI_VISION_API_KEY")

vision_api_version = os.getenv("AZURE_AI_VISION_API_VERSION")

vision_region = os.getenv("AZURE_AI_VISION_REGION")

vision_endpoint = os.getenv("AZURE_AI_VISION_ENDPOINT")

openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")

openai_api_key = os.getenv("AZURE_OPENAI_API_KEY")

# Set up the Azure OpenAI client with a Microsoft Entra ID token provider.
# Note: get_bearer_token_provider requires a token credential, not an API key;
# to use the key loaded above, pass api_key=openai_api_key to AzureOpenAI instead.

token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default")

openai_client = AzureOpenAI(

     api_version="2024-06-01",

     azure_endpoint=openai_endpoint,

     azure_ad_token_provider=token_provider

 )

deployment_name = "gpt-4o"

# Set up the Azure AI Search client

search_client = SearchClient(

     endpoint=search_endpoint,

     index_name=index_name,

     credential=AzureKeyCredential(search_api_key)

 )

# Provide instructions to the model

GROUNDED_PROMPT="""

You are an AI assistant that helps users learn from the images found in the source material.

Answer the query using only the sources provided below.

Use bullets if the answer has multiple points.

If the answer is longer than 3 sentences, provide a summary.

Answer ONLY with the facts listed in the list of sources below. Cite your source when you answer the question

If there isn't enough information below, say you don't know.

Do not generate answers that don't use the sources below.

Query: {query}

Sources:\n{sources}

"""

# Provide the search query.

# It's hybrid: a keyword search on "query", with text-to-vector conversion for "vector_query".

# The vector query finds 50 nearest neighbor matches in the search index

query="Do bicycles have a dedicated crossing at street intersections?"

vector_query = VectorizableTextQuery(text=query, k_nearest_neighbors=50, fields="image_vector")

# Set up the search results and the chat thread.

# Retrieve the selected fields from the search index related to the question.

# Search results are limited to the top 5 matches. Limiting top can help you stay under LLM quotas.

search_results = search_client.search(

    search_text=query,

    vector_queries= [vector_query],

    select=["id", "description"],

    top=5,

)

# Use a unique separator to make the sources distinct.

# We chose repeated equal signs (=) followed by a newline because it's unlikely the source documents contain this sequence.

sources_formatted = "=================\n".join([f'ID: {document["id"]}, DESCRIPTION: {document["description"]}' for document in search_results])

response = openai_client.chat.completions.create(

    messages=[

        {

            "role": "user",

            "content": GROUNDED_PROMPT.format(query=query, sources=sources_formatted)

        }

    ],

    model=deployment_name

)

print(response.choices[0].message.content)


Wednesday, May 28, 2025

 In continuation of the previous article on searching the vector store for references to include in RAG, the following demonstrates the chat:

# Import libraries

from azure.identity import DefaultAzureCredential, get_bearer_token_provider

from azure.search.documents import SearchClient

from azure.search.documents.models import VectorizableTextQuery

from openai import AzureOpenAI

# Set up the Azure OpenAI client
# (AZURE_OPENAI_ACCOUNT, AZURE_SEARCH_SERVICE and index_name are assumed to be set as in the previous posts)

credential = DefaultAzureCredential()

token_provider = get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")

openai_client = AzureOpenAI(

     api_version="2024-06-01",

     azure_endpoint=AZURE_OPENAI_ACCOUNT,

     azure_ad_token_provider=token_provider

 )

deployment_name = "gpt-4o"

# Set up the Azure AI Search client

search_client = SearchClient(

     endpoint=AZURE_SEARCH_SERVICE,

     index_name=index_name,

     credential=credential

 )

# Provide instructions to the model

GROUNDED_PROMPT="""

You are an AI assistant that helps users learn from the information found in the source material.

Answer the query using only the sources provided below.

Use bullets if the answer has multiple points.

If the answer is longer than 3 sentences, provide a summary.

Answer ONLY with the facts listed in the list of sources below. Cite your source when you answer the question

If there isn't enough information below, say you don't know.

Do not generate answers that don't use the sources below.

Query: {query}

Sources:\n{sources}

"""

# Provide the search query.

# It's hybrid: a keyword search on "query", with text-to-vector conversion for "vector_query".

# The vector query finds 50 nearest neighbor matches in the search index

query="Do bicycles have a dedicated crossing at street intersections?"

vector_query = VectorizableTextQuery(text=query, k_nearest_neighbors=50, fields="text_vector")

# Set up the search results and the chat thread.

# Retrieve the selected fields from the search index related to the question.

# Search results are limited to the top 5 matches. Limiting top can help you stay under LLM quotas.

search_results = search_client.search(

    search_text=query,

    vector_queries= [vector_query],

    select=["id", "description"],

    top=5,

)

# Newlines could be in the OCR'd content or in PDFs, as is the case for the sample PDFs used for this tutorial.

# Use a unique separator to make the sources distinct.

# We chose repeated equal signs (=) followed by a newline because it's unlikely the source documents contain this sequence.

sources_formatted = "=================\n".join([f'ID: {document["id"]}, DESCRIPTION: {document["description"]}' for document in search_results])

response = openai_client.chat.completions.create(

    messages=[

        {

            "role": "user",

            "content": GROUNDED_PROMPT.format(query=query, sources=sources_formatted)

        }

    ],

    model=deployment_name

)

print(response.choices[0].message.content)


Tuesday, May 27, 2025

 Based on the earlier posts on vectorizing and indexing the drone images, the following executes a query on the index to retrieve results.

#! /usr/bin/python

import json

import sys

import os

import requests

from azure.core.credentials import AzureKeyCredential

from azure.identity import DefaultAzureCredential

from azure.search.documents import SearchClient

from azure.search.documents.indexes import SearchIndexClient

from azure.search.documents.models import (

    RawVectorQuery,

    VectorizableTextQuery

)

sys.path.insert(0, os.path.abspath(".."))

from visionprocessor.vectorizer import vectorize_image

search_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")

index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")

api_version = os.getenv("AZURE_SEARCH_API_VERSION")

search_api_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")

vision_api_key = os.getenv("AZURE_AI_VISION_API_KEY")

vision_api_version = os.getenv("AZURE_AI_VISION_API_VERSION")

vision_region = os.getenv("AZURE_AI_VISION_REGION")

vision_endpoint = os.getenv("AZURE_AI_VISION_ENDPOINT")

credential = AzureKeyCredential(search_api_key)

search_client = SearchClient(endpoint=search_endpoint, index_name=index_name, credential=credential)

"""

# Each of the search cases commented out is valid and successful but we will keep it simple

# start with a vector search

blob_url = "https://saravinoteblogs.blob.core.windows.net/playground/vision/query/RedCar4.jpg?sp=racwdle&st=2025-05-26T23:54:09Z&se=2025-05-27T07:54:09Z&spr=https&sv=2024-11-04&sr=d&sig=9RRmmtlBnEiFsOGHJ2d%2ByEkBz2gxXOrQEc%2B4uf%2Fd6ao%3D&sdd=2"

vector = vectorize_image(blob_url, vision_api_key, "eastus")

print(f"len={len(vector)}")

print("search_client created")

vector_query = RawVectorQuery(vector=vector,

                              k=3,

                              fields = "image_vector")

results = search_client.search(

    search_text=None,

    vector_queries= [vector_query],

    select=["id", "description"]

)

# and simple text multimodal

results = search_client.search(query_type='simple',

    search_text="green street crossing mark for bicycles" ,

    select='id,description',

    include_total_count=True,

    top=10)

"""

# and effect of alternate jargon

results = search_client.search(query_type='simple',

    search_text="red car" ,

    select='id,description',

    include_total_count=True,

    top=5)

"""

# and semantic search

results = search_client.search(query_type='semantic', semantic_configuration_name='my-semantic-config',

    search_text="green crossing for bicycles at street intersection",

    select='id,description', query_caption='extractive')

# and vectorizable text query

query="Do bicycles have a dedicated crossing at street intersections?"

vector_query = VectorizableTextQuery(text=query, k_nearest_neighbors=50, fields="image_vector")

# Set up the search results and the chat thread.

# Retrieve the selected fields from the search index related to the question.

# Search results are limited to the top 5 matches. Limiting top can help you stay under LLM quotas.

results = search_client.search(

    search_text=query,

    vector_queries= [vector_query],

    select=["id", "description"],

    include_total_count=True,

    top=5,

)

# this one returns Message: Field 'image_vector' does not have a vectorizer defined in it's vector profile.

"""

print(repr(results))

if results:

    print(f"Number of results: {results.get_count()}")

    for result in results:

         if result:

            # print(repr(result))

            print(f"{result['id']}")

            # print("\n")

            # break

Output:

<iterator object azure.core.paging.ItemPaged at 0x2604c3fa510>

Number of results: 12305

017760

017761

017754

004391

014962


Monday, May 26, 2025

import cv2

import numpy as np



# Function to detect and extract features from the aerial images


def extract_features(image_path): 
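    # ORB (Oriented FAST and Rotated BRIEF) detects keypoints and computes binary descriptors on the grayscale image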

    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 

    orb = cv2.ORB_create() 

    keypoints, descriptors = orb.detectAndCompute(image, None) 

    return keypoints, descriptors, image 


def match_features(descriptors1, descriptors2): 
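    # Brute-force matcher with Hamming distance and cross-checking, appropriate for ORB's binary descriptors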

    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) 

    matches = matcher.match(descriptors1, descriptors2) 

    matches = sorted(matches, key=lambda x: x.distance)  # Sort by match quality 

    return matches 



def get_matches(image1_file, image2_file):

    # Extract features from both images 

    keypoints1, descriptors1, image1 = extract_features(image1_file) 

    keypoints2, descriptors2, image2 = extract_features(image2_file) 

    matches = match_features(descriptors1, descriptors2) 

    return matches



def get_matches_image(image1_file, image2_file):

    # Extract features from both images 

    keypoints1, descriptors1, image1 = extract_features(image1_file) 

    keypoints2, descriptors2, image2 = extract_features(image2_file) 

    matches = match_features(descriptors1, descriptors2)

    output_image = cv2.drawMatches(image1, keypoints1, image2, keypoints2, matches[:50], None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS) 

    return output_image

    

# sample invocation

#output_image = get_matches_image("000001.jpg", "000002.jpg")

#cv2.imwrite("output_image_1.jpg", output_image)

#cv2.waitKey(0)

#cv2.destroyAllWindows()

#sample output

# https://tinyurl.com/orbimage

Sunday, May 25, 2025

 This is a summary of the book titled “AI 2041: Ten visions of the future” written by AI expert Kai-fu Lee and fiction author Chen Qiufan and published by Currency in 2021. As AI permeates all walks of life and its usage is moving beyond large-language models to agentic frameworks, the authors envision its role in 2041. They alternate between stories and nonfiction analyses and remain hopeful for AI to lift poverty, improve healthcare and let people lead meaningful lives.

Deep learning, a recent breakthrough in machine learning, can provide tailored health advice. AI programming works through layers of neural networks, mimicking the human brain's functioning to maximize a specified aim. Deepfakes, which rely on computer vision, can create serious problems for governments and businesses. By 2041, anti-deepfake software is predicted to serve a similar purpose as antivirus software. In healthcare, digitization has led to more efficient and accurate AI, with researchers focusing on harnessing massive databases of digitized patient records for drug and vaccine development and "precision medicine." AI will speed up the development of vaccines and drugs, potentially reducing development costs, but it can also reproduce societal prejudices like racism.

AI-driven autonomous vehicles will transform transportation by providing on-demand cars, safer, and cheaper than human-driven vehicles. However, ethical and legal issues arise, such as the ability of AI to make complex ethical choices and determine responsibility for fatalities. This is similar to the invention of gunpowder and nuclear bombs, and the development of AI-driven weapons, such as the Israeli Harpy drone and "slaughterbots." Autonomous weapons will become faster, more precise, and cheaper to make, but ethical objections to delegating killing to machines outweigh their benefits. Mass unemployment may exacerbate economic and social inequality, while technology companies may benefit. Society must reconfigure old jobs and generate new ones, with the possibility of an AI-inspired creative renaissance where people revere creativity, compassion, and humanity.

Lee and Qiufan present human conjecture about AI's future through speculative fiction and rigorous analysis. They provide emotional consideration of likely facts and imaginative interpretations, making their work readable, fascinating, and thought-provoking. They use available data to turn information into narrative.


Saturday, May 24, 2025

agentic retrieval

 In continuation of the previous post, you can either upgrade your LLM from text-to-text to multimodal or reasoning models, or you can employ specific agents in an agentic retrieval pipeline to get better analysis of your drone images. Agentic retrieval is easier to diversify and scale, but the model behind it is harder to upskill. The flip side is that as more agents call LLMs, the number and cost of tokens consumed grows proportionally, whereas the sample data can be enhanced to fine-tune or improve the chain of thought of a reasoning model with zero cost or change to the infrastructure. The following section walks through a sample of agentic retrieval. 

#! /usr/bin/python 

""" 
requirements.txt: 
azure-identity 
openai 
aiohttp 
ipykernel 
dotenv 
requests 
azure-search-documents==11.6.0b12 
""" 
from azure.ai.agents.models import FunctionTool, ToolSet, ListSortOrder 
 
from azure.search.documents.agent import KnowledgeAgentRetrievalClient 
from azure.search.documents.agent.models import KnowledgeAgentRetrievalRequest, KnowledgeAgentMessage, KnowledgeAgentMessageTextContent, KnowledgeAgentIndexParams 
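# Assumes endpoint, agent_name, index_name and credential are defined as in the previous posts,
# that project_client is an azure.ai.projects.AIProjectClient for the Azure AI Foundry project,
# and that `agent` (used further below) is an agent created on that project.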
 
agent_client = KnowledgeAgentRetrievalClient(endpoint=endpoint, agent_name=agent_name, credential=credential) 
 
thread = project_client.agents.threads.create() 
retrieval_results = {} 
 
def agentic_retrieval() -> str: 
    """ 
        Searches the drone images and other curated metadata and facts. 
        The returned string is in a JSON format that contains the reference id. 
        Using the same format as in agent's response 
        References are cited by zero-based id number 
    """ 
    # Take the last 5 messages in the conversation 
    messages = project_client.agents.messages.list(thread.id, limit=5, order=ListSortOrder.DESCENDING) 
    # Reverse the order so the most recent message is last 
    messages = list(messages) 
    messages.reverse() 
    retrieval_result = agent_client.retrieve( 
        retrieval_request=KnowledgeAgentRetrievalRequest( 
            messages=[KnowledgeAgentMessage(role=msg["role"], content=[KnowledgeAgentMessageTextContent(text=msg.content[0].text)]) for msg in messages if msg["role"] != "system"], 
            target_index_params=[KnowledgeAgentIndexParams(index_name=index_name, reranker_threshold=2.5)] 
        ) 
    ) 
 
    # Associate the retrieval results with the last message in the conversation 
    last_message = messages[-1] 
    retrieval_results[last_message.id] = retrieval_result 
 
    # Return the grounding response to the agent 
    return retrieval_result.response[0].content[0].text 
 
# https://learn.microsoft.com/en-us/azure/ai-services/agents/how-to/tools/function-calling 
functions = FunctionTool({ agentic_retrieval }) 
toolset = ToolSet() 
toolset.add(functions) 
project_client.agents.enable_auto_function_calls(toolset) 

 

# start a chat 

from azure.ai.agents.models import AgentsNamedToolChoice, AgentsNamedToolChoiceType, FunctionName 
 
message = project_client.agents.messages.create( 
    thread_id=thread.id, 
    role="user", 
    content=""" 
        Which landmarks are responsible for intersections that are more prone to vehicle and pedestrian traffic conflicts or activities? Which hours of the day are quiet for pedestrian traffic at those intersections? 
    """ 
) 
 
run = project_client.agents.runs.create_and_process( 
    thread_id=thread.id, 
    agent_id=agent.id, 
    tool_choice=AgentsNamedToolChoice(type=AgentsNamedToolChoiceType.FUNCTION, function=FunctionName(name="agentic_retrieval")), 
    toolset=toolset) 
if run.status == "failed": 
    raise RuntimeError(f"Run failed: {run.last_error}") 
output = project_client.agents.messages.get_last_message_text_by_role(thread_id=thread.id, role="assistant").text.value 
 
print("Agent response:", output.replace(".", "\n"))