Friday, June 6, 2025

 Shredding images into objects for vector search:

The previous article discussed a technique to enhance image retrieval for drone images following the vectorize-and-analyze method described in the references; it is helpful regardless of whether the retrieval is zero-agent, one-agent, or multi-agent based. To enhance retrieval, given the high probability of objects being mentioned in a query, it would be even better to query against an index of objects along with their BM25 descriptions and semantic-similarity vectors. One caveat is that the same object might be detected in multiple aerial images, and different objects spread out across the temporal dimension might be more meaningful to group together. This can be achieved by the following steps:

Retrieve all objects (Azure AI Search documents) with their IDs and vectors

For each object, if not already grouped:

A. Perform a vector query with its vector, excluding itself

B. Collect objects with a score above a threshold

C. Use sliding window for finding the same object repeated over consecutive images, discard duplicates

D. Use a reranker to find temporally distributed, distinct objects (whose IDs are wide apart)

E. Add these objects to a group

F. Mark all objects in the group and duplicates as processed

The following code illustrates how to populate an index with all the objects (id, description, citation, boundingBox, and vector) from all the images, in support of the steps above.

import json

from azure.search.documents import SearchClient

from azure.core.credentials import AzureKeyCredential

import os

import re

search_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]

api_version = os.getenv("AZURE_SEARCH_API_VERSION")

search_api_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")

index_name = os.getenv("AZURE_SEARCH_INDEX_NAME", "index00")

dest_index_name = os.getenv("AZURE_SEARCH_1024_INDEX_NAME", "index1024")

credential = AzureKeyCredential(search_api_key)

# Initialize SearchClient

search_client = SearchClient(

    endpoint=search_endpoint,

    index_name=index_name,

    credential=AzureKeyCredential(search_api_key)

)

destination_client = SearchClient(

    endpoint=search_endpoint,

    index_name=dest_index_name,

    credential=AzureKeyCredential(search_api_key)

)

def prepare_json_string_for_load(text):

  text = text.replace("\"", "'")

  text = text.replace("{'", "{\"")

  text = text.replace("'}", "\"}")

  text = text.replace(" '", " \"")

  text = text.replace("' ", "\" ")

  text = text.replace(":'", ":\"")

  text = text.replace("':", "\":")

  text = text.replace(",'", ",\"")

  text = text.replace("',", "\",")

  return re.sub(r'\n\s*', '', text)

def to_string(bounding_box):

    return f"{bounding_box['x']},{bounding_box['y']},{bounding_box['w']},{bounding_box['h']}"

page_size = 10

skip = 0

total = 17833

index = 0

while True:

    # Retrieve the first 10 entries from the index

    search_results = search_client.search("*", select=["id", "description", "vector"], top=page_size, skip = skip, include_total_count=True)

    # Process entries and shred descriptions

    flat_list = []

    if search_results.get_count() == 0:

        break

    for entry in search_results:

        entry_id = index

        index += 1

        width = 0

        height = 0

        tags = ""

        title = ""

        description_text = prepare_json_string_for_load(entry["description"]).replace('""','')

        description_json = json.loads(description_text)

        if description_json and description_json["description"]:

            title = description_json["description"]

        if description_json and description_json["_data"] and description_json["_data"]["tagsResult"] and description_json["_data"]["tagsResult"]["values"]:

            tags = ','.join([tag["name"] for tag in description_json["_data"]["tagsResult"]["values"]]).strip(",")

        # add entries at object level instead of image level

        if description_json and description_json["_data"] and description_json["_data"]["denseCaptionsResult"] and description_json["_data"]["denseCaptionsResult"]["values"]:

            for item in description_json["_data"]["denseCaptionsResult"]["values"]:

                text = item.get("text", "")

                bounding_box = item.get("boundingBox", {

                    "x": 0,

                    "y": 0,

                    "w": 0,

                    "h": 0

                })

                flat_list.append({

                    "id": index,

                    "image_id": entry_id,

                    "text": text,

                    "bounding_box": to_string(bounding_box),

                    "tags" : tags,

                    "title": title

                })

        else:

            print(f"Nothing found in entry with id:{id}")

        flat_list.append({

        "id": entry_id,

        "tags" : tags,

        "title": title

        })

     if len(flat_list) != 0:

                upload_results = destination_client.upload_documents(flat_list)

                error = ','.join([upload_result.error_message for upload_result in upload_results if upload_result.error_message]).strip(",")

                if error:

                    print(error)

                if len([upload_result.succeeded for upload_result in upload_results if upload_result.succeeded]) == page_size:

                   print(f"success in processing entries with id: {skip} to {skip + page_size}")

    skip += page_size


Vectorize and Analyze: https://1drv.ms/w/c/d609fb70e39b65c8/Eb6vxQeXGE9MsVwwdsvLSskBLgFNNuClDqAepem73pMcbQ?e=LtQasJ


No comments:

Post a Comment