Shredding images into objects for vector search:
The previous article discussed a technique to enhance image retrieval for drone images, following the vectorize-and-analyze method described in the references; it is helpful regardless of whether the retrieval is zero-agent, single-agent, or multi-agent based. Because objects are highly likely to be mentioned in queries, retrieval improves further when it runs against an index of objects, each with a BM25-searchable description and a semantic-similarity vector. One caveat: the same object may be detected in multiple aerial images, and different objects spread across images over the temporal dimension may be more meaningful to group together. This can be achieved by the following steps:
Retrieve all objects (Azure Search documents) with their IDs and vectors
For each object, if not already grouped:
A. Perform a vector query with its vector, excluding itself
B. Collect objects with a score above a threshold
C. Use sliding window for finding the same object repeated over consecutive images, discard duplicates
D. Use a reranker to find temporally distributed, different objects (IDs are far apart)
E. Add these objects to a group
F. Mark all objects in the group and duplicates as processed
The following code is an illustration of populating an index with all the objects (id, description, citation, boundingBox, and vector) from all the images in order to perform the above steps.
import json
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential
import os
import re
# --- Azure AI Search configuration, read once from the environment ---
search_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
api_version = os.getenv("AZURE_SEARCH_API_VERSION")
search_api_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME", "index00")
dest_index_name = os.getenv("AZURE_SEARCH_1024_INDEX_NAME", "index1024")

# One shared credential for both clients.  The original code constructed
# three separate AzureKeyCredential objects (one of which was never used);
# a single instance suffices.
credential = AzureKeyCredential(search_api_key)

# Source index: image-level documents carrying a serialized analysis blob.
search_client = SearchClient(
    endpoint=search_endpoint,
    index_name=index_name,
    credential=credential,
)

# Destination index: one document per detected object (shredded captions).
destination_client = SearchClient(
    endpoint=search_endpoint,
    index_name=dest_index_name,
    credential=credential,
)
def prepare_json_string_for_load(text):
    """Normalize a pseudo-JSON string (single-quoted keys/values) into a
    form that ``json.loads`` accepts, and strip newlines together with any
    indentation following them.

    The replacement order matters: embedded double quotes are demoted to
    single quotes first, then the single quotes that act as JSON delimiters
    (adjacent to braces, colons, commas, or whitespace) are promoted back
    to double quotes.
    """
    replacements = (
        ('"', "'"),
        ("{'", '{"'),
        ("'}", '"}'),
        (" '", ' "'),
        ("' ", '" '),
        (":'", ':"'),
        ("':", '":'),
        (",'", ',"'),
        ("',", '",'),
    )
    for old, new in replacements:
        text = text.replace(old, new)
    # Collapse each newline plus its trailing whitespace into nothing.
    return re.sub(r'\n\s*', '', text)
def to_string(bounding_box):
    """Serialize a bounding-box dict to a compact ``x,y,w,h`` CSV string."""
    ordered = (bounding_box[key] for key in ("x", "y", "w", "h"))
    return ",".join(str(value) for value in ordered)
# --- Pagination / shredding loop -------------------------------------------
# Pages through every image-level document in the source index, shreds each
# image's dense-caption result into one document per detected object, and
# uploads the flattened documents to the destination index.
page_size = 10        # image documents fetched per round trip
skip = 0
next_object_id = 0    # monotonically increasing key for every uploaded document

while True:
    # Fetch the next page of image-level documents.
    search_results = search_client.search(
        "*",
        select=["id", "description", "vector"],
        top=page_size,
        skip=skip,
        include_total_count=True,
    )
    if search_results.get_count() == 0:
        break

    flat_list = []
    for entry in search_results:
        # Allocate a unique id for the image itself; caption documents get
        # their own ids below.  (The original reused one counter per image,
        # which gave every caption of an image the same key and collided
        # with the next image's id — duplicate keys silently overwrite.)
        entry_id = next_object_id
        next_object_id += 1
        tags = ""
        title = ""
        description_text = prepare_json_string_for_load(entry["description"]).replace('""', '')
        description_json = json.loads(description_text)
        if description_json and description_json.get("description"):
            title = description_json["description"]
        # Navigate the analysis payload defensively; any missing level
        # simply yields no tags / no captions.
        data = (description_json or {}).get("_data") or {}
        tags_values = (data.get("tagsResult") or {}).get("values")
        if tags_values:
            tags = ",".join(tag["name"] for tag in tags_values).strip(",")
        captions = (data.get("denseCaptionsResult") or {}).get("values")
        if captions:
            # One destination document per dense caption (detected object).
            for item in captions:
                doc_id = next_object_id
                next_object_id += 1
                text = item.get("text", "")
                bounding_box = item.get("boundingBox", {"x": 0, "y": 0, "w": 0, "h": 0})
                # NOTE(review): Azure Search key fields are typically
                # Edm.String — confirm the index schema accepts int keys.
                flat_list.append({
                    "id": doc_id,
                    "image_id": entry_id,
                    "text": text,
                    "bounding_box": to_string(bounding_box),
                    "tags": tags,
                    "title": title,
                })
        else:
            # No objects detected: keep an image-level placeholder document.
            # (Fixed: the original printed the builtin `id`, not the entry's.)
            print(f"Nothing found in entry with id:{entry_id}")
            flat_list.append({
                "id": entry_id,
                "tags": tags,
                "title": title,
            })

    if flat_list:
        upload_results = destination_client.upload_documents(flat_list)
        error = ",".join(
            r.error_message for r in upload_results if r.error_message
        ).strip(",")
        if error:
            print(error)
        succeeded = sum(1 for r in upload_results if r.succeeded)
        # Compare against the number of documents actually uploaded, not the
        # page size: one image can yield many object documents.
        if succeeded == len(flat_list):
            print(f"success in processing entries with id: {skip} to {skip + page_size}")
    skip += page_size
Vectorize and Analyze: https://1drv.ms/w/c/d609fb70e39b65c8/Eb6vxQeXGE9MsVwwdsvLSskBLgFNNuClDqAepem73pMcbQ?e=LtQasJ
No comments:
Post a Comment