The following samples show ways to detect and count objects in images without training a custom model; they can be applied after image processing and vectorizing. For agentic search, it is preferable that the agents are versed in one of these techniques.
#! /usr/bin/python
import requests
import cv2
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import normalize
import hdbscan
import matplotlib.pyplot as plt
from io import BytesIO
from azure.core.credentials import AzureKeyCredential
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
import os
match_threshold = 0.5
min_number_of_cluster_members = 2
# Azure Vision credentials (read from the environment; not used directly by the snippets below)
vision_endpoint = os.getenv("AZURE_AI_VISION_ENDPOINT")
vision_api_key = os.getenv("AZURE_AI_VISION_API_KEY")
# SAS URLs for the template (object) image and the query scene image
object_uri = os.getenv("AZURE_RED_CAR_2_SAS_URL").strip('"')
scene_uri = os.getenv("AZURE_QUERY_SAS_URI").strip('"')
# Step 1: Download images from SAS URLs
def download_image(url):
    response = requests.get(url)
    image_array = np.frombuffer(response.content, np.uint8)
    return cv2.imdecode(image_array, cv2.IMREAD_COLOR)
# Step 2: Use OpenCV template matching to find object occurrences
def count_object_occurrences(scene, template, threshold=match_threshold):
    scene_gray = cv2.cvtColor(scene, cv2.COLOR_BGR2GRAY)
    template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    result = cv2.matchTemplate(scene_gray, template_gray, cv2.TM_CCOEFF_NORMED)
    locations = np.where(result >= threshold)
    w, h = template_gray.shape[::-1]
    rects = [[pt[0], pt[1], pt[0] + w, pt[1] + h] for pt in zip(*locations[::-1])]
    # Merge overlapping hits so each object instance is counted once
    rects, _ = cv2.groupRectangles(rects, groupThreshold=1, eps=0.5)
    return len(rects)
# Step 3: Count matches
def count_matches():
    scene_img = download_image(scene_uri)
    object_img = download_image(object_uri)
    count = count_object_occurrences(scene_img, object_img)
    return count
# print(f"Detected {count_matches()} occurrences of the object.")
# Output: Detected 1 occurrences of the object.
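Template matching with cv2.matchTemplate is sensitive to scale: if the object appears noticeably larger or smaller in the scene than in the template, the scores fall below the threshold. Below is a minimal sketch of a multi-scale variant, assuming the same imports and match_threshold defined above; the function name, scale range, and step count are illustrative choices, not tuned settings.

def count_object_occurrences_multiscale(scene, template, threshold=match_threshold,
                                        scales=np.linspace(0.6, 1.4, 9)):
    scene_gray = cv2.cvtColor(scene, cv2.COLOR_BGR2GRAY)
    template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    rects = []
    for scale in scales:
        # Resize the template (not the scene) so hit coordinates stay in scene space
        resized = cv2.resize(template_gray, None, fx=scale, fy=scale)
        th, tw = resized.shape
        if th > scene_gray.shape[0] or tw > scene_gray.shape[1]:
            continue  # template no longer fits inside the scene at this scale
        result = cv2.matchTemplate(scene_gray, resized, cv2.TM_CCOEFF_NORMED)
        for x, y in zip(*np.where(result >= threshold)[::-1]):
            rects.append([int(x), int(y), tw, th])
    if not rects:
        return 0
    # Merge hits across positions and scales so each instance is counted once
    rects, _ = cv2.groupRectangles(rects, groupThreshold=1, eps=0.5)
    return len(rects)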
# Load image from SAS URL
def load_image_from_sas(url):
    response = requests.get(url)
    image_array = np.frombuffer(response.content, np.uint8)
    return cv2.imdecode(image_array, cv2.IMREAD_COLOR)
def keypoints_and_descriptors(scene_img, object_img):
    orb = cv2.ORB_create(nfeatures=1000)
    kp1, des1 = orb.detectAndCompute(object_img, None)
    kp2, des2 = orb.detectAndCompute(scene_img, None)
    if des1 is None or des2 is None:
        return None, None, None, None
    return kp1, des1, kp2, des2
# Feature detection and matching
def get_matched_keypoints(scene_img, object_img):
    kp1, des1, kp2, des2 = keypoints_and_descriptors(scene_img, object_img)
    if des1 is None or des2 is None:
        return []
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = matcher.match(des1, des2)
    matches = sorted(matches, key=lambda x: x.distance)
    matched_pts = np.float32([kp2[m.trainIdx].pt for m in matches])
    print(f"matched_pts={matched_pts}")
    return matched_pts
# Extract matched descriptors using ORB
def get_matched_descriptors(scene_img, object_img):
    kp1, des1, kp2, des2 = keypoints_and_descriptors(scene_img, object_img)
    if des1 is None or des2 is None:
        return np.array([])
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = matcher.match(des1, des2)
    matches = sorted(matches, key=lambda x: x.distance)
    matched_descriptors = np.array([des2[m.trainIdx] for m in matches])
    return matched_descriptors
# Cluster matched keypoints using DBSCAN
def cluster_keypoints(points, eps=30, min_samples=min_number_of_cluster_members):
    if len(points) == 0:
        return []
    clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(points)
    labels = clustering.labels_
    return labels
# Cluster keypoints using HDBSCAN
def cluster_keypoints_hdbscan(points, min_cluster_size=min_number_of_cluster_members):
    if len(points) == 0:
        return np.array([])
    clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size)
    labels = clusterer.fit_predict(points)
    # if len(labels) > 0:
    #     plot_clusters(points, labels)
    return labels
# Cluster descriptors using cosine similarity
def cluster_by_similarity(descriptors, min_cluster_size=min_number_of_cluster_members):
    if len(descriptors) == 0:
        return np.array([])
    # Normalize for cosine similarity
    descriptors = normalize(descriptors, norm='l2')
    clusterer = hdbscan.HDBSCAN(
        min_cluster_size=min_cluster_size,
        metric='euclidean',  # Euclidean on normalized vectors ≈ cosine similarity
        cluster_selection_method='eom'
    )
    labels = clusterer.fit_predict(descriptors)
    return labels
# Optional: visualize clusters
def plot_clusters(points, labels):
    plt.figure(figsize=(8, 6))
    for label in set(labels):
        mask = labels == label
        color = 'gray' if label == -1 else None
        plt.scatter(points[mask, 0], points[mask, 1], label=f"Cluster {label}", alpha=0.6, s=30, c=color)
    plt.title("HDBSCAN Clusters of Matched Keypoints")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.legend()
    plt.show()
def count_multiple_matches():
    scene_img = load_image_from_sas(scene_uri)
    object_img = load_image_from_sas(object_uri)
    # Spatial alternative:
    # matched_points = get_matched_keypoints(scene_img, object_img)
    # labels = cluster_keypoints_hdbscan(matched_points)
    descriptors = get_matched_descriptors(scene_img, object_img)
    labels = cluster_by_similarity(descriptors)
    print(f"len of labels={len(labels)} and labels={labels}")
    # Count valid clusters (excluding noise label -1)
    count = len(set(labels)) - (1 if -1 in labels else 0)
    return count
print(f"Estimated object instances: {count_multiple_matches()}")
# for dbscan
#Output: Estimated object instances: 3
# for hdbscan based on similarity
# len of labels=24 and labels=[ 1 -1 -1 1 -1 1 0 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 -1 1 -1]
# Estimated object instances: 2
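Descriptor-similarity clustering can over- or under-count when matches land on background clutter. One way to tighten the estimate, sketched below, is to cluster the matched keypoints spatially with the cluster_keypoints helper above and keep only clusters whose points fit a RANSAC homography against the template. This assumes the helpers defined earlier are in scope and that the object is roughly planar; the function name and the min_inliers value are illustrative, not tuned.

def count_verified_instances(scene_img, object_img, min_inliers=8):
    kp1, des1, kp2, des2 = keypoints_and_descriptors(scene_img, object_img)
    if des1 is None or des2 is None:
        return 0
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = matcher.match(des1, des2)
    src_pts = np.float32([kp1[m.queryIdx].pt for m in matches])  # template coordinates
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches])  # scene coordinates
    labels = cluster_keypoints(dst_pts)  # spatial DBSCAN on scene keypoints
    count = 0
    for label in set(labels) - {-1}:
        mask = labels == label
        if mask.sum() < min_inliers:
            continue
        # Accept the cluster only if its matches are geometrically consistent
        H, inliers = cv2.findHomography(src_pts[mask], dst_pts[mask], cv2.RANSAC, 5.0)
        if H is not None and inliers is not None and inliers.sum() >= min_inliers:
            count += 1
    return count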