The following example illustrates how to remove duplicates from a continuous stream of aerial images:
import cv2
import imagehash
import numpy as np
from PIL import Image
from collections import deque
class ImageDeduplicator:
    """Detect repeated items in a stream using bounded recent-history buffers.

    Two independent checks are provided, each backed by its own ring buffer
    that keeps at most ``buffer_size`` entries (oldest entries are dropped
    automatically):

    * ``is_duplicate`` compares perceptual hashes of images for equality.
    * ``is_visited`` compares embedding vectors by cosine similarity.
    """

    def __init__(self, buffer_size=100, threshold=0.97):
        """Initialize the ring buffers for tracking image hashes and vectors.

        Args:
            buffer_size: Maximum number of recent hashes/vectors remembered.
            threshold: Cosine similarity above which ``is_visited`` treats a
                vector as already seen. The default sits just below 1.0
                (an exact match).
        """
        self.buffer_size = buffer_size
        self.hash_buffer = deque(maxlen=buffer_size)
        self.vector_buffer = deque(maxlen=buffer_size)
        self.threshold = threshold

    def compute_hash(self, image):
        """Compute the perceptual hash of an image given as an array."""
        return imagehash.phash(Image.fromarray(image))

    def is_duplicate(self, image):
        """Return True if the image's hash equals one already in the buffer.

        A hash that has not been seen is recorded before returning False, so
        a second call with the same image reports a duplicate.
        """
        img_hash = self.compute_hash(image)
        if img_hash in self.hash_buffer:
            return True
        self.hash_buffer.append(img_hash)
        return False

    def is_visited(self, vector):
        """Return True if ``vector`` is similar to a recently seen vector.

        The buffer is scanned newest-first; the scan stops at the first entry
        whose cosine similarity exceeds ``self.threshold``. A vector with no
        such match is appended to the buffer and False is returned.
        """
        for existing in reversed(self.vector_buffer):
            if self.cosine_similarity(existing, vector) > self.threshold:
                return True
        self.vector_buffer.append(vector)
        return False

    def get_hash_buffer_len(self):
        """Return the number of hashes currently buffered."""
        return len(self.hash_buffer)

    def get_vector_buffer_len(self):
        """Return the number of vectors currently buffered."""
        return len(self.vector_buffer)

    def cosine_similarity(self, vec1, vec2):
        """Compute the cosine similarity between two vectors.

        Returns 0.0 when either vector has zero length, because the cosine is
        undefined there and dividing would yield NaN with a runtime warning.
        """
        dot_product = np.dot(vec1, vec2)
        denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
        if denominator == 0:
            return 0.0
        return dot_product / denominator

    def euclidean_distance(self, vec1, vec2):
        """Compute the Euclidean distance between two vectors.

        The distance is also printed to stdout before being returned.
        """
        value = np.linalg.norm(np.array(vec1) - np.array(vec2))
        print(f"Euclidean={value}")
        return value
def is_duplicate(destination_client, vector, threshold=0.8):
    """Return True if the search index already holds a vector close to ``vector``.

    Runs an exhaustive nearest-neighbour vector query through
    ``destination_client`` and recomputes the cosine similarity between
    ``vector`` and each returned document's ``"vector"`` field.

    Args:
        destination_client: Search client exposing ``search(...)``; the
            result must support ``get_count()`` and iteration over
            dict-like matches.
        vector: Query embedding to look up.
        threshold: Cosine similarity above which a match counts as a
            duplicate.

    Returns:
        True if any returned match scores above ``threshold``, else False.
    """
    # NOTE(review): VectorizedQuery is not imported in the visible part of
    # this file; it is presumably azure.search.documents.models.VectorizedQuery
    # -- confirm and add the import alongside the other imports.
    vector_query = VectorizedQuery(
        vector=vector,
        k_nearest_neighbors=3,
        exhaustive=True,
        fields="vector",
    )
    # Query through the client that was passed in rather than a global.
    results = destination_client.search(
        search_text=None,
        vector_queries=[vector_query],
        select=["id", "description", "vector"],
        include_total_count=True,
        top=4,
    )
    if results is None or results.get_count() <= 0:
        return False

    query = np.asarray(vector, dtype=float)
    query_norm = np.linalg.norm(query)
    for match in results:
        candidate = np.asarray(match["vector"], dtype=float)
        denominator = query_norm * np.linalg.norm(candidate)
        if denominator == 0:
            # Cosine similarity is undefined for a zero-length vector.
            continue
        if np.dot(query, candidate) / denominator > threshold:
            return True
    return False
Reference: previous posts for context.
#codingexercise: https://1drv.ms/w/c/d609fb70e39b65c8/Echlm-Nw-wkggNYlIwEAAAABD8nSsN--hM7kfA-W_mzuWw?e=vGBXc1
No comments:
Post a Comment