Location of objects detected in Aerial Drone Images.
Determining the distance and location of objects found via video/image insights requires neither continuous processing of every image in the feed nor the calculation of camera angles, orientation and direction of drones. Most urban and populated areas have a common set of well-known categories of vehicles, such as sedans and trucks, and their scale in an image can be used to calculate relative distances between points of interest such as landmarks. Therefore, computing these distances inside the video processing pipeline is not needed and can be deferred until analysis and agentic retrieval of a query response. The drone's reference point with regard to the image can be assumed to be constant for all images because the camera usually does not move and, even if it did, only the perpendicular to the earth passing through the drone is needed as a reference point in the image. As the objects and video/image insights are populated in the DroneWorld catalog, the distance between selected pairs of objects, specifically important structures or landmarks, can be determined. Wrapping the distance calculation in a function that works on a single image containing both objects allows the same calculation to be reused for any such pair of objects.
from typing import Any, Callable, Optional, Set
from urllib.parse import urlparse

import torch
# NOTE(review): requests, cv2 and azure.storage.blob.BlobClient are used below
# but no import for them is visible here - confirm they are imported elsewhere.
# Load the pretrained 'yolov5s' entry point from the 'ultralytics/yolov5' hub
# repository once at import time; detect_vehicles() reuses this shared model.
model = torch.hub.load(
    'ultralytics/yolov5',
    'yolov5s',
    pretrained=True,
)
def read_image_from_blob(sas_url, timeout=30):
    """Download an image from a URL and decode it into a color image.

    Args:
        sas_url: URL of the image to fetch (e.g. an Azure Blob Storage SAS URL).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` on the response
        body, or ``None`` when the HTTP status code is not 200.

    Raises:
        Any exception raised by ``requests.get`` (connection failures,
        timeouts) propagates to the caller.
    """
    response = requests.get(sas_url, timeout=timeout)
    if response.status_code != 200:
        return None
    # View the payload as a flat uint8 buffer without copying it first.
    image_array = np.frombuffer(response.content, dtype=np.uint8)
    return cv2.imdecode(image_array, cv2.IMREAD_COLOR)
def detect_vehicles(frame):
    """Run the shared detection model on ``frame`` and keep vehicle rows only.

    Args:
        frame: Image passed directly to the module-level ``model``.

    Returns:
        The rows of ``results.pandas().xyxy[0]`` whose ``name`` column is one
        of 'car', 'truck', 'bus' or 'motorcycle'.
    """
    wanted_labels = ['car', 'truck', 'bus', 'motorcycle']
    all_boxes = model(frame).pandas().xyxy[0]
    is_vehicle = all_boxes['name'].isin(wanted_labels)
    return all_boxes[is_vehicle]
def get_image_output_url(scene_uri):
    """Build the URL where the annotated version of ``scene_uri`` is stored.

    The output lives in the same container, under an ``analyzed/`` folder next
    to the source blob (or under ``output/analyzed/`` when the source sits at
    the container root). The file name is the source name without its
    extension, followed by ``withvehicles.jpg``. Any query string (e.g. a SAS
    token) is carried over unchanged.

    Args:
        scene_uri: URL of the source blob, shaped like
            ``scheme://host/container/path/to/name.ext[?query]``.

    Returns:
        The output image URL as a string.

    Raises:
        IndexError: If the URL path has no container segment.
    """
    parsed = urlparse(scene_uri)
    path_parts = parsed.path.split('/')
    container = path_parts[1]
    # Strip only the final extension so names such as 'frame.001.jpg' and
    # 'frame.002.jpg' do not collapse onto the same output blob.
    blob_name = path_parts[-1].rsplit('.', 1)[0]
    # Everything between the container and the file name is the directory.
    blob_dir = '/'.join(path_parts[2:-1])
    if not blob_dir:
        blob_dir = "output"
    image_path = f"{blob_dir}/analyzed/{blob_name}withvehicles.jpg"
    # Rebuild the base URL (without SAS token)
    base_url = f"{parsed.scheme}://{parsed.netloc}/{container}/{image_path}"
    # Re-attach the SAS token if one was present
    sas_token = parsed.query
    return f"{base_url}?{sas_token}" if sas_token else base_url
def detect_vehicles_and_get_url(scene_uri: Optional[str] = None) -> Optional[str]:
    """Draw boxes around detected vehicles and upload the annotated image.

    Downloads the image at ``scene_uri``, runs ``detect_vehicles`` on it,
    draws a rectangle for every detection, encodes the result as JPEG and
    uploads it to the URL returned by ``get_image_output_url``.

    Args:
        scene_uri: URL of the source image (may carry a SAS token).

    Returns:
        The URL of the uploaded annotated image, or ``None`` when
        ``scene_uri`` is empty, ``read_image_from_blob`` returned ``None``,
        or JPEG encoding failed.
    """
    if not scene_uri:
        return None
    frame = read_image_from_blob(scene_uri)
    # Truth-testing a NumPy array raises ValueError, so compare with None.
    if frame is None:
        return None
    vehicles = detect_vehicles(frame)
    print(vehicles)
    for _, v in vehicles.iterrows():
        # Box corners come from the detection row; OpenCV needs int pixels.
        top_left = (int(v['xmin']), int(v['ymin']))
        bottom_right = (int(v['xmax']), int(v['ymax']))
        cv2.rectangle(frame, top_left, bottom_right, (255, 0, 0), 2)
    encoded_ok, buffer = cv2.imencode('.jpg', frame)
    if not encoded_ok:
        return None
    image_bytes = buffer.tobytes()
    image_uri = get_image_output_url(scene_uri)
    image_blob_client = BlobClient.from_blob_url(image_uri)
    image_blob_client.upload_blob(image_bytes, overwrite=True)
    return image_uri
import numpy as np
# Average lengths in feet for each vehicle class
VEHICLE_LENGTHS = {
    'motorcycle': 5,
    'car': 20,
    'truck': 30,
    'bus': 60,
}


def calculate_pixel_per_foot(vehicles):
    """Estimate the image scale in pixels per foot from detected vehicles.

    Each row whose ``name`` has an entry in ``VEHICLE_LENGTHS`` contributes
    one estimate: the longer side of its bounding box divided by the class's
    nominal length. The longer side is used because a vehicle seen from above
    can point in any direction, so the horizontal extent alone may measure
    its width rather than its length.

    Args:
        vehicles: DataFrame with ``name``, ``xmin``, ``xmax``, ``ymin`` and
            ``ymax`` columns.

    Returns:
        The mean of the per-vehicle pixels-per-foot estimates.

    Raises:
        ValueError: If no row yields a usable estimate.
    """
    pixels_per_foot = []
    for _, v in vehicles.iterrows():
        real_length_feet = VEHICLE_LENGTHS.get(v['name'])
        if not real_length_feet:
            continue
        box_length_pixels = max(
            abs(v['xmax'] - v['xmin']),
            abs(v['ymax'] - v['ymin']),
        )
        if box_length_pixels > 0:
            # Estimate scale for this vehicle
            pixels_per_foot.append(box_length_pixels / real_length_feet)
    if not pixels_per_foot:
        raise ValueError("No vehicles with known real size detected.")
    return np.mean(pixels_per_foot)
def get_vehicle_center(vehicle_row):
    """Return the midpoint of a bounding box as ``np.array([x, y])``.

    Args:
        vehicle_row: Mapping-like row exposing ``xmin``, ``xmax``, ``ymin``
            and ``ymax``.
    """
    left, right = vehicle_row['xmin'], vehicle_row['xmax']
    top, bottom = vehicle_row['ymin'], vehicle_row['ymax']
    midpoint = [(left + right) / 2, (top + bottom) / 2]
    return np.array(midpoint)
def calculate_vehicle_distance(vehicles, idx1, idx2):
    """Return the distance in feet between two detected vehicles.

    Args:
        vehicles: DataFrame of detections; also used to estimate the scale.
        idx1: Positional index (``iloc``) of the first vehicle.
        idx2: Positional index (``iloc``) of the second vehicle.

    Returns:
        The distance between the two bounding-box centers, converted from
        pixels to feet with the scale from ``calculate_pixel_per_foot``.
    """
    # Scale first: pixels per one foot
    scale = calculate_pixel_per_foot(vehicles)
    first_center, second_center = (
        get_vehicle_center(vehicles.iloc[position]) for position in (idx1, idx2)
    )
    # Straight-line separation in pixels, then converted to feet
    offset = first_center - second_center
    return np.linalg.norm(offset) / scale
def calculate_span(frame, first, last):
    """Detect vehicles in ``frame`` and return the distance between two of them.

    Args:
        frame: Image passed to ``detect_vehicles``.
        first: Positional index of the first vehicle among the detections.
        last: Positional index of the second vehicle among the detections.

    Returns:
        The distance in feet, as computed by ``calculate_vehicle_distance``.
    """
    detected = detect_vehicles(frame)
    return calculate_vehicle_distance(detected, first, last)
# Set of callables exported from this module; currently only calculate_span.
image_user_functions: Set[Callable[..., Any]] = {calculate_span}