Sunday, October 5, 2025

 Among the objects detected in an urban landscape, parking spots and streets play a vital role in mapping the drone world to the real world. With or without the gps information from the drone, it is possible to establish this mapping afterwards and not by processing each and every frame of the drone video. To help with this analysis at selected times, deep learning models can be used for parking spot and street detection. Below are approaches are to do that:

1. YOLOv8 + SAM Hybrid (GitHub: julienborderon/parking_detection) for parking spot detection

import cv2

from ultralytics import YOLO

from segment_anything import SamPredictor, sam_model_registry

import numpy as np

import matplotlib.pyplot as plt

# --- Load YOLOv8 model ---

yolo_model = YOLO("weights/yolov8n.pt") # Replace with the correct path to pretrained weights

# --- Load SAM model ---

sam = sam_model_registry["vit_b"](checkpoint="weights/sam_vit_b.pth") # Replace with correct SAM checkpoint

predictor = SamPredictor(sam)

# --- Load aerial image ---

image_path = "drone_parking_lot.jpg"

image = cv2.imread(image_path)

image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# --- Run YOLOv8 detection ---

results = yolo_model(image_rgb)

boxes = results[0].boxes.xyxy.cpu().numpy()

classes = results[0].boxes.cls.cpu().numpy()

# --- Filter for parking spot class (assume class index for 'parking spot' is known) ---

parking_spot_class_id = 0 # Replace with correct class ID if custom-trained

parking_boxes = boxes[classes == parking_spot_class_id]

# --- Run SAM segmentation ---

predictor.set_image(image_rgb)

unoccupied_count = 0

for box in parking_boxes:

    x1, y1, x2, y2 = map(int, box)

    input_box = np.array([x1, y1, x2, y2])

    masks, scores, _ = predictor.predict(box=input_box, multimask_output=True)

    # Heuristic: if segmentation mask is mostly empty, assume unoccupied

    mask = masks[np.argmax(scores)]

    occupancy_ratio = np.sum(mask) / ((x2 - x1) * (y2 - y1))

    if occupancy_ratio < 0.2: # Threshold can be tuned

        unoccupied_count += 1

print(f"Detected {len(parking_boxes)} parking spots.")

print(f"Unoccupied spots: {unoccupied_count}")

2. AerialLaneNet (ICASSP 2024) for lane and traffic detection

import cv2

import numpy as np

import torch

from torchvision import transforms

from shapely.geometry import LineString

import matplotlib.pyplot as plt

# --- Load pretrained lane detection model (placeholder for AerialLaneNet) ---

class DummyLaneNet(torch.nn.Module):

    def forward(self, x):

        # Simulated output: binary mask of lane lines

        _, _, H, W = x.shape

        mask = np.zeros((H, W), dtype=np.uint8)

        cv2.line(mask, (int(W*0.3), H), (int(W*0.4), 0), 255, 5)

        cv2.line(mask, (int(W*0.7), H), (int(W*0.6), 0), 255, 5)

        return torch.tensor(mask).unsqueeze(0).unsqueeze(0)

model = DummyLaneNet()

# --- Load and preprocess aerial image ---

image_path = "aerial_street.jpg"

image = cv2.imread(image_path)

image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

transform = transforms.Compose([

    transforms.ToTensor(),

    transforms.Resize((512, 512)),

    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)

])

input_tensor = transform(image_rgb).unsqueeze(0)

# --- Run lane detection ---

with torch.no_grad():

    lane_mask = model(input_tensor).squeeze().numpy()

# --- Extract lane contours and bounding boxes ---

contours, _ = cv2.findContours(lane_mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

bounding_boxes = [cv2.boundingRect(cnt) for cnt in contours]

# --- Estimate traffic direction using line fitting ---

directions = []

for cnt in contours:

    [vx, vy, x, y] = cv2.fitLine(cnt, cv2.DIST_L2, 0, 0.01, 0.01)

    directions.append(((x, y), (vx, vy)))

# --- Visualize results ---

output = image_rgb.copy()

for (x, y, w, h), ((cx, cy), (dx, dy)) in zip(bounding_boxes, directions):

    cv2.rectangle(output, (x, y), (x+w, y+h), (0, 255, 0), 2)

    pt1 = (int(cx), int(cy))

    pt2 = (int(cx + 100*dx), int(cy + 100*dy))

    cv2.arrowedLine(output, pt1, pt2, (255, 0, 0), 2)

plt.imshow(output)

plt.title("Detected Streets and Traffic Directions")

plt.axis('off')

plt.show()

And their corroboration with real-world maps helps to put names on them


No comments:

Post a Comment