Sunday, October 12, 2025

This is a continuation of a previous article on BBAVectors and Transformer-based context-aware detection:

1. Sample for BBAVectors:

import torch
from PIL import Image
from torchvision import transforms

from models.detector import build_detector          # from the BBAVectors repo
from utils.visualize import visualize_detections    # optional visualization helper

# Load a pretrained BBAVectors model
def load_bbavectors_model(config_path, checkpoint_path):
    model = build_detector(config_path)
    model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
    model.eval()
    return model

# Preprocess an image from a URI
def load_image_from_uri(uri):
    image = Image.open(uri).convert("RGB")
    transform = transforms.Compose([
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
    ])
    return transform(image).unsqueeze(0)  # add batch dimension

# Run detection
def detect_landmarks(model, image_tensor):
    with torch.no_grad():
        outputs = model(image_tensor)
    return outputs  # BBAVectors predicts oriented bounding boxes

# Main workflow
def main():
    # Paths to config and weights
    config_path = 'configs/dota_bbavectors.yaml'
    checkpoint_path = 'checkpoints/bbavectors_dota.pth'

    # URIs of drone images
    image_uris = [
        'drone_images/scene1.jpg',
        'drone_images/scene2.jpg'
    ]

    model = load_bbavectors_model(config_path, checkpoint_path)

    for uri in image_uris:
        image_tensor = load_image_from_uri(uri)
        detections = detect_landmarks(model, image_tensor)

        print(f"\nDetections for {uri}:")
        # Assumes the raw network outputs have been decoded into a list of
        # dicts with 'label', 'score', and 'bbox' keys (see the sketch below)
        for det in detections:
            print(f"Class: {det['label']}, Score: {det['score']:.2f}, BBox: {det['bbox']}")

        # Optional: visualize results
        # visualize_detections(uri, detections)

if __name__ == "__main__":
    main()
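
Note that the network itself returns dense prediction maps rather than ready-made detection dicts, so the print loop above assumes a small decoding step. Below is a minimal sketch of what such a helper might look like; decode_predictions, the output keys ('boxes', 'scores', 'labels'), and the class list are assumptions for illustration, not part of the BBAVectors API. In practice you would use the repo's own decoding/post-processing code.

# Hypothetical decoder: converts raw BBAVectors outputs into the list-of-dict
# format consumed by the print loop above. The exact tensor names and shapes
# depend on the repo version, so treat this purely as a placeholder.
DOTA_CLASSES = ['plane', 'ship', 'storage-tank']  # example subset; use the repo's full class list

def decode_predictions(outputs, score_threshold=0.3):
    detections = []
    for box, score, class_id in zip(outputs['boxes'], outputs['scores'], outputs['labels']):
        if score.item() < score_threshold:
            continue
        detections.append({
            'label': DOTA_CLASSES[int(class_id)],
            'score': float(score),
            'bbox': box.tolist(),  # oriented box, e.g. (cx, cy, w, h, angle) or polygon corners
        })
    return detections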

2. Sample for semantic-based detection (DETR):

from PIL import Image
import requests
import torch
from transformers import DetrImageProcessor, DetrForObjectDetection

# Load the pretrained DETR model and its processor
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
model.eval()

# Load an image from a URI
def load_image(uri):
    return Image.open(requests.get(uri, stream=True).raw).convert("RGB")

# Detect objects and return the set of predicted class labels
def detect_objects(image):
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Filter predictions by confidence threshold
    target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.9
    )[0]

    labels = [model.config.id2label[label.item()] for label in results["labels"]]
    return set(labels)

# URIs for two drone-captured scenes
scene1_uri = "https://example.com/drone_scene_1.jpg"
scene2_uri = "https://example.com/drone_scene_2.jpg"

# Load and process both scenes
scene1 = load_image(scene1_uri)
scene2 = load_image(scene2_uri)

labels1 = detect_objects(scene1)
labels2 = detect_objects(scene2)

# Compare object presence across the two scenes
shared_objects = labels1.intersection(labels2)
unique_to_scene1 = labels1 - labels2
unique_to_scene2 = labels2 - labels1

# Print results
print("Shared objects between scenes:", shared_objects)
print("Unique to Scene 1:", unique_to_scene1)
print("Unique to Scene 2:", unique_to_scene2)

