This is a continuation of a previous article on BBAVectors and Transformer-based context-aware detection:
1. Sample for BBAVectors:
import torch
from PIL import Image
from torchvision import transforms
from models.detector import build_detector  # from the BBAVectors repo
from utils.visualize import visualize_detections  # optional visualization helper

# Load a pretrained BBAVectors model from a config and checkpoint
def load_bbavectors_model(config_path, checkpoint_path):
    model = build_detector(config_path)
    model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
    model.eval()
    return model

# Preprocess an image from a URI (a local path in this example)
def load_image_from_uri(uri):
    image = Image.open(uri).convert("RGB")
    transform = transforms.Compose([
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
    ])
    return transform(image).unsqueeze(0)  # add batch dimension

# Run detection
def detect_landmarks(model, image_tensor):
    with torch.no_grad():
        outputs = model(image_tensor)
    return outputs  # BBAVectors returns oriented bounding boxes

# Main workflow
def main():
    # Paths to config and weights
    config_path = 'configs/dota_bbavectors.yaml'
    checkpoint_path = 'checkpoints/bbavectors_dota.pth'

    # URIs to drone images
    image_uris = [
        'drone_images/scene1.jpg',
        'drone_images/scene2.jpg',
    ]

    model = load_bbavectors_model(config_path, checkpoint_path)

    for uri in image_uris:
        image_tensor = load_image_from_uri(uri)
        detections = detect_landmarks(model, image_tensor)
        print(f"\nDetections for {uri}:")
        # The exact output structure depends on the repo's decode step;
        # here each detection is assumed to be a dict with these keys.
        for det in detections:
            print(f"Class: {det['label']}, Score: {det['score']:.2f}, BBox: {det['bbox']}")
        # Optional: visualize results
        # visualize_detections(uri, detections)

if __name__ == "__main__":
    main()
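BBAVectors regresses box boundary-aware vectors at each predicted center, and the repo's decode step converts them into oriented bounding boxes. If the decoded bbox comes back as a (cx, cy, w, h, theta) tuple rather than four corner points, a small geometric helper is useful for drawing or IoU computation. The sketch below is a minimal example under stated assumptions: obb_to_corners is a hypothetical helper, not part of the BBAVectors repo, and it assumes theta is in radians, measured counter-clockwise.

import numpy as np

# Hypothetical helper: convert a (cx, cy, w, h, theta) oriented box
# into its four (x, y) corner points. Assumes theta is in radians.
def obb_to_corners(cx, cy, w, h, theta):
    dx, dy = w / 2.0, h / 2.0
    # Corners in the box's local frame, before rotation
    local = np.array([[-dx, -dy], [dx, -dy], [dx, dy], [-dx, dy]])
    # Standard 2D rotation matrix
    c, s = np.cos(theta), np.sin(theta)
    rot = np.array([[c, -s], [s, c]])
    # Rotate, then translate to image coordinates
    return local @ rot.T + np.array([cx, cy])

# Example: a 100x40 box centered at (256, 256), rotated 30 degrees
print(obb_to_corners(256, 256, 100, 40, np.deg2rad(30)))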
2. Sample for semantic-based detection:
from PIL import Image
import requests
import torch
from transformers import DetrImageProcessor, DetrForObjectDetection

# Load pretrained DETR model and processor
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
model.eval()

# Load an image from a remote URI
def load_image(uri):
    return Image.open(requests.get(uri, stream=True).raw).convert("RGB")

# Detect objects and return the set of predicted class labels
def detect_objects(image):
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Filter predictions by confidence threshold.
    # PIL's image.size is (width, height); post-processing expects (height, width).
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
    labels = [model.config.id2label[label.item()] for label in results["labels"]]
    return set(labels)

# URIs for two drone-captured scenes
scene1_uri = "https://example.com/drone_scene_1.jpg"
scene2_uri = "https://example.com/drone_scene_2.jpg"

# Load and process both scenes
scene1 = load_image(scene1_uri)
scene2 = load_image(scene2_uri)
labels1 = detect_objects(scene1)
labels2 = detect_objects(scene2)

# Compare object presence
shared_objects = labels1.intersection(labels2)
unique_to_scene1 = labels1 - labels2
unique_to_scene2 = labels2 - labels1

# Print results
print("Shared objects between scenes:", shared_objects)
print("Unique to Scene 1:", unique_to_scene1)
print("Unique to Scene 2:", unique_to_scene2)