Tuesday, May 12, 2026

 Drone Survey Area reconstitution:

Problem statement:

Aerial images extracted from a drone video are sufficient to reconstitute the survey area: selected frames can be assembled into a mosaic that fully covers it. This method does away with any prior knowledge of the drone's flight path. Write a Python implementation that places selected input frames onto the tiles of a grid so as to increase the likelihood that the layout matches the overall survey area.

Solution:

The following implementation uses the overlap between consecutive frames to estimate a 2D motion vector (how the drone moved between frame i and frame i+1), integrates those motions along the timeline to get an approximate 2D position for each frame, rotates and normalizes those positions so the path becomes a clean, rectangle-ish footprint, snaps the positions to a 2D grid (with possible collisions — some frames can land in the same cell), and finally builds a mosaic image whose layout reflects the actual flight path far better than mere "visual similarity clustering" would.

Code:

#! /usr/bin/python

import math
import os
from typing import List, Optional, Tuple

import cv2
import numpy as np

# ---------------------------------------------------------

# 1. Load and preprocess images (sorted by filename)

# ---------------------------------------------------------

def load_images_sorted(folder: str,
                       max_images: Optional[int] = None,
                       target_size: Tuple[int, int] = (512, 512)) -> List[np.ndarray]:
    """
    Load images from *folder* in filename order.

    Frames are assumed to be named so that lexicographic order matches
    their temporal order in the source video.

    Parameters
    ----------
    folder : directory containing the extracted video frames.
    max_images : optional cap on the number of frames loaded.
    target_size : (width, height) every frame is resized to; a uniform
        size is required by the phase-correlation step downstream.

    Returns
    -------
    List of BGR uint8 images, each of shape (target_size[1], target_size[0], 3).

    Raises
    ------
    ValueError
        If no readable image is found in the folder.
    """
    imgs: List[np.ndarray] = []
    for fname in sorted(os.listdir(folder)):
        path = os.path.join(folder, fname)
        if not os.path.isfile(path):
            continue
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # Not an image (or unreadable) -- skip silently.
            continue
        imgs.append(cv2.resize(img, target_size, interpolation=cv2.INTER_AREA))
        if max_images is not None and len(imgs) >= max_images:
            break
    if not imgs:
        raise ValueError("No valid images found in folder")
    return imgs

# ---------------------------------------------------------

# 2. Estimate translation between consecutive frames

# using phase correlation (overlap-based)

# ---------------------------------------------------------

def estimate_translation(img1: np.ndarray, img2: np.ndarray) -> np.ndarray:
    """
    Estimate the 2D translation from img1 to img2 via phase correlation.

    Returns a float32 vector (dx, dy) in pixels.
    """
    # Grayscale float32 is the input format cv2.phaseCorrelate expects.
    gray_a = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY).astype(np.float32)
    gray_b = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY).astype(np.float32)

    # A Hanning window suppresses spectral leakage from the image borders.
    rows, cols = gray_a.shape
    window = cv2.createHanningWindow((cols, rows), cv2.CV_32F)

    (dx, dy), _response = cv2.phaseCorrelate(gray_a * window, gray_b * window)
    return np.array([dx, dy], dtype=np.float32)

def accumulate_positions(images: List[np.ndarray]) -> np.ndarray:
    """
    Integrate frame-to-frame translations into approximate 2D positions.

    Position 0 is the origin; each subsequent position subtracts the
    estimated shift, because phaseCorrelate reports how the second frame
    must move to align with the first (hence the negation).

    Returns an array of shape (len(images), 2), dtype float32.
    """
    positions = np.zeros((len(images), 2), dtype=np.float32)
    for idx, (prev, curr) in enumerate(zip(images, images[1:]), start=1):
        positions[idx] = positions[idx - 1] - estimate_translation(prev, curr)
    return positions

# ---------------------------------------------------------

# 3. Normalize and straighten the path (PCA)

# ---------------------------------------------------------

def normalize_positions(positions: np.ndarray) -> np.ndarray:
    """
    Center, rotate (PCA) and rescale positions into the unit square.

    The principal axes of the flight path become the x/y axes, so a
    roughly rectangular lawn-mower pattern ends up axis-aligned.

    Parameters
    ----------
    positions : array of shape (N, 2).

    Returns
    -------
    Array of shape (N, 2) with values in [0, 1]. With fewer than two
    positions no covariance can be estimated (np.cov with one sample
    yields NaN and eigh then fails), so every point maps to the center
    (0.5, 0.5) instead.
    """
    if positions.shape[0] < 2:
        # Degenerate case: a single frame has no meaningful layout.
        return np.full((positions.shape[0], 2), 0.5, dtype=np.float32)

    # Center the path on its mean.
    X = positions - positions.mean(axis=0)

    # PCA rotation: columns of R are eigenvectors, strongest axis first.
    eigvals, eigvecs = np.linalg.eigh(np.cov(X.T))
    R = eigvecs[:, np.argsort(eigvals)[::-1]]
    X_rot = X @ R

    # Rescale each axis independently to [0, 1]; the epsilon floor keeps
    # a degenerate (straight-line) path from dividing by zero.
    min_xy = X_rot.min(axis=0)
    span = np.maximum(X_rot.max(axis=0) - min_xy, 1e-6)
    return (X_rot - min_xy) / span

# ---------------------------------------------------------

# 4. Snap positions to a grid

# ---------------------------------------------------------

def choose_grid_shape(N: int) -> Tuple[int, int]:
    """
    Choose a roughly square (rows, cols) grid with rows * cols >= N.

    rows = floor(sqrt(N)) and cols = ceil(N / rows), which always
    provides enough cells (cols >= rows). The original extra
    "cols += 1" branch was dead code: ceil already guarantees coverage.

    Raises
    ------
    ValueError
        If N < 1 (floor(sqrt(0)) would be 0 and the division below
        would raise a bare ZeroDivisionError).
    """
    if N < 1:
        raise ValueError("N must be >= 1")
    rows = math.floor(math.sqrt(N))
    cols = math.ceil(N / rows)
    return rows, cols

def snap_to_grid(pos_norm: np.ndarray,
                 grid_rows: int,
                 grid_cols: int) -> List[Tuple[int, int]]:
    """
    Quantize normalized [0,1]^2 positions into integer (row, col) cells.

    Collisions are permitted: several frames may map to the same cell.
    """
    cells: List[Tuple[int, int]] = []
    for x, y in pos_norm:
        # x spans the columns, y the rows; the clip keeps x == 1.0
        # (or y == 1.0) inside the last cell instead of overflowing.
        col = int(np.clip(x * grid_cols, 0, grid_cols - 1))
        row = int(np.clip(y * grid_rows, 0, grid_rows - 1))
        cells.append((row, col))
    return cells

# ---------------------------------------------------------

# 5. Build a mosaic for visualization

# ---------------------------------------------------------

def build_mosaic(images: List[np.ndarray],
                 assignments: List[Tuple[int, int]],
                 grid_rows: int,
                 grid_cols: int,
                 tile_size: Tuple[int, int] = (256, 256)) -> np.ndarray:
    """
    Paint each image into its assigned grid cell and return the canvas.

    When several images share a cell, the last one painted wins; cells
    with no assignment stay black. (Averaging or small multiples would
    also be possible here.)

    Returns a BGR uint8 image of shape
    (grid_rows * tile_h, grid_cols * tile_w, 3).
    """
    tile_w, tile_h = tile_size
    canvas = np.zeros((grid_rows * tile_h, grid_cols * tile_w, 3), dtype=np.uint8)
    for img, (row, col) in zip(images, assignments):
        top, left = row * tile_h, col * tile_w
        canvas[top:top + tile_h, left:left + tile_w, :] = cv2.resize(
            img, (tile_w, tile_h), interpolation=cv2.INTER_AREA)
    return canvas

# ---------------------------------------------------------

# 6. High-level function

# ---------------------------------------------------------

def layout_drone_tour_by_overlap(folder: str,
                                 max_images: Optional[int] = None,
                                 base_size: Tuple[int, int] = (512, 512)) -> np.ndarray:
    """
    Build a path-aware mosaic from sequential drone frames.

    Pipeline:
      1) Load sequential frames from *folder* (filename order).
      2) Estimate frame-to-frame translations via phase correlation.
      3) Integrate the translations into 2D positions along the path.
      4) Straighten and normalize the path with PCA.
      5) Snap positions to a grid and paint the mosaic.

    Parameters
    ----------
    folder : directory with the extracted video frames.
    max_images : optional cap on how many frames are used.
    base_size : working resolution for the motion-estimation step.

    Returns
    -------
    The mosaic as a BGR uint8 image.
    """
    images = load_images_sorted(folder, max_images=max_images, target_size=base_size)
    positions = accumulate_positions(images)
    pos_norm = normalize_positions(positions)
    grid_rows, grid_cols = choose_grid_shape(len(images))
    print(f"Grid shape: {grid_rows} x {grid_cols}")
    assignments = snap_to_grid(pos_norm, grid_rows, grid_cols)
    return build_mosaic(images, assignments, grid_rows, grid_cols,
                        tile_size=(256, 256))

if __name__ == "__main__":
    # Requirements: pip install opencv-python numpy
    source_folder = "."
    result = layout_drone_tour_by_overlap(source_folder, max_images=None)
    cv2.imwrite("drone_path_layout.png", result)
    print("Saved drone_path_layout.png")


No comments:

Post a Comment