Wednesday, June 18, 2025

 This post highlights the need for, and a method of, reducing the workload involved in populating the drone world catalog from aerial drone imagery.

#! /usr/bin/python

import json

from azure.search.documents import SearchClient

from azure.core.credentials import AzureKeyCredential

from azure.core.exceptions import HttpResponseError

from azure.ai.vision.imageanalysis import ImageAnalysisClient

from azure.search.documents.models import (

    VectorizedQuery,

    VectorizableTextQuery

)

from dedup import ImageDeduplicator

from tenacity import retry, stop_after_attempt, wait_fixed

import os

import re

import sys

import time

search_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]

api_version = os.getenv("AZURE_SEARCH_API_VERSION")

search_api_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")

index_name = os.getenv("AZURE_SEARCH_INDEX_NAME", "index00")

credential = AzureKeyCredential(search_api_key)

dest_index_name = os.getenv("AZURE_SEARCH_02_INDEX_NAME", "index02")

vision_api_key = os.getenv("AZURE_AI_VISION_API_KEY")

vision_api_version = os.getenv("AZURE_AI_VISION_API_VERSION")

vision_region = os.getenv("AZURE_AI_VISION_REGION")

vision_endpoint = os.getenv("AZURE_AI_VISION_ENDPOINT")

source_url_template = os.getenv("AZURE_SOURCE_SAS_URI")

destination_url_template = os.getenv("AZURE_DESTINATION_SAS_URI")

sys.path.insert(0, os.path.abspath(".."))

from visionprocessor.vectorizer import vectorize_image, analyze_image

deduplicator = ImageDeduplicator()

# Initialize SearchClient

search_client = SearchClient(

    endpoint=search_endpoint,

    index_name=index_name,

    credential=AzureKeyCredential(search_api_key)

)

destination_client = SearchClient(

    endpoint=search_endpoint,

    index_name=dest_index_name,

    credential=AzureKeyCredential(search_api_key)

)

vision_credential = AzureKeyCredential(vision_api_key)

analysis_client = ImageAnalysisClient(vision_endpoint, vision_credential)

import cv2

import numpy as np

import requests

from io import BytesIO

from azure.storage.blob import BlobClient

def read_image_from_blob(sas_url):

    """Reads an image from Azure Blob Storage using its SAS URL."""

    response = None

    try:

        response = requests.get(sas_url)

    except Exception as e:

        print(f"Error from requests.get: {e}")

    if response is not None and response.status_code == 200:

        image_array = np.asarray(bytearray(response.content), dtype=np.uint8)

        image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)

        return image

    else:

        # raise Exception(f"Failed to fetch image. Status code: {response.status_code}")

        return None

def upload_image_to_blob(clipped_image, sas_url):

    """Uploads the clipped image to Azure Blob Storage using its SAS URL."""

    _, encoded_image = cv2.imencode(".jpg", clipped_image)

    blob_client = BlobClient.from_blob_url(sas_url)

    blob_client.upload_blob(encoded_image.tobytes(), overwrite=True)

    # print("Clipped image uploaded successfully.")

def save_or_display(clipped_image, destination_file):

    cv2.imwrite(destination_file, clipped_image)

    cv2.imshow("Clipped Image", clipped_image)

    cv2.waitKey(0)

    cv2.destroyAllWindows()

def clip_image(image, bounding_box):

    # Extract bounding box parameters

    x, y, width, height = bounding_box

    # Clip the region using slicing

    clipped_image = image[y:y+height, x:x+width]

    return clipped_image

def prepare_json_string_for_load(text):

  text = text.replace("\"", "'")

  text = text.replace("{'", "{\"")

  text = text.replace("'}", "\"}")

  text = text.replace(" '", " \"")

  text = text.replace("' ", "\" ")

  text = text.replace(":'", ":\"")

  text = text.replace("':", "\":")

  text = text.replace(",'", ",\"")

  text = text.replace("',", "\",")

  return re.sub(r'\n\s*', '', text)

def to_string(bounding_box):

    return f"{bounding_box['x']},{bounding_box['y']},{bounding_box['w']},{bounding_box['h']}"

def is_duplicate_image(deduplicator, image):

    value = deduplicator.is_duplicate(image)

    return value

def is_visited(deduplicator, vector):

    value = deduplicator.is_visited(vector)

    return value

def is_existing(deduplicator, vector):

    start_time = time.time()

    value = deduplicator.is_existing(destination_client, vector)

    end_time = time.time()

    elapsed_time = end_time - start_time

    print(f"Elapsed time for is_existing: {elapsed_time:.3f} seconds")

    return value

@retry(stop=stop_after_attempt(5), wait=wait_fixed(60))

def upload(document):

    try:

        upload_results = destination_client.upload_documents([document])

        error = ','.join([upload_result.error_message for upload_result in upload_results if upload_result.error_message]).strip(",")

        if error:

            print(error)

    except HttpResponseError as e:

        print(f"Error from upload: {e}")

        raise

# Example usage

def shred(entry_id):

        source_file=entry_id

        source_sas_url = source_url_template.replace("{source_file}", source_file)

        print(entry_id)

        entry = search_client.get_document(key=entry_id) # , select=["id", "description"])

        id=entry['id']

        description_text=entry['description']

        tags = entry['tags']

        title = entry['title']

        description_json = None

        try:

            description_text = prepare_json_string_for_load(entry["description"]).replace('""','')

            description_json = json.loads(description_text)

        except Exception as e:

            print(description_text)

            print(f"{entry_id}: parsing error: {e}")

        if description_json is None:

            print("Description could not be parsed.")

            return

        values = ((description_json.get("_data") or {}).get("denseCaptionsResult") or {}).get("values")

        if values:

            objectid = 0

            for item in values:

                objectid += 1

                if objectid == 1:

                    continue

                destination_file=source_file+f"-{objectid:04d}"

                destination_sas_url = destination_url_template.replace("{destination_file}", destination_file)

                box = item.get("boundingBox", None)

                print(f"{destination_file}: {box}")

                if box:

                    bounding_box = (box["x"], box["y"], box["w"], box["h"])

                    # Read image from Azure Blob

                    image = read_image_from_blob(source_sas_url)

                    if image is None or not image.any():

                        print(f"{source_file} could not be read.")

                        continue

                    # Clip image

                    clipped = clip_image(image, bounding_box)

                    # Upload clipped image to Azure Blob

                    upload_image_to_blob(clipped, destination_sas_url)

                    vector = vectorize_image(destination_sas_url, vision_api_key, vision_region)

                    vector = np.pad(vector, (0, 1536 - len(vector)), mode='constant')

                    print("checking existing")

                    if vector.any() and not is_existing(deduplicator, vector):

                        print(f"Match does not exist for {destination_file}.")

                    else:

                        print(f"Match exists for {destination_file}")

                else:

                    print("no objects detected")

for number in range(5412, 5413):

    entry_id = f"{number:06d}"

    shred(entry_id)

With the deduplicator.is_existing() method defined as:

import cv2

import imagehash

import numpy as np

from PIL import Image

from collections import deque

from azure.search.documents.models import (

    VectorizedQuery,

    VectorizableTextQuery

)

class ImageDeduplicator:

    def __init__(self, buffer_size=100):

        """Initialize a ring buffer for tracking image hashes."""

        self.buffer_size = buffer_size

        self.hash_buffer = deque(maxlen=buffer_size)

        self.vector_buffer = deque(maxlen=buffer_size)

    def compute_hash(self, image):

        """Compute perceptual hash of an image."""

        return imagehash.phash(Image.fromarray(image))

    def is_existing(self, external_vector_client, vector):

        vector_query = VectorizedQuery(vector=vector,

                                  k_nearest_neighbors=3,

                                  exhaustive=False,

                                  fields = "vector")

        results = external_vector_client.search(

            search_text=None,

            vector_queries=[vector_query],

            select=["id", "description", "vector"],

            # select='id,description,vector',

            include_total_count=True,

            top=4

        )

        if results is not None and results.get_count() > 0:

            best = 0

            id = None

            match_ids = []

            # Collect ids while iterating: the paged results cannot be re-iterated later.

            for match in results:

                match_ids.append(match['id'])

                match_vector = match["vector"]

                score = self.cosine_similarity(vector, match_vector)

                # print(f"score={score}")

                if score > best:

                    id = match['id']

                    best = score

            matches = ','.join(match_ids)

            print(f"matches: {matches}")

            if best > 0.8:

               print(f"match found with score {best} for {id}.")

               return True

        else:

            print("no match found.")

        return False

    def get_hash_buffer_len(self):

        return len(self.hash_buffer)

    def get_vector_buffer_len(self):

        return len(self.vector_buffer)

    def cosine_similarity(self, vec1, vec2):

        """Computes cosine similarity between two vectors."""

        dot_product = np.dot(vec1, vec2)

        norm_vec1 = np.linalg.norm(vec1)

        norm_vec2 = np.linalg.norm(vec2)

        return dot_product / (norm_vec1 * norm_vec2)

And the results are as follows:

005412

005412-0002: {'x': 986, 'y': 49, 'w': 563, 'h': 526}

checking existing

000370-0002

001225-0002

002703-0002

match found with score 0.9856458102556909 for 000370-0002.

Elapsed time for is_existing: 0.607 seconds

Match exists for 005412-0002

005412-0003: {'x': 1363, 'y': 400, 'w': 422, 'h': 373}

checking existing

001784-0006

004981-0004

014676-0003

match found with score 0.9866765401858795 for 001784-0006.

Elapsed time for is_existing: 0.291 seconds

Match exists for 005412-0003

005412-0004: {'x': 0, 'y': 0, 'w': 1896, 'h': 1050}

checking existing

005412-0004

003169-0006

012227-0006

match found with score 0.9999997660907427 for 005412-0004.

Elapsed time for is_existing: 0.239 seconds

Match exists for 005412-0004

005412-0005: {'x': 1110, 'y': 705, 'w': 403, 'h': 363}

checking existing

005412-0005

004463-0007

004980-0008

match found with score 1.0000000000000002 for 005412-0005.

Elapsed time for is_existing: 0.310 seconds

Match exists for 005412-0005

005412-0006: {'x': 1279, 'y': 213, 'w': 77, 'h': 76}

checking existing

005412-0006

014698-0009

013267-0008

match found with score 1.0000000000000002 for 005412-0006.

Elapsed time for is_existing: 0.288 seconds

Match exists for 005412-0006

005412-0007: {'x': 266, 'y': 717, 'w': 69, 'h': 59}

checking existing

005412-0007

012227-0004

015072-0007

match found with score 1.0 for 005412-0007.

Elapsed time for is_existing: 0.314 seconds

Match exists for 005412-0007

005412-0008: {'x': 612, 'y': 441, 'w': 160, 'h': 184}

checking existing

005412-0008

004989-0009

001226-0003

match found with score 1.0 for 005412-0008.

Elapsed time for is_existing: 0.289 seconds

Match exists for 005412-0008

005412-0009: {'x': 775, 'y': 381, 'w': 68, 'h': 66}

checking existing

005412-0009

013213-0005

005416-0004

match found with score 0.9999997252673284 for 005412-0009.

Elapsed time for is_existing: 0.319 seconds

Match exists for 005412-0009

005412-0010: {'x': 4, 'y': 330, 'w': 76, 'h': 66}

checking existing

005412-0010

004464-0007

015072-0005

match found with score 1.0 for 005412-0010.

Elapsed time for is_existing: 0.269 seconds

Match exists for 005412-0010

At nearly 0.3 seconds per object existence check against the drone world catalog, and with about ten objects per image across the 17,533 images in a single drone tour, a full pass comes to 17533 * 10 * 0.3 / (60 * 60) ≈ 14.61 hours. So workload reduction is called for: images in which 20% or more of the detected objects already match existing catalog entries can be discarded, unless a thresholded time span since those entries were catalogued has been exceeded.
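As a back-of-the-envelope check on the estimate above, and to make the proposed skip rule concrete, here is a minimal sketch; the 20% match-ratio threshold comes from the text, while the time-span cutoff and the helper name are illustrative assumptions.

# Back-of-the-envelope workload estimate for one drone tour, using the figures quoted above.
images_per_tour = 17533
objects_per_image = 10
seconds_per_check = 0.3

total_hours = images_per_tour * objects_per_image * seconds_per_check / 3600
print(f"Full existence-check pass: {total_hours:.2f} hours")  # ~14.61 hours

# Hypothetical skip rule (cutoffs are illustrative): discard an image when at
# least 20% of its detected objects already match catalog entries, unless
# those entries are older than a configured time span.
MATCH_RATIO_THRESHOLD = 0.2          # from the text
MAX_CATALOG_AGE_SECONDS = 6 * 3600   # assumed time-span cutoff

def should_skip_image(num_objects, num_matches, oldest_match_age_seconds):
    """Return True when the image can be skipped for cataloging."""
    if num_objects == 0:
        return False
    match_ratio = num_matches / num_objects
    return (match_ratio >= MATCH_RATIO_THRESHOLD
            and oldest_match_age_seconds <= MAX_CATALOG_AGE_SECONDS)

# Example: 3 of 10 objects already catalogued within the last hour -> skip.
print(should_skip_image(10, 3, 3600))  # True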

As the results above show, this works even when comparing clipped objects across different source images.

To generate a preview video, we could use something like:

def get_preview_url(video_id, access_token):

    insights_url = f"https://api.videoindexer.ai/{LOCATION}/Accounts/{ACCOUNT_ID}/Videos/{video_id}/Index?accessToken={access_token}"

    response = requests.get(insights_url)

    insights = response.json()

    preview_url = insights.get('summarizedInsights', {}).get('previewUrl')

    return preview_url
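A hedged usage sketch for the helper above; LOCATION, ACCOUNT_ID, the access token, and the video id are placeholders assumed to come from the Video Indexer account and the token/upload helpers shown in the Friday, June 13 sample later in this page.

# Placeholders; substitute real account values, a valid token, and an
# already indexed video id.
LOCATION = "westus2"
ACCOUNT_ID = "your-account-id"

access_token = "<ACCESS_TOKEN>"
video_id = "<VIDEO_ID>"

preview = get_preview_url(video_id, access_token)
print(f"Preview URL: {preview}")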


Tuesday, June 17, 2025

 In the previous few articles, we discussed increasing the performance of a drone video sensing platform. Specifically, we called out two factors: 1. leverage the characteristics of aerial drone video to reduce the working set size from any capture when building a drone world catalog, and 2. defer much of the processing from video/image processing to analytics for any workload. These assertions are grounded in observations such as: consecutive drone images overlap heavily, and the direction and pattern of flight have no bearing on how quickly and comprehensively the drone world catalog is populated, or on how specifically detected objects are retrieved with high precision and recall. Also, the better the drone world catalog or knowledge base, the more inclusive the platform becomes for drone sensing applications, in both spatial and temporal dimensions, with some of the work shifted away from repeated video processing in favor of specific analytical queries.

This approach makes the drone video sensing platform more flexible, open, and available to a diverse set of drone sensing applications. With performance-oriented design decisions that follow a trend that has traditionally worked for data management platforms, the platform can host and serve many applications from interested parties, reducing their overhead and allowing them to focus on their business cases. There will always be competition from mature, deep-pocketed companies trying to own the vertical from video processing through analytics to end-user experience, but the industry is relatively new and growing, and many hardware vendors still try to write their own software rather than letting a common software denominator add value while they focus on upgrades in device capabilities. So while LLMs continue to be fine-tuned and upgraded to handle many of the upfront tasks such as classification, labeling, and tracking, our bet is that the data, rather than the re/processing, will prove more valuable and will sooner or later demand best practices, so it pays to plan for that upfront across devices, vendors, and LLMs. In fact, even to reduce workload with, say, video indexing, or to perform more analytics after the initial video processing, we embrace AI models and allow drone sensing application developers to be more expressive in their queries than they could be otherwise.

The requirements of drone sensing applications will differ from those of our platform by virtue of the specific business cases they target. The platform must consider increasing performance along each of these cases in a way that raises the bar for the platform as a common denominator across these applications. This calls for a brief review of the various players in the industry today:

AeroVironment - top supplier to Defense - Arlington, VA

American Robotics - fully automated, drone-in-a-box - Waltham, MA

AgEagle aerial systems - drone software for image analysis - Wichita, Kansas

Ascent Aerosystems - all weather UAVs - Wilmington, Massachusetts

Brinc drones - fly indoors beyond GPS range - Seattle, WA

Freefly Systems - high payload, filmography - Woodinville, WA

Harris Aerial - Endurance, long-range and payload - Orlando, FL

Hylio - autonomous swarm spraying for agriculture - Richmond, TX

Inspired Flight - modular, open-architecture drones for map/survey - San Luis Obispo, CA

RedCat+Teal+FlightWave - fast-deploy in dark or GPS-denied - Salt Lake City, Utah

Skydio - leader in autonomous flight, obstacle avoidance and hands-free operation - San Mateo, CA

SkyFish - 3D modeling of cell towers, bridges and power lines - Stevensville, Montana

Teledyne FLIR - thermal imaging, cutting-edge IR - Wilsonville, Oregon

Vantage Robotics - safest flights near crowds or stealth mission - San Leandro, California

As this list shows, companies are targeting differentiated use cases to provide viable commercial solutions and are subject to NDAA compliance and supply chain constraints in their businesses. But as software, the drone video sensing platform has the unique opportunity to serve them all while bringing the best practices of purview, audit, aging, and more.


Monday, June 16, 2025

 This is a continuation of the previous article on reducing the number of objects detected and catalogued from aerial drone images for optimum performance. One technique is to look up the vector store for a similar image and skip it unless the timestamp exceeds the time range for the current flight.

Sample:

import requests

import json

import sys

import os

import numpy as np

# Add the parent folder to the module search path

sys.path.insert(0, os.path.abspath(".."))

from visionprocessor.vectorizer import vectorize_image

# Azure AI Search configurations

search_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")

index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")

search_api_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")

vision_api_key = os.getenv("AZURE_AI_VISION_API_KEY")

# Query string for red cars

query_text = "Find red cars in drone images"

blob_url = "<BLOB_SAS_URL>"

vector = vectorize_image(blob_url, vision_api_key, "eastus")

vector = np.pad(vector, (0, 1536 - len(vector)), mode='constant')

# print(f"len={len(vector)}")

# Vector search payload

body = {

        "count": True,

        "select": "id,description,vector",

        "vectorQueries": [

            {

                "vector": vector.tolist(),

                "k": 5,

                "fields": "vector",

                "kind": "vector",

                "exhaustive": True

            }

        ]

    }

# Headers for Azure Search API

headers = {

    "Content-Type": "application/json",

    "api-key": search_api_key

}

# Send search request to Azure AI Search

response = requests.post(

    f"{search_endpoint}/indexes/{index_name}/docs/search?api-version=2024-07-01",

    headers=headers,

    data=json.dumps(body)

)

# Parse response

search_results = response.json()

print(len(search_results))

print(search_results)

ids = ",".join([item["id"] for item in search_results.get("value", [])]).strip(",")

print(ids)

# output:

# RedCar3: 015644,015643,012669,008812,011600

# RedCar4: 014076,014075,014077,014074,014543

# Count occurrences of "red car" in descriptions

red_car_count = sum(1 for item in search_results.get("value", []) if "red car" in item["description"].lower())

print(f"Total red cars found in drone images: {red_car_count}")

Reference: previous article: https://1drv.ms/w/c/d609fb70e39b65c8/EVdJ7oJaqFFAvkx9udkFX1UBC0KcZkrPJU6k5yTdwcZlNg?e=LR1SYf


Sunday, June 15, 2025

 This is a summary of the book titled “Beyond No: Harnessing the Power of Resistance for Positive Organizational Growth,” written by Erik Nagel and published by Wiley in 2025. The word “No” is as much workplace parlance for resistance as any other jargon, and perhaps the most unambiguous, yet one must understand the message, address the problems, and seek solutions, contends the author. While resistance can be collective, as when a workforce pushes back to get management attention or lodge a protest, it behooves the receiver, such as management, to cope with recalcitrance, learn from it, manage it, and respond effectively. The author cites common forms of resistance and offers a path forward to both parties. 

Resistance in Everyday Management 

Managers regularly encounter resistance, whether passive (employees ignoring changes) or aggressive (labor strikes). Resistance isn’t confined to lower-ranking employees; managers themselves may push back against corporate decisions. 

Seeking Compromises in Resistance 

The book discusses real-world examples of handling resistance, such as a food sector manager negotiating incremental raises for an underpaid employee. However, not all conflicts resolve positively—stubborn resistance can sometimes lead to termination. 

The Challenge of Covert Resistance 

Silent pushback—like employees subtly undermining company culture—can be trickier to detect than vocal complaints. Historical examples, like slow-down strikes or discreet sabotage, illustrate how covert resistance manifests in different industries. 

Hierarchical Structures and Resistance Suppression 

Organizations with rigid structures, like consulting firms, often mitigate resistance by setting clear expectations. Employees at Magnum Consulting, for example, knowingly accept grueling work conditions due to high pay and prestige. 

Common Myths About Resistance 

Nagel debunks managerial misconceptions about resistance, such as assuming employees are inherently lazy or afraid of change. These myths absolve leaders of responsibility for engaging with employees and addressing legitimate concerns. 

Leadership and Resistance 

Executives who dictate change without employee input often spark resistance. The book warns against a “top-down thinking” mentality, advocating for collaborative leadership where managers acknowledge front-line insights. 

Strategies for Addressing Resistance 

Nagel outlines four key steps for leaders: 

  1. Take accountability – Don’t scapegoat employees; engage with their concerns. 

  2. Loosen control – Employees resist more when they feel powerless. 

  3. Encourage debate – Welcoming dissent leads to more effective change. 

  4. Stay open to being wrong – Leaders must listen and adapt rather than impose rigid strategies. 

This book offers a nuanced approach to resistance, urging leaders to harness workplace pushback as a tool for growth rather than viewing it as a disruption. Let me know if you need a deeper dive into any section! 


  

Saturday, June 14, 2025

 This is a summary of the book titled “The Geek Way,” written by Andrew McAfee and published by Little, Brown US in 2023. Big businesses like Sears, Kodak, and Polaroid failed, while non-traditional Amazon, Netflix, and HubSpot flourished. The author researches these contrasting outcomes by exploring cultural evolution and applying its principles to modern business. He argues that group norms shape behavior more than individual beliefs and that “geek norms” drive success; while following them may be challenging, it is quite rewarding and might even be the best way, as it embraces change with “science, ownership, speed and openness.” Humans are social animals, and a group guided by these four norms curbs overconfidence by grounding itself in evidence, avoids status seeking, and emphasizes iteration and feedback. The Geek Way demands effort and patience.

Core Principles of the Geek Way

Geek-driven companies follow four key norms:

Speed: Favor rapid iteration over extensive planning.

Ownership: Small, autonomous teams take responsibility and make independent decisions.

Science: Evidence-based approaches and data-driven experimentation guide decisions.

Openness: Transparency and feedback foster adaptability and growth.

Real-World Examples of Geek Culture

NASA rocket scientist Will Marshall dramatically reduced spacecraft costs through rapid iteration. HubSpot’s CEO welcomes challenges from new hires, and Google relies on A/B testing to drive data-backed decision-making.

Norms and Human Social Behavior

Humans thrive in groups guided by shared norms, which influence decisions more than individual beliefs. A famous Princeton study showed that time constraints dictated behavior more than personal altruism.

Science as an Antidote to Overconfidence

People tend to favor their own ideas due to confirmation bias. The Geek Way mitigates this by emphasizing evidence-based arguments, as seen in Google’s approach to testing multiple versions of a product before selecting the best one.

Bureaucracy and Status-Seeking

People often prioritize status over efficiency, fueling bureaucratic dysfunction. Geek-driven businesses, such as Amazon, counter this by using small, autonomous teams with clear objectives.

Speed vs. Excessive Planning

Many projects suffer from hidden delays due to reputational concerns. Geek companies avoid this by fostering constant iteration and feedback, as demonstrated in an experiment where kindergarteners outperformed business professionals in a marshmallow tower-building challenge.

Openness as a Defense Against Micromanagement

Traditional companies often suppress dissent and prioritize hierarchy. Geek organizations, like Bridgewater Associates, HubSpot, and Netflix, create transparency through structured feedback systems, ensuring decisions are challenged and improved.

Challenges of Adopting the Geek Way

Even successful companies risk stagnation if they fail to evolve. For instance, TikTok disrupted Meta by rapidly acquiring millions of users. Adopting the Geek Way requires continual learning, patience, and adaptability.

Conclusion

Despite the challenges, embracing science, ownership, speed, and openness is the best way to thrive in a fast-changing world.

#codingexercise: https://1drv.ms/w/c/d609fb70e39b65c8/Echlm-Nw-wkggNYlIwEAAAABD8nSsN--hM7kfA-W_mzuWw?e=4H1aMU


Friday, June 13, 2025

 A previous document discussed ways to improve the performance of detecting and cataloging objects into the drone world database from drone images. A drone video clip consists of many overlapping frames, and repeating the vectorization and analysis of images, and of objects within images, is not only time consuming and expensive but also unnecessary when duplicates or previously visited objects can be recognized. This approach still advances frame by frame but skips the processing wherever possible.

Another approach is to statistically sample temporally distributed images from the drone world depending on the speed of the drone, the pattern of flying, GPS, and timestamps, but this additional information may need to be sourced externally. While some drones provide correlation keys and additional information can be found online, the premise of our approach did not require this optional information.
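If flight telemetry is available, a minimal sketch of such temporal sampling could look like the following; the speed, frame rate, ground footprint, and overlap fraction are illustrative assumptions, not values from the platform.

# Temporal sampling sketch: keep only enough frames to preserve a target
# along-track overlap, assuming speed, frame rate, and footprint are known.
def sample_stride(speed_mps, frame_rate_hz, footprint_m, overlap_fraction=0.2):
    """Return how many frames to skip so that consecutive kept frames
    overlap by roughly overlap_fraction of the along-track footprint."""
    ground_per_frame_m = speed_mps / frame_rate_hz
    advance_needed_m = footprint_m * (1.0 - overlap_fraction)
    return max(1, int(advance_needed_m / ground_per_frame_m))

# Example: 12 m/s at 30 fps with an 80 m along-track footprint.
stride = sample_stride(12.0, 30.0, 80.0)
frame_indices = list(range(0, 900))      # placeholder for decoded frame indices
sampled = frame_indices[::stride]
print(f"stride={stride}, kept {len(sampled)} of {len(frame_indices)} frames")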

A different approach to improving performance would be to run AI models that determine highlights from drone videos, reducing the amount of video to split into frames and analyze while still achieving high precision and recall of drone world objects. Some examples of generating highlights from videos are available commercially. For example, the VEED.IO online tool uses AI to extract the best moments from your videos and turn them into highlights; it allows you to trim clips, rearrange footage, and add text or music. OpusClip is an AI-driven highlight video maker that automatically selects the most engaging moments from your footage; it is designed to save time and enhance video quality. Pictory specializes in creating highlight reels from long videos and also offers automatic captioning and background music integration. Kapwing is a versatile online editor that integrates AI tools to streamline video editing and is useful for creating short, attention-grabbing clips. Powder.AI is a real-time gameplay automontage tool that can run locally, without cloud services, on a Windows PC.

While custom models, fine-tuning, reasoning, and agentic frameworks can help with the selection of frames for a new condensed video clip, significant performance gains can come from more context-aware or thresholding parameters that the algorithms can work with. While these remain optional, including them wherever possible, whether to reduce work or to do deeper analysis, can significantly improve overall processing speed and accuracy.

Reference: previous article: https://1drv.ms/w/c/d609fb70e39b65c8/EXBBHwTJngVMiCkRUA7rv0MBXhgxzpE4PyWz_8pbHH04cA?e=16wWYh

Sample:

import requests

import time

import os

# Replace these with your actual values

AZURE_VIDEO_INDEXER_API_URL = "https://api.videoindexer.ai"

AZURE_LOCATION = "westus2" # e.g., "westus2"

AZURE_ACCOUNT_ID = "your-account-id"

AZURE_API_KEY = "your-api-key"

VIDEO_FILE_PATH = "path/to/your/video.mp4"

# Step 1: Get an access token

def get_access_token():

    url = f"{AZURE_VIDEO_INDEXER_API_URL}/auth/{AZURE_LOCATION}/Accounts/{AZURE_ACCOUNT_ID}/AccessToken"

    headers = {

        "Ocp-Apim-Subscription-Key": AZURE_API_KEY

    }

    response = requests.get(url, headers=headers)

    return response.text.strip('"')

# Step 2: Upload video and start indexing

def upload_and_index_video(video_file_path, access_token):

    video_name = os.path.basename(video_file_path)

    url = f"{AZURE_VIDEO_INDEXER_API_URL}/{AZURE_LOCATION}/Accounts/{AZURE_ACCOUNT_ID}/Videos?name={video_name}&accessToken={access_token}&privacy=Private"

    with open(video_file_path, 'rb') as video_file:

        files = {'file': video_file}

        response = requests.post(url, files=files)

    return response.json()

# Step 3: Wait for indexing to complete and get insights

def get_video_insights(access_token, video_id):

    url = f"{AZURE_VIDEO_INDEXER_API_URL}/{AZURE_LOCATION}/Accounts/{AZURE_ACCOUNT_ID}/Videos/{video_id}/Index?accessToken={access_token}"

    while True:

        response = requests.get(url)

        data = response.json()

        if data['state'] == 'Processed':

            return data

        if data['state'] == 'Failed':

            raise RuntimeError(f"Indexing failed for video {video_id}")

        time.sleep(10) # Wait 10 seconds before checking again

# Step 4: Main workflow

access_token = get_access_token()

video_data = upload_and_index_video(VIDEO_FILE_PATH, access_token)

video_id = video_data['id']

insights = get_video_insights(access_token, video_id)

print("Video highlights and key insights:")

print("=" * 50)

# Extract highlights: keyframes, topics, and summarization

if 'summarizedInsights' in insights:

    for theme in insights['summarizedInsights']['themes']:

        print(f"Theme: {theme['name']}")

        for highlight in theme['keyframes']:

            print(f" Keyframe at {highlight['adjustedStart']} to {highlight['adjustedEnd']}")

            print(f" Thumbnail: {highlight['thumbnailId']}")

            print(f" Description: {highlight.get('description', 'No description')}")

else:

    print("No summarization available. See full insights:", insights)



Using Azure AI video indexer interface:

https://videoindexer.ai/media/library

Upload and index

100% 1 file uploaded

File:

main3-trim-fast-local

Video source language:

English

Indexing preset:

Standard video + audio

Included models: Audio effects, Closed captions, Keyframes, Audio transcription, Object detection, Text-based emotions, Named entities, Keywords, Visual labels, Character recognition (OCR), Rolling credits, Speakers, Topics

Excluded models: Face detection, Celebrities, Custom faces, Editorial shot type

Privacy:

Private

Streaming quality:

Single bitrate

Output:

Azure AI Video Indexer


main3-trim-fast-local

Duration: 00:09:10

97 segments selected

00:07:31 - 00:07:33

building

00:07:35 - 00:07:36

building

00:07:38 - 00:07:38

building

00:07:44 - 00:07:44

outdoor

00:07:48 - 00:07:54

building

00:07:49 - 00:08:04

outdoor

00:07:50 - 00:07:50

car

00:07:57 - 00:08:01

building

00:08:02 - 00:08:02

car

vehicle

00:08:04 - 00:08:05

building

00:08:11 - 00:08:11

building

00:08:12 - 00:08:12

aerial photography

00:08:13 - 00:08:13

text

00:08:16 - 00:08:16

text

00:08:19 - 00:08:19

building

00:08:21 - 00:08:24

building

00:08:22 - 00:08:22

text

00:08:24 - 00:08:25

outdoor

00:08:25 - 00:08:25

text

00:08:26 - 00:08:29

building

00:08:27 - 00:08:31

outdoor

text

00:08:28 - 00:08:28

car

00:08:31 - 00:08:38

building

00:08:34 - 00:08:36

outdoor

00:08:38 - 00:08:42

outdoor

00:08:39 - 00:08:39

window

00:08:40 - 00:08:45

building

text

00:08:46 - 00:08:46

text

00:08:51 - 00:08:54

building

00:08:52 - 00:09:09

outdoor

00:08:54 - 00:08:55

car

00:08:55 - 00:08:56

vehicle

00:08:57 - 00:09:09

building


Thursday, June 12, 2025

 The following serves as an illustration to remove duplicates from a continuous stream of aerial images:

import cv2

import imagehash

import numpy as np

from PIL import Image

from collections import deque

from azure.search.documents.models import VectorizedQuery

class ImageDeduplicator:

    def __init__(self, buffer_size=100):

        """Initialize a ring buffer for tracking image hashes."""

        self.buffer_size = buffer_size

        self.hash_buffer = deque(maxlen=buffer_size)

        self.vector_buffer = deque(maxlen=buffer_size)

        self.threshold = 0.97 # as close to an exact match of 1.0

    def compute_hash(self, image):

        """Compute perceptual hash of an image."""

        return imagehash.phash(Image.fromarray(image))

    def is_duplicate(self, image):

        """Check if the image is a duplicate."""

        img_hash = self.compute_hash(image)

        if img_hash in self.hash_buffer:

            return True

        self.hash_buffer.append(img_hash)

        return False

    def is_visited(self, vector):

        index = 0

        for existing in reversed(self.vector_buffer):

            # print(existing)

            score = self.cosine_similarity(existing, vector)

            if score > self.threshold:

                return True

            index += 1

        self.vector_buffer.append(vector)

        return False

    def get_hash_buffer_len(self):

        return len(self.hash_buffer)

    def get_vector_buffer_len(self):

        return len(self.vector_buffer)

    def cosine_similarity(self, vec1, vec2):

        """Computes cosine similarity between two vectors."""

        dot_product = np.dot(vec1, vec2)

        norm_vec1 = np.linalg.norm(vec1)

        norm_vec2 = np.linalg.norm(vec2)

        return dot_product / (norm_vec1 * norm_vec2)

    def euclidean_distance(self, vec1, vec2):

        """Computes Euclidean distance between two vectors."""

        value = np.linalg.norm(np.array(vec1) - np.array(vec2))

        print(f"Euclidean={value}")

        return value

def is_duplicate(destination_client, vector):

    vector_query = VectorizedQuery(vector=vector,

                                  k_nearest_neighbors=3,

                                  exhaustive=True,

                                  fields="vector")

    results = destination_client.search(

        search_text=None,

        vector_queries=[vector_query],

        select=["id", "description", "vector"],

        include_total_count=True,

        top=4

    )

    if results is not None and results.get_count() > 0:

        best = 0

        for match in results:

            match_vector = match["vector"]

            # Cosine similarity computed inline since this helper sits outside the class.

            score = np.dot(vector, match_vector) / (np.linalg.norm(vector) * np.linalg.norm(match_vector))

            if score > best:

                best = score

        if best > 0.8:

            return True

    return False

Reference: previous posts for context.

#codingexercise: https://1drv.ms/w/c/d609fb70e39b65c8/Echlm-Nw-wkggNYlIwEAAAABD8nSsN--hM7kfA-W_mzuWw?e=vGBXc1 

Wednesday, June 11, 2025

 This is a summary of the book titled “The Startup Community Way,” written by Brad Feld and Ian Hathaway and published by Wiley in 2020. Innovators, and especially those who become entrepreneurs, are in their endeavors for the long haul. With ubiquitous internet and technological advancements, they can now launch their digitally enabled business anywhere and can even do it in localized groups. These startup communities follow their own principles and logic within collaborative, inclusive networks. This book helps such daring entrepreneurs to form and lead their own startup communities. Unlike traditional businesses, the paradigm of startup communities avoids hierarchies and is built on networks and relationships. They are complex systems that value openness and collaboration. A successful one is singular and irreplaceable. Relationships with other startup communities enable them to thrive. Leaders in startup communities become role models and inspire future entrepreneurs. Therefore, this guide becomes valuable to foster such an ecosystem.

1. The Principles of Startup Communities

Startup communities thrive due to widespread internet access and technological advancements, allowing entrepreneurs to launch businesses anywhere. These communities follow fundamental principles: leaders are entrepreneurs with long-term commitment, participation is inclusive and continuous, and success is strengthened through connections with other communities.

2. Networks Over Hierarchies

Unlike large institutions that operate through top-down hierarchies, startup communities function through networks based on trust, shared resources, and collaborative creativity. These ecosystems flourish when leaders encourage openness and communication rather than rigid control.

3. Geographical Considerations

Although startup communities can form anywhere, location still matters. Successful communities benefit from access to talent, funding, corporate support, and a favorable legal and economic environment. A startup community integrates with the surrounding business network and entrepreneurial ecosystem.

4. Founding Entrepreneurs as Priority

A startup community’s success depends on prioritizing the needs of entrepreneurs. A thriving ecosystem includes multiple communities, but each startup group retains a distinct identity. The broader network benefits when entrepreneurs receive support from external entities like customers, investors, and service providers.

5. The Complexity of Startup Communities

These communities operate as complex adaptive systems—nonlinear, dynamic networks requiring collaboration and openness. They evolve constantly due to market shifts, technological advancements, and changing consumer demands. Success depends on adaptability rather than rigid strategies.

6. Entrepreneurial Success and Future Leadership

Entrepreneurial success generates wealth, experience, and knowledge that fuel future startups. However, success is not merely about increasing resources—it’s about having the right leadership and vision at the right time. Past triumphs serve as guidance, but startup communities don’t follow predictable trajectories.

7. Uniqueness and Irreplaceability

No two startup communities are identical and attempts to replicate Silicon Valley’s model are misguided. Leaders can’t control outcomes; they can only cultivate favorable conditions for success. Since startup ecosystems thrive unpredictably, adaptation and continuous improvement matter more than copying existing frameworks.

8. Interconnectivity Among Startup Communities

These communities don’t exist in isolation. Connectivity strengthens performance through shared knowledge, resources, and collaboration. Relationships with other startup networks—both local and global—help startups grow stronger.

9. Entrepreneurs as Role Models

Startup leaders must embody integrity, openness, and mentorship. Strong leadership fosters trust and motivates future entrepreneurs to build successful businesses. Leaders must eliminate negative influences while supporting individuals who drive positive change.

This book serves as a guide for entrepreneurs to understand and navigate the complexities of startup communities, ensuring long-term success through collaboration, adaptability, and network-driven leadership.


Tuesday, June 10, 2025

 The use of UAV swarms is better suited to surveying, remote sensing, disaster preparedness and response (such as wildfires), and workloads that make use of LiDAR data. Power line and windmill monitoring companies are especially well positioned to use a fleet of drones. Besides, there are over ten publicly traded LiDAR companies in the US, and many more across Europe and Asia, that make use of fleets of drones, photogrammetry, and LiDAR data. Those using simultaneous localization and mapping (SLAM), structure-from-motion (SfM), and semantic segmentation with CNNs are possibly building their own knowledge bases, so it would not hurt to show them one built in the cloud incrementally, observably, and in near real-time. With GPS and satellite imagery, most terrains are navigable, but vision-based navigation enables autonomous flight, one day hopefully at all heights.

Sample SLAM to compare images:

import cv2

import numpy as np


# Load the aerial images

image1 = cv2.imread("hoover_tower_forward.jpg", cv2.IMREAD_GRAYSCALE)

image2 = cv2.imread("hoover_tower_reverse.jpg", cv2.IMREAD_GRAYSCALE)

# Initialize ORB feature detector

orb = cv2.ORB_create()

# Detect keypoints and descriptors

keypoints1, descriptors1 = orb.detectAndCompute(image1, None)

keypoints2, descriptors2 = orb.detectAndCompute(image2, None)

# Use OpenCV's BFMatcher with Hamming distance (suited to ORB's binary descriptors)

matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

matches = sorted(matcher.match(descriptors1, descriptors2), key=lambda m: m.distance)

# Draw matches

output_image = cv2.drawMatches(image1, keypoints1, image2, keypoints2, matches[:50], None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

# Display the results

cv2.imshow("Matched Features", output_image)

cv2.waitKey(0)

cv2.destroyAllWindows()

Emerging trends:

Constructing an incremental “knowledge base” of a landscape from drone imagery merges ideas from simultaneous localization and mapping (SLAM), structure-from-motion (SfM), and semantic segmentation. Incremental SLAM and 3D reconstruction is suggested in the ORB-SLAM2 paper by Mur-Artal and Tardós (2017), where a 3D map is built by estimating camera poses and reconstructing scene geometry from monocular, stereo, or RGB-D inputs. Such a SLAM framework can also be extended by fusing in semantic cues to enrich the resulting map with object and scene labels. The idea of including semantic information in 3D reconstruction is demonstrated by SemanticFusion, written by McCormac et al. for ICRA 2017, where a convolutional neural network (CNN) performs semantic segmentation and the system fuses the semantic labels into a surfel-based 3D map, thereby transforming a purely geometric reconstruction into a semantically rich representation of a scene. SemanticFusion helps to label parts of the scene, turning a raw point cloud or mesh into a knowledge base where objects, surfaces, and even relationships can be recognized and queried.

SfM, on the other hand, helps to stitch multi-view data into a consistent 3D model, and the techniques are particularly relevant for drone applications. Incremental SfM pipelines can populate information about a 3D space as data arrives in the pipeline, and the drones can “walk the grid” around an area of interest to make sure sufficient data is captured to build the 3D model from 0 to 100%, with progress tracked along the way. A semantic layer is not part of SfM processing itself, but semantic segmentation or object detection can be layered independently over the purely geometric data. Layering on additional modules for, say, object detection, region classification, or even reasoning over scene changes makes it possible to start with basic geometric layouts and optionally build toward a comprehensive knowledge base. Algorithms that crunch these sensor data, whether images or LiDAR, must operate in real-time rather than on periodic batch analysis. They can, however, be dedicated to specific domains such as urban monitoring, agricultural surveying, or environmental monitoring for additional context-specific knowledge.
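As a concrete, if simplified, illustration of the geometric side of such an incremental pipeline, the sketch below estimates relative camera motion between two consecutive drone frames with ORB features and essential-matrix decomposition in OpenCV; the image paths and the intrinsic matrix K are placeholders, and a full SfM/SLAM system would add triangulation, loop closure, and map fusion on top.

import cv2
import numpy as np

# Placeholder inputs: two consecutive drone frames and an assumed intrinsic matrix.
img1 = cv2.imread("frame_000.jpg", cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread("frame_001.jpg", cv2.IMREAD_GRAYSCALE)
K = np.array([[1000.0, 0.0, 960.0],
              [0.0, 1000.0, 540.0],
              [0.0, 0.0, 1.0]])        # assumed focal length and principal point

# Detect and match ORB features between the two frames.
orb = cv2.ORB_create(2000)
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)
matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = sorted(matcher.match(des1, des2), key=lambda m: m.distance)

pts1 = np.float32([kp1[m.queryIdx].pt for m in matches])
pts2 = np.float32([kp2[m.trainIdx].pt for m in matches])

# Relative camera motion between the frames; triangulated points from
# successive pairs would then be fused into the incremental 3D model.
E, mask = cv2.findEssentialMat(pts1, pts2, K, method=cv2.RANSAC, threshold=1.0)
_, R, t, mask_pose = cv2.recoverPose(E, pts1, pts2, K, mask=mask)
print("Rotation:\n", R)
print("Translation direction:\n", t.ravel())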

Addendum (a short detector comparison sketch follows this list):

• SIFT is best for high-accuracy applications like object recognition.

• ORB is ideal for real-time applications like SLAM (Simultaneous Localization and Mapping).

• SURF balances speed and accuracy, making it useful for tracking and image stitching.
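A small comparison sketch along these lines is shown below; it assumes OpenCV 4.4 or later (where SIFT ships in the main package) and omits SURF, which is typically only available in opencv-contrib builds; the image path is a placeholder.

import cv2

# Compare detector output on a single aerial frame; the image path is a placeholder.
img = cv2.imread("hoover_tower_forward.jpg", cv2.IMREAD_GRAYSCALE)

sift = cv2.SIFT_create()              # included in OpenCV >= 4.4.0
orb = cv2.ORB_create(nfeatures=2000)

kp_sift, des_sift = sift.detectAndCompute(img, None)
kp_orb, des_orb = orb.detectAndCompute(img, None)

# SIFT gives 128-dimensional float descriptors (match with NORM_L2);
# ORB gives 32-byte binary descriptors (match with NORM_HAMMING).
print(f"SIFT keypoints: {len(kp_sift)}, descriptors: {des_sift.shape}")
print(f"ORB  keypoints: {len(kp_orb)}, descriptors: {des_orb.shape}")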


Monday, June 9, 2025

 The following serves as an illustration to remove duplicates from a continuous stream of aerial images:

import cv2

import imagehash

import numpy as np

from PIL import Image

from collections import deque


def perceptual_hash(image_path):

    img = Image.open(image_path)

    return imagehash.phash(img)

class ImageDeduplicator:

    def __init__(self, buffer_size=100):

        """Initialize a ring buffer for tracking image hashes."""

        self.buffer_size = buffer_size

        self.hash_buffer = deque(maxlen=buffer_size)

    def compute_hash(self, image):

        """Compute perceptual hash of an image."""

        return imagehash.phash(Image.fromarray(image))

    def is_duplicate(self, image):

        """Check if the image is a duplicate."""

        img_hash = self.compute_hash(image)

        if img_hash in self.hash_buffer:

            return True

        self.hash_buffer.append(img_hash)

        return False

def process_image_stream(image_stream):

    """Process a stream of images and eliminate duplicates."""

    deduplicator = ImageDeduplicator()

    unique_images = []

    for image in image_stream:

        if not deduplicator.is_duplicate(image):

            unique_images.append(image)

    return unique_images

# Example usage

image_paths = ["image1.jpg", "image2.jpg", "image3.jpg"] # Replace with actual image paths

image_stream = [cv2.imread(img) for img in image_paths]

unique_images = process_image_stream(image_stream)

print(f"Unique images count: {len(unique_images)}")

Reference: previous article for context: 

Schema: