Sunday, August 3, 2025

This explains why location services from public cloud providers are unreliable for aerial drone images unless they use custom models trained to recognize the scene by its features. For example, tagging an aerial frame with Azure AI Vision and then searching Azure Maps for those tags returns coordinates nowhere near the actual scene:

import requests 
import os 
from azure.cognitiveservices.vision.computervision import ComputerVisionClient 
from msrest.authentication import CognitiveServicesCredentials 
from PIL import Image 
 
# === Azure Computer Vision credentials === 
vision_api_key = os.getenv("AZURE_AI_VISION_API_KEY") 
vision_endpoint = os.getenv("AZURE_AI_VISION_ENDPOINT") 
computervision_client = ComputerVisionClient(vision_endpoint, CognitiveServicesCredentials(vision_api_key)) 
 
# === Azure Maps credentials === 
azure_maps_key = os.getenv("AZURE_MAPS_SUBSCRIPTION_KEY") 
 
# === Load local image and get tags === 
image_path = "frame5.jpg" 
with open(image_path, "rb") as img_stream: 
    analysis = computervision_client.analyze_image_in_stream( 
        img_stream, 
        visual_features=["Tags"] 
    ) 
 
tags = [tag.name for tag in analysis.tags if tag.confidence > 0.5] 
 
# === Azure Maps Search API for landmark coordinates === 
def get_coordinates_from_azure_maps(landmark, azure_key): 
    url = f"https://atlas.microsoft.com/search/address/json" 
    params = { 
        "api-version": "1.0", 
        "subscription-key": azure_key, 
        "query": landmark 
    } 
    response = requests.get(url, params=params) 
    data = response.json() 
    results = data.get("results", []) 
    if results: 
        position = results[0]["position"] 
        return (position["lat"], position["lon"]) 
    return None 
tags = ["circular plaza"] 
# === Display matched coordinates === 
for tag in tags: 
    coords = get_coordinates_from_azure_maps(tag, azure_maps_key) 
    if coords: 
        print(f"Landmark: {tag}, Latitude: {coords[0]}, Longitude: {coords[1]}") 
    else: 
        print(f"No match found for tag: {tag}") 
 
""" 
Output: 
Landmark: outdoor, Latitude: 39.688359, Longitude: -84.235051 
Landmark: text, Latitude: 17.9739757, Longitude: -76.7856201 
Landmark: building, Latitude: 23.3531395, Longitude: -75.0597782 
Landmark: car, Latitude: 18.5366554, Longitude: -72.4020263 
Landmark: urban design, Latitude: 48.4732981, Longitude: 35.0019145 
Landmark: metropolitan area, Latitude: 55.6033166, Longitude: 13.0013362 
Landmark: urban area, Latitude: 8.448839, Longitude: -13.258005 
Landmark: neighbourhood, Latitude: 54.8811412, Longitude: -6.2779797 
Landmark: intersection, Latitude: 34.899284, Longitude: -83.392743 
Landmark: vehicle, Latitude: 38.6151446, Longitude: -121.273215 
Landmark: residential area, Latitude: 9.982962, Longitude: 76.2954466 
Landmark: city, Latitude: 19.4326773, Longitude: -99.1342112 
Landmark: traffic, Latitude: 23.5786896, Longitude: 87.1950397 
Landmark: street, Latitude: 51.1250213, Longitude: -2.7313088 
Landmark: aerial, Latitude: 34.95435, Longitude: -117.826011 

Not even close to the nearest neighborhood: https://www.google.com/maps?q=42.3736,-71.1097
"""
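To quantify just how far off these lookups are, a quick sanity check can compare each returned coordinate against the neighborhood coordinate linked above (42.3736, -71.1097). The haversine helper below is a minimal sketch added for illustration; it is not part of the Azure SDKs used above.

import math

def haversine_km(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometers between two (lat, lon) points."""
    r = 6371.0  # mean Earth radius in km
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlmb = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlmb / 2) ** 2
    return 2 * r * math.asin(math.sqrt(a))

reference = (42.3736, -71.1097)  # neighborhood coordinate cited above
for tag in tags:
    coords = get_coordinates_from_azure_maps(tag, azure_maps_key)
    if coords:
        error_km = haversine_km(coords[0], coords[1], *reference)
        print(f"{tag}: returned {coords}, about {error_km:.0f} km from the scene")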
And when trying Google Cloud Vision's landmark detection, nothing is returned:

gcloud ml vision detect-landmarks frame5.jpg
{
  "responses": [
    {}
  ]
}
 
A third-party landmark classifier such as Nyckel fares no better, returning only a low-confidence guess:

import nyckel
import os

nyckel_client_id = os.getenv("NYCKEL_CLIENT_ID")
nyckel_client_secret = os.getenv("NYCKEL_CLIENT_SECRET")
credentials = nyckel.Credentials(nyckel_client_id, nyckel_client_secret)
image_url = os.getenv("CIRCULAR_BUILDING_SAS_URL").strip('"')
response = nyckel.invoke("landmark-identifier", image_url, credentials)
print(response)

# Output:
# {'labelName': 'Yellowstone National Park', 'labelId': 'label_wottnvl9ole6ch4o', 'confidence': 0.02}

 

Or the landmarks may not be detected at all: 
import requests 
import os 
from azure.cognitiveservices.vision.computervision import ComputerVisionClient 
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes 
from msrest.authentication import CognitiveServicesCredentials 
from PIL import Image 
from pprint import pprint 
 
# === Azure Computer Vision credentials === 
vision_api_key = os.getenv("AZURE_AI_VISION_API_KEY") 
vision_endpoint = os.getenv("AZURE_AI_VISION_ENDPOINT") 
computervision_client = ComputerVisionClient(vision_endpoint, CognitiveServicesCredentials(vision_api_key)) 
scene_url = os.getenv("CIRCULAR_BUILDING_SAS_URL").strip('"') 
 
def get_landmark_info(image_path_or_url): 
    """ 
    Detects landmarks in an aerial image and returns detailed metadata. 
    Supports both local file paths and image URLs. 
    """ 
    visual_features = [VisualFeatureTypes.categories, VisualFeatureTypes.description, VisualFeatureTypes.tags] 
 
    if image_path_or_url.startswith("http"): 
        analysis = computervision_client.analyze_image(image_path_or_url, visual_features) 
    else: 
        with open(image_path_or_url, "rb") as image_stream: 
            analysis = computervision_client.analyze_image_in_stream(image_stream, visual_features) 
 
    # Extract landmark-related tags and descriptions 
    landmark_tags = [tag.name for tag in analysis.tags if "landmark" in tag.name.lower()] 
    description = analysis.description.captions[0].text if analysis.description.captions else "No description available" 
 
    result = { 
        "description": description, 
        "landmark_tags": landmark_tags, 
        "categories": [cat.name for cat in analysis.categories] 
    } 
 
    return result 
 
# Example usage 
if __name__ == "__main__": 
    landmark_data = get_landmark_info(scene_url) 
    pprint(landmark_data) 
 
 
# Output:
# {'categories': ['abstract_', 'others_', 'outdoor_', 'text_sign'],
#  'description': 'graphical user interface',
#  'landmark_tags': []}
#
# The actual location is 42.371305, -71.117339
# (Orthodox Minyan at Harvard Hillel, 52 Mt Auburn St, Cambridge, MA 02138)
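The exception called out at the top of this post, custom models keyed to scene features, can be approximated with a nearest-neighbor match of the drone frame against reference imagery whose coordinates are known. The snippet below is only a sketch of that idea: the reference tile filenames and their coordinates are made-up placeholders, and the grayscale-histogram descriptor is a deliberately crude stand-in for a learned embedding.

import numpy as np
from PIL import Image

def scene_descriptor(path, size=(128, 128), bins=64):
    """Crude scene feature: normalized grayscale histogram of a downsampled image."""
    img = Image.open(path).convert("L").resize(size)
    hist, _ = np.histogram(np.asarray(img), bins=bins, range=(0, 255), density=True)
    return hist

# Hypothetical reference tiles with known centers (placeholders, not real data)
reference_tiles = {
    "tile_harvard_square.jpg": (42.3736, -71.1190),
    "tile_boston_back_bay.jpg": (42.3505, -71.0763),
}

def locate_by_scene_features(frame_path):
    """Return the known coordinates of the most similar reference tile."""
    frame_vec = scene_descriptor(frame_path)
    best_tile = min(reference_tiles,
                    key=lambda t: np.linalg.norm(scene_descriptor(t) - frame_vec))
    return reference_tiles[best_tile]

# print(locate_by_scene_features("frame5.jpg"))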

 

And the drone-provided GPS information is the most accurate in this regard, for example:
import json 
import numpy as np 
 
# Replace this with actual GPS bounds for transformation 
# Example: top-left, top-right, bottom-right, bottom-left in pixel & GPS 
pixel_bounds = np.array([[0, 0], [4096, 0], [4096, 4096], [0, 4096]]) 
gps_bounds = np.array([[39.735, -104.997], [39.735, -104.989], 
                       [39.729, -104.989], [39.729, -104.997]]) 
 
# Compute affine transform (scale/rotation plus translation) from pixel to GPS
# by augmenting the pixel coordinates with a constant 1 column.
pixel_aug = np.hstack([pixel_bounds, np.ones((len(pixel_bounds), 1))])
A = np.linalg.lstsq(pixel_aug, gps_bounds, rcond=None)[0]

def pixel_to_gps(coord):
    """Map a pixel coordinate to GPS using the affine approximation"""
    return tuple(np.dot([coord[0], coord[1], 1.0], A))
 
def parse_json_gps(json_data): 
    gps_coords = [] 
    for frame in json_data: 
        if frame is None: 
            continue 
        frame_coords = [pixel_to_gps(coord) for coord in frame] 
        gps_coords.append(frame_coords) 
    return gps_coords 
 
# Example JSON input 
data = [None, [[3132, 4151], [3354, 2924], [4044, 3056], [3824, 4275]], 
              [[3095, 4164], [3318, 2939], [4006, 3073], [3787, 4289]]] 
 
gps_output = parse_json_gps(data) 
for i, frame in enumerate(gps_output): 
    print(f"Frame {i+1}:") 
    for lat, lon in frame: 
        print(f"Latitude: {lat:.6f}, Longitude: {lon:.6f}") 


#codingexercise: https://1drv.ms/w/c/d609fb70e39b65c8/EfWTrWDGqOxFvkw4sb48NWUBmxjiu90rja-WxBLPsbgS0Q?e=quyCp2

Saturday, August 2, 2025

The previous posts explained how to leverage the scale resolution of known vehicles to compute distances between landmarks given by their bounding boxes in aerial drone images, but that is not the only way to calculate distance. Since aerial drone images are zoomed-in views of the same cities and urban environments, specifically in North America, covered by well-known mapping services, the images can be analyzed automatically for cultural, seasonal, economic, and regional style cues to associate with a latitude and longitude. Even if it is not an exact match, an approximation of the city in which the aerial image was shot by the drone can help narrow down the suburb in which the drone was moving. For example, the following code enables a machine to understand which city the drone was flying over when a frame captured from its video is analyzed.

from geospyer import GeoSpy
import os

gemini_api_key = os.getenv("GEMINI_API_KEY").strip('"')

def get_nearest_latitude_longitude(image_path="frame23.jpg"):
    # Initialize GeoSpy with your Gemini API key
    geospy = GeoSpy(api_key=gemini_api_key)

    # Analyze the image
    result = geospy.locate(image_path=image_path)

    # Check for errors
    if "error" in result:
        print(f"Error: {result['error']}")
    else:
        # Extract location info
        if "locations" in result and result["locations"]:
            location = result["locations"][0]
            lat = location["coordinates"]["latitude"]
            lon = location["coordinates"]["longitude"]
            print(f"Estimated Coordinates: Latitude = {lat}, Longitude = {lon}")

            # Optional: Open in Google Maps
            # import webbrowser
            maps_url = f"https://www.google.com/maps?q={lat},{lon}"
            print(maps_url)
            # webbrowser.open(maps_url)
            return lat, lon
        else:
            print("No location data found.")
            return None, None

print(get_nearest_latitude_longitude())

# Output:
# Estimated Coordinates: Latitude = 42.3736, Longitude = -71.1097
# https://www.google.com/maps?q=42.3736,-71.1097
# (42.3736, -71.1097)

And as with earlier capabilities, such modular functions can be easily included in the list of function tools to augment agentic retrieval on the analysis side of the aerial drone image processing pipeline.
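For instance, the function above could be registered alongside other modular analysis functions in a tool list that an agent consults. The dict-based registry and dispatcher below are a minimal, framework-agnostic sketch; the schema fields are illustrative assumptions rather than the API of any specific agent SDK.

# Minimal, framework-agnostic sketch of a function-tool registry (illustrative only)
function_tools = {
    "get_nearest_latitude_longitude": {
        "description": "Estimate the latitude/longitude a drone video frame was captured over.",
        "parameters": {"image_path": "path to an extracted video frame (JPEG)"},
        "callable": get_nearest_latitude_longitude,
    },
    # ...other modular analysis functions (distance estimation, landmark lookup) register here
}

def dispatch_tool(name, **kwargs):
    """Invoke a registered analysis function by name, as an agent runtime might."""
    tool = function_tools[name]
    return tool["callable"](**kwargs)

# Example: the agent decides geolocation is needed for a frame
# lat, lon = dispatch_tool("get_nearest_latitude_longitude", image_path="frame23.jpg")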


Friday, August 1, 2025

This is a summary of the book titled “Artificial Intelligence in Accounting, Auditing and Finance,” written by Michael Adelowotan and Collins Leke and published by Springer in 2025. The book covers the concepts and applications of AI in the financial realm through real-world case studies and explanations spanning auditing, taxation, risk management, and financial forecasting. AI transforms finance through automation, real-time analysis, and predictive power. While it can bring about inequity in insights due to how data is organized and compartmentalized, its speed, accuracy, and efficiency are unmatched. Especially when combined with traditional methods, its predictive capability can unlock the next level of insights. It does come with challenges and requires workforce upskilling.

Artificial intelligence has ushered in the Fourth Industrial Revolution within the financial domain, reshaping the landscape of accounting, auditing, and corporate finance. Adelowotan and Leke trace this transformation from the era of manual bookkeeping to modern cloud-based platforms, highlighting AI's leap beyond mere automation. Unlike past tools, AI brings cognitive abilities—pattern recognition, adaptive learning, and predictive analysis—that fundamentally redefine financial decision-making. 

The book explores AI’s impact across financial functions, beginning with accounting. Here, machine learning algorithms automate tasks like transaction categorization and real-time financial reporting. Intelligent document processing extracts and verifies information seamlessly, replacing tedious manual input with high-speed precision. In auditing, AI introduces predictive models capable of continuous data monitoring, anomaly detection, and automated evidence verification—enhancing both the depth and reliability of audit conclusions. Tools like EY’s Helix and PwC’s Aura demonstrate how audit workflows are increasingly intertwined with AI systems. 

Financial trading experiences a parallel evolution. Deep learning and reinforcement learning help optimize trade strategies and forecast market movements based on complex indicators and sentiment analysis. This approach enables traders to respond more dynamically to shifting conditions, furthering the edge that AI provides. 

Beyond operational efficiency, AI enables a more strategic role in financial analysis. With real-time processing of vast and varied datasets, AI transforms financial reporting into a forward-looking exercise. Algorithms detect subtle trends and risks—like signs of distress or inconsistencies in strategic narratives—while freeing analysts to focus on high-level interpretation. Yet this comes with caveats. Decision-making becomes centralized, favoring those who control data and systems. “Black box” models—opaque algorithms lacking transparency—challenge accountability and raise ethical concerns about bias and fairness. 

In corporate finance, AI improves forecasting for capital budgeting, optimizes funding strategies, and informs dividend policies. Major firms such as JPMorgan Chase, GE, and BlackRock already rely on AI platforms for planning and risk management. However, overdependence on AI poses risks, including diminished human judgment, data privacy concerns, and potential algorithmic bias. 

Time series forecasting benefits from advanced AI techniques such as neural networks and LSTMs, which capture complex patterns over time. These models offer promising improvements over traditional methods, although their success hinges on tailored implementations. Without proper adaptation, complexity may hinder performance, highlighting the need for thoughtful integration. 

The authors don’t shy away from the profession’s challenges. Automation threatens traditional accounting roles, creating a pressing need for upskilling and digital literacy. Ethical risks—bias, privacy, and opacity—demand stronger oversight and a commitment to fairness. The book’s central message is clear: AI offers transformative tools, but financial professionals must evolve alongside the technology, embracing interdisciplinary skills and ethical frameworks to truly unlock its potential. 

#codingexercise:

CodingExercise-08-01-2025.docx

CodingExercise-08-01b-2025.docx



Wednesday, July 30, 2025

The following code sample shows how to leverage the scale resolution of a known vehicle to compute distances between objects given by their bounding boxes:

import numpy as np

def compute_pixel_distance(box1, box2):
    """Compute Euclidean distance between the centers of two bounding boxes"""
    x1_center = (box1[0] + box1[2]) / 2
    y1_center = (box1[1] + box1[3]) / 2
    x2_center = (box2[0] + box2[2]) / 2
    y2_center = (box2[1] + box2[3]) / 2
    return np.sqrt((x2_center - x1_center)**2 + (y2_center - y1_center)**2)

def estimate_scale(reference_box, vehicle_type):
    """Estimate feet per pixel using a reference vehicle box"""
    vehicle_lengths = {
        'motorcycle': 5,
        'car': 20,
        'truck': 40
    }
    pixel_length = reference_box[2] - reference_box[0]  # width in pixels
    return vehicle_lengths[vehicle_type.lower()] / pixel_length

def estimate_actual_distance(landmark_box1, landmark_box2, reference_box, vehicle_type):
    pixel_dist = compute_pixel_distance(landmark_box1, landmark_box2)
    scale = estimate_scale(reference_box, vehicle_type)
    return pixel_dist * scale

# Example inputs:
landmark_box1 = (100, 200, 180, 280)
landmark_box2 = (400, 450, 480, 530)
reference_vehicle_box = (300, 300, 340, 340)  # e.g., a car seen from side view
vehicle_type = 'car'

actual_distance_feet = estimate_actual_distance(landmark_box1, landmark_box2, reference_vehicle_box, vehicle_type)
print(f"Estimated actual distance between landmarks: {actual_distance_feet:.2f} feet")

This is a continuation of a previous article on agentic retrieval on the analysis side of the aerial drone image processing pipeline, using modular functions for specific insights into a scene.