Thursday, March 19, 2026

 Coding Exercise: Number of centered subarrays

You are given an integer array nums.

A subarray of nums is called centered if the sum of its elements is equal to at least one element within that same subarray.

Return the number of centered subarrays of nums.

Example 1:

Input: nums = [-1,1,0]

Output: 5

Explanation:

• All single-element subarrays ([-1], [1], [0]) are centered.

• The subarray [1, 0] has a sum of 1, which is present in the subarray.

• The subarray [-1, 1, 0] has a sum of 0, which is present in the subarray.

• Thus, the answer is 5.

Example 2:

Input: nums = [2,-3]

Output: 2

Explanation:

Only single-element subarrays ([2], [-3]) are centered.

class Solution {

    /**
     * Counts "centered" subarrays: subarrays whose element sum equals at
     * least one element of that same subarray.
     *
     * Improvements over the original:
     *  - The inner O(n) re-summation for every (i, j) pair is replaced by an
     *    incremental running sum, and the O(n) membership scan by a HashSet,
     *    reducing total work from O(n^3) to O(n^2).
     *  - The original compared {@code nums[k] == (int) sum}; casting a long
     *    sum outside the int range wraps and can falsely match. The sum is
     *    now range-checked before the cast.
     *
     * @param nums the input array
     * @return the number of centered subarrays
     */
    public int centeredSubarrays(int[] nums) {
        int count = 0;
        for (int i = 0; i < nums.length; i++) {
            long sum = 0;
            HashSet<Integer> seen = new HashSet<>();
            for (int j = i; j < nums.length; j++) {
                sum += nums[j];
                seen.add(nums[j]);
                // Only a sum representable as an int can equal an int element.
                if (sum >= Integer.MIN_VALUE && sum <= Integer.MAX_VALUE
                        && seen.contains((int) sum)) {
                    count++;
                }
            }
        }
        return count;
    }

}

Accepted

1042 / 1042 testcases passed


Wednesday, March 18, 2026

 Sample code to import sharepoint data to Azure Storage Account:

# Databricks notebook source
# --- SharePoint app-registration credentials ---
# NOTE(review): credential values are expected to be filled in before running;
# prefer the dbutils.secrets approach in the next cell over hard-coding here.
import os

os.environ["PROD_DSS_SHAREPOINT_CLIENT_ID"] = ""
os.environ["PROD_DSS_SHAREPOINT_CLIENT_SECRET"] = ""
os.environ["PROD_DSS_SHAREPOINT_TENANT_ID"] = ""

# COMMAND ----------

# Optional: use dbutils secrets for sensitive values
# client_secret = dbutils.secrets.get(scope="my-scope", key="sharepoint-client-secret")
# client_id = dbutils.secrets.get(scope="my-scope", key="sharepoint-client-id")
# tenant_id = dbutils.secrets.get(scope="my-scope", key="sharepoint-tenant-id")

# COMMAND ----------

import requests
from msal import ConfidentialClientApplication
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient
import json
import time

# === SharePoint App Registration ===
import os

client_id = os.environ["PROD_DSS_SHAREPOINT_CLIENT_ID"]
client_secret = os.environ["PROD_DSS_SHAREPOINT_CLIENT_SECRET"]
tenant_id = os.environ["PROD_DSS_SHAREPOINT_TENANT_ID"]

# NOTE(review): siteId and listId are not referenced by the code below —
# confirm they are needed before removing.
siteId = "site01/EI/"
listId = "Links"

# App-only (client-credentials) OAuth flow against Azure AD.
authority = f"https://login.microsoftonline.com/{tenant_id}"
scope = ["https://graph.microsoft.com/.default"]

app = ConfidentialClientApplication(
    client_id,
    authority=authority,
    client_credential=client_secret
)

def get_graph_token():
    """Return a fresh app-only access token for Microsoft Graph."""
    result = app.acquire_token_for_client(scopes=scope)
    return result["access_token"]

# COMMAND ----------

# === Azure Storage ===
# Destination account/container for the copied SharePoint files.
storage_account = "someaccount01"
container_name = "container01"

# Uses the ambient identity (managed identity / az login) — no keys in code.
credential = DefaultAzureCredential()
blob_service = BlobServiceClient(
    f"https://{storage_account}.blob.core.windows.net",
    credential=credential
)
container_client = blob_service.get_container_client(container_name)

# COMMAND ----------

# Blob recording which SharePoint item ids were already copied (resume support).
checkpoint_blob = container_client.get_blob_client("_checkpoints/sharepoint_copied.json")

def load_checkpoint():
    """Return the set of item ids already copied; empty set when the
    checkpoint blob is missing or unreadable."""
    try:
        payload = checkpoint_blob.download_blob().readall()
        return set(json.loads(payload))
    except Exception:
        # First run (no checkpoint yet) or a transient read error: start fresh.
        return set()

def save_checkpoint(copied_ids):
    """Overwrite the checkpoint blob with *copied_ids* as a JSON array."""
    serialized = json.dumps(list(copied_ids))
    checkpoint_blob.upload_blob(serialized, overwrite=True)

# COMMAND ----------

import requests
from msal import ConfidentialClientApplication

# The SharePoint host and site path from your URL
hostname = "uhgazure.sharepoint.com"
site_path = "/sites/site01/EI" # server-relative path (no trailing path/to/lists)

# MSAL app-only token
# NOTE(review): this re-creates the MSAL app and token already configured
# above — duplication kept so this notebook cell can run on its own.
authority = f"https://login.microsoftonline.com/{tenant_id}"
app = ConfidentialClientApplication(client_id, authority=authority, client_credential=client_secret)
token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
access_token = token.get("access_token")
headers = {"Authorization": f"Bearer {access_token}"}

# 1) Resolve site by path -> site-id
site_url = f"https://graph.microsoft.com/v1.0/sites/{hostname}:{site_path}"
r = requests.get(site_url, headers=headers)
r.raise_for_status()
site = r.json()
site_id = site["id"]
print("Site id:", site_id)

# 2) List drives (document libraries) for the site -> find drive id
drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
r = requests.get(drives_url, headers=headers)
r.raise_for_status()
drives = r.json().get("value", [])
for d in drives:
    print("Drive name:", d["name"], "Drive id:", d["id"])

# If you know the library name, pick it:
target_library = "Documents" # or the library name you expect
drive_id = next((d["id"] for d in drives if d["name"] == target_library), None)
print("Selected drive id:", drive_id)

# COMMAND ----------

# NOTE(review): this overwrites the drive_id resolved above with a placeholder —
# paste the actual id printed by the previous cell before running further cells.
drive_id = "pick-a-value-from-above-output"

def list_all_items():
    """Return every child item at the drive root, following Graph pagination."""
    hdrs = {"Authorization": f"Bearer {get_graph_token()}"}
    next_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/children"
    collected = []
    while next_url:
        response = requests.get(next_url, headers=hdrs)
        response.raise_for_status()
        payload = response.json()
        collected.extend(payload.get("value", []))
        # @odata.nextLink is absent on the last page, ending the loop.
        next_url = payload.get("@odata.nextLink")
    return collected

# COMMAND ----------

def download_file(item_id):
    """Download a drive item's raw bytes via the Graph /content endpoint."""
    hdrs = {"Authorization": f"Bearer {get_graph_token()}"}
    content_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{item_id}/content"
    response = requests.get(content_url, headers=hdrs)
    response.raise_for_status()
    return response.content

# COMMAND ----------

# NOTE(review): this bare expression displays the connection string (a secret)
# in the notebook output — consider removing or masking this cell.
os.environ["STORAGE_CONNECTION_STRING"]

# COMMAND ----------

# Connection string used by the blob upload helpers below.
connection_string = os.environ["STORAGE_CONNECTION_STRING"]

# COMMAND ----------

import os

import mimetypes

from io import BytesIO

from typing import Optional, Dict

import requests

from azure.core.exceptions import ResourceExistsError, ServiceRequestError, ClientAuthenticationError, HttpResponseError

from azure.storage.blob import (

    BlobServiceClient,

    BlobClient,

    ContentSettings

)

# ----------------------------

# Azure Blob upload helpers

# ----------------------------

def _get_blob_service_client(
    *,
    connection_string: Optional[str] = None,
    account_url: Optional[str] = None,
    sas_token: Optional[str] = None
) -> BlobServiceClient:
    """Build a BlobServiceClient from a connection string, or from an
    account URL plus SAS token (e.g. https://<acct>.blob.core.windows.net/?<sas>).

    Raises ValueError when neither auth combination is supplied.
    """
    if connection_string:
        return BlobServiceClient.from_connection_string(connection_string)
    if account_url and sas_token:
        # The SDK expects the SAS query string; prepend '?' when missing.
        normalized = sas_token if sas_token.startswith('?') else f'?{sas_token}'
        return BlobServiceClient(account_url=account_url, credential=normalized)
    raise ValueError("Provide either connection_string OR (account_url AND sas_token).")

def upload_bytes_to_blob(
    data: bytes,
    *,
    container_name: str,
    blob_name: str,
    connection_string: Optional[str] = None,
    account_url: Optional[str] = None,
    sas_token: Optional[str] = None,
    content_type: Optional[str] = None,
    overwrite: bool = False,
    metadata: Optional[Dict[str, str]] = None
) -> str:
    """
    Uploads a bytes object to Azure Blob Storage and returns the blob URL.

    Parameters:
        data (bytes): File content.
        container_name (str): Target container name.
        blob_name (str): Target blob name (e.g., 'reports/file.pdf').
        connection_string/account_url/sas_token: Auth options.
        content_type (str): MIME type; if None, guessed from blob_name.
        overwrite (bool): Replace if exists.
        metadata (dict): Optional metadata key/value pairs.

    Returns:
        str: The URL of the uploaded blob.

    Raises:
        ValueError: If no valid auth combination is supplied (from the helper).
        ResourceExistsError: If the blob exists and overwrite is False.
        RuntimeError: On authentication or transport/HTTP failures.
    """
    # Guess content type if not provided
    if content_type is None:
        guessed, _ = mimetypes.guess_type(blob_name)
        content_type = guessed or "application/octet-stream"
    # Build client
    bsc = _get_blob_service_client(
        connection_string=connection_string,
        account_url=account_url,
        sas_token=sas_token
    )
    container_client = bsc.get_container_client(container_name)
    # Ensure container exists (idempotent)
    try:
        container_client.create_container()
    except ResourceExistsError:
        pass # already exists
    blob_client: BlobClient = container_client.get_blob_client(blob_name)
    # Upload
    try:
        content_settings = ContentSettings(content_type=content_type)
        # Use a stream to be memory-friendly for large files, though we already have bytes
        stream = BytesIO(data)
        blob_client.upload_blob(
            stream,
            overwrite=overwrite,
            metadata=metadata,
            content_settings=content_settings
        )
    except ResourceExistsError:
        # With overwrite=False the SDK raises when the blob already exists; re-raise
        # so the caller can decide. (When overwrite=True, upload_blob does not raise
        # this error, so the silent branch is effectively unreachable.)
        if not overwrite:
            raise
    except ClientAuthenticationError as e:
        raise RuntimeError(f"Authentication failed when uploading blob: {e}") from e
    except (ServiceRequestError, HttpResponseError) as e:
        raise RuntimeError(f"Blob upload failed: {e}") from e
    # Construct and return URL (works for both conn string and SAS)
    return blob_client.url

# ----------------------------

# Orchestrator

# ----------------------------

def download_and_upload_to_blob(
    *,
    item_id: str,
    container_name: str,
    blob_name: str,
    connection_string: Optional[str] = None,
    account_url: Optional[str] = None,
    sas_token: Optional[str] = None,
    content_type: Optional[str] = None,
    overwrite: bool = False,
    metadata: Optional[Dict[str, str]] = None
) -> str:
    """
    Downloads a file from Microsoft Graph using the provided item_id and uploads it to Azure Blob Storage.

    Returns the blob URL.
    """
    # Fetch the file content from Graph, then hand it straight to the uploader.
    graph_bytes = download_file(item_id)
    return upload_bytes_to_blob(
        graph_bytes,
        container_name=container_name,
        blob_name=blob_name,
        connection_string=connection_string,
        account_url=account_url,
        sas_token=sas_token,
        content_type=content_type,
        overwrite=overwrite,
        metadata=metadata,
    )

# COMMAND ----------

# Enumerate the drive's root items once; reused by the cells below.
items = list_all_items()

# COMMAND ----------

print(len(items))

# COMMAND ----------

# Peek at the first few entries to verify the listing shape.
print(items[0:3])

# COMMAND ----------

import json

# Full JSON of one item — handy for discovering fields (id, webUrl, size, ...).
print(json.dumps(items[0], indent=4))

# COMMAND ----------

from urllib.parse import urlparse, parse_qs, unquote

def after_ei_from_xml_location(url: str, *, decode: bool = True) -> str:
    """
    Extracts the substring after '/EI/' from the XmlLocation query parameter.

    Args:
        url: The full URL containing the XmlLocation query parameter.
        decode: If True, URL-decodes the result (default True).

    Returns:
        The substring after '/EI/' from XmlLocation, or an empty string if not found.
    """
    query = parse_qs(urlparse(url).query)
    values = query.get("XmlLocation")
    if not values:
        # XmlLocation parameter not present at all.
        return ""
    # Only the first XmlLocation value is considered.
    location = unquote(values[0]) if decode else values[0]
    # partition() yields an empty separator when the marker is absent.
    _, sep, tail = location.partition("/EI/")
    return tail if sep else ""

# COMMAND ----------

# Smoke test: copy the first item into the 'iris' container, preserving the
# path segment after '/EI/' from its webUrl.
download_and_upload_to_blob(item_id=items[0]["id"], container_name="iris", blob_name="domestic/EI/" + after_ei_from_xml_location(url=items[0]['webUrl']), connection_string=connection_string)

# COMMAND ----------

# Quick size statistics over the listing (sizes in bytes).
max_size = max(entry["size"] for entry in items)
sum_size = sum(entry["size"] for entry in items)
len_items = len(items)
print(f"max_size={max_size}, sum_size={sum_size}, len_items={len_items}")

# COMMAND ----------

# Copy every listed item; log each success by URL and keep going on per-item errors.
# Fix: f"{item["webUrl"]}"-style nested double quotes inside an f-string are a
# SyntaxError on Python < 3.12 (PEP 701 landed in 3.12) — use single quotes inside.
for item in items:
    try:
        download_and_upload_to_blob(
            item_id=item["id"],
            container_name="iris",
            blob_name="domestic/EI/" + after_ei_from_xml_location(url=item["webUrl"]),
            connection_string=connection_string,
        )
        print(item["webUrl"])
    except Exception as e:
        # Best-effort batch copy: report the failure and continue with the rest.
        print(f"Error: {e}, item_id={item['id']}, item_url={item['webUrl']}")

Reference: previous article for context: https://1drv.ms/w/c/d609fb70e39b65c8/IQBV3Sd02qPlRa_y13mxnxHSAa6mrM4rmM3pnvbWPW7RpIE?e=b1nGOi

Sample code to import sharepoint data to Azure Storage Account:

# Databricks notebook source
# --- SharePoint app-registration credentials ---
# NOTE(review): credential values are expected to be filled in before running;
# prefer the dbutils.secrets approach in the next cell over hard-coding here.
import os

os.environ["PROD_DSS_SHAREPOINT_CLIENT_ID"] = ""
os.environ["PROD_DSS_SHAREPOINT_CLIENT_SECRET"] = ""
os.environ["PROD_DSS_SHAREPOINT_TENANT_ID"] = ""

# COMMAND ----------

# Optional: use dbutils secrets for sensitive values
# client_secret = dbutils.secrets.get(scope="my-scope", key="sharepoint-client-secret")
# client_id = dbutils.secrets.get(scope="my-scope", key="sharepoint-client-id")
# tenant_id = dbutils.secrets.get(scope="my-scope", key="sharepoint-tenant-id")

# COMMAND ----------

import requests
from msal import ConfidentialClientApplication
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient
import json
import time

# === SharePoint App Registration ===
import os

client_id = os.environ["PROD_DSS_SHAREPOINT_CLIENT_ID"]
client_secret = os.environ["PROD_DSS_SHAREPOINT_CLIENT_SECRET"]
tenant_id = os.environ["PROD_DSS_SHAREPOINT_TENANT_ID"]

# NOTE(review): siteId and listId are not referenced by the code below —
# confirm they are needed before removing.
siteId = "site01/EI/"
listId = "Links"

# App-only (client-credentials) OAuth flow against Azure AD.
authority = f"https://login.microsoftonline.com/{tenant_id}"
scope = ["https://graph.microsoft.com/.default"]

app = ConfidentialClientApplication(
    client_id,
    authority=authority,
    client_credential=client_secret
)

def get_graph_token():
    """Return a fresh app-only access token for Microsoft Graph."""
    result = app.acquire_token_for_client(scopes=scope)
    return result["access_token"]

# COMMAND ----------

# === Azure Storage ===
# Destination account/container for the copied SharePoint files.
storage_account = "someaccount01"
container_name = "container01"

# Uses the ambient identity (managed identity / az login) — no keys in code.
credential = DefaultAzureCredential()
blob_service = BlobServiceClient(
    f"https://{storage_account}.blob.core.windows.net",
    credential=credential
)
container_client = blob_service.get_container_client(container_name)

# COMMAND ----------

# Blob recording which SharePoint item ids were already copied (resume support).
checkpoint_blob = container_client.get_blob_client("_checkpoints/sharepoint_copied.json")

def load_checkpoint():
    """Return the set of item ids already copied; empty set when the
    checkpoint blob is missing or unreadable."""
    try:
        payload = checkpoint_blob.download_blob().readall()
        return set(json.loads(payload))
    except Exception:
        # First run (no checkpoint yet) or a transient read error: start fresh.
        return set()

def save_checkpoint(copied_ids):
    """Overwrite the checkpoint blob with *copied_ids* as a JSON array."""
    serialized = json.dumps(list(copied_ids))
    checkpoint_blob.upload_blob(serialized, overwrite=True)

# COMMAND ----------

import requests
from msal import ConfidentialClientApplication

# The SharePoint host and site path from your URL
hostname = "uhgazure.sharepoint.com"
site_path = "/sites/site01/EI" # server-relative path (no trailing path/to/lists)

# MSAL app-only token
# NOTE(review): this re-creates the MSAL app and token already configured
# above — duplication kept so this notebook cell can run on its own.
authority = f"https://login.microsoftonline.com/{tenant_id}"
app = ConfidentialClientApplication(client_id, authority=authority, client_credential=client_secret)
token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
access_token = token.get("access_token")
headers = {"Authorization": f"Bearer {access_token}"}

# 1) Resolve site by path -> site-id
site_url = f"https://graph.microsoft.com/v1.0/sites/{hostname}:{site_path}"
r = requests.get(site_url, headers=headers)
r.raise_for_status()
site = r.json()
site_id = site["id"]
print("Site id:", site_id)

# 2) List drives (document libraries) for the site -> find drive id
drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
r = requests.get(drives_url, headers=headers)
r.raise_for_status()
drives = r.json().get("value", [])
for d in drives:
    print("Drive name:", d["name"], "Drive id:", d["id"])

# If you know the library name, pick it:
target_library = "Documents" # or the library name you expect
drive_id = next((d["id"] for d in drives if d["name"] == target_library), None)
print("Selected drive id:", drive_id)

# COMMAND ----------

# NOTE(review): this overwrites the drive_id resolved above with a placeholder —
# paste the actual id printed by the previous cell before running further cells.
drive_id = "pick-a-value-from-above-output"

def list_all_items():
    """Return every child item at the drive root, following Graph pagination."""
    hdrs = {"Authorization": f"Bearer {get_graph_token()}"}
    next_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/children"
    collected = []
    while next_url:
        response = requests.get(next_url, headers=hdrs)
        response.raise_for_status()
        payload = response.json()
        collected.extend(payload.get("value", []))
        # @odata.nextLink is absent on the last page, ending the loop.
        next_url = payload.get("@odata.nextLink")
    return collected

# COMMAND ----------

def download_file(item_id):
    """Download a drive item's raw bytes via the Graph /content endpoint."""
    hdrs = {"Authorization": f"Bearer {get_graph_token()}"}
    content_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{item_id}/content"
    response = requests.get(content_url, headers=hdrs)
    response.raise_for_status()
    return response.content

# COMMAND ----------

# NOTE(review): this bare expression displays the connection string (a secret)
# in the notebook output — consider removing or masking this cell.
os.environ["STORAGE_CONNECTION_STRING"]

# COMMAND ----------

# Connection string used by the blob upload helpers below.
connection_string = os.environ["STORAGE_CONNECTION_STRING"]

# COMMAND ----------

import os

import mimetypes

from io import BytesIO

from typing import Optional, Dict

import requests

from azure.core.exceptions import ResourceExistsError, ServiceRequestError, ClientAuthenticationError, HttpResponseError

from azure.storage.blob import (

    BlobServiceClient,

    BlobClient,

    ContentSettings

)

# ----------------------------

# Azure Blob upload helpers

# ----------------------------

def _get_blob_service_client(
    *,
    connection_string: Optional[str] = None,
    account_url: Optional[str] = None,
    sas_token: Optional[str] = None
) -> BlobServiceClient:
    """Build a BlobServiceClient from a connection string, or from an
    account URL plus SAS token (e.g. https://<acct>.blob.core.windows.net/?<sas>).

    Raises ValueError when neither auth combination is supplied.
    """
    if connection_string:
        return BlobServiceClient.from_connection_string(connection_string)
    if account_url and sas_token:
        # The SDK expects the SAS query string; prepend '?' when missing.
        normalized = sas_token if sas_token.startswith('?') else f'?{sas_token}'
        return BlobServiceClient(account_url=account_url, credential=normalized)
    raise ValueError("Provide either connection_string OR (account_url AND sas_token).")

def upload_bytes_to_blob(
    data: bytes,
    *,
    container_name: str,
    blob_name: str,
    connection_string: Optional[str] = None,
    account_url: Optional[str] = None,
    sas_token: Optional[str] = None,
    content_type: Optional[str] = None,
    overwrite: bool = False,
    metadata: Optional[Dict[str, str]] = None
) -> str:
    """
    Uploads a bytes object to Azure Blob Storage and returns the blob URL.

    Parameters:
        data (bytes): File content.
        container_name (str): Target container name.
        blob_name (str): Target blob name (e.g., 'reports/file.pdf').
        connection_string/account_url/sas_token: Auth options.
        content_type (str): MIME type; if None, guessed from blob_name.
        overwrite (bool): Replace if exists.
        metadata (dict): Optional metadata key/value pairs.

    Returns:
        str: The URL of the uploaded blob.

    Raises:
        ValueError: If no valid auth combination is supplied (from the helper).
        ResourceExistsError: If the blob exists and overwrite is False.
        RuntimeError: On authentication or transport/HTTP failures.
    """
    # Guess content type if not provided
    if content_type is None:
        guessed, _ = mimetypes.guess_type(blob_name)
        content_type = guessed or "application/octet-stream"
    # Build client
    bsc = _get_blob_service_client(
        connection_string=connection_string,
        account_url=account_url,
        sas_token=sas_token
    )
    container_client = bsc.get_container_client(container_name)
    # Ensure container exists (idempotent)
    try:
        container_client.create_container()
    except ResourceExistsError:
        pass # already exists
    blob_client: BlobClient = container_client.get_blob_client(blob_name)
    # Upload
    try:
        content_settings = ContentSettings(content_type=content_type)
        # Use a stream to be memory-friendly for large files, though we already have bytes
        stream = BytesIO(data)
        blob_client.upload_blob(
            stream,
            overwrite=overwrite,
            metadata=metadata,
            content_settings=content_settings
        )
    except ResourceExistsError:
        # With overwrite=False the SDK raises when the blob already exists; re-raise
        # so the caller can decide. (When overwrite=True, upload_blob does not raise
        # this error, so the silent branch is effectively unreachable.)
        if not overwrite:
            raise
    except ClientAuthenticationError as e:
        raise RuntimeError(f"Authentication failed when uploading blob: {e}") from e
    except (ServiceRequestError, HttpResponseError) as e:
        raise RuntimeError(f"Blob upload failed: {e}") from e
    # Construct and return URL (works for both conn string and SAS)
    return blob_client.url

# ----------------------------

# Orchestrator

# ----------------------------

def download_and_upload_to_blob(
    *,
    item_id: str,
    container_name: str,
    blob_name: str,
    connection_string: Optional[str] = None,
    account_url: Optional[str] = None,
    sas_token: Optional[str] = None,
    content_type: Optional[str] = None,
    overwrite: bool = False,
    metadata: Optional[Dict[str, str]] = None
) -> str:
    """
    Downloads a file from Microsoft Graph using the provided item_id and uploads it to Azure Blob Storage.

    Returns the blob URL.
    """
    # Fetch the file content from Graph, then hand it straight to the uploader.
    graph_bytes = download_file(item_id)
    return upload_bytes_to_blob(
        graph_bytes,
        container_name=container_name,
        blob_name=blob_name,
        connection_string=connection_string,
        account_url=account_url,
        sas_token=sas_token,
        content_type=content_type,
        overwrite=overwrite,
        metadata=metadata,
    )

# COMMAND ----------

# Enumerate the drive's root items once; reused by the cells below.
items = list_all_items()

# COMMAND ----------

print(len(items))

# COMMAND ----------

# Peek at the first few entries to verify the listing shape.
print(items[0:3])

# COMMAND ----------

import json

# Full JSON of one item — handy for discovering fields (id, webUrl, size, ...).
print(json.dumps(items[0], indent=4))

# COMMAND ----------

from urllib.parse import urlparse, parse_qs, unquote

def after_ei_from_xml_location(url: str, *, decode: bool = True) -> str:
    """
    Extracts the substring after '/EI/' from the XmlLocation query parameter.

    Args:
        url: The full URL containing the XmlLocation query parameter.
        decode: If True, URL-decodes the result (default True).

    Returns:
        The substring after '/EI/' from XmlLocation, or an empty string if not found.
    """
    query = parse_qs(urlparse(url).query)
    values = query.get("XmlLocation")
    if not values:
        # XmlLocation parameter not present at all.
        return ""
    # Only the first XmlLocation value is considered.
    location = unquote(values[0]) if decode else values[0]
    # partition() yields an empty separator when the marker is absent.
    _, sep, tail = location.partition("/EI/")
    return tail if sep else ""

# COMMAND ----------

# Smoke test: copy the first item into the 'iris' container, preserving the
# path segment after '/EI/' from its webUrl.
download_and_upload_to_blob(item_id=items[0]["id"], container_name="iris", blob_name="domestic/EI/" + after_ei_from_xml_location(url=items[0]['webUrl']), connection_string=connection_string)

# COMMAND ----------

# Quick size statistics over the listing (sizes in bytes).
max_size = max(entry["size"] for entry in items)
sum_size = sum(entry["size"] for entry in items)
len_items = len(items)
print(f"max_size={max_size}, sum_size={sum_size}, len_items={len_items}")

# COMMAND ----------

# Copy every listed item; log each success by URL and keep going on per-item errors.
# Fix: f"{item["webUrl"]}"-style nested double quotes inside an f-string are a
# SyntaxError on Python < 3.12 (PEP 701 landed in 3.12) — use single quotes inside.
for item in items:
    try:
        download_and_upload_to_blob(
            item_id=item["id"],
            container_name="iris",
            blob_name="domestic/EI/" + after_ei_from_xml_location(url=item["webUrl"]),
            connection_string=connection_string,
        )
        print(item["webUrl"])
    except Exception as e:
        # Best-effort batch copy: report the failure and continue with the rest.
        print(f"Error: {e}, item_id={item['id']}, item_url={item['webUrl']}")


Tuesday, March 17, 2026

When we discussed an end-to-end GitHub Actions workflow yesterday — one that demonstrates the creation and usage of an “Issues Dashboard” for periodic insights into the customer experience with the repository assets — we didn't bring up some of the pesky and troublesome tasks, such as finding all pull-request references on a GitHub issue regardless of whether they appear in comments, in events of type “mentioned” or “cross-referenced”, or in the closure of the issue. The sample Python implementation below provides those details.

Python now follows:


import os, re, json, datetime, requests
import hcl2
import pandas as pd

# Repository ("owner/name") and token are injected by the Actions environment.
REPO = os.environ["GITHUB_REPOSITORY"]
GH_TOKEN = os.environ["GH_TOKEN"]
# mockingbird-preview Accept value kept for older timeline-API compatibility.
HEADERS = {"Authorization": f"Bearer {GH_TOKEN}", "Accept": "application/vnd.github+json, application/vnd.github.mockingbird-preview+json", "X-GitHub-Api-Version": "2026-03-10"}

# ---- Time window (last 7 days) ----
# NOTE(review): datetime.utcnow() is deprecated since Python 3.12; switching to
# datetime.now(datetime.timezone.utc) would also change isoformat() output
# (adds +00:00), so the trailing 'Z' handling must be adjusted together.
since = (datetime.datetime.utcnow() - datetime.timedelta(days=7)).isoformat() + "Z"


# ---- Helpers ----

def gh_get(url, params=None):
    """GET *url* with the module auth headers; raise on HTTP error; return parsed JSON."""
    response = requests.get(url, headers=HEADERS, params=params)
    response.raise_for_status()
    return response.json()


def gh_get_text(url):
    """GET *url* with the module auth headers; raise on HTTP error; return the raw body text."""
    response = requests.get(url, headers=HEADERS)
    response.raise_for_status()
    return response.text


def list_closed_issues():
    """Return closed issues (PRs filtered out) updated within the window.

    Fix: the previous version fetched only the first page, silently capping
    the result at 100 items; this paginates until a short/empty page.
    The Issues API returns both issues and PRs; entries carrying a
    'pull_request' key are PRs and are excluded.
    """
    url = f"https://api.github.com/repos/{REPO}/issues"
    issues = []
    page = 1
    while True:
        batch = gh_get(url, params={"state": "closed", "since": since, "per_page": 100, "page": page})
        if not batch:
            break
        issues.extend(i for i in batch if "pull_request" not in i)
        if len(batch) < 100:
            break
        page += 1
    return issues


# Matches HTML PR links: https://github.com/<owner>/<repo>/pull/<num>
PR_HTML_URL_RE = re.compile(
    r"https?://github\.com/(?P<owner>[^/\s]+)/(?P<repo>[^/\s]+)/pull/(?P<num>\d+)",
    re.IGNORECASE,
)
# Matches REST API PR links: https://api.github.com/repos/<owner>/<repo>/pulls/<num>
PR_API_URL_RE = re.compile(
    r"https?://api\.github\.com/repos/(?P<owner>[^/\s]+)/(?P<repo>[^/\s]+)/pulls/(?P<num>\d+)",
    re.IGNORECASE,
)

# Shorthand references that might appear in text:
#   - #123  (assumed to be same repo)
#   - owner/repo#123 (explicit cross-repo)
# The (?<!\w) lookbehind keeps '#123' from also matching inside 'owner/repo#123'.
SHORTHAND_SAME_REPO_RE = re.compile(r"(?<!\w)#(?P<num>\d+)\b")
SHORTHAND_CROSS_REPO_RE = re.compile(
    r"(?P<owner>[A-Za-z0-9_.-]+)/(?P<repo>[A-Za-z0-9_.-]+)#(?P<num>\d+)\b"
)


def _normalize_html_pr_url(owner: str, repo: str, num: int) -> str:

    return f"https://github.com/{owner}/{repo}/pull/{int(num)}"


def _collect_from_text(text: str, default_owner: str, default_repo: str) -> set:
    """Extract candidate PR URLs from free text (body/comments/events text)."""
    urls = set()
    if not text:
        return urls

    # Absolute HTML links, API links, and explicit cross-repo shorthand
    # (owner/repo#123) all carry their own owner/repo/num groups.
    for pattern in (PR_HTML_URL_RE, PR_API_URL_RE, SHORTHAND_CROSS_REPO_RE):
        for match in pattern.finditer(text):
            urls.add(_normalize_html_pr_url(
                match.group("owner"), match.group("repo"), match.group("num")
            ))

    # Bare '#123' shorthand is resolved against the issue's own repository.
    for match in SHORTHAND_SAME_REPO_RE.finditer(text):
        urls.add(_normalize_html_pr_url(default_owner, default_repo, match.group("num")))

    return urls


def _paginate_gh_get(url, headers=None, per_page=100):
    """Yield every item from a paginated list endpoint.

    Stops when a page is empty, shorter than *per_page*, or not a list.

    NOTE(review): *headers* is accepted for signature compatibility but is
    ignored — gh_get always uses the module-level HEADERS.
    """
    page = 1
    while True:
        batch = gh_get(url, params={"per_page": per_page, "page": page})
        if not isinstance(batch, list) or not batch:
            return
        yield from batch
        if len(batch) < per_page:
            return
        page += 1


def extract_pr_urls_from_issue(issue_number: int):
    """
    Extract PR URLs associated with an issue by scanning:
      - Issue body
      - Issue comments
      - Issue events (including 'mentioned', 'cross-referenced', etc.)
      - Issue timeline (most reliable for cross references)

    Returns a sorted list of unique, normalized HTML PR URLs.
    Requires:
      - REPO = "owner/repo"
      - gh_get(url, params=None, headers=None) is available
    """
    owner, repo = REPO.split("/", 1)
    pr_urls = set()

    # Baseline Accept header for REST v3 + timeline support.
    # The timeline historically required a preview header. Keep both for compatibility.
    # NOTE(review): base_headers is never passed to any request below —
    # gh_get always uses the module-level HEADERS. Confirm and either wire
    # this up or remove it.
    base_headers = {
        "Accept": "application/vnd.github+json, application/vnd.github.mockingbird-preview+json"
    }

    # 1) Issue body
    issue_url = f"https://api.github.com/repos/{REPO}/issues/{issue_number}"
    issue = gh_get(issue_url)
    if isinstance(issue, dict):
        body = issue.get("body") or ""
        pr_urls |= _collect_from_text(body, owner, repo)

        # If this issue IS itself a PR (when called with a PR number), make sure we don't add itself erroneously
        # We won't add unless text contains it anyway; still fine.

    # 2) All comments
    comments_url = f"https://api.github.com/repos/{REPO}/issues/{issue_number}/comments"
    for c in _paginate_gh_get(comments_url):
        body = c.get("body") or ""
        pr_urls |= _collect_from_text(body, owner, repo)

    # 3) Issue events (event stream can have 'mentioned', 'cross-referenced', etc.)
    events_url = f"https://api.github.com/repos/{REPO}/issues/{issue_number}/events"
    for ev in _paginate_gh_get(events_url):
        # (a) Free-text fields: some events carry body/commit messages, etc.
        if isinstance(ev, dict):
            body = ev.get("body") or ""
            pr_urls |= _collect_from_text(body, owner, repo)

            # (b) Structured cross-reference (best: 'cross-referenced' events)
            #     If the source.issue has 'pull_request' key, it's a PR; use its html_url.
            if ev.get("event") == "cross-referenced":
                src = ev.get("source") or {}
                issue_obj = src.get("issue") or {}
                pr_obj = issue_obj.get("pull_request") or {}
                html_url = issue_obj.get("html_url")
                if pr_obj and html_url and "/pull/" in html_url:
                    pr_urls.add(html_url)
                # Fallback: If not marked but looks like a PR in URL
                elif html_url and "/pull/" in html_url:
                    pr_urls.add(html_url)

        # (c) Also include 'mentioned' events (broadened): inspect whatever text fields exist
        # Already covered via 'body' text extraction

    # 4) Timeline API (the most complete for references)
    timeline_url = f"https://api.github.com/repos/{REPO}/issues/{issue_number}/timeline"
    for item in _paginate_gh_get(timeline_url):
        if not isinstance(item, dict):
            continue

        # Free-text scan on any plausible string field
        for key in ("body", "message", "title", "commit_message", "subject"):
            val = item.get(key)
            if isinstance(val, str):
                pr_urls |= _collect_from_text(val, owner, repo)

        # Structured cross-reference payloads
        if item.get("event") == "cross-referenced":
            src = item.get("source") or {}
            issue_obj = src.get("issue") or {}
            pr_obj = issue_obj.get("pull_request") or {}
            html_url = issue_obj.get("html_url")
            if pr_obj and html_url and "/pull/" in html_url:
                pr_urls.add(html_url)
            elif html_url and "/pull/" in html_url:
                pr_urls.add(html_url)

        # Some timeline items are themselves issues/PRs with html_url
        html_url = item.get("html_url")
        if isinstance(html_url, str) and "/pull/" in html_url:
            pr_urls.add(html_url)

        # Occasionally the timeline includes API-style URLs
        api_url = item.get("url")
        if isinstance(api_url, str):
            m = PR_API_URL_RE.search(api_url)
            if m:
                pr_urls.add(_normalize_html_pr_url(m.group("owner"), m.group("repo"), m.group("num")))

    # Final normalization: keep only HTML PR URLs and sort
    pr_urls = {m.group(0) for url in pr_urls for m in [PR_HTML_URL_RE.search(url)] if m}
    return sorted(pr_urls)


def pr_number_from_url(u):
    """Return the pull-request number embedded in a GitHub PR URL.

    Looks for a "/pull/<digits>" segment in *u* and returns the digits as an
    int; returns None when the URL contains no such segment.
    """
    match = re.search(r"/pull/(\d+)", u)
    if match is None:
        return None
    return int(match.group(1))

#codingexercise: CodingExercise-03-17-2026.pdf

Monday, March 16, 2026

 This is a summary of the book titled “Creative Machines: AI, Art & Us” written by Maya Ackerman and published by Wiley, 2025. This book explores how artificial intelligence can deepen, rather than diminish, human creativity. Long before AI became a mainstream topic, Ackerman—a computer science professor and lifelong musician—was already investigating how machines might inspire people to become better creators. Drawing on both academic research and her experience as the CEO of a generative AI company, she argues that the most meaningful creative technologies are not those that replace human effort, but those that work alongside people as supportive collaborators. Throughout the book, she introduces the idea of “humble creative machines,” tools designed to empower users, foster growth, and keep humans at the center of the creative process.

Ackerman grounds her argument in a clear definition of creativity. Creativity, she explains, is best understood through its products—objects or experiences that are both novel and valuable. Novelty excludes mere variations on what already exists, while value implies intention and purpose. A creative work may be valuable because it is useful, or because it provides enjoyment, meaning, or emotional resonance, as art and music do. As Ackerman puts it, creativity is not simply about being first, but about making something that matters. Importantly, this definition does not restrict creativity to humans. Many animals and even plants exhibit creative behaviors, such as the bowerbird’s carefully constructed displays made from colorful objects to attract mates. By focusing on novelty and value rather than on the identity of the creator, Ackerman opens the door to recognizing how machines, too, can participate in creative processes.

She cautions, however, that machine creativity does not need to look like human creativity. One common way of evaluating creative machines is the Discrimination Test, which asks whether people can distinguish machine-generated works from those made by humans. Ackerman recounts a 1997 experiment in which a computer program composed a piece of music in the style of Bach, and most listeners—including trained musicians—mistook it for the original composer’s work. While this test is useful because it forces audiences to judge the artifact rather than the creator, Ackerman notes its limitations. Comparing machines to humans risks overlooking the fact that machines can be creative in their own right. For ethical and intellectual property reasons, she argues that AI should not imitate living artists. Instead, creative machines should be treated as entirely new entities—tools that expand human potential rather than mimic it.

Creativity depends on the balance between divergent and convergent thinking. Divergent thinking involves curiosity, exploration, and the willingness to generate many ideas, including strange or impractical ones. It is essential for innovation and often benefits from stepping away from problems and returning to them with fresh perspectives. Convergent thinking, by contrast, is the process of refining ideas, making decisions, and turning possibilities into finished work. Ackerman emphasizes that creators must move back and forth between these modes and resist judging ideas too early. Great creative output, she explains, often comes from producing a large volume of work, much of which will never be seen. Picasso’s thousands of artworks serve as an example of how brilliance emerges from persistence rather than perfection. This same principle applies to machines: if AI systems are limited to producing only the most likely or “correct” answers, they lose their ability to help humans discover something genuinely new.

Ackerman encourages readers to think of creative machines as collaborators rather than replacements. She compares working with AI to collaborating with a talented friend. The value lies not in having the work done for you, but in being supported, challenged, and inspired as you develop your own ideas. Many creative AI tools, she observes, focus on showing off impressive results without inviting users into the process. The most impactful tools, by contrast, enable co-creativity, where humans and machines interact dynamically to shape a final outcome. Used in this way, AI becomes a partner with unique strengths—always available, endlessly patient, and capable of offering fresh perspectives.

Her own journey illustrates this approach. After years of frustration with songwriting, Ackerman collaborated with an AI system called ALYSIA, which generated melodies based on lyrics. Rather than replacing her creativity, the tool sparked new ideas and helped her break out of repetitive patterns. Over time, working with AI gave her the confidence to compose independently. This experience informed her advocacy for “humble creative machines,” a concept she helped formalize with other researchers. These tools are flexible, allowing users to decide how much assistance they want; supportive, acting as coaches rather than crutches; genuinely creative, offering novel suggestions; and easy to use, fitting seamlessly into existing workflows. When users approach AI as a responsive tool rather than a performer meant to impress, the quality of the work improves and ownership remains with the human creator.

Ackerman points to conversational AI tools such as ChatGPT as practical examples of this philosophy in action. These systems adapt to user input, encourage iteration, and leave room for human judgment and editing. However, she stresses that whether an AI functions as a humble creative machine depends as much on the user’s mindset as on the technology itself. When people allow AI to do all the work, they miss valuable learning opportunities. When they engage with it thoughtfully—using feedback, critique, and iteration—they develop stronger creative skills over time.

The book also addresses the business and design implications of creative AI. Tools should not aim to retain users by fostering dependence or addiction. Instead, they should support long-term creative growth, even as users become more skilled. Ackerman describes how her own company learned this lesson when an early version of their songwriting app failed to gain traction because it tried to do too much. By refocusing on a simpler tool that offered suggestions rather than complete solutions, they attracted more users who returned consistently. Sustainable success, she argues, comes from empowering people rather than dazzling them once.

Ackerman does not shy away from the ethical challenges of creative machines, particularly the issue of bias. Because AI systems are trained on human-generated data, they inevitably reflect human stereotypes and blind spots. Studies have shown that image generators often reproduce racial, gender, and cultural biases, misrepresenting marginalized groups and oversimplifying non-Western cultures. These failures do not reflect machine intent, but rather the limitations of the data on which they are trained. Addressing this problem requires more diverse voices in AI development and research, as well as deliberate efforts to improve datasets and representations. Projects like narrative-generating systems designed to preserve Indigenous stories demonstrate how creative machines can also be used to counter erasure and support cultural understanding.

Ackerman argues that creative machines hold up a mirror to humanity. By exposing our biases, assumptions, and unexamined patterns, AI offers an opportunity for deeper self-reflection. Drawing on ideas from psychology, she suggests that confronting what machines reveal about us—both the negative and the positive—can lead to meaningful change. Technology alone cannot fix human problems, but it can help make them visible. When used thoughtfully, creative machines can support not only better art and innovation, but also greater awareness, responsibility, and human flourishing.


Sunday, March 15, 2026

 Transient and transit object detection in aerial drone images:

Introduction:

The increasing availability of high-resolution aerial imagery from unmanned aerial vehicles (UAVs) presents a unique opportunity for time-domain object detection. Unlike traditional satellite imagery, UAVs offer flexible sampling rates, dynamic perspectives, and real-time responsiveness. However, the irregular cadence and noise inherent in aerial sequences pose challenges for conventional object detection pipelines, especially when attempting to identify transient or fleeting objects such as pedestrians, vehicles, or small mobile assets.

Machine learning techniques have become indispensable in aerial image analysis, particularly in large datasets where manual annotation is infeasible. Convolutional neural networks (CNNs) have been widely adopted for static object detection, but their performance degrades when applied to temporally sparse or noisy sequences. Prior work has explored phase-folding and frame-by-frame tracking, but these methods are computationally expensive and sensitive to sampling irregularities.

This article introduces DroneWorldNet, a frequency-domain model that bypasses the limitations of traditional tracking by transforming image clip vectors into frequency-domain tensors. DroneWorldNet applies discrete wavelet transform (DWT) to suppress noise and highlight localized changes, followed by a finite-embedding Fourier transform (FEFT) to extract periodic and harmonic features across time. These tensors are then classified into one of four object states: null (no object), transient (brief appearance), stable (persistent presence), or transit (periodic occlusion or movement).

We apply DroneWorldNet to the DOTA dataset, which contains annotated aerial scenes from diverse environments. Each image clip is treated as a temporal stack, and the model is trained on both real and synthetic sequences to ensure robustness across lighting, altitude, and occlusion conditions. The pipeline includes spatial clustering, data normalization, and tensor construction, followed by classification using CNN and fully connected layers.

DroneWorldNet achieves subsecond inference latency and high classification accuracy, demonstrating its suitability for real-time deployment in edge-cloud UAV systems. This work lays the foundation for a full-scale variability survey of aerial scenes and opens new avenues for time-domain analysis in geospatial workflows.

Data Preprocessing:

Each image clip sequence is preprocessed to construct a high-quality, neural-network-friendly representation. For each frame, we extract three features: normalized brightness, estimated uncertainty (e.g., motion blur or sensor noise), and timestamp. Brightness values are converted from log scale to linear flux using calibration constants derived from the UAV sensor specifications. We then subtract the median and standardize using the interquartile range (IQR), followed by compression into the [-1, 1] range using the arcsinh function.

Time values are normalized to [0, 1] based on the total observation window, typically spanning 10–30 seconds. Uncertainty values are similarly rescaled and compressed to match the flux scale. The final input tensor for each sequence is a matrix of shape T × 3, where T is the number of frames, and each row contains brightness, uncertainty, and timestamp.

This representation ensures that DroneWorldNet can handle sequences of varying length and sampling rate, a critical requirement for aerial deployments where cadence may fluctuate due to flight path, altitude, or environmental conditions.

DroneWorldNet model:

DroneWorldNet is a hybrid signal-processing and deep learning model designed to classify aerial image sequences into four object states: null, transient, stable, and transit. The model architecture integrates three core components:

Wavelet Decomposition: A one-dimensional discrete wavelet transform (DWT) is applied to the brightness vector to suppress noise and highlight localized changes. This is particularly effective in identifying transient objects that appear briefly and then vanish.

Finite-Embedding Fourier Transform (FEFT): A modified discrete Fourier transform is applied to the time series to extract periodic and harmonic features. FEFT enables detection of transit-like behavior, such as vehicles passing through occluded regions or pedestrians crossing paths.

Convolutional Neural Network (CNN): The frequency-domain tensor is passed through a series of convolutional and fully connected layers, which learn to discriminate between the four object states. The model is trained using a categorical cross-entropy loss function and optimized with Adam.

Training and Evaluation:

To train DroneWorldNet, we generate synthetic aerial sequences using motion simulation models that replicate pedestrian and vehicle dynamics under varying conditions. These include changes in lighting, altitude, occlusion, and background texture. Synthetic sequences are blended with real samples from the DOTA dataset to ensure generalization across diverse environments.

The model is trained on a four-class scheme: null (no object), transient (brief appearance), stable (persistent presence), and transit (periodic occlusion or movement). On a held-out validation set, DroneWorldNet achieves an F1 score of 0.89, with precision and recall exceeding 0.90 for stable and transit classes. Transient detection remains challenging due to low signal-to-noise ratio, but wavelet decomposition significantly improves sensitivity.


Saturday, March 14, 2026

 This is a summary of the book titled “There’s Got to Be a Better Way: How to Deliver Results and Get Rid of the Stuff That Gets in the Way of Real Work” written by Nelson P. Repenning and Donald C. Kieffer and published by Basic Books in 2025. This book explores why so many modern organizations feel overwhelmed, inefficient, and perpetually behind despite having talented people and advanced technologies. Drawing on decades of research and practical experience, the authors argue that the root of the problem is not a lack of effort or intelligence, but the persistence of outdated approaches to designing work. Many organizations still rely on management ideas inherited from Fordist and Taylorist traditions, which assume that work can be planned in advance, broken into static parts, and controlled through rigid rules. In a world characterized by constant change, uncertainty, and complexity, these assumptions no longer hold. Repenning and Kieffer propose an alternative they call dynamic work design, a way of organizing work that builds learning, adaptability, and responsiveness directly into everyday operations.

 

The authors begin by observing that poorly designed workflow systems often create chaos rather than control. Managers become exhausted, employees feel frustrated, and inefficiencies multiply as organizations try to impose static structures on dynamic realities. To illustrate this mismatch, the book compares traditional workflow systems to early GPS technology that blindly followed preprogrammed routes and steered drivers into traffic jams. Dynamic work design, by contrast, resembles modern GPS systems that continuously gather data from the environment and adjust in real time. In the same way, dynamic organizations constantly draw information from their work systems so people can respond quickly and intelligently as conditions change.

 

When leaders face complexity, rapid growth, or ongoing disruption, they often fall into what the authors call the “firefighting trap.” Instead of understanding how work is actually flowing through the organization, leaders focus on urgent problems and short-term fixes. Over time, this reactive behavior can turn them into “firefighting arsonists,” as well-intentioned decisions unintentionally worsen the underlying issues. Rules proliferate, processes become more burdensome, and employees push back. Dynamic work design offers a way out of this downward spiral by helping leaders maintain visibility into how work is happening while still addressing immediate challenges.

 

A central idea in the book is that dynamic work design is not a rigid framework or a one-size-fits-all solution. Instead, it is guided by a small set of powerful principles that can be applied flexibly, depending on the situation. These principles emphasize solving the right problems, structuring work to enable discovery, connecting people effectively across the workflow, regulating work to avoid overload, and making work visible through visual management. Together, they form a system that keeps organizations focused on creating value for customers while continuously improving how work gets done.

 

The authors stress the importance of choosing the right problems to tackle. Rather than launching large-scale transformations, dynamic work design encourages leaders to focus on small but important problems that a team of six to eight people can meaningfully address within 30 to 60 days. Although this approach may feel unsatisfying to leaders accustomed to sweeping initiatives, starting small accelerates learning and builds confidence. Clear problem statements help teams understand the gap between current performance and desired outcomes, and seemingly minor frustrations often turn out to be major drains on productivity.

 

To support this problem-solving approach, the book emphasizes adopting a discovery mindset. Leaders and teams are encouraged to set aside familiar solutions and resist the temptation to assume they already know the root cause of an issue. Expertise, the authors warn, can sometimes obscure simple answers. By experimenting with small improvements, testing ideas before scaling them, and postponing major technology investments until the problem is well understood, organizations can reduce risk while learning faster. Successful solutions tend to spread organically as people see tangible improvements in their work.

 

Learning, in this model, is not something that happens in classrooms or training programs but is embedded directly into workflows. The authors argue that no learning budget is large enough to compensate for a poorly designed system. Instead, learning is optimized by setting clear targets, defining specific actions, and integrating feedback so teams can see whether their efforts are producing results. Regular, up-to-date data allows people to identify obstacles, adjust their approach, and continuously improve.

 

Another key element of dynamic work design is ensuring that people are connected in ways that allow work to flow smoothly from one step to the next. Confusion often arises when handoffs are poorly defined or when managers rely on informal networks instead of clear processes. The book distinguishes between huddles, which are face-to-face conversations used to resolve ambiguity or make decisions, and handoffs, which transfer work without discussion when expectations are already clear. Managers play a critical role in huddles, using them to monitor progress, solve problems, and coach their teams. Well-designed huddles and handoffs prevent bottlenecks and keep the human chain connected.

 

The authors also challenge the widespread belief that keeping people busy maximizes productivity. Research shows that overloaded teams experience more stress, make more mistakes, and learn less. Dynamic work design aims for optimal challenge, where people are stretched just enough to grow without becoming overwhelmed. By aligning priorities, limiting work in progress, and monitoring flow through metrics like cycle time, leaders can prevent congestion and identify bottlenecks before they cause delays.

 

Visual management plays a crucial role in making work understandable and actionable. Because humans process visual information so efficiently, visual systems help teams see their work as a coherent whole rather than a disconnected set of tasks. Simple tools like problem boards can reveal the status of work, clarify responsibilities, and surface issues early. The authors caution against overengineering these systems, noting that their real value lies in the conversations they provoke. Messy boards often signal deeper problems and should be treated as opportunities for learning rather than failures.

 

Friday, March 13, 2026

 This extends the same workflow we discussed in the previous 3 articles to include the following but for pull-requests:

• Heatmaps use the same issue‑to‑type matrix

• Trend lines use the same CSVs once history is accumulated

• Dependency graphs using the already implemented module‑source parsing

The above three are typical of custom GitHub dashboards and are well known as eye candy.

Yaml now follows:

---
# Weekly PR analytics workflow: collects merged PRs over a sliding window,
# analyzes their Terraform impact, and renders charts for the dashboard.

name: Weekly Terraform PR analytics with AI summary and advanced visuals

on:

  workflow_dispatch:

    inputs:

      window_days:

        description: "Number of days back to collect closed PRs (integer)"

        required: false

        default: "7"

  schedule:

    - cron: "0 14 * * MON" # 14:00 UTC = 6am Pacific (PST; 7am during PDT), staggered from issue workflow

# contents: write is presumably needed so the job can commit generated
# charts/CSVs under history/ back to the repo — verify against the publish step.
permissions:

  contents: write

  pull-requests: read

  issues: read

jobs:

  pr_report:

    runs-on: ubuntu-latest

# NOTE(review): 'steps:' sits at column 0, which makes it a *top-level* key
# rather than part of jobs.pr_report — GitHub Actions rejects the workflow
# with "jobs.pr_report requires steps". The entire steps block below needs to
# be re-indented under pr_report — TODO confirm against the original file.
steps:

  - name: Check out repository

    uses: actions/checkout@v4

    with:

      fetch-depth: 0

  - name: Set up Python

    uses: actions/setup-python@v5

    with:

      python-version: "3.11"

  - name: Install dependencies

    run: |

      python -m pip install --upgrade pip

      pip install requests pandas matplotlib seaborn networkx python-hcl2

  - name: Prepare environment variables

    run: |

      echo "WINDOW_DAYS=${{ github.event.inputs.window_days || '7' }}" >> $GITHUB_ENV

      echo "REPO=${GITHUB_REPOSITORY}" >> $GITHUB_ENV

  ###########################################################################

  # 1. FETCH CLOSED PRs WITHIN WINDOW

  ###########################################################################

  - name: Fetch closed PRs (window)

    id: fetch

    env:

      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      REPO: ${{ env.REPO }}

      WINDOW_DAYS: ${{ env.WINDOW_DAYS }}

    run: |

      python << 'PY'

      import os, json, datetime, requests

      REPO = os.environ["REPO"]

      TOKEN = os.environ["GH_TOKEN"]

      WINDOW_DAYS = int(os.environ["WINDOW_DAYS"])

      HEADERS = {"Authorization": f"Bearer {TOKEN}"}

      since = (datetime.datetime.utcnow() - datetime.timedelta(days=WINDOW_DAYS)).isoformat() + "Z"

      def gh(url, params=None):

        r = requests.get(url, headers=HEADERS, params=params)

        r.raise_for_status()

        return r.json()

      prs = []

      page = 1

      while True:

        batch = gh(

          f"https://api.github.com/repos/{REPO}/pulls",

          {"state":"closed","per_page":100,"page":page}

        )

        if not batch:

          break

        for pr in batch:

          if pr.get("merged_at") and pr["merged_at"] >= since:

            prs.append({

              "number": pr["number"],

              "title": pr["title"],

              "user": pr["user"]["login"],

              "created_at": pr["created_at"],

              "merged_at": pr["merged_at"],

              "html_url": pr["html_url"]

            })

        page += 1

      with open("prs.json","w") as f:

        json.dump(prs, f, indent=2)

      print(f"FOUND_PRS={len(prs)}")

      PY

  ###########################################################################

  # 2. ANALYZE PRs: MODULES, AZURERM TYPES, TURNAROUND, REVIEW LATENCY

  ###########################################################################

  - name: Analyze PRs for Terraform impact

    id: analyze

    env:

      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      REPO: ${{ env.REPO }}

    run: |

      python << 'PY'

      import os, json, re, requests, subprocess

      import hcl2

      from datetime import datetime

      REPO = os.environ["REPO"]

      TOKEN = os.environ["GH_TOKEN"]

      HEADERS = {"Authorization": f"Bearer {TOKEN}"}

      def gh(url, params=None):

        r = requests.get(url, headers=HEADERS, params=params)

        r.raise_for_status()

        return r.json()

      def gh_text(url):

        r = requests.get(url, headers=HEADERS)

        r.raise_for_status()

        return r.text

      def list_pr_files(pr):

        files = []

        page = 1

        while True:

          batch = gh(

            f"https://api.github.com/repos/{REPO}/pulls/{pr}/files",

            {"per_page":100,"page":page}

          )

          if not batch:

            break

          files.extend(batch)

          page += 1

        return files

      def get_pr_reviews(pr):

        return gh(f"https://api.github.com/repos/{REPO}/pulls/{pr}/reviews")

      def get_file_at_sha(path, sha):

        r = requests.get(

          f"https://api.github.com/repos/{REPO}/contents/{path}",

          headers=HEADERS,

          params={"ref": sha}

        )

        if r.status_code == 404:

          return None

        data = r.json()

        if data.get("download_url"):

          return gh_text(data["download_url"])

        return None

      def parse_azurerm(tf):

        try:

          obj = hcl2.loads(tf)

        except:

          return set()

        out = set()

        res = obj.get("resource", {})

        if isinstance(res, dict):

          for rtype in res.keys():

            if rtype.startswith("azurerm_"):

              out.add(rtype)

        return out

      def parse_modules(tf):

        try:

          obj = hcl2.loads(tf)

        except:

          return set()

        out = set()

        mods = obj.get("module", {})

        if isinstance(mods, dict):

          for _, body in mods.items():

            src = body.get("source")

            if isinstance(src, str):

              out.add(src)

        return out

      def normalize_local(src, app):

        if src.startswith("./") or src.startswith("../"):

          import posixpath

          return posixpath.normpath(posixpath.join(app, src))

        return None

      def list_tf(dir, sha):

        try:

          out = subprocess.check_output(

            ["git","ls-tree","-r","--name-only",sha,dir],

            text=True

          )

          return [p for p in out.splitlines() if p.endswith(".tf")]

        except:

          return []

      with open("prs.json") as f:

        prs = json.load(f)

      pr_to_types = {}

      pr_turnaround = {}

      pr_review_latency = {}

      module_deps = {}

      for pr in prs:

        num = pr["number"]

        created = pr["created_at"]

        merged = pr["merged_at"]

        fmt = "%Y-%m-%dT%H:%M:%SZ"

        dtc = datetime.strptime(created, fmt)

        dtm = datetime.strptime(merged, fmt)

        pr_turnaround[num] = (dtm - dtc).total_seconds() / 86400.0

        reviews = get_pr_reviews(num)

        if reviews:

          first = min(r["submitted_at"] for r in reviews if r.get("submitted_at"))

          pr_review_latency[num] = (

            datetime.strptime(first, fmt) - dtc

          ).total_seconds() / 86400.0

        else:

          pr_review_latency[num] = None

        files = list_pr_files(num)

        sha = gh(f"https://api.github.com/repos/{REPO}/pulls/{num}")["head"]["sha"]

        touched_apps = set()

        for f in files:

          path = f["filename"]

          if path.startswith("workload/"):

            parts = path.split("/")

            if len(parts) >= 2:

              touched_apps.add("/".join(parts[:2]))

        types = set()

        for app in touched_apps:

          tf_paths = list_tf(app, sha)

          for p in tf_paths:

            txt = get_file_at_sha(p, sha)

            if not txt:

              continue

            types |= parse_azurerm(txt)

            for src in parse_modules(txt):

              local = normalize_local(src, app)

              if local:

                module_deps.setdefault(app, set()).add(local)

        if types:

          pr_to_types[num] = sorted(types)

      import pandas as pd

      rows = []

      for pr, types in pr_to_types.items():

        for t in types:

          rows.append({"pr": pr, "azurerm_type": t})

      pd.DataFrame(rows).to_csv("pr_severity_data.csv", index=False)

      pd.DataFrame(

        [{"pr":k,"turnaround_days":v} for k,v in pr_turnaround.items()]

      ).to_csv("pr_turnaround.csv", index=False)

      pd.DataFrame(

        [{"pr":k,"review_latency_days":v} for k,v in pr_review_latency.items()]

      ).to_csv("pr_review_latency.csv", index=False)

      with open("pr_module_deps.json","w") as f:

        json.dump({k: sorted(list(v)) for k,v in module_deps.items()}, f, indent=2)

      PY

  ###########################################################################

  # 3. GENERATE CHARTS (heatmap, trend, dependency, turnaround, review latency)

  ###########################################################################

  - name: Generate PR charts and markdown

    id: charts

    env:

      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

    run: |

      python << 'PY'

      import os, json, datetime, glob

      import pandas as pd

      import matplotlib.pyplot as plt

      import seaborn as sns

      import networkx as nx

      ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")

      os.makedirs("history/pr", exist_ok=True)

      # Load severity data

      if os.path.exists("pr_severity_data.csv"):

        df = pd.read_csv("pr_severity_data.csv")

        counts = df.groupby("azurerm_type")["pr"].nunique().sort_values(ascending=False)

      else:

        counts = pd.Series(dtype=int)

      # Severity bar

      sev_png = f"history/pr/pr-severity-{ts}.png"

      plt.figure(figsize=(12,6))

      if not counts.empty:

        counts.plot(kind="bar")

        plt.title("PR frequency by azurerm resource type")

        plt.xlabel("azurerm resource type")

        plt.ylabel("number of PRs touching type")

      else:

        plt.text(0.5,0.5,"No PR data",ha="center",va="center")

        plt.axis("off")

      plt.tight_layout()

      plt.savefig(sev_png)

      plt.clf()

      # Heatmap

      heat_png = f"history/pr/pr-heatmap-{ts}.png"

      if os.path.exists("pr_severity_data.csv"):

        mat = pd.read_csv("pr_severity_data.csv")

        if not mat.empty:

          pivot = mat.pivot_table(index="azurerm_type", columns="pr", aggfunc='size', fill_value=0)

          pivot['total'] = pivot.sum(axis=1)

          pivot = pivot.sort_values('total', ascending=False).drop(columns=['total'])

          if pivot.shape[1] > 100:

            pivot = pivot.iloc[:, :100]

          plt.figure(figsize=(14, max(6, 0.2*pivot.shape[0])))

          sns.heatmap(pivot, cmap="YlGnBu")

          plt.title("Heatmap: azurerm types vs PRs")

          plt.tight_layout()

          plt.savefig(heat_png)

          plt.clf()

      # Trend lines

      trend_png = f"history/pr/pr-trend-{ts}.png"

      hist_files = glob.glob("history/pr/pr-severity-*.csv")

      if os.path.exists("pr_severity_data.csv"):

        hist_files.append("pr_severity_data.csv")

      trend_df = pd.DataFrame()

      for f in hist_files:

        try:

          import re

          m = re.search(r"(\d{8}-\d{6})", f)

          label = m.group(1) if m else os.path.getmtime(f)

          tmp = pd.read_csv(f)

          if tmp.empty:

            continue

          counts_tmp = tmp.groupby("azurerm_type")["pr"].nunique().rename(label)

          trend_df = pd.concat([trend_df, counts_tmp], axis=1)

        except:

          pass

      if not trend_df.empty:

        trend_df = trend_df.fillna(0).T

        try:

          trend_df.index = pd.to_datetime(trend_df.index, format="%Y%m%d-%H%M%S", errors='coerce')

        except:

          pass

        plt.figure(figsize=(14,6))

        latest = trend_df.iloc[-1].sort_values(ascending=False).head(8).index.tolist()

        for col in latest:

          plt.plot(trend_df.index, trend_df[col], marker='o', label=col)

        plt.legend()

        plt.title("Trend lines: PR frequency over time")

        plt.tight_layout()

        plt.savefig(trend_png)

        plt.clf()

      # Dependency graph: a directed edge from each app to every module it
      # uses, read from pr_module_deps.json (skipped when the file is absent).
      dep_png = f"history/pr/pr-deps-{ts}.png"

      if os.path.exists("pr_module_deps.json"):

        with open("pr_module_deps.json") as f:

          deps = json.load(f)

        # deps: mapping of app name -> iterable of module names — presumably
        # produced earlier in the workflow; confirm schema against producer.
        G = nx.DiGraph()

        for app, mods in deps.items():

          G.add_node(app, type='app')

          for m in mods:

            G.add_node(m, type='module')

            G.add_edge(app, m)

        plt.figure(figsize=(12,8))

        # Force-directed layout; k tunes node spacing.
        # NOTE(review): spring_layout is randomized — pass seed= for
        # reproducible images if that matters.
        pos = nx.spring_layout(G, k=0.5)

        # Blue for app nodes, green for module nodes.
        colors = ['#1f78b4' if G.nodes[n]['type']=='app' else '#33a02c' for n in G.nodes()]

        nx.draw(G, pos, with_labels=True, node_color=colors, node_size=600, arrows=True)

        plt.title("PR module dependency graph")

        plt.tight_layout()

        plt.savefig(dep_png)

        plt.clf()

      # Turnaround chart

      ta_png = f"history/pr/pr-turnaround-{ts}.png"

      if os.path.exists("pr_turnaround.csv"):

        ta = pd.read_csv("pr_turnaround.csv").dropna()

        ta = ta.sort_values("turnaround_days", ascending=False).head(50)

        plt.figure(figsize=(12,6))

        plt.bar(ta["pr"].astype(str), ta["turnaround_days"])

        plt.xticks(rotation=90)

        plt.title("PR turnaround time (days)")

        plt.tight_layout()

        plt.savefig(ta_png)

        plt.clf()

      # Review latency chart

      rl_png = f"history/pr/pr-review-latency-{ts}.png"

      if os.path.exists("pr_review_latency.csv"):

        rl = pd.read_csv("pr_review_latency.csv").dropna()

        rl = rl.sort_values("review_latency_days", ascending=False).head(50)

        plt.figure(figsize=(12,6))

        plt.bar(rl["pr"].astype(str), rl["review_latency_days"])

        plt.xticks(rotation=90)

        plt.title("PR review latency (days)")

        plt.tight_layout()

        plt.savefig(rl_png)

        plt.clf()

      # AI summary

      if os.path.exists("prs.json"):

        prs = json.load(open("prs.json"))

      else:

        prs = []

      condensed = [

        {"pr": p["number"], "user": p["user"], "title": p["title"], "url": p["html_url"]}

        for p in prs

      ]

      ai_text = "AI summary skipped."

      if os.environ.get("OPENAI_API_KEY"):

        import subprocess

        prompt = (

          "Summarize each PR as a one-line 'who changed what' statement. "

          "Format: '#<pr> — <user> changed <succinct summary>'."

        )

        payload = {

          "model": "gpt-4o-mini",

          "messages": [

            {"role":"system","content":"You summarize PRs concisely."},

            {"role":"user","content": prompt + "\n\n" + json.dumps(condensed)[:15000]}

          ],

          "temperature":0.2

        }

        proc = subprocess.run(

          ["curl","-sS","https://api.openai.com/v1/chat/completions",

           "-H","Content-Type: application/json",

           "-H",f"Authorization: Bearer {os.environ['OPENAI_API_KEY']}",

           "-d",json.dumps(payload)],

          capture_output=True, text=True

        )

        try:

          ai_text = json.loads(proc.stdout)["choices"][0]["message"]["content"]

        except:

          pass

          # Markdown report

          md_path = f"history/pr/pr-report-{ts}.md"

          with open(md_path, "w") as f:

            f.write("# Weekly Terraform PR analytics report\n\n")

            f.write(f"**Window (days):** {os.environ['WINDOW_DAYS']}\n\n")

            f.write("## AI Summary (who changed what)\n\n```\n")

            f.write(ai_text + "\n```\n\n")

            f.write("## PR frequency by azurerm type\n\n")

            f.write(f"![]({os.path.basename(sev_png)})\n\n")

            f.write("## Heatmap: azurerm types vs PRs\n\n")

            f.write(f"![]({os.path.basename(heat_png)})\n\n")

            f.write("## Trend lines\n\n")

            f.write(f"![]({os.path.basename(trend_png)})\n\n")

            f.write("## Dependency graph\n\n")

            f.write(f"![]({os.path.basename(dep_png)})\n\n")

            f.write("## PR turnaround time\n\n")

            f.write(f"![]({os.path.basename(ta_png)})\n\n")

            f.write("## PR review latency\n\n")

            f.write(f"![]({os.path.basename(rl_png)})\n\n")

            f.write("## Data artifacts\n\n")

            f.write("- `pr_severity_data.csv` — per-PR azurerm type mapping\n")

            f.write("- `pr_turnaround.csv` — per-PR turnaround in days\n")

            f.write("- `pr_review_latency.csv` — per-PR review latency in days\n")

            f.write("- `pr_module_deps.json` — module dependency data used for graph\n")


          # Save CSVs into history for future trend aggregation

          import shutil

          if os.path.exists("pr_severity_data.csv"):

            shutil.copy("pr_severity_data.csv", f"history/pr/pr-severity-{ts}.csv")

          if os.path.exists("pr_turnaround.csv"):

            shutil.copy("pr_turnaround.csv", f"history/pr/pr-turnaround-{ts}.csv")

          if os.path.exists("pr_review_latency.csv"):

            shutil.copy("pr_review_latency.csv", f"history/pr/pr-review-latency-{ts}.csv")


          print(f"PR_REPORT_MD={md_path}")

          print(f"PR_REPORT_SEV={sev_png}")

          print(f"PR_REPORT_HEAT={heat_png}")

          print(f"PR_REPORT_TREND={trend_png}")

          print(f"PR_REPORT_DEP={dep_png}")

          print(f"PR_REPORT_TA={ta_png}")

          print(f"PR_REPORT_RL={rl_png}")

          PY


      ###########################################################################

      # 4. CREATE PR WITH REPORT AND PRUNE HISTORY

      ###########################################################################

      - name: Create PR for PR analytics report

        id: create_pr

        # Opens (or updates) a pull request containing the newly generated
        # report set; later steps read steps.create_pr outputs/outcome.
        uses: peter-evans/create-pull-request@v6

        with:

          commit-message: "Add weekly Terraform PR analytics report and visuals (prune to last 10)"

          title: "Weekly Terraform PR analytics report"

          body: |

            This PR adds the latest weekly PR analytics report and charts under `history/pr/`.

            The workflow prunes older reports to keep at most 10 report sets.

          branch: "weekly-terraform-pr-analytics"

          base: "main"

          # NOTE(review): in create-pull-request, `path` is the relative path
          # of the repository the action commits from — not a filter for
          # which files to commit. Confirm history/pr is really a checkout
          # root here; otherwise this input should probably be dropped.
          path: "history/pr"


      - name: Prune PR history to max 10 report sets

        # NOTE(review): this prune runs after create-pull-request has already
        # committed, so deletions are only picked up by a later run's PR —
        # confirm that ordering is intended.
        if: steps.create_pr.outcome == 'success'

        run: |

          python << 'PY'

          import re

          from pathlib import Path


          hist = Path("history/pr")

          hist.mkdir(parents=True, exist_ok=True)


          groups = {}

          for p in hist.iterdir():

            m = re.search(r"(\d{8}-\d{6})", p.name)

            if not m:

              continue

            ts = m.group(1)

            groups.setdefault(ts, []).append(p)


          timestamps = sorted(groups.keys(), reverse=True)

          keep = set(timestamps[:10])

          drop = [p for ts, files in groups.items() if ts not in keep for p in files]


          for p in drop:

            try:

              p.unlink()

            except Exception:

              pass


          print(f"Pruned {len(drop)} files; kept {len(keep)} report sets.")

          PY


      ###########################################################################

      # 5. NOTIFY RUNBOOK WEBHOOK (OPTIONAL EMAIL VIA AZ COMMUNICATION)

      ###########################################################################

      - name: Notify runbook webhook for PR analytics

        if: steps.create_pr.outcome == 'success'

        env:

          RUNBOOK_WEBHOOK_URL: ${{ secrets.RUNBOOK_WEBHOOK_URL }}

          PR_URL: ${{ steps.create_pr.outputs.pull-request-url }}

          WINDOW_DAYS: ${{ env.WINDOW_DAYS }}

        run: |

          payload=$(jq -n \

            --arg pr "$PR_URL" \

            --arg window "$WINDOW_DAYS" \

            '{subject: ("Weekly Terraform PR analytics report - " + $window + "d"), body: ("A new weekly PR analytics report has been generated. Review the PR: " + $pr), pr_url: $pr, window_days: $window}')

          curl -sS -X POST "$RUNBOOK_WEBHOOK_URL" \

            -H "Content-Type: application/json" \

            -d "$payload"


      - name: Output PR analytics artifacts

        # Always list the generated files for the job log, even when earlier
        # steps failed; `|| true` keeps a missing directory non-fatal.
        if: always()

        run: |

          echo "Generated files in history/pr/:"

          ls -la history/pr || true