Cluster computing

Tuesday, March 17, 2026

Yesterday we discussed an end-to-end GitHub Actions workflow that demonstrates the creation and usage of an “Issues Dashboard” for periodic insights into the customer experience with the repository assets. We did not cover some of the pesky and troublesome tasks, such as finding all pull request references on a GitHub issue regardless of whether they appear in comments, in events of type “mentioned” or “cross-referenced”, or in the closure of the issue. The sample Python implementation below provides those details.

Python now follows:

import os, re, json, datetime, requests

import hcl2

import pandas as pd

# ---- Configuration (read once, at import time) ----

# "owner/repo" slug and API token, both taken from the process environment.
# A missing variable raises KeyError immediately rather than failing later.
REPO = os.environ["GITHUB_REPOSITORY"]
GH_TOKEN = os.environ["GH_TOKEN"]

# Default headers sent with every GitHub API request made by the helpers below.
HEADERS = {
    "Authorization": f"Bearer {GH_TOKEN}",
    "Accept": "application/vnd.github+json, application/vnd.github.mockingbird-preview+json",
    "X-GitHub-Api-Version": "2026-03-10",
}

# ---- Time window (last 7 days) ----

# Use a timezone-aware "now": datetime.utcnow() is deprecated since Python 3.12.
# Format as whole-second ISO 8601 UTC ("YYYY-MM-DDTHH:MM:SSZ") so the value has
# the same shape as the timestamps GitHub returns (no fractional seconds).
since = (
    datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=7)
).strftime("%Y-%m-%dT%H:%M:%SZ")

# ---- Helpers ----

def gh_get(url, params=None, headers=None, timeout=30):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        url: Absolute URL to request.
        params: Optional dict of query-string parameters.
        headers: Optional dict of extra headers. They are merged over the
            module-level ``HEADERS``, so a caller can override e.g. ``Accept``.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout unless one is passed, so a stalled connection would
            otherwise block forever.

    Returns:
        The parsed JSON payload (whatever ``Response.json()`` yields).

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    merged_headers = {**HEADERS, **headers} if headers else HEADERS
    r = requests.get(url, headers=merged_headers, params=params, timeout=timeout)
    r.raise_for_status()
    return r.json()

def gh_get_text(url, timeout=30):
    """Send an authenticated GET request and return the raw response text.

    Args:
        url: Absolute URL to request.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout unless one is passed, so a stalled connection would
            otherwise block forever.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    r = requests.get(url, headers=HEADERS, timeout=timeout)
    r.raise_for_status()
    return r.text

def list_closed_issues():
    """Return every closed issue (pull requests excluded) in the time window.

    The Issues API returns both issues and pull requests; entries carrying a
    ``pull_request`` key are dropped. All result pages are walked, so the
    result is not silently capped at the first 100 items.

    NOTE(review): the API's ``since`` parameter filters on last-update time,
    not on close time, so an issue closed earlier but touched inside the
    window is also returned - confirm this is the intended window semantics.

    Returns:
        A list of issue dicts as returned by the API.
    """
    url = f"https://api.github.com/repos/{REPO}/issues"
    per_page = 100
    issues = []
    page = 1
    while True:
        batch = gh_get(
            url,
            params={
                "state": "closed",
                "since": since,
                "per_page": per_page,
                "page": page,
            },
        )
        if not isinstance(batch, list) or not batch:
            break
        issues.extend(i for i in batch if "pull_request" not in i)
        # Judge "last page" on the raw batch: PRs count toward the page size
        # even though they are filtered out of the result.
        if len(batch) < per_page:
            break
        page += 1
    return issues

# Browser-facing PR link, e.g. https://github.com/owner/repo/pull/123
PR_HTML_URL_RE = re.compile(
    r"https?://github\.com/(?P<owner>[^/\s]+)/(?P<repo>[^/\s]+)/pull/(?P<num>\d+)",
    flags=re.IGNORECASE,
)

# REST API PR link, e.g. https://api.github.com/repos/owner/repo/pulls/123
PR_API_URL_RE = re.compile(
    r"https?://api\.github\.com/repos/(?P<owner>[^/\s]+)/(?P<repo>[^/\s]+)/pulls/(?P<num>\d+)",
    flags=re.IGNORECASE,
)

# Shorthand references that may appear in free text:
#   #123            -> assumed to point at the same repository
#   owner/repo#123  -> explicit cross-repository reference
# The look-behind keeps "#123" from matching when it is glued to a word
# character, which is the case for the tail of "owner/repo#123".
SHORTHAND_SAME_REPO_RE = re.compile(r"(?<!\w)#(?P<num>\d+)\b")

SHORTHAND_CROSS_REPO_RE = re.compile(
    r"(?P<owner>[A-Za-z0-9_.-]+)/(?P<repo>[A-Za-z0-9_.-]+)#(?P<num>\d+)\b"
)

def _normalize_html_pr_url(owner: str, repo: str, num: int) -> str:

return f"https://github.com/{owner}/{repo}/pull/{int(num)}"

def _collect_from_text(text: str, default_owner: str, default_repo: str) -> set:
    """Extract candidate PR URLs from free text (body/comments/events text).

    Four reference forms are recognised: HTML PR URLs, API PR URLs,
    ``owner/repo#N`` shorthand and bare ``#N`` shorthand. Bare ``#N`` is
    resolved against ``default_owner``/``default_repo``. Every hit is turned
    into a normalized HTML PR URL.

    Returns:
        A set of URL strings; empty when ``text`` is falsy or has no hits.
    """
    if not text:
        return set()

    # These three patterns all expose owner/repo/num named groups, so they can
    # share one extraction pass.
    fully_qualified = {
        _normalize_html_pr_url(hit.group("owner"), hit.group("repo"), hit.group("num"))
        for pattern in (PR_HTML_URL_RE, PR_API_URL_RE, SHORTHAND_CROSS_REPO_RE)
        for hit in pattern.finditer(text)
    }

    # Bare "#123" carries no repository, so fall back to the defaults.
    same_repo = {
        _normalize_html_pr_url(default_owner, default_repo, hit.group("num"))
        for hit in SHORTHAND_SAME_REPO_RE.finditer(text)
    }

    return fully_qualified | same_repo

def _paginate_gh_get(url, headers=None, per_page=100):
    """Generator: yield items from every page of a list endpoint.

    Pages are requested with ``gh_get`` starting at page 1. Iteration stops
    at the first response that is not a list, is empty, or holds fewer than
    ``per_page`` items.

    Note: ``headers`` is accepted but not used by this function.
    """
    page_number = 0
    while True:
        page_number += 1
        batch = gh_get(url, params={"per_page": per_page, "page": page_number})
        if not isinstance(batch, list) or not batch:
            return
        yield from batch
        # A short page means there is nothing further to fetch.
        if len(batch) < per_page:
            return

def _pr_url_from_cross_reference(event):
    """Return the PR HTML URL carried by a 'cross-referenced' event, else None.

    The URL is read from ``event["source"]["issue"]["html_url"]`` and is only
    returned when it contains "/pull/".
    """
    if event.get("event") != "cross-referenced":
        return None
    source_issue = (event.get("source") or {}).get("issue") or {}
    html_url = source_issue.get("html_url")
    if isinstance(html_url, str) and "/pull/" in html_url:
        return html_url
    return None


def extract_pr_urls_from_issue(issue_number: int):
    """Extract PR URLs associated with an issue.

    Sources scanned, in order:
      1. the issue body,
      2. every issue comment,
      3. issue events (free text plus 'cross-referenced' payloads),
      4. the issue timeline (free text, 'cross-referenced' payloads, and the
         item's own ``html_url`` / ``url`` fields).

    Caveat: a bare ``#123`` in text is treated as a PR candidate in this
    repository even though it may refer to an issue; callers that need
    certainty should verify each returned URL.

    Requires module-level ``REPO`` ("owner/repo"), ``gh_get``,
    ``_paginate_gh_get``, ``_collect_from_text``, ``_normalize_html_pr_url``
    and the ``PR_*_RE`` patterns.

    Args:
        issue_number: Number of the issue within ``REPO``.

    Returns:
        A sorted list of unique, normalized HTML PR URLs
        (``https://github.com/<owner>/<repo>/pull/<n>``).
    """
    owner, repo = REPO.split("/", 1)
    api_base = f"https://api.github.com/repos/{REPO}/issues/{issue_number}"
    pr_urls = set()

    # 1) Issue body
    issue = gh_get(api_base)
    if isinstance(issue, dict):
        pr_urls |= _collect_from_text(issue.get("body") or "", owner, repo)

    # 2) All comments
    for comment in _paginate_gh_get(f"{api_base}/comments"):
        if isinstance(comment, dict):
            pr_urls |= _collect_from_text(comment.get("body") or "", owner, repo)

    # 3) Issue events: free-text body plus structured cross-references
    for ev in _paginate_gh_get(f"{api_base}/events"):
        if not isinstance(ev, dict):
            continue
        pr_urls |= _collect_from_text(ev.get("body") or "", owner, repo)
        linked = _pr_url_from_cross_reference(ev)
        if linked:
            pr_urls.add(linked)

    # 4) Timeline
    for item in _paginate_gh_get(f"{api_base}/timeline"):
        if not isinstance(item, dict):
            continue

        # Free-text scan on any plausible string field
        for key in ("body", "message", "title", "commit_message", "subject"):
            val = item.get(key)
            if isinstance(val, str):
                pr_urls |= _collect_from_text(val, owner, repo)

        # Structured cross-reference payloads
        linked = _pr_url_from_cross_reference(item)
        if linked:
            pr_urls.add(linked)

        # The item itself may carry a PR link, in HTML or API form
        html_url = item.get("html_url")
        if isinstance(html_url, str) and "/pull/" in html_url:
            pr_urls.add(html_url)

        api_url = item.get("url")
        if isinstance(api_url, str):
            m = PR_API_URL_RE.search(api_url)
            if m:
                pr_urls.add(
                    _normalize_html_pr_url(m.group("owner"), m.group("repo"), m.group("num"))
                )

    # Final normalization: keep only strings containing an HTML PR URL and
    # rebuild each in canonical form, so trailing fragments/paths and an
    # "http://" scheme do not produce duplicates.
    normalized = set()
    for candidate in pr_urls:
        m = PR_HTML_URL_RE.search(candidate)
        if m:
            normalized.add(
                _normalize_html_pr_url(m.group("owner"), m.group("repo"), m.group("num"))
            )
    return sorted(normalized)

def pr_number_from_url(u):
    """Return the integer after the first "/pull/" in ``u``, or None if absent."""
    match = re.search(r"/pull/(\d+)", u)
    if match is None:
        return None
    return int(match.group(1))

#codingexercise: CodingExercise-03-17-2026.pdf

Monday, March 16, 2026

This is a summary of the book titled “Creative Machines: AI, Art & Us” written by Maya Ackerman and published by Wiley, 2025. This book explores how artificial intelligence can deepen, rather than diminish, human creativity. Long before AI became a mainstream topic, Ackerman—a computer science professor and lifelong musician—was already investigating how machines might inspire people to become better creators. Drawing on both academic research and her experience as the CEO of a generative AI company, she argues that the most meaningful creative technologies are not those that replace human effort, but those that work alongside people as supportive collaborators. Throughout the book, she introduces the idea of “humble creative machines,” tools designed to empower users, foster growth, and keep humans at the center of the creative process.

Ackerman grounds her argument in a clear definition of creativity. Creativity, she explains, is best understood through its products—objects or experiences that are both novel and valuable. Novelty excludes mere variations on what already exists, while value implies intention and purpose. A creative work may be valuable because it is useful, or because it provides enjoyment, meaning, or emotional resonance, as art and music do. As Ackerman puts it, creativity is not simply about being first, but about making something that matters. Importantly, this definition does not restrict creativity to humans. Many animals and even plants exhibit creative behaviors, such as the bowerbird’s carefully constructed displays made from colorful objects to attract mates. By focusing on novelty and value rather than on the identity of the creator, Ackerman opens the door to recognizing how machines, too, can participate in creative processes.

She cautions, however, that machine creativity does not need to look like human creativity. One common way of evaluating creative machines is the Discrimination Test, which asks whether people can distinguish machine-generated works from those made by humans. Ackerman recounts a 1997 experiment in which a computer program composed a piece of music in the style of Bach, and most listeners—including trained musicians—mistook it for the original composer’s work. While this test is useful because it forces audiences to judge the artifact rather than the creator, Ackerman notes its limitations. Comparing machines to humans risks overlooking the fact that machines can be creative in their own right. For ethical and intellectual property reasons, she argues that AI should not imitate living artists. Instead, creative machines should be treated as entirely new entities—tools that expand human potential rather than mimic it.

Creativity depends on the balance between divergent and convergent thinking. Divergent thinking involves curiosity, exploration, and the willingness to generate many ideas, including strange or impractical ones. It is essential for innovation and often benefits from stepping away from problems and returning to them with fresh perspectives. Convergent thinking, by contrast, is the process of refining ideas, making decisions, and turning possibilities into finished work. Ackerman emphasizes that creators must move back and forth between these modes and resist judging ideas too early. Great creative output, she explains, often comes from producing a large volume of work, much of which will never be seen. Picasso’s thousands of artworks serve as an example of how brilliance emerges from persistence rather than perfection. This same principle applies to machines: if AI systems are limited to producing only the most likely or “correct” answers, they lose their ability to help humans discover something genuinely new.

Ackerman encourages readers to think of creative machines as collaborators rather than replacements. She compares working with AI to collaborating with a talented friend. The value lies not in having the work done for you, but in being supported, challenged, and inspired as you develop your own ideas. Many creative AI tools, she observes, focus on showing off impressive results without inviting users into the process. The most impactful tools, by contrast, enable co-creativity, where humans and machines interact dynamically to shape a final outcome. Used in this way, AI becomes a partner with unique strengths—always available, endlessly patient, and capable of offering fresh perspectives.

Her own journey illustrates this approach. After years of frustration with songwriting, Ackerman collaborated with an AI system called ALYSIA, which generated melodies based on lyrics. Rather than replacing her creativity, the tool sparked new ideas and helped her break out of repetitive patterns. Over time, working with AI gave her the confidence to compose independently. This experience informed her advocacy for “humble creative machines,” a concept she helped formalize with other researchers. These tools are flexible, allowing users to decide how much assistance they want; supportive, acting as coaches rather than crutches; genuinely creative, offering novel suggestions; and easy to use, fitting seamlessly into existing workflows. When users approach AI as a responsive tool rather than a performer meant to impress, the quality of the work improves and ownership remains with the human creator.

Ackerman points to conversational AI tools such as ChatGPT as practical examples of this philosophy in action. These systems adapt to user input, encourage iteration, and leave room for human judgment and editing. However, she stresses that whether an AI functions as a humble creative machine depends as much on the user’s mindset as on the technology itself. When people allow AI to do all the work, they miss valuable learning opportunities. When they engage with it thoughtfully—using feedback, critique, and iteration—they develop stronger creative skills over time.

The book also addresses the business and design implications of creative AI. Tools should not aim to retain users by fostering dependence or addiction. Instead, they should support long-term creative growth, even as users become more skilled. Ackerman describes how her own company learned this lesson when an early version of their songwriting app failed to gain traction because it tried to do too much. By refocusing on a simpler tool that offered suggestions rather than complete solutions, they attracted more users who returned consistently. Sustainable success, she argues, comes from empowering people rather than dazzling them once.

Ackerman does not shy away from the ethical challenges of creative machines, particularly the issue of bias. Because AI systems are trained on human-generated data, they inevitably reflect human stereotypes and blind spots. Studies have shown that image generators often reproduce racial, gender, and cultural biases, misrepresenting marginalized groups and oversimplifying non-Western cultures. These failures do not reflect machine intent, but rather the limitations of the data on which they are trained. Addressing this problem requires more diverse voices in AI development and research, as well as deliberate efforts to improve datasets and representations. Projects like narrative-generating systems designed to preserve Indigenous stories demonstrate how creative machines can also be used to counter erasure and support cultural understanding.

Ackerman argues that creative machines hold up a mirror to humanity. By exposing our biases, assumptions, and unexamined patterns, AI offers an opportunity for deeper self-reflection. Drawing on ideas from psychology, she suggests that confronting what machines reveal about us—both the negative and the positive—can lead to meaningful change. Technology alone cannot fix human problems, but it can help make them visible. When used thoughtfully, creative machines can support not only better art and innovation, but also greater awareness, responsibility, and human flourishing.

Sunday, March 15, 2026

Transient and transit object detection in aerial drone images:

Introduction:

The increasing availability of high-resolution aerial imagery from unmanned aerial vehicles (UAVs) presents a unique opportunity for time-domain object detection. Unlike traditional satellite imagery, UAVs offer flexible sampling rates, dynamic perspectives, and real-time responsiveness. However, the irregular cadence and noise inherent in aerial sequences pose challenges for conventional object detection pipelines, especially when attempting to identify transient or fleeting objects such as pedestrians, vehicles, or small mobile assets.

Machine learning techniques have become indispensable in aerial image analysis, particularly in large datasets where manual annotation is infeasible. Convolutional neural networks (CNNs) have been widely adopted for static object detection, but their performance degrades when applied to temporally sparse or noisy sequences. Prior work has explored phase-folding and frame-by-frame tracking, but these methods are computationally expensive and sensitive to sampling irregularities.

This article introduces DroneWorldNet, a frequency-domain model that bypasses the limitations of traditional tracking by transforming image clip vectors into frequency-domain tensors. DroneWorldNet applies a discrete wavelet transform (DWT) to suppress noise and highlight localized changes, followed by a Finite-Embedding Fourier Transform (FEFT) to extract periodic and harmonic features across time. These tensors are then classified into one of four object states: null (no object), transient (brief appearance), stable (persistent presence), or transit (periodic occlusion or movement).

We apply DroneWorldNet to the DOTA dataset, which contains annotated aerial scenes from diverse environments. Each image clip is treated as a temporal stack, and the model is trained on both real and synthetic sequences to ensure robustness across lighting, altitude, and occlusion conditions. The pipeline includes spatial clustering, data normalization, and tensor construction, followed by classification using CNN and fully connected layers.

DroneWorldNet achieves subsecond inference latency and high classification accuracy, demonstrating its suitability for real-time deployment in edge-cloud UAV systems. This work lays the foundation for a full-scale variability survey of aerial scenes and opens new avenues for time-domain analysis in geospatial workflows.

Data Preprocessing:

Each image clip sequence is preprocessed to construct a high-quality, neural-network-friendly representation. For each frame, we extract three features: normalized brightness, estimated uncertainty (e.g., motion blur or sensor noise), and timestamp. Brightness values are converted from log scale to linear flux using calibration constants derived from the UAV sensor specifications. We then subtract the median and standardize using the interquartile range (IQR), followed by compression into the [-1, 1] range using the arcsinh function.

Time values are normalized to [0, 1] based on the total observation window, typically spanning 10–30 seconds. Uncertainty values are similarly rescaled and compressed to match the flux scale. The final input tensor for each sequence is a matrix of shape T × 3, where T is the number of frames, and each row contains brightness, uncertainty, and timestamp.

This representation ensures that DroneWorldNet can handle sequences of varying length and sampling rate, a critical requirement for aerial deployments where cadence may fluctuate due to flight path, altitude, or environmental conditions.

DroneWorldNet model:

DroneWorldNet is a hybrid signal-processing and deep learning model designed to classify aerial image sequences into four object states: null, transient, stable, and transit. The model architecture integrates three core components:

Wavelet Decomposition: A one-dimensional discrete wavelet transform (DWT) is applied to the brightness vector to suppress noise and highlight localized changes. This is particularly effective in identifying transient objects that appear briefly and then vanish.

Finite-Embedding Fourier Transform (FEFT): A modified discrete Fourier transform is applied to the time series to extract periodic and harmonic features. FEFT enables detection of transit-like behavior, such as vehicles passing through occluded regions or pedestrians crossing paths.

Convolutional Neural Network (CNN): The frequency-domain tensor is passed through a series of convolutional and fully connected layers, which learn to discriminate between the four object states. The model is trained using a categorical cross-entropy loss function and optimized with Adam.

Training and Evaluation:

To train DroneWorldNet, we generate synthetic aerial sequences using motion simulation models that replicate pedestrian and vehicle dynamics under varying conditions. These include changes in lighting, altitude, occlusion, and background texture. Synthetic sequences are blended with real samples from the DOTA dataset to ensure generalization across diverse environments.

The model is trained on a four-class scheme: null (no object), transient (brief appearance), stable (persistent presence), and transit (periodic occlusion or movement). On a held-out validation set, DroneWorldNet achieves an F1 score of 0.89, with precision and recall exceeding 0.90 for stable and transit classes. Transient detection remains challenging due to low signal-to-noise ratio, but wavelet decomposition significantly improves sensitivity.

Saturday, March 14, 2026

This is a summary of the book titled “There’s Got to Be a Better Way: How to Deliver Results and Get Rid of the Stuff That Gets in the Way of Real Work” written by Nelson P. Repenning and Donald C. Kieffer and published by Basic Books in 2025. This book explores why so many modern organizations feel overwhelmed, inefficient, and perpetually behind despite having talented people and advanced technologies. Drawing on decades of research and practical experience, the authors argue that the root of the problem is not a lack of effort or intelligence, but the persistence of outdated approaches to designing work. Many organizations still rely on management ideas inherited from Fordist and Taylorist traditions, which assume that work can be planned in advance, broken into static parts, and controlled through rigid rules. In a world characterized by constant change, uncertainty, and complexity, these assumptions no longer hold. Repenning and Kieffer propose an alternative they call dynamic work design, a way of organizing work that builds learning, adaptability, and responsiveness directly into everyday operations.

The authors begin by observing that poorly designed workflow systems often create chaos rather than control. Managers become exhausted, employees feel frustrated, and inefficiencies multiply as organizations try to impose static structures on dynamic realities. To illustrate this mismatch, the book compares traditional workflow systems to early GPS technology that blindly followed preprogrammed routes and steered drivers into traffic jams. Dynamic work design, by contrast, resembles modern GPS systems that continuously gather data from the environment and adjust in real time. In the same way, dynamic organizations constantly draw information from their work systems so people can respond quickly and intelligently as conditions change.

When leaders face complexity, rapid growth, or ongoing disruption, they often fall into what the authors call the “firefighting trap.” Instead of understanding how work is actually flowing through the organization, leaders focus on urgent problems and short-term fixes. Over time, this reactive behavior can turn them into “firefighting arsonists,” as well-intentioned decisions unintentionally worsen the underlying issues. Rules proliferate, processes become more burdensome, and employees push back. Dynamic work design offers a way out of this downward spiral by helping leaders maintain visibility into how work is happening while still addressing immediate challenges.

A central idea in the book is that dynamic work design is not a rigid framework or a one-size-fits-all solution. Instead, it is guided by a small set of powerful principles that can be applied flexibly, depending on the situation. These principles emphasize solving the right problems, structuring work to enable discovery, connecting people effectively across the workflow, regulating work to avoid overload, and making work visible through visual management. Together, they form a system that keeps organizations focused on creating value for customers while continuously improving how work gets done.

The authors stress the importance of choosing the right problems to tackle. Rather than launching large-scale transformations, dynamic work design encourages leaders to focus on small but important problems that a team of six to eight people can meaningfully address within 30 to 60 days. Although this approach may feel unsatisfying to leaders accustomed to sweeping initiatives, starting small accelerates learning and builds confidence. Clear problem statements help teams understand the gap between current performance and desired outcomes, and seemingly minor frustrations often turn out to be major drains on productivity.

To support this problem-solving approach, the book emphasizes adopting a discovery mindset. Leaders and teams are encouraged to set aside familiar solutions and resist the temptation to assume they already know the root cause of an issue. Expertise, the authors warn, can sometimes obscure simple answers. By experimenting with small improvements, testing ideas before scaling them, and postponing major technology investments until the problem is well understood, organizations can reduce risk while learning faster. Successful solutions tend to spread organically as people see tangible improvements in their work.

Learning, in this model, is not something that happens in classrooms or training programs but is embedded directly into workflows. The authors argue that no learning budget is large enough to compensate for a poorly designed system. Instead, learning is optimized by setting clear targets, defining specific actions, and integrating feedback so teams can see whether their efforts are producing results. Regular, up-to-date data allows people to identify obstacles, adjust their approach, and continuously improve.

Another key element of dynamic work design is ensuring that people are connected in ways that allow work to flow smoothly from one step to the next. Confusion often arises when handoffs are poorly defined or when managers rely on informal networks instead of clear processes. The book distinguishes between huddles, which are face-to-face conversations used to resolve ambiguity or make decisions, and handoffs, which transfer work without discussion when expectations are already clear. Managers play a critical role in huddles, using them to monitor progress, solve problems, and coach their teams. Well-designed huddles and handoffs prevent bottlenecks and keep the human chain connected.

The authors also challenge the widespread belief that keeping people busy maximizes productivity. Research shows that overloaded teams experience more stress, make more mistakes, and learn less. Dynamic work design aims for optimal challenge, where people are stretched just enough to grow without becoming overwhelmed. By aligning priorities, limiting work in progress, and monitoring flow through metrics like cycle time, leaders can prevent congestion and identify bottlenecks before they cause delays.

Visual management plays a crucial role in making work understandable and actionable. Because humans process visual information so efficiently, visual systems help teams see their work as a coherent whole rather than a disconnected set of tasks. Simple tools like problem boards can reveal the status of work, clarify responsibilities, and surface issues early. The authors caution against overengineering these systems, noting that their real value lies in the conversations they provoke. Messy boards often signal deeper problems and should be treated as opportunities for learning rather than failures.

Friday, March 13, 2026

This extends the workflow we discussed in the previous three articles to include the following, this time for pull requests:

• Heatmaps use the same issue‑to‑type matrix

• Trend lines use the same CSVs once history is accumulated

• Dependency graphs using the already implemented module‑source parsing

These three visuals are typical of custom GitHub dashboards and are well known as eye candy.

Yaml now follows:

---

name: Weekly Terraform PR analytics with AI summary and advanced visuals

on:

workflow_dispatch:

inputs:

window_days:

description: "Number of days back to collect closed PRs (integer)"

required: false

default: "7"

schedule:

- cron: "0 14 * * MON" # 6am Pacific, staggered from issue workflow

permissions:

contents: write

pull-requests: read

issues: read

jobs:

pr_report:

runs-on: ubuntu-latest

steps:

- name: Check out repository

uses: actions/checkout@v4

with:

fetch-depth: 0

- name: Set up Python

uses: actions/setup-python@v5

with:

python-version: "3.11"

- name: Install dependencies

run: |

python -m pip install --upgrade pip

pip install requests pandas matplotlib seaborn networkx python-hcl2

- name: Prepare environment variables
  env:
    # Hand the dispatch input to the shell through the environment instead of
    # interpolating the expression into the script text, which would let a
    # crafted input inject shell commands.
    INPUT_WINDOW_DAYS: ${{ github.event.inputs.window_days || '7' }}
  run: |
    # Accept digits only: the value is appended to $GITHUB_ENV and later
    # parsed with int(), so reject anything else up front.
    case "$INPUT_WINDOW_DAYS" in
      ''|*[!0-9]*)
        echo "window_days must be a non-negative integer" >&2
        exit 1
        ;;
    esac
    echo "WINDOW_DAYS=${INPUT_WINDOW_DAYS}" >> "$GITHUB_ENV"
    echo "REPO=${GITHUB_REPOSITORY}" >> "$GITHUB_ENV"

###########################################################################

# 1. FETCH CLOSED PRs WITHIN WINDOW

###########################################################################

- name: Fetch closed PRs (window)
  id: fetch
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    REPO: ${{ env.REPO }}
    WINDOW_DAYS: ${{ env.WINDOW_DAYS }}
  run: |
    python << 'PY'
    # Collect PRs merged within the last WINDOW_DAYS days into prs.json.
    import os, json, datetime, requests

    REPO = os.environ["REPO"]
    TOKEN = os.environ["GH_TOKEN"]
    WINDOW_DAYS = int(os.environ["WINDOW_DAYS"])
    HEADERS = {"Authorization": f"Bearer {TOKEN}"}

    # Whole-second UTC timestamp in the same shape GitHub uses, so the plain
    # string comparisons below order correctly. datetime.utcnow() is
    # deprecated since Python 3.12.
    since = (
        datetime.datetime.now(datetime.timezone.utc)
        - datetime.timedelta(days=WINDOW_DAYS)
    ).strftime("%Y-%m-%dT%H:%M:%SZ")

    def gh(url, params=None):
        # Explicit timeout: requests waits indefinitely without one.
        r = requests.get(url, headers=HEADERS, params=params, timeout=30)
        r.raise_for_status()
        return r.json()

    prs = []
    page = 1
    exhausted = False
    while not exhausted:
        # Newest-updated first: a PR's updated_at is never earlier than its
        # merged_at, so paging can stop at the first PR last updated before
        # the window instead of walking the full closed-PR history.
        batch = gh(
            f"https://api.github.com/repos/{REPO}/pulls",
            {
                "state": "closed",
                "sort": "updated",
                "direction": "desc",
                "per_page": 100,
                "page": page,
            },
        )
        if not batch:
            break
        for pr in batch:
            if pr["updated_at"] < since:
                exhausted = True
                break
            if pr.get("merged_at") and pr["merged_at"] >= since:
                prs.append({
                    "number": pr["number"],
                    "title": pr["title"],
                    # "user" is null when the author's account was deleted.
                    "user": (pr.get("user") or {}).get("login"),
                    "created_at": pr["created_at"],
                    "merged_at": pr["merged_at"],
                    "html_url": pr["html_url"],
                })
        page += 1

    with open("prs.json", "w") as f:
        json.dump(prs, f, indent=2)

    print(f"FOUND_PRS={len(prs)}")
    PY

###########################################################################

# 2. ANALYZE PRs: MODULES, AZURERM TYPES, TURNAROUND, REVIEW LATENCY

###########################################################################

- name: Analyze PRs for Terraform impact

id: analyze

env:

GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

REPO: ${{ env.REPO }}

run: |

python << 'PY'

import os, json, re, requests, subprocess

import hcl2

from datetime import datetime

REPO = os.environ["REPO"]

TOKEN = os.environ["GH_TOKEN"]

HEADERS = {"Authorization": f"Bearer {TOKEN}"}

def gh(url, params=None, timeout=30):
    """GET ``url`` with the module-level auth headers and return parsed JSON.

    ``timeout`` (seconds) is passed to ``requests``, which otherwise waits
    indefinitely on a stalled connection. Raises ``requests.HTTPError`` on a
    4xx/5xx response.
    """
    r = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    r.raise_for_status()
    return r.json()

def gh_text(url, timeout=30):
    """GET ``url`` with the module-level auth headers and return the body text.

    ``timeout`` (seconds) is passed to ``requests``, which otherwise waits
    indefinitely on a stalled connection. Raises ``requests.HTTPError`` on a
    4xx/5xx response.
    """
    r = requests.get(url, headers=HEADERS, timeout=timeout)
    r.raise_for_status()
    return r.text

def list_pr_files(pr):
    """Return the file entries of pull request number ``pr``, across all pages.

    Pages of 100 are requested until an empty page comes back.
    """
    endpoint = f"https://api.github.com/repos/{REPO}/pulls/{pr}/files"
    collected = []
    page_no = 1
    batch = gh(endpoint, {"per_page":100,"page":page_no})
    while batch:
        collected.extend(batch)
        page_no += 1
        batch = gh(endpoint, {"per_page":100,"page":page_no})
    return collected

def get_pr_reviews(pr):
    """Return every review of pull request number ``pr``.

    The endpoint is paginated and returns 30 reviews per page by default, so
    all pages are walked; a single request could miss reviews on a heavily
    reviewed PR.
    """
    url = f"https://api.github.com/repos/{REPO}/pulls/{pr}/reviews"
    per_page = 100
    reviews = []
    page = 1
    while True:
        batch = gh(url, {"per_page": per_page, "page": page})
        if not batch:
            break
        reviews.extend(batch)
        if len(batch) < per_page:
            break
        page += 1
    return reviews

def get_file_at_sha(path, sha):
    """Return the text of ``path`` at commit ``sha``, or None if unavailable.

    Returns None when the contents endpoint answers 404, when ``path`` is a
    directory (the endpoint then returns a JSON list), or when the entry has
    no ``download_url``.

    Raises:
        requests.HTTPError: On any other 4xx/5xx response (for example a rate
            limit), so such failures are not mistaken for a missing file.
    """
    r = requests.get(
        f"https://api.github.com/repos/{REPO}/contents/{path}",
        headers=HEADERS,
        params={"ref": sha},
        timeout=30,
    )
    if r.status_code == 404:
        return None
    r.raise_for_status()
    data = r.json()
    # A directory path yields a list of entries rather than a file object.
    if not isinstance(data, dict):
        return None
    download_url = data.get("download_url")
    if download_url:
        return gh_text(download_url)
    return None

def parse_azurerm(tf):
    """Return the set of ``azurerm_*`` resource types declared in HCL text ``tf``.

    Parsing is best-effort: text that ``hcl2`` cannot parse yields an empty
    set.
    """
    try:
        obj = hcl2.loads(tf)
    except Exception:
        # Deliberate best-effort: an unparseable file contributes nothing.
        # (Narrower than a bare except, which would also trap
        # KeyboardInterrupt/SystemExit.)
        return set()

    # python-hcl2 represents repeated blocks as a list of single-block dicts,
    # e.g. {"resource": [{"azurerm_x": {"name": {...}}}]}. A plain dict is
    # still accepted for compatibility with the previous handling.
    resources = obj.get("resource", [])
    if isinstance(resources, dict):
        resources = [resources]
    if not isinstance(resources, list):
        return set()

    out = set()
    for block in resources:
        if isinstance(block, dict):
            out.update(
                rtype for rtype in block
                if isinstance(rtype, str) and rtype.startswith("azurerm_")
            )
    return out

def parse_modules(tf):
    """Return the set of module `source` strings declared in `tf`.

    Args:
        tf: Terraform (HCL) source text.

    Returns:
        A set of source strings; empty when parsing fails or no module block
        carries a string `source`.
    """
    try:
        obj = hcl2.loads(tf)
    except Exception:
        # Best effort: an unparsable file contributes no module sources.
        return set()

    mods = obj.get("module", [])
    # NOTE(review): python-hcl2 is understood to emit `module` as a list of
    # {name: body} dicts; the original only handled a plain dict. Accept
    # both shapes. Confirm against the installed version.
    if isinstance(mods, dict):
        mods = [mods]
    elif not isinstance(mods, list):
        return set()

    out = set()
    for block in mods:
        if not isinstance(block, dict):
            continue
        for body in block.values():
            # Guard the body type: calling .get on a non-dict would raise.
            src = body.get("source") if isinstance(body, dict) else None
            if isinstance(src, str):
                out.add(src)
    return out

def normalize_local(src, app):
    """Resolve a relative module source against the app directory.

    Returns the normalized POSIX path for sources starting with `./` or
    `../`, and None for any other source string.
    """
    import posixpath

    is_relative = src.startswith(("./", "../"))
    if not is_relative:
        return None
    joined = posixpath.join(app, src)
    return posixpath.normpath(joined)

def list_tf(dir, sha):
    """List `.tf` paths under `dir` in the tree of commit `sha`.

    Runs `git ls-tree -r --name-only` in the current working directory.

    Returns:
        A list of paths ending in `.tf`; an empty list when git cannot be run
        or exits non-zero (for example when `sha` is not present locally).
    """
    # `dir` shadows the builtin, but it is the caller-visible parameter name
    # and is kept for compatibility.
    try:
        out = subprocess.check_output(
            ["git", "ls-tree", "-r", "--name-only", sha, dir],
            text=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # The original bare `except:` also swallowed KeyboardInterrupt and
        # SystemExit; only git failures are meant to be best-effort here.
        return []
    return [p for p in out.splitlines() if p.endswith(".tf")]

import pandas as pd

# Timestamp layout this script expects for created_at / merged_at /
# submitted_at values.
TIME_FMT = "%Y-%m-%dT%H:%M:%SZ"
SECONDS_PER_DAY = 86400.0

with open("prs.json") as fh:
    prs = json.load(fh)

pr_to_types = {}         # PR number -> sorted list of azurerm resource types
pr_turnaround = {}       # PR number -> days from creation to merge
pr_review_latency = {}   # PR number -> days from creation to first review (or None)
module_deps = {}         # app dir -> set of local module dirs it references

for pr in prs:
    num = pr["number"]
    dtc = datetime.strptime(pr["created_at"], TIME_FMT)
    dtm = datetime.strptime(pr["merged_at"], TIME_FMT)
    pr_turnaround[num] = (dtm - dtc).total_seconds() / SECONDS_PER_DAY

    # Collect the timestamps first: the original called min() on a generator
    # that is empty when every review lacks `submitted_at`, which raises
    # ValueError and aborts the whole step.
    submitted = [r["submitted_at"] for r in get_pr_reviews(num) if r.get("submitted_at")]
    if submitted:
        first = min(submitted)
        pr_review_latency[num] = (
            datetime.strptime(first, TIME_FMT) - dtc
        ).total_seconds() / SECONDS_PER_DAY
    else:
        pr_review_latency[num] = None

    files = list_pr_files(num)
    sha = gh(f"https://api.github.com/repos/{REPO}/pulls/{num}")["head"]["sha"]

    # An "app" is identified by the first two path segments under workload/.
    touched_apps = {
        "/".join(entry["filename"].split("/")[:2])
        for entry in files
        if entry["filename"].startswith("workload/")
    }

    types = set()
    for app in touched_apps:
        for p in list_tf(app, sha):
            txt = get_file_at_sha(p, sha)
            if not txt:
                continue
            types |= parse_azurerm(txt)
            for src in parse_modules(txt):
                local = normalize_local(src, app)
                if local:
                    module_deps.setdefault(app, set()).add(local)

    if types:
        pr_to_types[num] = sorted(types)

rows = [
    {"pr": pr_num, "azurerm_type": t}
    for pr_num, pr_types in pr_to_types.items()
    for t in pr_types
]

# Explicit column lists keep the header row even when there is no data, so
# the charts step can always read the CSVs and find the columns it groups on.
pd.DataFrame(rows, columns=["pr", "azurerm_type"]).to_csv(
    "pr_severity_data.csv", index=False
)

pd.DataFrame(
    [{"pr": k, "turnaround_days": v} for k, v in pr_turnaround.items()],
    columns=["pr", "turnaround_days"],
).to_csv("pr_turnaround.csv", index=False)

pd.DataFrame(
    [{"pr": k, "review_latency_days": v} for k, v in pr_review_latency.items()],
    columns=["pr", "review_latency_days"],
).to_csv("pr_review_latency.csv", index=False)

with open("pr_module_deps.json", "w") as fh:
    json.dump({k: sorted(v) for k, v in module_deps.items()}, fh, indent=2)

###########################################################################

# 3. GENERATE CHARTS (heatmap, trend, dependency, turnaround, review latency)

###########################################################################

- name: Generate PR charts and markdown

id: charts

env:

OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

run: |

python << 'PY'

import os, json, datetime, glob

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

import networkx as nx

# Run timestamp shared by every artifact name produced by this script.
ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d-%H%M%S")

os.makedirs("history/pr", exist_ok=True)

# Load severity data
counts = pd.Series(dtype=int)
if os.path.exists("pr_severity_data.csv"):
    try:
        df = pd.read_csv("pr_severity_data.csv")
    except pd.errors.EmptyDataError:
        df = pd.DataFrame()
    # A CSV written from an empty row list has no usable header; grouping on
    # a missing column would raise KeyError, so check for the columns first.
    if {"azurerm_type", "pr"} <= set(df.columns):
        counts = df.groupby("azurerm_type")["pr"].nunique().sort_values(ascending=False)

# Severity bar
sev_png = f"history/pr/pr-severity-{ts}.png"

plt.figure(figsize=(12, 6))
if not counts.empty:
    counts.plot(kind="bar")
    plt.title("PR frequency by azurerm resource type")
    plt.xlabel("azurerm resource type")
    plt.ylabel("number of PRs touching type")
else:
    plt.text(0.5, 0.5, "No PR data", ha="center", va="center")
    plt.axis("off")
plt.tight_layout()
plt.savefig(sev_png)
# Close rather than clear, so the figure is released instead of staying open
# for the rest of the script.
plt.close()

# Heatmap: azurerm types (rows) vs PR numbers (columns).
# The PNG is always written (a placeholder when there is no data) because the
# markdown report links to it unconditionally.
heat_png = f"history/pr/pr-heatmap-{ts}.png"

mat = pd.DataFrame()
if os.path.exists("pr_severity_data.csv"):
    try:
        mat = pd.read_csv("pr_severity_data.csv")
    except pd.errors.EmptyDataError:
        mat = pd.DataFrame()

if not mat.empty and {"azurerm_type", "pr"} <= set(mat.columns):
    pivot = mat.pivot_table(index="azurerm_type", columns="pr", aggfunc='size', fill_value=0)
    # Order rows by how many PR cells they hit, busiest type first.
    pivot['total'] = pivot.sum(axis=1)
    pivot = pivot.sort_values('total', ascending=False).drop(columns=['total'])
    # Cap the number of PR columns so the figure stays readable.
    if pivot.shape[1] > 100:
        pivot = pivot.iloc[:, :100]
    plt.figure(figsize=(14, max(6, 0.2 * pivot.shape[0])))
    sns.heatmap(pivot, cmap="YlGnBu")
    plt.title("Heatmap: azurerm types vs PRs")
    plt.tight_layout()
else:
    plt.figure(figsize=(6, 2))
    plt.text(0.5, 0.5, "No data for heatmap", ha="center", va="center")
    plt.axis("off")

plt.savefig(heat_png)
plt.close()

# Trend lines: one point per historical severity CSV plus the current run.
import re

trend_png = f"history/pr/pr-trend-{ts}.png"
stamp_fmt = "%Y%m%d-%H%M%S"

# Sorted so that snapshots are processed oldest-first (file names embed the
# run timestamp); glob order is otherwise arbitrary.
hist_files = sorted(glob.glob("history/pr/pr-severity-*.csv"))
if os.path.exists("pr_severity_data.csv"):
    hist_files.append("pr_severity_data.csv")

snapshots = []
for f in hist_files:
    m = re.search(r"(\d{8}-\d{6})", f)
    try:
        if m:
            when = datetime.datetime.strptime(m.group(1), stamp_fmt)
        elif f == "pr_severity_data.csv":
            # The current run's CSV has no timestamp in its name. The original
            # labelled it with a float mtime, which the later datetime
            # conversion turned into NaT, dropping the newest point from the
            # plot. Label it with this run's timestamp instead.
            when = datetime.datetime.strptime(ts, stamp_fmt)
        else:
            when = datetime.datetime.fromtimestamp(
                os.path.getmtime(f), datetime.timezone.utc
            ).replace(tzinfo=None)
        tmp = pd.read_csv(f)
        if tmp.empty:
            continue
        snapshots.append(tmp.groupby("azurerm_type")["pr"].nunique().rename(when))
    except (OSError, ValueError, KeyError) as exc:
        # Still best effort, but say which snapshot was skipped and why
        # instead of hiding it behind a bare `except: pass`.
        print(f"Skipping history file {f}: {exc}")

if snapshots:
    # Rows = snapshot time, columns = azurerm type.
    trend_df = pd.concat(snapshots, axis=1).fillna(0).T.sort_index()
else:
    trend_df = pd.DataFrame()

if not trend_df.empty:
    plt.figure(figsize=(14, 6))
    # Plot the 8 types that are most frequent in the newest snapshot.
    latest = trend_df.iloc[-1].sort_values(ascending=False).head(8).index.tolist()
    for col in latest:
        plt.plot(trend_df.index, trend_df[col], marker='o', label=col)
    plt.legend()
    plt.title("Trend lines: PR frequency over time")
    plt.tight_layout()
else:
    # Always emit the PNG; the markdown report links to it.
    plt.figure(figsize=(8, 2))
    plt.text(0.5, 0.5, "No historical data for trend lines", ha="center", va="center")
    plt.axis("off")

plt.savefig(trend_png)
plt.close()

# Dependency graph: directed edges app -> local module.
# The PNG is always written (a placeholder when there is nothing to draw)
# because the markdown report links to it unconditionally.
dep_png = f"history/pr/pr-deps-{ts}.png"

deps = {}
if os.path.exists("pr_module_deps.json"):
    with open("pr_module_deps.json") as f:
        deps = json.load(f)

G = nx.DiGraph()
for app, mods in deps.items():
    G.add_node(app, type='app')
    for m in mods:
        G.add_node(m, type='module')
        G.add_edge(app, m)

if G.number_of_nodes() > 0:
    plt.figure(figsize=(12, 8))
    pos = nx.spring_layout(G, k=0.5)
    # Apps in blue, modules in green.
    colors = ['#1f78b4' if G.nodes[n].get('type') == 'app' else '#33a02c' for n in G.nodes()]
    nx.draw(G, pos, with_labels=True, node_color=colors, node_size=600, arrows=True)
    plt.title("PR module dependency graph")
    plt.tight_layout()
else:
    plt.figure(figsize=(6, 2))
    plt.text(0.5, 0.5, "No dependency data", ha="center", va="center")
    plt.axis("off")

plt.savefig(dep_png)
plt.close()

def _plot_days_bar(csv_path, value_col, title, png_path, empty_msg):
    """Render the 50 largest `value_col` values from `csv_path` as a bar chart.

    Always writes `png_path`: a placeholder image carrying `empty_msg` is
    produced when the CSV is missing, has no usable header, or has no
    non-null rows, so the report's image link never dangles.
    """
    data = pd.DataFrame()
    if os.path.exists(csv_path):
        try:
            data = pd.read_csv(csv_path)
        except pd.errors.EmptyDataError:
            data = pd.DataFrame()
    # A CSV written from an empty list lacks the expected columns; sorting on
    # a missing column would raise KeyError.
    if {"pr", value_col} <= set(data.columns):
        data = data.dropna(subset=[value_col])
        data = data.sort_values(value_col, ascending=False).head(50)
    else:
        data = pd.DataFrame()

    if not data.empty:
        plt.figure(figsize=(12, 6))
        plt.bar(data["pr"].astype(str), data[value_col])
        plt.xticks(rotation=90)
        plt.title(title)
        plt.tight_layout()
    else:
        plt.figure(figsize=(8, 2))
        plt.text(0.5, 0.5, empty_msg, ha="center", va="center")
        plt.axis("off")
    plt.savefig(png_path)
    plt.close()


# Turnaround chart
ta_png = f"history/pr/pr-turnaround-{ts}.png"
_plot_days_bar(
    "pr_turnaround.csv",
    "turnaround_days",
    "PR turnaround time (days)",
    ta_png,
    "No turnaround data available",
)

# Review latency chart
rl_png = f"history/pr/pr-review-latency-{ts}.png"
_plot_days_bar(
    "pr_review_latency.csv",
    "review_latency_days",
    "PR review latency (days)",
    rl_png,
    "No review latency data available",
)

# AI summary: one line per PR, produced by the chat completions endpoint when
# OPENAI_API_KEY is set; otherwise a fixed "skipped" message is used.
if os.path.exists("prs.json"):
    # Context manager so the file handle is closed deterministically.
    with open("prs.json") as fh:
        prs = json.load(fh)
else:
    prs = []

# Only the fields the prompt needs are sent.
condensed = [
    {"pr": p["number"], "user": p["user"], "title": p["title"], "url": p["html_url"]}
    for p in prs
]

ai_text = "AI summary skipped."

if os.environ.get("OPENAI_API_KEY"):
    import subprocess

    prompt = (
        "Summarize each PR as a one-line 'who changed what' statement. "
        "Format: '#<pr> — <user> changed <succinct summary>'."
    )
    payload = {
        "model": "gpt-4o-mini",
        # The original never closed this list before "temperature", which is
        # a SyntaxError that prevented the whole script from running.
        "messages": [
            {"role": "system", "content": "You summarize PRs concisely."},
            # The serialized PR list is truncated to 15000 characters.
            {"role": "user", "content": prompt + "\n\n" + json.dumps(condensed)[:15000]},
        ],
        "temperature": 0.2,
    }
    proc = subprocess.run(
        ["curl", "-sS", "https://api.openai.com/v1/chat/completions",
         "-H", "Content-Type: application/json",
         "-H", f"Authorization: Bearer {os.environ['OPENAI_API_KEY']}",
         "-d", json.dumps(payload)],
        capture_output=True, text=True
    )
    try:
        ai_text = json.loads(proc.stdout)["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        # Best effort: a failed call or unexpected payload must not fail the
        # report, but the report should say the summary is unavailable
        # rather than claim it was skipped.
        ai_text = "AI summary unavailable (parsing error)."

# Markdown report: AI summary, one section per chart, then the data files.
md_path = f"history/pr/pr-report-{ts}.md"

# (heading, image path) in the order the sections appear in the report.
chart_sections = [
    ("PR frequency by azurerm type", sev_png),
    ("Heatmap: azurerm types vs PRs", heat_png),
    ("Trend lines", trend_png),
    ("Dependency graph", dep_png),
    ("PR turnaround time", ta_png),
    ("PR review latency", rl_png),
]

# The report contains non-ASCII characters, so fix the encoding explicitly.
with open(md_path, "w", encoding="utf-8") as f:
    f.write("# Weekly Terraform PR analytics report\n\n")
    # .get with a default: this step does not declare WINDOW_DAYS in its own
    # env, and indexing os.environ directly raises KeyError when it is unset.
    f.write(f"**Window (days):** {os.environ.get('WINDOW_DAYS', '7')}\n\n")
    f.write("## AI Summary (who changed what)\n\n```\n")
    f.write(ai_text + "\n```\n\n")
    for heading, png in chart_sections:
        f.write(f"## {heading}\n\n")
        # Link only images that were actually written, so the report never
        # contains a broken image reference.
        if os.path.exists(png):
            f.write(f"![]({os.path.basename(png)})\n\n")
        else:
            f.write("_No chart was generated for this section._\n\n")
    f.write("## Data artifacts\n\n")
    f.write("- `pr_severity_data.csv` — per-PR azurerm type mapping\n")
    f.write("- `pr_turnaround.csv` — per-PR turnaround in days\n")
    f.write("- `pr_review_latency.csv` — per-PR review latency in days\n")
    f.write("- `pr_module_deps.json` — module dependency data used for graph\n")

# Archive this run's CSVs under history/pr/ so later runs can build trends,
# then echo the generated artifact paths as KEY=value lines.
import shutil

archive_plan = (
    ("pr_severity_data.csv", f"history/pr/pr-severity-{ts}.csv"),
    ("pr_turnaround.csv", f"history/pr/pr-turnaround-{ts}.csv"),
    ("pr_review_latency.csv", f"history/pr/pr-review-latency-{ts}.csv"),
)
for csv_name, archived_name in archive_plan:
    if os.path.exists(csv_name):
        shutil.copy(csv_name, archived_name)

report_outputs = (
    ("PR_REPORT_MD", md_path),
    ("PR_REPORT_SEV", sev_png),
    ("PR_REPORT_HEAT", heat_png),
    ("PR_REPORT_TREND", trend_png),
    ("PR_REPORT_DEP", dep_png),
    ("PR_REPORT_TA", ta_png),
    ("PR_REPORT_RL", rl_png),
)
for key, value in report_outputs:
    print(f"{key}={value}")

###########################################################################

# 4. CREATE PR WITH REPORT AND PRUNE HISTORY

###########################################################################

# Prune BEFORE opening the PR so that the deletions are part of the commit the
# PR action creates. In the original order the pruning ran after the PR had
# already been committed, so removed files never reached the branch.
- name: Prune PR history to max 10 report sets
  run: |
    python - <<'PY'
    import re
    from pathlib import Path

    hist = Path("history/pr")
    hist.mkdir(parents=True, exist_ok=True)

    # Group files by the YYYYMMDD-HHMMSS stamp embedded in their names;
    # files without a stamp are left untouched.
    groups = {}
    for p in hist.iterdir():
        m = re.search(r"(\d{8}-\d{6})", p.name)
        if not m:
            continue
        groups.setdefault(m.group(1), []).append(p)

    # The stamp format sorts chronologically as text; keep the 10 newest sets.
    keep = set(sorted(groups, reverse=True)[:10])
    drop = [p for ts, files in groups.items() if ts not in keep for p in files]

    removed = 0
    for p in drop:
        try:
            p.unlink()
            removed += 1
        except OSError as exc:
            # Best effort, but report what could not be removed.
            print(f"Could not remove {p}: {exc}")

    print(f"Pruned {removed} files; kept {len(keep)} report sets.")
    PY

- name: Create PR for PR analytics report
  id: create_pr
  uses: peter-evans/create-pull-request@v6
  with:
    commit-message: "Add weekly Terraform PR analytics report and visuals (prune to last 10)"
    title: "Weekly Terraform PR analytics report"
    body: |
      This PR adds the latest weekly PR analytics report and charts under `history/pr/`.
      The workflow prunes older reports to keep at most 10 report sets.
    branch: "weekly-terraform-pr-analytics"
    base: "main"
    # `path` is the location of the repository checkout, not a filter on what
    # gets committed; `add-paths` is the input that restricts the commit to
    # the report directory.
    add-paths: |
      history/pr

###########################################################################

# 5. NOTIFY RUNBOOK WEBHOOK (OPTIONAL EMAIL VIA AZ COMMUNICATION)

###########################################################################

# Posts a JSON notification to the runbook webhook. Skipped when the PR step
# produced no PR URL, and a no-op when the optional secret is not configured.
- name: Notify runbook webhook for PR analytics
  if: steps.create_pr.outcome == 'success' && steps.create_pr.outputs.pull-request-url != ''
  env:
    RUNBOOK_WEBHOOK_URL: ${{ secrets.RUNBOOK_WEBHOOK_URL }}
    PR_URL: ${{ steps.create_pr.outputs.pull-request-url }}
    WINDOW_DAYS: ${{ env.WINDOW_DAYS }}
  run: |
    # The webhook is optional: an unset secret expands to an empty string and
    # curl would otherwise fail on an empty URL.
    if [ -z "${RUNBOOK_WEBHOOK_URL:-}" ]; then
      echo "RUNBOOK_WEBHOOK_URL is not configured; skipping notification."
      exit 0
    fi

    payload=$(jq -n \
      --arg pr "$PR_URL" \
      --arg window "$WINDOW_DAYS" \
      '{subject: ("Weekly Terraform PR analytics report - " + $window + "d"), body: ("A new weekly PR analytics report has been generated. Review the PR: " + $pr), pr_url: $pr, window_days: $window}')

    # --fail turns HTTP error responses into a non-zero exit so a rejected
    # notification is visible; --max-time bounds a hung endpoint.
    curl -sS --fail --max-time 30 -X POST "$RUNBOOK_WEBHOOK_URL" \
      -H "Content-Type: application/json" \
      -d "$payload"

- name: Output PR analytics artifacts

if: always()

run: |

echo "Generated files in history/pr/:"

ls -la history/pr || true

Thursday, March 12, 2026

This is a summary of a book titled “Work/Life Flywheel: Harness the work revolution and reimagine your career without fear” written by Ollie Henderson and published by Practical Inspiration Publishing, 2023. This book explores how people can rethink their careers in response to a rapidly changing world of work. As technology enables remote work, freelancing, and entrepreneurship, traditional ideas of work–life balance have become harder to sustain. Henderson suggests that the problem lies not in individual effort, but in the assumption that work and life can ever be neatly balanced. Instead, he proposes a different way of thinking about career and personal development, one that allows the two to reinforce each other over time.

At the center of the book is the idea of a “flywheel,” borrowed from business theory and popularized by Jim Collins. A mechanical flywheel maintains momentum through the coordinated movement of its parts, and Henderson applies this idea to careers and personal lives. Rather than treating work and life as competing forces, the flywheel model encourages people to design systems in which effort in one area feeds progress in another, creating a steady, self-sustaining cycle of growth. This approach focuses less on achieving equilibrium and more on building momentum without exhaustion.

Henderson identifies several interconnected elements that keep this flywheel turning. The process begins with mindset, emphasizing curiosity and openness to change. While external events are often beyond personal control, responses to them are not. Viewing challenges as opportunities for learning can build resilience and make it easier to recognize new possibilities. Simple habits, such as asking thoughtful questions and forming genuine connections, can lead to collaborations that spark creativity and further learning. Over time, these experiences accumulate and widen the range of options available.

To avoid being pulled in too many directions, Henderson stresses the importance of defining purpose and values. Clarifying priorities, understanding where time and energy are spent, and identifying a personal “north star” help people decide which opportunities to pursue and which to decline. Visualizing long-term goals and breaking them into smaller, achievable steps can make change feel more manageable, especially when paired with realistic planning and an awareness of potential setbacks.

Creativity plays a central role in the flywheel model, particularly as automation reshapes many forms of work. Henderson treats creativity not as artistic talent but as a practical skill rooted in critical thinking, empathy, and context. Exposure to new ideas, engagement with other people’s perspectives, and a focus on contributing value rather than self-promotion can all strengthen creative capacity. He also emphasizes the importance of finding conditions that support “flow,” a state in which focus and motivation align. While routines differ from person to person, flow tends to emerge when distractions are reduced, goals are clear, and a degree of uncertainty or risk is embraced.

Storytelling is another creative tool Henderson highlights. Reflecting on personal experiences and articulating them through clear, human narratives can help people better understand their own motivations and communicate them to others. Being able to explain why change matters builds credibility and makes it easier for others to offer support or collaboration.

Experimentation follows naturally from creativity. Henderson encourages writing and sharing ideas publicly, not as a performance but as a way to test assumptions and refine thinking. Digital platforms make it easier than ever to reach an audience and iterate quickly, but standing out often requires narrowing focus rather than broadening it. Finding a niche allows for more meaningful experimentation and clearer feedback. Failure, in this view, is not an endpoint but a source of information. Examples such as Airbnb’s early missteps illustrate how adjusting a model in response to real-world responses can lead to more viable outcomes.

Community provides the social infrastructure that keeps the flywheel moving. Relationships built on mutual support and shared learning can open doors during career transitions and reduce the isolation that often accompanies remote or independent work. While physical workplaces may be less central than before, online communities and collaboration platforms have made it possible to build global networks, exchange expertise, and create opportunities across time zones.

Learning underpins all of these elements. Henderson challenges the idea that success means reaching a fixed destination, arguing instead that careers now require ongoing adaptation. Treating learning as a continuous, self-directed practice helps people remain flexible and open to reinvention. Stories such as Jeff Kofman’s shift from war correspondence to entrepreneurship illustrate how curiosity and willingness to learn can lead to entirely new paths without discarding past experience.

The final element of the flywheel is rest. Henderson emphasizes that progress depends not only on effort but also on recovery. Periods of intense focus need to be balanced with time for reflection and restoration, much like an athlete’s training cycle. Activities such as exercise, time outdoors, gratitude practices, and adequate sleep help sustain both physical and mental energy. Breakthroughs often appear during moments of rest, but only after consistent work has laid the groundwork.

Wednesday, March 11, 2026

This extends the same workflow we discussed in the previous 2 articles to include the following:

• Heatmaps use the same issue‑to‑type matrix

• Trend lines use the same CSVs once history is accumulated

• Dependency graphs use the already-implemented module‑source parsing

These three visuals are typical of issue-management dashboards and are popular largely because they are visually appealing.

Yaml now follows:

---

name: Weekly Terraform azurerm hotspot report with AI summary and advanced visuals

on:

workflow_dispatch:

inputs:

window_days:

description: "Number of days back to collect closed issues (integer)"

required: false

default: "7"

schedule:

- cron: "0 13 * * MON"

permissions:

contents: write

pull-requests: write

issues: read

jobs:

report:

runs-on: ubuntu-latest

steps:

- name: Check out repository

uses: actions/checkout@v4

with:

fetch-depth: 0

- name: Set up Python

uses: actions/setup-python@v5

with:

python-version: "3.11"

- name: Install dependencies
  run: |
    python -m pip install --upgrade pip
    # tabulate is required by pandas' DataFrame.to_markdown(), which the
    # report step calls when rendering the top-types table.
    pip install requests pandas matplotlib seaborn networkx python-hcl2 tabulate

- name: Prepare environment variables
  id: env
  env:
    # Pass the user-supplied input through an environment variable instead of
    # interpolating it into the script text, so its content is never parsed
    # as shell.
    WINDOW_DAYS_INPUT: ${{ github.event.inputs.window_days || '7' }}
  run: |
    # The fetch step converts this value with int(); reject anything that is
    # not a plain non-negative integer up front.
    case "$WINDOW_DAYS_INPUT" in
      ''|*[!0-9]*)
        echo "window_days must be a non-negative integer, got: '$WINDOW_DAYS_INPUT'" >&2
        exit 1
        ;;
    esac
    echo "WINDOW_DAYS=${WINDOW_DAYS_INPUT}" >> "$GITHUB_ENV"
    echo "REPO=${GITHUB_REPOSITORY}" >> "$GITHUB_ENV"

- name: Fetch closed issues and linked PRs (window)

id: fetch

env:

GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

REPO: ${{ env.REPO }}

WINDOW_DAYS: ${{ env.WINDOW_DAYS }}

run: |

python - <<'PY'

import os, requests, json, datetime, re

REPO = os.environ["REPO"]

TOKEN = os.environ["GH_TOKEN"]

WINDOW_DAYS = int(os.environ.get("WINDOW_DAYS","7"))

HEADERS = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}

since = (datetime.datetime.utcnow() - datetime.timedelta(days=WINDOW_DAYS)).isoformat() + "Z"

def gh_get(url, params=None):
    """GET `url` with the shared headers and return the decoded JSON body.

    Raises:
        requests.HTTPError: on any 4xx/5xx response.
        requests.Timeout: if the server does not answer within the timeout.
    """
    # requests has no default timeout; bound the call so a stalled connection
    # cannot hang the step.
    r = requests.get(url, headers=HEADERS, params=params, timeout=30)
    r.raise_for_status()
    return r.json()

def _paged(url, params=None):
    """Yield every item of a paginated list endpoint, 100 items per request."""
    page = 1
    while True:
        batch = gh_get(url, params={**(params or {}), "per_page": 100, "page": page})
        if not batch:
            return
        yield from batch
        page += 1


# Compiled once; both patterns are applied to every comment body.
PR_LINK_RE = re.compile(r"https://github\.com/[^/\s]+/[^/\s]+/pull/\d+")
SHORT_REF_RE = re.compile(r"(?:^|\s)#(\d+)\b")

issues_url = f"https://api.github.com/repos/{REPO}/issues"

issues = []
# The original fetched only the first 100 issues and the first 100 comments
# per issue; walk every page instead.
for i in _paged(issues_url, {"state": "closed", "since": since}):
    # The issues endpoint also returns pull requests; skip them.
    if "pull_request" in i:
        continue

    # `since` filters on last-update time, so an old issue that merely
    # received a recent comment would otherwise be reported as closed in
    # this window.
    closed_at = i.get("closed_at")
    if closed_at and closed_at < since:
        continue

    comments = list(_paged(i["comments_url"]))

    pr_urls = set()
    for c in comments:
        body = c.get("body", "") or ""
        pr_urls.update(PR_LINK_RE.findall(body))
        # NOTE(review): every "#N" is assumed to be a PR in this repository;
        # it may equally be an issue number, so consumers must tolerate
        # numbers that do not resolve to a PR.
        for ref in SHORT_REF_RE.findall(body):
            pr_urls.add(f"https://github.com/{REPO}/pull/{ref}")

    issues.append({
        "number": i["number"],
        "title": i.get("title", ""),
        "user": i.get("user", {}).get("login", ""),
        "created_at": i.get("created_at"),
        "closed_at": closed_at,
        "html_url": i.get("html_url"),
        "comments": [
            {"id": c.get("id"), "body": c.get("body", ""), "created_at": c.get("created_at")}
            for c in comments
        ],
        "pr_urls": sorted(pr_urls),
    })

with open("issues.json", "w") as f:
    json.dump(issues, f, indent=2)

print(f"WROTE_ISSUES={len(issues)}")

- name: Resolve PRs, collect touched workload apps and azurerm types

id: analyze

env:

GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

REPO: ${{ env.REPO }}

run: |

python - <<'PY'

import os, json, re, requests, subprocess

import hcl2

REPO = os.environ["REPO"]

TOKEN = os.environ["GH_TOKEN"]

HEADERS = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}

def gh_get(url, params=None):
    """GET `url` with the shared headers and return the decoded JSON body.

    Raises:
        requests.HTTPError: on any 4xx/5xx response.
        requests.Timeout: if the server does not answer within the timeout.
    """
    # Explicit timeout: requests would otherwise wait indefinitely on a
    # stalled connection.
    r = requests.get(url, headers=HEADERS, params=params, timeout=30)
    r.raise_for_status()
    return r.json()

def gh_get_text(url):
    """GET `url` with the shared headers and return the response body as text.

    Raises:
        requests.HTTPError: on any 4xx/5xx response.
        requests.Timeout: if the server does not answer within the timeout.
    """
    # Explicit timeout: requests would otherwise wait indefinitely on a
    # stalled connection.
    r = requests.get(url, headers=HEADERS, timeout=30)
    r.raise_for_status()
    return r.text

def pr_number_from_url(u):
    """Extract the number following `/pull/` in `u` as an int, or None if absent."""
    found = re.search(r"/pull/(\d+)", u)
    if found is None:
        return None
    return int(found.group(1))

def list_pr_files(pr_number):
    """Return every file entry of pull request `pr_number`.

    Requests pages of 100 until the API returns an empty page.
    """
    endpoint = f"https://api.github.com/repos/{REPO}/pulls/{pr_number}/files"
    gathered = []
    page_no = 0
    while True:
        page_no += 1
        chunk = gh_get(endpoint, params={"per_page": 100, "page": page_no})
        if not chunk:
            return gathered
        gathered += chunk

def get_pr_head_sha(pr_number):
    """Return the head commit SHA of pull request `pr_number`."""
    details = gh_get(f"https://api.github.com/repos/{REPO}/pulls/{pr_number}")
    head = details["head"]
    return head["sha"]

def get_file_at_sha(path, sha):
    """Fetch the text of `path` at commit `sha` via the contents endpoint.

    Returns None on a 404 or when the payload is not a file entry with a
    download URL; other HTTP errors are raised.
    """
    response = requests.get(
        f"https://api.github.com/repos/{REPO}/contents/{path}",
        headers=HEADERS,
        params={"ref": sha},
    )
    if response.status_code == 404:
        return None
    response.raise_for_status()

    payload = response.json()
    is_file_entry = isinstance(payload, dict) and payload.get("type") == "file"
    if not (is_file_entry and payload.get("download_url")):
        return None
    return gh_get_text(payload["download_url"])

def parse_azurerm_resource_types_from_tf(tf_text):
    """Collect the `azurerm_*` resource type names declared in `tf_text`.

    Accepts the parsed `resource` value either as a dict or as a list of
    dicts. Returns an empty set when the text cannot be parsed.
    """
    try:
        parsed = hcl2.loads(tf_text)
    except Exception:
        return set()

    resources = parsed.get("resource", [])
    if isinstance(resources, list):
        containers = [entry for entry in resources if isinstance(entry, dict)]
    elif isinstance(resources, dict):
        containers = [resources]
    else:
        containers = []

    return {
        key
        for container in containers
        for key in container
        if isinstance(key, str) and key.startswith("azurerm_")
    }

def parse_module_sources_from_tf(tf_text):
    """Collect the string `source` values of module blocks in `tf_text`.

    Accepts the parsed `module` value either as a dict or as a list of
    dicts. Returns an empty set when the text cannot be parsed.
    """
    try:
        parsed = hcl2.loads(tf_text)
    except Exception:
        return set()

    modules = parsed.get("module", [])
    if isinstance(modules, list):
        containers = [entry for entry in modules if isinstance(entry, dict)]
    elif isinstance(modules, dict):
        containers = [modules]
    else:
        containers = []

    found = set()
    for container in containers:
        for definition in container.values():
            if not isinstance(definition, dict):
                continue
            candidate = definition.get("source")
            if isinstance(candidate, str):
                found.add(candidate)
    return found

def normalize_local_module_path(source, app_dir):
    """Resolve a relative module `source` against `app_dir`.

    Returns a normalized POSIX path for sources beginning with `./` or
    `../`; any other source yields None.
    """
    import posixpath

    if not source.startswith(("./", "../")):
        return None
    combined = posixpath.join(app_dir, source)
    return posixpath.normpath(combined)

def list_repo_tf_files_under(dir_path, sha):
    """List `.tf` paths under `dir_path` in the tree of commit `sha`.

    Uses `git ls-tree -r --name-only`; any failure yields an empty list.
    """
    command = ["git", "ls-tree", "-r", "--name-only", sha, dir_path]
    try:
        listing = subprocess.check_output(command, text=True)
        stripped = (line.strip() for line in listing.splitlines())
        return [name for name in stripped if name.endswith(".tf")]
    except Exception:
        return []

def collect_azurerm_types_for_app(app_dir, sha):
    """Return the azurerm resource types used by an app at commit `sha`.

    Scans the `.tf` files under `app_dir`, then the `.tf` files of every
    local module those files reference (one level of module indirection).
    """
    found_types = set()
    local_modules = set()

    # Pass 1: the app's own files; remember relative module sources.
    for tf_path in list_repo_tf_files_under(app_dir, sha):
        content = get_file_at_sha(tf_path, sha)
        if content:
            found_types.update(parse_azurerm_resource_types_from_tf(content))
            for module_source in parse_module_sources_from_tf(content):
                resolved = normalize_local_module_path(module_source, app_dir)
                if resolved:
                    local_modules.add(resolved)

    # Pass 2: files of the referenced local modules.
    for module_dir in sorted(local_modules):
        for tf_path in list_repo_tf_files_under(module_dir, sha):
            content = get_file_at_sha(tf_path, sha)
            if content:
                found_types.update(parse_azurerm_resource_types_from_tf(content))

    return found_types

from datetime import datetime

import pandas as pd

# Timestamp layout this script expects for created_at / closed_at values.
TIME_FMT = "%Y-%m-%dT%H:%M:%SZ"


def _turnaround_days(created, closed):
    """Return days between two timestamps, or None if either is missing or malformed."""
    if not (created and closed):
        return None
    try:
        delta = datetime.strptime(closed, TIME_FMT) - datetime.strptime(created, TIME_FMT)
    except (TypeError, ValueError):
        return None
    return delta.total_seconds() / 86400.0


with open("issues.json") as f:
    issues = json.load(f)

issue_to_types = {}     # issue number -> sorted list of azurerm types
issue_turnaround = {}   # issue number -> days open (or None)
module_deps = {}        # app_dir -> set(module paths it references)

for issue in issues:
    inum = issue["number"]
    issue_turnaround[inum] = _turnaround_days(issue.get("created_at"), issue.get("closed_at"))

    pr_numbers = sorted(
        {n for n in map(pr_number_from_url, issue.get("pr_urls", [])) if n}
    )

    types_for_issue = set()
    for prn in pr_numbers:
        try:
            sha = get_pr_head_sha(prn)
            files = list_pr_files(prn)
        except requests.HTTPError as exc:
            # The fetch step turns every "#N" in a comment into a PR URL, so N
            # may be an issue number. The pulls endpoint then answers 404,
            # which previously aborted the whole analysis.
            if exc.response is not None and exc.response.status_code == 404:
                print(f"Issue #{inum}: reference #{prn} is not a pull request; skipping.")
                continue
            raise

        # An "app" is identified by the first two path segments under workload/.
        touched_apps = {
            "/".join(entry.get("filename", "").split("/")[:2])
            for entry in files
            if entry.get("filename", "").startswith("workload/")
        }

        for app_dir in sorted(touched_apps):
            types_for_issue |= collect_azurerm_types_for_app(app_dir, sha)

            # Collect module sources for the dependency graph by scanning the
            # app's tf files at the PR head.
            for p in list_repo_tf_files_under(app_dir, sha):
                txt = get_file_at_sha(p, sha)
                if not txt:
                    continue
                for src in parse_module_sources_from_tf(txt):
                    local = normalize_local_module_path(src, app_dir)
                    if local:
                        module_deps.setdefault(app_dir, set()).add(local)

    if types_for_issue:
        issue_to_types[inum] = sorted(types_for_issue)

# One row per (issue, type). The lists are already de-duplicated and sorted,
# so iterate them directly to keep the CSV row order deterministic.
rows = [
    {"issue": inum, "azurerm_type": t}
    for inum, types in issue_to_types.items()
    for t in types
]

# Explicit columns keep the header row even with zero rows; the report step
# groups on these column names and fails on a header-less file.
df = pd.DataFrame(rows, columns=["issue", "azurerm_type"])
df.to_csv("severity_data.csv", index=False)

ta_rows = [
    {"issue": inum, "turnaround_days": days}
    for inum, days in issue_turnaround.items()
]
pd.DataFrame(ta_rows, columns=["issue", "turnaround_days"]).to_csv("turnaround.csv", index=False)

with open("issue_to_azurerm_types.json", "w") as f:
    json.dump(issue_to_types, f, indent=2)

with open("issue_turnaround.json", "w") as f:
    json.dump(issue_turnaround, f, indent=2)

with open("module_deps.json", "w") as f:
    json.dump({k: sorted(v) for k, v in module_deps.items()}, f, indent=2)

print(f"ISSUES_WITH_TYPES={len(issue_to_types)}")

- name: Generate charts and markdown (severity, heatmap, trend, dependency, turnaround) and include AI summary

id: report

env:

OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

run: |

set -euo pipefail

python - <<'PY'

import os, json, datetime, glob

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

import networkx as nx

ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")

os.makedirs("history", exist_ok=True)

# --- Severity bar (existing) ---

if os.path.exists("severity_data.csv"):

df = pd.read_csv("severity_data.csv")

counts = df.groupby("azurerm_type")["issue"].nunique().sort_values(ascending=False)

else:

counts = pd.Series(dtype=int)

png_sev = f"history/severity-by-azurerm-{ts}.png"

plt.figure(figsize=(12,6))

if not counts.empty:

counts.plot(kind="bar")

plt.title("Issue frequency by azurerm resource type")

plt.xlabel("azurerm resource type")

plt.ylabel("number of closed issues touching type")

else:

plt.text(0.5, 0.5, "No azurerm-impacting issues in window", ha="center", va="center")

plt.axis("off")

plt.tight_layout()

plt.savefig(png_sev)

plt.clf()

# --- Heatmap: azurerm_type x issue (binary or counts) ---

heat_png = f"history/heatmap-azurerm-issues-{ts}.png"

if os.path.exists("severity_data.csv"):

mat = pd.read_csv("severity_data.csv")

if not mat.empty:

pivot = mat.pivot_table(index="azurerm_type", columns="issue", aggfunc='size', fill_value=0)

# Optionally cluster or sort by total counts

pivot['total'] = pivot.sum(axis=1)

pivot = pivot.sort_values('total', ascending=False).drop(columns=['total'])

# limit columns for readability (most recent/top issues)

if pivot.shape[1] > 100:

pivot = pivot.iloc[:, :100]

plt.figure(figsize=(14, max(6, 0.2 * pivot.shape[0])))

sns.heatmap(pivot, cmap="YlOrRd", cbar=True)

plt.title("Heatmap: azurerm resource types (rows) vs issues (columns)")

plt.xlabel("Issue number (truncated)")

plt.ylabel("azurerm resource type")

plt.tight_layout()

plt.savefig(heat_png)

plt.clf()

else:

plt.figure(figsize=(6,2))

plt.text(0.5,0.5,"No data for heatmap",ha="center",va="center")

plt.axis("off")

plt.savefig(heat_png)

plt.clf()

else:

plt.figure(figsize=(6,2))

plt.text(0.5,0.5,"No data for heatmap",ha="center",va="center")

plt.axis("off")

plt.savefig(heat_png)

plt.clf()

# --- Trend lines: aggregate historical severity_data.csv files in history/ ---

trend_png = f"history/trendlines-azurerm-{ts}.png"

# collect historical CSVs that match severity_data pattern

hist_files = sorted(glob.glob("history/*severity-data-*.csv") + glob.glob("history/*severity_data.csv") + glob.glob("history/*severity-by-azurerm-*.csv"))

# also include current run's severity_data.csv

if os.path.exists("severity_data.csv"):

hist_files.append("severity_data.csv")

# Build weekly counts per azurerm_type by deriving timestamp from filenames where possible

trend_df = pd.DataFrame()

for f in hist_files:

try:

# attempt to extract timestamp from filename

import re

m = re.search(r"(\d{8}-\d{6})", f)

ts_label = m.group(1) if m else os.path.getmtime(f)

tmp = pd.read_csv(f)

if tmp.empty:

continue

counts_tmp = tmp.groupby("azurerm_type")["issue"].nunique().rename(ts_label)

trend_df = pd.concat([trend_df, counts_tmp], axis=1)

except Exception:

continue

if not trend_df.empty:

trend_df = trend_df.fillna(0).T

# convert index to datetime where possible

try:

trend_df.index = pd.to_datetime(trend_df.index, format="%Y%m%d-%H%M%S", errors='coerce').fillna(pd.to_datetime(trend_df.index, unit='s'))

except Exception:

pass

plt.figure(figsize=(14,6))

# plot top N azurerm types by latest total

latest = trend_df.iloc[-1].sort_values(ascending=False).head(8).index.tolist()

for col in latest:

plt.plot(trend_df.index, trend_df[col], marker='o', label=col)

plt.legend(loc='best', fontsize='small')

plt.title("Trend lines: issue frequency over time for top azurerm types")

plt.xlabel("time")

plt.ylabel("issue count")

plt.xticks(rotation=45)

plt.tight_layout()

plt.savefig(trend_png)

plt.clf()

else:

plt.figure(figsize=(8,2))

plt.text(0.5,0.5,"No historical data for trend lines",ha="center",va="center")

plt.axis("off")

plt.savefig(trend_png)

plt.clf()

# --- Dependency graph: build directed graph from module_deps.json ---

dep_png = f"history/dependency-graph-{ts}.png"

if os.path.exists("module_deps.json"):

with open("module_deps.json") as f:

deps = json.load(f)

G = nx.DiGraph()

# add edges app -> module

for app, mods in deps.items():

G.add_node(app, type='app')

for m in mods:

G.add_node(m, type='module')

G.add_edge(app, m)

if len(G.nodes) == 0:

plt.figure(figsize=(6,2))

plt.text(0.5,0.5,"No dependency data",ha="center",va="center")

plt.axis("off")

plt.savefig(dep_png)

plt.clf()

else:

plt.figure(figsize=(12,8))

pos = nx.spring_layout(G, k=0.5, iterations=50)

node_colors = ['#1f78b4' if G.nodes[n].get('type')=='app' else '#33a02c' for n in G.nodes()]

nx.draw_networkx_nodes(G, pos, node_size=600, node_color=node_colors)

nx.draw_networkx_edges(G, pos, arrows=True, arrowstyle='->', arrowsize=12, edge_color='#888888')

nx.draw_networkx_labels(G, pos, font_size=8)

plt.title("Module dependency graph (apps -> local modules)")

plt.axis('off')

plt.tight_layout()

plt.savefig(dep_png)

plt.clf()

else:

plt.figure(figsize=(6,2))

plt.text(0.5,0.5,"No dependency data",ha="center",va="center")

plt.axis("off")

plt.savefig(dep_png)

plt.clf()

# --- Turnaround chart (existing) ---

ta_png = f"history/turnaround-by-issue-{ts}.png"

if os.path.exists("turnaround.csv"):

ta = pd.read_csv("turnaround.csv")

ta = ta.dropna(subset=["turnaround_days"])

if not ta.empty:

ta_sorted = ta.sort_values("turnaround_days", ascending=False).head(50)

plt.figure(figsize=(12,6))

plt.bar(ta_sorted["issue"].astype(str), ta_sorted["turnaround_days"])

plt.xticks(rotation=90)

plt.title("Turnaround time (days) for closed issues in window")

plt.xlabel("Issue number")

plt.ylabel("Turnaround (days)")

plt.tight_layout()

plt.savefig(ta_png)

plt.clf()

else:

plt.figure(figsize=(8,2))

plt.text(0.5,0.5,"No turnaround data available",ha="center",va="center")

plt.axis("off")

plt.savefig(ta_png)

plt.clf()

else:

plt.figure(figsize=(8,2))

plt.text(0.5,0.5,"No turnaround data available",ha="center",va="center")

plt.axis("off")

plt.savefig(ta_png)

plt.clf()

# --- AI summary (who wants what) ---
# Reduces issues.json to number/user/title/html_url, saves that list as
# issues_for_ai.json and, when OPENAI_API_KEY is set, asks the OpenAI chat
# completions endpoint for one "who wants what" line per issue. ai_text ends
# up holding either the summary or a short explanation of why there is none.
import http.client
import os
import urllib.request

AI_ENDPOINT = "https://api.openai.com/v1/chat/completions"


def build_ai_payload(condensed_issues):
    """Return the chat-completions request body for *condensed_issues*.

    condensed_issues is the JSON-serialisable list of per-issue dicts that is
    also written to issues_for_ai.json.
    """
    prompt = ("You are given a JSON array of GitHub issues with fields: number, user, title, html_url. "
              "Produce a concise list of one-line 'who wants what' statements, one per issue, in plain text. "
              "Format: '#<number> — <user> wants <succinct request derived from title>'. "
              "Do not add commentary.")
    # NOTE: the 15000-character cap keeps the request small but can cut the
    # JSON in the middle of a record.
    issues_json = json.dumps(condensed_issues)[:15000]
    return {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": "You are a concise summarizer."},
            # Real newlines separate the instructions from the data.
            {"role": "user", "content": prompt + "\n\nJSON:\n" + issues_json},
        ],
        "temperature": 0.2,
        "max_tokens": 400,
    }


def fetch_ai_summary(payload, api_key, timeout=60):
    """POST *payload* to the chat completions endpoint and return the reply text.

    Transport failures and unexpected response shapes do not raise; a short
    "AI summary unavailable (...)" message is returned instead so the report
    can still be written.
    """
    # The key is sent as a header from this process, so it never appears in a
    # child process's argument list.
    request = urllib.request.Request(
        AI_ENDPOINT,
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            raw = response.read().decode("utf-8")
    except (OSError, http.client.HTTPException):
        # URLError/HTTPError and socket timeouts are OSError subclasses.
        return "AI summary unavailable (request failed)."
    try:
        resp = json.loads(raw)
        return resp["choices"][0]["message"]["content"].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        return "AI summary unavailable (parsing error)."


if os.path.exists("issues.json"):
    with open("issues.json", encoding="utf-8") as f:
        issues = json.load(f)
else:
    issues = []

condensed = [
    {
        "number": i.get("number"),
        "user": i.get("user"),
        "title": i.get("title"),
        "html_url": i.get("html_url"),
    }
    for i in issues
]

with open("issues_for_ai.json", "w", encoding="utf-8") as f:
    json.dump(condensed, f, indent=2)

# call OpenAI if key present
OPENAI_KEY = os.environ.get("OPENAI_API_KEY")
ai_text = "AI summary skipped (no OPENAI_API_KEY)."
if OPENAI_KEY:
    ai_text = fetch_ai_summary(build_ai_payload(condensed), OPENAI_KEY)

# --- Write markdown report combining all visuals ---
# Assembles the weekly report from ai_text, the counts series and the chart
# files produced earlier in this script, and writes it to md_path.
md_path = f"history/severity-report-{ts}.md"


def image_link(path):
    """Return a markdown image line that refers to *path* by file name only."""
    name = os.path.basename(path)
    return "![" + name + "](" + name + ")\n\n"


# The report text contains non-ASCII characters (em dashes, and whatever the
# AI summary returns), so the encoding is pinned instead of left to the locale.
with open(md_path, "w", encoding="utf-8") as f:
    f.write("# Weekly Terraform azurerm hotspot report\n\n")
    f.write(f"**Window (days):** {os.environ.get('WINDOW_DAYS','7')}\n\n")

    f.write("## AI Summary (who wants what)\n\n")
    f.write("```\n")
    f.write(ai_text + "\n")
    f.write("```\n\n")

    f.write("## Top azurerm resource types by issue frequency\n\n")
    if not counts.empty:
        f.write(image_link(png_sev))
        f.write(counts.head(30).to_frame("issues").to_markdown() + "\n\n")
    else:
        f.write("No azurerm-impacting issues found in the selected window.\n\n")

    f.write("## Heatmap: azurerm types vs issues\n\n")
    f.write(image_link(heat_png))

    f.write("## Trend lines: historical issue frequency for top azurerm types\n\n")
    f.write(image_link(trend_png))

    f.write("## Dependency graph: apps -> local modules\n\n")
    f.write(image_link(dep_png))

    f.write("## Turnaround time for closed issues (days)\n\n")
    f.write(image_link(ta_png))

    f.write("## Data artifacts\n\n")
    f.write("- `severity_data.csv` — per-issue azurerm type mapping\n")
    f.write("- `turnaround.csv` — per-issue turnaround in days\n")
    f.write("- `issue_to_azurerm_types.json` — mapping used to build charts\n")
    f.write("- `module_deps.json` — module dependency data used for graph\n")

# Save current CSVs into history with timestamp for future trend aggregation
import shutil
import sys

for src_csv, dest_csv in (
    ("severity_data.csv", f"history/severity-data-{ts}.csv"),
    ("turnaround.csv", f"history/turnaround-{ts}.csv"),
):
    if not os.path.exists(src_csv):
        continue
    try:
        shutil.copy(src_csv, dest_csv)
    except OSError as err:
        # Archiving stays best-effort (the report itself is already written),
        # but a failure is reported instead of silently ignored, and one
        # failed copy no longer prevents the other. The warning goes to stderr
        # so stdout keeps only the REPORT_* lines printed by this script.
        print(f"WARNING: could not copy {src_csv} to {dest_csv}: {err}", file=sys.stderr)

# Print the path of every generated report file as one KEY=value line each.
for report_key, report_path in (
    ("REPORT_MD", md_path),
    ("REPORT_PNG", png_sev),
    ("REPORT_HEAT", heat_png),
    ("REPORT_TREND", trend_png),
    ("REPORT_DEP", dep_png),
    ("REPORT_TA", ta_png),
):
    print(f"{report_key}={report_path}")

# Pruning runs before the commit step: create-pull-request commits the working
# tree as it is when that step runs, so files deleted afterwards would never
# reach the PR that advertises "prune to last 10".
- name: Prune history to max 10 report sets (pre-commit)
  run: |
    python - <<'PY'
    import re
    import sys
    from pathlib import Path

    hist = Path("history")
    hist.mkdir(exist_ok=True)

    # Group files by the 8-digit/6-digit stamp in their names (presumably
    # YYYYMMDD-HHMMSS); files that share a stamp form one report set.
    groups = {}
    for p in hist.iterdir():
        m = re.search(r"(\d{8}-\d{6})", p.name)
        if m and p.is_file():
            groups.setdefault(m.group(1), []).append(p)

    # Stamps are compared as strings; newest-first order relies on the stamp
    # being zero-padded date-then-time.
    keep = set(sorted(groups, reverse=True)[:10])

    removed = 0
    for ts, files in groups.items():
        if ts in keep:
            continue
        for p in files:
            try:
                p.unlink()
                removed += 1
            except OSError as err:
                # Report the failure and keep going; only real deletions are counted.
                print(f"WARNING: could not remove {p}: {err}", file=sys.stderr)

    print(f"Pruned {removed} files; kept {len(keep)} report sets.")
    PY

- name: Add report files to history and commit via PR
  id: create_pr
  uses: peter-evans/create-pull-request@v6
  with:
    commit-message: "Add weekly Terraform azurerm hotspot report and advanced visuals (prune to last 10)"
    title: "Weekly Terraform azurerm hotspot report"
    body: |
      This PR adds the latest weekly azurerm hotspot report and charts under `history/`.
      The workflow prunes older reports to keep at most 10 report sets.
    branch: "weekly-terraform-azurerm-hotspots"
    base: "main"
    # `path` is the location of the repository checkout, not a file filter;
    # `add-paths` is the input that limits which files are committed.
    add-paths: history

- name: Notify runbook webhook (which will send az communication email)
  if: steps.create_pr.outcome == 'success'
  env:
    RUNBOOK_WEBHOOK_URL: ${{ secrets.RUNBOOK_WEBHOOK_URL }}
    PR_URL: ${{ steps.create_pr.outputs.pull-request-url }}
    WINDOW_DAYS: ${{ env.WINDOW_DAYS }}
  run: |
    # An unset secret arrives as an empty string; skip rather than call curl
    # without a URL.
    if [ -z "${RUNBOOK_WEBHOOK_URL:-}" ]; then
      echo "::warning::RUNBOOK_WEBHOOK_URL is not set; skipping notification."
      exit 0
    fi

    # create-pull-request succeeds without opening a PR when there is nothing
    # to commit; in that case there is no URL to send.
    if [ -z "${PR_URL:-}" ]; then
      echo "No pull request URL was produced; skipping notification."
      exit 0
    fi

    # Same default as the report script uses when WINDOW_DAYS is absent.
    window="${WINDOW_DAYS:-7}"

    # jq builds the JSON so the values are escaped correctly.
    payload=$(jq -n \
      --arg pr "$PR_URL" \
      --arg window "$window" \
      '{subject: ("Weekly Terraform azurerm hotspot report - " + $window + "d"), body: ("A new weekly azurerm hotspot report has been generated. Review the PR: " + $pr), pr_url: $pr, window_days: $window}')

    # --fail turns HTTP error responses into a non-zero exit so a rejected
    # notification is visible; --max-time keeps a stalled endpoint from
    # hanging the job.
    curl --fail --silent --show-error --max-time 30 -X POST "$RUNBOOK_WEBHOOK_URL" \
      -H "Content-Type: application/json" \
      -d "$payload"

- name: Output artifact list
  # Runs regardless of earlier step results.
  if: always()
  run: |
    # Diagnostic listing only: `|| true` keeps a missing history/ directory
    # from failing the step.
    printf '%s\n' "Generated files in history/:"
    ls -la history || true