This extends the same workflow we discussed in the previous two articles to include the following:
• Heatmaps use the same issue‑to‑type matrix
• Trend lines use the same CSVs once history is accumulated
• Dependency graphs use the already implemented module‑source parsing
All three are staples of issue-management dashboards and are well known for their visual appeal.
The YAML follows:
---
# Weekly report: maps closed GitHub issues to the azurerm resource types
# touched by their linked PRs, then renders charts + an AI summary.
name: Weekly Terraform azurerm hotspot report with AI summary and advanced visuals

on:
  workflow_dispatch:
    inputs:
      window_days:
        description: "Number of days back to collect closed issues (integer)"
        required: false
        default: "7"
  schedule:
    # Every Monday at 13:00 UTC.
    - cron: "0 13 * * MON"

permissions:
  contents: write
  pull-requests: write
  issues: read

jobs:
  report:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v4
        with:
          # Full history so later `git ls-tree <sha>` calls can resolve
          # PR head commits.
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests pandas matplotlib seaborn networkx python-hcl2

      - name: Prepare environment variables
        id: env
        run: |
          # Default to 7 days when triggered by schedule (no inputs).
          echo "WINDOW_DAYS=${{ github.event.inputs.window_days || '7' }}" >> "$GITHUB_ENV"
          echo "REPO=${GITHUB_REPOSITORY}" >> "$GITHUB_ENV"
- name: Fetch closed issues and linked PRs (window)
  id: fetch
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    REPO: ${{ env.REPO }}
    WINDOW_DAYS: ${{ env.WINDOW_DAYS }}
  run: |
    python - <<'PY'
    """Collect closed issues updated in the window, plus PR URLs found in
    their comments, into issues.json for the downstream analysis step."""
    import os, requests, json, datetime, re

    REPO = os.environ["REPO"]
    TOKEN = os.environ["GH_TOKEN"]
    WINDOW_DAYS = int(os.environ.get("WINDOW_DAYS", "7"))
    HEADERS = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}
    # NOTE: the issues API's `since` filters on updated_at, not closed_at.
    since = (datetime.datetime.now(datetime.timezone.utc)
             - datetime.timedelta(days=WINDOW_DAYS)).strftime("%Y-%m-%dT%H:%M:%SZ")

    def gh_get(url, params=None):
        # Single-page GET; raises on HTTP errors.
        r = requests.get(url, headers=HEADERS, params=params)
        r.raise_for_status()
        return r.json()

    def gh_get_paginated(url, params=None):
        # The original fetched only the first 100 items; walk every page.
        params = dict(params or {})
        params["per_page"] = 100
        page, out = 1, []
        while True:
            params["page"] = page
            batch = gh_get(url, params=params)
            if not batch:
                return out
            out.extend(batch)
            page += 1

    issues_url = f"https://api.github.com/repos/{REPO}/issues"
    items = gh_get_paginated(issues_url, {"state": "closed", "since": since})
    issues = []
    for i in items:
        if "pull_request" in i:
            continue  # the issues endpoint also returns pull requests
        comments = gh_get_paginated(i["comments_url"])
        pr_urls = set()
        for c in comments:
            body = c.get("body", "") or ""
            for m in re.findall(r"https://github\.com/[^/\s]+/[^/\s]+/pull/\d+", body):
                pr_urls.add(m)
            for m in re.findall(r"(?:^|\s)#(\d+)\b", body):
                # Bare #N references are assumed to be PRs in this repo;
                # the analysis step skips numbers that turn out to be issues.
                pr_urls.add(f"https://github.com/{REPO}/pull/{m}")
        issues.append({
            "number": i["number"],
            "title": i.get("title", ""),
            "user": i.get("user", {}).get("login", ""),
            "created_at": i.get("created_at"),
            "closed_at": i.get("closed_at"),
            "html_url": i.get("html_url"),
            "comments": [{"id": c.get("id"), "body": c.get("body", ""), "created_at": c.get("created_at")} for c in comments],
            "pr_urls": sorted(pr_urls),
        })
    with open("issues.json", "w") as f:
        json.dump(issues, f, indent=2)
    print(f"WROTE_ISSUES={len(issues)}")
    PY
- name: Resolve PRs, collect touched workload apps and azurerm types
  id: analyze
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    REPO: ${{ env.REPO }}
  run: |
    python - <<'PY'
    """For each issue in issues.json, resolve its linked PRs, find touched
    workload/<app> dirs, and collect the azurerm_* resource types those apps
    (and their local modules) declare at the PR head commit."""
    import os, json, re, requests, subprocess
    from datetime import datetime
    import hcl2

    REPO = os.environ["REPO"]
    TOKEN = os.environ["GH_TOKEN"]
    HEADERS = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}

    def gh_get(url, params=None):
        r = requests.get(url, headers=HEADERS, params=params)
        r.raise_for_status()
        return r.json()

    def gh_get_text(url):
        r = requests.get(url, headers=HEADERS)
        r.raise_for_status()
        return r.text

    def pr_number_from_url(u):
        # Extract the numeric PR id from a .../pull/<n> URL; None if absent.
        m = re.search(r"/pull/(\d+)", u)
        return int(m.group(1)) if m else None

    def list_pr_files(pr_number):
        # All files changed by the PR (paginated).
        url = f"https://api.github.com/repos/{REPO}/pulls/{pr_number}/files"
        files, page = [], 1
        while True:
            batch = gh_get(url, params={"per_page": 100, "page": page})
            if not batch:
                return files
            files.extend(batch)
            page += 1

    def get_pr_head_sha(pr_number):
        # Head commit of the PR; 404s for numbers that are issues, not PRs.
        pr = gh_get(f"https://api.github.com/repos/{REPO}/pulls/{pr_number}")
        return pr["head"]["sha"]

    def get_file_at_sha(path, sha):
        """Fetch file text at a specific commit via the contents API; None
        when missing or not a plain file."""
        url = f"https://api.github.com/repos/{REPO}/contents/{path}"
        r = requests.get(url, headers=HEADERS, params={"ref": sha})
        if r.status_code == 404:
            return None
        r.raise_for_status()
        data = r.json()
        if isinstance(data, dict) and data.get("type") == "file" and data.get("download_url"):
            return gh_get_text(data["download_url"])
        return None

    def _azurerm_keys(block):
        # Yield azurerm_* resource-type keys from one parsed resource entry.
        for rtype in block.keys():
            if isinstance(rtype, str) and rtype.startswith("azurerm_"):
                yield rtype

    def parse_azurerm_resource_types_from_tf(tf_text):
        """Set of azurerm_* resource types declared in tf_text; empty on
        parse failure (best-effort)."""
        types = set()
        try:
            obj = hcl2.loads(tf_text)
        except Exception:
            return types
        res = obj.get("resource", [])
        # python-hcl2 may yield a list of single-key dicts or a flat dict.
        if isinstance(res, list):
            for item in res:
                if isinstance(item, dict):
                    types.update(_azurerm_keys(item))
        elif isinstance(res, dict):
            types.update(_azurerm_keys(res))
        return types

    def parse_module_sources_from_tf(tf_text):
        """Set of module `source` strings declared in tf_text; empty on
        parse failure."""
        sources = set()
        try:
            obj = hcl2.loads(tf_text)
        except Exception:
            return sources
        mods = obj.get("module", [])
        bodies = []
        if isinstance(mods, list):
            for item in mods:
                if isinstance(item, dict):
                    bodies.extend(item.values())
        elif isinstance(mods, dict):
            bodies.extend(mods.values())
        for body in bodies:
            if isinstance(body, dict):
                src = body.get("source")
                if isinstance(src, str):
                    sources.add(src)
        return sources

    def normalize_local_module_path(source, app_dir):
        """Resolve ./ or ../ module sources relative to app_dir; None for
        registry/remote sources."""
        if source.startswith(("./", "../")):
            import posixpath
            return posixpath.normpath(posixpath.join(app_dir, source))
        return None

    def list_repo_tf_files_under(dir_path, sha):
        """List .tf paths under dir_path at commit sha via the local clone."""
        try:
            out = subprocess.check_output(
                ["git", "ls-tree", "-r", "--name-only", sha, dir_path], text=True)
        except Exception:
            # NOTE(review): sha may be unreachable locally (e.g. a fork PR
            # head not fetched by checkout) — TODO confirm against repo setup.
            return []
        return [p.strip() for p in out.splitlines() if p.strip().endswith(".tf")]

    def collect_azurerm_types_for_app(app_dir, sha):
        """Gather azurerm types from app_dir and its local modules at sha."""
        az_types, module_dirs = set(), set()
        for p in list_repo_tf_files_under(app_dir, sha):
            txt = get_file_at_sha(p, sha)
            if not txt:
                continue
            az_types |= parse_azurerm_resource_types_from_tf(txt)
            for src in parse_module_sources_from_tf(txt):
                local = normalize_local_module_path(src, app_dir)
                if local:
                    module_dirs.add(local)
        for mdir in sorted(module_dirs):
            for p in list_repo_tf_files_under(mdir, sha):
                txt = get_file_at_sha(p, sha)
                if txt:
                    az_types |= parse_azurerm_resource_types_from_tf(txt)
        return az_types

    with open("issues.json") as f:
        issues = json.load(f)

    issue_to_types = {}
    issue_turnaround = {}
    module_deps = {}  # app_dir -> set of local module paths it references
    FMT = "%Y-%m-%dT%H:%M:%SZ"

    for issue in issues:
        inum = issue["number"]
        created, closed = issue.get("created_at"), issue.get("closed_at")
        delta_days = None
        if created and closed:
            try:
                delta_days = (datetime.strptime(closed, FMT)
                              - datetime.strptime(created, FMT)).total_seconds() / 86400.0
            except Exception:
                delta_days = None
        issue_turnaround[inum] = delta_days

        pr_numbers = sorted({n for n in (pr_number_from_url(u)
                                         for u in issue.get("pr_urls", [])) if n})
        types_for_issue = set()
        for prn in pr_numbers:
            try:
                sha = get_pr_head_sha(prn)
                files = list_pr_files(prn)
            except requests.HTTPError:
                # Bare "#N" comment references can point at issues rather
                # than PRs; the original crashed here on the resulting 404.
                continue
            touched_apps = set()
            for fobj in files:
                path = fobj.get("filename", "")
                if path.startswith("workload/"):
                    parts = path.split("/")
                    if len(parts) >= 2:
                        touched_apps.add("/".join(parts[:2]))
            for app_dir in sorted(touched_apps):
                types_for_issue |= collect_azurerm_types_for_app(app_dir, sha)
                # Record module sources at PR head for the dependency graph.
                for p in list_repo_tf_files_under(app_dir, sha):
                    txt = get_file_at_sha(p, sha)
                    if not txt:
                        continue
                    for src in parse_module_sources_from_tf(txt):
                        local = normalize_local_module_path(src, app_dir)
                        if local:
                            module_deps.setdefault(app_dir, set()).add(local)
        if types_for_issue:
            issue_to_types[inum] = sorted(types_for_issue)

    import pandas as pd
    # Explicit columns so an empty run still writes a header row; the
    # original wrote a header-less file that broke read_csv downstream.
    rows = [{"issue": inum, "azurerm_type": t}
            for inum, types in issue_to_types.items() for t in types]
    pd.DataFrame(rows, columns=["issue", "azurerm_type"]).to_csv("severity_data.csv", index=False)
    ta_rows = [{"issue": i, "turnaround_days": d} for i, d in issue_turnaround.items()]
    pd.DataFrame(ta_rows, columns=["issue", "turnaround_days"]).to_csv("turnaround.csv", index=False)
    with open("issue_to_azurerm_types.json", "w") as f:
        json.dump(issue_to_types, f, indent=2)
    with open("issue_turnaround.json", "w") as f:
        json.dump(issue_turnaround, f, indent=2)
    with open("module_deps.json", "w") as f:
        json.dump({k: sorted(v) for k, v in module_deps.items()}, f, indent=2)
    print(f"ISSUES_WITH_TYPES={len(issue_to_types)}")
    PY
- name: Generate charts and markdown (severity, heatmap, trend, dependency, turnaround) and include AI summary
  id: report
  env:
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  run: |
    set -euo pipefail
    python - <<'PY'
    """Render all charts into history/, build the markdown report, and
    (optionally) attach an OpenAI-generated 'who wants what' summary."""
    import datetime, glob, json, os, re, shutil

    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    import networkx as nx
    import requests

    ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
    os.makedirs("history", exist_ok=True)

    def read_csv_safe(path, columns):
        """Read a CSV, tolerating a missing or completely empty file."""
        if not os.path.exists(path):
            return pd.DataFrame(columns=columns)
        try:
            return pd.read_csv(path)
        except pd.errors.EmptyDataError:
            return pd.DataFrame(columns=columns)

    def save_placeholder(path, msg, figsize=(8, 2)):
        """Write a small 'no data' image so report links never dangle."""
        plt.figure(figsize=figsize)
        plt.text(0.5, 0.5, msg, ha="center", va="center")
        plt.axis("off")
        plt.savefig(path)
        plt.clf()

    # --- Severity bar ---
    df = read_csv_safe("severity_data.csv", ["issue", "azurerm_type"])
    counts = (df.groupby("azurerm_type")["issue"].nunique().sort_values(ascending=False)
              if not df.empty else pd.Series(dtype=int))
    png_sev = f"history/severity-by-azurerm-{ts}.png"
    if not counts.empty:
        plt.figure(figsize=(12, 6))
        counts.plot(kind="bar")
        plt.title("Issue frequency by azurerm resource type")
        plt.xlabel("azurerm resource type")
        plt.ylabel("number of closed issues touching type")
        plt.tight_layout()
        plt.savefig(png_sev)
        plt.clf()
    else:
        save_placeholder(png_sev, "No azurerm-impacting issues in window", figsize=(12, 6))

    # --- Heatmap: azurerm_type (rows) x issue (columns) ---
    heat_png = f"history/heatmap-azurerm-issues-{ts}.png"
    if not df.empty:
        pivot = df.pivot_table(index="azurerm_type", columns="issue", aggfunc="size", fill_value=0)
        # Sort rows by total count, busiest types first.
        pivot = pivot.loc[pivot.sum(axis=1).sort_values(ascending=False).index]
        if pivot.shape[1] > 100:  # limit columns for readability
            pivot = pivot.iloc[:, :100]
        plt.figure(figsize=(14, max(6, 0.2 * pivot.shape[0])))
        sns.heatmap(pivot, cmap="YlOrRd", cbar=True)
        plt.title("Heatmap: azurerm resource types (rows) vs issues (columns)")
        plt.xlabel("Issue number (truncated)")
        plt.ylabel("azurerm resource type")
        plt.tight_layout()
        plt.savefig(heat_png)
        plt.clf()
    else:
        save_placeholder(heat_png, "No data for heatmap", figsize=(6, 2))

    # --- Trend lines from accumulated history CSVs ---
    trend_png = f"history/trendlines-azurerm-{ts}.png"
    hist_files = sorted(glob.glob("history/*severity-data-*.csv")
                        + glob.glob("history/*severity_data.csv")
                        + glob.glob("history/*severity-by-azurerm-*.csv"))
    if os.path.exists("severity_data.csv"):
        hist_files.append("severity_data.csv")
    trend_df = pd.DataFrame()
    for fpath in hist_files:
        try:
            m = re.search(r"(\d{8}-\d{6})", fpath)
            # Fall back to mtime when the filename carries no timestamp.
            ts_label = m.group(1) if m else os.path.getmtime(fpath)
            tmp = pd.read_csv(fpath)
            if tmp.empty:
                continue
            col = tmp.groupby("azurerm_type")["issue"].nunique().rename(ts_label)
            trend_df = pd.concat([trend_df, col], axis=1)
        except Exception:
            continue  # skip unreadable or malformed historical files
    if not trend_df.empty:
        trend_df = trend_df.fillna(0).T
        try:
            trend_df.index = (pd.to_datetime(trend_df.index, format="%Y%m%d-%H%M%S", errors="coerce")
                              .fillna(pd.to_datetime(trend_df.index, unit="s")))
        except Exception:
            pass  # keep string labels if conversion fails
        plt.figure(figsize=(14, 6))
        # Plot the 8 types with the highest count in the latest snapshot.
        top_types = trend_df.iloc[-1].sort_values(ascending=False).head(8).index.tolist()
        for col in top_types:
            plt.plot(trend_df.index, trend_df[col], marker="o", label=col)
        plt.legend(loc="best", fontsize="small")
        plt.title("Trend lines: issue frequency over time for top azurerm types")
        plt.xlabel("time")
        plt.ylabel("issue count")
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(trend_png)
        plt.clf()
    else:
        save_placeholder(trend_png, "No historical data for trend lines")

    # --- Dependency graph from module_deps.json ---
    dep_png = f"history/dependency-graph-{ts}.png"
    deps = {}
    if os.path.exists("module_deps.json"):
        with open("module_deps.json") as f:
            deps = json.load(f)
    G = nx.DiGraph()
    for app, mods in deps.items():
        G.add_node(app, type="app")
        for m in mods:
            G.add_node(m, type="module")
            G.add_edge(app, m)  # edge direction: app -> module
    if len(G.nodes) == 0:
        save_placeholder(dep_png, "No dependency data", figsize=(6, 2))
    else:
        plt.figure(figsize=(12, 8))
        pos = nx.spring_layout(G, k=0.5, iterations=50)
        node_colors = ["#1f78b4" if G.nodes[n].get("type") == "app" else "#33a02c"
                       for n in G.nodes()]
        nx.draw_networkx_nodes(G, pos, node_size=600, node_color=node_colors)
        nx.draw_networkx_edges(G, pos, arrows=True, arrowstyle="->", arrowsize=12, edge_color="#888888")
        nx.draw_networkx_labels(G, pos, font_size=8)
        plt.title("Module dependency graph (apps -> local modules)")
        plt.axis("off")
        plt.tight_layout()
        plt.savefig(dep_png)
        plt.clf()

    # --- Turnaround chart ---
    ta_png = f"history/turnaround-by-issue-{ts}.png"
    ta = read_csv_safe("turnaround.csv", ["issue", "turnaround_days"]).dropna(subset=["turnaround_days"])
    if not ta.empty:
        ta_sorted = ta.sort_values("turnaround_days", ascending=False).head(50)
        plt.figure(figsize=(12, 6))
        plt.bar(ta_sorted["issue"].astype(str), ta_sorted["turnaround_days"])
        plt.xticks(rotation=90)
        plt.title("Turnaround time (days) for closed issues in window")
        plt.xlabel("Issue number")
        plt.ylabel("Turnaround (days)")
        plt.tight_layout()
        plt.savefig(ta_png)
        plt.clf()
    else:
        save_placeholder(ta_png, "No turnaround data available")

    # --- AI summary (who wants what) ---
    issues = []
    if os.path.exists("issues.json"):
        with open("issues.json") as f:
            issues = json.load(f)
    condensed = [{"number": i.get("number"), "user": i.get("user"),
                  "title": i.get("title"), "html_url": i.get("html_url")}
                 for i in issues]
    with open("issues_for_ai.json", "w") as f:
        json.dump(condensed, f, indent=2)

    ai_text = "AI summary skipped (no OPENAI_API_KEY)."
    OPENAI_KEY = os.environ.get("OPENAI_API_KEY")
    if OPENAI_KEY:
        prompt = ("You are given a JSON array of GitHub issues with fields: number, user, title, html_url. "
                  "Produce a concise list of one-line 'who wants what' statements, one per issue, in plain text. "
                  "Format: '#<number> — <user> wants <succinct request derived from title>'. "
                  "Do not add commentary.")
        payload = {
            "model": "gpt-4o-mini",
            "messages": [
                {"role": "system", "content": "You are a concise summarizer."},
                # Real newlines: the original's "\\n" sent literal
                # backslash-n text to the model.
                {"role": "user", "content": prompt + "\n\nJSON:\n" + json.dumps(condensed)[:15000]},
            ],
            "temperature": 0.2,
            "max_tokens": 400,
        }
        try:
            # Use requests (already installed) so the API key stays inside
            # the process; the original exposed it on the curl command line.
            resp = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers={"Content-Type": "application/json",
                         "Authorization": f"Bearer {OPENAI_KEY}"},
                json=payload, timeout=120)
            resp.raise_for_status()
            ai_text = resp.json()["choices"][0]["message"]["content"].strip()
        except Exception:
            ai_text = "AI summary unavailable (request or parsing error)."

    # --- Markdown report combining all visuals ---
    md_path = f"history/severity-report-{ts}.md"
    with open(md_path, "w") as f:
        f.write("# Weekly Terraform azurerm hotspot report\n\n")
        f.write(f"**Window (days):** {os.environ.get('WINDOW_DAYS', '7')}\n\n")
        f.write("## AI Summary (who wants what)\n\n")
        f.write("```\n")
        f.write(ai_text + "\n")
        f.write("```\n\n")
        f.write("## Top azurerm resource types by issue frequency\n\n")
        if not counts.empty:
            # The report lives in history/ next to the PNGs, so link by
            # basename (the original's image-embed lines were corrupted).
            f.write(f"![Severity by azurerm type]({os.path.basename(png_sev)})\n\n")
            f.write(counts.head(30).to_frame("issues").to_markdown() + "\n\n")
        else:
            f.write("No azurerm-impacting issues found in the selected window.\n\n")
        f.write("## Heatmap: azurerm types vs issues\n\n")
        f.write(f"![Heatmap]({os.path.basename(heat_png)})\n\n")
        f.write("## Trend lines: historical issue frequency for top azurerm types\n\n")
        f.write(f"![Trend lines]({os.path.basename(trend_png)})\n\n")
        f.write("## Dependency graph: apps -> local modules\n\n")
        f.write(f"![Dependency graph]({os.path.basename(dep_png)})\n\n")
        f.write("## Turnaround time for closed issues (days)\n\n")
        f.write(f"![Turnaround]({os.path.basename(ta_png)})\n\n")
        f.write("## Data artifacts\n\n")
        f.write("- `severity_data.csv` — per-issue azurerm type mapping\n")
        f.write("- `turnaround.csv` — per-issue turnaround in days\n")
        f.write("- `issue_to_azurerm_types.json` — mapping used to build charts\n")
        f.write("- `module_deps.json` — module dependency data used for graph\n")

    # Keep timestamped copies so future runs can draw trend lines.
    for src, dst in (("severity_data.csv", f"history/severity-data-{ts}.csv"),
                     ("turnaround.csv", f"history/turnaround-{ts}.csv")):
        if os.path.exists(src):
            shutil.copy(src, dst)

    print(f"REPORT_MD={md_path}")
    print(f"REPORT_PNG={png_sev}")
    print(f"REPORT_HEAT={heat_png}")
    print(f"REPORT_TREND={trend_png}")
    print(f"REPORT_DEP={dep_png}")
    print(f"REPORT_TA={ta_png}")
    PY
- name: Add report files to history and commit via PR
  id: create_pr
  uses: peter-evans/create-pull-request@v6
  with:
    commit-message: "Add weekly Terraform azurerm hotspot report and advanced visuals (prune to last 10)"
    title: "Weekly Terraform azurerm hotspot report"
    body: |
      This PR adds the latest weekly azurerm hotspot report and charts under `history/`.
      The workflow prunes older reports to keep at most 10 report sets.
    branch: "weekly-terraform-azurerm-hotspots"
    base: "main"
    # `path` re-roots the action at a different checkout directory; to limit
    # which files get committed, the correct input is `add-paths`.
    add-paths: |
      history/**
- name: Prune history to max 10 report sets (post-commit)
  if: steps.create_pr.outcome == 'success'
  run: |
    # NOTE(review): this prune runs after create-pull-request, so deletions
    # affect only the runner workspace, not the PR branch — consider moving
    # this step before the PR step if pruning should be committed.
    python - <<'PY'
    """Keep at most the 10 newest timestamped report sets under history/."""
    import re
    from pathlib import Path

    hist = Path("history")
    hist.mkdir(exist_ok=True)

    # Group files by their embedded YYYYMMDD-HHMMSS timestamp.
    groups = {}
    for p in hist.iterdir():
        m = re.search(r"(\d{8}-\d{6})", p.name)
        if m:
            groups.setdefault(m.group(1), []).append(p)

    # Timestamps sort lexicographically == chronologically; keep newest 10.
    keep = set(sorted(groups, reverse=True)[:10])
    drop = [p for ts, files in groups.items() if ts not in keep for p in files]
    for p in drop:
        try:
            p.unlink()
        except OSError:
            pass  # best-effort: a missing file is fine
    print(f"Pruned {len(drop)} files; kept {len(keep)} report sets.")
    PY
- name: Notify runbook webhook (which will send az communication email)
  if: steps.create_pr.outcome == 'success'
  env:
    RUNBOOK_WEBHOOK_URL: ${{ secrets.RUNBOOK_WEBHOOK_URL }}
    PR_URL: ${{ steps.create_pr.outputs.pull-request-url }}
    WINDOW_DAYS: ${{ env.WINDOW_DAYS }}
  run: |
    set -euo pipefail
    # Skip gracefully when the secret is not configured; the original
    # would fail the job by POSTing to an empty URL.
    if [ -z "${RUNBOOK_WEBHOOK_URL:-}" ]; then
      echo "RUNBOOK_WEBHOOK_URL secret not set; skipping notification."
      exit 0
    fi
    payload=$(jq -n \
      --arg pr "$PR_URL" \
      --arg window "$WINDOW_DAYS" \
      '{subject: ("Weekly Terraform azurerm hotspot report - " + $window + "d"), body: ("A new weekly azurerm hotspot report has been generated. Review the PR: " + $pr), pr_url: $pr, window_days: $window}')
    curl -sS -X POST "$RUNBOOK_WEBHOOK_URL" \
      -H "Content-Type: application/json" \
      -d "$payload"

- name: Output artifact list
  if: always()
  run: |
    echo "Generated files in history/:"
    ls -la history || true