This is a continuation of a previous article for context. Here we extend that workflow to include the following capabilities in the report generated:
- LLM-generated summary of "who wants what" (one line per issue)
- Turn-around time bar chart for issues
- Flexible sampling window as input parameter.
The updated workflow YAML follows:
--
name: Weekly Terraform azurerm hotspot report with AI summary and turnaround chart
on:
workflow_dispatch:
inputs:
window_days:
description: "Number of days back to collect closed issues (integer)"
required: false
default: "7"
schedule:
- cron: "0 13 * * MON"
permissions:
contents: write
pull-requests: write
issues: read
jobs:
report:
runs-on: ubuntu-latest
steps:
- name: Check out repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install requests pandas matplotlib python-hcl2
- name: Prepare environment variables
id: env
run: |
echo "WINDOW_DAYS=${{ github.event.inputs.window_days || '7' }}" >> $GITHUB_ENV
echo "REPO=${GITHUB_REPOSITORY}" >> $GITHUB_ENV
- name: Fetch closed issues and linked PRs (window)
id: fetch
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ env.REPO }}
WINDOW_DAYS: ${{ env.WINDOW_DAYS }}
run: |
python - <<'PY'
import os, requests, json, datetime, re
REPO = os.environ["REPO"]
TOKEN = os.environ["GH_TOKEN"]
WINDOW_DAYS = int(os.environ.get("WINDOW_DAYS","7"))
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}
since = (datetime.datetime.utcnow() - datetime.timedelta(days=WINDOW_DAYS)).isoformat() + "Z"
def gh_get(url, params=None):
r = requests.get(url, headers=HEADERS, params=params)
r.raise_for_status()
return r.json()
issues_url = f"https://api.github.com/repos/{REPO}/issues"
params = {"state":"closed","since":since,"per_page":100}
items = gh_get(issues_url, params=params)
issues = []
for i in items:
if "pull_request" in i:
continue
# fetch comments to find any PR links
comments = gh_get(i["comments_url"], params={"per_page":100})
pr_urls = set()
for c in comments:
body = c.get("body","") or ""
for m in re.findall(r"https://github\.com/[^/\s]+/[^/\s]+/pull/\d+", body):
pr_urls.add(m)
for m in re.findall(r"(?:^|\s)#(\d+)\b", body):
pr_urls.add(f"https://github.com/{REPO}/pull/{m}")
issues.append({
"number": i["number"],
"title": i.get("title",""),
"user": i.get("user",{}).get("login",""),
"created_at": i.get("created_at"),
"closed_at": i.get("closed_at"),
"html_url": i.get("html_url"),
"comments": [{"id":c.get("id"), "body":c.get("body",""), "created_at":c.get("created_at")} for c in comments],
"pr_urls": sorted(pr_urls)
})
with open("issues.json","w") as f:
json.dump(issues, f, indent=2)
print(f"WROTE_ISSUES={len(issues)}")
PY
- name: Resolve PRs, collect touched workload apps and azurerm types
id: analyze
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ env.REPO }}
run: |
python - <<'PY'
import os, json, re, requests, subprocess
import hcl2
from collections import defaultdict
REPO = os.environ["REPO"]
TOKEN = os.environ["GH_TOKEN"]
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}
def gh_get(url, params=None):
r = requests.get(url, headers=HEADERS, params=params)
r.raise_for_status()
return r.json()
def gh_get_text(url):
r = requests.get(url, headers=HEADERS)
r.raise_for_status()
return r.text
def pr_number_from_url(u):
m = re.search(r"/pull/(\d+)", u)
return int(m.group(1)) if m else None
def list_pr_files(pr_number):
url = f"https://api.github.com/repos/{REPO}/pulls/{pr_number}/files"
files = []
page = 1
while True:
batch = gh_get(url, params={"per_page":100,"page":page})
if not batch:
break
files.extend(batch)
page += 1
return files
def get_pr_head_sha(pr_number):
url = f"https://api.github.com/repos/{REPO}/pulls/{pr_number}"
pr = gh_get(url)
return pr["head"]["sha"]
def get_file_at_sha(path, sha):
url = f"https://api.github.com/repos/{REPO}/contents/{path}"
r = requests.get(url, headers=HEADERS, params={"ref": sha})
if r.status_code == 404:
return None
r.raise_for_status()
data = r.json()
if isinstance(data, dict) and data.get("type") == "file" and data.get("download_url"):
return gh_get_text(data["download_url"])
return None
def parse_azurerm_resource_types_from_tf(tf_text):
types = set()
try:
obj = hcl2.loads(tf_text)
except Exception:
return types
res = obj.get("resource", [])
if isinstance(res, list):
for item in res:
if isinstance(item, dict):
for rtype in item.keys():
if isinstance(rtype, str) and rtype.startswith("azurerm_"):
types.add(rtype)
elif isinstance(res, dict):
for rtype in res.keys():
if isinstance(rtype, str) and rtype.startswith("azurerm_"):
types.add(rtype)
return types
def parse_module_sources_from_tf(tf_text):
sources = set()
try:
obj = hcl2.loads(tf_text)
except Exception:
return sources
mods = obj.get("module", [])
if isinstance(mods, list):
for item in mods:
if isinstance(item, dict):
for _, body in item.items():
if isinstance(body, dict):
src = body.get("source")
if isinstance(src, str):
sources.add(src)
elif isinstance(mods, dict):
for _, body in mods.items():
if isinstance(body, dict):
src = body.get("source")
if isinstance(src, str):
sources.add(src)
return sources
def normalize_local_module_path(source, app_dir):
if source.startswith("./") or source.startswith("../"):
import posixpath
return posixpath.normpath(posixpath.join(app_dir, source))
return None
def list_repo_tf_files_under(dir_path, sha):
try:
out = subprocess.check_output(["git","ls-tree","-r","--name-only",sha,dir_path], text=True)
paths = [p.strip() for p in out.splitlines() if p.strip().endswith(".tf")]
return paths
except Exception:
return []
def collect_azurerm_types_for_app(app_dir, sha):
az_types = set()
module_dirs = set()
tf_paths = list_repo_tf_files_under(app_dir, sha)
for p in tf_paths:
txt = get_file_at_sha(p, sha)
if not txt:
continue
az_types |= parse_azurerm_resource_types_from_tf(txt)
for src in parse_module_sources_from_tf(txt):
local = normalize_local_module_path(src, app_dir)
if local:
module_dirs.add(local)
for mdir in sorted(module_dirs):
m_tf_paths = list_repo_tf_files_under(mdir, sha)
for p in m_tf_paths:
txt = get_file_at_sha(p, sha)
if not txt:
continue
az_types |= parse_azurerm_resource_types_from_tf(txt)
return az_types
with open("issues.json") as f:
issues = json.load(f)
issue_to_types = {}
issue_turnaround = {} # issue -> days (float)
for issue in issues:
inum = issue["number"]
created = issue.get("created_at")
closed = issue.get("closed_at")
if created and closed:
from datetime import datetime
fmt = "%Y-%m-%dT%H:%M:%SZ"
try:
dt_created = datetime.strptime(created, fmt)
dt_closed = datetime.strptime(closed, fmt)
delta_days = (dt_closed - dt_created).total_seconds() / 86400.0
except Exception:
delta_days = None
else:
delta_days = None
issue_turnaround[inum] = delta_days
pr_urls = issue.get("pr_urls",[])
pr_numbers = sorted({pr_number_from_url(u) for u in pr_urls if pr_number_from_url(u)})
types_for_issue = set()
for prn in pr_numbers:
sha = get_pr_head_sha(prn)
files = list_pr_files(prn)
touched_apps = set()
for f in files:
path = f.get("filename","")
if path.startswith("workload/"):
parts = path.split("/")
if len(parts) >= 2:
touched_apps.add("/".join(parts[:2]))
for app_dir in sorted(touched_apps):
types_for_issue |= collect_azurerm_types_for_app(app_dir, sha)
if types_for_issue:
issue_to_types[inum] = sorted(types_for_issue)
# Build CSV rows: issue, azurerm_type
rows = []
for inum, types in issue_to_types.items():
for t in set(types):
rows.append({"issue": inum, "azurerm_type": t})
import pandas as pd
df = pd.DataFrame(rows)
df.to_csv("severity_data.csv", index=False)
# Turnaround CSV
ta_rows = []
for inum, days in issue_turnaround.items():
ta_rows.append({"issue": inum, "turnaround_days": days})
pd.DataFrame(ta_rows).to_csv("turnaround.csv", index=False)
with open("issue_to_azurerm_types.json","w") as f:
json.dump(issue_to_types, f, indent=2)
with open("issue_turnaround.json","w") as f:
json.dump(issue_turnaround, f, indent=2)
print(f"ISSUES_WITH_TYPES={len(issue_to_types)}")
PY
- name: Generate charts and markdown (severity + turnaround) and include AI summary
id: report
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
set -euo pipefail
python - <<'PY'
import os, json, datetime
import pandas as pd
import matplotlib.pyplot as plt
ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
os.makedirs("history", exist_ok=True)
# Severity chart (issue frequency by azurerm type)
if os.path.exists("severity_data.csv"):
df = pd.read_csv("severity_data.csv")
if not df.empty:
counts = df.groupby("azurerm_type")["issue"].nunique().sort_values(ascending=False)
else:
counts = pd.Series(dtype=int)
else:
counts = pd.Series(dtype=int)
png_sev = f"history/severity-by-azurerm-{ts}.png"
plt.figure(figsize=(12,6))
if not counts.empty:
counts.plot(kind="bar")
plt.title("Issue frequency by azurerm resource type")
plt.xlabel("azurerm resource type")
plt.ylabel("number of closed issues touching type")
else:
plt.text(0.5, 0.5, "No azurerm-impacting issues in window", ha="center", va="center")
plt.axis("off")
plt.tight_layout()
plt.savefig(png_sev)
plt.clf()
# Turnaround bar chart for issues closed in window
ta_png = f"history/turnaround-by-issue-{ts}.png"
if os.path.exists("turnaround.csv"):
ta = pd.read_csv("turnaround.csv")
ta = ta.dropna(subset=["turnaround_days"])
if not ta.empty:
# sort by turnaround descending for visibility
ta_sorted = ta.sort_values("turnaround_days", ascending=False).head(50)
plt.figure(figsize=(12,6))
plt.bar(ta_sorted["issue"].astype(str), ta_sorted["turnaround_days"])
plt.xticks(rotation=90)
plt.title("Turnaround time (days) for closed issues in window")
plt.xlabel("Issue number")
plt.ylabel("Turnaround (days)")
plt.tight_layout()
plt.savefig(ta_png)
plt.clf()
else:
plt.figure(figsize=(8,2))
plt.text(0.5,0.5,"No turnaround data available",ha="center",va="center")
plt.axis("off")
plt.savefig(ta_png)
plt.clf()
else:
plt.figure(figsize=(8,2))
plt.text(0.5,0.5,"No turnaround data available",ha="center",va="center")
plt.axis("off")
plt.savefig(ta_png)
plt.clf()
# Prepare condensed issues JSON for AI: one-line per issue with number, user, title
if os.path.exists("issues.json"):
with open("issues.json") as f:
issues = json.load(f)
else:
issues = []
condensed = []
for i in issues:
condensed.append({
"number": i.get("number"),
"user": i.get("user"),
"title": i.get("title"),
"html_url": i.get("html_url")
})
with open("issues_for_ai.json","w") as f:
json.dump(condensed, f, indent=2)
# Call OpenAI Chat to produce "who wants what" one-liners
import subprocess, shlex, sys
ai_input = json.dumps(condensed)[:15000] # truncate to avoid huge payloads
prompt = (
"You are given a JSON array of GitHub issues with fields: number, user, title, html_url. "
"Produce a concise list of one-line 'who wants what' statements, one per issue, in plain text. "
"Format: '#<number> — <user> wants <succinct request derived from title>'. "
"Do not add commentary. If the title is ambiguous, produce a best-effort short paraphrase."
f"\n\nJSON:\n{ai_input}"
)
# Use curl to call OpenAI Chat Completions (Chat API). Adjust model as appropriate.
import os, json, subprocess
OPENAI_KEY = os.environ.get("OPENAI_API_KEY")
if OPENAI_KEY:
payload = {
"model": "gpt-4o-mini",
"messages": [{"role":"system","content":"You are a concise summarizer."},
{"role":"user","content":prompt}],
"temperature":0.2,
"max_tokens":400
}
proc = subprocess.run([
"curl","-sS","https://api.openai.com/v1/chat/completions",
"-H", "Content-Type: application/json",
"-H", f"Authorization: Bearer {OPENAI_KEY}",
"-d", json.dumps(payload)
], capture_output=True, text=True)
if proc.returncode == 0 and proc.stdout:
try:
resp = json.loads(proc.stdout)
ai_text = resp["choices"][0]["message"]["content"].strip()
except Exception:
ai_text = "AI summary unavailable (parsing error)."
else:
ai_text = "AI summary unavailable (request failed)."
else:
ai_text = "AI summary skipped (no OPENAI_API_KEY)."
# Write markdown report combining charts and AI summary
md_path = f"history/severity-report-{ts}.md"
with open(md_path, "w") as f:
f.write("# Weekly Terraform azurerm hotspot report\n\n")
f.write(f"**Window (days):** {os.environ.get('WINDOW_DAYS','7')}\n\n")
f.write("## AI Summary (who wants what)\n\n")
f.write("```\n")
f.write(ai_text + "\n")
f.write("```\n\n")
f.write("## Top azurerm resource types by issue frequency\n\n")
if not counts.empty:
f.write(" + ")\n\n")
f.write(counts.head(30).to_frame("issues").to_markdown() + "\n\n")
else:
f.write("No azurerm-impacting issues found in the selected window.\n\n")
f.write("## Turnaround time for closed issues (days)\n\n")
f.write(" + ")\n\n")
f.write("## Data artifacts\n\n")
f.write("- `severity_data.csv` — per-issue azurerm type mapping\n")
f.write("- `turnaround.csv` — per-issue turnaround in days\n")
# expose outputs
print(f"REPORT_MD={md_path}")
print(f"REPORT_PNG={png_sev}")
print(f"REPORT_TA_PNG={ta_png}")
PY
- name: Add report files to history and commit via PR
id: create_pr
uses: peter-evans/create-pull-request@v6
with:
commit-message: "Add weekly Terraform azurerm hotspot report and charts (prune to last 10)"
title: "Weekly Terraform azurerm hotspot report"
body: |
This PR adds the latest weekly azurerm hotspot report and charts under `history/`.
The workflow prunes older reports to keep at most 10 report sets.
branch: "weekly-terraform-azurerm-hotspots"
base: "main"
path: "history"
- name: Prune history to max 10 report sets (post-commit)
if: steps.create_pr.outcome == 'success'
run: |
python - <<'PY'
import os, re
from pathlib import Path
hist = Path("history")
hist.mkdir(exist_ok=True)
pat = re.compile(r"^severity-by-azurerm-(\d{8}-\d{6})\.(md|png)$|^severity-report-(\d{8}-\d{6})\.md$|^turnaround-by-issue-(\d{8}-\d{6})\.(md|png)$")
# Collect timestamps by grouping filenames that contain the same timestamp
groups = {}
for p in hist.iterdir():
m = re.search(r"(\d{8}-\d{6})", p.name)
if not m:
continue
ts = m.group(1)
groups.setdefault(ts, []).append(p)
timestamps = sorted(groups.keys(), reverse=True)
keep = set(timestamps[:10])
drop = [p for ts, files in groups.items() if ts not in keep for p in files]
for p in drop:
try:
p.unlink()
except Exception:
pass
print(f"Pruned {len(drop)} files; kept {len(keep)} report sets.")
PY
- name: Notify runbook webhook (which will send az communication email)
if: steps.create_pr.outcome == 'success'
env:
RUNBOOK_WEBHOOK_URL: ${{ secrets.RUNBOOK_WEBHOOK_URL }}
PR_URL: ${{ steps.create_pr.outputs.pull-request-url }}
WINDOW_DAYS: ${{ env.WINDOW_DAYS }}
run: |
# The runbook webhook is expected to accept a JSON payload and perform the az communication email send.
# Adjust payload keys to match your runbook's expected schema.
payload=$(jq -n \
--arg pr "$PR_URL" \
--arg window "$WINDOW_DAYS" \
'{subject: ("Weekly Terraform azurerm hotspot report - " + $window + "d"), body: ("A new weekly azurerm hotspot report has been generated. Review the PR: " + $pr), pr_url: $pr, window_days: $window}')
curl -sS -X POST "$RUNBOOK_WEBHOOK_URL" \
-H "Content-Type: application/json" \
-d "$payload"
- name: Output artifact list
if: always()
run: |
echo "Generated files in history/:"
ls -la history || true