This extends the workflow we built across the previous three articles to pull requests, adding:
• Heatmaps use the same issue‑to‑type matrix
• Trend lines use the same CSVs once history accumulates
• Dependency graphs use the already implemented module‑source parsing
All three are staples of custom GitHub dashboards and supply most of their visual appeal; a minimal sketch of the heatmap construction follows.
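To make the heatmap concrete before diving into the workflow, here is a minimal sketch. It assumes a CSV with one row per (PR, azurerm type) pair, which is exactly what the analysis step below produces; the file names are otherwise illustrative.

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# One row per (pr, azurerm_type) pair, as written by the analysis step
df = pd.read_csv("pr_severity_data.csv")

# Rows: azurerm resource types; columns: PR numbers; cells: touch counts
pivot = df.pivot_table(index="azurerm_type", columns="pr",
                       aggfunc="size", fill_value=0)

sns.heatmap(pivot, cmap="YlGnBu")
plt.tight_layout()
plt.savefig("pr-heatmap.png")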
The full workflow YAML follows:
---
name: Weekly Terraform PR analytics with AI summary and advanced visuals
on:
workflow_dispatch:
inputs:
window_days:
description: "Number of days back to collect closed PRs (integer)"
required: false
default: "7"
schedule:
- cron: "0 14 * * MON" # 14:00 UTC, i.e. 6 AM Pacific Standard Time (7 AM PDT), staggered from the issue workflow
permissions:
contents: write
pull-requests: write # required by peter-evans/create-pull-request below
issues: read
jobs:
pr_report:
runs-on: ubuntu-latest
steps:
- name: Check out repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install requests pandas matplotlib seaborn networkx python-hcl2
- name: Prepare environment variables
run: |
echo "WINDOW_DAYS=${{ github.event.inputs.window_days || '7' }}" >> $GITHUB_ENV
echo "REPO=${GITHUB_REPOSITORY}" >> $GITHUB_ENV
###########################################################################
# 1. FETCH CLOSED PRs WITHIN WINDOW
###########################################################################
- name: Fetch closed PRs (window)
id: fetch
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ env.REPO }}
WINDOW_DAYS: ${{ env.WINDOW_DAYS }}
run: |
python << 'PY'
import os, json, datetime, requests
REPO = os.environ["REPO"]
TOKEN = os.environ["GH_TOKEN"]
WINDOW_DAYS = int(os.environ["WINDOW_DAYS"])
HEADERS = {"Authorization": f"Bearer {TOKEN}"}
since = (datetime.datetime.utcnow() - datetime.timedelta(days=WINDOW_DAYS)).isoformat() + "Z"
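# ISO-8601 UTC strings sort lexicographically, so merged_at can be compared to this cutoff as plain text below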
def gh(url, params=None):
r = requests.get(url, headers=HEADERS, params=params)
r.raise_for_status()
return r.json()
prs = []
page = 1
while True:
batch = gh(
f"https://api.github.com/repos/{REPO}/pulls",
{"state":"closed","per_page":100,"page":page}
)
if not batch:
break
for pr in batch:
if pr.get("merged_at") and pr["merged_at"] >= since:
prs.append({
"number": pr["number"],
"title": pr["title"],
"user": pr["user"]["login"],
"created_at": pr["created_at"],
"merged_at": pr["merged_at"],
"html_url": pr["html_url"]
})
page += 1
with open("prs.json","w") as f:
json.dump(prs, f, indent=2)
print(f"FOUND_PRS={len(prs)}")
PY
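# Note: the built-in GITHUB_TOKEN is rate-limited (about 1,000 API requests per hour per repository);
# the per-PR calls in the next step stay well inside that at typical weekly volumes.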
###########################################################################
# 2. ANALYZE PRs: MODULES, AZURERM TYPES, TURNAROUND, REVIEW LATENCY
###########################################################################
- name: Analyze PRs for Terraform impact
id: analyze
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ env.REPO }}
run: |
python << 'PY'
import os, json, re, requests, subprocess
import hcl2
from datetime import datetime
REPO = os.environ["REPO"]
TOKEN = os.environ["GH_TOKEN"]
HEADERS = {"Authorization": f"Bearer {TOKEN}"}
def gh(url, params=None):
r = requests.get(url, headers=HEADERS, params=params)
r.raise_for_status()
return r.json()
def gh_text(url):
r = requests.get(url, headers=HEADERS)
r.raise_for_status()
return r.text
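# The PR files endpoint is paginated; walk every page so large PRs are covered in full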
def list_pr_files(pr):
files = []
page = 1
while True:
batch = gh(
f"https://api.github.com/repos/{REPO}/pulls/{pr}/files",
{"per_page":100,"page":page}
)
if not batch:
break
files.extend(batch)
page += 1
return files
def get_pr_reviews(pr):
return gh(f"https://api.github.com/repos/{REPO}/pulls/{pr}/reviews")
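# Read one file at a specific commit; download_url returns raw text, avoiding the base64 payload of the contents API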
def get_file_at_sha(path, sha):
r = requests.get(
f"https://api.github.com/repos/{REPO}/contents/{path}",
headers=HEADERS,
params={"ref": sha}
)
if r.status_code == 404:
return None
data = r.json()
if data.get("download_url"):
return gh_text(data["download_url"])
return None
def parse_azurerm(tf):
    try:
        obj = hcl2.loads(tf)
    except Exception:
        return set()
    out = set()
    # python-hcl2 returns top-level blocks as a list of single-key dicts,
    # e.g. {"resource": [{"azurerm_storage_account": {...}}, ...]}
    res = obj.get("resource", [])
    if isinstance(res, dict):
        res = [res]
    for block in res:
        if isinstance(block, dict):
            for rtype in block:
                if rtype.startswith("azurerm_"):
                    out.add(rtype)
    return out
def parse_modules(tf):
    try:
        obj = hcl2.loads(tf)
    except Exception:
        return set()
    out = set()
    # Same list-of-dicts shape as resources; tolerate a dict just in case
    mods = obj.get("module", [])
    if isinstance(mods, dict):
        mods = [mods]
    for block in mods:
        if isinstance(block, dict):
            for _, body in block.items():
                src = body.get("source") if isinstance(body, dict) else None
                if isinstance(src, str):
                    out.add(src)
    return out
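# Map relative module sources (./ or ../) onto repository paths rooted at the app directory;
# registry and other remote sources return None and are skipped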
def normalize_local(src, app):
if src.startswith("./") or src.startswith("../"):
import posixpath
return posixpath.normpath(posixpath.join(app, src))
return None
def list_tf(app_dir, sha):
    # List .tf files under the app directory at the PR head commit
    # (needs the fetch-depth: 0 checkout above so the SHA is present locally)
    try:
        out = subprocess.check_output(
            ["git", "ls-tree", "-r", "--name-only", sha, app_dir],
            text=True
        )
        return [p for p in out.splitlines() if p.endswith(".tf")]
    except subprocess.CalledProcessError:
        return []
with open("prs.json") as f:
prs = json.load(f)
pr_to_types = {}
pr_turnaround = {}
pr_review_latency = {}
module_deps = {}
for pr in prs:
num = pr["number"]
created = pr["created_at"]
merged = pr["merged_at"]
fmt = "%Y-%m-%dT%H:%M:%SZ"
dtc = datetime.strptime(created, fmt)
dtm = datetime.strptime(merged, fmt)
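    # Turnaround = merged_at minus created_at, expressed in fractional days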
pr_turnaround[num] = (dtm - dtc).total_seconds() / 86400.0
    reviews = get_pr_reviews(num)
    # Guard against reviews that lack submitted_at (e.g. PENDING reviews)
    submitted = [r["submitted_at"] for r in reviews if r.get("submitted_at")]
    if submitted:
        pr_review_latency[num] = (
            datetime.strptime(min(submitted), fmt) - dtc
        ).total_seconds() / 86400.0
    else:
        pr_review_latency[num] = None
files = list_pr_files(num)
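    # One extra API call per PR resolves the head commit; file contents are read at this SHA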
sha = gh(f"https://api.github.com/repos/{REPO}/pulls/{num}")["head"]["sha"]
touched_apps = set()
for f in files:
path = f["filename"]
if path.startswith("workload/"):
parts = path.split("/")
if len(parts) >= 2:
touched_apps.add("/".join(parts[:2]))
types = set()
for app in touched_apps:
tf_paths = list_tf(app, sha)
for p in tf_paths:
txt = get_file_at_sha(p, sha)
if not txt:
continue
types |= parse_azurerm(txt)
for src in parse_modules(txt):
local = normalize_local(src, app)
if local:
module_deps.setdefault(app, set()).add(local)
if types:
pr_to_types[num] = sorted(types)
import pandas as pd
rows = []
for pr, types in pr_to_types.items():
for t in types:
rows.append({"pr": pr, "azurerm_type": t})
pd.DataFrame(rows).to_csv("pr_severity_data.csv", index=False)
pd.DataFrame(
[{"pr":k,"turnaround_days":v} for k,v in pr_turnaround.items()]
).to_csv("pr_turnaround.csv", index=False)
pd.DataFrame(
[{"pr":k,"review_latency_days":v} for k,v in pr_review_latency.items()]
).to_csv("pr_review_latency.csv", index=False)
with open("pr_module_deps.json","w") as f:
json.dump({k: sorted(list(v)) for k,v in module_deps.items()}, f, indent=2)
PY
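# The analysis step emits four artifacts (pr_severity_data.csv, pr_turnaround.csv,
# pr_review_latency.csv, pr_module_deps.json); the chart step below consumes all four.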
###########################################################################
# 3. GENERATE CHARTS (heatmap, trend, dependency, turnaround, review latency)
###########################################################################
- name: Generate PR charts and markdown
id: charts
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
python << 'PY'
import os, json, datetime, glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
os.makedirs("history/pr", exist_ok=True)
# Load severity data
if os.path.exists("pr_severity_data.csv"):
df = pd.read_csv("pr_severity_data.csv")
counts = df.groupby("azurerm_type")["pr"].nunique().sort_values(ascending=False)
else:
counts = pd.Series(dtype=int)
# Severity bar
sev_png = f"history/pr/pr-severity-{ts}.png"
plt.figure(figsize=(12,6))
if not counts.empty:
counts.plot(kind="bar")
plt.title("PR frequency by azurerm resource type")
plt.xlabel("azurerm resource type")
plt.ylabel("number of PRs touching type")
else:
plt.text(0.5,0.5,"No PR data",ha="center",va="center")
plt.axis("off")
plt.tight_layout()
plt.savefig(sev_png)
plt.clf()
# Heatmap
heat_png = f"history/pr/pr-heatmap-{ts}.png"
if os.path.exists("pr_severity_data.csv"):
mat = pd.read_csv("pr_severity_data.csv")
if not mat.empty:
pivot = mat.pivot_table(index="azurerm_type", columns="pr", aggfunc='size', fill_value=0)
pivot['total'] = pivot.sum(axis=1)
pivot = pivot.sort_values('total', ascending=False).drop(columns=['total'])
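        # Cap at 100 PR columns so the x-axis labels stay legible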
if pivot.shape[1] > 100:
pivot = pivot.iloc[:, :100]
plt.figure(figsize=(14, max(6, 0.2*pivot.shape[0])))
sns.heatmap(pivot, cmap="YlGnBu")
plt.title("Heatmap: azurerm types vs PRs")
plt.tight_layout()
plt.savefig(heat_png)
plt.clf()
# Trend lines
trend_png = f"history/pr/pr-trend-{ts}.png"
hist_files = glob.glob("history/pr/pr-severity-*.csv")
if os.path.exists("pr_severity_data.csv"):
hist_files.append("pr_severity_data.csv")
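# Include the live CSV so the current run contributes a data point even before it is copied into history/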
trend_df = pd.DataFrame()
for f in hist_files:
try:
        import re
        m = re.search(r"(\d{8}-\d{6})", f)
        # The live CSV has no timestamp in its filename; label it with this run's ts
        label = m.group(1) if m else ts
tmp = pd.read_csv(f)
if tmp.empty:
continue
counts_tmp = tmp.groupby("azurerm_type")["pr"].nunique().rename(label)
trend_df = pd.concat([trend_df, counts_tmp], axis=1)
    except Exception:
        pass
if not trend_df.empty:
trend_df = trend_df.fillna(0).T
try:
trend_df.index = pd.to_datetime(trend_df.index, format="%Y%m%d-%H%M%S", errors='coerce')
    except Exception:
        pass
    # Sort chronologically so iloc[-1] picks the newest snapshot
    trend_df = trend_df.sort_index()
    plt.figure(figsize=(14,6))
    latest = trend_df.iloc[-1].sort_values(ascending=False).head(8).index.tolist()
for col in latest:
plt.plot(trend_df.index, trend_df[col], marker='o', label=col)
plt.legend()
plt.title("Trend lines: PR frequency over time")
plt.tight_layout()
plt.savefig(trend_png)
plt.clf()
# Dependency graph
dep_png = f"history/pr/pr-deps-{ts}.png"
if os.path.exists("pr_module_deps.json"):
with open("pr_module_deps.json") as f:
deps = json.load(f)
G = nx.DiGraph()
for app, mods in deps.items():
G.add_node(app, type='app')
for m in mods:
G.add_node(m, type='module')
G.add_edge(app, m)
plt.figure(figsize=(12,8))
pos = nx.spring_layout(G, k=0.5)
colors = ['#1f78b4' if G.nodes[n]['type']=='app' else '#33a02c' for n in G.nodes()]
nx.draw(G, pos, with_labels=True, node_color=colors, node_size=600, arrows=True)
plt.title("PR module dependency graph")
plt.tight_layout()
plt.savefig(dep_png)
plt.clf()
# Turnaround chart
ta_png = f"history/pr/pr-turnaround-{ts}.png"
if os.path.exists("pr_turnaround.csv"):
ta = pd.read_csv("pr_turnaround.csv").dropna()
ta = ta.sort_values("turnaround_days", ascending=False).head(50)
plt.figure(figsize=(12,6))
plt.bar(ta["pr"].astype(str), ta["turnaround_days"])
plt.xticks(rotation=90)
plt.title("PR turnaround time (days)")
plt.tight_layout()
plt.savefig(ta_png)
plt.clf()
# Review latency chart
rl_png = f"history/pr/pr-review-latency-{ts}.png"
if os.path.exists("pr_review_latency.csv"):
rl = pd.read_csv("pr_review_latency.csv").dropna()
rl = rl.sort_values("review_latency_days", ascending=False).head(50)
plt.figure(figsize=(12,6))
plt.bar(rl["pr"].astype(str), rl["review_latency_days"])
plt.xticks(rotation=90)
plt.title("PR review latency (days)")
plt.tight_layout()
plt.savefig(rl_png)
plt.clf()
# AI summary
if os.path.exists("prs.json"):
prs = json.load(open("prs.json"))
else:
prs = []
condensed = [
{"pr": p["number"], "user": p["user"], "title": p["title"], "url": p["html_url"]}
for p in prs
]
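# Send only a compact projection of each PR; the payload is also truncated (~15,000 chars) below to respect context limits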
ai_text = "AI summary skipped."
if os.environ.get("OPENAI_API_KEY"):
import subprocess
prompt = (
"Summarize each PR as a one-line 'who changed what' statement. "
"Format: '#<pr> — <user> changed <succinct summary>'."
)
payload = {
"model": "gpt-4o-mini",
"messages": [
{"role":"system","content":"You summarize PRs concisely."},
{"role":"user","content": prompt + "\n\n" + json.dumps(condensed)[:15000]}
],
"temperature":0.2
}
proc = subprocess.run(
["curl","-sS","https://api.openai.com/v1/chat/completions",
"-H","Content-Type: application/json",
"-H",f"Authorization: Bearer {os.environ['OPENAI_API_KEY']}",
"-d",json.dumps(payload)],
capture_output=True, text=True
)
    try:
        ai_text = json.loads(proc.stdout)["choices"][0]["message"]["content"]
    except Exception:
        ai_text = "AI summary failed; see the raw response in the workflow logs."
        print("Could not parse AI response:", proc.stdout[:500])
# Markdown report
md_path = f"history/pr/pr-report-{ts}.md"
with open(md_path, "w") as f:
f.write("# Weekly Terraform PR analytics report\n\n")
f.write(f"**Window (days):** {os.environ['WINDOW_DAYS']}\n\n")
f.write("## AI Summary (who changed what)\n\n```\n")
f.write(ai_text + "\n```\n\n")
f.write("## PR frequency by azurerm type\n\n")
f.write(f"})\n\n")
f.write("## Heatmap: azurerm types vs PRs\n\n")
f.write(f"})\n\n")
f.write("## Trend lines\n\n")
f.write(f"})\n\n")
f.write("## Dependency graph\n\n")
f.write(f"})\n\n")
f.write("## PR turnaround time\n\n")
f.write(f"})\n\n")
f.write("## PR review latency\n\n")
f.write(f"})\n\n")
f.write("## Data artifacts\n\n")
f.write("- `pr_severity_data.csv` — per-PR azurerm type mapping\n")
f.write("- `pr_turnaround.csv` — per-PR turnaround in days\n")
f.write("- `pr_review_latency.csv` — per-PR review latency in days\n")
f.write("- `pr_module_deps.json` — module dependency data used for graph\n")
# Save CSVs into history for future trend aggregation
import shutil
if os.path.exists("pr_severity_data.csv"):
shutil.copy("pr_severity_data.csv", f"history/pr/pr-severity-{ts}.csv")
if os.path.exists("pr_turnaround.csv"):
shutil.copy("pr_turnaround.csv", f"history/pr/pr-turnaround-{ts}.csv")
if os.path.exists("pr_review_latency.csv"):
shutil.copy("pr_review_latency.csv", f"history/pr/pr-review-latency-{ts}.csv")
print(f"PR_REPORT_MD={md_path}")
print(f"PR_REPORT_SEV={sev_png}")
print(f"PR_REPORT_HEAT={heat_png}")
print(f"PR_REPORT_TREND={trend_png}")
print(f"PR_REPORT_DEP={dep_png}")
print(f"PR_REPORT_TA={ta_png}")
print(f"PR_REPORT_RL={rl_png}")
PY
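# The PR_REPORT_* lines above are printed for the run log only; to consume these
# paths in later steps, append them to $GITHUB_OUTPUT instead.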
###########################################################################
# 4. PRUNE HISTORY, THEN CREATE PR WITH REPORT
###########################################################################
- name: Prune PR history to max 10 report sets
  run: |
    python << 'PY'
    import re
    from pathlib import Path
    hist = Path("history/pr")
    hist.mkdir(parents=True, exist_ok=True)
    # Group files by the shared timestamp in their names, keep the 10 newest sets
    groups = {}
    for p in hist.iterdir():
        m = re.search(r"(\d{8}-\d{6})", p.name)
        if not m:
            continue
        ts = m.group(1)
        groups.setdefault(ts, []).append(p)
    timestamps = sorted(groups.keys(), reverse=True)
    keep = set(timestamps[:10])
    drop = [p for ts, files in groups.items() if ts not in keep for p in files]
    for p in drop:
        try:
            p.unlink()
        except Exception:
            pass
    print(f"Pruned {len(drop)} files; kept {len(keep)} report sets.")
    PY
# Pruning must happen before the PR is created so the deletions are part of the commit.
- name: Create PR for PR analytics report
  id: create_pr
  uses: peter-evans/create-pull-request@v6
  with:
    commit-message: "Add weekly Terraform PR analytics report and visuals (prune to last 10)"
    title: "Weekly Terraform PR analytics report"
    body: |
      This PR adds the latest weekly PR analytics report and charts under `history/pr/`.
      The workflow prunes older reports to keep at most 10 report sets.
    branch: "weekly-terraform-pr-analytics"
    base: "main"
    add-paths: "history/pr"
###########################################################################
# 5. NOTIFY RUNBOOK WEBHOOK (OPTIONAL EMAIL VIA AZ COMMUNICATION)
###########################################################################
- name: Notify runbook webhook for PR analytics
if: steps.create_pr.outcome == 'success'
env:
RUNBOOK_WEBHOOK_URL: ${{ secrets.RUNBOOK_WEBHOOK_URL }}
PR_URL: ${{ steps.create_pr.outputs.pull-request-url }}
WINDOW_DAYS: ${{ env.WINDOW_DAYS }}
run: |
payload=$(jq -n \
--arg pr "$PR_URL" \
--arg window "$WINDOW_DAYS" \
'{subject: ("Weekly Terraform PR analytics report - " + $window + "d"), body: ("A new weekly PR analytics report has been generated. Review the PR: " + $pr), pr_url: $pr, window_days: $window}')
# The webhook is optional; skip quietly when the secret is not configured
if [ -n "$RUNBOOK_WEBHOOK_URL" ]; then
  curl -sS -X POST "$RUNBOOK_WEBHOOK_URL" \
    -H "Content-Type: application/json" \
    -d "$payload"
fi
- name: Output PR analytics artifacts
if: always()
run: |
echo "Generated files in history/pr/:"
ls -la history/pr || true