Friday, March 13, 2026

This extends the workflow from the previous three articles to pull requests, adding the following:

• Heatmaps built from the same issue-to-type matrix (here, a PR-to-azurerm-type matrix)

• Trend lines built from the same CSVs once history accumulates

• Dependency graphs built from the already implemented module-source parsing

These three visuals are typical of custom GitHub dashboards and make for good eye candy; a minimal sketch of the heatmap step is shown below, followed by the full workflow.
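Here is a minimal, standalone sketch of the heatmap step, assuming a CSV with the same "pr" and "azurerm_type" columns that the analysis step below writes to pr_severity_data.csv:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# One row per (PR, azurerm resource type) pair, as produced by the analysis step.
df = pd.read_csv("pr_severity_data.csv")

# Pivot into a type-by-PR matrix; each cell counts how often a PR touched a type.
pivot = df.pivot_table(index="azurerm_type", columns="pr", aggfunc="size", fill_value=0)

sns.heatmap(pivot, cmap="YlGnBu")
plt.title("Heatmap: azurerm types vs PRs")
plt.tight_layout()
plt.savefig("pr-heatmap.png")

The workflow performs the same pivot in its chart step, adding sorting and column limits so very large repositories stay readable.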

The full workflow YAML follows:

---

name: Weekly Terraform PR analytics with AI summary and advanced visuals

on:

  workflow_dispatch:

    inputs:

      window_days:

        description: "Number of days back to collect closed PRs (integer)"

        required: false

        default: "7"

  schedule:

    - cron: "0 14 * * MON" # 6am Pacific Standard Time (14:00 UTC), staggered from the issue workflow

permissions:

  contents: write

  pull-requests: write # required by peter-evans/create-pull-request to open the report PR

  issues: read

jobs:

  pr_report:

    runs-on: ubuntu-latest

steps:

  - name: Check out repository

    uses: actions/checkout@v4

    with:

      fetch-depth: 0

  - name: Set up Python

    uses: actions/setup-python@v5

    with:

      python-version: "3.11"

  - name: Install dependencies

    run: |

      python -m pip install --upgrade pip

      pip install requests pandas matplotlib seaborn networkx python-hcl2

  - name: Prepare environment variables

    run: |

      echo "WINDOW_DAYS=${{ github.event.inputs.window_days || '7' }}" >> $GITHUB_ENV

      echo "REPO=${GITHUB_REPOSITORY}" >> $GITHUB_ENV

  ###########################################################################

  # 1. FETCH CLOSED PRs WITHIN WINDOW

  ###########################################################################

  - name: Fetch closed PRs (window)

    id: fetch

    env:

      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      REPO: ${{ env.REPO }}

      WINDOW_DAYS: ${{ env.WINDOW_DAYS }}

    run: |

      python << 'PY'

      import os, json, datetime, requests

      REPO = os.environ["REPO"]

      TOKEN = os.environ["GH_TOKEN"]

      WINDOW_DAYS = int(os.environ["WINDOW_DAYS"])

      HEADERS = {"Authorization": f"Bearer {TOKEN}"}

      since = (datetime.datetime.utcnow() - datetime.timedelta(days=WINDOW_DAYS)).isoformat() + "Z"

      def gh(url, params=None):

        r = requests.get(url, headers=HEADERS, params=params)

        r.raise_for_status()

        return r.json()

      prs = []

      page = 1

      while True:

        batch = gh(

          f"https://api.github.com/repos/{REPO}/pulls",

          {"state":"closed","per_page":100,"page":page}

        )

        if not batch:

          break

        for pr in batch:

          if pr.get("merged_at") and pr["merged_at"] >= since:

            prs.append({

              "number": pr["number"],

              "title": pr["title"],

              "user": pr["user"]["login"],

              "created_at": pr["created_at"],

              "merged_at": pr["merged_at"],

              "html_url": pr["html_url"]

            })

        page += 1

      with open("prs.json","w") as f:

        json.dump(prs, f, indent=2)

      print(f"FOUND_PRS={len(prs)}")

      PY

  ###########################################################################

  # 2. ANALYZE PRs: MODULES, AZURERM TYPES, TURNAROUND, REVIEW LATENCY

  ###########################################################################

  - name: Analyze PRs for Terraform impact

    id: analyze

    env:

      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      REPO: ${{ env.REPO }}

    run: |

      python << 'PY'

      import os, json, re, requests, subprocess

      import hcl2

      from datetime import datetime

      REPO = os.environ["REPO"]

      TOKEN = os.environ["GH_TOKEN"]

      HEADERS = {"Authorization": f"Bearer {TOKEN}"}

      def gh(url, params=None):

        r = requests.get(url, headers=HEADERS, params=params)

        r.raise_for_status()

        return r.json()

      def gh_text(url):

        r = requests.get(url, headers=HEADERS)

        r.raise_for_status()

        return r.text

      def list_pr_files(pr):

        files = []

        page = 1

        while True:

          batch = gh(

            f"https://api.github.com/repos/{REPO}/pulls/{pr}/files",

            {"per_page":100,"page":page}

          )

          if not batch:

            break

          files.extend(batch)

          page += 1

        return files

      def get_pr_reviews(pr):

        return gh(f"https://api.github.com/repos/{REPO}/pulls/{pr}/reviews")

      def get_file_at_sha(path, sha):

        r = requests.get(

          f"https://api.github.com/repos/{REPO}/contents/{path}",

          headers=HEADERS,

          params={"ref": sha}

        )

        if r.status_code == 404:

          return None

        data = r.json()

        if data.get("download_url"):

          return gh_text(data["download_url"])

        return None

      def parse_azurerm(tf):

        try:

          obj = hcl2.loads(tf)

        except Exception:

          return set()

        out = set()

        # python-hcl2 returns top-level blocks as a list of single-key dicts;

        # accept a plain dict as well in case the parser version differs.

        res = obj.get("resource", [])

        blocks = res if isinstance(res, list) else [res]

        for block in blocks:

          if not isinstance(block, dict):

            continue

          for rtype in block.keys():

            if rtype.startswith("azurerm_"):

              out.add(rtype)

        return out

      def parse_modules(tf):

        try:

          obj = hcl2.loads(tf)

        except Exception:

          return set()

        out = set()

        # module blocks are also a list of single-key dicts; older python-hcl2

        # versions may wrap scalar values such as source in a one-element list.

        mods = obj.get("module", [])

        blocks = mods if isinstance(mods, list) else [mods]

        for block in blocks:

          if not isinstance(block, dict):

            continue

          for body in block.values():

            src = body.get("source") if isinstance(body, dict) else None

            if isinstance(src, list) and src:

              src = src[0]

            if isinstance(src, str):

              out.add(src)

        return out

      def normalize_local(src, app):

        if src.startswith("./") or src.startswith("../"):

          import posixpath

          return posixpath.normpath(posixpath.join(app, src))

        return None

      def list_tf(dir, sha):

        try:

          out = subprocess.check_output(

            ["git","ls-tree","-r","--name-only",sha,dir],

            text=True

          )

          return [p for p in out.splitlines() if p.endswith(".tf")]

        except:

          return []

      with open("prs.json") as f:

        prs = json.load(f)

      pr_to_types = {}

      pr_turnaround = {}

      pr_review_latency = {}

      module_deps = {}

      for pr in prs:

        num = pr["number"]

        created = pr["created_at"]

        merged = pr["merged_at"]

        fmt = "%Y-%m-%dT%H:%M:%SZ"

        dtc = datetime.strptime(created, fmt)

        dtm = datetime.strptime(merged, fmt)

        pr_turnaround[num] = (dtm - dtc).total_seconds() / 86400.0

        reviews = get_pr_reviews(num)

        if reviews:

          first = min(r["submitted_at"] for r in reviews if r.get("submitted_at"))

          pr_review_latency[num] = (

            datetime.strptime(first, fmt) - dtc

          ).total_seconds() / 86400.0

        else:

          pr_review_latency[num] = None

        files = list_pr_files(num)

        sha = gh(f"https://api.github.com/repos/{REPO}/pulls/{num}")["head"]["sha"]

        touched_apps = set()

        for f in files:

          path = f["filename"]

          if path.startswith("workload/"):

            parts = path.split("/")

            if len(parts) >= 2:

              touched_apps.add("/".join(parts[:2]))

        types = set()

        for app in touched_apps:

          tf_paths = list_tf(app, sha)

          for p in tf_paths:

            txt = get_file_at_sha(p, sha)

            if not txt:

              continue

            types |= parse_azurerm(txt)

            for src in parse_modules(txt):

              local = normalize_local(src, app)

              if local:

                module_deps.setdefault(app, set()).add(local)

        if types:

          pr_to_types[num] = sorted(types)

      import pandas as pd

      rows = []

      for pr, types in pr_to_types.items():

        for t in types:

          rows.append({"pr": pr, "azurerm_type": t})

      # Write explicit headers even when rows is empty so the charts step can read the CSV.

      pd.DataFrame(rows, columns=["pr", "azurerm_type"]).to_csv("pr_severity_data.csv", index=False)

      pd.DataFrame(

        [{"pr":k,"turnaround_days":v} for k,v in pr_turnaround.items()]

      ).to_csv("pr_turnaround.csv", index=False)

      pd.DataFrame(

        [{"pr":k,"review_latency_days":v} for k,v in pr_review_latency.items()]

      ).to_csv("pr_review_latency.csv", index=False)

      with open("pr_module_deps.json","w") as f:

        json.dump({k: sorted(list(v)) for k,v in module_deps.items()}, f, indent=2)

      PY

  ###########################################################################

  # 3. GENERATE CHARTS (heatmap, trend, dependency, turnaround, review latency)

  ###########################################################################

  - name: Generate PR charts and markdown

    id: charts

    env:

      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

    run: |

      python << 'PY'

      import os, json, datetime, glob

      import pandas as pd

      import matplotlib.pyplot as plt

      import seaborn as sns

      import networkx as nx

      ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")

      os.makedirs("history/pr", exist_ok=True)

      # Load severity data

      if os.path.exists("pr_severity_data.csv"):

        df = pd.read_csv("pr_severity_data.csv")

        counts = df.groupby("azurerm_type")["pr"].nunique().sort_values(ascending=False)

      else:

        counts = pd.Series(dtype=int)

      # Severity bar

      sev_png = f"history/pr/pr-severity-{ts}.png"

      plt.figure(figsize=(12,6))

      if not counts.empty:

        counts.plot(kind="bar")

        plt.title("PR frequency by azurerm resource type")

        plt.xlabel("azurerm resource type")

        plt.ylabel("number of PRs touching type")

      else:

        plt.text(0.5,0.5,"No PR data",ha="center",va="center")

        plt.axis("off")

      plt.tight_layout()

      plt.savefig(sev_png)

      plt.clf()

      # Heatmap

      heat_png = f"history/pr/pr-heatmap-{ts}.png"

      if os.path.exists("pr_severity_data.csv"):

        mat = pd.read_csv("pr_severity_data.csv")

        if not mat.empty:

          pivot = mat.pivot_table(index="azurerm_type", columns="pr", aggfunc='size', fill_value=0)

          pivot['total'] = pivot.sum(axis=1)

          pivot = pivot.sort_values('total', ascending=False).drop(columns=['total'])

          if pivot.shape[1] > 100:

            pivot = pivot.iloc[:, :100]

          plt.figure(figsize=(14, max(6, 0.2*pivot.shape[0])))

          sns.heatmap(pivot, cmap="YlGnBu")

          plt.title("Heatmap: azurerm types vs PRs")

          plt.tight_layout()

          plt.savefig(heat_png)

          plt.clf()

      # Trend lines

      trend_png = f"history/pr/pr-trend-{ts}.png"

      hist_files = glob.glob("history/pr/pr-severity-*.csv")

      if os.path.exists("pr_severity_data.csv"):

        hist_files.append("pr_severity_data.csv")

      trend_df = pd.DataFrame()

      for f in hist_files:

        try:

          import re

          m = re.search(r"(\d{8}-\d{6})", f)

          label = m.group(1) if m else ts  # current-run CSV carries no timestamp in its name

          tmp = pd.read_csv(f)

          if tmp.empty:

            continue

          counts_tmp = tmp.groupby("azurerm_type")["pr"].nunique().rename(label)

          trend_df = pd.concat([trend_df, counts_tmp], axis=1)

        except:

          pass

      if not trend_df.empty:

        trend_df = trend_df.fillna(0).T

        try:

          trend_df.index = pd.to_datetime(trend_df.index, format="%Y%m%d-%H%M%S", errors='coerce')

        except:

          pass

        plt.figure(figsize=(14,6))

        latest = trend_df.iloc[-1].sort_values(ascending=False).head(8).index.tolist()

        for col in latest:

          plt.plot(trend_df.index, trend_df[col], marker='o', label=col)

        plt.legend()

        plt.title("Trend lines: PR frequency over time")

        plt.tight_layout()

        plt.savefig(trend_png)

        plt.clf()

      # Dependency graph

      dep_png = f"history/pr/pr-deps-{ts}.png"

      if os.path.exists("pr_module_deps.json"):

        with open("pr_module_deps.json") as f:

          deps = json.load(f)

        G = nx.DiGraph()

        for app, mods in deps.items():

          G.add_node(app, type='app')

          for m in mods:

            G.add_node(m, type='module')

            G.add_edge(app, m)

        plt.figure(figsize=(12,8))

        pos = nx.spring_layout(G, k=0.5)

        colors = ['#1f78b4' if G.nodes[n]['type']=='app' else '#33a02c' for n in G.nodes()]

        nx.draw(G, pos, with_labels=True, node_color=colors, node_size=600, arrows=True)

        plt.title("PR module dependency graph")

        plt.tight_layout()

        plt.savefig(dep_png)

        plt.clf()

      # Turnaround chart

      ta_png = f"history/pr/pr-turnaround-{ts}.png"

      if os.path.exists("pr_turnaround.csv"):

        ta = pd.read_csv("pr_turnaround.csv").dropna()

        ta = ta.sort_values("turnaround_days", ascending=False).head(50)

        plt.figure(figsize=(12,6))

        plt.bar(ta["pr"].astype(str), ta["turnaround_days"])

        plt.xticks(rotation=90)

        plt.title("PR turnaround time (days)")

        plt.tight_layout()

        plt.savefig(ta_png)

        plt.clf()

      # Review latency chart

      rl_png = f"history/pr/pr-review-latency-{ts}.png"

      if os.path.exists("pr_review_latency.csv"):

        rl = pd.read_csv("pr_review_latency.csv").dropna()

        rl = rl.sort_values("review_latency_days", ascending=False).head(50)

        plt.figure(figsize=(12,6))

        plt.bar(rl["pr"].astype(str), rl["review_latency_days"])

        plt.xticks(rotation=90)

        plt.title("PR review latency (days)")

        plt.tight_layout()

        plt.savefig(rl_png)

        plt.clf()

      # AI summary

      if os.path.exists("prs.json"):

        prs = json.load(open("prs.json"))

      else:

        prs = []

      condensed = [

        {"pr": p["number"], "user": p["user"], "title": p["title"], "url": p["html_url"]}

        for p in prs

      ]

      ai_text = "AI summary skipped."

      if os.environ.get("OPENAI_API_KEY"):

        import subprocess

        prompt = (

          "Summarize each PR as a one-line 'who changed what' statement. "

          "Format: '#<pr> — <user> changed <succinct summary>'."

        )

        payload = {

          "model": "gpt-4o-mini",

          "messages": [

            {"role":"system","content":"You summarize PRs concisely."},

            {"role":"user","content": prompt + "\n\n" + json.dumps(condensed)[:15000]}

          ],

          "temperature":0.2

        }

        proc = subprocess.run(

          ["curl","-sS","https://api.openai.com/v1/chat/completions",

           "-H","Content-Type: application/json",

           "-H",f"Authorization: Bearer {os.environ['OPENAI_API_KEY']}",

           "-d",json.dumps(payload)],

          capture_output=True, text=True

        )

        try:

          ai_text = json.loads(proc.stdout)["choices"][0]["message"]["content"]

        except:

          pass

      # Markdown report

      md_path = f"history/pr/pr-report-{ts}.md"

      with open(md_path, "w") as f:

        f.write("# Weekly Terraform PR analytics report\n\n")

        f.write(f"**Window (days):** {os.environ['WINDOW_DAYS']}\n\n")

        f.write("## AI Summary (who changed what)\n\n```\n")

        f.write(ai_text + "\n```\n\n")

        f.write("## PR frequency by azurerm type\n\n")

        f.write(f"![]({os.path.basename(sev_png)})\n\n")

        f.write("## Heatmap: azurerm types vs PRs\n\n")

        f.write(f"![]({os.path.basename(heat_png)})\n\n")

        f.write("## Trend lines\n\n")

        f.write(f"![]({os.path.basename(trend_png)})\n\n")

        f.write("## Dependency graph\n\n")

        f.write(f"![]({os.path.basename(dep_png)})\n\n")

        f.write("## PR turnaround time\n\n")

        f.write(f"![]({os.path.basename(ta_png)})\n\n")

        f.write("## PR review latency\n\n")

        f.write(f"![]({os.path.basename(rl_png)})\n\n")

        f.write("## Data artifacts\n\n")

        f.write("- `pr_severity_data.csv` — per-PR azurerm type mapping\n")

        f.write("- `pr_turnaround.csv` — per-PR turnaround in days\n")

        f.write("- `pr_review_latency.csv` — per-PR review latency in days\n")

        f.write("- `pr_module_deps.json` — module dependency data used for graph\n")


      # Save CSVs into history for future trend aggregation

      import shutil

      if os.path.exists("pr_severity_data.csv"):

        shutil.copy("pr_severity_data.csv", f"history/pr/pr-severity-{ts}.csv")

      if os.path.exists("pr_turnaround.csv"):

        shutil.copy("pr_turnaround.csv", f"history/pr/pr-turnaround-{ts}.csv")

      if os.path.exists("pr_review_latency.csv"):

        shutil.copy("pr_review_latency.csv", f"history/pr/pr-review-latency-{ts}.csv")


      print(f"PR_REPORT_MD={md_path}")

      print(f"PR_REPORT_SEV={sev_png}")

      print(f"PR_REPORT_HEAT={heat_png}")

      print(f"PR_REPORT_TREND={trend_png}")

      print(f"PR_REPORT_DEP={dep_png}")

      print(f"PR_REPORT_TA={ta_png}")

      print(f"PR_REPORT_RL={rl_png}")

      PY


  ###########################################################################

  # 4. CREATE PR WITH REPORT AND PRUNE HISTORY

  ###########################################################################

  - name: Create PR for PR analytics report

    id: create_pr

    uses: peter-evans/create-pull-request@v6

    with:

      commit-message: "Add weekly Terraform PR analytics report and visuals (prune to last 10)"

      title: "Weekly Terraform PR analytics report"

      body: |

        This PR adds the latest weekly PR analytics report and charts under `history/pr/`.

        The workflow prunes older reports to keep at most 10 report sets.

      branch: "weekly-terraform-pr-analytics"

      base: "main"

      add-paths: "history/pr"


  - name: Prune PR history to max 10 report sets

    if: steps.create_pr.outcome == 'success'

    run: |

      python << 'PY'

      import re

      from pathlib import Path


      hist = Path("history/pr")

      hist.mkdir(parents=True, exist_ok=True)


      groups = {}

      for p in hist.iterdir():

        m = re.search(r"(\d{8}-\d{6})", p.name)

        if not m:

          continue

        ts = m.group(1)

        groups.setdefault(ts, []).append(p)


      timestamps = sorted(groups.keys(), reverse=True)

      keep = set(timestamps[:10])

      drop = [p for ts, files in groups.items() if ts not in keep for p in files]


      for p in drop:

        try:

          p.unlink()

        except Exception:

          pass


      print(f"Pruned {len(drop)} files; kept {len(keep)} report sets.")

      PY


  ###########################################################################

  # 5. NOTIFY RUNBOOK WEBHOOK (OPTIONAL EMAIL VIA AZ COMMUNICATION)

  ###########################################################################

  - name: Notify runbook webhook for PR analytics

    if: steps.create_pr.outcome == 'success'

    env:

      RUNBOOK_WEBHOOK_URL: ${{ secrets.RUNBOOK_WEBHOOK_URL }}

      PR_URL: ${{ steps.create_pr.outputs.pull-request-url }}

      WINDOW_DAYS: ${{ env.WINDOW_DAYS }}

    run: |

      payload=$(jq -n \

        --arg pr "$PR_URL" \

        --arg window "$WINDOW_DAYS" \

        '{subject: ("Weekly Terraform PR analytics report - " + $window + "d"), body: ("A new weekly PR analytics report has been generated. Review the PR: " + $pr), pr_url: $pr, window_days: $window}')

      curl -sS -X POST "$RUNBOOK_WEBHOOK_URL" \

        -H "Content-Type: application/json" \

        -d "$payload"


  - name: Output PR analytics artifacts

    if: always()

    run: |

      echo "Generated files in history/pr/:"

      ls -la history/pr || true
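One subtlety worth calling out: python-hcl2 (at least in recent versions) returns top-level Terraform blocks such as resource and module as lists of single-key dicts rather than plain dicts, which is why the parse helpers in the analysis step accept both shapes. A quick illustration of the parsed shape, assuming python-hcl2 is installed locally:

import hcl2

SAMPLE = '''
resource "azurerm_storage_account" "sa" {
  name = "example"
}

module "network" {
  source = "../modules/network"
}
'''

obj = hcl2.loads(SAMPLE)
# Typical shape (details can vary by python-hcl2 version):
# obj["resource"] -> [{"azurerm_storage_account": {"sa": {"name": "example"}}}]
# obj["module"]   -> [{"network": {"source": "../modules/network"}}]

types = set()
for block in obj.get("resource", []):
    for rtype in block:
        if rtype.startswith("azurerm_"):
            types.add(rtype)

print(types)  # {'azurerm_storage_account'}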

