Tuesday, March 10, 2026

 This article continues a previous one (see that post for context). Here we extend the workflow so that the generated report also includes the following capabilities:

- LLM-generated summary of “who wants what”

- Turn-around time bar chart for issues

- Flexible sampling window as input parameter.


The YAML workflow follows:

---

name: Weekly Terraform azurerm hotspot report with AI summary and turnaround chart

# NOTE: generic YAML 1.1 parsers read the bare `on` key as boolean true;
# GitHub's loader handles it, but suppress yamllint's `truthy` rule if linting.
on:

  workflow_dispatch:

    inputs:

      window_days:

        description: "Number of days back to collect closed issues (integer)"

        required: false

        # Quoted string, not an int: workflow inputs are strings anyway.
        default: "7"

  schedule:

    # Every Monday at 13:00 UTC.
    - cron: "0 13 * * MON"

# Token scopes: contents/pull-requests write are required by the
# create-pull-request step below; issues read for the REST issue queries.
permissions:

  contents: write

  pull-requests: write

  issues: read

jobs:

  report:

    runs-on: ubuntu-latest

    steps:

      # Full history (fetch-depth: 0) so the analyze step's
      # `git ls-tree <sha>` calls can resolve commits beyond the branch tip.
      - name: Check out repository

        uses: actions/checkout@v4

        with:

          fetch-depth: 0

      - name: Set up Python

        uses: actions/setup-python@v5

        with:

          # Quoted to avoid the YAML float trap (e.g. 3.10 would parse as 3.1).
          python-version: "3.11"

      - name: Install dependencies

        run: |

          python -m pip install --upgrade pip

          pip install requests pandas matplotlib python-hcl2

      - name: Prepare environment variables

        id: env

        run: |

          echo "WINDOW_DAYS=${{ github.event.inputs.window_days || '7' }}" >> $GITHUB_ENV

          echo "REPO=${GITHUB_REPOSITORY}" >> $GITHUB_ENV

      - name: Fetch closed issues and linked PRs (window)

        id: fetch

        env:

          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

          REPO: ${{ env.REPO }}

          WINDOW_DAYS: ${{ env.WINDOW_DAYS }}

        run: |

          python - <<'PY'

          import os, requests, json, datetime, re

          REPO = os.environ["REPO"]

          TOKEN = os.environ["GH_TOKEN"]

          WINDOW_DAYS = int(os.environ.get("WINDOW_DAYS","7"))

          HEADERS = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}

          since = (datetime.datetime.utcnow() - datetime.timedelta(days=WINDOW_DAYS)).isoformat() + "Z"

          def gh_get(url, params=None):

            r = requests.get(url, headers=HEADERS, params=params)

            r.raise_for_status()

            return r.json()

          issues_url = f"https://api.github.com/repos/{REPO}/issues"

          params = {"state":"closed","since":since,"per_page":100}

          items = gh_get(issues_url, params=params)

          issues = []

          for i in items:

            if "pull_request" in i:

              continue

            # fetch comments to find any PR links

            comments = gh_get(i["comments_url"], params={"per_page":100})

            pr_urls = set()

            for c in comments:

              body = c.get("body","") or ""

              for m in re.findall(r"https://github\.com/[^/\s]+/[^/\s]+/pull/\d+", body):

                pr_urls.add(m)

              for m in re.findall(r"(?:^|\s)#(\d+)\b", body):

                pr_urls.add(f"https://github.com/{REPO}/pull/{m}")

            issues.append({

              "number": i["number"],

              "title": i.get("title",""),

              "user": i.get("user",{}).get("login",""),

              "created_at": i.get("created_at"),

              "closed_at": i.get("closed_at"),

              "html_url": i.get("html_url"),

              "comments": [{"id":c.get("id"), "body":c.get("body",""), "created_at":c.get("created_at")} for c in comments],

              "pr_urls": sorted(pr_urls)

            })

          with open("issues.json","w") as f:

            json.dump(issues, f, indent=2)

          print(f"WROTE_ISSUES={len(issues)}")

          PY

      - name: Resolve PRs, collect touched workload apps and azurerm types

        id: analyze

        env:

          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

          REPO: ${{ env.REPO }}

        run: |

          python - <<'PY'

          import os, json, re, requests, subprocess

          import hcl2

          from collections import defaultdict

          REPO = os.environ["REPO"]

          TOKEN = os.environ["GH_TOKEN"]

          HEADERS = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}

          def gh_get(url, params=None):

            r = requests.get(url, headers=HEADERS, params=params)

            r.raise_for_status()

            return r.json()

          def gh_get_text(url):

            r = requests.get(url, headers=HEADERS)

            r.raise_for_status()

            return r.text

          def pr_number_from_url(u):

            m = re.search(r"/pull/(\d+)", u)

            return int(m.group(1)) if m else None

          def list_pr_files(pr_number):

            url = f"https://api.github.com/repos/{REPO}/pulls/{pr_number}/files"

            files = []

            page = 1

            while True:

              batch = gh_get(url, params={"per_page":100,"page":page})

              if not batch:

                break

              files.extend(batch)

              page += 1

            return files

          def get_pr_head_sha(pr_number):

            url = f"https://api.github.com/repos/{REPO}/pulls/{pr_number}"

            pr = gh_get(url)

            return pr["head"]["sha"]

          def get_file_at_sha(path, sha):

            url = f"https://api.github.com/repos/{REPO}/contents/{path}"

            r = requests.get(url, headers=HEADERS, params={"ref": sha})

            if r.status_code == 404:

              return None

            r.raise_for_status()

            data = r.json()

            if isinstance(data, dict) and data.get("type") == "file" and data.get("download_url"):

              return gh_get_text(data["download_url"])

            return None

          def parse_azurerm_resource_types_from_tf(tf_text):

            types = set()

            try:

              obj = hcl2.loads(tf_text)

            except Exception:

              return types

            res = obj.get("resource", [])

            if isinstance(res, list):

              for item in res:

                if isinstance(item, dict):

                  for rtype in item.keys():

                    if isinstance(rtype, str) and rtype.startswith("azurerm_"):

                      types.add(rtype)

            elif isinstance(res, dict):

              for rtype in res.keys():

                if isinstance(rtype, str) and rtype.startswith("azurerm_"):

                  types.add(rtype)

            return types

          def parse_module_sources_from_tf(tf_text):

            sources = set()

            try:

              obj = hcl2.loads(tf_text)

            except Exception:

              return sources

            mods = obj.get("module", [])

            if isinstance(mods, list):

              for item in mods:

                if isinstance(item, dict):

                  for _, body in item.items():

                    if isinstance(body, dict):

                      src = body.get("source")

                      if isinstance(src, str):

                        sources.add(src)

            elif isinstance(mods, dict):

              for _, body in mods.items():

                if isinstance(body, dict):

                  src = body.get("source")

                  if isinstance(src, str):

                    sources.add(src)

            return sources

          def normalize_local_module_path(source, app_dir):

            if source.startswith("./") or source.startswith("../"):

              import posixpath

              return posixpath.normpath(posixpath.join(app_dir, source))

            return None

          def list_repo_tf_files_under(dir_path, sha):

            try:

              out = subprocess.check_output(["git","ls-tree","-r","--name-only",sha,dir_path], text=True)

              paths = [p.strip() for p in out.splitlines() if p.strip().endswith(".tf")]

              return paths

            except Exception:

              return []

          def collect_azurerm_types_for_app(app_dir, sha):

            az_types = set()

            module_dirs = set()

            tf_paths = list_repo_tf_files_under(app_dir, sha)

            for p in tf_paths:

              txt = get_file_at_sha(p, sha)

              if not txt:

                continue

              az_types |= parse_azurerm_resource_types_from_tf(txt)

              for src in parse_module_sources_from_tf(txt):

                local = normalize_local_module_path(src, app_dir)

                if local:

                  module_dirs.add(local)

            for mdir in sorted(module_dirs):

              m_tf_paths = list_repo_tf_files_under(mdir, sha)

              for p in m_tf_paths:

                txt = get_file_at_sha(p, sha)

                if not txt:

                  continue

                az_types |= parse_azurerm_resource_types_from_tf(txt)

            return az_types

          with open("issues.json") as f:

            issues = json.load(f)

          issue_to_types = {}

          issue_turnaround = {} # issue -> days (float)

          for issue in issues:

            inum = issue["number"]

            created = issue.get("created_at")

            closed = issue.get("closed_at")

            if created and closed:

              from datetime import datetime

              fmt = "%Y-%m-%dT%H:%M:%SZ"

              try:

                dt_created = datetime.strptime(created, fmt)

                dt_closed = datetime.strptime(closed, fmt)

                delta_days = (dt_closed - dt_created).total_seconds() / 86400.0

              except Exception:

                delta_days = None

            else:

              delta_days = None

            issue_turnaround[inum] = delta_days

            pr_urls = issue.get("pr_urls",[])

            pr_numbers = sorted({pr_number_from_url(u) for u in pr_urls if pr_number_from_url(u)})

            types_for_issue = set()

            for prn in pr_numbers:

              sha = get_pr_head_sha(prn)

              files = list_pr_files(prn)

              touched_apps = set()

              for f in files:

                path = f.get("filename","")

                if path.startswith("workload/"):

                  parts = path.split("/")

                  if len(parts) >= 2:

                    touched_apps.add("/".join(parts[:2]))

              for app_dir in sorted(touched_apps):

                types_for_issue |= collect_azurerm_types_for_app(app_dir, sha)

            if types_for_issue:

              issue_to_types[inum] = sorted(types_for_issue)

          # Build CSV rows: issue, azurerm_type

          rows = []

          for inum, types in issue_to_types.items():

            for t in set(types):

              rows.append({"issue": inum, "azurerm_type": t})

          import pandas as pd

          df = pd.DataFrame(rows)

          df.to_csv("severity_data.csv", index=False)

          # Turnaround CSV

          ta_rows = []

          for inum, days in issue_turnaround.items():

            ta_rows.append({"issue": inum, "turnaround_days": days})

          pd.DataFrame(ta_rows).to_csv("turnaround.csv", index=False)

          with open("issue_to_azurerm_types.json","w") as f:

            json.dump(issue_to_types, f, indent=2)

          with open("issue_turnaround.json","w") as f:

            json.dump(issue_turnaround, f, indent=2)

          print(f"ISSUES_WITH_TYPES={len(issue_to_types)}")

          PY

      - name: Generate charts and markdown (severity + turnaround) and include AI summary

        id: report

        env:

          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

        run: |

          set -euo pipefail

          python - <<'PY'

          import os, json, datetime

          import pandas as pd

          import matplotlib.pyplot as plt

          ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")

          os.makedirs("history", exist_ok=True)

          # Severity chart (issue frequency by azurerm type)

          if os.path.exists("severity_data.csv"):

            df = pd.read_csv("severity_data.csv")

            if not df.empty:

              counts = df.groupby("azurerm_type")["issue"].nunique().sort_values(ascending=False)

            else:

              counts = pd.Series(dtype=int)

          else:

            counts = pd.Series(dtype=int)

          png_sev = f"history/severity-by-azurerm-{ts}.png"

          plt.figure(figsize=(12,6))

          if not counts.empty:

            counts.plot(kind="bar")

            plt.title("Issue frequency by azurerm resource type")

            plt.xlabel("azurerm resource type")

            plt.ylabel("number of closed issues touching type")

          else:

            plt.text(0.5, 0.5, "No azurerm-impacting issues in window", ha="center", va="center")

            plt.axis("off")

          plt.tight_layout()

          plt.savefig(png_sev)

          plt.clf()

          # Turnaround bar chart for issues closed in window

          ta_png = f"history/turnaround-by-issue-{ts}.png"

          if os.path.exists("turnaround.csv"):

            ta = pd.read_csv("turnaround.csv")

            ta = ta.dropna(subset=["turnaround_days"])

            if not ta.empty:

              # sort by turnaround descending for visibility

              ta_sorted = ta.sort_values("turnaround_days", ascending=False).head(50)

              plt.figure(figsize=(12,6))

              plt.bar(ta_sorted["issue"].astype(str), ta_sorted["turnaround_days"])

              plt.xticks(rotation=90)

              plt.title("Turnaround time (days) for closed issues in window")

              plt.xlabel("Issue number")

              plt.ylabel("Turnaround (days)")

              plt.tight_layout()

              plt.savefig(ta_png)

              plt.clf()

            else:

              plt.figure(figsize=(8,2))

              plt.text(0.5,0.5,"No turnaround data available",ha="center",va="center")

              plt.axis("off")

              plt.savefig(ta_png)

              plt.clf()

          else:

            plt.figure(figsize=(8,2))

            plt.text(0.5,0.5,"No turnaround data available",ha="center",va="center")

            plt.axis("off")

            plt.savefig(ta_png)

            plt.clf()

          # Prepare condensed issues JSON for AI: one-line per issue with number, user, title

          if os.path.exists("issues.json"):

            with open("issues.json") as f:

              issues = json.load(f)

          else:

            issues = []

          condensed = []

          for i in issues:

            condensed.append({

              "number": i.get("number"),

              "user": i.get("user"),

              "title": i.get("title"),

              "html_url": i.get("html_url")

            })

          with open("issues_for_ai.json","w") as f:

            json.dump(condensed, f, indent=2)

          # Call OpenAI Chat to produce "who wants what" one-liners

          import subprocess, shlex, sys

          ai_input = json.dumps(condensed)[:15000] # truncate to avoid huge payloads

          prompt = (

            "You are given a JSON array of GitHub issues with fields: number, user, title, html_url. "

            "Produce a concise list of one-line 'who wants what' statements, one per issue, in plain text. "

            "Format: '#<number> — <user> wants <succinct request derived from title>'. "

            "Do not add commentary. If the title is ambiguous, produce a best-effort short paraphrase."

            f"\n\nJSON:\n{ai_input}"

          )

          # Use curl to call OpenAI Chat Completions (Chat API). Adjust model as appropriate.

          import os, json, subprocess

          OPENAI_KEY = os.environ.get("OPENAI_API_KEY")

          if OPENAI_KEY:

            payload = {

              "model": "gpt-4o-mini",

              "messages": [{"role":"system","content":"You are a concise summarizer."},

                           {"role":"user","content":prompt}],

              "temperature":0.2,

              "max_tokens":400

            }

            proc = subprocess.run([

              "curl","-sS","https://api.openai.com/v1/chat/completions",

              "-H", "Content-Type: application/json",

              "-H", f"Authorization: Bearer {OPENAI_KEY}",

              "-d", json.dumps(payload)

            ], capture_output=True, text=True)

            if proc.returncode == 0 and proc.stdout:

              try:

                resp = json.loads(proc.stdout)

                ai_text = resp["choices"][0]["message"]["content"].strip()

              except Exception:

                ai_text = "AI summary unavailable (parsing error)."

            else:

              ai_text = "AI summary unavailable (request failed)."

          else:

            ai_text = "AI summary skipped (no OPENAI_API_KEY)."

          # Write markdown report combining charts and AI summary

          md_path = f"history/severity-report-{ts}.md"

          with open(md_path, "w") as f:

            f.write("# Weekly Terraform azurerm hotspot report\n\n")

            f.write(f"**Window (days):** {os.environ.get('WINDOW_DAYS','7')}\n\n")

            f.write("## AI Summary (who wants what)\n\n")

            f.write("```\n")

            f.write(ai_text + "\n")

            f.write("```\n\n")

            f.write("## Top azurerm resource types by issue frequency\n\n")

            if not counts.empty:

              f.write("![" + os.path.basename(png_sev) + "](" + os.path.basename(png_sev) + ")\n\n")

              f.write(counts.head(30).to_frame("issues").to_markdown() + "\n\n")

            else:

              f.write("No azurerm-impacting issues found in the selected window.\n\n")

            f.write("## Turnaround time for closed issues (days)\n\n")

            f.write("![" + os.path.basename(ta_png) + "](" + os.path.basename(ta_png) + ")\n\n")

            f.write("## Data artifacts\n\n")

            f.write("- `severity_data.csv` — per-issue azurerm type mapping\n")

            f.write("- `turnaround.csv` — per-issue turnaround in days\n")

          # expose outputs

          print(f"REPORT_MD={md_path}")

          print(f"REPORT_PNG={png_sev}")

          print(f"REPORT_TA_PNG={ta_png}")

          PY

      - name: Add report files to history and commit via PR

        id: create_pr

        uses: peter-evans/create-pull-request@v6

        with:

          commit-message: "Add weekly Terraform azurerm hotspot report and charts (prune to last 10)"

          title: "Weekly Terraform azurerm hotspot report"

          body: |

            This PR adds the latest weekly azurerm hotspot report and charts under `history/`.

            The workflow prunes older reports to keep at most 10 report sets.

          branch: "weekly-terraform-azurerm-hotspots"

          base: "main"

          path: "history"

      - name: Prune history to max 10 report sets (post-commit)

        if: steps.create_pr.outcome == 'success'

        run: |

          python - <<'PY'

          import os, re

          from pathlib import Path

          hist = Path("history")

          hist.mkdir(exist_ok=True)

          pat = re.compile(r"^severity-by-azurerm-(\d{8}-\d{6})\.(md|png)$|^severity-report-(\d{8}-\d{6})\.md$|^turnaround-by-issue-(\d{8}-\d{6})\.(md|png)$")

          # Collect timestamps by grouping filenames that contain the same timestamp

          groups = {}

          for p in hist.iterdir():

            m = re.search(r"(\d{8}-\d{6})", p.name)

            if not m:

              continue

            ts = m.group(1)

            groups.setdefault(ts, []).append(p)

          timestamps = sorted(groups.keys(), reverse=True)

          keep = set(timestamps[:10])

          drop = [p for ts, files in groups.items() if ts not in keep for p in files]

          for p in drop:

            try:

              p.unlink()

            except Exception:

              pass

          print(f"Pruned {len(drop)} files; kept {len(keep)} report sets.")

          PY

      - name: Notify runbook webhook (which will send az communication email)

        if: steps.create_pr.outcome == 'success'

        env:

          RUNBOOK_WEBHOOK_URL: ${{ secrets.RUNBOOK_WEBHOOK_URL }}

          PR_URL: ${{ steps.create_pr.outputs.pull-request-url }}

          WINDOW_DAYS: ${{ env.WINDOW_DAYS }}

        run: |

          # The runbook webhook is expected to accept a JSON payload and perform the az communication email send.

          # Adjust payload keys to match your runbook's expected schema.

          payload=$(jq -n \

            --arg pr "$PR_URL" \

            --arg window "$WINDOW_DAYS" \

            '{subject: ("Weekly Terraform azurerm hotspot report - " + $window + "d"), body: ("A new weekly azurerm hotspot report has been generated. Review the PR: " + $pr), pr_url: $pr, window_days: $window}')

          curl -sS -X POST "$RUNBOOK_WEBHOOK_URL" \

            -H "Content-Type: application/json" \

            -d "$payload"

      # Diagnostic step: list whatever was produced under history/, even when
      # an earlier step failed (if: always()); `|| true` keeps it non-fatal.
      - name: Output artifact list

        if: always()

        run: |

          echo "Generated files in history/:"

          ls -la history || true


No comments:

Post a Comment