When we discussed an end-to-end GitHub Actions Workflow that
demonstrates the creation and usage of “Issues Dashboard” for periodic insights
into the customer experience with the repository assets yesterday, we didn't
bring up some of the pesky and troublesome tasks such as finding all pull
request references on a GitHub issue regardless of whether they appear in
comments or events of type “mentioned” or “cross-referenced”, or whether they
appear in the closure of the issue. The sample Python implementation below
provides those details.
Python now follows:
import os, re, json, datetime, requests
import hcl2
import pandas as pd
REPO = os.environ["GITHUB_REPOSITORY"]
GH_TOKEN = os.environ["GH_TOKEN"]
HEADERS = {"Authorization": f"Bearer {GH_TOKEN}", "Accept": "application/vnd.github+json, application/vnd.github.mockingbird-preview+json", "X-GitHub-Api-Version": "2026-03-10"}
# ---- Time window (last 7 days) ----
since = (datetime.datetime.utcnow() - datetime.timedelta(days=7)).isoformat() + "Z"
# ---- Helpers ----
def gh_get(url, params=None):
r = requests.get(url, headers=HEADERS, params=params)
r.raise_for_status()
return r.json()
def gh_get_text(url):
r = requests.get(url, headers=HEADERS)
r.raise_for_status()
return r.text
def list_closed_issues():
# Issues API returns both issues and PRs; filter out PRs.
url = f"https://api.github.com/repos/{REPO}/issues"
items = gh_get(url, params={"state":"closed","since":since,"per_page":100})
return [i for i in items if "pull_request" not in i]
PR_HTML_URL_RE = re.compile(
r"https?://github\.com/(?P<owner>[^/\s]+)/(?P<repo>[^/\s]+)/pull/(?P<num>\d+)",
re.IGNORECASE,
)
PR_API_URL_RE = re.compile(
r"https?://api\.github\.com/repos/(?P<owner>[^/\s]+)/(?P<repo>[^/\s]+)/pulls/(?P<num>\d+)",
re.IGNORECASE,
)
# Shorthand references that might appear in text:
# - #123 (assumed to be same repo)
# - owner/repo#123 (explicit cross-repo)
SHORTHAND_SAME_REPO_RE = re.compile(r"(?<!\w)#(?P<num>\d+)\b")
SHORTHAND_CROSS_REPO_RE = re.compile(
r"(?P<owner>[A-Za-z0-9_.-]+)/(?P<repo>[A-Za-z0-9_.-]+)#(?P<num>\d+)\b"
)
def _normalize_html_pr_url(owner: str, repo: str, num: int) -> str:
return f"https://github.com/{owner}/{repo}/pull/{int(num)}"
def _collect_from_text(text: str, default_owner: str, default_repo: str) -> set:
"""Extract candidate PR URLs from free text (body/comments/events text)."""
found = set()
if not text:
return found
# 1) Direct HTML PR URLs
for m in PR_HTML_URL_RE.finditer(text):
found.add(_normalize_html_pr_url(m.group("owner"), m.group("repo"), m.group("num")))
# 2) API PR URLs -> convert to HTML
for m in PR_API_URL_RE.finditer(text):
found.add(_normalize_html_pr_url(m.group("owner"), m.group("repo"), m.group("num")))
# 3) Cross-repo shorthand: owner/repo#123 (we will treat it as PR URL candidate)
for m in SHORTHAND_CROSS_REPO_RE.finditer(text):
found.add(_normalize_html_pr_url(m.group("owner"), m.group("repo"), m.group("num")))
# 4) Same-repo shorthand: #123
for m in SHORTHAND_SAME_REPO_RE.finditer(text):
found.add(_normalize_html_pr_url(default_owner, default_repo, m.group("num")))
return found
def _paginate_gh_get(url, headers=None, per_page=100):
"""Generator: fetch all pages until fewer than per_page are returned."""
page = 1
while True:
data = gh_get(url, params={"per_page": per_page, "page": page})
if not isinstance(data, list) or len(data) == 0:
break
for item in data:
yield item
if len(data) < per_page:
break
page += 1
def extract_pr_urls_from_issue(issue_number: int):
"""
Extract PR URLs associated with an issue by scanning:
- Issue body
- Issue comments
- Issue events (including 'mentioned', 'cross-referenced', etc.)
- Issue timeline (most reliable for cross references)
Returns a sorted list of unique, normalized HTML PR URLs.
Requires:
- REPO = "owner/repo"
- gh_get(url, params=None, headers=None) is available
"""
owner, repo = REPO.split("/", 1)
pr_urls = set()
# Baseline Accept header for REST v3 + timeline support.
# The timeline historically required a preview header. Keep both for compatibility.
base_headers = {
"Accept": "application/vnd.github+json, application/vnd.github.mockingbird-preview+json"
}
# 1) Issue body
issue_url = f"https://api.github.com/repos/{REPO}/issues/{issue_number}"
issue = gh_get(issue_url)
if isinstance(issue, dict):
body = issue.get("body") or ""
pr_urls |= _collect_from_text(body, owner, repo)
# If this issue IS itself a PR (when called with a PR number), make sure we don't add itself erroneously
# We won't add unless text contains it anyway; still fine.
# 2) All comments
comments_url = f"https://api.github.com/repos/{REPO}/issues/{issue_number}/comments"
for c in _paginate_gh_get(comments_url):
body = c.get("body") or ""
pr_urls |= _collect_from_text(body, owner, repo)
# 3) Issue events (event stream can have 'mentioned', 'cross-referenced', etc.)
events_url = f"https://api.github.com/repos/{REPO}/issues/{issue_number}/events"
for ev in _paginate_gh_get(events_url):
# (a) Free-text fields: some events carry body/commit messages, etc.
if isinstance(ev, dict):
body = ev.get("body") or ""
pr_urls |= _collect_from_text(body, owner, repo)
# (b) Structured cross-reference (best: 'cross-referenced' events)
# If the source.issue has 'pull_request' key, it's a PR; use its html_url.
if ev.get("event") == "cross-referenced":
src = ev.get("source") or {}
issue_obj = src.get("issue") or {}
pr_obj = issue_obj.get("pull_request") or {}
html_url = issue_obj.get("html_url")
if pr_obj and html_url and "/pull/" in html_url:
pr_urls.add(html_url)
# Fallback: If not marked but looks like a PR in URL
elif html_url and "/pull/" in html_url:
pr_urls.add(html_url)
# (c) Also include 'mentioned' events (broadened): inspect whatever text fields exist
# Already covered via 'body' text extraction
# 4) Timeline API (the most complete for references)
timeline_url = f"https://api.github.com/repos/{REPO}/issues/{issue_number}/timeline"
for item in _paginate_gh_get(timeline_url):
if not isinstance(item, dict):
continue
# Free-text scan on any plausible string field
for key in ("body", "message", "title", "commit_message", "subject"):
val = item.get(key)
if isinstance(val, str):
pr_urls |= _collect_from_text(val, owner, repo)
# Structured cross-reference payloads
if item.get("event") == "cross-referenced":
src = item.get("source") or {}
issue_obj = src.get("issue") or {}
pr_obj = issue_obj.get("pull_request") or {}
html_url = issue_obj.get("html_url")
if pr_obj and html_url and "/pull/" in html_url:
pr_urls.add(html_url)
elif html_url and "/pull/" in html_url:
pr_urls.add(html_url)
# Some timeline items are themselves issues/PRs with html_url
html_url = item.get("html_url")
if isinstance(html_url, str) and "/pull/" in html_url:
pr_urls.add(html_url)
# Occasionally the timeline includes API-style URLs
api_url = item.get("url")
if isinstance(api_url, str):
m = PR_API_URL_RE.search(api_url)
if m:
pr_urls.add(_normalize_html_pr_url(m.group("owner"), m.group("repo"), m.group("num")))
# Final normalization: keep only HTML PR URLs and sort
pr_urls = {m.group(0) for url in pr_urls for m in [PR_HTML_URL_RE.search(url)] if m}
return sorted(pr_urls)
def pr_number_from_url(u):
m = re.search(r"/pull/(\d+)", u)
return int(m.group(1)) if m else None
#codingexercise: CodingExercise-03-17-2026.pdf
No comments:
Post a Comment