|
| 1 | +import json |
| 2 | +import os |
| 3 | +import re |
| 4 | +import sys |
| 5 | +from datetime import datetime, timezone |
| 6 | + |
| 7 | +import requests |
| 8 | + |
# Location of the JSON config that lists discussions to archive; override
# with the ARCHIVE_CONFIG environment variable.
CONFIG_PATH = os.environ.get("ARCHIVE_CONFIG", "docs/discussions/archive.json")
# Prefer a dedicated discussions token; fall back to the default GITHUB_TOKEN.
TOKEN = os.environ.get("DISCUSSIONS_TOKEN") or os.environ.get("GITHUB_TOKEN")

# GraphQL query fetching a single organization-level discussion by number.
QUERY = """
query($org: String!, $number: Int!) {
  organization(login: $org) {
    discussion(number: $number) {
      title
      body
      url
      updatedAt
    }
  }
}
"""
| 24 | + |
# Typographic Unicode characters mapped to plain-ASCII stand-ins before the
# lossy ASCII encode below strips everything else.
REPLACEMENTS = {
    "\u2018": "'",
    "\u2019": "'",
    "\u201c": '"',
    "\u201d": '"',
    "\u2013": "-",
    "\u2014": "-",
    "\u2026": "...",
    "\u00a0": " ",
}


def normalize_ascii(text):
    """Return *text* reduced to pure ASCII.

    Characters listed in REPLACEMENTS are substituted first; any remaining
    non-ASCII character is silently dropped.
    """
    substituted = text.translate(str.maketrans(REPLACEMENTS))
    return substituted.encode("ascii", "ignore").decode("ascii")
| 41 | + |
| 42 | + |
def parse_org_and_number(url, default_org):
    """Extract ``(org, discussion_number)`` from a GitHub discussion URL.

    Recognizes org-level URLs (``github.com/orgs/ORG/discussions/N``) and
    repo-level URLs (``github.com/OWNER/REPO/discussions/N``), in that order.
    Any other URL containing ``/discussions/N`` yields ``(default_org, N)``;
    an empty or unmatched URL yields ``(default_org, None)``.
    """
    if not url:
        return default_org, None

    # Org-level pattern must be tried first: the repo-level pattern would also
    # match "orgs" as the owner segment.
    patterns = (
        r"github\.com/orgs/([^/]+)/discussions/(\d+)",
        r"github\.com/([^/]+)/[^/]+/discussions/(\d+)",
    )
    for pattern in patterns:
        found = re.search(pattern, url)
        if found:
            return found.group(1), int(found.group(2))

    tail = re.search(r"/discussions/(\d+)", url)
    if tail:
        return default_org, int(tail.group(1))
    return default_org, None
| 60 | + |
| 61 | + |
def load_config(path):
    """Read and parse the archive configuration file at *path*.

    Prints a message and exits the process with status 1 when the file
    does not exist.
    """
    if not os.path.exists(path):
        print(f"Config not found: {path}")
        sys.exit(1)

    with open(path, "r", encoding="utf-8") as config_file:
        return json.loads(config_file.read())
| 69 | + |
| 70 | + |
def fetch_discussion(org, number, token):
    """Fetch one organization discussion via the GitHub GraphQL API.

    Returns the discussion payload (title/body/url/updatedAt) or None when
    the API reports errors or the organization/discussion is missing.
    Raises requests.HTTPError on non-2xx HTTP responses.
    """
    variables = {"org": org, "number": number}
    response = requests.post(
        "https://api.github.com/graphql",
        headers={"Authorization": f"bearer {token}"},
        json={"query": QUERY, "variables": variables},
        timeout=30,
    )
    response.raise_for_status()

    payload = response.json()
    # GraphQL reports failures in-band with HTTP 200, so check explicitly.
    if "errors" in payload:
        print(f"GraphQL error for discussion {number}: {payload['errors']}")
        return None

    organization = payload.get("data", {}).get("organization")
    if not organization:
        print(f"Organization not found: {org}")
        return None

    return organization.get("discussion")
| 90 | + |
| 91 | + |
def render_discussion(discussion):
    """Render a fetched discussion dict as an ASCII markdown document.

    Uses the discussion's updatedAt date when present, otherwise today's
    UTC date, for the "Last updated" line.
    """
    title = normalize_ascii(discussion.get("title", "Untitled"))
    body = normalize_ascii(discussion.get("body", "")).strip()
    url = normalize_ascii(discussion.get("url", ""))

    updated_at = discussion.get("updatedAt")
    # updatedAt is ISO-8601 ("YYYY-MM-DDT..."), so the date is the T-prefix.
    last_updated = (
        updated_at.split("T")[0]
        if updated_at
        else datetime.now(timezone.utc).strftime("%Y-%m-%d")
    )

    lines = [
        f"# {title}",
        "",
        f"Source discussion: {url}",
        f"Last updated: {last_updated}",
        "",
        body,
        "",
    ]
    return "\n".join(lines)
| 113 | + |
| 114 | + |
def write_if_changed(path, content):
    """Write *content* to *path* only when it differs from the file on disk.

    Creates parent directories as needed. Returns True when the file was
    written, False when the existing content already matches.
    """
    existing = ""
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as handle:
            existing = handle.read()

    if existing == content:
        print(f"No changes: {path}")
        return False

    # Bug fix: dirname is "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError — only create directories when one exists.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(content)
    print(f"Updated: {path}")
    return True
| 130 | + |
| 131 | + |
def _archive_item(item, default_org):
    """Archive one configured discussion; prints the reason when skipped."""
    output_path = item.get("output_path")
    if not output_path:
        print("Skipping item with no output_path")
        return

    org, number = parse_org_and_number(item.get("discussion_url", ""), default_org)
    if not org or not number:
        print(f"Skipping item with no discussion number: {output_path}")
        return

    discussion = fetch_discussion(org, number, TOKEN)
    if not discussion:
        print(f"Discussion not found: {org} #{number}")
        return

    write_if_changed(output_path, render_discussion(discussion))


def main():
    """Sync every discussion listed in the config to its archive file."""
    if not TOKEN:
        print("DISCUSSIONS_TOKEN or GITHUB_TOKEN is required")
        sys.exit(1)

    config = load_config(CONFIG_PATH)
    items = config.get("items", [])
    if not items:
        print("No items to archive")
        return

    default_org = config.get("organization")
    for item in items:
        _archive_item(item, default_org)


if __name__ == "__main__":
    main()
0 commit comments