|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import os |
| 4 | +import re |
| 5 | +from dataclasses import dataclass |
| 6 | +from typing import Any |
| 7 | +from urllib.parse import parse_qs, urlparse |
| 8 | + |
| 9 | +from _utils.enrich import should_refetch |
| 10 | +from _utils.enrichment_engine import FetchResult, Patch |
| 11 | +from _utils.github import parse_owner_repo |
| 12 | +from _utils.http import RetryConfig, fetch_json |
| 13 | +from _utils.time import utc_now_iso |
| 14 | + |
# Root URL of the GitHub REST API; the endpoint URLs built in this module
# are all prefixed with it.
GITHUB_API_BASE = "https://api.github.com"
| 16 | + |
| 17 | + |
@dataclass(frozen=True)
class GitHubResult:
    """Immutable snapshot of the metrics collected for one GitHub repository.

    Every metric is optional: ``None`` means that no usable value was
    available for that field when the result was built.
    """

    # Coordinates of the repository the metrics belong to.
    owner: str
    repo: str

    # Numeric metrics; ``None`` when unavailable.
    stars: int | None
    forks: int | None
    contributors_count: int | None
    open_issues: int | None

    # Last-push timestamp string; ``None`` when unavailable.
    pushed_at: str | None
| 27 | + |
| 28 | + |
def _github_repo_api_url(owner: str, repo: str) -> str:
    """Return the REST API URL describing the repository ``owner/repo``."""
    repo_path = f"repos/{owner}/{repo}"
    return f"{GITHUB_API_BASE}/{repo_path}"
| 31 | + |
| 32 | + |
def _github_contributors_api_url(owner: str, repo: str) -> str:
    """Return the contributors-list API URL for ``owner/repo``.

    The URL asks for a single contributor per page (``per_page=1``), so the
    number of the last page equals the number of contributors.
    """
    endpoint = f"{GITHUB_API_BASE}/repos/{owner}/{repo}/contributors"
    return f"{endpoint}?per_page=1"
| 35 | + |
| 36 | + |
# Captures the URL of the rel="last" entry of an HTTP ``Link`` pagination header.
_LINK_LAST_RE = re.compile(r'<([^>]+)>;\s*rel="last"')


def _parse_last_page_from_link_header(link: str | None) -> int | None:
    """Extract the ``page`` number of the ``rel="last"`` entry of a Link header.

    Args:
        link: Raw value of the ``Link`` response header, or ``None`` when the
            header is absent.

    Returns:
        The last page number (always >= 1), or ``None`` when the header is
        missing or blank, has no ``rel="last"`` entry, or that entry has no
        valid positive ``page`` query parameter.
    """
    if not isinstance(link, str) or not link.strip():
        return None
    match = _LINK_LAST_RE.search(link)
    if match is None:
        return None
    try:
        query = parse_qs(urlparse(match.group(1)).query)
        page = int(query["page"][0])
    except (KeyError, ValueError):
        # No ``page`` parameter, a non-numeric value, or a URL that
        # ``urlparse`` rejects: treat the header as carrying no page count.
        return None
    # Pages are 1-based, so zero or a negative number is a malformed header
    # rather than a real count.
    return page if page >= 1 else None
| 57 | + |
| 58 | + |
def _get_token(token_env: str) -> str | None:
    """Look up a GitHub API token in the process environment.

    ``token_env`` is consulted first, followed by ``GH_TOKEN``,
    ``GH_API_TOKEN`` and ``GITHUB_TOKEN`` in that order. A variable that is
    unset, empty, or contains only whitespace is skipped, so a blank variable
    does not hide a usable token stored in a later one.

    Args:
        token_env: Name of the preferred environment variable.

    Returns:
        The first non-blank token found, stripped of surrounding whitespace,
        or ``None`` when none of the variables holds one.
    """
    for name in (token_env, "GH_TOKEN", "GH_API_TOKEN", "GITHUB_TOKEN"):
        token = (os.environ.get(name) or "").strip()
        if token:
            return token
    return None
| 68 | + |
| 69 | + |
class GitHubEnricher:
    """Enricher that collects repository metrics from the GitHub REST API.

    A component is keyed by the lower-cased ``(owner, repo)`` pair parsed from
    its ``gitHubUrl``; fetched values are emitted as patches for the
    ``github`` bucket.
    """

    name = "github"
    bucket = "github"

    def __init__(self, *, token_env: str = "GH_TOKEN") -> None:
        """Resolve the API token and set up the retry policy.

        Args:
            token_env: Name of the environment variable passed to
                ``_get_token`` as the preferred token source.
        """
        self._token_env = token_env
        self._token = _get_token(token_env)
        # NOTE(review): 403 is retried together with 429/5xx, presumably
        # because rate limiting can be reported as 403 -- confirm.
        self._retry_cfg = RetryConfig(retry_statuses=(403, 429, 500, 502, 503, 504))

    def key_for_component(self, comp: dict[str, Any]) -> tuple[str, str] | None:
        """Return the lower-cased ``(owner, repo)`` key of *comp*.

        Returns ``None`` when ``gitHubUrl`` is missing, not a string, blank,
        or cannot be parsed by ``parse_owner_repo``.
        """
        gh_url = comp.get("gitHubUrl")
        if not isinstance(gh_url, str) or not gh_url.strip():
            return None
        try:
            owner, repo = parse_owner_repo(gh_url)
        except Exception:
            # Deliberately broad: any parsing problem just means "no key".
            return None
        return (owner.lower(), repo.lower())

    def needs_fetch(
        self, comp: dict[str, Any], refresh_older_than_hours: float | None
    ) -> bool:
        """Decide whether the GitHub metrics of *comp* should be re-fetched.

        Reads ``metrics.github.fetchedAt`` and ``metrics.github.isStale`` and
        delegates the decision to ``should_refetch``. Values of an unexpected
        type are passed on as ``None``.
        """
        metrics = comp.get("metrics")
        gh_metrics = metrics.get("github") if isinstance(metrics, dict) else None
        if not isinstance(gh_metrics, dict):
            gh_metrics = {}
        fetched_at = gh_metrics.get("fetchedAt")
        stale = gh_metrics.get("isStale")
        return should_refetch(
            fetched_at=fetched_at if isinstance(fetched_at, str) else None,
            is_stale=stale if isinstance(stale, bool) else None,
            refresh_older_than_hours=refresh_older_than_hours,
        )

    def _headers(self) -> dict[str, str]:
        """Build the request headers, adding ``Authorization`` when a token is set."""
        headers = {
            "Accept": "application/vnd.github+json",
            "User-Agent": "component-gallery-enrich-github",
            "X-GitHub-Api-Version": "2022-11-28",
        }
        if self._token:
            headers["Authorization"] = f"Bearer {self._token}"
        return headers

    @staticmethod
    def _link_header(headers: Any) -> Any:
        """Return the ``Link`` header value from *headers*, ignoring name case.

        Returns ``None`` when *headers* is not a dict or holds no non-empty
        ``Link`` entry.
        """
        if not isinstance(headers, dict):
            return None
        # Common spellings first, in the order they were historically tried.
        value = headers.get("Link") or headers.get("link")
        if value:
            return value
        # HTTP header names are case-insensitive; accept any other casing too.
        for key, candidate in headers.items():
            if isinstance(key, str) and key.lower() == "link" and candidate:
                return candidate
        return None

    @staticmethod
    def _as_int(value: Any) -> int | None:
        """Return *value* as ``int`` when it is an ``int`` instance, else ``None``."""
        return int(value) if isinstance(value, int) else None

    def _fetch_contributors_count(
        self, *, ctx, owner: str, repo: str
    ) -> tuple[int | None, int, int | None, str | None]:
        """Fetch the number of contributors of ``owner/repo``.

        The contributors list is requested with one entry per page; the page
        number of the ``rel="last"`` link is then the contributor count. When
        the response has no such link, the count is 1 for a non-empty list
        and 0 for an empty one.

        Returns:
            ``(count, attempts, status, error)``. ``error`` is always a
            non-empty string when the request was not ok, so callers can rely
            on it to detect failure. ``count`` is ``None`` on failure or when
            the payload is not a list.
        """
        r = ctx.request_json(
            url=_github_contributors_api_url(owner, repo),
            headers=self._headers(),
            fetcher=fetch_json,
            retry_cfg=self._retry_cfg,
        )
        if not r.ok:
            # Same fallback message as the repository request in ``fetch``: a
            # failed request must never look like a success to the caller.
            return None, r.attempts, r.status, r.error or "Request failed."
        if not isinstance(r.data, list):
            # Request succeeded but there is nothing to count.
            return None, r.attempts, r.status, r.error
        last_page = _parse_last_page_from_link_header(self._link_header(r.headers))
        if isinstance(last_page, int):
            return last_page, r.attempts, r.status, None
        return (1 if r.data else 0), r.attempts, r.status, None

    def fetch(self, key: tuple[str, str], ctx) -> FetchResult:
        """Fetch the metrics for the repository identified by *key*.

        Two requests are made through ``ctx.request_json``: the repository
        itself and its contributors list. A failure of either one makes the
        whole fetch fail.

        Args:
            key: ``(owner, repo)`` pair as produced by ``key_for_component``.
            ctx: Object providing ``request_json(url=, headers=, fetcher=,
                retry_cfg=)``.

        Returns:
            A ``FetchResult`` whose ``data`` is a ``GitHubResult`` on success
            and ``None`` on failure; ``attempts`` is the total over all
            requests made.
        """
        owner, repo = key
        r = ctx.request_json(
            url=_github_repo_api_url(owner, repo),
            headers=self._headers(),
            fetcher=fetch_json,
            retry_cfg=self._retry_cfg,
        )
        attempts = int(r.attempts)
        if not r.ok or not isinstance(r.data, dict):
            return FetchResult(
                ok=False,
                data=None,
                error=r.error or "Request failed.",
                attempts=attempts,
                status=r.status,
            )

        data = r.data
        pushed_at = data.get("pushed_at")

        contributors_count, contrib_attempts, status, err = (
            self._fetch_contributors_count(ctx=ctx, owner=owner, repo=repo)
        )
        attempts += int(contrib_attempts)
        if err:
            return FetchResult(
                ok=False,
                data=None,
                error=err,
                attempts=attempts,
                status=status,
            )

        contributors = self._as_int(contributors_count)
        if contributors is not None and contributors < 0:
            contributors = None

        result = GitHubResult(
            owner=owner,
            repo=repo,
            stars=self._as_int(data.get("stargazers_count")),
            forks=self._as_int(data.get("forks_count")),
            contributors_count=contributors,
            open_issues=self._as_int(data.get("open_issues_count")),
            pushed_at=str(pushed_at) if isinstance(pushed_at, str) else None,
        )
        # The reported status is that of the repository request.
        return FetchResult(
            ok=True, data=result, error=None, attempts=attempts, status=r.status
        )

    def patch_success(
        self, comp: dict[str, Any], result: GitHubResult, fetched_at: str
    ) -> Patch:
        """Build the patch that stores a successful *result* on *comp*.

        Only metrics that are present in *result* are written; ``fetchedAt``
        and ``isStale=False`` are always written. The patch is flagged as
        changed only when the star count differs from the stored one.
        """
        metrics = comp.get("metrics")
        gh_metrics = metrics.get("github") if isinstance(metrics, dict) else None
        prev_stars = gh_metrics.get("stars") if isinstance(gh_metrics, dict) else None

        # (target key, value, required type), in the order the keys are written.
        candidates = (
            ("stars", result.stars, int),
            ("forks", result.forks, int),
            ("contributorsCount", result.contributors_count, int),
            ("openIssues", result.open_issues, int),
            ("lastPushAt", result.pushed_at, str),
        )
        updates: dict[str, Any] = {
            field: value
            for field, value, expected in candidates
            if isinstance(value, expected)
        }
        updates["fetchedAt"] = fetched_at or utc_now_iso()
        updates["isStale"] = False

        changed = isinstance(result.stars, int) and prev_stars != result.stars
        return Patch(bucket=self.bucket, updates=updates, changed=changed)

    def patch_failure(self, comp: dict[str, Any], error: str | None) -> Patch:
        """Build the patch for a failed fetch: mark the bucket stale, nothing else.

        *comp* and *error* are accepted but not used.
        """
        return Patch(bucket=self.bucket, updates={"isStale": True}, changed=False)
0 commit comments