@dataclass
class GitHubConfig:
    """GitHub-related configuration.

    Holds check-run presentation settings and the limits imposed by the
    GitHub Checks API (at most 50 annotations per check-run request).
    """
    # API token used for all GitHub requests (populated from GITHUB_TOKEN).
    token: str = ""
    check_run_name: str = "Release Notes Review (AI)"
    check_run_title: str = "Advisory Release Notes Review (AI)"
    # The Checks API accepts at most 50 annotations per request.
    max_annotations: int = 50
    max_annotation_message_length: int = 640
    # Hidden HTML comment used to find and update the bot's own PR comment.
    # Must be non-empty: an empty marker is a substring of EVERY comment
    # body, which would make the client "find" (and then overwrite)
    # arbitrary user comments.
    bot_comment_marker: str = "<!-- release-notes-review-bot -->"


@dataclass
class SecurityConfig:
    """Security-related configuration for authenticating docs-agent calls."""
    # Shared secret used to verify inbound request signatures.
    docs_agent_secret: str = ""
    signature_header: str = "X-Docs-Agent-Signature"
    idempotency_header: str = "X-Idempotency-Key"


@dataclass
class StoreConfig:
    """Storage configuration."""
    db_path: str = "docs_agent.db"
    # Jobs older than this are considered stale and eligible for cleanup.
    stale_job_ttl_seconds: int = 3600  # 1 hour


@dataclass
class FeatureFlags:
    """Feature flags for the service."""
    post_comments: bool = True
    post_check_runs: bool = True


@dataclass
class SeverityMapping:
    """Mapping of issue severity levels to GitHub annotation levels."""
    high: str = "failure"
    medium: str = "warning"
    low: str = "notice"
@dataclass
class ReleaseReviewConfig:
    """Main configuration for the release review service.

    Aggregates the per-area config sections. Build instances via
    ``from_env()`` so optional YAML-file settings and environment
    overrides are applied in the right precedence order.
    """
    github: GitHubConfig = field(default_factory=GitHubConfig)
    security: SecurityConfig = field(default_factory=SecurityConfig)
    store: StoreConfig = field(default_factory=StoreConfig)
    features: FeatureFlags = field(default_factory=FeatureFlags)
    severity_mapping: SeverityMapping = field(default_factory=SeverityMapping)

    # Default YAML locations probed when no explicit path is given.
    _DEFAULT_PATHS = ("config/release-reviewer.yml", "config/release-reviewer.yaml")

    @staticmethod
    def _env_bool(name: str, default: bool) -> bool:
        """Parse a boolean environment variable; unset returns *default*."""
        raw = os.getenv(name)
        if raw is None:
            return default
        return raw.lower() in ("true", "1", "yes")

    @classmethod
    def from_env(cls, config_path: Optional[str] = None) -> "ReleaseReviewConfig":
        """
        Load configuration from environment variables and optional YAML file.

        Environment variables take precedence over YAML config.
        """
        config = cls()

        # Load from YAML if a path is provided or a default file exists.
        yaml_path = config_path or os.getenv("RELEASE_REVIEWER_CONFIG")
        if yaml_path and Path(yaml_path).exists():
            config = cls._load_yaml(yaml_path, config)
        else:
            for candidate in cls._DEFAULT_PATHS:
                if Path(candidate).exists():
                    config = cls._load_yaml(candidate, config)
                    break

        # Environment overrides (highest precedence).
        config.github.token = os.getenv("GITHUB_TOKEN", config.github.token)
        config.security.docs_agent_secret = os.getenv(
            "DOCS_AGENT_SECRET", config.security.docs_agent_secret
        )
        config.store.db_path = os.getenv("DOCS_AGENT_DB", config.store.db_path)

        config.features.post_comments = cls._env_bool(
            "POST_COMMENTS", config.features.post_comments
        )
        config.features.post_check_runs = cls._env_bool(
            "POST_CHECK_RUNS", config.features.post_check_runs
        )

        return config

    @classmethod
    def _load_yaml(cls, path: str, config: "ReleaseReviewConfig") -> "ReleaseReviewConfig":
        """
        Merge settings from a YAML file into *config* and return it.

        Config files are optional, so any failure here is logged and
        swallowed: the service starts with defaults rather than crashing.
        Empty sections (e.g. a bare ``github:`` key) are treated as absent
        instead of raising inside the broad except.
        """
        try:
            with open(path, "r") as f:
                data = yaml.safe_load(f) or {}

            # GitHub settings
            gh = data.get("github") or {}
            config.github.check_run_name = gh.get(
                "check_run_name", config.github.check_run_name
            )
            config.github.check_run_title = gh.get(
                "check_run_title", config.github.check_run_title
            )
            config.github.max_annotations = gh.get(
                "max_annotations", config.github.max_annotations
            )
            config.github.max_annotation_message_length = gh.get(
                "max_annotation_message_length",
                config.github.max_annotation_message_length,
            )

            # Store settings
            store = data.get("store") or {}
            config.store.stale_job_ttl_seconds = store.get(
                "stale_job_ttl_seconds", config.store.stale_job_ttl_seconds
            )

            # Severity mapping
            sm = data.get("severity_mapping") or {}
            config.severity_mapping.high = sm.get("high", config.severity_mapping.high)
            config.severity_mapping.medium = sm.get(
                "medium", config.severity_mapping.medium
            )
            config.severity_mapping.low = sm.get("low", config.severity_mapping.low)

            # Feature flags
            features = data.get("features") or {}
            config.features.post_comments = features.get(
                "post_comments", config.features.post_comments
            )
            config.features.post_check_runs = features.get(
                "post_check_runs", config.features.post_check_runs
            )

        except Exception as e:
            # Deliberately broad: a malformed config file must not prevent
            # startup — log a warning and keep the defaults. Local import
            # keeps this module free of an otherwise unused dependency.
            import logging
            logging.warning(f"Failed to load config from {path}: {e}")

        return config

    def validate(self) -> None:
        """Raise ValueError listing every missing required setting."""
        errors = []

        if not self.github.token:
            errors.append("GITHUB_TOKEN is required")

        if not self.security.docs_agent_secret:
            errors.append("DOCS_AGENT_SECRET is required")

        if errors:
            raise ValueError(f"Configuration errors: {', '.join(errors)}")


# Global config instance (lazy loaded)
_config: Optional[ReleaseReviewConfig] = None


def get_config() -> ReleaseReviewConfig:
    """Get the global configuration instance, loading it on first use."""
    global _config
    if _config is None:
        _config = ReleaseReviewConfig.from_env()
    return _config


def reset_config() -> None:
    """Reset the global configuration (useful for testing)."""
    global _config
    _config = None
logger = logging.getLogger(__name__)


class GitHubAPIError(Exception):
    """Raised when a GitHub API call fails.

    Attributes:
        status_code: HTTP status code of the failed response (0 if unknown).
        retry_after: Seconds to wait before retrying, when the server said so.
    """

    def __init__(self, message: str, status_code: int = 0, retry_after: Optional[int] = None):
        super().__init__(message)
        self.status_code = status_code
        self.retry_after = retry_after


class GitHubRateLimitError(GitHubAPIError):
    """Raised on GitHub rate limiting (429, or 403 with zero remaining quota)."""
    pass


@dataclass
class CheckRunOutput:
    """Output data for a GitHub check run (title, summary, annotations)."""
    title: str
    summary: str
    annotations: List[Dict[str, Any]]


class GitHubClient:
    """Client for interacting with the GitHub REST API."""

    BASE_URL = "https://api.github.com"

    def __init__(self, token: Optional[str] = None):
        """Initialize the GitHub client.

        Args:
            token: Optional explicit API token; falls back to the configured
                GITHUB_TOKEN.
        """
        config = get_config()
        self.token = token or config.github.token
        self.config = config.github

        # Retry transient server errors only; 4xx (auth, rate limit) are
        # handled explicitly in _handle_response instead of retried blindly.
        self.session = requests.Session()
        retry_strategy = Retry(
            total=2,
            backoff_factor=0.5,
            status_forcelist=[500, 502, 503, 504],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("https://", adapter)
        self.session.mount("http://", adapter)

    def _headers(self) -> Dict[str, str]:
        """Headers sent with every GitHub API request."""
        return {
            "Authorization": f"Bearer {self.token}",
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }

    def _handle_response(self, response: requests.Response) -> Dict[str, Any]:
        """Decode a GitHub API response, raising typed errors on failure.

        Raises:
            GitHubRateLimitError: on 429, or 403 with zero remaining quota.
            GitHubAPIError: on any other non-2xx response.
        """
        # Check for rate limiting
        if response.status_code in (403, 429):
            remaining = response.headers.get("X-RateLimit-Remaining", "unknown")
            retry_after = response.headers.get("Retry-After")

            if remaining == "0" or response.status_code == 429:
                # Retry-After may be delta-seconds OR an HTTP-date (RFC 9110);
                # fall back to a safe default when it isn't a plain integer.
                try:
                    retry_seconds = int(retry_after) if retry_after else 60
                except (TypeError, ValueError):
                    retry_seconds = 60
                raise GitHubRateLimitError(
                    f"GitHub rate limit exceeded. Retry after {retry_seconds}s",
                    status_code=response.status_code,
                    retry_after=retry_seconds
                )

        # Check for other errors
        if not response.ok:
            try:
                error_data = response.json()
                message = error_data.get("message", response.text)
            except Exception:
                # Non-JSON error body — fall back to the raw text.
                message = response.text

            raise GitHubAPIError(
                f"GitHub API error: {message}",
                status_code=response.status_code
            )

        return response.json()

    def get_pr_head_sha(self, repo: str, pr_number: int) -> str:
        """
        Get the head SHA of a pull request.

        Args:
            repo: Repository in 'owner/repo' format
            pr_number: Pull request number

        Returns:
            The head commit SHA
        """
        url = f"{self.BASE_URL}/repos/{repo}/pulls/{pr_number}"
        logger.debug(f"Fetching PR info: {url}")

        response = self.session.get(url, headers=self._headers())
        data = self._handle_response(response)

        return data["head"]["sha"]

    def create_check_run(
        self,
        repo: str,
        head_sha: str,
        output: CheckRunOutput
    ) -> int:
        """
        Create a GitHub check run with neutral conclusion.

        Args:
            repo: Repository in 'owner/repo' format
            head_sha: The commit SHA to attach the check run to
            output: Check run output data

        Returns:
            The check run ID
        """
        url = f"{self.BASE_URL}/repos/{repo}/check-runs"

        # Truncate annotations to the per-request API limit.
        annotations = output.annotations[:self.config.max_annotations]
        if len(output.annotations) > self.config.max_annotations:
            logger.warning(
                f"Truncating annotations from {len(output.annotations)} "
                f"to {self.config.max_annotations}"
            )

        # Truncate over-long annotation messages in place.
        # NOTE: this mutates the caller's annotation dicts.
        for ann in annotations:
            if len(ann.get("message", "")) > self.config.max_annotation_message_length:
                ann["message"] = (
                    ann["message"][:self.config.max_annotation_message_length - 3] + "..."
                )

        payload = {
            "name": self.config.check_run_name,
            "head_sha": head_sha,
            # Advisory-only: always complete with a neutral conclusion so
            # the check never blocks merging.
            "status": "completed",
            "conclusion": "neutral",
            "output": {
                "title": output.title,
                "summary": output.summary,
                "annotations": annotations
            }
        }

        logger.debug(f"Creating check run: {url}")
        response = self.session.post(url, headers=self._headers(), json=payload)
        data = self._handle_response(response)

        check_run_id = data["id"]
        logger.info(f"Created check run {check_run_id} for {repo}")
        return check_run_id

    def find_bot_comment(self, repo: str, pr_number: int) -> Optional[int]:
        """
        Find an existing bot comment on the PR.

        Scans issue comments (paginated) for the configured marker string.

        Args:
            repo: Repository in 'owner/repo' format
            pr_number: Pull request number

        Returns:
            The comment ID if found, None otherwise. Also None when the
            marker is empty — an empty marker is a substring of every
            comment body and would match (and later overwrite) arbitrary
            user comments.
        """
        marker = self.config.bot_comment_marker
        if not marker:
            logger.warning(
                "bot_comment_marker is empty; skipping bot-comment lookup"
            )
            return None

        url = f"{self.BASE_URL}/repos/{repo}/issues/{pr_number}/comments"
        logger.debug(f"Searching for bot comment: {url}")

        # Paginate through comments
        page = 1
        per_page = 100

        while True:
            response = self.session.get(
                url,
                headers=self._headers(),
                params={"page": page, "per_page": per_page}
            )
            comments = self._handle_response(response)

            if not comments:
                break

            for comment in comments:
                if marker in comment.get("body", ""):
                    logger.debug(f"Found existing bot comment: {comment['id']}")
                    return comment["id"]

            # A short page means this was the last one.
            if len(comments) < per_page:
                break

            page += 1

        return None

    def create_comment(self, repo: str, pr_number: int, body: str) -> int:
        """
        Create a new PR comment.

        Args:
            repo: Repository in 'owner/repo' format
            pr_number: Pull request number
            body: Comment body (markdown)

        Returns:
            The comment ID
        """
        url = f"{self.BASE_URL}/repos/{repo}/issues/{pr_number}/comments"
        logger.debug(f"Creating comment: {url}")

        response = self.session.post(
            url,
            headers=self._headers(),
            json={"body": body}
        )
        data = self._handle_response(response)

        comment_id = data["id"]
        logger.info(f"Created comment {comment_id} on PR {pr_number}")
        return comment_id

    def update_comment(self, repo: str, comment_id: int, body: str) -> int:
        """
        Update an existing PR comment.

        Args:
            repo: Repository in 'owner/repo' format
            comment_id: The comment ID to update
            body: New comment body (markdown)

        Returns:
            The comment ID
        """
        url = f"{self.BASE_URL}/repos/{repo}/issues/comments/{comment_id}"
        logger.debug(f"Updating comment: {url}")

        response = self.session.patch(
            url,
            headers=self._headers(),
            json={"body": body}
        )
        self._handle_response(response)

        logger.info(f"Updated comment {comment_id}")
        return comment_id

    def create_or_update_comment(self, repo: str, pr_number: int, body: str) -> int:
        """
        Create a new comment or update the existing bot comment.

        Args:
            repo: Repository in 'owner/repo' format
            pr_number: Pull request number
            body: Comment body (markdown)

        Returns:
            The comment ID (new or existing)
        """
        existing_comment_id = self.find_bot_comment(repo, pr_number)

        if existing_comment_id:
            return self.update_comment(repo, existing_comment_id, body)
        else:
            return self.create_comment(repo, pr_number, body)


# Global client instance (lazy loaded)
_client: Optional[GitHubClient] = None


def get_github_client() -> GitHubClient:
    """Get the global GitHub client instance, creating it on first use."""
    global _client
    if _client is None:
        _client = GitHubClient()
    return _client


def reset_github_client() -> None:
    """Reset the global GitHub client (useful for testing)."""
    global _client
    _client = None
+""" +import logging +from typing import Dict, List, Any, Tuple +from dataclasses import dataclass + +from .schemas import ReviewPayload, Issue, Annotation +from .config import get_config +from .github_client import CheckRunOutput + +logger = logging.getLogger(__name__) + + +@dataclass +class SeverityCounts: + """Counts of issues by severity.""" + high: int = 0 + medium: int = 0 + low: int = 0 + + def total(self) -> int: + return self.high + self.medium + self.low + + def summary_line(self) -> str: + return f"High: {self.high} · Medium: {self.medium} · Low: {self.low}" + + +class Reporter: + """Formats review results for GitHub.""" + + def __init__(self): + self.config = get_config() + + def count_severities(self, issues: List[Issue]) -> SeverityCounts: + """Count issues by severity level.""" + counts = SeverityCounts() + for issue in issues: + if issue.severity == "HIGH": + counts.high += 1 + elif issue.severity == "MEDIUM": + counts.medium += 1 + elif issue.severity == "LOW": + counts.low += 1 + return counts + + def build_check_output(self, payload: ReviewPayload) -> CheckRunOutput: + """ + Build the check run output from the review payload. + + Args: + payload: The review payload + + Returns: + CheckRunOutput with title, summary, and annotations + """ + counts = self.count_severities(payload.issues) + + # Build title + title = self.config.github.check_run_title + + # Build summary + summary_parts = [counts.summary_line()] + + if payload.summary: + summary_parts.append("") + summary_parts.append(payload.summary) + + # Add up to 10 example issues in summary + if payload.issues: + summary_parts.append("") + summary_parts.append("**Sample issues:**") + for issue in payload.issues[:10]: + location = "" + if issue.file: + location = f" (`{issue.file}" + if issue.line: + location += f":{issue.line}" + location += "`)" + summary_parts.append(f"- [{issue.severity}] {issue.title}{location}") + + if len(payload.issues) > 10: + summary_parts.append(f"- ... 
and {len(payload.issues) - 10} more") + + summary = "\n".join(summary_parts) + + # Build annotations + annotations = self._build_annotations(payload) + + return CheckRunOutput( + title=title, + summary=summary, + annotations=annotations + ) + + def _build_annotations(self, payload: ReviewPayload) -> List[Dict[str, Any]]: + """Build GitHub annotations from issues and explicit annotations.""" + annotations = [] + + # First, add explicit annotations from the payload + if payload.annotations: + for ann in payload.annotations: + annotations.append({ + "path": ann.path, + "start_line": ann.start_line, + "end_line": ann.end_line or ann.start_line, + "annotation_level": ann.annotation_level, + "message": ann.message + }) + + # Then, create annotations from issues that have file/line info + for issue in payload.issues: + if issue.file and issue.line: + # Map severity to annotation level + level = self._severity_to_annotation_level(issue.severity) + + message = f"{issue.title}: {issue.message}" + if issue.suggestion: + message += f"\n\nSuggestion: {issue.suggestion}" + + annotations.append({ + "path": issue.file, + "start_line": issue.line, + "end_line": issue.line, + "annotation_level": level, + "message": message + }) + + # Warn if we have too many annotations + max_annotations = self.config.github.max_annotations + if len(annotations) > max_annotations: + logger.warning( + f"Payload has {len(annotations)} annotations, " + f"but GitHub only accepts {max_annotations}. Truncating." + ) + + return annotations + + def _severity_to_annotation_level(self, severity: str) -> str: + """Map issue severity to GitHub annotation level.""" + mapping = { + "HIGH": self.config.severity_mapping.high, + "MEDIUM": self.config.severity_mapping.medium, + "LOW": self.config.severity_mapping.low, + } + return mapping.get(severity, "notice") + + def build_comment_body(self, payload: ReviewPayload) -> str: + """ + Build the PR comment body from the review payload. 
+ + Args: + payload: The review payload + + Returns: + Markdown-formatted comment body + """ + counts = self.count_severities(payload.issues) + marker = self.config.github.bot_comment_marker + + # Start with marker and header + lines = [ + marker, + f"**Release Notes Advisory (AI)** — {payload.summary or 'Review complete'}", + "", + counts.summary_line(), + "", + ] + + # Group issues by severity + high_issues = [i for i in payload.issues if i.severity == "HIGH"] + medium_issues = [i for i in payload.issues if i.severity == "MEDIUM"] + low_issues = [i for i in payload.issues if i.severity == "LOW"] + + # Add each severity section + if high_issues: + lines.extend(self._format_issue_section("HIGH", high_issues)) + + if medium_issues: + lines.extend(self._format_issue_section("MEDIUM", medium_issues)) + + if low_issues: + lines.extend(self._format_issue_section("LOW", low_issues)) + + # Add links section + if payload.links: + lines.append("---") + lines.append("") + lines.append("**Links**") + if payload.links.deploy_preview: + lines.append(f"- [Deploy Preview]({payload.links.deploy_preview})") + if payload.links.full_report: + lines.append(f"- [Full JSON Report]({payload.links.full_report})") + lines.append("") + + # Footer + lines.append("---") + lines.append("_Posted by docs-fast-agent — advisory only, does not block merge._") + + return "\n".join(lines) + + def _format_issue_section( + self, + severity: str, + issues: List[Issue] + ) -> List[str]: + """Format a section of issues for the PR comment.""" + lines = [ + f"### {severity} ({len(issues)})", + "", + ] + + for issue in issues: + # Title and message + lines.append(f"- **{issue.title}:** {issue.message}") + + # File and line + if issue.file: + location = f"`{issue.file}" + if issue.line: + location += f":{issue.line}" + location += "`" + lines.append(f" {location}") + + # Suggestion + if issue.suggestion: + lines.append(f" **Suggestion:** {issue.suggestion}") + + lines.append("") + + return lines + + +# Global 
# Global reporter instance (lazy loaded).
# The annotation is a string so the "Reporter | None" union is never
# evaluated at runtime (this module does not import typing.Optional);
# the bare `Reporter` annotation with a None default was a type error.
_reporter: "Reporter | None" = None


def get_reporter() -> Reporter:
    """Get the global reporter instance, creating it on first use."""
    global _reporter
    if _reporter is None:
        _reporter = Reporter()
    return _reporter


def reset_reporter() -> None:
    """Reset the global reporter (useful for testing)."""
    global _reporter
    _reporter = None
logger = logging.getLogger(__name__)


@dataclass
class ParsedYAMLRelease:
    """Parsed release entry from releases.yml."""
    release_name: str
    major_version: str
    release_date: str
    release_type: str
    go_version: Optional[str] = None
    sha: Optional[str] = None
    previous_release: Optional[str] = None
    # Full raw mapping as parsed from YAML, for fields not modeled above.
    raw: Dict[str, Any] = field(default_factory=dict)


@dataclass
class ParsedMarkdownRelease:
    """Parsed release notes from a markdown file."""
    version: str
    release_date: str
    sections: Dict[str, List[str]]  # section_name -> list of notes
    pr_references: List[str]  # List of PR numbers referenced
    link_definitions: Dict[str, str]  # PR number -> URL
    raw_content: str = ""


@dataclass
class ReviewResult:
    """Result of the review process."""
    issues: List[Issue]
    summary: str
    yaml_data: Optional[ParsedYAMLRelease] = None
    markdown_data: Optional[ParsedMarkdownRelease] = None


class ReleaseNotesReviewer:
    """AI-powered reviewer for CockroachDB release notes PRs."""

    # Fields every releases.yml entry must define.
    REQUIRED_YAML_FIELDS = [
        "release_name", "major_version", "release_date", "release_type"
    ]

    # Valid release types
    VALID_RELEASE_TYPES = [
        "Production", "Testing", "Preview", "Beta", "Alpha", "Withdrawn"
    ]

    # Valid section headers in markdown
    VALID_SECTIONS = [
        "backward-incompatible-changes", "security-updates", "sql-language-changes",
        "operational-changes", "command-line-changes", "db-console-changes",
        "bug-fixes", "performance-improvements", "contributors", "doc-updates",
        "enterprise-edition-changes", "general-changes"
    ]

    def __init__(self, github_token: Optional[str] = None, openai_api_key: Optional[str] = None):
        """Initialize the reviewer.

        Args:
            github_token: GitHub API token; falls back to GITHUB_TOKEN.
            openai_api_key: OpenAI key; falls back to OPENAI_API_KEY. The
                OpenAI client is only constructed when a key is available.
        """
        self.github_token = github_token or os.getenv("GITHUB_TOKEN")
        self.openai_api_key = openai_api_key or os.getenv("OPENAI_API_KEY")
        self.openai_client = None
        if self.openai_api_key:
            self.openai_client = OpenAI(api_key=self.openai_api_key)

    def _github_headers(self) -> Dict[str, str]:
        """Headers sent with every GitHub API request."""
        return {
            "Authorization": f"Bearer {self.github_token}",
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }

    def fetch_pr_files(self, repo: str, pr_number: int) -> List[Dict[str, Any]]:
        """Fetch ALL files changed in a PR.

        Follows pagination: the endpoint returns at most 100 files per page
        (default 30), and release PRs can touch more than one page's worth.
        """
        url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/files"
        files: List[Dict[str, Any]] = []
        page = 1
        per_page = 100
        while True:
            response = requests.get(
                url,
                headers=self._github_headers(),
                params={"page": page, "per_page": per_page},
            )
            response.raise_for_status()
            batch = response.json()
            files.extend(batch)
            # A short page means this was the last one.
            if len(batch) < per_page:
                break
            page += 1
        return files

    def fetch_file_content(self, repo: str, path: str, ref: str) -> str:
        """Fetch a file's decoded content from GitHub at the given ref."""
        url = f"https://api.github.com/repos/{repo}/contents/{path}?ref={ref}"
        response = requests.get(url, headers=self._github_headers())
        response.raise_for_status()
        data = response.json()

        # The contents API base64-encodes file bodies (files <= 1 MB).
        if data.get("encoding") == "base64":
            import base64
            return base64.b64decode(data["content"]).decode("utf-8")
        return data.get("content", "")

    def fetch_pr_diff(self, repo: str, pr_number: int) -> str:
        """Fetch the unified diff of a PR (via the github.diff media type)."""
        url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"
        headers = self._github_headers()
        headers["Accept"] = "application/vnd.github.diff"
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        return response.text

    def check_pr_exists(self, pr_number: str) -> bool:
        """Check if a PR exists in cockroachdb/cockroach.

        Only a 200 counts as existing; network failures raise from requests.
        """
        url = f"https://api.github.com/repos/cockroachdb/cockroach/pulls/{pr_number}"
        response = requests.get(url, headers=self._github_headers())
        return response.status_code == 200

    def parse_yaml_diff(self, diff: str) -> Optional[ParsedYAMLRelease]:
        """Parse the releases.yml additions out of a unified diff.

        Returns:
            A ParsedYAMLRelease, or None when the diff adds no YAML lines
            for releases.yml or the added lines do not parse to a mapping.
        """
        yaml_section = False
        yaml_lines: List[str] = []

        for line in diff.split("\n"):
            if not yaml_section:
                # Only a file-header line mentioning releases.yml starts the
                # section; a plain content line that happens to contain the
                # string must not (it could belong to another file's hunk).
                if "releases.yml" in line and line.startswith(("diff --git", "+++")):
                    yaml_section = True
                continue
            if line.startswith("diff --git"):
                break  # next file's diff begins
            if line.startswith("+") and not line.startswith("+++"):
                yaml_lines.append(line[1:])  # Remove the '+' prefix

        if not yaml_lines:
            return None

        yaml_content = "\n".join(yaml_lines)
        try:
            parsed = yaml.safe_load(yaml_content)

            # Additions to a list parse as a one-element list; unwrap it.
            if isinstance(parsed, list):
                release = parsed[0] if parsed else None
            else:
                release = parsed

            # Guard non-mapping results (e.g. a bare scalar) before .get().
            if not release or not isinstance(release, dict):
                return None

            return ParsedYAMLRelease(
                release_name=release.get("release_name", ""),
                major_version=release.get("major_version", ""),
                release_date=release.get("release_date", ""),
                release_type=release.get("release_type", ""),
                go_version=release.get("go_version"),
                sha=release.get("sha"),
                previous_release=release.get("previous_release"),
                raw=release
            )
        except yaml.YAMLError as e:
            logger.warning(f"Failed to parse YAML: {e}")
            return None
line.startswith("+++"): + md_lines.append(line[1:]) + + if not md_lines: + return None + + content = "\n".join(md_lines) + + # Extract version from header + version_match = re.search(r"## (v[\d.]+(?:-[\w.]+)?)", content) + version = version_match.group(1) if version_match else "" + + # Extract release date + date_match = re.search(r"Release Date:\s*(.+)", content) + release_date = date_match.group(1).strip() if date_match else "" + + # Extract sections + sections: Dict[str, List[str]] = {} + current_section = None + current_notes = [] + + for line in md_lines: + # Check for section header + section_match = re.search(r'