"""
Issue triage bot for TorchSharp.

Classifies new GitHub issues using an LLM and applies the appropriate label.
Posts a polite comment acknowledging the issue.
Skips issues that already have triage labels (manually set by maintainers).
"""

import json
import os
import re
import sys
import time
import urllib.error
import urllib.request

# Base URL of the GitHub REST API; request paths are appended to it verbatim.
GITHUB_API = "https://api.github.com"
# Base URL of the inference service and the model name.
# NOTE(review): presumably consumed by the LLM-calling code further down the
# file -- confirm against the caller.
INFERENCE_API = "https://models.github.ai/inference"
MODEL = "gpt-4o-mini"

# Labels the bot may apply. Every entry must have a matching key in
# COMMENT_TEMPLATES below.
TRIAGE_LABELS = {"bug", "Missing Feature", "question"}

# The JSON example spells out what belongs in each field. An example with
# empty values, combined with "in this exact format", can be read literally
# as a request for empty strings.
SYSTEM_PROMPT = """\
You are an issue triage bot for TorchSharp, a .NET binding for PyTorch.

Classify the following GitHub issue into exactly ONE of these categories:
- bug: Something is broken, crashes, throws an unexpected error, or produces wrong results.
- Missing Feature: A PyTorch API or feature that is not yet available in TorchSharp.
- question: The user is asking for help, guidance, or clarification on how to use TorchSharp.

Respond with ONLY a JSON object in this exact format, no other text:
{"label": "<bug | Missing Feature | question>", "reason": "<one short sentence explaining the choice>"}
"""

# Comment bodies keyed by triage label. Each template has exactly one
# ``{reason}`` placeholder, filled in through ``str.format``.
COMMENT_TEMPLATES = {
    "bug": (
        "Thank you for reporting this issue! 🙏\n\n"
        "I've triaged this as a **bug**. {reason}\n\n"
        "A maintainer will review this soon. In the meantime, please make sure you've "
        "included a minimal code sample to reproduce the issue and the TorchSharp version you're using.\n\n"
        "*This comment was generated automatically by the issue triage bot.*"
    ),
    "Missing Feature": (
        "Thank you for opening this issue! 🙏\n\n"
        "I've triaged this as a **missing feature** request. {reason}\n\n"
        "If you haven't already, it would be very helpful to include a link to the "
        "corresponding PyTorch documentation and a Python code example.\n\n"
        "*This comment was generated automatically by the issue triage bot.*"
    ),
    "question": (
        "Thank you for reaching out! 🙏\n\n"
        "I've triaged this as a **question**. {reason}\n\n"
        "A maintainer or community member will try to help as soon as possible. "
        "Please make sure to include the TorchSharp version and a code sample for context.\n\n"
        "*This comment was generated automatically by the issue triage bot.*"
    ),
}
def github_request(method, path, body=None):
    """Make an authenticated request to the GitHub API.

    Args:
        method: HTTP verb passed to ``urllib.request.Request`` (e.g. ``"GET"``).
        path: Path appended verbatim to ``GITHUB_API`` to form the URL.
        body: Optional JSON-serialisable payload. ``None`` sends no request
            body; any other value, including ``{}`` or ``[]``, is
            JSON-encoded and sent with a JSON content type.

    Returns:
        The decoded JSON response, or ``None`` when the response body is
        empty (for example ``204 No Content``).

    Raises:
        KeyError: ``GITHUB_TOKEN`` is not set in the environment.
        RuntimeError: The server answered with an HTTP error status. The
            message carries the method, path, status code and response body.
        json.JSONDecodeError: The response body is non-empty but not JSON.

    Other ``urllib`` errors (such as connection failures) are not caught
    here and propagate to the caller.
    """
    token = os.environ["GITHUB_TOKEN"]
    url = f"{GITHUB_API}{path}"

    # Test against None, not truthiness, so an intentionally empty payload
    # such as {} or [] is still transmitted.
    payload = json.dumps(body).encode() if body is not None else None

    req = urllib.request.Request(url, data=payload, method=method)
    req.add_header("Authorization", f"Bearer {token}")
    req.add_header("Accept", "application/vnd.github+json")
    req.add_header("X-GitHub-Api-Version", "2022-11-28")
    if payload is not None:
        req.add_header("Content-Type", "application/json")

    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        error_body = e.read().decode(errors="replace") if e.fp else ""
        raise RuntimeError(f"GitHub API {method} {path} failed ({e.code}): {error_body}") from e

    # Successful responses may carry no body at all; json.loads(b"") would
    # raise, so report "no content" as None.
    if not raw.strip():
        return None
    return json.loads(raw)
+ reason = re.sub(r"```.*?```", "", reason, flags=re.DOTALL) + + # Remove any remaining standalone backticks used for inline code. + reason = reason.replace("`", "") + + # Strip simple HTML tags such as