|
| 1 | +# /// script |
| 2 | +# requires-python = ">=3.10" |
| 3 | +# dependencies = [ |
| 4 | +# "rigging", |
| 5 | +# "typer", |
| 6 | +# ] |
| 7 | +# /// |
| 8 | + |
1 | 9 | import asyncio |
2 | | -import base64 |
3 | | -import os |
| 10 | +import subprocess |
4 | 11 | import typing as t |
5 | 12 |
|
6 | | -from pydantic import ConfigDict, StringConstraints |
| 13 | +import typer |
7 | 14 |
|
8 | 15 | import rigging as rg |
9 | | -from rigging import logger |
10 | | -from rigging.generator import GenerateParams, Generator, register_generator |
11 | | - |
12 | | -logger.enable("rigging") |
13 | | - |
14 | | -MAX_TOKENS = 8000 |
15 | | -TRUNCATION_WARNING = "\n\n**Note**: Due to the large size of this diff, some content has been truncated." |
16 | | -str_strip = t.Annotated[str, StringConstraints(strip_whitespace=True)] |
17 | | - |
18 | | - |
19 | | -class PRDiffData(rg.Model): |
20 | | - """XML model for PR diff data""" |
21 | | - |
22 | | - content: str_strip = rg.element() |
23 | | - |
24 | | - @classmethod |
25 | | - def xml_example(cls) -> str: |
26 | | - return """<diff><content>example diff content</content></diff>""" |
27 | | - |
28 | | - |
29 | | -class PRDecorator(Generator): |
30 | | - """Generator for creating PR descriptions""" |
31 | | - |
32 | | - model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True) |
33 | | - |
34 | | - api_key: str = "" |
35 | | - max_tokens: int = MAX_TOKENS |
36 | | - |
37 | | - def __init__(self, model: str, params: rg.GenerateParams) -> None: |
38 | | - api_key = params.extra.get("api_key") |
39 | | - if not api_key: |
40 | | - raise ValueError("api_key is required in params.extra") |
41 | | - |
42 | | - super().__init__(model=model, params=params, api_key=api_key) |
43 | | - self.api_key = api_key |
44 | | - self.max_tokens = params.max_tokens or MAX_TOKENS |
45 | | - |
46 | | - async def generate_messages( |
47 | | - self, |
48 | | - messages: t.Sequence[t.Sequence[rg.Message]], |
49 | | - params: t.Sequence[GenerateParams], |
50 | | - ) -> t.Sequence[rg.GeneratedMessage]: |
51 | | - responses = [] |
52 | | - for message_seq, p in zip(messages, params): |
53 | | - base_generator = rg.get_generator(self.model, params=p) |
54 | | - llm_response = await base_generator.generate_messages([message_seq], [p]) |
55 | | - responses.extend(llm_response) |
56 | | - return responses |
57 | | - |
58 | | - |
59 | | -register_generator("pr_decorator", PRDecorator) |
60 | | - |
61 | | - |
62 | | -async def generate_pr_description(diff_text: str) -> str: |
63 | | - """Generate a PR description from the diff text""" |
64 | | - diff_tokens = len(diff_text) // 4 |
65 | | - if diff_tokens >= MAX_TOKENS: |
66 | | - char_limit = (MAX_TOKENS * 4) - len(TRUNCATION_WARNING) |
67 | | - diff_text = diff_text[:char_limit] + TRUNCATION_WARNING |
68 | | - |
69 | | - diff_data = PRDiffData(content=diff_text) |
70 | | - params = rg.GenerateParams( |
71 | | - extra={ |
72 | | - "api_key": os.environ["OPENAI_API_KEY"], |
73 | | - "diff_text": diff_text, |
74 | | - }, |
75 | | - temperature=0.7, |
76 | | - max_tokens=500, |
77 | | - ) |
78 | | - |
79 | | - generator = rg.get_generator("pr_decorator!gpt-4-turbo-preview", params=params) |
80 | | - prompt = f"""You are a helpful AI that generates clear and concise PR descriptions. |
81 | | - Analyze the provided diff between {PRDiffData.xml_example()} tags and create a summary using exactly this format: |
82 | | -
|
83 | | - ### PR Summary |
84 | | -
|
85 | | - #### Overview of Changes |
86 | | - <overview paragraph> |
87 | | -
|
88 | | - #### Key Modifications |
89 | | - 1. **<modification title>**: <description> |
90 | | - 2. **<modification title>**: <description> |
91 | | - 3. **<modification title>**: <description> |
92 | | - (continue as needed) |
93 | | -
|
94 | | - #### Potential Impact |
95 | | - - <impact point 1> |
96 | | - - <impact point 2> |
97 | | - - <impact point 3> |
98 | | - (continue as needed) |
99 | | -
|
100 | | - Here is the PR diff to analyze: |
101 | | - {diff_data.to_xml()}""" |
102 | | - |
103 | | - chat = await generator.chat(prompt).run() |
104 | | - return chat.last.content.strip() |
105 | | - |
106 | | - |
107 | | -async def main(): |
108 | | - """Main function for CI environment""" |
109 | | - if not os.environ.get("OPENAI_API_KEY"): |
110 | | - raise ValueError("OPENAI_API_KEY environment variable must be set") |
111 | | - |
112 | | - try: |
113 | | - diff_text = os.environ.get("GIT_DIFF", "") |
114 | | - if not diff_text: |
115 | | - raise ValueError("No diff found in GIT_DIFF environment variable") |
116 | 16 |
|
117 | | - try: |
118 | | - diff_text = base64.b64decode(diff_text).decode("utf-8") |
119 | | - except Exception: |
120 | | - padding = 4 - (len(diff_text) % 4) |
121 | | - if padding != 4: |
122 | | - diff_text += "=" * padding |
123 | | - diff_text = base64.b64decode(diff_text).decode("utf-8") |
124 | | - |
125 | | - logger.debug(f"Processing diff of length: {len(diff_text)}") |
126 | | - description = await generate_pr_description(diff_text) |
127 | | - |
128 | | - with open(os.environ["GITHUB_OUTPUT"], "a") as f: |
129 | | - f.write("content<<EOF\n") |
130 | | - f.write(description) |
131 | | - f.write("\nEOF\n") |
132 | | - f.write(f"debug_diff_length={len(diff_text)}\n") |
133 | | - f.write(f"debug_description_length={len(description)}\n") |
134 | | - debug_preview = description[:500] |
135 | | - f.write("debug_preview<<EOF\n") |
136 | | - f.write(debug_preview) |
137 | | - f.write("\nEOF\n") |
138 | | - |
139 | | - except Exception as e: |
140 | | - logger.error(f"Error in main: {e}") |
141 | | - raise |
# Appended to the diff text when it is cut off at --max-diff-lines, so both
# the model and anyone reading the output know the input was incomplete.
TRUNCATION_WARNING = "\n---\n**Note**: Due to the large size of this diff, some content has been truncated."

| 19 | + |
# NOTE: the docstring below is not documentation — rigging's @rg.prompt turns
# this empty-bodied function into an LLM-backed callable and uses the docstring
# verbatim as the prompt template. Editing its text changes runtime behavior.
# The return annotation (rg.Ctx("markdown")) asks rigging to parse the reply
# out of a markdown-tagged context; the body is intentionally empty, hence the
# `empty-body` type-ignore.
@rg.prompt
def generate_pr_description(diff: str) -> t.Annotated[str, rg.Ctx("markdown")]:  # type: ignore[empty-body]
    """
    Analyze the provided git diff and create a PR description in markdown format.

    <guidance>
    - Keep the summary concise and informative.
    - Use bullet points to structure important statements.
    - Focus on key modifications and potential impact - if any.
    - Do not add in general advice or best-practice information.
    - Write like a developer who authored the changes.
    - Prefer flat bullet lists over nested.
    - Do not include any title structure.
    </guidance>
    """
| 35 | + |
| 36 | + |
def get_diff(target_ref: str, source_ref: str) -> str:
    """
    Return the git diff of ``source_ref`` relative to its merge base with ``target_ref``.

    This mirrors what a PR review shows: only the changes introduced on the
    source branch since it diverged from the target branch.

    Args:
        target_ref: The branch/ref the PR targets (e.g. "main").
        source_ref: The branch/ref containing the changes (e.g. "HEAD").

    Returns:
        The unified diff text (may be empty if there are no changes).

    Raises:
        subprocess.CalledProcessError: If either git command fails
            (unknown ref, not a git repository, no merge base, ...).
    """

    merge_base = subprocess.run(
        ["git", "merge-base", source_ref, target_ref],
        capture_output=True,
        text=True,
        check=True,
    ).stdout.strip()
    # Diff merge_base against source_ref explicitly. The previous form
    # (`git diff <merge_base>` with no second commit) compared the merge base
    # against the *working tree*, which is wrong whenever source_ref is not
    # the currently checked-out, clean state.
    diff_text = subprocess.run(
        ["git", "diff", merge_base, source_ref],
        capture_output=True,
        text=True,
        check=True,
    ).stdout
    return diff_text
| 55 | + |
| 56 | + |
def main(
    target_ref: str,
    source_ref: str = "HEAD",
    generator_id: str = "openai/gpt-4o-mini",
    max_diff_lines: int = 1000,
) -> None:
    """
    Generate a PR description for the changes between two git refs.

    Fetches the diff of ``source_ref`` against ``target_ref``, truncates it to
    at most ``max_diff_lines`` lines (appending a truncation notice), feeds it
    to the LLM prompt bound to ``generator_id``, and prints the result.
    """

    diff = get_diff(target_ref, source_ref)

    # Cap the prompt size; note the cut so the model knows the diff is partial.
    lines = diff.split("\n")
    if len(lines) > max_diff_lines:
        truncated = "\n".join(lines[:max_diff_lines])
        diff = truncated + TRUNCATION_WARNING

    bound_prompt = generate_pr_description.bind(generator_id)
    description = asyncio.run(bound_prompt(diff))
    print(description)
142 | 74 |
|
143 | 75 |
|
if __name__ == "__main__":
    # typer derives the CLI (positional args and --options) from main's
    # signature: target_ref is required; the rest become options with defaults.
    typer.run(main)
0 commit comments