|
40 | 40 | # Constants |
41 | 41 | # ───────────────────────────────────────────────────────────────────────────── |
42 | 42 |
|
43 | | -MAX_AGENT_TURNS = 200 |
| 43 | +MAX_AGENT_TURNS = 400 |
44 | 44 | MAX_VALIDATION_RETRIES = 3 |
45 | 45 | DEFAULT_TIMEOUT_MS = 60_000 |
46 | 46 |
|
@@ -565,32 +565,40 @@ def _build_user_message(self, task: SweTask) -> str: |
565 | 565 | {self._truncate(task.patch, 4000)} |
566 | 566 | ``` |
567 | 567 |
|
568 | | -== WORKFLOW (FOLLOW EXACTLY) == |
| 568 | +== CRITICAL: YOU MUST USE TOOLS, NOT TEXT == |
569 | 569 |
|
570 | | -Step 1: Install dependencies |
571 | | -- Run: apt-get update && apt-get install -y python3 python3-pip git |
572 | | -- Check pyproject.toml/setup.py for install commands |
573 | | -- Run install commands via `shell` |
| 570 | +DO NOT write text responses. DO NOT ask questions. DO NOT explain. |
| 571 | +USE TOOLS: shell, read_file, write_file, submit_tests. |
574 | 572 |
|
575 | | -Step 2: Explore the changed code |
576 | | -- Use `read_file` to read the files mentioned in the diff |
577 | | -- Use `list_dir` to understand project structure |
| 573 | +== WORKFLOW (EXECUTE NOW) == |
578 | 574 |
|
579 | | -Step 3: WRITE TEST FILES (DO THIS NOW!) |
580 | | -- Use `write_file` to create test_swe_<feature>.py |
581 | | -- Write behavioral tests that EXERCISE the changed functionality |
| 575 | +Step 1: INSTALL (use shell tool) |
| 576 | +shell: apt-get update && apt-get install -y python3 python3-pip git |
| 577 | +shell: pip install -e . (or check pyproject.toml) |
582 | 578 |
|
583 | | -Step 4: Run tests to validate |
584 | | -- Use `shell` to run: pytest -c /dev/null test_swe_<feature>.py -v |
| 579 | +Step 2: READ the changed files (use read_file tool) |
| 580 | +read_file: path to files from the diff |
585 | 581 |
|
586 | | -Step 5: SUBMIT (MUST CALL THIS!) |
587 | | -- Call `submit_tests` with your test files and install commands |
| 582 | +Step 3: WRITE TESTS (use write_file tool) |
| 583 | +write_file: path=test_swe_feature.py, content=your test code |
| 584 | +Tests MUST import modules and call functions - behavioral tests only. |
588 | 585 |
|
589 | | -== CRITICAL REMINDERS == |
590 | | -- Do NOT just explore forever - WRITE TEST FILES using `write_file` |
591 | | -- Do NOT end without calling `submit_tests` |
592 | | -- Tests MUST be behavioral (import, call functions, check values) |
593 | | -- Do NOT read source and assert on file content.""" |
| 586 | +Step 4: RUN TESTS (use shell tool) |
| 587 | +shell: pytest -c /dev/null test_swe_feature.py -v |
| 588 | +
|
| 589 | +Step 5: SUBMIT (use submit_tests tool) - THIS IS MANDATORY! |
| 590 | +submit_tests: fail_to_pass=["pytest test_swe_feature.py"], |
| 591 | + pass_to_pass=[], |
| 592 | + test_files=[{{path, content}}], |
| 593 | + install_commands=["apt-get...", "pip install..."] |
| 594 | +
|
| 595 | +== ABSOLUTE RULES == |
| 596 | +- EVERY response MUST include a tool call |
| 597 | +- NO text explanations - only tool calls |
| 598 | +- NO questions - just execute |
| 599 | +- MUST call submit_tests to finish |
| 600 | +- If unsure, try something with shell tool |
| 601 | +- YOU HAVE 400 TURNS - use them to complete the task""" |
594 | 602 |
|
595 | 603 | def _test_commands_for_language(self, language: str) -> tuple[list[str], list[str]]: |
596 | 604 | """Get suggested build and test commands for a language. |
@@ -905,17 +913,16 @@ async def generate_tests( |
905 | 913 |
|
906 | 914 | continue |
907 | 915 |
|
908 | | - # No tool calls, check for text response |
909 | | - if message.content and message.content.strip(): |
910 | | - loop.add_assistant(message.content) |
911 | | - loop.add_user( |
912 | | - "Use the `shell` tool to explore the repo and run tests, " |
913 | | - "then call `submit_tests`." |
914 | | - ) |
915 | | - continue |
916 | | - |
917 | | - # Empty response, we're done |
918 | | - break |
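| 916 | + # No tool calls in this response: record the reply (possibly empty) and re-prompt for a tool call. |
| 917 | + # NOTE(review): an empty reply no longer breaks out of the loop; it is re-prompted like a text reply. |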
| 918 | + loop.add_assistant(message.content or "") |
| 919 | + loop.add_user( |
| 920 | + "ERROR: You must use tools (shell, read_file, write_file, submit_tests), not text. " |
| 921 | + "DO NOT explain. DO NOT ask questions. " |
| 922 | + "Execute: shell('apt-get update'), read_file('file.py'), write_file('test.py', '...'), submit_tests(...). " |
| 923 | + "Call a tool NOW." |
| 924 | + ) |
| 925 | + continue |
919 | 926 |
|
920 | 927 | # Exhausted turns without success |
921 | 928 | return GeneratedTests( |
|
0 commit comments