From 270bb8cddc038633798720197a4b8c8741692041 Mon Sep 17 00:00:00 2001 From: "MagicMock/mock.effective_git_name/139641991453328" Date: Sun, 22 Mar 2026 20:47:24 +0100 Subject: [PATCH 1/2] Distinguish timeouts from usage limits and enable seamless resumption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Timeouts were misclassified as UsageLimitError, causing false "usage limit hit" log messages and — for CLI backend — losing all session state so retries started from scratch. This introduces InvocationTimeoutError as a separate exception and ensures conversation state is saved before the timeout propagates: - API backend: catch TimeoutError, save conversation + commit WIP - CLI backend: pre-generate session ID via --session-id and save it before subprocess.run, so it survives timeouts - Task files: catch InvocationTimeoutError with accurate log messages Co-Authored-By: Claude Opus 4.6 --- src/clayde/claude.py | 67 +++++++++++++++++++++------ src/clayde/tasks/implement.py | 11 ++++- src/clayde/tasks/plan.py | 23 +++++---- src/clayde/tasks/review.py | 9 ++-- tests/test_claude.py | 87 +++++++++++++++++++++++++++++++---- tests/test_tasks_implement.py | 25 +++++++++- 6 files changed, 184 insertions(+), 38 deletions(-) diff --git a/src/clayde/claude.py b/src/clayde/claude.py index 2b3c18d..d5c93a7 100644 --- a/src/clayde/claude.py +++ b/src/clayde/claude.py @@ -7,6 +7,7 @@ import shutil import subprocess import time +import uuid from abc import ABC, abstractmethod from pathlib import Path @@ -61,6 +62,18 @@ def __init__(self, message: str, cost_eur: float = 0.0): self.cost_eur = cost_eur +class InvocationTimeoutError(Exception): + """Raised when a Claude invocation exceeds the configured timeout. + + Distinct from UsageLimitError — timeouts are not rate limits. + WIP is committed and conversation state is saved for seamless resumption. + """ + + def __init__(self, message: str, cost_eur: float = 0.0): + super().__init__(message) + self.cost_eur = cost_eur + + def format_cost_line(cost_eur: float) -> str: """Format a cost line for inclusion in GitHub comments. @@ -338,6 +351,17 @@ def invoke( repo_path=repo_path, span=span, timeout_s=tool_loop_timeout_s, token_counter=token_counter, ) + except TimeoutError as e: + log.error("Claude API tool loop timed out after %ds", tool_loop_timeout_s) + if branch_name: + commit_wip(repo_path, branch_name) + if conversation_path: + self._save_conversation(conversation_path, messages) + partial_cost_eur = _calculate_cost_usd(model, token_counter["input"], token_counter["output"]) * _EUR_PER_USD + span.set_attribute("claude.timeout", True) + timeout_exc = InvocationTimeoutError(str(e), cost_eur=partial_cost_eur) + span.record_exception(timeout_exc) + raise timeout_exc from e except anthropic.APIConnectionError as e: log.error("Claude API connection error: %s", e) raise self._build_usage_limit_error( @@ -477,6 +501,17 @@ def invoke( span.set_attribute("claude.backend", "cli") span.set_attribute("claude.cli_bin", cli_bin) + # Determine session ID: load existing or generate new + session_id = None + resumed = False + if conversation_path: + session_id = self._load_session_id(conversation_path) + if session_id: + resumed = True + + if not session_id: + session_id = str(uuid.uuid4()) + cmd = [ cli_bin, "-p", prompt, "--append-system-prompt", identity, @@ -484,16 +519,19 @@ def invoke( "--dangerously-skip-permissions", ] - # Resume from a previous session if available + if resumed: + cmd.extend(["--resume", session_id]) + span.set_attribute("claude.resumed", True) + span.set_attribute("claude.resumed_session_id", session_id) + log.info("Resuming CLI session %s", session_id) + else: + cmd.extend(["--session-id", session_id]) + span.set_attribute("claude.resumed", False) + span.set_attribute("claude.session_id", session_id) + + # Save session ID immediately so it survives timeouts if conversation_path: - session_id = self._load_session_id(conversation_path) - if session_id: - cmd.extend(["--resume", session_id]) - span.set_attribute("claude.resumed", True) - span.set_attribute("claude.resumed_session_id", session_id) - log.info("Resuming CLI session %s", session_id) - else: - span.set_attribute("claude.resumed", False) + self._save_session_id(conversation_path, session_id) try: result = subprocess.run( @@ -505,7 +543,7 @@ def invoke( if branch_name: commit_wip(repo_path, branch_name) span.set_attribute("claude.timeout", True) - exc = UsageLimitError(f"Claude CLI timed out after {timeout_s}s") + exc = InvocationTimeoutError(f"Claude CLI timed out after {timeout_s}s") span.record_exception(exc) raise exc @@ -515,14 +553,15 @@ def invoke( and "no conversation found with session id" in (result.stderr or "").lower() and conversation_path ): - log.warning("CLI session not found — deleting stale conversation file and retrying fresh") - conversation_path.unlink(missing_ok=True) - # Rebuild command without --resume + log.warning("CLI session not found — retrying with new session") + session_id = str(uuid.uuid4()) + self._save_session_id(conversation_path, session_id) cmd = [ cli_bin, "-p", prompt, "--append-system-prompt", identity, "--output-format", "json", "--dangerously-skip-permissions", + "--session-id", session_id, ] span.set_attribute("claude.stale_session_retry", True) try: @@ -535,7 +574,7 @@ def invoke( if branch_name: commit_wip(repo_path, branch_name) span.set_attribute("claude.timeout", True) - exc = UsageLimitError(f"Claude CLI timed out after {timeout_s}s") + exc = InvocationTimeoutError(f"Claude CLI timed out after {timeout_s}s") span.record_exception(exc) raise exc diff --git a/src/clayde/tasks/implement.py b/src/clayde/tasks/implement.py index 06fd541..b8fcc74 100644 --- a/src/clayde/tasks/implement.py +++ b/src/clayde/tasks/implement.py @@ -8,7 +8,7 @@ import subprocess from pathlib import Path -from clayde.claude import UsageLimitError, format_cost_line, invoke_claude +from clayde.claude import InvocationTimeoutError, UsageLimitError, format_cost_line, invoke_claude from clayde.config import DATA_DIR, get_github_client, get_settings from clayde.git import ensure_repo from clayde.prompts import collect_comments_after, render_template @@ -85,6 +85,15 @@ def run(issue_url: str) -> None: "interrupted_phase": IssueStatus.IMPLEMENTING, }) return + except InvocationTimeoutError as e: + log.warning("[%s: %s] Timed out during implementation — will resume next cycle", issue_ref(owner, repo, number), issue.title) + accumulate_cost(issue_url, e.cost_eur) + span.set_attribute("implement.status", "timeout") + update_issue_state(issue_url, { + "status": IssueStatus.INTERRUPTED, + "interrupted_phase": IssueStatus.IMPLEMENTING, + }) + return total_cost = pop_accumulated_cost(issue_url) + result.cost_eur diff --git a/src/clayde/tasks/plan.py b/src/clayde/tasks/plan.py index a61f876..ab08c04 100644 --- a/src/clayde/tasks/plan.py +++ b/src/clayde/tasks/plan.py @@ -11,7 +11,7 @@ from github import Github from github.Issue import Issue -from clayde.claude import UsageLimitError, format_cost_line, invoke_claude +from clayde.claude import InvocationTimeoutError, UsageLimitError, format_cost_line, invoke_claude from clayde.config import get_github_client from clayde.git import ensure_repo from clayde.github import ( @@ -65,10 +65,11 @@ def run_preliminary(issue_url: str) -> None: log.info("[%s: %s] Invoking Claude for preliminary plan", issue_ref(owner, repo, number), issue.title) try: result = invoke_claude(prompt, repo_path) - except UsageLimitError as e: - log.warning("Usage limit hit during preliminary planning #%d", number) + except (UsageLimitError, InvocationTimeoutError) as e: + label = "Timed out" if isinstance(e, InvocationTimeoutError) else "Usage limit hit" + log.warning("%s during preliminary planning #%d", label, number) accumulate_cost(issue_url, e.cost_eur) - span.set_attribute("plan.status", "limit") + span.set_attribute("plan.status", "timeout" if isinstance(e, InvocationTimeoutError) else "limit") update_issue_state(issue_url, { "status": IssueStatus.INTERRUPTED, "interrupted_phase": IssueStatus.PRELIMINARY_PLANNING, @@ -161,10 +162,11 @@ def run_thorough(issue_url: str) -> None: log.info("[%s: %s] Invoking Claude for thorough plan", issue_ref(owner, repo, number), issue.title) try: result = invoke_claude(prompt, repo_path) - except UsageLimitError as e: - log.warning("Usage limit hit during thorough planning #%d", number) + except (UsageLimitError, InvocationTimeoutError) as e: + label = "Timed out" if isinstance(e, InvocationTimeoutError) else "Usage limit hit" + log.warning("%s during thorough planning #%d", label, number) accumulate_cost(issue_url, e.cost_eur) - span.set_attribute("plan.status", "limit") + span.set_attribute("plan.status", "timeout" if isinstance(e, InvocationTimeoutError) else "limit") update_issue_state(issue_url, { "status": IssueStatus.INTERRUPTED, "interrupted_phase": IssueStatus.PLANNING, @@ -278,10 +280,11 @@ def run_update(issue_url: str, phase: str) -> None: log.info("[%s: %s] Invoking Claude for plan update (%s phase)", issue_ref(owner, repo, number), issue.title, phase) try: result = invoke_claude(prompt, repo_path) - except UsageLimitError as e: - log.warning("Usage limit hit during plan update #%d", number) + except (UsageLimitError, InvocationTimeoutError) as e: + label = "Timed out" if isinstance(e, InvocationTimeoutError) else "Usage limit hit" + log.warning("%s during plan update #%d", label, number) accumulate_cost(issue_url, e.cost_eur) - span.set_attribute("plan.update_status", "limit") + span.set_attribute("plan.update_status", "timeout" if isinstance(e, InvocationTimeoutError) else "limit") update_issue_state(issue_url, { "status": IssueStatus.INTERRUPTED, "interrupted_phase": IssueStatus.PRELIMINARY_PLANNING if phase == "preliminary" else IssueStatus.PLANNING, diff --git a/src/clayde/tasks/review.py b/src/clayde/tasks/review.py index 247f8b0..0d4a102 100644 --- a/src/clayde/tasks/review.py +++ b/src/clayde/tasks/review.py @@ -2,7 +2,7 @@ import logging -from clayde.claude import UsageLimitError, format_cost_line, invoke_claude +from clayde.claude import InvocationTimeoutError, UsageLimitError, format_cost_line, invoke_claude from clayde.config import DATA_DIR, get_github_client, get_settings from clayde.git import ensure_repo from clayde.prompts import render_template @@ -112,10 +112,11 @@ def run(issue_url: str) -> None: branch_name=branch_name, conversation_path=conversation_path, ) - except UsageLimitError as e: - log.warning("[%s] Usage limit hit during review handling", issue_label) + except (UsageLimitError, InvocationTimeoutError) as e: + label_msg = "Timed out" if isinstance(e, InvocationTimeoutError) else "Usage limit hit" + log.warning("[%s] %s during review handling", issue_label, label_msg) accumulate_cost(issue_url, e.cost_eur) - span.set_attribute("review.status", "limit") + span.set_attribute("review.status", "timeout" if isinstance(e, InvocationTimeoutError) else "limit") update_issue_state(issue_url, { "status": IssueStatus.INTERRUPTED, "interrupted_phase": IssueStatus.ADDRESSING_REVIEW, diff --git a/tests/test_claude.py b/tests/test_claude.py index 72b6ebe..7c330dd 100644 --- a/tests/test_claude.py +++ b/tests/test_claude.py @@ -11,6 +11,7 @@ ApiBackend, CliBackend, InvocationResult, + InvocationTimeoutError, UsageLimitError, _calculate_cost_usd, _get_backend, @@ -333,9 +334,39 @@ def fake_monotonic(): patch.object(backend, "_get_client", return_value=mock_client), \ patch.object(ApiBackend, "_execute_tool", return_value="output"), \ patch("clayde.claude.time.monotonic", side_effect=fake_monotonic): - with pytest.raises(TimeoutError): + with pytest.raises(InvocationTimeoutError): backend.invoke("implement", str(tmp_path)) + def test_tool_loop_timeout_saves_conversation(self, tmp_path): + (tmp_path / "CLAUDE.md").write_text("identity") + conv_path = tmp_path / "conv.json" + tool_block = _make_tool_use_block("bash", "tool-1", {"command": "echo loop"}) + tool_response = _make_tool_response([tool_block]) + mock_client = MagicMock() + mock_client.beta.messages.create.return_value = tool_response + backend = ApiBackend() + + call_count = [0] + def fake_monotonic(): + call_count[0] += 1 + if call_count[0] <= 1: + return 0.0 + return 2000.0 + + with patch("clayde.claude.APP_DIR", tmp_path), \ + patch("clayde.claude.get_settings", return_value=_mock_settings()), \ + patch.object(backend, "_get_client", return_value=mock_client), \ + patch.object(ApiBackend, "_execute_tool", return_value="output"), \ + patch("clayde.claude.time.monotonic", side_effect=fake_monotonic), \ + patch("clayde.claude.commit_wip") as mock_wip: + with pytest.raises(InvocationTimeoutError) as exc_info: + backend.invoke("implement", str(tmp_path), + branch_name="branch", conversation_path=conv_path) + mock_wip.assert_called_once_with(str(tmp_path), "branch") + + assert conv_path.exists() + assert exc_info.value.cost_eur >= 0.0 + def test_token_usage_accumulated_across_turns(self, tmp_path): (tmp_path / "CLAUDE.md").write_text("identity") tool_block = _make_tool_use_block("bash", "t1", {"command": "echo x"}) @@ -652,6 +683,7 @@ def test_saves_session_id(self, tmp_path): assert conv_path.exists() data = json.loads(conv_path.read_text()) + # Session ID from response overwrites the pre-generated one assert data["session_id"] == "my-session-id" def test_resumes_from_session_id(self, tmp_path): @@ -659,7 +691,7 @@ def test_resumes_from_session_id(self, tmp_path): conv_path = tmp_path / "conv.json" conv_path.write_text(json.dumps({"session_id": "prev-session"})) mock_result = MagicMock() - mock_result.stdout = self._cli_json_output("resumed") + mock_result.stdout = self._cli_json_output("resumed", "prev-session") mock_result.stderr = "" mock_result.returncode = 0 backend = CliBackend() @@ -731,7 +763,7 @@ def test_limit_saves_session_before_raising(self, tmp_path): data = json.loads(conv_path.read_text()) assert data["session_id"] == "limit-session" - def test_timeout_raises_usage_limit_error(self, tmp_path): + def test_timeout_raises_invocation_timeout_error(self, tmp_path): (tmp_path / "CLAUDE.md").write_text("identity") backend = CliBackend() @@ -740,10 +772,48 @@ def test_timeout_raises_usage_limit_error(self, tmp_path): patch("clayde.claude._resolve_cli_bin", return_value="/usr/bin/claude"), \ patch("clayde.claude.subprocess.run", side_effect=__import__("subprocess").TimeoutExpired("claude", 1800)), \ patch("clayde.claude.commit_wip") as mock_wip: - with pytest.raises(UsageLimitError): + with pytest.raises(InvocationTimeoutError): backend.invoke("prompt", "/repo", branch_name="branch") mock_wip.assert_called_once_with("/repo", "branch") + def test_timeout_saves_session_id_for_resumption(self, tmp_path): + """When a fresh CLI session times out, the pre-generated session ID is saved for resumption.""" + (tmp_path / "CLAUDE.md").write_text("identity") + conv_path = tmp_path / "conv.json" + backend = CliBackend() + + with patch("clayde.claude.APP_DIR", tmp_path), \ + patch("clayde.claude.get_settings", return_value=_mock_settings(backend="cli")), \ + patch("clayde.claude._resolve_cli_bin", return_value="/usr/bin/claude"), \ + patch("clayde.claude.subprocess.run", side_effect=__import__("subprocess").TimeoutExpired("claude", 1800)), \ + patch("clayde.claude.commit_wip"): + with pytest.raises(InvocationTimeoutError): + backend.invoke("prompt", "/repo", branch_name="branch", conversation_path=conv_path) + + # Session ID should be saved even though the process timed out + assert conv_path.exists() + data = json.loads(conv_path.read_text()) + assert data["session_id"] # a UUID was generated and saved + + def test_timeout_preserves_session_id_for_resumed(self, tmp_path): + """When a resumed CLI session times out, the session ID is preserved for next resumption.""" + (tmp_path / "CLAUDE.md").write_text("identity") + conv_path = tmp_path / "conv.json" + conv_path.write_text(json.dumps({"session_id": "my-session"})) + backend = CliBackend() + + with patch("clayde.claude.APP_DIR", tmp_path), \ + patch("clayde.claude.get_settings", return_value=_mock_settings(backend="cli")), \ + patch("clayde.claude._resolve_cli_bin", return_value="/usr/bin/claude"), \ + patch("clayde.claude.subprocess.run", side_effect=__import__("subprocess").TimeoutExpired("claude", 1800)), \ + patch("clayde.claude.commit_wip"): + with pytest.raises(InvocationTimeoutError): + backend.invoke("prompt", "/repo", branch_name="branch", conversation_path=conv_path) + + # Session ID should still be in the conversation file + data = json.loads(conv_path.read_text()) + assert data["session_id"] == "my-session" + def test_fallback_on_non_json_stdout(self, tmp_path): (tmp_path / "CLAUDE.md").write_text("identity") mock_result = MagicMock() @@ -760,8 +830,8 @@ def test_fallback_on_non_json_stdout(self, tmp_path): assert result.output == "plain text output" - def test_stale_session_retries_fresh(self, tmp_path): - """When CLI reports 'No conversation found', delete conv file and retry without --resume.""" + def test_stale_session_retries_with_new_session_id(self, tmp_path): + """When CLI reports 'No conversation found', retry with a new session ID.""" (tmp_path / "CLAUDE.md").write_text("identity") conv_path = tmp_path / "conv.json" conv_path.write_text(json.dumps({"session_id": "stale-session"})) @@ -788,12 +858,13 @@ def test_stale_session_retries_fresh(self, tmp_path): result = backend.invoke("prompt", str(tmp_path), conversation_path=conv_path) assert result.output == "fresh output" - # First call should have --resume, second should not + # First call should have --resume, second should have --session-id (new UUID) first_cmd = mock_run.call_args_list[0][0][0] second_cmd = mock_run.call_args_list[1][0][0] assert "--resume" in first_cmd assert "--resume" not in second_cmd - # Conv file should now have the new session ID + assert "--session-id" in second_cmd + # Conv file should now have the new session ID from the response data = json.loads(conv_path.read_text()) assert data["session_id"] == "new-session" diff --git a/tests/test_tasks_implement.py b/tests/test_tasks_implement.py index 0d930a8..9910387 100644 --- a/tests/test_tasks_implement.py +++ b/tests/test_tasks_implement.py @@ -3,7 +3,7 @@ from pathlib import Path from unittest.mock import MagicMock, patch -from clayde.claude import InvocationResult, UsageLimitError +from clayde.claude import InvocationResult, InvocationTimeoutError, UsageLimitError from clayde.prompts import collect_comments_after from clayde.tasks.implement import ( _assign_reviewer_and_finish, @@ -199,6 +199,29 @@ def test_usage_limit_sets_interrupted_and_accumulates_cost(self, tmp_path): assert last_call[0][1]["interrupted_phase"] == "implementing" mock_accum.assert_called_once_with("url", 2.00) + def test_timeout_sets_interrupted_and_accumulates_cost(self, tmp_path): + with patch("clayde.tasks.implement.get_github_client"), \ + patch("clayde.tasks.implement.parse_issue_url", return_value=("o", "r", 1)), \ + patch("clayde.tasks.implement.get_issue_state", return_value={"plan_comment_id": 100}), \ + patch("clayde.tasks.implement.update_issue_state") as mock_update, \ + patch("clayde.tasks.implement.fetch_issue"), \ + patch("clayde.tasks.implement.get_default_branch", return_value="main"), \ + patch("clayde.tasks.implement.ensure_repo", return_value="/tmp/repo"), \ + patch("clayde.tasks.implement.fetch_comment") as mock_fc, \ + patch("clayde.tasks.implement.fetch_issue_comments", return_value=[]), \ + patch("clayde.tasks.implement.filter_comments", return_value=[]), \ + patch("clayde.tasks.implement._build_prompt", return_value="prompt"), \ + patch("clayde.tasks.implement.invoke_claude", side_effect=InvocationTimeoutError("timeout", cost_eur=1.50)), \ + patch("clayde.tasks.implement.accumulate_cost") as mock_accum, \ + patch("clayde.tasks.implement.DATA_DIR", tmp_path): + mock_fc.return_value.body = "plan text" + run("url") + + last_call = mock_update.call_args_list[-1] + assert last_call[0][1]["status"] == "interrupted" + assert last_call[0][1]["interrupted_phase"] == "implementing" + mock_accum.assert_called_once_with("url", 1.50) + def test_resumes_interrupted_with_existing_pr(self): state = {"plan_comment_id": 100, "status": "interrupted"} with patch("clayde.tasks.implement.get_github_client") as mock_gc, \ From 387965d2764a40030dbaad49ef084834583fb7c5 Mon Sep 17 00:00:00 2001 From: "MagicMock/mock.effective_git_name/140297175410272" Date: Tue, 24 Mar 2026 11:38:19 +0100 Subject: [PATCH 2/2] Fix KeyError when implementing small issues that skip thorough planning Small issues go directly from preliminary plan approval to implementation, but implement.run unconditionally read issue_state["plan_comment_id"] which doesn't exist for small issues. Fall back to preliminary_comment_id. Co-Authored-By: Claude Opus 4.6 --- src/clayde/tasks/implement.py | 2 +- tests/test_tasks_implement.py | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/clayde/tasks/implement.py b/src/clayde/tasks/implement.py index b8fcc74..4023b83 100644 --- a/src/clayde/tasks/implement.py +++ b/src/clayde/tasks/implement.py @@ -137,7 +137,7 @@ def _try_resume_from_existing_pr(g, owner, repo, number, issue_url, issue_state, def _prepare_implementation_context(g, owner, repo, number, issue_url, issue_state, resumed): """Fetch all resources needed to run Claude and return them as a tuple.""" - plan_comment_id = issue_state["plan_comment_id"] + plan_comment_id = issue_state.get("plan_comment_id") or issue_state["preliminary_comment_id"] issue = fetch_issue(g, owner, repo, number) default_branch = get_default_branch(g, owner, repo) repo_path = ensure_repo(owner, repo, default_branch) diff --git a/tests/test_tasks_implement.py b/tests/test_tasks_implement.py index 9910387..e061819 100644 --- a/tests/test_tasks_implement.py +++ b/tests/test_tasks_implement.py @@ -9,6 +9,7 @@ _assign_reviewer_and_finish, _checkout_wip_branch, _post_result, + _prepare_implementation_context, run, ) @@ -498,3 +499,29 @@ def test_failed_after_retries_deletes_conversation_file(self, tmp_path): run("https://github.com/o/r/issues/1") assert not conv_file.exists() + + +class TestPrepareImplementationContext: + """Tests for _prepare_implementation_context.""" + + def test_falls_back_to_preliminary_comment_id(self, tmp_path): + """Small issues have no plan_comment_id; should use preliminary_comment_id.""" + g = MagicMock() + issue_state = {"preliminary_comment_id": 42, "branch_name": "clayde/issue-1"} + + with patch("clayde.tasks.implement.fetch_issue") as mock_fi, \ + patch("clayde.tasks.implement.get_default_branch", return_value="main"), \ + patch("clayde.tasks.implement.ensure_repo", return_value=tmp_path), \ + patch("clayde.tasks.implement.fetch_comment") as mock_fc, \ + patch("clayde.tasks.implement.update_issue_state"), \ + patch("clayde.tasks.implement.fetch_issue_comments", return_value=[]), \ + patch("clayde.tasks.implement.filter_comments", return_value=[]), \ + patch("clayde.tasks.implement.DATA_DIR", tmp_path): + mock_fi.return_value.title = "Test" + mock_fc.return_value.body = "preliminary plan text" + + _prepare_implementation_context( + g, "o", "r", 1, "https://github.com/o/r/issues/1", issue_state, False, + ) + + mock_fc.assert_called_once_with(g, "o", "r", 1, 42)