Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 53 additions & 14 deletions src/clayde/claude.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import shutil
import subprocess
import time
import uuid
from abc import ABC, abstractmethod
from pathlib import Path

Expand Down Expand Up @@ -61,6 +62,18 @@ def __init__(self, message: str, cost_eur: float = 0.0):
self.cost_eur = cost_eur


class InvocationTimeoutError(Exception):
    """Raised when a Claude invocation exceeds the configured timeout.

    Distinct from UsageLimitError — timeouts are not rate limits.
    The raising sites commit work in progress and persist conversation/session
    state before raising, so the interrupted phase can be resumed later.

    Attributes:
        cost_eur: Cost in EUR incurred before the timeout. Defaults to 0.0
            when the raising site cannot compute a partial cost.
    """

    def __init__(self, message: str, cost_eur: float = 0.0) -> None:
        super().__init__(message)
        # Handlers read this to accumulate the partial cost of the aborted run.
        self.cost_eur = cost_eur


def format_cost_line(cost_eur: float) -> str:
"""Format a cost line for inclusion in GitHub comments.

Expand Down Expand Up @@ -338,6 +351,17 @@ def invoke(
repo_path=repo_path, span=span, timeout_s=tool_loop_timeout_s,
token_counter=token_counter,
)
except TimeoutError as e:
log.error("Claude API tool loop timed out after %ds", tool_loop_timeout_s)
if branch_name:
commit_wip(repo_path, branch_name)
if conversation_path:
self._save_conversation(conversation_path, messages)
partial_cost_eur = _calculate_cost_usd(model, token_counter["input"], token_counter["output"]) * _EUR_PER_USD
span.set_attribute("claude.timeout", True)
timeout_exc = InvocationTimeoutError(str(e), cost_eur=partial_cost_eur)
span.record_exception(timeout_exc)
raise timeout_exc from e
except anthropic.APIConnectionError as e:
log.error("Claude API connection error: %s", e)
raise self._build_usage_limit_error(
Expand Down Expand Up @@ -477,23 +501,37 @@ def invoke(
span.set_attribute("claude.backend", "cli")
span.set_attribute("claude.cli_bin", cli_bin)

# Determine session ID: load existing or generate new
session_id = None
resumed = False
if conversation_path:
session_id = self._load_session_id(conversation_path)
if session_id:
resumed = True

if not session_id:
session_id = str(uuid.uuid4())

cmd = [
cli_bin, "-p", prompt,
"--append-system-prompt", identity,
"--output-format", "json",
"--dangerously-skip-permissions",
]

# Resume from a previous session if available
if resumed:
cmd.extend(["--resume", session_id])
span.set_attribute("claude.resumed", True)
span.set_attribute("claude.resumed_session_id", session_id)
log.info("Resuming CLI session %s", session_id)
else:
cmd.extend(["--session-id", session_id])
span.set_attribute("claude.resumed", False)
span.set_attribute("claude.session_id", session_id)

# Save session ID immediately so it survives timeouts
if conversation_path:
session_id = self._load_session_id(conversation_path)
if session_id:
cmd.extend(["--resume", session_id])
span.set_attribute("claude.resumed", True)
span.set_attribute("claude.resumed_session_id", session_id)
log.info("Resuming CLI session %s", session_id)
else:
span.set_attribute("claude.resumed", False)
self._save_session_id(conversation_path, session_id)

try:
result = subprocess.run(
Expand All @@ -505,7 +543,7 @@ def invoke(
if branch_name:
commit_wip(repo_path, branch_name)
span.set_attribute("claude.timeout", True)
exc = UsageLimitError(f"Claude CLI timed out after {timeout_s}s")
exc = InvocationTimeoutError(f"Claude CLI timed out after {timeout_s}s")
span.record_exception(exc)
raise exc

Expand All @@ -515,14 +553,15 @@ def invoke(
and "no conversation found with session id" in (result.stderr or "").lower()
and conversation_path
):
log.warning("CLI session not found — deleting stale conversation file and retrying fresh")
conversation_path.unlink(missing_ok=True)
# Rebuild command without --resume
log.warning("CLI session not found — retrying with new session")
session_id = str(uuid.uuid4())
self._save_session_id(conversation_path, session_id)
cmd = [
cli_bin, "-p", prompt,
"--append-system-prompt", identity,
"--output-format", "json",
"--dangerously-skip-permissions",
"--session-id", session_id,
]
span.set_attribute("claude.stale_session_retry", True)
try:
Expand All @@ -535,7 +574,7 @@ def invoke(
if branch_name:
commit_wip(repo_path, branch_name)
span.set_attribute("claude.timeout", True)
exc = UsageLimitError(f"Claude CLI timed out after {timeout_s}s")
exc = InvocationTimeoutError(f"Claude CLI timed out after {timeout_s}s")
span.record_exception(exc)
raise exc

Expand Down
11 changes: 10 additions & 1 deletion src/clayde/tasks/implement.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import subprocess
from pathlib import Path

from clayde.claude import UsageLimitError, format_cost_line, invoke_claude
from clayde.claude import InvocationTimeoutError, UsageLimitError, format_cost_line, invoke_claude
from clayde.config import DATA_DIR, get_github_client, get_settings
from clayde.git import ensure_repo
from clayde.prompts import collect_comments_after, render_template
Expand Down Expand Up @@ -85,6 +85,15 @@ def run(issue_url: str) -> None:
"interrupted_phase": IssueStatus.IMPLEMENTING,
})
return
except InvocationTimeoutError as e:
log.warning("[%s: %s] Timed out during implementation — will resume next cycle", issue_ref(owner, repo, number), issue.title)
accumulate_cost(issue_url, e.cost_eur)
span.set_attribute("implement.status", "timeout")
update_issue_state(issue_url, {
"status": IssueStatus.INTERRUPTED,
"interrupted_phase": IssueStatus.IMPLEMENTING,
})
return

total_cost = pop_accumulated_cost(issue_url) + result.cost_eur

Expand Down
23 changes: 13 additions & 10 deletions src/clayde/tasks/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from github import Github
from github.Issue import Issue

from clayde.claude import UsageLimitError, format_cost_line, invoke_claude
from clayde.claude import InvocationTimeoutError, UsageLimitError, format_cost_line, invoke_claude
from clayde.config import get_github_client
from clayde.git import ensure_repo
from clayde.github import (
Expand Down Expand Up @@ -65,10 +65,11 @@ def run_preliminary(issue_url: str) -> None:
log.info("[%s: %s] Invoking Claude for preliminary plan", issue_ref(owner, repo, number), issue.title)
try:
result = invoke_claude(prompt, repo_path)
except UsageLimitError as e:
log.warning("Usage limit hit during preliminary planning #%d", number)
except (UsageLimitError, InvocationTimeoutError) as e:
label = "Timed out" if isinstance(e, InvocationTimeoutError) else "Usage limit hit"
log.warning("%s during preliminary planning #%d", label, number)
accumulate_cost(issue_url, e.cost_eur)
span.set_attribute("plan.status", "limit")
span.set_attribute("plan.status", "timeout" if isinstance(e, InvocationTimeoutError) else "limit")
update_issue_state(issue_url, {
"status": IssueStatus.INTERRUPTED,
"interrupted_phase": IssueStatus.PRELIMINARY_PLANNING,
Expand Down Expand Up @@ -161,10 +162,11 @@ def run_thorough(issue_url: str) -> None:
log.info("[%s: %s] Invoking Claude for thorough plan", issue_ref(owner, repo, number), issue.title)
try:
result = invoke_claude(prompt, repo_path)
except UsageLimitError as e:
log.warning("Usage limit hit during thorough planning #%d", number)
except (UsageLimitError, InvocationTimeoutError) as e:
label = "Timed out" if isinstance(e, InvocationTimeoutError) else "Usage limit hit"
log.warning("%s during thorough planning #%d", label, number)
accumulate_cost(issue_url, e.cost_eur)
span.set_attribute("plan.status", "limit")
span.set_attribute("plan.status", "timeout" if isinstance(e, InvocationTimeoutError) else "limit")
update_issue_state(issue_url, {
"status": IssueStatus.INTERRUPTED,
"interrupted_phase": IssueStatus.PLANNING,
Expand Down Expand Up @@ -278,10 +280,11 @@ def run_update(issue_url: str, phase: str) -> None:
log.info("[%s: %s] Invoking Claude for plan update (%s phase)", issue_ref(owner, repo, number), issue.title, phase)
try:
result = invoke_claude(prompt, repo_path)
except UsageLimitError as e:
log.warning("Usage limit hit during plan update #%d", number)
except (UsageLimitError, InvocationTimeoutError) as e:
label = "Timed out" if isinstance(e, InvocationTimeoutError) else "Usage limit hit"
log.warning("%s during plan update #%d", label, number)
accumulate_cost(issue_url, e.cost_eur)
span.set_attribute("plan.update_status", "limit")
span.set_attribute("plan.update_status", "timeout" if isinstance(e, InvocationTimeoutError) else "limit")
update_issue_state(issue_url, {
"status": IssueStatus.INTERRUPTED,
"interrupted_phase": IssueStatus.PRELIMINARY_PLANNING if phase == "preliminary" else IssueStatus.PLANNING,
Expand Down
9 changes: 5 additions & 4 deletions src/clayde/tasks/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging

from clayde.claude import UsageLimitError, format_cost_line, invoke_claude
from clayde.claude import InvocationTimeoutError, UsageLimitError, format_cost_line, invoke_claude
from clayde.config import DATA_DIR, get_github_client, get_settings
from clayde.git import ensure_repo
from clayde.prompts import render_template
Expand Down Expand Up @@ -112,10 +112,11 @@ def run(issue_url: str) -> None:
branch_name=branch_name,
conversation_path=conversation_path,
)
except UsageLimitError as e:
log.warning("[%s] Usage limit hit during review handling", issue_label)
except (UsageLimitError, InvocationTimeoutError) as e:
label_msg = "Timed out" if isinstance(e, InvocationTimeoutError) else "Usage limit hit"
log.warning("[%s] %s during review handling", issue_label, label_msg)
accumulate_cost(issue_url, e.cost_eur)
span.set_attribute("review.status", "limit")
span.set_attribute("review.status", "timeout" if isinstance(e, InvocationTimeoutError) else "limit")
update_issue_state(issue_url, {
"status": IssueStatus.INTERRUPTED,
"interrupted_phase": IssueStatus.ADDRESSING_REVIEW,
Expand Down
87 changes: 79 additions & 8 deletions tests/test_claude.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
ApiBackend,
CliBackend,
InvocationResult,
InvocationTimeoutError,
UsageLimitError,
_calculate_cost_usd,
_get_backend,
Expand Down Expand Up @@ -333,9 +334,39 @@ def fake_monotonic():
patch.object(backend, "_get_client", return_value=mock_client), \
patch.object(ApiBackend, "_execute_tool", return_value="output"), \
patch("clayde.claude.time.monotonic", side_effect=fake_monotonic):
with pytest.raises(TimeoutError):
with pytest.raises(InvocationTimeoutError):
backend.invoke("implement", str(tmp_path))

def test_tool_loop_timeout_saves_conversation(self, tmp_path):
    """A tool-loop timeout commits WIP, writes the conversation file, and raises InvocationTimeoutError."""
    (tmp_path / "CLAUDE.md").write_text("identity")
    conv_path = tmp_path / "conv.json"

    # The mocked client always answers with the same tool-use response.
    looping_response = _make_tool_response(
        [_make_tool_use_block("bash", "tool-1", {"command": "echo loop"})]
    )
    mock_client = MagicMock()
    mock_client.beta.messages.create.return_value = looping_response
    backend = ApiBackend()

    clock_reads = 0

    def fake_monotonic():
        # First read reports t=0; every later read reports t=2000
        # (presumably past the tool-loop timeout from _mock_settings — confirm).
        nonlocal clock_reads
        clock_reads += 1
        return 0.0 if clock_reads <= 1 else 2000.0

    with patch("clayde.claude.APP_DIR", tmp_path), \
            patch("clayde.claude.get_settings", return_value=_mock_settings()), \
            patch.object(backend, "_get_client", return_value=mock_client), \
            patch.object(ApiBackend, "_execute_tool", return_value="output"), \
            patch("clayde.claude.time.monotonic", side_effect=fake_monotonic), \
            patch("clayde.claude.commit_wip") as mock_wip:
        with pytest.raises(InvocationTimeoutError) as exc_info:
            backend.invoke(
                "implement",
                str(tmp_path),
                branch_name="branch",
                conversation_path=conv_path,
            )
        mock_wip.assert_called_once_with(str(tmp_path), "branch")

    assert conv_path.exists()
    assert exc_info.value.cost_eur >= 0.0

def test_token_usage_accumulated_across_turns(self, tmp_path):
(tmp_path / "CLAUDE.md").write_text("identity")
tool_block = _make_tool_use_block("bash", "t1", {"command": "echo x"})
Expand Down Expand Up @@ -652,14 +683,15 @@ def test_saves_session_id(self, tmp_path):

assert conv_path.exists()
data = json.loads(conv_path.read_text())
# Session ID from response overwrites the pre-generated one
assert data["session_id"] == "my-session-id"

def test_resumes_from_session_id(self, tmp_path):
(tmp_path / "CLAUDE.md").write_text("identity")
conv_path = tmp_path / "conv.json"
conv_path.write_text(json.dumps({"session_id": "prev-session"}))
mock_result = MagicMock()
mock_result.stdout = self._cli_json_output("resumed")
mock_result.stdout = self._cli_json_output("resumed", "prev-session")
mock_result.stderr = ""
mock_result.returncode = 0
backend = CliBackend()
Expand Down Expand Up @@ -731,7 +763,7 @@ def test_limit_saves_session_before_raising(self, tmp_path):
data = json.loads(conv_path.read_text())
assert data["session_id"] == "limit-session"

def test_timeout_raises_usage_limit_error(self, tmp_path):
def test_timeout_raises_invocation_timeout_error(self, tmp_path):
(tmp_path / "CLAUDE.md").write_text("identity")
backend = CliBackend()

Expand All @@ -740,10 +772,48 @@ def test_timeout_raises_usage_limit_error(self, tmp_path):
patch("clayde.claude._resolve_cli_bin", return_value="/usr/bin/claude"), \
patch("clayde.claude.subprocess.run", side_effect=__import__("subprocess").TimeoutExpired("claude", 1800)), \
patch("clayde.claude.commit_wip") as mock_wip:
with pytest.raises(UsageLimitError):
with pytest.raises(InvocationTimeoutError):
backend.invoke("prompt", "/repo", branch_name="branch")
mock_wip.assert_called_once_with("/repo", "branch")

def test_timeout_saves_session_id_for_resumption(self, tmp_path):
    """When a fresh CLI session times out, the pre-generated session ID is saved for resumption."""
    # Function-scope imports keep this block self-contained; they replace
    # the inline __import__("subprocess") expression.
    import subprocess
    import uuid

    (tmp_path / "CLAUDE.md").write_text("identity")
    conv_path = tmp_path / "conv.json"
    backend = CliBackend()
    timeout = subprocess.TimeoutExpired("claude", 1800)

    with patch("clayde.claude.APP_DIR", tmp_path), \
            patch("clayde.claude.get_settings", return_value=_mock_settings(backend="cli")), \
            patch("clayde.claude._resolve_cli_bin", return_value="/usr/bin/claude"), \
            patch("clayde.claude.subprocess.run", side_effect=timeout), \
            patch("clayde.claude.commit_wip"):
        with pytest.raises(InvocationTimeoutError):
            backend.invoke("prompt", "/repo", branch_name="branch", conversation_path=conv_path)

    # Session ID should be saved even though the process timed out
    assert conv_path.exists()
    data = json.loads(conv_path.read_text())
    # No session file existed beforehand, so the stored value must be the
    # freshly generated UUID; uuid.UUID raises ValueError on anything else.
    uuid.UUID(data["session_id"])

def test_timeout_preserves_session_id_for_resumed(self, tmp_path):
    """When a resumed CLI session times out, the session ID is preserved for next resumption."""
    # Function-scope import replaces the inline __import__("subprocess") expression.
    import subprocess

    (tmp_path / "CLAUDE.md").write_text("identity")
    conv_path = tmp_path / "conv.json"
    conv_path.write_text(json.dumps({"session_id": "my-session"}))
    backend = CliBackend()
    timeout = subprocess.TimeoutExpired("claude", 1800)

    with patch("clayde.claude.APP_DIR", tmp_path), \
            patch("clayde.claude.get_settings", return_value=_mock_settings(backend="cli")), \
            patch("clayde.claude._resolve_cli_bin", return_value="/usr/bin/claude"), \
            patch("clayde.claude.subprocess.run", side_effect=timeout) as mock_run, \
            patch("clayde.claude.commit_wip"):
        with pytest.raises(InvocationTimeoutError):
            backend.invoke("prompt", "/repo", branch_name="branch", conversation_path=conv_path)

    # The timed-out invocation must have been a resume of the stored session,
    # otherwise this test would not be exercising the "resumed" path at all.
    cmd = mock_run.call_args_list[0][0][0]
    assert "--resume" in cmd
    assert "my-session" in cmd

    # Session ID should still be in the conversation file
    data = json.loads(conv_path.read_text())
    assert data["session_id"] == "my-session"

def test_fallback_on_non_json_stdout(self, tmp_path):
(tmp_path / "CLAUDE.md").write_text("identity")
mock_result = MagicMock()
Expand All @@ -760,8 +830,8 @@ def test_fallback_on_non_json_stdout(self, tmp_path):

assert result.output == "plain text output"

def test_stale_session_retries_fresh(self, tmp_path):
"""When CLI reports 'No conversation found', delete conv file and retry without --resume."""
def test_stale_session_retries_with_new_session_id(self, tmp_path):
"""When CLI reports 'No conversation found', retry with a new session ID."""
(tmp_path / "CLAUDE.md").write_text("identity")
conv_path = tmp_path / "conv.json"
conv_path.write_text(json.dumps({"session_id": "stale-session"}))
Expand All @@ -788,12 +858,13 @@ def test_stale_session_retries_fresh(self, tmp_path):
result = backend.invoke("prompt", str(tmp_path), conversation_path=conv_path)

assert result.output == "fresh output"
# First call should have --resume, second should not
# First call should have --resume, second should have --session-id (new UUID)
first_cmd = mock_run.call_args_list[0][0][0]
second_cmd = mock_run.call_args_list[1][0][0]
assert "--resume" in first_cmd
assert "--resume" not in second_cmd
# Conv file should now have the new session ID
assert "--session-id" in second_cmd
# Conv file should now have the new session ID from the response
data = json.loads(conv_path.read_text())
assert data["session_id"] == "new-session"

Expand Down
Loading
Loading