Skip to content

Commit 1fbbe81

Browse files
committed
scripts: increase scenario timeouts
1 parent 80786c5 commit 1fbbe81

6 files changed

Lines changed: 246 additions & 20 deletions

File tree

scripts/lib/common.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/usr/bin/env bash
2+
3+
die() {
4+
printf 'ERROR: %s\n' "$*" >&2
5+
exit 1
6+
}
7+
8+
log() {
9+
printf '[%s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*" >&2
10+
}
11+
12+
need() {
13+
local cmd="$1"
14+
command -v "$cmd" >/dev/null 2>&1 || die "missing required command: $cmd"
15+
}

scripts/testbench/tests/test_runner.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import subprocess
23
import tempfile
34
import unittest
@@ -117,6 +118,64 @@ def test_run_scenario_validates_corpus_before_setup(self) -> None:
117118
write_config.assert_not_called()
118119
ensure_backend_ready.assert_not_called()
119120

121+
def test_run_scenario_records_failed_compact_phase_without_crashing(self) -> None:
122+
scenario = {
123+
"name": "compact-timeout",
124+
"corpus": {"size_gib": 0.1},
125+
"phases": [{"action": "init"}, {"action": "compact"}],
126+
}
127+
128+
phase_results = [
129+
{"action": "init", "label": "", "passed": True, "detail": "ok", "duration_sec": 0.01},
130+
{
131+
"action": "compact",
132+
"label": "",
133+
"passed": False,
134+
"detail": "Command timed out after 3600 seconds",
135+
"duration_sec": 3600.0,
136+
},
137+
]
138+
139+
with tempfile.TemporaryDirectory() as output_dir:
140+
with mock.patch(
141+
"vykar_testbench.scenarios.corpus.validate_corpus_mix"
142+
), mock.patch(
143+
"vykar_testbench.scenarios.corpus.generate_corpus",
144+
return_value={"file_count": 1, "total_bytes": 1024},
145+
), mock.patch(
146+
"vykar_testbench.scenarios.cfg.write_vykar_config",
147+
return_value="/mnt/repos/scenario-repo",
148+
), mock.patch(
149+
"vykar_testbench.scenarios.cfg.ensure_backend_ready"
150+
), mock.patch(
151+
"vykar_testbench.scenarios.vykar_cmd.vykar_delete_repo"
152+
), mock.patch(
153+
"vykar_testbench.scenarios._run_phase",
154+
side_effect=phase_results,
155+
), mock.patch(
156+
"vykar_testbench.scenarios.write_aggregate_report"
157+
), mock.patch(
158+
"vykar_testbench.scenarios.print_summary"
159+
):
160+
passed = scenarios.run_scenario(
161+
scenario,
162+
backend="local",
163+
runs=1,
164+
output_dir=output_dir,
165+
vykar_bin="vykar",
166+
seed=123,
167+
)
168+
summary_path = Path(output_dir) / "compact-timeout" / next(
169+
p.name for p in (Path(output_dir) / "compact-timeout").iterdir() if p.is_dir()
170+
) / "run-001" / "summary.json"
171+
with open(summary_path) as f:
172+
summary = json.load(f)
173+
174+
self.assertFalse(passed)
175+
self.assertEqual(summary["phases"][1]["action"], "compact")
176+
self.assertFalse(summary["phases"][1]["passed"])
177+
self.assertIn("timed out", summary["phases"][1]["detail"])
178+
120179
def test_churn_phase_reports_cap_stats(self) -> None:
121180
ctx = {
122181
"vykar_bin": "vykar",
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import subprocess
2+
import tempfile
3+
import unittest
4+
from pathlib import Path
5+
from unittest import mock
6+
7+
from vykar_testbench import stress
8+
from vykar_testbench import vykar
9+
10+
11+
class StressRunnerTests(unittest.TestCase):
12+
def test_run_vykar_logged_uses_long_timeout_for_compact(self) -> None:
13+
completed = subprocess.CompletedProcess(
14+
args=["vykar", "compact"],
15+
returncode=0,
16+
stdout="",
17+
stderr="",
18+
)
19+
20+
with tempfile.TemporaryDirectory() as tmpdir:
21+
log_file = str(Path(tmpdir) / "compact.log")
22+
with mock.patch("vykar_testbench.stress.subprocess.run", return_value=completed) as run:
23+
rc, returned_log = stress._run_vykar_logged(
24+
"vykar",
25+
"/tmp/config.yaml",
26+
["compact", "-R", "scenario-simple", "--threshold", "0"],
27+
env={},
28+
log_file=log_file,
29+
)
30+
31+
self.assertEqual(rc, 0)
32+
self.assertEqual(returned_log, log_file)
33+
self.assertEqual(run.call_args.kwargs["timeout"], vykar.LONG_TIMEOUT_SECONDS)
34+
35+
def test_run_vykar_logged_returns_124_and_logs_timeout(self) -> None:
36+
timeout = subprocess.TimeoutExpired(
37+
cmd=["vykar", "--config", "/tmp/config.yaml", "compact", "-R", "scenario-simple"],
38+
timeout=vykar.LONG_TIMEOUT_SECONDS,
39+
)
40+
41+
with tempfile.TemporaryDirectory() as tmpdir:
42+
log_file = str(Path(tmpdir) / "compact.log")
43+
with mock.patch("vykar_testbench.stress.subprocess.run", side_effect=timeout):
44+
rc, returned_log = stress._run_vykar_logged(
45+
"vykar",
46+
"/tmp/config.yaml",
47+
["compact", "-R", "scenario-simple", "--threshold", "0"],
48+
env={},
49+
log_file=log_file,
50+
)
51+
52+
log_text = Path(log_file).read_text()
53+
54+
self.assertEqual(rc, 124)
55+
self.assertEqual(returned_log, log_file)
56+
self.assertIn("timed out after 3600 seconds", log_text)
57+
58+
59+
if __name__ == "__main__":
60+
unittest.main()

scripts/testbench/tests/test_vykar.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,37 @@
66

77

88
class VykarWrapperTests(unittest.TestCase):
9+
def test_timeout_policy_marks_compact_prune_and_delete_as_long_running(self) -> None:
10+
self.assertEqual(
11+
vykar.timeout_for_args(["compact", "-R", "scenario-simple"]),
12+
vykar.LONG_TIMEOUT_SECONDS,
13+
)
14+
self.assertEqual(
15+
vykar.timeout_for_args(["prune", "-R", "scenario-simple"]),
16+
vykar.LONG_TIMEOUT_SECONDS,
17+
)
18+
self.assertEqual(
19+
vykar.timeout_for_args(["delete", "-R", "scenario-simple", "--yes-delete-this-repo"]),
20+
vykar.LONG_TIMEOUT_SECONDS,
21+
)
22+
23+
def test_compact_uses_long_running_timeout_policy(self) -> None:
24+
compact_result = subprocess.CompletedProcess(
25+
args=["vykar", "compact"],
26+
returncode=0,
27+
stdout="Compaction complete\n",
28+
stderr="",
29+
)
30+
31+
with mock.patch("vykar_testbench.vykar.subprocess.run", return_value=compact_result) as run:
32+
result = vykar.vykar_compact("vykar", "/tmp/config.yaml", "scenario-simple")
33+
34+
self.assertEqual(result.returncode, 0)
35+
self.assertEqual(
36+
run.call_args.kwargs["timeout"],
37+
vykar.LONG_TIMEOUT_SECONDS,
38+
)
39+
940
def test_delete_repo_tolerates_missing_repository(self) -> None:
1041
missing_repo = subprocess.CompletedProcess(
1142
args=["vykar", "delete"],
@@ -42,6 +73,27 @@ def test_backup_ignores_runner_phase_label_for_configured_sources(self) -> None:
4273
["vykar", "--config", "/tmp/config.yaml", "backup", "-R", "scenario-simple"],
4374
)
4475

76+
def test_run_vykar_converts_timeouts_to_rc_124(self) -> None:
77+
timeout = subprocess.TimeoutExpired(
78+
cmd=["vykar", "--config", "/tmp/config.yaml", "compact", "-R", "scenario-simple"],
79+
timeout=vykar.LONG_TIMEOUT_SECONDS,
80+
)
81+
timeout.stdout = "partial stdout"
82+
timeout.stderr = "partial stderr"
83+
84+
with mock.patch("vykar_testbench.vykar.subprocess.run", side_effect=timeout):
85+
result = vykar.run_vykar(
86+
"vykar",
87+
"/tmp/config.yaml",
88+
["compact", "-R", "scenario-simple"],
89+
label="compact",
90+
)
91+
92+
self.assertEqual(result.returncode, 124)
93+
self.assertEqual(result.stdout, "partial stdout")
94+
self.assertIn("partial stderr", result.stderr)
95+
self.assertIn("timed out after 3600 seconds", result.stderr)
96+
4597

4698
if __name__ == "__main__":
4799
unittest.main()

scripts/testbench/vykar_testbench/stress.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,25 +33,34 @@ def _run_vykar_logged(
3333
env: dict[str, str],
3434
log_file: str,
3535
time_v: bool = False,
36-
timeout: int = 120,
36+
timeout: int | None = None,
3737
) -> tuple[int, str]:
3838
"""Run vykar, capturing output to log_file. Returns (rc, log_file)."""
3939
cmd = [vykar_bin, "--config", config_path] + args
40+
resolved_timeout = timeout if timeout is not None else vykar_cmd.timeout_for_args(args)
4041
if time_v:
4142
time_file = log_file.replace(".log", ".timev")
4243
full_cmd = ["/usr/bin/time", "-v", "-o", time_file] + cmd
4344
else:
4445
full_cmd = cmd
4546

4647
with open(log_file, "w") as f:
47-
result = subprocess.run(
48-
full_cmd,
49-
env=env,
50-
stdout=f,
51-
stderr=subprocess.STDOUT,
52-
timeout=timeout,
53-
check=False,
54-
)
48+
try:
49+
result = subprocess.run(
50+
full_cmd,
51+
env=env,
52+
stdout=f,
53+
stderr=subprocess.STDOUT,
54+
timeout=resolved_timeout,
55+
check=False,
56+
)
57+
except subprocess.TimeoutExpired:
58+
print(
59+
f"Command timed out after {resolved_timeout} seconds: {' '.join(cmd)}",
60+
file=f,
61+
flush=True,
62+
)
63+
return 124, log_file
5564

5665
return result.returncode, log_file
5766

scripts/testbench/vykar_testbench/vykar.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,19 @@
1515

1616

1717
_SNAPSHOT_RE = re.compile(r"Snapshot created: (\S+)")
18+
SHORT_TIMEOUT_SECONDS = 120
19+
LONG_TIMEOUT_SECONDS = 3600
20+
21+
22+
def timeout_for_args(args: list[str]) -> int:
23+
"""Return the default timeout for a vykar subcommand."""
24+
if not args:
25+
return SHORT_TIMEOUT_SECONDS
26+
27+
command = args[0]
28+
if command in {"backup", "restore", "check", "compact", "prune", "delete"}:
29+
return LONG_TIMEOUT_SECONDS
30+
return SHORT_TIMEOUT_SECONDS
1831

1932

2033
def _env_with_passphrase() -> dict[str, str]:
@@ -32,7 +45,7 @@ def run_vykar(
3245
config_path: str,
3346
args: list[str],
3447
*,
35-
timeout: int = 120,
48+
timeout: int | None = None,
3649
label: str = "",
3750
allow_missing_repo: bool = False,
3851
passphrase: str | None = None,
@@ -43,14 +56,33 @@ def run_vykar(
4356
env = _env_with_passphrase()
4457
if passphrase is not None:
4558
env["VYKAR_PASSPHRASE"] = passphrase
59+
resolved_timeout = timeout if timeout is not None else timeout_for_args(args)
60+
61+
try:
62+
result = subprocess.run(
63+
cmd,
64+
capture_output=capture,
65+
text=True,
66+
env=env,
67+
timeout=resolved_timeout,
68+
)
69+
except subprocess.TimeoutExpired as exc:
70+
stdout = exc.stdout if isinstance(exc.stdout, str) else ""
71+
stderr = exc.stderr if isinstance(exc.stderr, str) else ""
72+
timeout_msg = (
73+
f"Command timed out after {resolved_timeout} seconds: {' '.join(cmd)}"
74+
)
75+
if stderr:
76+
stderr = f"{stderr}\n{timeout_msg}"
77+
else:
78+
stderr = timeout_msg
79+
result = subprocess.CompletedProcess(
80+
args=cmd,
81+
returncode=124,
82+
stdout=stdout,
83+
stderr=stderr,
84+
)
4685

47-
result = subprocess.run(
48-
cmd,
49-
capture_output=capture,
50-
text=True,
51-
env=env,
52-
timeout=timeout,
53-
)
5486
missing_repo = allow_missing_repo and _is_missing_repo_error(result.stderr)
5587
if result.returncode != 0 and not missing_repo:
5688
print(
@@ -81,7 +113,7 @@ def vykar_backup(
81113
) -> tuple[subprocess.CompletedProcess, str | None]:
82114
"""Run backup, return (result, snapshot_id or None)."""
83115
args = ["backup", "-R", repo_label]
84-
result = run_vykar(vykar_bin, config_path, args, timeout=3600, label="backup")
116+
result = run_vykar(vykar_bin, config_path, args, label="backup")
85117
snapshot_id = None
86118
if result.returncode == 0:
87119
snapshot_id = extract_snapshot_id(result.stdout)
@@ -103,7 +135,6 @@ def vykar_restore(
103135
vykar_bin,
104136
config_path,
105137
["restore", "-R", repo_label, snapshot_id, target_dir],
106-
timeout=3600,
107138
label="restore",
108139
)
109140

@@ -118,7 +149,7 @@ def vykar_check(
118149
args = ["check", "-R", repo_label]
119150
if verify_data:
120151
args.append("--verify-data")
121-
return run_vykar(vykar_bin, config_path, args, timeout=3600, label="check")
152+
return run_vykar(vykar_bin, config_path, args, label="check")
122153

123154

124155
def vykar_prune(vykar_bin: str, config_path: str, repo_label: str) -> subprocess.CompletedProcess:

0 commit comments

Comments
 (0)