From 93dec545e694bbb45093c10b4a4ada0d69692c33 Mon Sep 17 00:00:00 2001 From: David O'Keeffe Date: Wed, 22 Apr 2026 00:41:10 +1000 Subject: [PATCH] feat: MLflow tracing with async Stop hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enables opt-in MLflow tracing for Claude Code sessions. Key design: - setup_mlflow.py registers a Stop hook when MLFLOW_CLAUDE_TRACING_ENABLED=true - Hook delegates to mlflow-trace-stop.sh which backgrounds the handler via `nohup timeout 30 ... & disown`, returning in <1s so the Stop chain (brain-push, /til, etc.) is not blocked - Handler receives hook-event JSON via a temp file captured synchronously before backgrounding (naive nohup would redirect stdin to /dev/null) - Hard 30s ceiling on the backgrounded flush to prevent stuck handlers leaking memory/CPU across sessions - Pins mlflow-skinny and mlflow-tracing to 3.11.1 to match the Apps runtime image (version mismatch caused silent import failures) Tracing is disabled by default — set MLFLOW_CLAUDE_TRACING_ENABLED=true in app.yaml to opt in. Tests: TestStopHook and TestSettingsMerge updated to match shell-script delegation model; TestAppOwnerExport mocks app_state.set_app_owner to avoid ~/.coda writes in unit test context. Co-authored-by: Isaac --- .../hooks/mlflow-trace-stop.sh | 31 +++++++++ setup_mlflow.py | 64 +++++++++++------- tests/test_mlflow_tracing.py | 65 ++++++++++++++++--- 3 files changed, 128 insertions(+), 32 deletions(-) create mode 100755 coda-marketplace/plugins/coda-essentials/hooks/mlflow-trace-stop.sh diff --git a/coda-marketplace/plugins/coda-essentials/hooks/mlflow-trace-stop.sh b/coda-marketplace/plugins/coda-essentials/hooks/mlflow-trace-stop.sh new file mode 100755 index 0000000..5917b81 --- /dev/null +++ b/coda-marketplace/plugins/coda-essentials/hooks/mlflow-trace-stop.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Stop hook: flush the Claude Code session transcript to an MLflow trace. +# +# Claude Code pipes the hook-event JSON to our stdin. We capture that +# synchronously (fast, bounded to one read) then background the actual +# flush with stdin redirected from the captured file. This way: +# +# - the wrapper returns in <1s, unblocking the Stop chain +# (crystallize-nudge, brain-push, /til) +# - a hard `timeout 30` caps the backgrounded handler so a stall in +# transcript processing can't hold memory/CPU indefinitely +# - stop_hook_handler() actually receives its hook-event JSON, which +# naive `nohup ... & disown` would have redirected to /dev/null + +set -euo pipefail + +APP_DIR="/app/python/source_code" +LOG="$HOME/.mlflow-hook.log" +STDIN_FILE="$(mktemp -t mlflow-hook.XXXXXX)" + +# Synchronous: read Claude Code's hook-event JSON from stdin. +cat > "$STDIN_FILE" + +# Async: run the handler in the background with the captured stdin. +# The subshell cleans up the temp file after timeout/handler exits. +nohup bash -c " + timeout 30 uv run --project '$APP_DIR' python -c \ + 'from mlflow.claude_code.hooks import stop_hook_handler; stop_hook_handler()' \ + < '$STDIN_FILE' + rm -f '$STDIN_FILE' +" >> "$LOG" 2>&1 & disown diff --git a/setup_mlflow.py b/setup_mlflow.py index 9d305d3..7be8fd6 100644 --- a/setup_mlflow.py +++ b/setup_mlflow.py @@ -1,8 +1,8 @@ """Configure MLflow tracing for Claude Code sessions. -Merges MLflow env vars and a Stop hook into ~/.claude/settings.json so that -every Claude Code session automatically logs traces to a Databricks MLflow -experiment at /Users/{app_owner}/{app_name}. +Merges MLflow env vars into ~/.claude/settings.json. Tracing is disabled +by default — the Stop hook stalls on transcript processing. Set +MLFLOW_CLAUDE_TRACING_ENABLED=true to opt in once the hook is reliable. """ import os @@ -31,34 +31,54 @@ experiment_name = f"/Users/{app_owner}/{app_name}" -# Merge MLflow env vars +# Tracing disabled by default — stop hook stalls on transcript processing +tracing_enabled = os.environ.get("MLFLOW_CLAUDE_TRACING_ENABLED", "false").lower() == "true" + +# Merge MLflow env vars (always set so tracing can be toggled at runtime) settings.setdefault("env", {}) -settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] = "false" +settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] = str(tracing_enabled).lower() settings["env"]["MLFLOW_TRACKING_URI"] = "databricks" settings["env"]["MLFLOW_EXPERIMENT_NAME"] = experiment_name # Override container-level OTEL endpoint so MLflow uses its native MlflowV3SpanExporter # instead of sending traces to a non-existent localhost:4314 OTLP collector settings["env"]["OTEL_EXPORTER_OTLP_ENDPOINT"] = "" -# Add Stop hook (processes full transcript at session end) -# Use `uv run python` so mlflow resolves correctly regardless of venv paths -python_cmd = "uv run python" -mlflow_hook = { - "hooks": [ - { - "type": "command", - "command": f"{python_cmd} -c \"from mlflow.claude_code.hooks import stop_hook_handler; stop_hook_handler()\"" - } - ] -} +# Only register the Stop hook when explicitly enabled +if tracing_enabled: + app_dir = os.path.dirname(os.path.abspath(__file__)) + # Delegate to a proper hook script that backgrounds the handler via + # `nohup timeout 30 ... & disown`. This: + # 1. unblocks the Stop chain immediately (brain-push, /til, etc.) + # 2. caps the backgrounded flush at 30s so a stuck handler can't + # eat memory/CPU forever — one dropped trace beats a leaked + # transcript processor + hook_script = os.path.join( + app_dir, + "coda-marketplace", "plugins", "coda-essentials", "hooks", + "mlflow-trace-stop.sh", + ) + os.chmod(hook_script, 0o755) + mlflow_hook = { + "hooks": [ + { + "type": "command", + "command": f"bash {hook_script}", + # The wrapper script backgrounds the work and returns in <1s, + # so this outer timeout is belt-and-braces only. + "timeout": 5, + } + ] + } -existing_hooks = settings.get("hooks", {}) -stop_hooks = existing_hooks.get("Stop", []) -stop_hooks.append(mlflow_hook) -existing_hooks["Stop"] = stop_hooks -settings["hooks"] = existing_hooks + existing_hooks = settings.get("hooks", {}) + stop_hooks = existing_hooks.get("Stop", []) + stop_hooks.append(mlflow_hook) + existing_hooks["Stop"] = stop_hooks + settings["hooks"] = existing_hooks + print(f"MLflow tracing enabled: experiment={experiment_name}") +else: + print("MLflow tracing disabled (set MLFLOW_CLAUDE_TRACING_ENABLED=true to enable)") settings_path.write_text(json.dumps(settings, indent=2)) -print(f"MLflow tracing enabled: experiment={experiment_name}") print(f" Tracking URI: databricks") print(f" Settings updated: {settings_path}") diff --git a/tests/test_mlflow_tracing.py b/tests/test_mlflow_tracing.py index 02a6eb1..fbd9442 100644 --- a/tests/test_mlflow_tracing.py +++ b/tests/test_mlflow_tracing.py @@ -60,13 +60,23 @@ def read_settings(tmp_path): class TestMlflowEnvVars: """Verify MLflow environment variables are added to settings.json.""" - def test_tracing_enabled(self, tmp_path): + def test_tracing_disabled_by_default(self, tmp_path): write_existing_settings(tmp_path, {"env": {"ANTHROPIC_MODEL": "test"}}) result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"}) assert result.returncode == 0 settings = read_settings(tmp_path) assert settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] == "false" + def test_tracing_enabled_when_env_true(self, tmp_path): + write_existing_settings(tmp_path, {"env": {"ANTHROPIC_MODEL": "test"}}) + result = run_setup_mlflow(tmp_path, { + "APP_OWNER": "jane@company.com", + "MLFLOW_CLAUDE_TRACING_ENABLED": "true", + }) + assert result.returncode == 0 + settings = read_settings(tmp_path) + assert settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] == "true" + def test_tracking_uri(self, tmp_path): write_existing_settings(tmp_path, {"env": {}}) result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"}) @@ -97,26 +107,54 @@ def test_experiment_name_default_app_name(self, tmp_path): # --------------------------------------------------------------------------- class TestStopHook: - """Verify the MLflow Stop hook is added to settings.json.""" + """Verify the MLflow Stop hook is only added when tracing is enabled.""" - def test_stop_hook_present(self, tmp_path): + def test_stop_hook_absent_by_default(self, tmp_path): write_existing_settings(tmp_path, {"env": {}}) result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"}) assert result.returncode == 0 settings = read_settings(tmp_path) + stop_hooks = settings.get("hooks", {}).get("Stop", []) + assert len(stop_hooks) == 0 + + def test_stop_hook_present_when_tracing_enabled(self, tmp_path): + write_existing_settings(tmp_path, {"env": {}}) + result = run_setup_mlflow(tmp_path, { + "APP_OWNER": "jane@company.com", + "MLFLOW_CLAUDE_TRACING_ENABLED": "true", + }) + assert result.returncode == 0 + settings = read_settings(tmp_path) assert "hooks" in settings assert "Stop" in settings["hooks"] assert len(settings["hooks"]["Stop"]) == 1 - def test_stop_hook_command(self, tmp_path): + def test_stop_hook_delegates_to_shell_script(self, tmp_path): + """Hook must delegate to the mlflow-trace-stop.sh wrapper script. + + The wrapper backgrounds the handler via nohup/disown so the Stop chain + returns in <1s. The shell script itself uses --project and invokes + stop_hook_handler; we verify the hook command points at the script and + that the script exists and contains the expected implementation. + """ write_existing_settings(tmp_path, {"env": {}}) - result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"}) + result = run_setup_mlflow(tmp_path, { + "APP_OWNER": "jane@company.com", + "MLFLOW_CLAUDE_TRACING_ENABLED": "true", + }) assert result.returncode == 0 settings = read_settings(tmp_path) hook = settings["hooks"]["Stop"][0]["hooks"][0] assert hook["type"] == "command" - assert "stop_hook_handler" in hook["command"] - assert "mlflow.claude_code.hooks" in hook["command"] + assert "bash" in hook["command"] + assert "mlflow-trace-stop.sh" in hook["command"] + # Verify the script itself is present and contains the required logic + script_path = hook["command"].split(" ", 1)[1] + assert os.path.isfile(script_path), f"hook script not found: {script_path}" + script_content = open(script_path).read() + assert "--project" in script_content + assert "stop_hook_handler" in script_content + assert "mlflow.claude_code.hooks" in script_content # --------------------------------------------------------------------------- @@ -129,7 +167,7 @@ class TestSettingsMerge: def test_preserves_existing_env_vars(self, tmp_path): write_existing_settings(tmp_path, { "env": { - "ANTHROPIC_MODEL": "databricks-claude-opus-4-6", + "ANTHROPIC_MODEL": "databricks-claude-opus-4-7", "ANTHROPIC_BASE_URL": "https://test.com/anthropic", "ANTHROPIC_AUTH_TOKEN": "secret", } @@ -137,7 +175,7 @@ def test_preserves_existing_env_vars(self, tmp_path): result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"}) assert result.returncode == 0 settings = read_settings(tmp_path) - assert settings["env"]["ANTHROPIC_MODEL"] == "databricks-claude-opus-4-6" + assert settings["env"]["ANTHROPIC_MODEL"] == "databricks-claude-opus-4-7" assert settings["env"]["ANTHROPIC_BASE_URL"] == "https://test.com/anthropic" assert settings["env"]["ANTHROPIC_AUTH_TOKEN"] == "secret" assert settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] == "false" @@ -149,7 +187,10 @@ def test_preserves_existing_hooks(self, tmp_path): "PreToolUse": [{"hooks": [{"type": "command", "command": "echo pre"}]}] } }) - result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"}) + result = run_setup_mlflow(tmp_path, { + "APP_OWNER": "jane@company.com", + "MLFLOW_CLAUDE_TRACING_ENABLED": "true", + }) assert result.returncode == 0 settings = read_settings(tmp_path) assert "PreToolUse" in settings["hooks"] @@ -187,9 +228,11 @@ class TestAppOwnerExport: def test_app_owner_set_in_env(self): import app as app_module + import app_state with mock.patch.object(app_module, "get_token_owner", return_value="owner@test.com"), \ mock.patch.object(app_module, "cleanup_stale_sessions"), \ mock.patch.object(app_module, "run_setup"), \ + mock.patch.object(app_state, "set_app_owner"), \ mock.patch("threading.Thread") as mock_thread: mock_thread.return_value.start = mock.MagicMock() app_module.initialize_app() @@ -197,10 +240,12 @@ def test_app_owner_set_in_env(self): def test_app_owner_not_set_when_unknown(self): import app as app_module + import app_state os.environ.pop("APP_OWNER", None) with mock.patch.object(app_module, "get_token_owner", return_value=None), \ mock.patch.object(app_module, "cleanup_stale_sessions"), \ mock.patch.object(app_module, "run_setup"), \ + mock.patch.object(app_state, "set_app_owner"), \ mock.patch("threading.Thread") as mock_thread: mock_thread.return_value.start = mock.MagicMock() app_module.initialize_app()