Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Stop hook: flush the Claude Code session transcript to an MLflow trace.
#
# Claude Code pipes the hook-event JSON to our stdin. We capture that
# synchronously (fast, bounded to one read) then background the actual
# flush with stdin redirected from the captured file. This way:
#
# - the wrapper returns in <1s, unblocking the Stop chain
# (crystallize-nudge, brain-push, /til)
# - a hard `timeout 30` caps the backgrounded handler so a stall in
# transcript processing can't hold memory/CPU indefinitely
# - stop_hook_handler() actually receives its hook-event JSON, which
# naive `nohup ... & disown` would have redirected to /dev/null

set -euo pipefail

APP_DIR="/app/python/source_code"
LOG="$HOME/.mlflow-hook.log"
STDIN_FILE="$(mktemp -t mlflow-hook.XXXXXX)"

# Synchronous: read Claude Code's hook-event JSON from stdin.
cat > "$STDIN_FILE"

# Async: run the handler in the background with the captured stdin.
# The subshell cleans up the temp file after timeout/handler exits.
nohup bash -c "
timeout 30 uv run --project '$APP_DIR' python -c \
'from mlflow.claude_code.hooks import stop_hook_handler; stop_hook_handler()' \
< '$STDIN_FILE'
rm -f '$STDIN_FILE'
" >> "$LOG" 2>&1 & disown
64 changes: 42 additions & 22 deletions setup_mlflow.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Configure MLflow tracing for Claude Code sessions.

Merges MLflow env vars and a Stop hook into ~/.claude/settings.json so that
every Claude Code session automatically logs traces to a Databricks MLflow
experiment at /Users/{app_owner}/{app_name}.
Merges MLflow env vars into ~/.claude/settings.json. Tracing is disabled
by default — the Stop hook stalls on transcript processing. Set
MLFLOW_CLAUDE_TRACING_ENABLED=true to opt in once the hook is reliable.
"""

import os
Expand Down Expand Up @@ -31,34 +31,54 @@

experiment_name = f"/Users/{app_owner}/{app_name}"

# Merge MLflow env vars
# Tracing disabled by default — stop hook stalls on transcript processing
tracing_enabled = os.environ.get("MLFLOW_CLAUDE_TRACING_ENABLED", "false").lower() == "true"

# Merge MLflow env vars (always set so tracing can be toggled at runtime)
settings.setdefault("env", {})
settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] = "false"
settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] = str(tracing_enabled).lower()
settings["env"]["MLFLOW_TRACKING_URI"] = "databricks"
settings["env"]["MLFLOW_EXPERIMENT_NAME"] = experiment_name
# Override container-level OTEL endpoint so MLflow uses its native MlflowV3SpanExporter
# instead of sending traces to a non-existent localhost:4314 OTLP collector
settings["env"]["OTEL_EXPORTER_OTLP_ENDPOINT"] = ""

# Add Stop hook (processes full transcript at session end)
# Use `uv run python` so mlflow resolves correctly regardless of venv paths
python_cmd = "uv run python"
mlflow_hook = {
"hooks": [
{
"type": "command",
"command": f"{python_cmd} -c \"from mlflow.claude_code.hooks import stop_hook_handler; stop_hook_handler()\""
}
]
}
# Only register the Stop hook when explicitly enabled
if tracing_enabled:
app_dir = os.path.dirname(os.path.abspath(__file__))
# Delegate to a proper hook script that backgrounds the handler via
# `nohup timeout 30 ... & disown`. This:
# 1. unblocks the Stop chain immediately (brain-push, /til, etc.)
# 2. caps the backgrounded flush at 30s so a stuck handler can't
# eat memory/CPU forever — one dropped trace beats a leaked
# transcript processor
hook_script = os.path.join(
app_dir,
"coda-marketplace", "plugins", "coda-essentials", "hooks",
"mlflow-trace-stop.sh",
)
os.chmod(hook_script, 0o755)
mlflow_hook = {
"hooks": [
{
"type": "command",
"command": f"bash {hook_script}",
# The wrapper script backgrounds the work and returns in <1s,
# so this outer timeout is belt-and-braces only.
"timeout": 5,
}
]
}

existing_hooks = settings.get("hooks", {})
stop_hooks = existing_hooks.get("Stop", [])
stop_hooks.append(mlflow_hook)
existing_hooks["Stop"] = stop_hooks
settings["hooks"] = existing_hooks
existing_hooks = settings.get("hooks", {})
stop_hooks = existing_hooks.get("Stop", [])
stop_hooks.append(mlflow_hook)
existing_hooks["Stop"] = stop_hooks
settings["hooks"] = existing_hooks
print(f"MLflow tracing enabled: experiment={experiment_name}")
else:
print("MLflow tracing disabled (set MLFLOW_CLAUDE_TRACING_ENABLED=true to enable)")

settings_path.write_text(json.dumps(settings, indent=2))
print(f"MLflow tracing enabled: experiment={experiment_name}")
print(f" Tracking URI: databricks")
print(f" Settings updated: {settings_path}")
65 changes: 55 additions & 10 deletions tests/test_mlflow_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,23 @@ def read_settings(tmp_path):
class TestMlflowEnvVars:
"""Verify MLflow environment variables are added to settings.json."""

def test_tracing_enabled(self, tmp_path):
def test_tracing_disabled_by_default(self, tmp_path):
write_existing_settings(tmp_path, {"env": {"ANTHROPIC_MODEL": "test"}})
result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"})
assert result.returncode == 0
settings = read_settings(tmp_path)
assert settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] == "false"

def test_tracing_enabled_when_env_true(self, tmp_path):
write_existing_settings(tmp_path, {"env": {"ANTHROPIC_MODEL": "test"}})
result = run_setup_mlflow(tmp_path, {
"APP_OWNER": "jane@company.com",
"MLFLOW_CLAUDE_TRACING_ENABLED": "true",
})
assert result.returncode == 0
settings = read_settings(tmp_path)
assert settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] == "true"

def test_tracking_uri(self, tmp_path):
write_existing_settings(tmp_path, {"env": {}})
result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"})
Expand Down Expand Up @@ -97,26 +107,54 @@ def test_experiment_name_default_app_name(self, tmp_path):
# ---------------------------------------------------------------------------

class TestStopHook:
"""Verify the MLflow Stop hook is added to settings.json."""
"""Verify the MLflow Stop hook is only added when tracing is enabled."""

def test_stop_hook_present(self, tmp_path):
def test_stop_hook_absent_by_default(self, tmp_path):
write_existing_settings(tmp_path, {"env": {}})
result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"})
assert result.returncode == 0
settings = read_settings(tmp_path)
stop_hooks = settings.get("hooks", {}).get("Stop", [])
assert len(stop_hooks) == 0

def test_stop_hook_present_when_tracing_enabled(self, tmp_path):
write_existing_settings(tmp_path, {"env": {}})
result = run_setup_mlflow(tmp_path, {
"APP_OWNER": "jane@company.com",
"MLFLOW_CLAUDE_TRACING_ENABLED": "true",
})
assert result.returncode == 0
settings = read_settings(tmp_path)
assert "hooks" in settings
assert "Stop" in settings["hooks"]
assert len(settings["hooks"]["Stop"]) == 1

def test_stop_hook_command(self, tmp_path):
def test_stop_hook_delegates_to_shell_script(self, tmp_path):
"""Hook must delegate to the mlflow-trace-stop.sh wrapper script.

The wrapper backgrounds the handler via nohup/disown so the Stop chain
returns in <1s. The shell script itself uses --project and invokes
stop_hook_handler; we verify the hook command points at the script and
that the script exists and contains the expected implementation.
"""
write_existing_settings(tmp_path, {"env": {}})
result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"})
result = run_setup_mlflow(tmp_path, {
"APP_OWNER": "jane@company.com",
"MLFLOW_CLAUDE_TRACING_ENABLED": "true",
})
assert result.returncode == 0
settings = read_settings(tmp_path)
hook = settings["hooks"]["Stop"][0]["hooks"][0]
assert hook["type"] == "command"
assert "stop_hook_handler" in hook["command"]
assert "mlflow.claude_code.hooks" in hook["command"]
assert "bash" in hook["command"]
assert "mlflow-trace-stop.sh" in hook["command"]
# Verify the script itself is present and contains the required logic
script_path = hook["command"].split(" ", 1)[1]
assert os.path.isfile(script_path), f"hook script not found: {script_path}"
script_content = open(script_path).read()
assert "--project" in script_content
assert "stop_hook_handler" in script_content
assert "mlflow.claude_code.hooks" in script_content


# ---------------------------------------------------------------------------
Expand All @@ -129,15 +167,15 @@ class TestSettingsMerge:
def test_preserves_existing_env_vars(self, tmp_path):
write_existing_settings(tmp_path, {
"env": {
"ANTHROPIC_MODEL": "databricks-claude-opus-4-6",
"ANTHROPIC_MODEL": "databricks-claude-opus-4-7",
"ANTHROPIC_BASE_URL": "https://test.com/anthropic",
"ANTHROPIC_AUTH_TOKEN": "secret",
}
})
result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"})
assert result.returncode == 0
settings = read_settings(tmp_path)
assert settings["env"]["ANTHROPIC_MODEL"] == "databricks-claude-opus-4-6"
assert settings["env"]["ANTHROPIC_MODEL"] == "databricks-claude-opus-4-7"
assert settings["env"]["ANTHROPIC_BASE_URL"] == "https://test.com/anthropic"
assert settings["env"]["ANTHROPIC_AUTH_TOKEN"] == "secret"
assert settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] == "false"
Expand All @@ -149,7 +187,10 @@ def test_preserves_existing_hooks(self, tmp_path):
"PreToolUse": [{"hooks": [{"type": "command", "command": "echo pre"}]}]
}
})
result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"})
result = run_setup_mlflow(tmp_path, {
"APP_OWNER": "jane@company.com",
"MLFLOW_CLAUDE_TRACING_ENABLED": "true",
})
assert result.returncode == 0
settings = read_settings(tmp_path)
assert "PreToolUse" in settings["hooks"]
Expand Down Expand Up @@ -187,20 +228,24 @@ class TestAppOwnerExport:

def test_app_owner_set_in_env(self):
import app as app_module
import app_state
with mock.patch.object(app_module, "get_token_owner", return_value="owner@test.com"), \
mock.patch.object(app_module, "cleanup_stale_sessions"), \
mock.patch.object(app_module, "run_setup"), \
mock.patch.object(app_state, "set_app_owner"), \
mock.patch("threading.Thread") as mock_thread:
mock_thread.return_value.start = mock.MagicMock()
app_module.initialize_app()
assert os.environ.get("APP_OWNER") == "owner@test.com"

def test_app_owner_not_set_when_unknown(self):
import app as app_module
import app_state
os.environ.pop("APP_OWNER", None)
with mock.patch.object(app_module, "get_token_owner", return_value=None), \
mock.patch.object(app_module, "cleanup_stale_sessions"), \
mock.patch.object(app_module, "run_setup"), \
mock.patch.object(app_state, "set_app_owner"), \
mock.patch("threading.Thread") as mock_thread:
mock_thread.return_value.start = mock.MagicMock()
app_module.initialize_app()
Expand Down