diff --git a/.gitignore b/.gitignore index 4089448..b05e3f9 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,6 @@ uv.lock # Codex skills are generated at runtime by setup_codex.py from .claude/skills/ .agents/ + +# Agent-plane reference clone +agent-plane-ref/ diff --git a/app.py b/app.py index f3e6797..0c63cad 100644 --- a/app.py +++ b/app.py @@ -293,20 +293,24 @@ def _configure_all_cli_auth(token): else: anthropic_base_url = f"{databricks_host}/serving-endpoints/anthropic" - settings = { - "env": { - "ANTHROPIC_MODEL": os.environ.get("ANTHROPIC_MODEL", "databricks-claude-opus-4-7"), - "ANTHROPIC_BASE_URL": anthropic_base_url, - "ANTHROPIC_AUTH_TOKEN": token, - "ANTHROPIC_DEFAULT_OPUS_MODEL": "databricks-claude-opus-4-7", - "ANTHROPIC_DEFAULT_SONNET_MODEL": "databricks-claude-sonnet-4-6", - "ANTHROPIC_DEFAULT_HAIKU_MODEL": "databricks-claude-haiku-4-5", - "ANTHROPIC_CUSTOM_HEADERS": "x-databricks-use-coding-agent-mode: true", - "CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS": "1", - } - } - + # Read-merge-write to preserve env vars from other setup scripts (e.g. setup_mlflow.py) settings_path = os.path.join(claude_dir, "settings.json") + try: + with open(settings_path) as f: + settings = json.load(f) + except (FileNotFoundError, json.JSONDecodeError): + settings = {} + + settings.setdefault("env", {}) + settings["env"]["ANTHROPIC_MODEL"] = os.environ.get("ANTHROPIC_MODEL", "databricks-claude-opus-4-7") + settings["env"]["ANTHROPIC_BASE_URL"] = anthropic_base_url + settings["env"]["ANTHROPIC_AUTH_TOKEN"] = token + settings["env"]["ANTHROPIC_DEFAULT_OPUS_MODEL"] = "databricks-claude-opus-4-7" + settings["env"]["ANTHROPIC_DEFAULT_SONNET_MODEL"] = "databricks-claude-sonnet-4-6" + settings["env"]["ANTHROPIC_DEFAULT_HAIKU_MODEL"] = "databricks-claude-haiku-4-5" + settings["env"]["ANTHROPIC_CUSTOM_HEADERS"] = "x-databricks-use-coding-agent-mode: true" + settings["env"]["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] = "1" + with open(settings_path, "w") as f: json.dump(settings, f, indent=2) @@ -373,7 +377,6 @@ def run_setup(): ("gemini", ["uv", "run", "python", "setup_gemini.py"]), ("hermes", ["uv", "run", "python", "setup_hermes.py"]), ("databricks", ["uv", "run", "python", "setup_databricks.py"]), - ("mlflow", ["uv", "run", "python", "setup_mlflow.py"]), ] with ThreadPoolExecutor(max_workers=len(parallel_steps)) as executor: @@ -383,6 +386,11 @@ def run_setup(): ] wait(futures) + # --- MLflow setup runs AFTER claude setup to avoid settings.json race --- + # setup_mlflow.py merges env vars into ~/.claude/settings.json which + # setup_claude.py also writes; running sequentially prevents clobbering. + _run_step("mlflow", ["uv", "run", "python", "setup_mlflow.py"]) + # Sync latest token into all CLI configs — covers the race where PAT # rotation happened while a setup script was still installing (the # rotation's update_cli_tokens() call silently skips missing config files). diff --git a/pyproject.toml b/pyproject.toml index f3c25d8..2fac033 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "coda" -version = "0.17.4" +version = "0.18.1" description = "CoDA - Coding Agents on Databricks Apps" requires-python = ">=3.10" dependencies = [ diff --git a/setup_claude.py b/setup_claude.py index e42cd69..9815ef5 100644 --- a/setup_claude.py +++ b/setup_claude.py @@ -29,20 +29,27 @@ anthropic_base_url = f"{databricks_host}/serving-endpoints/anthropic" print(f"Using Databricks Host: {databricks_host}") - settings = { - "env": { - "ANTHROPIC_MODEL": os.environ.get("ANTHROPIC_MODEL", "databricks-claude-opus-4-7"), - "ANTHROPIC_BASE_URL": anthropic_base_url, - "ANTHROPIC_AUTH_TOKEN": token, - "ANTHROPIC_DEFAULT_OPUS_MODEL": "databricks-claude-opus-4-7", - "ANTHROPIC_DEFAULT_SONNET_MODEL": "databricks-claude-sonnet-4-6", - "ANTHROPIC_DEFAULT_HAIKU_MODEL": "databricks-claude-haiku-4-5", - "ANTHROPIC_CUSTOM_HEADERS": "x-databricks-use-coding-agent-mode: true", - "CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS": "1", - } - } - settings_path = claude_dir / "settings.json" + + # Read-merge-write to preserve env vars from other setup scripts (e.g. setup_mlflow.py) + if settings_path.exists(): + try: + settings = json.loads(settings_path.read_text()) + except (json.JSONDecodeError, OSError): + settings = {} + else: + settings = {} + + settings.setdefault("env", {}) + settings["env"]["ANTHROPIC_MODEL"] = os.environ.get("ANTHROPIC_MODEL", "databricks-claude-opus-4-7") + settings["env"]["ANTHROPIC_BASE_URL"] = anthropic_base_url + settings["env"]["ANTHROPIC_AUTH_TOKEN"] = token + settings["env"]["ANTHROPIC_DEFAULT_OPUS_MODEL"] = "databricks-claude-opus-4-7" + settings["env"]["ANTHROPIC_DEFAULT_SONNET_MODEL"] = "databricks-claude-sonnet-4-6" + settings["env"]["ANTHROPIC_DEFAULT_HAIKU_MODEL"] = "databricks-claude-haiku-4-5" + settings["env"]["ANTHROPIC_CUSTOM_HEADERS"] = "x-databricks-use-coding-agent-mode: true" + settings["env"]["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] = "1" + settings_path.write_text(json.dumps(settings, indent=2)) print(f"Claude configured: {settings_path}") else: diff --git a/setup_codex.py b/setup_codex.py index d546708..6be864f 100644 --- a/setup_codex.py +++ b/setup_codex.py @@ -30,22 +30,39 @@ local_bin.mkdir(parents=True, exist_ok=True) codex_bin = local_bin / "codex" +MAX_RETRIES = 3 +RETRY_DELAY = 5 # seconds + if not codex_bin.exists(): - # Use --prefix ~/.local so npm installs directly into ~/.local/bin npm_prefix = str(home / ".local") codex_version = get_npm_version("@openai/codex") codex_pkg = f"@openai/codex@{codex_version}" if codex_version else "@openai/codex" - print(f"Installing {codex_pkg}...") - result = subprocess.run( - ["npm", "install", "-g", f"--prefix={npm_prefix}", codex_pkg], - capture_output=True, - text=True, - env={**os.environ, "HOME": str(home)}, - ) - if result.returncode == 0: - print(f"Codex CLI installed to {codex_bin}") - else: - print(f"Codex CLI install warning: {result.stderr}") + + for attempt in range(1, MAX_RETRIES + 1): + print(f"Installing {codex_pkg} (attempt {attempt}/{MAX_RETRIES})...") + result = subprocess.run( + ["npm", "install", "-g", f"--prefix={npm_prefix}", codex_pkg], + capture_output=True, + text=True, + env={**os.environ, "HOME": str(home)}, + ) + if result.returncode == 0 and codex_bin.exists(): + print(f"Codex CLI installed to {codex_bin}") + break + else: + stderr = result.stderr.strip() + print(f"Codex CLI install failed (attempt {attempt}/{MAX_RETRIES}, rc={result.returncode})") + if stderr: + print(f" stderr: {stderr[:500]}") + if result.stdout.strip(): + print(f" stdout: {result.stdout.strip()[:500]}") + if attempt < MAX_RETRIES: + import time + print(f" Retrying in {RETRY_DELAY}s...") + time.sleep(RETRY_DELAY) + else: + print(f"ERROR: Codex CLI installation failed after {MAX_RETRIES} attempts. " + f"Run manually: npm install -g --prefix=$HOME/.local @openai/codex") else: print(f"Codex CLI already installed at {codex_bin}") diff --git a/setup_gemini.py b/setup_gemini.py index 635b366..ec77851 100644 --- a/setup_gemini.py +++ b/setup_gemini.py @@ -12,7 +12,6 @@ """ import os import json -import shutil import subprocess from pathlib import Path @@ -33,21 +32,38 @@ local_bin.mkdir(parents=True, exist_ok=True) gemini_bin = local_bin / "gemini" +MAX_RETRIES = 3 +RETRY_DELAY = 5 # seconds + if not gemini_bin.exists(): - # Use --prefix ~/.local so npm installs directly into ~/.local/bin (avoids EACCES on /usr/local) npm_prefix = str(home / ".local") gemini_version = get_npm_version("@google/gemini-cli") gemini_pkg = f"@google/gemini-cli@{gemini_version}" if gemini_version else "@google/gemini-cli@latest" - print(f"Installing {gemini_pkg}...") - result = subprocess.run( - ["npm", "install", "-g", f"--prefix={npm_prefix}", gemini_pkg], - capture_output=True, text=True, - env={**os.environ, "HOME": str(home)} - ) - if result.returncode == 0: - print(f"Gemini CLI installed to {gemini_bin}") - else: - print(f"Gemini CLI install warning: {result.stderr}") + + for attempt in range(1, MAX_RETRIES + 1): + print(f"Installing {gemini_pkg} (attempt {attempt}/{MAX_RETRIES})...") + result = subprocess.run( + ["npm", "install", "-g", f"--prefix={npm_prefix}", gemini_pkg], + capture_output=True, text=True, + env={**os.environ, "HOME": str(home)} + ) + if result.returncode == 0 and gemini_bin.exists(): + print(f"Gemini CLI installed to {gemini_bin}") + break + else: + stderr = result.stderr.strip() + print(f"Gemini CLI install failed (attempt {attempt}/{MAX_RETRIES}, rc={result.returncode})") + if stderr: + print(f" stderr: {stderr[:500]}") + if result.stdout.strip(): + print(f" stdout: {result.stdout.strip()[:500]}") + if attempt < MAX_RETRIES: + import time + print(f" Retrying in {RETRY_DELAY}s...") + time.sleep(RETRY_DELAY) + else: + print(f"ERROR: Gemini CLI installation failed after {MAX_RETRIES} attempts. " + f"Run manually: npm install -g --prefix=$HOME/.local @google/gemini-cli") else: print(f"Gemini CLI already installed at {gemini_bin}") @@ -127,16 +143,9 @@ settings_path.write_text(json.dumps(settings, indent=2)) print(f"Gemini CLI settings configured: {settings_path}") -# 5. Copy Claude skills into .gemini/skills for shared reference -claude_skills_dir = home / ".claude" / "skills" -gemini_skills_dir = gemini_dir / "skills" -if claude_skills_dir.exists(): - if gemini_skills_dir.exists(): - shutil.rmtree(gemini_skills_dir) - shutil.copytree(claude_skills_dir, gemini_skills_dir) - print(f"Skills copied: {claude_skills_dir} -> {gemini_skills_dir}") -else: - print(f"No Claude skills found at {claude_skills_dir}, skipping copy") +# 5. Skills live in ~/.agents/skills/ (shared across all CLIs, copied by setup_codex.py). +# Do NOT copy into ~/.gemini/skills/ — Gemini discovers both paths and logs +# "Skill conflict detected" warnings for every duplicate. # 6. Adapt CLAUDE.md to GEMINI.md for Gemini CLI # Look for CLAUDE.md in common locations diff --git a/setup_opencode.py b/setup_opencode.py index 3df19d5..071252a 100644 --- a/setup_opencode.py +++ b/setup_opencode.py @@ -32,23 +32,40 @@ local_bin.mkdir(parents=True, exist_ok=True) opencode_bin = local_bin / "opencode" +MAX_RETRIES = 3 +RETRY_DELAY = 5 # seconds + if not opencode_bin.exists(): - # Use --prefix ~/.local so npm installs directly into ~/.local/bin (avoids EACCES on /usr/local) npm_prefix = str(home / ".local") # Resolve exact versions to avoid mutable @latest tags (supply chain hardening) oc_version = get_npm_version("opencode-ai") oc_pkg = f"opencode-ai@{oc_version}" if oc_version else "opencode-ai@latest" - print(f"Installing {oc_pkg}...") - result = subprocess.run( - ["npm", "install", "-g", f"--prefix={npm_prefix}", oc_pkg], - capture_output=True, text=True, - env={**os.environ, "HOME": str(home)} - ) - if result.returncode == 0: - print(f"OpenCode CLI installed to {opencode_bin}") - else: - print(f"OpenCode install warning: {result.stderr}") + + for attempt in range(1, MAX_RETRIES + 1): + print(f"Installing {oc_pkg} (attempt {attempt}/{MAX_RETRIES})...") + result = subprocess.run( + ["npm", "install", "-g", f"--prefix={npm_prefix}", oc_pkg], + capture_output=True, text=True, + env={**os.environ, "HOME": str(home)} + ) + if result.returncode == 0 and opencode_bin.exists(): + print(f"OpenCode CLI installed to {opencode_bin}") + break + else: + stderr = result.stderr.strip() + print(f"OpenCode install failed (attempt {attempt}/{MAX_RETRIES}, rc={result.returncode})") + if stderr: + print(f" stderr: {stderr[:500]}") + if result.stdout.strip(): + print(f" stdout: {result.stdout.strip()[:500]}") + if attempt < MAX_RETRIES: + import time + print(f" Retrying in {RETRY_DELAY}s...") + time.sleep(RETRY_DELAY) + else: + print(f"ERROR: OpenCode installation failed after {MAX_RETRIES} attempts. " + f"Run manually: npm install -g --prefix=$HOME/.local opencode-ai") # Install @ai-sdk/openai for GPT models (Responses API support) sdk_version = get_npm_version("@ai-sdk/openai") @@ -62,7 +79,7 @@ if result.returncode == 0: print(f"@ai-sdk/openai@{sdk_version or 'latest'} installed (Responses API support)") else: - print(f"@ai-sdk/openai install warning: {result.stderr}") + print(f"@ai-sdk/openai install warning: {result.stderr[:500]}") else: print(f"OpenCode CLI already installed at {opencode_bin}") diff --git a/tests/test_session_detach.py b/tests/test_session_detach.py index ceff721..c381a40 100644 --- a/tests/test_session_detach.py +++ b/tests/test_session_detach.py @@ -105,6 +105,9 @@ class TestListSessions: def setup_app(self): app_module = _get_app() app_module.app_owner = "test@example.com" + # Clear leftover sessions from prior test classes + with app_module.sessions_lock: + app_module.sessions.clear() self.client = app_module.app.test_client() self.app_module = app_module yield diff --git a/tests/test_session_limit.py b/tests/test_session_limit.py index 5625c52..ae547f7 100644 --- a/tests/test_session_limit.py +++ b/tests/test_session_limit.py @@ -170,6 +170,9 @@ class TestRemovedSessionsFreeSlots: def test_removing_session_frees_slot(self): """After removing a session from the dict, a new one can be created.""" app_module = _get_app() + # Clear any leaked sessions from prior tests + with app_module.sessions_lock: + app_module.sessions.clear() limit = app_module.MAX_CONCURRENT_SESSIONS sids = [f"full-{i}" for i in range(limit)] try: