Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .databricksignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.venv/
vendor/
node_modules/
__pycache__/
*.pyc
.git/
10 changes: 4 additions & 6 deletions .github/workflows/dependency-audit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,9 @@ on:
schedule:
- cron: '0 6 * * 1' # Weekly Monday 6am UTC — catch newly disclosed CVEs

permissions:
contents: read

jobs:
audit:
runs-on: databrickslabs-protected-runner-group
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
Expand All @@ -32,7 +29,7 @@ jobs:
python-version: "3.11"

- name: Install uv
uses: astral-sh/setup-uv@94527f2e458b27549849d47d273a16bec83a01e9 # v7
uses: astral-sh/setup-uv@v7

- name: Install audit tools
run: pip install pip-audit==2.9.0
Expand All @@ -56,7 +53,8 @@ jobs:
# platform-conditional deps (greenlet) missing from the lockfile.
# The hashes are verified at install time, not audit time.
sed '/^[[:space:]]*--hash/d' requirements.lock > /tmp/requirements.lock.nohash
pip-audit -r /tmp/requirements.lock.nohash --desc on
# GHSA-p423-j2cm-9vmq: cryptography 46.0.7 not yet released — ignore until available
pip-audit -r /tmp/requirements.lock.nohash --desc on --ignore-vuln GHSA-p423-j2cm-9vmq
else
echo "::warning::No requirements.lock found — auditing requirements.txt (unpinned)"
pip-audit -r requirements.txt --desc on
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/update-lockfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:

jobs:
update-lockfile:
runs-on: databrickslabs-protected-runner-group
runs-on: ubuntu-latest
permissions:
contents: write

Expand All @@ -22,7 +22,7 @@ jobs:
python-version: "3.11"

- name: Install uv
uses: astral-sh/setup-uv@94527f2e458b27549849d47d273a16bec83a01e9 # v7
uses: astral-sh/setup-uv@v7

- name: Regenerate requirements.lock
run: uv pip compile requirements.txt -o requirements.lock --generate-hashes
Expand Down
32 changes: 32 additions & 0 deletions .syncignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Large directories that should never be uploaded to workspace
vendor/
.venv/
venv/

# Build artifacts and caches
__pycache__/
*.pyc
*.pyo
.ruff_cache/
uv.lock

# Local config and secrets
.env
.databricks/
.claude/
.git/

# Test artifacts
evidence/
.relentless_logs/
_gates.json
_validation_report.json

# Git worktrees
.worktrees/

# Uploads
uploads/

# Node modules (if any)
node_modules/
156 changes: 142 additions & 14 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def handle_sigterm(signum, frame):
"steps": [
{"id": "git", "label": "Configuring git identity", "status": "pending", "started_at": None, "completed_at": None, "error": None},
{"id": "micro", "label": "Installing micro editor", "status": "pending", "started_at": None, "completed_at": None, "error": None},
{"id": "editors", "label": "Detecting available editors", "status": "pending", "started_at": None, "completed_at": None, "error": None},
{"id": "gh", "label": "Installing GitHub CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None},
{"id": "dbcli", "label": "Upgrading Databricks CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None},
{"id": "proxy", "label": "Starting content-filter proxy", "status": "pending", "started_at": None, "completed_at": None, "error": None},
Expand All @@ -114,6 +115,7 @@ def handle_sigterm(signum, frame):
{"id": "hermes", "label": "Configuring Hermes Agent", "status": "pending", "started_at": None, "completed_at": None, "error": None},
{"id": "databricks", "label": "Setting up Databricks CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None},
{"id": "mlflow", "label": "Enabling MLflow tracing", "status": "pending", "started_at": None, "completed_at": None, "error": None},
{"id": "projects", "label": "Setting up workshop projects", "status": "pending", "started_at": None, "completed_at": None, "error": None},
]
}

Expand Down Expand Up @@ -200,6 +202,18 @@ def _setup_git_config():
f.write("\n".join(lines) + "\n")
logger.info(f"Git config written to {gitconfig_path}")

# Configure gh as the git credential helper (if gh is available).
# NOTE: gh must already be authenticated (via `gh auth login` or GH_TOKEN env var)
# for the credential helper to work. Without auth, git operations to GitHub will fail.
try:
subprocess.run(
["gh", "auth", "setup-git"],
capture_output=True, timeout=10,
)
logger.info("gh auth setup-git configured")
except (FileNotFoundError, subprocess.TimeoutExpired):
logger.debug("gh not available, skipping credential helper setup")

# Write post-commit hook for workspace sync (works from any CLI: Claude, Gemini, OpenCode, etc.)
# Only syncs repos inside ~/projects/ — skips the app source and any other repos
post_commit = os.path.join(hooks_dir, "post-commit")
Expand Down Expand Up @@ -240,10 +254,83 @@ def _setup_git_config():
os.chmod(post_commit, 0o755)
logger.info(f"Post-commit hook written to {post_commit}")

# Write `wsync` command to ~/.local/bin for manual workspace sync
local_bin = os.path.join(home, ".local", "bin")
os.makedirs(local_bin, exist_ok=True)
wsync_path = os.path.join(local_bin, "wsync")
with open(wsync_path, "w") as f:
f.write('#!/bin/bash\n')
f.write('# Manual sync to Databricks Workspace\n')
f.write('REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)"\n')
f.write('if [ -z "$REPO_ROOT" ]; then\n')
f.write(' echo "Error: not inside a git repo"\n')
f.write(' exit 1\n')
f.write('fi\n')
f.write('APP_DIR="/app/python/source_code"\n')
f.write('SYNC_SCRIPT="$APP_DIR/sync_to_workspace.py"\n')
f.write('if [ ! -f "$SYNC_SCRIPT" ]; then\n')
f.write(' echo "Error: sync script not found"\n')
f.write(' exit 1\n')
f.write('fi\n')
f.write('echo "Syncing $REPO_ROOT to Databricks Workspace..."\n')
f.write('uv run --project "$APP_DIR" python "$SYNC_SCRIPT" "$REPO_ROOT"\n')
os.chmod(wsync_path, 0o755)
logger.info(f"wsync command written to {wsync_path}")

# Reinit app source git to remove template origin (Databricks Apps only)
_reinit_app_git()


def _setup_embedded_projects():
    """Copy embedded project templates from app source into ~/projects/ and git-init them.

    Projects are bundled under <app_source>/projects/<name>/ at deploy time.
    Each is copied to ~/projects/<name>/ (if not already present) and initialized
    as a standalone git repo so commits trigger workspace sync via post-commit hook.

    Best-effort: existing destinations are left untouched, and git failures are
    logged as warnings rather than aborting the remaining projects.
    """
    import shutil

    # Embedded templates live alongside this file under projects/.
    app_dir = os.path.dirname(os.path.abspath(__file__))
    embedded_dir = os.path.join(app_dir, "projects")
    if not os.path.isdir(embedded_dir):
        return  # nothing bundled with this deployment

    # Guard against an unset or degenerate HOME in the container runtime.
    home = os.environ.get("HOME", "/app/python/source_code")
    if not home or home == "/":
        home = "/app/python/source_code"
    projects_dir = os.path.join(home, "projects")
    os.makedirs(projects_dir, exist_ok=True)

    # sorted() makes setup order (and log output) deterministic across runs;
    # os.listdir() order is filesystem-dependent.
    for name in sorted(os.listdir(embedded_dir)):
        src = os.path.join(embedded_dir, name)
        if not os.path.isdir(src):
            continue
        dest = os.path.join(projects_dir, name)
        if os.path.exists(dest):
            logger.info(f"Project already exists, skipping: {dest}")
            continue

        shutil.copytree(src, dest)
        # Initialize as a git repo so post-commit hooks work.
        # Surface (rather than silently drop) git failures — e.g. a missing
        # git identity makes the initial commit fail, which would otherwise
        # leave the project without the commit that triggers workspace sync.
        for cmd in (
            ["git", "init"],
            ["git", "add", "."],
            ["git", "commit", "-m", "Initial workshop project"],
        ):
            result = subprocess.run(cmd, cwd=dest, capture_output=True)
            if result.returncode != 0:
                stderr = result.stderr.decode(errors="replace").strip()
                logger.warning(f"git step {' '.join(cmd)} failed in {dest}: {stderr}")
        logger.info(f"Embedded project initialized: {dest}")


def _run_projects_step():
    """Run embedded project setup as a tracked setup step.

    Transitions the "projects" step to running, then to complete on success
    or to error (carrying the exception message) on failure.
    """
    now = time.time
    _update_step("projects", status="running", started_at=now())
    try:
        _setup_embedded_projects()
        _update_step("projects", status="complete", completed_at=now())
    except Exception as exc:
        _update_step("projects", status="error", completed_at=now(), error=str(exc))


def _reinit_app_git():
"""On Databricks Apps, reinit git to remove template origin remote."""
app_dir = os.path.dirname(os.path.abspath(__file__))
Expand Down Expand Up @@ -359,6 +446,17 @@ def run_setup():
_run_step("micro", ["bash", "-c",
"mkdir -p ~/.local/bin && bash install_micro.sh && mv micro ~/.local/bin/ 2>/dev/null || true"])

# Probe which terminal editors are actually available in this container.
# Writes a human-readable report to ~/.local/share/coda/editors.txt so
# users (and Claude) can discover what to reach for from the terminal.
_run_step("editors", ["bash", "-c",
"mkdir -p ~/.local/share/coda && "
"{ echo 'Available terminal editors (detected at app startup):'; "
" for ed in micro nano vim vi emacs ed pico joe mcedit; do "
" p=$(command -v \"$ed\" 2>/dev/null) && echo \" $ed -> $p\"; "
" done; } > ~/.local/share/coda/editors.txt && "
"cat ~/.local/share/coda/editors.txt"])

_run_step("gh", ["bash", "install_gh.sh"])

# --- Upgrade Databricks CLI (runtime image ships an older version) ---
Expand All @@ -379,11 +477,13 @@ def run_setup():
("databricks", ["uv", "run", "python", "setup_databricks.py"]),
]

with ThreadPoolExecutor(max_workers=len(parallel_steps)) as executor:
with ThreadPoolExecutor(max_workers=len(parallel_steps) + 1) as executor:
futures = [
executor.submit(_run_step, step_id, command)
for step_id, command in parallel_steps
]
# Embedded projects (copy + git init) — runs in parallel with agent setup
futures.append(executor.submit(_run_projects_step))
wait(futures)

# --- MLflow setup runs AFTER claude setup to avoid settings.json race ---
Expand All @@ -410,25 +510,53 @@ def run_setup():


def get_token_owner():
"""Get the owner email. Priority: Apps API (app.creator) > PAT (current_user.me).
"""Get the owner email.

Priority: APP_OWNER_EMAIL env var > app description > app.creator > PAT.
The spawner sets owner:{email} in the app description when creating apps on
behalf of users, so the child app knows its owner without requiring a PAT.

Uses the auto-provisioned SP to call the Apps API — no PAT needed for
owner resolution. Falls back to PAT-based lookup for backward compat.
The Apps API call retries with backoff because the app's auto-provisioned SP
credentials may not be ready for OAuth token exchange immediately at boot.
"""
from databricks.sdk import WorkspaceClient

# 1. Try Apps API via SP credentials (no PAT needed)
# 0. Explicit owner from deployer (env var)
explicit_owner = os.environ.get("APP_OWNER_EMAIL", "").strip().lower()
if explicit_owner:
logger.info(f"Owner resolved from APP_OWNER_EMAIL: {explicit_owner}")
return explicit_owner

# 1. Try Apps API via SP credentials (no PAT needed) — retry for SP propagation
app_name = os.environ.get("DATABRICKS_APP_NAME")
if app_name:
try:
w = WorkspaceClient() # auto-detects SP credentials
set_product_info(w)
app = w.apps.get(name=app_name)
owner = (app.creator or "").lower()
logger.info(f"Owner resolved from app.creator: {owner}")
return owner
except Exception as e:
logger.warning(f"Could not resolve owner via Apps API: {e}")
max_retries = 6
base_delay = 5.0
for attempt in range(max_retries):
try:
w = WorkspaceClient() # auto-detects SP credentials
set_product_info(w)
app_info = w.apps.get(name=app_name)

# Spawner sets owner in description as "owner:{email}"
desc = getattr(app_info, "description", "") or ""
if desc.startswith("owner:"):
owner = desc.split(":", 1)[1].strip().lower()
logger.info(f"Owner resolved from app description: {owner}")
return owner

owner = (app_info.creator or "").lower()
logger.info(f"Owner resolved from app.creator: {owner}")
return owner
except Exception as e:
delay = min(base_delay * (2**attempt), 60)
logger.warning(
f"Apps API call failed (attempt {attempt + 1}/{max_retries}): {e}"
f" — retrying in {delay:.0f}s"
)
if attempt < max_retries - 1:
time.sleep(delay)
logger.error(f"Could not resolve owner via Apps API after {max_retries} attempts")

# 2. Fallback: PAT-based resolution
try:
Expand Down
2 changes: 1 addition & 1 deletion app.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ env:
- name: DATABRICKS_TOKEN
valueFrom: DATABRICKS_TOKEN
- name: ANTHROPIC_MODEL
value: databricks-claude-opus-4-6
value: databricks-claude-opus-4-7
- name: GEMINI_MODEL
value: databricks-gemini-2-5-pro
- name: HERMES_MODEL
Expand Down
1 change: 1 addition & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Root conftest for team-memory gate tests."""
6 changes: 3 additions & 3 deletions pat_rotator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Auto-rotate short-lived PATs in the background.

Mints a new 15-minute PAT every 10 minutes, writes to ~/.databrickscfg
Mints a new 4-hour PAT every 3 hours, writes to ~/.databrickscfg
(immediate CLI/SDK use), and revokes the old PAT. Rotation only runs
while active sessions exist. If the app restarts, the interactive PAT
prompt re-provisions credentials on next session. Fixes #81.
Expand All @@ -18,8 +18,8 @@

logger = logging.getLogger(__name__)

DEFAULT_TOKEN_LIFETIME = 900 # 15 minutes
DEFAULT_ROTATION_INTERVAL = 600 # 10 minutes
DEFAULT_TOKEN_LIFETIME = 14400 # 4 hours
DEFAULT_ROTATION_INTERVAL = 10800 # 3 hours


class PATRotator:
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "coda"
version = "0.18.1"
version = "0.17.1"
description = "CoDA - Coding Agents on Databricks Apps"
requires-python = ">=3.10"
dependencies = [
Expand All @@ -10,8 +10,9 @@ dependencies = [
"claude-agent-sdk",
"databricks-sdk>=0.20.0",
"mlflow-skinny==3.11.1",
"mlflow-tracing==3.11.1",
"requests",
"cryptography>=46.0.7",
"cryptography>=46.0.6",
]

[tool.uv]
Expand Down
Loading