# CI: lint, format check, security scan, and dependency-pin validation.
name: CI

on:
  push:
    branches: [main]
  pull_request:

jobs:
  lint-and-security:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install tools
        run: uv tool install ruff && uv tool install bandit

      - name: Ruff lint
        run: ruff check .

      - name: Ruff format check
        run: ruff format --check .

      - name: Bandit security scan
        run: bandit -r . -x ./tests,./static,./.claude -ll

      - name: Validate pinned dependencies
        run: |
          # Every requirement line (ignoring blank lines, comments and pip
          # option lines such as "-r other.txt") must carry an exact "==" pin.
          # Checking only for range operators would let a bare name such as
          # "flask" pass as if it were pinned.
          unpinned="$(grep -nvE '^[[:space:]]*($|#|-)' requirements.txt \
            | grep -vE '^[0-9]+:[[:space:]]*[A-Za-z0-9._-]+(\[[^]]*\])?[[:space:]]*==[^=]' \
            || true)"
          if [ -n "$unpinned" ]; then
            echo "ERROR: requirements.txt contains unpinned dependencies"
            echo "$unpinned"
            exit 1
          fi
-| Category | Skills | -|----------|--------| -| AI & Agents | agent-bricks, databricks-genie, mlflow-evaluation, model-serving | -| Analytics | aibi-dashboards, databricks-unity-catalog | -| Data Engineering | spark-declarative-pipelines, databricks-jobs, synthetic-data-generation | -| Development | asset-bundles, databricks-app-apx, databricks-app-python, databricks-python-sdk, databricks-config | -| Reference | databricks-docs, unstructured-pdf-generation | +## Development Commands -### Development Workflow Skills (14) +```bash +# Run locally (Flask dev server) +uv run python app.py +# Open http://localhost:8000 -From [obra/superpowers](https://github.com/obra/superpowers): +# Production (Gunicorn, used by Databricks Apps) +uv run gunicorn app:app -| Skill | Purpose | -|-------|---------| -| brainstorming | Design features through collaborative dialogue | -| test-driven-development | RED-GREEN-REFACTOR cycle | -| systematic-debugging | 4-phase root cause analysis | -| writing-plans | Create detailed implementation plans | -| verification-before-completion | Verify before claiming done | -| executing-plans | Batch execution with checkpoints | -| dispatching-parallel-agents | Concurrent subagent workflows | -| subagent-driven-development | Fast iteration with two-stage review | -| using-git-worktrees | Parallel development branches | -| requesting-code-review | Pre-review checklist | -| receiving-code-review | Responding to feedback | -| finishing-a-development-branch | Merge/PR decision workflow | -| writing-skills | Create new skills | -| using-superpowers | Introduction to available skills | +# Deploy to Databricks Apps +databricks sync . 
/Workspace/Users/<your-email>/apps/<app-name> --watch=false +databricks apps deploy <app-name> --source-code-path /Workspace/Users/<your-email>/apps/<app-name> -## MCP Servers +# No test suite exists — skip test discovery +``` -- **DeepWiki** - AI-powered documentation for any GitHub repository -- **Exa** - Web search and code context retrieval +## Architecture -## Databricks CLI +**Single-process Flask app** with PTY-based terminal sessions, served by Gunicorn (1 worker, 8 threads via gthread). -The Databricks CLI is pre-configured with your credentials. Test it: -```bash -databricks current-user me -``` +### Startup Flow +1. `gunicorn.conf.py` → `post_worker_init` → `app.initialize_app()` +2. `initialize_app()` resolves auth (PAT or OAuth M2M via `utils.resolve_auth()`), determines app owner, starts cleanup thread, launches setup in background thread +3. Setup first runs, in order: git config (Python), micro editor (bash), tmux, and GitHub CLI (`gh`); then `setup_claude.py`, `setup_codex.py`, `setup_opencode.py`, `setup_gemini.py`, and `setup_databricks.py` run in parallel — each installs a CLI and writes its config files. Each step has a 300s timeout. If `GIT_REPOS` is set, repos are auto-cloned into `~/projects/` after setup. +4. **State restore**: if `STATE_SYNC=true` (default), downloads saved state (Claude Code auto-memory, shell history) from `/Workspace/Users/{email}/.state/` +5. During setup, `/` serves `static/loading.html` (snake game); after setup, serves `static/index.html` (xterm.js terminal) +6. New terminal sessions start in `~/projects/` directory -Databricks can only authenticate with a PAT or CLIENT_ID and CLIENT_SECRET pair. If you have trouble logging in, remove the CLIENT_SECRET and CLIENT_ID from your environment, then try again. We want access to only be based on the app owner's credentials. 
+### Key Files +- **`app.py`** — Flask server, PTY session management (create/input/output/resize/close), authorization, setup orchestration +- **`utils.py`** — Auth resolution (PAT → OAuth M2M → SDK fallback), `TokenRefresher` for OAuth, `adapt_instructions_file()` for cross-CLI instruction sharing, `ensure_https()` +- **`setup_*.py`** — Per-agent setup scripts. Each resolves gateway vs direct endpoint, installs CLI binary, writes config files. Claude uses `~/.claude/settings.json`, Gemini uses `~/.gemini/.env`, OpenCode is built from fork (`dgokeeffe/opencode#feat/databricks-ai-sdk-provider`) with native Databricks provider — auto-discovers models and handles auth via `@databricks/sdk-experimental`, config at `~/.config/opencode/opencode.json`, Codex uses `~/.codex/config.toml` + `~/.codex/.env`, Databricks CLI uses `~/.databrickscfg` +- **`state_sync.py`** — Bidirectional state sync: `restore_state()` on startup, `save_state()` every 5 min + on shutdown. Syncs `~/.claude/projects/*/memory/` and `~/.bash_history` to `/Workspace/Users/{email}/.state/` +- **`sync_to_workspace.py`** — Post-commit hook target: syncs `~/projects/*` repos to `/Workspace/Users/{email}/projects/` via `databricks sync` +- **`gunicorn.conf.py`** — Must use `workers=1` (PTY fds and session state are process-local) -Common commands: -```bash -databricks workspace list /Workspace/Users/ -databricks jobs list -databricks clusters list -``` +### Authentication Model +`utils.resolve_auth()` tries in order: explicit `DATABRICKS_TOKEN` (PAT), `DATABRICKS_CLIENT_ID`+`SECRET` (OAuth M2M with token refresh), SDK auto-detect. The `TokenRefresher` class runs a background thread (every 30min) to refresh OAuth tokens and update all agent config files in-place. + +**Git credentials** are handled by a host-aware credential helper (`git-credential-databricks`). It checks `GIT_TOKEN` first (scoped to `GIT_TOKEN_HOST` if set), then falls back to `DATABRICKS_TOKEN`. 
Users can also authenticate interactively via `gh auth login` (GitHub CLI is pre-installed). Workspace file sync is opt-in via `WORKSPACE_SYNC=true`. + +### State Persistence +With `STATE_SYNC=true` (default), the following survive container restarts: +- **Claude Code auto-memory** (`~/.claude/projects/*/memory/`) — synced every 5 min + on shutdown +- **Shell history** (`~/.bash_history`) — synced every 5 min + on shutdown +- **Git repos** (`~/projects/`) — synced on commit if `WORKSPACE_SYNC=true` -## Project Setup +**Not persisted** (by design): tmux sessions (process state), CLI binaries (rebuilt on startup), gh auth tokens (security risk). -Before starting any new project or documentation: +### Security +Single-user app: the PAT owner is determined at startup, and `@app.before_request` checks `X-Forwarded-Email` against the owner. In OAuth M2M mode, authorization is delegated to the Databricks Apps proxy. -1. **Always initialize a git repo first:** - ```bash - mkdir my-project && cd my-project - git init - ``` - Or clone an existing repo: - ```bash - git clone https://github.com/user/repo.git - cd repo - ``` +### Session Management +PTY sessions use `pty.openpty()` + background reader threads. A cleanup thread, which runs every 30s, kills sessions with no poll activity for 120s (SIGHUP → wait 3s → SIGKILL). -2. **Why?** Git commits automatically sync your work to Databricks Workspace at `/Workspace/Users/{your-email}/projects/{project-name}/` +### API Endpoints +- `GET /` — Loading screen (during setup) or terminal UI +- `GET /health` — Health check (no auth required) +- `GET /api/setup-status` — Setup progress (no auth required) +- `POST /api/session` — Create new PTY session +- `POST /api/input` — Send keystrokes to terminal (`{session_id, input}`) +- `POST /api/output` — Poll for terminal output (`{session_id}`) — also updates `last_poll_time` +- `POST /api/resize` — Resize terminal (`{session_id, cols, rows}`) +- `POST /api/session/close` — Close terminal session -3. 
**Then start working** - your commits will be backed up to Workspace +## Deployment Config -## Quick Start +- `app.yaml.template` — Template to copy to `app.yaml`. Set `DATABRICKS_GATEWAY_HOST` or remove it to fall back to direct Model Serving. +- Use `databricks sync` (not `workspace import-dir`) to upload — it respects `.gitignore` and handles `.git` correctly. +- **Never move the `.git` folder** to the workspace when running workspace import. -- Projects sync to Databricks Workspace on git commit -- Use `/commit` for guided commits -- Ask "help me create a dashboard" to see skills in action -- Ask about any GitHub repo with DeepWiki MCP +## Skills -## Credits +39 pre-installed skills live in `.claude/skills/`. Databricks skills come from [databricks-solutions/ai-dev-kit](https://github.com/databricks-solutions/ai-dev-kit), workflow skills from [obra/superpowers](https://github.com/obra/superpowers). Use `/refresh-databricks-skills` to pull latest. -- Databricks skills from [databricks-solutions/ai-dev-kit](https://github.com/databricks-solutions/ai-dev-kit) -- Development workflow skills from [obra/superpowers](https://github.com/obra/superpowers) +## Dependencies -# things to remember -Remember to never move .git folder to the workspace if you're running workspace import. \ No newline at end of file +`requirements.txt`: flask, claude-agent-sdk, databricks-sdk. No pyproject.toml — no build system. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9c52e9a --- /dev/null +++ b/Makefile @@ -0,0 +1,87 @@ +# Makefile for deploying Coding Agents to Databricks Apps +# +# Usage: +# make deploy PROFILE=daveok PAT=dapi... 
# setup-secret: create the secret scope if needed, store the PAT in it, and
# link the secret to the app as the DATABRICKS_TOKEN resource.
#
# The interactive prompt hides input with stty rather than "read -s": make runs
# recipes with /bin/sh, and "read -s" is a bash extension that fails under dash.
# curl uses --fail so an HTTP error stops the target instead of being reported
# as a successful link.
setup-secret: ## Create secret scope and store PAT
	@echo "==> Setting up DATABRICKS_TOKEN secret..."
	@# Create scope if it doesn't exist
	@if databricks secrets list-scopes --profile $(PROFILE) --output json 2>/dev/null | python3 -c "import sys,json; scopes=[s['name'] for s in json.load(sys.stdin).get('scopes',[])]; exit(0 if '$(SECRET_SCOPE)' in scopes else 1)" 2>/dev/null; then \
		echo "  Secret scope '$(SECRET_SCOPE)' already exists."; \
	else \
		echo "  Creating secret scope '$(SECRET_SCOPE)'..."; \
		databricks secrets create-scope $(SECRET_SCOPE) --profile $(PROFILE); \
	fi
	@# Store the PAT - prompt if not provided
	@if [ -z "$(PAT)" ]; then \
		echo "  Enter your Databricks PAT (will not echo):"; \
		trap 'stty echo 2>/dev/null; exit 1' INT TERM; \
		stty -echo 2>/dev/null; \
		IFS= read -r pat_value; read_status=$$?; \
		stty echo 2>/dev/null; \
		echo ""; \
		if [ $$read_status -ne 0 ] || [ -z "$$pat_value" ]; then \
			echo "  ERROR: no PAT entered" >&2; \
			exit 1; \
		fi; \
		echo "$$pat_value" | databricks secrets put-secret $(SECRET_SCOPE) $(SECRET_KEY) --profile $(PROFILE); \
	else \
		echo "$(PAT)" | databricks secrets put-secret $(SECRET_SCOPE) $(SECRET_KEY) --profile $(PROFILE); \
	fi
	@echo "  Secret stored in $(SECRET_SCOPE)/$(SECRET_KEY)"
	@# Link secret to app resource
	@echo "  Linking secret to app resource 'DATABRICKS_TOKEN'..."
	@curl -sS --fail -X PATCH \
		"$$(databricks auth env --profile $(PROFILE) 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin)['env']['DATABRICKS_HOST'])")/api/2.0/apps/$(APP_NAME)" \
		-H "Authorization: Bearer $$(databricks auth token --profile $(PROFILE) 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin)['access_token'])")" \
		-H "Content-Type: application/json" \
		-d '{"resources":[{"name":"DATABRICKS_TOKEN","description":"PAT for model serving access","secret":{"scope":"$(SECRET_SCOPE)","key":"$(SECRET_KEY)","permission":"READ"}}]}' \
		>/dev/null
	@echo "  App resource linked."
# Session timeout configuration
SESSION_TIMEOUT_SECONDS = 120  # No poll for 120s = dead PTY wrapper (tmux persists)
CLEANUP_INTERVAL_SECONDS = 30  # How often to check for stale sessions
GRACEFUL_SHUTDOWN_WAIT = 3  # Seconds to wait after SIGHUP before SIGKILL

# Terminal mode configuration
TMUX_ENABLED = os.environ.get("TMUX_ENABLED", "false").lower() == "true"
TERMINAL_MODE = os.environ.get("TERMINAL_MODE", "tabs")

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__, static_folder="static", static_url_path="/static")
app.secret_key = os.urandom(24)

# Socket.IO origin policy. A wildcard ("*") lets any website the user visits
# open a Socket.IO connection to this app from their browser, which is unsafe
# for an app that serves an interactive terminal. Default to same-origin
# (None) and allow an explicit override through SOCKETIO_CORS_ALLOWED_ORIGINS:
# either "*" or a comma-separated list of allowed origins.
_cors_setting = os.environ.get("SOCKETIO_CORS_ALLOWED_ORIGINS", "").strip()
if _cors_setting == "*":
    _socketio_origins = "*"
else:
    _socketio_origins = [
        origin.strip() for origin in _cors_setting.split(",") if origin.strip()
    ] or None
socketio = SocketIO(
    app, async_mode="threading", cors_allowed_origins=_socketio_origins
)

# Store sessions: {session_id: {"master_fd": fd, "pid": pid, "output_buffer": deque}}
sessions = {}
def _get_setup_state_snapshot():
    """Return a deep copy of ``setup_state`` with the terminal settings added.

    The copy is taken while holding ``setup_lock``. The ``terminal_mode`` and
    ``tmux_enabled`` keys are then filled in from the module-level
    ``TERMINAL_MODE`` and ``TMUX_ENABLED`` values.
    """
    terminal_settings = {
        "terminal_mode": TERMINAL_MODE,
        "tmux_enabled": TMUX_ENABLED,
    }
    with setup_lock:
        state_copy = copy.deepcopy(setup_state)
    state_copy.update(terminal_settings)
    return state_copy
None) - env.pop("DATABRICKS_CLIENT_SECRET", None) - result = subprocess.run(command, env=env, capture_output=True, text=True, timeout=300) + result = subprocess.run( + command, env=env, capture_output=True, text=True, timeout=300 + ) if result.returncode == 0: _update_step(step_id, status="complete", completed_at=time.time()) else: err = result.stderr.strip() or result.stdout.strip() or "Unknown error" - _update_step(step_id, status="error", completed_at=time.time(), error=err[:500]) + _update_step( + step_id, status="error", completed_at=time.time(), error=err[:500] + ) except subprocess.TimeoutExpired: - _update_step(step_id, status="error", completed_at=time.time(), error="Timed out after 300s") + _update_step( + step_id, + status="error", + completed_at=time.time(), + error="Timed out after 300s", + ) except Exception as e: _update_step(step_id, status="error", completed_at=time.time(), error=str(e)) @@ -108,26 +219,80 @@ def _setup_git_config(): if not home or home == "/": home = "/app/python/source_code" - # Get user identity from Databricks token + # Get user identity from Databricks credentials (PAT or OAuth M2M) user_email = None display_name = None try: from databricks.sdk import WorkspaceClient - db_host = ensure_https(os.environ.get("DATABRICKS_HOST", "")) - db_token = os.environ.get("DATABRICKS_TOKEN") - if db_host and db_token: - w = WorkspaceClient(host=db_host, token=db_token, auth_type="pat") - me = w.current_user.me() - user_email = me.user_name - display_name = me.display_name or user_email.split("@")[0] + + w = WorkspaceClient() + me = w.current_user.me() + user_email = me.user_name + display_name = me.display_name or user_email.split("@")[0] except Exception as e: - logger.warning(f"Could not get user identity from token: {e}") + logger.warning(f"Could not get user identity: {e}") # Write ~/.gitconfig directly (more reliable than subprocess git config) gitconfig_path = os.path.join(home, ".gitconfig") hooks_dir = os.path.join(home, ".githooks") 
os.makedirs(hooks_dir, exist_ok=True) + # Write git credential helper script + local_bin = os.path.join(home, ".local", "bin") + os.makedirs(local_bin, exist_ok=True) + credential_helper_path = os.path.join(local_bin, "git-credential-databricks") + with open(credential_helper_path, "w") as f: + f.write("#!/bin/bash\n") + f.write( + "# Git credential helper: host-aware, supports both enterprise git and Databricks.\n" + ) + f.write("# Implements the git credential helper protocol.\n") + f.write("#\n") + f.write( + "# GIT_TOKEN + GIT_TOKEN_HOST → used for matching hosts (GitHub, Azure DevOps, GitLab)\n" + ) + f.write( + "# DATABRICKS_TOKEN → fallback for Databricks-hosted git and other hosts\n" + ) + f.write("\n") + f.write('# Only respond to "get" action; silently ignore store/erase.\n') + f.write('if [ "$1" != "get" ]; then\n') + f.write(" exit 0\n") + f.write("fi\n") + f.write("\n") + f.write("# Read stdin to extract the host being requested.\n") + f.write('REQ_HOST=""\n') + f.write("while IFS= read -r line; do\n") + f.write(' [ -z "$line" ] && break\n') + f.write(' case "$line" in\n') + f.write(' host=*) REQ_HOST="${line#host=}" ;;\n') + f.write(" esac\n") + f.write("done\n") + f.write("\n") + f.write( + "# If GIT_TOKEN is set, use it for matching hosts (or all hosts if GIT_TOKEN_HOST is unset).\n" + ) + f.write('if [ -n "$GIT_TOKEN" ]; then\n') + f.write( + ' if [ -z "$GIT_TOKEN_HOST" ] || echo "$REQ_HOST" | grep -qi "$GIT_TOKEN_HOST"; then\n' + ) + f.write(' printf "username=token\\npassword=%s\\n" "$GIT_TOKEN"\n') + f.write(" exit 0\n") + f.write(" fi\n") + f.write("fi\n") + f.write("\n") + f.write( + "# Fallback to DATABRICKS_TOKEN for Databricks-hosted git and other hosts.\n" + ) + f.write('if [ -n "$DATABRICKS_TOKEN" ]; then\n') + f.write(' printf "username=token\\npassword=%s\\n" "$DATABRICKS_TOKEN"\n') + f.write(" exit 0\n") + f.write("fi\n") + f.write("\n") + f.write("exit 1\n") + os.chmod(credential_helper_path, 0o755) + logger.info(f"Git credential 
helper written to {credential_helper_path}") + lines = [] if user_email and display_name: lines.append("[user]") @@ -135,51 +300,111 @@ def _setup_git_config(): lines.append(f"\tname = {display_name}") lines.append("[core]") lines.append(f"\thooksPath = {hooks_dir}") + lines.append("[credential]") + lines.append(f"\thelper = {credential_helper_path}") with open(gitconfig_path, "w") as f: f.write("\n".join(lines) + "\n") logger.info(f"Git config written to {gitconfig_path}") - # Write post-commit hook for workspace sync (works from any CLI: Claude, Gemini, OpenCode, etc.) - # Only syncs repos inside ~/projects/ — skips the app source and any other repos + # Post-commit hook: workspace sync (opt-in) or just a placeholder post_commit = os.path.join(hooks_dir, "post-commit") + workspace_sync = os.environ.get("WORKSPACE_SYNC", "").lower() in ( + "1", + "true", + "yes", + ) + with open(post_commit, "w") as f: - f.write('#!/bin/bash\n') - f.write('# Auto-sync to Databricks Workspace on commit (works from any CLI)\n') - f.write('SYNC_LOG="$HOME/.sync.log"\n') - f.write('\n') - f.write('# Resolve git repo root (handles commits from subdirectories)\n') - f.write('REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)"\n') - f.write('if [ -z "$REPO_ROOT" ]; then\n') - f.write(' echo "[post-commit] $(date +%H:%M:%S) SKIP: not inside a git repo" >> "$SYNC_LOG"\n') - f.write(' exit 0\n') - f.write('fi\n') - f.write('\n') - f.write('# Only sync repos inside ~/projects/\n') - f.write('PROJECTS_DIR="$HOME/projects"\n') - f.write('case "$REPO_ROOT" in\n') - f.write(' "$PROJECTS_DIR"/*)\n') - f.write(' ;; # allowed - continue\n') - f.write(' *)\n') - f.write(' echo "[post-commit] $(date +%H:%M:%S) SKIP: $REPO_ROOT is outside $PROJECTS_DIR" >> "$SYNC_LOG"\n') - f.write(' exit 0\n') - f.write(' ;;\n') - f.write('esac\n') - f.write('\n') - f.write('echo "[post-commit] $(date +%H:%M:%S) syncing $REPO_ROOT" >> "$SYNC_LOG"\n') - f.write('\n') - f.write('# Use venv python directly (avoids 
fragile source activate)\n') - f.write('VENV_PYTHON="/app/python/source_code/.venv/bin/python"\n') - f.write('SYNC_SCRIPT="/app/python/source_code/sync_to_workspace.py"\n') - f.write('\n') - f.write('if [ -x "$VENV_PYTHON" ] && [ -f "$SYNC_SCRIPT" ]; then\n') - f.write(' nohup "$VENV_PYTHON" "$SYNC_SCRIPT" "$REPO_ROOT" >> "$SYNC_LOG" 2>&1 & disown\n') - f.write('else\n') - f.write(' echo "[post-commit] $(date +%H:%M:%S) SKIP: venv=$VENV_PYTHON script=$SYNC_SCRIPT" >> "$SYNC_LOG"\n') - f.write('fi\n') + f.write("#!/bin/bash\n") + if workspace_sync: + f.write( + "# Auto-sync to Databricks Workspace on commit (WORKSPACE_SYNC=true)\n" + ) + f.write('SYNC_LOG="$HOME/.sync.log"\n') + f.write("\n") + f.write('REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)"\n') + f.write('if [ -z "$REPO_ROOT" ]; then\n') + f.write( + ' echo "[post-commit] $(date +%H:%M:%S) SKIP: not inside a git repo" >> "$SYNC_LOG"\n' + ) + f.write(" exit 0\n") + f.write("fi\n") + f.write("\n") + f.write('PROJECTS_DIR="$HOME/projects"\n') + f.write('case "$REPO_ROOT" in\n') + f.write(' "$PROJECTS_DIR"/*)\n') + f.write(" ;; # allowed - continue\n") + f.write(" *)\n") + f.write( + ' echo "[post-commit] $(date +%H:%M:%S) SKIP: $REPO_ROOT is outside $PROJECTS_DIR" >> "$SYNC_LOG"\n' + ) + f.write(" exit 0\n") + f.write(" ;;\n") + f.write("esac\n") + f.write("\n") + f.write( + 'echo "[post-commit] $(date +%H:%M:%S) syncing $REPO_ROOT" >> "$SYNC_LOG"\n' + ) + f.write("\n") + f.write('VENV_PYTHON="/app/python/source_code/.venv/bin/python"\n') + f.write('SYNC_SCRIPT="/app/python/source_code/sync_to_workspace.py"\n') + f.write("\n") + f.write('if [ -x "$VENV_PYTHON" ] && [ -f "$SYNC_SCRIPT" ]; then\n') + f.write( + ' nohup "$VENV_PYTHON" "$SYNC_SCRIPT" "$REPO_ROOT" >> "$SYNC_LOG" 2>&1 & disown\n' + ) + f.write("else\n") + f.write( + ' echo "[post-commit] $(date +%H:%M:%S) SKIP: venv=$VENV_PYTHON script=$SYNC_SCRIPT" >> "$SYNC_LOG"\n' + ) + f.write("fi\n") + else: + f.write("# Workspace sync disabled (set 
WORKSPACE_SYNC=true to enable)\n") + f.write("exit 0\n") os.chmod(post_commit, 0o755) logger.info(f"Post-commit hook written to {post_commit}") + # Write ~/.bashrc with colored prompt and aliases + bashrc_path = os.path.join(home, ".bashrc") + with open(bashrc_path, "w") as f: + f.write("# Guard against stale CWD (happens after tmux reattach if dir was recreated)\n") + f.write('if ! cd . 2>/dev/null; then\n') + f.write(' cd ~/projects 2>/dev/null || cd ~\n') + f.write("fi\n\n") + # Strip OAuth M2M vars when PAT is configured — Databricks SDK rejects + # ambiguous auth ("more than one authorization method configured"). + # This must be in .bashrc (not just shell_env) because tmux server + # may preserve the original process environment across reattach. + if os.environ.get("DATABRICKS_TOKEN"): + f.write("# Strip OAuth M2M vars to avoid SDK auth conflict with PAT\n") + f.write("unset DATABRICKS_CLIENT_ID DATABRICKS_CLIENT_SECRET 2>/dev/null\n\n") + f.write("# Colored prompt: user@host:dir$\n") + f.write( + "PS1='\\[\\033[01;32m\\]\\u@\\h\\[\\033[00m\\]:\\[\\033[01;34m\\]\\w\\[\\033[00m\\]\\$ '\n" + ) + f.write("\n") + f.write("# Color support\n") + f.write('alias ls="ls --color=auto"\n') + f.write('alias grep="grep --color=auto"\n') + f.write("export CLICOLOR=1\n") + logger.info(f"Bashrc written to {bashrc_path}") + + # Ensure login shells source .bashrc + bash_profile_path = os.path.join(home, ".bash_profile") + with open(bash_profile_path, "w") as f: + f.write("# Source .bashrc for login shells\n") + f.write("[ -f ~/.bashrc ] && . 
def _clone_git_repos():
    """Clone repos listed in the GIT_REPOS env var into ~/projects/.

    GIT_REPOS is a comma-separated list of clone URLs. Each repo is cloned
    into ``<home>/projects/<name>``, where ``<name>`` is the last path segment
    of the URL with any ``.git`` suffix removed. A repo whose target directory
    already exists is skipped. One failing repo does not stop the others.

    Progress is recorded on the ``git_clone`` setup step: ``complete`` when
    GIT_REPOS is empty or every repo was cloned or skipped, ``error`` (with
    the joined messages truncated to 500 characters) otherwise.
    """
    git_repos = os.environ.get("GIT_REPOS", "").strip()
    if not git_repos:
        _update_step("git_clone", status="complete", completed_at=time.time())
        return

    _update_step("git_clone", status="running", started_at=time.time())

    # Same HOME fallback as the rest of setup: an unset, empty or "/" HOME
    # would otherwise place the clones under /projects.
    home = os.environ.get("HOME")
    if not home or home == "/":
        home = "/app/python/source_code"
    projects_dir = os.path.join(home, "projects")
    os.makedirs(projects_dir, exist_ok=True)

    repos = [r.strip() for r in git_repos.split(",") if r.strip()]
    errors = []

    for repo_url in repos:
        # Derive folder name from URL: https://github.com/org/repo.git → repo
        repo_name = repo_url.rstrip("/").rsplit("/", 1)[-1].removesuffix(".git")
        if repo_name in ("", ".", ".."):
            # These names resolve to an existing directory (projects/ or its
            # parent) and would be reported as "already exists", so report
            # them as errors instead.
            errors.append(f"{repo_url}: cannot derive a folder name from URL")
            logger.error(f"Cannot derive a folder name from {repo_url}")
            continue

        target_dir = os.path.join(projects_dir, repo_name)
        if os.path.isdir(target_dir):
            logger.info(f"Repo already exists, skipping: {target_dir}")
            continue

        logger.info(f"Cloning {repo_url} into {target_dir}")
        try:
            # "--" ends option parsing so a GIT_REPOS entry that starts with
            # "-" is treated as a repository, never as a git option.
            result = subprocess.run(
                ["git", "clone", "--", repo_url, target_dir],
                capture_output=True,
                text=True,
                timeout=120,
            )
        except subprocess.TimeoutExpired:
            errors.append(f"{repo_name}: timed out after 120s")
            logger.error(f"Timed out cloning {repo_url} after 120s")
            continue
        except Exception as e:
            errors.append(f"{repo_name}: {e}")
            logger.error(f"Failed to clone {repo_url}: {e}")
            continue

        if result.returncode != 0:
            err = result.stderr.strip() or "clone failed"
            errors.append(f"{repo_name}: {err}")
            logger.error(f"Failed to clone {repo_url}: {err}")
        else:
            logger.info(f"Cloned {repo_url}")

    if errors:
        _update_step(
            "git_clone",
            status="error",
            completed_at=time.time(),
            error="; ".join(errors)[:500],
        )
    else:
        _update_step("git_clone", status="complete", completed_at=time.time())
+ "tmux", + [ + "bash", + "-c", + "which tmux >/dev/null 2>&1 || (" + 'TMUX_VERSION="3.5a" && ' + "mkdir -p ~/.local/bin ~/.local/lib/tmux-appdir && " + 'curl -fsSL "https://github.com/nelsonenzo/tmux-appimage/releases/download/${TMUX_VERSION}/tmux.appimage" -o /tmp/tmux.appimage && ' + "chmod +x /tmp/tmux.appimage && " + "cd /tmp && /tmp/tmux.appimage --appimage-extract >/dev/null 2>&1 && " + "mv /tmp/squashfs-root/* ~/.local/lib/tmux-appdir/ && " + 'printf \'#!/bin/bash\\nexport APPDIR="$HOME/.local/lib/tmux-appdir"\\nexec "$APPDIR/AppRun" "$@"\\n\' > ~/.local/bin/tmux && ' + "chmod +x ~/.local/bin/tmux && " + "rm -rf /tmp/tmux.appimage /tmp/squashfs-root" + ")", + ], + ) + _run_step( + "gh", + [ + "bash", + "-c", + 'GH_VERSION="2.74.1" && ' + "mkdir -p ~/.local/bin && " + 'curl -fsSL "https://github.com/cli/cli/releases/download/v${GH_VERSION}/gh_${GH_VERSION}_linux_amd64.tar.gz" -o /tmp/gh.tar.gz && ' + "tar -xzf /tmp/gh.tar.gz -C /tmp && " + "mv /tmp/gh_${GH_VERSION}_linux_amd64/bin/gh ~/.local/bin/gh && " + "rm -rf /tmp/gh.tar.gz /tmp/gh_${GH_VERSION}_linux_amd64 && " + "chmod +x ~/.local/bin/gh && " + # Configure gh to use git's credential protocol instead of its own + "gh config set git_protocol https 2>/dev/null || true && " + # Wrap gh to auto-add flags that skip interactive prompts (arrow-key menus break in xterm.js PTY) + # The PTY sends OSC escape sequences that corrupt gh's interactive prompt library, + # so we pipe "Y" to answer the git-credential prompt non-interactively. 
+ "printf '#!/bin/bash\\n" + 'if [ "$1" = "auth" ] && [ "$2" = "login" ]; then\\n' + " shift 2\\n" + ' printf "Y\\\\n" | ~/.local/bin/gh.real auth login -h github.com -p https -w --skip-ssh-key "$@"\\n' + "fi\\n" + 'exec ~/.local/bin/gh.real "$@"\\n\' > ~/.local/bin/gh.wrapper && ' + "mv ~/.local/bin/gh ~/.local/bin/gh.real && " + "mv ~/.local/bin/gh.wrapper ~/.local/bin/gh && " + "chmod +x ~/.local/bin/gh", + ], + ) + # Use the currently running interpreter instead of assuming `python` exists in PATH. + py = sys.executable or "python" + + # --- Parallel agent setup (all independent of each other) --- + parallel_steps = [ + ("claude", [py, "setup_claude.py"]), + ("codex", [py, "setup_codex.py"]), + ("opencode", [py, "setup_opencode.py"]), + ("gemini", [py, "setup_gemini.py"]), + ("databricks", [py, "setup_databricks.py"]), + ("mlflow", [py, "setup_mlflow.py"]), + ] + + with ThreadPoolExecutor(max_workers=len(parallel_steps)) as executor: + futures = [ + executor.submit(_run_step, step_id, command) + for step_id, command in parallel_steps + ] + wait(futures) + + # Clone git repos specified in GIT_REPOS env var + _clone_git_repos() + + # Restore persisted state (auto-memory, shell history) from Workspace + state_sync_enabled = os.environ.get("STATE_SYNC", "true").lower() in ( + "1", + "true", + "yes", + ) + if state_sync_enabled: + _update_step("state", status="running", started_at=time.time()) + try: + restore_state() + _update_step("state", status="complete", completed_at=time.time()) + except Exception as e: + _update_step( + "state", status="error", completed_at=time.time(), error=str(e)[:500] + ) + else: + _update_step("state", status="complete", completed_at=time.time()) with setup_lock: any_error = any(s["status"] == "error" for s in setup_state["steps"]) @@ -233,15 +606,22 @@ def run_setup(): setup_state["completed_at"] = time.time() -def get_token_owner(): - """Get the owner email from DATABRICKS_TOKEN at startup.""" +def _get_app_owner(auth): + """Get the 
def get_request_user():
    """Return the caller's identity as reported by the request headers.

    The headers are consulted in priority order and the first non-empty
    value wins. When none of them carries a value, the result is whatever
    the lowest-priority lookup produced (``None`` if that header is absent).
    """
    headers = request.headers
    identity = None
    for header_name in (
        "X-Forwarded-Email",
        "X-Forwarded-User",
        "X-Databricks-User-Email",
    ):
        identity = headers.get(header_name)
        if identity:
            break
    return identity
@app.before_request
def authorize_request():
    """Check authorization before processing any HTTP request.

    Returns:
        ``None`` to let the request proceed, or a ``(response, 403)`` pair
        describing the owner/caller mismatch when the caller is rejected.
    """
    # Only endpoints that expose no per-session data bypass the owner check.
    # "/api/active-sessions" is deliberately NOT exempt: it returns live
    # session IDs, and a session ID is the only thing the terminal_input
    # WebSocket handler checks before writing to a PTY. Serving that list to
    # unauthorized callers would hand them everything needed to type into the
    # owner's terminal.
    if request.path in ("/health", "/api/setup-status"):
        return None

    authorized, user = check_authorization()
    if not authorized:
        return jsonify(
            {
                "error": "Unauthorized",
                "message": f"This app belongs to {app_owner}. You are logged in as {user}.",
            }
        ), 403

    return None
@app.route("/api/active-sessions")
def list_active_sessions():
    """List tracked PTY sessions for reconnection (non-tmux mode).

    Returns: {"sessions": [{"session_id": "...", "pane_id": N, "alive": bool}, ...]}

    Every tracked session is reported. Sessions whose process is gone are
    not dropped from the list; they are flagged with ``"alive": false`` so
    the client can decide what to do with them.
    """
    # Copy the few fields we need while holding the lock, then probe the
    # processes without blocking other request threads on sessions_lock.
    with sessions_lock:
        snapshot = [
            (
                session_id,
                session.get("pid"),
                session.get("pane_id", 0),
                session.get("exited", False),
            )
            for session_id, session in sessions.items()
        ]

    result = []
    for session_id, pid, pane_id, exited in snapshot:
        alive = False
        if pid is not None:
            try:
                os.kill(pid, 0)  # Signal 0: existence check only, nothing is sent
            except OSError:
                alive = False
            else:
                # The PID can still exist after the shell has finished, so the
                # reader thread's "exited" flag has the final say.
                alive = not exited
        result.append(
            {
                "session_id": session_id,
                "pane_id": pane_id,
                "alive": alive,
            }
        )
    return jsonify({"sessions": result})
+ if shell_env.get("DATABRICKS_TOKEN"): + shell_env.pop("DATABRICKS_CLIENT_ID", None) + shell_env.pop("DATABRICKS_CLIENT_SECRET", None) # Ensure HOME is set correctly if not shell_env.get("HOME") or shell_env["HOME"] == "/": shell_env["HOME"] = "/app/python/source_code" @@ -425,18 +915,40 @@ def create_session(): local_bin = f"{shell_env['HOME']}/.local/bin" shell_env["PATH"] = f"{local_bin}:{shell_env.get('PATH', '')}" + # Inject fresh token from TokenRefresher (OAuth M2M keeps tokens current) + if token_refresher is not None: + shell_env["DATABRICKS_TOKEN"] = token_refresher.current_token + # Start shell in ~/projects/ directory projects_dir = os.path.join(shell_env["HOME"], "projects") os.makedirs(projects_dir, exist_ok=True) + # Use tmux for session persistence across page refreshes. + # tmux new-session -A: attach if session exists, create if not. + # Re-read TMUX_ENABLED at request time so tests can toggle it via env. + tmux_enabled_now = os.environ.get('TMUX_ENABLED', 'false').lower() == 'true' + tmux_session = f"pane-{pane_id}" + reattached = False + if tmux_enabled_now and shutil.which("tmux"): + # Check if this tmux session already exists (reattach vs new) + check = subprocess.run( + ["tmux", "has-session", "-t", tmux_session], + capture_output=True, + timeout=5, + ) + reattached = check.returncode == 0 + shell_cmd = ["tmux", "new-session", "-A", "-s", tmux_session] + else: + shell_cmd = ["/bin/bash", "--login"] + pid = subprocess.Popen( - ["/bin/bash"], + shell_cmd, stdin=slave_fd, stdout=slave_fd, stderr=slave_fd, preexec_fn=os.setsid, env=shell_env, - cwd=projects_dir + cwd=projects_dir, ).pid session_id = str(uuid.uuid4()) @@ -445,39 +957,74 @@ def create_session(): sessions[session_id] = { "master_fd": master_fd, "pid": pid, + "pane_id": pane_id, "output_buffer": deque(maxlen=1000), "last_poll_time": time.time(), - "created_at": time.time() + "created_at": time.time(), } # Start background reader thread - thread = 
@app.route("/api/input", methods=["POST"])
def send_input():
    """Send input to the terminal and return any immediate output.

    Writes input to the PTY, waits briefly for the echo/response, and returns
    any available output in the same response, so the client does not need a
    separate output request to see the echo.

    Request body: {"session_id": "...", "input": "..."}

    Responses:
        200 {"status": "ok", "output": "...", "exited": bool}
        400 when the body is not a JSON object, "input" is not a string, or
            the input exceeds the size limit
        404 when the session is unknown
        500 when writing to the PTY fails
    """
    # silent=True yields None instead of raising, so malformed bodies get the
    # same JSON error shape as every other failure of this endpoint.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Invalid JSON body"}), 400

    session_id = data.get("session_id")
    input_data = data.get("input", "")
    if not isinstance(input_data, str):
        return jsonify({"error": "input must be a string"}), 400
    # NOTE(review): the limit is applied to characters while the message says
    # bytes; multi-byte input can therefore exceed 4096 bytes once encoded.
    if len(input_data) > 4096:
        return jsonify({"error": "Input too large (max 4096 bytes)"}), 400

    with sessions_lock:
        # A non-string ID (e.g. a JSON list) can never be a session key and,
        # being unhashable, would make the membership test itself raise.
        if not isinstance(session_id, str) or session_id not in sessions:
            return jsonify({"error": "Session not found"}), 404
        fd = sessions[session_id]["master_fd"]
        sessions[session_id]["last_poll_time"] = time.time()

    try:
        os.write(fd, input_data.encode())
    except OSError as e:
        return jsonify({"error": str(e)}), 500

    # Wait briefly for PTY to echo, then drain the output buffer.
    # The reader thread appends output asynchronously; a short sleep
    # lets it capture the echo before we drain.
    time.sleep(0.005)  # 5ms — enough for local PTY echo

    with sessions_lock:
        # The session may have been closed while we were sleeping.
        if session_id not in sessions:
            return jsonify({"status": "ok", "output": ""})
        session = sessions[session_id]
        session["last_poll_time"] = time.time()
        buffer = session["output_buffer"]
        output = "".join(buffer)
        buffer.clear()
        exited = session.get("exited", False)

    return jsonify({"status": "ok", "output": output, "exited": exited})
# ── WebSocket event handlers ────────────────────────────────────────────────
#
# NOTE(review): none of these handlers performs an owner check of its own, and
# Flask's before_request hooks are not run for Socket.IO events. Confirm that
# access control for WebSocket traffic is enforced elsewhere.


@socketio.on('join_session')
def handle_join_session(data):
    """Client joins a session room to receive real-time output.

    Malformed payloads and unknown session IDs are ignored silently.
    Joining also counts as activity for the idle-timeout bookkeeping.
    """
    session_id = data.get('session_id') if isinstance(data, dict) else None
    if not session_id or not isinstance(session_id, str):
        return
    with sessions_lock:
        if session_id not in sessions:
            return
        sessions[session_id]["last_poll_time"] = time.time()
    join_room(session_id)


@socketio.on('leave_session')
def handle_leave_session(data):
    """Client leaves a session room."""
    session_id = data.get('session_id') if isinstance(data, dict) else None
    if session_id and isinstance(session_id, str):
        leave_room(session_id)


@socketio.on('terminal_input')
def handle_terminal_input(data):
    """Receive terminal input via WebSocket and write it to the PTY.

    Invalid payloads (non-dict, non-string fields, input longer than 4096
    characters, unknown session) are dropped without a reply.
    """
    if not isinstance(data, dict):
        return
    session_id = data.get('session_id')
    input_data = data.get('input', '')
    if not session_id or not isinstance(session_id, str):
        return
    # Type check first: len()/encode() below would raise on non-string input.
    if not isinstance(input_data, str) or len(input_data) > 4096:
        return

    with sessions_lock:
        if session_id not in sessions:
            return
        fd = sessions[session_id]["master_fd"]
        sessions[session_id]["last_poll_time"] = time.time()

    try:
        os.write(fd, input_data.encode())
    except OSError:
        pass  # Best effort: the PTY may already be closed


@socketio.on('terminal_resize')
def handle_terminal_resize(data):
    """Resize terminal via WebSocket.

    Applies the same bounds as the HTTP /api/resize route (1–500 columns,
    1–200 rows). Without them, negative or oversized values would make
    struct.pack raise, which the OSError handler below does not catch.
    """
    if not isinstance(data, dict):
        return
    session_id = data.get('session_id')
    cols = data.get('cols', 80)
    rows = data.get('rows', 24)
    if not session_id or not isinstance(session_id, str):
        return
    if not isinstance(cols, int) or not isinstance(rows, int):
        return
    if not (1 <= cols <= 500) or not (1 <= rows <= 200):
        return

    with sessions_lock:
        if session_id not in sessions:
            return
        fd = sessions[session_id]["master_fd"]

    try:
        # struct winsize layout: rows, cols, xpixel, ypixel (unsigned shorts)
        winsize = struct.pack("HHHH", rows, cols, 0, 0)
        fcntl.ioctl(fd, termios.TIOCSWINSZ, winsize)
    except OSError:
        pass  # Best effort: the PTY may already be closed
auth = resolve_auth() + logger.info(f"Auth resolved: mode={auth.mode.value}, host={auth.host}") + + # Set DATABRICKS_TOKEN env var so setup scripts and subprocesses can use it + if auth.token: + os.environ["DATABRICKS_TOKEN"] = auth.token - # Remove OAuth credentials - force PAT auth only - os.environ.pop("DATABRICKS_CLIENT_ID", None) - os.environ.pop("DATABRICKS_CLIENT_SECRET", None) + # Start token refresher (only active in OAuth M2M mode) + token_refresher = TokenRefresher(auth) + token_refresher.start() - # Determine app owner from DATABRICKS_TOKEN - app_owner = get_token_owner() + # Determine app owner + app_owner = _get_app_owner(auth) if app_owner: logger.info(f"App owner (from token): {app_owner}") os.environ["APP_OWNER"] = app_owner @@ -606,16 +1262,29 @@ def initialize_app(): # Start background cleanup thread cleanup_thread = threading.Thread(target=cleanup_stale_sessions, daemon=True) cleanup_thread.start() - logger.info(f"Started session cleanup thread (timeout={SESSION_TIMEOUT_SECONDS}s, interval={CLEANUP_INTERVAL_SECONDS}s)") + logger.info( + f"Started session cleanup thread (timeout={SESSION_TIMEOUT_SECONDS}s, interval={CLEANUP_INTERVAL_SECONDS}s)" + ) # Start setup in background thread — app starts immediately with loading screen setup_thread = threading.Thread(target=run_setup, daemon=True, name="setup-thread") setup_thread.start() logger.info("Started background setup thread") + # State sync: periodic save + shutdown hook + state_sync_enabled = os.environ.get("STATE_SYNC", "true").lower() in ( + "1", + "true", + "yes", + ) + if state_sync_enabled: + start_periodic_sync(interval=300) + atexit.register(save_state) + logger.info("State sync enabled: periodic save every 5min + shutdown hook") + if __name__ == "__main__": # Local dev only — production uses gunicorn initialize_app() port = int(os.environ.get("DATABRICKS_APP_PORT", 8000)) - app.run(host="0.0.0.0", port=port, threaded=True) + socketio.run(app, host="0.0.0.0", port=port) diff --git 
a/app.yaml b/app.yaml index 4c0dc5c..8b1924c 100644 --- a/app.yaml +++ b/app.yaml @@ -1,19 +1,23 @@ command: - gunicorn - app:app +compute: + size: Large env: - name: HOME value: /app/python/source_code + # DATABRICKS_TOKEN: PAT auth (required for model serving access). + # Create the secret via: databricks apps set-secret DATABRICKS_TOKEN - name: DATABRICKS_TOKEN valueFrom: DATABRICKS_TOKEN - name: ANTHROPIC_MODEL - value: databricks-claude-opus-4-6 + value: databricks-claude-sonnet-4-6 - name: GEMINI_MODEL value: databricks-gemini-3-1-pro - name: CODEX_MODEL value: databricks-gpt-5-2 - #OPTIONAL: Move to the new Databricks Gateway if you have access (recommended), otherwise it will default to the older endpoint - - name: DATABRICKS_GATEWAY_HOST - valueFrom: DATABRICKS_GATEWAY_HOST + #OPTIONAL: Use AI Gateway if available (recommended), otherwise falls back to direct model serving + #- name: DATABRICKS_GATEWAY_HOST + # value: https://.ai-gateway..cloud.databricks.com - name: CLAUDE_CODE_DISABLE_AUTO_MEMORY value: 0 diff --git a/app.yaml.template b/app.yaml.template index c29f3a6..e9417c3 100644 --- a/app.yaml.template +++ b/app.yaml.template @@ -1,13 +1,15 @@ command: - gunicorn - app:app +compute: + size: Large env: - name: HOME value: /app/python/source_code - name: DATABRICKS_TOKEN valueFrom: DATABRICKS_TOKEN - name: ANTHROPIC_MODEL - value: databricks-claude-opus-4-6 + value: databricks-claude-sonnet-4-6 - name: GEMINI_MODEL value: databricks-gemini-3-1-pro #OPTIONAL: Use the new Databricks AI Gateway if you have access (recommended), otherwise it will default to the older endpoint @@ -18,3 +20,29 @@ env: # completes the rollout and auto memory is on by default, this can be removed entirely. - name: CLAUDE_CODE_DISABLE_AUTO_MEMORY value: 0 + #OPTIONAL: Git credentials for enterprise source control (GitHub, Azure DevOps, GitLab) + # GIT_TOKEN is used for HTTPS git auth. 
If GIT_TOKEN_HOST is set, it only applies to + # matching hosts; otherwise it applies to all git operations. + # - name: GIT_TOKEN + # valueFrom: GIT_TOKEN + # - name: GIT_TOKEN_HOST + # value: github.com + #OPTIONAL: Comma-separated list of repos to auto-clone into ~/projects/ at startup + # - name: GIT_REPOS + # value: https://github.com/org/repo1.git,https://github.com/org/repo2.git + #OPTIONAL: Set to "true" to sync commits to Databricks Workspace files (default: disabled) + # - name: WORKSPACE_SYNC + # value: "true" + #OPTIONAL: Persist Claude Code auto-memory and shell history across container restarts. + # Syncs state to /Workspace/Users/{email}/.state/ every 5 min and restores on startup. + # Default: true (set to "false" to disable) + # - name: STATE_SYNC + # value: "true" + #OPTIONAL: Terminal UI mode. "tabs" (default) shows browser-like tabs; "grid" shows + # the multi-pane grid layout. Users can toggle between modes in the UI. + # - name: TERMINAL_MODE + # value: "tabs" + #OPTIONAL: Enable tmux session persistence. When "true", terminal sessions + # survive page refreshes via tmux. Default: "false" (plain PTY, lighter weight). + # - name: TMUX_ENABLED + # value: "false" diff --git a/docs/prd/multi-terminal-git-auth.md b/docs/prd/multi-terminal-git-auth.md new file mode 100644 index 0000000..d437bcd --- /dev/null +++ b/docs/prd/multi-terminal-git-auth.md @@ -0,0 +1,106 @@ +# PRD: Multi-Terminal Support & Git Authentication + +**Status:** COMPLETE +**Author:** Claude Code +**Date:** 2025-03-05 + +--- + +## Problem Statement + +The browser-based terminal app currently supports only a single full-screen terminal. Users running AI coding agents (Claude Code, Gemini CLI, etc.) frequently need multiple terminals simultaneously -- one for the agent, one for testing, one for git operations. Switching between tasks requires closing and reopening sessions. 
Additionally, git credential helpers are not configured, so HTTPS git operations against GitHub/GitLab fail when users try to clone private repos or push changes. + +## Goals + +1. Enable multiple terminal panes visible simultaneously with predefined layouts +2. Provide a toolbar for layout switching, pane management, and focus control +3. Optimize polling performance with a batch output endpoint +4. Configure git credential helpers so Databricks token-based git operations work seamlessly + +## Non-Goals + +- WebSocket support (Databricks Apps proxy limitation) +- Drag-and-drop pane resizing (keep it simple with predefined layouts) +- Saving/restoring terminal sessions across page reloads +- External JS framework dependencies +- Modifying the loading screen (static/loading.html) + +--- + +## Acceptance Criteria + +### Multi-Terminal UI + +**AC-1: Layout System** +The frontend must support four predefined layouts: "single" (1 terminal, full screen), "hsplit" (2 terminals side-by-side), "vsplit" (2 terminals stacked), and "quad" (4 terminals in a 2x2 grid). Each layout allocates equal space to its panes. + +**AC-2: Toolbar** +A toolbar at the top of the page displays: layout toggle buttons (icons or labels for single/hsplit/vsplit/quad), indicators showing which panes are active, and a visual indicator of which pane has focus. The toolbar must use the existing dark theme (#1e1e1e background). + +**AC-3: Pane Lifecycle** +Each pane gets its own independent PTY session via POST /api/session. Sessions are created when a pane is added and closed (via POST /api/session/close) when a pane is removed. Users can close individual panes via a close button on each pane header. Closing a pane in a layout that requires fewer panes does not force a layout change -- the slot becomes empty and shows a "+" button to reopen. + +**AC-4: Independent Resize** +Each pane's xterm.js instance must report its own correct dimensions. 
When the window resizes or the layout changes, each pane calls fitAddon.fit() and sends its dimensions via POST /api/resize. Resize events must be debounced (at least 150ms). + +**AC-5: Focus Management** +Clicking a pane gives it focus (visually indicated by a highlighted border). The keyboard shortcut Ctrl+Shift+N cycles focus to the next active pane. The focused pane receives all keyboard input. + +**AC-6: Close Pane** +Each pane has a close button (X) in its header bar. Closing a pane sends POST /api/session/close and removes the terminal from the UI. The pane slot shows a "+" button to create a new session in that slot. + +### Performance + +**AC-7: Batch Output Endpoint** +A new endpoint POST /api/output-batch accepts `{"session_ids": ["id1", "id2", ...]}` and returns `{"outputs": {"id1": {"output": "...", "exited": false}, "id2": {...}}}`. The frontend uses this single endpoint instead of individual /api/output calls to reduce HTTP overhead. The existing /api/output endpoint remains for backward compatibility. + +**AC-8: Polling Efficiency** +The frontend uses a single setInterval (100ms) that calls /api/output-batch with all active session IDs. This replaces per-terminal polling intervals. If no sessions are active, polling pauses. + +### Git Authentication + +**AC-9: Git Credential Helper** +During setup (in setup_databricks.py or app.py's _setup_git_config), a git credential helper script is written to ~/.local/bin/git-credential-databricks. It reads DATABRICKS_TOKEN from the environment and returns it as the password for HTTPS git operations. The ~/.gitconfig is updated to include `[credential] helper = /path/to/git-credential-databricks`. This enables `git clone https://...`, `git push`, etc. to authenticate using the Databricks token for Databricks-hosted repos (Repos API). 
+ +**AC-10: Credential Helper Protocol** +The git credential helper must implement the git credential helper protocol: when invoked with "get" as an argument, it reads key=value pairs from stdin (including "host" and "protocol") and writes `username=token\npassword=<DATABRICKS_TOKEN>\n` to stdout. For any other action (store, erase), it exits silently. + +--- + +## Technical Design + +### Frontend (static/index.html) + +- Replace the single `#terminal` div with a `#toolbar` and `#pane-container` +- TerminalPane class: manages one xterm.js Terminal + FitAddon + session lifecycle +- LayoutManager class: manages pane creation/destruction, CSS grid layout switching +- Single poll loop calls /api/output-batch with all active session IDs +- Debounced resize handler updates all panes + +### Backend (app.py) + +- New route: POST /api/output-batch +- Acquires sessions_lock once, reads all requested buffers, returns combined response + +### Git Auth (setup_databricks.py or _setup_git_config in app.py) + +- Write git-credential-databricks shell script to ~/.local/bin/ +- Append credential helper config to ~/.gitconfig +- The credential helper reads DATABRICKS_TOKEN from env at runtime (so token refresh works) + +### Files Changed + +| File | Change | +|------|--------| +| static/index.html | Complete rewrite: toolbar, layout manager, multi-pane support, batch polling | +| app.py | Add /api/output-batch endpoint | +| app.py (_setup_git_config) | Add git credential helper setup | + +--- + +## Resolved Questions + +1. **Last pane behavior:** Closing the last pane auto-creates a new terminal (always at least one terminal open). +2. **Credential helper scope:** The credential helper works for ALL HTTPS git URLs (general helper, not scoped to Databricks only). 
def on_exit(server):
    """Server hook: make a final attempt to persist state on shutdown.

    Args:
        server: Object passed in by Gunicorn; unused here.

    Never raises. A failed save is logged with its traceback so the cause
    can be diagnosed, and shutdown continues regardless.
    """
    logger = logging.getLogger("gunicorn.error")
    logger.info("Gunicorn shutting down — triggering state save")
    try:
        # Imported lazily so that a missing or broken state_sync module is
        # reported through the error path below.
        from state_sync import save_state

        save_state()
    except Exception as e:
        # logger.exception keeps the original message and adds the traceback.
        logger.exception("Failed to save state on shutdown: %s", e)
    else:
        logger.info("State saved on shutdown")
+from utils import ensure_https, resolve_databricks_host_and_token + +logger = logging.getLogger(__name__) # Set HOME if not properly set if not os.environ.get("HOME") or os.environ["HOME"] == "/": @@ -18,28 +21,32 @@ # 1. Write settings.json for Databricks model serving # Use DATABRICKS_GATEWAY_HOST if available (new AI Gateway), otherwise fall back to DATABRICKS_HOST gateway_host = ensure_https(os.environ.get("DATABRICKS_GATEWAY_HOST", "").rstrip("/")) -databricks_host = ensure_https(os.environ.get("DATABRICKS_HOST", "").rstrip("/")) +databricks_host, auth_token = resolve_databricks_host_and_token() -gateway_token = os.environ.get("DATABRICKS_TOKEN", "") if gateway_host else "" -if gateway_host and not gateway_token: - print("Warning: DATABRICKS_GATEWAY_HOST set but DATABRICKS_TOKEN missing, falling back to DATABRICKS_HOST") +if gateway_host and not auth_token: + logger.warning( + "DATABRICKS_GATEWAY_HOST set but token unavailable, falling back to DATABRICKS_HOST" + ) gateway_host = "" if gateway_host: anthropic_base_url = f"{gateway_host}/anthropic" - auth_token = gateway_token - print(f"Using Databricks AI Gateway: {gateway_host}") + logger.info(f"Using Databricks AI Gateway: {gateway_host}") else: + if not databricks_host or not auth_token: + logger.error("could not resolve Databricks host/token for Claude setup") + raise SystemExit(1) anthropic_base_url = f"{databricks_host}/serving-endpoints/anthropic" - auth_token = os.environ["DATABRICKS_TOKEN"] - print(f"Using Databricks Host: {databricks_host}") + logger.info(f"Using Databricks Host: {databricks_host}") settings = { "env": { - "ANTHROPIC_MODEL": os.environ.get("ANTHROPIC_MODEL", "databricks-claude-sonnet-4-6"), + "ANTHROPIC_MODEL": os.environ.get( + "ANTHROPIC_MODEL", "databricks-claude-sonnet-4-6" + ), "ANTHROPIC_BASE_URL": anthropic_base_url, "ANTHROPIC_AUTH_TOKEN": auth_token, - "ANTHROPIC_CUSTOM_HEADERS": "x-databricks-use-coding-agent-mode: true" + "ANTHROPIC_CUSTOM_HEADERS": 
"x-databricks-use-coding-agent-mode: true", } } @@ -50,47 +57,64 @@ claude_json = { "hasCompletedOnboarding": True, "mcpServers": { - "deepwiki": { - "type": "http", - "url": "https://mcp.deepwiki.com/mcp" - }, - "exa": { - "type": "http", - "url": "https://mcp.exa.ai/mcp" - } - } + "deepwiki": {"type": "http", "url": "https://mcp.deepwiki.com/mcp"}, + "exa": {"type": "http", "url": "https://mcp.exa.ai/mcp"}, + }, } claude_json_path = home / ".claude.json" claude_json_path.write_text(json.dumps(claude_json, indent=2)) -print(f"Claude configured: {settings_path}") -print(f"Onboarding skipped + MCPs configured: {claude_json_path}") +logger.info(f"Claude configured: {settings_path}") +logger.info(f"Onboarding skipped + MCPs configured: {claude_json_path}") # 3. Install Claude Code CLI if not present local_bin = home / ".local" / "bin" claude_bin = local_bin / "claude" if not claude_bin.exists(): - print("Installing Claude Code CLI...") + logger.info("Installing Claude Code CLI...") + install_script = "/tmp/claude_install.sh" + # Download install script first (don't pipe directly to bash) + dl_result = subprocess.run( + ["curl", "-fsSL", "-o", install_script, "https://claude.ai/install.sh"], + env={**os.environ, "HOME": str(home)}, + capture_output=True, + text=True, + ) + if dl_result.returncode != 0: + logger.error(f"Failed to download install script: {dl_result.stderr}") + raise SystemExit(1) + # Verify the download is a shell script (basic sanity check) + with open(install_script, "r") as f: + first_line = f.readline() + if not first_line.startswith("#!"): + logger.error( + f"Downloaded file doesn't look like a shell script: {first_line[:50]}" + ) + os.remove(install_script) + raise SystemExit(1) + # Execute the verified script result = subprocess.run( - ["bash", "-c", "curl -fsSL https://claude.ai/install.sh | bash"], + ["bash", install_script], env={**os.environ, "HOME": str(home)}, capture_output=True, - text=True + text=True, ) + os.remove(install_script) if 
result.returncode == 0: - print("Claude Code CLI installed successfully") + logger.info("Claude Code CLI installed successfully") else: - print(f"CLI install warning: {result.stderr}") + logger.error(f"CLI install failed: {result.stderr}") + raise SystemExit(1) else: - print(f"Claude Code CLI already installed at {claude_bin}") + logger.info(f"Claude Code CLI already installed at {claude_bin}") # 4. Create projects directory projects_dir = home / "projects" projects_dir.mkdir(exist_ok=True) -print(f"Projects directory: {projects_dir}") +logger.info(f"Projects directory: {projects_dir}") # 5. Git identity and hooks are now configured by app.py's _setup_git_config() # (runs directly in Python before setup_claude.py, writes ~/.gitconfig and ~/.githooks/) -print("Git identity and hooks: configured by app.py (skipping here)") +logger.info("Git identity and hooks: configured by app.py (skipping here)") diff --git a/setup_codex.py b/setup_codex.py index ac3e0f2..a5eab7a 100644 --- a/setup_codex.py +++ b/setup_codex.py @@ -8,11 +8,19 @@ Config: ~/.codex/config.toml with custom model_providers for Databricks. Auth: Bearer token via DATABRICKS_TOKEN environment variable. 
""" + +import logging import os import subprocess from pathlib import Path -from utils import adapt_instructions_file, ensure_https +from utils import ( + adapt_instructions_file, + ensure_https, + resolve_databricks_host_and_token, +) + +logger = logging.getLogger(__name__) # Set HOME if not properly set if not os.environ.get("HOME") or os.environ["HOME"] == "/": @@ -20,32 +28,35 @@ home = Path(os.environ["HOME"]) -host = os.environ.get("DATABRICKS_HOST", "") -token = os.environ.get("DATABRICKS_TOKEN", "") +host, token = resolve_databricks_host_and_token() codex_model = os.environ.get("CODEX_MODEL", "databricks-gpt-5-2") if not host or not token: - print("Warning: DATABRICKS_HOST or DATABRICKS_TOKEN not set, skipping Codex CLI config") - exit(0) + logger.error( + "DATABRICKS_HOST or auth token not available, cannot configure Codex CLI" + ) + raise SystemExit(1) # Strip trailing slash and ensure https:// prefix host = ensure_https(host.rstrip("/")) # Use DATABRICKS_GATEWAY_HOST if available (new AI Gateway), otherwise fall back to DATABRICKS_HOST gateway_host = ensure_https(os.environ.get("DATABRICKS_GATEWAY_HOST", "").rstrip("/")) -gateway_token = os.environ.get("DATABRICKS_TOKEN", "") if gateway_host else "" +gateway_token = token if gateway_host else "" if gateway_host and not gateway_token: - print("Warning: DATABRICKS_GATEWAY_HOST set but DATABRICKS_TOKEN missing, falling back to DATABRICKS_HOST") + logger.warning( + "DATABRICKS_GATEWAY_HOST set but token unavailable, falling back to DATABRICKS_HOST" + ) gateway_host = "" if gateway_host: codex_base_url = f"{gateway_host}/openai/v1" auth_token = gateway_token - print(f"Using Databricks AI Gateway: {gateway_host}") + logger.info(f"Using Databricks AI Gateway: {gateway_host}") else: codex_base_url = f"{host}/serving-endpoints" auth_token = token - print(f"Using Databricks Host: {host}") + logger.info(f"Using Databricks Host: {host}") # 1. 
Install Codex CLI into ~/.local/bin local_bin = home / ".local" / "bin" @@ -53,7 +64,7 @@ codex_bin = local_bin / "codex" if not codex_bin.exists(): - print("Installing Codex CLI...") + logger.info("Installing Codex CLI...") # Use --prefix ~/.local so npm installs directly into ~/.local/bin npm_prefix = str(home / ".local") result = subprocess.run( @@ -63,11 +74,12 @@ env={**os.environ, "HOME": str(home)}, ) if result.returncode == 0: - print(f"Codex CLI installed to {codex_bin}") + logger.info(f"Codex CLI installed to {codex_bin}") else: - print(f"Codex CLI install warning: {result.stderr}") + logger.error(f"Codex CLI install failed: {result.stderr}") + raise SystemExit(1) else: - print(f"Codex CLI already installed at {codex_bin}") + logger.info(f"Codex CLI already installed at {codex_bin}") # 2. Create ~/.codex directory and write config.toml codex_dir = home / ".codex" @@ -94,7 +106,7 @@ config_path = codex_dir / "config.toml" config_path.write_text(config_content) -print(f"Codex CLI configured: {config_path}") +logger.info(f"Codex CLI configured: {config_path}") # 3. Write OPENAI_API_KEY to shell profile for Codex to pick up # Codex reads from env_key specified in config (OPENAI_API_KEY) @@ -106,13 +118,13 @@ env_path = codex_dir / ".env" env_path.write_text(env_content) env_path.chmod(0o600) -print(f"Codex CLI env configured: {env_path}") +logger.info(f"Codex CLI env configured: {env_path}") # 4. Adapt CLAUDE.md to AGENTS.md for Codex # Look for CLAUDE.md in common locations claude_md_locations = [ Path(__file__).parent / "CLAUDE.md", # Same directory as setup script - home / ".claude" / "CLAUDE.md", # User's Claude config + home / ".claude" / "CLAUDE.md", # User's Claude config Path("/app/python/source_code/CLAUDE.md"), # Databricks App location ] @@ -130,9 +142,9 @@ cli_name="Codex", ) -print("\nCodex CLI ready! 
Usage:") -print(" codex # Start Codex CLI") -print(" codex 'explain this codebase' # Run with prompt") -print(f"\nEndpoint: {codex_base_url}") -print(f"Model: {codex_model}") -print("Auth: Bearer token (Databricks PAT via OPENAI_API_KEY)") +logger.info("Codex CLI ready! Usage:") +logger.info(" codex # Start Codex CLI") +logger.info(" codex 'explain this codebase' # Run with prompt") +logger.info(f"Endpoint: {codex_base_url}") +logger.info(f"Model: {codex_model}") +logger.info("Auth: Bearer token (Databricks PAT via OPENAI_API_KEY)") diff --git a/setup_databricks.py b/setup_databricks.py index 85f21f4..8d6bb74 100644 --- a/setup_databricks.py +++ b/setup_databricks.py @@ -1,10 +1,14 @@ #!/usr/bin/env python """Configure Databricks CLI with the user's PAT from environment.""" + +import logging import os import subprocess from pathlib import Path -from utils import ensure_https +from utils import resolve_databricks_host_and_token + +logger = logging.getLogger(__name__) # Set HOME if not properly set if not os.environ.get("HOME") or os.environ["HOME"] == "/": @@ -12,15 +16,14 @@ home = Path(os.environ["HOME"]) -# Get credentials from environment -host = os.environ.get("DATABRICKS_HOST") -token = os.environ.get("DATABRICKS_TOKEN") +# Get credentials from environment or SDK auto-auth fallback +host, token = resolve_databricks_host_and_token() if not host or not token: - print("Warning: DATABRICKS_HOST or DATABRICKS_TOKEN not set, skipping CLI config") - exit(0) - -host = ensure_https(host) + logger.error( + "DATABRICKS_HOST or auth token not available, cannot configure Databricks CLI" + ) + raise SystemExit(1) # Create ~/.databrickscfg with DEFAULT profile using PAT auth databrickscfg = home / ".databrickscfg" @@ -31,42 +34,43 @@ databrickscfg.write_text(config_content) databrickscfg.chmod(0o600) # Restrict permissions -print(f"Databricks CLI configured: {databrickscfg}") +logger.info(f"Databricks CLI configured: {databrickscfg}") # Verify it works result = 
subprocess.run( ["databricks", "current-user", "me", "--output", "json"], capture_output=True, text=True, - env={ - **os.environ, - # Remove OAuth vars to force PAT auth - "DATABRICKS_CLIENT_ID": "", - "DATABRICKS_CLIENT_SECRET": "" - } ) if result.returncode == 0: import json + try: user = json.loads(result.stdout) - email = user.get('userName', '') - display_name = user.get('displayName', '') - print(f"Databricks CLI authenticated as: {email}") + email = user.get("userName", "") + display_name = user.get("displayName", "") + logger.info(f"Databricks CLI authenticated as: {email}") # Configure git with user's email and name if email: - subprocess.run(["git", "config", "--global", "user.email", email], check=False) - print(f"Git configured with email: {email}") + subprocess.run( + ["git", "config", "--global", "user.email", email], check=False + ) + logger.info(f"Git configured with email: {email}") if display_name: - subprocess.run(["git", "config", "--global", "user.name", display_name], check=False) - print(f"Git configured with name: {display_name}") + subprocess.run( + ["git", "config", "--global", "user.name", display_name], check=False + ) + logger.info(f"Git configured with name: {display_name}") elif email: # Fall back to email prefix as name if no display name - name_from_email = email.split('@')[0].replace('.', ' ').title() - subprocess.run(["git", "config", "--global", "user.name", name_from_email], check=False) - print(f"Git configured with name: {name_from_email}") + name_from_email = email.split("@")[0].replace(".", " ").title() + subprocess.run( + ["git", "config", "--global", "user.name", name_from_email], check=False + ) + logger.info(f"Git configured with name: {name_from_email}") except json.JSONDecodeError: - print("Databricks CLI configured (couldn't parse user)") + logger.info("Databricks CLI configured (couldn't parse user)") else: - print(f"Warning: CLI config may have issues: {result.stderr}") + logger.warning(f"CLI config may have issues: 
{result.stderr}") diff --git a/setup_gemini.py b/setup_gemini.py index 5dc3412..c7af67c 100644 --- a/setup_gemini.py +++ b/setup_gemini.py @@ -10,13 +10,21 @@ Auth: GEMINI_API_KEY_AUTH_MECHANISM=bearer sends Databricks PAT as Bearer token. """ + +import logging import os import json import shutil import subprocess from pathlib import Path -from utils import adapt_instructions_file, ensure_https +from utils import ( + adapt_instructions_file, + ensure_https, + resolve_databricks_host_and_token, +) + +logger = logging.getLogger(__name__) # Set HOME if not properly set if not os.environ.get("HOME") or os.environ["HOME"] == "/": @@ -24,32 +32,35 @@ home = Path(os.environ["HOME"]) -host = os.environ.get("DATABRICKS_HOST", "") -token = os.environ.get("DATABRICKS_TOKEN", "") +host, token = resolve_databricks_host_and_token() gemini_model = os.environ.get("GEMINI_MODEL", "databricks-gemini-3-1-pro") if not host or not token: - print("Warning: DATABRICKS_HOST or DATABRICKS_TOKEN not set, skipping Gemini CLI config") - exit(0) + logger.error( + "DATABRICKS_HOST or auth token not available, cannot configure Gemini CLI" + ) + raise SystemExit(1) # Strip trailing slash and ensure https:// prefix host = ensure_https(host.rstrip("/")) # Use DATABRICKS_GATEWAY_HOST if available (new AI Gateway), otherwise fall back to DATABRICKS_HOST gateway_host = ensure_https(os.environ.get("DATABRICKS_GATEWAY_HOST", "").rstrip("/")) -gateway_token = os.environ.get("DATABRICKS_TOKEN", "") if gateway_host else "" +gateway_token = token if gateway_host else "" if gateway_host and not gateway_token: - print("Warning: DATABRICKS_GATEWAY_HOST set but DATABRICKS_TOKEN missing, falling back to DATABRICKS_HOST") + logger.warning( + "DATABRICKS_GATEWAY_HOST set but token unavailable, falling back to DATABRICKS_HOST" + ) gateway_host = "" if gateway_host: gemini_base_url = f"{gateway_host}/gemini" auth_token = gateway_token - print(f"Using Databricks AI Gateway: {gateway_host}") + logger.info(f"Using 
Databricks AI Gateway: {gateway_host}") else: gemini_base_url = f"{host}/serving-endpoints/google" auth_token = token - print(f"Using Databricks Host: {host}") + logger.info(f"Using Databricks Host: {host}") # 1. Install Gemini CLI into ~/.local/bin (same approach as Claude Code) local_bin = home / ".local" / "bin" @@ -57,20 +68,28 @@ gemini_bin = local_bin / "gemini" if not gemini_bin.exists(): - print("Installing Gemini CLI...") + logger.info("Installing Gemini CLI...") # Use --prefix ~/.local so npm installs directly into ~/.local/bin (avoids EACCES on /usr/local) npm_prefix = str(home / ".local") result = subprocess.run( - ["npm", "install", "-g", f"--prefix={npm_prefix}", "@google/gemini-cli@nightly"], - capture_output=True, text=True, - env={**os.environ, "HOME": str(home)} + [ + "npm", + "install", + "-g", + f"--prefix={npm_prefix}", + "@google/gemini-cli@nightly", + ], + capture_output=True, + text=True, + env={**os.environ, "HOME": str(home)}, ) if result.returncode == 0: - print(f"Gemini CLI installed to {gemini_bin}") + logger.info(f"Gemini CLI installed to {gemini_bin}") else: - print(f"Gemini CLI install warning: {result.stderr}") + logger.error(f"Gemini CLI install failed: {result.stderr}") + raise SystemExit(1) else: - print(f"Gemini CLI already installed at {gemini_bin}") + logger.info(f"Gemini CLI already installed at {gemini_bin}") # 2. Create ~/.gemini directory and configure environment gemini_dir = home / ".gemini" @@ -89,20 +108,18 @@ env_path = gemini_dir / ".env" env_path.write_text(env_content) env_path.chmod(0o600) -print(f"Gemini CLI env configured: {env_path}") +logger.info(f"Gemini CLI env configured: {env_path}") # 3. 
Write settings.json with model preferences and auth settings = { "theme": "Default", "selectedAuthType": "gemini-api-key", - "model": { - "name": gemini_model - } + "model": {"name": gemini_model}, } settings_path = gemini_dir / "settings.json" settings_path.write_text(json.dumps(settings, indent=2)) -print(f"Gemini CLI settings configured: {settings_path}") +logger.info(f"Gemini CLI settings configured: {settings_path}") # 4. Copy Claude skills into .gemini/skills for shared reference claude_skills_dir = home / ".claude" / "skills" @@ -111,15 +128,15 @@ if gemini_skills_dir.exists(): shutil.rmtree(gemini_skills_dir) shutil.copytree(claude_skills_dir, gemini_skills_dir) - print(f"Skills copied: {claude_skills_dir} -> {gemini_skills_dir}") + logger.info(f"Skills copied: {claude_skills_dir} -> {gemini_skills_dir}") else: - print(f"No Claude skills found at {claude_skills_dir}, skipping copy") + logger.info(f"No Claude skills found at {claude_skills_dir}, skipping copy") # 5. Adapt CLAUDE.md to GEMINI.md for Gemini CLI # Look for CLAUDE.md in common locations claude_md_locations = [ Path(__file__).parent / "CLAUDE.md", # Same directory as setup script - home / ".claude" / "CLAUDE.md", # User's Claude config + home / ".claude" / "CLAUDE.md", # User's Claude config Path("/app/python/source_code/CLAUDE.md"), # Databricks App location ] @@ -137,7 +154,7 @@ cli_name="Gemini", ) -print("\nGemini CLI ready! Usage:") -print(" gemini # Start Gemini CLI") -print(f"\nEndpoint: {gemini_base_url}") -print("Auth: Bearer token (Databricks PAT)") +logger.info("Gemini CLI ready! 
Usage:") +logger.info(" gemini # Start Gemini CLI") +logger.info(f"Endpoint: {gemini_base_url}") +logger.info("Auth: Bearer token (Databricks PAT)") diff --git a/setup_opencode.py b/setup_opencode.py index 5e46078..4e4196d 100644 --- a/setup_opencode.py +++ b/setup_opencode.py @@ -1,11 +1,22 @@ #!/usr/bin/env python -"""Configure OpenCode CLI with Databricks Model Serving as an OpenAI-compatible provider.""" +"""Configure OpenCode CLI with native Databricks provider from fork. + +Installs from https://github.com/dgokeeffe/opencode (feat/databricks-ai-sdk-provider branch) +which has built-in Databricks model serving support via @databricks/ai-sdk-provider. +The native provider auto-discovers models from serving endpoints and handles auth +through the full Databricks SDK credential chain (PAT, OAuth M2M, CLI, Azure, GCP). +""" + +import logging import os import json import subprocess +import platform from pathlib import Path -from utils import ensure_https +from utils import ensure_https, resolve_databricks_host_and_token + +logger = logging.getLogger(__name__) # Set HOME if not properly set if not os.environ.get("HOME") or os.environ["HOME"] == "/": @@ -13,223 +24,206 @@ home = Path(os.environ["HOME"]) -host = os.environ.get("DATABRICKS_HOST", "") -token = os.environ.get("DATABRICKS_TOKEN", "") +host, token = resolve_databricks_host_and_token() anthropic_model = os.environ.get("ANTHROPIC_MODEL", "databricks-claude-sonnet-4-6") if not host or not token: - print("Warning: DATABRICKS_HOST or DATABRICKS_TOKEN not set, skipping OpenCode config") - exit(0) + logger.error( + "DATABRICKS_HOST or auth token not available, cannot configure OpenCode" + ) + raise SystemExit(1) # Strip trailing slash and ensure https:// prefix host = ensure_https(host.rstrip("/")) -# Use DATABRICKS_GATEWAY_HOST if available (new AI Gateway), otherwise fall back to current gateway (DATABRICKS_HOST) -gateway_host = ensure_https(os.environ.get("DATABRICKS_GATEWAY_HOST", "").rstrip("/")) 
-gateway_token = os.environ.get("DATABRICKS_TOKEN", "") if gateway_host else "" -if gateway_host and not gateway_token: - print("Warning: DATABRICKS_GATEWAY_HOST set but DATABRICKS_TOKEN missing, falling back to DATABRICKS_HOST") - gateway_host = "" - -if gateway_host: - print(f"Using Databricks AI Gateway: {gateway_host}") -else: - print(f"Using Databricks Host: {host}") +FORK_REPO = "https://github.com/dgokeeffe/opencode.git" +FORK_BRANCH = "feat/databricks-ai-sdk-provider" -# 1. Install OpenCode CLI into ~/.local/bin (same approach as Claude Code) +# 1. Install OpenCode CLI from fork local_bin = home / ".local" / "bin" local_bin.mkdir(parents=True, exist_ok=True) opencode_bin = local_bin / "opencode" if not opencode_bin.exists(): - print("Installing OpenCode CLI...") - # Use --prefix ~/.local so npm installs directly into ~/.local/bin (avoids EACCES on /usr/local) + logger.info("Installing OpenCode CLI from Databricks fork...") npm_prefix = str(home / ".local") + build_dir = home / ".cache" / "opencode-build" + env = {**os.environ, "HOME": str(home)} + + # Step 1: Install bun via npm + logger.info(" Installing bun...") result = subprocess.run( - ["npm", "install", "-g", f"--prefix={npm_prefix}", "opencode-ai@latest"], - capture_output=True, text=True, - env={**os.environ, "HOME": str(home)} + ["npm", "install", "-g", f"--prefix={npm_prefix}", "bun"], + capture_output=True, + text=True, + env=env, ) - if result.returncode == 0: - print(f"OpenCode CLI installed to {opencode_bin}") - else: - print(f"OpenCode install warning: {result.stderr}") -else: - print(f"OpenCode CLI already installed at {opencode_bin}") + if result.returncode != 0: + logger.error(f" bun install failed: {result.stderr}") + raise SystemExit(1) + + bun_bin = local_bin / "bun" + if not bun_bin.exists(): + # bun might be in a different location + bun_candidates = list((home / ".local" / "lib").rglob("bun")) + if bun_candidates: + bun_bin = bun_candidates[0] + else: + logger.error(" bun binary not 
found after install") + raise SystemExit(1) + logger.info(f" bun installed: {bun_bin}") + + # Step 2: Clone the fork + logger.info(f" Cloning {FORK_REPO} ({FORK_BRANCH})...") + if build_dir.exists(): + subprocess.run(["rm", "-rf", str(build_dir)], check=True) + result = subprocess.run( + [ + "git", + "clone", + "--depth=1", + f"--branch={FORK_BRANCH}", + FORK_REPO, + str(build_dir), + ], + capture_output=True, + text=True, + env=env, + ) + if result.returncode != 0: + logger.error(f" git clone failed: {result.stderr}") + raise SystemExit(1) + + # Step 3: Install dependencies + logger.info(" Installing dependencies (bun install)...") + # Ensure bun's directory is on PATH for child processes + bun_dir = str(bun_bin.parent) + install_env = {**env, "PATH": f"{bun_dir}:{env.get('PATH', '')}"} + result = subprocess.run( + [str(bun_bin), "install"], + capture_output=True, + text=True, + cwd=str(build_dir), + env=install_env, + ) + if result.returncode != 0: + logger.error(f" bun install failed: {result.stderr}") + raise SystemExit(1) + + # Step 4: Build for current platform only + logger.info(" Building OpenCode (single platform)...") + pkg_dir = build_dir / "packages" / "opencode" + # Ensure bun's directory is on PATH so child processes can find it + bun_dir = str(bun_bin.parent) + build_env = {**env, "PATH": f"{bun_dir}:{env.get('PATH', '')}"} + result = subprocess.run( + [str(bun_bin), "run", "build", "--", "--single"], + capture_output=True, + text=True, + cwd=str(pkg_dir), + env=build_env, + timeout=180, + ) + if result.returncode != 0: + logger.error(f" Build failed: {result.stderr}") + logger.error(f" Build stdout: {result.stdout}") + raise SystemExit(1) + + # Step 5: Find and copy the built binary + # Build output: dist/@opencode-ai/script-{os}-{arch}/bin/opencode + os_name = "linux" if platform.system() == "Linux" else "darwin" + arch_name = "arm64" if platform.machine() in ("aarch64", "arm64") else "x64" + dist_dir = pkg_dir / "dist" + + # Find the binary - try 
exact match first, then glob + expected_bin = ( + dist_dir / f"@opencode-ai/script-{os_name}-{arch_name}" / "bin" / "opencode" + ) + if not expected_bin.exists(): + # Try to find any built binary + candidates = list(dist_dir.rglob("bin/opencode")) + if candidates: + expected_bin = candidates[0] + else: + logger.error(f" built binary not found in {dist_dir}") + logger.error( + f" Contents: {list(dist_dir.iterdir()) if dist_dir.exists() else 'dist dir missing'}" + ) + raise SystemExit(1) + + # Copy binary to ~/.local/bin + import shutil + + # Install real binary as _opencode_real, create wrapper to strip OAuth vars + opencode_real = local_bin / "_opencode_real" + shutil.copy2(str(expected_bin), str(opencode_real)) + opencode_real.chmod(0o755) + + # Write wrapper that strips OAuth M2M vars before exec'ing the real binary. + # Databricks Apps injects both PAT and OAuth M2M env vars, causing the + # Databricks SDK to reject with "more than one authorization method". + opencode_bin.write_text( + "#!/bin/sh\n" + "cd \"$HOME\" 2>/dev/null || true\n" + "unset DATABRICKS_CLIENT_ID DATABRICKS_CLIENT_SECRET\n" + 'exec "$(dirname "$0")/_opencode_real" "$@"\n' + ) + opencode_bin.chmod(0o755) + logger.info(f" OpenCode CLI installed to {opencode_bin} (wrapper + _opencode_real)") -# 2. 
Write global opencode.json config -# OpenCode looks for config at ~/.config/opencode/opencode.json (global) -# and ./opencode.json (project-level) + # Clean up build directory to save space + logger.info(" Cleaning up build directory...") + subprocess.run(["rm", "-rf", str(build_dir)], check=True) +else: + logger.info(f"OpenCode CLI already installed at {opencode_bin}") + # Ensure wrapper exists even if binary was cached from previous deploy + opencode_real = local_bin / "_opencode_real" + if not opencode_real.exists() and opencode_bin.exists(): + # Binary exists but no wrapper — convert to wrapper pattern + import shutil as _shutil + + _shutil.move(str(opencode_bin), str(opencode_real)) + opencode_bin.write_text( + "#!/bin/sh\n" + "cd \"$HOME\" 2>/dev/null || true\n" + "unset DATABRICKS_CLIENT_ID DATABRICKS_CLIENT_SECRET\n" + 'exec "$(dirname "$0")/_opencode_real" "$@"\n' + ) + opencode_bin.chmod(0o755) + logger.info(f" Converted to wrapper pattern: {opencode_bin}") + +# 2. Write opencode.json config with MCP servers +# The fork's native Databricks provider auto-discovers models from serving endpoints +# and handles auth via DATABRICKS_TOKEN env var / ~/.databrickscfg / SDK credential chain. 
opencode_config_dir = home / ".config" / "opencode" opencode_config_dir.mkdir(parents=True, exist_ok=True) -if gateway_host: - # Gateway mode: separate providers for different API protocols - # SDK auto-appends /chat/completions and /responses to baseURL - # - Anthropic/Gemini models: baseURL={gateway}/mlflow/v1 → /mlflow/v1/chat/completions - # - OpenAI/GPT models: baseURL={gateway}/openai/v1 → /openai/v1/responses - opencode_config = { - "$schema": "https://opencode.ai/config.json", - "provider": { - "databricks": { - "npm": "@ai-sdk/openai-compatible", - "name": "Databricks AI Gateway (MLflow)", - "options": { - "baseURL": f"{gateway_host}/mlflow/v1", - "apiKey": "{env:DATABRICKS_TOKEN}" - }, - "models": { - "databricks-claude-opus-4-6": { - "name": "Claude Opus 4.6 (Databricks)", - "limit": { - "context": 200000, - "output": 16384 - } - }, - "databricks-claude-sonnet-4-6": { - "name": "Claude Sonnet 4.6 (Databricks)", - "limit": { - "context": 200000, - "output": 8192 - } - }, - "databricks-gemini-2-5-flash": { - "name": "Gemini 2.5 Flash (Databricks)", - "limit": { - "context": 1000000, - "output": 8192 - } - }, - "databricks-gemini-2-5-pro": { - "name": "Gemini 2.5 Pro (Databricks)", - "limit": { - "context": 1000000, - "output": 8192 - } - }, - "databricks-gemini-3-1-pro": { - "name": "Gemini 3.1 Pro (Databricks)", - "limit": { - "context": 1000000, - "output": 8192 - } - }, - } - }, - "databricks-openai": { - "npm": "@ai-sdk/openai-compatible", - "name": "Databricks AI Gateway (OpenAI)", - "options": { - "baseURL": f"{gateway_host}/openai/v1", - "apiKey": "{env:DATABRICKS_TOKEN}" - }, - "models": { - "databricks-gpt-5-2-codex": { - "name": "GPT 5.2 Codex (Databricks)", - "limit": { - "context": 200000, - "output": 16384 - } - }, - "databricks-gpt-5-1-codex-max": { - "name": "GPT 5.1 Codex Max (Databricks)", - "limit": { - "context": 200000, - "output": 16384 - } - } - } - } +opencode_config = { + "$schema": "https://opencode.ai/config.json", + 
"enabled_providers": ["databricks"], + "model": f"databricks/{anthropic_model}", + "mcp": { + "deepwiki": { + "type": "remote", + "url": "https://mcp.deepwiki.com/mcp", + "enabled": True, }, - "model": f"databricks/{anthropic_model}" - } -else: - # Fallback: current gateway using DATABRICKS_HOST /serving-endpoints (OpenAI-compatible) - opencode_config = { - "$schema": "https://opencode.ai/config.json", - "provider": { - "databricks": { - "npm": "@ai-sdk/openai-compatible", - "name": "Databricks Model Serving", - "options": { - "baseURL": f"{host}/serving-endpoints", - "apiKey": "{env:DATABRICKS_TOKEN}" - }, - "models": { - "databricks-claude-opus-4-6": { - "name": "Claude Opus 4.6 (Databricks)", - "limit": { - "context": 200000, - "output": 16384 - } - }, - "databricks-claude-sonnet-4-6": { - "name": "Claude Sonnet 4.6 (Databricks)", - "limit": { - "context": 200000, - "output": 8192 - } - }, - "databricks-gemini-2-5-flash": { - "name": "Gemini 2.5 Flash (Databricks)", - "limit": { - "context": 1000000, - "output": 8192 - } - }, - "databricks-gemini-2-5-pro": { - "name": "Gemini 2.5 Pro (Databricks)", - "limit": { - "context": 1000000, - "output": 8192 - } - }, - "databricks-gemini-3-1-pro": { - "name": "Gemini 3.1 Pro (Databricks)", - "limit": { - "context": 1000000, - "output": 8192 - } - }, - } - } + "exa": { + "type": "remote", + "url": "https://mcp.exa.ai/mcp", + "enabled": True, }, - "model": f"databricks/{anthropic_model}" - } + }, +} config_path = opencode_config_dir / "opencode.json" config_path.write_text(json.dumps(opencode_config, indent=2)) -print(f"OpenCode configured: {config_path}") - -# 3. 
Also create auth credentials for the databricks provider(s) -# OpenCode stores credentials at ~/.local/share/opencode/auth.json -opencode_data_dir = home / ".local" / "share" / "opencode" -opencode_data_dir.mkdir(parents=True, exist_ok=True) - -if gateway_host: - auth_data = { - "databricks": { - "api_key": gateway_token - }, - "databricks-openai": { - "api_key": gateway_token - } - } -else: - auth_data = { - "databricks": { - "api_key": token - } - } - -auth_path = opencode_data_dir / "auth.json" -auth_path.write_text(json.dumps(auth_data, indent=2)) -auth_path.chmod(0o600) -print(f"OpenCode auth configured: {auth_path}") - -print(f"\nOpenCode ready! Default model: {anthropic_model}") -print(" opencode # Start OpenCode TUI") -if gateway_host: - print(" opencode -m databricks-openai/databricks-gpt-5-2-codex # Use GPT 5.2 Codex") -print(" opencode -m databricks/databricks-gemini-2-5-flash # Use Gemini") -print(f" opencode -m databricks/{anthropic_model} # Use Claude (default)") +logger.info(f"OpenCode configured: {config_path}") +logger.info(" Provider: databricks (native, auto-discovers models)") +logger.info(f" Default model: databricks/{anthropic_model}") +logger.info(" MCP servers: deepwiki, exa") + +logger.info(f"OpenCode ready! Default model: {anthropic_model}") +logger.info(" opencode # Start OpenCode TUI") +logger.info(" opencode -m databricks/ # Use a specific model") +logger.info(" (Models auto-discovered from serving endpoints)") diff --git a/state_sync.py b/state_sync.py new file mode 100644 index 0000000..deefa2d --- /dev/null +++ b/state_sync.py @@ -0,0 +1,199 @@ +"""Bidirectional state sync between container and Databricks Workspace. + +Persists Claude Code auto-memory, shell history, and other state files +to /Workspace/Users/{email}/.state/ so they survive container restarts. 
"""Bidirectional state sync between container and Databricks Workspace.

Persists Claude Code auto-memory, shell history, and other state files
to /Workspace/Users/{email}/.state/ so they survive container restarts.
"""

import os
import base64
import time
import threading
import logging
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError

logger = logging.getLogger(__name__)

# Timeout for individual Workspace API calls (seconds)
WORKSPACE_API_TIMEOUT = 30

# Max file size to sync (bytes) - prevents syncing huge files
MAX_SYNC_FILE_SIZE = 10 * 1024 * 1024  # 10MB

# Directories/files to sync (relative to HOME)
STATE_ITEMS = [
    # Claude Code auto-memory (glob pattern)
    ".claude/projects/*/memory",
    # Shell history
    ".bash_history",
]

# Workspace destination prefix (under user's home)
WORKSPACE_STATE_PREFIX = ".state"


def _get_home() -> str:
    """Return $HOME, falling back to the app source dir when unset, empty or "/"."""
    home = os.environ.get("HOME", "/app/python/source_code")
    return home if home and home != "/" else "/app/python/source_code"


def _get_workspace_client():
    """Build a Databricks ``WorkspaceClient`` with default configuration."""
    # Imported lazily so importing this module does not require the SDK.
    from databricks.sdk import WorkspaceClient

    return WorkspaceClient()


def _get_user_email(w):
    """Return the ``user_name`` of the identity behind client ``w``."""
    return w.current_user.me().user_name


def _workspace_base(user_email: str) -> str:
    """Return the workspace directory that holds ``user_email``'s saved state."""
    return f"/Workspace/Users/{user_email}/{WORKSPACE_STATE_PREFIX}"


def _call_with_timeout(func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` in a worker thread, bounded by WORKSPACE_API_TIMEOUT.

    Returns the call's result. Raises ``FuturesTimeoutError`` if the call does
    not finish in time, or re-raises whatever ``func`` raised.

    The executor is deliberately NOT used as a context manager: leaving a
    ``with ThreadPoolExecutor()`` block calls ``shutdown(wait=True)``, which
    blocks until the worker finishes and so defeats the timeout entirely.
    A timed-out call keeps running in its worker thread in the background.
    """
    executor = ThreadPoolExecutor(max_workers=1)
    try:
        future = executor.submit(func, *args, **kwargs)
        # Read the module global at call time so the limit can be tuned.
        return future.result(timeout=WORKSPACE_API_TIMEOUT)
    finally:
        executor.shutdown(wait=False)


def _collect_files(home) -> list:
    """Collect all files under ``home`` matching the STATE_ITEMS patterns.

    Patterns containing ``*`` are globbed; a matching directory contributes
    every regular file beneath it. Other patterns are exact relative paths
    and are included only if they are regular files.
    """
    home_path = Path(home)
    files = []
    for pattern in STATE_ITEMS:
        if "*" in pattern:
            # Glob pattern — find matching directories/files
            for match in home_path.glob(pattern):
                if match.is_dir():
                    files.extend(f for f in match.rglob("*") if f.is_file())
                elif match.is_file():
                    files.append(match)
        else:
            # Exact path
            candidate = home_path / pattern
            if candidate.is_file():
                files.append(candidate)
    return files


def save_state() -> None:
    """Upload state files to Databricks Workspace.

    Best-effort: every failure is logged and swallowed so a sync problem can
    never take down the caller. A failure on one file does not stop the rest.
    """
    home = _get_home()
    try:
        w = _get_workspace_client()
        user_email = _get_user_email(w)
        base = _workspace_base(user_email)

        files = _collect_files(home)
        if not files:
            logger.info("State sync: no state files to save")
            return

        saved = 0
        ensured_dirs = set()  # workspace directories already created this run
        for file_path in files:
            rel = file_path.relative_to(home)
            # Workspace paths always use "/" regardless of the local OS.
            ws_path = f"{base}/{rel.as_posix()}"

            try:
                # stat() lives inside the per-file handler: a file that
                # vanished since collection must not abort the whole save.
                file_size = file_path.stat().st_size
                if file_size > MAX_SYNC_FILE_SIZE:
                    logger.warning(
                        "State sync: skipping %s (size %s exceeds %s)",
                        rel,
                        file_size,
                        MAX_SYNC_FILE_SIZE,
                    )
                    continue

                # Make sure the parent folder exists before importing into it;
                # per the Workspace API docs mkdirs also creates missing
                # ancestors and succeeds when the directory already exists.
                parent = ws_path.rsplit("/", 1)[0]
                if parent not in ensured_dirs:
                    w.workspace.mkdirs(parent)
                    ensured_dirs.add(parent)

                content = file_path.read_bytes()
                # NOTE(review): the SDK type-hints `format` as an ImportFormat
                # enum — confirm a plain "AUTO" string is accepted.
                w.workspace.import_(
                    path=ws_path,
                    content=base64.b64encode(content).decode(),
                    format="AUTO",
                    overwrite=True,
                )
                saved += 1
            except Exception as e:
                logger.warning("State sync: failed to save %s: %s", rel, e)

        logger.info("State saved: %s/%s files to %s", saved, len(files), base)
    except Exception as e:
        logger.warning("State sync save failed: %s", e)


def restore_state() -> None:
    """Download state files from Databricks Workspace.

    Best-effort: a missing state directory is treated as a first run, and
    every other failure is logged and swallowed.
    """
    home = _get_home()
    try:
        w = _get_workspace_client()
        user_email = _get_user_email(w)
        base = _workspace_base(user_email)

        # Check if state directory exists (with timeout)
        try:
            _call_with_timeout(w.workspace.get_status, base)
        except FuturesTimeoutError:
            logger.warning("State sync: timeout checking %s", base)
            return
        except Exception as e:
            # Any lookup failure is reported as "nothing saved yet"; the
            # underlying reason is kept at debug level for troubleshooting.
            logger.debug("State sync: get_status(%s) failed: %s", base, e)
            logger.info("State sync: no saved state found (first run)")
            return

        restored = _restore_recursive(w, base, Path(home))
        logger.info("State restored: %s files from %s", restored, base)
    except Exception as e:
        logger.warning("State sync restore failed: %s", e)


def _restore_recursive(w, ws_path, local_base):
    """Recursively download files from a workspace directory.

    Args:
        w: workspace client exposing ``workspace.list`` and ``workspace.export``.
        ws_path: workspace directory to walk.
        local_base: local ``Path`` that mirrors the ``.state`` root.

    Returns:
        Number of files written locally. A directory that cannot be listed
        contributes 0; a file that cannot be exported is skipped.
    """
    try:
        items = _call_with_timeout(lambda: list(w.workspace.list(ws_path)))
    except FuturesTimeoutError:
        logger.warning("State sync: timeout listing %s", ws_path)
        return 0
    except Exception as e:
        logger.warning("State sync: failed to list %s: %s", ws_path, e)
        return 0

    marker = f"/{WORKSPACE_STATE_PREFIX}/"
    restored = 0
    for item in items:
        # item.path is the full workspace path; only the part after the
        # first "/.state/" is mirrored under local_base.
        _, found, rel_path = item.path.partition(marker)
        if not found:
            continue
        local_path = local_base / rel_path

        if item.object_type and item.object_type.value == "DIRECTORY":
            restored += _restore_recursive(w, item.path, local_base)
            continue

        try:
            # NOTE(review): the SDK type-hints `format` as an ExportFormat
            # enum — confirm a plain "AUTO" string is accepted.
            response = _call_with_timeout(
                w.workspace.export, path=item.path, format="AUTO"
            )
            # Objects that export with empty content are not recreated locally.
            if response.content:
                local_path.parent.mkdir(parents=True, exist_ok=True)
                local_path.write_bytes(base64.b64decode(response.content))
                restored += 1
        except FuturesTimeoutError:
            logger.warning("State sync: timeout restoring %s", rel_path)
        except Exception as e:
            logger.warning("State sync: failed to restore %s: %s", rel_path, e)

    return restored


def start_periodic_sync(interval=300) -> threading.Thread:
    """Start a background thread that saves state every `interval` seconds.

    The thread is a daemon named "state-sync", so it never blocks process
    exit. The first save happens after one full interval. Returns the
    started thread.
    """

    def _sync_loop():
        while True:
            time.sleep(interval)
            try:
                save_state()
            except Exception as e:
                # save_state already swallows its own errors; this guard
                # keeps the loop alive should that ever change.
                logger.warning("Periodic state sync error: %s", e)

    thread = threading.Thread(target=_sync_loop, daemon=True, name="state-sync")
    thread.start()
    logger.info("Started periodic state sync (every %ss)", interval)
    return thread
Loading...
-
- ◀ -
-
-
-
Theme
- - -
-
Font
- -
- - - -
-
-
- - - - - -
-
+
+ Layout: + + + + + + + + + +
+ Ctrl+Shift+N: cycle focus
+ +
+
+
+
Loading...
- -
- - - - -
- - -
-
- - Voice Dictation - ⌥V -
- -
-
- - - Enter to send · Esc to cancel -
-
- -
- +