From c0620bcd681611a37848c839f881eb8da1ed1e40 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 12:49:41 -0400 Subject: [PATCH 01/21] fix: add LiteLLM local proxy to sanitize empty content blocks for OpenCode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenCode intermittently sends empty text content blocks in messages, which Databricks Foundation Model API strictly rejects with "text content blocks must be non-empty" (OpenCode #5028). This adds a LiteLLM proxy running on localhost:4000 inside the container that strips these blocks before they reach the API. Simpler alternative to PR #52's fork approach — no fork maintenance, proven fix via LiteLLM PR #20384, preserves full AI Gateway/MLflow/UC governance. Changes: - setup_litellm.py: new setup script, starts LiteLLM proxy with health check - setup_opencode.py: route baseURL through localhost:4000 instead of direct - app.py: add litellm setup step (sequential, before parallel agent setup) - requirements.txt: add litellm>=1.60 - docs/plans: design document with analysis of PR #52 trade-offs Co-Authored-By: Claude Opus 4.6 (1M context) --- app.py | 6 + ...-11-litellm-empty-content-blocks-design.md | 146 ++++++++++++++++ requirements.txt | 1 + setup_litellm.py | 165 ++++++++++++++++++ setup_opencode.py | 30 ++-- 5 files changed, 338 insertions(+), 10 deletions(-) create mode 100644 docs/plans/2026-03-11-litellm-empty-content-blocks-design.md create mode 100644 setup_litellm.py diff --git a/app.py b/app.py index a7188a4..0b40547 100644 --- a/app.py +++ b/app.py @@ -83,6 +83,7 @@ def handle_sigterm(signum, frame): "steps": [ {"id": "git", "label": "Configuring git identity", "status": "pending", "started_at": None, "completed_at": None, "error": None}, {"id": "micro", "label": "Installing micro editor", "status": "pending", "started_at": None, "completed_at": None, "error": None}, + {"id": "litellm", "label": "Starting LiteLLM proxy", "status": "pending", "started_at": None, "completed_at": None, "error": None}, {"id": "claude", "label": "Configuring Claude CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None}, {"id": "codex", "label": "Configuring Codex CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None}, {"id": "opencode", "label": "Configuring OpenCode CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None}, @@ -251,6 +252,11 @@ def run_setup(): _run_step("micro", ["bash", "-c", "mkdir -p ~/.local/bin && bash install_micro.sh && mv micro ~/.local/bin/ 2>/dev/null || true"]) + # --- LiteLLM proxy (must be running before OpenCode starts) --- + # Sanitizes empty text content blocks that cause "Bad Request" errors + # with Databricks Foundation Model API (see OpenCode #5028) + _run_step("litellm", ["python", "setup_litellm.py"]) + # --- Parallel agent setup (all independent of each other) --- parallel_steps = [ ("claude", ["python", "setup_claude.py"]), diff --git a/docs/plans/2026-03-11-litellm-empty-content-blocks-design.md b/docs/plans/2026-03-11-litellm-empty-content-blocks-design.md new file mode 100644 index 0000000..75c775a --- /dev/null +++ b/docs/plans/2026-03-11-litellm-empty-content-blocks-design.md @@ -0,0 +1,146 @@ +# Design: LiteLLM Local Proxy for Empty Content Block Sanitization + +**Date:** 2026-03-11 +**Branch:** `fix/litellm-empty-content-blocks` +**Related:** OpenCode [#5028](https://github.com/sst/opencode/issues/5028), LiteLLM [PR 
#20384](https://github.com/BerriAI/litellm/pull/20384) + +## Problem + +OpenCode intermittently sends malformed messages containing empty text content blocks +(`{"type": "text", "text": ""}`) to the Databricks Foundation Model API. This occurs during: + +1. **Streaming** — empty text blocks appear between thinking blocks in conversation history +2. **Compaction** — `/compact` command produces empty or whitespace-only blocks +3. **Model switching** — switching between models (e.g., Gemini to Claude) generates whitespace-only chunks + +The Databricks Foundation Model API strictly rejects these with: +``` +Bad Request: {"message":"messages: text content blocks must be non-empty"} +``` + +Once a corrupted message enters the conversation history, **every subsequent request fails** — +the session is permanently bricked. This is OpenCode issue +[#5028](https://github.com/sst/opencode/issues/5028), still open as of March 2026. + +## Why Not PR #52's Approach + +[PR #52](https://github.com/datasciencemonkey/coding-agents-databricks-apps/pull/52) proposes +forking OpenCode (`dgokeeffe/opencode`) to add a native Databricks provider. After analysis: + +1. **Does not fix the root cause** — The fork's `feat/databricks-ai-sdk-provider` branch + has no commits that sanitize empty content blocks. The bug originates in OpenCode's core + agent loop (conversation history management), not the provider layer. A native provider + sends whatever the core gives it. + +2. **Fork maintenance burden** — Must track upstream OpenCode releases indefinitely. + When upstream fixes #5028, the fork may conflict. + +3. **Scope creep** — PR #52 bundles the fork with a spawner app, GitHub CLI setup, + and performance fixes. These are independent concerns that should be separate PRs. + +4. **Fragile coupling** — Tightly couples our project to a fork that may diverge from + upstream, creating long-term maintenance risk for a demo/tool project. + +### What to cherry-pick from PR #52 (separately) + +PR #52 contains valuable changes that are **independent of the fork** and should be +extracted into their own PRs: + +- **Performance fixes** — `select()` timeout reduction (500ms → 50ms), lock contention + fixes in `get_output_batch()` and `cleanup_stale_sessions()`, poll-worker interval + reduction (100ms → 50ms). These are changes to `app.py` and `static/poll-worker.js`. + +- **WebSocket detection fix** — Correct Socket.IO transport detection that checks + `socket.io.engine.transport.name` instead of trusting `connected=true`. This is a + change to `static/index.html`. + +- **GitHub CLI setup** — Automated `gh` install with xterm.js-safe auth wrapper. + Standalone setup script. + +These should be reviewed and merged independently — they don't require the OpenCode fork. + +## Our Approach: LiteLLM Local Proxy + +Run a lightweight LiteLLM instance **inside the same container** on an internal port. +It intercepts requests from OpenCode, strips empty content blocks via the sanitization +logic added in [LiteLLM PR #20384](https://github.com/BerriAI/litellm/pull/20384), +and forwards clean messages to Databricks AI Gateway. 
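+
+As a minimal sketch (illustrative only — the real logic lives in LiteLLM per PR #20384, and
+the function name below is hypothetical), the sanitization amounts to filtering each
+message's content blocks before the request is forwarded upstream:
+
+```python
+def strip_empty_text_blocks(messages: list[dict]) -> list[dict]:
+    """Drop {"type": "text", "text": ""} and whitespace-only text blocks from each message."""
+    cleaned = []
+    for msg in messages:
+        content = msg.get("content")
+        if isinstance(content, list):
+            content = [
+                block for block in content
+                if not (
+                    isinstance(block, dict)
+                    and block.get("type") == "text"
+                    and block.get("text", "").strip() == ""
+                )
+            ]
+            msg = {**msg, "content": content}
+        cleaned.append(msg)
+    return cleaned
+```
+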
+ +### Architecture + +``` +Users → port 8000 (Flask/xterm.js UI) + ↓ spawns PTY + OpenCode → localhost:4000 (LiteLLM) → Databricks AI Gateway → Claude/Gemini +``` + +- **Port 8000** — Flask/Gunicorn (exposed to users via Databricks Apps) +- **Port 4000** — LiteLLM proxy (internal only, never exposed externally) +- Databricks Apps only routes external traffic to port 8000 + +### What LiteLLM Does + +For every outbound request, LiteLLM strips: +- Empty text blocks: `{"type": "text", "text": ""}` +- Whitespace-only text blocks: `{"type": "text", "text": " "}` + +This happens on **every request**, so even if OpenCode's conversation history is corrupted, +LiteLLM cleans it before it reaches Databricks. When upstream OpenCode eventually fixes +#5028, LiteLLM becomes a no-op (nothing to strip) — it degrades gracefully. + +### Implementation Plan + +#### 1. Add `litellm` to `requirements.txt` + +``` +litellm>=1.60 +``` + +#### 2. Create `setup_litellm.py` + +New setup script that: +- Writes a LiteLLM config YAML pointing to Databricks AI Gateway +- Starts LiteLLM as a background process on `localhost:4000` +- Waits for the health endpoint to confirm it's ready +- Maps each Databricks model to the `databricks/` prefix so the sanitization path activates + +#### 3. Update `setup_opencode.py` + +Change OpenCode's `baseURL` from the Databricks Gateway URL to `http://localhost:4000` +so all requests route through LiteLLM first. The model names and auth stay the same. + +#### 4. Add `litellm` setup step to `app.py` + +Add a new step in `run_setup()` that runs **before** the parallel agent setup +(LiteLLM must be running before OpenCode starts using it): + +```python +# Sequential: LiteLLM proxy must be running before agents that use it +_run_step("litellm", ["python", "setup_litellm.py"]) + +# Then parallel agent setup... +``` + +#### 5. Health check + +`setup_litellm.py` should poll `http://localhost:4000/health` before returning success, +ensuring the proxy is ready before OpenCode sends its first request. + +### Trade-offs + +| Aspect | Impact | +|--------|--------| +| Added dependency | `litellm` package (~small footprint as proxy) | +| Added latency | Negligible — localhost hop, no network | +| Startup time | ~2-3s for LiteLLM to start (sequential, before agents) | +| Maintenance | Zero — LiteLLM is a well-maintained OSS project | +| Graceful degradation | When #5028 is fixed upstream, proxy strips nothing | +| Governance preserved | AI Gateway, MLflow tracing, Unity Catalog all intact | + +### Testing + +1. Deploy to Databricks Apps +2. Launch OpenCode with `databricks-claude-opus-4-6` +3. Run 10+ iterations including `/compact` — verify no 400 errors +4. Check MLflow traces — confirm requests still flow through AI Gateway +5. Verify LiteLLM is NOT accessible from outside the container (port 4000 not exposed) diff --git a/requirements.txt b/requirements.txt index a9c32c9..027a070 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ claude-agent-sdk databricks-sdk>=0.20.0 mlflow[genai]>=3.4 opentelemetry-exporter-otlp-proto-grpc +litellm>=1.60 diff --git a/setup_litellm.py b/setup_litellm.py new file mode 100644 index 0000000..33c98f0 --- /dev/null +++ b/setup_litellm.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python +"""Start LiteLLM as a local proxy to sanitize empty content blocks before they reach Databricks. 
+ +OpenCode occasionally produces empty text content blocks in messages, which the Databricks +Foundation Model API rejects with: "messages: text content blocks must be non-empty" +(see https://github.com/sst/opencode/issues/5028). + +LiteLLM strips these empty blocks before forwarding requests to the Databricks AI Gateway, +fixing the issue without forking OpenCode. The proxy runs on localhost:4000 (internal only, +never exposed externally). + +Related: https://github.com/BerriAI/litellm/pull/20384 +""" +import os +import sys +import json +import time +import subprocess +from pathlib import Path +from urllib.request import urlopen, Request +from urllib.error import URLError + +from utils import ensure_https + +LITELLM_PORT = 4000 +LITELLM_HOST = "127.0.0.1" +HEALTH_TIMEOUT = 30 # seconds to wait for LiteLLM to be ready +HEALTH_POLL_INTERVAL = 1 # seconds between health checks + +# Set HOME if not properly set +if not os.environ.get("HOME") or os.environ["HOME"] == "/": + os.environ["HOME"] = "/app/python/source_code" + +home = Path(os.environ["HOME"]) + +# Databricks configuration +gateway_host = ensure_https(os.environ.get("DATABRICKS_GATEWAY_HOST", "").rstrip("/")) +host = ensure_https(os.environ.get("DATABRICKS_HOST", "").rstrip("/")) +token = os.environ.get("DATABRICKS_TOKEN", "") + +if not token: + print("Warning: DATABRICKS_TOKEN not set, skipping LiteLLM proxy setup") + sys.exit(0) + +# Determine the upstream base URL (AI Gateway or direct serving endpoints) +if gateway_host: + upstream_base = f"{gateway_host}/mlflow/v1" + print(f"LiteLLM proxy will forward to AI Gateway: {gateway_host}") +else: + upstream_base = f"{host}/serving-endpoints" + print(f"LiteLLM proxy will forward to: {host}/serving-endpoints") + +# Build LiteLLM config +# Models use the databricks/ prefix so LiteLLM's sanitization logic activates +# (see https://github.com/BerriAI/litellm/pull/20384) +litellm_config = { + "model_list": [ + { + "model_name": "databricks-claude-opus-4-6", + "litellm_params": { + "model": "databricks/databricks-claude-opus-4-6", + "api_key": f"os.environ/DATABRICKS_TOKEN", + "api_base": upstream_base, + } + }, + { + "model_name": "databricks-claude-sonnet-4-6", + "litellm_params": { + "model": "databricks/databricks-claude-sonnet-4-6", + "api_key": f"os.environ/DATABRICKS_TOKEN", + "api_base": upstream_base, + } + }, + { + "model_name": "databricks-gemini-2-5-flash", + "litellm_params": { + "model": "databricks/databricks-gemini-2-5-flash", + "api_key": f"os.environ/DATABRICKS_TOKEN", + "api_base": upstream_base, + } + }, + { + "model_name": "databricks-gemini-2-5-pro", + "litellm_params": { + "model": "databricks/databricks-gemini-2-5-pro", + "api_key": f"os.environ/DATABRICKS_TOKEN", + "api_base": upstream_base, + } + }, + { + "model_name": "databricks-gemini-3-1-pro", + "litellm_params": { + "model": "databricks/databricks-gemini-3-1-pro", + "api_key": f"os.environ/DATABRICKS_TOKEN", + "api_base": upstream_base, + } + }, + ], + "general_settings": { + "master_key": None, # No auth needed for localhost-only proxy + } +} + +# Write config +config_dir = home / ".config" / "litellm" +config_dir.mkdir(parents=True, exist_ok=True) +config_path = config_dir / "config.yaml" + +# LiteLLM accepts YAML; write as JSON which is valid YAML +config_path.write_text(json.dumps(litellm_config, indent=2)) + +print(f"LiteLLM config written to {config_path}") + +# Start LiteLLM as a background process +log_path = home / ".litellm-proxy.log" +print(f"Starting LiteLLM proxy on 
{LITELLM_HOST}:{LITELLM_PORT}...") + +proc = subprocess.Popen( + [ + sys.executable, "-m", "litellm", + "--config", str(config_path), + "--host", LITELLM_HOST, + "--port", str(LITELLM_PORT), + "--detailed_debug", "false", + "--drop_params", # Drop unsupported params instead of erroring + ], + stdout=open(log_path, "w"), + stderr=subprocess.STDOUT, + env=os.environ.copy(), + start_new_session=True, # Detach from parent process group +) + +# Write PID file for cleanup +pid_path = home / ".litellm-proxy.pid" +pid_path.write_text(str(proc.pid)) +print(f"LiteLLM proxy started (PID: {proc.pid})") + +# Wait for health check +health_url = f"http://{LITELLM_HOST}:{LITELLM_PORT}/health" +start = time.time() +ready = False + +while time.time() - start < HEALTH_TIMEOUT: + try: + resp = urlopen(Request(health_url), timeout=2) + if resp.status == 200: + ready = True + break + except (URLError, OSError): + pass + + # Check if process died + if proc.poll() is not None: + print(f"Error: LiteLLM proxy exited with code {proc.returncode}") + print(f"Check logs at {log_path}") + sys.exit(1) + + time.sleep(HEALTH_POLL_INTERVAL) + +if ready: + elapsed = time.time() - start + print(f"LiteLLM proxy ready on {LITELLM_HOST}:{LITELLM_PORT} ({elapsed:.1f}s)") +else: + print(f"Warning: LiteLLM health check timed out after {HEALTH_TIMEOUT}s") + print(f"Proxy may still be starting — check logs at {log_path}") diff --git a/setup_opencode.py b/setup_opencode.py index 5e46078..4dc463b 100644 --- a/setup_opencode.py +++ b/setup_opencode.py @@ -1,5 +1,11 @@ #!/usr/bin/env python -"""Configure OpenCode CLI with Databricks Model Serving as an OpenAI-compatible provider.""" +"""Configure OpenCode CLI with Databricks Model Serving via LiteLLM local proxy. + +Routes requests through a local LiteLLM proxy (localhost:4000) which sanitizes empty +text content blocks before forwarding to Databricks AI Gateway. This fixes OpenCode +issue #5028 where empty content blocks cause "Bad Request" errors. +See docs/plans/2026-03-11-litellm-empty-content-blocks-design.md for details. 
+""" import os import json import subprocess @@ -7,6 +13,10 @@ from utils import ensure_https +# LiteLLM local proxy — sanitizes empty content blocks before reaching Databricks +# (see https://github.com/sst/opencode/issues/5028) +LITELLM_PROXY_URL = "http://127.0.0.1:4000" + # Set HOME if not properly set if not os.environ.get("HOME") or os.environ["HOME"] == "/": os.environ["HOME"] = "/app/python/source_code" @@ -64,18 +74,17 @@ opencode_config_dir.mkdir(parents=True, exist_ok=True) if gateway_host: - # Gateway mode: separate providers for different API protocols - # SDK auto-appends /chat/completions and /responses to baseURL - # - Anthropic/Gemini models: baseURL={gateway}/mlflow/v1 → /mlflow/v1/chat/completions - # - OpenAI/GPT models: baseURL={gateway}/openai/v1 → /openai/v1/responses + # Gateway mode: route through LiteLLM proxy for content block sanitization + # LiteLLM forwards clean requests to Databricks AI Gateway + # OpenAI/GPT models go direct (not affected by the empty content block bug) opencode_config = { "$schema": "https://opencode.ai/config.json", "provider": { "databricks": { "npm": "@ai-sdk/openai-compatible", - "name": "Databricks AI Gateway (MLflow)", + "name": "Databricks AI Gateway via LiteLLM", "options": { - "baseURL": f"{gateway_host}/mlflow/v1", + "baseURL": LITELLM_PROXY_URL, "apiKey": "{env:DATABRICKS_TOKEN}" }, "models": { @@ -144,15 +153,16 @@ "model": f"databricks/{anthropic_model}" } else: - # Fallback: current gateway using DATABRICKS_HOST /serving-endpoints (OpenAI-compatible) + # Fallback: route through LiteLLM proxy for content block sanitization + # LiteLLM forwards clean requests to Databricks serving endpoints opencode_config = { "$schema": "https://opencode.ai/config.json", "provider": { "databricks": { "npm": "@ai-sdk/openai-compatible", - "name": "Databricks Model Serving", + "name": "Databricks Model Serving via LiteLLM", "options": { - "baseURL": f"{host}/serving-endpoints", + "baseURL": LITELLM_PROXY_URL, "apiKey": "{env:DATABRICKS_TOKEN}" }, "models": { From 42a78b78a87d13642fd3f95d1ac6d454aee2df80 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 13:01:32 -0400 Subject: [PATCH 02/21] docs: update design doc with clearer architecture description Co-Authored-By: Claude Opus 4.6 (1M context) --- ...-11-litellm-empty-content-blocks-design.md | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/docs/plans/2026-03-11-litellm-empty-content-blocks-design.md b/docs/plans/2026-03-11-litellm-empty-content-blocks-design.md index 75c775a..745def2 100644 --- a/docs/plans/2026-03-11-litellm-empty-content-blocks-design.md +++ b/docs/plans/2026-03-11-litellm-empty-content-blocks-design.md @@ -68,25 +68,35 @@ and forwards clean messages to Databricks AI Gateway. ### Architecture +In the current setup, **OpenCode** talks directly to the **Databricks AI Gateway**. +Because OpenCode sends malformed "empty text blocks," the Gateway rejects them +immediately with a 400 error. + +By introducing **LiteLLM**, we change the traffic flow inside the container: + ``` Users → port 8000 (Flask/xterm.js UI) ↓ spawns PTY OpenCode → localhost:4000 (LiteLLM) → Databricks AI Gateway → Claude/Gemini ``` +1. **OpenCode** (the agent) sends the request to `http://localhost:4000` (the **LiteLLM Proxy**). +2. **LiteLLM** intercepts the request *before* it leaves the container. +3. **LiteLLM** applies the sanitization logic (stripping the `{"type": "text", "text": ""}` blocks). +4. 
**LiteLLM** then forwards the "cleaned" request to the **Databricks AI Gateway**. +5. **Databricks** receives a perfectly valid request and processes it. + +So, while the traffic eventually reaches Databricks, it is "washed" by LiteLLM locally +first. This ensures that the Databricks Gateway never sees the malformed data that causes +it to throw an error. + - **Port 8000** — Flask/Gunicorn (exposed to users via Databricks Apps) - **Port 4000** — LiteLLM proxy (internal only, never exposed externally) - Databricks Apps only routes external traffic to port 8000 -### What LiteLLM Does - -For every outbound request, LiteLLM strips: -- Empty text blocks: `{"type": "text", "text": ""}` -- Whitespace-only text blocks: `{"type": "text", "text": " "}` - -This happens on **every request**, so even if OpenCode's conversation history is corrupted, -LiteLLM cleans it before it reaches Databricks. When upstream OpenCode eventually fixes -#5028, LiteLLM becomes a no-op (nothing to strip) — it degrades gracefully. +When upstream OpenCode eventually fixes #5028, LiteLLM becomes a no-op (nothing to +strip) — it degrades gracefully. At that point, remove `setup_litellm.py`, revert the +baseURL in `setup_opencode.py`, and drop the dependency. ### Implementation Plan From cc7fb63e82a035728cd1415180b2c87d7eef35fd Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 13:17:39 -0400 Subject: [PATCH 03/21] fix: remove invalid CLI flags from LiteLLM startup --detailed_debug is a boolean flag (not key-value) and --drop_params is a config setting, not a CLI arg. Invalid args were causing LiteLLM to fail to start. Moved drop_params into the YAML config under litellm_settings. Co-Authored-By: Claude Opus 4.6 (1M context) --- setup_litellm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup_litellm.py b/setup_litellm.py index 33c98f0..3ba231a 100644 --- a/setup_litellm.py +++ b/setup_litellm.py @@ -96,6 +96,9 @@ } }, ], + "litellm_settings": { + "drop_params": True, # Drop unsupported params instead of erroring + }, "general_settings": { "master_key": None, # No auth needed for localhost-only proxy } @@ -121,8 +124,6 @@ "--config", str(config_path), "--host", LITELLM_HOST, "--port", str(LITELLM_PORT), - "--detailed_debug", "false", - "--drop_params", # Drop unsupported params instead of erroring ], stdout=open(log_path, "w"), stderr=subprocess.STDOUT, From ba49d303cf283343a36a2c44ecdbd7ce5da0d196 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 13:23:18 -0400 Subject: [PATCH 04/21] fix: replace LiteLLM with lightweight custom proxy LiteLLM proxy requires litellm[proxy] (fastapi, uvicorn, etc.) which was failing to start in the container. Replaced with a minimal ~80 line HTTP proxy using stdlib http.server + requests (already installed). Same sanitization logic: strips empty/whitespace-only text content blocks from messages before forwarding to Databricks. Zero new dependencies. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- requirements.txt | 2 +- setup_litellm.py | 253 +++++++++++++++++++++++++++++------------------ 2 files changed, 159 insertions(+), 96 deletions(-) diff --git a/requirements.txt b/requirements.txt index 027a070..f175656 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ claude-agent-sdk databricks-sdk>=0.20.0 mlflow[genai]>=3.4 opentelemetry-exporter-otlp-proto-grpc -litellm>=1.60 +requests>=2.28 diff --git a/setup_litellm.py b/setup_litellm.py index 3ba231a..03fe705 100644 --- a/setup_litellm.py +++ b/setup_litellm.py @@ -1,15 +1,14 @@ #!/usr/bin/env python -"""Start LiteLLM as a local proxy to sanitize empty content blocks before they reach Databricks. +"""Start a lightweight local proxy to sanitize empty content blocks before they reach Databricks. OpenCode occasionally produces empty text content blocks in messages, which the Databricks Foundation Model API rejects with: "messages: text content blocks must be non-empty" (see https://github.com/sst/opencode/issues/5028). -LiteLLM strips these empty blocks before forwarding requests to the Databricks AI Gateway, -fixing the issue without forking OpenCode. The proxy runs on localhost:4000 (internal only, -never exposed externally). +This proxy strips empty/whitespace-only text blocks before forwarding requests to the +Databricks AI Gateway. Runs on localhost:4000 (internal only, never exposed externally). -Related: https://github.com/BerriAI/litellm/pull/20384 +No external dependencies — uses stdlib + requests (already installed via databricks-sdk). """ import os import sys @@ -22,10 +21,10 @@ from utils import ensure_https -LITELLM_PORT = 4000 -LITELLM_HOST = "127.0.0.1" -HEALTH_TIMEOUT = 30 # seconds to wait for LiteLLM to be ready -HEALTH_POLL_INTERVAL = 1 # seconds between health checks +PROXY_PORT = 4000 +PROXY_HOST = "127.0.0.1" +HEALTH_TIMEOUT = 15 # seconds to wait for proxy to be ready +HEALTH_POLL_INTERVAL = 0.5 # Set HOME if not properly set if not os.environ.get("HOME") or os.environ["HOME"] == "/": @@ -39,92 +38,150 @@ token = os.environ.get("DATABRICKS_TOKEN", "") if not token: - print("Warning: DATABRICKS_TOKEN not set, skipping LiteLLM proxy setup") + print("Warning: DATABRICKS_TOKEN not set, skipping proxy setup") sys.exit(0) -# Determine the upstream base URL (AI Gateway or direct serving endpoints) +# Determine the upstream base URL if gateway_host: upstream_base = f"{gateway_host}/mlflow/v1" - print(f"LiteLLM proxy will forward to AI Gateway: {gateway_host}") + print(f"Content-filter proxy will forward to AI Gateway: {gateway_host}") else: upstream_base = f"{host}/serving-endpoints" - print(f"LiteLLM proxy will forward to: {host}/serving-endpoints") - -# Build LiteLLM config -# Models use the databricks/ prefix so LiteLLM's sanitization logic activates -# (see https://github.com/BerriAI/litellm/pull/20384) -litellm_config = { - "model_list": [ - { - "model_name": "databricks-claude-opus-4-6", - "litellm_params": { - "model": "databricks/databricks-claude-opus-4-6", - "api_key": f"os.environ/DATABRICKS_TOKEN", - "api_base": upstream_base, - } - }, - { - "model_name": "databricks-claude-sonnet-4-6", - "litellm_params": { - "model": "databricks/databricks-claude-sonnet-4-6", - "api_key": f"os.environ/DATABRICKS_TOKEN", - "api_base": upstream_base, - } - }, - { - "model_name": "databricks-gemini-2-5-flash", - "litellm_params": { - "model": "databricks/databricks-gemini-2-5-flash", - "api_key": f"os.environ/DATABRICKS_TOKEN", - "api_base": upstream_base, - } 
- }, - { - "model_name": "databricks-gemini-2-5-pro", - "litellm_params": { - "model": "databricks/databricks-gemini-2-5-pro", - "api_key": f"os.environ/DATABRICKS_TOKEN", - "api_base": upstream_base, - } - }, - { - "model_name": "databricks-gemini-3-1-pro", - "litellm_params": { - "model": "databricks/databricks-gemini-3-1-pro", - "api_key": f"os.environ/DATABRICKS_TOKEN", - "api_base": upstream_base, - } - }, - ], - "litellm_settings": { - "drop_params": True, # Drop unsupported params instead of erroring - }, - "general_settings": { - "master_key": None, # No auth needed for localhost-only proxy - } -} - -# Write config -config_dir = home / ".config" / "litellm" -config_dir.mkdir(parents=True, exist_ok=True) -config_path = config_dir / "config.yaml" - -# LiteLLM accepts YAML; write as JSON which is valid YAML -config_path.write_text(json.dumps(litellm_config, indent=2)) - -print(f"LiteLLM config written to {config_path}") - -# Start LiteLLM as a background process -log_path = home / ".litellm-proxy.log" -print(f"Starting LiteLLM proxy on {LITELLM_HOST}:{LITELLM_PORT}...") + print(f"Content-filter proxy will forward to: {host}/serving-endpoints") + +# Write the proxy server script +proxy_script = home / ".content-filter-proxy.py" +proxy_script.write_text(f'''#!/usr/bin/env python +"""Minimal HTTP proxy that strips empty text content blocks from OpenAI-compatible API requests.""" +import json +import sys +from http.server import HTTPServer, BaseHTTPRequestHandler +import requests + +UPSTREAM_BASE = "{upstream_base}" +LISTEN_HOST = "{PROXY_HOST}" +LISTEN_PORT = {PROXY_PORT} + + +def sanitize_messages(messages): + """Strip empty/whitespace-only text content blocks from messages.""" + if not isinstance(messages, list): + return messages + cleaned = [] + for msg in messages: + content = msg.get("content") + if isinstance(content, list): + filtered = [ + block for block in content + if not ( + isinstance(block, dict) + and block.get("type") == "text" + and block.get("text", "").strip() == "" + ) + ] + # If all content blocks were empty, keep the message but with empty list + # (let the API decide how to handle it) + msg = {{**msg, "content": filtered if filtered else content[:0]}} + elif isinstance(content, str) and content.strip() == "": + # Skip messages with empty string content + continue + cleaned.append(msg) + return cleaned + + +class ProxyHandler(BaseHTTPRequestHandler): + def do_POST(self): + # Read request body + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length) + + # Parse and sanitize + try: + data = json.loads(body) + if "messages" in data: + data["messages"] = sanitize_messages(data["messages"]) + body = json.dumps(data).encode() + except (json.JSONDecodeError, KeyError): + pass # Forward as-is if not JSON + + # Build upstream URL + upstream_url = UPSTREAM_BASE + self.path + + # Forward headers (pass through auth, content-type, etc.) 
+ headers = {{}} + for key in self.headers: + if key.lower() not in ("host", "content-length", "transfer-encoding"): + headers[key] = self.headers[key] + headers["Content-Length"] = str(len(body)) + + # Check if client wants streaming + is_stream = False + try: + is_stream = json.loads(body).get("stream", False) + except Exception: + pass + + try: + resp = requests.post( + upstream_url, + data=body, + headers=headers, + stream=is_stream, + timeout=300, + ) + + # Send response status and headers + self.send_response(resp.status_code) + for key, value in resp.headers.items(): + if key.lower() not in ("transfer-encoding", "content-encoding", "content-length"): + self.send_header(key, value) + self.end_headers() + + # Stream or send response body + if is_stream: + for chunk in resp.iter_content(chunk_size=1024): + if chunk: + self.wfile.write(chunk) + self.wfile.flush() + else: + self.wfile.write(resp.content) + + except requests.exceptions.ConnectionError as e: + self.send_error(502, f"Upstream connection failed: {{e}}") + except requests.exceptions.Timeout: + self.send_error(504, "Upstream timeout") + + def do_GET(self): + """Health check endpoint.""" + if self.path == "/health": + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(json.dumps({{"status": "ok", "upstream": UPSTREAM_BASE}}).encode()) + else: + self.send_error(404) + + def log_message(self, format, *args): + """Suppress request logging to keep container logs clean.""" + pass + + +if __name__ == "__main__": + server = HTTPServer((LISTEN_HOST, LISTEN_PORT), ProxyHandler) + print(f"Content-filter proxy listening on {{LISTEN_HOST}}:{{LISTEN_PORT}}") + print(f"Forwarding to: {{UPSTREAM_BASE}}") + sys.stdout.flush() + server.serve_forever() +''') + +print(f"Proxy script written to {proxy_script}") + +# Start proxy as a background process +log_path = home / ".content-filter-proxy.log" +print(f"Starting content-filter proxy on {PROXY_HOST}:{PROXY_PORT}...") proc = subprocess.Popen( - [ - sys.executable, "-m", "litellm", - "--config", str(config_path), - "--host", LITELLM_HOST, - "--port", str(LITELLM_PORT), - ], + [sys.executable, str(proxy_script)], stdout=open(log_path, "w"), stderr=subprocess.STDOUT, env=os.environ.copy(), @@ -132,12 +189,12 @@ ) # Write PID file for cleanup -pid_path = home / ".litellm-proxy.pid" +pid_path = home / ".content-filter-proxy.pid" pid_path.write_text(str(proc.pid)) -print(f"LiteLLM proxy started (PID: {proc.pid})") +print(f"Proxy started (PID: {proc.pid})") # Wait for health check -health_url = f"http://{LITELLM_HOST}:{LITELLM_PORT}/health" +health_url = f"http://{PROXY_HOST}:{PROXY_PORT}/health" start = time.time() ready = False @@ -152,15 +209,21 @@ # Check if process died if proc.poll() is not None: - print(f"Error: LiteLLM proxy exited with code {proc.returncode}") - print(f"Check logs at {log_path}") + print(f"Error: Proxy exited with code {proc.returncode}") + try: + print(f"Logs: {log_path.read_text()[:500]}") + except Exception: + pass sys.exit(1) time.sleep(HEALTH_POLL_INTERVAL) if ready: elapsed = time.time() - start - print(f"LiteLLM proxy ready on {LITELLM_HOST}:{LITELLM_PORT} ({elapsed:.1f}s)") + print(f"Content-filter proxy ready on {PROXY_HOST}:{PROXY_PORT} ({elapsed:.1f}s)") else: - print(f"Warning: LiteLLM health check timed out after {HEALTH_TIMEOUT}s") - print(f"Proxy may still be starting — check logs at {log_path}") + print(f"Warning: Proxy health check timed out after {HEALTH_TIMEOUT}s") + try: + print(f"Logs: 
{log_path.read_text()[:500]}") + except Exception: + pass From 517d21d4b1548e905dba59d6d89b89ca9b0f3bcf Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 15:32:29 -0400 Subject: [PATCH 05/21] fix: comprehensive content-filter proxy with request + response fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace inline f-string proxy with standalone content_filter_proxy.py: Request-side (sanitize what OpenCode sends): - Strip empty/whitespace-only text content blocks (#5028) - Strip orphaned tool_result blocks (Anthropic format) - Strip orphaned tool messages (OpenAI format) - Remove empty messages after filtering Response-side (fix what Databricks returns): - Remap 'databricks-tool-call' back to real tool names - Fix finish_reason: 'stop' → 'tool_calls' when tools invoked - SSE stream parsing with buffered tool name resolution Zero external dependencies (stdlib http.server + requests via databricks-sdk). ThreadingMixIn for concurrent health checks during streaming. All fixes verified with unit tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- content_filter_proxy.py | 478 ++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 - setup_litellm.py | 162 ++------------ 3 files changed, 497 insertions(+), 144 deletions(-) create mode 100644 content_filter_proxy.py diff --git a/content_filter_proxy.py b/content_filter_proxy.py new file mode 100644 index 0000000..2291a0d --- /dev/null +++ b/content_filter_proxy.py @@ -0,0 +1,478 @@ +#!/usr/bin/env python +"""Lightweight HTTP proxy that sanitizes requests and responses between OpenCode and Databricks. + +Request-side fixes: + - Strips empty/whitespace-only text content blocks (OpenCode #5028) + - Strips orphaned tool_result blocks with no matching tool_use + - Removes empty messages after filtering + +Response-side fixes: + - Remaps 'databricks-tool-call' back to real tool names + - Fixes finish_reason when tool calls are present + +Runs on localhost (never exposed externally). Zero external dependencies +beyond stdlib + requests (already installed via databricks-sdk). + +See: https://github.com/sst/opencode/issues/5028 + https://github.com/BerriAI/litellm/pull/20384 +""" +import json +import os +import sys +from http.server import HTTPServer, BaseHTTPRequestHandler +from socketserver import ThreadingMixIn + +import requests + +UPSTREAM_BASE = os.environ.get("PROXY_UPSTREAM_BASE", "") +LISTEN_HOST = os.environ.get("PROXY_HOST", "127.0.0.1") +LISTEN_PORT = int(os.environ.get("PROXY_PORT", "4000")) + + +# --------------------------------------------------------------------------- +# Request-side sanitization +# --------------------------------------------------------------------------- + +def sanitize_messages(messages): + """Strip empty text blocks and orphaned tool_result/tool messages.""" + if not isinstance(messages, list): + return messages + + # First pass: collect tool_use/tool_call IDs per assistant message index + # so we can validate tool_results in the following user/tool message. 
+ assistant_tool_ids = {} # msg_index -> set of tool IDs + for i, msg in enumerate(messages): + role = msg.get("role", "") + if role != "assistant": + continue + ids = set() + # Anthropic format: content blocks with type=tool_use + content = msg.get("content") + if isinstance(content, list): + for block in content: + if isinstance(block, dict) and block.get("type") == "tool_use": + tid = block.get("id") + if tid: + ids.add(tid) + # OpenAI format: tool_calls array + for tc in msg.get("tool_calls", []): + tid = tc.get("id") + if tid: + ids.add(tid) + assistant_tool_ids[i] = ids + + # Second pass: clean messages + cleaned = [] + for i, msg in enumerate(messages): + role = msg.get("role", "") + content = msg.get("content") + + # Find the most recent preceding assistant message's tool IDs + prev_tool_ids = set() + for j in range(i - 1, -1, -1): + if messages[j].get("role") == "assistant": + prev_tool_ids = assistant_tool_ids.get(j, set()) + break + + # --- Handle list content (Anthropic format) --- + if isinstance(content, list): + filtered = [] + for block in content: + if not isinstance(block, dict): + filtered.append(block) + continue + + # Strip empty/whitespace-only text blocks + if block.get("type") == "text" and block.get("text", "").strip() == "": + continue + + # Strip orphaned tool_result blocks + if block.get("type") == "tool_result": + tool_use_id = block.get("tool_use_id") + if tool_use_id and tool_use_id not in prev_tool_ids: + continue + + filtered.append(block) + + if not filtered: + # Don't drop assistant messages (would break alternation) + if role == "assistant": + msg = {**msg, "content": filtered} + else: + continue + else: + msg = {**msg, "content": filtered} + + # --- Handle OpenAI tool messages --- + elif role == "tool": + tool_call_id = msg.get("tool_call_id") + if tool_call_id and tool_call_id not in prev_tool_ids: + continue # Orphaned tool response + + # --- Handle empty string content --- + elif isinstance(content, str) and content.strip() == "": + if role != "assistant": + continue + + cleaned.append(msg) + + return cleaned + + +# --------------------------------------------------------------------------- +# Response-side fixes +# --------------------------------------------------------------------------- + +def remap_tool_call(tool_call): + """If tool name is 'databricks-tool-call', extract real name from arguments.""" + func = tool_call.get("function", {}) + if func.get("name") != "databricks-tool-call": + return tool_call + + args_str = func.get("arguments", "") + try: + args = json.loads(args_str) + if isinstance(args, dict) and "name" in args: + real_name = args.pop("name") + tool_call = {**tool_call, "function": { + **func, + "name": real_name, + "arguments": json.dumps(args), + }} + except (json.JSONDecodeError, TypeError): + pass # Can't parse — leave as-is + + return tool_call + + +def fix_response_data(data): + """Fix tool names and finish_reason in a parsed response object.""" + if not isinstance(data, dict): + return data + + for choice in data.get("choices", []): + # Non-streaming: choice.message + message = choice.get("message", {}) + tool_calls = message.get("tool_calls", []) + if tool_calls: + message["tool_calls"] = [remap_tool_call(tc) for tc in tool_calls] + # Fix finish_reason: should be "tool_calls" if tools are invoked + if choice.get("finish_reason") == "stop" and tool_calls: + choice["finish_reason"] = "tool_calls" + + # Streaming: choice.delta + delta = choice.get("delta", {}) + delta_tool_calls = delta.get("tool_calls", []) + if 
delta_tool_calls: + delta["tool_calls"] = [remap_tool_call(tc) for tc in delta_tool_calls] + + # Fix finish_reason for streaming chunks + if choice.get("finish_reason") == "stop" and delta_tool_calls: + choice["finish_reason"] = "tool_calls" + + return data + + +# --------------------------------------------------------------------------- +# SSE stream processing +# --------------------------------------------------------------------------- + +class SSEProcessor: + """Buffers and fixes SSE events, handling tool name remapping across chunks.""" + + def __init__(self): + # Per tool-call-index state for streaming name resolution + # {index: {"args_buffer": str, "resolved_name": str|None, "buffered_lines": []}} + self._tool_state = {} + self._pending_flush = [] + + def process_line(self, line): + """Process one SSE line. Returns list of lines to send (may be empty if buffering).""" + # Non-data lines pass through immediately + if not line.startswith("data: "): + return [line] + + payload = line[6:] # Strip "data: " prefix + + # [DONE] signal passes through + if payload.strip() == "[DONE]": + # Flush any remaining buffered events + result = list(self._pending_flush) + self._pending_flush.clear() + result.append(line) + return result + + # Parse event JSON + try: + data = json.loads(payload) + except json.JSONDecodeError: + return [line] # Can't parse — pass through + + # Check for tool calls that need remapping + needs_buffering = False + for choice in data.get("choices", []): + delta = choice.get("delta", {}) + for tc in delta.get("tool_calls", []): + idx = tc.get("index", 0) + func = tc.get("function", {}) + + # First chunk with tool name + if "name" in func: + if func["name"] == "databricks-tool-call": + self._tool_state[idx] = { + "args_buffer": func.get("arguments", ""), + "resolved_name": None, + "buffered_lines": [], + } + needs_buffering = True + else: + # Normal tool name — no remapping needed + self._tool_state.pop(idx, None) + + # Argument chunks for a pending tool call + elif idx in self._tool_state and self._tool_state[idx]["resolved_name"] is None: + state = self._tool_state[idx] + state["args_buffer"] += func.get("arguments", "") + needs_buffering = True + + # Try to extract the real name from accumulated arguments + try: + args = json.loads(state["args_buffer"]) + if isinstance(args, dict) and "name" in args: + state["resolved_name"] = args.pop("name") + # Rewrite all buffered events with the real name + flushed = self._flush_tool_buffer(idx, state["resolved_name"], args) + return flushed + [self._rewrite_event_line(line, data)] + except json.JSONDecodeError: + pass # Arguments still incomplete — keep buffering + + # Subsequent chunks after name is resolved + elif idx in self._tool_state and self._tool_state[idx]["resolved_name"]: + # Name already resolved — strip "name" from args if present + pass # Just pass through, name was fixed in first event + + # Fix finish_reason + if choice.get("finish_reason") == "stop": + # Check if any tool calls were made in this response + if self._tool_state: + choice["finish_reason"] = "tool_calls" + + if needs_buffering: + # Buffer this event until we can resolve the tool name + for idx, state in self._tool_state.items(): + if state["resolved_name"] is None: + state["buffered_lines"].append(line) + return [] # Don't send yet + + # No buffering needed — fix and forward + fixed = fix_response_data(data) + return [f"data: {json.dumps(fixed)}"] + + def _flush_tool_buffer(self, idx, real_name, cleaned_args): + """Rewrite buffered events with the 
resolved tool name.""" + state = self._tool_state[idx] + result = [] + for buffered_line in state["buffered_lines"]: + payload = buffered_line[6:] # Strip "data: " + try: + bdata = json.loads(payload) + for choice in bdata.get("choices", []): + delta = choice.get("delta", {}) + for tc in delta.get("tool_calls", []): + if tc.get("index", 0) == idx: + func = tc.get("function", {}) + if "name" in func and func["name"] == "databricks-tool-call": + func["name"] = real_name + if "arguments" in func: + # Clear arguments in buffered events (we'll send clean args) + func["arguments"] = "" + result.append(f"data: {json.dumps(bdata)}") + except json.JSONDecodeError: + result.append(buffered_line) + + state["buffered_lines"].clear() + + # Send the cleaned arguments as a separate event + args_event = { + "choices": [{ + "delta": { + "tool_calls": [{ + "index": idx, + "function": {"arguments": json.dumps(cleaned_args)} + }] + }, + "finish_reason": None + }] + } + result.append(f"data: {json.dumps(args_event)}") + return result + + def _rewrite_event_line(self, line, data): + """Rewrite an event line with fixed data.""" + fixed = fix_response_data(data) + return f"data: {json.dumps(fixed)}" + + def flush_remaining(self): + """Flush any remaining buffered events (graceful fallback).""" + result = [] + for idx, state in self._tool_state.items(): + for buffered_line in state["buffered_lines"]: + result.append(buffered_line) + state["buffered_lines"].clear() + result.extend(self._pending_flush) + self._pending_flush.clear() + return result + + +# --------------------------------------------------------------------------- +# HTTP Server +# --------------------------------------------------------------------------- + +class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): + """Handle concurrent requests (e.g., health checks during streaming).""" + daemon_threads = True + + +class ProxyHandler(BaseHTTPRequestHandler): + """Proxy that sanitizes requests and fixes responses.""" + + def do_POST(self): + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length) + + # --- Sanitize request --- + try: + data = json.loads(body) + if "messages" in data: + data["messages"] = sanitize_messages(data["messages"]) + body = json.dumps(data).encode() + except (json.JSONDecodeError, KeyError): + pass # Forward as-is if not valid JSON + + # Build upstream URL + upstream_url = UPSTREAM_BASE + self.path + + # Forward headers + headers = {} + for key in self.headers: + if key.lower() not in ("host", "content-length", "transfer-encoding"): + headers[key] = self.headers[key] + headers["Content-Length"] = str(len(body)) + + # Detect streaming + is_stream = False + try: + is_stream = json.loads(body).get("stream", False) + except Exception: + pass + + try: + resp = requests.post( + upstream_url, + data=body, + headers=headers, + stream=is_stream, + timeout=300, + ) + + # --- Non-streaming response --- + if not is_stream: + # Fix response + try: + resp_data = resp.json() + resp_data = fix_response_data(resp_data) + resp_body = json.dumps(resp_data).encode() + except (json.JSONDecodeError, ValueError): + resp_body = resp.content + + self.send_response(resp.status_code) + for key, value in resp.headers.items(): + if key.lower() not in ("transfer-encoding", "content-encoding", "content-length"): + self.send_header(key, value) + self.send_header("Content-Length", str(len(resp_body))) + self.end_headers() + self.wfile.write(resp_body) + return + + # --- Streaming response --- + 
self.send_response(resp.status_code) + for key, value in resp.headers.items(): + if key.lower() not in ("transfer-encoding", "content-encoding", "content-length"): + self.send_header(key, value) + self.send_header("Transfer-Encoding", "chunked") + self.end_headers() + + processor = SSEProcessor() + + for raw_line in resp.iter_lines(decode_unicode=True): + if raw_line is None: + continue + + line = raw_line.strip() if isinstance(raw_line, str) else raw_line.decode().strip() + + if not line: + # Blank line = event boundary, send it + self._send_chunk(b"\r\n") + continue + + # Process through SSE fixer + output_lines = processor.process_line(line) + for out_line in output_lines: + self._send_chunk((out_line + "\r\n").encode()) + + # Flush any remaining buffered events + for remaining in processor.flush_remaining(): + self._send_chunk((remaining + "\r\n").encode()) + + # Send final zero-length chunk to end chunked transfer + self._send_chunk(b"") + + except requests.exceptions.ConnectionError as e: + self.send_error(502, f"Upstream connection failed: {e}") + except requests.exceptions.Timeout: + self.send_error(504, "Upstream timeout") + + def _send_chunk(self, data): + """Send a chunk in HTTP chunked transfer encoding.""" + if data: + chunk = f"{len(data):x}\r\n".encode() + data + b"\r\n" + else: + chunk = b"0\r\n\r\n" # Final chunk + try: + self.wfile.write(chunk) + self.wfile.flush() + except BrokenPipeError: + pass + + def do_GET(self): + """Health check endpoint.""" + if self.path == "/health": + body = json.dumps({"status": "ok", "upstream": UPSTREAM_BASE}).encode() + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + else: + self.send_error(404) + + def log_message(self, format, *args): + """Suppress per-request logging to keep container logs clean.""" + pass + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + if not UPSTREAM_BASE: + print("Error: PROXY_UPSTREAM_BASE environment variable is required", file=sys.stderr) + sys.exit(1) + + server = ThreadedHTTPServer((LISTEN_HOST, LISTEN_PORT), ProxyHandler) + print(f"Content-filter proxy listening on {LISTEN_HOST}:{LISTEN_PORT}") + print(f"Forwarding to: {UPSTREAM_BASE}") + print(f"Fixes: empty text blocks, orphaned tool_results, tool name remapping, finish_reason") + sys.stdout.flush() + server.serve_forever() diff --git a/requirements.txt b/requirements.txt index f175656..a9c32c9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,3 @@ claude-agent-sdk databricks-sdk>=0.20.0 mlflow[genai]>=3.4 opentelemetry-exporter-otlp-proto-grpc -requests>=2.28 diff --git a/setup_litellm.py b/setup_litellm.py index 03fe705..8d29682 100644 --- a/setup_litellm.py +++ b/setup_litellm.py @@ -1,18 +1,16 @@ #!/usr/bin/env python -"""Start a lightweight local proxy to sanitize empty content blocks before they reach Databricks. +"""Start the content-filter proxy between OpenCode and Databricks. -OpenCode occasionally produces empty text content blocks in messages, which the Databricks -Foundation Model API rejects with: "messages: text content blocks must be non-empty" -(see https://github.com/sst/opencode/issues/5028). 
+Fixes known OpenCode bugs by sanitizing requests and responses: + - Empty text content blocks (OpenCode #5028) + - Orphaned tool_result blocks with no matching tool_use + - Databricks 'databricks-tool-call' name mangling + - Incorrect finish_reason on tool call responses -This proxy strips empty/whitespace-only text blocks before forwarding requests to the -Databricks AI Gateway. Runs on localhost:4000 (internal only, never exposed externally). - -No external dependencies — uses stdlib + requests (already installed via databricks-sdk). +See docs/plans/2026-03-11-litellm-empty-content-blocks-design.md """ import os import sys -import json import time import subprocess from pathlib import Path @@ -23,7 +21,7 @@ PROXY_PORT = 4000 PROXY_HOST = "127.0.0.1" -HEALTH_TIMEOUT = 15 # seconds to wait for proxy to be ready +HEALTH_TIMEOUT = 15 HEALTH_POLL_INTERVAL = 0.5 # Set HOME if not properly set @@ -49,143 +47,22 @@ upstream_base = f"{host}/serving-endpoints" print(f"Content-filter proxy will forward to: {host}/serving-endpoints") -# Write the proxy server script -proxy_script = home / ".content-filter-proxy.py" -proxy_script.write_text(f'''#!/usr/bin/env python -"""Minimal HTTP proxy that strips empty text content blocks from OpenAI-compatible API requests.""" -import json -import sys -from http.server import HTTPServer, BaseHTTPRequestHandler -import requests - -UPSTREAM_BASE = "{upstream_base}" -LISTEN_HOST = "{PROXY_HOST}" -LISTEN_PORT = {PROXY_PORT} - - -def sanitize_messages(messages): - """Strip empty/whitespace-only text content blocks from messages.""" - if not isinstance(messages, list): - return messages - cleaned = [] - for msg in messages: - content = msg.get("content") - if isinstance(content, list): - filtered = [ - block for block in content - if not ( - isinstance(block, dict) - and block.get("type") == "text" - and block.get("text", "").strip() == "" - ) - ] - # If all content blocks were empty, keep the message but with empty list - # (let the API decide how to handle it) - msg = {{**msg, "content": filtered if filtered else content[:0]}} - elif isinstance(content, str) and content.strip() == "": - # Skip messages with empty string content - continue - cleaned.append(msg) - return cleaned - - -class ProxyHandler(BaseHTTPRequestHandler): - def do_POST(self): - # Read request body - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length) - - # Parse and sanitize - try: - data = json.loads(body) - if "messages" in data: - data["messages"] = sanitize_messages(data["messages"]) - body = json.dumps(data).encode() - except (json.JSONDecodeError, KeyError): - pass # Forward as-is if not JSON - - # Build upstream URL - upstream_url = UPSTREAM_BASE + self.path - - # Forward headers (pass through auth, content-type, etc.) 
- headers = {{}} - for key in self.headers: - if key.lower() not in ("host", "content-length", "transfer-encoding"): - headers[key] = self.headers[key] - headers["Content-Length"] = str(len(body)) - - # Check if client wants streaming - is_stream = False - try: - is_stream = json.loads(body).get("stream", False) - except Exception: - pass - - try: - resp = requests.post( - upstream_url, - data=body, - headers=headers, - stream=is_stream, - timeout=300, - ) - - # Send response status and headers - self.send_response(resp.status_code) - for key, value in resp.headers.items(): - if key.lower() not in ("transfer-encoding", "content-encoding", "content-length"): - self.send_header(key, value) - self.end_headers() - - # Stream or send response body - if is_stream: - for chunk in resp.iter_content(chunk_size=1024): - if chunk: - self.wfile.write(chunk) - self.wfile.flush() - else: - self.wfile.write(resp.content) - - except requests.exceptions.ConnectionError as e: - self.send_error(502, f"Upstream connection failed: {{e}}") - except requests.exceptions.Timeout: - self.send_error(504, "Upstream timeout") - - def do_GET(self): - """Health check endpoint.""" - if self.path == "/health": - self.send_response(200) - self.send_header("Content-Type", "application/json") - self.end_headers() - self.wfile.write(json.dumps({{"status": "ok", "upstream": UPSTREAM_BASE}}).encode()) - else: - self.send_error(404) - - def log_message(self, format, *args): - """Suppress request logging to keep container logs clean.""" - pass - - -if __name__ == "__main__": - server = HTTPServer((LISTEN_HOST, LISTEN_PORT), ProxyHandler) - print(f"Content-filter proxy listening on {{LISTEN_HOST}}:{{LISTEN_PORT}}") - print(f"Forwarding to: {{UPSTREAM_BASE}}") - sys.stdout.flush() - server.serve_forever() -''') - -print(f"Proxy script written to {proxy_script}") - # Start proxy as a background process +proxy_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "content_filter_proxy.py") log_path = home / ".content-filter-proxy.log" print(f"Starting content-filter proxy on {PROXY_HOST}:{PROXY_PORT}...") +env = os.environ.copy() +env["PROXY_UPSTREAM_BASE"] = upstream_base +env["PROXY_HOST"] = PROXY_HOST +env["PROXY_PORT"] = str(PROXY_PORT) + proc = subprocess.Popen( - [sys.executable, str(proxy_script)], + [sys.executable, proxy_script], stdout=open(log_path, "w"), stderr=subprocess.STDOUT, - env=os.environ.copy(), - start_new_session=True, # Detach from parent process group + env=env, + start_new_session=True, ) # Write PID file for cleanup @@ -207,11 +84,10 @@ def log_message(self, format, *args): except (URLError, OSError): pass - # Check if process died if proc.poll() is not None: print(f"Error: Proxy exited with code {proc.returncode}") try: - print(f"Logs: {log_path.read_text()[:500]}") + print(f"Logs: {log_path.read_text()[:1000]}") except Exception: pass sys.exit(1) @@ -224,6 +100,6 @@ def log_message(self, format, *args): else: print(f"Warning: Proxy health check timed out after {HEALTH_TIMEOUT}s") try: - print(f"Logs: {log_path.read_text()[:500]}") + print(f"Logs: {log_path.read_text()[:1000]}") except Exception: pass From b71a263f1b5dfea42f8c9847468eedcee0a1a587 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 15:41:27 -0400 Subject: [PATCH 06/21] fix: robust multi-pass orphan detection with diagnostic logging Key fix: prev_tool_ids now checks the CLEANED message list (not original indices), so cascading orphans are caught within the same pass. 
Multi-pass loop runs up to 5 passes for deeply cascading cases. Added diagnostic logging to ~/.content-filter-proxy-debug.log: - Full message structure on each request - Every strip/drop action with IDs and reason - Upstream error responses Handles toolu_bdrk_ format IDs from Databricks. Co-Authored-By: Claude Opus 4.6 (1M context) --- content_filter_proxy.py | 146 ++++++++++++++++++++++++++++++++-------- 1 file changed, 118 insertions(+), 28 deletions(-) diff --git a/content_filter_proxy.py b/content_filter_proxy.py index 2291a0d..3debd0c 100644 --- a/content_filter_proxy.py +++ b/content_filter_proxy.py @@ -17,6 +17,7 @@ https://github.com/BerriAI/litellm/pull/20384 """ import json +import logging import os import sys from http.server import HTTPServer, BaseHTTPRequestHandler @@ -28,50 +29,124 @@ LISTEN_HOST = os.environ.get("PROXY_HOST", "127.0.0.1") LISTEN_PORT = int(os.environ.get("PROXY_PORT", "4000")) +# Diagnostic logging — writes to ~/.content-filter-proxy-debug.log +_home = os.environ.get("HOME", "/app/python/source_code") +logging.basicConfig( + filename=os.path.join(_home, ".content-filter-proxy-debug.log"), + level=logging.INFO, + format="%(asctime)s %(message)s", +) +log = logging.getLogger("proxy") + # --------------------------------------------------------------------------- # Request-side sanitization # --------------------------------------------------------------------------- +def _extract_tool_ids_from_message(msg): + """Extract all tool_use/tool_call IDs from an assistant message.""" + ids = set() + # Anthropic format: content blocks with type=tool_use + content = msg.get("content") + if isinstance(content, list): + for block in content: + if isinstance(block, dict) and block.get("type") == "tool_use": + tid = block.get("id") + if tid: + ids.add(tid) + # OpenAI format: tool_calls array + for tc in msg.get("tool_calls") or []: + tid = tc.get("id") + if tid: + ids.add(tid) + return ids + + +def _extract_tool_refs_from_message(msg): + """Extract all tool_use_id/tool_call_id references from a user/tool message.""" + refs = set() + role = msg.get("role", "") + content = msg.get("content") + # Anthropic format: tool_result blocks + if isinstance(content, list): + for block in content: + if isinstance(block, dict) and block.get("type") == "tool_result": + ref = block.get("tool_use_id") + if ref: + refs.add(ref) + # OpenAI format: tool messages + if role == "tool": + ref = msg.get("tool_call_id") + if ref: + refs.add(ref) + return refs + + def sanitize_messages(messages): - """Strip empty text blocks and orphaned tool_result/tool messages.""" + """Strip empty text blocks and orphaned tool_result/tool messages. + + Runs multiple passes to handle cascading orphans (dropping one message + can make the next one orphaned too). + """ if not isinstance(messages, list): return messages - # First pass: collect tool_use/tool_call IDs per assistant message index - # so we can validate tool_results in the following user/tool message. 
- assistant_tool_ids = {} # msg_index -> set of tool IDs + log.info(f"Sanitizing {len(messages)} messages") + + # Log message structure for debugging for i, msg in enumerate(messages): role = msg.get("role", "") - if role != "assistant": - continue - ids = set() - # Anthropic format: content blocks with type=tool_use + tool_ids = _extract_tool_ids_from_message(msg) + tool_refs = _extract_tool_refs_from_message(msg) content = msg.get("content") + content_desc = "" if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tid = block.get("id") - if tid: - ids.add(tid) - # OpenAI format: tool_calls array - for tc in msg.get("tool_calls", []): - tid = tc.get("id") - if tid: - ids.add(tid) - assistant_tool_ids[i] = ids - - # Second pass: clean messages + types = [b.get("type", "?") if isinstance(b, dict) else "str" for b in content] + content_desc = f"[{', '.join(types)}]" + elif isinstance(content, str): + content_desc = f'str({len(content)} chars)' + elif content is None: + content_desc = "null" + extras = "" + if tool_ids: + extras += f" tool_ids={tool_ids}" + if tool_refs: + extras += f" tool_refs={tool_refs}" + if msg.get("tool_calls"): + extras += f" tool_calls={len(msg['tool_calls'])}" + log.info(f" [{i}] {role}: {content_desc}{extras}") + + # Multi-pass sanitization (handles cascading orphans) + prev_len = -1 + pass_num = 0 + result = list(messages) + + while len(result) != prev_len and pass_num < 5: + prev_len = len(result) + pass_num += 1 + result = _sanitize_single_pass(result, pass_num) + + stripped = len(messages) - len(result) + if stripped > 0: + log.info(f"Sanitization complete: stripped {stripped} messages/blocks in {pass_num} passes") + + return result + + +def _sanitize_single_pass(messages, pass_num): + """One pass of message sanitization.""" cleaned = [] + for i, msg in enumerate(messages): role = msg.get("role", "") content = msg.get("content") - # Find the most recent preceding assistant message's tool IDs + # Build valid tool IDs from the most recent assistant message IN THE + # CLEANED list (not the original), so cascading drops are handled. 
prev_tool_ids = set() - for j in range(i - 1, -1, -1): - if messages[j].get("role") == "assistant": - prev_tool_ids = assistant_tool_ids.get(j, set()) + for j in range(len(cleaned) - 1, -1, -1): + if cleaned[j].get("role") == "assistant": + prev_tool_ids = _extract_tool_ids_from_message(cleaned[j]) break # --- Handle list content (Anthropic format) --- @@ -84,21 +159,23 @@ def sanitize_messages(messages): # Strip empty/whitespace-only text blocks if block.get("type") == "text" and block.get("text", "").strip() == "": + log.info(f" pass {pass_num}: strip empty text block from msg[{i}] ({role})") continue # Strip orphaned tool_result blocks if block.get("type") == "tool_result": tool_use_id = block.get("tool_use_id") if tool_use_id and tool_use_id not in prev_tool_ids: + log.info(f" pass {pass_num}: strip orphaned tool_result {tool_use_id} from msg[{i}] (prev_ids={prev_tool_ids})") continue filtered.append(block) if not filtered: - # Don't drop assistant messages (would break alternation) if role == "assistant": msg = {**msg, "content": filtered} else: + log.info(f" pass {pass_num}: drop empty {role} msg[{i}]") continue else: msg = {**msg, "content": filtered} @@ -107,11 +184,13 @@ def sanitize_messages(messages): elif role == "tool": tool_call_id = msg.get("tool_call_id") if tool_call_id and tool_call_id not in prev_tool_ids: - continue # Orphaned tool response + log.info(f" pass {pass_num}: strip orphaned tool msg[{i}] {tool_call_id} (prev_ids={prev_tool_ids})") + continue # --- Handle empty string content --- elif isinstance(content, str) and content.strip() == "": if role != "assistant": + log.info(f" pass {pass_num}: strip empty string {role} msg[{i}]") continue cleaned.append(msg) @@ -340,13 +419,20 @@ def do_POST(self): content_length = int(self.headers.get("Content-Length", 0)) body = self.rfile.read(content_length) + log.info(f"POST {self.path} ({content_length} bytes)") + # --- Sanitize request --- try: data = json.loads(body) if "messages" in data: + before = len(data["messages"]) data["messages"] = sanitize_messages(data["messages"]) + after = len(data["messages"]) + if before != after: + log.info(f"Messages: {before} -> {after}") body = json.dumps(data).encode() - except (json.JSONDecodeError, KeyError): + except (json.JSONDecodeError, KeyError) as e: + log.warning(f"Could not parse request body: {e}") pass # Forward as-is if not valid JSON # Build upstream URL @@ -375,6 +461,10 @@ def do_POST(self): timeout=300, ) + # Log upstream errors + if resp.status_code >= 400: + log.error(f"Upstream returned {resp.status_code}: {resp.text[:500]}") + # --- Non-streaming response --- if not is_stream: # Fix response From c31bf4efee3bc162b2f8ffc89356f2216156b8b1 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 17:05:37 -0400 Subject: [PATCH 07/21] fix: use @ai-sdk/openai for GPT models (Responses API support) The GPT Codex endpoints on Databricks AI Gateway only support the Responses API (openai/v1/responses), not Chat Completions. The @ai-sdk/openai-compatible SDK defaults to /chat/completions which fails. Switch to @ai-sdk/openai which natively supports both APIs. 
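As a rough sanity check, both routes can be probed directly. The sketch below is assumption-heavy: it takes the /openai/v1 paths and the DATABRICKS_GATEWAY_HOST / DATABRICKS_TOKEN variables from this repo's config, uses one of the configured model names as a placeholder, and assumes the gateway answers OpenAI-shaped requests on those routes:

```python
import os
import requests

base = os.environ["DATABRICKS_GATEWAY_HOST"].rstrip("/") + "/openai/v1"
headers = {"Authorization": f"Bearer {os.environ['DATABRICKS_TOKEN']}"}
model = "databricks-gpt-5-2-codex"  # placeholder endpoint name from the config in the diff below

# Chat Completions (the @ai-sdk/openai-compatible default): fails for the Codex endpoints
r = requests.post(
    f"{base}/chat/completions",
    headers=headers,
    json={"model": model, "messages": [{"role": "user", "content": "ping"}]},
    timeout=60,
)
print("chat/completions:", r.status_code)

# Responses API: the route these endpoints actually support
r = requests.post(
    f"{base}/responses",
    headers=headers,
    json={"model": model, "input": "ping"},
    timeout=60,
)
print("responses:", r.status_code)
```

If the first call returns a 4xx while the second succeeds, the endpoint only speaks the Responses API, which is what the switch to @ai-sdk/openai accounts for.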
Co-Authored-By: Claude Opus 4.6 (1M context) --- setup_opencode.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup_opencode.py b/setup_opencode.py index 4dc463b..e6e69c3 100644 --- a/setup_opencode.py +++ b/setup_opencode.py @@ -126,11 +126,12 @@ } }, "databricks-openai": { - "npm": "@ai-sdk/openai-compatible", + "npm": "@ai-sdk/openai", "name": "Databricks AI Gateway (OpenAI)", "options": { "baseURL": f"{gateway_host}/openai/v1", - "apiKey": "{env:DATABRICKS_TOKEN}" + "apiKey": "{env:DATABRICKS_TOKEN}", + "compatibility": "compatible" }, "models": { "databricks-gpt-5-2-codex": { From 228af714e92647dc86a1e4dbb465963f8c91c631 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 17:11:31 -0400 Subject: [PATCH 08/21] fix: explicitly install @ai-sdk/openai for GPT Responses API OpenCode doesn't auto-install npm packages from provider config. Add explicit npm install of @ai-sdk/openai alongside opencode-ai so GPT models can use the Responses API. Co-Authored-By: Claude Opus 4.6 (1M context) --- setup_opencode.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/setup_opencode.py b/setup_opencode.py index e6e69c3..577d04e 100644 --- a/setup_opencode.py +++ b/setup_opencode.py @@ -64,6 +64,17 @@ print(f"OpenCode CLI installed to {opencode_bin}") else: print(f"OpenCode install warning: {result.stderr}") + + # Install @ai-sdk/openai for GPT models (Responses API support) + result = subprocess.run( + ["npm", "install", "-g", f"--prefix={npm_prefix}", "@ai-sdk/openai"], + capture_output=True, text=True, + env={**os.environ, "HOME": str(home)} + ) + if result.returncode == 0: + print("@ai-sdk/openai installed (Responses API support)") + else: + print(f"@ai-sdk/openai install warning: {result.stderr}") else: print(f"OpenCode CLI already installed at {opencode_bin}") From f432f9986de5fe073078d0edeb694a6facfaf212 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 17:12:24 -0400 Subject: [PATCH 09/21] fix: rename LiteLLM references to content-filter proxy Update setup step label, comments, and provider names to reflect that we use a custom content-filter proxy, not LiteLLM. 
Co-Authored-By: Claude Opus 4.6 (1M context)
---
 app.py            |  8 ++++----
 setup_opencode.py | 18 +++++++++---------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/app.py b/app.py
index 0b40547..889a9f7 100644
--- a/app.py
+++ b/app.py
@@ -83,7 +83,7 @@ def handle_sigterm(signum, frame):
     "steps": [
         {"id": "git", "label": "Configuring git identity", "status": "pending", "started_at": None, "completed_at": None, "error": None},
         {"id": "micro", "label": "Installing micro editor", "status": "pending", "started_at": None, "completed_at": None, "error": None},
-        {"id": "litellm", "label": "Starting LiteLLM proxy", "status": "pending", "started_at": None, "completed_at": None, "error": None},
+        {"id": "litellm", "label": "Starting content-filter proxy", "status": "pending", "started_at": None, "completed_at": None, "error": None},
         {"id": "claude", "label": "Configuring Claude CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None},
         {"id": "codex", "label": "Configuring Codex CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None},
         {"id": "opencode", "label": "Configuring OpenCode CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None},
@@ -252,9 +252,9 @@ def run_setup():
     _run_step("micro", ["bash", "-c", "mkdir -p ~/.local/bin && bash install_micro.sh && mv micro ~/.local/bin/ 2>/dev/null || true"])

-    # --- LiteLLM proxy (must be running before OpenCode starts) ---
-    # Sanitizes empty text content blocks that cause "Bad Request" errors
-    # with Databricks Foundation Model API (see OpenCode #5028)
+    # --- Content-filter proxy (must be running before OpenCode starts) ---
+    # Sanitizes requests/responses between OpenCode and Databricks
+    # (see OpenCode #5028, docs/plans/2026-03-11-litellm-empty-content-blocks-design.md)
     _run_step("litellm", ["python", "setup_litellm.py"])

     # --- Parallel agent setup (all independent of each other) ---
     parallel_steps = [
diff --git a/setup_opencode.py b/setup_opencode.py
index 577d04e..ff6e9e3 100644
--- a/setup_opencode.py
+++ b/setup_opencode.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
-"""Configure OpenCode CLI with Databricks Model Serving via LiteLLM local proxy.
+"""Configure OpenCode CLI with Databricks Model Serving via a local content-filter proxy.

-Routes requests through a local LiteLLM proxy (localhost:4000) which sanitizes empty
+Routes requests through a local content-filter proxy (localhost:4000) which sanitizes empty
 text content blocks before forwarding to Databricks AI Gateway. This fixes OpenCode
 issue #5028 where empty content blocks cause "Bad Request" errors.
 See docs/plans/2026-03-11-litellm-empty-content-blocks-design.md for details.
@@ -13,7 +13,7 @@
 from utils import ensure_https

-# LiteLLM local proxy — sanitizes empty content blocks before reaching Databricks
+# Local content-filter proxy — sanitizes empty content blocks before reaching Databricks
 # (see https://github.com/sst/opencode/issues/5028)
 LITELLM_PROXY_URL = "http://127.0.0.1:4000"
@@ -85,15 +85,15 @@
 opencode_config_dir.mkdir(parents=True, exist_ok=True)

 if gateway_host:
-    # Gateway mode: route through LiteLLM proxy for content block sanitization
-    # LiteLLM forwards clean requests to Databricks AI Gateway
+    # Gateway mode: route through the content-filter proxy for content block sanitization
+    # The content-filter proxy forwards clean requests to Databricks AI Gateway
     # OpenAI/GPT models go direct (not affected by the empty content block bug)
     opencode_config = {
         "$schema": "https://opencode.ai/config.json",
         "provider": {
             "databricks": {
                 "npm": "@ai-sdk/openai-compatible",
-                "name": "Databricks AI Gateway via LiteLLM",
+                "name": "Databricks AI Gateway (via content-filter proxy)",
                 "options": {
                     "baseURL": LITELLM_PROXY_URL,
                     "apiKey": "{env:DATABRICKS_TOKEN}"
@@ -165,14 +165,14 @@
         "model": f"databricks/{anthropic_model}"
     }
 else:
-    # Fallback: route through LiteLLM proxy for content block sanitization
-    # LiteLLM forwards clean requests to Databricks serving endpoints
+    # Fallback: route through the content-filter proxy for content block sanitization
+    # The content-filter proxy forwards clean requests to Databricks serving endpoints
     opencode_config = {
         "$schema": "https://opencode.ai/config.json",
         "provider": {
             "databricks": {
                 "npm": "@ai-sdk/openai-compatible",
-                "name": "Databricks Model Serving via LiteLLM",
+                "name": "Databricks Model Serving (via content-filter proxy)",
                 "options": {
                     "baseURL": LITELLM_PROXY_URL,
                     "apiKey": "{env:DATABRICKS_TOKEN}"

From 01d8ad81f899e3442c8b5cd98591a1fd63134fc9 Mon Sep 17 00:00:00 2001
From: Sathish Gangichetty
Date: Wed, 11 Mar 2026 17:22:51 -0400
Subject: [PATCH 10/21] =?UTF-8?q?fix:=20add=20Gemini=20compatibility=20?=
 =?UTF-8?q?=E2=80=94=20strip=20unsupported=20schema=20keys?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gemini rejects $schema, $ref, $defs, additionalProperties in tool
parameter definitions, and stream_options at the top level. Proxy now
detects Gemini models by name and recursively strips these fields
before forwarding.

Only applies to requests with "gemini" in the model name — Claude and
GPT requests are untouched.
Co-Authored-By: Claude Opus 4.6 (1M context) --- content_filter_proxy.py | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/content_filter_proxy.py b/content_filter_proxy.py index 3debd0c..7dfa24f 100644 --- a/content_filter_proxy.py +++ b/content_filter_proxy.py @@ -38,6 +38,59 @@ ) log = logging.getLogger("proxy") +# JSON Schema keywords that Gemini doesn't support +GEMINI_UNSUPPORTED_SCHEMA_KEYS = { + "$schema", "$ref", "$defs", "$id", "$comment", "additionalProperties", +} + +# Top-level request fields that Gemini doesn't support +GEMINI_UNSUPPORTED_REQUEST_KEYS = { + "stream_options", +} + + +# --------------------------------------------------------------------------- +# Gemini compatibility +# --------------------------------------------------------------------------- + +def strip_unsupported_schema_keys(obj): + """Recursively strip JSON Schema keywords that Gemini doesn't support.""" + if isinstance(obj, dict): + return { + k: strip_unsupported_schema_keys(v) + for k, v in obj.items() + if k not in GEMINI_UNSUPPORTED_SCHEMA_KEYS + } + elif isinstance(obj, list): + return [strip_unsupported_schema_keys(item) for item in obj] + return obj + + +def sanitize_for_gemini(data): + """Strip fields that Gemini's API rejects.""" + model = data.get("model", "") + if "gemini" not in model.lower(): + return data + + log.info(f"Applying Gemini compatibility fixes for model: {model}") + + # Strip unsupported schema keys from tool definitions + for tool in data.get("tools", []): + func = tool.get("function", {}) + if "parameters" in func: + func["parameters"] = strip_unsupported_schema_keys(func["parameters"]) + + # Strip unsupported top-level fields + for key in GEMINI_UNSUPPORTED_REQUEST_KEYS: + if key in data: + log.info(f" Stripped top-level field: {key}") + del data[key] + + # Strip $schema from top level if present + data.pop("$schema", None) + + return data + # --------------------------------------------------------------------------- # Request-side sanitization @@ -430,6 +483,8 @@ def do_POST(self): after = len(data["messages"]) if before != after: log.info(f"Messages: {before} -> {after}") + # Gemini compatibility: strip unsupported schema keys and fields + data = sanitize_for_gemini(data) body = json.dumps(data).encode() except (json.JSONDecodeError, KeyError) as e: log.warning(f"Could not parse request body: {e}") From 9f3bc1b7f9f1d016be9ea6fb2d97079537349048 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 17:30:21 -0400 Subject: [PATCH 11/21] fix: strip $schema from tool params for ALL models, not just Gemini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous approach tried to detect Gemini models by name — unreliable. Now strips $schema, additionalProperties, stream_options universally. These fields are never needed by any downstream API (Claude/GPT ignore them, Gemini rejects them). Safe for all models. 
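For illustration, the strip reduces to the following standalone sketch (restated outside the proxy; the tool definition is hypothetical):

```python
import json

UNSUPPORTED = {"$schema", "$ref", "$defs", "$id", "$comment", "additionalProperties"}

def strip_unsupported(obj):
    """Recursively drop JSON Schema keywords that some providers reject."""
    if isinstance(obj, dict):
        return {k: strip_unsupported(v) for k, v in obj.items() if k not in UNSUPPORTED}
    if isinstance(obj, list):
        return [strip_unsupported(item) for item in obj]
    return obj

tool = {
    "type": "function",
    "function": {
        "name": "read_file",
        "parameters": {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "additionalProperties": False,
            "properties": {"path": {"type": "string"}},
            "required": ["path"],
        },
    },
}

print(json.dumps(strip_unsupported(tool), indent=2))
# "$schema" and "additionalProperties" are removed; "properties" and
# "required" survive, so the argument schema is otherwise untouched.
```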
Co-Authored-By: Claude Opus 4.6 (1M context) --- content_filter_proxy.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/content_filter_proxy.py b/content_filter_proxy.py index 7dfa24f..a7ed00c 100644 --- a/content_filter_proxy.py +++ b/content_filter_proxy.py @@ -66,16 +66,18 @@ def strip_unsupported_schema_keys(obj): return obj -def sanitize_for_gemini(data): - """Strip fields that Gemini's API rejects.""" - model = data.get("model", "") - if "gemini" not in model.lower(): - return data +def sanitize_tool_schemas(data): + """Strip JSON Schema keywords that some providers reject. - log.info(f"Applying Gemini compatibility fixes for model: {model}") + Applied universally — $schema, additionalProperties etc. are never + required by any downstream API. Claude/GPT ignore them, Gemini rejects them. + Stripping for all models is safe and avoids model detection issues. + """ + tools = data.get("tools", []) + if not tools: + return data - # Strip unsupported schema keys from tool definitions - for tool in data.get("tools", []): + for tool in tools: func = tool.get("function", {}) if "parameters" in func: func["parameters"] = strip_unsupported_schema_keys(func["parameters"]) @@ -483,8 +485,8 @@ def do_POST(self): after = len(data["messages"]) if before != after: log.info(f"Messages: {before} -> {after}") - # Gemini compatibility: strip unsupported schema keys and fields - data = sanitize_for_gemini(data) + # Strip unsupported schema keys from tool definitions (all models) + data = sanitize_tool_schemas(data) body = json.dumps(data).encode() except (json.JSONDecodeError, KeyError) as e: log.warning(f"Could not parse request body: {e}") From 96ee510737403e6091190090a8a6b878f9c4e80d Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 17:38:01 -0400 Subject: [PATCH 12/21] =?UTF-8?q?fix:=20logging=20not=20writing=20?= =?UTF-8?q?=E2=80=94=20use=20explicit=20FileHandler?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit logging.basicConfig does nothing if root logger is already configured (e.g., by 'import requests'). Switch to explicit FileHandler on a named logger so debug output always writes. 
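A minimal reproduction of the pitfall, using only the standard library:

```python
import logging

# Something earlier in the process (another module, a library) has already
# attached a handler to the root logger...
logging.getLogger().addHandler(logging.StreamHandler())

# ...so this basicConfig call is now a silent no-op: no file handler is installed.
logging.basicConfig(filename="debug.log", level=logging.INFO)

# An explicit handler on a named logger works regardless of the root logger's state.
log = logging.getLogger("content-filter-proxy")
log.setLevel(logging.INFO)
if not log.handlers:
    fh = logging.FileHandler("debug.log")
    fh.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
    log.addHandler(fh)

log.info("this record reaches debug.log")
```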
Co-Authored-By: Claude Opus 4.6 (1M context) --- content_filter_proxy.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/content_filter_proxy.py b/content_filter_proxy.py index a7ed00c..405eb98 100644 --- a/content_filter_proxy.py +++ b/content_filter_proxy.py @@ -31,12 +31,14 @@ # Diagnostic logging — writes to ~/.content-filter-proxy-debug.log _home = os.environ.get("HOME", "/app/python/source_code") -logging.basicConfig( - filename=os.path.join(_home, ".content-filter-proxy-debug.log"), - level=logging.INFO, - format="%(asctime)s %(message)s", -) -log = logging.getLogger("proxy") +_log_path = os.path.join(_home, ".content-filter-proxy-debug.log") +log = logging.getLogger("content-filter-proxy") +log.setLevel(logging.INFO) +# Explicitly add file handler (basicConfig fails if root logger already configured) +if not log.handlers: + _fh = logging.FileHandler(_log_path) + _fh.setFormatter(logging.Formatter("%(asctime)s %(message)s")) + log.addHandler(_fh) # JSON Schema keywords that Gemini doesn't support GEMINI_UNSUPPORTED_SCHEMA_KEYS = { From c497445b77535e636ec45502f14ae97bc8531824 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 17:43:14 -0400 Subject: [PATCH 13/21] fix: logging to stderr (FileHandler buffering prevented writes) FileHandler buffers in long-running processes and never flushes. Switch to StreamHandler(stderr) which is already redirected to ~/.content-filter-proxy.log and writes immediately. Co-Authored-By: Claude Opus 4.6 (1M context) --- content_filter_proxy.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/content_filter_proxy.py b/content_filter_proxy.py index 405eb98..435430b 100644 --- a/content_filter_proxy.py +++ b/content_filter_proxy.py @@ -29,16 +29,13 @@ LISTEN_HOST = os.environ.get("PROXY_HOST", "127.0.0.1") LISTEN_PORT = int(os.environ.get("PROXY_PORT", "4000")) -# Diagnostic logging — writes to ~/.content-filter-proxy-debug.log -_home = os.environ.get("HOME", "/app/python/source_code") -_log_path = os.path.join(_home, ".content-filter-proxy-debug.log") +# Diagnostic logging — writes to stderr which goes to ~/.content-filter-proxy.log log = logging.getLogger("content-filter-proxy") log.setLevel(logging.INFO) -# Explicitly add file handler (basicConfig fails if root logger already configured) if not log.handlers: - _fh = logging.FileHandler(_log_path) - _fh.setFormatter(logging.Formatter("%(asctime)s %(message)s")) - log.addHandler(_fh) + _sh = logging.StreamHandler(sys.stderr) + _sh.setFormatter(logging.Formatter("%(asctime)s %(message)s")) + log.addHandler(_sh) # JSON Schema keywords that Gemini doesn't support GEMINI_UNSUPPORTED_SCHEMA_KEYS = { From 951e5204695daf29d709d5fa17232289c79c73a9 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 17:46:04 -0400 Subject: [PATCH 14/21] fix: kill stale proxy from previous deploy before starting new one On redeploy, the old proxy keeps running on port 4000 from the previous container init. New proxy crashes with "Address already in use". Now reads PID file and kills the old process first. 
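Separately from what the setup script does, a quick way to confirm the symptom is to check whether anything is still listening on the proxy port; a small sketch using only the standard library:

```python
# Diagnostic only (not the setup script's approach): is port 4000 already taken?
import socket

def port_in_use(host: str = "127.0.0.1", port: int = 4000) -> bool:
    """Return True if something is already listening on host:port."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(0.5)
        return s.connect_ex((host, port)) == 0

if port_in_use():
    print("Port 4000 is busy, likely a stale proxy from a previous deploy")
```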
Co-Authored-By: Claude Opus 4.6 (1M context) --- setup_litellm.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/setup_litellm.py b/setup_litellm.py index 8d29682..ea514a2 100644 --- a/setup_litellm.py +++ b/setup_litellm.py @@ -10,6 +10,7 @@ See docs/plans/2026-03-11-litellm-empty-content-blocks-design.md """ import os +import signal import sys import time import subprocess @@ -30,6 +31,23 @@ home = Path(os.environ["HOME"]) +# Kill any existing proxy (from previous deploy) before starting new one +pid_path = home / ".content-filter-proxy.pid" +if pid_path.exists(): + try: + old_pid = int(pid_path.read_text().strip()) + os.kill(old_pid, signal.SIGTERM) + time.sleep(1) + # Force kill if still running + try: + os.kill(old_pid, signal.SIGKILL) + except ProcessLookupError: + pass + print(f"Killed previous proxy (PID: {old_pid})") + except (ValueError, ProcessLookupError, PermissionError): + pass + pid_path.unlink(missing_ok=True) + # Databricks configuration gateway_host = ensure_https(os.environ.get("DATABRICKS_GATEWAY_HOST", "").rstrip("/")) host = ensure_https(os.environ.get("DATABRICKS_HOST", "").rstrip("/")) From 1684dc789e99b2d4cd31fab828db1e456f2ac9ff Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 17:51:52 -0400 Subject: [PATCH 15/21] fix: kill stale proxy by port instead of PID file PID file approach fails when old process has a different PID than recorded. Use fuser -k or lsof to find and kill whatever is listening on port 4000 before starting new proxy. Co-Authored-By: Claude Opus 4.6 (1M context) --- setup_litellm.py | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/setup_litellm.py b/setup_litellm.py index ea514a2..ed03f8b 100644 --- a/setup_litellm.py +++ b/setup_litellm.py @@ -31,22 +31,36 @@ home = Path(os.environ["HOME"]) -# Kill any existing proxy (from previous deploy) before starting new one -pid_path = home / ".content-filter-proxy.pid" -if pid_path.exists(): - try: - old_pid = int(pid_path.read_text().strip()) - os.kill(old_pid, signal.SIGTERM) +# Kill any existing proxy on our port (more reliable than PID file) +try: + result = subprocess.run( + ["fuser", "-k", f"{PROXY_PORT}/tcp"], + capture_output=True, text=True, timeout=5 + ) + if result.returncode == 0: + print(f"Killed previous process on port {PROXY_PORT}") time.sleep(1) - # Force kill if still running - try: - os.kill(old_pid, signal.SIGKILL) - except ProcessLookupError: - pass - print(f"Killed previous proxy (PID: {old_pid})") - except (ValueError, ProcessLookupError, PermissionError): +except (FileNotFoundError, subprocess.TimeoutExpired): + # fuser not available, try lsof + try: + result = subprocess.run( + ["lsof", "-ti", f":{PROXY_PORT}"], + capture_output=True, text=True, timeout=5 + ) + for pid in result.stdout.strip().split(): + try: + os.kill(int(pid), signal.SIGKILL) + print(f"Killed previous proxy (PID: {pid})") + except (ValueError, ProcessLookupError): + pass + if result.stdout.strip(): + time.sleep(1) + except (FileNotFoundError, subprocess.TimeoutExpired): pass - pid_path.unlink(missing_ok=True) + +# Clean up stale PID file +pid_path = home / ".content-filter-proxy.pid" +pid_path.unlink(missing_ok=True) # Databricks configuration gateway_host = ensure_https(os.environ.get("DATABRICKS_GATEWAY_HOST", "").rstrip("/")) From 396dbb86bcc953d07726381ab6d157714858d398 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 17:56:41 -0400 Subject: [PATCH 16/21] fix: replace empty 
assistant content with placeholder instead of keeping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Message [15] had assistant: str(0 chars) — empty string content that the API rejects. We were preserving it to avoid breaking alternation, but the API rejects it anyway. Now replaces with '.' as minimal valid content. Also handles null content on assistant messages without tool_calls. Co-Authored-By: Claude Opus 4.6 (1M context) --- content_filter_proxy.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/content_filter_proxy.py b/content_filter_proxy.py index 435430b..d830320 100644 --- a/content_filter_proxy.py +++ b/content_filter_proxy.py @@ -241,9 +241,17 @@ def _sanitize_single_pass(messages, pass_num): log.info(f" pass {pass_num}: strip orphaned tool msg[{i}] {tool_call_id} (prev_ids={prev_tool_ids})") continue - # --- Handle empty string content --- + # --- Handle empty/null string content --- + elif content is None and role == "assistant" and not msg.get("tool_calls"): + # Assistant message with null content and no tool_calls — replace + log.info(f" pass {pass_num}: replace null assistant content msg[{i}] with placeholder") + msg = {**msg, "content": "."} elif isinstance(content, str) and content.strip() == "": - if role != "assistant": + if role == "assistant": + # Can't drop assistant messages (breaks alternation), replace with minimal content + log.info(f" pass {pass_num}: replace empty assistant string msg[{i}] with placeholder") + msg = {**msg, "content": "."} + else: log.info(f" pass {pass_num}: strip empty string {role} msg[{i}]") continue From c023699ca7ce7a68c48dba43187912ef3900b1ad Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 18:33:00 -0400 Subject: [PATCH 17/21] feat: add DeepWiki and Exa MCP servers to OpenCode config Adds remote MCP servers for both gateway and fallback configs: - DeepWiki: AI-powered docs for any GitHub repo - Exa: web search and code context retrieval Co-Authored-By: Claude Opus 4.6 (1M context) --- setup_opencode.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/setup_opencode.py b/setup_opencode.py index ff6e9e3..cd85ac3 100644 --- a/setup_opencode.py +++ b/setup_opencode.py @@ -162,6 +162,19 @@ } } }, + "mcp": { + "deepwiki": { + "type": "remote", + "url": "https://mcp.deepwiki.com/sse", + "enabled": True, + "oauth": False + }, + "exa": { + "type": "remote", + "url": "https://mcp.exa.ai/mcp", + "enabled": True + } + }, "model": f"databricks/{anthropic_model}" } else: @@ -216,6 +229,19 @@ } } }, + "mcp": { + "deepwiki": { + "type": "remote", + "url": "https://mcp.deepwiki.com/sse", + "enabled": True, + "oauth": False + }, + "exa": { + "type": "remote", + "url": "https://mcp.exa.ai/mcp", + "enabled": True + } + }, "model": f"databricks/{anthropic_model}" } From a03d981fb066955cabc5f07fdb570e5db8f6bd7f Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 18:44:35 -0400 Subject: [PATCH 18/21] refactor: remove all LiteLLM references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - setup_litellm.py → setup_proxy.py - LITELLM_PROXY_URL → CONTENT_FILTER_PROXY_URL - Step ID "litellm" → "proxy" Co-Authored-By: Claude Opus 4.6 (1M context) --- app.py | 4 ++-- setup_opencode.py | 6 +++--- setup_litellm.py => setup_proxy.py | 0 3 files changed, 5 insertions(+), 5 deletions(-) rename setup_litellm.py => setup_proxy.py (100%) diff --git a/app.py b/app.py index 
889a9f7..dc99953 100644 --- a/app.py +++ b/app.py @@ -83,7 +83,7 @@ def handle_sigterm(signum, frame): "steps": [ {"id": "git", "label": "Configuring git identity", "status": "pending", "started_at": None, "completed_at": None, "error": None}, {"id": "micro", "label": "Installing micro editor", "status": "pending", "started_at": None, "completed_at": None, "error": None}, - {"id": "litellm", "label": "Starting content-filter proxy", "status": "pending", "started_at": None, "completed_at": None, "error": None}, + {"id": "proxy", "label": "Starting content-filter proxy", "status": "pending", "started_at": None, "completed_at": None, "error": None}, {"id": "claude", "label": "Configuring Claude CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None}, {"id": "codex", "label": "Configuring Codex CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None}, {"id": "opencode", "label": "Configuring OpenCode CLI", "status": "pending", "started_at": None, "completed_at": None, "error": None}, @@ -255,7 +255,7 @@ def run_setup(): # --- Content-filter proxy (must be running before OpenCode starts) --- # Sanitizes requests/responses between OpenCode and Databricks # (see OpenCode #5028, docs/plans/2026-03-11-litellm-empty-content-blocks-design.md) - _run_step("litellm", ["python", "setup_litellm.py"]) + _run_step("proxy", ["python", "setup_proxy.py"]) # --- Parallel agent setup (all independent of each other) --- parallel_steps = [ diff --git a/setup_opencode.py b/setup_opencode.py index cd85ac3..f4f573c 100644 --- a/setup_opencode.py +++ b/setup_opencode.py @@ -15,7 +15,7 @@ # content-filter proxy local proxy — sanitizes empty content blocks before reaching Databricks # (see https://github.com/sst/opencode/issues/5028) -LITELLM_PROXY_URL = "http://127.0.0.1:4000" +CONTENT_FILTER_PROXY_URL = "http://127.0.0.1:4000" # Set HOME if not properly set if not os.environ.get("HOME") or os.environ["HOME"] == "/": @@ -95,7 +95,7 @@ "npm": "@ai-sdk/openai-compatible", "name": "Databricks AI Gateway (via content-filter proxy)", "options": { - "baseURL": LITELLM_PROXY_URL, + "baseURL": CONTENT_FILTER_PROXY_URL, "apiKey": "{env:DATABRICKS_TOKEN}" }, "models": { @@ -187,7 +187,7 @@ "npm": "@ai-sdk/openai-compatible", "name": "Databricks Model Serving (via content-filter proxy)", "options": { - "baseURL": LITELLM_PROXY_URL, + "baseURL": CONTENT_FILTER_PROXY_URL, "apiKey": "{env:DATABRICKS_TOKEN}" }, "models": { diff --git a/setup_litellm.py b/setup_proxy.py similarity index 100% rename from setup_litellm.py rename to setup_proxy.py From d91aa174bf83b3059da6646a1ac024a85f58798b Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 18:54:19 -0400 Subject: [PATCH 19/21] fix: update DeepWiki MCP URL from /sse to /mcp DeepWiki moved from SSE to StreamableHTTP transport. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- setup_opencode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup_opencode.py b/setup_opencode.py index f4f573c..8ec404c 100644 --- a/setup_opencode.py +++ b/setup_opencode.py @@ -165,7 +165,7 @@ "mcp": { "deepwiki": { "type": "remote", - "url": "https://mcp.deepwiki.com/sse", + "url": "https://mcp.deepwiki.com/mcp", "enabled": True, "oauth": False }, @@ -232,7 +232,7 @@ "mcp": { "deepwiki": { "type": "remote", - "url": "https://mcp.deepwiki.com/sse", + "url": "https://mcp.deepwiki.com/mcp", "enabled": True, "oauth": False }, From d1ad4e2c3cad1af1cc9fad1eb2890fee1658da49 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 19:15:47 -0400 Subject: [PATCH 20/21] security: fail closed when app_owner can't be resolved (#57) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the Databricks SDK fails to resolve the token owner at startup, auth was failing open — allowing unauthenticated access to the terminal and all coding agents. Now fails closed on Databricks Apps: if app_owner is None, deny all access. Fail-open is only allowed for local development (detected by absence of DATABRICKS_APP_PORT and /app/python/source_code). Also denies access when no user identity is in the request headers on Databricks Apps (shouldn't happen, but defense in depth). Fixes #57 Co-Authored-By: Claude Opus 4.6 (1M context) --- app.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/app.py b/app.py index dc99953..12fb96a 100644 --- a/app.py +++ b/app.py @@ -302,16 +302,33 @@ def get_request_user(): request.headers.get("X-Databricks-User-Email") +def _is_databricks_apps(): + """Detect if we're running on Databricks Apps (not local dev).""" + return os.environ.get("DATABRICKS_APP_PORT") or os.path.isdir("/app/python/source_code") + + def check_authorization(): - """Check if the current user is authorized to access the app.""" - # If owner not set (local dev or SDK unavailable), allow access + """Check if the current user is authorized to access the app. + + Fails CLOSED on Databricks Apps: if we can't determine the owner, + deny all access rather than allowing unauthenticated terminal access. + Fails open only for local development. + Fixes: https://github.com/datasciencemonkey/coding-agents-databricks-apps/issues/57 + """ + # Fail closed on Databricks Apps if owner couldn't be resolved if not app_owner: - return True, None + if _is_databricks_apps(): + logger.error("SECURITY: app_owner not resolved — denying all access (fail-closed)") + return False, "unknown" + return True, None # Local dev only current_user = get_request_user() # If no user identity in request (local dev), allow access if not current_user: + if _is_databricks_apps(): + logger.warning("No user identity in request on Databricks Apps — denying access") + return False, "unknown" return True, None # Check if current user is the owner From 30a9e4eb5f6f0ee048214713b6000388c1249057 Mon Sep 17 00:00:00 2001 From: Sathish Gangichetty Date: Wed, 11 Mar 2026 19:17:19 -0400 Subject: [PATCH 21/21] security: add Content-Security-Policy header (#58) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds CSP that: - Restricts scripts to 'self' + 'unsafe-inline' (inline