From 6ef93cebcc211636f42657985a468172f769d82a Mon Sep 17 00:00:00 2001 From: Sathish Krishnan <10681383+SatyKrish@users.noreply.github.com> Date: Sat, 25 Apr 2026 20:58:30 -0400 Subject: [PATCH] =?UTF-8?q?Fix=20README=E2=86=92bundle=20warehouse=20drift?= =?UTF-8?q?;=20materialize=20VS=20index=20on=20first=20deploy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb DOCINTEL_WAREHOUSE_ID through bundle as --var warehouse_id in bootstrap + CI so dashboard/serving env match wait_for_kpis and log_and_register, instead of falling back to the lookup-by-name default that fails in workspaces lacking "Serverless Starter Warehouse". Add an explicit `bundle run index_refresh` after stage-2 first-deploy because the table_update trigger doesn't fire retroactively on rows already produced by the stage-2 pipeline run. README fixes: include pytest in install line, add DOCINTEL_WAREHOUSE_ID + .venv/bin/python to the steady-state agent registration command, and correct the env-var-table description (the var isn't read by the eval). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/deploy.yml | 13 +++++++++---- README.md | 10 ++++++---- scripts/bootstrap-dev.sh | 28 ++++++++++++++++++++++------ 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index d91b697..2609a2d 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,7 +17,8 @@ jobs: env: DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }} DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} - run: databricks bundle validate --strict -t dev + DOCINTEL_WAREHOUSE_ID: ${{ vars.DOCINTEL_WAREHOUSE_ID }} + run: databricks bundle validate --strict -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" deploy-dev: # CI assumes steady-state: the first-ever bring-up of a workspace must be @@ -47,7 +48,11 @@ jobs: run: pip install -r agent/requirements.txt -r evals/requirements.txt - name: Deploy bundle (full — consumers already exist in steady-state) - run: databricks bundle deploy -t dev + # Pin warehouse_id so the dashboard + serving env match what + # wait_for_kpis / log_and_register use. Without --var, the bundle + # falls back to its `lookup: warehouse: Serverless Starter Warehouse` + # default and silently picks a different ID. + run: databricks bundle deploy -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" - name: Wait for Lakebase instance to be AVAILABLE # Lakebase already exists in steady-state but a config change can @@ -83,7 +88,7 @@ while True: "dbfs:/Volumes/${DOCINTEL_CATALOG}/${DOCINTEL_SCHEMA}/raw_filings/" \ --overwrite done - databricks bundle run -t dev doc_intel_pipeline + databricks bundle run -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" doc_intel_pipeline python scripts/wait_for_kpis.py --min-rows 3 --timeout 900 # --serving-endpoint repoints the existing endpoint to the new # model version in-place (steady-state idempotent operation). 
@@ -107,7 +112,7 @@ while True: # Databricks Apps deploy docs: # https://docs.databricks.com/aws/en/dev-tools/databricks-apps/deploy # `bundle deploy` alone uploads code but doesn't apply config/restart. - run: databricks bundle run -t dev analyst_app + run: databricks bundle run -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" analyst_app - name: Verify OBO scopes survived deploy # `bundle run` may wipe user_api_scopes (documented destructive-update diff --git a/README.md b/README.md index 471ae7b..030ee52 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ databricks auth profiles # verify the DEFAULT profile is configured git clone https://github.com/<your-github-username>/databricks-document-intelligence-agent.git cd databricks-document-intelligence-agent python -m venv .venv -.venv/bin/pip install -r agent/requirements.txt -r evals/requirements.txt +.venv/bin/pip install -r agent/requirements.txt -r evals/requirements.txt pytest ``` ### 2. Discover your workspace IDs @@ -213,8 +213,10 @@ databricks bundle run -t dev analyst_app # apply app config # Agent code changes (agent/*.py): register a new model version # and repoint the existing serving endpoint in-place. -DOCINTEL_CATALOG=workspace DOCINTEL_SCHEMA=docintel_10k_dev \ - python agent/log_and_register.py --target dev --serving-endpoint analyst-agent-dev +DOCINTEL_CATALOG=workspace \ +DOCINTEL_SCHEMA=docintel_10k_dev \ +DOCINTEL_WAREHOUSE_ID=<your-warehouse-id> \ + .venv/bin/python agent/log_and_register.py --target dev --serving-endpoint analyst-agent-dev # Pipeline SQL changes that need to re-process existing filings databricks bundle run -t dev doc_intel_pipeline @@ -596,7 +598,7 @@ Override via `--var name=value` on any `bundle` command. 
|---|---|---| | `DOCINTEL_CATALOG` | yes | Bootstrap, CI, eval | | `DOCINTEL_SCHEMA` | yes | Same | -| `DOCINTEL_WAREHOUSE_ID` | yes | Bootstrap kpi-poll, eval slicer | +| `DOCINTEL_WAREHOUSE_ID` | yes | Bootstrap (passed to bundle as `--var warehouse_id`, used by kpi-poll + smoke); `agent/log_and_register.py` (auth-policy SQL warehouse resource); `agent/tools.py` UC Function tool | | `DOCINTEL_TARGET` | no (default `dev`) | Bootstrap | | `DOCINTEL_ANALYST_GROUP` | no (default `account users`) | UC grants in bootstrap + CI | | `DOCINTEL_WAIT_SECONDS` | no (default 600) | Bootstrap KPI-table poll timeout | diff --git a/scripts/bootstrap-dev.sh b/scripts/bootstrap-dev.sh index 61bbc3c..7ee3cf2 100755 --- a/scripts/bootstrap-dev.sh +++ b/scripts/bootstrap-dev.sh @@ -66,6 +66,13 @@ if [[ "${DOCINTEL_FORCE_LOCK:-0}" == "1" ]]; then DEPLOY_FLAGS+=(--force-lock) fi +# Pin the bundle's `warehouse_id` variable to the user-selected ID so the +# dashboard + serving-endpoint env match wait_for_kpis / log_and_register. +# Without this, the bundle falls back to its `lookup: warehouse: Serverless +# Starter Warehouse` default — which fails validation in workspaces lacking +# that named warehouse, and silently picks a different ID otherwise. +VAR_FLAGS=(--var "warehouse_id=$DOCINTEL_WAREHOUSE_ID") + REPO_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" cd "$REPO_ROOT" export PYTHONPATH="$REPO_ROOT${PYTHONPATH:+:$PYTHONPATH}" @@ -209,7 +216,7 @@ if [[ "$MODE" == "first" ]]; then mv "$f" "$f.skip" done - databricks bundle deploy -t "$TARGET" "${DEPLOY_FLAGS[@]}" || \ + databricks bundle deploy -t "$TARGET" "${VAR_FLAGS[@]}" "${DEPLOY_FLAGS[@]}" || \ die "stage-1 deploy failed (foundation should be self-contained — investigate)" restore_consumers @@ -217,7 +224,7 @@ if [[ "$MODE" == "first" ]]; then log "step 2/6: producing data" upload_samples - databricks bundle run -t "$TARGET" "$PIPELINE_KEY" || \ + databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" "$PIPELINE_KEY" || \ die "pipeline run failed — inspect SDP UI before retrying" "$PYTHON" scripts/wait_for_kpis.py --min-rows 1 --timeout "$WAIT_SECONDS" || \ die "timed out waiting for $KPI_TABLE" @@ -226,18 +233,27 @@ if [[ "$MODE" == "first" ]]; then wait_for_lakebase_available log "step 3/6: stage-2 deploy (full bundle — consumers join the foundation)" - databricks bundle deploy -t "$TARGET" "${DEPLOY_FLAGS[@]}" || \ + databricks bundle deploy -t "$TARGET" "${VAR_FLAGS[@]}" "${DEPLOY_FLAGS[@]}" || \ die "stage-2 deploy failed; check logs" + # The index_refresh job is created by stage-2 deploy and is `table_update`- + # triggered. Triggers do not fire retroactively on the rows the pipeline + # produced in stage 2, so we have to materialize the Vector Search index + # explicitly the first time. sync_index.py is create-if-missing/sync-if- + # exists, so this is idempotent on subsequent runs. 
+ log "step 3.5/6: triggering initial Vector Search index materialization" + databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" index_refresh || \ + log " warn: index_refresh failed; the table_update trigger will retry on the next pipeline run" + else # ─── Steady-state path: single full deploy + in-place data refresh ──────── log "step 1/6: full bundle deploy (steady-state — consumers already exist)" - databricks bundle deploy -t "$TARGET" "${DEPLOY_FLAGS[@]}" || \ + databricks bundle deploy -t "$TARGET" "${VAR_FLAGS[@]}" "${DEPLOY_FLAGS[@]}" || \ die "bundle deploy failed; if a prior deploy was interrupted, set DOCINTEL_FORCE_LOCK=1 and retry" log "step 2/6: refreshing data + repointing serving endpoint" upload_samples - databricks bundle run -t "$TARGET" "$PIPELINE_KEY" || \ + databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" "$PIPELINE_KEY" || \ die "pipeline run failed — inspect SDP UI before retrying" "$PYTHON" scripts/wait_for_kpis.py --min-rows 1 --timeout "$WAIT_SECONDS" || \ die "timed out waiting for $KPI_TABLE" @@ -250,7 +266,7 @@ fi # ─── Step 4: app run (both paths) ──────────────────────────────────────────── log "step 4/6: applying app config + restart" -databricks bundle run -t "$TARGET" analyst_app || \ +databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" analyst_app || \ log " warn: analyst_app run failed; retry manually with 'databricks bundle run -t $TARGET analyst_app'" # ─── Step 5: UC grants (idempotent) ──────────────────────────────────────────