From 6ef93cebcc211636f42657985a468172f769d82a Mon Sep 17 00:00:00 2001 From: Sathish Krishnan <10681383+SatyKrish@users.noreply.github.com> Date: Sat, 25 Apr 2026 20:58:30 -0400 Subject: [PATCH] =?UTF-8?q?Fix=20README=E2=86=92bundle=20warehouse=20drift?= =?UTF-8?q?;=20materialize=20VS=20index=20on=20first=20deploy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb DOCINTEL_WAREHOUSE_ID through bundle as --var warehouse_id in bootstrap + CI so dashboard/serving env match wait_for_kpis and log_and_register, instead of falling back to the lookup-by-name default that fails in workspaces lacking "Serverless Starter Warehouse". Add an explicit `bundle run index_refresh` after stage-2 first-deploy because the table_update trigger doesn't fire retroactively on rows already produced by the stage-2 pipeline run. README fixes: include pytest in install line, add DOCINTEL_WAREHOUSE_ID + .venv/bin/python to the steady-state agent registration command, and correct the env-var-table description (the var isn't read by the eval). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/deploy.yml | 13 +++++++++---- README.md | 10 ++++++---- scripts/bootstrap-dev.sh | 28 ++++++++++++++++++++++------ 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index d91b697..2609a2d 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,7 +17,8 @@ jobs: env: DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }} DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} - run: databricks bundle validate --strict -t dev + DOCINTEL_WAREHOUSE_ID: ${{ vars.DOCINTEL_WAREHOUSE_ID }} + run: databricks bundle validate --strict -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" deploy-dev: # CI assumes steady-state: the first-ever bring-up of a workspace must be @@ -47,7 +48,11 @@ jobs: run: pip install -r agent/requirements.txt -r evals/requirements.txt - name: Deploy bundle (full — consumers already exist in steady-state) - run: databricks bundle deploy -t dev + # Pin warehouse_id so the dashboard + serving env match what + # wait_for_kpis / log_and_register use. Without --var, the bundle + # falls back to its `lookup: warehouse: Serverless Starter Warehouse` + # default and silently picks a different ID. + run: databricks bundle deploy -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" - name: Wait for Lakebase instance to be AVAILABLE # Lakebase already exists in steady-state but a config change can @@ -83,7 +88,7 @@ while True: "dbfs:/Volumes/${DOCINTEL_CATALOG}/${DOCINTEL_SCHEMA}/raw_filings/" \ --overwrite done - databricks bundle run -t dev doc_intel_pipeline + databricks bundle run -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" doc_intel_pipeline python scripts/wait_for_kpis.py --min-rows 3 --timeout 900 # --serving-endpoint repoints the existing endpoint to the new # model version in-place (steady-state idempotent operation). 
@@ -107,7 +112,7 @@ while True: # Databricks Apps deploy docs: # https://docs.databricks.com/aws/en/dev-tools/databricks-apps/deploy # `bundle deploy` alone uploads code but doesn't apply config/restart. - run: databricks bundle run -t dev analyst_app + run: databricks bundle run -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" analyst_app - name: Verify OBO scopes survived deploy # `bundle run` may wipe user_api_scopes (documented destructive-update diff --git a/README.md b/README.md index 471ae7b..030ee52 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ databricks auth profiles # verify the DEFAULT profile is configured git clone https://github.com/<your-github-username>/databricks-document-intelligence-agent.git cd databricks-document-intelligence-agent python -m venv .venv -.venv/bin/pip install -r agent/requirements.txt -r evals/requirements.txt +.venv/bin/pip install -r agent/requirements.txt -r evals/requirements.txt pytest ``` ### 2. Discover your workspace IDs @@ -213,8 +213,10 @@ databricks bundle run -t dev analyst_app # apply app config # Agent code changes (agent/*.py): register a new model version # and repoint the existing serving endpoint in-place. -DOCINTEL_CATALOG=workspace DOCINTEL_SCHEMA=docintel_10k_dev \ - python agent/log_and_register.py --target dev --serving-endpoint analyst-agent-dev +DOCINTEL_CATALOG=workspace \ +DOCINTEL_SCHEMA=docintel_10k_dev \ +DOCINTEL_WAREHOUSE_ID=<your-warehouse-id> \ + .venv/bin/python agent/log_and_register.py --target dev --serving-endpoint analyst-agent-dev # Pipeline SQL changes that need to re-process existing filings databricks bundle run -t dev doc_intel_pipeline @@ -596,7 +598,7 @@ Override via `--var name=value` on any `bundle` command. 
|---|---|---| | `DOCINTEL_CATALOG` | yes | Bootstrap, CI, eval | | `DOCINTEL_SCHEMA` | yes | Same | -| `DOCINTEL_WAREHOUSE_ID` | yes | Bootstrap kpi-poll, eval slicer | +| `DOCINTEL_WAREHOUSE_ID` | yes | Bootstrap (passed to bundle as `--var warehouse_id`, used by kpi-poll + smoke); `agent/log_and_register.py` (auth-policy SQL warehouse resource); `agent/tools.py` UC Function tool | | `DOCINTEL_TARGET` | no (default `dev`) | Bootstrap | | `DOCINTEL_ANALYST_GROUP` | no (default `account users`) | UC grants in bootstrap + CI | | `DOCINTEL_WAIT_SECONDS` | no (default 600) | Bootstrap KPI-table poll timeout | diff --git a/scripts/bootstrap-dev.sh b/scripts/bootstrap-dev.sh index 61bbc3c..7ee3cf2 100755 --- a/scripts/bootstrap-dev.sh +++ b/scripts/bootstrap-dev.sh @@ -66,6 +66,13 @@ if [[ "${DOCINTEL_FORCE_LOCK:-0}" == "1" ]]; then DEPLOY_FLAGS+=(--force-lock) fi +# Pin the bundle's `warehouse_id` variable to the user-selected ID so the +# dashboard + serving-endpoint env match wait_for_kpis / log_and_register. +# Without this, the bundle falls back to its `lookup: warehouse: Serverless +# Starter Warehouse` default — which fails validation in workspaces lacking +# that named warehouse, and silently picks a different ID otherwise. +VAR_FLAGS=(--var "warehouse_id=$DOCINTEL_WAREHOUSE_ID") + REPO_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" cd "$REPO_ROOT" export PYTHONPATH="$REPO_ROOT${PYTHONPATH:+:$PYTHONPATH}" @@ -209,7 +216,7 @@ if [[ "$MODE" == "first" ]]; then mv "$f" "$f.skip" done - databricks bundle deploy -t "$TARGET" "${DEPLOY_FLAGS[@]}" || \ + databricks bundle deploy -t "$TARGET" "${VAR_FLAGS[@]}" "${DEPLOY_FLAGS[@]}" || \ die "stage-1 deploy failed (foundation should be self-contained — investigate)" restore_consumers @@ -217,7 +224,7 @@ if [[ "$MODE" == "first" ]]; then log "step 2/6: producing data" upload_samples - databricks bundle run -t "$TARGET" "$PIPELINE_KEY" || \ + databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" "$PIPELINE_KEY" || \ die "pipeline run failed — inspect SDP UI before retrying" "$PYTHON" scripts/wait_for_kpis.py --min-rows 1 --timeout "$WAIT_SECONDS" || \ die "timed out waiting for $KPI_TABLE" @@ -226,18 +233,27 @@ if [[ "$MODE" == "first" ]]; then wait_for_lakebase_available log "step 3/6: stage-2 deploy (full bundle — consumers join the foundation)" - databricks bundle deploy -t "$TARGET" "${DEPLOY_FLAGS[@]}" || \ + databricks bundle deploy -t "$TARGET" "${VAR_FLAGS[@]}" "${DEPLOY_FLAGS[@]}" || \ die "stage-2 deploy failed; check logs" + # The index_refresh job is created by stage-2 deploy and is `table_update`- + # triggered. Triggers do not fire retroactively on the rows the pipeline + # produced in stage 2, so we have to materialize the Vector Search index + # explicitly the first time. sync_index.py is create-if-missing/sync-if- + # exists, so this is idempotent on subsequent runs. 
+ log "step 3.5/6: triggering initial Vector Search index materialization" + databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" index_refresh || \ + log " warn: index_refresh failed; the table_update trigger will retry on the next pipeline run" + else # ─── Steady-state path: single full deploy + in-place data refresh ──────── log "step 1/6: full bundle deploy (steady-state — consumers already exist)" - databricks bundle deploy -t "$TARGET" "${DEPLOY_FLAGS[@]}" || \ + databricks bundle deploy -t "$TARGET" "${VAR_FLAGS[@]}" "${DEPLOY_FLAGS[@]}" || \ die "bundle deploy failed; if a prior deploy was interrupted, set DOCINTEL_FORCE_LOCK=1 and retry" log "step 2/6: refreshing data + repointing serving endpoint" upload_samples - databricks bundle run -t "$TARGET" "$PIPELINE_KEY" || \ + databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" "$PIPELINE_KEY" || \ die "pipeline run failed — inspect SDP UI before retrying" "$PYTHON" scripts/wait_for_kpis.py --min-rows 1 --timeout "$WAIT_SECONDS" || \ die "timed out waiting for $KPI_TABLE" @@ -250,7 +266,7 @@ fi # ─── Step 4: app run (both paths) ──────────────────────────────────────────── log "step 4/6: applying app config + restart" -databricks bundle run -t "$TARGET" analyst_app || \ +databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" analyst_app || \ log " warn: analyst_app run failed; retry manually with 'databricks bundle run -t $TARGET analyst_app'" # ─── Step 5: UC grants (idempotent) ──────────────────────────────────────────