From 855c89e8decf2a7b32fd95c1f2752bb5c99bc78a Mon Sep 17 00:00:00 2001 From: Steven Tan Date: Thu, 5 Mar 2026 20:57:43 +0800 Subject: [PATCH 1/3] Improve Unity Catalog, Structured Streaming, Vector Search skills; add Terraform skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unity Catalog (addresses #103): - Expand SKILL.md from 120 to 170+ lines with full MCP tool table and governance quick start - Add 4 new reference files documenting 7 previously undocumented MCP tools: 1-objects-and-governance.md (manage_uc_objects, manage_uc_grants) 2-tags-and-classification.md (manage_uc_tags) 3-security-policies.md (manage_uc_security_policies) 4-sharing-and-federation.md (manage_uc_sharing, manage_uc_connections, manage_uc_storage) Structured Streaming: - Expand SKILL.md from 66 to 247 lines — was just a table of contents despite having 8 good reference files - Add actionable quick starts (Kafka-to-Delta, foreachBatch MERGE, availableNow) - Add trigger selection guide, watermark essentials, join patterns, common issues table Vector Search (addresses #106): - Fix MCP tool names: replace non-existent consolidated tools with actual individual tools (create_vs_endpoint, list_vs_endpoints, sync_vs_index, upsert_vs_data, etc.) - Add end-to-end-rag.md: complete walkthrough from source table to agent integration - Add columns_to_sync and filter syntax guidance Terraform (addresses #145): - New skill with SKILL.md + 4 reference files (1,478 lines total) - Covers provider auth (AWS/Azure/GCP), core resources, Unity Catalog IaC, best practices - Includes modules, CI/CD patterns, state management, and multi-environment structure All MCP tool examples were validated against a live Databricks workspace. 
--- .../SKILL.md | 244 ++++++++++-- .../1-provider-and-auth.md | 207 ++++++++++ .../databricks-terraform/2-core-resources.md | 350 +++++++++++++++++ .../databricks-terraform/3-unity-catalog.md | 336 ++++++++++++++++ .../databricks-terraform/4-best-practices.md | 369 ++++++++++++++++++ .../databricks-terraform/SKILL.md | 216 ++++++++++ .../1-objects-and-governance.md | 311 +++++++++++++++ .../2-tags-and-classification.md | 205 ++++++++++ .../3-security-policies.md | 189 +++++++++ .../4-sharing-and-federation.md | 314 +++++++++++++++ .../databricks-unity-catalog/SKILL.md | 166 +++++--- .../databricks-vector-search/SKILL.md | 34 +- .../end-to-end-rag.md | 250 ++++++++++++ 13 files changed, 3090 insertions(+), 101 deletions(-) create mode 100644 databricks-skills/databricks-terraform/1-provider-and-auth.md create mode 100644 databricks-skills/databricks-terraform/2-core-resources.md create mode 100644 databricks-skills/databricks-terraform/3-unity-catalog.md create mode 100644 databricks-skills/databricks-terraform/4-best-practices.md create mode 100644 databricks-skills/databricks-terraform/SKILL.md create mode 100644 databricks-skills/databricks-unity-catalog/1-objects-and-governance.md create mode 100644 databricks-skills/databricks-unity-catalog/2-tags-and-classification.md create mode 100644 databricks-skills/databricks-unity-catalog/3-security-policies.md create mode 100644 databricks-skills/databricks-unity-catalog/4-sharing-and-federation.md create mode 100644 databricks-skills/databricks-vector-search/end-to-end-rag.md diff --git a/databricks-skills/databricks-spark-structured-streaming/SKILL.md b/databricks-skills/databricks-spark-structured-streaming/SKILL.md index b1f59306..2d39d3b5 100644 --- a/databricks-skills/databricks-spark-structured-streaming/SKILL.md +++ b/databricks-skills/databricks-spark-structured-streaming/SKILL.md @@ -1,23 +1,59 @@ --- name: databricks-spark-structured-streaming -description: Comprehensive guide to Spark Structured Streaming 
for production workloads. Use when building streaming pipelines, implementing real-time data processing, handling stateful operations, or optimizing streaming performance. +description: "Comprehensive guide to Spark Structured Streaming for production workloads. Use when building streaming pipelines, implementing real-time data processing, handling stateful operations, or optimizing streaming performance." --- # Spark Structured Streaming -Production-ready streaming pipelines with Spark Structured Streaming. This skill provides navigation to detailed patterns and best practices. +Production-ready streaming pipelines with Spark Structured Streaming on Databricks. -## Quick Start +## When to Use This Skill + +Use this skill when: +- Building **Kafka-to-Delta** or **Kafka-to-Kafka** streaming pipelines +- Implementing **stream-stream joins** or **stream-static joins** +- Configuring **watermarks**, **state stores**, or **RocksDB** for stateful operations +- Choosing between **processingTime**, **availableNow**, and **Real-Time Mode** triggers +- Optimizing **streaming costs** (trigger tuning, cluster sizing, scheduled streaming) +- Writing **foreachBatch MERGE** patterns for upserts +- Managing **checkpoints** (location, recovery, migration) +- Troubleshooting streaming issues (lag, state bloat, checkpoint corruption) + +## Reference Files + +| Topic | File | When to Read | +|-------|------|--------------| +| Kafka Streaming | [kafka-streaming.md](kafka-streaming.md) | Kafka-to-Delta ingestion, Kafka-to-Kafka, Real-Time Mode, authentication | +| Stream-Stream Joins | [stream-stream-joins.md](stream-stream-joins.md) | Joining two streams with watermarks and time-range conditions | +| Stream-Static Joins | [stream-static-joins.md](stream-static-joins.md) | Enriching streams with dimension tables, broadcast hints | +| Multi-Sink Writes | [multi-sink-writes.md](multi-sink-writes.md) | Writing one stream to multiple Delta tables in parallel | +| Merge Operations | 
[merge-operations.md](merge-operations.md) | foreachBatch MERGE, parallel merges, deduplication | +| Checkpoints | [checkpoint-best-practices.md](checkpoint-best-practices.md) | Checkpoint location, recovery, migration, cleanup | +| Stateful Operations | [stateful-operations.md](stateful-operations.md) | Watermarks, state stores, RocksDB, state monitoring | +| Triggers & Cost | [trigger-and-cost-optimization.md](trigger-and-cost-optimization.md) | Trigger selection, cost optimization, cluster right-sizing | +| Best Practices | [streaming-best-practices.md](streaming-best-practices.md) | Production checklist, beginner through expert tips | + +--- + +## Quick Start: Kafka to Delta ```python from pyspark.sql.functions import col, from_json +from pyspark.sql.types import StructType, StructField, StringType, TimestampType + +schema = StructType([ + StructField("event_id", StringType()), + StructField("user_id", StringType()), + StructField("event_type", StringType()), + StructField("event_time", TimestampType()), +]) -# Basic Kafka to Delta streaming -df = (spark - .readStream +df = (spark.readStream .format("kafka") .option("kafka.bootstrap.servers", "broker:9092") - .option("subscribe", "topic") + .option("subscribe", "events") + .option("startingOffsets", "earliest") + .option("minPartitions", "6") .load() .select(from_json(col("value").cast("string"), schema).alias("data")) .select("data.*") @@ -26,40 +62,186 @@ df = (spark df.writeStream \ .format("delta") \ .outputMode("append") \ - .option("checkpointLocation", "/Volumes/catalog/checkpoints/stream") \ + .option("checkpointLocation", "/Volumes/catalog/schema/checkpoints/events") \ .trigger(processingTime="30 seconds") \ - .start("/delta/target_table") + .toTable("catalog.schema.bronze_events") +``` + +## Quick Start: foreachBatch MERGE (Upserts) + +```python +from delta.tables import DeltaTable + +def upsert_batch(batch_df, batch_id): + target = DeltaTable.forName(spark, "catalog.schema.customers") + 
(target.alias("t") + .merge(batch_df.alias("s"), "t.customer_id = s.customer_id") + .whenMatchedUpdateAll() + .whenNotMatchedInsertAll() + .execute()) + +(spark.readStream + .format("kafka") + .option("kafka.bootstrap.servers", "broker:9092") + .option("subscribe", "customer-updates") + .load() + .select(from_json(col("value").cast("string"), customer_schema).alias("data")) + .select("data.*") + .writeStream + .foreachBatch(upsert_batch) + .option("checkpointLocation", "/Volumes/catalog/schema/checkpoints/customers") + .trigger(processingTime="1 minute") + .start() +) +``` + +## Quick Start: availableNow (Scheduled Streaming) + +```python +(spark.readStream + .format("cloudFiles") + .option("cloudFiles.format", "json") + .option("cloudFiles.schemaLocation", "/Volumes/catalog/schema/schemas/events") + .load("/Volumes/catalog/schema/landing/events/") + .writeStream + .format("delta") + .option("checkpointLocation", "/Volumes/catalog/schema/checkpoints/events") + .trigger(availableNow=True) + .toTable("catalog.schema.bronze_events") +) +``` + +Schedule via Databricks Jobs every 15 minutes for near-real-time at a fraction of continuous cost. + +--- + +## Trigger Selection Guide + +| Latency Requirement | Trigger | Cost | Use Case | +|---------------------|---------|------|----------| +| < 800ms | `realTime=True` | $$$ | Real-time analytics, alerts | +| 1–30 seconds | `processingTime="N seconds"` | $$ | Near real-time dashboards | +| 15–60 minutes | `availableNow=True` (scheduled) | $ | Batch-style SLA | +| > 1 hour | `availableNow=True` (scheduled) | $ | ETL pipelines | + +See [trigger-and-cost-optimization.md](trigger-and-cost-optimization.md) for detailed cost calculations and cluster sizing. + +--- + +## Watermark Essentials + +Watermarks are **required** for stateful operations (joins, aggregations, deduplication) to bound state and handle late data. 
+ +```python +df.withWatermark("event_time", "10 minutes") +``` + +| Watermark | Effect | Use Case | +|-----------|--------|----------| +| `"5 minutes"` | Low latency, tight state | Real-time analytics | +| `"10 minutes"` | Moderate latency | General streaming | +| `"1 hour"` | High completeness | Financial transactions | +| `"24 hours"` | Batch-like completeness | Backfill scenarios | + +**Rule of thumb**: Start with 2–3x your p95 event latency. Monitor late data rate and adjust. + +See [stateful-operations.md](stateful-operations.md) for RocksDB configuration, state monitoring, and advanced patterns. + +--- + +## Stream Join Patterns + +### Stream-Stream Join + +Both sides must have watermarks. Use time-range conditions to bound state: + +```python +orders = spark.readStream.table("catalog.schema.orders") \ + .withWatermark("order_time", "10 minutes") + +payments = spark.readStream.table("catalog.schema.payments") \ + .withWatermark("payment_time", "10 minutes") + +joined = orders.join(payments, + expr(""" + order_id = payment_order_id + AND payment_time >= order_time + AND payment_time <= order_time + INTERVAL 1 HOUR + """), + "inner" +) ``` -## Core Patterns +See [stream-stream-joins.md](stream-stream-joins.md) for left outer joins, multiple join keys, and monitoring. 
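The watermark guidance above says to monitor the late data rate before tuning. A minimal sketch of that check, assuming the field names of Spark's `StreamingQueryProgress` JSON (stateful operators report `numRowsDroppedByWatermark` in `stateOperators`), run against `query.lastProgress`:

```python
# Sketch: total rows dropped as late data across all stateful operators,
# read from a StreamingQuery's lastProgress dict (may be None before the
# first batch completes).
def dropped_by_watermark(progress: dict) -> int:
    if not progress:
        return 0
    return sum(
        op.get("numRowsDroppedByWatermark", 0)
        for op in progress.get("stateOperators", [])
    )

# Poll periodically on a running query; a persistently non-zero value
# suggests widening the watermark:
#   dropped_by_watermark(query.lastProgress)
```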
-| Pattern | Description | Reference | -|---------|-------------|-----------| -| **Kafka Streaming** | Kafka to Delta, Kafka to Kafka, Real-Time Mode | See [kafka-streaming.md](kafka-streaming.md) | -| **Stream Joins** | Stream-stream joins, stream-static joins | See [stream-stream-joins.md](stream-stream-joins.md), [stream-static-joins.md](stream-static-joins.md) | -| **Multi-Sink Writes** | Write to multiple tables, parallel merges | See [multi-sink-writes.md](multi-sink-writes.md) | -| **Merge Operations** | MERGE performance, parallel merges, optimizations | See [merge-operations.md](merge-operations.md) | +### Stream-Static Join -## Configuration +Use broadcast hints for small dimension tables: -| Topic | Description | Reference | -|-------|-------------|-----------| -| **Checkpoints** | Checkpoint management and best practices | See [checkpoint-best-practices.md](checkpoint-best-practices.md) | -| **Stateful Operations** | Watermarks, state stores, RocksDB configuration | See [stateful-operations.md](stateful-operations.md) | -| **Trigger & Cost** | Trigger selection, cost optimization, RTM | See [trigger-and-cost-optimization.md](trigger-and-cost-optimization.md) | +```python +from pyspark.sql.functions import broadcast -## Best Practices +dim_products = spark.table("catalog.schema.products") -| Topic | Description | Reference | -|-------|-------------|-----------| -| **Production Checklist** | Comprehensive best practices | See [streaming-best-practices.md](streaming-best-practices.md) | +enriched = stream_df.join( + broadcast(dim_products), + "product_id", + "left" +) +``` + +See [stream-static-joins.md](stream-static-joins.md) for refresh strategies and cache invalidation. 
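One refresh strategy for the staleness problem can be sketched with foreachBatch: re-resolving the dimension table inside the batch function makes every micro-batch read the dimension's current Delta snapshot, instead of the snapshot captured when the stream was planned. Table and column names here are hypothetical, and `DataFrame.sparkSession` assumes a recent PySpark version.

```python
# Sketch (hypothetical names): re-read the dimension table on every
# micro-batch so enrichment never goes stale.
def enrich_batch(batch_df, batch_id):
    # Fresh read per batch, not the snapshot from stream start
    dims = batch_df.sparkSession.table("catalog.schema.products")
    (batch_df.join(dims, "product_id", "left")
        .write.mode("append")
        .saveAsTable("catalog.schema.enriched_events"))
```

Attach it with `.writeStream.foreachBatch(enrich_batch)`; the per-batch read costs a small planning overhead in exchange for up-to-date dimensions.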
+ +--- + +## Checkpoint Best Practices + +- **Always use UC Volumes** for checkpoint storage: `/Volumes/catalog/schema/volume/checkpoints/stream_name` +- **One checkpoint per stream** — never share checkpoints between streams +- **Never delete checkpoints** of a running stream — the stream loses its source offsets and must reprocess from the start +- **Fixed-size clusters** — autoscaling causes task redistribution issues with streaming + +See [checkpoint-best-practices.md](checkpoint-best-practices.md) for migration, recovery, and cleanup patterns. + +--- ## Production Checklist -- [ ] Checkpoint location is persistent (UC volumes, not DBFS) +- [ ] Checkpoint location is persistent (UC Volumes, not DBFS) - [ ] Unique checkpoint per stream - [ ] Fixed-size cluster (no autoscaling for streaming) -- [ ] Monitoring configured (input rate, lag, batch duration) -- [ ] Exactly-once verified (txnVersion/txnAppId) -- [ ] Watermark configured for stateful operations -- [ ] Left joins for stream-static (not inner) +- [ ] Trigger interval explicitly set (the default runs micro-batches back-to-back) +- [ ] Monitoring configured (input rate, processing rate, batch duration) +- [ ] Watermark configured for all stateful operations +- [ ] Schema defined explicitly (not inferred) for Kafka sources +- [ ] `minPartitions` set to match Kafka partition count +- [ ] Error handling in foreachBatch (idempotent writes) +- [ ] Exactly-once verified (txnVersion/txnAppId for foreachBatch MERGE) + +See [streaming-best-practices.md](streaming-best-practices.md) for the full beginner-to-expert checklist. 
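The exactly-once checklist item can be sketched as follows: Delta's idempotent writer options `txnAppId` and `txnVersion` let a retried micro-batch be skipped rather than double-appended. The stream ID and table name below are hypothetical.

```python
# Sketch (hypothetical names): idempotent foreachBatch append using
# Delta's transactional writer options. If the same (txnAppId,
# txnVersion) pair is replayed after a failure, Delta skips the write.
def write_exactly_once(batch_df, batch_id):
    (batch_df.write.format("delta")
        .option("txnAppId", "events_stream")  # stable ID for this stream
        .option("txnVersion", batch_id)       # monotonically increasing per batch
        .mode("append")
        .saveAsTable("catalog.schema.bronze_events"))
```

Pair this with `.writeStream.foreachBatch(write_exactly_once)`; the `batch_id` Spark passes in is what makes replayed batches detectable.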
+ +--- + +## Common Issues + +| Issue | Cause | Solution | +|-------|-------|----------| +| **Increasing batch duration** | State store growing unbounded | Add a watermark or shorten its duration; enable RocksDB | +| **High S3/ADLS listing costs** | No trigger interval set | Always set `processingTime` or `availableNow` | +| **Duplicate records** | Missing deduplication in MERGE | Use `dropDuplicates` or add dedup logic in foreachBatch | +| **Stream-static join stale data** | Static DataFrame cached at start | Restart stream periodically or use Delta change feed | +| **Checkpoint corruption** | Cluster terminated mid-write | Delete last incomplete batch folder; restart stream | +| **OOM on state operations** | In-memory state store too large | Switch to RocksDB state store provider | +| **Late data dropped** | Watermark too aggressive | Increase watermark duration; monitor late event rate | + +## Related Skills + +- **[databricks-spark-declarative-pipelines](../databricks-spark-declarative-pipelines/SKILL.md)** — higher-level streaming with DLT/SDP (streaming tables, Auto Loader) +- **[databricks-jobs](../databricks-jobs/SKILL.md)** — scheduling `availableNow` streaming jobs +- **[databricks-unity-catalog](../databricks-unity-catalog/SKILL.md)** — checkpoint storage in UC Volumes, system tables for monitoring + +## Resources + +- [Structured Streaming Programming Guide](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html) +- [Databricks Structured Streaming Docs](https://docs.databricks.com/en/structured-streaming/index.html) +- [Real-Time Mode](https://docs.databricks.com/en/structured-streaming/real-time.html) diff --git a/databricks-skills/databricks-terraform/1-provider-and-auth.md b/databricks-skills/databricks-terraform/1-provider-and-auth.md new file mode 100644 index 00000000..5603352e --- /dev/null +++ b/databricks-skills/databricks-terraform/1-provider-and-auth.md @@ -0,0 +1,207 @@ +# Provider Configuration & Authentication + +## 
Provider Block + +```hcl +terraform { + required_providers { + databricks = { + source = "databricks/databricks" + version = "~> 1.110" + } + } +} +``` + +Always pin the provider version to avoid unexpected breaking changes. + +--- + +## Authentication Patterns + +### Personal Access Token (Simplest) + +```hcl +provider "databricks" { + host = var.databricks_host + token = var.databricks_token +} +``` + +```hcl +variable "databricks_host" { + description = "Databricks workspace URL (e.g., https://adb-1234567890.1.azuredatabricks.net)" + type = string +} + +variable "databricks_token" { + description = "Databricks personal access token" + type = string + sensitive = true +} +``` + +### Environment Variables + +```bash +export DATABRICKS_HOST="https://my-workspace.cloud.databricks.com" +export DATABRICKS_TOKEN="dapi..." +``` + +```hcl +provider "databricks" {} +``` + +### Databricks CLI Profile + +```hcl +provider "databricks" { + profile = "my-profile" +} +``` + +Uses `~/.databrickscfg` profile configuration. + +--- + +## AWS Authentication + +### Service Principal (OAuth M2M) — Recommended for CI/CD + +```hcl +provider "databricks" { + host = var.databricks_host + client_id = var.client_id + client_secret = var.client_secret +} +``` + +### AWS IAM Role (Account-Level) + +```hcl +provider "databricks" { + host = "https://accounts.cloud.databricks.com" + account_id = var.databricks_account_id +} +``` + +--- + +## Azure Authentication + +### Service Principal with Client Secret + +```hcl +provider "databricks" { + host = var.databricks_host + azure_tenant_id = var.azure_tenant_id + azure_client_id = var.azure_client_id + azure_client_secret = var.azure_client_secret +} +``` + +### Azure CLI + +```hcl +provider "databricks" { + host = var.databricks_host +} +``` + +Requires `az login` before running Terraform. 
+ +### Managed Identity (for Azure VMs / Azure DevOps) + +```hcl +provider "databricks" { + host = var.databricks_host + azure_use_msi = true + azure_client_id = var.msi_client_id # Optional for user-assigned MI +} +``` + +--- + +## GCP Authentication + +### Service Account + +```hcl +provider "databricks" { + host = var.databricks_host + google_service_account = var.google_service_account +} +``` + +### Google Default Credentials + +```hcl +provider "databricks" { + host = var.databricks_host +} +``` + +Requires `gcloud auth application-default login`. + +--- + +## Multi-Provider Configuration + +Use aliases when managing resources across workspaces or account + workspace levels. + +### Account + Workspace + +```hcl +provider "databricks" { + alias = "account" + host = "https://accounts.cloud.databricks.com" + account_id = var.databricks_account_id + client_id = var.client_id + client_secret = var.client_secret +} + +provider "databricks" { + alias = "workspace" + host = var.workspace_host + client_id = var.client_id + client_secret = var.client_secret +} + +resource "databricks_group" "admins" { + provider = databricks.account + display_name = "workspace-admins" +} + +resource "databricks_cluster" "shared" { + provider = databricks.workspace + cluster_name = "shared" + # ... 
+} +``` + +### Multiple Workspaces + +```hcl +provider "databricks" { + alias = "dev" + host = var.dev_workspace_host + token = var.dev_token +} + +provider "databricks" { + alias = "prod" + host = var.prod_workspace_host + token = var.prod_token +} +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **"Unauthorized"** | Verify `host` URL includes `https://` and has no trailing slash | +| **Token expired** | Regenerate PAT; for CI/CD prefer service principal with OAuth | +| **Azure auth fails** | Ensure service principal has "Contributor" role on the workspace resource | +| **Account vs workspace confusion** | Account-level resources (groups, metastores) need account-level auth; workspace resources need workspace-level auth | diff --git a/databricks-skills/databricks-terraform/2-core-resources.md b/databricks-skills/databricks-terraform/2-core-resources.md new file mode 100644 index 00000000..c1c8b90f --- /dev/null +++ b/databricks-skills/databricks-terraform/2-core-resources.md @@ -0,0 +1,350 @@ +# Core Resources + +Common Databricks resources managed via Terraform. 
+ +## Clusters + +### All-Purpose Cluster + +```hcl +data "databricks_spark_version" "latest_lts" { + long_term_support = true +} + +data "databricks_node_type" "smallest" { + local_disk = true +} + +resource "databricks_cluster" "shared" { + cluster_name = "shared-analytics" + spark_version = data.databricks_spark_version.latest_lts.id + node_type_id = data.databricks_node_type.smallest.id + autotermination_minutes = 30 + + autoscale { + min_workers = 1 + max_workers = 4 + } + + spark_conf = { + "spark.databricks.io.cache.enabled" = "true" + } + + custom_tags = { + "Team" = "data-engineering" + "Environment" = "production" + } +} +``` + +### Single-Node Cluster (ML / Development) + +```hcl +resource "databricks_cluster" "single_node" { + cluster_name = "ml-single-node" + spark_version = data.databricks_spark_version.latest_lts.id + node_type_id = "i3.xlarge" + autotermination_minutes = 60 + num_workers = 0 + + spark_conf = { + "spark.databricks.cluster.profile" = "singleNode" + "spark.master" = "local[*]" + } +} +``` + +--- + +## Jobs + +### Notebook Job with Schedule + +```hcl +resource "databricks_job" "daily_etl" { + name = "daily-etl" + + task { + task_key = "ingest" + + notebook_task { + notebook_path = "/Repos/team/etl/ingest" + base_parameters = { + "env" = "production" + } + } + + new_cluster { + spark_version = data.databricks_spark_version.latest_lts.id + node_type_id = data.databricks_node_type.smallest.id + num_workers = 4 + } + } + + schedule { + quartz_cron_expression = "0 0 6 * * ?" 
+ timezone_id = "UTC" + } + + email_notifications { + on_failure = ["team@company.com"] + } +} +``` + +### Multi-Task Job with Dependencies + +```hcl +resource "databricks_job" "pipeline" { + name = "data-pipeline" + + task { + task_key = "bronze" + notebook_task { + notebook_path = "/Repos/team/pipeline/bronze" + } + existing_cluster_id = databricks_cluster.shared.id + } + + task { + task_key = "silver" + depends_on { + task_key = "bronze" + } + notebook_task { + notebook_path = "/Repos/team/pipeline/silver" + } + existing_cluster_id = databricks_cluster.shared.id + } + + task { + task_key = "gold" + depends_on { + task_key = "silver" + } + notebook_task { + notebook_path = "/Repos/team/pipeline/gold" + } + existing_cluster_id = databricks_cluster.shared.id + } +} +``` + +### Python Script Job (Serverless) + +```hcl +resource "databricks_job" "serverless_etl" { + name = "serverless-etl" + + task { + task_key = "run" + + python_wheel_task { + package_name = "my_etl" + entry_point = "main" + } + + environment_key = "default" + } + + environment { + environment_key = "default" + + spec { + client = "1" + dependencies = [ + "my_etl==1.0.0" + ] + } + } +} +``` + +--- + +## SQL Warehouses + +```hcl +resource "databricks_sql_endpoint" "analytics" { + name = "analytics-warehouse" + cluster_size = "Small" + max_num_clusters = 2 + auto_stop_mins = 15 + + warehouse_type = "PRO" + + tags { + custom_tags { + key = "Team" + value = "analytics" + } + } +} +``` + +### Serverless SQL Warehouse + +```hcl +resource "databricks_sql_endpoint" "serverless" { + name = "serverless-warehouse" + cluster_size = "Small" + max_num_clusters = 1 + auto_stop_mins = 10 + enable_serverless_compute = true +} +``` + +--- + +## DLT / SDP Pipelines + +```hcl +resource "databricks_pipeline" "etl" { + name = "etl-pipeline" + target = "analytics.silver" + catalog = "analytics" + + library { + notebook { + path = "/Repos/team/pipelines/etl" + } + } + + continuous = false + development = false + + cluster 
{ + label = "default" + num_workers = 4 + } +} +``` + +--- + +## Model Serving + +```hcl +resource "databricks_model_serving" "llm_endpoint" { + name = "llm-endpoint" + + config { + served_entities { + entity_name = "catalog.schema.my_model" + entity_version = "1" + workload_size = "Small" + scale_to_zero_enabled = true + } + + auto_capture_config { + catalog_name = "analytics" + schema_name = "inference_logs" + table_name_prefix = "llm_endpoint" + enabled = true + } + } +} +``` + +--- + +## Secrets + +```hcl +resource "databricks_secret_scope" "app" { + name = "app-secrets" +} + +resource "databricks_secret" "api_key" { + scope = databricks_secret_scope.app.name + key = "api-key" + string_value = var.api_key +} +``` + +--- + +## Instance Pools + +```hcl +resource "databricks_instance_pool" "shared" { + instance_pool_name = "shared-pool" + node_type_id = data.databricks_node_type.smallest.id + + min_idle_instances = 0 + max_capacity = 20 + idle_instance_autotermination_minutes = 10 + + preloaded_spark_versions = [ + data.databricks_spark_version.latest_lts.id + ] +} + +resource "databricks_cluster" "pooled" { + cluster_name = "pooled-cluster" + spark_version = data.databricks_spark_version.latest_lts.id + instance_pool_id = databricks_instance_pool.shared.id + autotermination_minutes = 30 + num_workers = 2 +} +``` + +--- + +## Cluster Policies + +```hcl +resource "databricks_cluster_policy" "team_policy" { + name = "data-engineering-policy" + + definition = jsonencode({ + "autotermination_minutes" : { + "type" : "range", + "minValue" : 10, + "maxValue" : 120, + "defaultValue" : 30 + }, + "num_workers" : { + "type" : "range", + "minValue" : 1, + "maxValue" : 10 + }, + "node_type_id" : { + "type" : "allowlist", + "values" : ["i3.xlarge", "i3.2xlarge"] + }, + "custom_tags.Team" : { + "type" : "fixed", + "value" : "data-engineering" + } + }) +} +``` + +--- + +## Notebooks + +```hcl +resource "databricks_notebook" "etl" { + path = "/Repos/team/etl/ingest" + language = 
"PYTHON" + content_base64 = base64encode(<<-EOT + # Databricks notebook source + df = spark.read.format("json").load("/Volumes/catalog/schema/volume/data/") + df.write.mode("overwrite").saveAsTable("catalog.schema.bronze_events") + EOT + ) +} +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **Cluster creation fails with "node type not found"** | Use `databricks_node_type` data source instead of hardcoding | +| **Job fails with "notebook not found"** | Ensure notebook path exists; use `depends_on` if creating notebook in same config | +| **SQL warehouse stuck creating** | Check workspace quotas; serverless warehouses may need admin enablement | +| **Pipeline fails to start** | Verify catalog/schema exist and user has permissions | diff --git a/databricks-skills/databricks-terraform/3-unity-catalog.md b/databricks-skills/databricks-terraform/3-unity-catalog.md new file mode 100644 index 00000000..746ce7ef --- /dev/null +++ b/databricks-skills/databricks-terraform/3-unity-catalog.md @@ -0,0 +1,336 @@ +# Unity Catalog Resources + +Manage the full Unity Catalog hierarchy and governance via Terraform. 
+ +## Namespace Hierarchy + +``` +metastore (account-level) +└── catalog + └── schema + ├── table / view / materialized view + ├── volume (managed or external) + └── function +``` + +--- + +## Metastore (Account-Level) + +```hcl +resource "databricks_metastore" "primary" { + provider = databricks.account + name = "primary-metastore" + region = "us-west-2" + storage_root = "s3://my-metastore-bucket/metastore" + force_destroy = true +} + +resource "databricks_metastore_assignment" "default" { + provider = databricks.account + metastore_id = databricks_metastore.primary.id + workspace_id = var.workspace_id +} +``` + +--- + +## Catalogs + +```hcl +resource "databricks_catalog" "analytics" { + name = "analytics" + comment = "Production analytics catalog" + isolation_mode = "OPEN" +} + +resource "databricks_catalog" "sandbox" { + name = "sandbox" + comment = "Development sandbox" + properties = { + "environment" = "dev" + } +} +``` + +### Foreign Catalog (Lakehouse Federation) + +```hcl +resource "databricks_catalog" "postgres_erp" { + name = "erp" + comment = "Federated PostgreSQL ERP database" + connection_name = databricks_connection.postgres.name + + options = { + "database" = "erp_production" + } +} +``` + +--- + +## Schemas + +```hcl +resource "databricks_schema" "bronze" { + catalog_name = databricks_catalog.analytics.name + name = "bronze" + comment = "Raw ingestion layer" +} + +resource "databricks_schema" "silver" { + catalog_name = databricks_catalog.analytics.name + name = "silver" + comment = "Cleaned and conformed data" +} + +resource "databricks_schema" "gold" { + catalog_name = databricks_catalog.analytics.name + name = "gold" + comment = "Business-level aggregations" +} +``` + +### Medallion Pattern Module + +```hcl +variable "catalog_name" { + type = string +} + +variable "layers" { + type = list(string) + default = ["bronze", "silver", "gold"] +} + +resource "databricks_schema" "layer" { + for_each = toset(var.layers) + catalog_name = var.catalog_name 
+ name = each.value + comment = "${title(each.value)} data layer" +} +``` + +--- + +## Volumes + +```hcl +resource "databricks_volume" "raw_files" { + catalog_name = databricks_catalog.analytics.name + schema_name = databricks_schema.bronze.name + name = "raw_files" + volume_type = "MANAGED" + comment = "Raw ingestion files" +} + +resource "databricks_volume" "landing_zone" { + catalog_name = databricks_catalog.analytics.name + schema_name = databricks_schema.bronze.name + name = "landing_zone" + volume_type = "EXTERNAL" + storage_location = "s3://my-bucket/landing/" + comment = "External landing zone" +} +``` + +--- + +## Storage Credentials & External Locations + +```hcl +resource "databricks_storage_credential" "s3_access" { + name = "s3-analytics-credential" + + aws_iam_role { + role_arn = var.iam_role_arn + } + + comment = "Access to analytics S3 bucket" +} + +resource "databricks_external_location" "landing" { + name = "analytics-landing" + url = "s3://my-bucket/landing/" + credential_name = databricks_storage_credential.s3_access.name + comment = "Landing zone for raw data" +} +``` + +--- + +## Grants + +Grants use a single `databricks_grants` resource per securable object. 
+ +```hcl +resource "databricks_grants" "catalog" { + catalog = databricks_catalog.analytics.name + + grant { + principal = "data-engineering" + privileges = ["USE_CATALOG", "CREATE_SCHEMA"] + } + + grant { + principal = "data-analysts" + privileges = ["USE_CATALOG"] + } +} + +resource "databricks_grants" "schema" { + schema = "${databricks_catalog.analytics.name}.${databricks_schema.gold.name}" + + grant { + principal = "data-analysts" + privileges = ["USE_SCHEMA", "SELECT"] + } + + grant { + principal = "data-engineering" + privileges = ["USE_SCHEMA", "SELECT", "MODIFY", "CREATE_TABLE", "CREATE_VOLUME"] + } +} + +resource "databricks_grants" "volume" { + volume = "${databricks_catalog.analytics.name}.${databricks_schema.bronze.name}.${databricks_volume.raw_files.name}" + + grant { + principal = "data-engineering" + privileges = ["READ_VOLUME", "WRITE_VOLUME"] + } +} +``` + +### Grant on External Location + +```hcl +resource "databricks_grants" "ext_location" { + external_location = databricks_external_location.landing.id + + grant { + principal = "data-engineering" + privileges = ["CREATE_EXTERNAL_TABLE", "CREATE_EXTERNAL_VOLUME", "READ_FILES", "WRITE_FILES"] + } +} +``` + +--- + +## Connections (Lakehouse Federation) + +```hcl +resource "databricks_connection" "postgres" { + name = "postgres-erp" + connection_type = "POSTGRESQL" + comment = "ERP PostgreSQL database" + + options = { + "host" = var.postgres_host + "port" = "5432" + "user" = var.postgres_user + "password" = var.postgres_password + } +} +``` + +--- + +## Delta Sharing + +```hcl +resource "databricks_share" "partner_data" { + name = "partner-data-share" + + object { + name = "analytics.gold.quarterly_metrics" + data_object_type = "TABLE" + shared_as = "quarterly_metrics" + } +} + +resource "databricks_recipient" "partner" { + name = "acme-corp" + authentication_type = "TOKEN" + comment = "Acme Corp data team" +} + +resource "databricks_grants" "share_grant" { + share = 
databricks_share.partner_data.name + + grant { + principal = databricks_recipient.partner.name + privileges = ["SELECT"] + } +} +``` + +--- + +## Complete Unity Catalog Setup + +```hcl +locals { + catalog_name = "analytics" + layers = ["bronze", "silver", "gold"] + teams = { + "data-engineering" = { + catalog_privs = ["USE_CATALOG", "CREATE_SCHEMA"] + schema_privs = ["USE_SCHEMA", "SELECT", "MODIFY", "CREATE_TABLE"] + } + "data-analysts" = { + catalog_privs = ["USE_CATALOG"] + schema_privs = ["USE_SCHEMA", "SELECT"] + } + } +} + +resource "databricks_catalog" "main" { + name = local.catalog_name + comment = "Main analytics catalog" +} + +resource "databricks_schema" "layers" { + for_each = toset(local.layers) + catalog_name = databricks_catalog.main.name + name = each.value + comment = "${title(each.value)} data layer" +} + +resource "databricks_grants" "catalog_grants" { + catalog = databricks_catalog.main.name + + dynamic "grant" { + for_each = local.teams + content { + principal = grant.key + privileges = grant.value.catalog_privs + } + } +} + +resource "databricks_grants" "schema_grants" { + for_each = databricks_schema.layers + schema = "${databricks_catalog.main.name}.${each.value.name}" + + dynamic "grant" { + for_each = local.teams + content { + principal = grant.key + privileges = grant.value.schema_privs + } + } +} +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **"Catalog not found" on schema creation** | Ensure catalog resource is referenced (implicit dependency) or use `depends_on` | +| **Grants conflict** | Only one `databricks_grants` resource per securable object; combine all grants in one block | +| **External volume fails** | Storage credential and external location must exist first | +| **Metastore operations fail** | Metastore resources require account-level provider (`provider = databricks.account`) | +| **Import existing UC objects** | Use `terraform import databricks_catalog.name "catalog_name"` | diff --git 
a/databricks-skills/databricks-terraform/4-best-practices.md b/databricks-skills/databricks-terraform/4-best-practices.md new file mode 100644 index 00000000..84999c0d --- /dev/null +++ b/databricks-skills/databricks-terraform/4-best-practices.md @@ -0,0 +1,369 @@ +# Best Practices + +Project structure, modules, state management, and CI/CD patterns for Databricks Terraform. + +## Project Structure + +### Single Workspace + +``` +databricks-infra/ +├── main.tf # Provider config, data sources +├── variables.tf # Input variables +├── outputs.tf # Output values +├── terraform.tfvars # Variable values (gitignored) +├── clusters.tf # Cluster resources +├── jobs.tf # Job resources +├── unity-catalog.tf # UC hierarchy and grants +├── warehouses.tf # SQL warehouses +└── backend.tf # Remote state config +``` + +### Multi-Environment + +``` +databricks-infra/ +├── modules/ +│ ├── catalog/ # Reusable UC catalog module +│ │ ├── main.tf +│ │ ├── variables.tf +│ │ └── outputs.tf +│ ├── cluster/ # Reusable cluster module +│ └── job/ # Reusable job module +├── environments/ +│ ├── dev/ +│ │ ├── main.tf # Module calls with dev values +│ │ ├── variables.tf +│ │ ├── terraform.tfvars +│ │ └── backend.tf +│ ├── staging/ +│ └── prod/ +└── README.md +``` + +--- + +## Reusable Modules + +### Catalog Module + +```hcl +# modules/catalog/variables.tf +variable "catalog_name" { type = string } +variable "layers" { + type = list(string) + default = ["bronze", "silver", "gold"] +} +variable "teams" { + type = map(object({ + catalog_privileges = list(string) + schema_privileges = list(string) + })) +} + +# modules/catalog/main.tf +resource "databricks_catalog" "this" { + name = var.catalog_name + comment = "Managed by Terraform" +} + +resource "databricks_schema" "layers" { + for_each = toset(var.layers) + catalog_name = databricks_catalog.this.name + name = each.value +} + +resource "databricks_grants" "catalog" { + catalog = databricks_catalog.this.name + + dynamic "grant" { + for_each = 
var.teams + content { + principal = grant.key + privileges = grant.value.catalog_privileges + } + } +} + +# modules/catalog/outputs.tf +output "catalog_name" { value = databricks_catalog.this.name } +output "schema_names" { value = { for k, v in databricks_schema.layers : k => v.name } } +``` + +### Usage + +```hcl +module "analytics" { + source = "../../modules/catalog" + catalog_name = "analytics" + + teams = { + "data-engineering" = { + catalog_privileges = ["USE_CATALOG", "CREATE_SCHEMA"] + schema_privileges = ["USE_SCHEMA", "SELECT", "MODIFY", "CREATE_TABLE"] + } + "data-analysts" = { + catalog_privileges = ["USE_CATALOG"] + schema_privileges = ["USE_SCHEMA", "SELECT"] + } + } +} +``` + +--- + +## Remote State + +### AWS S3 + +```hcl +terraform { + backend "s3" { + bucket = "my-terraform-state" + key = "databricks/prod/terraform.tfstate" + region = "us-west-2" + dynamodb_table = "terraform-locks" + encrypt = true + } +} +``` + +### Azure Blob Storage + +```hcl +terraform { + backend "azurerm" { + resource_group_name = "terraform-state-rg" + storage_account_name = "tfstateaccount" + container_name = "tfstate" + key = "databricks/prod/terraform.tfstate" + } +} +``` + +### GCS + +```hcl +terraform { + backend "gcs" { + bucket = "my-terraform-state" + prefix = "databricks/prod" + } +} +``` + +--- + +## Variable Management + +### Separate Sensitive Variables + +```hcl +# variables.tf +variable "databricks_host" { type = string } +variable "databricks_token" { + type = string + sensitive = true +} +variable "environment" { + type = string + default = "dev" +} +``` + +```hcl +# terraform.tfvars (gitignored) +databricks_host = "https://my-workspace.cloud.databricks.com" +databricks_token = "dapi..." 
+environment = "production" +``` + +### Use Locals for Derived Values + +```hcl +locals { + name_prefix = "${var.project}-${var.environment}" + common_tags = { + "Project" = var.project + "Environment" = var.environment + "ManagedBy" = "terraform" + } +} + +resource "databricks_cluster" "shared" { + cluster_name = "${local.name_prefix}-shared" + custom_tags = local.common_tags + # ... +} +``` + +--- + +## CI/CD Integration + +### GitHub Actions + +```yaml +name: Terraform Databricks +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + terraform: + runs-on: ubuntu-latest + env: + DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }} + DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: hashicorp/setup-terraform@v3 + with: + terraform_version: "1.9" + + - name: Terraform Init + run: terraform init + + - name: Terraform Plan + run: terraform plan -out=tfplan + if: github.event_name == 'pull_request' + + - name: Terraform Apply + run: terraform apply -auto-approve tfplan + if: github.ref == 'refs/heads/main' +``` + +### Azure DevOps + +```yaml +trigger: + branches: + include: [main] + +pool: + vmImage: "ubuntu-latest" + +steps: + - task: TerraformInstaller@0 + inputs: + terraformVersion: "1.9" + + - script: | + terraform init + terraform plan -out=tfplan + env: + DATABRICKS_HOST: $(DATABRICKS_HOST) + DATABRICKS_TOKEN: $(DATABRICKS_TOKEN) + + - script: terraform apply -auto-approve tfplan + condition: eq(variables['Build.SourceBranch'], 'refs/heads/main') + env: + DATABRICKS_HOST: $(DATABRICKS_HOST) + DATABRICKS_TOKEN: $(DATABRICKS_TOKEN) +``` + +--- + +## Lifecycle Management + +### Prevent Accidental Destruction + +```hcl +resource "databricks_catalog" "production" { + name = "production" + + lifecycle { + prevent_destroy = true + } +} +``` + +### Ignore External Changes + +```hcl +resource "databricks_cluster" "shared" { + cluster_name = "shared" + # ... 
+ + lifecycle { + ignore_changes = [ + spark_conf, + custom_tags, + ] + } +} +``` + +### Import Existing Resources + +```bash +# Import existing catalog +terraform import databricks_catalog.analytics "analytics" + +# Import existing cluster +terraform import databricks_cluster.shared "<cluster-id>" + +# Import existing job +terraform import databricks_job.etl "<job-id>" + +# Import grants +terraform import 'databricks_grants.catalog' "catalog/analytics" +``` + +--- + +## Common Patterns + +### Environment-Specific Sizing + +```hcl +variable "environment" { type = string } + +locals { + cluster_config = { + dev = { min_workers = 1, max_workers = 2, node_type = "i3.xlarge" } + prod = { min_workers = 2, max_workers = 10, node_type = "i3.2xlarge" } + } + config = local.cluster_config[var.environment] +} + +resource "databricks_cluster" "main" { + node_type_id = local.config.node_type + autoscale { + min_workers = local.config.min_workers + max_workers = local.config.max_workers + } +} +``` + +### Conditional Resources + +```hcl +variable "enable_monitoring" { + type = bool + default = false +} + +resource "databricks_sql_endpoint" "monitoring" { + count = var.enable_monitoring ? 
1 : 0 + name = "monitoring-warehouse" + cluster_size = "Small" +} +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **State lock stuck** | Force unlock: `terraform force-unlock <LOCK_ID>` | +| **Drift after manual changes** | Run `terraform plan` to detect; `terraform import` to reconcile | +| **Circular dependencies** | Use `depends_on` explicitly or restructure resource references | +| **Slow plan with many resources** | Use `-target` for focused operations; split into smaller state files | +| **Secrets in state file** | Always use encrypted remote state; never commit `.tfstate` to git | +| **Provider version conflicts** | Pin version with `version = "~> 1.110"` and run `terraform init -upgrade` | diff --git a/databricks-skills/databricks-terraform/SKILL.md b/databricks-skills/databricks-terraform/SKILL.md new file mode 100644 index 00000000..9cf60b77 --- /dev/null +++ b/databricks-skills/databricks-terraform/SKILL.md @@ -0,0 +1,216 @@ +--- +name: databricks-terraform +description: "Generate, validate, and manage Databricks infrastructure using the Databricks Terraform Provider. Use when provisioning workspaces, Unity Catalog objects, clusters, jobs, pipelines, model serving, SQL warehouses, or any Databricks resource via Terraform." +--- + +# Databricks Terraform + +Infrastructure-as-code for Databricks using the [Databricks Terraform Provider](https://registry.terraform.io/providers/databricks/databricks/latest/docs). + +## When to Use This Skill + +Use this skill when: +- Generating `.tf` files for Databricks resources (clusters, jobs, UC objects, pipelines, etc.) 
+- Setting up **provider authentication** (PAT, service principal, Azure/AWS/GCP) +- Scaffolding a **Unity Catalog hierarchy** (metastore → catalog → schema → tables/volumes) +- Creating reusable **Terraform modules** for Databricks +- Debugging `terraform plan` or `terraform apply` errors +- Configuring **remote state** backends (S3, Azure Blob, GCS) + +## Reference Files + +| Topic | File | Description | +|-------|------|-------------| +| Provider & Auth | [1-provider-and-auth.md](1-provider-and-auth.md) | Provider configuration, authentication patterns for AWS/Azure/GCP | +| Core Resources | [2-core-resources.md](2-core-resources.md) | Clusters, jobs, SQL warehouses, notebooks, secrets | +| Unity Catalog | [3-unity-catalog.md](3-unity-catalog.md) | Catalogs, schemas, volumes, grants, external locations | +| Best Practices | [4-best-practices.md](4-best-practices.md) | Project structure, modules, state management, CI/CD | + +## Quick Start + +### Minimal Provider Setup (AWS) + +```hcl +terraform { + required_providers { + databricks = { + source = "databricks/databricks" + version = "~> 1.110" + } + } +} + +provider "databricks" { + host = var.databricks_host + token = var.databricks_token +} +``` + +### Create a Cluster + +```hcl +data "databricks_spark_version" "latest_lts" { + long_term_support = true +} + +data "databricks_node_type" "smallest" { + local_disk = true +} + +resource "databricks_cluster" "shared" { + cluster_name = "shared-analytics" + spark_version = data.databricks_spark_version.latest_lts.id + node_type_id = data.databricks_node_type.smallest.id + autotermination_minutes = 30 + num_workers = 2 + + spark_conf = { + "spark.databricks.cluster.profile" = "serverless" + } +} +``` + +### Create a Unity Catalog Hierarchy + +```hcl +resource "databricks_catalog" "analytics" { + name = "analytics" + comment = "Production analytics catalog" +} + +resource "databricks_schema" "gold" { + catalog_name = databricks_catalog.analytics.name + name = "gold" + 
comment = "Gold-layer aggregated tables" +} + +resource "databricks_volume" "raw_files" { + catalog_name = databricks_catalog.analytics.name + schema_name = databricks_schema.gold.name + name = "raw_files" + volume_type = "MANAGED" + comment = "Raw ingestion files" +} + +resource "databricks_grants" "catalog_grants" { + catalog = databricks_catalog.analytics.name + + grant { + principal = "data-analysts" + privileges = ["USE_CATALOG"] + } +} + +resource "databricks_grants" "schema_grants" { + schema = "${databricks_catalog.analytics.name}.${databricks_schema.gold.name}" + + grant { + principal = "data-analysts" + privileges = ["USE_SCHEMA", "SELECT"] + } +} +``` + +### Create a Job + +```hcl +resource "databricks_job" "etl_pipeline" { + name = "daily-etl-pipeline" + + task { + task_key = "ingest" + + notebook_task { + notebook_path = "/Repos/team/etl/ingest" + } + + new_cluster { + spark_version = data.databricks_spark_version.latest_lts.id + node_type_id = data.databricks_node_type.smallest.id + num_workers = 4 + } + } + + task { + task_key = "transform" + depends_on { + task_key = "ingest" + } + + notebook_task { + notebook_path = "/Repos/team/etl/transform" + } + + new_cluster { + spark_version = data.databricks_spark_version.latest_lts.id + node_type_id = data.databricks_node_type.smallest.id + num_workers = 8 + } + } + + schedule { + quartz_cron_expression = "0 0 6 * * ?" 
+ timezone_id = "UTC" + } +} +``` + +## Common Resources + +| Resource | Purpose | +|----------|---------| +| `databricks_cluster` | All-purpose and job clusters | +| `databricks_job` | Scheduled and triggered jobs | +| `databricks_sql_endpoint` | SQL warehouses | +| `databricks_notebook` | Workspace notebooks | +| `databricks_catalog` | Unity Catalog catalogs | +| `databricks_schema` | Unity Catalog schemas | +| `databricks_volume` | Unity Catalog volumes | +| `databricks_grants` | Permissions on UC objects | +| `databricks_external_location` | External storage locations | +| `databricks_storage_credential` | Cloud storage credentials | +| `databricks_model_serving` | Model serving endpoints | +| `databricks_pipeline` | DLT/SDP pipelines | +| `databricks_secret_scope` | Secret scopes | +| `databricks_secret` | Secrets within scopes | +| `databricks_cluster_policy` | Cluster policies | +| `databricks_instance_pool` | Instance pools | +| `databricks_token` | Personal access tokens | +| `databricks_group` | Account/workspace groups | +| `databricks_service_principal` | Service principals | + +## Common Data Sources + +| Data Source | Purpose | +|-------------|---------| +| `databricks_spark_version` | Look up Spark/DBR versions | +| `databricks_node_type` | Find instance types by criteria | +| `databricks_current_user` | Current authenticated user | +| `databricks_catalogs` | List existing catalogs | +| `databricks_schemas` | List schemas in a catalog | +| `databricks_tables` | List tables in a schema | + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **"Provider produced inconsistent result"** | Pin provider version with `version = "~> 1.110"` to avoid breaking changes | +| **"Unauthorized" on plan/apply** | Check `host` and `token` variables; ensure token has workspace admin access | +| **Cluster creation fails** | Use `databricks_node_type` data source instead of hardcoding instance types | +| **Grants fail with "not found"** | Ensure parent 
resources (catalog, schema) are created first with `depends_on` or implicit references | +| **State drift after manual changes** | Run `terraform import` to reconcile, or use `lifecycle { ignore_changes }` | +| **Slow plan with many resources** | Use `-target` for focused applies; split into modules | + +## Related Skills + +- **[databricks-unity-catalog](../databricks-unity-catalog/SKILL.md)** — UC concepts that Terraform resources map to +- **[databricks-jobs](../databricks-jobs/SKILL.md)** — job configurations that `databricks_job` implements +- **[databricks-asset-bundles](../databricks-asset-bundles/SKILL.md)** — alternative IaC approach using Databricks-native bundles +- **[databricks-config](../databricks-config/SKILL.md)** — authentication setup used by the Terraform provider + +## Resources + +- [Terraform Registry — Databricks Provider](https://registry.terraform.io/providers/databricks/databricks/latest/docs) +- [Databricks Terraform Docs](https://docs.databricks.com/en/dev-tools/terraform/index.html) +- [Automate Unity Catalog with Terraform](https://docs.databricks.com/en/dev-tools/terraform/automate-uc.html) +- [GitHub — terraform-provider-databricks](https://github.com/databricks/terraform-provider-databricks) diff --git a/databricks-skills/databricks-unity-catalog/1-objects-and-governance.md b/databricks-skills/databricks-unity-catalog/1-objects-and-governance.md new file mode 100644 index 00000000..d3cf3864 --- /dev/null +++ b/databricks-skills/databricks-unity-catalog/1-objects-and-governance.md @@ -0,0 +1,311 @@ +# Unity Catalog Objects & Governance + +Manage the UC namespace hierarchy (catalogs, schemas, volumes, functions) and permissions. 
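
Throughout this file, objects below the catalog level are addressed by three-level names (`catalog.schema.object`) passed as `full_name`. A small illustrative helper for splitting and validating such names — this is a local convenience sketch, not part of any Databricks API:

```python
def split_full_name(full_name: str) -> tuple[str, str, str]:
    """Split a three-level UC name like 'analytics.gold.customers' into its parts."""
    parts = full_name.split(".")
    if len(parts) != 3:
        raise ValueError(f"expected catalog.schema.object, got {full_name!r}")
    # Returns (catalog, schema, object)
    return (parts[0], parts[1], parts[2])

print(split_full_name("analytics.gold.customers"))  # -> ('analytics', 'gold', 'customers')
```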
+ +## Namespace Hierarchy + +``` +metastore +└── catalog + └── schema + ├── table / view / materialized view + ├── volume (managed or external) + └── function +``` + +## MCP Tools + +| Tool | Purpose | +|------|---------| +| `manage_uc_objects` | CRUD for catalogs, schemas, volumes, functions | +| `manage_uc_grants` | Grant, revoke, and inspect permissions | + +--- + +## Catalog Operations + +### Create a Catalog + +```python +manage_uc_objects( + object_type="catalog", + action="create", + name="analytics", + comment="Production analytics catalog" +) +``` + +With managed storage location (isolates data from metastore default): + +```python +manage_uc_objects( + object_type="catalog", + action="create", + name="analytics", + comment="Production analytics catalog", + storage_root="s3://my-bucket/analytics/" +) +``` + +### List / Get / Update / Delete + +```python +# List all catalogs +manage_uc_objects(object_type="catalog", action="list") + +# Get details +manage_uc_objects(object_type="catalog", action="get", full_name="analytics") + +# Update comment or owner +manage_uc_objects( + object_type="catalog", + action="update", + full_name="analytics", + comment="Updated description", + owner="data-engineering@company.com" +) + +# Delete (force=True to delete non-empty catalogs) +manage_uc_objects(object_type="catalog", action="delete", full_name="analytics", force=True) +``` + +### Catalog Isolation Mode + +```python +# OPEN: all workspace users can see the catalog +# ISOLATED: only explicitly bound workspaces can access +manage_uc_objects( + object_type="catalog", + action="update", + full_name="analytics", + isolation_mode="ISOLATED" +) +``` + +--- + +## Schema Operations + +```python +# Create schema +manage_uc_objects( + object_type="schema", + action="create", + name="gold", + catalog_name="analytics", + comment="Gold-layer aggregated tables" +) + +# List schemas in a catalog +manage_uc_objects(object_type="schema", action="list", catalog_name="analytics") + +# 
Delete schema +manage_uc_objects(object_type="schema", action="delete", full_name="analytics.gold", force=True) +``` + +--- + +## Volume Operations + +```python +# Create managed volume (data stored in catalog's managed location) +manage_uc_objects( + object_type="volume", + action="create", + name="raw_files", + catalog_name="analytics", + schema_name="bronze", + volume_type="MANAGED", + comment="Raw ingestion files" +) + +# Create external volume (data stays in your cloud storage) +manage_uc_objects( + object_type="volume", + action="create", + name="landing_zone", + catalog_name="analytics", + schema_name="bronze", + volume_type="EXTERNAL", + storage_location="s3://my-bucket/landing/", + comment="External landing zone" +) + +# List volumes in a schema +manage_uc_objects( + object_type="volume", + action="list", + catalog_name="analytics", + schema_name="bronze" +) +``` + +For file operations on volumes (upload, download, list files), see [6-volumes.md](6-volumes.md). + +--- + +## Function Operations + +```python +# List functions in a schema +manage_uc_objects( + object_type="function", + action="list", + catalog_name="analytics", + schema_name="gold" +) + +# Get function details +manage_uc_objects( + object_type="function", + action="get", + full_name="analytics.gold.calculate_revenue" +) + +# Delete function +manage_uc_objects( + object_type="function", + action="delete", + full_name="analytics.gold.calculate_revenue" +) +``` + +To **create** functions, use `execute_sql` or `manage_uc_security_policies` (for security functions): + +```sql +CREATE FUNCTION analytics.gold.calculate_revenue(quantity INT, price DECIMAL(10,2)) +RETURNS DECIMAL(10,2) +RETURN quantity * price; +``` + +--- + +## Permissions (Grants) + +### Grant Privileges + +```python +# Grant catalog-level access +manage_uc_grants( + action="grant", + securable_type="catalog", + full_name="analytics", + principal="data-analysts", + privileges=["USE_CATALOG"] +) + +# Grant schema-level read access 
+manage_uc_grants( + action="grant", + securable_type="schema", + full_name="analytics.gold", + principal="data-analysts", + privileges=["USE_SCHEMA", "SELECT"] +) + +# Grant table-level write access +manage_uc_grants( + action="grant", + securable_type="table", + full_name="analytics.gold.revenue", + principal="data-engineering", + privileges=["SELECT", "MODIFY"] +) + +# Grant volume access +manage_uc_grants( + action="grant", + securable_type="volume", + full_name="analytics.bronze.raw_files", + principal="data-engineering", + privileges=["READ_VOLUME", "WRITE_VOLUME"] +) +``` + +### Common Privilege Combinations + +| Role | Catalog | Schema | Tables | Volumes | +|------|---------|--------|--------|---------| +| **Reader** | `USE_CATALOG` | `USE_SCHEMA` | `SELECT` | `READ_VOLUME` | +| **Writer** | `USE_CATALOG` | `USE_SCHEMA` | `SELECT`, `MODIFY` | `READ_VOLUME`, `WRITE_VOLUME` | +| **Creator** | `USE_CATALOG` | `USE_SCHEMA`, `CREATE_TABLE`, `CREATE_VOLUME` | — | — | +| **Admin** | `ALL_PRIVILEGES` | — | — | — | + +### Inspect and Revoke + +```python +# Get current grants on an object +manage_uc_grants( + action="get", + securable_type="catalog", + full_name="analytics" +) + +# Get effective grants (includes inherited permissions) +manage_uc_grants( + action="get_effective", + securable_type="table", + full_name="analytics.gold.revenue" +) + +# Revoke privileges +manage_uc_grants( + action="revoke", + securable_type="schema", + full_name="analytics.gold", + principal="former-team", + privileges=["SELECT", "USE_SCHEMA"] +) +``` + +--- + +## Common Patterns + +### Bootstrap a New Project + +```python +# 1. Create catalog +manage_uc_objects(object_type="catalog", action="create", name="my_project", + comment="My project data") + +# 2. Create medallion schemas +for layer in ["bronze", "silver", "gold"]: + manage_uc_objects(object_type="schema", action="create", + name=layer, catalog_name="my_project", + comment=f"{layer.title()} layer") + +# 3. 
Create a volume for raw file ingestion +manage_uc_objects(object_type="volume", action="create", + name="raw_files", catalog_name="my_project", + schema_name="bronze", volume_type="MANAGED") + +# 4. Grant access to the team +manage_uc_grants(action="grant", securable_type="catalog", + full_name="my_project", principal="my-team", + privileges=["USE_CATALOG", "CREATE_SCHEMA"]) +``` + +### Audit Who Has Access + +```python +# Check grants on a sensitive table +grants = manage_uc_grants( + action="get", + securable_type="table", + full_name="analytics.gold.customer_pii" +) +# Review grants["assignments"] for unexpected principals +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **"User does not have USE_CATALOG"** | Grant `USE_CATALOG` on the catalog AND `USE_SCHEMA` on the schema — both are required for access | +| **Cannot create objects** | Need `CREATE_TABLE`/`CREATE_VOLUME` on the schema, plus `USE_SCHEMA` and `USE_CATALOG` | +| **"Catalog not found"** | Check `isolation_mode` — ISOLATED catalogs are only visible to bound workspaces | +| **Cannot delete non-empty catalog** | Use `force=True` or delete child schemas first | +| **External volume creation fails** | Ensure a storage credential and external location exist for the cloud path | diff --git a/databricks-skills/databricks-unity-catalog/2-tags-and-classification.md b/databricks-skills/databricks-unity-catalog/2-tags-and-classification.md new file mode 100644 index 00000000..1a69a177 --- /dev/null +++ b/databricks-skills/databricks-unity-catalog/2-tags-and-classification.md @@ -0,0 +1,205 @@ +# Tags & Classification + +Tag Unity Catalog objects and columns for governance, data discovery, and compliance tracking. + +## MCP Tool + +| Tool | Purpose | +|------|---------| +| `manage_uc_tags` | Set/unset tags, set comments, query tags from system tables | + +--- + +## Setting Tags + +Tags are key-value pairs attached to catalogs, schemas, tables, or columns. 
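
Tags are plain string key-value pairs, and an object accepts at most 50 of them (see Common Issues at the end of this file). A hypothetical pre-flight check before applying a tag dictionary — the function name and validation rules are illustrative, not part of the MCP tool:

```python
MAX_TAGS_PER_OBJECT = 50  # assumed limit, per the Common Issues table in this file

def validate_tags(tags: dict) -> dict:
    """Illustrative sanity check for a tag dictionary before it is applied."""
    if len(tags) > MAX_TAGS_PER_OBJECT:
        raise ValueError(f"{len(tags)} tags exceeds the {MAX_TAGS_PER_OBJECT}-tag limit")
    for key, value in tags.items():
        # Keys must be non-empty and values must be strings
        if not key or not isinstance(value, str):
            raise ValueError(f"bad tag entry: {key!r}={value!r}")
    return tags

print(validate_tags({"pii": "true", "classification": "confidential"}))
```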
+ +### Tag a Table + +```python +manage_uc_tags( + action="set_tags", + object_type="table", + full_name="analytics.gold.customers", + tags={"pii": "true", "classification": "confidential", "owner_team": "data-eng"} +) +``` + +### Tag a Column + +```python +manage_uc_tags( + action="set_tags", + object_type="column", + full_name="analytics.gold.customers", + column_name="email", + tags={"pii": "true", "data_type": "email_address"} +) +``` + +### Tag a Catalog or Schema + +```python +manage_uc_tags( + action="set_tags", + object_type="catalog", + full_name="analytics", + tags={"environment": "production", "cost_center": "CC-1234"} +) +``` + +--- + +## Removing Tags + +```python +manage_uc_tags( + action="unset_tags", + object_type="table", + full_name="analytics.gold.customers", + tag_names=["classification", "owner_team"] +) +``` + +--- + +## Setting Comments + +```python +manage_uc_tags( + action="set_comment", + object_type="table", + full_name="analytics.gold.customers", + comment_text="Master customer table. Updated daily via SDP pipeline." +) + +# Comment on a column +manage_uc_tags( + action="set_comment", + object_type="column", + full_name="analytics.gold.customers", + column_name="customer_id", + comment_text="Unique customer identifier. FK to orders.customer_id." 
+) +``` + +--- + +## Querying Tags + +### Find All Tagged Tables + +```python +manage_uc_tags( + action="query_table_tags", + catalog_filter="analytics" +) +# Returns rows from system.information_schema.table_tags +``` + +### Find PII Tables + +```python +manage_uc_tags( + action="query_table_tags", + tag_name_filter="pii", + tag_value_filter="true" +) +``` + +### Find Tagged Columns + +```python +manage_uc_tags( + action="query_column_tags", + catalog_filter="analytics", + tag_name_filter="pii" +) +``` + +### Filter by Table + +```python +manage_uc_tags( + action="query_column_tags", + catalog_filter="analytics", + table_name_filter="customers" +) +``` + +--- + +## Common Tagging Patterns + +### PII Classification + +```python +pii_columns = { + "analytics.gold.customers": ["email", "phone", "ssn", "address"], + "analytics.gold.employees": ["email", "salary", "ssn"], +} + +for table, columns in pii_columns.items(): + manage_uc_tags(action="set_tags", object_type="table", + full_name=table, tags={"contains_pii": "true"}) + for col in columns: + manage_uc_tags(action="set_tags", object_type="column", + full_name=table, column_name=col, + tags={"pii": "true"}) +``` + +### Data Domain Tagging + +```python +manage_uc_tags(action="set_tags", object_type="schema", + full_name="analytics.gold", + tags={"domain": "analytics", "sla": "99.9%", "refresh": "daily"}) +``` + +### Cost Attribution + +```python +manage_uc_tags(action="set_tags", object_type="catalog", + full_name="ml_features", + tags={"cost_center": "ML-OPS", "budget_owner": "ml-team@company.com"}) +``` + +--- + +## Using Tags with System Tables + +Tags stored via the MCP tool are queryable through `system.information_schema`: + +```sql +-- Find all PII tables across all catalogs +SELECT catalog_name, schema_name, table_name, tag_value +FROM system.information_schema.table_tags +WHERE tag_name = 'pii' AND tag_value = 'true'; + +-- Find all PII columns +SELECT catalog_name, schema_name, table_name, column_name, 
tag_value +FROM system.information_schema.column_tags +WHERE tag_name = 'pii'; + +-- Cross-reference: tables tagged PII but missing column masks +SELECT t.catalog_name, t.schema_name, t.table_name +FROM system.information_schema.table_tags t +LEFT JOIN system.information_schema.column_tags c + ON t.catalog_name = c.catalog_name + AND t.schema_name = c.schema_name + AND t.table_name = c.table_name + AND c.tag_name = 'masked' +WHERE t.tag_name = 'pii' AND t.tag_value = 'true' + AND c.tag_name IS NULL; +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **Tag query returns empty** | Tags are per-catalog; ensure `catalog_filter` matches. System tables require `system.information_schema` access | +| **"Cannot set tag"** | Need `APPLY_TAG` privilege on the object, plus `USE_CATALOG` and `USE_SCHEMA` | +| **Tag not visible in UI** | Tags set via API/MCP appear in Catalog Explorer under the object's "Tags" tab. Allow a few seconds for propagation | +| **Too many tags** | Maximum 50 tags per object. Use structured naming conventions (e.g., `domain:`, `compliance:`) | diff --git a/databricks-skills/databricks-unity-catalog/3-security-policies.md b/databricks-skills/databricks-unity-catalog/3-security-policies.md new file mode 100644 index 00000000..a74c00bf --- /dev/null +++ b/databricks-skills/databricks-unity-catalog/3-security-policies.md @@ -0,0 +1,189 @@ +# Row Filters & Column Masks + +Apply fine-grained access control to tables using SQL functions as row filters and column masks. + +## MCP Tool + +| Tool | Purpose | +|------|---------| +| `manage_uc_security_policies` | Create security functions, apply/remove row filters and column masks | + +--- + +## How It Works + +- **Row filter**: A SQL function that returns `BOOLEAN`. Rows where the function returns `FALSE` are hidden from the user. +- **Column mask**: A SQL function that returns the same type as the column. 
The function can redact, hash, or transform values based on the user's identity. + +Both use `IS_ACCOUNT_GROUP_MEMBER()` or `CURRENT_USER()` to make access decisions at query time. + +--- + +## Row Filters + +### Step 1: Create a Filter Function + +```python +manage_uc_security_policies( + action="create_security_function", + function_name="analytics.gold.region_filter", + parameter_name="region_val", + parameter_type="STRING", + return_type="BOOLEAN", + function_body="RETURN IF(IS_ACCOUNT_GROUP_MEMBER('global-admins'), TRUE, region_val = 'US')", + function_comment="Non-admins can only see US region data" +) +``` + +### Step 2: Apply to a Table + +```python +manage_uc_security_policies( + action="set_row_filter", + table_name="analytics.gold.orders", + filter_function="analytics.gold.region_filter", + filter_columns=["region"] +) +``` + +Now when a non-admin queries `analytics.gold.orders`, they only see rows where `region = 'US'`. + +### Remove a Row Filter + +```python +manage_uc_security_policies( + action="drop_row_filter", + table_name="analytics.gold.orders" +) +``` + +--- + +## Column Masks + +### Mask an Email Column + +```python +# Create mask function +manage_uc_security_policies( + action="create_security_function", + function_name="analytics.gold.mask_email", + parameter_name="email_val", + parameter_type="STRING", + return_type="STRING", + function_body="RETURN IF(IS_ACCOUNT_GROUP_MEMBER('pii-readers'), email_val, CONCAT(LEFT(email_val, 2), '***@***.com'))", + function_comment="Show full email only to PII readers group" +) + +# Apply to column +manage_uc_security_policies( + action="set_column_mask", + table_name="analytics.gold.customers", + column_name="email", + mask_function="analytics.gold.mask_email" +) +``` + +Result: non-PII users see `st***@***.com` instead of `steven.tan@company.com`. 
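
Before wiring a mask into production, it can help to mirror the SQL expression locally and eyeball its output. A Python sketch of the email mask above — `mask_email` and the `is_pii_reader` flag are illustrative stand-ins for the group-membership check, not a real API:

```python
def mask_email(email: str, is_pii_reader: bool = False) -> str:
    # Mirrors the SQL: IF(IS_ACCOUNT_GROUP_MEMBER('pii-readers'), email_val,
    #                     CONCAT(LEFT(email_val, 2), '***@***.com'))
    if is_pii_reader:
        return email
    return email[:2] + "***@***.com"

print(mask_email("steven.tan@company.com"))        # -> st***@***.com
print(mask_email("steven.tan@company.com", True))  # -> steven.tan@company.com
```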
+ +### Mask a Numeric Column (e.g., Salary) + +```python +manage_uc_security_policies( + action="create_security_function", + function_name="hr.secure.mask_salary", + parameter_name="salary_val", + parameter_type="DECIMAL(10,2)", + return_type="DECIMAL(10,2)", + function_body="RETURN IF(IS_ACCOUNT_GROUP_MEMBER('hr-admins'), salary_val, NULL)", + function_comment="Only HR admins can see salary values" +) + +manage_uc_security_policies( + action="set_column_mask", + table_name="hr.employees.staff", + column_name="salary", + mask_function="hr.secure.mask_salary" +) +``` + +### Remove a Column Mask + +```python +manage_uc_security_policies( + action="drop_column_mask", + table_name="analytics.gold.customers", + column_name="email" +) +``` + +--- + +## Common Patterns + +### Multi-Column Masking + +Apply masks to several PII columns on the same table: + +```python +pii_masks = [ + ("email", "STRING", "CONCAT(LEFT(val, 2), '***@***.com')"), + ("phone", "STRING", "CONCAT('***-***-', RIGHT(val, 4))"), + ("ssn", "STRING", "'***-**-****'"), +] + +for col, dtype, mask_expr in pii_masks: + func_name = f"analytics.gold.mask_{col}" + manage_uc_security_policies( + action="create_security_function", + function_name=func_name, + parameter_name="val", + parameter_type=dtype, + return_type=dtype, + function_body=f"RETURN IF(IS_ACCOUNT_GROUP_MEMBER('pii-readers'), val, {mask_expr})" + ) + manage_uc_security_policies( + action="set_column_mask", + table_name="analytics.gold.customers", + column_name=col, + mask_function=func_name + ) +``` + +### Row Filter by User's Department + +```python +manage_uc_security_policies( + action="create_security_function", + function_name="analytics.gold.dept_filter", + parameter_name="dept_val", + parameter_type="STRING", + return_type="BOOLEAN", + function_body="""RETURN + IS_ACCOUNT_GROUP_MEMBER('global-admins') + OR IS_ACCOUNT_GROUP_MEMBER(CONCAT('dept-', LOWER(dept_val)))""", + function_comment="Users only see rows for their department group" +) 
+``` + +--- + +## Important Notes + +- Row filters and column masks are enforced at **query time** — they apply to all SQL, BI tools, and notebooks +- Functions must be in the **same catalog** as the table (or in a catalog the user has access to) +- `IS_ACCOUNT_GROUP_MEMBER()` checks account-level groups, not workspace-local groups +- Masks and filters do NOT apply to the table owner or users with `ALL_PRIVILEGES` +- Performance impact is minimal for simple functions; avoid expensive joins in filter/mask functions + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **"Function not found"** | Function must exist before applying. Use `create_security_function` first | +| **Filter not working for admins** | Users with `ALL_PRIVILEGES` or table ownership bypass filters/masks by design | +| **Type mismatch** | Column mask `return_type` must exactly match the column's data type | +| **Cannot apply to view** | Row filters and column masks only work on tables, not views. Use view-level logic instead | +| **"Cannot create function"** | Need `CREATE_FUNCTION` privilege on the schema | diff --git a/databricks-skills/databricks-unity-catalog/4-sharing-and-federation.md b/databricks-skills/databricks-unity-catalog/4-sharing-and-federation.md new file mode 100644 index 00000000..cfabeaed --- /dev/null +++ b/databricks-skills/databricks-unity-catalog/4-sharing-and-federation.md @@ -0,0 +1,314 @@ +# Delta Sharing & Lakehouse Federation + +Share data securely across organizations and connect to external data sources. 
+ +## MCP Tools + +| Tool | Purpose | +|------|---------| +| `manage_uc_sharing` | Create/manage shares, recipients, and providers for Delta Sharing | +| `manage_uc_connections` | Create/manage Lakehouse Federation connections to external databases | +| `manage_uc_storage` | Create/manage storage credentials and external locations | + +--- + +## Delta Sharing + +Delta Sharing enables secure, read-only sharing of data across Databricks workspaces and to non-Databricks consumers. + +### Concepts + +- **Share**: A named collection of tables/schemas to share +- **Recipient**: An entity (person, org, workspace) that receives shared data +- **Provider**: An entity that shares data (your workspace) + +### Create a Share + +```python +manage_uc_sharing( + resource_type="share", + action="create", + name="partner_data_share", + comment="Quarterly metrics shared with partners" +) +``` + +### Add Tables to a Share + +```python +manage_uc_sharing( + resource_type="share", + action="update", + name="partner_data_share", + updates=[ + { + "action": "ADD", + "data_object": { + "name": "analytics.gold.quarterly_metrics", + "data_object_type": "TABLE", + "shared_as": "quarterly_metrics" + } + } + ] +) +``` + +### Add an Entire Schema + +```python +manage_uc_sharing( + resource_type="share", + action="update", + name="partner_data_share", + updates=[ + { + "action": "ADD", + "data_object": { + "name": "analytics.gold", + "data_object_type": "SCHEMA" + } + } + ] +) +``` + +### Share with Partitions (Subset of Data) + +```python +manage_uc_sharing( + resource_type="share", + action="update", + name="partner_data_share", + updates=[ + { + "action": "ADD", + "data_object": { + "name": "analytics.gold.orders", + "data_object_type": "TABLE", + "shared_as": "orders", + "partitions": [ + {"values": [{"name": "region", "op": "EQUAL", "value": "US"}]} + ] + } + } + ] +) +``` + +### Create a Recipient + +```python +# Databricks-to-Databricks sharing (uses sharing_id) +manage_uc_sharing( + 
resource_type="recipient", + action="create", + name="partner_acme", + authentication_type="DATABRICKS", + sharing_code="", + comment="Acme Corp data team" +) + +# Open sharing (generates activation link for non-Databricks consumers) +manage_uc_sharing( + resource_type="recipient", + action="create", + name="external_partner", + authentication_type="TOKEN", + comment="External partner using open Delta Sharing" +) +``` + +### Grant Share to Recipient + +```python +manage_uc_sharing( + resource_type="recipient", + action="update_permissions", + name="partner_acme", + changes=[ + {"add": [{"privilege": "SELECT", "share_name": "partner_data_share"}]} + ] +) +``` + +### List and Inspect + +```python +# List all shares +manage_uc_sharing(resource_type="share", action="list") + +# Get share details (shows included tables) +manage_uc_sharing(resource_type="share", action="get", name="partner_data_share") + +# List recipients +manage_uc_sharing(resource_type="recipient", action="list") + +# List providers (shares you've received) +manage_uc_sharing(resource_type="provider", action="list") +``` + +--- + +## Storage Credentials & External Locations + +Required for external tables, external volumes, and Lakehouse Federation. 
+ +### Storage Credentials + +```python +# List existing credentials +manage_uc_storage(resource_type="credential", action="list") + +# Get credential details +manage_uc_storage(resource_type="credential", action="get", name="my-s3-credential") + +# Create an AWS IAM role credential +manage_uc_storage( + resource_type="credential", + action="create", + name="my-s3-credential", + aws_iam_role_arn="arn:aws:iam::123456789:role/unity-catalog-access", + comment="Access to analytics S3 bucket" +) +``` + +### External Locations + +```python +# Create external location +manage_uc_storage( + resource_type="external_location", + action="create", + name="analytics-landing", + url="s3://my-bucket/landing/", + credential_name="my-s3-credential", + comment="Landing zone for raw data files" +) + +# List external locations +manage_uc_storage(resource_type="external_location", action="list") + +# Validate an external location +manage_uc_storage( + resource_type="external_location", + action="validate", + name="analytics-landing" +) +``` + +--- + +## Lakehouse Federation + +Connect to external databases (PostgreSQL, MySQL, SQL Server, Snowflake, BigQuery, Redshift, etc.) and query them through Unity Catalog. 
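Connections can also be created directly in SQL if you prefer DDL over the MCP tool. A sketch (host and secret names are placeholders — store passwords in a secret scope rather than inline):

```sql
CREATE CONNECTION postgres_erp TYPE postgresql
OPTIONS (
  host 'erp-db.company.com',
  port '5432',
  user 'readonly_user',
  password secret('erp-scope', 'readonly-password')
);
```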
+ +### Create a Connection + +```python +# PostgreSQL connection +manage_uc_connections( + action="create", + name="postgres_erp", + connection_type="POSTGRESQL", + host="erp-db.company.com", + port="5432", + user="readonly_user", + password="", + comment="ERP PostgreSQL database" +) + +# MySQL connection +manage_uc_connections( + action="create", + name="mysql_legacy", + connection_type="MYSQL", + host="legacy-db.company.com", + port="3306", + user="reader", + password="" +) +``` + +### Create a Foreign Catalog + +After creating a connection, create a foreign catalog to browse and query the external database: + +```sql +CREATE FOREIGN CATALOG erp_catalog USING CONNECTION postgres_erp; +``` + +Now you can query external tables as if they were native UC tables: + +```sql +SELECT * FROM erp_catalog.public.orders LIMIT 10; +``` + +### List and Manage Connections + +```python +# List all connections +manage_uc_connections(action="list") + +# Get connection details +manage_uc_connections(action="get", name="postgres_erp") + +# Update connection +manage_uc_connections( + action="update", + name="postgres_erp", + host="new-erp-db.company.com", + comment="Updated to new host" +) + +# Delete connection +manage_uc_connections(action="delete", name="postgres_erp") +``` + +--- + +## Common Patterns + +### Share Data with Row-Level Security + +Combine Delta Sharing with partitions to share only relevant data: + +```python +# Share only APAC data with APAC partner +manage_uc_sharing( + resource_type="share", + action="update", + name="apac_partner_share", + updates=[{ + "action": "ADD", + "data_object": { + "name": "analytics.gold.revenue", + "data_object_type": "TABLE", + "partitions": [ + {"values": [{"name": "region", "op": "EQUAL", "value": "APAC"}]} + ] + } + }] +) +``` + +### Federated Query Across Sources + +```sql +-- Join Databricks table with federated PostgreSQL table +SELECT d.customer_id, d.total_spend, e.erp_status +FROM analytics.gold.customers d +JOIN 
erp_catalog.public.customer_status e + ON d.customer_id = e.customer_id; +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **"Share not found"** | Shares are metastore-level objects. Ensure you're connected to the right workspace/metastore | +| **Recipient can't access shared data** | Verify: (1) recipient has SELECT on the share, (2) activation link was used (for TOKEN auth), (3) share contains the table | +| **Federation query slow** | Predicate pushdown works for simple filters. Complex joins are pulled into Spark — add filters early | +| **"Connection failed"** | Check: (1) network connectivity (firewall/VPC), (2) credentials, (3) host/port. Use `manage_uc_connections` to validate | +| **Cannot create foreign catalog** | Need `CREATE_CATALOG` and `CREATE_FOREIGN_CATALOG` privileges, plus the connection must exist | diff --git a/databricks-skills/databricks-unity-catalog/SKILL.md b/databricks-skills/databricks-unity-catalog/SKILL.md index 30f34e3d..635813cc 100644 --- a/databricks-skills/databricks-unity-catalog/SKILL.md +++ b/databricks-skills/databricks-unity-catalog/SKILL.md @@ -1,80 +1,131 @@ --- name: databricks-unity-catalog -description: "Unity Catalog system tables and volumes. Use when querying system tables (audit, lineage, billing) or working with volume file operations (upload, download, list files in /Volumes/)." +description: "Unity Catalog governance: manage catalogs/schemas/volumes/functions, grants & permissions, tags & classification, row filters & column masks, Delta Sharing, Lakehouse Federation, system tables, volumes, and data profiling." --- # Unity Catalog -Guidance for Unity Catalog system tables, volumes, and governance. +Comprehensive guidance for Unity Catalog — Databricks' unified governance layer for data, AI, and analytics assets. 
## When to Use This Skill Use this skill when: +- **Creating or managing** catalogs, schemas, volumes, or functions +- **Granting or revoking** permissions on any UC object +- **Tagging** tables or columns for governance, PII classification, or data discovery +- **Applying row filters or column masks** for fine-grained access control +- **Sharing data** across organizations with Delta Sharing +- **Connecting to external databases** via Lakehouse Federation +- **Managing storage credentials** and external locations - Working with **volumes** (upload, download, list files in `/Volumes/`) -- Querying **lineage** (table dependencies, column-level lineage) -- Analyzing **audit logs** (who accessed what, permission changes) -- Monitoring **billing and usage** (DBU consumption, cost analysis) -- Tracking **compute resources** (cluster usage, warehouse metrics) -- Reviewing **job execution** (run history, success rates, failures) -- Analyzing **query performance** (slow queries, warehouse utilization) -- Profiling **data quality** (data profiling, drift detection, metric tables) +- Querying **system tables** (lineage, audit, billing, compute, jobs, query history) +- Setting up **data profiling** (monitors, drift detection, ML model monitoring) + +## MCP Tools + +| Tool | Purpose | Reference | +|------|---------|-----------| +| `manage_uc_objects` | CRUD for catalogs, schemas, volumes, functions | [1-objects-and-governance.md](1-objects-and-governance.md) | +| `manage_uc_grants` | Grant, revoke, inspect permissions | [1-objects-and-governance.md](1-objects-and-governance.md) | +| `manage_uc_tags` | Set/unset tags, set comments, query tags | [2-tags-and-classification.md](2-tags-and-classification.md) | +| `manage_uc_security_policies` | Row filters, column masks, security functions | [3-security-policies.md](3-security-policies.md) | +| `manage_uc_sharing` | Delta Sharing: shares, recipients, providers | [4-sharing-and-federation.md](4-sharing-and-federation.md) | +| 
`manage_uc_connections` | Lakehouse Federation connections | [4-sharing-and-federation.md](4-sharing-and-federation.md) | +| `manage_uc_storage` | Storage credentials and external locations | [4-sharing-and-federation.md](4-sharing-and-federation.md) | +| `manage_uc_monitors` | Data profiling monitors | [7-data-profiling.md](7-data-profiling.md) | +| `list_volume_files` / `upload_to_volume` / `download_from_volume` | Volume file operations | [6-volumes.md](6-volumes.md) | +| `execute_sql` | Query system tables and run DDL | [5-system-tables.md](5-system-tables.md) | ## Reference Files | Topic | File | Description | |-------|------|-------------| +| Objects & Governance | [1-objects-and-governance.md](1-objects-and-governance.md) | Catalog/schema/volume/function CRUD, permissions, common patterns | +| Tags & Classification | [2-tags-and-classification.md](2-tags-and-classification.md) | Tagging tables/columns, PII classification, querying tags | +| Security Policies | [3-security-policies.md](3-security-policies.md) | Row filters, column masks, security functions | +| Sharing & Federation | [4-sharing-and-federation.md](4-sharing-and-federation.md) | Delta Sharing, storage credentials, external locations, Lakehouse Federation | | System Tables | [5-system-tables.md](5-system-tables.md) | Lineage, audit, billing, compute, jobs, query history | | Volumes | [6-volumes.md](6-volumes.md) | Volume file operations, permissions, best practices | -| Data Profiling | [7-data-profiling.md](7-data-profiling.md) | Data profiling, drift detection, profile metrics | +| Data Profiling | [7-data-profiling.md](7-data-profiling.md) | Data profiling, drift detection, ML model monitoring | ## Quick Start -### Volume File Operations (MCP Tools) +### Create a Catalog and Schema ```python -# List files in a volume -list_volume_files(volume_path="/Volumes/catalog/schema/volume/folder/") +manage_uc_objects(object_type="catalog", action="create", + name="analytics", comment="Production 
analytics") -# Upload file to volume -upload_to_volume( - local_path="/tmp/data.csv", - volume_path="/Volumes/catalog/schema/volume/data.csv" -) +manage_uc_objects(object_type="schema", action="create", + name="gold", catalog_name="analytics", + comment="Gold-layer aggregated tables") +``` + +### Grant Access + +```python +manage_uc_grants(action="grant", securable_type="catalog", + full_name="analytics", principal="data-team", + privileges=["USE_CATALOG"]) + +manage_uc_grants(action="grant", securable_type="schema", + full_name="analytics.gold", principal="data-team", + privileges=["USE_SCHEMA", "SELECT"]) +``` + +### Tag a Table for PII + +```python +manage_uc_tags(action="set_tags", object_type="table", + full_name="analytics.gold.customers", + tags={"pii": "true", "classification": "confidential"}) +``` -# Download file from volume -download_from_volume( - volume_path="/Volumes/catalog/schema/volume/data.csv", - local_path="/tmp/downloaded.csv" +### Apply a Column Mask + +```python +manage_uc_security_policies( + action="create_security_function", + function_name="analytics.gold.mask_email", + parameter_name="email_val", parameter_type="STRING", + return_type="STRING", + function_body="RETURN IF(IS_ACCOUNT_GROUP_MEMBER('pii-readers'), email_val, CONCAT(LEFT(email_val, 2), '***@***.com'))" ) -# Create directory -create_volume_directory(volume_path="/Volumes/catalog/schema/volume/new_folder") +manage_uc_security_policies( + action="set_column_mask", + table_name="analytics.gold.customers", + column_name="email", + mask_function="analytics.gold.mask_email" +) ``` -### Enable System Tables Access +### Volume File Operations -```sql --- Grant access to system tables -GRANT USE CATALOG ON CATALOG system TO `data_engineers`; -GRANT USE SCHEMA ON SCHEMA system.access TO `data_engineers`; -GRANT SELECT ON SCHEMA system.access TO `data_engineers`; +```python +list_volume_files(volume_path="/Volumes/analytics/bronze/raw_files/") + 
+upload_to_volume(local_path="/tmp/data.csv", + volume_path="/Volumes/analytics/bronze/raw_files/data.csv") + +download_from_volume(volume_path="/Volumes/analytics/bronze/raw_files/data.csv", + local_path="/tmp/downloaded.csv") ``` -### Common Queries +### Query System Tables ```sql --- Table lineage: What tables feed into this table? +-- Table lineage: what feeds this table? SELECT source_table_full_name, source_column_name FROM system.access.table_lineage WHERE target_table_full_name = 'catalog.schema.table' AND event_date >= current_date() - 7; --- Audit: Recent permission changes +-- Audit: recent permission changes SELECT event_time, user_identity.email, action_name, request_params FROM system.access.audit WHERE action_name LIKE '%GRANT%' OR action_name LIKE '%REVOKE%' -ORDER BY event_time DESC -LIMIT 100; +ORDER BY event_time DESC LIMIT 100; -- Billing: DBU usage by workspace SELECT workspace_id, sku_name, SUM(usage_quantity) AS total_dbus @@ -83,37 +134,36 @@ WHERE usage_date >= current_date() - 30 GROUP BY workspace_id, sku_name; ``` -## MCP Tool Integration - -Use `mcp__databricks__execute_sql` for system table queries: +## Common Permission Combinations -```python -# Query lineage -mcp__databricks__execute_sql( - sql_query=""" - SELECT source_table_full_name, target_table_full_name - FROM system.access.table_lineage - WHERE event_date >= current_date() - 7 - """, - catalog="system" -) -``` +| Role | Catalog | Schema | Tables | Volumes | +|------|---------|--------|--------|---------| +| **Reader** | `USE_CATALOG` | `USE_SCHEMA` | `SELECT` | `READ_VOLUME` | +| **Writer** | `USE_CATALOG` | `USE_SCHEMA` | `SELECT`, `MODIFY` | `READ_VOLUME`, `WRITE_VOLUME` | +| **Creator** | `USE_CATALOG` | `USE_SCHEMA`, `CREATE_TABLE`, `CREATE_VOLUME` | — | — | +| **Admin** | `ALL_PRIVILEGES` | — | — | — | ## Best Practices -1. **Filter by date** - System tables can be large; always use date filters -2. 
**Use appropriate retention** - Check your workspace's retention settings -3. **Grant minimal access** - System tables contain sensitive metadata -4. **Schedule reports** - Create scheduled queries for regular monitoring +1. **Filter system tables by date** — they are partitioned by date; always use `event_date >= current_date() - N` +2. **Grant least privilege** — start with `USE_CATALOG` + `USE_SCHEMA` + `SELECT`, add more as needed +3. **Tag PII early** — tag tables and columns at creation time; use tags to drive column masks +4. **Use managed volumes** unless you need external storage control +5. **Test security functions** — verify row filters and column masks with a non-admin user before production +6. **Enable system schemas** early in your UC setup for audit and lineage visibility ## Related Skills -- **[databricks-spark-declarative-pipelines](../databricks-spark-declarative-pipelines/SKILL.md)** - for pipelines that write to Unity Catalog tables -- **[databricks-jobs](../databricks-jobs/SKILL.md)** - for job execution data visible in system tables -- **[databricks-synthetic-data-gen](../databricks-synthetic-data-gen/SKILL.md)** - for generating data stored in Unity Catalog Volumes -- **[databricks-aibi-dashboards](../databricks-aibi-dashboards/SKILL.md)** - for building dashboards on top of Unity Catalog data +- **[databricks-spark-declarative-pipelines](../databricks-spark-declarative-pipelines/SKILL.md)** — pipelines that write to Unity Catalog tables +- **[databricks-jobs](../databricks-jobs/SKILL.md)** — job execution data visible in system tables +- **[databricks-synthetic-data-gen](../databricks-synthetic-data-gen/SKILL.md)** — generating data stored in Unity Catalog Volumes +- **[databricks-aibi-dashboards](../databricks-aibi-dashboards/SKILL.md)** — building dashboards on top of Unity Catalog data +- **[databricks-vector-search](../databricks-vector-search/SKILL.md)** — vector indexes on Unity Catalog tables ## Resources +- [Unity Catalog 
Documentation](https://docs.databricks.com/en/data-governance/unity-catalog/index.html) - [Unity Catalog System Tables](https://docs.databricks.com/administration-guide/system-tables/) -- [Audit Log Reference](https://docs.databricks.com/administration-guide/account-settings/audit-logs.html) +- [Delta Sharing Documentation](https://docs.databricks.com/en/delta-sharing/index.html) +- [Lakehouse Federation](https://docs.databricks.com/en/query-federation/index.html) +- [Row Filters and Column Masks](https://docs.databricks.com/en/data-governance/unity-catalog/row-and-column-filters.html) diff --git a/databricks-skills/databricks-vector-search/SKILL.md b/databricks-skills/databricks-vector-search/SKILL.md index 3148cd7a..22bc276d 100644 --- a/databricks-skills/databricks-vector-search/SKILL.md +++ b/databricks-skills/databricks-vector-search/SKILL.md @@ -249,7 +249,10 @@ scan_result = w.vector_search_indexes.scan_index( ## Reference Files -- [index-types.md](index-types.md) - Detailed comparison of index types and creation patterns +| Topic | File | Description | +|-------|------|-------------| +| Index Types | [index-types.md](index-types.md) | Detailed comparison of Delta Sync (managed/self-managed) vs Direct Access | +| End-to-End RAG | [end-to-end-rag.md](end-to-end-rag.md) | Complete walkthrough: source table → endpoint → index → query → agent integration | ## CLI Quick Reference @@ -311,37 +314,44 @@ embedding_source_columns=[ ## MCP Tools -The following MCP tools are available for managing Vector Search infrastructure. These are **management tools** for creating and configuring endpoints/indexes. For agent-runtime querying, use the Databricks managed Vector Search MCP server or `VectorSearchRetrieverTool`. +The following MCP tools are available for managing Vector Search infrastructure. For a full end-to-end walkthrough, see [end-to-end-rag.md](end-to-end-rag.md). 
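Several of these operations are asynchronous (endpoint creation, index creation, sync). A small generic polling helper you can pair with `get_vs_endpoint` or `get_vs_index` — plain Python, with the status probe injected as a callable since the exact response shape depends on your client:

```python
import time

def wait_until(check, timeout_s=1200, poll_s=15):
    """Poll check() until it returns True or the timeout expires."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        if check():
            return True
        time.sleep(poll_s)
    return False

# Illustrative usage (response shape is an assumption — adapt to your client):
# wait_until(lambda: get_vs_endpoint(name="e")["endpoint_status"]["state"] == "ONLINE")
```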
### Endpoint Management | Tool | Description | |------|-------------| -| `create_or_update_vs_endpoint` | Create endpoint if it doesn't exist, or return existing (STANDARD or STORAGE_OPTIMIZED) | -| `get_vs_endpoint` | Get endpoint details by name, or list all endpoints (omit name) | +| `create_vs_endpoint` | Create endpoint (STANDARD or STORAGE_OPTIMIZED). Async — check status with `get_vs_endpoint` | +| `get_vs_endpoint` | Get endpoint details and status by name | +| `list_vs_endpoints` | List all Vector Search endpoints in the workspace | | `delete_vs_endpoint` | Delete an endpoint (indexes must be deleted first) | ### Index Management | Tool | Description | |------|-------------| -| `create_or_update_vs_index` | Create index if it doesn't exist; auto-triggers initial sync for DELTA_SYNC | -| `get_vs_index` | Get index details by name, or list indexes on endpoint (pass endpoint_name only) | +| `create_vs_index` | Create a Delta Sync or Direct Access index on an endpoint | +| `get_vs_index` | Get index details, status, and configuration | +| `list_vs_indexes` | List all indexes on an endpoint | | `delete_vs_index` | Delete an index | +| `sync_vs_index` | Trigger sync for TRIGGERED pipeline indexes | ### Query and Data | Tool | Description | |------|-------------| -| `query_vs_index` | Query index with text, vector, or hybrid search (for testing) | -| `manage_vs_data` | Upsert, delete, scan, or sync index data (operation: "upsert", "delete", "scan", or "sync") | +| `query_vs_index` | Query index with `query_text`, `query_vector`, or hybrid (`query_type="HYBRID"`) | +| `upsert_vs_data` | Upsert vectors into a Direct Access index | +| `delete_vs_data` | Delete vectors from a Direct Access index by primary key | +| `scan_vs_index` | Retrieve all vectors from an index (for debugging/export) | ## Notes -- **Storage-Optimized is newer** - Better for most use cases unless you need <100ms latency -- **Delta Sync recommended** - Easier than Direct Access for most scenarios 
-- **Hybrid search** - Available for both Delta Sync and Direct Access indexes -- **Management vs runtime** - MCP tools above handle lifecycle management; for agent tool-calling at runtime, use the Databricks managed Vector Search MCP server +- **Storage-Optimized is newer** — better for most use cases unless you need <100ms latency +- **Delta Sync recommended** — easier than Direct Access for most scenarios +- **Hybrid search** — available for both Delta Sync and Direct Access indexes +- **`columns_to_sync` matters** — only synced columns are available in query results; include all columns you need +- **Filter syntax differs by endpoint** — Standard uses `filters_json` (dict), Storage-Optimized uses `filter_string` (SQL) +- **Management vs runtime** — MCP tools above handle lifecycle management; for agent tool-calling at runtime, use `VectorSearchRetrieverTool` or the Databricks managed Vector Search MCP server ## Related Skills diff --git a/databricks-skills/databricks-vector-search/end-to-end-rag.md b/databricks-skills/databricks-vector-search/end-to-end-rag.md new file mode 100644 index 00000000..28aa697b --- /dev/null +++ b/databricks-skills/databricks-vector-search/end-to-end-rag.md @@ -0,0 +1,250 @@ +# End-to-End RAG with Vector Search + +Build a complete Retrieval-Augmented Generation pipeline: prepare documents, create a vector index, query it, and wire it into an agent. + +## MCP Tools Used + +| Tool | Step | +|------|------| +| `execute_sql` | Create source table, insert documents | +| `create_vs_endpoint` | Create compute endpoint | +| `create_vs_index` | Create Delta Sync index with managed embeddings | +| `sync_vs_index` | Trigger index sync | +| `get_vs_index` | Check index status | +| `query_vs_index` | Test similarity search | + +--- + +## Step 1: Prepare Source Table + +The source Delta table needs a primary key column and a text column to embed. 
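If your documents are long, they are typically split into smaller, slightly overlapping chunks before loading, so each row embeds well and retrieval stays precise. A minimal character-based sketch (helper name and sizes are illustrative, not part of the original walkthrough):

```python
def chunk_text(text: str, size: int = 1000, overlap: int = 100) -> list[str]:
    """Split text into chunks of ~size chars, each overlapping the last by `overlap`."""
    if size <= overlap:
        raise ValueError("size must exceed overlap")
    chunks = []
    start = 0
    while start < len(text):
        chunks.append(text[start:start + size])
        start += size - overlap
    return chunks

# Each chunk becomes its own row, e.g. doc_id 'doc-001-0', 'doc-001-1', ...
```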
+ +```sql +CREATE TABLE IF NOT EXISTS catalog.schema.knowledge_base ( + doc_id STRING, + title STRING, + content STRING, + category STRING, + updated_at TIMESTAMP DEFAULT current_timestamp() +); + +INSERT INTO catalog.schema.knowledge_base VALUES +('doc-001', 'Getting Started', 'Databricks is a unified analytics platform...', 'overview', current_timestamp()), +('doc-002', 'Unity Catalog', 'Unity Catalog provides centralized governance...', 'governance', current_timestamp()), +('doc-003', 'Delta Lake', 'Delta Lake is an open-source storage layer...', 'storage', current_timestamp()); +``` + +Or via MCP: + +```python +execute_sql(sql_query=""" + CREATE TABLE IF NOT EXISTS catalog.schema.knowledge_base ( + doc_id STRING, + title STRING, + content STRING, + category STRING, + updated_at TIMESTAMP DEFAULT current_timestamp() + ) +""") +``` + +## Step 2: Create Vector Search Endpoint + +```python +create_vs_endpoint( + name="my-rag-endpoint", + endpoint_type="STORAGE_OPTIMIZED" +) +``` + +Endpoint creation is asynchronous. 
Check status: + +```python +get_vs_endpoint(name="my-rag-endpoint") +# Wait for state: "ONLINE" +``` + +## Step 3: Create Delta Sync Index + +```python +create_vs_index( + name="catalog.schema.knowledge_base_index", + endpoint_name="my-rag-endpoint", + primary_key="doc_id", + index_type="DELTA_SYNC", + delta_sync_index_spec={ + "source_table": "catalog.schema.knowledge_base", + "embedding_source_columns": [ + { + "name": "content", + "embedding_model_endpoint_name": "databricks-gte-large-en" + } + ], + "pipeline_type": "TRIGGERED", + "columns_to_sync": ["doc_id", "title", "content", "category"] + } +) +``` + +Key decisions: +- **`embedding_source_columns`**: Databricks computes embeddings automatically from the `content` column +- **`pipeline_type`**: `TRIGGERED` for manual sync (cheaper), `CONTINUOUS` for auto-sync on table changes +- **`columns_to_sync`**: Only sync columns you need in query results (reduces storage and improves performance) + +## Step 4: Sync and Verify + +```python +# Trigger initial sync +sync_vs_index(index_name="catalog.schema.knowledge_base_index") + +# Check status +get_vs_index(index_name="catalog.schema.knowledge_base_index") +# Wait for state: "ONLINE" +``` + +## Step 5: Query the Index + +```python +# Semantic search +query_vs_index( + index_name="catalog.schema.knowledge_base_index", + columns=["doc_id", "title", "content", "category"], + query_text="How do I govern my data?", + num_results=3 +) +``` + +### With Filters + +```python +# Storage-Optimized endpoint: SQL-like filter_string +query_vs_index( + index_name="catalog.schema.knowledge_base_index", + columns=["doc_id", "title", "content"], + query_text="How do I govern my data?", + num_results=3, + filter_string="category = 'governance'" +) + +# Standard endpoint: JSON filters_json +query_vs_index( + index_name="catalog.schema.knowledge_base_index", + columns=["doc_id", "title", "content"], + query_text="How do I govern my data?", + num_results=3, + filters_json='{"category": 
"governance"}' +) +``` + +### Hybrid Search (Vector + Keyword) + +```python +query_vs_index( + index_name="catalog.schema.knowledge_base_index", + columns=["doc_id", "title", "content"], + query_text="Delta Lake ACID transactions", + num_results=5, + query_type="HYBRID" +) +``` + +--- + +## Step 6: Use in an Agent + +### As a UC Function Tool (ChatAgent) + +Register the vector search index as a tool for your agent: + +```python +from databricks.sdk import WorkspaceClient +from databricks.agents.udfs import VectorSearchRetrieverUDF + +w = WorkspaceClient() + +retriever = VectorSearchRetrieverUDF( + index_name="catalog.schema.knowledge_base_index", + columns=["doc_id", "title", "content"], + num_results=5, +) + +# Register as UC function +retriever.register( + catalog="catalog", + schema="schema", + function_name="search_knowledge_base" +) +``` + +### In a ChatAgent + +```python +from databricks.agents import ChatAgent + +class RAGAgent(ChatAgent): + def __init__(self): + self.w = WorkspaceClient() + + def predict(self, messages, context=None): + query = messages[-1]["content"] + + results = self.w.vector_search_indexes.query_index( + index_name="catalog.schema.knowledge_base_index", + columns=["title", "content"], + query_text=query, + num_results=3, + ) + + context_docs = "\n\n".join( + f"**{row[0]}**: {row[1]}" + for row in results.result.data_array + ) + + response = self.w.serving_endpoints.query( + name="databricks-claude-sonnet-4-20250514", + messages=[ + {"role": "system", "content": f"Answer using this context:\n{context_docs}"}, + {"role": "user", "content": query}, + ], + ) + + return {"content": response.choices[0].message.content} +``` + +--- + +## Updating the Index + +### Add New Documents + +```sql +INSERT INTO catalog.schema.knowledge_base VALUES +('doc-004', 'MLflow', 'MLflow is an open-source platform for ML lifecycle...', 'ml', current_timestamp()); +``` + +Then sync: + +```python +sync_vs_index(index_name="catalog.schema.knowledge_base_index") 
+``` + +### Delete Documents + +```sql +DELETE FROM catalog.schema.knowledge_base WHERE doc_id = 'doc-001'; +``` + +Then sync — the index automatically handles deletions via Delta change data feed. + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **Index stuck in PROVISIONING** | Endpoint may still be creating. Check `get_vs_endpoint` first | +| **Query returns no results** | Index may not be synced yet. Run `sync_vs_index` and wait for ONLINE state | +| **"Column not found in index"** | Column must be in `columns_to_sync`. Recreate index with the column included | +| **Embeddings not computed** | Ensure `embedding_model_endpoint_name` is a valid serving endpoint | +| **Stale results after table update** | For TRIGGERED pipelines, you must call `sync_vs_index` manually | +| **Filter not working** | Standard endpoints use `filters_json` (dict), Storage-Optimized use `filter_string` (SQL) | From 252b991a51dac61fd5d5977538db843a7901db35 Mon Sep 17 00:00:00 2001 From: Steven Tan Date: Thu, 5 Mar 2026 21:13:30 +0800 Subject: [PATCH 2/3] Fix accuracy issues found during review - 4-sharing-and-federation.md: Rewrite Delta Sharing examples to use correct MCP tool actions (add_table, remove_table, grant_to_recipient) and flat parameters instead of incorrect nested updates/changes arrays. Fix manage_uc_connections to use options dict instead of top-level host/port/user. Add create_foreign_catalog action documentation. - 3-security-policies.md: Correct misleading claim that ALL_PRIVILEGES bypasses row filters/column masks. Only metastore admins and account admins bypass them. - Structured Streaming SKILL.md: Fix realTime trigger syntax from realTime=True (invalid) to realTime="5 minutes" (correct, DBR 16.4+). Fix production checklist wording about default trigger behavior. - end-to-end-rag.md: Replace non-existent VectorSearchRetrieverUDF with VectorSearchRetrieverTool. Clarify filter example context (Standard vs Storage-Optimized endpoint). 
Fix ChatAgent message access to use attribute style. - Terraform 2-core-resources.md: Replace deprecated pipeline target attribute with schema. SKILL.md: Remove invalid serverless cluster profile spark_conf. --- .../SKILL.md | 4 +- .../databricks-terraform/2-core-resources.md | 4 +- .../databricks-terraform/SKILL.md | 2 +- .../3-security-policies.md | 2 +- .../4-sharing-and-federation.md | 155 ++++++++---------- .../end-to-end-rag.md | 39 ++--- 6 files changed, 90 insertions(+), 116 deletions(-) diff --git a/databricks-skills/databricks-spark-structured-streaming/SKILL.md b/databricks-skills/databricks-spark-structured-streaming/SKILL.md index 2d39d3b5..a495cd3f 100644 --- a/databricks-skills/databricks-spark-structured-streaming/SKILL.md +++ b/databricks-skills/databricks-spark-structured-streaming/SKILL.md @@ -119,7 +119,7 @@ Schedule via Databricks Jobs every 15 minutes for near-real-time at a fraction o | Latency Requirement | Trigger | Cost | Use Case | |---------------------|---------|------|----------| -| < 800ms | `realTime=True` | $$$ | Real-time analytics, alerts | +| Sub-second (ms) | `realTime="5 minutes"` | $$$ | Fraud detection, real-time personalization (DBR 16.4+) | | 1–30 seconds | `processingTime="N seconds"` | $$ | Near real-time dashboards | | 15–60 minutes | `availableNow=True` (scheduled) | $ | Batch-style SLA | | > 1 hour | `availableNow=True` (scheduled) | $ | ETL pipelines | @@ -210,7 +210,7 @@ See [checkpoint-best-practices.md](checkpoint-best-practices.md) for migration, - [ ] Checkpoint location is persistent (UC Volumes, not DBFS) - [ ] Unique checkpoint per stream - [ ] Fixed-size cluster (no autoscaling for streaming) -- [ ] Trigger interval explicitly set (never use default continuous) +- [ ] Trigger interval explicitly set (default processes micro-batches as fast as possible, which is expensive) - [ ] Monitoring configured (input rate, processing rate, batch duration) - [ ] Watermark configured for all stateful operations - [ ] 
Schema defined explicitly (not inferred) for Kafka sources diff --git a/databricks-skills/databricks-terraform/2-core-resources.md b/databricks-skills/databricks-terraform/2-core-resources.md index c1c8b90f..8d75dc0b 100644 --- a/databricks-skills/databricks-terraform/2-core-resources.md +++ b/databricks-skills/databricks-terraform/2-core-resources.md @@ -201,8 +201,8 @@ resource "databricks_sql_endpoint" "serverless" { ```hcl resource "databricks_pipeline" "etl" { name = "etl-pipeline" - target = "analytics.silver" catalog = "analytics" + schema = "silver" library { notebook { @@ -210,7 +210,7 @@ resource "databricks_pipeline" "etl" { } } - continuous = false + continuous = false development = false cluster { diff --git a/databricks-skills/databricks-terraform/SKILL.md b/databricks-skills/databricks-terraform/SKILL.md index 9cf60b77..8f91499b 100644 --- a/databricks-skills/databricks-terraform/SKILL.md +++ b/databricks-skills/databricks-terraform/SKILL.md @@ -65,7 +65,7 @@ resource "databricks_cluster" "shared" { num_workers = 2 spark_conf = { - "spark.databricks.cluster.profile" = "serverless" + "spark.databricks.io.cache.enabled" = "true" } } ``` diff --git a/databricks-skills/databricks-unity-catalog/3-security-policies.md b/databricks-skills/databricks-unity-catalog/3-security-policies.md index a74c00bf..462d6ebf 100644 --- a/databricks-skills/databricks-unity-catalog/3-security-policies.md +++ b/databricks-skills/databricks-unity-catalog/3-security-policies.md @@ -173,7 +173,7 @@ manage_uc_security_policies( - Row filters and column masks are enforced at **query time** — they apply to all SQL, BI tools, and notebooks - Functions must be in the **same catalog** as the table (or in a catalog the user has access to) - `IS_ACCOUNT_GROUP_MEMBER()` checks account-level groups, not workspace-local groups -- Masks and filters do NOT apply to the table owner or users with `ALL_PRIVILEGES` +- Only **metastore admins** and **account admins** bypass row filters and 
column masks. Table owners and users with `ALL_PRIVILEGES` do NOT automatically bypass them - Performance impact is minimal for simple functions; avoid expensive joins in filter/mask functions --- diff --git a/databricks-skills/databricks-unity-catalog/4-sharing-and-federation.md b/databricks-skills/databricks-unity-catalog/4-sharing-and-federation.md index cfabeaed..cfca23e8 100644 --- a/databricks-skills/databricks-unity-catalog/4-sharing-and-federation.md +++ b/databricks-skills/databricks-unity-catalog/4-sharing-and-federation.md @@ -18,7 +18,7 @@ Delta Sharing enables secure, read-only sharing of data across Databricks worksp ### Concepts -- **Share**: A named collection of tables/schemas to share +- **Share**: A named collection of tables to share - **Recipient**: An entity (person, org, workspace) that receives shared data - **Provider**: An entity that shares data (your workspace) @@ -33,65 +33,39 @@ manage_uc_sharing( ) ``` -### Add Tables to a Share +### Add a Table to a Share ```python manage_uc_sharing( resource_type="share", - action="update", + action="add_table", name="partner_data_share", - updates=[ - { - "action": "ADD", - "data_object": { - "name": "analytics.gold.quarterly_metrics", - "data_object_type": "TABLE", - "shared_as": "quarterly_metrics" - } - } - ] + table_name="analytics.gold.quarterly_metrics", + shared_as="quarterly_metrics" ) ``` -### Add an Entire Schema +### Add a Table with Partition Filter ```python manage_uc_sharing( resource_type="share", - action="update", + action="add_table", name="partner_data_share", - updates=[ - { - "action": "ADD", - "data_object": { - "name": "analytics.gold", - "data_object_type": "SCHEMA" - } - } - ] + table_name="analytics.gold.orders", + shared_as="orders", + partition_spec="region = 'US'" ) ``` -### Share with Partitions (Subset of Data) +### Remove a Table from a Share ```python manage_uc_sharing( resource_type="share", - action="update", + action="remove_table", name="partner_data_share", - 
updates=[ - { - "action": "ADD", - "data_object": { - "name": "analytics.gold.orders", - "data_object_type": "TABLE", - "shared_as": "orders", - "partitions": [ - {"values": [{"name": "region", "op": "EQUAL", "value": "US"}]} - ] - } - } - ] + table_name="analytics.gold.quarterly_metrics" ) ``` @@ -104,7 +78,7 @@ manage_uc_sharing( action="create", name="partner_acme", authentication_type="DATABRICKS", - sharing_code="", + sharing_id="", comment="Acme Corp data team" ) @@ -118,16 +92,25 @@ manage_uc_sharing( ) ``` -### Grant Share to Recipient +### Grant a Share to a Recipient ```python manage_uc_sharing( - resource_type="recipient", - action="update_permissions", - name="partner_acme", - changes=[ - {"add": [{"privilege": "SELECT", "share_name": "partner_data_share"}]} - ] + resource_type="share", + action="grant_to_recipient", + share_name="partner_data_share", + recipient_name="partner_acme" +) +``` + +### Revoke a Share from a Recipient + +```python +manage_uc_sharing( + resource_type="share", + action="revoke_from_recipient", + share_name="partner_data_share", + recipient_name="partner_acme" ) ``` @@ -145,6 +128,9 @@ manage_uc_sharing(resource_type="recipient", action="list") # List providers (shares you've received) manage_uc_sharing(resource_type="provider", action="list") + +# List shares from a specific provider +manage_uc_sharing(resource_type="provider", action="list_shares", name="databricks-partner") ``` --- @@ -167,9 +153,16 @@ manage_uc_storage( resource_type="credential", action="create", name="my-s3-credential", - aws_iam_role_arn="arn:aws:iam::123456789:role/unity-catalog-access", + aws_iam_role_arn="arn:aws:iam::123456789012:role/unity-catalog-access", comment="Access to analytics S3 bucket" ) + +# Validate a credential +manage_uc_storage( + resource_type="credential", + action="validate", + name="my-s3-credential" +) ``` ### External Locations @@ -187,20 +180,13 @@ manage_uc_storage( # List external locations 
manage_uc_storage(resource_type="external_location", action="list") - -# Validate an external location -manage_uc_storage( - resource_type="external_location", - action="validate", - name="analytics-landing" -) ``` --- ## Lakehouse Federation -Connect to external databases (PostgreSQL, MySQL, SQL Server, Snowflake, BigQuery, Redshift, etc.) and query them through Unity Catalog. +Connect to external databases (PostgreSQL, MySQL, SQL Server, Snowflake, BigQuery) and query them through Unity Catalog. ### Create a Connection @@ -210,10 +196,12 @@ manage_uc_connections( action="create", name="postgres_erp", connection_type="POSTGRESQL", - host="erp-db.company.com", - port="5432", - user="readonly_user", - password="", + options={ + "host": "erp-db.company.com", + "port": "5432", + "user": "readonly_user", + "password": "" + }, comment="ERP PostgreSQL database" ) @@ -222,10 +210,12 @@ manage_uc_connections( action="create", name="mysql_legacy", connection_type="MYSQL", - host="legacy-db.company.com", - port="3306", - user="reader", - password="" + options={ + "host": "legacy-db.company.com", + "port": "3306", + "user": "reader", + "password": "" + } ) ``` @@ -233,8 +223,13 @@ manage_uc_connections( After creating a connection, create a foreign catalog to browse and query the external database: -```sql -CREATE FOREIGN CATALOG erp_catalog USING CONNECTION postgres_erp; +```python +manage_uc_connections( + action="create_foreign_catalog", + connection_name="postgres_erp", + catalog_name="erp_catalog", + catalog_options={"database": "erp_production"} +) ``` Now you can query external tables as if they were native UC tables: @@ -256,7 +251,7 @@ manage_uc_connections(action="get", name="postgres_erp") manage_uc_connections( action="update", name="postgres_erp", - host="new-erp-db.company.com", + options={"host": "new-erp-db.company.com"}, comment="Updated to new host" ) @@ -268,26 +263,16 @@ manage_uc_connections(action="delete", name="postgres_erp") ## Common Patterns -### 
Share Data with Row-Level Security - -Combine Delta Sharing with partitions to share only relevant data: +### Share Region-Specific Data ```python -# Share only APAC data with APAC partner +# Share only US data with a US partner manage_uc_sharing( resource_type="share", - action="update", - name="apac_partner_share", - updates=[{ - "action": "ADD", - "data_object": { - "name": "analytics.gold.revenue", - "data_object_type": "TABLE", - "partitions": [ - {"values": [{"name": "region", "op": "EQUAL", "value": "APAC"}]} - ] - } - }] + action="add_table", + name="us_partner_share", + table_name="analytics.gold.revenue", + partition_spec="region = 'US'" ) ``` @@ -308,7 +293,7 @@ JOIN erp_catalog.public.customer_status e | Issue | Solution | |-------|----------| | **"Share not found"** | Shares are metastore-level objects. Ensure you're connected to the right workspace/metastore | -| **Recipient can't access shared data** | Verify: (1) recipient has SELECT on the share, (2) activation link was used (for TOKEN auth), (3) share contains the table | +| **Recipient can't access shared data** | Verify: (1) share is granted to recipient via `grant_to_recipient`, (2) activation link was used (for TOKEN auth), (3) table is added to the share | | **Federation query slow** | Predicate pushdown works for simple filters. Complex joins are pulled into Spark — add filters early | -| **"Connection failed"** | Check: (1) network connectivity (firewall/VPC), (2) credentials, (3) host/port. 
Use `manage_uc_connections` to validate |
+| **"Connection failed"** | Check: (1) network connectivity (firewall/VPC), (2) credentials in `options`, (3) host/port |
 | **Cannot create foreign catalog** | Need `CREATE_CATALOG` and `CREATE_FOREIGN_CATALOG` privileges, plus the connection must exist |
 
diff --git a/databricks-skills/databricks-vector-search/end-to-end-rag.md b/databricks-skills/databricks-vector-search/end-to-end-rag.md
index 28aa697b..22956383 100644
--- a/databricks-skills/databricks-vector-search/end-to-end-rag.md
+++ b/databricks-skills/databricks-vector-search/end-to-end-rag.md
@@ -116,8 +116,10 @@ query_vs_index(
 
 ### With Filters
 
+The filter syntax depends on the endpoint type used when creating the index.
+
 ```python
-# Storage-Optimized endpoint: SQL-like filter_string
+# Storage-Optimized endpoint (used in this walkthrough): SQL-like filter_string
 query_vs_index(
     index_name="catalog.schema.knowledge_base_index",
     columns=["doc_id", "title", "content"],
@@ -126,9 +128,9 @@ query_vs_index(
     filter_string="category = 'governance'"
 )
 
-# Standard endpoint: JSON filters_json
+# Standard endpoint (if you created a Standard endpoint instead): JSON filters_json
 query_vs_index(
-    index_name="catalog.schema.knowledge_base_index",
+    index_name="catalog.schema.my_standard_index",
     columns=["doc_id", "title", "content"],
     query_text="How do I govern my data?",
     num_results=3,
@@ -152,41 +154,28 @@ query_vs_index(
 
 ## Step 6: Use in an Agent
 
-### As a UC Function Tool (ChatAgent)
+### As a Tool in a ChatAgent
 
-Register the vector search index as a tool for your agent:
+Use `VectorSearchRetrieverTool` to wire the index into an agent deployed on Model Serving:
 
 ```python
+from mlflow.pyfunc import ChatAgent
+from databricks_openai import VectorSearchRetrieverTool
+from databricks.sdk import WorkspaceClient
-from databricks.agents.udfs import VectorSearchRetrieverUDF
-
-w = WorkspaceClient()
-retriever = VectorSearchRetrieverUDF(
+# Define the retriever tool
+retriever_tool = VectorSearchRetrieverTool( index_name="catalog.schema.knowledge_base_index", columns=["doc_id", "title", "content"], - num_results=5, -) - -# Register as UC function -retriever.register( - catalog="catalog", - schema="schema", - function_name="search_knowledge_base" + num_results=3, ) -``` - -### In a ChatAgent - -```python -from databricks.agents import ChatAgent class RAGAgent(ChatAgent): def __init__(self): self.w = WorkspaceClient() def predict(self, messages, context=None): - query = messages[-1]["content"] + query = messages[-1].content results = self.w.vector_search_indexes.query_index( index_name="catalog.schema.knowledge_base_index", @@ -201,7 +190,7 @@ class RAGAgent(ChatAgent): ) response = self.w.serving_endpoints.query( - name="databricks-claude-sonnet-4-20250514", + name="databricks-meta-llama-3-3-70b-instruct", messages=[ {"role": "system", "content": f"Answer using this context:\n{context_docs}"}, {"role": "user", "content": query}, From da17686058f97e7f8c332e361770837fe5f938d2 Mon Sep 17 00:00:00 2001 From: Steven Tan Date: Thu, 5 Mar 2026 21:28:45 +0800 Subject: [PATCH 3/3] Register new skill and files in install_skills.sh - Add databricks-terraform to DATABRICKS_SKILLS list - Add databricks-terraform description and extra files - Update databricks-unity-catalog extra files (add 4 new reference files + 6-volumes.md + 7-data-profiling.md) - Update databricks-vector-search extra files (add end-to-end-rag.md) - Update databricks-unity-catalog description --- databricks-skills/install_skills.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/databricks-skills/install_skills.sh b/databricks-skills/install_skills.sh index 763489c8..e490379e 100755 --- a/databricks-skills/install_skills.sh +++ b/databricks-skills/install_skills.sh @@ -42,7 +42,7 @@ MLFLOW_REPO_RAW_URL="https://raw.githubusercontent.com/mlflow/skills" MLFLOW_REPO_REF="main" # Databricks skills (hosted in this repo) 
-DATABRICKS_SKILLS="databricks-agent-bricks databricks-aibi-dashboards databricks-asset-bundles databricks-app-apx databricks-app-python databricks-config databricks-dbsql databricks-docs databricks-genie databricks-iceberg databricks-jobs databricks-lakebase-autoscale databricks-lakebase-provisioned databricks-metric-views databricks-mlflow-evaluation databricks-model-serving databricks-parsing databricks-python-sdk databricks-spark-declarative-pipelines databricks-spark-structured-streaming databricks-synthetic-data-gen databricks-unity-catalog databricks-unstructured-pdf-generation databricks-vector-search databricks-zerobus-ingest spark-python-data-source" +DATABRICKS_SKILLS="databricks-agent-bricks databricks-aibi-dashboards databricks-asset-bundles databricks-app-apx databricks-app-python databricks-config databricks-dbsql databricks-docs databricks-genie databricks-iceberg databricks-jobs databricks-lakebase-autoscale databricks-lakebase-provisioned databricks-metric-views databricks-mlflow-evaluation databricks-model-serving databricks-parsing databricks-python-sdk databricks-spark-declarative-pipelines databricks-spark-structured-streaming databricks-synthetic-data-gen databricks-terraform databricks-unity-catalog databricks-unstructured-pdf-generation databricks-vector-search databricks-zerobus-ingest spark-python-data-source" # MLflow skills (fetched from mlflow/skills repo) MLFLOW_SKILLS="agent-evaluation analyze-mlflow-chat-session analyze-mlflow-trace instrumenting-with-mlflow-tracing mlflow-onboarding querying-mlflow-metrics retrieving-mlflow-traces searching-mlflow-docs" @@ -66,7 +66,8 @@ get_skill_description() { "databricks-iceberg") echo "Apache Iceberg - managed tables, UniForm, IRC, Snowflake interop, migration" ;; "databricks-jobs") echo "Databricks Lakeflow Jobs - workflow orchestration" ;; "databricks-python-sdk") echo "Databricks Python SDK, Connect, and REST API" ;; - "databricks-unity-catalog") echo "System tables for lineage, audit, 
billing" ;; + "databricks-terraform") echo "Databricks Terraform provider - IaC for workspaces, UC, clusters, jobs" ;; + "databricks-unity-catalog") echo "Unity Catalog governance - objects, grants, tags, security, sharing, system tables" ;; "databricks-lakebase-autoscale") echo "Lakebase Autoscale - managed PostgreSQL with autoscaling" ;; "databricks-lakebase-provisioned") echo "Lakebase Provisioned - data connections and reverse ETL" ;; "databricks-metric-views") echo "Unity Catalog Metric Views - governed business metrics in YAML" ;; @@ -105,7 +106,8 @@ get_skill_extra_files() { "databricks-app-python") echo "dash.md streamlit.md README.md" ;; "databricks-jobs") echo "task-types.md triggers-schedules.md notifications-monitoring.md examples.md" ;; "databricks-python-sdk") echo "doc-index.md examples/1-authentication.py examples/2-clusters-and-jobs.py examples/3-sql-and-warehouses.py examples/4-unity-catalog.py examples/5-serving-and-vector-search.py" ;; - "databricks-unity-catalog") echo "5-system-tables.md" ;; + "databricks-terraform") echo "1-provider-and-auth.md 2-core-resources.md 3-unity-catalog.md 4-best-practices.md" ;; + "databricks-unity-catalog") echo "1-objects-and-governance.md 2-tags-and-classification.md 3-security-policies.md 4-sharing-and-federation.md 5-system-tables.md 6-volumes.md 7-data-profiling.md" ;; "databricks-lakebase-autoscale") echo "projects.md branches.md computes.md connection-patterns.md reverse-etl.md" ;; "databricks-lakebase-provisioned") echo "connection-patterns.md reverse-etl.md" ;; "databricks-metric-views") echo "yaml-reference.md patterns.md" ;; @@ -113,7 +115,7 @@ get_skill_extra_files() { "databricks-mlflow-evaluation") echo "references/CRITICAL-interfaces.md references/GOTCHAS.md references/patterns-context-optimization.md references/patterns-datasets.md references/patterns-evaluation.md references/patterns-scorers.md references/patterns-trace-analysis.md references/user-journeys.md" ;; 
"databricks-spark-declarative-pipelines") echo "1-ingestion-patterns.md 2-streaming-patterns.md 3-scd-patterns.md 4-performance-tuning.md 5-python-api.md 6-dlt-migration.md 7-advanced-configuration.md 8-project-initialization.md" ;; "databricks-spark-structured-streaming") echo "checkpoint-best-practices.md kafka-streaming.md merge-operations.md multi-sink-writes.md stateful-operations.md stream-static-joins.md stream-stream-joins.md streaming-best-practices.md trigger-and-cost-optimization.md" ;; - "databricks-vector-search") echo "index-types.md" ;; + "databricks-vector-search") echo "index-types.md end-to-end-rag.md" ;; "databricks-zerobus-ingest") echo "1-setup-and-authentication.md 2-python-client.md 3-multilanguage-clients.md 4-protobuf-schema.md 5-operations-and-limits.md" ;; *) echo "" ;; esac