SQLMesh
diff --git a/‎.github/scripts/wait-for-db.sh‎
Lines changed: 31 additions & 0 deletions b/‎.github/scripts/wait-for-db.sh‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎.github/workflows/pr.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/pr.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.readthedocs.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.readthedocs.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Makefile‎
Lines changed: 7 additions & 2 deletions b/‎Makefile‎
Lines changed: 7 additions & 2 deletions
diff --git a/‎docs/concepts/models/python_models.md‎
Lines changed: 27 additions & 0 deletions b/‎docs/concepts/models/python_models.md‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎docs/guides/configuration.md‎
Lines changed: 2 additions & 0 deletions b/‎docs/guides/configuration.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/guides/connections.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/guides/connections.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/integrations/dlt.md‎
Lines changed: 5 additions & 5 deletions b/‎docs/integrations/dlt.md‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎docs/integrations/engines/clickhouse.md‎
Lines changed: 50 additions & 1 deletion b/‎docs/integrations/engines/clickhouse.md‎
Lines changed: 50 additions & 1 deletion
@@ -50,6 +50,37 @@ spark_ready() {
     probe_port 15002
 }
 
+starrocks_ready() {
+    probe_port 9030
+
+    echo "Checking for 1 alive StarRocks backends..."
+    sleep 5
+
+    while true; do
+        echo "Checking StarRocks backends..."
+        ALIVE_BACKENDS=$(docker exec -i starrocks-fe mysql -h127.0.0.1 -P9030 -uroot -e "show backends \G" | grep -c "^ *Alive: true *$")
+
+        # fallback value if failed to get number
+        if ! [[ "$ALIVE_BACKENDS" =~ ^[0-9]+$ ]]; then
+            echo "WARN: Unable to parse number of alive backends, got: '$ALIVE_BACKENDS'"
+            ALIVE_BACKENDS=0
+        fi
+
+        echo "Found $ALIVE_BACKENDS alive backends"
+
+        if [ "$ALIVE_BACKENDS" -ge 1 ]; then
+            echo "StarRocks has 1 or more alive backends"
+            break
+        fi
+
+        echo "Waiting for more backends to become alive..."
+        sleep 5
+    done
+
+    # set default replication num to 1 (there is only one be in the docker compose file)
+    docker exec -i starrocks-fe mysql -h127.0.0.1 -P9030 -uroot -e "ADMIN SET frontend config ('default_replication_num' = '1');"
+}
+
 trino_ready() {
     # Trino has a built-in healthcheck script, just call that
     docker compose -f tests/core/engine_adapter/integration/docker/compose.trino.yaml exec trino /bin/bash -c '/usr/lib/trino/bin/health-check'
 
@@ -252,7 +252,7 @@ jobs:
       fail-fast: false
       matrix:
         engine:
-          [duckdb, postgres, mysql, mssql, trino, spark, clickhouse, risingwave]
+          [duckdb, postgres, mysql, mssql, trino, spark, clickhouse, risingwave, starrocks]
     env:
       PYTEST_XDIST_AUTO_NUM_WORKERS: 2
       SQLMESH__DISABLE_ANONYMIZED_ANALYTICS: '1'
 
@@ -6,7 +6,7 @@ build:
     python: "3.10"
   jobs:
     pre_build:
-      - pip install -e ".[athena,azuresql,bigframes,bigquery,clickhouse,databricks,dbt,dlt,gcppostgres,github,llm,mssql,mysql,mwaa,postgres,redshift,slack,snowflake,trino,web,risingwave]"
+      - pip install -e ".[athena,azuresql,bigframes,bigquery,clickhouse,databricks,dbt,dlt,gcppostgres,github,llm,mssql,mysql,mwaa,postgres,redshift,slack,snowflake,starrocks,trino,web,risingwave]"
       - make api-docs
 
 mkdocs:
 
@@ -49,11 +49,13 @@ install-dev-dbt-%:
 	$(MAKE) install-dev; \
 	if [ "$$version" = "1.6.0" ]; then \
 		echo "Applying overrides for dbt 1.6.0"; \
-		$(PIP) install 'pydantic>=2.0.0' 'google-cloud-bigquery==3.30.0' 'databricks-sdk==0.28.0' --reinstall; \
+		$(PIP) install 'pydantic>=2.0.0' 'google-cloud-bigquery==3.30.0' 'databricks-sdk==0.28.0' \
+			'pyOpenSSL>=24.0.0' --reinstall; \
 	fi; \
 	if [ "$$version" = "1.7.0" ]; then \
 		echo "Applying overrides for dbt 1.7.0"; \
-		$(PIP) install 'databricks-sdk==0.28.0' --reinstall; \
+		$(PIP) install 'databricks-sdk==0.28.0' \
+			'pyOpenSSL>=24.0.0' --reinstall; \
 	fi; \
 	if [ "$$version" = "1.5.0" ]; then \
 		echo "Applying overrides for dbt 1.5.0"; \
@@ -212,6 +214,9 @@ trino-test: engine-trino-up
 risingwave-test: engine-risingwave-up
 	pytest -n auto -m "risingwave" --reruns 3 --junitxml=test-results/junit-risingwave.xml
 
+starrocks-test: engine-starrocks-up
+	pytest -n auto -m "starrocks" --reruns 3 --junitxml=test-results/junit-starrocks.xml
+
 #################
 # Cloud Engines #
 #################
 
@@ -369,6 +369,33 @@ def entrypoint(
     )
 ```
 
+Blueprint variables can also be used as **column names and column types** in the `columns` dictionary. For example, if each blueprint produces a model with a different set of column names and types, both can be parameterized using the same `@{variable}` syntax:
+
+```python linenums="1"
+import pandas as pd
+from sqlmesh import ExecutionContext, model
+
+@model(
+    "@{customer}.metrics",
+    kind="FULL",
+    blueprints=[
+        {"customer": "customer1", "primary_metric": "revenue", "primary_type": "int",  "secondary_metric": "cost",   "secondary_type": "double"},
+        {"customer": "customer2", "primary_metric": "sales",   "primary_type": "text", "secondary_metric": "profit", "secondary_type": "double"},
+    ],
+    columns={
+        "@{primary_metric}": "@{primary_type}",
+        "@{secondary_metric}": "@{secondary_type}",
+    },
+)
+def entrypoint(context: ExecutionContext, **kwargs) -> pd.DataFrame:
+    return pd.DataFrame({
+        context.blueprint_var("primary_metric"): [1],
+        context.blueprint_var("secondary_metric"): [1.5],
+    })
+```
+
+Global variables (defined in the project config) can also be used as column names and types in the same way.
+
 Note the use of curly brace syntax `@{customer}` in the model name above. It is used to ensure SQLMesh can combine the macro variable into the model name identifier correctly - learn more [here](../../concepts/macros/sqlmesh_macros.md#embedding-variables-in-strings).
 
 Blueprint variable mappings can also be constructed dynamically, e.g., by using a macro: `blueprints="@gen_blueprints()"`. This is useful in cases where the `blueprints` list needs to be sourced from external sources, such as CSV files.
 
@@ -920,6 +920,7 @@ These pages describe the connection configuration options for each execution eng
 * [GCP Postgres](../integrations/engines/gcp-postgres.md)
 * [Redshift](../integrations/engines/redshift.md)
 * [Snowflake](../integrations/engines/snowflake.md)
+* [StarRocks](../integrations/engines/starrocks.md)
 * [Spark](../integrations/engines/spark.md)
 * [Trino](../integrations/engines/trino.md)
 
@@ -952,6 +953,7 @@ Unsupported state engines, even for development:
 
 * [ClickHouse](../integrations/engines/clickhouse.md)
 * [Spark](../integrations/engines/spark.md)
+* [StarRocks](../integrations/engines/starrocks.md)
 * [Trino](../integrations/engines/trino.md)
 
 This example gateway configuration uses Snowflake for the data warehouse connection and Postgres for the state backend connection:
 
@@ -90,4 +90,5 @@ default_gateway: local_db
 * [Redshift](../integrations/engines/redshift.md)
 * [Snowflake](../integrations/engines/snowflake.md)
 * [Spark](../integrations/engines/spark.md)
+* [StarRocks](../integrations/engines/starrocks.md)
 * [Trino](../integrations/engines/trino.md)
@@ -28,12 +28,12 @@ This will create the configuration file and directories, which are found in all
 
 SQLMesh will also automatically generate models to ingest data from the pipeline incrementally. Incremental loading is ideal for large datasets where recomputing entire tables is resource-intensive, so the generated models use the [`INCREMENTAL_BY_TIME_RANGE` model kind](../concepts/models/model_kinds.md#incremental_by_time_range). However, these model definitions can be customized to meet your specific project needs.
 
-#### Specify the path to the pipelines directory
+#### Specify the path to the pipelines working directory
 
-The default location for dlt pipelines is `~/.dlt/pipelines/<pipeline_name>`. If your pipelines are in a [different directory](https://dlthub.com/docs/general-usage/pipeline#separate-working-environments-with-pipelines_dir), use the `--dlt-path` argument to specify the path explicitly:
+The default location for dlt pipeline working state is `~/.dlt/pipelines/<pipeline_name>`. If dlt stores your pipeline state in a [different pipelines working directory](https://dlthub.com/docs/general-usage/pipeline#separate-working-environments-with-pipelines_dir), use the `--dlt-path` argument to specify that directory explicitly. This should be the directory where dlt stores pipeline state, not the directory containing your pipeline scripts:
 
 ```bash
-sqlmesh init -t dlt --dlt-pipeline <pipeline-name> --dlt-path <pipelines-directory> dialect
+sqlmesh init -t dlt --dlt-pipeline <pipeline-name> --dlt-path <pipelines-working-directory> dialect
 ```
 
 ### Generating models on demand
@@ -58,10 +58,10 @@ sqlmesh dlt_refresh <pipeline-name> --force
 sqlmesh dlt_refresh <pipeline-name> --table <dlt-table>
 ```
 
-- **Provide the explicit path to the pipelines directory** (using `--dlt-path`):
+- **Provide the explicit path to the pipelines working directory** (using `--dlt-path`):
 
 ```bash
-sqlmesh dlt_refresh <pipeline-name> --dlt-path <pipelines-directory>
+sqlmesh dlt_refresh <pipeline-name> --dlt-path <pipelines-working-directory>
 ```
 
 #### Configuration
 
@@ -420,6 +420,54 @@ If a model has many records in each partition, you may see additional performanc
 
     Choose a model's time partitioning granularity based on the characteristics of the data it will process, making sure the total number of partitions is 1000 or fewer.
 
+## Multi-gateway setup
+
+ClickHouse does not have a catalog concept — its fully-qualified table names are two-level (`database.table`), not three-level (`catalog.database.table`).
+
+When a SQLMesh project uses ClickHouse alongside a catalog-aware gateway such as Trino or BigQuery, the two gateway types produce FQNs with different nesting depths. SQLMesh's internal schema tracking requires uniform nesting, so it assigns a **virtual catalog** to ClickHouse models at load time.
+
+### How the virtual catalog works
+
+- SQLMesh automatically detects the nesting mismatch and injects a virtual catalog into each ClickHouse adapter when a catalog-aware gateway is also present.
+- ClickHouse models will appear with three-level FQNs in `sqlmesh plan` output and logs — for example, `__ch_prod__.mydb.mytable` for a gateway named `ch_prod`.
+- The virtual catalog prefix is **never sent to ClickHouse**. It is stripped from every DDL and DML statement before execution.
+- When ClickHouse is the only gateway in a project, no virtual catalog is assigned and models remain two-level.
+
+### Adding a second gateway to an existing ClickHouse-only project
+
+!!! warning "Re-materialization required"
+    Adding a catalog-aware gateway (such as Trino or BigQuery) to a project that previously used ClickHouse as the only gateway triggers a **full re-materialization of every ClickHouse model** the next time you apply a plan with `sqlmesh plan`. Plan for this before making the change.
+
+If your project previously used ClickHouse as the only gateway, your models were fingerprinted with 2-level FQNs (`db.table`). Adding a catalog-aware gateway causes all ClickHouse models to be treated as new versions (their FQNs change to `__{gateway_name}__.db.table`):
+
+- `FULL` models are recreated once — cost is proportional to the size of each table.
+- `INCREMENTAL_BY_TIME_RANGE` models require a **full historical backfill** from the model's configured start date.
+- The old 2-level model names appear as **Removed** in the plan and will be cleaned up after the environment TTL expires.
+
+This is a one-time cost at the transition point and does not recur. There is no way to skip it — `--forward-only` does not apply because SQLMesh treats the 3-level names as new models, not modified ones.
+
+### Virtual catalog naming
+
+By default, the virtual catalog name is derived from **the gateway name you chose in your config**, wrapped in double underscores — for example, a gateway named `clickhouse` produces `__clickhouse__`, and a gateway named `ch_prod` produces `__ch_prod__`. The double-underscore wrapping makes it visually clear that this is an internal SQLMesh concept, not a real ClickHouse object.
+
+You can override the default name by setting `virtual_catalog` in your ClickHouse connection configuration:
+
+```yaml
+gateways:
+  clickhouse:
+    connection:
+      type: clickhouse
+      host: my-clickhouse-host
+      username: default
+      virtual_catalog: ch_virtual  # optional; defaults to __{gateway_name}__ (e.g. __clickhouse__)
+  trino:
+    connection:
+      type: trino
+      ...
+```
+
+With this configuration, ClickHouse models will appear as `ch_virtual.mydb.mytable` in plan output instead of `__clickhouse__.mydb.mytable`.
+
 ## Local/Built-in Scheduler
 
 **Engine Adapter Type**: `clickhouse`
@@ -446,4 +494,5 @@ If a model has many records in each partition, you may see additional performanc
 | `server_host_name`        | The ClickHouse server hostname as identified by the CN or SNI of its TLS certificate. Set this to avoid SSL errors when connecting through a proxy or tunnel with a different hostname.                                                                                         | string |    N     |
 | `tls_mode`                | Controls advanced TLS behavior. proxy and strict do not invoke ClickHouse mutual TLS connection, but do send client cert and key. mutual assumes ClickHouse mutual TLS auth with a client certificate.                                                                          | string |    N     |
 | `connection_settings`     | Additional [connection settings](https://clickhouse.com/docs/integrations/python#settings-argument)                                                                                                                                                                             |  dict  |    N     |
-| `connection_pool_options` | Additional [options](https://clickhouse.com/docs/integrations/python#customizing-the-http-connection-pool)                                                                                                                                         for the HTTP connection pool |  dict  |    N     |
+| `connection_pool_options` | Additional [options](https://clickhouse.com/docs/integrations/python#customizing-the-http-connection-pool)                                                                                                                                         for the HTTP connection pool |  dict  |    N     |
+| `virtual_catalog`         | Override the virtual catalog name used when ClickHouse runs alongside a catalog-aware gateway (e.g. Trino). Defaults to `__{gateway_name}__`. See [Multi-gateway setup](#multi-gateway-setup) for details.                                                                      | string |    N     |