diff --git a/controlplane/configstore/models.go b/controlplane/configstore/models.go index 87a77e84..78bd8d7a 100644 --- a/controlplane/configstore/models.go +++ b/controlplane/configstore/models.go @@ -139,8 +139,33 @@ type ManagedWarehouseMetadataStore struct { // status password the worker activator reads. Empty for cnpg-shard // (which mints its own credentials). PasswordAWSSecret string `gorm:"size:255" json:"password_aws_secret,omitempty"` + + // Flavor is the metadata store's Postgres wire-protocol dialect. + // Empty (the default) means stock PostgreSQL — postgres_scanner runs + // with its native binary COPY + ctid scan path. "cockroachdb" makes + // the worker emit two SET GLOBALs before ATTACH so the scanner uses + // the text protocol and disables ctid scans, which CockroachDB + // doesn't implement (CRDB #96590). See server/server.go's + // buildDuckLakePreAttachStatements. The control plane is expected to + // pin a tenant to workers of one flavor for its lifetime — these + // settings are process-global on the worker's DuckDB instance, so + // flipping between flavors on the same worker would taint the other + // tenants' postgres_scanner behaviour. + Flavor MetadataStoreFlavor `gorm:"size:32" json:"flavor,omitempty"` } +// MetadataStoreFlavor identifies the Postgres wire-protocol dialect of the +// metadata store, so the worker can emit the right postgres_scanner GLOBAL +// settings before ATTACH. Keep the zero value mapped to PostgreSQL — older +// rows in the config store predate this field and must continue to behave +// exactly as before. +type MetadataStoreFlavor string + +const ( + MetadataStoreFlavorPostgres MetadataStoreFlavor = "" + MetadataStoreFlavorCockroachDB MetadataStoreFlavor = "cockroachdb" +) + // ManagedWarehouseDataStore captures the org's object-store provisioning // intent — the shape the Duckling CR's spec.dataStore takes. Distinct from // ManagedWarehouseS3 (the resolved, activation-time object-store config): diff --git a/controlplane/provisioning/api.go b/controlplane/provisioning/api.go index 0f605292..b39f0107 100644 --- a/controlplane/provisioning/api.go +++ b/controlplane/provisioning/api.go @@ -119,12 +119,17 @@ type provisionDuckLakeReq struct { // provisionExternalReq describes a pre-existing (external) Postgres metadata // store. Endpoint (RDS host) and PasswordAWSSecret (the AWS Secrets Manager // secret NAME holding the password) are required; User/Database default to -// "postgres" when omitted. +// "postgres" when omitted. Flavor selects the Postgres dialect: empty (or +// "postgres") for stock PostgreSQL, "cockroachdb" for CockroachDB — +// recorded on the config-store row so the worker can emit the +// postgres_scanner GLOBALs CRDB needs before ATTACH (see +// server.buildDuckLakePreAttachStatements). type provisionExternalReq struct { Endpoint string `json:"endpoint"` PasswordAWSSecret string `json:"password_aws_secret"` User string `json:"user,omitempty"` Database string `json:"database,omitempty"` + Flavor string `json:"flavor,omitempty"` } // provisionDataStoreReq selects the object store. Type "s3bucket" (or omitted) @@ -153,6 +158,26 @@ func icebergNamespace(req *provisionIcebergReq) string { return req.Namespace } +// resolveMetadataStoreFlavor maps the optional external.flavor field on a +// provision request into the typed configstore enum. Empty (the default) and +// "postgres" both map to stock PostgreSQL; "cockroachdb" makes the worker +// emit the postgres_scanner GLOBAL settings CRDB needs before ATTACH. +// Unknown values reject the request rather than silently degrading to +// "postgres" — a mis-typed flavor that goes through would only surface as +// "DuckLake migration check failed: read DuckLake spec version" at first +// activation, long after the provision call returned 202. +func resolveMetadataStoreFlavor(raw string) (configstore.MetadataStoreFlavor, error) { + switch raw { + case "", "postgres": + return configstore.MetadataStoreFlavorPostgres, nil + case string(configstore.MetadataStoreFlavorCockroachDB): + return configstore.MetadataStoreFlavorCockroachDB, nil + default: + return "", fmt.Errorf("metadata_store.external.flavor must be empty, %q, or %q (got %q)", + "postgres", string(configstore.MetadataStoreFlavorCockroachDB), raw) + } +} + // resolveDataStore validates and normalizes the data-store request into the // stored intent. Nil or "s3bucket" provisions a fresh per-org bucket; // "external" reuses an existing bucket and requires a bucket name. @@ -244,12 +269,18 @@ func (h *handler) provisionWarehouse(c *gin.Context) { c.JSON(http.StatusBadRequest, gin.H{"error": "metadata_store.type 'external' requires metadata_store.external.endpoint and metadata_store.external.password_aws_secret"}) return } + flavor, err := resolveMetadataStoreFlavor(ext.Flavor) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } warehouse.MetadataStore = configstore.ManagedWarehouseMetadataStore{ Kind: configstore.MetadataStoreKindExternal, Endpoint: ext.Endpoint, Username: ext.User, DatabaseName: ext.Database, PasswordAWSSecret: ext.PasswordAWSSecret, + Flavor: flavor, } default: diff --git a/controlplane/provisioning/api_test.go b/controlplane/provisioning/api_test.go index bd33f106..8e3b30d3 100644 --- a/controlplane/provisioning/api_test.go +++ b/controlplane/provisioning/api_test.go @@ -644,6 +644,56 @@ func TestProvisionDuckLakeExternal(t *testing.T) { } } +func TestProvisionExternalFlavor(t *testing.T) { + cases := []struct { + name string + bodyFlavor string + wantStatus int + wantFlavor configstore.MetadataStoreFlavor + }{ + {name: "omitted defaults to postgres", bodyFlavor: ``, wantStatus: http.StatusAccepted, wantFlavor: configstore.MetadataStoreFlavorPostgres}, + {name: "explicit postgres", bodyFlavor: `, "flavor":"postgres"`, wantStatus: http.StatusAccepted, wantFlavor: configstore.MetadataStoreFlavorPostgres}, + {name: "cockroachdb stored verbatim", bodyFlavor: `, "flavor":"cockroachdb"`, wantStatus: http.StatusAccepted, wantFlavor: configstore.MetadataStoreFlavorCockroachDB}, + {name: "unknown flavor rejected", bodyFlavor: `, "flavor":"yugabytedb"`, wantStatus: http.StatusBadRequest, wantFlavor: ""}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + store := newFakeStore() + router := newTestRouter(store) + body := []byte(`{ + "database_name": "extdl-db", + "metadata_store": {"type": "external", "external": { + "endpoint": "rds.example.us-east-1.rds.amazonaws.com", + "password_aws_secret": "duckling-example-rds-password"` + tc.bodyFlavor + ` + }}, + "data_store": {"type": "external", "bucket_name": "posthog-duckling-example", "region": "us-east-1"}, + "ducklake": {"enabled": true} + }`) + req := httptest.NewRequest(http.MethodPost, "/api/v1/orgs/extdl/provision", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + router.ServeHTTP(rec, req) + if rec.Code != tc.wantStatus { + t.Fatalf("status = %d, want %d: %s", rec.Code, tc.wantStatus, rec.Body.String()) + } + if tc.wantStatus != http.StatusAccepted { + if _, ok := store.warehouses["extdl"]; ok { + t.Error("warehouse must not be created when flavor is rejected") + } + return + } + w := store.warehouses["extdl"] + if w == nil { + t.Fatal("expected warehouse to be created") + return + } + if w.MetadataStore.Flavor != tc.wantFlavor { + t.Errorf("flavor = %q, want %q", w.MetadataStore.Flavor, tc.wantFlavor) + } + }) + } +} + func TestProvisionExternalRequiresEndpointAndSecret(t *testing.T) { for name, body := range map[string]string{ "missing external block": `{"database_name":"e-db","ducklake":{"enabled":true},"metadata_store":{"type":"external"}}`, diff --git a/controlplane/shared_worker_activator.go b/controlplane/shared_worker_activator.go index 13436477..9c2e9001 100644 --- a/controlplane/shared_worker_activator.go +++ b/controlplane/shared_worker_activator.go @@ -397,6 +397,13 @@ func (a *SharedWorkerActivator) BuildActivationRequest(ctx context.Context, org } dl.SpecVersion = targetSpecVersion + // Metadata-store flavor (Postgres vs CockroachDB) lives only on the + // config-store row, not on the Duckling CR, so set it here regardless of + // which buildDuckLakeConfigFrom*() path resolved the infrastructure. + // The worker uses this to emit the right postgres_scanner GLOBAL + // settings before ATTACH — see server.buildDuckLakePreAttachStatements. + dl.MetadataStoreFlavor = string(org.Warehouse.MetadataStore.Flavor) + ic, err := a.buildIcebergConfig(ctx, assignment.OrgID, &org.Warehouse.Iceberg) if err != nil { return TenantActivationPayload{}, err diff --git a/server/ducklake/config.go b/server/ducklake/config.go index e880788d..c7040dcd 100644 --- a/server/ducklake/config.go +++ b/server/ducklake/config.go @@ -101,4 +101,16 @@ type Config struct { // non-"postgres:" metadata stores. If empty, no application_name is // injected (libpq picks its own default, usually "psql"). ApplicationName string `json:"application_name,omitempty" yaml:"-"` + + // MetadataStoreFlavor identifies the Postgres wire-protocol dialect of + // the metadata store. Empty (the default) means stock PostgreSQL. + // "cockroachdb" makes the worker emit + // SET GLOBAL pg_use_text_protocol = true + // SET GLOBAL pg_use_ctid_scan = false + // before ATTACH so postgres_scanner runs in CRDB-compatible mode + // (CRDB lacks binary COPY and ctid row addressing — CRDB #96590). + // These SET GLOBALs are process-wide on the worker's DuckDB instance, + // so the control plane is expected to pin a tenant to workers of one + // flavor for its lifetime (no flavor flips on the same worker). + MetadataStoreFlavor string `json:"metadata_store_flavor,omitempty" yaml:"-"` } diff --git a/server/server.go b/server/server.go index d52af09e..8126b7da 100644 --- a/server/server.go +++ b/server/server.go @@ -1398,6 +1398,28 @@ func buildDuckLakePreAttachStatements(dlCfg DuckLakeConfig) []string { if dlCfg.ViaPgBouncer { statements = append(statements, "SET GLOBAL pg_pool_max_connections = 0") } + if dlCfg.MetadataStoreFlavor == "cockroachdb" { + // CockroachDB has not implemented the binary COPY wire format + // (cockroachdb#96590) and does not expose ctid row addressing, so + // postgres_scanner's default scan path fails on the first metadata + // ATTACH with "syntax error: unimplemented" or + // "column \"ctid\" does not exist". These two GLOBALs swap the + // scanner onto the text protocol and disable ctid scans, which is + // CRDB-compatible at the cost of ~2x slower metadata reads on the + // scanner. Session-level SET doesn't help here — DuckLake holds an + // internal metadata connection that doesn't see session-local + // state — so the settings have to be GLOBAL. + // + // These GLOBALs are process-wide on the worker's DuckDB instance, + // so duckgres assumes a tenant is pinned to workers of one flavor + // for its lifetime; flipping flavors on the same worker would + // taint the other tenants' postgres_scanner behaviour. See + // PostHog/ducklake#24 for the upstream support report. + statements = append(statements, + "SET GLOBAL pg_use_text_protocol = true", + "SET GLOBAL pg_use_ctid_scan = false", + ) + } return statements } diff --git a/server/server_test.go b/server/server_test.go index 3c508046..34198394 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -241,6 +241,38 @@ func TestBuildDuckLakePreAttachStatements(t *testing.T) { "SET GLOBAL pg_pool_max_connections = 0", }, }, + { + name: "cockroachdb flavor emits text-protocol + ctid-disable globals", + cfg: DuckLakeConfig{ + DisableMetadataThreadLocalCache: boolPtr(false), + MetadataStoreFlavor: "cockroachdb", + }, + want: []string{ + "SET GLOBAL pg_use_text_protocol = true", + "SET GLOBAL pg_use_ctid_scan = false", + }, + }, + { + name: "cockroachdb flavor composes with other settings in stable order", + cfg: DuckLakeConfig{ + ViaPgBouncer: true, + MetadataStoreFlavor: "cockroachdb", + }, + want: []string{ + "SET GLOBAL pg_pool_enable_thread_local_cache = false", + "SET GLOBAL pg_pool_max_connections = 0", + "SET GLOBAL pg_use_text_protocol = true", + "SET GLOBAL pg_use_ctid_scan = false", + }, + }, + { + name: "unknown flavor is treated as plain postgres (no extra globals)", + cfg: DuckLakeConfig{ + DisableMetadataThreadLocalCache: boolPtr(false), + MetadataStoreFlavor: "postgres", + }, + want: nil, + }, } for _, tt := range tests {