Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions controlplane/configstore/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,33 @@ type ManagedWarehouseMetadataStore struct {
// status password the worker activator reads. Empty for cnpg-shard
// (which mints its own credentials).
PasswordAWSSecret string `gorm:"size:255" json:"password_aws_secret,omitempty"`

// Flavor is the metadata store's Postgres wire-protocol dialect.
// Empty (the default) means stock PostgreSQL — postgres_scanner runs
// with its native binary COPY + ctid scan path. "cockroachdb" makes
// the worker emit two SET GLOBALs before ATTACH so the scanner uses
// the text protocol and disables ctid scans, which CockroachDB
// doesn't implement (CRDB #96590). See server/server.go's
// buildDuckLakePreAttachStatements. The control plane is expected to
// pin a tenant to workers of one flavor for its lifetime — these
// settings are process-global on the worker's DuckDB instance, so
// flipping between flavors on the same worker would taint the other
// tenants' postgres_scanner behaviour.
Flavor MetadataStoreFlavor `gorm:"size:32" json:"flavor,omitempty"`
}

// MetadataStoreFlavor identifies the Postgres wire-protocol dialect of the
// metadata store, so the worker can emit the right postgres_scanner GLOBAL
// settings before ATTACH. Keep the zero value mapped to PostgreSQL — older
// rows in the config store predate this field and must continue to behave
// exactly as before.
type MetadataStoreFlavor string

const (
MetadataStoreFlavorPostgres MetadataStoreFlavor = ""
MetadataStoreFlavorCockroachDB MetadataStoreFlavor = "cockroachdb"
)

// ManagedWarehouseDataStore captures the org's object-store provisioning
// intent — the shape the Duckling CR's spec.dataStore takes. Distinct from
// ManagedWarehouseS3 (the resolved, activation-time object-store config):
Expand Down
33 changes: 32 additions & 1 deletion controlplane/provisioning/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,17 @@ type provisionDuckLakeReq struct {
// provisionExternalReq describes a pre-existing (external) Postgres metadata
// store. Endpoint (RDS host) and PasswordAWSSecret (the AWS Secrets Manager
// secret NAME holding the password) are required; User/Database default to
// "postgres" when omitted.
// "postgres" when omitted. Flavor selects the Postgres dialect: empty (or
// "postgres") for stock PostgreSQL, "cockroachdb" for CockroachDB —
// recorded on the config-store row so the worker can emit the
// postgres_scanner GLOBALs CRDB needs before ATTACH (see
// server.buildDuckLakePreAttachStatements).
type provisionExternalReq struct {
Endpoint string `json:"endpoint"`
PasswordAWSSecret string `json:"password_aws_secret"`
User string `json:"user,omitempty"`
Database string `json:"database,omitempty"`
Flavor string `json:"flavor,omitempty"`
}

// provisionDataStoreReq selects the object store. Type "s3bucket" (or omitted)
Expand Down Expand Up @@ -153,6 +158,26 @@ func icebergNamespace(req *provisionIcebergReq) string {
return req.Namespace
}

// resolveMetadataStoreFlavor maps the optional external.flavor field on a
// provision request into the typed configstore enum. Empty (the default) and
// "postgres" both map to stock PostgreSQL; "cockroachdb" makes the worker
// emit the postgres_scanner GLOBAL settings CRDB needs before ATTACH.
// Unknown values reject the request rather than silently degrading to
// "postgres" — a mis-typed flavor that goes through would only surface as
// "DuckLake migration check failed: read DuckLake spec version" at first
// activation, long after the provision call returned 202.
func resolveMetadataStoreFlavor(raw string) (configstore.MetadataStoreFlavor, error) {
switch raw {
case "", "postgres":
return configstore.MetadataStoreFlavorPostgres, nil
case string(configstore.MetadataStoreFlavorCockroachDB):
return configstore.MetadataStoreFlavorCockroachDB, nil
default:
return "", fmt.Errorf("metadata_store.external.flavor must be empty, %q, or %q (got %q)",
"postgres", string(configstore.MetadataStoreFlavorCockroachDB), raw)
}
}

// resolveDataStore validates and normalizes the data-store request into the
// stored intent. Nil or "s3bucket" provisions a fresh per-org bucket;
// "external" reuses an existing bucket and requires a bucket name.
Expand Down Expand Up @@ -244,12 +269,18 @@ func (h *handler) provisionWarehouse(c *gin.Context) {
c.JSON(http.StatusBadRequest, gin.H{"error": "metadata_store.type 'external' requires metadata_store.external.endpoint and metadata_store.external.password_aws_secret"})
return
}
flavor, err := resolveMetadataStoreFlavor(ext.Flavor)
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
warehouse.MetadataStore = configstore.ManagedWarehouseMetadataStore{
Kind: configstore.MetadataStoreKindExternal,
Endpoint: ext.Endpoint,
Username: ext.User,
DatabaseName: ext.Database,
PasswordAWSSecret: ext.PasswordAWSSecret,
Flavor: flavor,
}

default:
Expand Down
50 changes: 50 additions & 0 deletions controlplane/provisioning/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,56 @@ func TestProvisionDuckLakeExternal(t *testing.T) {
}
}

func TestProvisionExternalFlavor(t *testing.T) {
cases := []struct {
name string
bodyFlavor string
wantStatus int
wantFlavor configstore.MetadataStoreFlavor
}{
{name: "omitted defaults to postgres", bodyFlavor: ``, wantStatus: http.StatusAccepted, wantFlavor: configstore.MetadataStoreFlavorPostgres},
{name: "explicit postgres", bodyFlavor: `, "flavor":"postgres"`, wantStatus: http.StatusAccepted, wantFlavor: configstore.MetadataStoreFlavorPostgres},
{name: "cockroachdb stored verbatim", bodyFlavor: `, "flavor":"cockroachdb"`, wantStatus: http.StatusAccepted, wantFlavor: configstore.MetadataStoreFlavorCockroachDB},
{name: "unknown flavor rejected", bodyFlavor: `, "flavor":"yugabytedb"`, wantStatus: http.StatusBadRequest, wantFlavor: ""},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
store := newFakeStore()
router := newTestRouter(store)
body := []byte(`{
"database_name": "extdl-db",
"metadata_store": {"type": "external", "external": {
"endpoint": "rds.example.us-east-1.rds.amazonaws.com",
"password_aws_secret": "duckling-example-rds-password"` + tc.bodyFlavor + `
}},
"data_store": {"type": "external", "bucket_name": "posthog-duckling-example", "region": "us-east-1"},
"ducklake": {"enabled": true}
}`)
req := httptest.NewRequest(http.MethodPost, "/api/v1/orgs/extdl/provision", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
router.ServeHTTP(rec, req)
if rec.Code != tc.wantStatus {
t.Fatalf("status = %d, want %d: %s", rec.Code, tc.wantStatus, rec.Body.String())
}
if tc.wantStatus != http.StatusAccepted {
if _, ok := store.warehouses["extdl"]; ok {
t.Error("warehouse must not be created when flavor is rejected")
}
return
}
w := store.warehouses["extdl"]
if w == nil {
t.Fatal("expected warehouse to be created")
return
}
if w.MetadataStore.Flavor != tc.wantFlavor {
t.Errorf("flavor = %q, want %q", w.MetadataStore.Flavor, tc.wantFlavor)
}
})
}
}

func TestProvisionExternalRequiresEndpointAndSecret(t *testing.T) {
for name, body := range map[string]string{
"missing external block": `{"database_name":"e-db","ducklake":{"enabled":true},"metadata_store":{"type":"external"}}`,
Expand Down
7 changes: 7 additions & 0 deletions controlplane/shared_worker_activator.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,13 @@ func (a *SharedWorkerActivator) BuildActivationRequest(ctx context.Context, org
}
dl.SpecVersion = targetSpecVersion

// Metadata-store flavor (Postgres vs CockroachDB) lives only on the
// config-store row, not on the Duckling CR, so set it here regardless of
// which buildDuckLakeConfigFrom*() path resolved the infrastructure.
// The worker uses this to emit the right postgres_scanner GLOBAL
// settings before ATTACH — see server.buildDuckLakePreAttachStatements.
dl.MetadataStoreFlavor = string(org.Warehouse.MetadataStore.Flavor)

ic, err := a.buildIcebergConfig(ctx, assignment.OrgID, &org.Warehouse.Iceberg)
if err != nil {
return TenantActivationPayload{}, err
Expand Down
12 changes: 12 additions & 0 deletions server/ducklake/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,16 @@ type Config struct {
// non-"postgres:" metadata stores. If empty, no application_name is
// injected (libpq picks its own default, usually "psql").
ApplicationName string `json:"application_name,omitempty" yaml:"-"`

// MetadataStoreFlavor identifies the Postgres wire-protocol dialect of
// the metadata store. Empty (the default) means stock PostgreSQL.
// "cockroachdb" makes the worker emit
// SET GLOBAL pg_use_text_protocol = true
// SET GLOBAL pg_use_ctid_scan = false
// before ATTACH so postgres_scanner runs in CRDB-compatible mode
// (CRDB lacks binary COPY and ctid row addressing — CRDB #96590).
// These SET GLOBALs are process-wide on the worker's DuckDB instance,
// so the control plane is expected to pin a tenant to workers of one
// flavor for its lifetime (no flavor flips on the same worker).
MetadataStoreFlavor string `json:"metadata_store_flavor,omitempty" yaml:"-"`
}
22 changes: 22 additions & 0 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1398,6 +1398,28 @@ func buildDuckLakePreAttachStatements(dlCfg DuckLakeConfig) []string {
if dlCfg.ViaPgBouncer {
statements = append(statements, "SET GLOBAL pg_pool_max_connections = 0")
}
if dlCfg.MetadataStoreFlavor == "cockroachdb" {
// CockroachDB has not implemented the binary COPY wire format
// (cockroachdb#96590) and does not expose ctid row addressing, so
// postgres_scanner's default scan path fails on the first metadata
// ATTACH with "syntax error: unimplemented" or
// "column \"ctid\" does not exist". These two GLOBALs swap the
// scanner onto the text protocol and disable ctid scans, which is
// CRDB-compatible at the cost of ~2x slower metadata reads on the
// scanner. Session-level SET doesn't help here — DuckLake holds an
// internal metadata connection that doesn't see session-local
// state — so the settings have to be GLOBAL.
//
// These GLOBALs are process-wide on the worker's DuckDB instance,
// so duckgres assumes a tenant is pinned to workers of one flavor
// for its lifetime; flipping flavors on the same worker would
// taint the other tenants' postgres_scanner behaviour. See
// PostHog/ducklake#24 for the upstream support report.
statements = append(statements,
"SET GLOBAL pg_use_text_protocol = true",
"SET GLOBAL pg_use_ctid_scan = false",
)
}
return statements
}

Expand Down
32 changes: 32 additions & 0 deletions server/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,38 @@ func TestBuildDuckLakePreAttachStatements(t *testing.T) {
"SET GLOBAL pg_pool_max_connections = 0",
},
},
{
name: "cockroachdb flavor emits text-protocol + ctid-disable globals",
cfg: DuckLakeConfig{
DisableMetadataThreadLocalCache: boolPtr(false),
MetadataStoreFlavor: "cockroachdb",
},
want: []string{
"SET GLOBAL pg_use_text_protocol = true",
"SET GLOBAL pg_use_ctid_scan = false",
},
},
{
name: "cockroachdb flavor composes with other settings in stable order",
cfg: DuckLakeConfig{
ViaPgBouncer: true,
MetadataStoreFlavor: "cockroachdb",
},
want: []string{
"SET GLOBAL pg_pool_enable_thread_local_cache = false",
"SET GLOBAL pg_pool_max_connections = 0",
"SET GLOBAL pg_use_text_protocol = true",
"SET GLOBAL pg_use_ctid_scan = false",
},
},
{
name: "unknown flavor is treated as plain postgres (no extra globals)",
cfg: DuckLakeConfig{
DisableMetadataThreadLocalCache: boolPtr(false),
MetadataStoreFlavor: "postgres",
},
want: nil,
},
}

for _, tt := range tests {
Expand Down
Loading