From e05655bd30712311fa28d4bc6db7bd8305492a02 Mon Sep 17 00:00:00 2001 From: Benjamin Knofe-Vider Date: Mon, 15 Jun 2026 10:26:54 +0200 Subject: [PATCH 1/3] ducklake: add CockroachDB metadata-store flavor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets a tenant declare its external Postgres-protocol metadata store is CockroachDB. The worker then emits two postgres_scanner GLOBALs before ATTACH: SET GLOBAL pg_use_text_protocol = true SET GLOBAL pg_use_ctid_scan = false CRDB has not implemented binary COPY (cockroachdb#96590) and does not expose ctid row addressing, so postgres_scanner's default scan path fails on the first metadata ATTACH with COPY (...) TO STDOUT (FORMAT "binary"): ERROR: at or near "binary": syntax error: unimplemented Plumbing: - configstore.ManagedWarehouseMetadataStore gains a typed Flavor field ("" → postgres, "cockroachdb" → CRDB). Zero value preserves the old behaviour for every existing row. - provisioning API accepts metadata_store.external.flavor; unknown values reject the request rather than silently degrading. - SharedWorkerActivator copies the configstore flavor onto DuckLakeConfig.MetadataStoreFlavor regardless of which buildDuckLakeConfigFrom*() path resolved the rest of the infrastructure — the field doesn't live on the Duckling CR. - server.buildDuckLakePreAttachStatements emits the two CRDB GLOBALs when MetadataStoreFlavor == "cockroachdb"; the SET GLOBALs are process-wide on the worker's DuckDB instance, so the control plane is expected to pin a tenant to workers of one flavor for its lifetime — no flavor flips on the same worker. Not addressed in this PR — out-of-scope upstream gaps from PostHog/ducklake#24: - ducklake_expire_snapshots() / ducklake_flush_inlined_data() still fail on CRDB (ctid DELETE path; needs a routing change in the DuckLake extension to go through postgres_execute). 
- Concurrent writers lose ~24% of commits to "40001 restart transaction" because DuckLake's RetryOnError() only matches the Postgres "duplicate key ... unique" message. So this PR unblocks ATTACH + DDL + DML + queries on a CRDB-backed external metadata store, but a CRDB-backed catalog is not yet production-grade for snapshot expiry or concurrent-writer workloads. --- controlplane/configstore/models.go | 25 +++++++++++++ controlplane/provisioning/api.go | 33 +++++++++++++++- controlplane/provisioning/api_test.go | 50 +++++++++++++++++++++++++ controlplane/shared_worker_activator.go | 7 ++++ server/ducklake/config.go | 12 ++++++ server/server.go | 22 +++++++++++ server/server_test.go | 32 ++++++++++++++++ 7 files changed, 180 insertions(+), 1 deletion(-) diff --git a/controlplane/configstore/models.go b/controlplane/configstore/models.go index 87a77e84..78bd8d7a 100644 --- a/controlplane/configstore/models.go +++ b/controlplane/configstore/models.go @@ -139,8 +139,33 @@ type ManagedWarehouseMetadataStore struct { // status password the worker activator reads. Empty for cnpg-shard // (which mints its own credentials). PasswordAWSSecret string `gorm:"size:255" json:"password_aws_secret,omitempty"` + + // Flavor is the metadata store's Postgres wire-protocol dialect. + // Empty (the default) means stock PostgreSQL — postgres_scanner runs + // with its native binary COPY + ctid scan path. "cockroachdb" makes + // the worker emit two SET GLOBALs before ATTACH so the scanner uses + // the text protocol and disables ctid scans, which CockroachDB + // doesn't implement (CRDB #96590). See server/server.go's + // buildDuckLakePreAttachStatements. The control plane is expected to + // pin a tenant to workers of one flavor for its lifetime — these + // settings are process-global on the worker's DuckDB instance, so + // flipping between flavors on the same worker would taint the other + // tenants' postgres_scanner behaviour. 
+ Flavor MetadataStoreFlavor `gorm:"size:32" json:"flavor,omitempty"` } +// MetadataStoreFlavor identifies the Postgres wire-protocol dialect of the +// metadata store, so the worker can emit the right postgres_scanner GLOBAL +// settings before ATTACH. Keep the zero value mapped to PostgreSQL — older +// rows in the config store predate this field and must continue to behave +// exactly as before. +type MetadataStoreFlavor string + +const ( + MetadataStoreFlavorPostgres MetadataStoreFlavor = "" + MetadataStoreFlavorCockroachDB MetadataStoreFlavor = "cockroachdb" +) + // ManagedWarehouseDataStore captures the org's object-store provisioning // intent — the shape the Duckling CR's spec.dataStore takes. Distinct from // ManagedWarehouseS3 (the resolved, activation-time object-store config): diff --git a/controlplane/provisioning/api.go b/controlplane/provisioning/api.go index 0f605292..b39f0107 100644 --- a/controlplane/provisioning/api.go +++ b/controlplane/provisioning/api.go @@ -119,12 +119,17 @@ type provisionDuckLakeReq struct { // provisionExternalReq describes a pre-existing (external) Postgres metadata // store. Endpoint (RDS host) and PasswordAWSSecret (the AWS Secrets Manager // secret NAME holding the password) are required; User/Database default to -// "postgres" when omitted. +// "postgres" when omitted. Flavor selects the Postgres dialect: empty (or +// "postgres") for stock PostgreSQL, "cockroachdb" for CockroachDB — +// recorded on the config-store row so the worker can emit the +// postgres_scanner GLOBALs CRDB needs before ATTACH (see +// server.buildDuckLakePreAttachStatements). type provisionExternalReq struct { Endpoint string `json:"endpoint"` PasswordAWSSecret string `json:"password_aws_secret"` User string `json:"user,omitempty"` Database string `json:"database,omitempty"` + Flavor string `json:"flavor,omitempty"` } // provisionDataStoreReq selects the object store. 
Type "s3bucket" (or omitted) @@ -153,6 +158,26 @@ func icebergNamespace(req *provisionIcebergReq) string { return req.Namespace } +// resolveMetadataStoreFlavor maps the optional external.flavor field on a +// provision request into the typed configstore enum. Empty (the default) and +// "postgres" both map to stock PostgreSQL; "cockroachdb" makes the worker +// emit the postgres_scanner GLOBAL settings CRDB needs before ATTACH. +// Unknown values reject the request rather than silently degrading to +// "postgres" — a mis-typed flavor that goes through would only surface as +// "DuckLake migration check failed: read DuckLake spec version" at first +// activation, long after the provision call returned 202. +func resolveMetadataStoreFlavor(raw string) (configstore.MetadataStoreFlavor, error) { + switch raw { + case "", "postgres": + return configstore.MetadataStoreFlavorPostgres, nil + case string(configstore.MetadataStoreFlavorCockroachDB): + return configstore.MetadataStoreFlavorCockroachDB, nil + default: + return "", fmt.Errorf("metadata_store.external.flavor must be empty, %q, or %q (got %q)", + "postgres", string(configstore.MetadataStoreFlavorCockroachDB), raw) + } +} + // resolveDataStore validates and normalizes the data-store request into the // stored intent. Nil or "s3bucket" provisions a fresh per-org bucket; // "external" reuses an existing bucket and requires a bucket name. 
@@ -244,12 +269,18 @@ func (h *handler) provisionWarehouse(c *gin.Context) { c.JSON(http.StatusBadRequest, gin.H{"error": "metadata_store.type 'external' requires metadata_store.external.endpoint and metadata_store.external.password_aws_secret"}) return } + flavor, err := resolveMetadataStoreFlavor(ext.Flavor) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } warehouse.MetadataStore = configstore.ManagedWarehouseMetadataStore{ Kind: configstore.MetadataStoreKindExternal, Endpoint: ext.Endpoint, Username: ext.User, DatabaseName: ext.Database, PasswordAWSSecret: ext.PasswordAWSSecret, + Flavor: flavor, } default: diff --git a/controlplane/provisioning/api_test.go b/controlplane/provisioning/api_test.go index bd33f106..8e3b30d3 100644 --- a/controlplane/provisioning/api_test.go +++ b/controlplane/provisioning/api_test.go @@ -644,6 +644,56 @@ func TestProvisionDuckLakeExternal(t *testing.T) { } } +func TestProvisionExternalFlavor(t *testing.T) { + cases := []struct { + name string + bodyFlavor string + wantStatus int + wantFlavor configstore.MetadataStoreFlavor + }{ + {name: "omitted defaults to postgres", bodyFlavor: ``, wantStatus: http.StatusAccepted, wantFlavor: configstore.MetadataStoreFlavorPostgres}, + {name: "explicit postgres", bodyFlavor: `, "flavor":"postgres"`, wantStatus: http.StatusAccepted, wantFlavor: configstore.MetadataStoreFlavorPostgres}, + {name: "cockroachdb stored verbatim", bodyFlavor: `, "flavor":"cockroachdb"`, wantStatus: http.StatusAccepted, wantFlavor: configstore.MetadataStoreFlavorCockroachDB}, + {name: "unknown flavor rejected", bodyFlavor: `, "flavor":"yugabytedb"`, wantStatus: http.StatusBadRequest, wantFlavor: ""}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + store := newFakeStore() + router := newTestRouter(store) + body := []byte(`{ + "database_name": "extdl-db", + "metadata_store": {"type": "external", "external": { + "endpoint": 
"rds.example.us-east-1.rds.amazonaws.com", + "password_aws_secret": "duckling-example-rds-password"` + tc.bodyFlavor + ` + }}, + "data_store": {"type": "external", "bucket_name": "posthog-duckling-example", "region": "us-east-1"}, + "ducklake": {"enabled": true} + }`) + req := httptest.NewRequest(http.MethodPost, "/api/v1/orgs/extdl/provision", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + router.ServeHTTP(rec, req) + if rec.Code != tc.wantStatus { + t.Fatalf("status = %d, want %d: %s", rec.Code, tc.wantStatus, rec.Body.String()) + } + if tc.wantStatus != http.StatusAccepted { + if _, ok := store.warehouses["extdl"]; ok { + t.Error("warehouse must not be created when flavor is rejected") + } + return + } + w := store.warehouses["extdl"] + if w == nil { + t.Fatal("expected warehouse to be created") + return + } + if w.MetadataStore.Flavor != tc.wantFlavor { + t.Errorf("flavor = %q, want %q", w.MetadataStore.Flavor, tc.wantFlavor) + } + }) + } +} + func TestProvisionExternalRequiresEndpointAndSecret(t *testing.T) { for name, body := range map[string]string{ "missing external block": `{"database_name":"e-db","ducklake":{"enabled":true},"metadata_store":{"type":"external"}}`, diff --git a/controlplane/shared_worker_activator.go b/controlplane/shared_worker_activator.go index 13436477..9c2e9001 100644 --- a/controlplane/shared_worker_activator.go +++ b/controlplane/shared_worker_activator.go @@ -397,6 +397,13 @@ func (a *SharedWorkerActivator) BuildActivationRequest(ctx context.Context, org } dl.SpecVersion = targetSpecVersion + // Metadata-store flavor (Postgres vs CockroachDB) lives only on the + // config-store row, not on the Duckling CR, so set it here regardless of + // which buildDuckLakeConfigFrom*() path resolved the infrastructure. + // The worker uses this to emit the right postgres_scanner GLOBAL + // settings before ATTACH — see server.buildDuckLakePreAttachStatements. 
+ dl.MetadataStoreFlavor = string(org.Warehouse.MetadataStore.Flavor) + ic, err := a.buildIcebergConfig(ctx, assignment.OrgID, &org.Warehouse.Iceberg) if err != nil { return TenantActivationPayload{}, err diff --git a/server/ducklake/config.go b/server/ducklake/config.go index e880788d..c7040dcd 100644 --- a/server/ducklake/config.go +++ b/server/ducklake/config.go @@ -101,4 +101,16 @@ type Config struct { // non-"postgres:" metadata stores. If empty, no application_name is // injected (libpq picks its own default, usually "psql"). ApplicationName string `json:"application_name,omitempty" yaml:"-"` + + // MetadataStoreFlavor identifies the Postgres wire-protocol dialect of + // the metadata store. Empty (the default) means stock PostgreSQL. + // "cockroachdb" makes the worker emit + // SET GLOBAL pg_use_text_protocol = true + // SET GLOBAL pg_use_ctid_scan = false + // before ATTACH so postgres_scanner runs in CRDB-compatible mode + // (CRDB lacks binary COPY and ctid row addressing — CRDB #96590). + // These SET GLOBALs are process-wide on the worker's DuckDB instance, + // so the control plane is expected to pin a tenant to workers of one + // flavor for its lifetime (no flavor flips on the same worker). + MetadataStoreFlavor string `json:"metadata_store_flavor,omitempty" yaml:"-"` } diff --git a/server/server.go b/server/server.go index d52af09e..8126b7da 100644 --- a/server/server.go +++ b/server/server.go @@ -1398,6 +1398,28 @@ func buildDuckLakePreAttachStatements(dlCfg DuckLakeConfig) []string { if dlCfg.ViaPgBouncer { statements = append(statements, "SET GLOBAL pg_pool_max_connections = 0") } + if dlCfg.MetadataStoreFlavor == "cockroachdb" { + // CockroachDB has not implemented the binary COPY wire format + // (cockroachdb#96590) and does not expose ctid row addressing, so + // postgres_scanner's default scan path fails on the first metadata + // ATTACH with "syntax error: unimplemented" or + // "column \"ctid\" does not exist". 
These two GLOBALs swap the + // scanner onto the text protocol and disable ctid scans, which is + // CRDB-compatible at the cost of ~2x slower metadata reads on the + // scanner. Session-level SET doesn't help here — DuckLake holds an + // internal metadata connection that doesn't see session-local + // state — so the settings have to be GLOBAL. + // + // These GLOBALs are process-wide on the worker's DuckDB instance, + // so duckgres assumes a tenant is pinned to workers of one flavor + // for its lifetime; flipping flavors on the same worker would + // taint the other tenants' postgres_scanner behaviour. See + // PostHog/ducklake#24 for the upstream support report. + statements = append(statements, + "SET GLOBAL pg_use_text_protocol = true", + "SET GLOBAL pg_use_ctid_scan = false", + ) + } return statements } diff --git a/server/server_test.go b/server/server_test.go index 3c508046..34198394 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -241,6 +241,38 @@ func TestBuildDuckLakePreAttachStatements(t *testing.T) { "SET GLOBAL pg_pool_max_connections = 0", }, }, + { + name: "cockroachdb flavor emits text-protocol + ctid-disable globals", + cfg: DuckLakeConfig{ + DisableMetadataThreadLocalCache: boolPtr(false), + MetadataStoreFlavor: "cockroachdb", + }, + want: []string{ + "SET GLOBAL pg_use_text_protocol = true", + "SET GLOBAL pg_use_ctid_scan = false", + }, + }, + { + name: "cockroachdb flavor composes with other settings in stable order", + cfg: DuckLakeConfig{ + ViaPgBouncer: true, + MetadataStoreFlavor: "cockroachdb", + }, + want: []string{ + "SET GLOBAL pg_pool_enable_thread_local_cache = false", + "SET GLOBAL pg_pool_max_connections = 0", + "SET GLOBAL pg_use_text_protocol = true", + "SET GLOBAL pg_use_ctid_scan = false", + }, + }, + { + name: "unknown flavor is treated as plain postgres (no extra globals)", + cfg: DuckLakeConfig{ + DisableMetadataThreadLocalCache: boolPtr(false), + MetadataStoreFlavor: "postgres", + }, + want: nil, + }, } 
for _, tt := range tests { From 6b7f8c973048d97cce205380b00c4ebdf155d3a6 Mon Sep 17 00:00:00 2001 From: Benjamin Knofe-Vider Date: Mon, 15 Jun 2026 10:32:43 +0200 Subject: [PATCH 2/3] ci: manual workflow_dispatch to deploy a branch to mw-dev Mirrors container-image-controlplane-cd.yml + container-image-worker-cd.yml but only fires on workflow_dispatch from the GitHub UI, only builds arm64 (mw-dev is arm64-only), and only tags with mw-dev-<short-sha> + mw-dev-latest. Doesn't touch :latest so the prod CD pipeline keeps owning that tag. Pick controlplane / worker / both from the dispatch input. Worker pins the same default DuckDB row the CD matrix uses (1.5.3 + cred-refresh httpfs + posthog.4 ducklake); the env block calls out the dependency so if the CD matrix moves, this workflow has to follow. Use to validate a feature branch against real mw-dev infra (cnpg-shards, external RDS, Lakekeeper, Crossplane) before merging. --- .github/workflows/manual-deploy-mw-dev.yml | 197 +++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 .github/workflows/manual-deploy-mw-dev.yml diff --git a/.github/workflows/manual-deploy-mw-dev.yml b/.github/workflows/manual-deploy-mw-dev.yml new file mode 100644 index 00000000..b5a9c84d --- /dev/null +++ b/.github/workflows/manual-deploy-mw-dev.yml @@ -0,0 +1,197 @@ +name: Manual Deploy to mw-dev + +# Manually-triggered build + push of the controlplane and/or worker images +# from the dispatched ref, tagged for posthog-mw-dev only. Mirrors the shape +# of container-image-controlplane-cd.yml and container-image-worker-cd.yml but +# never promotes to `:latest` — that tag stays reserved for main → prod. +# +# Image tags pushed: +# - :mw-dev- immutable, points at the dispatched commit +# - :mw-dev-latest mutable convenience tag for tracking +# +# mw-dev is arm64-only, so this is arm64-only (no multi-arch manifest needed — +# saves the ~5 min the amd64 leg would cost and the manifest job entirely). 
+# +# Use when you want to validate a branch on real mw-dev infra (cnpg-shards, +# external RDS, Lakekeeper, Crossplane) before merging. For prod, merge to +# main and let container-image-{worker,controlplane}-cd.yml do its normal CD. + +on: + workflow_dispatch: + inputs: + components: + description: "Which images to build." + required: true + default: both + type: choice + options: + - both + - controlplane + - worker + +env: + ECR_REGISTRY: 795637471508.dkr.ecr.us-east-1.amazonaws.com + GHCR_REGISTRY: ghcr.io + # Pin to the same default DuckDB row as container-image-worker-cd.yml's + # matrix `default: true` entry. If that matrix moves, bump these too. + DUCKDB_VERSION: "1.5.3" + DUCKDB_GO_VERSION: "v2.10503.0" + DUCKDB_BINDINGS_VERSION: "v0.10503.0" + HTTPFS_EXTENSION_TAG: "v1.5.3-cred-refresh" + DUCKLAKE_EXTENSION_TAG: "v1.0-posthog.4" + POSTGRES_SCANNER_REPOSITORY: "https://extensions.duckdb.org" + +jobs: + compute-tag: + name: Compute mw-dev tag + if: github.repository == 'PostHog/duckgres' + runs-on: ubuntu-24.04 + outputs: + short-sha: ${{ steps.tag.outputs.short-sha }} + steps: + - name: Short SHA + id: tag + env: + FULL_SHA: ${{ github.sha }} + run: echo "short-sha=${FULL_SHA:0:12}" >> "$GITHUB_OUTPUT" + + controlplane: + name: Build controlplane (mw-dev) + needs: compute-tag + if: ${{ github.repository == 'PostHog/duckgres' && (inputs.components == 'both' || inputs.components == 'controlplane') }} + runs-on: ubuntu-24.04-arm + permissions: + id-token: write + contents: read + packages: write + env: + IMAGE_NAME: duckgres-controlplane + SHORT_SHA: ${{ needs.compute-tag.outputs.short-sha }} + + steps: + - name: Check out + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + 
role-to-assume: ${{ secrets.AWS_ECR_PUBLISH_IAM_ROLE }} + aws-region: us-east-1 + + - name: Login to Amazon ECR + uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1 + + - name: Login to GHCR + uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 + with: + registry: ${{ env.GHCR_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push controlplane (arm64) + uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 + with: + context: . + file: Dockerfile.controlplane + push: true + platforms: linux/arm64 + tags: | + ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} + ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-latest + ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} + ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:mw-dev-latest + build-args: | + VERSION=mw-dev-${{ env.SHORT_SHA }} + COMMIT=${{ github.sha }} + BUILD_TAGS=kubernetes + cache-from: type=gha,scope=cp-mw-dev-arm64 + cache-to: type=gha,mode=max,scope=cp-mw-dev-arm64 + + - name: Summary + env: + IMAGE_REF: ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} + run: | + { + echo "## controlplane image pushed" + echo "" + echo "- ECR: \`$IMAGE_REF\`" + echo "- Floating tag: \`${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-latest\`" + echo "- Ref: \`${{ github.ref }}\` @ \`${{ github.sha }}\`" + } >> "$GITHUB_STEP_SUMMARY" + + worker: + name: Build worker (mw-dev) + needs: compute-tag + if: ${{ github.repository == 'PostHog/duckgres' && (inputs.components == 'both' || inputs.components == 'worker') }} + runs-on: ubuntu-24.04-arm + permissions: + id-token: write + contents: read + packages: write + env: + IMAGE_NAME: duckgres + SHORT_SHA: ${{ needs.compute-tag.outputs.short-sha }} + + steps: + - name: Check out + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # 
v6.0.1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + role-to-assume: ${{ secrets.AWS_ECR_PUBLISH_IAM_ROLE }} + aws-region: us-east-1 + + - name: Login to Amazon ECR + uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1 + + - name: Login to GHCR + uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 + with: + registry: ${{ env.GHCR_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push worker (arm64, default DuckDB row) + uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 + with: + context: . + file: Dockerfile.worker + push: true + platforms: linux/arm64 + tags: | + ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} + ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-latest + ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} + ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:mw-dev-latest + build-args: | + VERSION=mw-dev-${{ env.SHORT_SHA }} + COMMIT=${{ github.sha }} + BUILD_TAGS=kubernetes + DUCKDB_GO_VERSION=${{ env.DUCKDB_GO_VERSION }} + DUCKDB_BINDINGS_VERSION=${{ env.DUCKDB_BINDINGS_VERSION }} + DUCKDB_EXTENSION_VERSION=${{ env.DUCKDB_VERSION }} + HTTPFS_EXTENSION_TAG=${{ env.HTTPFS_EXTENSION_TAG }} + DUCKLAKE_EXTENSION_TAG=${{ env.DUCKLAKE_EXTENSION_TAG }} + POSTGRES_SCANNER_REPOSITORY=${{ env.POSTGRES_SCANNER_REPOSITORY }} + cache-from: type=gha,scope=worker-mw-dev-arm64 + cache-to: type=gha,mode=max,scope=worker-mw-dev-arm64 + + - name: Summary + env: + IMAGE_REF: ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} + run: | + { + echo "## worker image pushed" + echo "" + echo "- ECR: \`$IMAGE_REF\`" + echo "- Floating 
tag: \`${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-latest\`" + echo "- DuckDB: \`${{ env.DUCKDB_VERSION }}\` (go ${{ env.DUCKDB_GO_VERSION }})" + echo "- Ref: \`${{ github.ref }}\` @ \`${{ github.sha }}\`" + } >> "$GITHUB_STEP_SUMMARY" From 43368071a438d032a8ecfb1564715bdc5f69e634 Mon Sep 17 00:00:00 2001 From: Benjamin Knofe-Vider Date: Mon, 15 Jun 2026 11:20:57 +0200 Subject: [PATCH 3/3] Revert ci: drop manual mw-dev workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A workflow file only becomes dispatchable from the GitHub UI once it lives on the default branch, so adding it on this feature branch wouldn't have helped — the dispatch dropdown only lists workflows that already exist on main. Use the existing Container Image CD (workflow_dispatch enabled) on this ref instead — it pushes the image and ships a commit_state_update to PostHog/charts, bumping state.duckgres.image.sha. From charts, dispatch ArgoCD Deploy with app=duckgres to promote image.sha → image.dev; the image-argo helper consumes image.dev for the dev environment and image.prod for prod, so prod stays pinned to its existing sha. --- .github/workflows/manual-deploy-mw-dev.yml | 197 --------------------- 1 file changed, 197 deletions(-) delete mode 100644 .github/workflows/manual-deploy-mw-dev.yml diff --git a/.github/workflows/manual-deploy-mw-dev.yml b/.github/workflows/manual-deploy-mw-dev.yml deleted file mode 100644 index b5a9c84d..00000000 --- a/.github/workflows/manual-deploy-mw-dev.yml +++ /dev/null @@ -1,197 +0,0 @@ -name: Manual Deploy to mw-dev - -# Manually-triggered build + push of the controlplane and/or worker images -# from the dispatched ref, tagged for posthog-mw-dev only. Mirrors the shape -# of container-image-controlplane-cd.yml and container-image-worker-cd.yml but -# never promotes to `:latest` — that tag stays reserved for main → prod. 
-# -# Image tags pushed: -# - :mw-dev- immutable, points at the dispatched commit -# - :mw-dev-latest mutable convenience tag for tracking -# -# mw-dev is arm64-only, so this is arm64-only (no multi-arch manifest needed — -# saves the ~5 min the amd64 leg would cost and the manifest job entirely). -# -# Use when you want to validate a branch on real mw-dev infra (cnpg-shards, -# external RDS, Lakekeeper, Crossplane) before merging. For prod, merge to -# main and let container-image-{worker,controlplane}-cd.yml do its normal CD. - -on: - workflow_dispatch: - inputs: - components: - description: "Which images to build." - required: true - default: both - type: choice - options: - - both - - controlplane - - worker - -env: - ECR_REGISTRY: 795637471508.dkr.ecr.us-east-1.amazonaws.com - GHCR_REGISTRY: ghcr.io - # Pin to the same default DuckDB row as container-image-worker-cd.yml's - # matrix `default: true` entry. If that matrix moves, bump these too. - DUCKDB_VERSION: "1.5.3" - DUCKDB_GO_VERSION: "v2.10503.0" - DUCKDB_BINDINGS_VERSION: "v0.10503.0" - HTTPFS_EXTENSION_TAG: "v1.5.3-cred-refresh" - DUCKLAKE_EXTENSION_TAG: "v1.0-posthog.4" - POSTGRES_SCANNER_REPOSITORY: "https://extensions.duckdb.org" - -jobs: - compute-tag: - name: Compute mw-dev tag - if: github.repository == 'PostHog/duckgres' - runs-on: ubuntu-24.04 - outputs: - short-sha: ${{ steps.tag.outputs.short-sha }} - steps: - - name: Short SHA - id: tag - env: - FULL_SHA: ${{ github.sha }} - run: echo "short-sha=${FULL_SHA:0:12}" >> "$GITHUB_OUTPUT" - - controlplane: - name: Build controlplane (mw-dev) - needs: compute-tag - if: ${{ github.repository == 'PostHog/duckgres' && (inputs.components == 'both' || inputs.components == 'controlplane') }} - runs-on: ubuntu-24.04-arm - permissions: - id-token: write - contents: read - packages: write - env: - IMAGE_NAME: duckgres-controlplane - SHORT_SHA: ${{ needs.compute-tag.outputs.short-sha }} - - steps: - - name: Check out - uses: 
actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 - with: - role-to-assume: ${{ secrets.AWS_ECR_PUBLISH_IAM_ROLE }} - aws-region: us-east-1 - - - name: Login to Amazon ECR - uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1 - - - name: Login to GHCR - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 - with: - registry: ${{ env.GHCR_REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push controlplane (arm64) - uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 - with: - context: . - file: Dockerfile.controlplane - push: true - platforms: linux/arm64 - tags: | - ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} - ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-latest - ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} - ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:mw-dev-latest - build-args: | - VERSION=mw-dev-${{ env.SHORT_SHA }} - COMMIT=${{ github.sha }} - BUILD_TAGS=kubernetes - cache-from: type=gha,scope=cp-mw-dev-arm64 - cache-to: type=gha,mode=max,scope=cp-mw-dev-arm64 - - - name: Summary - env: - IMAGE_REF: ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} - run: | - { - echo "## controlplane image pushed" - echo "" - echo "- ECR: \`$IMAGE_REF\`" - echo "- Floating tag: \`${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-latest\`" - echo "- Ref: \`${{ github.ref }}\` @ \`${{ github.sha }}\`" - } >> "$GITHUB_STEP_SUMMARY" - - worker: - name: Build worker (mw-dev) - needs: compute-tag - if: ${{ github.repository == 'PostHog/duckgres' && 
(inputs.components == 'both' || inputs.components == 'worker') }} - runs-on: ubuntu-24.04-arm - permissions: - id-token: write - contents: read - packages: write - env: - IMAGE_NAME: duckgres - SHORT_SHA: ${{ needs.compute-tag.outputs.short-sha }} - - steps: - - name: Check out - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 - with: - role-to-assume: ${{ secrets.AWS_ECR_PUBLISH_IAM_ROLE }} - aws-region: us-east-1 - - - name: Login to Amazon ECR - uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1 - - - name: Login to GHCR - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 - with: - registry: ${{ env.GHCR_REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push worker (arm64, default DuckDB row) - uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 - with: - context: . 
- file: Dockerfile.worker - push: true - platforms: linux/arm64 - tags: | - ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} - ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-latest - ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} - ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:mw-dev-latest - build-args: | - VERSION=mw-dev-${{ env.SHORT_SHA }} - COMMIT=${{ github.sha }} - BUILD_TAGS=kubernetes - DUCKDB_GO_VERSION=${{ env.DUCKDB_GO_VERSION }} - DUCKDB_BINDINGS_VERSION=${{ env.DUCKDB_BINDINGS_VERSION }} - DUCKDB_EXTENSION_VERSION=${{ env.DUCKDB_VERSION }} - HTTPFS_EXTENSION_TAG=${{ env.HTTPFS_EXTENSION_TAG }} - DUCKLAKE_EXTENSION_TAG=${{ env.DUCKLAKE_EXTENSION_TAG }} - POSTGRES_SCANNER_REPOSITORY=${{ env.POSTGRES_SCANNER_REPOSITORY }} - cache-from: type=gha,scope=worker-mw-dev-arm64 - cache-to: type=gha,mode=max,scope=worker-mw-dev-arm64 - - - name: Summary - env: - IMAGE_REF: ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-${{ env.SHORT_SHA }} - run: | - { - echo "## worker image pushed" - echo "" - echo "- ECR: \`$IMAGE_REF\`" - echo "- Floating tag: \`${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:mw-dev-latest\`" - echo "- DuckDB: \`${{ env.DUCKDB_VERSION }}\` (go ${{ env.DUCKDB_GO_VERSION }})" - echo "- Ref: \`${{ github.ref }}\` @ \`${{ github.sha }}\`" - } >> "$GITHUB_STEP_SUMMARY"