diff --git a/.github/workflows/aot.yml b/.github/workflows/aot.yml index 17dddb47..32e7dfe4 100644 --- a/.github/workflows/aot.yml +++ b/.github/workflows/aot.yml @@ -32,13 +32,10 @@ jobs: env: AWS_DEFAULT_REGION: us-west-1 REG_MAX_VERSIONS: 3 - REG_MAX_AGE_MONTHS: 6 - REG_WEBSITE_DIR: _deno_website + REG_MAX_AGE_MONTHS: 6 + REG_WEBSITE_DIR: _website REG_PROVIDER_PATH: providers/dist REG_ARTIFACT_REPO_BUCKET: stackql-registry-artifacts - REG_DENO_DEPLOY_ASSET_REPO: deno-deploy-registry - REG_DENO_DEPLOY_API_DEV: stackql-dev-registry - REG_DENO_DEPLOY_API_PROD: stackql-registry steps: - uses: actions/checkout@v7 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3d9488f4..34a9a16d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,13 +21,10 @@ jobs: env: AWS_DEFAULT_REGION: us-west-1 REG_MAX_VERSIONS: 3 - REG_MAX_AGE_MONTHS: 6 - REG_WEBSITE_DIR: _deno_website + REG_MAX_AGE_MONTHS: 6 + REG_WEBSITE_DIR: _website REG_PROVIDER_PATH: providers/dist REG_ARTIFACT_REPO_BUCKET: stackql-registry-artifacts - REG_DENO_DEPLOY_ASSET_REPO: deno-deploy-registry - REG_DENO_DEPLOY_API_DEV: stackql-dev-registry - REG_DENO_DEPLOY_API_PROD: stackql-registry steps: - uses: actions/checkout@v7 @@ -132,7 +129,7 @@ jobs: # - name: "[PUBLISH] configure aws credentials" - uses: aws-actions/configure-aws-credentials@v4 + uses: aws-actions/configure-aws-credentials@v6 if: env.REG_EVENT == 'push' with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} @@ -144,32 +141,18 @@ jobs: run: | python scripts/publish/publish-provider-docs-to-artifact-repo.py - - name: "[DEPLOY] setup SSH" - uses: MrSquaare/ssh-setup-action@v3.1.0 - if: env.REG_EVENT == 'push' - with: - host: github.com - private-key: ${{ secrets.SSH_PRIVATE_KEY }} - - - name: "[DEPLOY] pull deno deploy assets" - if: env.REG_EVENT == 'push' - run: | - git clone git@github.com:stackql/${REG_DENO_DEPLOY_ASSET_REPO}.git - cp ${REG_DENO_DEPLOY_ASSET_REPO}/website/index.ts $REG_WEBSITE_DIR 
- - name: "[DEPLOY] pull additional docs from artifact repo" if: env.REG_EVENT == 'push' run: | python scripts/deploy/pull-additional-docs-from-artifact-repo.py # - # Cloudflare (green) dual-publish. Runs here, BEFORE clean-deploy-dir.py - # flattens/destroys the working tree (which removes the origin/ Worker - # source). The full docs tree (changed providers + everything pulled from - # the artifact repo, plus the freshly generated providers.yaml) lives at - # ${REG_WEBSITE_DIR}/${REG_PROVIDER_PATH} at this point, byte-identical to - # what the Deno origin is about to deploy. Same push/branch gating as the - # Deno steps, so blue and green stay in sync during the transition window. + # Cloudflare serving layer. S3 (stackql-registry-artifacts) remains the + # master/archive: the steps above publish changed providers to S3 and + # reconstruct the full docs tree from it. Here that full tree (changed + # providers + everything pulled from the artifact repo, plus the freshly + # generated providers.yaml) lives at ${REG_WEBSITE_DIR}/${REG_PROVIDER_PATH} + # and is mirrored to R2, which the Worker serves from at the edge. # - name: "[DEPLOY-CF] install worker deps" @@ -228,32 +211,3 @@ jobs: CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} run: | cd origin && npx wrangler deploy --env production - - - name: "[DEPLOY] install deno" - if: env.REG_EVENT == 'push' - uses: denoland/setup-deno@main - with: - deno-version: 1.18.2 - - - name: "[DEPLOY] clean deploy dir" - if: env.REG_EVENT == 'push' - run: | - python scripts/deploy/clean-deploy-dir.py - echo "deployment dir contents: " - tree . 
- echo "providers.yaml contents: " - cat ${REG_PROVIDER_PATH}/providers.yaml - - - name: "[DEPLOY] deploy to deno deploy (dev)" - if: env.REG_TARGET_BRANCH == 'dev' && env.REG_EVENT == 'push' - uses: denoland/deployctl@1.13.1 - with: - project: ${{ env.REG_DENO_DEPLOY_API_DEV }} - entrypoint: index.ts - - - name: "[DEPLOY] deploy to deno deploy (prod)" - if: env.REG_TARGET_BRANCH == 'main' && env.REG_EVENT == 'push' - uses: denoland/deployctl@1.13.1 - with: - project: ${{ env.REG_DENO_DEPLOY_API_PROD }} - entrypoint: index.ts diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index da1282eb..09b3e7ab 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -76,13 +76,10 @@ jobs: env: AWS_DEFAULT_REGION: us-west-1 REG_MAX_VERSIONS: 3 - REG_MAX_AGE_MONTHS: 6 - REG_WEBSITE_DIR: _deno_website + REG_MAX_AGE_MONTHS: 6 + REG_WEBSITE_DIR: _website REG_PROVIDER_PATH: providers/dist REG_ARTIFACT_REPO_BUCKET: stackql-registry-artifacts - REG_DENO_DEPLOY_ASSET_REPO: deno-deploy-registry - REG_DENO_DEPLOY_API_DEV: stackql-dev-registry - REG_DENO_DEPLOY_API_PROD: stackql-registry steps: - uses: actions/checkout@v7 @@ -120,7 +117,7 @@ jobs: python-version: '3.12' - name: Download python package dist folder - uses: actions/download-artifact@v4.1.2 + uses: actions/download-artifact@v8 with: name: python-package-dist-folder path: test/dist diff --git a/.gitignore b/.gitignore index 3e43a9ca..68550868 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ stackql-core-pkg/ stackql-any-sdk/ .venv/ *.py[co] +/tmp diff --git a/docs/build-and-deployment.md b/docs/build-and-deployment.md index 6d6749bd..5dcaae8b 100644 --- a/docs/build-and-deployment.md +++ b/docs/build-and-deployment.md @@ -12,10 +12,11 @@ The following diagram shows the CI/CD flow for this repository. 
SignAndPackage --> Tests[provider\ne2e tests] end subgraph Push[triggered by protected branch merge commit] - Tests --> PushArtifacts[/register\nand publish\nartifacts/] - PushArtifacts --> Deno((Deno\nDeploy)) + Tests --> PushArtifacts[/register\nand publish\nartifacts to S3/] + PushArtifacts --> SyncR2[/sync docs\nto R2/] + SyncR2 --> Worker((Cloudflare\nWorker)) end - Deno --> End(end) + Worker --> End(end) ProvUpd -->|no| End end ``` @@ -27,7 +28,7 @@ The nodes in the above graph are described in the sections below: * [package artifacts](#package-artifacts) * [provider tests](#provider-tests) * [register and store artifacts](#register-and-store-artifacts) - * [Deno Deploy](#deno-deploy) + * [serve from Cloudflare](#serve-from-cloudflare) The following steps are performed on all pull requests to protected branches `dev` or `main` (if providers were updated): @@ -73,27 +74,23 @@ Steps include: #### Publish Artifacts to Provider Registry Artifact Repository -Packaged artifacts are published to the artifact repository in AWS S3 bucket (`stackql-registry-artifacts`). Steps include: +Packaged artifacts are published to the master/archive artifact repository in AWS S3 bucket (`stackql-registry-artifacts`). The full registry tree is then reconstructed from S3 so the complete set of provider docs (plus a freshly generated `providers.yaml`) is available for the serving layer. Steps include: - `[PUBLISH] configure aws credentials` - `[PUBLISH] publish provider docs to artifact repo` +- `[DEPLOY] pull additional docs from artifact repo` -#### Deno Deploy +#### Serve from Cloudflare -Provider docs are prepared for distribution via [Deno Deploy](https://deno.com/deploy); this includes pulling the latest versions of the provider docs from the artifact repository and preparing the `index.ts` file for distribution. Steps include: +S3 remains the master/archive store. 
The reconstructed docs tree is mirrored to Cloudflare R2, and a [Cloudflare Worker](../origin) (source in [origin/](../origin)) serves provider docs from R2 at the edge, logging download analytics to D1. Steps include: -- `[DEPLOY] setup SSH` -- `[DEPLOY] pull deno deploy assets` -- `[DEPLOY] pull additional docs from artifact repo` -- `[DEPLOY] install deno` +- `[DEPLOY-CF] install worker deps` +- `[DEPLOY-CF] sync docs to R2 (dev)` / `[DEPLOY-CF] sync docs to R2 (prod)` +- `[DEPLOY-CF] deploy worker (dev)` / `[DEPLOY-CF] deploy worker (prod)` -The public StackQL Provider Registry is distributed via [Deno Deploy](https://deno.com/deploy), using the following endpoints: +The public StackQL Provider Registry is served from Cloudflare, using the following endpoints: | Endpoint | Description | | --- | --- | | [registry.stackql.app](https://registry.stackql.app/ping) | Production registry (built from `main`) | -| [registry-dev.stackql.app](https://registry.stackql.app/ping) | Development registry (built from `dev`) | - -Steps include: - -- `[DEPLOY] deploy to deno deploy` +| [registry-dev.stackql.app](https://registry-dev.stackql.app/ping) | Development registry (built from `dev`) | diff --git a/origin/MIGRATION.md b/origin/MIGRATION.md deleted file mode 100644 index 7c98ffd7..00000000 --- a/origin/MIGRATION.md +++ /dev/null @@ -1,213 +0,0 @@ -# Registry origin migration: Deno Deploy -> Cloudflare (blue-green) - -Runbook for moving the registry origin from Deno Deploy (blue) to Cloudflare -Workers + R2 + D1 (green) without changing the client URL contract. - -## URL contract (must not change) - -- Production: `https://registry.stackql.app` -- Development: `https://registry-dev.stackql.app` -- Paths: `/providers/dist/providers.yaml`, `/providers/dist//.tgz`, - `/ping`, `/analytics`, `/analytics/last24hours` - -Green is stood up on parallel hostnames first, then the real hostnames are -repointed by DNS (Phase 4). Rollback is reverting that DNS change. 
- -## What is already in the repo (Phases 1-2, code complete) - -- `origin/src/index.ts` - Worker port of the Deno origin, validated locally - against all acceptance checks (routing, R2 serving, D1 logging). -- `origin/wrangler.toml` - `dev` and `production` environments, R2 + D1 bindings. - Cutover hostnames are commented custom-domain routes; until the Phase 4 cutover - the Worker is reachable on its `workers.dev` route (used for validation). -- `origin/schema.sql` - D1 `downloads` table + index. -- `.github/workflows/main.yml` - dual-publish steps (`[DEPLOY-CF] ...`) that sync - docs to R2 and `wrangler deploy` the Worker, alongside the unchanged Deno steps. - -Build / sign / package / test steps in `main.yml` are untouched. - -## Phase 0: confirm inputs (do before deploying) - -1. **DNS control of `stackql.app`.** Determine whether the zone is already on - Cloudflare DNS. - - If yes: Phase 4 is a custom-domain attach on the Worker (Cloudflare creates - the proxied record automatically). - - If no: either migrate the zone to Cloudflare, or keep DNS where it is and - point the hostnames at the Worker via a CNAME to the Worker's - `*.workers.dev` route / a Cloudflare custom domain. Custom domains require - the zone to be on Cloudflare, so a zone migration is the clean path. -2. **S3 artifact bucket (`stackql-registry-artifacts`).** The Worker reads only - from R2. Decide: keep S3 as the build archive (recommended - CI still - publishes to it and the R2 sync is sourced from the assembled tree), or retire - it later. No Worker change either way. -3. **Client default registry URL.** Confirm the deployed StackQL binaries already - resolve to `https://registry.stackql.app/providers` (the `deno-deploy-registry` - README noted this as a planned change). If clients already use the contract - hostnames, the cutover is zero-client-change. 
If any client still points at - `cdn.statically.io/...`, that is a separate client change and out of scope - here - but note those clients will not follow the cutover. - -## Phase 1: create Cloudflare resources - -Authenticate wrangler (`wrangler login`, or set `CLOUDFLARE_API_TOKEN`). - -```bash -cd origin -npm install - -npx wrangler r2 bucket create stackql-provider-registry-dev -npx wrangler r2 bucket create stackql-provider-registry - -npx wrangler d1 create stackql-registry-analytics-dev # copy database_id -npx wrangler d1 create stackql-registry-analytics # copy database_id - -npx wrangler d1 execute stackql-registry-analytics-dev --remote --file=./schema.sql -npx wrangler d1 execute stackql-registry-analytics --remote --file=./schema.sql -``` - -Then edit `wrangler.toml`: replace `` and `` with the -returned database IDs. CI cannot deploy until these are real. - -Local acceptance (optional, already verified): see `origin/README.md`. - -## GitHub Actions secrets required by the dual-path CI - -Add these repo secrets (Settings -> Secrets and variables -> Actions): - -| Secret | Purpose | -| ----------------------- | ------------------------------------------------------------- | -| `CLOUDFLARE_API_TOKEN` | `wrangler deploy`. Scope: Workers Scripts edit, D1 edit, Workers R2 Storage edit, Account/Zone read. | -| `CLOUDFLARE_ACCOUNT_ID` | R2 S3 endpoint host and wrangler account. | -| `R2_ACCESS_KEY_ID` | R2 S3 API token (R2 -> Manage API tokens) for `aws s3 sync`. | -| `R2_SECRET_ACCESS_KEY` | R2 S3 API secret. | - -These are independent of the existing AWS S3 credentials; the R2 sync steps -override the AWS env vars locally so they do not collide. 
- -## Phase 2: dual-path CI (in place) - -The `[DEPLOY-CF]` steps in `main.yml` run on push, gated on the same `dev`/`main` -split as the Deno steps: - -- `dev` branch -> R2 bucket `stackql-provider-registry-dev`, `wrangler deploy --env dev` -- `main` branch -> R2 bucket `stackql-provider-registry`, `wrangler deploy --env production` - -Acceptance: a push to `dev` updates both the dev Deno origin and the dev Worker -with byte-identical content (both sourced from the same assembled -`_deno_website/providers/dist` tree). - -If you want to seed R2 once before the first CI push (so green is immediately -servable), run a one-off sync from a local checkout that has the full tree, or -trigger a `workflow_dispatch` / no-op push. - -## Phase 3: validate green on the workers.dev URL - -No temporary green DNS records are used. Each deployed Worker is reachable at its -`*.workers.dev` URL (dev: `stackql-provider-registry-dev..workers.dev`, -prod: `stackql-provider-registry-prod..workers.dev`). Validate there. 
- -Run against the Worker URL: - -```bash -BASE=https://stackql-provider-registry-dev..workers.dev -curl -i $BASE/providers/dist/providers.yaml # 200 text/plain -curl -i $BASE/providers/dist/aws/v0.1.3.tgz # 200 application/gzip -curl -i $BASE/providers/dist/fred # 404 -curl -i $BASE/ping # 202 pong -curl -i $BASE/analytics # 200 text/html -curl -i $BASE/analytics/last24hours # 200 application/json -``` - -Byte parity vs blue (Deno): - -```bash -GREEN=https://stackql-provider-registry-dev..workers.dev -diff <(curl -s https://registry-dev.stackql.app/providers/dist/providers.yaml) \ - <(curl -s $GREEN/providers/dist/providers.yaml) -# repeat for a sample of .tgz using sha256sum -for v in aws/v0.1.3 ...; do - a=$(curl -s https://registry-dev.stackql.app/providers/dist/$v.tgz | sha256sum) - b=$(curl -s $GREEN/providers/dist/$v.tgz | sha256sum) - [ "$a" = "$b" ] && echo "$v OK" || echo "$v MISMATCH" -done -``` - -Live pull from a StackQL client pointed at the Worker URL: - -```bash -export STACKQL_REGISTRY='{"url": "https://stackql-provider-registry-dev..workers.dev/providers"}' -stackql exec "REGISTRY PULL aws" -``` - -Analytics: confirm a `.tgz` pull writes one D1 row and `providers.yaml` writes -none, and that `/analytics` renders the three windows plus the 12-month matrix. - -```bash -npx wrangler d1 execute stackql-registry-analytics-dev --remote \ - --command "SELECT count(*), provider FROM downloads GROUP BY provider" -``` - -Acceptance: full endpoint + pull parity, analytics writing correctly. - -## Phase 4: DNS last mile (gated cutover) - -Cut dev first, soak, then prod. - -1. **Dev.** Attach `registry-dev.stackql.app` as a custom domain on the dev - Worker. Either add it to `[env.dev]` routes in `wrangler.toml` (commented - block is ready) and `wrangler deploy --env dev`, or attach via the dashboard - (Workers -> the dev worker -> Settings -> Domains & Routes -> Add custom - domain). Cloudflare repoints the proxied DNS record to the Worker. 
- - Verify endpoint parity and a live `REGISTRY PULL` against - `https://registry-dev.stackql.app`. - - Soak. -2. **Prod.** Repeat for `registry.stackql.app` on the production Worker - (`[env.production]` routes block, or dashboard). Verify parity + live pull. - -**Rollback (either hostname):** detach the custom domain from the Worker / revert -the DNS record to the Deno origin. Blue is still live and still receiving CI -updates throughout the transition window, so rollback is immediate with no data -loss. - -Acceptance: both production hostnames serve from the Worker with passing pull -tests; this rollback step is the documented procedure. - -## Phase 5: decommission blue - -Only after the soak window passes with green stable on the production hostnames: - -1. In `main.yml`, remove the Deno steps: - - `[DEPLOY] setup SSH`, `[DEPLOY] pull deno deploy assets`, - `[DEPLOY] install deno`, `[DEPLOY] clean deploy dir`, - `[DEPLOY] deploy to deno deploy (dev)`, `[DEPLOY] deploy to deno deploy (prod)`. - - Keep `[DEPLOY] pull additional docs from artifact repo` (it assembles the - full tree and regenerates `providers.yaml` that the R2 sync consumes). - - Once `clean-deploy-dir.py` is no longer in the pipeline, `origin/` survives - to the end of the job, so the `[DEPLOY-CF]` steps can move later if desired; - no functional change needed. - - Remove the `REG_DENO_DEPLOY_*` env vars. -2. Delete the Deno Deploy projects `stackql-registry` and `stackql-dev-registry`. -3. Revoke `DENO_KV_ACCESS_TOKEN` (the token in the old `deno-deploy-registry/env.sh`). -4. Cancel the Deno Deploy subscription. -5. Retire the `deno-deploy-registry` repo (archive). - -Acceptance: CI deploys only to Cloudflare, no Deno dependency remains, the -registry serves entirely from the Worker. - -## Notes / guardrails honored - -- No secrets in the repo. `deno-deploy-registry/env.sh` is not carried across; - `DENO_KV_ACCESS_TOKEN` is revoked in Phase 5. 
All credentials are GitHub - Actions secrets / `wrangler secret`. -- Build/sign/package/test steps in `main.yml` are unchanged. -- Response paths, status codes, and content types match the Deno origin exactly - (verified locally). Caching adds `Cache-Control` only: `immutable` long max-age - on `.tgz`, `max-age=60` on `providers.yaml`. -- `dev` -> dev origin, `main` -> prod origin split preserved. - -## Cost expectation - -Within Workers free (100k req/day), R2 free (10GB storage, zero egress), and D1 -free (5GB) at current volume. Moves to the $5/mo Workers plan plus R2 storage -overage only once those limits are exceeded. Immutable `.tgz` edge caching keeps -R2 read ops low. diff --git a/origin/README.md b/origin/README.md index 74282fe9..64e65311 100644 --- a/origin/README.md +++ b/origin/README.md @@ -100,8 +100,3 @@ deploy `--env dev`, pushes to `main` deploy `--env production`. Manual deploys: npm run deploy:dev # wrangler deploy --env dev npm run deploy:prod # wrangler deploy --env production ``` - -## Migration - -See [MIGRATION.md](MIGRATION.md) for the full blue-green cutover runbook -(parallel-hostname validation, DNS last mile, and Deno decommission). diff --git a/origin/RUNSHEET.md b/origin/RUNSHEET.md deleted file mode 100644 index e454bf63..00000000 --- a/origin/RUNSHEET.md +++ /dev/null @@ -1,100 +0,0 @@ -# Cutover run sheet - -Ordered steps to deploy the Cloudflare green origin alongside the live Deno blue -origin, validate, and (later) cut over by DNS. The `stackql.app` zone is on -Cloudflare (account `4132d7d5587ee99b9d482ecfc2c1853c`). - -## Safety invariant (holds until the Phase 4 cutover) - -Merging this PR does NOT touch production traffic: - -- The Deno steps in `main.yml` are unchanged - blue keeps deploying and serving. -- `wrangler.toml` route blocks are commented out, so `wrangler deploy` attaches - no custom domains and makes no DNS changes. 
The green Worker is validated on its - `*.workers.dev` URL - no temporary green DNS records are used. -- The existing DNS records are untouched: `registry.stackql.app` and - `registry-dev.stackql.app` keep their A/AAAA records pointing at the Deno - origin (`34.120.54.55` / `2600:1901:0:6d85::`). - -So after merge, green runs fully in parallel; production still serves from Deno. -Cutover is a separate, deliberate DNS change in Phase 4, instantly reversible. - -## A. Provision (local, on the feature branch) - me/you - -1. `cd origin && npm install` -2. `npx wrangler login` -3. `bash bootstrap.sh` - - creates R2 buckets `stackql-provider-registry-dev` + `stackql-provider-registry` - - creates D1 `stackql-registry-analytics-dev` + `stackql-registry-analytics` - - patches `wrangler.toml` with the real D1 IDs, applies `schema.sql` to both - - (D1 IDs are not secrets; committing them to the public repo is expected) -4. `git --no-pager diff origin/wrangler.toml` - confirm `` / - `` are replaced with UUIDs. - -## B. Secrets (GitHub repo -> Settings -> Secrets and variables -> Actions) - -| Secret | Value / source | -| --- | --- | -| `CLOUDFLARE_ACCOUNT_ID` | `4132d7d5587ee99b9d482ecfc2c1853c` | -| `CLOUDFLARE_API_TOKEN` | API token, scopes below | -| `R2_ACCESS_KEY_ID` | from R2 -> Manage R2 API Tokens | -| `R2_SECRET_ACCESS_KEY` | from R2 -> Manage R2 API Tokens | - -CLOUDFLARE_API_TOKEN scopes (one token, covers through Phase 4): - -- Account / Workers Scripts / Edit -- Account / D1 / Edit -- Account / Workers R2 Storage / Edit -- Account / Account Settings / Read -- Zone / Workers Routes / Edit (stackql.app) - needed when routes are uncommented -- Zone / DNS / Edit (stackql.app) - custom-domain attach writes DNS -- Zone / Zone / Read (stackql.app) - -R2 API token (for `aws s3 sync`): R2 -> Manage R2 API Tokens -> Create -> permission -"Object Read & Write", scoped to the two buckets (or account-wide). 
Use the -returned Access Key ID / Secret Access Key as the two `R2_*` secrets. - -## C. PR and merge - -5. Commit `origin/` + the `main.yml` change, push the feature branch, open the PR. - - PR CI runs build/sign/package/test only. Both the Deno deploy steps and the - `[DEPLOY-CF]` steps are push-gated (`REG_EVENT == 'push'`), so nothing - deploys on the PR itself. -6. Merge to `dev`. The push to `dev` runs, in the same job: - - blue: deploy to `stackql-dev-registry` (Deno) - unchanged - - green: `aws s3 sync` -> `stackql-provider-registry-dev` R2, then - `wrangler deploy --env dev` - -## D. Phase 3 validation (dev green, on workers.dev) - -7. Find the dev Worker URL (`stackql-provider-registry-dev..workers.dev`). - Run the endpoint checks, byte-parity diff vs `https://registry-dev.stackql.app`, - and a live pull (commands in `MIGRATION.md` Phase 3). Confirm D1 rows are - written for `.tgz` and not for `providers.yaml`. -8. Merge `dev` -> `main`. The push to `main` deploys blue prod (Deno) and green - prod (R2 `stackql-provider-registry` + `wrangler deploy --env production`). - Validate prod green the same way on its workers.dev URL. - -## E. Phase 4 cutover (separate change, after soak) - DNS last mile - -Cut dev first, soak, then prod. For each hostname: - -9. Dev: in Cloudflare DNS, delete the existing `registry-dev.stackql.app` A and - AAAA records (they point at Deno), then attach `registry-dev.stackql.app` as a - custom domain on the dev Worker - uncomment the dev route in `wrangler.toml` - and `wrangler deploy --env dev`, or use the dashboard (Worker -> Settings -> - Domains & Routes -> Add custom domain). Cloudflare creates the managed proxied - record. Verify endpoint parity + live `REGISTRY PULL` against - `https://registry-dev.stackql.app`. Soak. -10. Prod: repeat step 9 for `registry.stackql.app` on the production Worker. 
- -Rollback (either hostname): detach the Worker custom domain and re-create the A -record `34.120.54.55` + AAAA `2600:1901:0:6d85::` (proxied) pointing back at Deno. -Blue is still deploying and serving throughout, so rollback is immediate with no -data loss. - -## F. Phase 5 decommission (later, after stable soak) - -See `MIGRATION.md` Phase 5: remove the Deno steps from `main.yml`, delete the -`_acme-challenge.registry*` CNAMEs, delete the Deno Deploy projects, revoke -`DENO_KV_ACCESS_TOKEN`, cancel the Deno subscription, archive `deno-deploy-registry`. diff --git a/scripts/deploy/clean-deploy-dir.py b/scripts/deploy/clean-deploy-dir.py deleted file mode 100644 index 59cb3fc8..00000000 --- a/scripts/deploy/clean-deploy-dir.py +++ /dev/null @@ -1,24 +0,0 @@ -import os, shutil - -print("getting REG_WEBSITE_DIR env var...") -website_dir = os.getenv('REG_WEBSITE_DIR') - -print("removing all objects except %s..." % (website_dir)) -objects = os.listdir('.') -for obj in objects: - if os.path.isdir(obj): - if obj != website_dir: - shutil.rmtree(obj) - else: - os.remove(obj) - -print("copying objects from %s to root of working dir..." % (website_dir)) -objects = os.listdir(website_dir) -for obj in objects: - if os.path.isdir("%s/%s" % (website_dir, obj)): - shutil.copytree("%s/%s" % (website_dir, obj), obj) - else: - shutil.copy("%s/%s" % (website_dir, obj), obj) - -print("removing %s..." % (website_dir)) -shutil.rmtree(website_dir) diff --git a/smoke-test.sh b/smoke-test.sh new file mode 100755 index 00000000..a3b701a4 --- /dev/null +++ b/smoke-test.sh @@ -0,0 +1,204 @@ +#!/usr/bin/env bash +# +# smoke-test.sh - manual smoke test for the StackQL provider registry origin. +# +# Exercises every route in the URL contract plus live stackql registry +# list/pull, against dev and prod on BOTH the workers.dev hostnames and the +# production DNS names. 
# This is run by hand, NOT in CI:
#
#   bash smoke-test.sh
#
# Override the workers.dev subdomain if yours differs:
#   CF_WORKERS_SUBDOMAIN=myacct bash smoke-test.sh
#
# Note: pulling a provider and fetching a .tgz both write a row to the D1
# analytics table by design, so running this adds to the download counts.
#
# Exit code: 0 if all tests pass, 1 if any fail.

# -u: expanding an unset variable is an error.
# pipefail: a pipeline fails when any stage fails, not just the last one.
# There is no -e, so a failing command does not abort the run; failures are
# tallied through fail() and reported in the summary.
set -uo pipefail

SUBDOMAIN="${CF_WORKERS_SUBDOMAIN:-javen3010}"

# Expanded unquoted on purpose so it word-splits into a command plus options.
CURL="curl -s --max-time 30"

# colors only when stdout is a terminal
if [ -t 1 ]; then
  GREEN=$'\033[0;32m'
  RED=$'\033[0;31m'
  YELLOW=$'\033[0;33m'
  BOLD=$'\033[1m'
  NC=$'\033[0m'
else
  GREEN=''
  RED=''
  YELLOW=''
  BOLD=''
  NC=''
fi

# Running totals, plus the description of every failed check for the summary.
PASS=0
FAIL=0
SKIP=0
declare -a FAILURES=()

# pass DESC - count one passing check and print it.
pass() {
  PASS=$((PASS+1))
  printf " ${GREEN}PASS${NC} %s\n" "$1"
}

# fail DESC - count one failing check, remember DESC, and print it.
fail() {
  FAIL=$((FAIL+1))
  FAILURES+=("$1")
  printf " ${RED}FAIL${NC} %s\n" "$1"
}

# skip DESC - count one skipped check and print it.
skip() {
  SKIP=$((SKIP+1))
  printf " ${YELLOW}SKIP${NC} %s\n" "$1"
}

# --- HTTP helpers ---------------------------------------------------------

# status_of [curl args...] URL - print only the HTTP status code.
status_of() { $CURL -o /dev/null -w '%{http_code}' "$@"; }

# ctype_of [curl args...] URL - print only the response content type.
ctype_of() { $CURL -o /dev/null -w '%{content_type}' "$@"; }

# headers_of [curl args...] URL - GET, dump response headers, discard the body.
headers_of() { $CURL -D - -o /dev/null "$@"; }

# expect_status DESC EXPECTED URL [extra curl args...]
# Passes when the HTTP status of URL equals EXPECTED.
expect_status() {
  local desc="$1" exp="$2" url="$3"
  shift 3
  local got
  got=$(status_of "$@" "$url")
  if [ "$got" = "$exp" ]; then
    pass "$desc -> $got"
  else
    fail "$desc -> expected $exp, got $got"
  fi
}

# expect_ctype DESC URL SUBSTRING
# Passes when the content type of URL contains SUBSTRING.
expect_ctype() {
  local desc="$1" url="$2" want="$3" got
  got=$(ctype_of "$url")
  case "$got" in
    *"$want"*) pass "$desc -> $got" ;;
    *) fail "$desc -> expected ~$want, got '${got:-}'" ;;
  esac
}

# expect_body DESC URL SUBSTRING
# Passes when the response body of URL contains SUBSTRING.
expect_body() {
  local desc="$1" url="$2" want="$3" body
  body=$($CURL "$url")
  case "$body" in
    *"$want"*) pass "$desc" ;;
    *) fail "$desc -> body missing '$want'" ;;
  esac
}

# expect_header DESC URL SUBSTRING
# Passes when curl succeeds and the response headers contain SUBSTRING
# (case-insensitive).
#
# The headers are captured first and matched from a here-string. Piping curl
# straight into `grep -q` lets grep exit on its first match while curl may
# still be writing; under pipefail that can turn a match into a false FAIL.
expect_header() {
  local desc="$1" url="$2" want="$3" hdrs
  if hdrs=$(headers_of "$url") && grep -iq -- "$want" <<<"$hdrs"; then
    pass "$desc"
  else
    fail "$desc -> header missing '$want'"
  fi
}

# aws_latest_version BASE
# Print the last version listed under the aws provider in
# BASE/providers/dist/providers.yaml (e.g. v26.05.00395); print an empty line
# when no aws entry is found.
#
# Nesting is derived from each line's measured indentation rather than from a
# fixed number of leading spaces, so the parse does not depend on how deeply
# the document is indented.
aws_latest_version() {
  $CURL "$1/providers/dist/providers.yaml" | awk '
    # The aws key opens the section; remember how far it is indented.
    /^[[:space:]]+aws:/ {
      depth = match($0, /[^[:space:]]/) - 1
      inside = 1
      next
    }
    # A key indented no deeper than aws belongs to the next provider.
    inside && /^[[:space:]]*[A-Za-z0-9_.]+:/ && (match($0, /[^[:space:]]/) - 1) <= depth {
      inside = 0
    }
    # Every list item inside the section is a version; keep the last one.
    inside && /^[[:space:]]*-[[:space:]]/ {
      sub(/^[[:space:]]*-[[:space:]]*/, "")
      latest = $0
    }
    END { print latest }'
}

# run_http_suite LABEL BASE
# Run every HTTP check of the URL contract against the origin at BASE.
run_http_suite() {
  local label="$1" base="$2"
  printf "\n${BOLD}HTTP: %s${NC} %s\n" "$label" "$base"

  # served by Cloudflare (Worker / workers.dev both emit these)
  expect_header "served by Cloudflare (server header)" "$base/ping" "server: cloudflare"
  expect_header "served by Cloudflare (cf-ray header)" "$base/ping" "cf-ray:"

  # ping/pong (202, body "pong")
  expect_status "GET /ping" 202 "$base/ping"
  local pong
  pong=$($CURL "$base/ping")
  if [ "$pong" = "pong" ]; then
    pass "GET /ping body == pong"
  else
    fail "GET /ping body == pong (got '${pong}')"
  fi

  # providers.yaml: 200 text/plain, body contains providers:, short cache
  expect_status "GET providers.yaml" 200 "$base/providers/dist/providers.yaml"
  expect_ctype "GET providers.yaml content-type" "$base/providers/dist/providers.yaml" "text/plain"
  expect_body "GET providers.yaml body has providers:" "$base/providers/dist/providers.yaml" "providers:"
  expect_header "GET providers.yaml cache-control max-age=60" "$base/providers/dist/providers.yaml" "max-age=60"

  # a real .tgz: 200 application/gzip + immutable cache-control.
  # One download only: headers go to stdout and the -w trailer appends the
  # status and content type on marker lines that are parsed back out below.
  local ver tgz resp st ct
  ver=$(aws_latest_version "$base")
  if [ -n "$ver" ]; then
    tgz="$base/providers/dist/aws/${ver}.tgz"
    resp=$($CURL -D - -o /dev/null -w $'\n__STATUS__%{http_code}\n__CTYPE__%{content_type}' "$tgz")
    st=$(printf '%s' "$resp" | grep -o '__STATUS__[0-9]*' | sed 's/__STATUS__//')
    ct=$(printf '%s' "$resp" | sed -n 's/.*__CTYPE__//p')
    if [ "$st" = "200" ]; then
      pass "GET aws/${ver}.tgz -> 200"
    else
      fail "GET aws/${ver}.tgz -> ${st:-}"
    fi
    case "$ct" in
      *application/gzip*) pass "GET aws .tgz content-type -> $ct" ;;
      *) fail "GET aws .tgz content-type -> '${ct:-}'" ;;
    esac
    if grep -iq immutable <<<"$resp"; then
      pass "GET aws .tgz immutable cache-control"
    else
      fail "GET aws .tgz immutable cache-control"
    fi
  else
    fail "could not resolve aws version from providers.yaml"
  fi

  # 404 (unknown doc) and 405 (non-GET)
  expect_status "GET unknown doc" 404 "$base/providers/dist/does-not-exist"
  expect_status "POST /ping" 405 "$base/ping" -X POST

  # analytics
  expect_status "GET /analytics" 200 "$base/analytics"
  expect_ctype "GET /analytics content-type" "$base/analytics" "text/html"
  expect_body "GET /analytics renders dashboard" "$base/analytics" "StackQL Registry Analytics"
  expect_status "GET /analytics/last24hours" 200 "$base/analytics/last24hours"
  expect_ctype "GET /analytics/last24hours content-type" "$base/analytics/last24hours" "application/json"
  local j
  j=$($CURL "$base/analytics/last24hours")
  case "$j" in
    \{*|\[*) pass "GET /analytics/last24hours is JSON" ;;
    *) fail "GET /analytics/last24hours not JSON ('${j:0:40}')" ;;
  esac
}

# run_stackql_suite LABEL BASE
# Run `registry list` and `registry pull aws` through the stackql binary named
# by $STACKQL, using BASE/providers as the registry URL. Both checks are
# skipped when $STACKQL is empty or unset.
run_stackql_suite() {
  local label="$1" base="$2"
  printf "\n${BOLD}stackql: %s${NC} %s\n" "$label" "$base"
  # ${STACKQL:-} keeps this safe under `set -u` when STACKQL was never set.
  if [ -z "${STACKQL:-}" ]; then
    skip "[$label] stackql registry list (no stackql binary)"
    skip "[$label] stackql registry pull aws (no stackql binary)"
    return
  fi
  local reg="{ \"url\": \"$base/providers\" }" out
  # Output is captured, then matched from a here-string (see expect_header).
  if out=$("$STACKQL" --registry="$reg" exec "registry list" 2>&1) && grep -q "aws" <<<"$out"; then
    pass "[$label] stackql registry list"
  else
    fail "[$label] stackql registry list"
  fi
  if out=$("$STACKQL" --registry="$reg" exec "registry pull aws" 2>&1) && grep -q "successfully installed" <<<"$out"; then
    pass "[$label] stackql registry pull aws"
  else
    fail "[$label] stackql registry pull aws"
  fi
}

# All execution lives in main(), invoked at the very end. This forces bash to
# parse the entire script before running anything, so the install step's
# subprocesses cannot corrupt incremental script reads when this file lives on
# a Windows drive mount (/mnt/c) under WSL - which otherwise yields a spurious
# "unexpected EOF" parse error mid-run.
# locate_stackql - print the stackql binary to use: ./stackql when that file
# is executable, otherwise whatever `command -v stackql` resolves. Prints
# nothing when neither is available.
locate_stackql() {
  if [ -x ./stackql ]; then
    printf '%s\n' ./stackql
  else
    command -v stackql 2>/dev/null
  fi
}

# main - entry point: find (or install) stackql, run the HTTP suite and then
# the stackql suite against every target, print the summary, and exit 1 when
# any check failed, 0 otherwise.
main() {

  # --- preflight: stackql -------------------------------------------------

  printf "${BOLD}Preflight${NC}\n"
  STACKQL=$(locate_stackql)
  if [ -z "$STACKQL" ]; then
    printf " stackql not found in cwd; installing via get-stackql.io...\n"
    # the documented Linux install - drops ./stackql into the cwd
    curl -fsSL https://get-stackql.io/install | sh
    STACKQL=$(locate_stackql)
  fi

  if [ -z "$STACKQL" ]; then
    printf " ${YELLOW}stackql unavailable - stackql tests will be skipped${NC}\n"
  else
    printf " using stackql: %s\n" "$STACKQL"
  fi

  # --- targets (label|base) -----------------------------------------------

  TARGETS=(
    "dev workers.dev|https://stackql-provider-registry-dev.${SUBDOMAIN}.workers.dev"
    "prod workers.dev|https://stackql-provider-registry-prod.${SUBDOMAIN}.workers.dev"
    "dev dns |https://registry-dev.stackql.app"
    "prod dns |https://registry.stackql.app"
  )

  # Each entry is "label|base": the label is everything before the first '|',
  # the base URL everything after it. All HTTP suites run before any stackql
  # suite.
  local target
  for target in "${TARGETS[@]}"; do
    run_http_suite "${target%%|*}" "${target#*|}"
  done

  for target in "${TARGETS[@]}"; do
    run_stackql_suite "${target%%|*}" "${target#*|}"
  done

  # --- summary ------------------------------------------------------------

  printf "\n${BOLD}==== SUMMARY ====${NC}\n"
  printf " ${GREEN}PASS${NC}: %d\n" "$PASS"
  printf " ${RED}FAIL${NC}: %d\n" "$FAIL"
  if [ "$SKIP" -gt 0 ]; then
    printf " ${YELLOW}SKIP${NC}: %d\n" "$SKIP"
  fi

  if [ "$FAIL" -gt 0 ]; then
    printf "\n${RED}Failures:${NC}\n"
    # printf repeats its format once per argument: one line per failure.
    printf " - %s\n" "${FAILURES[@]}"
    exit 1
  fi

  printf "\n${GREEN}All tests passed.${NC}\n"
  exit 0
}

main "$@"