From c5067a47c3064608d314186aafb5783a9f86db93 Mon Sep 17 00:00:00 2001
From: bdchatham
Date: Tue, 23 Jun 2026 13:10:42 -0700
Subject: [PATCH 1/2] build(harness): package the integration suite as a container image
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add test/integration/Dockerfile — `go test -c -tags integration` →
distroless/nonroot binary whose entrypoint runs the selected suite (args:
-test.run TestX). The fault + seiload manifests are //go:embed-ed, so the
binary is self-contained (no scenario files to COPY).

Add the "Build and push integration-harness" step to ecr.yml (mirrors the
seitask step), and re-include test/integration's *_test.go + *.tmpl in
.dockerignore (both excluded by the existing rules; the harness is entirely
build-tagged _test.go).

First step of the cutover that retires the seitask-runner image + the
Chaos-Mesh Workflow nightly. Validated locally: image builds, and the binary
lists TestBenchmark/TestChaosSuite/TestChainUpgrade/TestRelease.

Co-Authored-By: Claude Opus 4.8
---
Note (below the `---` marker, so outside the commit message): in this patch
the new integration-harness step still reads and writes the shared
sei/build-cache:shared ref; PATCH 2/2 moves it to a dedicated ref.

 .dockerignore               |  8 ++++++++
 .github/workflows/ecr.yml   | 14 ++++++++++++++
 test/integration/Dockerfile | 29 +++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+)
 create mode 100644 test/integration/Dockerfile

diff --git a/.dockerignore b/.dockerignore
index 23008802..621d317b 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -24,3 +24,11 @@
 # lesson as runner/templates/**: the seitask Dockerfile COPYs each
 # scenario asset dir; without the re-include, builds fail with "not found".
 !scenarios/*/**
+
+# Re-include the integration harness suite for test/integration/Dockerfile. It is
+# entirely *_test.go (excluded above) behind the integration build tag, and its
+# fault/seiload manifests are //go:embed-ed *.tmpl (excluded by **). Without
+# these the `go test -c -tags integration ./test/integration/` build fails with
+# "directory not found".
+!test/integration/**/*_test.go
+!test/integration/**/*.tmpl
diff --git a/.github/workflows/ecr.yml b/.github/workflows/ecr.yml
index a2007883..53a7f547 100644
--- a/.github/workflows/ecr.yml
+++ b/.github/workflows/ecr.yml
@@ -54,3 +54,17 @@ jobs:
           tags: ${{ steps.ecr-login.outputs.registry }}/sei/seitask-runner:${{ inputs.tag || github.sha }}
           cache-from: type=registry,ref=${{ steps.ecr-login.outputs.registry }}/sei/build-cache:shared
           cache-to: type=registry,ref=${{ steps.ecr-login.outputs.registry }}/sei/build-cache:shared,mode=max
+
+      # The Go-native integration harness (go test -c -tags integration), run by
+      # one CronJob per target (-test.run TestX). Replaces seitask-runner + the
+      # Chaos-Mesh Workflow scenarios once the nightly CronJobs cut over.
+      - name: Build and push integration-harness image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: test/integration/Dockerfile
+          push: true
+          platforms: linux/amd64
+          tags: ${{ steps.ecr-login.outputs.registry }}/sei/integration-harness:${{ inputs.tag || github.sha }}
+          cache-from: type=registry,ref=${{ steps.ecr-login.outputs.registry }}/sei/build-cache:shared
+          cache-to: type=registry,ref=${{ steps.ecr-login.outputs.registry }}/sei/build-cache:shared,mode=max
diff --git a/test/integration/Dockerfile b/test/integration/Dockerfile
new file mode 100644
index 00000000..e5bfc383
--- /dev/null
+++ b/test/integration/Dockerfile
@@ -0,0 +1,29 @@
+# The integration harness image: the build-tagged test binary, compiled once and
+# run by one in-cluster CronJob per target (args: -test.run TestX). It replaces
+# the seitask-runner image + the Chaos-Mesh Workflow scenarios — the suites carry
+# their fault/seiload templates via //go:embed, so the binary is self-contained
+# (no scenario files to COPY).
+FROM golang:1.26 AS builder
+ARG TARGETOS
+ARG TARGETARCH
+
+WORKDIR /workspace
+COPY go.mod go.mod
+COPY go.sum go.sum
+RUN go mod download
+
+COPY . .
+
+# `go test -c` compiles the suite to a standalone binary whose entrypoint runs
+# the selected test; the integration build tag is what gates the suites into it.
+RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \
+    go test -c -tags integration -ldflags="-s -w" -o harness.test ./test/integration/
+
+FROM gcr.io/distroless/static-debian12:nonroot
+WORKDIR /
+COPY --from=builder /workspace/harness.test /harness.test
+USER 65532:65532
+
+# A CronJob selects a suite + budget via args, e.g.
+#   ["-test.run", "TestBenchmark", "-test.v", "-test.timeout", "0"]
+ENTRYPOINT ["/harness.test"]

From 46b8363ded6fcd49ba3ad4460e4eb8b42c0371d8 Mon Sep 17 00:00:00 2001
From: bdchatham
Date: Tue, 23 Jun 2026 13:17:30 -0700
Subject: [PATCH 2/2] build(harness): isolate the harness build cache from the controller image

Per the supply-chain review: the harness image is a test-image build over the
whole test tree; sharing the controller's mode=max build cache would let a
poisoned test-build layer reach the production controller image. Give the
harness build its own cache ref (sei/build-cache:integration-harness).

Note in .dockerignore that the re-included test files enter the
controller/seitask contexts but are inert there.

Co-Authored-By: Claude Opus 4.8
---
Note (below the `---` marker, so outside the commit message): in ecr.yml only
the integration-harness step's cache-from/cache-to refs change; its push,
platforms and tags lines appear as unchanged context.

 .dockerignore             | 4 +++-
 .github/workflows/ecr.yml | 8 ++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index 621d317b..285bc19e 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -29,6 +29,8 @@
 # entirely *_test.go (excluded above) behind the integration build tag, and its
 # fault/seiload manifests are //go:embed-ed *.tmpl (excluded by **). Without
 # these the `go test -c -tags integration ./test/integration/` build fails with
-# "directory not found".
+# "directory not found". These also enter the controller + seitask build contexts
+# (both COPY . .) but are inert there — neither build compiles ./test/integration
+# nor passes the integration tag.
 !test/integration/**/*_test.go
 !test/integration/**/*.tmpl
diff --git a/.github/workflows/ecr.yml b/.github/workflows/ecr.yml
index 53a7f547..ece11d73 100644
--- a/.github/workflows/ecr.yml
+++ b/.github/workflows/ecr.yml
@@ -66,5 +66,9 @@ jobs:
           push: true
           platforms: linux/amd64
           tags: ${{ steps.ecr-login.outputs.registry }}/sei/integration-harness:${{ inputs.tag || github.sha }}
-          cache-from: type=registry,ref=${{ steps.ecr-login.outputs.registry }}/sei/build-cache:shared
-          cache-to: type=registry,ref=${{ steps.ecr-login.outputs.registry }}/sei/build-cache:shared,mode=max
+          # Dedicated cache ref (NOT the shared one the controller image uses):
+          # this is a test-image build over the whole test tree, so isolating its
+          # cache keeps a poisoned test-build layer out of the production
+          # controller image's build.
+          cache-from: type=registry,ref=${{ steps.ecr-login.outputs.registry }}/sei/build-cache:integration-harness
+          cache-to: type=registry,ref=${{ steps.ecr-login.outputs.registry }}/sei/build-cache:integration-harness,mode=max