diff --git a/.github/actions/docker-cache/action.yml b/.github/actions/docker-cache/action.yml deleted file mode 100644 index 253885e2..00000000 --- a/.github/actions/docker-cache/action.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: 'Docker Image Cache' -description: 'Cache and load Docker images for CI jobs' - -inputs: - images: - description: 'Space-separated list of Docker images to cache' - required: true - -runs: - using: 'composite' - steps: - - name: Generate cache key from images - id: cache-key - shell: bash - env: - IMAGES_INPUT: ${{ inputs.images }} - run: | - # Create a stable hash from the sorted image list - # Using env var to prevent script injection - IMAGES_HASH=$(echo "$IMAGES_INPUT" | tr ' ' '\n' | sort | md5sum | cut -d' ' -f1) - echo "key=docker-${{ runner.os }}-${IMAGES_HASH}" >> $GITHUB_OUTPUT - - - name: Cache Docker images - uses: actions/cache@v5 - id: docker-cache - with: - path: /tmp/docker-cache - key: ${{ steps.cache-key.outputs.key }} - - - name: Load cached Docker images - if: steps.docker-cache.outputs.cache-hit == 'true' - shell: bash - run: | - echo "Loading cached images..." - for f in /tmp/docker-cache/*.tar.zst; do - zstd -d -c "$f" | docker load & - done - wait - docker images - - - name: Pull and save Docker images - if: steps.docker-cache.outputs.cache-hit != 'true' - shell: bash - env: - IMAGES_INPUT: ${{ inputs.images }} - run: | - mkdir -p /tmp/docker-cache - - echo "Pulling images in parallel..." - for img in $IMAGES_INPUT; do - docker pull "$img" & - done - wait - - echo "Saving images with zstd compression..." 
- for img in $IMAGES_INPUT; do - # Create filename from image name (replace special chars) - filename=$(echo "$img" | tr '/:' '_') - docker save "$img" | zstd -T0 -3 > "/tmp/docker-cache/${filename}.tar.zst" & - done - wait - - echo "Cache size:" - du -sh /tmp/docker-cache/ diff --git a/.github/actions/e2e-boot/action.yml b/.github/actions/e2e-boot/action.yml new file mode 100644 index 00000000..01850ea1 --- /dev/null +++ b/.github/actions/e2e-boot/action.yml @@ -0,0 +1,43 @@ +name: 'E2E Boot' +description: 'Kick off slow background tasks: GHCR auth, image pull + infra pre-warm, k3s install' + +inputs: + image-tag: + description: 'GHCR image tag (e.g., sha-abc1234)' + required: true + github-token: + description: 'GitHub token for GHCR authentication' + required: true + +runs: + using: 'composite' + steps: + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ inputs.github-token }} + + - name: Pull images and pre-warm infra (background) + shell: bash + env: + IMAGE_TAG: ${{ inputs.image-tag }} + run: | + nohup bash -c ' + IMAGE_TAG='"$IMAGE_TAG"' docker compose pull --quiet 2>&1 + echo "--- pull done, starting infra ---" + docker compose up -d --no-build \ + mongo redis shared-ca zookeeper-certgen zookeeper kafka schema-registry 2>&1 + echo $? > /tmp/infra-pull.exit + ' > /tmp/infra-pull.log 2>&1 & + echo $! 
> /tmp/infra-pull.pid + + - name: Install k3s + shell: bash + run: | + K3S_TAG=$(echo "$K3S_VERSION" | sed 's/+/%2B/g') + curl -sfL "https://raw.githubusercontent.com/k3s-io/k3s/${K3S_TAG}/install.sh" -o /tmp/k3s-install.sh + echo "$K3S_INSTALL_SHA256 /tmp/k3s-install.sh" | sha256sum -c - + chmod +x /tmp/k3s-install.sh + INSTALL_K3S_VERSION="$K3S_VERSION" INSTALL_K3S_EXEC="--disable=traefik --bind-address 0.0.0.0 --tls-san host.docker.internal" /tmp/k3s-install.sh diff --git a/.github/actions/e2e-ready/action.yml b/.github/actions/e2e-ready/action.yml new file mode 100644 index 00000000..fb794382 --- /dev/null +++ b/.github/actions/e2e-ready/action.yml @@ -0,0 +1,85 @@ +name: 'E2E Ready' +description: 'Finalize k3s, wait for infra, start compose stack, health-check, seed test users' + +inputs: + image-tag: + description: 'GHCR image tag (e.g., sha-abc1234)' + required: true + wait-for-frontend: + description: 'Also wait for frontend health check (default: false)' + required: false + default: 'false' + +runs: + using: 'composite' + steps: + - name: Finalize k3s + shell: bash + run: | + mkdir -p /home/runner/.kube + sudo k3s kubectl config view --raw > /home/runner/.kube/config + sudo chmod 600 /home/runner/.kube/config + export KUBECONFIG=/home/runner/.kube/config + timeout 90 bash -c 'until kubectl cluster-info 2>/dev/null; do sleep 3; done' + kubectl create namespace integr8scode --dry-run=client -o yaml | kubectl apply -f - + sed -E 's#https://(127\.0\.0\.1|0\.0\.0\.0):6443#https://host.docker.internal:6443#g' \ + /home/runner/.kube/config > backend/kubeconfig.yaml + chmod 644 backend/kubeconfig.yaml + + - name: Start cert-generator (background) + shell: bash + env: + IMAGE_TAG: ${{ inputs.image-tag }} + run: | + nohup docker compose up -d --no-build cert-generator \ + > /tmp/cert-gen.log 2>&1 & + + - name: Use test environment config + shell: bash + run: | + cp backend/config.test.toml backend/config.toml + cp backend/secrets.example.toml backend/secrets.toml + 
+ - name: Wait for image pull and infra + shell: bash + run: | + if [ -f /tmp/infra-pull.pid ]; then + PID=$(cat /tmp/infra-pull.pid) + if kill -0 "$PID" 2>/dev/null; then + echo "Waiting for image pull + infra startup..." + tail --pid="$PID" -f /dev/null 2>/dev/null || true + fi + fi + cat /tmp/infra-pull.log 2>/dev/null || true + cat /tmp/cert-gen.log 2>/dev/null || true + if [ -f /tmp/infra-pull.exit ]; then + EXIT_CODE=$(cat /tmp/infra-pull.exit) + if [ "$EXIT_CODE" != "0" ]; then + echo "::error::Background image pull / infra pre-warm failed (exit $EXIT_CODE)" + exit 1 + fi + fi + + - name: Start stack + shell: bash + env: + IMAGE_TAG: ${{ inputs.image-tag }} + run: docker compose up -d --no-build + + - name: Wait for services + shell: bash + env: + WAIT_FOR_FRONTEND: ${{ inputs.wait-for-frontend }} + run: | + echo "Waiting for backend health..." + timeout 120 bash -c 'until curl -ksf https://localhost/api/v1/health/live 2>/dev/null; do sleep 2; done' + echo "Backend ready" + if [ "$WAIT_FOR_FRONTEND" = "true" ]; then + echo "Waiting for frontend health..." 
+ timeout 60 bash -c 'until curl -ksf https://localhost:5001 2>/dev/null; do sleep 2; done' + echo "Frontend ready" + fi + + - name: Seed test users + shell: bash + run: docker compose exec -T backend uv run python scripts/seed_users.py diff --git a/.github/actions/k3s-setup/action.yml b/.github/actions/k3s-setup/action.yml deleted file mode 100644 index d21c4a43..00000000 --- a/.github/actions/k3s-setup/action.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: 'K3s Setup' -description: 'Install k3s and create kubeconfig for Docker containers' - -inputs: - namespace: - description: 'Kubernetes namespace to create' - required: false - default: 'integr8scode' - kubeconfig-path: - description: 'Path to write the Docker-accessible kubeconfig' - required: false - default: 'backend/kubeconfig.yaml' - -outputs: - kubeconfig: - description: 'Path to the kubeconfig file for Docker containers' - value: ${{ inputs.kubeconfig-path }} - -runs: - using: 'composite' - steps: - - name: Install k3s - shell: bash - run: | - # --bind-address 0.0.0.0: Listen on all interfaces so Docker containers can reach it - # --tls-san host.docker.internal: Include in cert SANs for Docker container access - curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="--disable=traefik --bind-address 0.0.0.0 --tls-san host.docker.internal" sh - - mkdir -p /home/runner/.kube - sudo k3s kubectl config view --raw > /home/runner/.kube/config - sudo chmod 600 /home/runner/.kube/config - - - name: Wait for k3s to be ready - shell: bash - run: | - export KUBECONFIG=/home/runner/.kube/config - timeout 90 bash -c 'until kubectl cluster-info; do sleep 5; done' - - - name: Create namespace - shell: bash - env: - NAMESPACE: ${{ inputs.namespace }} - run: | - export KUBECONFIG=/home/runner/.kube/config - kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f - - - - name: Create kubeconfig for Docker containers - shell: bash - env: - KUBECONFIG_PATH: ${{ inputs.kubeconfig-path }} - run: | - # Replace 
localhost/0.0.0.0 with host.docker.internal for container access - # (k3s may use 0.0.0.0 when started with --bind-address 0.0.0.0) - sed -E 's#https://(127\.0\.0\.1|0\.0\.0\.0):6443#https://host.docker.internal:6443#g' \ - /home/runner/.kube/config > "$KUBECONFIG_PATH" - chmod 644 "$KUBECONFIG_PATH" - echo "Kubeconfig written to $KUBECONFIG_PATH" - echo "Server URL: $(grep server "$KUBECONFIG_PATH" | head -1)" diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 2cdd4f40..9778e7a3 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -1,215 +1,70 @@ -name: Docker Build, Scan & Publish +name: Docker Scan & Promote +# Runs after Stack Tests completes on main — promotes sha-xxx → latest. +# "latest" is NEVER set during build. Only this workflow can set it, +# and only after all tests pass. If any test fails, latest stays unchanged. on: - push: - branches: [ main ] - tags: [ 'v*' ] - pull_request: - branches: [ main ] + workflow_run: + workflows: ["Stack Tests"] + types: [completed] workflow_dispatch: + inputs: + sha: + description: 'Full commit SHA to promote (defaults to latest main)' + required: false env: REGISTRY: ghcr.io jobs: - build-base: - name: Build Base + scan: + name: Scan ${{ matrix.image }} + if: > + github.event_name == 'workflow_dispatch' || + (github.event.workflow_run.conclusion == 'success' && + github.event.workflow_run.head_branch == 'main') runs-on: ubuntu-latest permissions: contents: read - packages: write - - outputs: - image-tag: ${{ steps.image-tag.outputs.tag }} - - steps: - - uses: actions/checkout@v6 - - - name: Set lowercase image prefix - run: echo "IMAGE_PREFIX=${GITHUB_REPOSITORY_OWNER,,}/integr8scode" >> $GITHUB_ENV - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - 
name: Extract metadata - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/base - tags: | - type=ref,event=branch - type=ref,event=pr - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=sha,prefix=sha- - type=raw,value=latest,enable={{is_default_branch}} - - - name: Determine image tag for dependent builds - id: image-tag - run: | - if [ "${{ github.event_name }}" = "pull_request" ]; then - echo "tag=pr-${{ github.event.number }}" >> $GITHUB_OUTPUT - else - echo "tag=latest" >> $GITHUB_OUTPUT - fi - - - name: Build and push - uses: docker/build-push-action@v6 - with: - context: ./backend - file: ./backend/Dockerfile.base - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha,scope=base - cache-to: type=gha,mode=max,scope=base - - build-backend: - name: Build Backend - needs: build-base - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - - outputs: - image-ref: ${{ steps.image-ref.outputs.ref }} - - steps: - - uses: actions/checkout@v6 - - - name: Set lowercase image prefix - run: echo "IMAGE_PREFIX=${GITHUB_REPOSITORY_OWNER,,}/integr8scode" >> $GITHUB_ENV - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/backend - tags: | - type=ref,event=branch - type=ref,event=pr - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=sha,prefix=sha- - type=raw,value=latest,enable={{is_default_branch}} - - - name: Set image reference for scan - id: image-ref - run: | - if [ "${{ github.event_name }}" = "pull_request" ]; then - echo "ref=${{ 
env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/backend:pr-${{ github.event.number }}" >> $GITHUB_OUTPUT - else - echo "ref=${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/backend:latest" >> $GITHUB_OUTPUT - fi - - - name: Build and push - uses: docker/build-push-action@v6 - with: - context: ./backend - file: ./backend/Dockerfile - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha,scope=backend - cache-to: type=gha,mode=max,scope=backend - build-contexts: | - base=docker-image://${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/base:${{ needs.build-base.outputs.image-tag }} - - build-frontend: - name: Build Frontend - needs: build-base - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - - outputs: - image-ref: ${{ steps.image-ref.outputs.ref }} - + security-events: write + packages: read + strategy: + fail-fast: false + matrix: + image: + - base + - backend + - frontend + - coordinator + - k8s-worker + - pod-monitor + - result-processor + - saga-orchestrator + - event-replay + - dlq-processor + - cert-generator + - zookeeper-certgen steps: - uses: actions/checkout@v6 - - name: Set lowercase image prefix - run: echo "IMAGE_PREFIX=${GITHUB_REPOSITORY_OWNER,,}/integr8scode" >> $GITHUB_ENV - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/frontend - tags: | - type=ref,event=branch - type=ref,event=pr - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=sha,prefix=sha- - type=raw,value=latest,enable={{is_default_branch}} - - - name: Set image reference for scan - id: image-ref + - name: Compute image ref + id: ref run: | - if [ 
"${{ github.event_name }}" = "pull_request" ]; then - echo "ref=${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/frontend:pr-${{ github.event.number }}" >> $GITHUB_OUTPUT + PREFIX="${GITHUB_REPOSITORY_OWNER,,}/integr8scode" + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + SHA="${{ github.event.inputs.sha || github.sha }}" else - echo "ref=${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/frontend:latest" >> $GITHUB_OUTPUT + SHA="${{ github.event.workflow_run.head_sha }}" fi - - - name: Build and push - uses: docker/build-push-action@v6 - with: - context: ./frontend - file: ./frontend/Dockerfile.prod - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha,scope=frontend - cache-to: type=gha,mode=max,scope=frontend - - scan-backend: - name: Scan Backend - needs: build-backend - runs-on: ubuntu-latest - permissions: - contents: read - security-events: write - - steps: - - uses: actions/checkout@v6 + TAG="sha-${SHA::7}" + echo "image=${{ env.REGISTRY }}/$PREFIX/${{ matrix.image }}:$TAG" >> $GITHUB_OUTPUT - name: Run Trivy vulnerability scanner uses: aquasecurity/trivy-action@0.33.1 with: - image-ref: ${{ needs.build-backend.outputs.image-ref }} + image-ref: ${{ steps.ref.outputs.image }} format: 'sarif' - output: 'trivy-backend-results.sarif' + output: 'trivy-${{ matrix.image }}-results.sarif' ignore-unfixed: true severity: 'CRITICAL,HIGH' timeout: '5m0s' @@ -220,56 +75,85 @@ jobs: if: always() uses: github/codeql-action/upload-sarif@v4 with: - sarif_file: 'trivy-backend-results.sarif' - category: 'trivy-backend' - - scan-frontend: - name: Scan Frontend - needs: build-frontend + sarif_file: 'trivy-${{ matrix.image }}-results.sarif' + category: 'trivy-${{ matrix.image }}' + + # Promote SHA tag → latest using crane (registry-level manifest copy, no rebuild) + promote: + name: Promote to Latest + needs: [scan] + if: > + github.event_name == 'workflow_dispatch' || + (github.event.workflow_run.conclusion == 
'success' && + github.event.workflow_run.head_branch == 'main') runs-on: ubuntu-latest permissions: - contents: read - security-events: write - + packages: write steps: - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@0.33.1 + - name: Log in to GHCR + uses: docker/login-action@v3 with: - image-ref: ${{ needs.build-frontend.outputs.image-ref }} - format: 'sarif' - output: 'trivy-frontend-results.sarif' - ignore-unfixed: true - severity: 'CRITICAL,HIGH' - timeout: '5m0s' - version: 'v0.68.2' + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - - name: Upload Trivy scan results - if: always() - uses: github/codeql-action/upload-sarif@v4 - with: - sarif_file: 'trivy-frontend-results.sarif' - category: 'trivy-frontend' + - name: Install crane + uses: imjasonh/setup-crane@v0.4 + + - name: Promote images (SHA → latest) + run: | + PREFIX="${GITHUB_REPOSITORY_OWNER,,}/integr8scode" + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + SHA="${{ github.event.inputs.sha || github.sha }}" + else + SHA="${{ github.event.workflow_run.head_sha }}" + fi + TAG="sha-${SHA::7}" + + echo "Promoting tag: $TAG → latest" + echo "" + + crane copy "$REGISTRY/$PREFIX/base:$TAG" "$REGISTRY/$PREFIX/base:latest" + crane copy "$REGISTRY/$PREFIX/backend:$TAG" "$REGISTRY/$PREFIX/backend:latest" + crane copy "$REGISTRY/$PREFIX/frontend:$TAG" "$REGISTRY/$PREFIX/frontend:latest" + crane copy "$REGISTRY/$PREFIX/coordinator:$TAG" "$REGISTRY/$PREFIX/coordinator:latest" + crane copy "$REGISTRY/$PREFIX/k8s-worker:$TAG" "$REGISTRY/$PREFIX/k8s-worker:latest" + crane copy "$REGISTRY/$PREFIX/pod-monitor:$TAG" "$REGISTRY/$PREFIX/pod-monitor:latest" + crane copy "$REGISTRY/$PREFIX/result-processor:$TAG" "$REGISTRY/$PREFIX/result-processor:latest" + crane copy "$REGISTRY/$PREFIX/saga-orchestrator:$TAG" "$REGISTRY/$PREFIX/saga-orchestrator:latest" + crane copy "$REGISTRY/$PREFIX/event-replay:$TAG" 
"$REGISTRY/$PREFIX/event-replay:latest" + crane copy "$REGISTRY/$PREFIX/dlq-processor:$TAG" "$REGISTRY/$PREFIX/dlq-processor:latest" + crane copy "$REGISTRY/$PREFIX/cert-generator:$TAG" "$REGISTRY/$PREFIX/cert-generator:latest" + crane copy "$REGISTRY/$PREFIX/zookeeper-certgen:$TAG" "$REGISTRY/$PREFIX/zookeeper-certgen:latest" summary: name: Summary - if: github.event_name != 'pull_request' - needs: [build-base, build-backend, build-frontend, scan-backend, scan-frontend] + needs: [promote] runs-on: ubuntu-latest - steps: - - name: Set lowercase image prefix - run: echo "IMAGE_PREFIX=${GITHUB_REPOSITORY_OWNER,,}/integr8scode" >> $GITHUB_ENV - - name: Generate summary run: | - echo "## Docker Images Published" >> $GITHUB_STEP_SUMMARY + PREFIX="${GITHUB_REPOSITORY_OWNER,,}/integr8scode" + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + SHA="${{ github.event.inputs.sha || github.sha }}" + else + SHA="${{ github.event.workflow_run.head_sha }}" + fi + TAG="sha-${SHA::7}" + + echo "## Docker Images Promoted to Latest" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then + echo "Images promoted manually from \`$TAG\` to \`latest\` — Stack Tests may not have run." >> $GITHUB_STEP_SUMMARY + else + echo "All Stack Tests passed. Images promoted from \`$TAG\` to \`latest\`." 
>> $GITHUB_STEP_SUMMARY + fi echo "" >> $GITHUB_STEP_SUMMARY echo "| Image | Pull Command |" >> $GITHUB_STEP_SUMMARY echo "|-------|--------------|" >> $GITHUB_STEP_SUMMARY - echo "| Base | \`docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/base:latest\` |" >> $GITHUB_STEP_SUMMARY - echo "| Backend | \`docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/backend:latest\` |" >> $GITHUB_STEP_SUMMARY - echo "| Frontend | \`docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}/frontend:latest\` |" >> $GITHUB_STEP_SUMMARY + echo "| Base | \`docker pull $REGISTRY/$PREFIX/base:latest\` |" >> $GITHUB_STEP_SUMMARY + echo "| Backend | \`docker pull $REGISTRY/$PREFIX/backend:latest\` |" >> $GITHUB_STEP_SUMMARY + echo "| Frontend | \`docker pull $REGISTRY/$PREFIX/frontend:latest\` |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "### Scan Results" >> $GITHUB_STEP_SUMMARY - echo "- Backend scan: ✅ Passed" >> $GITHUB_STEP_SUMMARY - echo "- Frontend scan: ✅ Passed" >> $GITHUB_STEP_SUMMARY + echo "### Security Scans" >> $GITHUB_STEP_SUMMARY + echo "All 12 images scanned with Trivy (CRITICAL + HIGH, unfixed ignored)." 
>> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/frontend-ci.yml b/.github/workflows/frontend-ci.yml index fe29a033..e6303aa1 100644 --- a/.github/workflows/frontend-ci.yml +++ b/.github/workflows/frontend-ci.yml @@ -2,12 +2,12 @@ name: Frontend CI on: push: - branches: [main, dev] + branches: [main] paths: - 'frontend/**' - '.github/workflows/frontend-ci.yml' pull_request: - branches: [main, dev] + branches: [main] paths: - 'frontend/**' - '.github/workflows/frontend-ci.yml' diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index d4752b08..34070e65 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -2,9 +2,9 @@ name: MyPy Type Checking on: push: - branches: [ main, dev ] + branches: [ main ] pull_request: - branches: [ main, dev ] + branches: [ main ] workflow_dispatch: jobs: diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index 3ddec835..c670ce34 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -2,9 +2,9 @@ name: Ruff Linting on: push: - branches: [ main, dev ] + branches: [ main ] pull_request: - branches: [ main, dev ] + branches: [ main ] workflow_dispatch: jobs: diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 4452c432..10837590 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -2,9 +2,9 @@ name: Security Scanning on: push: - branches: [ main, dev ] + branches: [ main ] pull_request: - branches: [ main, dev ] + branches: [ main ] workflow_dispatch: jobs: diff --git a/.github/workflows/stack-tests.yml b/.github/workflows/stack-tests.yml index c2804f73..373565ab 100644 --- a/.github/workflows/stack-tests.yml +++ b/.github/workflows/stack-tests.yml @@ -2,19 +2,22 @@ name: Stack Tests on: push: - branches: [main, dev] + branches: [main] + tags: ['v*'] paths: - 'backend/**' - 'frontend/**' + - 'cert-generator/**' - 'docker-compose.yaml' - 'deploy.sh' - '.github/workflows/stack-tests.yml' - 
'.github/actions/**' pull_request: - branches: [main, dev] + branches: [main] paths: - 'backend/**' - 'frontend/**' + - 'cert-generator/**' - 'docker-compose.yaml' - 'deploy.sh' - '.github/workflows/stack-tests.yml' @@ -22,11 +25,14 @@ on: workflow_dispatch: env: + REGISTRY: ghcr.io MONGO_IMAGE: mongo:8.0 REDIS_IMAGE: redis:7-alpine KAFKA_IMAGE: confluentinc/cp-kafka:7.8.2 ZOOKEEPER_IMAGE: confluentinc/cp-zookeeper:7.8.2 SCHEMA_REGISTRY_IMAGE: confluentinc/cp-schema-registry:7.8.2 + K3S_VERSION: v1.32.11+k3s1 + K3S_INSTALL_SHA256: d75e014f2d2ab5d30a318efa5c326f3b0b7596f194afcff90fa7a7a91166d5f7 jobs: # Fast unit tests (no infrastructure needed) @@ -102,17 +108,41 @@ jobs: fail_ci_if_error: false verbose: true - # Build all images once, cache for test jobs + # Build all images, push to GHCR with immutable SHA tag. + # Fork PRs skip GHCR push (no write access) — E2E tests require pushed images. build-images: - name: Build Images + name: Build & Push Images needs: [backend-unit, frontend-unit] runs-on: ubuntu-latest + permissions: + contents: read + packages: write + outputs: + sha-tag: ${{ steps.tags.outputs.sha-tag }} + image-prefix: ${{ steps.tags.outputs.image-prefix }} steps: - uses: actions/checkout@v6 - name: Setup Docker Buildx uses: docker/setup-buildx-action@v3 + - name: Log in to GHCR + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Compute image tags + id: tags + run: | + PREFIX="${GITHUB_REPOSITORY_OWNER,,}/integr8scode" + SHA_TAG="sha-${GITHUB_SHA::7}" + echo "sha-tag=$SHA_TAG" >> "$GITHUB_OUTPUT" + echo "image-prefix=$PREFIX" >> "$GITHUB_OUTPUT" + + # ── Base image (cached separately — rarely changes) ────────────── - name: Cache base image uses: actions/cache@v5 id: base-cache @@ -139,15 +169,19 @@ jobs: if: steps.base-cache.outputs.cache-hit != 'true' run: docker save integr8scode-base:latest 
| zstd -T0 -3 > /tmp/base-image.tar.zst - - name: Build all images + # ── Backend + workers (depend on local base image) ─────────────── + - name: Build backend and worker images run: | docker build -t integr8scode-backend:latest --build-context base=docker-image://integr8scode-base:latest -f ./backend/Dockerfile ./backend - docker build -t integr8scode-coordinator:latest -f backend/workers/Dockerfile.coordinator --build-context base=docker-image://integr8scode-base:latest ./backend - docker build -t integr8scode-k8s-worker:latest -f backend/workers/Dockerfile.k8s_worker --build-context base=docker-image://integr8scode-base:latest ./backend - docker build -t integr8scode-pod-monitor:latest -f backend/workers/Dockerfile.pod_monitor --build-context base=docker-image://integr8scode-base:latest ./backend - docker build -t integr8scode-result-processor:latest -f backend/workers/Dockerfile.result_processor --build-context base=docker-image://integr8scode-base:latest ./backend - docker build -t integr8scode-saga-orchestrator:latest -f backend/workers/Dockerfile.saga_orchestrator --build-context base=docker-image://integr8scode-base:latest ./backend - + docker build -t integr8scode-coordinator:latest --build-context base=docker-image://integr8scode-base:latest -f backend/workers/Dockerfile.coordinator ./backend + docker build -t integr8scode-k8s-worker:latest --build-context base=docker-image://integr8scode-base:latest -f backend/workers/Dockerfile.k8s_worker ./backend + docker build -t integr8scode-pod-monitor:latest --build-context base=docker-image://integr8scode-base:latest -f backend/workers/Dockerfile.pod_monitor ./backend + docker build -t integr8scode-result-processor:latest --build-context base=docker-image://integr8scode-base:latest -f backend/workers/Dockerfile.result_processor ./backend + docker build -t integr8scode-saga-orchestrator:latest --build-context base=docker-image://integr8scode-base:latest -f backend/workers/Dockerfile.saga_orchestrator ./backend + 
docker build -t integr8scode-event-replay:latest --build-context base=docker-image://integr8scode-base:latest -f backend/workers/Dockerfile.event_replay ./backend + docker build -t integr8scode-dlq-processor:latest --build-context base=docker-image://integr8scode-base:latest -f backend/workers/Dockerfile.dlq_processor ./backend + + # ── Utility images (GHA-cached, independent of base) ──────────── - name: Build cert-generator image uses: docker/build-push-action@v6 with: @@ -158,6 +192,17 @@ jobs: cache-from: type=gha,scope=cert-generator cache-to: type=gha,mode=max,scope=cert-generator + - name: Build zookeeper-certgen image + uses: docker/build-push-action@v6 + with: + context: ./backend/zookeeper + file: ./backend/zookeeper/Dockerfile.certgen + load: true + tags: integr8scode-zookeeper-certgen:latest + cache-from: type=gha,scope=zookeeper-certgen + cache-to: type=gha,mode=max,scope=zookeeper-certgen + + # ── Frontend (dev for E2E, prod for scanning/deployment) ───────── - name: Build frontend image uses: docker/build-push-action@v6 with: @@ -168,61 +213,77 @@ jobs: cache-from: type=gha,scope=frontend cache-to: type=gha,mode=max,scope=frontend - - name: Save all images - run: | - docker save \ - integr8scode-backend:latest \ - integr8scode-coordinator:latest \ - integr8scode-k8s-worker:latest \ - integr8scode-pod-monitor:latest \ - integr8scode-result-processor:latest \ - integr8scode-saga-orchestrator:latest \ - integr8scode-cert-generator:latest \ - integr8scode-frontend:latest \ - | zstd -T0 -3 > /tmp/all-images.tar.zst - - - name: Upload images artifact - uses: actions/upload-artifact@v6 + - name: Build frontend-prod image + uses: docker/build-push-action@v6 with: - name: docker-images - path: /tmp/all-images.tar.zst - retention-days: 1 - - # Parallel test jobs (backend-e2e, frontend-e2e) + context: ./frontend + file: ./frontend/Dockerfile.prod + load: true + tags: integr8scode-frontend-prod:latest + cache-from: type=gha,scope=frontend-prod + cache-to: 
type=gha,mode=max,scope=frontend-prod + + # ── Push all images to GHCR in parallel ──────────────────────── + - name: Push all images to GHCR + if: ${{ !github.event.pull_request.head.repo.fork }} + env: + TAG: ${{ steps.tags.outputs.sha-tag }} + IMG: ${{ env.REGISTRY }}/${{ steps.tags.outputs.image-prefix }} + run: | + # Tag all images for GHCR + docker tag integr8scode-base:latest "$IMG/base:$TAG" + docker tag integr8scode-backend:latest "$IMG/backend:$TAG" + docker tag integr8scode-coordinator:latest "$IMG/coordinator:$TAG" + docker tag integr8scode-k8s-worker:latest "$IMG/k8s-worker:$TAG" + docker tag integr8scode-pod-monitor:latest "$IMG/pod-monitor:$TAG" + docker tag integr8scode-result-processor:latest "$IMG/result-processor:$TAG" + docker tag integr8scode-saga-orchestrator:latest "$IMG/saga-orchestrator:$TAG" + docker tag integr8scode-event-replay:latest "$IMG/event-replay:$TAG" + docker tag integr8scode-dlq-processor:latest "$IMG/dlq-processor:$TAG" + docker tag integr8scode-cert-generator:latest "$IMG/cert-generator:$TAG" + docker tag integr8scode-zookeeper-certgen:latest "$IMG/zookeeper-certgen:$TAG" + docker tag integr8scode-frontend:latest "$IMG/frontend-dev:$TAG" + docker tag integr8scode-frontend-prod:latest "$IMG/frontend:$TAG" + + # Push all 13 images in parallel, tracking each PID + declare -A PIDS + for name in base backend coordinator k8s-worker pod-monitor \ + result-processor saga-orchestrator event-replay \ + dlq-processor cert-generator zookeeper-certgen \ + frontend-dev frontend; do + docker push "$IMG/$name:$TAG" & + PIDS[$name]=$! + done + + FAILED=0 + for name in "${!PIDS[@]}"; do + if ! 
wait "${PIDS[$name]}"; then + echo "::error::Failed to push $name" + FAILED=1 + fi + done + [ "$FAILED" -eq 0 ] || exit 1 + + # Parallel E2E test jobs — compose pulls from GHCR using IMAGE_TAG backend-e2e: name: Backend E2E Tests needs: [build-images] + if: ${{ !github.event.pull_request.head.repo.fork }} runs-on: ubuntu-latest + permissions: + contents: read + packages: read steps: - uses: actions/checkout@v6 - - name: Cache and load Docker images - uses: ./.github/actions/docker-cache + - uses: ./.github/actions/e2e-boot with: - images: ${{ env.MONGO_IMAGE }} ${{ env.REDIS_IMAGE }} ${{ env.KAFKA_IMAGE }} ${{ env.ZOOKEEPER_IMAGE }} ${{ env.SCHEMA_REGISTRY_IMAGE }} + image-tag: ${{ needs.build-images.outputs.sha-tag }} + github-token: ${{ secrets.GITHUB_TOKEN }} - - name: Download built images - uses: actions/download-artifact@v7 + - uses: ./.github/actions/e2e-ready with: - name: docker-images - path: /tmp - - - name: Load built images - run: zstd -d -c /tmp/all-images.tar.zst | docker load - - - name: Setup k3s - uses: ./.github/actions/k3s-setup - - - name: Use test environment config - run: | - cp backend/config.test.toml backend/config.toml - cp backend/secrets.example.toml backend/secrets.toml - - - name: Start stack - run: ./deploy.sh dev --wait - - - name: Seed test users - run: docker compose exec -T backend uv run python scripts/seed_users.py + image-tag: ${{ needs.build-images.outputs.sha-tag }} - name: Run E2E tests timeout-minutes: 15 @@ -253,19 +314,11 @@ jobs: run: | mkdir -p logs docker compose logs --timestamps > logs/docker-compose.log 2>&1 - docker compose logs --timestamps backend > logs/backend.log 2>&1 - docker compose logs --timestamps mongo > logs/mongo.log 2>&1 || true - docker compose logs --timestamps redis > logs/redis.log 2>&1 || true - docker compose logs --timestamps kafka > logs/kafka.log 2>&1 || true - docker compose logs --timestamps zookeeper > logs/zookeeper.log 2>&1 || true - docker compose logs --timestamps schema-registry > 
logs/schema-registry.log 2>&1 || true - docker compose logs --timestamps coordinator > logs/coordinator.log 2>&1 || true - docker compose logs --timestamps k8s-worker > logs/k8s-worker.log 2>&1 || true - docker compose logs --timestamps pod-monitor > logs/pod-monitor.log 2>&1 || true - docker compose logs --timestamps result-processor > logs/result-processor.log 2>&1 || true - docker compose logs --timestamps saga-orchestrator > logs/saga-orchestrator.log 2>&1 || true - docker compose logs --timestamps event-replay > logs/event-replay.log 2>&1 || true - docker compose logs --timestamps dlq-processor > logs/dlq-processor.log 2>&1 || true + for svc in backend mongo redis kafka zookeeper schema-registry \ + coordinator k8s-worker pod-monitor result-processor \ + saga-orchestrator event-replay dlq-processor; do + docker compose logs --timestamps "$svc" > "logs/$svc.log" 2>&1 || true + done kubectl get events --sort-by='.metadata.creationTimestamp' -A > logs/k8s-events.log 2>&1 || true - name: Upload logs @@ -276,12 +329,28 @@ jobs: path: logs/ frontend-e2e: - name: Frontend E2E Tests + name: Frontend E2E (${{ matrix.shardIndex }}/${{ matrix.shardTotal }}) needs: [build-images] + if: ${{ !github.event.pull_request.head.repo.fork }} runs-on: ubuntu-latest + permissions: + contents: read + packages: read + strategy: + fail-fast: false + matrix: + shardIndex: [1, 2] + shardTotal: [2] steps: - uses: actions/checkout@v6 + # Phase 1: kick off image pull + infra + k3s in background + - uses: ./.github/actions/e2e-boot + with: + image-tag: ${{ needs.build-images.outputs.sha-tag }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + # Phase 2: Node + Playwright setup (overlaps with k3s boot + image pull) - name: Setup Node.js uses: actions/setup-node@v6 with: @@ -309,57 +378,36 @@ jobs: working-directory: frontend run: npx playwright install chromium - - name: Cache and load Docker images - uses: ./.github/actions/docker-cache - with: - images: ${{ env.MONGO_IMAGE }} ${{ 
env.REDIS_IMAGE }} ${{ env.KAFKA_IMAGE }} ${{ env.ZOOKEEPER_IMAGE }} ${{ env.SCHEMA_REGISTRY_IMAGE }} - - - name: Download built images - uses: actions/download-artifact@v7 + # Phase 3: finalize k3s + start stack (k3s has been booting since e2e-boot) + - uses: ./.github/actions/e2e-ready with: - name: docker-images - path: /tmp - - - name: Load built images - run: zstd -d -c /tmp/all-images.tar.zst | docker load - - - name: Setup k3s - uses: ./.github/actions/k3s-setup - - - name: Use test environment config - run: | - cp backend/config.test.toml backend/config.toml - cp backend/secrets.example.toml backend/secrets.toml - - - name: Start stack - run: ./deploy.sh dev --wait - - - name: Seed test users - run: docker compose exec -T backend uv run python scripts/seed_users.py + image-tag: ${{ needs.build-images.outputs.sha-tag }} + wait-for-frontend: 'true' - name: Run Playwright tests timeout-minutes: 10 working-directory: frontend - run: CI=true npx playwright test + run: CI=true npx playwright test --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} - name: Upload Playwright report uses: actions/upload-artifact@v6 if: always() with: - name: playwright-report + name: playwright-report-${{ matrix.shardIndex }} path: frontend/playwright-report/ - name: Collect logs on failure if: failure() run: | mkdir -p logs - docker compose logs > logs/docker-compose.log 2>&1 - docker compose logs backend > logs/backend.log 2>&1 - docker compose logs frontend > logs/frontend.log 2>&1 + docker compose logs --timestamps > logs/docker-compose.log 2>&1 + for svc in backend frontend; do + docker compose logs --timestamps "$svc" > "logs/$svc.log" 2>&1 || true + done - name: Upload logs if: failure() uses: actions/upload-artifact@v6 with: - name: frontend-e2e-logs + name: frontend-e2e-logs-${{ matrix.shardIndex }} path: logs/ diff --git a/deploy.sh b/deploy.sh index f25c480f..a7dc8bec 100755 --- a/deploy.sh +++ b/deploy.sh @@ -56,9 +56,12 @@ show_help() { echo "" echo "Commands:" 
echo " dev [options] Start full stack (docker-compose)" - echo " --build Rebuild images" + echo " --build Rebuild images locally" + echo " --no-build Use pre-built images only (no build fallback)" echo " --wait Wait for services to be healthy" echo " --timeout Health check timeout (default: 300)" + echo " --observability Include Grafana, Jaeger, etc." + echo " --debug Include observability + Kafdrop" echo " infra [options] Start infrastructure only (mongo, redis, kafka, etc.)" echo " --wait Wait for services to be healthy" echo " --timeout Health check timeout (default: 120)" @@ -97,8 +100,10 @@ cmd_dev() { print_header "Starting Local Development Environment" local BUILD_FLAG="" + local NO_BUILD_FLAG="" local WAIT_FLAG="" local WAIT_TIMEOUT="300" + local PROFILE_FLAGS="" while [[ $# -gt 0 ]]; do case "$1" in @@ -106,6 +111,10 @@ cmd_dev() { BUILD_FLAG="--build" print_info "Rebuilding images..." ;; + --no-build) + NO_BUILD_FLAG="--no-build" + print_info "Using pre-built images (skipping build)..." + ;; --wait) WAIT_FLAG="--wait" ;; @@ -113,6 +122,14 @@ cmd_dev() { shift WAIT_TIMEOUT="$1" ;; + --observability) + PROFILE_FLAGS="--profile observability" + print_info "Including observability stack (Grafana, Jaeger, etc.)" + ;; + --debug) + PROFILE_FLAGS="--profile observability --profile debug" + print_info "Including observability + debug tools (Kafdrop, etc.)" + ;; esac shift done @@ -122,7 +139,7 @@ cmd_dev() { WAIT_TIMEOUT_FLAG="--wait-timeout $WAIT_TIMEOUT" fi - docker compose --profile observability up -d $BUILD_FLAG $WAIT_FLAG $WAIT_TIMEOUT_FLAG + docker compose $PROFILE_FLAGS up -d $BUILD_FLAG $NO_BUILD_FLAG $WAIT_FLAG $WAIT_TIMEOUT_FLAG echo "" print_success "Development environment started!" 
@@ -130,9 +147,13 @@ cmd_dev() { echo "Services:" echo " Backend: https://localhost:443" echo " Frontend: https://localhost:5001" - echo " Kafdrop: http://localhost:9000" - echo " Jaeger: http://localhost:16686" - echo " Grafana: http://localhost:3000" + if [[ "$PROFILE_FLAGS" == *"debug"* ]]; then + echo " Kafdrop: http://localhost:9000" + fi + if [[ "$PROFILE_FLAGS" == *"observability"* ]]; then + echo " Jaeger: http://localhost:16686" + echo " Grafana: http://localhost:3000" + fi echo "" echo "Commands:" echo " ./deploy.sh logs # View all logs" diff --git a/docker-compose.yaml b/docker-compose.yaml index bbbb40a1..80c215c8 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,20 +1,21 @@ services: # Shared base image for all Python backend services base: + image: ghcr.io/hardmax71/integr8scode/base:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: Dockerfile.base - image: integr8scode-base:latest shared-ca: image: alpine:latest volumes: - shared_ca:/shared_ca - command: sh -c "mkdir -p /shared_ca && chmod 777 /shared_ca && echo 'Shared CA directory ready' && sleep 2" + command: sh -c "mkdir -p /shared_ca && chmod 777 /shared_ca && echo 'Shared CA directory ready'" networks: - app-network cert-generator: + image: ghcr.io/hardmax71/integr8scode/cert-generator:${IMAGE_TAG:-latest} build: context: ./cert-generator dockerfile: Dockerfile @@ -57,10 +58,10 @@ services: hard: 65536 healthcheck: test: echo 'db.runCommand("ping").ok' | mongosh localhost/integr8scode -u ${MONGO_ROOT_USER:-root} -p ${MONGO_ROOT_PASSWORD:-rootpassword} --authenticationDatabase admin --quiet - interval: 5s + interval: 3s timeout: 5s - retries: 10 - start_period: 10s + retries: 15 + start_period: 5s redis: image: redis:7-alpine @@ -74,12 +75,13 @@ services: - app-network healthcheck: test: ["CMD", "redis-cli", "ping"] - interval: 10s - timeout: 5s - retries: 5 - start_period: 10s + interval: 2s + timeout: 3s + retries: 10 + start_period: 2s backend: + image: 
ghcr.io/hardmax71/integr8scode/backend:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: Dockerfile @@ -120,14 +122,14 @@ services: extra_hosts: - "host.docker.internal:host-gateway" healthcheck: - # Simpler, reliable healthcheck: curl fails non-zero for HTTP >=400 with -f test: ["CMD-SHELL", "curl -k -f -s https://localhost:443/api/v1/health/live >/dev/null || exit 1"] - interval: 3s + interval: 2s timeout: 3s - retries: 50 - start_period: 10s + retries: 30 + start_period: 3s frontend: + image: ghcr.io/hardmax71/integr8scode/frontend-dev:${IMAGE_TAG:-latest} container_name: frontend build: context: ./frontend @@ -136,7 +138,7 @@ services: cert-generator: condition: service_completed_successfully backend: - condition: service_healthy + condition: service_started volumes: - ./frontend:/app - /app/node_modules @@ -151,10 +153,10 @@ services: - NODE_EXTRA_CA_CERTS=/shared_ca/mkcert-ca.pem healthcheck: test: ["CMD-SHELL", "curl -k -f -s https://localhost:5001 >/dev/null || exit 1"] - interval: 3s + interval: 2s timeout: 3s retries: 30 - start_period: 10s + start_period: 3s grafana: @@ -176,6 +178,7 @@ services: # Kafka Infrastructure for Event-Driven Design # Certificate generator for Zookeeper/Kafka SSL zookeeper-certgen: + image: ghcr.io/hardmax71/integr8scode/zookeeper-certgen:${IMAGE_TAG:-latest} build: context: ./backend/zookeeper dockerfile: Dockerfile.certgen @@ -258,10 +261,10 @@ services: hard: 65536 healthcheck: test: ["CMD-SHELL", "echo ruok | nc localhost 2181 | grep imok"] - interval: 5s + interval: 3s timeout: 5s - retries: 10 - start_period: 10s + retries: 15 + start_period: 5s kafka: image: confluentinc/cp-kafka:7.8.2 @@ -319,10 +322,10 @@ services: hard: 65536 healthcheck: test: ["CMD-SHELL", "kafka-broker-api-versions --bootstrap-server localhost:9092"] - interval: 5s + interval: 3s timeout: 10s - retries: 12 - start_period: 15s + retries: 15 + start_period: 3s schema-registry: image: confluentinc/cp-schema-registry:7.8.2 @@ -340,14 +343,15 
@@ services: - app-network healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8081/config"] - interval: 5s + interval: 3s timeout: 5s - retries: 10 - start_period: 10s + retries: 15 + start_period: 5s kafdrop: image: obsidiandynamics/kafdrop:3.31.0 container_name: kafdrop + profiles: ["debug"] depends_on: - kafka - schema-registry @@ -362,6 +366,7 @@ services: # Kafka topic initialization kafka-init: + image: ghcr.io/hardmax71/integr8scode/backend:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: Dockerfile @@ -388,6 +393,7 @@ services: # Seed default users (runs once after mongo is ready) user-seed: + image: ghcr.io/hardmax71/integr8scode/backend:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: Dockerfile @@ -412,6 +418,7 @@ services: # Event-driven workers coordinator: + image: ghcr.io/hardmax71/integr8scode/coordinator:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: workers/Dockerfile.coordinator @@ -436,6 +443,7 @@ services: restart: unless-stopped k8s-worker: + image: ghcr.io/hardmax71/integr8scode/k8s-worker:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: workers/Dockerfile.k8s_worker @@ -463,6 +471,7 @@ services: restart: unless-stopped pod-monitor: + image: ghcr.io/hardmax71/integr8scode/pod-monitor:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: workers/Dockerfile.pod_monitor @@ -488,6 +497,7 @@ services: restart: unless-stopped result-processor: + image: ghcr.io/hardmax71/integr8scode/result-processor:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: workers/Dockerfile.result_processor @@ -515,6 +525,7 @@ services: restart: unless-stopped saga-orchestrator: + image: ghcr.io/hardmax71/integr8scode/saga-orchestrator:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: workers/Dockerfile.saga_orchestrator @@ -560,6 +571,7 @@ services: # Event replay service event-replay: + image: ghcr.io/hardmax71/integr8scode/event-replay:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: 
workers/Dockerfile.event_replay @@ -586,6 +598,7 @@ services: # DLQ Processor Service dlq-processor: + image: ghcr.io/hardmax71/integr8scode/dlq-processor:${IMAGE_TAG:-latest} build: context: ./backend dockerfile: workers/Dockerfile.dlq_processor diff --git a/docs/operations/cicd.md b/docs/operations/cicd.md index 54ff0130..0db2fee6 100644 --- a/docs/operations/cicd.md +++ b/docs/operations/cicd.md @@ -1,41 +1,40 @@ # CI/CD Pipeline -The project uses GitHub Actions to automate code quality checks, security scanning, testing, and documentation -deployment. Every push to `main` or `dev` and every pull request triggers the pipeline, with workflows running in -parallel to provide fast feedback. +The project uses GitHub Actions to automate code quality checks, security scanning, testing, image publishing, and +documentation deployment. The pipeline is split across several workflow files that trigger independently based on path +filters, so only relevant checks run for each change. ## Pipeline overview ```mermaid graph LR - subgraph "Code Quality" + subgraph "Code Quality (lightweight)" Ruff["Ruff Linting"] MyPy["MyPy Type Check"] - ESLint["ESLint + TypeScript"] + ESLint["ESLint + Svelte Check"] end subgraph "Security" Bandit["Bandit SAST"] + SBOM["SBOM & Grype"] end - subgraph "Docker Build & Scan" - Base["Build Base"] - Backend["Build Backend"] - Frontend["Build Frontend"] - ScanBE["Scan Backend"] - ScanFE["Scan Frontend"] - Base --> Backend - Base --> Frontend - Backend --> ScanBE - Frontend --> ScanFE - end - - subgraph "Testing (stack-tests.yml)" + subgraph "Stack Tests" UnitBE["Backend Unit"] UnitFE["Frontend Unit"] - Stack["Stack Tests"] - UnitBE --> Stack - UnitFE --> Stack + Build["Build & Push Images"] + E2E_BE["Backend E2E"] + E2E_FE["Frontend E2E"] + UnitBE --> Build + UnitFE --> Build + Build --> E2E_BE + Build --> E2E_FE + end + + subgraph "Docker Scan & Promote" + Scan["Trivy Scan (12 images)"] + Promote["Promote SHA → latest"] + Scan --> Promote end 
subgraph "Documentation" @@ -43,143 +42,293 @@ graph LR Pages["GitHub Pages"] end - Push["Push / PR"] --> Ruff - Push --> MyPy - Push --> ESLint - Push --> Bandit - Push --> Base - Push --> UnitBE - Push --> UnitFE - Push --> Docs + Push["Push / PR"] --> Ruff & MyPy & ESLint & Bandit & SBOM & UnitBE & UnitFE & Docs + Build -->|main, all tests pass| Scan Docs -->|main only| Pages ``` -All workflows trigger on pushes to `main` and `dev` branches, pull requests against those branches, and can be triggered -manually via `workflow_dispatch`. Path filters ensure workflows only run when relevant files change. - -## Linting and type checking - -Three lightweight workflows run first since they catch obvious issues quickly. - -**Backend (Python):** -- [Ruff](https://docs.astral.sh/ruff/) checks for style violations, import ordering, and common bugs -- [mypy](https://mypy.readthedocs.io/) with strict settings catches type mismatches and missing return types - -**Frontend (TypeScript):** -- ESLint checks for code quality issues -- TypeScript compiler (`tsc --noEmit`) verifies type correctness - -Both use dependency caching to skip reinstallation when lockfiles haven't changed. - -## Security scanning - -The security workflow uses [Bandit](https://bandit.readthedocs.io/) to perform static analysis on Python source files, -flagging issues like hardcoded credentials, SQL injection patterns, and unsafe deserialization. It excludes the test -directory and reports only medium-severity and above findings. Container-level vulnerability scanning with Trivy runs -as part of the Docker workflow. +The two heavyweight workflows are **Stack Tests** (builds images, runs all tests) and **Docker Scan & Promote** +(scans images with Trivy and promotes to `latest`). They're connected: Docker Scan & Promote triggers automatically +after Stack Tests succeeds on `main`, forming a build-test-scan-promote pipeline where the `latest` tag only moves +forward when everything passes. 
-## Docker build and scan - -The Docker workflow is structured as multiple jobs with dependencies, enabling parallel execution and early failure -detection. If any job fails, dependent jobs are skipped immediately. - -```mermaid -graph TD - A[build-base] --> B[build-backend] - A --> C[build-frontend] - B --> D[scan-backend] - C --> E[scan-frontend] - D --> F[summary] - E --> F - - style A fill:#e1f5fe - style B fill:#fff3e0 - style C fill:#fff3e0 - style D fill:#ffebee - style E fill:#ffebee - style F fill:#e8f5e9 -``` +## Workflow files -| Job | Depends On | Purpose | -|------------------|------------------|------------------------------------------------------| -| `build-base` | - | Build shared base image with Python and dependencies | -| `build-backend` | `build-base` | Build backend image using base as build context | -| `build-frontend` | `build-base` | Build frontend image (runs parallel with backend) | -| `scan-backend` | `build-backend` | Trivy vulnerability scan on backend image | -| `scan-frontend` | `build-frontend` | Trivy vulnerability scan on frontend image | -| `summary` | All scans | Generate summary (main branch only) | +| Workflow | File | Trigger | Purpose | +|-------------------------|----------------------------------------------|-----------------------------------------------|--------------------------------------------| +| Stack Tests | `.github/workflows/stack-tests.yml` | Push/PR to `main`, tags `v*` | Unit tests, image build, E2E tests | +| Docker Scan & Promote | `.github/workflows/docker.yml` | After Stack Tests completes on `main` | Trivy scan + promote SHA tag to `latest` | +| SBOM & Supply Chain | `.github/workflows/sbom-compliance.yml` | Push/PR to `main`, weekly schedule | SPDX SBOM generation + Grype vulnerability scan | +| Ruff Linting | `.github/workflows/ruff.yml` | Push/PR to `main` | Python code style and import checks | +| MyPy Type Checking | `.github/workflows/mypy.yml` | Push/PR to `main` | Python static type analysis | 
+| Frontend CI | `.github/workflows/frontend-ci.yml` | Push/PR to `main` (frontend changes) | ESLint + Svelte type check | +| Security Scanning | `.github/workflows/security.yml` | Push/PR to `main` | Bandit SAST | +| Documentation | `.github/workflows/docs.yml` | Push/PR (`docs/`, `mkdocs.yml`) | MkDocs build and GitHub Pages deploy | -### Base image +## Composite actions -The base image (`Dockerfile.base`) contains Python, system dependencies, and all pip packages. It -uses [uv](https://docs.astral.sh/uv/) to install dependencies from the lockfile with `uv sync --locked --no-dev`, -ensuring reproducible builds without development tools. +Shared steps are extracted into reusable composite actions under `.github/actions/`. This eliminates duplication between +the backend and frontend E2E jobs, which both need k3s and the full docker compose stack but set it up differently. -### Security scanning +| Action | File | Purpose | +|-------------------------|----------------------------------------------|--------------------------------------------| +| E2E Boot | `.github/actions/e2e-boot/action.yml` | GHCR login, background image pull + infra pre-warm, k3s install | +| E2E Ready | `.github/actions/e2e-ready/action.yml` | Finalize k3s, start compose stack, health checks, seed users | -After each image builds, [Trivy](https://trivy.dev/) scans it for known vulnerabilities in OS packages and Python -dependencies. The scan fails if it finds any critical or high severity issues with available fixes. +The split is intentional. Frontend E2E needs to install Node.js and Playwright browsers _between_ boot and ready, +overlapping that work with k3s installation to save wall-clock time. Backend E2E calls them back-to-back since it has +no setup to overlap. -## Stack tests (unified testing) +## Stack Tests (the main workflow) -The `stack-tests.yml` workflow consolidates all testing that requires infrastructure into a single job, avoiding -redundant stack setup across multiple jobs. 
+This is the core testing workflow. It builds all 13 container images, pushes them to GHCR with immutable SHA-based +tags, then runs E2E tests on separate runners that pull images from the registry. ```mermaid graph TD - subgraph "Parallel (fast)" - A[Backend Unit Tests] - B[Frontend Unit Tests] - end - - subgraph "Build" - C[Build Images] + subgraph "Phase 1: Fast feedback" + A["Backend Unit Tests"] + B["Frontend Unit Tests"] end - subgraph "Backend E2E (own runner)" - D1[Setup k3s + Stack] - E[Backend E2E Tests] - D1 --> E + subgraph "Phase 2: Build" + C["Build & Push 13 Images to GHCR"] end - subgraph "Frontend E2E (own runner)" - D2[Setup k3s + Stack] - F[Frontend E2E Tests] - D2 --> F + subgraph "Phase 3: E2E (parallel runners)" + D["Backend E2E
(k3s + full stack)"] + E["Frontend E2E Shard 1/2
(k3s + Playwright)"] + F["Frontend E2E Shard 2/2
(k3s + Playwright)"] end A --> C B --> C - C --> D1 - C --> D2 + C --> D & E & F style A fill:#e8f5e9 style B fill:#e8f5e9 style C fill:#e1f5fe - style D1 fill:#e1f5fe - style D2 fill:#e1f5fe + style D fill:#fff3e0 style E fill:#fff3e0 style F fill:#fff3e0 ``` -### Test execution order +### Phase 1: Unit tests + +Backend and frontend unit tests run in parallel. They need no infrastructure and complete quickly. If either fails, +the image build is skipped entirely. -1. **Unit tests (parallel)**: Backend and frontend unit tests run simultaneously. They require no infrastructure and - complete quickly (~1-2 min each). +### Phase 2: Build and push + +All 13 images are built on a single runner and pushed to GHCR with an immutable `sha-<7chars>` tag: + +| Image | Source | +|----------------------|---------------------------------------------| +| `base` | `backend/Dockerfile.base` | +| `backend` | `backend/Dockerfile` | +| `coordinator` | `backend/workers/Dockerfile.coordinator` | +| `k8s-worker` | `backend/workers/Dockerfile.k8s_worker` | +| `pod-monitor` | `backend/workers/Dockerfile.pod_monitor` | +| `result-processor` | `backend/workers/Dockerfile.result_processor` | +| `saga-orchestrator` | `backend/workers/Dockerfile.saga_orchestrator` | +| `event-replay` | `backend/workers/Dockerfile.event_replay` | +| `dlq-processor` | `backend/workers/Dockerfile.dlq_processor` | +| `cert-generator` | `cert-generator/Dockerfile` | +| `zookeeper-certgen` | `backend/zookeeper/Dockerfile.certgen` | +| `frontend-dev` | `frontend/Dockerfile` | +| `frontend` | `frontend/Dockerfile.prod` | + +Of these 13 images, 12 are scanned by Trivy and promoted to `latest` in the +[Docker Scan & Promote](#docker-scan--promote) workflow. The `frontend-dev` image is excluded — it's the Vite dev +server build used only for E2E tests in CI and is never deployed to production. + +The base image is cached separately as a zstd-compressed tarball since its dependencies rarely change. 
Worker images +depend on it via `--build-context base=docker-image://integr8scode-base:latest`. Utility and frontend images use GHA +layer caching. + +All 13 images are pushed to GHCR in parallel, with each push tracked by PID so individual failures are reported: + +```bash +declare -A PIDS +for name in base backend coordinator k8s-worker ...; do + docker push "$IMG/$name:$TAG" & + PIDS[$name]=$! +done +FAILED=0 +for name in "${!PIDS[@]}"; do + if ! wait "${PIDS[$name]}"; then + echo "::error::Failed to push $name" + FAILED=1 + fi +done +[ "$FAILED" -eq 0 ] || exit 1 +``` -2. **Image build**: After unit tests pass, all Docker images are built with GHA layer caching. +Fork PRs skip the GHCR push (no write access), so E2E tests only run for non-fork PRs. -3. **E2E tests (parallel)**: Backend and frontend E2E tests run in parallel on separate runners, each with its own - isolated stack (k3s + docker compose): - - Backend E2E tests (pytest with k8s) - - Frontend E2E tests (Playwright) +### Phase 3: E2E tests + +Backend and frontend E2E tests run on separate runners. Each runner provisions its own k3s cluster and docker compose +stack, pulling pre-built images from GHCR. + +#### E2E Boot (`.github/actions/e2e-boot`) + +This action kicks off three slow tasks that can overlap: + +1. **GHCR login** using `docker/login-action@v3` +2. **Background image pull + infra pre-warm** — pulls all compose images then starts infrastructure services + (mongo, redis, kafka, zookeeper, schema-registry) in a background `nohup` process. The exit status is persisted + to `/tmp/infra-pull.exit` so the next action can check for failures. +3. **k3s install** — downloads and installs a pinned k3s version with SHA256 checksum verification (see + [supply-chain hardening](#supply-chain-hardening) below) + +#### E2E Ready (`.github/actions/e2e-ready`) + +This action finalizes the environment after boot tasks complete: + +1.
**Finalize k3s** — copies kubeconfig, rewrites the API server address to `host.docker.internal` so containers + inside docker compose can reach the k3s API server, creates the `integr8scode` namespace +2. **Start cert-generator** in the background +3. **Copy test config** — uses `config.test.toml` and `secrets.example.toml` +4. **Wait for image pull and infra** — blocks until the background pull completes and checks the exit code from + `/tmp/infra-pull.exit`, failing fast if the background process had errors +5. **Start compose stack** with `docker compose up -d --no-build` +6. **Health checks** — waits for backend (`/api/v1/health/live`), and optionally frontend (`https://localhost:5001`) +7. **Seed test users** via `scripts/seed_users.py` + +#### Frontend E2E sharding + +Frontend E2E tests use Playwright with 2 shards running in parallel on separate runners. Between `e2e-boot` and +`e2e-ready`, each shard installs Node.js dependencies and Playwright browsers (with caching), overlapping that work +with k3s booting in the background. 
+ +``` +e2e-boot (GHCR login + pull + k3s install) + | + ├── npm ci + playwright install (overlapped with k3s) + | +e2e-ready (finalize k3s + start stack + health check) + | + └── npx playwright test --shard=N/2 +``` ### Coverage reporting -Each test suite reports coverage to [Codecov](https://codecov.io/): -- `backend-unit` flag for unit tests -- `backend-e2e` flag for E2E tests -- `frontend-unit` flag for frontend unit tests +Each test suite reports coverage to [Codecov](https://codecov.io/) with separate flags: + +- `backend-unit` — backend unit tests +- `backend-e2e` — backend E2E tests +- `frontend-unit` — frontend unit tests (Vitest with `lcov` output) + +### Log collection on failure + +When E2E tests fail, logs are collected automatically and uploaded as artifacts: + +- All docker compose service logs with timestamps +- Individual service logs for each worker +- Kubernetes events sorted by timestamp (backend E2E only) + +## Docker Scan & Promote + +This workflow implements the promotion model: the `latest` tag is never set during the build. Only this workflow +sets it, and only after all tests pass. + +```mermaid +graph LR + ST["Stack Tests
(main, success)"] -->|workflow_run trigger| Scan + Scan["Trivy Scan
(12 images in parallel)"] --> Promote["crane copy
sha-xxx → latest"] + Promote --> Summary["Step Summary"] +``` + +### Trigger + +Runs automatically when `Stack Tests` completes successfully on `main`. Can also be triggered manually via +`workflow_dispatch` with an optional SHA input to promote a specific commit. + +### Scan + +Uses [Trivy](https://trivy.dev/) (pinned at `v0.68.2`) to scan all 12 deployed images in parallel via matrix strategy. +Scans for `CRITICAL` and `HIGH` severity vulnerabilities with unfixed issues ignored. Results are uploaded as SARIF +files to GitHub's Security tab. + +### Promote + +Uses [crane](https://github.com/google/go-containerregistry/blob/main/cmd/crane/README.md) to copy manifests at the +registry level (`crane copy sha-tag latest`), avoiding any rebuild or re-push. This is a fast, atomic operation that +simply re-tags existing image manifests. + +## SBOM & Supply Chain Security + +The `sbom-compliance.yml` workflow generates [SPDX](https://spdx.dev/) Software Bills of Materials for both backend +(Python) and frontend (JavaScript) components. It runs on every push/PR to `main` and weekly on a schedule. + +For each component: + +1. **Generate SBOM** using [anchore/sbom-action](https://github.com/anchore/sbom-action) — produces an SPDX JSON file + listing all direct and transitive dependencies +2. **Scan SBOM** using [anchore/scan-action](https://github.com/anchore/scan-action) (Grype) — checks for known + vulnerabilities with a `high` severity cutoff +3. **Upload** — SBOM artifacts are retained for 5 days; vulnerability results are uploaded as SARIF to GitHub's + Security tab + +## Supply-chain hardening + +### k3s version pinning and checksum verification + +The k3s installation in CI is hardened against supply-chain attacks: + +1. **Pinned version** — `K3S_VERSION` is set as a workflow-level env var (`v1.32.11+k3s1`), not fetched dynamically +2. 
**Source pinning** — the install script is fetched from the k3s GitHub repository at the exact tagged version + (e.g., `https://raw.githubusercontent.com/k3s-io/k3s/v1.32.11%2Bk3s1/install.sh`), not from the `get.k3s.io` CDN +3. **SHA256 verification** — the install script is verified against a known checksum before execution: + +```bash +K3S_TAG=$(echo "$K3S_VERSION" | sed 's/+/%2B/g') +curl -sfL "https://raw.githubusercontent.com/k3s-io/k3s/${K3S_TAG}/install.sh" -o /tmp/k3s-install.sh +echo "$K3S_INSTALL_SHA256 /tmp/k3s-install.sh" | sha256sum -c - +chmod +x /tmp/k3s-install.sh +INSTALL_K3S_VERSION="$K3S_VERSION" ... /tmp/k3s-install.sh +``` + +This prevents the common `curl | sh` anti-pattern where a compromised CDN or MITM could inject malicious code. + +### GHCR image tags + +Images are tagged with `sha-<7chars>` (immutable, tied to a specific commit) during build. The `latest` tag is only +applied by the Docker Scan & Promote workflow after all tests and security scans pass. This means: + +- Every E2E test runs against exactly the images built from that commit +- `latest` is never stale or untested +- Any commit's images can be pulled by their SHA tag for debugging + +### Dependency pinning + +All GitHub Actions are pinned to major versions (e.g., `actions/checkout@v6`, `docker/build-push-action@v6`). Trivy is +pinned to a specific version (`aquasecurity/trivy-action@0.33.1`) for scan reproducibility. + +## Linting and type checking + +Three lightweight workflows run independently since they catch obvious issues quickly. 
+ +**Backend (Python):** + +- [Ruff](https://docs.astral.sh/ruff/) checks for style violations, import ordering, and common bugs +- [mypy](https://mypy.readthedocs.io/) with strict settings catches type mismatches and missing return types + +**Frontend (TypeScript/Svelte):** + +- ESLint checks for code quality issues +- `svelte-check` verifies TypeScript types and Svelte component correctness + +Both use dependency caching ([uv](https://docs.astral.sh/uv/) for Python, npm for Node.js) to skip reinstallation +when lockfiles haven't changed. + +## Security scanning + +The `security.yml` workflow uses [Bandit](https://bandit.readthedocs.io/) to perform static analysis on Python source +files, flagging issues like hardcoded credentials, SQL injection patterns, and unsafe deserialization. It excludes the +test directory and reports only medium-severity and above findings. Container-level vulnerability scanning with Trivy +runs as part of the [Docker Scan & Promote](#docker-scan--promote) workflow. ## Documentation @@ -189,6 +338,47 @@ the [Material theme](https://squidfunk.github.io/mkdocs-material/). It triggers On pushes to main, the workflow deploys the built site to GitHub Pages. +## Build optimizations + +### Docker layer caching + +All image builds use [docker/build-push-action](https://github.com/docker/build-push-action) with GitHub Actions +cache. Each service has its own cache scope, preventing pollution between unrelated builds: + +```yaml +- name: Build cert-generator image + uses: docker/build-push-action@v6 + with: + context: ./cert-generator + file: ./cert-generator/Dockerfile + load: true + tags: integr8scode-cert-generator:latest + cache-from: type=gha,scope=cert-generator + cache-to: type=gha,mode=max,scope=cert-generator +``` + +### Base image caching + +The base image (Python + all pip dependencies) changes infrequently, so it's cached as a zstd-compressed tarball keyed +on `Dockerfile.base`, `pyproject.toml`, and `uv.lock`. 
On cache hit the image is loaded directly with `docker load`, +skipping the entire build. + +### Background infra pre-warm + +The `e2e-boot` action pulls all docker compose images and starts infrastructure services _in the background_ while k3s +installs. This overlaps the network-bound image pull with the k3s installation and startup, saving several minutes per +E2E job. + +### Frontend Playwright caching + +Playwright browsers are cached by `package-lock.json` hash. On cache hit, only system dependencies are installed +(`playwright install-deps chromium`), skipping the browser download. + +### Parallel image push + +All 13 images are pushed to GHCR concurrently using background processes with PID tracking. Each push failure is +reported individually via `::error::` annotations. + ## Running locally You can run most checks locally before pushing. @@ -197,10 +387,10 @@ You can run most checks locally before pushing. cd backend # Linting -uv run ruff check . +uv run ruff check . --config pyproject.toml # Type checking -uv run mypy . +uv run mypy --config-file pyproject.toml --strict . # Security scan uv tool run bandit -r . -x tests/ -ll @@ -216,76 +406,23 @@ cd frontend npm run lint # Type checking -npx tsc --noEmit +npm run check # Unit tests npm run test ``` -For E2E tests, use the same deployment as CI: +For E2E tests, use the deployment script to bring up the full stack: ```bash -# Start full stack (requires k8s configured locally) -./deploy.sh dev +# Start full stack with k8s configured locally +./deploy.sh dev --wait -# Run tests inside the running backend container +# Run backend E2E tests inside the running container docker compose exec -T backend uv run pytest tests/e2e -v # Run frontend E2E tests cd frontend && npx playwright test ``` -Or use `./deploy.sh test` which handles everything automatically. - -## Build optimizations - -The CI pipeline employs several caching strategies to minimize build times.
- -### Docker layer caching - -All image builds use [docker/build-push-action](https://github.com/docker/build-push-action) with GitHub Actions cache: - -```yaml -- name: Build base image - uses: docker/build-push-action@v6 - with: - context: ./backend - file: ./backend/Dockerfile.base - load: true - tags: integr8scode-base:latest - cache-from: type=gha,scope=backend-base - cache-to: type=gha,mode=max,scope=backend-base -``` - -Each service has its own cache scope (`backend-base`, `backend`, `frontend`, `cert-generator`), preventing cache -pollution between unrelated builds. - -### Infrastructure image caching - -A reusable action at `.github/actions/docker-cache` handles infrastructure images (MongoDB, Redis, Kafka, Schema -Registry). It stores pulled images as zstd-compressed tarballs in the GitHub Actions cache, saving ~30 seconds per run -and avoiding Docker Hub rate limits. - -### k3s setup action - -A reusable composite action at `.github/actions/k3s-setup` handles Kubernetes setup: -- Installs k3s with traefik disabled -- Creates the `integr8scode` namespace -- Generates a kubeconfig accessible from Docker containers (via `host.docker.internal`) - -This eliminates copy-paste across workflows and ensures consistent k8s setup. 
- -## Workflow files - -| Workflow | File | Purpose | -|--------------------|--------------------------------------|------------------------------------| -| Ruff Linting | `.github/workflows/ruff.yml` | Python code style and import checks | -| MyPy Type Checking | `.github/workflows/mypy.yml` | Python static type analysis | -| Frontend CI | `.github/workflows/frontend-ci.yml` | TypeScript lint and type check | -| Security Scanning | `.github/workflows/security.yml` | Bandit SAST | -| Docker Build & Scan| `.github/workflows/docker.yml` | Image build and Trivy scan | -| Stack Tests | `.github/workflows/stack-tests.yml` | All unit and E2E tests | -| Documentation | `.github/workflows/docs.yml` | MkDocs build and deploy | - -All workflows use [uv](https://docs.astral.sh/uv/) for Python dependency management and npm for Node.js, with caching -enabled for both. +Or use `./deploy.sh test`, which handles stack setup, testing, and teardown automatically.