hyperdxio · teeohhem · May 8, 2026 · May 8, 2026 · May 8, 2026
diff --git a/.github/workflows/ui-preview-smoke.yml b/.github/workflows/ui-preview-smoke.yml
@@ -64,100 +64,334 @@ jobs:
             core.setOutput('number', String(pr.number));
             core.setOutput('head_sha', pr.head.sha);
 
+      # Cheap pre-flight: parse the PR body for a UI test plan before spending
+      # ~5 min waiting for Vercel + ~$1 of agent runtime. If the author didn't
+      # fill in `### How to test on Vercel preview`, we post a skip comment
+      # and stop; without this gate, no-plan PRs cost as much as full runs.
+      - name: Check for UI test plan
+        id: plan
+        uses: actions/github-script@v9
+        with:
+          script: |
+            const fs = require('fs');
+            const body = fs.readFileSync('/tmp/pr-body.md', 'utf8');
+
+            const headingMatch = body.match(
+              /^###\s+How to test on Vercel preview\s*$/im,
+            );
+            if (!headingMatch) {
+              core.setOutput('has_plan', 'false');
+              core.notice('No "### How to test on Vercel preview" section.');
+              return;
+            }
+
+            const start = headingMatch.index + headingMatch[0].length;
+            const remainder = body.slice(start);
+            const nextHeading = remainder.match(/^###\s/m);
+            const section = nextHeading
+              ? remainder.slice(0, nextHeading.index)
+              : remainder;
+
+            // Strip HTML comments — both the template explainer and any
+            // placeholder hints like "<!-- e.g. /chart -->" inline.
+            const cleaned = section.replace(/<!--[\s\S]*?-->/g, '');
+            const trimmed = cleaned.trim();
+
+            if (!trimmed) {
+              core.setOutput('has_plan', 'false');
+              core.notice('Section is empty after stripping comments.');
+              return;
+            }
+            if (/^(n\/?a\b|non[-\s]?ui|no[-\s]+ui)/i.test(trimmed)) {
+              core.setOutput('has_plan', 'false');
+              core.notice('Section is marked N/A.');
+              return;
+            }
+
+            // Same-line whitespace only ([^\S\r\n]): `\s*` would cross the
+            // newline and capture the next line (e.g. "**Steps:**") as routes.
+            const routesMatch = cleaned.match(
+              /\*\*Preview routes:\*\*[^\S\r\n]*([^\r\n]*)/i,
+            );
+            const routes = routesMatch ? routesMatch[1].trim() : '';
+            if (!routes) {
+              core.setOutput('has_plan', 'false');
+              core.notice('"**Preview routes:**" line is empty.');
+              return;
+            }
+
+            const stepsMatch = cleaned.match(/\*\*Steps:\*\*([\s\S]*)/i);
+            const stepsBlock = stepsMatch ? stepsMatch[1] : '';
+            // Need one numbered item with content on the same line as "1.".
+            const hasStep = /^[^\S\r\n]*\d+\.[^\S\r\n]+\S/m.test(stepsBlock);
+            if (!hasStep) {
+              core.setOutput('has_plan', 'false');
+              core.notice('No numbered **Steps:** with content.');
+              return;
+            }
+
+            core.setOutput('has_plan', 'true');
+            core.setOutput('routes', routes);
+            core.notice(`UI test plan found. Routes: ${routes}`);
+
+      # Runs even when an earlier step (e.g. the plan check) failed, provided
+      # the PR lookup succeeded, so the infrastructure-failure poster below
+      # can update the sticky comment rather than add a new one per run.
+      - name: Find existing smoke comment
+        id: find-comment
+        if: ${{ always() && steps.pr.outcome == 'success' }}
+        uses: peter-evans/find-comment@v4
+        with:
+          issue-number: ${{ steps.pr.outputs.number }}
+          body-includes: "<!-- ui-preview-smoke -->"
+          comment-author: 'github-actions[bot]'
+          direction: last
+
+      # ─── Skip path ────────────────────────────────────────────────────
+      - name: Post skip comment
+        if: ${{ steps.plan.outputs.has_plan == 'false' }}  # plan check found no usable plan
+        uses: peter-evans/create-or-update-comment@v5
+        with:
+          issue-number: ${{ steps.pr.outputs.number }}
+          comment-id: ${{ steps.find-comment.outputs.comment-id }}
+          edit-mode: replace
+          body: |
+            <!-- ui-preview-smoke -->
+            ## UI Preview Smoke
+
+            Skipped: this PR has no `How to test on Vercel preview` plan.
+            Add `**Preview routes:**` and a numbered `**Steps:**` list to
+            enable automated smoke testing.
+
+      # ─── Full smoke path ──────────────────────────────────────────────
+      # `continue-on-error: true` keeps a Vercel timeout or deploy error from
+      # failing the job at this step. Later steps check
+      # `steps.vercel.outcome == 'success'`, and the consolidated
+      # infrastructure-failure poster reports the problem on the PR.
       - name: Wait for Vercel preview
+        if: steps.plan.outputs.has_plan == 'true'
         id: vercel
+        continue-on-error: true
         uses: patrickedqvist/wait-for-vercel-preview@v1.3.1
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
           max_timeout: 600
           check_interval: 10
-          # For workflow_dispatch we need to point at the PR head commit.
-          # For pull_request_target the action picks up the PR sha automatically.
 
       - name: Setup Node
+        if: >-
+          steps.plan.outputs.has_plan == 'true' &&
+          steps.vercel.outcome == 'success'
         uses: actions/setup-node@v4
         with:
           node-version: 22
 
       - name: Install Playwright + Chromium
+        if: >-
+          steps.plan.outputs.has_plan == 'true' &&
+          steps.vercel.outcome == 'success'
         run: |
           npm install -g playwright
           playwright install --with-deps chromium
 
-      - name: Run agent against preview
-        uses: anthropics/claude-code-action@v1
-        with:
-          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          mcp_servers: |
-            {
+      # claude-code-action@v1 has no `mcp_servers` input (the action ignores
+      # it and warns at runtime). The supported mechanism is a `.mcp.json`
+      # at the working-directory root, which the action picks up because it
+      # auto-sets `enableAllProjectMcpServers: true` in Claude's settings.
+      #
+      # Extract the `scheme://host` origin from the Vercel preview URL and
+      # pass it to `@playwright/mcp` as `--allowed-origins`. The version is
+      # pinned (not `@latest`) so a future MCP release can't silently change
+      # browser/tool behavior under us. The step aborts if the URL doesn't
+      # match `^https?://[^/]+`; the prior `sed` silently passed bogus input
+      # through and produced a malformed `--allowed-origins=` arg.
+      # NOTE(review): `[^/]+` also admits characters such as `"` that would
+      # break the JSON written below — consider a stricter host pattern.
+      # Residual: per the package's own README, `--allowed-origins` is
+      # "not a security boundary" — it's a navigation hint, not a
+      # process-level egress control. A determined attacker who lands
+      # arbitrary JS in the preview origin can still issue cross-origin
+      # `fetch`. We accept that as residual; the upstream fix is at the
+      # MCP/browser layer.
+      - name: Write MCP config (Playwright)
+        if:
+          steps.plan.outputs.has_plan == 'true' && steps.vercel.outcome ==
+          'success'
+        env:
+          VERCEL_URL: ${{ steps.vercel.outputs.url }}
+        run: |
+          set -euo pipefail
+          if [[ ! "$VERCEL_URL" =~ ^(https?://[^/]+) ]]; then
+            echo "::error::Vercel URL '$VERCEL_URL' is not a valid origin" >&2
+            exit 1
+          fi
+          ORIGIN="${BASH_REMATCH[1]}"
+          cat > .mcp.json <<EOF
+          {
+            "mcpServers": {
               "playwright": {
                 "command": "npx",
                 "args": [
                   "-y",
-                  "@playwright/mcp@latest",
+                  "@playwright/mcp@0.0.75",
                   "--browser=chromium",
-                  "--headless"
+                  "--headless",
+                  "--allowed-origins=${ORIGIN}"
                 ]
               }
             }
+          }
+          EOF
+
+      - name: Run agent against preview
+        if:
+          steps.plan.outputs.has_plan == 'true' && steps.vercel.outcome ==
+          'success'
+        id: agent  # the extract step reads steps.agent.outputs.structured_output
+        continue-on-error: true  # a failed agent run must not fail the job here
+        uses: anthropics/claude-code-action@v1
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
           prompt: |
-            Execute the UI test plan for PR #${{ steps.pr.outputs.number }}
-            on its Vercel preview deploy.
+            Smoke-test PR #${{ steps.pr.outputs.number }} on its Vercel
+            preview deploy.
 
             Preview URL: ${{ steps.vercel.outputs.url }}
             Repo: ${{ github.repository }}
-            PR body: read /tmp/pr-body.md (use the Bash cat tool).
+
+            Read the PR body with: cat /tmp/pr-body.md
+
+            The PR body is guaranteed to contain a
+            "### How to test on Vercel preview" section with a non-empty
+            `**Preview routes:**` line and at least one numbered step
+            (the workflow gates on this before invoking you).
 
             This preview is built in LOCAL_MODE with a pre-configured demo
             ClickHouse connection and otel_logs / otel_traces sources. No
             registration or source setup is needed — open the URL and go.
 
-            Workflow:
-
-            1. Read /tmp/pr-body.md.
-            2. Find the section headed exactly
-               "### How to test on Vercel preview". Within it, parse:
-                 - "**Preview routes:**" line — comma-separated list of paths
-                   (e.g. "/chart, /dashboards/<id>"). Strip whitespace.
-                 - "**Steps:**" — a numbered list of imperative actions.
-            3. If the section is missing, empty, contains only the HTML
-               comment template placeholder, or is marked "N/A" or
-               "non-UI change": post a single PR comment containing exactly
-               the text below, then exit with status 0.
-
-               > <!-- ui-preview-smoke -->
-               > ## UI Preview Smoke
-               >
-               > Skipped: this PR has no `How to test on Vercel preview`
-               > plan. Add `**Preview routes:**` and a numbered `**Steps:**`
-               > list to enable automated smoke testing.
-
-            4. Otherwise, for each Preview route in order:
-                 a. Open `<Preview URL><route>` in the Playwright browser.
+            CRITICAL OUTPUT REQUIREMENTS:
+              1. Return a JSON object with a single "summary" field whose
+                 VALUE is a plain markdown STRING. Do NOT put another JSON
+                 envelope inside the string — that posts raw JSON in the
+                 comment.
+              2. The summary markdown MUST start with EXACTLY these two
+                 lines (the comment marker is required for the workflow to
+                 update the same comment on subsequent runs):
+                   <!-- ui-preview-smoke -->
+                   ## UI Preview Smoke
+              3. Do NOT post comments yourself with `gh` or any other tool.
+                 The workflow posts (or updates) the PR comment using your
+                 `summary` field.
+
+            Procedure:
+
+            1. Read /tmp/pr-body.md and parse the "### How to test on
+               Vercel preview" section:
+                 - "**Preview routes:**" line — comma-separated paths.
+                 - "**Steps:**" — numbered list of imperative actions.
+            2. For each Preview route in order:
+                 a. Open `<Preview URL><route>` via the Playwright MCP
+                    browser tools (mcp__playwright__*).
                  b. Execute the numbered steps verbatim, in order.
                  c. Treat any step beginning with "Verify", "Confirm",
                     "Assert", "Check", or "Ensure" as an assertion. If an
-                    assertion fails, record the failure and continue to the
-                    next route.
-                 d. After each route capture: full-page screenshot, any
-                    console errors at level "error", any 4xx/5xx network
-                    responses, any uncaught exception dialogs.
-            5. Post a single PR comment via the JSON schema below. Use ✅
-               for passed routes, ❌ for any route with at least one failed
-               assertion or runtime error. For every failure, include the
-               step text, what was asserted, and what you observed instead.
+                    assertion fails, record the failure and continue to
+                    the next route.
+                 d. After each route capture: any console errors at level
+                    "error", any 4xx/5xx network responses, any uncaught
+                    exception dialogs.
+            3. Build the summary markdown with one section per route.
+               Use ✅ for routes that passed every assertion, ❌ for any
+               route with a failed assertion, console error, or 5xx
+               response. For each failure include the step text, what was
+               asserted, and what you observed instead.
 
             Constraints:
               - Do not invent steps the author didn't write.
               - Do not exercise routes outside the "Preview routes:" list.
-              - If a step is ambiguous, note the ambiguity in your comment
+              - If a step is ambiguous, note the ambiguity in the summary
                 and proceed with your best interpretation. Never fabricate
                 an assertion that wasn't requested.
-              - Cap total runtime at 8 minutes. If a single step hangs
-                more than 30s, mark it failed and continue.
+              - Cap total runtime at 8 minutes. If a single step hangs more
+                than 30s, mark it failed and continue.
 
           claude_args: |
             --setting-sources user
-            --allowedTools "Bash(cat /tmp/pr-body.md),Bash(gh pr view:*),mcp__playwright__*"
+            --allowedTools "Bash(cat /tmp/pr-body.md),mcp__playwright__*"
             --json-schema '{"type":"object","properties":{"summary":{"type":"string","description":"Complete markdown summary starting with <!-- ui-preview-smoke --> on the first line and ## UI Preview Smoke on the second line"}},"required":["summary"]}'
+
+      # The agent's structured_output is a JSON string. Pull the `summary`
+      # field via jq. Defensive double-unwrap mirrors the workaround in
+      # claude-code-review.yml: the model has been observed to nest its
+      # output as `{"summary":"{\"summary\":\"<markdown>\"}"}`, which would
+      # post raw JSON instead of markdown.
+      # Per-run random heredoc delimiter so attacker-influenced summary
+      # content (the agent's output reflects the PR body, which on a fork
+      # PR is fully attacker-controlled) can't land the literal delimiter
+      # on its own line and inject `name=value` pairs into $GITHUB_OUTPUT.
+      # `set -euo pipefail` plus `|| SUMMARY=''` on the jq parse, and
+      # `// empty` for a missing/null `.summary` (plain `jq -r` prints the
+      # literal `null`), yield an empty SUMMARY for the fallback poster.
+      - name: Extract summary from structured output
+        if:
+          steps.plan.outputs.has_plan == 'true' &&
+          steps.agent.outputs.structured_output != ''
+        id: extract
+        continue-on-error: true
+        env:
+          STRUCTURED_OUTPUT: ${{ steps.agent.outputs.structured_output }}
+        run: |
+          set -euo pipefail
+          SUMMARY="$(printf '%s' "$STRUCTURED_OUTPUT" | jq -r '.summary // empty')" || SUMMARY=''
+          if printf '%s' "$SUMMARY" | jq -e 'type == "object" and has("summary")' >/dev/null 2>&1; then
+            SUMMARY="$(printf '%s' "$SUMMARY" | jq -r '.summary // empty')" || SUMMARY=''
+          fi
+          DELIM="EOF_$(openssl rand -hex 16)"
+          {
+            printf 'summary<<%s\n' "$DELIM"
+            printf '%s\n' "$SUMMARY"
+            printf '%s\n' "$DELIM"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Post or update smoke comment
+        if: >-
+          steps.plan.outputs.has_plan == 'true' &&
+          steps.extract.outputs.summary != ''
+        uses: peter-evans/create-or-update-comment@v5
+        with:
+          issue-number: ${{ steps.pr.outputs.number }}
+          comment-id: ${{ steps.find-comment.outputs.comment-id }}
+          edit-mode: replace
+          body: ${{ steps.extract.outputs.summary }}
+
+      # Consolidated infrastructure-failure poster. Runs even after an
+      # earlier step failed (as long as the PR lookup succeeded) whenever
+      # the run would otherwise leave no up-to-date comment on the PR:
+      #   - the plan step itself errored (regex bug, runner exception)
+      #   - has_plan == 'true' but Vercel never produced a usable preview
+      #   - has_plan == 'true' and Vercel succeeded, but the agent /
+      #     extract steps produced no summary (timeout, malformed JSON)
+      # Without it, those cases leave the PR with a red check and either
+      # no comment or a stale prior-run comment; with it, the sticky
+      # comment is replaced by a pointer to the workflow run.
+      - name: Post infrastructure-failure comment
+        if: |
+          always() && steps.pr.outcome == 'success' && (
+            steps.plan.outcome == 'failure' ||
+            (steps.plan.outputs.has_plan == 'true' && (
+              steps.vercel.outcome != 'success' ||
+              steps.extract.outputs.summary == ''
+            ))
+          )
+        uses: peter-evans/create-or-update-comment@v5
+        with:
+          issue-number: ${{ steps.pr.outputs.number }}
+          comment-id: ${{ steps.find-comment.outputs.comment-id }}
+          edit-mode: replace
+          body: |
+            <!-- ui-preview-smoke -->
+            ## UI Preview Smoke
+
+            Smoke run did not complete. See [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.