diff --git a/.github/workflows/ui-preview-smoke.yml b/.github/workflows/ui-preview-smoke.yml index a2e4b13e1c..3571b2a56e 100644 --- a/.github/workflows/ui-preview-smoke.yml +++ b/.github/workflows/ui-preview-smoke.yml @@ -64,100 +64,334 @@ jobs: core.setOutput('number', String(pr.number)); core.setOutput('head_sha', pr.head.sha); + # Cheap pre-flight: parse the PR body for a UI test plan before we + # spend ~5 min waiting for Vercel + ~$1 of agent runtime. If the + # author didn't fill in `### How to test on Vercel preview`, we post + # a skip comment and exit immediately. Without this gate, no-plan + # PRs cost the same as full smoke runs. + # Outputs: `has_plan` ('true' | 'false') and, when 'true', `routes`. + - name: Check for UI test plan + id: plan + uses: actions/github-script@v9 + with: + script: | + const fs = require('fs'); + const body = fs.readFileSync('/tmp/pr-body.md', 'utf8'); + + const headingMatch = body.match( + /^###\s+How to test on Vercel preview\s*$/im, + ); + if (!headingMatch) { + core.setOutput('has_plan', 'false'); + core.notice('No "### How to test on Vercel preview" section.'); + return; + } + + // The section runs from the end of the heading line to the next + // level-3 heading, or to the end of the body if there is none. + const start = headingMatch.index + headingMatch[0].length; + const remainder = body.slice(start); + const nextHeading = remainder.match(/^###\s/m); + const section = nextHeading + ? remainder.slice(0, nextHeading.index) + : remainder; + + // Strip HTML comments — both the template explainer and any + // inline placeholder hints — so an untouched template reads as + // empty. Non-greedy so two comments never swallow the text between. + const cleaned = section.replace(/<!--[\s\S]*?-->/g, ''); + const trimmed = cleaned.trim(); + + if (!trimmed) { + core.setOutput('has_plan', 'false'); + core.notice('Section is empty after stripping comments.'); + return; + } + if (/^(n\/?a\b|non[-\s]?ui|no[-\s]+ui)/i.test(trimmed)) { + core.setOutput('has_plan', 'false'); + core.notice('Section is marked N/A.'); + return; + } + + // Allow only spaces/tabs after the label. `\s*` would cross the + // newline and capture the following line (e.g. "**Steps:**") as + // the route list whenever the routes line itself is blank. + const routesMatch = cleaned.match( + /\*\*Preview routes:\*\*[ \t]*([^\n]*)/i, + ); + const routes = routesMatch ? 
routesMatch[1].trim() : ''; + if (!routes) { + core.setOutput('has_plan', 'false'); + core.notice('"**Preview routes:**" line is empty.'); + return; + } + + const stepsMatch = cleaned.match(/\*\*Steps:\*\*([\s\S]*)/i); + const stepsBlock = stepsMatch ? stepsMatch[1] : ''; + // Need at least one numbered list item with non-whitespace + // content on the same line as its "1. " marker. `[ \t]+` (not + // `\s+`) keeps a bare "1." from borrowing the next line's text. + const hasStep = /^\s*\d+\.[ \t]+\S/m.test(stepsBlock); + if (!hasStep) { + core.setOutput('has_plan', 'false'); + core.notice('No numbered **Steps:** with content.'); + return; + } + + core.setOutput('has_plan', 'true'); + core.setOutput('routes', routes); + core.notice(`UI test plan found. Routes: ${routes}`); + + # Run unconditionally (including when plan-check threw) so the + # consolidated infrastructure-failure poster below can update the + # sticky comment instead of creating a fresh one each broken run. + # NOTE(review): `body-includes` is empty as shown here, which would + # presumably not filter on body text at all — confirm the sticky + # comment marker is present in the committed file. + - name: Find existing smoke comment + id: find-comment + if: always() && steps.pr.outcome == 'success' + uses: peter-evans/find-comment@v4 + with: + issue-number: ${{ steps.pr.outputs.number }} + comment-author: github-actions[bot] + body-includes: '' + direction: last + + # ─── Skip path ──────────────────────────────────────────────────── + # Runs only when the plan check explicitly reported has_plan == 'false'. + - name: Post skip comment + if: steps.plan.outputs.has_plan == 'false' + uses: peter-evans/create-or-update-comment@v5 + with: + comment-id: ${{ steps.find-comment.outputs.comment-id }} + issue-number: ${{ steps.pr.outputs.number }} + edit-mode: replace + body: | + + ## UI Preview Smoke + + Skipped: this PR has no `How to test on Vercel preview` plan. + Add `**Preview routes:**` and a numbered `**Steps:**` list to + enable automated smoke testing. + + # ─── Full smoke path ────────────────────────────────────────────── + # `continue-on-error: true` so a Vercel timeout/deploy-error doesn't + # short-circuit the job — downstream steps gate on + # `vercel.outcome == 'success'` and the consolidated fallback + # poster reports the failure to the PR. 
+ - name: Wait for Vercel preview + if: steps.plan.outputs.has_plan == 'true' id: vercel + continue-on-error: true uses: patrickedqvist/wait-for-vercel-preview@v1.3.1 with: token: ${{ secrets.GITHUB_TOKEN }} max_timeout: 600 check_interval: 10 - # For workflow_dispatch we need to point at the PR head commit. - # For pull_request_target the action picks up the PR sha automatically. + # Node is only set up on the full smoke path: a plan was found and + # the Vercel wait step succeeded. - name: Setup Node + if: + steps.plan.outputs.has_plan == 'true' && steps.vercel.outcome == + 'success' uses: actions/setup-node@v4 with: node-version: 22 + # Installs the `playwright` package globally, then Chromium via + # `playwright install --with-deps`. Same gating as Setup Node. - name: Install Playwright + Chromium + if: + steps.plan.outputs.has_plan == 'true' && steps.vercel.outcome == + 'success' run: | npm install -g playwright playwright install --with-deps chromium - - name: Run agent against preview - uses: anthropics/claude-code-action@v1 - with: - anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - github_token: ${{ secrets.GITHUB_TOKEN }} - mcp_servers: | - { + # claude-code-action@v1 has no `mcp_servers` input (the action ignores + # it and warns at runtime). The supported mechanism is a `.mcp.json` + # at the working-directory root, which the action picks up because it + # auto-sets `enableAllProjectMcpServers: true` in Claude's settings. + # + # Parse the Vercel preview URL into a `scheme://host` origin and + # pass it to `@playwright/mcp` as `--allowed-origins`. The version + # is pinned (not `@latest`) so a future MCP release can't silently + # change browser/tool behavior under us. We abort the step if the + # URL doesn't match `^https?://[^/]+`, rather than the prior `sed` + # which silently passed bogus input through and produced a + # malformed `--allowed-origins=` arg. + # + # Residual: per the package's own README, `--allowed-origins` is + # "not a security boundary" — it's a navigation hint, not a + # process-level egress control. A determined attacker who lands + # arbitrary JS in the preview origin can still issue cross-origin + # `fetch`. 
We accept that as residual; the upstream fix is at the + # MCP/browser layer. + - name: Write MCP config (Playwright) + if: + steps.plan.outputs.has_plan == 'true' && steps.vercel.outcome == + 'success' + env: + VERCEL_URL: ${{ steps.vercel.outputs.url }} + run: | + set -euo pipefail + if [[ ! "$VERCEL_URL" =~ ^(https?://[^/]+) ]]; then + echo "::error::Vercel URL '$VERCEL_URL' is not a valid origin" >&2 + exit 1 + fi + # BASH_REMATCH[1] is the first capture group of the `=~` match + # above: the `scheme://host` prefix with any path dropped. + ORIGIN="${BASH_REMATCH[1]}" + cat > .mcp.json <"). Strip whitespace. - - "**Steps:**" — a numbered list of imperative actions. - 3. If the section is missing, empty, contains only the HTML - comment template placeholder, or is marked "N/A" or - "non-UI change": post a single PR comment containing exactly - the text below, then exit with status 0. - - > - > ## UI Preview Smoke - > - > Skipped: this PR has no `How to test on Vercel preview` - > plan. Add `**Preview routes:**` and a numbered `**Steps:**` - > list to enable automated smoke testing. - - 4. Otherwise, for each Preview route in order: - a. Open `` in the Playwright browser. + CRITICAL OUTPUT REQUIREMENTS: + 1. Return a JSON object with a single "summary" field whose + VALUE is a plain markdown STRING. Do NOT put another JSON + envelope inside the string — that posts raw JSON in the + comment. + 2. The summary markdown MUST start with EXACTLY these two + lines (the comment marker is required for the workflow to + update the same comment on subsequent runs): + + ## UI Preview Smoke + 3. Do NOT post comments yourself with `gh` or any other tool. + The workflow posts (or updates) the PR comment using your + `summary` field. + + Procedure: + + 1. Read /tmp/pr-body.md and parse the "### How to test on + Vercel preview" section: + - "**Preview routes:**" line — comma-separated paths. + - "**Steps:**" — numbered list of imperative actions. + 2. For each Preview route in order: + a. Open `` via the Playwright MCP + browser tools (mcp__playwright__*). b. 
Execute the numbered steps verbatim, in order. c. Treat any step beginning with "Verify", "Confirm", "Assert", "Check", or "Ensure" as an assertion. If an - assertion fails, record the failure and continue to the - next route. - d. After each route capture: full-page screenshot, any - console errors at level "error", any 4xx/5xx network - responses, any uncaught exception dialogs. - 5. Post a single PR comment via the JSON schema below. Use ✅ - for passed routes, ❌ for any route with at least one failed - assertion or runtime error. For every failure, include the - step text, what was asserted, and what you observed instead. + assertion fails, record the failure and continue to + the next route. + d. After each route capture: any console errors at level + "error", any 4xx/5xx network responses, any uncaught + exception dialogs. + 3. Build the summary markdown with one section per route. + Use ✅ for routes that passed every assertion, ❌ for any + route with a failed assertion, console error, or 5xx + response. For each failure include the step text, what was + asserted, and what you observed instead. Constraints: - Do not invent steps the author didn't write. - Do not exercise routes outside the "Preview routes:" list. - - If a step is ambiguous, note the ambiguity in your comment + - If a step is ambiguous, note the ambiguity in the summary and proceed with your best interpretation. Never fabricate an assertion that wasn't requested. - - Cap total runtime at 8 minutes. If a single step hangs - more than 30s, mark it failed and continue. + - Cap total runtime at 8 minutes. If a single step hangs more + than 30s, mark it failed and continue. 
claude_args: | --setting-sources user - --allowedTools "Bash(cat /tmp/pr-body.md),Bash(gh pr view:*),mcp__playwright__*" + --allowedTools "Bash(cat /tmp/pr-body.md),mcp__playwright__*" --json-schema '{"type":"object","properties":{"summary":{"type":"string","description":"Complete markdown summary starting with on the first line and ## UI Preview Smoke on the second line"}},"required":["summary"]}' + + # The agent's structured_output is a JSON string. Pull the `summary` + # field via jq. Defensive double-unwrap mirrors the workaround in + # claude-code-review.yml: the model has been observed to nest its + # output as `{"summary":"{\"summary\":\"\"}"}`, which would + # post raw JSON instead of markdown. + # Per-run random heredoc delimiter so attacker-influenced summary + # content (the agent's output reflects the PR body, which on a fork + # PR is fully attacker-controlled) can't land the literal delimiter + # on its own line and inject `name=value` pairs into $GITHUB_OUTPUT. + # `set -euo pipefail` plus `|| SUMMARY=''` on the jq parse means + # any malformed structured_output yields an empty SUMMARY; the + # consolidated fallback poster below picks that up. 
+ - name: Extract summary from structured output + if: + steps.plan.outputs.has_plan == 'true' && + steps.agent.outputs.structured_output != '' + id: extract + continue-on-error: true + env: + STRUCTURED_OUTPUT: ${{ steps.agent.outputs.structured_output }} + run: | + set -euo pipefail + # `// empty` makes a missing or null `.summary` produce no output. + # Plain `jq -r '.summary'` prints the literal string "null" with + # exit status 0, which is non-empty: it would be posted as the + # comment body and would suppress the infrastructure-failure poster. + # A jq parse/type error still lands in `|| SUMMARY=''`. + SUMMARY="$(printf '%s' "$STRUCTURED_OUTPUT" | jq -r '.summary // empty')" || SUMMARY='' + # Second pass: unwrap once more if the extracted value is itself a + # JSON object carrying a `summary` key. Markdown fails the jq parse + # here and is left untouched. + if printf '%s' "$SUMMARY" | jq -e 'type == "object" and has("summary")' >/dev/null 2>&1; then + SUMMARY="$(printf '%s' "$SUMMARY" | jq -r '.summary // empty')" || SUMMARY='' + fi + # Random per-run delimiter for the multiline output block. + DELIM="EOF_$(openssl rand -hex 16)" + { + printf 'summary<<%s\n' "$DELIM" + printf '%s\n' "$SUMMARY" + printf '%s\n' "$DELIM" + } >> "$GITHUB_OUTPUT" + + # Publishes the agent's markdown; skipped when the summary is empty. + - name: Post or update smoke comment + if: + steps.plan.outputs.has_plan == 'true' && steps.extract.outputs.summary + != '' + uses: peter-evans/create-or-update-comment@v5 + with: + comment-id: ${{ steps.find-comment.outputs.comment-id }} + issue-number: ${{ steps.pr.outputs.number }} + body: ${{ steps.extract.outputs.summary }} + edit-mode: replace + + # Consolidated infrastructure-failure poster. Fires when the PR + # would otherwise be left with no contextual comment, in any of: + # - plan step itself errored (regex bug, runner exception) + # - has_plan == 'true' but Vercel never produced a usable preview + # - has_plan == 'true' and Vercel succeeded but the agent / + # extract step produced no summary (timeout, malformed JSON, + # missing `.summary` field) + # Without this, F4/F5/F6 leave the PR with a red check and either + # no comment or a stale prior-run comment. 
+ - name: Post infrastructure-failure comment + # Fires when the plan step failed, or when a plan exists but either + # the Vercel wait did not succeed or no summary was extracted. + if: | + always() && steps.pr.outcome == 'success' && ( + steps.plan.outcome == 'failure' || + (steps.plan.outputs.has_plan == 'true' && ( + steps.vercel.outcome != 'success' || + steps.extract.outputs.summary == '' + )) + ) + uses: peter-evans/create-or-update-comment@v5 + with: + issue-number: ${{ steps.pr.outputs.number }} + comment-id: ${{ steps.find-comment.outputs.comment-id }} + edit-mode: replace + body: | + + ## UI Preview Smoke + + Smoke run did not complete. See [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.