Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
332 changes: 283 additions & 49 deletions .github/workflows/ui-preview-smoke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,100 +64,334 @@ jobs:
core.setOutput('number', String(pr.number));
core.setOutput('head_sha', pr.head.sha);

# Cheap pre-flight gate: inspect the PR body for a UI test plan before
# spending ~5 min waiting on Vercel and ~$1 of agent runtime. When the
# author did not fill in `### How to test on Vercel preview`, this step
# reports `has_plan=false` so the job can post a skip comment instead of
# paying for a full smoke run.
#
# Outputs:
#   has_plan - 'true' only when the section exists, is not marked N/A,
#              has a non-empty `**Preview routes:**` line and at least
#              one numbered item after `**Steps:**`; otherwise 'false'.
#   routes   - trimmed text of the `**Preview routes:**` line (only set
#              when has_plan is 'true').
- name: Check for UI test plan
  id: plan
  uses: actions/github-script@v9
  with:
    script: |
      const fs = require('fs');
      const body = fs.readFileSync('/tmp/pr-body.md', 'utf8');

      // Every early exit reports has_plan=false plus a notice saying why.
      const skip = (reason) => {
        core.setOutput('has_plan', 'false');
        core.notice(reason);
      };

      const headingMatch = body.match(
        /^###\s+How to test on Vercel preview\s*$/im,
      );
      if (!headingMatch) {
        skip('No "### How to test on Vercel preview" section.');
        return;
      }

      // The section runs from the end of its heading up to the next
      // "### " heading, or to the end of the body if there is none.
      const start = headingMatch.index + headingMatch[0].length;
      const remainder = body.slice(start);
      const nextHeading = remainder.match(/^###\s/m);
      const section = nextHeading
        ? remainder.slice(0, nextHeading.index)
        : remainder;

      // Strip HTML comments — both the template explainer and any
      // placeholder hints like "<!-- e.g. /chart -->" inline.
      const cleaned = section.replace(/<!--[\s\S]*?-->/g, '');
      const trimmed = cleaned.trim();

      if (!trimmed) {
        skip('Section is empty after stripping comments.');
        return;
      }
      if (/^(n\/?a\b|non[-\s]?ui|no[-\s]+ui)/i.test(trimmed)) {
        skip('Section is marked N/A.');
        return;
      }

      // Only horizontal whitespace may follow the label. A plain `\s*`
      // would also swallow the line break, so an empty routes line
      // would capture the NEXT line (typically "**Steps:**") and pass
      // the gate with a bogus route list.
      const routesMatch = cleaned.match(
        /\*\*Preview routes:\*\*[^\S\n]*([^\n]*)/i,
      );
      const routes = routesMatch ? routesMatch[1].trim() : '';
      if (!routes) {
        skip('"**Preview routes:**" line is empty.');
        return;
      }

      const stepsMatch = cleaned.match(/\*\*Steps:\*\*([\s\S]*)/i);
      const stepsBlock = stepsMatch ? stepsMatch[1] : '';
      // Need at least one numbered list item with non-whitespace
      // content after the "1. " marker.
      const hasStep = /^\s*\d+\.\s+\S/m.test(stepsBlock);
      if (!hasStep) {
        skip('No numbered **Steps:** with content.');
        return;
      }

      core.setOutput('has_plan', 'true');
      core.setOutput('routes', routes);
      core.notice(`UI test plan found. Routes: ${routes}`);

# Look up the most recent bot comment carrying the
# `<!-- ui-preview-smoke -->` marker so later steps can edit it in place.
# Guarded with `always()` so the lookup still happens when the plan check
# threw; the infrastructure-failure poster further down then updates the
# sticky comment rather than adding a new one on every broken run.
- name: Find existing smoke comment
  if: always() && steps.pr.outcome == 'success'
  id: find-comment
  uses: peter-evans/find-comment@v4
  with:
    issue-number: ${{ steps.pr.outputs.number }}
    body-includes: '<!-- ui-preview-smoke -->'
    comment-author: 'github-actions[bot]'
    direction: last

# ─── Skip path ────────────────────────────────────────────────────
# The plan gate reported has_plan == 'false': replace (or create) the
# sticky smoke comment with a short note telling the author how to opt in.
- name: Post skip comment
  uses: peter-evans/create-or-update-comment@v5
  if: steps.plan.outputs.has_plan == 'false'
  with:
    issue-number: ${{ steps.pr.outputs.number }}
    comment-id: ${{ steps.find-comment.outputs.comment-id }}
    edit-mode: replace
    body: |
      <!-- ui-preview-smoke -->
      ## UI Preview Smoke

      Skipped: this PR has no `How to test on Vercel preview` plan.
      Add `**Preview routes:**` and a numbered `**Steps:**` list to
      enable automated smoke testing.

# ─── Full smoke path ──────────────────────────────────────────────
# Wait for the preview deployment. The step is marked
# `continue-on-error: true` so a Vercel timeout or deploy error does not
# abort the job: the steps that follow check
# `steps.vercel.outcome == 'success'`, and the consolidated fallback
# poster reports the failure on the PR.
- name: Wait for Vercel preview
  id: vercel
  if: steps.plan.outputs.has_plan == 'true'
  continue-on-error: true
  uses: patrickedqvist/wait-for-vercel-preview@v1.3.1
  with:
    token: ${{ secrets.GITHUB_TOKEN }}
    check_interval: 10
    max_timeout: 600
    # For workflow_dispatch we need to point at the PR head commit.
    # For pull_request_target the action picks up the PR sha automatically.
    # NOTE(review): no explicit commit override is configured in the
    # lines visible here — confirm the workflow_dispatch case is handled.

# Install Node for the Playwright tooling. Runs only when a plan was
# found and the Vercel wait step succeeded.
- name: Setup Node
  uses: actions/setup-node@v4
  if: steps.plan.outputs.has_plan == 'true' && steps.vercel.outcome == 'success'
  with:
    node-version: '22'

# Install the Playwright CLI globally, then fetch Chromium together with
# its OS-level dependencies. Same gate as the other full-smoke steps.
- name: Install Playwright + Chromium
  run: |
    npm install -g playwright
    playwright install --with-deps chromium
  if: steps.plan.outputs.has_plan == 'true' && steps.vercel.outcome == 'success'

# claude-code-action@v1 has no `mcp_servers` input (the action ignores
# it and warns at runtime). The supported mechanism is a `.mcp.json`
# at the working-directory root, which the action picks up because it
# auto-sets `enableAllProjectMcpServers: true` in Claude's settings.
# This step therefore writes that file instead of passing MCP config
# through `with:`.
#
# Parse the Vercel preview URL into a `scheme://host` origin and
# pass it to `@playwright/mcp` as `--allowed-origins`. The version
# is pinned (not `@latest`) so a future MCP release can't silently
# change browser/tool behavior under us. The step aborts if the URL
# doesn't match `^https?://[^/]+`, so bogus input can never produce a
# malformed `--allowed-origins=` arg.
#
# Residual: per the package's own README, `--allowed-origins` is
# "not a security boundary" — it's a navigation hint, not a
# process-level egress control. A determined attacker who lands
# arbitrary JS in the preview origin can still issue cross-origin
# `fetch`. We accept that as residual; the upstream fix is at the
# MCP/browser layer.
- name: Write MCP config (Playwright)
  if: steps.plan.outputs.has_plan == 'true' && steps.vercel.outcome == 'success'
  env:
    VERCEL_URL: ${{ steps.vercel.outputs.url }}
  run: |
    set -euo pipefail
    # Keep only scheme://host; anything else is a hard error.
    if [[ ! "$VERCEL_URL" =~ ^(https?://[^/]+) ]]; then
      echo "::error::Vercel URL '$VERCEL_URL' is not a valid origin" >&2
      exit 1
    fi
    ORIGIN="${BASH_REMATCH[1]}"
    # Unquoted heredoc delimiter on purpose: ${ORIGIN} must expand.
    cat > .mcp.json <<EOF
    {
      "mcpServers": {
        "playwright": {
          "command": "npx",
          "args": [
            "-y",
            "@playwright/mcp@0.0.75",
            "--browser=chromium",
            "--headless",
            "--allowed-origins=${ORIGIN}"
          ]
        }
      }
    }
    EOF

# Drive the preview with the Claude agent. The agent may only run
# `cat /tmp/pr-body.md` and the Playwright MCP tools, and must hand its
# result back as structured output (`summary`, per the JSON schema in
# `claude_args`) rather than commenting on the PR itself; later steps
# read `steps.agent.outputs.structured_output` and post the comment.
# `continue-on-error: true` keeps the job going when the agent fails so
# the fallback poster can still report on the PR.
- name: Run agent against preview
  if: steps.plan.outputs.has_plan == 'true' && steps.vercel.outcome == 'success'
  id: agent
  continue-on-error: true
  uses: anthropics/claude-code-action@v1
  with:
    anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
    github_token: ${{ secrets.GITHUB_TOKEN }}
    prompt: |
      Smoke-test PR #${{ steps.pr.outputs.number }} on its Vercel
      preview deploy.

      Preview URL: ${{ steps.vercel.outputs.url }}
      Repo: ${{ github.repository }}

      Read the PR body with: cat /tmp/pr-body.md

      The PR body is guaranteed to contain a
      "### How to test on Vercel preview" section with a non-empty
      `**Preview routes:**` line and at least one numbered step
      (the workflow gates on this before invoking you).

      This preview is built in LOCAL_MODE with a pre-configured demo
      ClickHouse connection and otel_logs / otel_traces sources. No
      registration or source setup is needed — open the URL and go.

      CRITICAL OUTPUT REQUIREMENTS:
      1. Return a JSON object with a single "summary" field whose
         VALUE is a plain markdown STRING. Do NOT put another JSON
         envelope inside the string — that posts raw JSON in the
         comment.
      2. The summary markdown MUST start with EXACTLY these two
         lines (the comment marker is required for the workflow to
         update the same comment on subsequent runs):
           <!-- ui-preview-smoke -->
           ## UI Preview Smoke
      3. Do NOT post comments yourself with `gh` or any other tool.
         The workflow posts (or updates) the PR comment using your
         `summary` field.

      Procedure:

      1. Read /tmp/pr-body.md and parse the "### How to test on
         Vercel preview" section:
         - "**Preview routes:**" line — comma-separated paths.
         - "**Steps:**" — numbered list of imperative actions.
      2. For each Preview route in order:
         a. Open `<Preview URL><route>` via the Playwright MCP
            browser tools (mcp__playwright__*).
         b. Execute the numbered steps verbatim, in order.
         c. Treat any step beginning with "Verify", "Confirm",
            "Assert", "Check", or "Ensure" as an assertion. If an
            assertion fails, record the failure and continue to
            the next route.
         d. After each route capture: any console errors at level
            "error", any 4xx/5xx network responses, any uncaught
            exception dialogs.
      3. Build the summary markdown with one section per route.
         Use ✅ for routes that passed every assertion, ❌ for any
         route with a failed assertion, console error, or 5xx
         response. For each failure include the step text, what was
         asserted, and what you observed instead.

      Constraints:
      - Do not invent steps the author didn't write.
      - Do not exercise routes outside the "Preview routes:" list.
      - If a step is ambiguous, note the ambiguity in the summary
        and proceed with your best interpretation. Never fabricate
        an assertion that wasn't requested.
      - Cap total runtime at 8 minutes. If a single step hangs more
        than 30s, mark it failed and continue.

    claude_args: |
      --setting-sources user
      --allowedTools "Bash(cat /tmp/pr-body.md),mcp__playwright__*"
      --json-schema '{"type":"object","properties":{"summary":{"type":"string","description":"Complete markdown summary starting with <!-- ui-preview-smoke --> on the first line and ## UI Preview Smoke on the second line"}},"required":["summary"]}'

# The agent's structured_output is a JSON string. Pull the `summary`
# field via jq. Defensive double-unwrap mirrors the workaround in
# claude-code-review.yml: the model has been observed to nest its
# output as `{"summary":"{\"summary\":\"<markdown>\"}"}`, which would
# post raw JSON instead of markdown.
#
# `.summary // empty` (rather than plain `.summary`) matters: with
# `jq -r`, a missing or null field would otherwise print the literal
# text "null", which is non-empty and would be posted as the comment
# instead of triggering the fallback poster.
#
# Per-run random heredoc delimiter so attacker-influenced summary
# content (the agent's output reflects the PR body, which on a fork
# PR is fully attacker-controlled) can't land the literal delimiter
# on its own line and inject `name=value` pairs into $GITHUB_OUTPUT.
#
# `set -euo pipefail` plus `|| SUMMARY=''` on the jq parse means
# any malformed structured_output yields an empty SUMMARY; the
# consolidated fallback poster below picks that up.
- name: Extract summary from structured output
  if: >-
    steps.plan.outputs.has_plan == 'true' &&
    steps.agent.outputs.structured_output != ''
  id: extract
  continue-on-error: true
  env:
    STRUCTURED_OUTPUT: ${{ steps.agent.outputs.structured_output }}
  run: |
    set -euo pipefail
    SUMMARY="$(printf '%s' "$STRUCTURED_OUTPUT" | jq -r '.summary // empty')" || SUMMARY=''
    # If what we extracted is itself a {"summary": ...} envelope, unwrap once more.
    if printf '%s' "$SUMMARY" | jq -e 'type == "object" and has("summary")' >/dev/null 2>&1; then
      SUMMARY="$(printf '%s' "$SUMMARY" | jq -r '.summary // empty')" || SUMMARY=''
    fi
    DELIM="EOF_$(openssl rand -hex 16)"
    {
      printf 'summary<<%s\n' "$DELIM"
      printf '%s\n' "$SUMMARY"
      printf '%s\n' "$DELIM"
    } >> "$GITHUB_OUTPUT"

# Happy path: a plan exists and the extract step produced a non-empty
# summary. Replace the sticky comment found earlier with that markdown,
# or create the comment when no comment id was found.
- name: Post or update smoke comment
  uses: peter-evans/create-or-update-comment@v5
  if: steps.plan.outputs.has_plan == 'true' && steps.extract.outputs.summary != ''
  with:
    issue-number: ${{ steps.pr.outputs.number }}
    comment-id: ${{ steps.find-comment.outputs.comment-id }}
    edit-mode: replace
    body: ${{ steps.extract.outputs.summary }}

# Consolidated infrastructure-failure poster. It covers every case in
# which the PR would otherwise end up without a contextual comment:
#   - the plan step itself errored (regex bug, runner exception);
#   - a plan exists but Vercel never produced a usable preview;
#   - a plan exists and Vercel succeeded, yet the agent / extract steps
#     yielded no summary (timeout, malformed JSON, missing `.summary`).
# Without it, those failure modes leave the PR with a red check and
# either no comment at all or a stale comment from an earlier run.
- name: Post infrastructure-failure comment
  uses: peter-evans/create-or-update-comment@v5
  if: |
    always() && steps.pr.outcome == 'success' && (
      steps.plan.outcome == 'failure' || (
        steps.plan.outputs.has_plan == 'true' && (
          steps.vercel.outcome != 'success' ||
          steps.extract.outputs.summary == ''
        )
      )
    )
  with:
    issue-number: ${{ steps.pr.outputs.number }}
    comment-id: ${{ steps.find-comment.outputs.comment-id }}
    edit-mode: replace
    body: |
      <!-- ui-preview-smoke -->
      ## UI Preview Smoke

      Smoke run did not complete. See [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
Loading