Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions .github/workflows/docs-ui-tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
name: Docs UI drift tests

# Checks the Console-UI claims extracted from the platform docs against Console
# staging, using Playwright. The assertions are a committed, human-reviewed
# baseline (docs-tests/assertions/). This workflow only evaluates that
# baseline; extraction is a local step and never runs in CI.
#
# Credentials are supplied by repo Actions secrets (CONSOLE_STAGING_*). The
# seeded staging user is low-privilege and has no 2FA; login is scripted fresh
# each run and the session is held in memory (no auth file).

on:
  # Every Monday 06:00 UTC.
  schedule:
    - cron: '0 6 * * 1'
  # Manual on-demand run.
  workflow_dispatch:

defaults:
  run:
    # Applies to `run:` steps only; `uses:` steps still resolve paths from the
    # repository root.
    working-directory: docs-tests

jobs:
  # Evaluates the committed assertion baseline against Console staging and, on
  # any failure, reports it through a GitHub issue labelled `docs-ui-drift`.
  ui-drift:
    runs-on: ubuntu-latest
    # Cap the run so a hung browser or an unreachable staging host cannot hold
    # a runner until the 360-minute platform default. Raise if the suite grows.
    timeout-minutes: 30
    permissions:
      contents: read
      issues: write  # the failure step files a drift issue for maintainers
    steps:
      - uses: actions/checkout@v6
        with:
          # No later step pushes or fetches, so do not leave the job token
          # (which carries `issues: write`) reachable by the dependency
          # install scripts and test code that run afterwards.
          persist-credentials: false

      - name: Use Node.js 24
        uses: actions/setup-node@v6
        with:
          # Quoted so the version is always passed as a string.
          node-version: '24'

      - name: Install pnpm
        # Version is read from the repo root package.json ("packageManager"
        # field); do not also pass `version` here or the action errors on a
        # version conflict.
        uses: pnpm/action-setup@v4

      - name: Install docs-tests dependencies
        run: pnpm install --frozen-lockfile

      - name: Install Playwright browser
        run: pnpm exec playwright install --with-deps chromium

      - name: Run docs UI drift tests
        env:
          CI: 'true'
          CONSOLE_STAGING_URL: ${{ secrets.CONSOLE_STAGING_URL }}
          CONSOLE_STAGING_USER_EMAIL: ${{ secrets.CONSOLE_STAGING_USER_EMAIL }}
          CONSOLE_STAGING_USER_PASSWORD: ${{ secrets.CONSOLE_STAGING_USER_PASSWORD }}
        run: pnpm test

      # NOTE(review): artifacts of a public repository can be downloaded by any
      # signed-in GitHub user. Confirm the Playwright report (screenshots,
      # video, trace) and issues.json expose neither the staging URL nor the
      # credentials typed during login; otherwise restrict what is uploaded.
      - name: Upload Playwright report
        # Still runs after failed tests, but not when the run was cancelled.
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          # `uses:` steps ignore defaults.run.working-directory, hence the
          # explicit docs-tests/ prefix.
          name: playwright-report
          path: docs-tests/playwright-report/
          retention-days: 14

      - name: Upload drift report (issues.json)
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: drift-report
          path: docs-tests/output/
          retention-days: 14

      # On any failure (drift found, or the job broke before tests ran),
      # open a GitHub issue for the docs maintainers. To avoid a new issue
      # every week, reuse the open `docs-ui-drift` issue if one exists and
      # comment on it instead. The body is kept public-safe: it links to
      # the run (where the report/artifacts live) and lists only assertion
      # IDs + their doc source — never the staging URL or credentials.
      - name: File a drift issue for maintainers
        if: failure()
        env:
          GH_TOKEN: ${{ github.token }}
          REPO: ${{ github.repository }}
          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          set -euo pipefail
          REPORT=output/issues.json
          BODY="$(mktemp)"

          # Only treat the report as drift when it parses AND lists at least
          # one failing assertion. A missing, malformed, or all-green report
          # means the job failed somewhere else; falling through to the
          # generic body keeps `set -e` from aborting before an issue is filed
          # and avoids a misleading "0 failing assertion(s)" message.
          if [[ -f "$REPORT" ]] && jq -e '(.summary.failed // 0) > 0' "$REPORT" > /dev/null 2>&1; then
            PASSED=$(jq -r '.summary.passed // 0' "$REPORT")
            FAILED=$(jq -r '.summary.failed' "$REPORT")
            SKIPPED=$(jq -r '.summary.skipped // 0' "$REPORT")
            TOTAL=$(jq -r '.summary.total // 0' "$REPORT")
            {
              echo "The scheduled docs-UI drift check found **${FAILED} failing assertion(s)** (${PASSED}/${TOTAL} passed, ${SKIPPED} skipped)."
              echo ""
              echo "Filed automatically by the \`Docs UI drift tests\` workflow. Logs, the Playwright report, and \`issues.json\` are on the [workflow run](${RUN_URL})."
              echo ""
              echo "### Failing assertions"
              echo ""
              echo "| Assertion | Kind | Target | Doc source |"
              echo "| --- | --- | --- | --- |"
              # `cell` escapes pipes so a UI label containing `|` cannot break
              # the Markdown table; `[]?` tolerates a report without `.issues`.
              jq -r '
                def cell: tostring | gsub("\\|"; "\\|");
                .issues[]?
                | "| `\(.id)` | \(.kind) | \(.target | cell) | `\(.source_file):\(.source_line)` |"
              ' "$REPORT"
              echo ""
              echo "Each failure is either genuine doc drift (the Console changed, the docs did not) or a harness limitation (selector needs a click first, or a logged-out/session-gated page). Triage against the Playwright report."
            } > "$BODY"
          else
            {
              echo "The \`Docs UI drift tests\` workflow failed without reporting any failing assertions (likely a setup or login error, or a failure outside the test run)."
              echo ""
              echo "See the [workflow run](${RUN_URL}) for logs."
            } > "$BODY"
          fi

          # Ensure the dedup label exists (upsert), then reuse an open
          # issue if present, otherwise create a new one. No assignee;
          # labels are docs-ui-drift (dedup marker) + t-docs.
          # NOTE(review): `t-docs` is not created here and is assumed to
          # already exist in the repository — confirm.
          gh label create docs-ui-drift --repo "$REPO" \
            --color BFD4F2 --description "Automated Console UI drift report" --force || true

          EXISTING=$(gh issue list --repo "$REPO" --state open --label docs-ui-drift \
            --limit 1 --json number --jq '.[0].number // empty')

          if [[ -n "$EXISTING" ]]; then
            gh issue comment "$EXISTING" --repo "$REPO" --body-file "$BODY"
            echo "Commented on existing drift issue #${EXISTING}"
          else
            gh issue create --repo "$REPO" \
              --title "Docs UI drift detected by scheduled tests" \
              --body-file "$BODY" \
              --label docs-ui-drift --label t-docs
          fi
1 change: 1 addition & 0 deletions .markdownlintignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ CLAUDE.md
GEMINI.md
standards
sources/_partials/*
docs-tests
7 changes: 7 additions & 0 deletions .vale.ini
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,10 @@ BasedOnStyles =

[.agents/**/*.md]
BasedOnStyles =

[docs-tests/**/*.md]
# Ignore all Vale checks in docs-tests (internal tooling, not published docs).
# BasedOnStyles = clears style-based rules; Microsoft.Dashes is toggled on
# explicitly above, so it must be turned off explicitly here too.
BasedOnStyles =
Microsoft.Dashes = NO
8 changes: 8 additions & 0 deletions docs-tests/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Staging Console base URL (required).
CONSOLE_STAGING_URL=https://console.staging-example.apify.dev

# Seeded staging user (required to run the tests). Used by tests/auth.fixture.ts
# to log in fresh each run. Locally these live here; in CI they come from GitHub
# Secrets, never from a committed file. The seeded account has no 2FA.
CONSOLE_STAGING_USER_EMAIL=
CONSOLE_STAGING_USER_PASSWORD=
11 changes: 11 additions & 0 deletions docs-tests/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Never commit a logged-in session or local secrets to this public repo.
node_modules/
.env
auth.json
playwright-report/
test-results/
.DS_Store

# Runtime outputs. The stored assertion baseline (assertions/) IS committed;
# only the per-run report lives here.
output/
135 changes: 135 additions & 0 deletions docs-tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# docs-tests

Docs-as-tests for the Apify Console. Every UI claim in the platform docs — a
route resolves, a tab is named X, a button exists on page Y — is a testable
assertion. This package extracts those assertions from the docs with an LLM,
stores them as a reviewed baseline, and verifies them against Console staging
with Playwright, so documentation drift is caught automatically.

```
pages.json ──extract──▶ assertions/*.json ──Playwright──▶ output/issues.json
(page list) (claude -p) (committed baseline) (vs staging) (drift report)
```

## Model

1. **`pages.json`** is an adjustable list of documentation pages (real source
files under `sources/platform/…`) to cover.
2. **`scripts/extract.sh`** feeds one page to `claude -p` with a strict JSON
schema and writes the result to `assertions/<slug>.json`.
`scripts/extract-all.sh` does the whole manifest.
3. **`assertions/`** is the *stored, reviewed baseline* — committed to the repo.
Regenerate it with the LLM whenever docs change, review the diff, commit.
The assertion set is owned by humans even though a model drafts it.
4. **`tests/from-doc.spec.ts`** reads every stored assertion and emits one
Playwright `test()` per assertion, run against `$CONSOLE_STAGING_URL`.
5. Failures point back to `source_file:line` so the offending prose is one click
away, and land in `output/issues.json` for downstream triage.

The Notion plan *"AI-based testing for docs"* (its Part 1 routes + Part 2
elements) is the inspiration for which pages and claims to cover — not a fixed
transcription. The authoritative set is whatever the manifest + extractor
produce and a human commits.

## Assertion kinds

| Kind | Checks |
| ---------------- | ------------------------------------------------------------------ |
| `route` | Documented path is reachable (HTTP < 400) |
| `element_tab` | Documented tab label exists on the page named in `at` |
| `element_button` | Documented button label exists on the page named in `at` |
| `element_text` | Documented heading/label/field is visible on the page named in `at` |

## One-time setup

```bash
pnpm install
pnpm exec playwright install chromium
cp .env.example .env # fill in CONSOLE_STAGING_URL + seeded-user email/password
```

## Generate / refresh the assertion baseline

```bash
# Every page in pages.json:
pnpm extract:all

# Or a single page:
pnpm extract sources/platform/console/settings.md
```

Review the diff in `assertions/`, then commit. This is the step a human owns.

## Run the tests

```bash
pnpm test # evaluate all stored assertions against staging
pnpm issues # machine-readable, action-oriented failures
pnpm report # HTML report (failures include screenshots, video, trace)
```

Authentication is automatic: a worker-scoped fixture (`tests/auth.fixture.ts`)
logs in once per Playwright worker with `CONSOLE_STAGING_USER_EMAIL` / `_PASSWORD` and keeps
the session in memory. **No `auth.json` is written or read** — nothing has to
pre-exist, so it behaves identically locally and in CI (where the credentials
come from GitHub Secrets). The seeded staging user has no 2FA.

`pnpm test` always writes `output/issues.json` — a summary plus one entry per
failing assertion, sorted by `source_line`, each carrying `source_file:line`,
the offending `source_quote`, and a one-line error. For `element_*` failures it
also captures the live page's same-kind labels (`observed_candidates`) and, when
unambiguous, a `suggested_target`, so a downstream LLM can propose a doc fix
without re-running the browser.

## Adjusting coverage

Edit `pages.json` and re-run `pnpm extract:all`. Add a page → it gets an
assertion set; remove one → delete its `assertions/<slug>.json`.

## CI

`.github/workflows/docs-ui-tests.yaml` runs the evaluation on a weekly schedule
and on manual dispatch: it installs Playwright, logs in with the
`CONSOLE_STAGING_*` repo secrets, evaluates the committed baseline against
staging, uploads the report, and — on any failure, whether drift or a broken
run — opens a `docs-ui-drift` issue (or comments on the one already open).
Extraction never runs in CI — the reviewed baseline is the only input.

## Known gaps (deferred)

- **Coverage is a starting slice.** `pages.json` covers the Console section
(index, settings, billing, store) — routes and landing-page elements. Widening
to more pages is a follow-up: add to `pages.json`, re-extract, review, commit.
- **Detail-page fixtures.** Assertions about Actor-detail, Schedule-detail, etc.
need a known fixture to navigate to. The runner currently *skips* element
assertions with no `at` route — surfacing the gap without false negatives.
Requires the seeded-user fixtures (1 Actor, 1 task, 1 schedule, named storages,
1 webhook, 1 completed run) from the Notion plan.
- **Left-nav group check.** The documented global nav items (Dashboard/Store/
Actors/…) are a Console-wide check, not a per-page claim — not modeled yet.
- **Multi-step flows.** The schema only supports atomic claims (one
navigate-then-check). "Click X, then Y, then Z" sequences are not modeled.
- **Surface mismatch.** Some docs describe the public marketing site (e.g.
`apify.com/store`) while the harness tests the Console (`/store`); the two can
use different labels, so those element claims may not map. Check the surface a
page actually documents before adding element assertions for it.

## Files

```
docs-tests/
├── pages.json # adjustable list of docs pages to cover
├── assertions/ # committed baseline, one JSON per page (generated)
├── prompts/
│ ├── extract-system.md # system prompt + known-routes table
│ └── assertion-schema.json # JSON Schema for the extractor output
├── scripts/
│ ├── extract.sh # one page → assertions/<slug>.json
│ └── extract-all.sh # whole manifest
├── reporters/issues-reporter.ts # custom Playwright reporter → output/issues.json
├── tests/
│ ├── auth.fixture.ts # worker-scoped login from env creds (in-memory session)
│ ├── from-doc.spec.ts # reads assertions/*.json, emits tests
│ └── similarity.ts # suggest-replacement helper for failures
├── playwright.config.ts
└── .env # CONSOLE_STAGING_URL + seeded-user email/password (gitignored)
```
1 change: 1 addition & 0 deletions docs-tests/assertions/console-billing.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"source_file":"sources/platform/console/billing.md","assertions":[{"id":"billing-route","kind":"route","target":"/billing","page_context":"Billing > Current period page","source_quote":"The **Current period** tab is a comprehensive resource for understanding your platform usage during the ongoing billing cycle.","source_line":11,"needs_auth":true},{"id":"billing-historical-usage-route","kind":"route","target":"/billing/historical-usage","page_context":"Billing > Historical usage page","source_quote":"The **Historical usage** tab provides a detailed view of your monthly platform usage, excluding any free Actor compute units or discounts from your subscription plan.","source_line":21,"needs_auth":true},{"id":"billing-subscription-route","kind":"route","target":"/billing/subscription","page_context":"Billing > Subscription page","source_quote":"Navigate to [Subscription](https://console.apify.com/billing/subscription) section in Apify Console, and click the **Buy add-ons** button to explore the available options.","source_line":53,"needs_auth":true},{"id":"billing-limits-route","kind":"route","target":"/billing#/limits","page_context":"Billing > Limits page","source_quote":"The **Limits** tab displays the usage limits for the Apify platform based on your current subscription plan.","source_line":73,"needs_auth":true},{"id":"current-period-tab","kind":"element_tab","target":"Current period","at":"/billing","page_context":"Billing page tab navigation","source_quote":"The **Current period** tab is a comprehensive resource for understanding your platform usage during the ongoing billing cycle.","source_line":11,"needs_auth":true},{"id":"historical-usage-tab","kind":"element_tab","target":"Historical usage","at":"/billing","page_context":"Billing page tab navigation","source_quote":"The **Historical usage** tab provides a detailed view of your monthly platform 
usage","source_line":21,"needs_auth":true},{"id":"subscription-tab","kind":"element_tab","target":"Subscription","at":"/billing","page_context":"Billing page tab navigation","source_quote":"The **Subscription** tab offers a central location to manage various aspects of your subscription plan.","source_line":37,"needs_auth":true},{"id":"pricing-tab","kind":"element_tab","target":"Pricing","at":"/billing","page_context":"Billing page tab navigation","source_quote":"The **Pricing** tab offers a way to quickly check all unit pricing for various platform services related to Apify usage for your account.","source_line":61,"needs_auth":true},{"id":"invoices-tab","kind":"element_tab","target":"Invoices","at":"/billing","page_context":"Billing page tab navigation","source_quote":"The **Invoices** tab is where you can find your current and previous invoices for Apify platform usage.","source_line":67,"needs_auth":true},{"id":"limits-tab","kind":"element_tab","target":"Limits","at":"/billing","page_context":"Billing page tab navigation","source_quote":"The **Limits** tab displays the usage limits for the Apify platform based on your current subscription plan.","source_line":73,"needs_auth":true},{"id":"plan-consumption-graph-text","kind":"element_text","target":"Plan Consumption","at":"/billing","page_context":"Billing > Current period tab, Plan Consumption graph","source_quote":"the tab features a **Plan Consumption** Graph. 
It shows how much of your free or paid plan has been utilized up to this point.","source_line":13,"needs_auth":true},{"id":"actors-statistics-section","kind":"element_text","target":"Actors","at":"/billing","page_context":"Billing > Current period tab, statistics sections under graph","source_quote":"You can access detailed statistics related to **Actors**, **Data transfer**, **Proxy**, and **Storage**.","source_line":15,"needs_auth":true},{"id":"data-transfer-statistics-section","kind":"element_text","target":"Data transfer","at":"/billing","page_context":"Billing > Current period tab, statistics sections under graph","source_quote":"You can access detailed statistics related to **Actors**, **Data transfer**, **Proxy**, and **Storage**.","source_line":15,"needs_auth":true},{"id":"proxy-statistics-section","kind":"element_text","target":"Proxy","at":"/billing","page_context":"Billing > Current period tab, statistics sections under graph","source_quote":"You can access detailed statistics related to **Actors**, **Data transfer**, **Proxy**, and **Storage**.","source_line":15,"needs_auth":true},{"id":"storage-statistics-section","kind":"element_text","target":"Storage","at":"/billing","page_context":"Billing > Current period tab, statistics sections under graph","source_quote":"You can access detailed statistics related to **Actors**, **Data transfer**, **Proxy**, and **Storage**.","source_line":15,"needs_auth":true},{"id":"usage-by-actors-table","kind":"element_text","target":"Usage by Actors","at":"/billing/historical-usage","page_context":"Billing > Historical usage tab, table below the bar chart","source_quote":"Below the bar chart, there is a table titled **Usage by Actors**. 
This table presents a detailed breakdown of the Compute units used per Actor and the associated costs.","source_line":31,"needs_auth":true},{"id":"buy-add-ons-button","kind":"element_button","target":"Buy add-ons","at":"/billing/subscription","page_context":"Billing > Subscription tab, Plan add-ons section","source_quote":"click the **Buy add-ons** button to explore the available options.","source_line":53,"needs_auth":true}]}
Loading
Loading