From 43b8674f3444a80663a5734eb50bc143670069d9 Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 8 Jun 2026 22:51:41 +0000 Subject: [PATCH 1/2] Add webchat HTML test skill --- .github/skills/webchat-html-tests/SKILL.md | 97 +++++++++++++++++++ .../references/architecture.md | 35 +++++++ .../references/failure-modes.md | 30 ++++++ .../scripts/cleanup-grid-sessions.py | 82 ++++++++++++++++ .../scripts/run-html-tests.sh | 24 +++++ .../webchat-html-tests/scripts/start-grid.sh | 10 ++ .../scripts/wait-for-grid.py | 56 +++++++++++ 7 files changed, 334 insertions(+) create mode 100644 .github/skills/webchat-html-tests/SKILL.md create mode 100644 .github/skills/webchat-html-tests/references/architecture.md create mode 100644 .github/skills/webchat-html-tests/references/failure-modes.md create mode 100755 .github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py create mode 100755 .github/skills/webchat-html-tests/scripts/run-html-tests.sh create mode 100755 .github/skills/webchat-html-tests/scripts/start-grid.sh create mode 100755 .github/skills/webchat-html-tests/scripts/wait-for-grid.py diff --git a/.github/skills/webchat-html-tests/SKILL.md b/.github/skills/webchat-html-tests/SKILL.md new file mode 100644 index 0000000000..195442a002 --- /dev/null +++ b/.github/skills/webchat-html-tests/SKILL.md @@ -0,0 +1,97 @@ +--- +name: webchat-html-tests +description: 'Run Bot Framework Web Chat HTML tests in Docker/Selenium Grid. Use when: running __tests__/html2, debugging failing HTML tests, updating snapshots, checking grid health, or cleaning leaked Selenium sessions.' +argument-hint: 'Optional HTML test regex' +--- + +# Web Chat HTML Tests + +Run the HTML test harness, keep Selenium Grid healthy, and debug failures without keeping all of the operational detail in the skill body. + +## When to Use + +- Running `__tests__/html2` for a branch or PR +- Debugging a failing HTML or snapshot test +- Updating snapshots after an intentional visual change +- Checking whether Selenium Grid is ready or leaking sessions + +## Procedure + +### 1. Start Selenium Grid + +Use the bundled script instead of pasting long Docker commands. + +For a focused test run, keep the default scale of 2 Chrome nodes: + +```sh +./.github/skills/webchat-html-tests/scripts/start-grid.sh +``` + +For a full run, match Jest's 4 workers: + +```sh +CHROME_SCALE=4 ./.github/skills/webchat-html-tests/scripts/start-grid.sh +``` + +### 2. Wait for Grid Readiness + +```sh +python3 ./.github/skills/webchat-html-tests/scripts/wait-for-grid.py +``` + +Stop if the script times out or if the node summary does not show ready nodes. + +### 3. Run Tests + +Run the full suite: + +```sh +./.github/skills/webchat-html-tests/scripts/run-html-tests.sh +``` + +Run a focused HTML test with a regex anchored to the exact file: + +```sh +./.github/skills/webchat-html-tests/scripts/run-html-tests.sh "__tests__/html2/activity/message-status\.html$" +``` + +Update snapshots for an expected visual change: + +```sh +./.github/skills/webchat-html-tests/scripts/run-html-tests.sh --update "__tests__/html2/activity/message-status\.html$" +``` + +### 4. Clean Grid Sessions After Every Run + +```sh +python3 ./.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py +``` + +If leaked sessions remain, clean them before the next Jest run or the grid can stall. + +### 5. Recover Common Infra Problems + +If dist files return 404s after a local build, restart `webchat2`: + +```sh +docker compose -f docker-compose-wsl2.yml restart webchat2 +``` + +If a failure is not obvious, load the reference docs before changing code: + +- [Architecture and test layout](./references/architecture.md) +- [Failure modes and snapshot workflow](./references/failure-modes.md) + +### 6. Tear Down + +```sh +docker compose -f docker-compose-wsl2.yml down +``` + +## Checklist + +- [ ] Grid is ready before running Jest +- [ ] Focused tests use an anchored regex when targeting one HTML file +- [ ] Sessions are cleaned after every run +- [ ] Snapshot updates are rerun without `--update` +- [ ] `npm run precommit` passes before opening the PR diff --git a/.github/skills/webchat-html-tests/references/architecture.md b/.github/skills/webchat-html-tests/references/architecture.md new file mode 100644 index 0000000000..daba31abe9 --- /dev/null +++ b/.github/skills/webchat-html-tests/references/architecture.md @@ -0,0 +1,35 @@ +# Architecture and Test Layout + +HTML tests live under `__tests__/html2/`. + +Jest picks them up through the `html2` project declared in `jest.config.js`, then runs them in a WebDriver-backed environment against Selenium Grid. + +## Key Pieces + +- `docker-compose-wsl2.yml`: Selenium hub, Chrome nodes, `webchat2`, and `jest-server` +- `jest.config.js`: top-level worker count and project wiring +- `packages/test/harness/`: WebDriver environment, host bridge, and snapshot support +- `packages/test/page-object/src/globals`: `host`, `pageConditions`, and `pageElements` + +## Redirect Test Pattern + +Many tests have a base file plus small redirect files for theme or variant coverage. + +- Base file example: `message-status.html` +- Redirect file example: `message-status.copilot.html` + +Redirect files usually change `location` to the base test with query parameters such as: + +- `?variant=fluent` +- `?variant=copilot` +- `?fluent-theme=dark` +- `?variant=copilot&fluent-theme=dark` + +When debugging a redirect failure, inspect the redirect target first because the real test logic usually lives there. + +## Regex Targeting Reminder + +`--testPathPattern` is a regex. + +- Use `message-status\.html$` when you want only the base file +- Use `message-status\.` when you want the base file plus its redirect variants diff --git a/.github/skills/webchat-html-tests/references/failure-modes.md b/.github/skills/webchat-html-tests/references/failure-modes.md new file mode 100644 index 0000000000..6010ac0ffa --- /dev/null +++ b/.github/skills/webchat-html-tests/references/failure-modes.md @@ -0,0 +1,30 @@ +# Failure Modes and Snapshot Workflow + +## Common Failures + +| Error pattern | Likely cause | Recovery | +| --------------------------------------------------------------------- | ----------------------------------------- | --------------------------------------------------------------------------------------------------- | +| `Expected image to match or be a close match to snapshot` | Real visual change or regression | Inspect the diff PNG, then decide whether to update snapshots or fix the code | +| Timeout or hanging Jest run | Selenium sessions leaked | Run `python3 ./.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py` before rerunning | +| `Failed to load resource: 404 (Not Found)` for dist files | `webchat2` serving stale or empty content | `docker compose -f docker-compose-wsl2.yml restart webchat2` | +| `Cannot read properties of undefined (reading 'FluentThemeProvider')` | Fluent bundle returned 404 | Restart `webchat2` and confirm the local build completed | +| `Cannot read properties of undefined (reading 'ReactWebChat')` | Main bundle returned 404 | Restart `webchat2` and confirm the local build completed | + +## Snapshot Update Flow + +1. Reproduce the failure without `--update`. +2. Inspect the generated `.snap-N-diff.png` file next to the HTML test. +3. Only if the change is intentional, rerun with `--update`. +4. Rerun the same test again without `--update` to confirm the snapshot is now stable. +5. Clean leaked Selenium sessions between runs. + +## HTML Test Anatomy + +Most HTML tests follow this structure: + +- Load `/test-harness.js` and `/test-page-object.js` +- Create a Direct Line emulator with `testHelpers.createDirectLineEmulator()` +- Drive activities with `emulateIncomingActivity()` or `emulateOutgoingActivity()` +- Assert through `pageConditions` and `pageElements` +- Capture snapshots with `await host.snapshot('local')` +- Use `expect`, not `assert` diff --git a/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py b/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py new file mode 100755 index 0000000000..723c0c3ad0 --- /dev/null +++ b/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +import json +import os +import sys +import urllib.error +import urllib.request + + +GRID_STATUS_URL = os.environ.get('GRID_STATUS_URL', 'http://localhost:4444/status') +GRID_SESSION_URL = os.environ.get('GRID_SESSION_URL', 'http://localhost:4444/session') + + +def fetch_status(): + with urllib.request.urlopen(GRID_STATUS_URL) as response: + return json.load(response) + + +def delete_session(session_id): + request = urllib.request.Request(f'{GRID_SESSION_URL}/{session_id}', method='DELETE') + + with urllib.request.urlopen(request): + return + + +try: + payload = fetch_status() +except urllib.error.URLError as error: + print(f'Failed to fetch Selenium Grid status: {error}', file=sys.stderr) + sys.exit(1) + +busy_sessions = [] + +for node in payload.get('value', {}).get('nodes') or []: + for slot in node.get('slots') or []: + session = slot.get('session') + session and busy_sessions.append(session.get('sessionId')) + +print(f'Busy sessions: {len(busy_sessions)}') + +for session_id in busy_sessions: + try: + delete_session(session_id) + except urllib.error.URLError as error: + print(f'Error deleting {session_id}: {error}', file=sys.stderr) + continue + + print(f'Deleted {session_id}')#!/usr/bin/env python3 + +import json +import sys +import urllib.request + + +STATUS_URL = 'http://localhost:4444/status' + + +def main() -> int: + with urllib.request.urlopen(STATUS_URL) as response: + status = json.load(response) + + nodes = (status.get('value') or {}).get('nodes') or [] + sessions = [] + + for node in nodes: + for slot in node.get('slots') or []: + session = slot.get('session') + session and sessions.append(session.get('sessionId')) + + print(f'Busy sessions: {len(sessions)}') + + for session_id in sessions: + request = urllib.request.Request(f'http://localhost:4444/session/{session_id}', method='DELETE') + + with urllib.request.urlopen(request): + print(f'Deleted {session_id}') + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) \ No newline at end of file diff --git a/.github/skills/webchat-html-tests/scripts/run-html-tests.sh b/.github/skills/webchat-html-tests/scripts/run-html-tests.sh new file mode 100755 index 0000000000..a8a818460b --- /dev/null +++ b/.github/skills/webchat-html-tests/scripts/run-html-tests.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -euo pipefail + +update_snapshots=false +test_pattern='' + +for argument in "$@"; do + if [[ "$argument" == '--update' ]]; then + update_snapshots=true + elif [[ -z "$test_pattern" ]]; then + test_pattern="$argument" + else + printf 'Usage: %s [--update] [test-path-regex]\n' "$0" >&2 + exit 1 + fi +done + +arguments=() + +$update_snapshots && arguments+=('-u') +[[ -n "$test_pattern" ]] && arguments+=('--testPathPattern' "$test_pattern") + +npm test -- "${arguments[@]}" \ No newline at end of file diff --git a/.github/skills/webchat-html-tests/scripts/start-grid.sh b/.github/skills/webchat-html-tests/scripts/start-grid.sh new file mode 100755 index 0000000000..30f62247b7 --- /dev/null +++ b/.github/skills/webchat-html-tests/scripts/start-grid.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -euo pipefail + +readonly registry="${REGISTRY:-mcr.microsoft.com}" +readonly chrome_scale="${CHROME_SCALE:-2}" + +docker compose -f docker-compose-wsl2.yml build --build-arg "REGISTRY=${registry}" +docker compose -f docker-compose-wsl2.yml up --detach --scale "chrome=${chrome_scale}" +docker compose -f docker-compose-wsl2.yml restart webchat2 \ No newline at end of file diff --git a/.github/skills/webchat-html-tests/scripts/wait-for-grid.py b/.github/skills/webchat-html-tests/scripts/wait-for-grid.py new file mode 100755 index 0000000000..b2883942b3 --- /dev/null +++ b/.github/skills/webchat-html-tests/scripts/wait-for-grid.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 + +import json +import os +import sys +import time +import urllib.error +import urllib.request + + +GRID_URL = os.environ.get('GRID_URL', 'http://localhost:4444/wd/hub/status') +POLL_INTERVAL_SECONDS = float(os.environ.get('GRID_POLL_INTERVAL_SECONDS', '2')) +TIMEOUT_SECONDS = float(os.environ.get('GRID_TIMEOUT_SECONDS', '60')) + + +def fetch_status(): + with urllib.request.urlopen(GRID_URL) as response: + return json.load(response) + + +def print_summary(payload): + value = payload.get('value') or {} + message = value.get('message', 'No message returned') + ready = value.get('ready', False) + print(f'{message} [ready={ready}]') + + for node in value.get('nodes') or []: + node_id = node.get('id', '') + availability = node.get('availability', '') + print(f'node {node_id} {availability}') + + +deadline = time.monotonic() + TIMEOUT_SECONDS +last_payload = None + +while time.monotonic() < deadline: + try: + payload = fetch_status() + except (OSError, urllib.error.URLError) as error: + print(f'Waiting for Selenium Grid: {error}', file=sys.stderr) + time.sleep(POLL_INTERVAL_SECONDS) + continue + + last_payload = payload + value = payload.get('value') or {} + + if value.get('ready'): + print_summary(payload) + sys.exit(0) + + print_summary(payload) + time.sleep(POLL_INTERVAL_SECONDS) + +print('Timed out waiting for Selenium Grid readiness.', file=sys.stderr) +last_payload and print_summary(last_payload) +sys.exit(1) \ No newline at end of file From fd437e8b0d16f60f165ba6e580eaa6939ee7e43e Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 8 Jun 2026 23:35:21 +0000 Subject: [PATCH 2/2] Fix webchat HTML test skill review issues --- .../scripts/cleanup-grid-sessions.py | 68 ++++++------------- .../scripts/wait-for-grid.py | 3 +- 2 files changed, 23 insertions(+), 48 deletions(-) diff --git a/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py b/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py index 723c0c3ad0..d772ef5143 100755 --- a/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py +++ b/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py @@ -7,73 +7,47 @@ import urllib.request -GRID_STATUS_URL = os.environ.get('GRID_STATUS_URL', 'http://localhost:4444/status') -GRID_SESSION_URL = os.environ.get('GRID_SESSION_URL', 'http://localhost:4444/session') +GRID_REQUEST_TIMEOUT_SECONDS = float(os.environ.get('GRID_REQUEST_TIMEOUT_SECONDS', '5')) +GRID_SESSION_URL = os.environ.get('GRID_SESSION_URL', 'http://localhost:4444/wd/hub/session') +GRID_STATUS_URL = os.environ.get('GRID_STATUS_URL', 'http://localhost:4444/wd/hub/status') def fetch_status(): - with urllib.request.urlopen(GRID_STATUS_URL) as response: + with urllib.request.urlopen(GRID_STATUS_URL, timeout=GRID_REQUEST_TIMEOUT_SECONDS) as response: return json.load(response) def delete_session(session_id): request = urllib.request.Request(f'{GRID_SESSION_URL}/{session_id}', method='DELETE') - with urllib.request.urlopen(request): + with urllib.request.urlopen(request, timeout=GRID_REQUEST_TIMEOUT_SECONDS): return -try: - payload = fetch_status() -except urllib.error.URLError as error: - print(f'Failed to fetch Selenium Grid status: {error}', file=sys.stderr) - sys.exit(1) - -busy_sessions = [] - -for node in payload.get('value', {}).get('nodes') or []: - for slot in node.get('slots') or []: - session = slot.get('session') - session and busy_sessions.append(session.get('sessionId')) - -print(f'Busy sessions: {len(busy_sessions)}') - -for session_id in busy_sessions: - try: - delete_session(session_id) - except urllib.error.URLError as error: - print(f'Error deleting {session_id}: {error}', file=sys.stderr) - continue - - print(f'Deleted {session_id}')#!/usr/bin/env python3 - -import json -import sys -import urllib.request - - -STATUS_URL = 'http://localhost:4444/status' - - def main() -> int: - with urllib.request.urlopen(STATUS_URL) as response: - status = json.load(response) + try: + payload = fetch_status() + except (OSError, urllib.error.URLError) as error: + print(f'Failed to fetch Selenium Grid status: {error}', file=sys.stderr) + return 1 - nodes = (status.get('value') or {}).get('nodes') or [] - sessions = [] + busy_sessions = [] - for node in nodes: + for node in payload.get('value', {}).get('nodes') or []: for slot in node.get('slots') or []: session = slot.get('session') - session and sessions.append(session.get('sessionId')) + session and busy_sessions.append(session.get('sessionId')) - print(f'Busy sessions: {len(sessions)}') + print(f'Busy sessions: {len(busy_sessions)}') - for session_id in sessions: - request = urllib.request.Request(f'http://localhost:4444/session/{session_id}', method='DELETE') + for session_id in busy_sessions: + try: + delete_session(session_id) + except (OSError, urllib.error.URLError) as error: + print(f'Error deleting {session_id}: {error}', file=sys.stderr) + continue - with urllib.request.urlopen(request): - print(f'Deleted {session_id}') + print(f'Deleted {session_id}') return 0 diff --git a/.github/skills/webchat-html-tests/scripts/wait-for-grid.py b/.github/skills/webchat-html-tests/scripts/wait-for-grid.py index b2883942b3..d2d437bcb0 100755 --- a/.github/skills/webchat-html-tests/scripts/wait-for-grid.py +++ b/.github/skills/webchat-html-tests/scripts/wait-for-grid.py @@ -10,11 +10,12 @@ GRID_URL = os.environ.get('GRID_URL', 'http://localhost:4444/wd/hub/status') POLL_INTERVAL_SECONDS = float(os.environ.get('GRID_POLL_INTERVAL_SECONDS', '2')) +REQUEST_TIMEOUT_SECONDS = float(os.environ.get('GRID_REQUEST_TIMEOUT_SECONDS', '5')) TIMEOUT_SECONDS = float(os.environ.get('GRID_TIMEOUT_SECONDS', '60')) def fetch_status(): - with urllib.request.urlopen(GRID_URL) as response: + with urllib.request.urlopen(GRID_URL, timeout=REQUEST_TIMEOUT_SECONDS) as response: return json.load(response)