From 43b8674f3444a80663a5734eb50bc143670069d9 Mon Sep 17 00:00:00 2001
From: Eugene <EOlonov@gmail.com>
Date: Mon, 8 Jun 2026 22:51:41 +0000
Subject: [PATCH 1/2] Add webchat HTML test skill

---
 .github/skills/webchat-html-tests/SKILL.md    | 97 +++++++++++++++++++
 .../references/architecture.md                | 35 +++++++
 .../references/failure-modes.md               | 30 ++++++
 .../scripts/cleanup-grid-sessions.py          | 82 ++++++++++++++++
 .../scripts/run-html-tests.sh                 | 24 +++++
 .../webchat-html-tests/scripts/start-grid.sh  | 10 ++
 .../scripts/wait-for-grid.py                  | 56 +++++++++++
 7 files changed, 334 insertions(+)
 create mode 100644 .github/skills/webchat-html-tests/SKILL.md
 create mode 100644 .github/skills/webchat-html-tests/references/architecture.md
 create mode 100644 .github/skills/webchat-html-tests/references/failure-modes.md
 create mode 100755 .github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py
 create mode 100755 .github/skills/webchat-html-tests/scripts/run-html-tests.sh
 create mode 100755 .github/skills/webchat-html-tests/scripts/start-grid.sh
 create mode 100755 .github/skills/webchat-html-tests/scripts/wait-for-grid.py

diff --git a/.github/skills/webchat-html-tests/SKILL.md b/.github/skills/webchat-html-tests/SKILL.md
new file mode 100644
index 0000000000..195442a002
--- /dev/null
+++ b/.github/skills/webchat-html-tests/SKILL.md
@@ -0,0 +1,97 @@
+---
+name: webchat-html-tests
+description: 'Run Bot Framework Web Chat HTML tests in Docker/Selenium Grid. Use when: running __tests__/html2, debugging failing HTML tests, updating snapshots, checking grid health, or cleaning leaked Selenium sessions.'
+argument-hint: 'Optional HTML test regex'
+---
+
+# Web Chat HTML Tests
+
+Run the HTML test harness, keep Selenium Grid healthy, and debug failures. Operational detail lives in the bundled scripts and reference docs rather than in this skill body.
+
+## When to Use
+
+- Running `__tests__/html2` for a branch or PR
+- Debugging a failing HTML or snapshot test
+- Updating snapshots after an intentional visual change
+- Checking whether Selenium Grid is ready or leaking sessions
+
+## Procedure
+
+### 1. Start Selenium Grid
+
+Use the bundled script instead of pasting long Docker commands.
+
+For a focused test run, keep the default scale of 2 Chrome nodes:
+
+```sh
+./.github/skills/webchat-html-tests/scripts/start-grid.sh
+```
+
+For a full run, match Jest's 4 workers:
+
+```sh
+CHROME_SCALE=4 ./.github/skills/webchat-html-tests/scripts/start-grid.sh
+```
+
+### 2. Wait for Grid Readiness
+
+```sh
+python3 ./.github/skills/webchat-html-tests/scripts/wait-for-grid.py
+```
+
+Stop if the script times out (it exits with a non-zero status) or if the printed node summary shows no available nodes.
+
+### 3. Run Tests
+
+Run the full suite:
+
+```sh
+./.github/skills/webchat-html-tests/scripts/run-html-tests.sh
+```
+
+Run a focused HTML test with a regex anchored to the exact file:
+
+```sh
+./.github/skills/webchat-html-tests/scripts/run-html-tests.sh "__tests__/html2/activity/message-status\.html$"
+```
+
+Update snapshots for an expected visual change:
+
+```sh
+./.github/skills/webchat-html-tests/scripts/run-html-tests.sh --update "__tests__/html2/activity/message-status\.html$"
+```
+
+### 4. Clean Grid Sessions After Every Run
+
+```sh
+python3 ./.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py
+```
+
+If leaked sessions remain, clean them before the next Jest run or the grid can stall.
+
+### 5. Recover Common Infra Problems
+
+If dist files return 404s after a local build, restart `webchat2`:
+
+```sh
+docker compose -f docker-compose-wsl2.yml restart webchat2
+```
+
+If a failure is not obvious, load the reference docs before changing code:
+
+- [Architecture and test layout](./references/architecture.md)
+- [Failure modes and snapshot workflow](./references/failure-modes.md)
+
+### 6. Tear Down
+
+```sh
+docker compose -f docker-compose-wsl2.yml down
+```
+
+## Checklist
+
+- [ ] Grid is ready before running Jest
+- [ ] Focused tests use an anchored regex when targeting one HTML file
+- [ ] Sessions are cleaned after every run
+- [ ] After a snapshot update, the same test is rerun without `--update` and passes
+- [ ] `npm run precommit` passes before opening the PR
diff --git a/.github/skills/webchat-html-tests/references/architecture.md b/.github/skills/webchat-html-tests/references/architecture.md
new file mode 100644
index 0000000000..daba31abe9
--- /dev/null
+++ b/.github/skills/webchat-html-tests/references/architecture.md
@@ -0,0 +1,35 @@
+# Architecture and Test Layout
+
+HTML tests live under `__tests__/html2/`.
+
+Jest picks them up through the `html2` project declared in `jest.config.js`, then runs them in a WebDriver-backed environment against Selenium Grid.
+
+## Key Pieces
+
+- `docker-compose-wsl2.yml`: Selenium hub, Chrome nodes, `webchat2`, and `jest-server`
+- `jest.config.js`: top-level worker count and project wiring
+- `packages/test/harness/`: WebDriver environment, host bridge, and snapshot support
+- `packages/test/page-object/src/globals`: `host`, `pageConditions`, and `pageElements`
+
+## Redirect Test Pattern
+
+Many tests have a base file plus small redirect files for theme or variant coverage.
+
+- Base file example: `message-status.html`
+- Redirect file example: `message-status.copilot.html`
+
+Redirect files usually change `location` to the base test with query parameters such as:
+
+- `?variant=fluent`
+- `?variant=copilot`
+- `?fluent-theme=dark`
+- `?variant=copilot&fluent-theme=dark`
+
+When debugging a redirect failure, inspect the redirect target first because the real test logic usually lives there.
+
+## Regex Targeting Reminder
+
+`--testPathPattern` is a regex.
+
+- Use `message-status\.html$` when you want only the base file
+- Use `message-status\.` when you want the base file plus its redirect variants
diff --git a/.github/skills/webchat-html-tests/references/failure-modes.md b/.github/skills/webchat-html-tests/references/failure-modes.md
new file mode 100644
index 0000000000..6010ac0ffa
--- /dev/null
+++ b/.github/skills/webchat-html-tests/references/failure-modes.md
@@ -0,0 +1,30 @@
+# Failure Modes and Snapshot Workflow
+
+## Common Failures
+
+| Error pattern                                                         | Likely cause                              | Recovery                                                                                            |
+| --------------------------------------------------------------------- | ----------------------------------------- | --------------------------------------------------------------------------------------------------- |
+| `Expected image to match or be a close match to snapshot`             | Real visual change or regression          | Inspect the diff PNG, then decide whether to update snapshots or fix the code                       |
+| Timeout or hanging Jest run                                           | Selenium sessions leaked                  | Run `python3 ./.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py` before rerunning |
+| `Failed to load resource: 404 (Not Found)` for dist files             | `webchat2` serving stale or empty content | `docker compose -f docker-compose-wsl2.yml restart webchat2`                                        |
+| `Cannot read properties of undefined (reading 'FluentThemeProvider')` | Fluent bundle returned 404                | Restart `webchat2` and confirm the local build completed                                            |
+| `Cannot read properties of undefined (reading 'ReactWebChat')`        | Main bundle returned 404                  | Restart `webchat2` and confirm the local build completed                                            |
+
+## Snapshot Update Flow
+
+1. Reproduce the failure without `--update`.
+2. Inspect the generated `.snap-N-diff.png` file next to the HTML test.
+3. Only if the change is intentional, rerun with `--update`.
+4. Rerun the same test again without `--update` to confirm the snapshot is now stable.
+5. Clean leaked Selenium sessions between runs.
+
+## HTML Test Anatomy
+
+Most HTML tests follow this structure:
+
+- Load `/test-harness.js` and `/test-page-object.js`
+- Create a Direct Line emulator with `testHelpers.createDirectLineEmulator()`
+- Drive activities with `emulateIncomingActivity()` or `emulateOutgoingActivity()`
+- Assert through `pageConditions` and `pageElements`
+- Capture snapshots with `await host.snapshot('local')`
+- Use `expect`, not `assert`
diff --git a/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py b/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py
new file mode 100755
index 0000000000..723c0c3ad0
--- /dev/null
+++ b/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+
+import json
+import os
+import sys
+import urllib.error
+import urllib.request
+
+
+GRID_STATUS_URL = os.environ.get('GRID_STATUS_URL', 'http://localhost:4444/status')
+GRID_SESSION_URL = os.environ.get('GRID_SESSION_URL', 'http://localhost:4444/session')
+
+
+def fetch_status():
+    with urllib.request.urlopen(GRID_STATUS_URL) as response:
+        return json.load(response)
+
+
+def delete_session(session_id):
+    request = urllib.request.Request(f'{GRID_SESSION_URL}/{session_id}', method='DELETE')
+
+    with urllib.request.urlopen(request):
+        return
+
+
+try:
+    payload = fetch_status()
+except urllib.error.URLError as error:
+    print(f'Failed to fetch Selenium Grid status: {error}', file=sys.stderr)
+    sys.exit(1)
+
+busy_sessions = []
+
+for node in payload.get('value', {}).get('nodes') or []:
+    for slot in node.get('slots') or []:
+        session = slot.get('session')
+        session and busy_sessions.append(session.get('sessionId'))
+
+print(f'Busy sessions: {len(busy_sessions)}')
+
+for session_id in busy_sessions:
+    try:
+        delete_session(session_id)
+    except urllib.error.URLError as error:
+        print(f'Error deleting {session_id}: {error}', file=sys.stderr)
+        continue
+
+    print(f'Deleted {session_id}')#!/usr/bin/env python3
+
+import json
+import sys
+import urllib.request
+
+
+STATUS_URL = 'http://localhost:4444/status'
+
+
+def main() -> int:
+    with urllib.request.urlopen(STATUS_URL) as response:
+        status = json.load(response)
+
+    nodes = (status.get('value') or {}).get('nodes') or []
+    sessions = []
+
+    for node in nodes:
+        for slot in node.get('slots') or []:
+            session = slot.get('session')
+            session and sessions.append(session.get('sessionId'))
+
+    print(f'Busy sessions: {len(sessions)}')
+
+    for session_id in sessions:
+        request = urllib.request.Request(f'http://localhost:4444/session/{session_id}', method='DELETE')
+
+        with urllib.request.urlopen(request):
+            print(f'Deleted {session_id}')
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
\ No newline at end of file
diff --git a/.github/skills/webchat-html-tests/scripts/run-html-tests.sh b/.github/skills/webchat-html-tests/scripts/run-html-tests.sh
new file mode 100755
index 0000000000..a8a818460b
--- /dev/null
+++ b/.github/skills/webchat-html-tests/scripts/run-html-tests.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+# Usage: run-html-tests.sh [--update] [test-path-regex]; forwards the options to `npm test`.
+set -euo pipefail
+
+update_snapshots=false
+test_pattern=''
+
+for argument in "$@"; do
+  if [[ "$argument" == '--update' ]]; then
+    update_snapshots=true
+  elif [[ -z "$test_pattern" ]]; then
+    test_pattern="$argument"
+  else
+    printf 'Usage: %s [--update] [test-path-regex]\n' "$0" >&2
+    exit 1
+  fi
+done
+
+arguments=()
+
+$update_snapshots && arguments+=('-u')
+[[ -n "$test_pattern" ]] && arguments+=('--testPathPattern' "$test_pattern")
+# A bare "${arguments[@]}" on an empty array is an unbound-variable error under `set -u` before bash 4.4.
+npm test -- ${arguments[@]+"${arguments[@]}"}
\ No newline at end of file
diff --git a/.github/skills/webchat-html-tests/scripts/start-grid.sh b/.github/skills/webchat-html-tests/scripts/start-grid.sh
new file mode 100755
index 0000000000..30f62247b7
--- /dev/null
+++ b/.github/skills/webchat-html-tests/scripts/start-grid.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Builds the compose images, starts the stack detached with CHROME_SCALE Chrome nodes (default 2), then restarts webchat2.
+set -euo pipefail
+
+readonly compose_file='docker-compose-wsl2.yml'
+readonly image_registry="${REGISTRY:-mcr.microsoft.com}" node_count="${CHROME_SCALE:-2}"
+
+docker compose -f "$compose_file" build --build-arg "REGISTRY=${image_registry}"
+docker compose -f "$compose_file" up --detach --scale "chrome=${node_count}"
+docker compose -f "$compose_file" restart webchat2
\ No newline at end of file
diff --git a/.github/skills/webchat-html-tests/scripts/wait-for-grid.py b/.github/skills/webchat-html-tests/scripts/wait-for-grid.py
new file mode 100755
index 0000000000..b2883942b3
--- /dev/null
+++ b/.github/skills/webchat-html-tests/scripts/wait-for-grid.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+
+import json
+import os
+import sys
+import time
+import urllib.error
+import urllib.request
+
+
+GRID_URL = os.environ.get('GRID_URL', 'http://localhost:4444/wd/hub/status')
+POLL_INTERVAL_SECONDS = float(os.environ.get('GRID_POLL_INTERVAL_SECONDS', '2'))
+TIMEOUT_SECONDS = float(os.environ.get('GRID_TIMEOUT_SECONDS', '60'))
+
+
+def fetch_status():
+    with urllib.request.urlopen(GRID_URL) as response:
+        return json.load(response)
+
+
+def print_summary(payload):
+    """Print the grid status line, then one line per node.
+
+    Missing fields fall back to placeholder text instead of raising.
+    """
+    grid = payload.get('value') or {}
+    print(f"{grid.get('message', 'No message returned')} [ready={grid.get('ready', False)}]")
+
+    for entry in grid.get('nodes') or []:
+        print(f"node {entry.get('id', '<unknown>')} {entry.get('availability', '<unknown>')}")
+
+
+deadline = time.monotonic() + TIMEOUT_SECONDS
+last_payload = None
+
+while time.monotonic() < deadline:
+    try:
+        payload = fetch_status()
+    except (OSError, ValueError, urllib.error.URLError) as error:
+        # ValueError covers a response body that is not valid JSON; keep polling instead of crashing.
+        print(f'Waiting for Selenium Grid: {error}', file=sys.stderr)
+        time.sleep(POLL_INTERVAL_SECONDS)
+        continue
+
+    last_payload = payload
+    print_summary(payload)
+
+    if (payload.get('value') or {}).get('ready'):
+        sys.exit(0)
+
+    time.sleep(POLL_INTERVAL_SECONDS)
+
+print('Timed out waiting for Selenium Grid readiness.', file=sys.stderr)
+if last_payload:
+    print_summary(last_payload)
+sys.exit(1)
\ No newline at end of file

From fd437e8b0d16f60f165ba6e580eaa6939ee7e43e Mon Sep 17 00:00:00 2001
From: Eugene <EOlonov@gmail.com>
Date: Mon, 8 Jun 2026 23:35:21 +0000
Subject: [PATCH 2/2] Fix webchat HTML test skill review issues

---
 .../scripts/cleanup-grid-sessions.py          | 68 ++++++-------------
 .../scripts/wait-for-grid.py                  |  3 +-
 2 files changed, 23 insertions(+), 48 deletions(-)

diff --git a/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py b/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py
index 723c0c3ad0..d772ef5143 100755
--- a/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py
+++ b/.github/skills/webchat-html-tests/scripts/cleanup-grid-sessions.py
@@ -7,73 +7,47 @@
 import urllib.request
 
 
-GRID_STATUS_URL = os.environ.get('GRID_STATUS_URL', 'http://localhost:4444/status')
-GRID_SESSION_URL = os.environ.get('GRID_SESSION_URL', 'http://localhost:4444/session')
+GRID_REQUEST_TIMEOUT_SECONDS = float(os.environ.get('GRID_REQUEST_TIMEOUT_SECONDS', '5'))
+GRID_SESSION_URL = os.environ.get('GRID_SESSION_URL', 'http://localhost:4444/wd/hub/session')
+GRID_STATUS_URL = os.environ.get('GRID_STATUS_URL', 'http://localhost:4444/wd/hub/status')
 
 
 def fetch_status():
-    with urllib.request.urlopen(GRID_STATUS_URL) as response:
+    with urllib.request.urlopen(GRID_STATUS_URL, timeout=GRID_REQUEST_TIMEOUT_SECONDS) as response:
         return json.load(response)
 
 
 def delete_session(session_id):
     request = urllib.request.Request(f'{GRID_SESSION_URL}/{session_id}', method='DELETE')
 
-    with urllib.request.urlopen(request):
+    with urllib.request.urlopen(request, timeout=GRID_REQUEST_TIMEOUT_SECONDS):
         return
 
 
-try:
-    payload = fetch_status()
-except urllib.error.URLError as error:
-    print(f'Failed to fetch Selenium Grid status: {error}', file=sys.stderr)
-    sys.exit(1)
-
-busy_sessions = []
-
-for node in payload.get('value', {}).get('nodes') or []:
-    for slot in node.get('slots') or []:
-        session = slot.get('session')
-        session and busy_sessions.append(session.get('sessionId'))
-
-print(f'Busy sessions: {len(busy_sessions)}')
-
-for session_id in busy_sessions:
-    try:
-        delete_session(session_id)
-    except urllib.error.URLError as error:
-        print(f'Error deleting {session_id}: {error}', file=sys.stderr)
-        continue
-
-    print(f'Deleted {session_id}')#!/usr/bin/env python3
-
-import json
-import sys
-import urllib.request
-
-
-STATUS_URL = 'http://localhost:4444/status'
-
-
 def main() -> int:
-    with urllib.request.urlopen(STATUS_URL) as response:
-        status = json.load(response)
+    try:
+        payload = fetch_status()
+    except (OSError, ValueError, urllib.error.URLError) as error:  # ValueError: body was not valid JSON
+        print(f'Failed to fetch Selenium Grid status: {error}', file=sys.stderr)
+        return 1
 
-    nodes = (status.get('value') or {}).get('nodes') or []
-    sessions = []
+    busy_sessions = []
 
-    for node in nodes:
+    for node in (payload.get('value') or {}).get('nodes') or []:  # tolerate "value": null
         for slot in node.get('slots') or []:
             session = slot.get('session')
-            session and sessions.append(session.get('sessionId'))
+            session and busy_sessions.append(session.get('sessionId'))
 
-    print(f'Busy sessions: {len(sessions)}')
+    print(f'Busy sessions: {len(busy_sessions)}')
 
-    for session_id in sessions:
-        request = urllib.request.Request(f'http://localhost:4444/session/{session_id}', method='DELETE')
+    for session_id in busy_sessions:
+        try:
+            delete_session(session_id)
+        except (OSError, urllib.error.URLError) as error:
+            print(f'Error deleting {session_id}: {error}', file=sys.stderr)
+            continue
 
-        with urllib.request.urlopen(request):
-            print(f'Deleted {session_id}')
+        print(f'Deleted {session_id}')
 
     return 0
 
diff --git a/.github/skills/webchat-html-tests/scripts/wait-for-grid.py b/.github/skills/webchat-html-tests/scripts/wait-for-grid.py
index b2883942b3..d2d437bcb0 100755
--- a/.github/skills/webchat-html-tests/scripts/wait-for-grid.py
+++ b/.github/skills/webchat-html-tests/scripts/wait-for-grid.py
@@ -10,11 +10,12 @@
 
 GRID_URL = os.environ.get('GRID_URL', 'http://localhost:4444/wd/hub/status')
 POLL_INTERVAL_SECONDS = float(os.environ.get('GRID_POLL_INTERVAL_SECONDS', '2'))
+REQUEST_TIMEOUT_SECONDS = float(os.environ.get('GRID_REQUEST_TIMEOUT_SECONDS', '5'))
 TIMEOUT_SECONDS = float(os.environ.get('GRID_TIMEOUT_SECONDS', '60'))
 
 
 def fetch_status():
-    with urllib.request.urlopen(GRID_URL) as response:
+    with urllib.request.urlopen(GRID_URL, timeout=REQUEST_TIMEOUT_SECONDS) as response:
         return json.load(response)