diff --git a/app/src/main/hl/engines/browsercode/adapter.ts b/app/src/main/hl/engines/browsercode/adapter.ts index 5aeb06de..b05d6289 100644 --- a/app/src/main/hl/engines/browsercode/adapter.ts +++ b/app/src/main/hl/engines/browsercode/adapter.ts @@ -8,7 +8,7 @@ import { register } from '../registry'; import { applyBrowserHarnessEnv } from '../browserHarnessEnv'; -import { buildSkillIndexPrompt, SKILL_DISCOVERY_AND_LIFECYCLE_LINES, htmlBlockGuidanceLines, optionsBlockGuidanceLines, askBlockGuidanceLines } from '../skillIndexPrompt'; +import { buildSkillIndexPrompt, SKILL_DISCOVERY_AND_LIFECYCLE_LINES, htmlBlockGuidanceLines, optionsBlockGuidanceLines, askBlockGuidanceLines, loginBlockGuidanceLines } from '../skillIndexPrompt'; import { resolveThemeMode } from '../../../themeMode'; import { enrichedEnv } from '../pathEnrich'; import { runCliCapture } from '../cliSpawn'; @@ -179,6 +179,7 @@ const browserCodeAdapter: EngineAdapter = { ...htmlBlockGuidanceLines(resolveThemeMode()), ...optionsBlockGuidanceLines(), ...askBlockGuidanceLines(), + ...loginBlockGuidanceLines(), "Use the `browser-harness-js` CLI for browser actions. 
Start with `browser-harness-js 'await connectToAssignedTarget()'`.", 'Do not use old helpers.js convenience APIs for browser control.', 'Do not edit harness files unless the user asks or a confirmed Browser Harness JS defect blocks the task.', diff --git a/app/src/main/hl/engines/claude-code/adapter.ts b/app/src/main/hl/engines/claude-code/adapter.ts index 1fd45a96..ec2c3f58 100644 --- a/app/src/main/hl/engines/claude-code/adapter.ts +++ b/app/src/main/hl/engines/claude-code/adapter.ts @@ -13,7 +13,7 @@ import { mainLogger } from '../../../logger'; import { register } from '../registry'; import { applyBrowserHarnessEnv } from '../browserHarnessEnv'; -import { buildSkillIndexPrompt, SKILL_DISCOVERY_AND_LIFECYCLE_LINES, htmlBlockGuidanceLines, optionsBlockGuidanceLines, askBlockGuidanceLines } from '../skillIndexPrompt'; +import { buildSkillIndexPrompt, SKILL_DISCOVERY_AND_LIFECYCLE_LINES, htmlBlockGuidanceLines, optionsBlockGuidanceLines, askBlockGuidanceLines, loginBlockGuidanceLines } from '../skillIndexPrompt'; import { resolveThemeMode } from '../../../themeMode'; import { enrichedEnv } from '../pathEnrich'; import { runCliCapture, spawnCli } from '../cliSpawn'; @@ -126,6 +126,7 @@ const claudeCodeAdapter: EngineAdapter = { ...htmlBlockGuidanceLines(resolveThemeMode()), ...optionsBlockGuidanceLines(), ...askBlockGuidanceLines(), + ...loginBlockGuidanceLines(), "Use the `browser-harness-js` CLI for browser actions. 
Start with `browser-harness-js 'await connectToAssignedTarget()'`.", 'Do not use old helpers.js convenience APIs for browser control.', 'Do not edit harness files unless the user asks or a confirmed Browser Harness JS defect blocks the task.', diff --git a/app/src/main/hl/engines/codex/adapter.ts b/app/src/main/hl/engines/codex/adapter.ts index 2d818e39..2c062350 100644 --- a/app/src/main/hl/engines/codex/adapter.ts +++ b/app/src/main/hl/engines/codex/adapter.ts @@ -21,7 +21,7 @@ import path from 'node:path'; import { mainLogger } from '../../../logger'; import { register } from '../registry'; import { applyBrowserHarnessEnv } from '../browserHarnessEnv'; -import { buildSkillIndexPrompt, SKILL_DISCOVERY_AND_LIFECYCLE_LINES, htmlBlockGuidanceLines, optionsBlockGuidanceLines, askBlockGuidanceLines } from '../skillIndexPrompt'; +import { buildSkillIndexPrompt, SKILL_DISCOVERY_AND_LIFECYCLE_LINES, htmlBlockGuidanceLines, optionsBlockGuidanceLines, askBlockGuidanceLines, loginBlockGuidanceLines } from '../skillIndexPrompt'; import { resolveThemeMode } from '../../../themeMode'; import { enrichedEnv } from '../pathEnrich'; import { runCliCapture } from '../cliSpawn'; @@ -117,6 +117,7 @@ const codexAdapter: EngineAdapter = { ...htmlBlockGuidanceLines(resolveThemeMode()), ...optionsBlockGuidanceLines(), ...askBlockGuidanceLines(), + ...loginBlockGuidanceLines(), "Use the `browser-harness-js` CLI for browser actions. 
Start with `browser-harness-js 'await connectToAssignedTarget()'`.", 'Do not use old helpers.js convenience APIs for browser control.', 'Do not edit harness files unless the user asks or a confirmed Browser Harness JS defect blocks the task.', diff --git a/app/src/main/hl/engines/skillIndexPrompt.ts b/app/src/main/hl/engines/skillIndexPrompt.ts index d70b9583..08e09a59 100644 --- a/app/src/main/hl/engines/skillIndexPrompt.ts +++ b/app/src/main/hl/engines/skillIndexPrompt.ts @@ -98,6 +98,23 @@ export function askBlockGuidanceLines(): string[] { ]; } +/** + * Provider-neutral nudge for the `login` fenced block — the renderer + * surfaces it as a username/password form with a "log in manually in the + * browser" escape hatch. The agent reads the credentials from the next + * user turn and types them into the live browser view. See the + * `login-block` interaction skill for the full schema and the + * manual-login fallback contract. + */ +export function loginBlockGuidanceLines(): string[] { + return [ + 'When the live browser hits a login wall and you need the user to provide credentials, emit a ```login fenced block carrying JSON: { site, url, prompt?, usernameLabel?, passwordLabel? }. `site` is the brand token (e.g. "Amazon", not "amazon.com"); `url` is the absolute http(s) login URL.', + 'The `login` block ENDS YOUR TURN. After emitting it, do not call any more tools — stop and wait for the user. Their reply arrives as "Login for <site>:\\nusername: <u>\\npassword: <p>" — type these verbatim into the username/password fields of the live tab, then submit. Do NOT echo the password back in your own response.', + 'The form also offers the user a "log in on myself" affordance that opens the in-app browser view directly; if they take that path you will not get a structured reply, just whatever they type next (e.g. "done"). Treat any plain follow-up message as the signal to resume.', + 'Use `login` only for real credential walls. For multiple-choice disambiguation, use `ask`; for picking among visible options, use `options`. See the `login-block` interaction skill for the full schema and worked examples.', + ]; +} + function normalizeSlash(value: string): string { return value.split(path.sep).join('/'); } diff --git a/app/src/main/hl/stock/interaction-skills/capture-block.md b/app/src/main/hl/stock/interaction-skills/capture-block.md new file mode 100644 index 00000000..703cae2c --- /dev/null +++ b/app/src/main/hl/stock/interaction-skills/capture-block.md @@ -0,0 +1,247 @@ +# Capture block — reCAPTCHA 3×3 tile picker + +When you hit a Google reCAPTCHA image challenge ("Select all squares with +motorcycles", "Select all images with traffic lights"), emit a fenced +` ```capture ` block. The renderer slices your single screenshot into 9 +clickable tiles + a reCAPTCHA-style header bar; the user picks tiles and +their selection comes back to you as a new user turn. + +## Hard constraints — read first + +1. **Only the 3×3 grid.** No prompt header, no Verify/Audio/Info row, no + surrounding chrome. The renderer evenly subdivides whatever PNG you + send into 9 tiles; any extra pixels break the tile boundaries. +2. **CSS pixels everywhere.** `Page.captureScreenshot`'s `clip` uses CSS + pixels regardless of devicePixelRatio. NEVER reason about the output + PNG's pixel dimensions when picking clip coordinates. Don't run `sips + --cropOffset` on the saved PNG — that's output-pixel space and you'll + double-scale. +3. 
**No tool calls after the closing fence.** Your turn ends; the agent + process idles until the user submits. + +## The reliable recipe — copy this + +The bframe (`https://www.google.com/recaptcha/api2/bframe?...`) is +**cross-origin with any third-party host** that embeds reCAPTCHA +(Cloudflare-protected sites, 2captcha demos, etc.). You cannot reach +its `contentDocument` from the parent page — you must attach to its +CDP target and run the DOM queries inside that session. + +Compute the grid rect from the **individual tile cells** — +*never* from the `<table>` element itself, because that element's +bounding rect includes extra layout space (the floating toolbar sometimes +sits absolutely positioned inside it, and certain challenge variants +pad the table beyond the visible tile area). + +```js +browser-harness-js <<'EOF' +const fs = await import('fs') + +// 1. Resolve the bframe's outer rect *in the parent page* first, before +// we route Runtime calls into the OOPIF. +const parentTargetId = session.targetId +const outerR = await session.Runtime.evaluate({ + expression: `JSON.stringify((() => { + const el = Array.from(document.querySelectorAll('iframe')) + .find(e => e.src.includes('/recaptcha/api2/bframe')); + if (!el) return null; + const r = el.getBoundingClientRect(); + return { x: r.x, y: r.y, w: r.width, h: r.height }; + })())`, + returnByValue: true, +}) +const outer = JSON.parse(outerR.result.value) +if (!outer) throw new Error('bframe