diff --git a/src/agents/planner-executor/category-pruner.ts b/src/agents/planner-executor/category-pruner.ts index 3232518..e55c856 100644 --- a/src/agents/planner-executor/category-pruner.ts +++ b/src/agents/planner-executor/category-pruner.ts @@ -284,6 +284,21 @@ export function detectPruningCategory( return PruningTaskCategory.FORM_FILLING; } + // Form-fill keyword detection takes priority over extraction + // because "Display name" or "email" in a form task are field labels, not extraction. + if ( + normalizedGoal.includes('form') || + normalizedGoal.includes('fill') || + normalizedGoal.includes('fill out') || + normalizedGoal.includes('submit') || + normalizedGoal.includes('onboarding') || + normalizedGoal.includes('sign up') || + normalizedGoal.includes('signup') || + normalizedGoal.includes('register') + ) { + return PruningTaskCategory.FORM_FILLING; + } + // Extraction keyword detection takes priority over TRANSACTION/SHOPPING // because "find the title of X" or "extract Y" on an e-commerce site is // still an extraction task, not a shopping task. diff --git a/src/agents/planner-executor/extraction-keywords.ts b/src/agents/planner-executor/extraction-keywords.ts index 0092c30..98e8ded 100644 --- a/src/agents/planner-executor/extraction-keywords.ts +++ b/src/agents/planner-executor/extraction-keywords.ts @@ -177,6 +177,23 @@ export const TEXT_EXTRACTION_KEYWORDS: readonly string[] = [ * @param task - The task or step description to analyse * @returns true if this looks like a text extraction task */ +const FORM_FILL_SIGNALS: readonly string[] = [ + 'form', + 'fill', + 'submit', + 'onboarding', + 'sign up', + 'signup', + 'register', + 'checkbox', + 'dropdown', + 'radio button', + 'next button', + 'click the', + 'type ', + 'enter ', +]; + export function isTextExtractionTask(task: string): boolean { if (!task) { return false; @@ -184,6 +201,13 @@ export function isTextExtractionTask(task: string): boolean { const taskLower = task.toLowerCase(); + // Form-fill negative signal: if the task clearly involves filling a form, + // it's not extraction even if it contains extraction-like keywords + // (e.g., "Display name", "email" are field labels, not extraction targets) + if (FORM_FILL_SIGNALS.some(signal => taskLower.includes(signal))) { + return false; + } + // Tier 1: Strong extraction phrases (multi-word substring match) for (const phrase of EXTRACTION_PHRASES) { if (taskLower.includes(phrase)) { diff --git a/src/agents/planner-executor/plan-models.ts b/src/agents/planner-executor/plan-models.ts index 1b50406..d8c0388 100644 --- a/src/agents/planner-executor/plan-models.ts +++ b/src/agents/planner-executor/plan-models.ts @@ -175,6 +175,8 @@ export interface ActionRecord { action: string; /** Element description or URL */ target: string | null; + /** Planner intent for this action, if provided */ + intent?: string | null; /** Outcome (success, failed) */ result: string; /** URL after action completed */ @@ -213,6 +215,7 @@ export interface StepOutcome { urlBefore?: string; urlAfter?: string; extractedData?: unknown; + pageContentPreview?: string; } // --------------------------------------------------------------------------- diff --git a/src/agents/planner-executor/plan-utils.ts b/src/agents/planner-executor/plan-utils.ts index cefde8d..1cbe390 100644 --- a/src/agents/planner-executor/plan-utils.ts +++ b/src/agents/planner-executor/plan-utils.ts @@ -182,6 +182,17 @@ function stripThinkingTags(content: string): string { function repairJson(text: string): string { let repaired = text; + // Fix backslash-escaped quotes that make JSON unparseable. + // Some LLMs output: {"action":"CLICK",...,\"reasoning\":\"text\"} + // where the outer quotes are real but inner key/value quotes are escaped. + // Heuristic: if the text has unescaped quotes AND backslash-escaped quotes + // mixed together, unescape the backslash-escaped ones. + const hasUnescapedQuotes = /[^\\]"/.test(repaired); + const hasEscapedQuotes = /\\"/.test(repaired); + if (hasUnescapedQuotes && hasEscapedQuotes) { + repaired = repaired.replace(/\\"/g, '"'); + } + // Add double quotes around unquoted object keys // Matches: word-characters followed by colon (not already inside a string) // Pattern: start of object `{` or comma `,`, optional whitespace, then unquoted key, then `:` @@ -189,7 +200,7 @@ function repairJson(text: string): string { // Replace single-quoted strings with double-quoted strings // This is a simple heuristic — it won't handle escaped single quotes inside strings, - // but it handles the common case of LLMs outputting `'text'` instead of `"text"` + // but it handles the common case of LLMs outputting 'text' instead of "text" repaired = repaired.replace(/'([^']*)'/g, '"$1"'); // Remove trailing commas before } or ] @@ -338,6 +349,8 @@ const ACTION_ALIASES: Record = { CLICK_ELEMENT: 'CLICK', CLICK_BUTTON: 'CLICK', CLICK_LINK: 'CLICK', + CLICK_XY: 'CLICK', + TYPE_AT: 'TYPE', INPUT: 'TYPE_AND_SUBMIT', TYPE_TEXT: 'TYPE_AND_SUBMIT', ENTER_TEXT: 'TYPE_AND_SUBMIT', @@ -347,6 +360,8 @@ const ACTION_ALIASES: Record = { OPEN: 'NAVIGATE', SCROLL_DOWN: 'SCROLL', SCROLL_UP: 'SCROLL', + SCROLL_TO: 'SCROLL', + SCROLL_INTO_VIEW: 'SCROLL', }; /** @@ -517,6 +532,20 @@ function normalizeStep(step: Record): Record { if ('target' in normalizedStep && normalizedStep.target === null) { delete normalizedStep.target; } + if ('input' in normalizedStep && typeof normalizedStep.input !== 'string') { + const inputValue = normalizedStep.input; + if (inputValue === null || inputValue === undefined) { + delete normalizedStep.input; + } else if ( + typeof inputValue === 'number' || + typeof inputValue === 'boolean' || + typeof inputValue === 'bigint' + ) { + normalizedStep.input = String(inputValue); + } else { + normalizedStep.input = JSON.stringify(inputValue); + } + } if ('id' in normalizedStep && typeof normalizedStep.id === 'string') { const parsed = parseInt(normalizedStep.id, 10); diff --git a/src/agents/planner-executor/planner-executor-agent.ts b/src/agents/planner-executor/planner-executor-agent.ts index e365908..8e71424 100644 --- a/src/agents/planner-executor/planner-executor-agent.ts +++ b/src/agents/planner-executor/planner-executor-agent.ts @@ -64,7 +64,6 @@ import { } from './boundary-detection'; import { ComposableHeuristics } from './composable-heuristics'; import { normalizeTaskCategory, TaskCategory } from './task-category'; -import { getCommonHint } from './common-hints'; import type { ResolvedAgentProfile } from './profile-types'; import { HeuristicHint } from './heuristic-hint'; import { @@ -1158,6 +1157,7 @@ export class PlannerExecutorAgent { stepNum, action: plannerAction.action, target: this.summarizePlannerActionTarget(plannerAction), + intent: plannerAction.intent || null, result: 'failed', urlAfter, }); @@ -1210,7 +1210,8 @@ export class PlannerExecutorAgent { stepNum, action: plannerAction.action, target: this.summarizePlannerActionTarget(plannerAction), - result: finalOutcome.status === StepStatus.SUCCESS ? 'success' : 'failed', + intent: plannerAction.intent || null, + result: this.actionHistoryResult(finalOutcome.status), urlAfter, extractedData: extractedText || undefined, }); @@ -1229,6 +1230,14 @@ export class PlannerExecutorAgent { success = true; } + if ( + !success && + finalOutcome.status === StepStatus.SUCCESS && + this.isFinalFormSubmissionAction(task, plannerAction) + ) { + success = true; + } + // Auto-complete extraction tasks: if the action was a successful EXTRACT // with actual data, and the overall task is an extraction task, mark as done. // This prevents infinite EXTRACT loops on extraction-focused tasks. @@ -1292,6 +1301,12 @@ export class PlannerExecutorAgent { break; } + const repeatedCompletedFormSkip = this.detectRepeatedCompletedFormSkip(stepOutcomes); + if (repeatedCompletedFormSkip) { + error = repeatedCompletedFormSkip; + break; + } + // Check for URL/action cycles (e.g., click → external site → navigate back → repeat) const cycleDetected = this.detectActionCycle(this.actionHistory); if (cycleDetected) { @@ -1455,12 +1470,27 @@ export class PlannerExecutorAgent { const currentUrl = ctx.snapshot?.url || ''; const stepGoal = plannerAction.intent || plannerAction.action; + if (this.isPreviouslyCompletedFormDataEntry(plannerAction, currentUrl)) { + return { + stepId: stepNum, + goal: stepGoal, + status: StepStatus.SKIPPED, + actionTaken: 'SKIPPED(previously_completed_form_step)', + verificationPassed: true, + usedVision: false, + durationMs: Date.now() - stepStart, + urlBefore: currentUrl, + urlAfter: currentUrl, + }; + } + if ( this.config.preStepVerification && (plannerAction.verify?.length || 0) > 0 && plannerAction.action !== 'TYPE_AND_SUBMIT' && plannerAction.action !== 'TYPE' && - plannerAction.action !== 'CLICK' + !this.isFormDataEntryPlannerAction(plannerAction) && + !this.isForwardNavigationPlannerAction(plannerAction) ) { const alreadySatisfied = await this.checkPreStepVerification(runtime, plannerAction); if (alreadySatisfied) { @@ -1669,6 +1699,7 @@ export class PlannerExecutorAgent { usedVision: false, durationMs: Date.now() - stepStart, error: `Could not find requested data: ${extractQuery}`, + pageContentPreview: pageContent.slice(0, 500), }; } } else { @@ -1916,22 +1947,62 @@ export class PlannerExecutorAgent { } } await new Promise(r => setTimeout(r, 500)); - const urlAfter = await runtime.getCurrentUrl(); - const verificationPassed = await this.verifyStepOutcome(runtime, plannerAction); + let urlAfter = await runtime.getCurrentUrl(); + let verificationPassed = await this.verifyStepOutcome(runtime, plannerAction); + let finalActionTaken = + isVisionTypeAction && plannerAction.input + ? `CLICK_XY(${vx}, ${vy}) + TYPE_AT("${plannerAction.input}")` + : `CLICK_XY(${vx}, ${vy})`; + let postNextPreview: string | undefined; + if ( + !verificationPassed && + plannerAction.action === 'TYPE' && + (plannerAction.verify?.length || 0) > 0 + ) { + const nextButtonId = this.findSubmitButton(activeCtx.snapshot?.elements || [], 0, false); + if (nextButtonId !== null) { + try { + await runtime.click(nextButtonId); + finalActionTaken = `${finalActionTaken} -> CLICK(${nextButtonId})`; + verificationPassed = await this.verifyStepOutcome(runtime, plannerAction); + urlAfter = await runtime.getCurrentUrl(); + if (!verificationPassed) { + const postNextSnapshot = await runtime.snapshot({ + limit: this.config.snapshot.limitBase, + screenshot: false, + goal: plannerAction.intent || plannerAction.action, + }); + if (postNextSnapshot) { + verificationPassed = this.inferSameUrlWizardProgressAfterNext( + plannerAction, + activeCtx.snapshot, + postNextSnapshot + ); + postNextPreview = formatContext( + postNextSnapshot.elements || [], + this.config.snapshot.limitBase + ).slice(0, 500); + } + } + } catch (e) { + if (this.config.verbose) { + console.log(`[VISION-TYPE-FORM-NEXT] Next click failed: ${e}`); + } + } + } + } return { stepId: stepNum, goal: stepGoal, status: verificationPassed ? StepStatus.SUCCESS : StepStatus.FAILED, - actionTaken: - isVisionTypeAction && plannerAction.input - ? `CLICK_XY(${vx}, ${vy}) + TYPE_AT("${plannerAction.input}")` - : `CLICK_XY(${vx}, ${vy})`, + actionTaken: finalActionTaken, llmResponseText: executorResp?.content, verificationPassed, usedVision: true, durationMs: Date.now() - stepStart, urlBefore: currentUrl, urlAfter, + pageContentPreview: verificationPassed ? undefined : postNextPreview, }; } @@ -2057,6 +2128,37 @@ export class PlannerExecutorAgent { (!requiresNavigation && !hasVerificationPredicates) || modalHandled; + if (!verificationPassed) { + const postClickSnapshot = await runtime.snapshot({ + limit: this.config.snapshot.limitBase, + screenshot: false, + goal: plannerAction.intent || plannerAction.action, + }); + if (postClickSnapshot) { + verificationPassed = this.inferSameUrlWizardProgressAfterNavigationClick( + plannerAction, + targetElement, + activeCtx.snapshot, + postClickSnapshot + ); + if (!verificationPassed) { + verificationPassed = this.inferTerminalFormSubmissionAfterClick( + plannerAction, + targetElement, + activeCtx.snapshot, + postClickSnapshot + ); + } + if (!verificationPassed) { + verificationPassed = this.inferFormChoiceSelectionAfterClick( + plannerAction, + targetElement, + postClickSnapshot + ); + } + } + } + if ( verificationPassed && (this.currentTaskCategory === TaskCategory.EXTRACTION || isTextExtractionTask(task)) @@ -2078,7 +2180,16 @@ export class PlannerExecutorAgent { let finalActionTaken = `CLICK(${elementId})`; let finalUrlAfter = urlAfter; - if (!verificationPassed && requiresNavigation && activeCtx.snapshot?.elements) { + const clickedElementWasNavigationLink = + targetElement && + ((targetElement.role || '').toLowerCase() === 'link' || + Boolean(targetElement.href && !targetElement.href.startsWith('#'))); + if ( + !verificationPassed && + requiresNavigation && + clickedElementWasNavigationLink && + activeCtx.snapshot?.elements + ) { const fallbackElementId = this.findFallbackNavigationClickTarget( activeCtx.snapshot.elements, elementId, @@ -2130,7 +2241,9 @@ export class PlannerExecutorAgent { const elements = activeCtx.snapshot?.elements || []; const inputElement = elements.find(element => element.id === elementId) || null; const isSearchLike = isSearchLikeTypeAndSubmit(plannerAction, inputElement); + const hasVerificationPredicates = (plannerAction.verify?.length || 0) > 0; let submissionSatisfied = false; + let finalActionTaken = `TYPE(${elementId}, "${text}")`; // Submit with Enter key for TYPE_AND_SUBMIT, plus planner TYPE actions that clearly target search. if ( @@ -2228,15 +2341,52 @@ export class PlannerExecutorAgent { } } - const verificationPassed = + let verificationPassed = submissionSatisfied || (await this.verifyStepOutcome(runtime, plannerAction)); const urlAfter = await runtime.getCurrentUrl(); + if ( + !verificationPassed && + plannerAction.action === 'TYPE' && + !isSearchLike && + hasVerificationPredicates + ) { + const nextButtonId = this.findSubmitButton(elements, elementId, false); + if (nextButtonId !== null) { + try { + await runtime.click(nextButtonId); + finalActionTaken = `${finalActionTaken} -> CLICK(${nextButtonId})`; + let advanced = await this.verifyStepOutcome(runtime, plannerAction); + if (!advanced) { + const postNextSnapshot = await runtime.snapshot({ + limit: this.config.snapshot.limitBase, + screenshot: false, + goal: plannerAction.intent || plannerAction.action, + }); + if (postNextSnapshot) { + advanced = this.inferSameUrlWizardProgressAfterNext( + plannerAction, + activeCtx.snapshot, + postNextSnapshot + ); + } + } + if (advanced) { + verificationPassed = true; + } + } catch (e) { + if (this.config.verbose) { + console.log(`[TYPE-FORM-NEXT] Next click failed: ${e}`); + } + } + } + } + return { stepId: stepNum, goal: plannerAction.intent || 'Type text', status: verificationPassed ? StepStatus.SUCCESS : StepStatus.FAILED, - actionTaken: `TYPE(${elementId}, "${text}")`, + actionTaken: finalActionTaken, llmResponseText: executorResp?.content, verificationPassed, usedVision: shouldUseVision, @@ -2635,11 +2785,7 @@ export class PlannerExecutorAgent { return { action: 'TYPE', args: [elementId, plannerAction.input] }; } - if ( - plannerAction.action === 'TYPE' && - plannerAction.input && - isSearchLikeTypeAndSubmit(plannerAction, matchedElement) - ) { + if (plannerAction.action === 'TYPE' && plannerAction.input) { return { action: 'TYPE', args: [elementId, plannerAction.input] }; } @@ -2652,12 +2798,15 @@ export class PlannerExecutorAgent { task: string, forceVision: boolean = false ): Promise<{ parsed: ParsedAction; shouldUseVision: boolean; executorResp: LLMResponse | null }> { - const hasExplicitStepHints = (plannerAction.heuristicHints?.length || 0) > 0; - const hasCommonHint = Boolean(plannerAction.intent && getCommonHint(plannerAction.intent)); - const allowHeuristicsDespiteVision = + const lowElementHtmlSnapshot = ctx.requiresVision && - ctx.visionReason === 'too_few_elements' && - (hasExplicitStepHints || hasCommonHint); + (ctx.visionReason === 'below_threshold' || ctx.visionReason === 'too_few_elements'); + const typingAction = + plannerAction.action === 'TYPE' || plannerAction.action === 'TYPE_AND_SUBMIT'; + const hasTextElements = (ctx.snapshot?.elements?.length || 0) > 0; + const allowHeuristicsDespiteVision = + (typingAction && (lowElementHtmlSnapshot || hasTextElements)) || + Boolean(plannerAction.heuristicHints?.length); const heuristicAction = (!ctx.requiresVision || allowHeuristicsDespiteVision) && plannerAction.intent ? this.resolveHeuristicAction(plannerAction, ctx, task) @@ -2918,6 +3067,113 @@ export class PlannerExecutorAgent { return plannerAction.intent || plannerAction.input || plannerAction.target || null; } + private isPreviouslyCompletedFormDataEntry( + plannerAction: StepwisePlannerResponse, + currentUrl: string + ): boolean { + if (!plannerAction.intent) { + return false; + } + + const action = plannerAction.action; + if (!['TYPE', 'TYPE_AND_SUBMIT', 'CLICK'].includes(action)) { + return false; + } + + const intentKey = this.normalizeCompletedFormIntent(plannerAction.intent); + if (!intentKey || !this.isFormDataEntryIntent(intentKey)) { + return false; + } + + return this.actionHistory.some(record => { + if (!['success', 'skipped', 'vision_fallback'].includes(record.result)) { + return false; + } + const recordIntent = record.intent ? this.normalizeCompletedFormIntent(record.intent) : ''; + if (!this.isFormDataEntryIntent(recordIntent)) { + return false; + } + if (!this.completedFormIntentsMatch(recordIntent, intentKey)) { + return false; + } + if (record.urlAfter && currentUrl) { + return ( + this.normalizeNavigationUrl(record.urlAfter) === this.normalizeNavigationUrl(currentUrl) + ); + } + return true; + }); + } + + private isFormDataEntryPlannerAction(plannerAction: StepwisePlannerResponse): boolean { + const intentKey = plannerAction.intent + ? this.normalizeCompletedFormIntent(plannerAction.intent) + : ''; + return Boolean(intentKey && this.isFormDataEntryIntent(intentKey)); + } + + private isForwardNavigationPlannerAction(plannerAction: StepwisePlannerResponse): boolean { + if (plannerAction.action !== 'CLICK') { + return false; + } + + const actionText = `${plannerAction.intent || ''} ${plannerAction.input || ''} ${ + plannerAction.target || '' + }`; + return /\b(next|continue)\b/i.test(actionText); + } + + private normalizeCompletedFormIntent(intent: string): string { + return intent + .toLowerCase() + .replace( + /\b(field|input|textbox|text box|radio button|checkbox|dropdown|select|button)\b/g, + '' + ) + .replace(/\s+/g, ' ') + .trim(); + } + + private completedFormIntentsMatch(previousIntent: string, nextIntent: string): boolean { + if (!previousIntent || !nextIntent) { + return false; + } + if (previousIntent === nextIntent) { + return true; + } + + const previousCategories = this.completedFormIntentCategories(previousIntent); + const nextCategories = this.completedFormIntentCategories(nextIntent); + return [...previousCategories].some(category => nextCategories.has(category)); + } + + private completedFormIntentCategories(intent: string): Set { + const categories = new Set(); + if (/\bemail\b/.test(intent)) { + categories.add('email'); + } + if (/\bdisplay\b/.test(intent) || /\bname\b/.test(intent)) { + categories.add('display_name'); + } + if (/\bplan\b/.test(intent) || /\bpro\b/.test(intent)) { + categories.add('plan'); + } + if (/\bterms?\b/.test(intent) || /\bagree\b/.test(intent) || /\bconsent\b/.test(intent)) { + categories.add('terms'); + } + return categories; + } + + private isFormDataEntryIntent(intent: string): boolean { + if (/\b(next|continue|submit|done|finish|back|previous)\b/.test(intent)) { + return false; + } + + return /\b(email|display|name|plan|pro|terms?|agree|consent|checkbox|radio|dropdown)\b/.test( + intent + ); + } + private classifyStepFailure( plannerAction: StepwisePlannerResponse, outcome: StepOutcome, @@ -3079,7 +3335,8 @@ export class PlannerExecutorAgent { stepNum: substepId, action: substep.action, target: this.summarizePlannerActionTarget(substep), - result: substepOutcome.status === StepStatus.SUCCESS ? 'success' : 'failed', + intent: substep.intent || null, + result: this.actionHistoryResult(substepOutcome.status), urlAfter: substepOutcome.urlAfter || (await runtime.getCurrentUrl()), }); } @@ -3087,6 +3344,17 @@ export class PlannerExecutorAgent { return substepOutcomes; } + private actionHistoryResult(status: StepStatus): string { + if ( + status === StepStatus.SUCCESS || + status === StepStatus.SKIPPED || + status === StepStatus.VISION_FALLBACK + ) { + return status; + } + return 'failed'; + } + private async checkPreStepVerification( runtime: AgentRuntime, plannerAction: StepwisePlannerResponse @@ -3209,9 +3477,20 @@ export class PlannerExecutorAgent { return null; } - const hosts = nonExtractRecent + const urlsAfter = nonExtractRecent .filter(entry => entry.urlAfter) - .map(entry => this.urlHostKey(entry.urlAfter!)); + .map(entry => entry.urlAfter!); + + // Same-page interaction is not a navigation cycle. Multi-step forms, + // SPAs, and paginated lists all stay on the same URL while the agent + // makes forward progress. Only flag cycles involving actual navigation + // (different URLs visited repeatedly). + const distinctUrls = new Set(urlsAfter); + if (distinctUrls.size <= 1) { + return null; + } + + const hosts = urlsAfter.map(url => this.urlHostKey(url)); if (hosts.length < 4) { return null; @@ -3253,6 +3532,46 @@ export class PlannerExecutorAgent { return null; } + private detectRepeatedCompletedFormSkip(stepOutcomes: StepOutcome[]): string | null { + if (stepOutcomes.length < 3) { + return null; + } + + const recent = stepOutcomes.slice(-3); + if ( + !recent.every( + outcome => + outcome.status === StepStatus.SKIPPED && + outcome.actionTaken === 'SKIPPED(previously_completed_form_step)' + ) + ) { + return null; + } + + const intentKeys = recent.map(outcome => this.normalizeCompletedFormIntent(outcome.goal || '')); + if (intentKeys.some(intent => !intent || !this.isFormDataEntryIntent(intent))) { + return null; + } + + const firstIntent = intentKeys[0]; + const repeatedIntent = intentKeys.every(intent => + this.completedFormIntentsMatch(firstIntent, intent) + ); + if (!repeatedIntent) { + return null; + } + + const urls = recent.map(outcome => outcome.urlAfter || outcome.urlBefore || '').filter(Boolean); + const sameUrl = + urls.length === recent.length && + new Set(urls.map(url => this.normalizeNavigationUrl(url))).size === 1; + if (!sameUrl) { + return null; + } + + return `Planner repeated already completed form step "${recent[0].goal}" without moving to the next incomplete goal`; + } + private urlHostKey(url: string): string { try { const parsed = new URL(url); @@ -3417,13 +3736,35 @@ export class PlannerExecutorAgent { this.getTraceStepId() ); try { - await runtime.goto(checkpoint.url); + const currentUrl = await runtime.getCurrentUrl(); + const alreadyAtCheckpointUrl = + this.normalizeNavigationUrl(currentUrl) === this.normalizeNavigationUrl(checkpoint.url); + if (!alreadyAtCheckpointUrl) { + await runtime.goto(checkpoint.url); + } const verificationSnapshot = await runtime.snapshot({ limit: this.config.snapshot.limitBase, screenshot: false, goal: 'recovery verification', }); const recovered = verifyRecoveryCheckpoint(checkpoint, verificationSnapshot); + if (!recovered && alreadyAtCheckpointUrl) { + this.tracer?.emit( + 'recovery', + { + step_index: this.currentStepIndex, + goal: 'recovery', + success: false, + details: { + phase: 'skipped_same_url_navigation', + checkpoint_url: checkpointUrl, + }, + }, + this.getTraceStepId() + ); + this.recoveryState.clearRecoveryTarget(); + return false; + } this.tracer?.emit( 'recovery', { @@ -3625,7 +3966,7 @@ export class PlannerExecutorAgent { // Submit-related patterns const submitPatterns = searchLike ? ['search', 'go', 'find', 'submit', 'apply', 'done', 'ok', 'send', 'enter'] - : ['submit', 'continue', 'save', 'send', 'sign in', 'log in', 'apply', 'ok', 'done']; + : ['next', 'continue', 'submit', 'save', 'send', 'sign in', 'log in', 'apply', 'ok', 'done']; // Icon patterns (exact match) const iconPatterns = ['>', '→', '🔍', '⌕']; @@ -3674,4 +4015,254 @@ export class PlannerExecutorAgent { candidates.sort((a, b) => b.score - a.score); return candidates[0].id; } + + private inferSameUrlWizardProgressAfterNext( + plannerAction: StepwisePlannerResponse, + beforeSnapshot: Snapshot | null, + afterSnapshot: Snapshot + ): boolean { + if (plannerAction.action !== 'TYPE' || !plannerAction.input) { + return false; + } + + const beforeElements = beforeSnapshot?.elements || []; + const afterElements = afterSnapshot.elements || []; + if (afterElements.length === 0) { + return false; + } + + const beforeSignature = this.formPaneSignature(beforeElements); + const afterSignature = this.formPaneSignature(afterElements); + if (!afterSignature || beforeSignature === afterSignature) { + return false; + } + + if (this.hasFormValidationError(afterElements)) { + return false; + } + + const hasBack = afterElements.some(element => this.elementText(element) === 'back'); + const hasForward = afterElements.some(element => + /^(next|continue|submit|done|finish)$/i.test(this.elementText(element)) + ); + if (!hasBack || !hasForward) { + return false; + } + + const typedValue = plannerAction.input.trim().toLowerCase(); + const textboxes = afterElements.filter(element => + ['textbox', 'searchbox', 'combobox'].includes((element.role || '').toLowerCase()) + ); + return ( + textboxes.length === 0 || + textboxes.some(element => { + const text = this.elementText(element); + return text.length > 0 && !text.includes(typedValue); + }) + ); + } + + private inferSameUrlWizardProgressAfterNavigationClick( + plannerAction: StepwisePlannerResponse, + clickedElement: SnapshotElement | null, + beforeSnapshot: Snapshot | null, + afterSnapshot: Snapshot + ): boolean { + if (plannerAction.action !== 'CLICK') { + return false; + } + + const clickText = clickedElement ? this.elementText(clickedElement) : ''; + const plannerText = `${plannerAction.intent || ''} ${plannerAction.goal || ''}`.toLowerCase(); + const plannerRequestedForwardNavigation = /\b(next|continue)\b/.test(plannerText); + const clickedForwardNavigation = this.isForwardNavigationControlText(clickText); + if (!plannerRequestedForwardNavigation || !clickedForwardNavigation) { + return false; + } + + const beforeSignature = this.formPaneSignature(beforeSnapshot?.elements || []); + const afterSignature = this.formPaneSignature(afterSnapshot.elements || []); + if (!afterSignature || beforeSignature === afterSignature) { + return false; + } + + if (this.hasFormValidationError(afterSnapshot.elements || [])) { + return false; + } + + const hasBack = (afterSnapshot.elements || []).some( + element => this.elementText(element) === 'back' + ); + const hasForwardOrSubmit = (afterSnapshot.elements || []).some(element => + /^(next|continue|submit|done|finish)$/i.test(this.elementText(element)) + ); + + return hasBack && hasForwardOrSubmit; + } + + private inferTerminalFormSubmissionAfterClick( + plannerAction: StepwisePlannerResponse, + clickedElement: SnapshotElement | null, + beforeSnapshot: Snapshot | null, + afterSnapshot: Snapshot + ): boolean { + if (plannerAction.action !== 'CLICK') { + return false; + } + + const clickText = clickedElement ? this.elementText(clickedElement) : ''; + const plannerText = `${plannerAction.intent || ''} ${plannerAction.goal || ''}`.toLowerCase(); + const plannerRequestedSubmission = this.isTerminalSubmissionText(plannerText); + const clickedSubmissionControl = this.isTerminalSubmissionControlText(clickText); + if (!plannerRequestedSubmission || !clickedSubmissionControl) { + return false; + } + + const afterElements = afterSnapshot.elements || []; + if (afterElements.length === 0 || this.hasFormValidationError(afterElements)) { + return false; + } + + const beforePageText = (beforeSnapshot?.elements || []) + .map(element => this.elementText(element)) + .join(' '); + const pageText = afterElements.map(element => this.elementText(element)).join(' '); + const confirmationPattern = + /\b(thanks?|thank you|submitted|complete|completed|success|received|confirmation)\b/i; + if (confirmationPattern.test(pageText) && !confirmationPattern.test(beforePageText)) { + return true; + } + + const beforeSignature = this.formPaneSignature(beforeSnapshot?.elements || []); + const afterSignature = this.formPaneSignature(afterElements); + if (beforeSignature && beforeSignature === afterSignature) { + return false; + } + + if (confirmationPattern.test(pageText)) { + return true; + } + + const hasForwardOrTerminalControl = afterElements.some(element => + /^(next|continue|submit)$/i.test(this.elementText(element)) + ); + return beforeSignature.length > 0 && !hasForwardOrTerminalControl; + } + + private isFinalFormSubmissionAction( + task: string, + plannerAction: StepwisePlannerResponse + ): boolean { + if (plannerAction.action !== 'CLICK') { + return false; + } + + const actionText = `${plannerAction.intent || ''} ${plannerAction.goal || ''} ${ + plannerAction.input || '' + } ${plannerAction.target || ''}`; + if (!this.isTerminalSubmissionText(actionText)) { + return false; + } + + return /\b(lastly|finally|final step|submit\s+(?:the\s+)?(?:multi-step\s+)?form|complete\s+(?:the\s+)?(?:form|onboarding|registration)|confirm registration|place order)\b/i.test( + task + ); + } + + private isForwardNavigationControlText(text: string): boolean { + return /^(next|continue)$/i.test(text.trim()); + } + + private isTerminalSubmissionText(text: string): boolean { + return /\b(submit|send|confirm|complete|completed|done|finish|register|sign\s*up|signup|create account|place order)\b/i.test( + text + ); + } + + private isTerminalSubmissionControlText(text: string): boolean { + const normalized = text.trim(); + return ( + this.isTerminalSubmissionText(normalized) && !this.isForwardNavigationControlText(normalized) + ); + } + + private inferFormChoiceSelectionAfterClick( + plannerAction: StepwisePlannerResponse, + clickedElement: SnapshotElement | null, + afterSnapshot: Snapshot + ): boolean { + if (plannerAction.action !== 'CLICK' || !clickedElement) { + return false; + } + + const role = (clickedElement.role || '').toLowerCase(); + if (role !== 'radio' && role !== 'checkbox') { + return false; + } + + const afterElements = afterSnapshot.elements || []; + if (afterElements.length === 0 || this.hasFormValidationError(afterElements)) { + return false; + } + + const clickedText = this.elementText(clickedElement); + const plannerText = `${plannerAction.intent || ''} ${plannerAction.goal || ''}`.toLowerCase(); + return this.selectionControlMatchesIntent(clickedText, plannerText); + } + + private selectionControlMatchesIntent(controlText: string, intentText: string): boolean { + const controlWords = new Set( + controlText + .toLowerCase() + .split(/[^a-z0-9]+/) + .filter(word => word.length >= 3) + ); + if (controlWords.size === 0) { + return false; + } + + const ignoredIntentWords = new Set([ + 'button', + 'checkbox', + 'radio', + 'field', + 'input', + 'select', + 'choose', + 'click', + 'option', + ]); + const intentWords = intentText + .split(/[^a-z0-9]+/) + .filter(word => word.length >= 3 && !ignoredIntentWords.has(word)); + + return intentWords.some(word => controlWords.has(word)); + } + + private formPaneSignature(elements: SnapshotElement[]): string { + return elements + .filter(element => { + const role = (element.role || '').toLowerCase(); + return ['textbox', 'searchbox', 'combobox', 'checkbox', 'radio', 'button'].includes(role); + }) + .map(element => `${(element.role || '').toLowerCase()}:${this.elementText(element)}`) + .filter(Boolean) + .join('|'); + } + + private hasFormValidationError(elements: SnapshotElement[]): boolean { + return elements.some(element => + /\b(required|invalid|error|please enter|must enter|try again|failed)\b/i.test( + this.elementText(element) + ) + ); + } + + private elementText(element: SnapshotElement): string { + return [element.text, element.name, element.ariaLabel, element.nearbyText] + .filter((part): part is string => typeof part === 'string' && part.trim().length > 0) + .join(' ') + .trim() + .toLowerCase(); + } } diff --git a/src/agents/planner-executor/predicates.ts b/src/agents/planner-executor/predicates.ts index b45a306..3a776b3 100644 --- a/src/agents/planner-executor/predicates.ts +++ b/src/agents/planner-executor/predicates.ts @@ -10,6 +10,7 @@ * - url_matches: Check if URL matches a regex pattern * - exists: Check if element with text/selector exists * - not_exists: Check if element does not exist + * - element_exists: Check if element with role and optional text/label exists * - element_count: Check element count within range * - any_of: Any of the sub-predicates passes * - all_of: All sub-predicates pass @@ -251,6 +252,28 @@ export function buildPredicate(spec: PredicateSpec): Predicate { return elementCount(selector, minCount, maxCount); } + case 'element_exists': { + const role = String(args[0] || '').toLowerCase(); + const textQuery = args[1] ? String(args[1]).toLowerCase() : ''; + return { + name: 'element_exists', + evaluate(snapshot: Snapshot): boolean { + const elements = snapshot.elements || []; + return elements.some(el => { + const elRole = (el.role || '').toLowerCase(); + if (role && elRole !== role) return false; + if (!textQuery) return true; + const elText = (el.text || '').toLowerCase(); + const elName = (el.name || '').toLowerCase(); + const elAria = (el.ariaLabel || '').toLowerCase(); + return ( + elText.includes(textQuery) || elName.includes(textQuery) || elAria.includes(textQuery) + ); + }); + }, + }; + } + case 'any_of': return anyOf(...(args as PredicateSpec[]).map(buildPredicate)); diff --git a/src/agents/planner-executor/prompts.ts b/src/agents/planner-executor/prompts.ts index fcdba5b..05c0d14 100644 --- a/src/agents/planner-executor/prompts.ts +++ b/src/agents/planner-executor/prompts.ts @@ -59,6 +59,7 @@ export function buildStepwisePlannerPrompt( Actions: - NAVIGATE: Go directly to a URL when the next destination is known. Set "target" to the URL. - CLICK: Click an element. Set "intent" to describe the SPECIFIC element (include label, placeholder, or nearby text, e.g. "email textbox", "display name field", "Next button", NOT just "textbox" or "button"). Set "input" to EXACT text from elements list. +- TYPE: Type text into a form field (not a search box). Set "input" to the VALUE from the goal. Set "intent" to describe the field (e.g., "email field", "name field"). - TYPE_AND_SUBMIT: Type text into a search box and submit. Set "input" to the SEARCH QUERY from the goal (NOT the element label). - SCROLL: Scroll page. Set "direction" to "up" or "down". - WAIT: Wait for content to appear when a follow-up verification is needed. @@ -87,11 +88,11 @@ CRITICAL RULE FOR ADD TO CART: - Set "input" to "Add to Cart" (or exact button text from elements) Output ONLY valid JSON (no markdown, no \`\`\`): -{"action":"NAVIGATE","target":"https://shop.test/search","verify":[{"predicate":"url_contains","args":["search"]}],"reasoning":"open the known search page"} -{"action":"TYPE_AND_SUBMIT","intent":"searchbox","input":"wireless headphones","verify":[{"predicate":"url_contains","args":["search"]}],"reasoning":"search for product"} -{"action":"CLICK","intent":"product link","input":"Sony WH-1000XM4 Wireless...","verify":[],"required":true,"heuristic_hints":[{"intent_pattern":"product_link","text_patterns":["sony wh-1000xm4"],"role_filter":["link"],"priority":8}],"reasoning":"click first product result"} -{"action":"CLICK","intent":"add to cart button","input":"Add to Cart","verify":[],"required":true,"heuristic_hints":[{"intent_pattern":"add_to_cart","text_patterns":["add to cart","buy now"],"role_filter":["button"],"priority":10}],"reasoning":"add item to cart"} -{"action":"DONE","intent":"completed","reasoning":"clicked add to cart - goal complete"} +{"action":"NAVIGATE","target":"https://shop.test/search","verify":[{"predicate":"url_contains","args":["search"]}]} +{"action":"TYPE_AND_SUBMIT","intent":"searchbox","input":"wireless headphones","verify":[{"predicate":"url_contains","args":["search"]}]} +{"action":"CLICK","intent":"product link","input":"Sony WH-1000XM4 Wireless...","verify":[]} +{"action":"CLICK","intent":"add to cart button","input":"Add to Cart","verify":[]} +{"action":"DONE","intent":"completed"} RULES: 1. For TYPE_AND_SUBMIT: "input" = search query from goal (what you want to search for) @@ -105,8 +106,9 @@ RULES: 9. Do NOT output or any reasoning 10. Do NOT return DONE until ALL parts of the goal are complete 11. Never copy example URLs from these instructions. Only NAVIGATE to a URL from the user's task, the current page, or a visible element. -12. For multi-step forms: process each field as a separate step, then CLICK the Next/Submit button as a separate step. -13. "intent" must be SPECIFIC: describe the element with its label or context (e.g., "email field", "plan dropdown", "Next button on step 2")`; +12. For multi-step forms: TYPE into each field (action: TYPE) BEFORE clicking Next. Never click Next without filling required fields first. +13. "intent" must be SPECIFIC: describe the element with its label or context (e.g., "email field", "plan dropdown", "Next button on step 2") +14. Treat history results "success", "skipped", and "vision_fallback" as already satisfied. Do not repeat those steps; choose the next incomplete part of the goal.`; // Inject extraction-specific guidance when the goal is an extraction task const extractionGuidance = isExtractionTask(goal) ? getExtractionDomainGuidance() : ''; diff --git a/src/agents/planner-executor/vision-fallback.ts b/src/agents/planner-executor/vision-fallback.ts index 6e616e1..738760f 100644 --- a/src/agents/planner-executor/vision-fallback.ts +++ b/src/agents/planner-executor/vision-fallback.ts @@ -94,15 +94,13 @@ export function detectSnapshotFailure(snapshot: Snapshot | null): VisionFallback } } - // If we have sufficient elements, the snapshot is usable - // regardless of what diagnostics say - const VISION_THRESHOLD = 10; - if (elementCount >= VISION_THRESHOLD) { - return { shouldUseVision: false, reason: null }; + // Too few elements to be useful — always trigger vision + if (elementCount < 3) { + return { shouldUseVision: true, reason: 'too_few_elements' }; } - // Below threshold — trigger vision fallback - return { shouldUseVision: true, reason: 'below_threshold' }; + // 3+ elements: snapshot is usable unless explicit conditions triggered above + return { shouldUseVision: false, reason: null }; } /** diff --git a/tests/agents/planner-executor/agent-parity.test.ts b/tests/agents/planner-executor/agent-parity.test.ts index 069289c..9395a6d 100644 --- a/tests/agents/planner-executor/agent-parity.test.ts +++ b/tests/agents/planner-executor/agent-parity.test.ts @@ -659,6 +659,7 @@ describe('PlannerExecutorAgent parity', () => { return; } if (id === 2) { + runtime.currentUrl = 'https://shop.test/error'; throw new Error('click failed'); } }, diff --git a/tests/agents/planner-executor/composable-heuristics-integration.test.ts b/tests/agents/planner-executor/composable-heuristics-integration.test.ts index 1012e2f..03a7ef8 100644 --- a/tests/agents/planner-executor/composable-heuristics-integration.test.ts +++ b/tests/agents/planner-executor/composable-heuristics-integration.test.ts @@ -233,14 +233,6 @@ describe('Composable heuristics integration', () => { action: 'CLICK', intent: 'continue', input: 'Continue', - heuristic_hints: [ - { - intent_pattern: 'continue', - text_patterns: ['continue'], - role_filter: ['button'], - priority: 10, - }, - ], }), JSON.stringify({ action: 'DONE' }), ]); diff --git a/tests/agents/planner-executor/recovery-integration.test.ts b/tests/agents/planner-executor/recovery-integration.test.ts index cf3fb49..365ccd0 100644 --- a/tests/agents/planner-executor/recovery-integration.test.ts +++ b/tests/agents/planner-executor/recovery-integration.test.ts @@ -64,6 +64,7 @@ class RuntimeStub implements AgentRuntime { } async click(): Promise { + this.currentUrl = this.currentUrl.replace(/\/(checkout|cart)/, '/error'); throw new Error('executor click failed'); } diff --git a/tests/agents/planner-executor/search-submit.test.ts b/tests/agents/planner-executor/search-submit.test.ts index 0f71a2c..e23fceb 100644 --- a/tests/agents/planner-executor/search-submit.test.ts +++ b/tests/agents/planner-executor/search-submit.test.ts @@ -9,15 +9,20 @@ import { isSearchLikeTypeAndSubmit, isUrlChangeRelevantToIntent, } from '../../../src/agents/planner-executor/boundary-detection'; +import { normalizeReplanPatch } from '../../../src/agents/planner-executor/plan-utils'; import type { SnapshotElement } from '../../../src/agents/planner-executor/plan-models'; class ProviderStub extends LLMProvider { private responses: string[]; public calls: Array<{ system?: string; user?: string; options?: any }> = []; + public imageCalls: Array<{ system?: string; user?: string; imageBase64: string; options?: any }> = + []; + private readonly vision: boolean; - constructor(responses: string[] = []) { + constructor(responses: string[] = [], options: { vision?: boolean } = {}) { super(); this.responses = [...responses]; + this.vision = options.vision ?? false; } get modelName(): string { @@ -28,6 +33,10 @@ class ProviderStub extends LLMProvider { return true; } + supportsVision(): boolean { + return this.vision; + } + async generate( system?: string, user?: string, @@ -45,12 +54,76 @@ class ProviderStub extends LLMProvider { totalTokens: 15, }; } + + async generateWithImage( + system: string, + user: string, + imageBase64: string, + options: Record = {} + ): Promise { + this.imageCalls.push({ system, user, imageBase64, options }); + const content = this.responses.length ? this.responses.shift()! : 'NONE'; + return { + content, + modelName: this.modelName, + promptTokens: 10, + completionTokens: 5, + totalTokens: 15, + }; + } +} + +class AdaptiveProviderStub extends LLMProvider { + public calls: Array<{ system?: string; user?: string; options?: any }> = []; + + get modelName(): string { + return 'adaptive-stub'; + } + + supportsJsonMode(): boolean { + return true; + } + + async generate( + system?: string, + user?: string, + options: Record = {} + ): Promise { + this.calls.push({ system, user, options }); + const content = + this.calls.length === 1 + ? JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Display name'] }], + }) + : user?.includes('TYPE(user@example.com) → skipped') + ? JSON.stringify({ action: 'DONE', reasoning: 'stale email skip was accepted' }) + : JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Email'] }], + }); + + return { + content, + modelName: this.modelName, + promptTokens: 10, + completionTokens: 5, + totalTokens: 15, + }; + } } class RuntimeStub implements AgentRuntime { public currentUrl: string; + public gotoCalls: string[] = []; public clickCalls: number[] = []; + public coordinateClickCalls: Array<{ x: number; y: number }> = []; public typeCalls: Array<{ elementId: number; text: string }> = []; + public coordinateTypeCalls: string[] = []; public keyCalls: string[] = []; constructor( @@ -74,6 +147,7 @@ class RuntimeStub implements AgentRuntime { } async goto(url: string): Promise { + this.gotoCalls.push(url); this.currentUrl = url; } @@ -82,11 +156,19 @@ class RuntimeStub implements AgentRuntime { await this.handlers.onClick?.(elementId, this); } + async clickCoordinate(x: number, y: number): Promise { + this.coordinateClickCalls.push({ x, y }); + } + async type(elementId: number, text: string): Promise { this.typeCalls.push({ elementId, text }); await this.handlers.onType?.(elementId, text, this); } + async typeCoordinate(text: string): Promise { + this.coordinateTypeCalls.push(text); + } + async pressKey(key: string): Promise { this.keyCalls.push(key); await this.handlers.onPressKey?.(key, this); @@ -107,11 +189,16 @@ class RuntimeStub implements AgentRuntime { } } -function makeSnapshot(url: string, elements: Snapshot['elements']): Snapshot { +function makeSnapshot( + url: string, + elements: Snapshot['elements'], + extra: Partial = {} +): Snapshot { return { url, title: 'Test Page', elements, + ...extra, }; } @@ -335,6 +422,1279 @@ describe('PlannerExecutorAgent search submission parity', () => { expect(executor.calls).toHaveLength(0); }); + it('uses deterministic field heuristics on sparse multi-step form pages', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + }), + JSON.stringify({ action: 'DONE', reasoning: 'email field completed' }), + ]); + const executor = new ProviderStub(['NONE']); + const runtime = new RuntimeStub('https://forms.test/signup', rt => + makeSnapshot(rt.currentUrl, [ + { + id: 20, + role: 'textbox', + ariaLabel: 'Email', + text: 'Email', + clickable: true, + importance: 100, + }, + { id: 21, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ]) + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Fill the multi-step signup form with user@example.com, then continue', + }); + + expect(result.success).toBe(true); + expect(runtime.typeCalls).toEqual([{ elementId: 20, text: 'user@example.com' }]); + expect(executor.calls).toHaveLength(0); + }); + + it('types a form field and clicks Next when verification expects the next step', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Display name'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'advanced to display name step' }), + ]); + const executor = new ProviderStub(['NONE']); + let stage: 'email' | 'displayName' = 'email'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'email' + ? [ + { + id: 20, + role: 'textbox', + ariaLabel: 'Email', + text: 'Email', + clickable: true, + importance: 100, + }, + { id: 21, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + : [ + { + id: 30, + role: 'textbox', + ariaLabel: 'Display name', + text: 'Display name', + clickable: true, + importance: 100, + }, + ], + { status: 'require_vision' } + ), + { + onClick: elementId => { + if (elementId === 21) { + stage = 'displayName'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Fill the multi-step signup form with user@example.com, then continue', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0].actionTaken).toBe('TYPE(20, "user@example.com") -> CLICK(21)'); + expect(runtime.typeCalls).toEqual([{ elementId: 20, text: 'user@example.com' }]); + expect(runtime.clickCalls).toEqual([21]); + expect(executor.calls).toHaveLength(0); + }); + + it('clicks Next after vision coordinate typing when verification expects the next form step', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Display name'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'advanced to display name step' }), + ]); + const executor = new ProviderStub(['CLICK_XY(499, 337)'], { vision: true }); + let stage: 'email' | 'displayName' = 'email'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'email' + ? [{ id: 21, role: 'button', text: 'Next', clickable: true, importance: 90 }] + : [ + { + id: 30, + role: 'textbox', + ariaLabel: 'Display name', + text: 'Display name', + clickable: true, + importance: 100, + }, + ], + { status: 'require_vision', screenshot: 'ZmFrZQ==' } + ), + { + onClick: elementId => { + if (elementId === 21) { + stage = 'displayName'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Fill the multi-step signup form with user@example.com, then continue', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0].actionTaken).toBe( + 'CLICK_XY(499, 337) + TYPE_AT("user@example.com") -> CLICK(21)' + ); + expect(runtime.coordinateClickCalls).toEqual([{ x: 499, y: 337 }]); + expect(runtime.coordinateTypeCalls).toEqual(['user@example.com']); + expect(runtime.clickCalls).toEqual([21]); + expect(executor.imageCalls).toHaveLength(1); + }); + + it('treats a same-url wizard pane transition as success after vision typing and Next', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Display name'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'advanced to next wizard pane' }), + ]); + const executor = new ProviderStub(['CLICK_XY(499, 337)'], { vision: true }); + let stage: 'email' | 'displayName' = 'email'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'email' + ? [{ id: 21, role: 'button', text: 'Next', clickable: true, importance: 90 }] + : [ + { id: 30, role: 'textbox', text: 'Llama Rider', clickable: true, importance: 100 }, + { id: 20, role: 'button', text: 'Back', clickable: true, importance: 80 }, + { id: 21, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ], + { status: 'require_vision', screenshot: 'ZmFrZQ==' } + ), + { + onClick: elementId => { + if (elementId === 21) { + stage = 'displayName'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Fill the multi-step signup form with user@example.com, then continue', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0].actionTaken).toBe( + 'CLICK_XY(499, 337) + TYPE_AT("user@example.com") -> CLICK(21)' + ); + expect(runtime.coordinateTypeCalls).toEqual(['user@example.com']); + expect(runtime.clickCalls).toEqual([21]); + }); + + it('treats a same-url Next click as progress before final Submit', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'Next button on step 1', + verify: [{ predicate: 'element_exists', args: ['heading', 'Review'] }], + }), + JSON.stringify({ + action: 'CLICK', + intent: 'Submit button', + verify: [{ predicate: 'element_exists', args: ['status', 'Submitted'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'submitted onboarding form' }), + ]); + const executor = new ProviderStub(['CLICK(2)', 'CLICK(6)']); + let stage: 'terms' | 'review' | 'submitted' = 'terms'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'terms' + ? [ + { + id: 1, + role: 'checkbox', + text: 'Agree to terms', + clickable: true, + importance: 100, + }, + { id: 2, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + : stage === 'review' + ? [ + { id: 5, role: 'button', text: 'Back', clickable: true, importance: 80 }, + { id: 6, role: 'button', text: 'Submit', clickable: true, importance: 100 }, + ] + : [{ id: 7, role: 'status', text: 'Submitted', importance: 100 }] + ), + { + onClick: elementId => { + if (elementId === 2) { + stage = 'review'; + } + if (elementId === 6) { + stage = 'submitted'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Complete onboarding, agree to the terms, and lastly submit the multi-step form', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(2)', + verificationPassed: true, + }); + expect(result.stepOutcomes[1]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(6)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([2, 6]); + }); + + it('does not pre-skip a same-url Next click just because the downstream Submit is visible', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'Next button on step 2', + verify: [{ predicate: 'element_exists', args: ['button', 'Submit'] }], + }), + JSON.stringify({ + action: 'CLICK', + intent: 'Submit button', + verify: [{ predicate: 'element_exists', args: ['status', 'Submitted'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'form submitted' }), + ]); + const executor = new ProviderStub(['CLICK(2)', 'CLICK(4)']); + let stage: 'plan' | 'review' | 'submitted' = 'plan'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'plan' + ? [ + { id: 2, role: 'button', text: 'Next', clickable: true, importance: 90 }, + { id: 3, role: 'checkbox', text: 'Terms', clickable: true, importance: 80 }, + { id: 4, role: 'button', text: 'Submit', clickable: true, importance: 40 }, + ] + : stage === 'review' + ? [ + { id: 4, role: 'button', text: 'Submit', clickable: true, importance: 100 }, + { id: 5, role: 'button', text: 'Back', clickable: true, importance: 80 }, + ] + : [{ id: 6, role: 'status', text: 'Submitted', importance: 100 }] + ), + { + onClick: elementId => { + if (elementId === 2) { + stage = 'review'; + } else if (elementId === 4) { + stage = 'submitted'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Complete a same-url onboarding wizard and lastly submit the form', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(2)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([2, 4]); + }); + + it('does not treat a Next click as successful final submission', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'Submit button', + verify: [{ predicate: 'element_exists', args: ['status', 'Submitted'] }], + }), + ]); + const executor = new ProviderStub(['CLICK(2)']); + let stage: 'terms' | 'review' = 'terms'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'terms' + ? [ + { id: 2, role: 'button', text: 'Next', clickable: true, importance: 90 }, + { id: 3, role: 'button', text: 'Back', clickable: true, importance: 80 }, + ] + : [ + { id: 4, role: 'button', text: 'Back', clickable: true, importance: 80 }, + { id: 6, role: 'button', text: 'Submit', clickable: true, importance: 100 }, + ] + ), + { + onClick: elementId => { + if (elementId === 2) { + stage = 'review'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Lastly, submit the multi-step form', + }); + + expect(result.success).toBe(false); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.FAILED, + actionTaken: 'CLICK(2)', + verificationPassed: false, + }); + expect(runtime.clickCalls).toEqual([2]); + expect(stage).toBe('review'); + }); + + it('treats a real Submit click as terminal when confirmation text differs from planner predicate', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'Submit button', + verify: [{ predicate: 'element_exists', args: ['status', 'Submitted'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'form submitted' }), + ]); + const executor = new ProviderStub(['CLICK(6)']); + let stage: 'review' | 'complete' = 'review'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'review' + ? [ + { id: 5, role: 'button', text: 'Back', clickable: true, importance: 80 }, + { id: 6, role: 'button', text: 'Submit', clickable: true, importance: 100 }, + ] + : [ + { + id: 8, + role: 'heading', + text: 'Thanks for completing onboarding', + importance: 100, + }, + { id: 9, role: 'text', text: 'Your Pro plan is ready.', importance: 80 }, + ] + ), + { + onClick: elementId => { + if (elementId === 6) { + stage = 'complete'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Lastly, submit the multi-step form', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(6)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([6]); + }); + + it('treats a same-pane Submit click as terminal when a confirmation status appears', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'Submit button', + verify: [{ predicate: 'element_exists', args: ['status', 'Submitted'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'form submitted' }), + ]); + const executor = new ProviderStub(['CLICK(1)']); + let submitted = false; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + submitted + ? [ + { id: 1, role: 'button', text: 'Submit', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Back', clickable: true, importance: 80 }, + { id: 3, role: 'status', text: 'Onboarding complete', importance: 100 }, + ] + : [ + { id: 1, role: 'button', text: 'Submit', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Back', clickable: true, importance: 80 }, + ] + ), + { + onClick: elementId => { + if (elementId === 1) { + submitted = true; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Lastly, submit the multi-step form', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(1)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([1]); + }); + + it('treats a Submit click as terminal when Submit disappears and only Back remains', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'Submit button', + verify: [{ predicate: 'element_exists', args: ['status', 'Submitted'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'form submitted' }), + ]); + const executor = new ProviderStub(['CLICK(1)']); + let submitted = false; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + submitted + ? [ + { id: 2, role: 'button', text: 'Back', clickable: true, importance: 80 }, + { id: 3, role: 'heading', text: 'Welcome aboard', importance: 100 }, + ] + : [ + { id: 1, role: 'button', text: 'Submit', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Back', clickable: true, importance: 80 }, + ] + ), + { + onClick: elementId => { + if (elementId === 1) { + submitted = true; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Lastly, submit the multi-step form', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(1)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([1]); + }); + + it('treats a Submit click as terminal when the submit control changes to Done', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'Submit button', + verify: [{ predicate: 'element_exists', args: ['status', 'Submitted'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'form submitted' }), + ]); + const executor = new ProviderStub(['CLICK(1)']); + let submitted = false; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + submitted + ? [ + { id: 1, role: 'button', text: 'Done', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Back', clickable: true, importance: 80 }, + ] + : [ + { id: 1, role: 'button', text: 'Submit', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Back', clickable: true, importance: 80 }, + ] + ), + { + onClick: elementId => { + if (elementId === 1) { + submitted = true; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Lastly, submit the multi-step form', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(1)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([1]); + }); + + it('does not require clicking Done after a terminal Submit succeeds', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'Submit button', + verify: [{ predicate: 'element_exists', args: ['status', 'Submitted'] }], + }), + JSON.stringify({ + action: 'CLICK', + intent: 'Done button', + verify: [{ predicate: 'element_exists', args: ['status', 'Done'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'form submitted' }), + ]); + const executor = new ProviderStub(['CLICK(1)', 'CLICK(1)']); + let submitted = false; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + submitted + ? [ + { id: 1, role: 'button', text: 'Done', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Back', clickable: true, importance: 80 }, + ] + : [ + { id: 1, role: 'button', text: 'Submit', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Back', clickable: true, importance: 80 }, + ] + ), + { + onClick: elementId => { + if (elementId === 1) { + submitted = true; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Lastly, submit the multi-step form', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes).toHaveLength(1); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(1)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([1]); + expect(planner.calls).toHaveLength(1); + }); + + it('treats other common final form buttons as terminal submissions', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'Confirm registration button', + verify: [{ predicate: 'element_exists', args: ['status', 'Registered'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'registration completed' }), + ]); + const executor = new ProviderStub(['CLICK(10)']); + let stage: 'review' | 'complete' = 'review'; + const runtime = new RuntimeStub( + 'https://forms.test/register', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'review' + ? [ + { id: 9, role: 'button', text: 'Back', clickable: true, importance: 80 }, + { + id: 10, + role: 'button', + text: 'Confirm registration', + clickable: true, + importance: 100, + }, + ] + : [{ id: 11, role: 'heading', text: 'Account complete', importance: 100 }] + ), + { + onClick: elementId => { + if (elementId === 10) { + stage = 'complete'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Confirm registration and complete the form', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(10)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([10]); + }); + + it('does not target Next with strict submit intent heuristics', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'submit', + verify: [{ predicate: 'element_exists', args: ['status', 'Submitted'] }], + }), + ]); + const executor = new ProviderStub(['NONE']); + const runtime = new RuntimeStub('https://forms.test/signup', rt => + makeSnapshot(rt.currentUrl, [ + { id: 2, role: 'button', text: 'Next', clickable: true, importance: 90 }, + { id: 3, role: 'button', text: 'Back', clickable: true, importance: 80 }, + ]) + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Lastly, submit the multi-step form', + }); + + expect(result.success).toBe(false); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.FAILED, + verificationPassed: false, + error: 'Executor could not find suitable element', + }); + expect(runtime.clickCalls).toEqual([]); + }); + + it('does not reload a same-url wizard checkpoint during recovery', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'Next button on email step', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Display name'] }], + }), + JSON.stringify({ + action: 'CLICK', + intent: 'stale email field', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Email'] }], + }), + ]); + const executor = new ProviderStub(['CLICK(2)', 'NONE']); + let stage: 'email' | 'displayName' = 'email'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'email' + ? [ + { id: 1, role: 'textbox', text: 'Email', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + : [ + { id: 3, role: 'textbox', text: 'Display name', clickable: true, importance: 100 }, + { id: 4, role: 'button', text: 'Back', clickable: true, importance: 80 }, + { id: 5, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + ), + { + onClick: elementId => { + if (elementId === 2) { + stage = 'displayName'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Complete a same-url onboarding wizard', + }); + + expect(result.stepOutcomes.length).toBeGreaterThanOrEqual(2); + expect(result.stepOutcomes[0].status).toBe(StepStatus.SUCCESS); + expect(result.stepOutcomes[1].status).toBe(StepStatus.FAILED); + expect(runtime.gotoCalls).toEqual([]); + expect(stage).toBe('displayName'); + }); + + it('skips repeated completed form-field intents on later wizard panes', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Display name'] }], + }), + JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Email'] }], + }), + JSON.stringify({ + action: 'TYPE', + intent: 'display name field', + input: 'Tony W', + verify: [{ predicate: 'element_exists', args: ['button', 'Next'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'continued past stale email repeat' }), + ]); + const executor = new ProviderStub(['TYPE(1, "user@example.com")', 'TYPE(3, "Tony W")']); + let stage: 'email' | 'displayName' = 'email'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'email' + ? [ + { id: 1, role: 'textbox', text: 'Email', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + : [ + { id: 3, role: 'textbox', text: 'Display name', clickable: true, importance: 100 }, + { id: 4, role: 'button', text: 'Back', clickable: true, importance: 80 }, + { id: 5, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + ), + { + onType: (elementId, _text) => { + if (elementId === 1) { + stage = 'displayName'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Complete a same-url onboarding wizard', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[1]).toMatchObject({ + status: StepStatus.SKIPPED, + actionTaken: 'SKIPPED(previously_completed_form_step)', + verificationPassed: true, + }); + expect(runtime.typeCalls).toEqual([ + { elementId: 1, text: 'user@example.com' }, + { elementId: 3, text: 'Tony W' }, + ]); + }); + + it('records previously completed form skips as skipped so the planner can move on', async () => { + const planner = new AdaptiveProviderStub(); + const executor = new ProviderStub(['TYPE(1, "user@example.com")']); + let stage: 'email' | 'displayName' = 'email'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'email' + ? [ + { id: 1, role: 'textbox', text: 'Email', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + : [ + { id: 3, role: 'textbox', text: 'Display name', clickable: true, importance: 100 }, + { id: 4, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + ), + { + onType: (elementId, _text) => { + if (elementId === 1) { + stage = 'displayName'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + stepwise: { maxSteps: 4 }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Complete a same-url onboarding wizard', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[1]).toMatchObject({ + status: StepStatus.SKIPPED, + actionTaken: 'SKIPPED(previously_completed_form_step)', + }); + expect(planner.calls[2]?.user).toContain('TYPE(user@example.com) → skipped'); + expect(result.error).toBeUndefined(); + }); + + it('stops repeated completed form skip loops before exhausting max steps', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Display name'] }], + }), + JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Email'] }], + }), + JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Email'] }], + }), + JSON.stringify({ + action: 'TYPE', + intent: 'email field', + input: 'user@example.com', + verify: [{ predicate: 'element_exists', args: ['textbox', 'Email'] }], + }), + ]); + const executor = new ProviderStub(['TYPE(1, "user@example.com")']); + let stage: 'email' | 'displayName' = 'email'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'email' + ? [ + { id: 1, role: 'textbox', text: 'Email', clickable: true, importance: 100 }, + { id: 2, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + : [ + { id: 3, role: 'textbox', text: 'Display name', clickable: true, importance: 100 }, + { id: 4, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + ), + { + onType: (elementId, _text) => { + if (elementId === 1) { + stage = 'displayName'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + stepwise: { maxSteps: 10 }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Complete a same-url onboarding wizard', + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('Planner repeated already completed form step'); + expect(result.error).not.toContain('Exceeded maximum steps'); + expect(result.stepOutcomes).toHaveLength(4); + }); + + it('skips narrower repeated plan-choice intents after the plan step succeeds', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'plan radio button', + verify: [{ predicate: 'element_exists', args: ['checkbox', 'Terms'] }], + }), + JSON.stringify({ + action: 'CLICK', + intent: 'Pro plan radio button', + verify: [{ predicate: 'element_exists', args: ['status', 'Plan confirmed'] }], + }), + JSON.stringify({ + action: 'CLICK', + intent: 'terms checkbox', + verify: [{ predicate: 'element_exists', args: ['status', 'Submitted'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'plan selected and terms accepted' }), + ]); + const executor = new ProviderStub(['CLICK(4)', 'CLICK(5)']); + let stage: 'plan' | 'terms' | 'submitted' = 'plan'; + const runtime = new RuntimeStub( + 'https://forms.test/signup', + rt => + makeSnapshot( + rt.currentUrl, + stage === 'plan' + ? [ + { id: 4, role: 'radio', text: 'Pro', clickable: true, importance: 100 }, + { id: 6, role: 'button', text: 'Next', clickable: true, importance: 90 }, + ] + : stage === 'terms' + ? [ + { id: 5, role: 'checkbox', text: 'Terms', clickable: true, importance: 100 }, + { id: 7, role: 'button', text: 'Submit', clickable: true, importance: 90 }, + ] + : [{ id: 8, role: 'status', text: 'Submitted', importance: 100 }] + ), + { + onClick: elementId => { + if (elementId === 4) { + stage = 'terms'; + } + if (elementId === 5) { + stage = 'submitted'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Complete onboarding with the Pro plan and accept terms', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[1]).toMatchObject({ + status: StepStatus.SKIPPED, + actionTaken: 'SKIPPED(previously_completed_form_step)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([4, 5]); + }); + + it('does not pre-skip a form choice just because its next-step verification is already visible', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'pro plan radio button', + verify: [{ predicate: 'element_exists', args: ['checkbox', 'Terms'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'plan selected' }), + ]); + const executor = new ProviderStub(['CLICK(4)']); + const runtime = new RuntimeStub('https://forms.test/signup', rt => + makeSnapshot(rt.currentUrl, [ + { id: 4, role: 'radio', text: 'Pro', clickable: true, importance: 100 }, + { id: 5, role: 'radio', text: 'Basic', clickable: true, importance: 80 }, + { id: 6, role: 'checkbox', text: 'Terms', clickable: true, importance: 90 }, + { id: 7, role: 'button', text: 'Next', clickable: true, importance: 70 }, + ]) + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Choose the Pro plan', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(4)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([4]); + }); + + it('treats clicking the intended radio option as success when verification is too strict', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'pro plan radio button', + verify: [{ predicate: 'element_exists', args: ['status', 'Plan selected'] }], + }), + JSON.stringify({ action: 'DONE', reasoning: 'plan selected' }), + ]); + const executor = new ProviderStub(['CLICK(4)']); + const runtime = new RuntimeStub('https://forms.test/signup', rt => + makeSnapshot(rt.currentUrl, [ + { id: 4, role: 'radio', text: 'Pro', clickable: true, importance: 100 }, + { id: 5, role: 'radio', text: 'Basic', clickable: true, importance: 90 }, + { id: 6, role: 'button', text: 'Next', clickable: true, importance: 80 }, + ]) + ); + + const agent = new PlannerExecutorAgent({ + planner, + executor, + config: { + retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, + recovery: { enabled: false }, + }, + }); + + const result = await agent.runStepwise(runtime, { + task: 'Choose the Pro plan', + }); + + expect(result.success).toBe(true); + expect(result.stepOutcomes[0]).toMatchObject({ + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(4)', + verificationPassed: true, + }); + expect(runtime.clickCalls).toEqual([4]); + }); + + it('normalizes repair optional substep aliases and numeric inputs', () => { + expect( + normalizeReplanPatch({ + replaceSteps: [ + { + id: '1', + step: { + action: 'CLICK', + intent: 'repair plan step', + optionalSubsteps: [ + { action: 'TYPE', intent: 'retry field', input: 4 }, + { action: 'SCROLL_TO', intent: 'scroll to submit' }, + { action: 'SCROLL_INTO_VIEW', intent: 'scroll submit into view' }, + ], + }, + }, + ], + }) + ).toMatchObject({ + replaceSteps: [ + { + id: 1, + step: { + optionalSubsteps: [ + { action: 'TYPE', input: '4' }, + { action: 'SCROLL' }, + { action: 'SCROLL' }, + ], + }, + }, + ], + }); + }); + it('treats a relevant search URL change as success even when planner verification is too strict', async () => { const planner = new ProviderStub([ JSON.stringify({ diff --git a/tests/agents/planner-executor/tracing-parity.test.ts b/tests/agents/planner-executor/tracing-parity.test.ts index 11b30e2..d2b2dd3 100644 --- a/tests/agents/planner-executor/tracing-parity.test.ts +++ b/tests/agents/planner-executor/tracing-parity.test.ts @@ -319,7 +319,7 @@ describe('PlannerExecutorAgent tracing parity', () => { }), JSON.stringify({ action: 'DONE', reasoning: 'replanned after recovery failed' }), ]); - const executor = new ProviderStub(['NONE']); + const executor = new ProviderStub(['CLICK(7)', 'CLICK(7)']); const traceSink = new MemoryTraceSink(); const tracer = new Tracer('trace-replan', traceSink); let recoveryBroken = false;