Skip to content
248 changes: 236 additions & 12 deletions src/agent/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,17 @@ import {
ActionContext,
ActionType,
AgentActionDefinition,
ActionCacheOutput,
ActionCacheReplayResult,
RunFromActionCacheParams,
endTaskStatuses,
Task,
TaskOutput,
TaskParams,
TaskState,
TaskStatus,
} from "@/types";
import fs from "fs";
import {
CompleteActionDefinition,
DEFAULT_ACTIONS,
Expand All @@ -37,7 +41,12 @@ import {
} from "../context-providers/a11y-dom/types";
import { MCPClient } from "./mcp/client";
import { runAgentTask } from "./tools/agent";
import { HyperPage, HyperVariable } from "../types/agent/types";
import type {
HyperPage,
HyperVariable,
ActionCacheEntry,
AgentTaskOutput,
} from "../types/agent/types";
import { z } from "zod";
import { ErrorEmitter } from "../utils";
import { waitForSettledDOM } from "@/utils/waitForSettledDOM";
Expand All @@ -48,6 +57,9 @@ import { markDomSnapshotDirty } from "@/context-providers/a11y-dom/dom-cache";
import { setDebugOptions } from "@/debug/options";
import { initializeRuntimeContext } from "./shared/runtime-context";
import { performAction } from "./actions/shared/perform-action";
import { createScriptFromActionCache } from "./shared/action-cache-script";
import { attachCachedActionHelpers } from "./shared/action-cache-exec";
import { AgentDeps } from "@/types/agent/types";

export class HyperAgent<T extends BrowserProviders = "Local"> {
// aiAction configuration constants
Expand All @@ -71,6 +83,7 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
private browserProviderType: T;
private actions: Array<AgentActionDefinition> = [...DEFAULT_ACTIONS];
private cdpActionsEnabled: boolean;
private actionCacheByTaskId: Record<string, ActionCacheOutput> = {};

public browser: Browser | null = null;
public context: BrowserContext | null = null;
Expand Down Expand Up @@ -248,6 +261,15 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
delete this._variables[key];
}

/**
 * Look up the action cache stored for a task.
 *
 * The returned object is a shallow copy: top-level fields are spread into a
 * new object and `steps` is a new array, so callers can reorder or trim the
 * list without touching the stored cache. Individual step entries are still
 * shared with the stored cache.
 *
 * @param taskId Identifier the cache was stored under.
 * @returns A copy of the stored cache, or `null` when nothing is stored for `taskId`.
 */
public getActionCache(taskId: string): ActionCacheOutput | null {
  const stored = this.actionCacheByTaskId[taskId];
  if (stored) {
    const snapshot: ActionCacheOutput = {
      ...stored,
      steps: Array.from(stored.steps),
    };
    return snapshot;
  }
  return null;
}

/**
* Get all pages in the context
* @returns Array of HyperPage objects
Expand Down Expand Up @@ -352,6 +374,7 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
throw new HyperagentError(`Task ${taskId} not found`);
}
return {
id: taskId,
getStatus: () => taskState.status,
pause: () => {
if (taskState.status === TaskStatus.RUNNING) {
Expand Down Expand Up @@ -432,7 +455,10 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
taskState,
mergedParams
)
.then(() => cleanup())
.then((result) => {
this.actionCacheByTaskId[taskId] = result.actionCache;
cleanup();
})
.catch((error: Error) => {
cleanup();
// Retrieve the correct state to update
Expand Down Expand Up @@ -463,7 +489,7 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
task: string,
params?: TaskParams,
initPage?: Page
): Promise<TaskOutput> {
): Promise<AgentTaskOutput> {
const taskId = uuidv4();
let activeTaskPage = initPage || (await this.getCurrentPage());

Expand Down Expand Up @@ -510,6 +536,7 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
mergedParams
);
this.context?.off("page", onPage);
this.actionCacheByTaskId[taskId] = result.actionCache;
return result;
} catch (error) {
this.context?.off("page", onPage);
Expand All @@ -518,6 +545,172 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
}
}

/**
 * Replay the steps of a recorded action cache against a page.
 *
 * Steps run in ascending `stepIndex` order (the input array is not mutated):
 * - `goToUrl` navigates with `page.goto`, waits for the DOM to settle and
 *   marks the DOM snapshot dirty.
 * - `complete` is recorded as a successful no-op.
 * - Any other step is dispatched to the matching `perform*` helper on the
 *   page when one exists, otherwise to `page.perform(step.instruction)`.
 *
 * Replay stops at the first step whose status is not COMPLETED, and the
 * overall status becomes FAILED.
 *
 * NOTE(review): the parameter is typed as Playwright `Page` for backward
 * compatibility, but every step other than `goToUrl`/`complete` needs the
 * HyperPage methods (`perform` and the `perform*` helpers). If the supplied
 * page exposes neither, the step is reported as FAILED with an explanatory
 * message instead of crashing with "perform is not a function".
 *
 * @param cache Recorded action cache to replay.
 * @param pageOrGetter Page to act on, or a getter that is re-evaluated before
 *   every step.
 * @param params Optional overrides: `maxXPathRetries` (defaults to 3) and
 *   `debug` (defaults to the agent's own debug flag).
 * @returns Per-step replay results plus the overall replay status. When debug
 *   is enabled the result is also written to
 *   `debug/action-cache/replay-<replayId>.json`.
 */
public async runFromActionCache(
  cache: ActionCacheOutput,
  pageOrGetter: Page | (() => Page),
  params?: RunFromActionCacheParams
): Promise<ActionCacheReplayResult> {
  const replayId = uuidv4();
  const maxXPathRetries = params?.maxXPathRetries ?? 3;
  const debug = params?.debug ?? this.debug;
  const getPage = () =>
    typeof pageOrGetter === "function" ? pageOrGetter() : pageOrGetter;

  const stepsResult: ActionCacheReplayResult["steps"] = [];
  let replayStatus: TaskStatus.COMPLETED | TaskStatus.FAILED =
    TaskStatus.COMPLETED;

  // Cached step method -> name of the helper looked up on the page.
  const helperMap: Record<string, string> = {
    click: "performClick",
    fill: "performFill",
    type: "performType",
    press: "performPress",
    selectOptionFromDropdown: "performSelectOption",
    check: "performCheck",
    uncheck: "performUncheck",
    hover: "performHover",
    scrollToElement: "performScrollToElement",
    scrollToPercentage: "performScrollToPercentage",
    nextChunk: "performNextChunk",
    prevChunk: "performPrevChunk",
  };

  // Methods whose helper takes a value argument between the xpath and the
  // options. Hoisted because it does not change between steps.
  const methodsWithValueArg = [
    "type",
    "fill",
    "press",
    "selectOptionFromDropdown",
    "scrollToPercentage",
  ];

  for (const step of [...cache.steps].sort(
    (a, b) => a.stepIndex - b.stepIndex
  )) {
    // Re-resolve the page for every step so a getter can follow page switches.
    const page = getPage();
    const hyperPage = page as HyperPage;
    let result: TaskOutput;

    if (step.actionType === "goToUrl") {
      const url =
        (step.arguments && step.arguments[0]) ||
        (step.actionParams as any)?.url ||
        "";
      if (!url || typeof url !== "string") {
        result = {
          taskId: cache.taskId,
          status: TaskStatus.FAILED,
          steps: [],
          output: "Missing URL for goToUrl",
        };
      } else {
        await hyperPage.goto(url, { waitUntil: "domcontentloaded" });
        await waitForSettledDOM(hyperPage);
        markDomSnapshotDirty(hyperPage);
        result = {
          taskId: cache.taskId,
          status: TaskStatus.COMPLETED,
          steps: [],
          output: `Navigated to ${url}`,
          replayStepMeta: {
            usedCachedAction: true,
            fallbackUsed: false,
            retries: 0,
            cachedXPath: null,
            fallbackXPath: null,
            fallbackElementId: null,
          },
        };
      }
    } else if (step.actionType === "complete") {
      result = {
        taskId: cache.taskId,
        status: TaskStatus.COMPLETED,
        steps: [],
        output: "Task Complete",
        replayStepMeta: {
          usedCachedAction: true,
          fallbackUsed: false,
          retries: 0,
          cachedXPath: null,
          fallbackXPath: null,
          fallbackElementId: null,
        },
      };
    } else {
      const helperName =
        step.method && helperMap[step.method] ? helperMap[step.method] : null;
      if (
        helperName &&
        typeof (hyperPage as any)[helperName] === "function"
      ) {
        const options: any = {
          performInstruction: step.instruction,
          maxSteps: maxXPathRetries,
        };
        if (step.frameIndex !== null && step.frameIndex !== undefined) {
          options.frameIndex = step.frameIndex;
        }
        const valueArg = step.arguments?.[0];
        if (methodsWithValueArg.includes(step.method ?? "")) {
          result = await (hyperPage as any)[helperName](
            step.xpath ?? "",
            valueArg,
            options
          );
        } else {
          result = await (hyperPage as any)[helperName](
            step.xpath ?? "",
            options
          );
        }
      } else if (typeof (hyperPage as any).perform === "function") {
        result = await hyperPage.perform(step.instruction);
      } else {
        // A plain Playwright Page was supplied: it has neither the cached
        // helper nor perform(), so report a failed step rather than throwing
        // an opaque TypeError.
        result = {
          taskId: cache.taskId,
          status: TaskStatus.FAILED,
          steps: [],
          output: `Cannot replay step ${step.stepIndex}: page is not a HyperPage (missing perform())`,
        };
      }
    }

    const finalMeta = result.replayStepMeta;
    const finalSuccess = result.status === TaskStatus.COMPLETED;

    stepsResult.push({
      stepIndex: step.stepIndex,
      actionType: step.actionType,
      usedXPath: finalMeta?.usedCachedAction ?? false,
      fallbackUsed: finalMeta?.fallbackUsed ?? false,
      cachedXPath: finalMeta?.cachedXPath ?? null,
      fallbackXPath: finalMeta?.fallbackXPath ?? null,
      fallbackElementId: finalMeta?.fallbackElementId ?? null,
      retries: finalMeta?.retries ?? 0,
      success: finalSuccess,
      message:
        result.output ||
        (finalSuccess ? "Completed" : "Failed to execute cached action"),
    });

    // Later steps assume the earlier ones succeeded, so stop at the first failure.
    if (!finalSuccess) {
      replayStatus = TaskStatus.FAILED;
      break;
    }
  }

  const replayResult: ActionCacheReplayResult = {
    replayId,
    sourceTaskId: cache.taskId,
    steps: stepsResult,
    status: replayStatus,
  };

  if (debug) {
    const debugDir = "debug/action-cache";
    fs.mkdirSync(debugDir, { recursive: true });
    fs.writeFileSync(
      `${debugDir}/replay-${replayId}.json`,
      JSON.stringify(replayResult, null, 2)
    );
  }

  return replayResult;
}

/**
* Find element with retry logic
* Retries element finding with DOM refetch until element is found or max retries reached
Expand Down Expand Up @@ -766,6 +959,7 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
pageOrGetter: Page | (() => Page),
_params?: TaskParams
): Promise<TaskOutput> {
const taskId = uuidv4();
const actionStart = performance.now();
const startTime = new Date().toISOString();
if (this.debug) {
Expand Down Expand Up @@ -831,7 +1025,8 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
400
);
}
let actionXPath: string | undefined;
let actionXPath: string | null =
domState?.xpathMap?.[element.elementId] ?? null;

// Use shared runtime context
const { cdpClient, frameContextManager } = await initializeRuntimeContext(
Expand Down Expand Up @@ -884,14 +1079,6 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
confidence: 1, // Implicit confidence for single action
});

if (
actionOutput.debug &&
typeof actionOutput.debug === "object" &&
"requestedAction" in actionOutput.debug
) {
actionXPath = (actionOutput.debug as any).elementMetadata?.xpath;
}

if (!actionOutput.success) {
throw new Error(actionOutput.message);
}
Expand Down Expand Up @@ -930,9 +1117,24 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {

logPerf(this.debug, "[Perf][executeSingleAction] total", actionStart);
return {
taskId,
status: TaskStatus.COMPLETED,
steps: [],
output: `Successfully executed: ${instruction}`,
actionCache: {
taskId,
createdAt: startTime,
status: TaskStatus.COMPLETED,
steps: [],
},
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Single action returns empty actionCache steps array

The executeSingleAction method returns an actionCache with an empty steps array, even though the action was successfully executed and all necessary data (element ID, method, arguments, xpath, instruction) is available. This makes the returned action cache unusable for replay via runFromActionCache, which iterates over the steps array. Unlike runAgentTask which properly builds cache entries using buildActionCacheEntry, this method doesn't populate the steps.

Fix in Cursor Fix in Web

replayStepMeta: {
usedCachedAction: false,
fallbackUsed: false,
retries: 1,
cachedXPath: null,
fallbackXPath: actionXPath ?? null,
fallbackElementId: element.elementId ?? null,
},
};
} catch (error) {
// If page switched during execution, prioritize that over the error
Expand Down Expand Up @@ -1139,6 +1341,13 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
return session;
}

/**
 * Build a script string from cached action steps.
 *
 * Thin wrapper over the `createScriptFromActionCache` helper imported from
 * `./shared/action-cache-script`; the arguments are forwarded unchanged as a
 * single `{ steps, taskId }` object.
 *
 * @param steps Cached action entries to pass to the helper.
 * @param taskId Optional task identifier forwarded to the helper.
 * @returns The string returned by the helper.
 */
public createScriptFromActionCache(
  steps: ActionCacheEntry[],
  taskId?: string
): string {
  const scriptInput = { steps, taskId };
  const script = createScriptFromActionCache(scriptInput);
  return script;
}

private setupHyperPage(page: Page): HyperPage {
const hyperPage = page as HyperPage;

Expand Down Expand Up @@ -1236,6 +1445,21 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
return executeSingleActionWithRetry(instruction, params);
};

hyperPage.getActionCache = (taskId: string) => this.getActionCache(taskId);

hyperPage.runFromActionCache = (cache, params) =>
this.runFromActionCache(cache, getActivePage, params);

const deps: AgentDeps = {
debug: this.debug,
tokenLimit: this.tokenLimit,
llm: this.llm,
mcpClient: this.mcpClient,
variables: Object.values(this._variables),
cdpActionsEnabled: this.cdpActionsEnabled,
};
attachCachedActionHelpers(deps, hyperPage);

// aiAsync tasks run in background, so we just use the current scope start point.
// The task itself has internal auto-following logic (from executeTaskAsync implementation).
hyperPage.aiAsync = (task: string, params?: TaskParams) =>
Expand Down
Loading