From 58dfbc50c54f0c330bd0f5ef9c8fae1ae4f3a3d3 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 08:57:37 -0500 Subject: [PATCH 01/15] fix(cua): scroll as notch count in anthropic-computer-use TypeScript template - Send delta_x/delta_y as signed notch count (kernel-images uses delta as wheel-event repeat count, not pixels) - Return tool output: "Scrolled N wheel unit(s) direction." - Add system prompt line: scroll_amount and result are in wheel units Made-with: Cursor --- .../typescript/anthropic-computer-use/loop.ts | 1 + .../anthropic-computer-use/tools/computer.ts | 40 ++++++++----------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/pkg/templates/typescript/anthropic-computer-use/loop.ts b/pkg/templates/typescript/anthropic-computer-use/loop.ts index cc209d1d..cd582414 100644 --- a/pkg/templates/typescript/anthropic-computer-use/loop.ts +++ b/pkg/templates/typescript/anthropic-computer-use/loop.ts @@ -18,6 +18,7 @@ const SYSTEM_PROMPT = ` * As the initial step click on the search bar. * When viewing a page it can be helpful to zoom out so that you can see everything on the page. * Either that, or make sure you scroll down to see everything before deciding something isn't available. +* Scroll action: scroll_amount and the tool result are in wheel units (not pixels). * When using your computer function calls, they take a while to run and send back to you. * Where possible/feasible, try to chain multiple of these calls all into one function calls request. * The current date is ${DateTime.now().toFormat('EEEE, MMMM d, yyyy')}. diff --git a/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts b/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts index 580ea238..05ee40f4 100644 --- a/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts +++ b/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts @@ -295,41 +295,35 @@ export class ComputerTool implements BaseAnthropicTool { const scrollDirection = scrollDirectionParam || kwargs.scroll_direction; const scrollAmountValue = scrollAmount || scroll_amount; - if (!scrollDirection || !['up', 'down', 'left', 'right'].includes(scrollDirection)) { - throw new ToolError(`Scroll direction "${scrollDirection}" must be 'up', 'down', 'left', or 'right'`); + const dir = scrollDirection && typeof scrollDirection === 'string' && ['up', 'down', 'left', 'right'].includes(scrollDirection) ? scrollDirection : null; + if (!dir) { + throw new ToolError(`Scroll direction "${String(scrollDirection)}" must be 'up', 'down', 'left', or 'right'`); } if (typeof scrollAmountValue !== 'number' || scrollAmountValue < 0) { throw new ToolError(`Scroll amount "${scrollAmountValue}" must be a non-negative number`); } - const [x, y] = coordinate + const [x, y] = coordinate ? ActionValidator.validateAndGetCoordinates(coordinate) : this.lastMousePosition; + // Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. + const notches = Math.max(scrollAmountValue ?? 1, 1); let delta_x = 0; let delta_y = 0; - // Each scroll_amount unit = 1 scroll wheel click ≈ 120 pixels (matches Anthropic's xdotool behavior) - const scrollDelta = (scrollAmountValue ?? 1) * 120; - - if (scrollDirection === 'down') { - delta_y = scrollDelta; - } else if (scrollDirection === 'up') { - delta_y = -scrollDelta; - } else if (scrollDirection === 'right') { - delta_x = scrollDelta; - } else if (scrollDirection === 'left') { - delta_x = -scrollDelta; - } + if (dir === 'down') delta_y = notches; + if (dir === 'up') delta_y = -notches; + if (dir === 'right') delta_x = notches; + if (dir === 'left') delta_x = -notches; - await this.kernel.browsers.computer.scroll(this.sessionId, { - x, - y, - delta_x, - delta_y, - }); + await this.kernel.browsers.computer.scroll(this.sessionId, { x, y, delta_x, delta_y }); - await new Promise(resolve => setTimeout(resolve, 500)); - return await this.screenshot(); + await new Promise(resolve => setTimeout(resolve, 200)); + const screenshotResult = await this.screenshot(); + return { + ...screenshotResult, + output: `Scrolled ${notches} wheel unit(s) ${dir}.`, + }; } if (action === Action.WAIT) { From 174591ed02768f8295e59b48f169efdf68a92304 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 08:58:02 -0500 Subject: [PATCH 02/15] fix(cua): scroll as notch count in anthropic-computer-use Python template - Send delta_x/delta_y as signed notch count (kernel-images uses delta as wheel-event repeat count, not pixels) - Return tool output: "Scrolled N wheel unit(s) direction." - Add system prompt line: scroll_amount and result are in wheel units Made-with: Cursor --- .../python/anthropic-computer-use/loop.py | 1 + .../anthropic-computer-use/tools/computer.py | 22 ++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pkg/templates/python/anthropic-computer-use/loop.py b/pkg/templates/python/anthropic-computer-use/loop.py index afee6f61..d5d8a249 100644 --- a/pkg/templates/python/anthropic-computer-use/loop.py +++ b/pkg/templates/python/anthropic-computer-use/loop.py @@ -50,6 +50,7 @@ class APIProvider(StrEnum): * As the initial step click on the search bar. * When viewing a page it can be helpful to zoom out so that you can see everything on the page. * Either that, or make sure you scroll down to see everything before deciding something isn't available. +* Scroll action: scroll_amount and the tool result are in wheel units (not pixels). * When using your computer function calls, they take a while to run and send back to you. * Where possible/feasible, try to chain multiple of these calls all into one function calls request. * The current date is {datetime.now().strftime("%A, %B %d, %Y")}. diff --git a/pkg/templates/python/anthropic-computer-use/tools/computer.py b/pkg/templates/python/anthropic-computer-use/tools/computer.py index d4a46d7d..f765587c 100644 --- a/pkg/templates/python/anthropic-computer-use/tools/computer.py +++ b/pkg/templates/python/anthropic-computer-use/tools/computer.py @@ -370,21 +370,18 @@ async def __call__( else: x, y = self._last_mouse_position - # Each scroll_amount unit = 1 scroll wheel click ≈ 120 pixels (matches Anthropic's xdotool behavior) - scroll_factor = scroll_amount * 120 - + # Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. + notches = max(scroll_amount or 1, 1) delta_x = 0 delta_y = 0 if scroll_direction == "up": - delta_y = -scroll_factor + delta_y = -notches elif scroll_direction == "down": - delta_y = scroll_factor + delta_y = notches elif scroll_direction == "left": - delta_x = -scroll_factor + delta_x = -notches elif scroll_direction == "right": - delta_x = scroll_factor - - print(f"Scrolling {abs(delta_x) if delta_x != 0 else abs(delta_y)} pixels {scroll_direction}") + delta_x = notches self.kernel.browsers.computer.scroll( id=self.session_id, @@ -393,7 +390,12 @@ async def __call__( delta_x=delta_x, delta_y=delta_y, ) - return await self.screenshot() + + await asyncio.sleep(0.2) + screenshot_result = await self.screenshot() + return screenshot_result.replace( + output=f"Scrolled {notches} wheel unit(s) {scroll_direction}." + ) if action in ("hold_key", "wait"): if duration is None or not isinstance(duration, (int, float)): From b480469866d11f1e7ab432a52455a53f2b4fe226 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 08:58:26 -0500 Subject: [PATCH 03/15] fix(cua): scroll as notch count in gemini-computer-use TypeScript template - Send delta_x/delta_y as signed notch count (kernel-images uses delta as wheel-event repeat count, not pixels) - SCROLL_DOCUMENT: use 3 notches instead of 500 pixels - SCROLL_AT: treat magnitude as notch count instead of denormalizing to pixels Made-with: Cursor --- .../gemini-computer-use/tools/computer.ts | 46 ++++++++----------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/pkg/templates/typescript/gemini-computer-use/tools/computer.ts b/pkg/templates/typescript/gemini-computer-use/tools/computer.ts index f415f72a..b30c357c 100644 --- a/pkg/templates/typescript/gemini-computer-use/tools/computer.ts +++ b/pkg/templates/typescript/gemini-computer-use/tools/computer.ts @@ -146,23 +146,23 @@ export class ComputerTool { if (!args.direction) { return { error: 'scroll_document requires direction' }; } - // Scroll at center of viewport const centerX = Math.round(this.screenSize.width / 2); const centerY = Math.round(this.screenSize.height / 2); - const scrollDelta = 500; - let deltaX = 0; - let deltaY = 0; - if (args.direction === 'down') deltaY = scrollDelta; - else if (args.direction === 'up') deltaY = -scrollDelta; - else if (args.direction === 'right') deltaX = scrollDelta; - else if (args.direction === 'left') deltaX = -scrollDelta; + // Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. + const docNotches = 3; + let docDx = 0; + let docDy = 0; + if (args.direction === 'down') docDy = docNotches; + else if (args.direction === 'up') docDy = -docNotches; + else if (args.direction === 'right') docDx = docNotches; + else if (args.direction === 'left') docDx = -docNotches; await this.kernel.browsers.computer.scroll(this.sessionId, { x: centerX, y: centerY, - delta_x: deltaX, - delta_y: deltaY, + delta_x: docDx, + delta_y: docDy, }); break; } @@ -178,26 +178,20 @@ export class ComputerTool { const x = this.denormalizeX(args.x); const y = this.denormalizeY(args.y); - // Denormalize magnitude if provided - let magnitude = args.magnitude ?? 800; - if (args.direction === 'up' || args.direction === 'down') { - magnitude = this.denormalizeY(magnitude); - } else { - magnitude = this.denormalizeX(magnitude); - } - - let deltaX = 0; - let deltaY = 0; - if (args.direction === 'down') deltaY = magnitude; - else if (args.direction === 'up') deltaY = -magnitude; - else if (args.direction === 'right') deltaX = magnitude; - else if (args.direction === 'left') deltaX = -magnitude; + // Backend (kernel-images) uses delta as notch count; treat magnitude as notches (default 3). + const notches = args.magnitude ?? 3; + let atDx = 0; + let atDy = 0; + if (args.direction === 'down') atDy = notches; + else if (args.direction === 'up') atDy = -notches; + else if (args.direction === 'right') atDx = notches; + else if (args.direction === 'left') atDx = -notches; await this.kernel.browsers.computer.scroll(this.sessionId, { x, y, - delta_x: deltaX, - delta_y: deltaY, + delta_x: atDx, + delta_y: atDy, }); break; } From cc5866bfe463e5d695aa6fcd34010b57775e2be9 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 08:58:49 -0500 Subject: [PATCH 04/15] fix(cua): scroll as notch count in gemini-computer-use Python template - Send delta_x/delta_y as signed notch count (kernel-images uses delta as wheel-event repeat count, not pixels) - SCROLL_DOCUMENT: use 3 notches instead of 500 pixels - SCROLL_AT: treat magnitude as notch count instead of denormalizing to pixels Made-with: Cursor --- .../gemini-computer-use/tools/computer.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/pkg/templates/python/gemini-computer-use/tools/computer.py b/pkg/templates/python/gemini-computer-use/tools/computer.py index 5cf309d8..a3b39419 100644 --- a/pkg/templates/python/gemini-computer-use/tools/computer.py +++ b/pkg/templates/python/gemini-computer-use/tools/computer.py @@ -131,21 +131,21 @@ async def execute_action( elif action_name == GeminiAction.SCROLL_DOCUMENT: if "direction" not in args: return ToolResult(error="scroll_document requires direction") - # Scroll at center of viewport center_x = self.screen_size.width // 2 center_y = self.screen_size.height // 2 - scroll_delta = 500 + # Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. + doc_notches = 3 delta_x, delta_y = 0, 0 direction = args["direction"] if direction == "down": - delta_y = scroll_delta + delta_y = doc_notches elif direction == "up": - delta_y = -scroll_delta + delta_y = -doc_notches elif direction == "right": - delta_x = scroll_delta + delta_x = doc_notches elif direction == "left": - delta_x = -scroll_delta + delta_x = -doc_notches self.kernel.browsers.computer.scroll( self.session_id, @@ -164,23 +164,19 @@ async def execute_action( x = self.denormalize_x(args["x"]) y = self.denormalize_y(args["y"]) - # Denormalize magnitude if provided - magnitude = args.get("magnitude", 800) + # Backend (kernel-images) uses delta as notch count; treat magnitude as notches (default 3). + notches = args.get("magnitude", 3) direction = args["direction"] - if direction in ("up", "down"): - magnitude = self.denormalize_y(magnitude) - else: - magnitude = self.denormalize_x(magnitude) delta_x, delta_y = 0, 0 if direction == "down": - delta_y = magnitude + delta_y = notches elif direction == "up": - delta_y = -magnitude + delta_y = -notches elif direction == "right": - delta_x = magnitude + delta_x = notches elif direction == "left": - delta_x = -magnitude + delta_x = -notches self.kernel.browsers.computer.scroll( self.session_id, From bc127c0a1041f6ced2a56e9769956d31218987e5 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 08:59:15 -0500 Subject: [PATCH 05/15] fix(cua): scroll as notch count in yutori-computer-use TypeScript template - Send delta_x/delta_y as signed notch count (kernel-images uses delta as wheel-event repeat count, not pixels) - Return tool output: "Scrolled N wheel unit(s) direction." Made-with: Cursor --- .../yutori-computer-use/tools/computer.ts | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts index 59b60d16..6ec7636e 100644 --- a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts +++ b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts @@ -160,29 +160,28 @@ export class ComputerTool { private async handleScroll(action: N1Action): Promise { const coords = this.getCoordinates(action.coordinates); const direction = action.direction; - const amount = action.amount ?? 3; + const notches = Math.max(action.amount ?? 3, 1); if (!direction || !['up', 'down', 'left', 'right'].includes(direction)) { throw new ToolError(`Invalid scroll direction: ${direction}`); } - const scrollDelta = amount * 100; - + // Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. let delta_x = 0; let delta_y = 0; switch (direction) { case 'up': - delta_y = -scrollDelta; + delta_y = -notches; break; case 'down': - delta_y = scrollDelta; + delta_y = notches; break; case 'left': - delta_x = -scrollDelta; + delta_x = -notches; break; case 'right': - delta_x = scrollDelta; + delta_x = notches; break; } @@ -194,7 +193,11 @@ export class ComputerTool { }); await this.sleep(SCREENSHOT_DELAY_MS); - return this.screenshot(); + const screenshotResult = await this.screenshot(); + return { + ...screenshotResult, + output: `Scrolled ${notches} wheel unit(s) ${direction}.`, + }; } private async handleType(action: N1Action): Promise { From 4a1898b0cc2519b5518f2538876cb91f5a49f7c8 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 08:59:34 -0500 Subject: [PATCH 06/15] fix(cua): scroll as notch count in yutori-computer-use Python template - Send delta_x/delta_y as signed notch count (kernel-images uses delta as wheel-event repeat count, not pixels) - Return tool output: "Scrolled N wheel unit(s) direction." Made-with: Cursor --- .../yutori-computer-use/tools/computer.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pkg/templates/python/yutori-computer-use/tools/computer.py b/pkg/templates/python/yutori-computer-use/tools/computer.py index 2d5784ec..d078aded 100644 --- a/pkg/templates/python/yutori-computer-use/tools/computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/computer.py @@ -140,24 +140,23 @@ async def _handle_click(self, action: N1Action, button: str, num_clicks: int) -> async def _handle_scroll(self, action: N1Action) -> ToolResult: coords = self._get_coordinates(action.get("coordinates")) direction = action.get("direction") - amount = action.get("amount", 3) + notches = max(action.get("amount", 3), 1) if direction not in ("up", "down", "left", "right"): raise ToolError(f"Invalid scroll direction: {direction}") - scroll_delta = amount * 100 - + # Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. delta_x = 0 delta_y = 0 if direction == "up": - delta_y = -scroll_delta + delta_y = -notches elif direction == "down": - delta_y = scroll_delta + delta_y = notches elif direction == "left": - delta_x = -scroll_delta + delta_x = -notches elif direction == "right": - delta_x = scroll_delta + delta_x = notches self.kernel.browsers.computer.scroll( self.session_id, @@ -168,7 +167,9 @@ async def _handle_scroll(self, action: N1Action) -> ToolResult: ) await asyncio.sleep(SCREENSHOT_DELAY_S) - return await self.screenshot() + screenshot_result = await self.screenshot() + screenshot_result["output"] = f"Scrolled {notches} wheel unit(s) {direction}." + return screenshot_result async def _handle_type(self, action: N1Action) -> ToolResult: text = action.get("text") From 8f5bc23bc8b129b53a320ae23fc3bcab323d40e3 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 08:59:56 -0500 Subject: [PATCH 07/15] fix(cua): scroll as notch count in openagi-computer-use Python template - Send delta_x/delta_y as signed notch count (kernel-images uses delta as wheel-event repeat count, not pixels) - Support left/right scroll directions - Default scroll_amount changed from 100 (pixels) to 3 (notches) Made-with: Cursor --- .../openagi-computer-use/kernel_handler.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/pkg/templates/python/openagi-computer-use/kernel_handler.py b/pkg/templates/python/openagi-computer-use/kernel_handler.py index 564274ca..e4083793 100644 --- a/pkg/templates/python/openagi-computer-use/kernel_handler.py +++ b/pkg/templates/python/openagi-computer-use/kernel_handler.py @@ -42,7 +42,7 @@ def __init__( self, session: "KernelBrowserSession", action_pause: float = 0.1, - scroll_amount: int = 100, + scroll_amount: int = 3, wait_duration: float = 1.0, type_delay: int = 50, ): @@ -52,7 +52,7 @@ def __init__( Args: session: The Kernel browser session to control action_pause: Pause between actions in seconds - scroll_amount: Amount to scroll (pixels) + scroll_amount: Amount to scroll (wheel units / notches) wait_duration: Duration for wait actions in seconds type_delay: Delay between keystrokes in milliseconds """ @@ -241,19 +241,24 @@ def _execute_hotkey(self, keys: list[str]): def _execute_scroll(self, x: int, y: int, direction: str): """Execute a scroll action.""" - # Move to position first - self.session.kernel.browsers.computer.move_mouse( - id=self.session.session_id, - x=x, - y=y, - ) - # Scroll in the specified direction - delta_y = self.scroll_amount if direction == "up" else -self.scroll_amount + # Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. + notches = max(self.scroll_amount, 1) + delta_x = 0 + delta_y = 0 + if direction == "up": + delta_y = -notches + elif direction == "down": + delta_y = notches + elif direction == "left": + delta_x = -notches + elif direction == "right": + delta_x = notches + self.session.kernel.browsers.computer.scroll( id=self.session.session_id, x=x, y=y, - delta_x=0, + delta_x=delta_x, delta_y=delta_y, ) From 37c626798fe43578a21530af8b62bb1b2095e611 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 23:30:45 -0500 Subject: [PATCH 08/15] gemini-computer-use (Python): conservative scroll (60 px/notch, max 17 notches) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - scroll_document and scroll_at: magnitude → notches with PX_PER_NOTCH=60, MAX_NOTCHES_PER_ACTION=17, single API call - Remove chunking; default magnitude 400 - KERNEL_INVOCATION guard so invokes use payload query Made-with: Cursor --- .../python/gemini-computer-use/main.py | 4 ++-- .../gemini-computer-use/tools/computer.py | 23 +++++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pkg/templates/python/gemini-computer-use/main.py b/pkg/templates/python/gemini-computer-use/main.py index 870ff776..ee15c130 100644 --- a/pkg/templates/python/gemini-computer-use/main.py +++ b/pkg/templates/python/gemini-computer-use/main.py @@ -75,9 +75,9 @@ async def cua_task( } -# Run locally if executed directly (not imported as a module) +# Run locally if executed directly and not in Kernel (no KERNEL_INVOCATION) # Execute via: uv run main.py -if __name__ == "__main__": +if __name__ == "__main__" and not os.getenv("KERNEL_INVOCATION"): import asyncio async def main(): diff --git a/pkg/templates/python/gemini-computer-use/tools/computer.py b/pkg/templates/python/gemini-computer-use/tools/computer.py index a3b39419..ecf9e219 100644 --- a/pkg/templates/python/gemini-computer-use/tools/computer.py +++ b/pkg/templates/python/gemini-computer-use/tools/computer.py @@ -21,6 +21,10 @@ TYPING_DELAY_MS = 12 SCREENSHOT_DELAY_SECS = 0.5 +# Pixels per notch: higher = more conservative (fewer notches). 60–80 avoids overscroll on heavy sites. +PX_PER_NOTCH = 60 +# Cap total notches per action so large magnitudes don't overscroll on any site. +MAX_NOTCHES_PER_ACTION = 17 class ComputerTool: @@ -134,10 +138,11 @@ async def execute_action( center_x = self.screen_size.width // 2 center_y = self.screen_size.height // 2 - # Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. - doc_notches = 3 - delta_x, delta_y = 0, 0 + # Backend uses notches; Gemini sends magnitude in pixels (default 400). Chunk to avoid per-request cap. + magnitude_px = args.get("magnitude", 400) + doc_notches = min(MAX_NOTCHES_PER_ACTION, max(1, round(magnitude_px / PX_PER_NOTCH))) direction = args["direction"] + delta_x = delta_y = 0 if direction == "down": delta_y = doc_notches elif direction == "up": @@ -146,7 +151,7 @@ async def execute_action( delta_x = doc_notches elif direction == "left": delta_x = -doc_notches - + print(f"[cua-scroll] SCROLL_DOCUMENT direction={direction} magnitude_px={magnitude_px} notches={doc_notches} center=({center_x},{center_y})", flush=True) self.kernel.browsers.computer.scroll( self.session_id, x=center_x, @@ -164,11 +169,11 @@ async def execute_action( x = self.denormalize_x(args["x"]) y = self.denormalize_y(args["y"]) - # Backend (kernel-images) uses delta as notch count; treat magnitude as notches (default 3). - notches = args.get("magnitude", 3) + # Gemini uses magnitude in pixels (default 400). Chunk to avoid per-request cap. + magnitude_px = args.get("magnitude", 400) + notches = min(MAX_NOTCHES_PER_ACTION, max(1, round(magnitude_px / PX_PER_NOTCH))) direction = args["direction"] - - delta_x, delta_y = 0, 0 + delta_x = delta_y = 0 if direction == "down": delta_y = notches elif direction == "up": @@ -177,7 +182,7 @@ async def execute_action( delta_x = notches elif direction == "left": delta_x = -notches - + print(f"[cua-scroll] SCROLL_AT magnitude_px={magnitude_px} notches={notches} x={x} y={y} direction={direction}", flush=True) self.kernel.browsers.computer.scroll( self.session_id, x=x, From 53b6ad882324a7ecd029743029a95cba7042898b Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 23:30:48 -0500 Subject: [PATCH 09/15] gemini-computer-use (TypeScript): scroll + TS fixes - Scroll: same as Python (PX_PER_NOTCH 60, MAX_NOTCHES_PER_ACTION 17, single call) - loop.ts: Environment.ENVIRONMENT_BROWSER, candidate?.content, fc.name guard, content check in pruneOldScreenshots - session.ts: session_id/liveViewUrl/replayViewUrl ?? null for string | null - index.ts: KERNEL_INVOCATION guard for payload query Made-with: Cursor --- .../typescript/gemini-computer-use/index.ts | 4 ++-- .../typescript/gemini-computer-use/loop.ts | 10 ++++++---- .../typescript/gemini-computer-use/session.ts | 6 +++--- .../gemini-computer-use/tools/computer.ts | 18 ++++++++++++------ 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/pkg/templates/typescript/gemini-computer-use/index.ts b/pkg/templates/typescript/gemini-computer-use/index.ts index 91f47e4a..aa1313de 100644 --- a/pkg/templates/typescript/gemini-computer-use/index.ts +++ b/pkg/templates/typescript/gemini-computer-use/index.ts @@ -73,9 +73,9 @@ app.action( }, ); -// Run locally if executed directly (not imported as a module) +// Run locally if executed directly and not in Kernel (no KERNEL_INVOCATION) // Execute via: npx tsx index.ts -if (import.meta.url === `file://${process.argv[1]}`) { +if (!process.env.KERNEL_INVOCATION && import.meta.url === `file://${process.argv[1]}`) { const testQuery = "Navigate to https://www.google.com and describe what you see"; console.log('Running local test with query:', testQuery); diff --git a/pkg/templates/typescript/gemini-computer-use/loop.ts b/pkg/templates/typescript/gemini-computer-use/loop.ts index ba3dc5ce..43e00f0a 100644 --- a/pkg/templates/typescript/gemini-computer-use/loop.ts +++ b/pkg/templates/typescript/gemini-computer-use/loop.ts @@ -5,6 +5,7 @@ import { GoogleGenAI, + Environment, type Content, type FunctionCall, type Part, @@ -103,7 +104,7 @@ export async function samplingLoop({ tools: [ { computerUse: { - environment: 'ENVIRONMENT_BROWSER', + environment: Environment.ENVIRONMENT_BROWSER, }, }, ], @@ -119,7 +120,7 @@ export async function samplingLoop({ } const candidate = response.candidates[0]; - if (!candidate.content) { + if (!candidate?.content) { console.log('No content in candidate'); break; } @@ -155,6 +156,7 @@ export async function samplingLoop({ // Execute function calls and collect results const functionResponses: Part[] = []; for (const fc of functionCalls) { + if (!fc.name) continue; const args = fc.args as GeminiFunctionArgs || {}; // Handle safety decisions if present @@ -188,7 +190,7 @@ export async function samplingLoop({ name: fc.name, response: responseData, // Include screenshot as inline data - ...(result.base64Image && isPredefinedFunction(fc.name) ? { + ...(result.base64Image && fc.name && isPredefinedFunction(fc.name) ? { parts: [{ inlineData: { mimeType: 'image/png', @@ -262,7 +264,7 @@ function pruneOldScreenshots(contents: Content[]): void { // Iterate in reverse to find recent turns with screenshots for (let i = contents.length - 1; i >= 0; i--) { const content = contents[i]; - if (content.role !== 'user' || !content.parts) continue; + if (!content || content.role !== 'user' || !content.parts) continue; // Check if this turn has screenshots from predefined functions let hasScreenshot = false; diff --git a/pkg/templates/typescript/gemini-computer-use/session.ts b/pkg/templates/typescript/gemini-computer-use/session.ts index 627b4420..89e7a8bf 100644 --- a/pkg/templates/typescript/gemini-computer-use/session.ts +++ b/pkg/templates/typescript/gemini-computer-use/session.ts @@ -84,8 +84,8 @@ export class KernelBrowserSession { }, }); - this._sessionId = browser.session_id; - this._liveViewUrl = browser.browser_live_view_url; + this._sessionId = browser.session_id ?? null; + this._liveViewUrl = browser.browser_live_view_url ?? null; console.log(`Kernel browser created: ${this._sessionId}`); console.log(`Live view URL: ${this._liveViewUrl}`); @@ -138,7 +138,7 @@ export class KernelBrowserSession { const replays = await this.kernel.browsers.replays.list(this._sessionId); for (const replay of replays) { if (replay.replay_id === this._replayId) { - this._replayViewUrl = replay.replay_view_url; + this._replayViewUrl = replay.replay_view_url ?? null; replayReady = true; break; } diff --git a/pkg/templates/typescript/gemini-computer-use/tools/computer.ts b/pkg/templates/typescript/gemini-computer-use/tools/computer.ts index b30c357c..175ecaba 100644 --- a/pkg/templates/typescript/gemini-computer-use/tools/computer.ts +++ b/pkg/templates/typescript/gemini-computer-use/tools/computer.ts @@ -17,6 +17,10 @@ import { const TYPING_DELAY_MS = 12; const SCREENSHOT_DELAY_MS = 500; +/** Higher = more conservative (fewer notches). 60–80 avoids overscroll on heavy sites. */ +const PX_PER_NOTCH = 60; +/** Cap total notches per action so large magnitudes don't overscroll. */ +const MAX_NOTCHES_PER_ACTION = 17; /** * Computer tool that maps Gemini actions to Kernel's Computer Controls API. @@ -149,15 +153,16 @@ export class ComputerTool { const centerX = Math.round(this.screenSize.width / 2); const centerY = Math.round(this.screenSize.height / 2); - // Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. - const docNotches = 3; + // Backend uses notches; chunk to avoid per-request cap. + const magnitudePx = args.magnitude ?? 400; + const docNotches = Math.min(MAX_NOTCHES_PER_ACTION, Math.max(1, Math.round(magnitudePx / PX_PER_NOTCH))); let docDx = 0; let docDy = 0; if (args.direction === 'down') docDy = docNotches; else if (args.direction === 'up') docDy = -docNotches; else if (args.direction === 'right') docDx = docNotches; else if (args.direction === 'left') docDx = -docNotches; - + console.info('[cua-scroll] SCROLL_DOCUMENT direction=%s magnitude_px=%d notches=%d x=%d y=%d', args.direction, magnitudePx, docNotches, centerX, centerY); await this.kernel.browsers.computer.scroll(this.sessionId, { x: centerX, y: centerY, @@ -178,15 +183,16 @@ export class ComputerTool { const x = this.denormalizeX(args.x); const y = this.denormalizeY(args.y); - // Backend (kernel-images) uses delta as notch count; treat magnitude as notches (default 3). - const notches = args.magnitude ?? 3; + // Chunk to avoid per-request cap. + const magnitudePx = args.magnitude ?? 400; + const notches = Math.min(MAX_NOTCHES_PER_ACTION, Math.max(1, Math.round(magnitudePx / PX_PER_NOTCH))); let atDx = 0; let atDy = 0; if (args.direction === 'down') atDy = notches; else if (args.direction === 'up') atDy = -notches; else if (args.direction === 'right') atDx = notches; else if (args.direction === 'left') atDx = -notches; - + console.info('[cua-scroll] SCROLL_AT magnitude_px=%s notches=%d x=%d y=%d direction=%s', magnitudePx, notches, x, y, args.direction); await this.kernel.browsers.computer.scroll(this.sessionId, { x, y, From 36c88e67673a52f212f7a11e7375d553f8e5215d Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 23:30:50 -0500 Subject: [PATCH 10/15] openagi-computer-use: 1 notch per Lux scroll event - OpenAGI/Lux emits N scroll actions for amount N; treat each as 1 notch - Document in handler docstring; no coalescing Made-with: Cursor --- .../openagi-computer-use/kernel_handler.py | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/pkg/templates/python/openagi-computer-use/kernel_handler.py b/pkg/templates/python/openagi-computer-use/kernel_handler.py index e4083793..0198452a 100644 --- a/pkg/templates/python/openagi-computer-use/kernel_handler.py +++ b/pkg/templates/python/openagi-computer-use/kernel_handler.py @@ -36,6 +36,10 @@ class KernelActionHandler: - HOTKEY -> press_key(keys=[...]) - TYPE -> type_text(text=...) - SCROLL -> scroll(x, y, delta_y=...) + + Note: OpenAGI/Lux tends to emit scroll N times for "scroll by N" (e.g. 3 identical + [scroll] actions for "scroll down with amount 3"). We treat each scroll event as + one scroll unit (1 notch), so N events in a row = N notches without fighting the model. """ def __init__( @@ -239,10 +243,10 @@ def _execute_hotkey(self, keys: list[str]): keys=keys, ) - def _execute_scroll(self, x: int, y: int, direction: str): - """Execute a scroll action.""" + def _execute_scroll(self, x: int, y: int, direction: str, notches: int = 1): + """Execute a scroll action. One Lux scroll event = 1 notch (OpenAGI often calls scroll N times for amount N).""" # Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. - notches = max(self.scroll_amount, 1) + notches = max(notches, 1) delta_x = 0 delta_y = 0 if direction == "up": @@ -254,6 +258,10 @@ def _execute_scroll(self, x: int, y: int, direction: str): elif direction == "right": delta_x = notches + print( + f"[cua-scroll] SCROLL API call id={self.session.session_id} x={x} y={y} delta_x={delta_x} delta_y={delta_y} (notches={notches})", + flush=True, + ) self.session.kernel.browsers.computer.scroll( id=self.session.session_id, x=x, @@ -302,8 +310,10 @@ def _execute_single_action(self, action: Action) -> None: self._execute_type(text, press_enter=press_enter) case ActionType.SCROLL: + print(f"[cua-scroll] SCROLL action raw_arg={arg!r}", flush=True) x, y, direction = self._parse_scroll(arg) - self._execute_scroll(x, y, direction) + print(f"[cua-scroll] SCROLL parsed x={x} y={y} direction={direction}", flush=True) + self._execute_scroll(x, y, direction, notches=1) case ActionType.FINISH: # Task completion - nothing to do @@ -321,32 +331,23 @@ def _execute_single_action(self, action: Action) -> None: print(f"Unknown action type: {action.type}") def _execute_action(self, action: Action) -> None: - """Execute an action, potentially multiple times.""" + """Execute an action, potentially multiple times. SCROLL: each event = 1 notch.""" count = action.count or 1 - for _ in range(count): self._execute_single_action(action) - # Small pause between repeated actions if count > 1: time.sleep(self.action_pause) async def __call__(self, actions: list[Action]) -> None: - """ - Execute a list of actions. - - Args: - actions: List of Action objects to execute - """ + """Execute a list of actions.""" if not self.session.session_id: raise RuntimeError("Browser session not initialized") for action in actions: try: - # Run the synchronous action execution in a thread pool await asyncio.get_event_loop().run_in_executor( None, self._execute_action, action ) - # Pause between actions await asyncio.sleep(self.action_pause) except Exception as e: print(f"Error executing action {action.type}: {e}") From c1cd6c6cacd11d570ec1e2b66c0f524d8b433bac Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 23:30:50 -0500 Subject: [PATCH 11/15] yutori-computer-use (Python): add cua-scroll debug log for scroll action Made-with: Cursor --- pkg/templates/python/yutori-computer-use/tools/computer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/templates/python/yutori-computer-use/tools/computer.py b/pkg/templates/python/yutori-computer-use/tools/computer.py index d078aded..4fb9d685 100644 --- a/pkg/templates/python/yutori-computer-use/tools/computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/computer.py @@ -158,6 +158,10 @@ async def _handle_scroll(self, action: N1Action) -> ToolResult: elif direction == "right": delta_x = notches + print( + f"[cua-scroll] scroll amount={action.get('amount')} notches={notches} direction={direction} x={coords['x']} y={coords['y']} delta_x={delta_x} delta_y={delta_y}", + flush=True, + ) self.kernel.browsers.computer.scroll( self.session_id, x=coords["x"], From 27255b727b751e3859844a4179c760dce8d94a44 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Mon, 2 Mar 2026 23:47:58 -0500 Subject: [PATCH 12/15] chore(cua): remove debug logs and redundant comments from scroll templates Made-with: Cursor --- .../python/anthropic-computer-use/tools/computer.py | 1 - pkg/templates/python/gemini-computer-use/main.py | 3 +-- pkg/templates/python/gemini-computer-use/tools/computer.py | 6 ------ .../python/openagi-computer-use/kernel_handler.py | 7 +------ pkg/templates/typescript/gemini-computer-use/index.ts | 3 +-- pkg/templates/typescript/gemini-computer-use/loop.ts | 2 +- .../typescript/gemini-computer-use/tools/computer.ts | 6 ------ .../typescript/yutori-computer-use/tools/computer.ts | 1 - 8 files changed, 4 insertions(+), 25 deletions(-) diff --git a/pkg/templates/python/anthropic-computer-use/tools/computer.py b/pkg/templates/python/anthropic-computer-use/tools/computer.py index f765587c..27b3b088 100644 --- a/pkg/templates/python/anthropic-computer-use/tools/computer.py +++ b/pkg/templates/python/anthropic-computer-use/tools/computer.py @@ -370,7 +370,6 @@ async def __call__( else: x, y = self._last_mouse_position - # Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. notches = max(scroll_amount or 1, 1) delta_x = 0 delta_y = 0 diff --git a/pkg/templates/python/gemini-computer-use/main.py b/pkg/templates/python/gemini-computer-use/main.py index ee15c130..1f5bd81e 100644 --- a/pkg/templates/python/gemini-computer-use/main.py +++ b/pkg/templates/python/gemini-computer-use/main.py @@ -75,8 +75,7 @@ async def cua_task( } -# Run locally if executed directly and not in Kernel (no KERNEL_INVOCATION) -# Execute via: uv run main.py +# Run locally when not in Kernel invocation. Execute via: uv run main.py if __name__ == "__main__" and not os.getenv("KERNEL_INVOCATION"): import asyncio diff --git a/pkg/templates/python/gemini-computer-use/tools/computer.py b/pkg/templates/python/gemini-computer-use/tools/computer.py index ecf9e219..60d4079f 100644 --- a/pkg/templates/python/gemini-computer-use/tools/computer.py +++ b/pkg/templates/python/gemini-computer-use/tools/computer.py @@ -21,9 +21,7 @@ TYPING_DELAY_MS = 12 SCREENSHOT_DELAY_SECS = 0.5 -# Pixels per notch: higher = more conservative (fewer notches). 60–80 avoids overscroll on heavy sites. PX_PER_NOTCH = 60 -# Cap total notches per action so large magnitudes don't overscroll on any site. MAX_NOTCHES_PER_ACTION = 17 @@ -138,7 +136,6 @@ async def execute_action( center_x = self.screen_size.width // 2 center_y = self.screen_size.height // 2 - # Backend uses notches; Gemini sends magnitude in pixels (default 400). Chunk to avoid per-request cap. magnitude_px = args.get("magnitude", 400) doc_notches = min(MAX_NOTCHES_PER_ACTION, max(1, round(magnitude_px / PX_PER_NOTCH))) direction = args["direction"] @@ -151,7 +148,6 @@ async def execute_action( delta_x = doc_notches elif direction == "left": delta_x = -doc_notches - print(f"[cua-scroll] SCROLL_DOCUMENT direction={direction} magnitude_px={magnitude_px} notches={doc_notches} center=({center_x},{center_y})", flush=True) self.kernel.browsers.computer.scroll( self.session_id, x=center_x, @@ -169,7 +165,6 @@ async def execute_action( x = self.denormalize_x(args["x"]) y = self.denormalize_y(args["y"]) - # Gemini uses magnitude in pixels (default 400). Chunk to avoid per-request cap. magnitude_px = args.get("magnitude", 400) notches = min(MAX_NOTCHES_PER_ACTION, max(1, round(magnitude_px / PX_PER_NOTCH))) direction = args["direction"] @@ -182,7 +177,6 @@ async def execute_action( delta_x = notches elif direction == "left": delta_x = -notches - print(f"[cua-scroll] SCROLL_AT magnitude_px={magnitude_px} notches={notches} x={x} y={y} direction={direction}", flush=True) self.kernel.browsers.computer.scroll( self.session_id, x=x, diff --git a/pkg/templates/python/openagi-computer-use/kernel_handler.py b/pkg/templates/python/openagi-computer-use/kernel_handler.py index 0198452a..c3950a04 100644 --- a/pkg/templates/python/openagi-computer-use/kernel_handler.py +++ b/pkg/templates/python/openagi-computer-use/kernel_handler.py @@ -244,8 +244,7 @@ def _execute_hotkey(self, keys: list[str]): ) def _execute_scroll(self, x: int, y: int, direction: str, notches: int = 1): - """Execute a scroll action. One Lux scroll event = 1 notch (OpenAGI often calls scroll N times for amount N).""" - # Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. + """Execute a scroll action.""" notches = max(notches, 1) delta_x = 0 delta_y = 0 @@ -258,10 +257,6 @@ def _execute_scroll(self, x: int, y: int, direction: str, notches: int = 1): elif direction == "right": delta_x = notches - print( - f"[cua-scroll] SCROLL API call id={self.session.session_id} x={x} y={y} delta_x={delta_x} delta_y={delta_y} (notches={notches})", - flush=True, - ) self.session.kernel.browsers.computer.scroll( id=self.session.session_id, x=x, diff --git a/pkg/templates/typescript/gemini-computer-use/index.ts b/pkg/templates/typescript/gemini-computer-use/index.ts index aa1313de..dd55ff10 100644 --- a/pkg/templates/typescript/gemini-computer-use/index.ts +++ b/pkg/templates/typescript/gemini-computer-use/index.ts @@ -73,8 +73,7 @@ app.action( }, ); -// Run locally if executed directly and not in Kernel (no KERNEL_INVOCATION) -// Execute via: npx tsx index.ts +// Run locally when not in Kernel invocation. Execute via: npx tsx index.ts if (!process.env.KERNEL_INVOCATION && import.meta.url === `file://${process.argv[1]}`) { const testQuery = "Navigate to https://www.google.com and describe what you see"; diff --git a/pkg/templates/typescript/gemini-computer-use/loop.ts b/pkg/templates/typescript/gemini-computer-use/loop.ts index 43e00f0a..bc917966 100644 --- a/pkg/templates/typescript/gemini-computer-use/loop.ts +++ b/pkg/templates/typescript/gemini-computer-use/loop.ts @@ -190,7 +190,7 @@ export async function samplingLoop({ name: fc.name, response: responseData, // Include screenshot as inline data - ...(result.base64Image && fc.name && isPredefinedFunction(fc.name) ? { + ...(result.base64Image && isPredefinedFunction(fc.name) ? { parts: [{ inlineData: { mimeType: 'image/png', diff --git a/pkg/templates/typescript/gemini-computer-use/tools/computer.ts b/pkg/templates/typescript/gemini-computer-use/tools/computer.ts index 175ecaba..9c459513 100644 --- a/pkg/templates/typescript/gemini-computer-use/tools/computer.ts +++ b/pkg/templates/typescript/gemini-computer-use/tools/computer.ts @@ -17,9 +17,7 @@ import { const TYPING_DELAY_MS = 12; const SCREENSHOT_DELAY_MS = 500; -/** Higher = more conservative (fewer notches). 60–80 avoids overscroll on heavy sites. */ const PX_PER_NOTCH = 60; -/** Cap total notches per action so large magnitudes don't overscroll. */ const MAX_NOTCHES_PER_ACTION = 17; /** @@ -153,7 +151,6 @@ export class ComputerTool { const centerX = Math.round(this.screenSize.width / 2); const centerY = Math.round(this.screenSize.height / 2); - // Backend uses notches; chunk to avoid per-request cap. const magnitudePx = args.magnitude ?? 400; const docNotches = Math.min(MAX_NOTCHES_PER_ACTION, Math.max(1, Math.round(magnitudePx / PX_PER_NOTCH))); let docDx = 0; @@ -162,7 +159,6 @@ export class ComputerTool { else if (args.direction === 'up') docDy = -docNotches; else if (args.direction === 'right') docDx = docNotches; else if (args.direction === 'left') docDx = -docNotches; - console.info('[cua-scroll] SCROLL_DOCUMENT direction=%s magnitude_px=%d notches=%d x=%d y=%d', args.direction, magnitudePx, docNotches, centerX, centerY); await this.kernel.browsers.computer.scroll(this.sessionId, { x: centerX, y: centerY, @@ -183,7 +179,6 @@ export class ComputerTool { const x = this.denormalizeX(args.x); const y = this.denormalizeY(args.y); - // Chunk to avoid per-request cap. const magnitudePx = args.magnitude ?? 400; const notches = Math.min(MAX_NOTCHES_PER_ACTION, Math.max(1, Math.round(magnitudePx / PX_PER_NOTCH))); let atDx = 0; @@ -192,7 +187,6 @@ export class ComputerTool { else if (args.direction === 'up') atDy = -notches; else if (args.direction === 'right') atDx = notches; else if (args.direction === 'left') atDx = -notches; - console.info('[cua-scroll] SCROLL_AT magnitude_px=%s notches=%d x=%d y=%d direction=%s', magnitudePx, notches, x, y, args.direction); await this.kernel.browsers.computer.scroll(this.sessionId, { x, y, diff --git a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts index 6ec7636e..5ba8e09e 100644 --- a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts +++ b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts @@ -166,7 +166,6 @@ export class ComputerTool { throw new ToolError(`Invalid scroll direction: ${direction}`); } - // Backend (kernel-images) uses delta_x/delta_y as wheel-event repeat count (notches), not pixels. let delta_x = 0; let delta_y = 0; From 27b9d7e225fba411cd94380237f5154304c0db1b Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Tue, 3 Mar 2026 00:02:32 -0500 Subject: [PATCH 13/15] openagi: remove dead scroll_amount parameter Scroll amount was stored but never used; the only call to _execute_scroll hardcoded notches=1. Align with docstring: 1 scroll event = 1 notch, model controls amount by emitting N scroll actions. Remove parameter and assignment to fix misleading API. Made-with: Cursor --- pkg/templates/python/openagi-computer-use/kernel_handler.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pkg/templates/python/openagi-computer-use/kernel_handler.py b/pkg/templates/python/openagi-computer-use/kernel_handler.py index c3950a04..3304af62 100644 --- a/pkg/templates/python/openagi-computer-use/kernel_handler.py +++ b/pkg/templates/python/openagi-computer-use/kernel_handler.py @@ -46,7 +46,6 @@ def __init__( self, session: "KernelBrowserSession", action_pause: float = 0.1, - scroll_amount: int = 3, wait_duration: float = 1.0, type_delay: int = 50, ): @@ -56,13 +55,11 @@ def __init__( Args: session: The Kernel browser session to control action_pause: Pause between actions in seconds - scroll_amount: Amount to scroll (wheel units / notches) wait_duration: Duration for wait actions in seconds type_delay: Delay between keystrokes in milliseconds """ self.session = session self.action_pause = action_pause - self.scroll_amount = scroll_amount self.wait_duration = wait_duration self.type_delay = type_delay From 0190aeabfc9eaaea962983bffabbee836ffbaad2 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Tue, 3 Mar 2026 00:10:44 -0500 Subject: [PATCH 14/15] Remove [cua-scroll] debug print statements from openagi and yutori templates Made-with: Cursor --- pkg/templates/python/openagi-computer-use/kernel_handler.py | 2 -- pkg/templates/python/yutori-computer-use/tools/computer.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/pkg/templates/python/openagi-computer-use/kernel_handler.py b/pkg/templates/python/openagi-computer-use/kernel_handler.py index 3304af62..364a8762 100644 --- a/pkg/templates/python/openagi-computer-use/kernel_handler.py +++ b/pkg/templates/python/openagi-computer-use/kernel_handler.py @@ -302,9 +302,7 @@ def _execute_single_action(self, action: Action) -> None: self._execute_type(text, press_enter=press_enter) case ActionType.SCROLL: - print(f"[cua-scroll] SCROLL action raw_arg={arg!r}", flush=True) x, y, direction = self._parse_scroll(arg) - print(f"[cua-scroll] SCROLL parsed x={x} y={y} direction={direction}", flush=True) self._execute_scroll(x, y, direction, notches=1) case ActionType.FINISH: diff --git a/pkg/templates/python/yutori-computer-use/tools/computer.py b/pkg/templates/python/yutori-computer-use/tools/computer.py index 4fb9d685..d078aded 100644 --- a/pkg/templates/python/yutori-computer-use/tools/computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/computer.py @@ -158,10 +158,6 @@ async def _handle_scroll(self, action: N1Action) -> ToolResult: elif direction == "right": delta_x = notches - print( - f"[cua-scroll] scroll amount={action.get('amount')} notches={notches} direction={direction} x={coords['x']} y={coords['y']} delta_x={delta_x} delta_y={delta_y}", - flush=True, - ) self.kernel.browsers.computer.scroll( self.session_id, x=coords["x"], From 6f3501d27967c5d3c7019da2847672eaa5a84bd8 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Tue, 3 Mar 2026 08:45:52 -0500 Subject: [PATCH 15/15] Rename yutori-computer-use template to yutori - Template key: yutori-computer-use -> yutori in pkg/create/templates.go - Rename pkg/templates/python/yutori-computer-use -> python/yutori - Rename pkg/templates/typescript/yutori-computer-use -> typescript/yutori - Update .cursor/commands/qa.md to use -t yutori Made-with: Cursor --- .cursor/commands/qa.md | 8 ++++---- pkg/create/templates.go | 2 +- .../python/{yutori-computer-use => yutori}/README.md | 0 .../python/{yutori-computer-use => yutori}/_gitignore | 0 .../python/{yutori-computer-use => yutori}/loop.py | 0 .../python/{yutori-computer-use => yutori}/main.py | 0 .../python/{yutori-computer-use => yutori}/pyproject.toml | 0 .../python/{yutori-computer-use => yutori}/session.py | 0 .../{yutori-computer-use => yutori}/tools/__init__.py | 0 .../python/{yutori-computer-use => yutori}/tools/base.py | 0 .../{yutori-computer-use => yutori}/tools/computer.py | 0 .../typescript/{yutori-computer-use => yutori}/README.md | 0 .../typescript/{yutori-computer-use => yutori}/_gitignore | 0 .../typescript/{yutori-computer-use => yutori}/index.ts | 0 .../typescript/{yutori-computer-use => yutori}/loop.ts | 0 .../{yutori-computer-use => yutori}/package.json | 0 .../typescript/{yutori-computer-use => yutori}/session.ts | 0 .../{yutori-computer-use => yutori}/tools/computer.ts | 0 .../{yutori-computer-use => yutori}/tsconfig.json | 0 19 files changed, 5 insertions(+), 5 deletions(-) rename pkg/templates/python/{yutori-computer-use => yutori}/README.md (100%) rename pkg/templates/python/{yutori-computer-use => yutori}/_gitignore (100%) rename pkg/templates/python/{yutori-computer-use => yutori}/loop.py (100%) rename pkg/templates/python/{yutori-computer-use => yutori}/main.py (100%) rename pkg/templates/python/{yutori-computer-use => yutori}/pyproject.toml (100%) rename pkg/templates/python/{yutori-computer-use => yutori}/session.py (100%) rename pkg/templates/python/{yutori-computer-use => yutori}/tools/__init__.py (100%) rename pkg/templates/python/{yutori-computer-use => yutori}/tools/base.py (100%) rename pkg/templates/python/{yutori-computer-use => yutori}/tools/computer.py (100%) rename pkg/templates/typescript/{yutori-computer-use => yutori}/README.md (100%) rename pkg/templates/typescript/{yutori-computer-use => yutori}/_gitignore (100%) rename pkg/templates/typescript/{yutori-computer-use => yutori}/index.ts (100%) rename pkg/templates/typescript/{yutori-computer-use => yutori}/loop.ts (100%) rename pkg/templates/typescript/{yutori-computer-use => yutori}/package.json (100%) rename pkg/templates/typescript/{yutori-computer-use => yutori}/session.ts (100%) rename pkg/templates/typescript/{yutori-computer-use => yutori}/tools/computer.ts (100%) rename pkg/templates/typescript/{yutori-computer-use => yutori}/tsconfig.json (100%) diff --git a/.cursor/commands/qa.md b/.cursor/commands/qa.md index 12504189..efcd1cfe 100644 --- a/.cursor/commands/qa.md +++ b/.cursor/commands/qa.md @@ -58,7 +58,7 @@ Here are all valid language + template combinations: | typescript | openai-computer-use | ts-openai-cua | ts-openai-cua | Yes | OPENAI_API_KEY | | typescript | gemini-computer-use | ts-gemini-cua | ts-gemini-cua | Yes | GOOGLE_API_KEY | | typescript | claude-agent-sdk | ts-claude-agent-sdk | ts-claude-agent-sdk | Yes | ANTHROPIC_API_KEY | -| typescript | yutori-computer-use | ts-yutori-cua | ts-yutori-cua | Yes | YUTORI_API_KEY | +| typescript | yutori | ts-yutori-cua | ts-yutori-cua | Yes | YUTORI_API_KEY | | python | sample-app | py-sample-app | python-basic | No | - | | python | gemini-computer-use | py-gemini-cua | python-gemini-cua | Yes | GOOGLE_API_KEY | @@ -68,7 +68,7 @@ Here are all valid language + template combinations: | python | openai-computer-use | py-openai-cua | python-openai-cua | Yes | OPENAI_API_KEY | | python | openagi-computer-use | py-openagi-cua | python-openagi-cua | Yes | OAGI_API_KEY | | python | claude-agent-sdk | py-claude-agent-sdk | py-claude-agent-sdk | Yes | ANTHROPIC_API_KEY | -| python | yutori-computer-use | py-yutori-cua | python-yutori-cua | Yes | YUTORI_API_KEY | +| python | yutori | py-yutori-cua | python-yutori-cua | Yes | YUTORI_API_KEY | > **Yutori:** Test both default browser and `"kiosk": true` (uses Playwright for goto_url when kiosk is enabled). @@ -86,7 +86,7 @@ Run each of these (they are non-interactive when all flags are provided): ../bin/kernel create -n ts-openai-cua -l typescript -t openai-computer-use ../bin/kernel create -n ts-gemini-cua -l typescript -t gemini-computer-use ../bin/kernel create -n ts-claude-agent-sdk -l typescript -t claude-agent-sdk -../bin/kernel create -n ts-yutori-cua -l typescript -t yutori-computer-use +../bin/kernel create -n ts-yutori-cua -l typescript -t yutori # Python templates ../bin/kernel create -n py-sample-app -l python -t sample-app @@ -97,7 +97,7 @@ Run each of these (they are non-interactive when all flags are provided): ../bin/kernel create -n py-openagi-cua -l python -t openagi-computer-use ../bin/kernel create -n py-claude-agent-sdk -l python -t claude-agent-sdk ../bin/kernel create -n py-gemini-cua -l python -t gemini-computer-use -../bin/kernel create -n py-yutori-cua -l python -t yutori-computer-use +../bin/kernel create -n py-yutori-cua -l python -t yutori ``` ## Step 5: Deploy Each Template diff --git a/pkg/create/templates.go b/pkg/create/templates.go index c9b07b7d..fb5845f3 100644 --- a/pkg/create/templates.go +++ b/pkg/create/templates.go @@ -18,7 +18,7 @@ const ( TemplateStagehand = "stagehand" TemplateOpenAGIComputerUse = "openagi-computer-use" TemplateClaudeAgentSDK = "claude-agent-sdk" - TemplateYutoriComputerUse = "yutori-computer-use" + TemplateYutoriComputerUse = "yutori" ) type TemplateInfo struct { diff --git a/pkg/templates/python/yutori-computer-use/README.md b/pkg/templates/python/yutori/README.md similarity index 100% rename from pkg/templates/python/yutori-computer-use/README.md rename to pkg/templates/python/yutori/README.md diff --git a/pkg/templates/python/yutori-computer-use/_gitignore b/pkg/templates/python/yutori/_gitignore similarity index 100% rename from pkg/templates/python/yutori-computer-use/_gitignore rename to pkg/templates/python/yutori/_gitignore diff --git a/pkg/templates/python/yutori-computer-use/loop.py b/pkg/templates/python/yutori/loop.py similarity index 100% rename from pkg/templates/python/yutori-computer-use/loop.py rename to pkg/templates/python/yutori/loop.py diff --git a/pkg/templates/python/yutori-computer-use/main.py b/pkg/templates/python/yutori/main.py similarity index 100% rename from pkg/templates/python/yutori-computer-use/main.py rename to pkg/templates/python/yutori/main.py diff --git a/pkg/templates/python/yutori-computer-use/pyproject.toml b/pkg/templates/python/yutori/pyproject.toml similarity index 100% rename from pkg/templates/python/yutori-computer-use/pyproject.toml rename to pkg/templates/python/yutori/pyproject.toml diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori/session.py similarity index 100% rename from pkg/templates/python/yutori-computer-use/session.py rename to pkg/templates/python/yutori/session.py diff --git a/pkg/templates/python/yutori-computer-use/tools/__init__.py b/pkg/templates/python/yutori/tools/__init__.py similarity index 100% rename from pkg/templates/python/yutori-computer-use/tools/__init__.py rename to pkg/templates/python/yutori/tools/__init__.py diff --git a/pkg/templates/python/yutori-computer-use/tools/base.py b/pkg/templates/python/yutori/tools/base.py similarity index 100% rename from pkg/templates/python/yutori-computer-use/tools/base.py rename to pkg/templates/python/yutori/tools/base.py diff --git a/pkg/templates/python/yutori-computer-use/tools/computer.py b/pkg/templates/python/yutori/tools/computer.py similarity index 100% rename from pkg/templates/python/yutori-computer-use/tools/computer.py rename to pkg/templates/python/yutori/tools/computer.py diff --git a/pkg/templates/typescript/yutori-computer-use/README.md b/pkg/templates/typescript/yutori/README.md similarity index 100% rename from pkg/templates/typescript/yutori-computer-use/README.md rename to pkg/templates/typescript/yutori/README.md diff --git a/pkg/templates/typescript/yutori-computer-use/_gitignore b/pkg/templates/typescript/yutori/_gitignore similarity index 100% rename from pkg/templates/typescript/yutori-computer-use/_gitignore rename to pkg/templates/typescript/yutori/_gitignore diff --git a/pkg/templates/typescript/yutori-computer-use/index.ts b/pkg/templates/typescript/yutori/index.ts similarity index 100% rename from pkg/templates/typescript/yutori-computer-use/index.ts rename to pkg/templates/typescript/yutori/index.ts diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori/loop.ts similarity index 100% rename from pkg/templates/typescript/yutori-computer-use/loop.ts rename to pkg/templates/typescript/yutori/loop.ts diff --git a/pkg/templates/typescript/yutori-computer-use/package.json b/pkg/templates/typescript/yutori/package.json similarity index 100% rename from pkg/templates/typescript/yutori-computer-use/package.json rename to pkg/templates/typescript/yutori/package.json diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori/session.ts similarity index 100% rename from pkg/templates/typescript/yutori-computer-use/session.ts rename to pkg/templates/typescript/yutori/session.ts diff --git a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts b/pkg/templates/typescript/yutori/tools/computer.ts similarity index 100% rename from pkg/templates/typescript/yutori-computer-use/tools/computer.ts rename to pkg/templates/typescript/yutori/tools/computer.ts diff --git a/pkg/templates/typescript/yutori-computer-use/tsconfig.json b/pkg/templates/typescript/yutori/tsconfig.json similarity index 100% rename from pkg/templates/typescript/yutori-computer-use/tsconfig.json rename to pkg/templates/typescript/yutori/tsconfig.json