diff --git a/.gitignore b/.gitignore index 010dc12a..092b5596 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,7 @@ kernel # QA testing directories qa-* + + +__pycache__ +.dmux/ diff --git a/pkg/templates/python/openai-computer-use/.env.example b/pkg/templates/python/openai-computer-use/.env.example index b74e0a29..3ff84207 100644 --- a/pkg/templates/python/openai-computer-use/.env.example +++ b/pkg/templates/python/openai-computer-use/.env.example @@ -1,2 +1,3 @@ -# Copy this file to .env and fill in your API key +# Copy this file to .env and fill in your API keys OPENAI_API_KEY=your_openai_api_key_here +KERNEL_API_KEY=your_kernel_api_key_here diff --git a/pkg/templates/python/openai-computer-use/README.md b/pkg/templates/python/openai-computer-use/README.md index e45b15d4..684b36d3 100644 --- a/pkg/templates/python/openai-computer-use/README.md +++ b/pkg/templates/python/openai-computer-use/README.md @@ -1,7 +1,27 @@ # Kernel Python Sample App - OpenAI Computer Use -This is a Kernel application that demonstrates using the Computer Use Agent (CUA) from OpenAI. +This is a Kernel application that demonstrates using the Computer Use Agent (CUA) from OpenAI with Kernel's native browser control API. -It generally follows the [OpenAI CUA Sample App Reference](https://github.com/openai/openai-cua-sample-app) and uses Playwright via Kernel for browser automation. +It uses Kernel's computer control endpoints (screenshot, click, type, scroll, batch, etc.) and includes a `batch_computer_actions` tool that executes multiple actions in a single API call for lower latency. -See the [docs](https://www.kernel.sh/docs/quickstart) for more information. \ No newline at end of file +## Local testing + +You can test against a remote Kernel browser without deploying: + +```bash +cp .env.example .env +# Fill in OPENAI_API_KEY and KERNEL_API_KEY in .env +uv run run_local.py +uv run run_local.py --task "go to https://news.ycombinator.com and get the top 5 articles" +``` + +The local runner defaults to a built-in sample task. Pass `--task "..."` to run a custom prompt locally, and add `--debug` to include verbose in-flight events. + +## Deploy to Kernel + +```bash +kernel deploy main.py --env-file .env +kernel invoke python-openai-cua cua-task -p '{"task":"go to https://news.ycombinator.com and list top 5 articles"}' +``` + +See the [docs](https://www.kernel.sh/docs/quickstart) for more information. diff --git a/pkg/templates/python/openai-computer-use/agent/agent.py b/pkg/templates/python/openai-computer-use/agent/agent.py index d7f4267f..400ce12b 100644 --- a/pkg/templates/python/openai-computer-use/agent/agent.py +++ b/pkg/templates/python/openai-computer-use/agent/agent.py @@ -1,4 +1,11 @@ -from computers import Computer +import json +import time +from typing import Any, Callable +from computers.kernel_computer import ( + KernelComputer, + _describe_action, + _describe_batch_actions, +) from utils import ( create_response, show_image, @@ -6,112 +13,314 @@ sanitize_message, check_blocklisted_url, ) -import json -from typing import Callable +BATCH_FUNC_NAME = "batch_computer_actions" +EXTRA_FUNC_NAME = "computer_use_extra" +POST_ACTION_SETTLE_SECONDS = 0.3 -class Agent: - """ - A sample agent class that can be used to interact with a computer. +BATCH_INSTRUCTIONS = """You have three ways to perform actions: +1. The standard computer tool — use for single actions when you need screenshot feedback after each step. +2. batch_computer_actions — use to execute multiple actions at once when you can predict the outcome. +3. computer_use_extra — use high-level browser actions: goto, back, and url. + +ALWAYS prefer batch_computer_actions when performing predictable sequences like: +- Clicking a text field, typing text, and pressing Enter +- Any sequence where you don't need to see intermediate results + +Use computer_use_extra for: +- action="goto" only when changing the page URL +- action="back" to go back in history +- action="url" to read the exact current URL + +When interacting with page content (search boxes, forms, chat inputs): +- Click the target input first, then type. +- Do not use URL-navigation actions for in-page text entry. + +For drag actions in batch_computer_actions: +- Always include a path field. +- path must be an array of at least two points. +- If one drag is likely to change the position, order, or layout of other targets, do not batch multiple drags together. +- In those cases, prefer one drag at a time and inspect the updated screenshot before planning the next drag. +- Each point must be an object like {"x": 123, "y": 456}.""" + +BATCH_TOOL = { + "type": "function", + "name": BATCH_FUNC_NAME, + "description": ( + "Execute multiple computer actions in sequence without waiting for " + "screenshots between them. Use this when you can predict the outcome of a " + "sequence of actions without needing intermediate visual feedback. After all " + "actions execute, a single screenshot is taken and returned.\n\n" + "PREFER this over individual computer actions when:\n" + "- Typing text followed by pressing Enter\n" + "- Clicking a field and then typing into it\n" + "- Any sequence where intermediate screenshots aren't needed\n\n" + "Constraint: return-value actions (url, screenshot) can appear at most once " + "and only as the final action in the batch." + ), + "parameters": { + "type": "object", + "properties": { + "actions": { + "type": "array", + "description": "Ordered list of actions to execute", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "click", + "double_click", + "type", + "keypress", + "scroll", + "move", + "drag", + "wait", + "goto", + "back", + "url", + "screenshot", + ], + }, + "x": {"type": "number"}, + "y": {"type": "number"}, + "text": {"type": "string"}, + "url": {"type": "string"}, + "keys": {"type": "array", "items": {"type": "string"}}, + "hold_keys": {"type": "array", "items": {"type": "string"}}, + "button": {"type": "string"}, + "scroll_x": {"type": "number"}, + "scroll_y": {"type": "number"}, + "path": { + "type": "array", + "description": "Required for drag actions. Provide at least two points as objects with x/y coordinates.", + "items": { + "type": "object", + "properties": { + "x": {"type": "number"}, + "y": {"type": "number"}, + }, + "required": ["x", "y"], + }, + }, + }, + "required": ["type"], + }, + }, + }, + "required": ["actions"], + }, + "strict": False, +} + +EXTRA_TOOL = { + "type": "function", + "name": EXTRA_FUNC_NAME, + "description": "High-level browser actions for navigation and URL retrieval.", + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["goto", "back", "url"], + "description": "Action to perform: goto, back, or url.", + }, + "url": { + "type": "string", + "description": "Required when action is goto. Fully qualified URL to navigate to.", + }, + }, + "required": ["action"], + }, + "strict": False, +} - (See simple_cua_loop.py for a simple example without an agent.) - """ +# Keep this shape aligned with CUA and current OpenAI Responses API. +OPENAI_COMPUTER_TOOL = {"type": "computer"} + + +class Agent: + """An agent that uses OpenAI CUA with Kernel's native computer control API.""" def __init__( self, - model="computer-use-preview", - computer: Computer = None, + model="gpt-5.4", + computer: KernelComputer = None, tools: list[dict] = [], acknowledge_safety_check_callback: Callable = lambda message: False, ): self.model = model self.computer = computer - self.tools = tools + self.tools = list(tools) self.print_steps = True self.debug = False self.show_images = False + self.on_event: Callable[[dict], None] | None = None + self._model_request_started_at: float | None = None self.acknowledge_safety_check_callback = acknowledge_safety_check_callback if computer: - dimensions = computer.get_dimensions() self.tools += [ - { - "type": "computer-preview", - "display_width": dimensions[0], - "display_height": dimensions[1], - "environment": computer.get_environment(), - }, - { - "type": "function", - "name": "back", - "description": "Go back to the previous page.", - "parameters": {}, - }, - { - "type": "function", - "name": "goto", - "description": "Go to a specific URL.", - "parameters": { - "type": "object", - "properties": { - "url": { - "type": "string", - "description": "Fully qualified URL to navigate to.", - }, - }, - "additionalProperties": False, - "required": ["url"], - }, - }, - { - "type": "function", - "name": "forward", - "description": "Go forward to the next page.", - "parameters": {}, - }, + dict(OPENAI_COMPUTER_TOOL), + BATCH_TOOL, + EXTRA_TOOL, ] def debug_print(self, *args): if self.debug: pp(*args) + def _emit_event(self, event: str, data: dict | None = None) -> None: + if self.print_steps and self.on_event: + self.on_event({"event": event, "data": data or {}}) + + def _current_model_elapsed_ms(self) -> int | None: + if self._model_request_started_at is None: + return None + return int((time.time() - self._model_request_started_at) * 1000) + + def _capture_post_action_screenshot(self) -> str: + time.sleep(POST_ACTION_SETTLE_SECONDS) + return self.computer.screenshot() + + def _extract_reasoning_text(self, item: dict[str, Any]) -> str: + summary = item.get("summary") + if not isinstance(summary, list): + return "" + pieces: list[str] = [] + for part in summary: + if not isinstance(part, dict): + continue + text = part.get("text") + if isinstance(text, str) and text: + pieces.append(text) + return " ".join(pieces).strip() + + def _extract_prompt_text(self, item: dict[str, Any]) -> str | None: + if item.get("role") != "user": + return None + content = item.get("content") + if isinstance(content, str): + return content + if not isinstance(content, list): + return None + parts: list[str] = [] + for entry in content: + if not isinstance(entry, dict): + continue + text = entry.get("text") + if isinstance(text, str) and text: + parts.append(text) + return " ".join(parts) if parts else None + + def _batch_terminal_read_action(self, actions: list[dict[str, Any]]) -> str: + if not actions: + return "" + action_type = str(actions[-1].get("type", "")) + if action_type in ("url", "screenshot"): + return action_type + return "" + def handle_item(self, item): """Handle each item; may cause a computer action + screenshot.""" + if item["type"] == "reasoning": + text = self._extract_reasoning_text(item) + if text: + self._emit_event("reasoning_delta", {"text": text}) + if item["type"] == "message": - if self.print_steps: - print(item["content"][0]["text"]) + if item.get("role") == "assistant": + content = item.get("content", []) + if isinstance(content, list): + for part in content: + if isinstance(part, dict) and isinstance(part.get("text"), str): + self._emit_event("text_delta", {"text": part["text"]}) + self._emit_event("text_done", {}) if item["type"] == "function_call": name, args = item["name"], json.loads(item["arguments"]) - if self.print_steps: - print(f"{name}({args})") + elapsed_ms = self._current_model_elapsed_ms() + if name == BATCH_FUNC_NAME: + actions = args.get("actions", []) + if isinstance(actions, list): + typed_actions = [a for a in actions if isinstance(a, dict)] + payload = { + "action_type": "batch", + "description": _describe_batch_actions(typed_actions), + "action": {"type": "batch", "actions": typed_actions}, + } + if elapsed_ms is not None: + payload["elapsed_ms"] = elapsed_ms + self._emit_event( + "action", + payload, + ) + else: + payload = { + "action_type": name, + "description": f"{name}({json.dumps(args)})", + "action": args, + } + if elapsed_ms is not None: + payload["elapsed_ms"] = elapsed_ms + self._emit_event( + "action", + payload, + ) + + if name == BATCH_FUNC_NAME: + return self._handle_batch_call(item["call_id"], args) + if name == EXTRA_FUNC_NAME: + return self._handle_extra_call(item["call_id"], args) - if hasattr(self.computer, name): # if function exists on computer, call it - method = getattr(self.computer, name) - method(**args) return [ { "type": "function_call_output", "call_id": item["call_id"], - "output": "success", # hard-coded output for demo + "output": f"Unsupported function call: {name}", } ] if item["type"] == "computer_call": - action = item["action"] - action_type = action["type"] - action_args = {k: v for k, v in action.items() if k != "type"} - if self.print_steps: - print(f"{action_type}({action_args})") + elapsed_ms = self._current_model_elapsed_ms() + actions = item.get("actions") + if not isinstance(actions, list): + single = item.get("action") + actions = [single] if isinstance(single, dict) else [] + typed_actions = [a for a in actions if isinstance(a, dict)] - method = getattr(self.computer, action_type) - method(**action_args) + if len(typed_actions) == 1: + action_type = str(typed_actions[0].get("type", "unknown")) + action_payload: dict[str, Any] = typed_actions[0] + description = _describe_action( + action_type, + {k: v for k, v in typed_actions[0].items() if k != "type"}, + ) + else: + action_type = "batch" + action_payload = {"type": "batch", "actions": typed_actions} + description = _describe_batch_actions(typed_actions) - screenshot_base64 = self.computer.screenshot() + payload = { + "action_type": action_type, + "description": description, + "action": action_payload, + } + if elapsed_ms is not None: + payload["elapsed_ms"] = elapsed_ms + self._emit_event("action", payload) + self.computer.batch_actions(typed_actions) + + screenshot_base64 = self._capture_post_action_screenshot() + self._emit_event( + "screenshot", + {"captured": True, "bytes_base64": len(screenshot_base64)}, + ) if self.show_images: show_image(screenshot_base64) - # if user doesn't ack all safety checks exit with error pending_checks = item.get("pending_safety_checks", []) for check in pending_checks: message = check["message"] @@ -125,46 +334,133 @@ def handle_item(self, item): "call_id": item["call_id"], "acknowledged_safety_checks": pending_checks, "output": { - "type": "input_image", + "type": "computer_screenshot", "image_url": f"data:image/png;base64,{screenshot_base64}", }, } - # additional URL safety checks for browser environments if self.computer.get_environment() == "browser": current_url = self.computer.get_current_url() check_blocklisted_url(current_url) - call_output["output"]["current_url"] = current_url return [call_output] return [] + def _handle_batch_call(self, call_id, args): + actions = args.get("actions", []) + if not isinstance(actions, list): + actions = [] + self.computer.batch_actions(actions) + status_text = "Actions executed successfully." + terminal_action = self._batch_terminal_read_action(actions if isinstance(actions, list) else []) + if terminal_action == "url": + try: + current_url = self.computer.get_current_url() + status_text = f"Actions executed successfully. Current URL: {current_url}" + except Exception as exc: + status_text = f"Actions executed, but url() failed: {exc}" + screenshot_base64 = self._capture_post_action_screenshot() + output_items: list[dict[str, Any]] = [{"type": "input_text", "text": status_text}] + output_items.append( + { + "type": "input_image", + "image_url": f"data:image/png;base64,{screenshot_base64}", + "detail": "original", + } + ) + return [ + { + "type": "function_call_output", + "call_id": call_id, + "output": output_items, + } + ] + + def _handle_extra_call(self, call_id, args): + action = args.get("action", "") + url = args.get("url", "") + if action == "goto": + self.computer.batch_actions([{"type": "goto", "url": url}]) + status_text = "goto executed successfully." + elif action == "back": + self.computer.batch_actions([{"type": "back"}]) + status_text = "back executed successfully." + elif action == "url": + status_text = f"Current URL: {self.computer.get_current_url()}" + else: + status_text = f"unknown {EXTRA_FUNC_NAME} action: {action}" + + screenshot_base64 = self._capture_post_action_screenshot() + output_items: list[dict[str, Any]] = [{"type": "input_text", "text": status_text}] + output_items.append( + { + "type": "input_image", + "image_url": f"data:image/png;base64,{screenshot_base64}", + "detail": "original", + } + ) + return [ + { + "type": "function_call_output", + "call_id": call_id, + "output": output_items, + } + ] + def run_full_turn( - self, input_items, print_steps=True, debug=False, show_images=False + self, + input_items, + print_steps=True, + debug=False, + show_images=False, + on_event: Callable[[dict], None] | None = None, ): self.print_steps = print_steps self.debug = debug self.show_images = show_images + self.on_event = on_event new_items = [] + turns = 0 - # keep looping until we get a final response - while new_items[-1].get("role") != "assistant" if new_items else True: - self.debug_print([sanitize_message(msg) for msg in input_items + new_items]) + for message in input_items: + if isinstance(message, dict): + prompt = self._extract_prompt_text(message) + if prompt: + self._emit_event("prompt", {"text": prompt}) - response = create_response( - model=self.model, - input=input_items + new_items, - tools=self.tools, - truncation="auto", - ) - self.debug_print(response) + try: + while new_items[-1].get("role") != "assistant" if new_items else True: + turns += 1 + self.debug_print([sanitize_message(msg) for msg in input_items + new_items]) + + self._model_request_started_at = time.time() + response = create_response( + model=self.model, + input=input_items + new_items, + tools=self.tools, + truncation="auto", + reasoning={ + "effort": "low", + "summary": "concise", + }, + instructions=BATCH_INSTRUCTIONS, + ) + self.debug_print(response) + + if "output" not in response: + if self.debug: + print(response) + raise ValueError("No output from model") - if "output" not in response and self.debug: - print(response) - raise ValueError("No output from model") - else: new_items += response["output"] for item in response["output"]: new_items += self.handle_item(item) + self._model_request_started_at = None + self._emit_event("turn_done", {"turn": turns}) + except Exception as exc: + self._model_request_started_at = None + self._emit_event("error", {"message": str(exc)}) + raise + self._emit_event("run_complete", {"turns": turns}) return new_items diff --git a/pkg/templates/python/openai-computer-use/agent/logging.py b/pkg/templates/python/openai-computer-use/agent/logging.py new file mode 100644 index 00000000..2241aed2 --- /dev/null +++ b/pkg/templates/python/openai-computer-use/agent/logging.py @@ -0,0 +1,332 @@ +import logging +import sys +import threading +import time +from datetime import datetime +from typing import Callable + +MAX_LINE_WIDTH = 120 + + +def quiet_http_transport_logs() -> None: + # The Kernel Python SDK uses httpx underneath, and those request logs can + # become noisy when the surrounding process configures root logging at INFO. + logging.getLogger("httpx").setLevel(logging.WARNING) + logging.getLogger("httpcore").setLevel(logging.WARNING) + + +def _timestamp() -> str: + return datetime.now().strftime("%H:%M:%S.%f")[:-3] + + +def _truncate_one_line(text: str, max_len: int = 90) -> str: + one_line = " ".join(text.split()) + if len(one_line) <= max_len: + return one_line + return f"{one_line[: max_len - 3]}..." + + +def _format_kernel_op(op: str) -> str: + if not op: + return op + if "(" in op or "[" in op: + return op + return f"{op}()" + + +class _ThinkingSpinner: + def __init__(self, enabled: bool): + self.enabled = enabled + self.active = False + self.frame = 0 + self.start_at = 0.0 + self.start_ts = "" + self.reasoning = "" + self._thread: threading.Thread | None = None + self._stop_event = threading.Event() + self._lock = threading.Lock() + + def start(self) -> None: + if not self.enabled: + return + with self._lock: + if self.active: + return + self.active = True + self.frame = 0 + self.reasoning = "" + self.start_at = time.time() + self.start_ts = _timestamp() + self._stop_event.clear() + self._thread = threading.Thread(target=self._run, daemon=True) + self._thread.start() + + def add_reasoning(self, text: str) -> None: + with self._lock: + if not self.active: + return + self.reasoning += text + + def stop(self, action: str | None = None, elapsed_seconds: float | None = None) -> None: + with self._lock: + if not self.active: + if action: + elapsed_prefix = ( + f"[{elapsed_seconds:.3f}s] " + if isinstance(elapsed_seconds, (int, float)) + else "" + ) + sys.stdout.write(f"{_timestamp()} agent> {elapsed_prefix}{action}\n") + sys.stdout.flush() + return + self.active = False + self._stop_event.set() + elapsed = ( + float(elapsed_seconds) + if isinstance(elapsed_seconds, (int, float)) + else (time.time() - self.start_at) + ) + elapsed_text = f"{elapsed:.3f}s" + if self.reasoning.strip(): + reasoning = _truncate_one_line(self.reasoning, 70) + suffix = f" -> {action}" if action else "" + sys.stdout.write( + f"\r\033[2K{self.start_ts} agent> [{elapsed_text}] {reasoning}{suffix}\n" + ) + elif action: + sys.stdout.write( + f"\r\033[2K{self.start_ts} agent> [{elapsed_text}] {action}\n" + ) + else: + sys.stdout.write( + f"\r\033[2K{self.start_ts} agent> [{elapsed_text}] thinking...\n" + ) + sys.stdout.flush() + + def _run(self) -> None: + while not self._stop_event.wait(0.1): + with self._lock: + if not self.active: + return + self.frame += 1 + elapsed = time.time() - self.start_at + elapsed_text = f"{elapsed:.3f}s" + if self.reasoning.strip(): + prefix = f"{self.start_ts} agent> [{elapsed_text}] " + max_text = max(20, MAX_LINE_WIDTH - len(prefix)) + reasoning = _truncate_one_line(self.reasoning, max_text) + sys.stdout.write(f"\r\033[2K{prefix}{reasoning}") + else: + dots = "." * ((self.frame % 3) + 1) + dots = f"{dots:<3}" + sys.stdout.write( + f"\r\033[2K{self.start_ts} agent> [{elapsed_text}] thinking{dots}" + ) + sys.stdout.flush() + + +def create_event_logger(verbose: bool = False) -> Callable[[dict], None]: + spinner = _ThinkingSpinner(sys.stdout.isatty()) + in_text = False + last_live_view_url = "" + + def render_text(event: dict) -> None: + nonlocal in_text, last_live_view_url + + event_name = event.get("event", "") + data = event.get("data", {}) + if not isinstance(data, dict): + data = {} + + if event_name == "session_state": + live_url = data.get("live_view_url") + if ( + isinstance(live_url, str) + and live_url + and live_url != last_live_view_url + ): + sys.stdout.write(f"{_timestamp()} kernel> live view: {live_url}\n") + sys.stdout.flush() + last_live_view_url = live_url + return + + if event_name == "backend": + op = data.get("op") + if not isinstance(op, str) or not op: + return + + if in_text: + sys.stdout.write("\n") + sys.stdout.flush() + in_text = False + + if op == "live_url": + detail = data.get("detail") + if ( + isinstance(detail, str) + and detail + and detail != last_live_view_url + ): + sys.stdout.write(f"{_timestamp()} kernel> live view: {detail}\n") + sys.stdout.flush() + last_live_view_url = detail + return + + if op.endswith(".done"): + base_op = op[: -len(".done")] + display_op = _format_kernel_op(base_op) + detail = data.get("detail") + detail_text = detail if isinstance(detail, str) else "" + elapsed_ms = data.get("elapsed_ms") + elapsed_prefix = "" + if isinstance(elapsed_ms, (int, float)) and not isinstance(elapsed_ms, bool): + elapsed_prefix = f"[{float(elapsed_ms) / 1000:.3f}s] " + suffix = f" {detail_text}" if detail_text else "" + sys.stdout.write( + f"{_timestamp()} kernel> {elapsed_prefix}{display_op}{suffix}\n" + ) + sys.stdout.flush() + if base_op == "browsers.new" and detail_text: + last_live_view_url = detail_text + return + + if verbose: + sys.stdout.write(f"{_timestamp()} kernel> {op}\n") + sys.stdout.flush() + return + + if event_name == "prompt": + text = data.get("text") + if isinstance(text, str) and text: + sys.stdout.write(f"{_timestamp()} user> {text}\n") + sys.stdout.flush() + return + + if event_name == "reasoning_delta": + text = data.get("text") + if not isinstance(text, str): + return + if sys.stdout.isatty(): + spinner.start() + spinner.add_reasoning(text) + elif verbose and text: + sys.stdout.write( + f"{_timestamp()} agent> thinking: {_truncate_one_line(text)}\n" + ) + sys.stdout.flush() + return + + if event_name == "text_delta": + spinner.stop() + text = data.get("text") + if not isinstance(text, str) or not text: + return + if not in_text: + sys.stdout.write(f"{_timestamp()} agent> ") + in_text = True + sys.stdout.write(text) + sys.stdout.flush() + return + + if event_name == "text_done": + if in_text: + sys.stdout.write("\n") + sys.stdout.flush() + in_text = False + return + + if event_name == "action": + action_type = data.get("action_type") + description = data.get("description") + if not isinstance(description, str) or not description: + description = action_type if isinstance(action_type, str) else "action" + elapsed_ms = data.get("elapsed_ms") + elapsed_seconds = ( + float(elapsed_ms) / 1000 + if isinstance(elapsed_ms, (int, float)) and not isinstance(elapsed_ms, bool) + else None + ) + if in_text: + sys.stdout.write("\n") + in_text = False + spinner.stop(description, elapsed_seconds=elapsed_seconds) + return + + if event_name == "screenshot": + if verbose: + sys.stdout.write(f"{_timestamp()} debug> screenshot captured\n") + sys.stdout.flush() + return + + if event_name in ("turn_done", "run_complete"): + spinner.stop() + if in_text: + sys.stdout.write("\n") + sys.stdout.flush() + in_text = False + return + + if event_name == "error": + spinner.stop() + if in_text: + sys.stdout.write("\n") + sys.stdout.flush() + in_text = False + message = data.get("message") + if not isinstance(message, str) or not message: + message = "unknown error" + sys.stderr.write(f"{_timestamp()} error> {message}\n") + sys.stderr.flush() + + return render_text + + +def emit_browser_new_started(on_event: Callable[[dict], None]) -> None: + on_event({"event": "backend", "data": {"op": "browsers.new"}}) + + +def emit_browser_new_done( + on_event: Callable[[dict], None], started_at: datetime, live_view_url: str | None +) -> None: + on_event( + { + "event": "backend", + "data": { + "op": "browsers.new.done", + "detail": live_view_url or "", + "elapsed_ms": int((datetime.now() - started_at).total_seconds() * 1000), + }, + } + ) + + +def emit_session_state( + on_event: Callable[[dict], None], session_id: str, live_view_url: str | None +) -> None: + on_event( + { + "event": "session_state", + "data": { + "session_id": session_id, + "live_view_url": live_view_url or "", + }, + } + ) + + +def emit_browser_delete_started(on_event: Callable[[dict], None]) -> None: + on_event({"event": "backend", "data": {"op": "browsers.delete"}}) + + +def emit_browser_delete_done( + on_event: Callable[[dict], None], started_at: datetime +) -> None: + on_event( + { + "event": "backend", + "data": { + "op": "browsers.delete.done", + "elapsed_ms": int((datetime.now() - started_at).total_seconds() * 1000), + }, + } + ) diff --git a/pkg/templates/python/openai-computer-use/computers/__init__.py b/pkg/templates/python/openai-computer-use/computers/__init__.py index 0e8c132d..843071d0 100644 --- a/pkg/templates/python/openai-computer-use/computers/__init__.py +++ b/pkg/templates/python/openai-computer-use/computers/__init__.py @@ -1,11 +1,7 @@ -from . import default -from . import contrib +from .kernel_computer import KernelComputer from .computer import Computer -from .config import computers_config __all__ = [ - "default", - "contrib", + "KernelComputer", "Computer", - "computers_config", ] diff --git a/pkg/templates/python/openai-computer-use/computers/computer.py b/pkg/templates/python/openai-computer-use/computers/computer.py index 80986509..cc35eddd 100644 --- a/pkg/templates/python/openai-computer-use/computers/computer.py +++ b/pkg/templates/python/openai-computer-use/computers/computer.py @@ -1,8 +1,8 @@ -from typing import Protocol, List, Literal, Dict +from typing import Protocol, List, Literal, Dict, Any class Computer(Protocol): - """Defines the 'shape' (methods/properties) our loop expects.""" + """Defines the shape (methods/properties) the agent loop expects.""" def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: ... @@ -22,8 +22,16 @@ def wait(self, ms: int = 1000) -> None: ... def move(self, x: int, y: int) -> None: ... - def keypress(self, keys: List[str]) -> None: ... + def keypress(self, keys: List[str], hold_keys: List[str] | None = None) -> None: ... def drag(self, path: List[Dict[str, int]]) -> None: ... - def get_current_url() -> str: ... + def batch_actions(self, actions: List[Dict[str, Any]]) -> None: ... + + def goto(self, url: str) -> None: ... + + def back(self) -> None: ... + + def forward(self) -> None: ... + + def get_current_url(self) -> str: ... diff --git a/pkg/templates/python/openai-computer-use/computers/config.py b/pkg/templates/python/openai-computer-use/computers/config.py deleted file mode 100644 index 4bf314c4..00000000 --- a/pkg/templates/python/openai-computer-use/computers/config.py +++ /dev/null @@ -1,7 +0,0 @@ -from .default import * -from .contrib import * - -computers_config = { - "local-playwright": LocalPlaywrightBrowser, - "kernel": KernelPlaywrightBrowser, -} diff --git a/pkg/templates/python/openai-computer-use/computers/default/__init__.py b/pkg/templates/python/openai-computer-use/computers/default/__init__.py deleted file mode 100644 index 5e168f70..00000000 --- a/pkg/templates/python/openai-computer-use/computers/default/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .local_playwright import LocalPlaywrightBrowser -from .kernel import KernelPlaywrightBrowser diff --git a/pkg/templates/python/openai-computer-use/computers/default/kernel.py b/pkg/templates/python/openai-computer-use/computers/default/kernel.py deleted file mode 100644 index 5fbb7e5b..00000000 --- a/pkg/templates/python/openai-computer-use/computers/default/kernel.py +++ /dev/null @@ -1,48 +0,0 @@ -from playwright.sync_api import Browser, Page -from ..shared.base_playwright import BasePlaywrightComputer - -class KernelPlaywrightBrowser(BasePlaywrightComputer): - """ - Connects to a remote Chromium instance using a provided CDP URL. - Expects a dict as input: {'cdp_ws_url': ..., 'width': ..., 'height': ...} - Width and height are optional, defaulting to 1024x768. - """ - - def __init__(self, config: dict): - super().__init__() - self.cdp_ws_url = config.get("cdp_ws_url") - if not self.cdp_ws_url: - raise ValueError("cdp_ws_url must be provided in config dict") - self.width = config.get("width", 1024) - self.height = config.get("height", 768) - self.dimensions = (self.width, self.height) - - def get_dimensions(self): - return self.dimensions - - def _get_browser_and_page(self) -> tuple[Browser, Page]: - # Connect to the remote browser using the CDP URL - browser = self._playwright.chromium.connect_over_cdp(self.cdp_ws_url) - context = browser.contexts[0] if browser.contexts else browser.new_context() - page = context.pages[0] if context.pages else context.new_page() - page.set_viewport_size({"width": self.width, "height": self.height}) - page.on("close", self._handle_page_close) - # Optionally, navigate to a default page - # page.goto("about:blank") - return browser, page - - def _handle_new_page(self, page: Page): - """Handle the creation of a new page.""" - print("New page created") - self._page = page - page.on("close", self._handle_page_close) - - def _handle_page_close(self, page: Page): - """Handle the closure of a page.""" - print("Page closed") - if hasattr(self, "_browser") and self._page == page: - if self._browser.contexts[0].pages: - self._page = self._browser.contexts[0].pages[-1] - else: - print("Warning: All pages have been closed.") - self._page = None diff --git a/pkg/templates/python/openai-computer-use/computers/default/local_playwright.py b/pkg/templates/python/openai-computer-use/computers/default/local_playwright.py deleted file mode 100644 index 6810f34b..00000000 --- a/pkg/templates/python/openai-computer-use/computers/default/local_playwright.py +++ /dev/null @@ -1,54 +0,0 @@ -from playwright.sync_api import Browser, Page -from ..shared.base_playwright import BasePlaywrightComputer - - -class LocalPlaywrightBrowser(BasePlaywrightComputer): - """Launches a local Chromium instance using Playwright.""" - - def __init__(self, headless: bool = False): - super().__init__() - self.headless = headless - - def _get_browser_and_page(self) -> tuple[Browser, Page]: - width, height = self.get_dimensions() - launch_args = [ - f"--window-size={width},{height}", - "--disable-extensions", - "--disable-file-system", - ] - browser = self._playwright.chromium.launch( - chromium_sandbox=True, - headless=self.headless, - args=launch_args, - env={"DISPLAY": ":0"}, - ) - - context = browser.contexts[0] if browser.contexts else browser.new_context() - - - # Add event listeners for page creation and closure - context.on("page", self._handle_new_page) - - page = context.pages[0] if context.pages else context.new_page() - page.set_viewport_size({"width": width, "height": height}) - page.on("close", self._handle_page_close) - - # page.goto("about:blank") - - return browser, page - - def _handle_new_page(self, page: Page): - """Handle the creation of a new page.""" - print("New page created") - self._page = page - page.on("close", self._handle_page_close) - - def _handle_page_close(self, page: Page): - """Handle the closure of a page.""" - print("Page closed") - if self._page == page: - if self._browser.contexts[0].pages: - self._page = self._browser.contexts[0].pages[-1] - else: - print("Warning: All pages have been closed.") - self._page = None diff --git a/pkg/templates/python/openai-computer-use/computers/kernel_computer.py b/pkg/templates/python/openai-computer-use/computers/kernel_computer.py new file mode 100644 index 00000000..c70dc419 --- /dev/null +++ b/pkg/templates/python/openai-computer-use/computers/kernel_computer.py @@ -0,0 +1,612 @@ +import base64 +import time +from typing import List, Dict, Any, Callable + +from kernel import Kernel + +# CUA model key names -> X11 keysym names for the Kernel computer API +KEYSYM_MAP = { + "ENTER": "Return", + "Enter": "Return", + "RETURN": "Return", + "BACKSPACE": "BackSpace", + "Backspace": "BackSpace", + "DELETE": "Delete", + "TAB": "Tab", + "ESCAPE": "Escape", + "Escape": "Escape", + "ESC": "Escape", + "SPACE": "space", + "Space": "space", + "UP": "Up", + "DOWN": "Down", + "LEFT": "Left", + "RIGHT": "Right", + "HOME": "Home", + "END": "End", + "PAGEUP": "Prior", + "PAGE_UP": "Prior", + "PageUp": "Prior", + "PAGEDOWN": "Next", + "PAGE_DOWN": "Next", + "PageDown": "Next", + "CAPS_LOCK": "Caps_Lock", + "CapsLock": "Caps_Lock", + "CTRL": "Control_L", + "Ctrl": "Control_L", + "CONTROL": "Control_L", + "Control": "Control_L", + "ALT": "Alt_L", + "Alt": "Alt_L", + "SHIFT": "Shift_L", + "Shift": "Shift_L", + "META": "Super_L", + "Meta": "Super_L", + "SUPER": "Super_L", + "Super": "Super_L", + "CMD": "Super_L", + "COMMAND": "Super_L", + "F1": "F1", "F2": "F2", "F3": "F3", "F4": "F4", + "F5": "F5", "F6": "F6", "F7": "F7", "F8": "F8", + "F9": "F9", "F10": "F10", "F11": "F11", "F12": "F12", + "INSERT": "Insert", + "Insert": "Insert", + "PRINT": "Print", + "SCROLLLOCK": "Scroll_Lock", + "PAUSE": "Pause", + "NUMLOCK": "Num_Lock", +} +MODIFIER_KEYSYMS = { + "Control_L", + "Control_R", + "Alt_L", + "Alt_R", + "Shift_L", + "Shift_R", + "Super_L", + "Super_R", + "Meta_L", + "Meta_R", +} +GOTO_CHORD_DELAY_MS = 200 + + +def _translate_keys(keys: List[str]) -> List[str]: + return [KEYSYM_MAP.get(k, k) for k in keys] + + +def _expand_combo_keys(keys: List[str]) -> List[str]: + out: List[str] = [] + for raw in keys: + if not isinstance(raw, str): + continue + parts = raw.split("+") if "+" in raw else [raw] + for part in parts: + token = part.strip() + if token: + out.append(token) + return out + + +def _normalize_keypress_payload( + keys: List[str] | None = None, hold_keys: List[str] | None = None +) -> Dict[str, List[str]]: + translated_hold = _translate_keys(_expand_combo_keys(hold_keys or [])) + translated_keys = _translate_keys(_expand_combo_keys(keys or [])) + + hold_from_keys: List[str] = [] + primary_keys: List[str] = [] + for key in translated_keys: + if key in MODIFIER_KEYSYMS: + hold_from_keys.append(key) + else: + primary_keys.append(key) + + if not primary_keys: + return {"keys": translated_keys, "hold_keys": translated_hold} + + merged_hold = translated_hold + hold_from_keys + deduped_hold: List[str] = [] + for key in merged_hold: + if key not in deduped_hold: + deduped_hold.append(key) + return {"keys": primary_keys, "hold_keys": deduped_hold} + + +def _normalize_button(button) -> str: + if button is None: + return "left" + if isinstance(button, int): + return {1: "left", 2: "middle", 3: "right"}.get(button, "left") + return str(button) + + +def _normalize_drag_path(path: Any) -> List[List[int]]: + points: List[List[int]] = [] + if isinstance(path, list): + for point in path: + if isinstance(point, (list, tuple)) and len(point) >= 2: + x, y = point[0], point[1] + if ( + isinstance(x, (int, float)) + and not isinstance(x, bool) + and isinstance(y, (int, float)) + and not isinstance(y, bool) + ): + points.append([int(x), int(y)]) + continue + if not isinstance(point, dict): + continue + x = point.get("x") + y = point.get("y") + if ( + isinstance(x, (int, float)) + and not isinstance(x, bool) + and isinstance(y, (int, float)) + and not isinstance(y, bool) + ): + points.append([int(x), int(y)]) + return points + + +def _validate_drag_path(path: List[List[int]]) -> None: + if len(path) >= 2: + return + raise ValueError(f"drag action requires path with at least two points; got {path!r}") + + +def _translate_cua_action(action: Dict[str, Any]) -> Dict[str, Any]: + action_type = action.get("type", "") + if action_type == "click": + button = action.get("button") + if button == "back": + return { + "type": "press_key", + "press_key": {"hold_keys": ["Alt"], "keys": ["Left"]}, + } + if button == "forward": + return { + "type": "press_key", + "press_key": {"hold_keys": ["Alt"], "keys": ["Right"]}, + } + if button == "wheel": + return { + "type": "scroll", + "scroll": { + "x": action.get("x", 0), + "y": action.get("y", 0), + "delta_x": int(action.get("scroll_x", 0)), + "delta_y": int(action.get("scroll_y", 0)), + }, + } + return { + "type": "click_mouse", + "click_mouse": { + "x": action.get("x", 0), + "y": action.get("y", 0), + "button": _normalize_button(button), + }, + } + elif action_type == "double_click": + return { + "type": "click_mouse", + "click_mouse": { + "x": action.get("x", 0), + "y": action.get("y", 0), + "num_clicks": 2, + }, + } + elif action_type == "type": + return {"type": "type_text", "type_text": {"text": action.get("text", "")}} + elif action_type == "keypress": + normalized = _normalize_keypress_payload( + action.get("keys", []), action.get("hold_keys", []) + ) + payload: Dict[str, Any] = {"keys": normalized["keys"]} + if normalized["hold_keys"]: + payload["hold_keys"] = normalized["hold_keys"] + return {"type": "press_key", "press_key": payload} + elif action_type == "scroll": + return { + "type": "scroll", + "scroll": { + "x": action.get("x", 0), + "y": action.get("y", 0), + "delta_x": int(action.get("scroll_x", 0)), + "delta_y": int(action.get("scroll_y", 0)), + }, + } + elif action_type == "move": + return {"type": "move_mouse", "move_mouse": {"x": action.get("x", 0), "y": action.get("y", 0)}} + elif action_type == "drag": + path = _normalize_drag_path(action.get("path", [])) + _validate_drag_path(path) + return {"type": "drag_mouse", "drag_mouse": {"path": path}} + elif action_type == "wait": + return {"type": "sleep", "sleep": {"duration_ms": action.get("ms", 1000)}} + else: + raise ValueError(f"Unknown CUA action type: {action_type}") + + +def _is_batch_computer_action_type(action_type: str) -> bool: + return action_type in { + "click", + "double_click", + "type", + "keypress", + "scroll", + "move", + "drag", + "wait", + } + + +def _press_key_action( + keys: List[str], hold_keys: List[str] | None = None +) -> Dict[str, Any]: + payload = _normalize_keypress_payload(keys=keys, hold_keys=hold_keys) + return {"type": "press_key", "press_key": payload} + + +def _goto_batch_actions(url: str) -> List[Dict[str, Any]]: + return [ + _press_key_action(["l"], hold_keys=["Ctrl"]), + { + "type": "sleep", + "sleep": {"duration_ms": GOTO_CHORD_DELAY_MS}, + }, + _press_key_action(["a"], hold_keys=["Ctrl"]), + { + "type": "type_text", + "type_text": {"text": url}, + }, + _press_key_action(["Return"]), + ] + + +def _back_batch_actions() -> List[Dict[str, Any]]: + return [_press_key_action(["Left"], hold_keys=["Alt"])] + + +def _forward_batch_actions() -> List[Dict[str, Any]]: + return [_press_key_action(["Right"], hold_keys=["Alt"])] + + +def _current_url_batch_actions() -> List[Dict[str, Any]]: + return [ + _press_key_action(["l"], hold_keys=["Ctrl"]), + _press_key_action(["a"], hold_keys=["Ctrl"]), + _press_key_action(["c"], hold_keys=["Ctrl"]), + _press_key_action(["Escape"]), + ] + + +def _validate_batch_terminal_read_actions(actions: List[Dict[str, Any]]) -> None: + read_idx = -1 + read_type = "" + for idx, action in enumerate(actions): + action_type = str(action.get("type", "")) + if action_type not in ("url", "screenshot"): + continue + if read_idx >= 0: + raise ValueError( + f"batch can include at most one return-value action ({read_type} or {action_type}); " + f"found {read_type} at index {read_idx} and {action_type} at index {idx}" + ) + if idx != len(actions) - 1: + raise ValueError(f'return-value action "{action_type}" must be last in batch') + read_idx = idx + read_type = action_type + + +def _build_pending_batch(actions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + pending: List[Dict[str, Any]] = [] + for action in actions: + action_type = str(action.get("type", "")) + if _is_batch_computer_action_type(action_type): + pending.append(_translate_cua_action(action)) + continue + if action_type == "goto": + pending.extend(_goto_batch_actions(str(action.get("url", "")))) + continue + if action_type == "back": + pending.extend(_back_batch_actions()) + continue + if action_type in ("url", "screenshot"): + continue + raise ValueError(f"Unknown CUA action type: {action_type}") + return pending + + +def _describe_translated_batch(actions: List[Dict[str, Any]]) -> str: + parts: List[str] = [] + for action in actions: + action_type = str(action.get("type", "")) + if action_type == "click_mouse": + click = action.get("click_mouse", {}) + if not isinstance(click, dict): + parts.append(action_type) + continue + if int(click.get("num_clicks", 0)) > 1: + parts.append(f"double_click({int(click.get('x', 0))},{int(click.get('y', 0))})") + else: + parts.append(f"click({int(click.get('x', 0))},{int(click.get('y', 0))})") + continue + if action_type == "type_text": + type_text = action.get("type_text", {}) + text = str(type_text.get("text", "")) if isinstance(type_text, dict) else "" + parts.append(f"type({_truncate(text, 30)!r})") + continue + if action_type == "press_key": + press_key = action.get("press_key", {}) + keys = press_key.get("keys", []) if isinstance(press_key, dict) else [] + hold_keys = ( + press_key.get("hold_keys", []) if isinstance(press_key, dict) else [] + ) + parts.append(f"key(hold={hold_keys}, keys={keys})") + continue + if action_type == "scroll": + parts.append("scroll") + continue + if action_type == "move_mouse": + parts.append("move") + continue + if action_type == "drag_mouse": + parts.append("drag") + continue + if action_type == "sleep": + sleep = action.get("sleep", {}) + duration = int(sleep.get("duration_ms", 0)) if isinstance(sleep, dict) else 0 + parts.append(f"sleep({duration}ms)") + continue + parts.append(action_type) + return "batch[" + " -> ".join(parts) + "]" + + +def _truncate(text: str, max_len: int = 60) -> str: + if len(text) <= max_len: + return text + return f"{text[: max_len - 3]}..." + + +def _describe_action(action_type: str, action_args: Dict[str, Any]) -> str: + if action_type == "click": + x = int(action_args.get("x", 0)) + y = int(action_args.get("y", 0)) + button = str(action_args.get("button", "left")) + if button in ("", "left"): + return f"click({x}, {y})" + return f"click({x}, {y}, {button})" + if action_type == "double_click": + return f"double_click({int(action_args.get('x', 0))}, {int(action_args.get('y', 0))})" + if action_type == "type": + text = _truncate(str(action_args.get("text", ""))) + return f"type({text!r})" + if action_type == "keypress": + hold_keys = action_args.get("hold_keys", []) + keys = action_args.get("keys", []) + if hold_keys: + return f"keypress(hold={hold_keys}, keys={keys})" + return f"keypress({keys})" + if action_type == "scroll": + return ( + f"scroll({int(action_args.get('x', 0))}, {int(action_args.get('y', 0))}, " + f"dx={int(action_args.get('scroll_x', 0))}, dy={int(action_args.get('scroll_y', 0))})" + ) + if action_type == "move": + return f"move({int(action_args.get('x', 0))}, {int(action_args.get('y', 0))})" + if action_type == "drag": + return "drag(...)" + if action_type == "wait": + return f"wait({int(action_args.get('ms', 1000))}ms)" + if action_type == "goto": + return f"goto({action_args.get('url', '')!r})" + if action_type == "back": + return "back()" + if action_type == "url": + return "url()" + if action_type == "screenshot": + return "screenshot()" + return action_type + + +def _describe_batch_actions(actions: List[Dict[str, Any]]) -> str: + pieces = [] + for action in actions: + action_type = str(action.get("type", "unknown")) + action_args = {k: v for k, v in action.items() if k != "type"} + pieces.append(_describe_action(action_type, action_args)) + return "batch[" + " -> ".join(pieces) + "]" + + +class KernelComputer: + """Wraps Kernel's native computer control API for browser automation.""" + + def __init__( + self, + client: Kernel, + session_id: str, + on_event: Callable[[dict], None] | None = None, + ): + self.client = client + self.session_id = session_id + self.on_event = on_event + + def get_environment(self): + return "browser" + + def get_dimensions(self): + return (1920, 1080) + + def _emit_backend( + self, op: str, detail: str | None = None, elapsed_ms: int | None = None + ) -> None: + if not self.on_event: + return + data: Dict[str, Any] = {"op": op} + if detail: + data["detail"] = detail + if elapsed_ms is not None: + data["elapsed_ms"] = elapsed_ms + self.on_event({"event": "backend", "data": data}) + + def _trace_backend( + self, + op: str, + fn: Callable[[], Any], + detail: str | Callable[[Any], str | None] | None = None, + ) -> Any: + self._emit_backend(op) + started_at = time.time() + completed = False + result = None + try: + result = fn() + completed = True + return result + finally: + resolved_detail = None + if completed: + if callable(detail): + try: + resolved_detail = detail(result) + except Exception: + resolved_detail = None + elif isinstance(detail, str): + resolved_detail = detail + elapsed_ms = int((time.time() - started_at) * 1000) + self._emit_backend(f"{op}.done", resolved_detail, elapsed_ms) + + def screenshot(self) -> str: + def _do() -> str: + resp = self.client.browsers.computer.capture_screenshot(self.session_id) + return base64.b64encode(resp.read()).decode("utf-8") + + return self._trace_backend("screenshot", _do) + + def click(self, x: int, y: int, button="left") -> None: + if button == "back": + self.back() + return + if button == "forward": + self.forward() + return + if button == "wheel": + self.scroll(x, y, 0, 0) + return + normalized_button = _normalize_button(button) + op = _describe_action("click", {"x": x, "y": y, "button": normalized_button}) + self._trace_backend( + op, + lambda: self.client.browsers.computer.click_mouse( + self.session_id, x=x, y=y, button=normalized_button + ), + ) + + def double_click(self, x: int, y: int) -> None: + op = _describe_action("double_click", {"x": x, "y": y}) + self._trace_backend( + op, + lambda: self.client.browsers.computer.click_mouse( + self.session_id, x=x, y=y, num_clicks=2 + ), + ) + + def type(self, text: str) -> None: + op = _describe_action("type", {"text": text}) + self._trace_backend( + op, lambda: self.client.browsers.computer.type_text(self.session_id, text=text) + ) + + def keypress(self, keys: List[str], hold_keys: List[str] | None = None) -> None: + normalized = _normalize_keypress_payload(keys, hold_keys or []) + op = _describe_action( + "keypress", + { + "keys": normalized["keys"], + **({"hold_keys": normalized["hold_keys"]} if normalized["hold_keys"] else {}), + }, + ) + self._trace_backend( + op, + lambda: self.client.browsers.computer.press_key( + self.session_id, + keys=normalized["keys"], + **({"hold_keys": normalized["hold_keys"]} if normalized["hold_keys"] else {}), + ), + ) + + def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + op = _describe_action( + "scroll", {"x": x, "y": y, "scroll_x": scroll_x, "scroll_y": scroll_y} + ) + self._trace_backend( + op, + lambda: self.client.browsers.computer.scroll( + self.session_id, x=x, y=y, delta_x=scroll_x, delta_y=scroll_y + ), + ) + + def move(self, x: int, y: int) -> None: + op = _describe_action("move", {"x": x, "y": y}) + self._trace_backend( + op, lambda: self.client.browsers.computer.move_mouse(self.session_id, x=x, y=y) + ) + + def drag(self, path: List[Dict[str, int]]) -> None: + op = _describe_action("drag", {"path": path}) + + def _do() -> None: + normalized_path = _normalize_drag_path(path) + _validate_drag_path(normalized_path) + self.client.browsers.computer.drag_mouse(self.session_id, path=normalized_path) + + self._trace_backend(op, _do) + + def wait(self, ms: int = 1000) -> None: + time.sleep(ms / 1000) + + def batch_actions(self, actions: List[Dict[str, Any]]) -> None: + _validate_batch_terminal_read_actions(actions) + pending = _build_pending_batch(actions) + op = _describe_translated_batch(pending) + + def _do() -> None: + if pending: + self.client.browsers.computer.batch(self.session_id, actions=pending) + + self._trace_backend(op, _do) + + def goto(self, url: str) -> None: + self.batch_actions([{"type": "goto", "url": url}]) + + def back(self) -> None: + self.batch_actions([{"type": "back"}]) + + def forward(self) -> None: + actions = _forward_batch_actions() + op = _describe_translated_batch(actions) + self._trace_backend( + op, + lambda: self.client.browsers.computer.batch( + self.session_id, actions=actions + ), + ) + + def get_current_url(self) -> str: + def _do() -> str: + copy_actions = _current_url_batch_actions() + copy_op = _describe_translated_batch(copy_actions) + self._trace_backend( + copy_op, + lambda: self.client.browsers.computer.batch( + self.session_id, actions=copy_actions + ), + ) + result = self.client.browsers.computer.read_clipboard(self.session_id) + current_url = (result.text or "").strip() + if not current_url: + raise ValueError("clipboard URL was empty") + return current_url + + return self._trace_backend("get_current_url()", _do) diff --git a/pkg/templates/python/openai-computer-use/computers/shared/__init__.py b/pkg/templates/python/openai-computer-use/computers/shared/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/pkg/templates/python/openai-computer-use/computers/shared/base_playwright.py b/pkg/templates/python/openai-computer-use/computers/shared/base_playwright.py deleted file mode 100644 index 0c38e24f..00000000 --- a/pkg/templates/python/openai-computer-use/computers/shared/base_playwright.py +++ /dev/null @@ -1,154 +0,0 @@ -import time -import base64 -from typing import List, Dict, Literal -from playwright.sync_api import sync_playwright, Browser, Page -from utils import check_blocklisted_url - -# Optional: key mapping if your model uses "CUA" style keys -CUA_KEY_TO_PLAYWRIGHT_KEY = { - "/": "Divide", - "\\": "Backslash", - "alt": "Alt", - "arrowdown": "ArrowDown", - "arrowleft": "ArrowLeft", - "arrowright": "ArrowRight", - "arrowup": "ArrowUp", - "backspace": "Backspace", - "capslock": "CapsLock", - "cmd": "Meta", - "ctrl": "Control", - "delete": "Delete", - "end": "End", - "enter": "Enter", - "esc": "Escape", - "home": "Home", - "insert": "Insert", - "option": "Alt", - "pagedown": "PageDown", - "pageup": "PageUp", - "shift": "Shift", - "space": " ", - "super": "Meta", - "tab": "Tab", - "win": "Meta", -} - - -class BasePlaywrightComputer: - """ - Abstract base for Playwright-based computers: - - - Subclasses override `_get_browser_and_page()` to do local or remote connection, - returning (Browser, Page). - - This base class handles context creation (`__enter__`/`__exit__`), - plus standard "Computer" actions like click, scroll, etc. - - We also have extra browser actions: `goto(url)` and `back()`. - """ - - def get_environment(self): - return "browser" - - def get_dimensions(self): - return (1024, 768) - - def __init__(self): - self._playwright = None - self._browser: Browser | None = None - self._page: Page | None = None - - def __enter__(self): - # Start Playwright and call the subclass hook for getting browser/page - self._playwright = sync_playwright().start() - self._browser, self._page = self._get_browser_and_page() - - # Set up network interception to flag URLs matching domains in BLOCKED_DOMAINS - def handle_route(route, request): - - url = request.url - if check_blocklisted_url(url): - print(f"Flagging blocked domain: {url}") - route.abort() - else: - route.continue_() - - self._page.route("**/*", handle_route) - - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if self._browser: - self._browser.close() - if self._playwright: - self._playwright.stop() - - def get_current_url(self) -> str: - return self._page.url - - # --- Common "Computer" actions --- - def screenshot(self) -> str: - """Capture only the viewport (not full_page).""" - png_bytes = self._page.screenshot(full_page=False) - return base64.b64encode(png_bytes).decode("utf-8") - - def click(self, x: int, y: int, button: str = "left") -> None: - match button: - case "back": - self.back() - case "forward": - self.forward() - case "wheel": - self._page.mouse.wheel(x, y) - case _: - button_mapping = {"left": "left", "right": "right"} - button_type = button_mapping.get(button, "left") - self._page.mouse.click(x, y, button=button_type) - - def double_click(self, x: int, y: int) -> None: - self._page.mouse.dblclick(x, y) - - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: - self._page.mouse.move(x, y) - self._page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") - - def type(self, text: str) -> None: - self._page.keyboard.type(text) - - def wait(self, ms: int = 1000) -> None: - time.sleep(ms / 1000) - - def move(self, x: int, y: int) -> None: - self._page.mouse.move(x, y) - - def keypress(self, keys: List[str]) -> None: - mapped_keys = [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys] - for key in mapped_keys: - self._page.keyboard.down(key) - for key in reversed(mapped_keys): - self._page.keyboard.up(key) - - def drag(self, path: List[Dict[str, int]]) -> None: - if not path: - return - self._page.mouse.move(path[0]["x"], path[0]["y"]) - self._page.mouse.down() - for point in path[1:]: - self._page.mouse.move(point["x"], point["y"]) - self._page.mouse.up() - - # --- Extra browser-oriented actions --- - def goto(self, url: str) -> None: - try: - return self._page.goto(url) - except Exception as e: - print(f"Error navigating to {url}: {e}") - - def back(self) -> None: - return self._page.go_back() - - def forward(self) -> None: - return self._page.go_forward() - - # --- Subclass hook --- - def _get_browser_and_page(self) -> tuple[Browser, Page]: - """Subclasses must implement, returning (Browser, Page).""" - raise NotImplementedError diff --git a/pkg/templates/python/openai-computer-use/main.py b/pkg/templates/python/openai-computer-use/main.py index 6ab17b17..a20c3598 100644 --- a/pkg/templates/python/openai-computer-use/main.py +++ b/pkg/templates/python/openai-computer-use/main.py @@ -1,12 +1,22 @@ import asyncio import datetime import os -from typing import TypedDict +from typing import NotRequired, TypedDict import kernel from agent import Agent -from computers.default import KernelPlaywrightBrowser +from agent.logging import ( + create_event_logger, + emit_browser_delete_done, + emit_browser_delete_started, + emit_browser_new_done, + emit_browser_new_started, + emit_session_state, + quiet_http_transport_logs, +) +from computers.kernel_computer import KernelComputer from kernel import Kernel +from replay import maybe_start_replay, maybe_stop_replay """ Example app that runs an agent using openai CUA @@ -24,16 +34,19 @@ class CuaInput(TypedDict): task: str + replay: NotRequired[bool] class CuaOutput(TypedDict): result: str + replay_url: NotRequired[str] api_key = os.getenv("OPENAI_API_KEY") if not api_key: raise ValueError("OPENAI_API_KEY is not set") +quiet_http_transport_logs() client = Kernel() app = kernel.App("python-openai-cua") @@ -43,66 +56,96 @@ async def cua_task( ctx: kernel.KernelContext, payload: CuaInput, ) -> CuaOutput: - # A function that processes a user task using the kernel browser and agent - if not payload or not payload.get("task"): raise ValueError("task is required") + on_event = create_event_logger() + + browser_create_started_at = datetime.datetime.now() + emit_browser_new_started(on_event) kernel_browser = await asyncio.to_thread( client.browsers.create, invocation_id=ctx.invocation_id, stealth=True ) - print("Kernel browser live view url: ", kernel_browser.browser_live_view_url) - cdp_ws_url = kernel_browser.cdp_ws_url + emit_browser_new_done( + on_event, browser_create_started_at, kernel_browser.browser_live_view_url + ) + emit_session_state( + on_event, kernel_browser.session_id, kernel_browser.browser_live_view_url + ) + replay = await asyncio.to_thread( + maybe_start_replay, + client, + kernel_browser.session_id, + bool(payload.get("replay", False)), + on_event, + ) + replay_url: str | None = None def run_agent(): - with KernelPlaywrightBrowser({"cdp_ws_url": cdp_ws_url}) as computer: - # Navigate to DuckDuckGo as starting page (less likely to trigger captchas than Google) - computer.goto("https://duckduckgo.com") - - # messages to provide to the agent - items = [ - { - "role": "system", - "content": f"- Current date and time: {datetime.datetime.utcnow().isoformat()} ({datetime.datetime.utcnow().strftime('%A')})", - }, - {"role": "user", "content": payload["task"]}, - ] - - # setup the agent - agent = Agent( - computer=computer, - tools=[], # can provide additional tools to the agent - acknowledge_safety_check_callback=lambda message: ( - print(f"> agent : safety check message (skipping): {message}") - or True - ), # safety check function , now defaults to true - ) + computer = KernelComputer(client, kernel_browser.session_id, on_event=on_event) + computer.goto("https://duckduckgo.com") + + now_utc = datetime.datetime.now(datetime.UTC) + items = [ + { + "role": "system", + "content": f"- Current date and time: {now_utc.isoformat()} ({now_utc.strftime('%A')})", + }, + {"role": "user", "content": payload["task"]}, + ] + + agent = Agent( + model="gpt-5.4", + computer=computer, + tools=[], + acknowledge_safety_check_callback=lambda message: ( + print(f"> agent : safety check message (skipping): {message}") + or True + ), + ) + + response_items = agent.run_full_turn( + items, + debug=False, + show_images=False, + on_event=on_event, + ) + + if not response_items or "content" not in response_items[-1]: + raise ValueError("No response from agent") + content = response_items[-1]["content"] + if ( + isinstance(content, list) + and content + and isinstance(content[0], dict) + and "text" in content[0] + ): + result = content[0]["text"] + elif isinstance(content, str): + result = content + else: + result = str(content) + return {"result": result} - # run the agent - response_items = agent.run_full_turn( - items, - debug=True, - show_images=False, + try: + result = await asyncio.to_thread(run_agent) + finally: + browser_delete_started_at = datetime.datetime.now() + emit_browser_delete_started(on_event) + try: + replay_url = await asyncio.to_thread( + maybe_stop_replay, + client, + kernel_browser.session_id, + replay, + on_event, ) + await asyncio.to_thread(client.browsers.delete_by_id, kernel_browser.session_id) + finally: + emit_browser_delete_done(on_event, browser_delete_started_at) + + if replay_url: + result["replay_url"] = replay_url + return result - if not response_items or "content" not in response_items[-1]: - raise ValueError("No response from agent") - # The content may be a list of blocks, get the first text block - content = response_items[-1]["content"] - if ( - isinstance(content, list) - and content - and isinstance(content[0], dict) - and "text" in content[0] - ): - result = content[0]["text"] - elif isinstance(content, str): - result = content - else: - result = str(content) - return {"result": result} - try: - return await asyncio.to_thread(run_agent) - finally: - await asyncio.to_thread(client.browsers.delete_by_id, kernel_browser.session_id) diff --git a/pkg/templates/python/openai-computer-use/pyproject.toml b/pkg/templates/python/openai-computer-use/pyproject.toml index 3ea73870..e0f50500 100644 --- a/pkg/templates/python/openai-computer-use/pyproject.toml +++ b/pkg/templates/python/openai-computer-use/pyproject.toml @@ -6,10 +6,7 @@ readme = "README.md" requires-python = ">=3.11" dependencies = [ "httpx>=0.28.1", - "pillow>=12.0.0", - "kernel>=0.23.0", - "playwright>=1.56.0", - "pydantic>=2.12.5", + "kernel>=0.43.0", "python-dotenv>=1.2.1", "requests>=2.32.5", ] diff --git a/pkg/templates/python/openai-computer-use/replay.py b/pkg/templates/python/openai-computer-use/replay.py new file mode 100644 index 00000000..c98316ff --- /dev/null +++ b/pkg/templates/python/openai-computer-use/replay.py @@ -0,0 +1,114 @@ +import datetime +import time +from dataclasses import dataclass +from typing import Callable + +from kernel import Kernel + +DEFAULT_REPLAY_GRACE_SECONDS = 5.0 +REPLAY_PROCESSING_DELAY_SECONDS = 2.0 +REPLAY_POLL_TIMEOUT_SECONDS = 60.0 +REPLAY_POLL_INTERVAL_SECONDS = 1.0 + + +@dataclass +class ReplayState: + enabled: bool + replay_id: str | None = None + replay_view_url: str | None = None + + +def maybe_start_replay( + client: Kernel, + session_id: str, + enabled: bool = False, + on_event: Callable[[dict], None] | None = None, +) -> ReplayState: + state = ReplayState(enabled=enabled) + if not enabled: + return state + + started_at = datetime.datetime.now() + if on_event: + on_event({"event": "backend", "data": {"op": "browsers.replays.start"}}) + + try: + replay = client.browsers.replays.start(session_id) + state.replay_id = replay.replay_id + if on_event: + on_event( + { + "event": "backend", + "data": { + "op": "browsers.replays.start.done", + "detail": state.replay_id or "", + "elapsed_ms": int( + (datetime.datetime.now() - started_at).total_seconds() * 1000 + ), + }, + } + ) + except Exception as exc: + print(f"Warning: failed to start replay recording: {exc}") + print("Continuing without replay recording.") + state.enabled = False + + return state + + +def maybe_stop_replay( + client: Kernel, + session_id: str, + replay: ReplayState, + on_event: Callable[[dict], None] | None = None, + grace_period_seconds: float = DEFAULT_REPLAY_GRACE_SECONDS, +) -> str | None: + if not replay.enabled or not replay.replay_id: + return replay.replay_view_url + + if grace_period_seconds > 0: + time.sleep(grace_period_seconds) + + started_at = datetime.datetime.now() + if on_event: + on_event({"event": "backend", "data": {"op": "browsers.replays.stop"}}) + + try: + client.browsers.replays.stop(replay_id=replay.replay_id, id=session_id) + time.sleep(REPLAY_PROCESSING_DELAY_SECONDS) + + deadline = time.time() + REPLAY_POLL_TIMEOUT_SECONDS + while time.time() < deadline: + try: + replays = client.browsers.replays.list(session_id) + for replay_item in replays: + if replay_item.replay_id == replay.replay_id: + replay.replay_view_url = replay_item.replay_view_url + break + if replay.replay_view_url: + break + except Exception: + pass + + time.sleep(REPLAY_POLL_INTERVAL_SECONDS) + + if on_event: + on_event( + { + "event": "backend", + "data": { + "op": "browsers.replays.stop.done", + "detail": replay.replay_view_url or replay.replay_id or "", + "elapsed_ms": int( + (datetime.datetime.now() - started_at).total_seconds() * 1000 + ), + }, + } + ) + + if not replay.replay_view_url: + print("Warning: replay may still be processing") + except Exception as exc: + print(f"Warning: failed to stop replay recording cleanly: {exc}") + + return replay.replay_view_url diff --git a/pkg/templates/python/openai-computer-use/run_local.py b/pkg/templates/python/openai-computer-use/run_local.py new file mode 100644 index 00000000..baa68a28 --- /dev/null +++ b/pkg/templates/python/openai-computer-use/run_local.py @@ -0,0 +1,122 @@ +""" +Local test script that creates a remote Kernel browser and runs the CUA agent. +No Kernel app deployment needed. + +Usage: + KERNEL_API_KEY=... OPENAI_API_KEY=... uv run run_local.py --task "go to example.com and summarize it" +""" + +import argparse +import datetime +import os + +from dotenv import load_dotenv + +load_dotenv(override=True) + +from kernel import Kernel +from agent import Agent +from agent.logging import ( + create_event_logger, + emit_browser_delete_done, + emit_browser_delete_started, + emit_browser_new_done, + emit_browser_new_started, + emit_session_state, + quiet_http_transport_logs, +) +from computers.kernel_computer import KernelComputer +from replay import maybe_start_replay, maybe_stop_replay + +DEFAULT_TASK = "go to example.com and summarize what the page says" + + +def parse_args(): + parser = argparse.ArgumentParser(description="Run OpenAI CUA local test") + parser.add_argument( + "--debug", + action="store_true", + help="Enable verbose debug payload logging", + ) + parser.add_argument( + "--task", + default=DEFAULT_TASK, + help="User task prompt to run in the browser session", + ) + parser.add_argument( + "--replay", + action="store_true", + help="Record a Kernel browser replay for this local run", + ) + return parser.parse_args() + + +def main(): + args = parse_args() + if not os.getenv("KERNEL_API_KEY"): + raise ValueError("KERNEL_API_KEY is not set") + if not os.getenv("OPENAI_API_KEY"): + raise ValueError("OPENAI_API_KEY is not set") + + quiet_http_transport_logs() + client = Kernel(api_key=os.getenv("KERNEL_API_KEY")) + on_event = create_event_logger(verbose=args.debug) + + browser_create_started_at = datetime.datetime.now() + emit_browser_new_started(on_event) + browser = client.browsers.create(timeout_seconds=300) + emit_browser_new_done( + on_event, browser_create_started_at, browser.browser_live_view_url + ) + emit_session_state(on_event, browser.session_id, browser.browser_live_view_url) + replay = maybe_start_replay(client, browser.session_id, args.replay, on_event) + + computer = KernelComputer(client, browser.session_id, on_event=on_event) + + try: + computer.goto("https://duckduckgo.com") + + now_utc = datetime.datetime.now(datetime.UTC) + items = [ + { + "role": "system", + "content": f"- Current date and time: {now_utc.isoformat()} ({now_utc.strftime('%A')})", + }, + { + "role": "user", + "content": args.task, + }, + ] + + agent = Agent( + model="gpt-5.4", + computer=computer, + tools=[], + acknowledge_safety_check_callback=lambda message: ( + print(f"> safety check: {message}") or True + ), + ) + + response_items = agent.run_full_turn( + items, + debug=args.debug, + show_images=False, + on_event=on_event, + ) + if not response_items: + raise ValueError("No response from agent") + finally: + browser_delete_started_at = datetime.datetime.now() + emit_browser_delete_started(on_event) + try: + replay_url = maybe_stop_replay(client, browser.session_id, replay, on_event) + if replay_url: + print(f"> Replay URL: {replay_url}") + client.browsers.delete_by_id(browser.session_id) + finally: + emit_browser_delete_done(on_event, browser_delete_started_at) + print("> Browser session deleted") + + +if __name__ == "__main__": + main() diff --git a/pkg/templates/python/openai-computer-use/utils.py b/pkg/templates/python/openai-computer-use/utils.py index b17ee811..cc3510c1 100644 --- a/pkg/templates/python/openai-computer-use/utils.py +++ b/pkg/templates/python/openai-computer-use/utils.py @@ -2,10 +2,7 @@ import requests from dotenv import load_dotenv import json -import base64 -from PIL import Image -from io import BytesIO -import io +import time from urllib.parse import urlparse load_dotenv(override=True) @@ -21,19 +18,19 @@ def pp(obj): - print(json.dumps(obj, indent=4)) + print(json.dumps(obj, indent=4, default=str)) def show_image(base_64_image): - image_data = base64.b64decode(base_64_image) - image = Image.open(BytesIO(image_data)) - image.show() - - -def calculate_image_dimensions(base_64_image): - image_data = base64.b64decode(base_64_image) - image = Image.open(io.BytesIO(image_data)) - return image.size + import base64 + from io import BytesIO + try: + from PIL import Image + image_data = base64.b64decode(base_64_image) + image = Image.open(BytesIO(image_data)) + image.show() + except ImportError: + print("[show_image] PIL not installed, skipping image display") def sanitize_message(msg: dict) -> dict: @@ -44,6 +41,25 @@ def sanitize_message(msg: dict) -> dict: sanitized = msg.copy() sanitized["output"] = {**output, "image_url": "[omitted]"} return sanitized + if msg.get("type") == "function_call_output": + output = msg.get("output") + if isinstance(output, list): + sanitized_items = [] + changed = False + for item in output: + if ( + isinstance(item, dict) + and item.get("type") == "input_image" + and isinstance(item.get("image_url"), str) + ): + sanitized_items.append({**item, "image_url": "[omitted]"}) + changed = True + else: + sanitized_items.append(item) + if changed: + sanitized = msg.copy() + sanitized["output"] = sanitized_items + return sanitized return msg @@ -58,17 +74,48 @@ def create_response(**kwargs): if openai_org: headers["Openai-Organization"] = openai_org - response = requests.post(url, headers=headers, json=kwargs) - - if response.status_code != 200: - print(f"Error: {response.status_code} {response.text}") - - return response.json() + max_attempts = int(os.getenv("OPENAI_RETRY_MAX_ATTEMPTS", "4")) + base_delay_seconds = float(os.getenv("OPENAI_RETRY_BASE_DELAY_SECONDS", "0.5")) + timeout_seconds = float(os.getenv("OPENAI_REQUEST_TIMEOUT_SECONDS", "120")) + + for attempt in range(1, max_attempts + 1): + try: + response = requests.post(url, headers=headers, json=kwargs, timeout=timeout_seconds) + except requests.RequestException as exc: + if attempt < max_attempts: + delay = base_delay_seconds * (2 ** (attempt - 1)) + print( + f"Warning: request failed ({exc}); retrying in {delay:.1f}s " + f"({attempt}/{max_attempts})" + ) + time.sleep(delay) + continue + raise RuntimeError(f"OpenAI request failed after {max_attempts} attempts: {exc}") from exc + + if response.status_code == 200: + return response.json() + + # Retry transient OpenAI server errors (5xx). + if 500 <= response.status_code < 600 and attempt < max_attempts: + delay = base_delay_seconds * (2 ** (attempt - 1)) + print( + f"Warning: OpenAI server error {response.status_code}; retrying in " + f"{delay:.1f}s ({attempt}/{max_attempts})" + ) + time.sleep(delay) + continue + + raise RuntimeError(f"OpenAI API error {response.status_code}: {response.text}") + + raise RuntimeError("OpenAI request failed unexpectedly") def check_blocklisted_url(url: str) -> None: """Raise ValueError if the given URL (including subdomains) is in the blocklist.""" - hostname = urlparse(url).hostname or "" + try: + hostname = urlparse(url).hostname or "" + except Exception: + return if any( hostname == blocked or hostname.endswith(f".{blocked}") for blocked in BLOCKED_DOMAINS diff --git a/pkg/templates/python/openai-computer-use/uv.lock b/pkg/templates/python/openai-computer-use/uv.lock index 5ab5090c..c4fbfed6 100644 --- a/pkg/templates/python/openai-computer-use/uv.lock +++ b/pkg/templates/python/openai-computer-use/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.11" [[package]] @@ -115,53 +115,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, ] -[[package]] -name = "greenlet" -version = "3.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c7/e5/40dbda2736893e3e53d25838e0f19a2b417dfc122b9989c91918db30b5d3/greenlet-3.3.0.tar.gz", hash = "sha256:a82bb225a4e9e4d653dd2fb7b8b2d36e4fb25bc0165422a11e48b88e9e6f78fb", size = 190651, upload-time = "2025-12-04T14:49:44.05Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/cb/48e964c452ca2b92175a9b2dca037a553036cb053ba69e284650ce755f13/greenlet-3.3.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e29f3018580e8412d6aaf5641bb7745d38c85228dacf51a73bd4e26ddf2a6a8e", size = 274908, upload-time = "2025-12-04T14:23:26.435Z" }, - { url = "https://files.pythonhosted.org/packages/28/da/38d7bff4d0277b594ec557f479d65272a893f1f2a716cad91efeb8680953/greenlet-3.3.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a687205fb22794e838f947e2194c0566d3812966b41c78709554aa883183fb62", size = 577113, upload-time = "2025-12-04T14:50:05.493Z" }, - { url = "https://files.pythonhosted.org/packages/3c/f2/89c5eb0faddc3ff014f1c04467d67dee0d1d334ab81fadbf3744847f8a8a/greenlet-3.3.0-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4243050a88ba61842186cb9e63c7dfa677ec146160b0efd73b855a3d9c7fcf32", size = 590338, upload-time = "2025-12-04T14:57:41.136Z" }, - { url = "https://files.pythonhosted.org/packages/80/d7/db0a5085035d05134f8c089643da2b44cc9b80647c39e93129c5ef170d8f/greenlet-3.3.0-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:670d0f94cd302d81796e37299bcd04b95d62403883b24225c6b5271466612f45", size = 601098, upload-time = "2025-12-04T15:07:11.898Z" }, - { url = "https://files.pythonhosted.org/packages/dc/a6/e959a127b630a58e23529972dbc868c107f9d583b5a9f878fb858c46bc1a/greenlet-3.3.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cb3a8ec3db4a3b0eb8a3c25436c2d49e3505821802074969db017b87bc6a948", size = 590206, upload-time = "2025-12-04T14:26:01.254Z" }, - { url = "https://files.pythonhosted.org/packages/48/60/29035719feb91798693023608447283b266b12efc576ed013dd9442364bb/greenlet-3.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2de5a0b09eab81fc6a382791b995b1ccf2b172a9fec934747a7a23d2ff291794", size = 1550668, upload-time = "2025-12-04T15:04:22.439Z" }, - { url = "https://files.pythonhosted.org/packages/0a/5f/783a23754b691bfa86bd72c3033aa107490deac9b2ef190837b860996c9f/greenlet-3.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4449a736606bd30f27f8e1ff4678ee193bc47f6ca810d705981cfffd6ce0d8c5", size = 1615483, upload-time = "2025-12-04T14:27:28.083Z" }, - { url = "https://files.pythonhosted.org/packages/1d/d5/c339b3b4bc8198b7caa4f2bd9fd685ac9f29795816d8db112da3d04175bb/greenlet-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:7652ee180d16d447a683c04e4c5f6441bae7ba7b17ffd9f6b3aff4605e9e6f71", size = 301164, upload-time = "2025-12-04T14:42:51.577Z" }, - { url = "https://files.pythonhosted.org/packages/f8/0a/a3871375c7b9727edaeeea994bfff7c63ff7804c9829c19309ba2e058807/greenlet-3.3.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:b01548f6e0b9e9784a2c99c5651e5dc89ffcbe870bc5fb2e5ef864e9cc6b5dcb", size = 276379, upload-time = "2025-12-04T14:23:30.498Z" }, - { url = "https://files.pythonhosted.org/packages/43/ab/7ebfe34dce8b87be0d11dae91acbf76f7b8246bf9d6b319c741f99fa59c6/greenlet-3.3.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:349345b770dc88f81506c6861d22a6ccd422207829d2c854ae2af8025af303e3", size = 597294, upload-time = "2025-12-04T14:50:06.847Z" }, - { url = "https://files.pythonhosted.org/packages/a4/39/f1c8da50024feecd0793dbd5e08f526809b8ab5609224a2da40aad3a7641/greenlet-3.3.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e8e18ed6995e9e2c0b4ed264d2cf89260ab3ac7e13555b8032b25a74c6d18655", size = 607742, upload-time = "2025-12-04T14:57:42.349Z" }, - { url = "https://files.pythonhosted.org/packages/77/cb/43692bcd5f7a0da6ec0ec6d58ee7cddb606d055ce94a62ac9b1aa481e969/greenlet-3.3.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c024b1e5696626890038e34f76140ed1daf858e37496d33f2af57f06189e70d7", size = 622297, upload-time = "2025-12-04T15:07:13.552Z" }, - { url = "https://files.pythonhosted.org/packages/75/b0/6bde0b1011a60782108c01de5913c588cf51a839174538d266de15e4bf4d/greenlet-3.3.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:047ab3df20ede6a57c35c14bf5200fcf04039d50f908270d3f9a7a82064f543b", size = 609885, upload-time = "2025-12-04T14:26:02.368Z" }, - { url = "https://files.pythonhosted.org/packages/49/0e/49b46ac39f931f59f987b7cd9f34bfec8ef81d2a1e6e00682f55be5de9f4/greenlet-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2d9ad37fc657b1102ec880e637cccf20191581f75c64087a549e66c57e1ceb53", size = 1567424, upload-time = "2025-12-04T15:04:23.757Z" }, - { url = "https://files.pythonhosted.org/packages/05/f5/49a9ac2dff7f10091935def9165c90236d8f175afb27cbed38fb1d61ab6b/greenlet-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83cd0e36932e0e7f36a64b732a6f60c2fc2df28c351bae79fbaf4f8092fe7614", size = 1636017, upload-time = "2025-12-04T14:27:29.688Z" }, - { url = "https://files.pythonhosted.org/packages/6c/79/3912a94cf27ec503e51ba493692d6db1e3cd8ac7ac52b0b47c8e33d7f4f9/greenlet-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a7a34b13d43a6b78abf828a6d0e87d3385680eaf830cd60d20d52f249faabf39", size = 301964, upload-time = "2025-12-04T14:36:58.316Z" }, - { url = "https://files.pythonhosted.org/packages/02/2f/28592176381b9ab2cafa12829ba7b472d177f3acc35d8fbcf3673d966fff/greenlet-3.3.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:a1e41a81c7e2825822f4e068c48cb2196002362619e2d70b148f20a831c00739", size = 275140, upload-time = "2025-12-04T14:23:01.282Z" }, - { url = "https://files.pythonhosted.org/packages/2c/80/fbe937bf81e9fca98c981fe499e59a3f45df2a04da0baa5c2be0dca0d329/greenlet-3.3.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f515a47d02da4d30caaa85b69474cec77b7929b2e936ff7fb853d42f4bf8808", size = 599219, upload-time = "2025-12-04T14:50:08.309Z" }, - { url = "https://files.pythonhosted.org/packages/c2/ff/7c985128f0514271b8268476af89aee6866df5eec04ac17dcfbc676213df/greenlet-3.3.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d2d9fd66bfadf230b385fdc90426fcd6eb64db54b40c495b72ac0feb5766c54", size = 610211, upload-time = "2025-12-04T14:57:43.968Z" }, - { url = "https://files.pythonhosted.org/packages/79/07/c47a82d881319ec18a4510bb30463ed6891f2ad2c1901ed5ec23d3de351f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30a6e28487a790417d036088b3bcb3f3ac7d8babaa7d0139edbaddebf3af9492", size = 624311, upload-time = "2025-12-04T15:07:14.697Z" }, - { url = "https://files.pythonhosted.org/packages/fd/8e/424b8c6e78bd9837d14ff7df01a9829fc883ba2ab4ea787d4f848435f23f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:087ea5e004437321508a8d6f20efc4cfec5e3c30118e1417ea96ed1d93950527", size = 612833, upload-time = "2025-12-04T14:26:03.669Z" }, - { url = "https://files.pythonhosted.org/packages/b5/ba/56699ff9b7c76ca12f1cdc27a886d0f81f2189c3455ff9f65246780f713d/greenlet-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ab97cf74045343f6c60a39913fa59710e4bd26a536ce7ab2397adf8b27e67c39", size = 1567256, upload-time = "2025-12-04T15:04:25.276Z" }, - { url = "https://files.pythonhosted.org/packages/1e/37/f31136132967982d698c71a281a8901daf1a8fbab935dce7c0cf15f942cc/greenlet-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5375d2e23184629112ca1ea89a53389dddbffcf417dad40125713d88eb5f96e8", size = 1636483, upload-time = "2025-12-04T14:27:30.804Z" }, - { url = "https://files.pythonhosted.org/packages/7e/71/ba21c3fb8c5dce83b8c01f458a42e99ffdb1963aeec08fff5a18588d8fd7/greenlet-3.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:9ee1942ea19550094033c35d25d20726e4f1c40d59545815e1128ac58d416d38", size = 301833, upload-time = "2025-12-04T14:32:23.929Z" }, - { url = "https://files.pythonhosted.org/packages/d7/7c/f0a6d0ede2c7bf092d00bc83ad5bafb7e6ec9b4aab2fbdfa6f134dc73327/greenlet-3.3.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:60c2ef0f578afb3c8d92ea07ad327f9a062547137afe91f38408f08aacab667f", size = 275671, upload-time = "2025-12-04T14:23:05.267Z" }, - { url = "https://files.pythonhosted.org/packages/44/06/dac639ae1a50f5969d82d2e3dd9767d30d6dbdbab0e1a54010c8fe90263c/greenlet-3.3.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a5d554d0712ba1de0a6c94c640f7aeba3f85b3a6e1f2899c11c2c0428da9365", size = 646360, upload-time = "2025-12-04T14:50:10.026Z" }, - { url = "https://files.pythonhosted.org/packages/e0/94/0fb76fe6c5369fba9bf98529ada6f4c3a1adf19e406a47332245ef0eb357/greenlet-3.3.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3a898b1e9c5f7307ebbde4102908e6cbfcb9ea16284a3abe15cab996bee8b9b3", size = 658160, upload-time = "2025-12-04T14:57:45.41Z" }, - { url = "https://files.pythonhosted.org/packages/93/79/d2c70cae6e823fac36c3bbc9077962105052b7ef81db2f01ec3b9bf17e2b/greenlet-3.3.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dcd2bdbd444ff340e8d6bdf54d2f206ccddbb3ccfdcd3c25bf4afaa7b8f0cf45", size = 671388, upload-time = "2025-12-04T15:07:15.789Z" }, - { url = "https://files.pythonhosted.org/packages/b8/14/bab308fc2c1b5228c3224ec2bf928ce2e4d21d8046c161e44a2012b5203e/greenlet-3.3.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5773edda4dc00e173820722711d043799d3adb4f01731f40619e07ea2750b955", size = 660166, upload-time = "2025-12-04T14:26:05.099Z" }, - { url = "https://files.pythonhosted.org/packages/4b/d2/91465d39164eaa0085177f61983d80ffe746c5a1860f009811d498e7259c/greenlet-3.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ac0549373982b36d5fd5d30beb8a7a33ee541ff98d2b502714a09f1169f31b55", size = 1615193, upload-time = "2025-12-04T15:04:27.041Z" }, - { url = "https://files.pythonhosted.org/packages/42/1b/83d110a37044b92423084d52d5d5a3b3a73cafb51b547e6d7366ff62eff1/greenlet-3.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d198d2d977460358c3b3a4dc844f875d1adb33817f0613f663a656f463764ccc", size = 1683653, upload-time = "2025-12-04T14:27:32.366Z" }, - { url = "https://files.pythonhosted.org/packages/7c/9a/9030e6f9aa8fd7808e9c31ba4c38f87c4f8ec324ee67431d181fe396d705/greenlet-3.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:73f51dd0e0bdb596fb0417e475fa3c5e32d4c83638296e560086b8d7da7c4170", size = 305387, upload-time = "2025-12-04T14:26:51.063Z" }, - { url = "https://files.pythonhosted.org/packages/a0/66/bd6317bc5932accf351fc19f177ffba53712a202f9df10587da8df257c7e/greenlet-3.3.0-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:d6ed6f85fae6cdfdb9ce04c9bf7a08d666cfcfb914e7d006f44f840b46741931", size = 282638, upload-time = "2025-12-04T14:25:20.941Z" }, - { url = "https://files.pythonhosted.org/packages/30/cf/cc81cb030b40e738d6e69502ccbd0dd1bced0588e958f9e757945de24404/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d9125050fcf24554e69c4cacb086b87b3b55dc395a8b3ebe6487b045b2614388", size = 651145, upload-time = "2025-12-04T14:50:11.039Z" }, - { url = "https://files.pythonhosted.org/packages/9c/ea/1020037b5ecfe95ca7df8d8549959baceb8186031da83d5ecceff8b08cd2/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:87e63ccfa13c0a0f6234ed0add552af24cc67dd886731f2261e46e241608bee3", size = 654236, upload-time = "2025-12-04T14:57:47.007Z" }, - { url = "https://files.pythonhosted.org/packages/69/cc/1e4bae2e45ca2fa55299f4e85854606a78ecc37fead20d69322f96000504/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2662433acbca297c9153a4023fe2161c8dcfdcc91f10433171cf7e7d94ba2221", size = 662506, upload-time = "2025-12-04T15:07:16.906Z" }, - { url = "https://files.pythonhosted.org/packages/57/b9/f8025d71a6085c441a7eaff0fd928bbb275a6633773667023d19179fe815/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3c6e9b9c1527a78520357de498b0e709fb9e2f49c3a513afd5a249007261911b", size = 653783, upload-time = "2025-12-04T14:26:06.225Z" }, - { url = "https://files.pythonhosted.org/packages/f6/c7/876a8c7a7485d5d6b5c6821201d542ef28be645aa024cfe1145b35c120c1/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:286d093f95ec98fdd92fcb955003b8a3d054b4e2cab3e2707a5039e7b50520fd", size = 1614857, upload-time = "2025-12-04T15:04:28.484Z" }, - { url = "https://files.pythonhosted.org/packages/4f/dc/041be1dff9f23dac5f48a43323cd0789cb798342011c19a248d9c9335536/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c10513330af5b8ae16f023e8ddbfb486ab355d04467c4679c5cfe4659975dd9", size = 1676034, upload-time = "2025-12-04T14:27:33.531Z" }, -] - [[package]] name = "h11" version = "0.16.0" @@ -210,7 +163,7 @@ wheels = [ [[package]] name = "kernel" -version = "0.23.0" +version = "0.43.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -220,115 +173,9 @@ dependencies = [ { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9f/84/917ef7d15d8b05660d72728771e662c870b9ab0adcc8eaf3bc64a3809b95/kernel-0.23.0.tar.gz", hash = "sha256:2cea5de91ddb4fc0882e2dadaa1c62e659d23c8acafd5c7df814c36007f73eb9", size = 170960, upload-time = "2025-12-11T20:19:26.62Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2b/e2/04abb962657c06b87d3469fd0bf355470588d12ecfa57f7bafa96aa7d10b/kernel-0.23.0-py3-none-any.whl", hash = "sha256:c5b7055bfc4bef6b36d984a870a3c779eb1018766ab0fe2845b11f130e88d83d", size = 199616, upload-time = "2025-12-11T20:19:24.24Z" }, -] - -[[package]] -name = "pillow" -version = "12.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/5a/a2f6773b64edb921a756eb0729068acad9fc5208a53f4a349396e9436721/pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc", size = 5289798, upload-time = "2025-10-15T18:21:47.763Z" }, - { url = "https://files.pythonhosted.org/packages/2e/05/069b1f8a2e4b5a37493da6c5868531c3f77b85e716ad7a590ef87d58730d/pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257", size = 4650589, upload-time = "2025-10-15T18:21:49.515Z" }, - { url = "https://files.pythonhosted.org/packages/61/e3/2c820d6e9a36432503ead175ae294f96861b07600a7156154a086ba7111a/pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642", size = 6230472, upload-time = "2025-10-15T18:21:51.052Z" }, - { url = "https://files.pythonhosted.org/packages/4f/89/63427f51c64209c5e23d4d52071c8d0f21024d3a8a487737caaf614a5795/pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3", size = 8033887, upload-time = "2025-10-15T18:21:52.604Z" }, - { url = "https://files.pythonhosted.org/packages/f6/1b/c9711318d4901093c15840f268ad649459cd81984c9ec9887756cca049a5/pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c", size = 6343964, upload-time = "2025-10-15T18:21:54.619Z" }, - { url = "https://files.pythonhosted.org/packages/41/1e/db9470f2d030b4995083044cd8738cdd1bf773106819f6d8ba12597d5352/pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227", size = 7034756, upload-time = "2025-10-15T18:21:56.151Z" }, - { url = "https://files.pythonhosted.org/packages/cc/b0/6177a8bdd5ee4ed87cba2de5a3cc1db55ffbbec6176784ce5bb75aa96798/pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b", size = 6458075, upload-time = "2025-10-15T18:21:57.759Z" }, - { url = "https://files.pythonhosted.org/packages/bc/5e/61537aa6fa977922c6a03253a0e727e6e4a72381a80d63ad8eec350684f2/pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e", size = 7125955, upload-time = "2025-10-15T18:21:59.372Z" }, - { url = "https://files.pythonhosted.org/packages/1f/3d/d5033539344ee3cbd9a4d69e12e63ca3a44a739eb2d4c8da350a3d38edd7/pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739", size = 6298440, upload-time = "2025-10-15T18:22:00.982Z" }, - { url = "https://files.pythonhosted.org/packages/4d/42/aaca386de5cc8bd8a0254516957c1f265e3521c91515b16e286c662854c4/pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e", size = 6999256, upload-time = "2025-10-15T18:22:02.617Z" }, - { url = "https://files.pythonhosted.org/packages/ba/f1/9197c9c2d5708b785f631a6dfbfa8eb3fb9672837cb92ae9af812c13b4ed/pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d", size = 2436025, upload-time = "2025-10-15T18:22:04.598Z" }, - { url = "https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" }, - { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" }, - { url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" }, - { url = "https://files.pythonhosted.org/packages/88/e1/9098d3ce341a8750b55b0e00c03f1630d6178f38ac191c81c97a3b047b44/pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d", size = 8041399, upload-time = "2025-10-15T18:22:10.872Z" }, - { url = "https://files.pythonhosted.org/packages/a7/62/a22e8d3b602ae8cc01446d0c57a54e982737f44b6f2e1e019a925143771d/pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953", size = 6347740, upload-time = "2025-10-15T18:22:12.769Z" }, - { url = "https://files.pythonhosted.org/packages/4f/87/424511bdcd02c8d7acf9f65caa09f291a519b16bd83c3fb3374b3d4ae951/pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8", size = 7040201, upload-time = "2025-10-15T18:22:14.813Z" }, - { url = "https://files.pythonhosted.org/packages/dc/4d/435c8ac688c54d11755aedfdd9f29c9eeddf68d150fe42d1d3dbd2365149/pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79", size = 6462334, upload-time = "2025-10-15T18:22:16.375Z" }, - { url = "https://files.pythonhosted.org/packages/2b/f2/ad34167a8059a59b8ad10bc5c72d4d9b35acc6b7c0877af8ac885b5f2044/pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba", size = 7134162, upload-time = "2025-10-15T18:22:17.996Z" }, - { url = "https://files.pythonhosted.org/packages/0c/b1/a7391df6adacf0a5c2cf6ac1cf1fcc1369e7d439d28f637a847f8803beb3/pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0", size = 6298769, upload-time = "2025-10-15T18:22:19.923Z" }, - { url = "https://files.pythonhosted.org/packages/a2/0b/d87733741526541c909bbf159e338dcace4f982daac6e5a8d6be225ca32d/pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a", size = 7001107, upload-time = "2025-10-15T18:22:21.644Z" }, - { url = "https://files.pythonhosted.org/packages/bc/96/aaa61ce33cc98421fb6088af2a03be4157b1e7e0e87087c888e2370a7f45/pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad", size = 2436012, upload-time = "2025-10-15T18:22:23.621Z" }, - { url = "https://files.pythonhosted.org/packages/62/f2/de993bb2d21b33a98d031ecf6a978e4b61da207bef02f7b43093774c480d/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643", size = 4045493, upload-time = "2025-10-15T18:22:25.758Z" }, - { url = "https://files.pythonhosted.org/packages/0e/b6/bc8d0c4c9f6f111a783d045310945deb769b806d7574764234ffd50bc5ea/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4", size = 4120461, upload-time = "2025-10-15T18:22:27.286Z" }, - { url = "https://files.pythonhosted.org/packages/5d/57/d60d343709366a353dc56adb4ee1e7d8a2cc34e3fbc22905f4167cfec119/pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399", size = 3576912, upload-time = "2025-10-15T18:22:28.751Z" }, - { url = "https://files.pythonhosted.org/packages/a4/a4/a0a31467e3f83b94d37568294b01d22b43ae3c5d85f2811769b9c66389dd/pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5", size = 5249132, upload-time = "2025-10-15T18:22:30.641Z" }, - { url = "https://files.pythonhosted.org/packages/83/06/48eab21dd561de2914242711434c0c0eb992ed08ff3f6107a5f44527f5e9/pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b", size = 4650099, upload-time = "2025-10-15T18:22:32.73Z" }, - { url = "https://files.pythonhosted.org/packages/fc/bd/69ed99fd46a8dba7c1887156d3572fe4484e3f031405fcc5a92e31c04035/pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3", size = 6230808, upload-time = "2025-10-15T18:22:34.337Z" }, - { url = "https://files.pythonhosted.org/packages/ea/94/8fad659bcdbf86ed70099cb60ae40be6acca434bbc8c4c0d4ef356d7e0de/pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07", size = 8037804, upload-time = "2025-10-15T18:22:36.402Z" }, - { url = "https://files.pythonhosted.org/packages/20/39/c685d05c06deecfd4e2d1950e9a908aa2ca8bc4e6c3b12d93b9cafbd7837/pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e", size = 6345553, upload-time = "2025-10-15T18:22:38.066Z" }, - { url = "https://files.pythonhosted.org/packages/38/57/755dbd06530a27a5ed74f8cb0a7a44a21722ebf318edbe67ddbd7fb28f88/pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344", size = 7037729, upload-time = "2025-10-15T18:22:39.769Z" }, - { url = "https://files.pythonhosted.org/packages/ca/b6/7e94f4c41d238615674d06ed677c14883103dce1c52e4af16f000338cfd7/pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27", size = 6459789, upload-time = "2025-10-15T18:22:41.437Z" }, - { url = "https://files.pythonhosted.org/packages/9c/14/4448bb0b5e0f22dd865290536d20ec8a23b64e2d04280b89139f09a36bb6/pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79", size = 7130917, upload-time = "2025-10-15T18:22:43.152Z" }, - { url = "https://files.pythonhosted.org/packages/dd/ca/16c6926cc1c015845745d5c16c9358e24282f1e588237a4c36d2b30f182f/pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098", size = 6302391, upload-time = "2025-10-15T18:22:44.753Z" }, - { url = "https://files.pythonhosted.org/packages/6d/2a/dd43dcfd6dae9b6a49ee28a8eedb98c7d5ff2de94a5d834565164667b97b/pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905", size = 7007477, upload-time = "2025-10-15T18:22:46.838Z" }, - { url = "https://files.pythonhosted.org/packages/77/f0/72ea067f4b5ae5ead653053212af05ce3705807906ba3f3e8f58ddf617e6/pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a", size = 2435918, upload-time = "2025-10-15T18:22:48.399Z" }, - { url = "https://files.pythonhosted.org/packages/f5/5e/9046b423735c21f0487ea6cb5b10f89ea8f8dfbe32576fe052b5ba9d4e5b/pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3", size = 5251406, upload-time = "2025-10-15T18:22:49.905Z" }, - { url = "https://files.pythonhosted.org/packages/12/66/982ceebcdb13c97270ef7a56c3969635b4ee7cd45227fa707c94719229c5/pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced", size = 4653218, upload-time = "2025-10-15T18:22:51.587Z" }, - { url = "https://files.pythonhosted.org/packages/16/b3/81e625524688c31859450119bf12674619429cab3119eec0e30a7a1029cb/pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b", size = 6266564, upload-time = "2025-10-15T18:22:53.215Z" }, - { url = "https://files.pythonhosted.org/packages/98/59/dfb38f2a41240d2408096e1a76c671d0a105a4a8471b1871c6902719450c/pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d", size = 8069260, upload-time = "2025-10-15T18:22:54.933Z" }, - { url = "https://files.pythonhosted.org/packages/dc/3d/378dbea5cd1874b94c312425ca77b0f47776c78e0df2df751b820c8c1d6c/pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a", size = 6379248, upload-time = "2025-10-15T18:22:56.605Z" }, - { url = "https://files.pythonhosted.org/packages/84/b0/d525ef47d71590f1621510327acec75ae58c721dc071b17d8d652ca494d8/pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe", size = 7066043, upload-time = "2025-10-15T18:22:58.53Z" }, - { url = "https://files.pythonhosted.org/packages/61/2c/aced60e9cf9d0cde341d54bf7932c9ffc33ddb4a1595798b3a5150c7ec4e/pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee", size = 6490915, upload-time = "2025-10-15T18:23:00.582Z" }, - { url = "https://files.pythonhosted.org/packages/ef/26/69dcb9b91f4e59f8f34b2332a4a0a951b44f547c4ed39d3e4dcfcff48f89/pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef", size = 7157998, upload-time = "2025-10-15T18:23:02.627Z" }, - { url = "https://files.pythonhosted.org/packages/61/2b/726235842220ca95fa441ddf55dd2382b52ab5b8d9c0596fe6b3f23dafe8/pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9", size = 6306201, upload-time = "2025-10-15T18:23:04.709Z" }, - { url = "https://files.pythonhosted.org/packages/c0/3d/2afaf4e840b2df71344ababf2f8edd75a705ce500e5dc1e7227808312ae1/pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b", size = 7013165, upload-time = "2025-10-15T18:23:06.46Z" }, - { url = "https://files.pythonhosted.org/packages/6f/75/3fa09aa5cf6ed04bee3fa575798ddf1ce0bace8edb47249c798077a81f7f/pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47", size = 2437834, upload-time = "2025-10-15T18:23:08.194Z" }, - { url = "https://files.pythonhosted.org/packages/54/2a/9a8c6ba2c2c07b71bec92cf63e03370ca5e5f5c5b119b742bcc0cde3f9c5/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9", size = 4045531, upload-time = "2025-10-15T18:23:10.121Z" }, - { url = "https://files.pythonhosted.org/packages/84/54/836fdbf1bfb3d66a59f0189ff0b9f5f666cee09c6188309300df04ad71fa/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2", size = 4120554, upload-time = "2025-10-15T18:23:12.14Z" }, - { url = "https://files.pythonhosted.org/packages/0d/cd/16aec9f0da4793e98e6b54778a5fbce4f375c6646fe662e80600b8797379/pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a", size = 3576812, upload-time = "2025-10-15T18:23:13.962Z" }, - { url = "https://files.pythonhosted.org/packages/f6/b7/13957fda356dc46339298b351cae0d327704986337c3c69bb54628c88155/pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b", size = 5252689, upload-time = "2025-10-15T18:23:15.562Z" }, - { url = "https://files.pythonhosted.org/packages/fc/f5/eae31a306341d8f331f43edb2e9122c7661b975433de5e447939ae61c5da/pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad", size = 4650186, upload-time = "2025-10-15T18:23:17.379Z" }, - { url = "https://files.pythonhosted.org/packages/86/62/2a88339aa40c4c77e79108facbd307d6091e2c0eb5b8d3cf4977cfca2fe6/pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01", size = 6230308, upload-time = "2025-10-15T18:23:18.971Z" }, - { url = "https://files.pythonhosted.org/packages/c7/33/5425a8992bcb32d1cb9fa3dd39a89e613d09a22f2c8083b7bf43c455f760/pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c", size = 8039222, upload-time = "2025-10-15T18:23:20.909Z" }, - { url = "https://files.pythonhosted.org/packages/d8/61/3f5d3b35c5728f37953d3eec5b5f3e77111949523bd2dd7f31a851e50690/pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e", size = 6346657, upload-time = "2025-10-15T18:23:23.077Z" }, - { url = "https://files.pythonhosted.org/packages/3a/be/ee90a3d79271227e0f0a33c453531efd6ed14b2e708596ba5dd9be948da3/pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e", size = 7038482, upload-time = "2025-10-15T18:23:25.005Z" }, - { url = "https://files.pythonhosted.org/packages/44/34/a16b6a4d1ad727de390e9bd9f19f5f669e079e5826ec0f329010ddea492f/pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9", size = 6461416, upload-time = "2025-10-15T18:23:27.009Z" }, - { url = "https://files.pythonhosted.org/packages/b6/39/1aa5850d2ade7d7ba9f54e4e4c17077244ff7a2d9e25998c38a29749eb3f/pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab", size = 7131584, upload-time = "2025-10-15T18:23:29.752Z" }, - { url = "https://files.pythonhosted.org/packages/bf/db/4fae862f8fad0167073a7733973bfa955f47e2cac3dc3e3e6257d10fab4a/pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b", size = 6400621, upload-time = "2025-10-15T18:23:32.06Z" }, - { url = "https://files.pythonhosted.org/packages/2b/24/b350c31543fb0107ab2599464d7e28e6f856027aadda995022e695313d94/pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b", size = 7142916, upload-time = "2025-10-15T18:23:34.71Z" }, - { url = "https://files.pythonhosted.org/packages/0f/9b/0ba5a6fd9351793996ef7487c4fdbde8d3f5f75dbedc093bb598648fddf0/pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0", size = 2523836, upload-time = "2025-10-15T18:23:36.967Z" }, - { url = "https://files.pythonhosted.org/packages/f5/7a/ceee0840aebc579af529b523d530840338ecf63992395842e54edc805987/pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6", size = 5255092, upload-time = "2025-10-15T18:23:38.573Z" }, - { url = "https://files.pythonhosted.org/packages/44/76/20776057b4bfd1aef4eeca992ebde0f53a4dce874f3ae693d0ec90a4f79b/pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6", size = 4653158, upload-time = "2025-10-15T18:23:40.238Z" }, - { url = "https://files.pythonhosted.org/packages/82/3f/d9ff92ace07be8836b4e7e87e6a4c7a8318d47c2f1463ffcf121fc57d9cb/pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1", size = 6267882, upload-time = "2025-10-15T18:23:42.434Z" }, - { url = "https://files.pythonhosted.org/packages/9f/7a/4f7ff87f00d3ad33ba21af78bfcd2f032107710baf8280e3722ceec28cda/pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e", size = 8071001, upload-time = "2025-10-15T18:23:44.29Z" }, - { url = "https://files.pythonhosted.org/packages/75/87/fcea108944a52dad8cca0715ae6247e271eb80459364a98518f1e4f480c1/pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca", size = 6380146, upload-time = "2025-10-15T18:23:46.065Z" }, - { url = "https://files.pythonhosted.org/packages/91/52/0d31b5e571ef5fd111d2978b84603fce26aba1b6092f28e941cb46570745/pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925", size = 7067344, upload-time = "2025-10-15T18:23:47.898Z" }, - { url = "https://files.pythonhosted.org/packages/7b/f4/2dd3d721f875f928d48e83bb30a434dee75a2531bca839bb996bb0aa5a91/pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8", size = 6491864, upload-time = "2025-10-15T18:23:49.607Z" }, - { url = "https://files.pythonhosted.org/packages/30/4b/667dfcf3d61fc309ba5a15b141845cece5915e39b99c1ceab0f34bf1d124/pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4", size = 7158911, upload-time = "2025-10-15T18:23:51.351Z" }, - { url = "https://files.pythonhosted.org/packages/a2/2f/16cabcc6426c32218ace36bf0d55955e813f2958afddbf1d391849fee9d1/pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52", size = 6408045, upload-time = "2025-10-15T18:23:53.177Z" }, - { url = "https://files.pythonhosted.org/packages/35/73/e29aa0c9c666cf787628d3f0dcf379f4791fba79f4936d02f8b37165bdf8/pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a", size = 7148282, upload-time = "2025-10-15T18:23:55.316Z" }, - { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" }, - { url = "https://files.pythonhosted.org/packages/1d/b3/582327e6c9f86d037b63beebe981425d6811104cb443e8193824ef1a2f27/pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8", size = 5215068, upload-time = "2025-10-15T18:23:59.594Z" }, - { url = "https://files.pythonhosted.org/packages/fd/d6/67748211d119f3b6540baf90f92fae73ae51d5217b171b0e8b5f7e5d558f/pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a", size = 4614994, upload-time = "2025-10-15T18:24:01.669Z" }, - { url = "https://files.pythonhosted.org/packages/2d/e1/f8281e5d844c41872b273b9f2c34a4bf64ca08905668c8ae730eedc7c9fa/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197", size = 5246639, upload-time = "2025-10-15T18:24:03.403Z" }, - { url = "https://files.pythonhosted.org/packages/94/5a/0d8ab8ffe8a102ff5df60d0de5af309015163bf710c7bb3e8311dd3b3ad0/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c", size = 6986839, upload-time = "2025-10-15T18:24:05.344Z" }, - { url = "https://files.pythonhosted.org/packages/20/2e/3434380e8110b76cd9eb00a363c484b050f949b4bbe84ba770bb8508a02c/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e", size = 5313505, upload-time = "2025-10-15T18:24:07.137Z" }, - { url = "https://files.pythonhosted.org/packages/57/ca/5a9d38900d9d74785141d6580950fe705de68af735ff6e727cb911b64740/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76", size = 5963654, upload-time = "2025-10-15T18:24:09.579Z" }, - { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" }, -] - -[[package]] -name = "playwright" -version = "1.56.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "greenlet" }, - { name = "pyee" }, -] +sdist = { url = "https://files.pythonhosted.org/packages/29/99/639401caa99d752ce430e85d2aacbf1e0da3e748d0b7cff8758b4e49f62f/kernel-0.43.0.tar.gz", hash = "sha256:f3a4c8959eb26e783ece943507871f12ae5b884c841dc81d640a2f46f22b6ed2", size = 196586, upload-time = "2026-03-10T17:30:39.461Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/31/a5362cee43f844509f1f10d8a27c9cc0e2f7bdce5353d304d93b2151c1b1/playwright-1.56.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33eb89c516cbc6723f2e3523bada4a4eb0984a9c411325c02d7016a5d625e9c", size = 40611424, upload-time = "2025-11-11T18:39:10.175Z" }, - { url = "https://files.pythonhosted.org/packages/ef/95/347eef596d8778fb53590dc326c344d427fa19ba3d42b646fce2a4572eb3/playwright-1.56.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b228b3395212b9472a4ee5f1afe40d376eef9568eb039fcb3e563de8f4f4657b", size = 39400228, upload-time = "2025-11-11T18:39:13.915Z" }, - { url = "https://files.pythonhosted.org/packages/b9/54/6ad97b08b2ca1dfcb4fbde4536c4f45c0d9d8b1857a2d20e7bbfdf43bf15/playwright-1.56.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:0ef7e6fd653267798a8a968ff7aa2dcac14398b7dd7440ef57524e01e0fbbd65", size = 40611424, upload-time = "2025-11-11T18:39:17.093Z" }, - { url = "https://files.pythonhosted.org/packages/e4/76/6d409e37e82cdd5dda3df1ab958130ae32b46e42458bd4fc93d7eb8749cb/playwright-1.56.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:404be089b49d94bc4c1fe0dfb07664bda5ffe87789034a03bffb884489bdfb5c", size = 46263122, upload-time = "2025-11-11T18:39:20.619Z" }, - { url = "https://files.pythonhosted.org/packages/4f/84/fb292cc5d45f3252e255ea39066cd1d2385c61c6c1596548dfbf59c88605/playwright-1.56.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64cda7cf4e51c0d35dab55190841bfcdfb5871685ec22cb722cd0ad2df183e34", size = 46110645, upload-time = "2025-11-11T18:39:24.005Z" }, - { url = "https://files.pythonhosted.org/packages/61/bd/8c02c3388ae14edc374ac9f22cbe4e14826c6a51b2d8eaf86e89fabee264/playwright-1.56.0-py3-none-win32.whl", hash = "sha256:d87b79bcb082092d916a332c27ec9732e0418c319755d235d93cc6be13bdd721", size = 35639837, upload-time = "2025-11-11T18:39:27.174Z" }, - { url = "https://files.pythonhosted.org/packages/64/27/f13b538fbc6b7a00152f4379054a49f6abc0bf55ac86f677ae54bc49fb82/playwright-1.56.0-py3-none-win_amd64.whl", hash = "sha256:3c7fc49bb9e673489bf2622855f9486d41c5101bbed964638552b864c4591f94", size = 35639843, upload-time = "2025-11-11T18:39:30.851Z" }, - { url = "https://files.pythonhosted.org/packages/f2/c7/3ee8b556107995846576b4fe42a08ed49b8677619421f2afacf6ee421138/playwright-1.56.0-py3-none-win_arm64.whl", hash = "sha256:2745490ae8dd58d27e5ea4d9aa28402e8e2991eb84fb4b2fd5fbde2106716f6f", size = 31248959, upload-time = "2025-11-11T18:39:33.998Z" }, + { url = "https://files.pythonhosted.org/packages/94/fb/519de9d31f1eb5b0c5bb374e31584af5a8191e25dfa05ca4014e7bd38dba/kernel-0.43.0-py3-none-any.whl", hash = "sha256:c5a1b311e318d04ec7f1bd5b7400fc38fefe72ca1d248f48ebf921a49ee3f608", size = 229540, upload-time = "2026-03-10T17:30:37.775Z" }, ] [[package]] @@ -444,27 +291,21 @@ wheels = [ ] [[package]] -name = "pyee" -version = "13.0.0" +name = "python-dotenv" +version = "1.2.1" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/95/03/1fd98d5841cd7964a27d729ccf2199602fe05eb7a405c1462eb7277945ed/pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37", size = 31250, upload-time = "2025-03-17T18:53:15.955Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/4d/b9add7c84060d4c1906abe9a7e5359f2a60f7a9a4f67268b2766673427d8/pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498", size = 15730, upload-time = "2025-03-17T18:53:14.532Z" }, + { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, ] [[package]] -name = "python-cua" +name = "python-openai-cua" version = "0.1.0" source = { virtual = "." } dependencies = [ { name = "httpx" }, { name = "kernel" }, - { name = "pillow" }, - { name = "playwright" }, - { name = "pydantic" }, { name = "python-dotenv" }, { name = "requests" }, ] @@ -472,23 +313,11 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "httpx", specifier = ">=0.28.1" }, - { name = "kernel", specifier = ">=0.23.0" }, - { name = "pillow", specifier = ">=12.0.0" }, - { name = "playwright", specifier = ">=1.56.0" }, - { name = "pydantic", specifier = ">=2.12.5" }, + { name = "kernel", specifier = ">=0.43.0" }, { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "requests", specifier = ">=2.32.5" }, ] -[[package]] -name = "python-dotenv" -version = "1.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, -] - [[package]] name = "requests" version = "2.32.5" diff --git a/pkg/templates/typescript/openai-computer-use/.env.example b/pkg/templates/typescript/openai-computer-use/.env.example index b74e0a29..3ff84207 100644 --- a/pkg/templates/typescript/openai-computer-use/.env.example +++ b/pkg/templates/typescript/openai-computer-use/.env.example @@ -1,2 +1,3 @@ -# Copy this file to .env and fill in your API key +# Copy this file to .env and fill in your API keys OPENAI_API_KEY=your_openai_api_key_here +KERNEL_API_KEY=your_kernel_api_key_here diff --git a/pkg/templates/typescript/openai-computer-use/README.md b/pkg/templates/typescript/openai-computer-use/README.md index 6ac98411..b8bab4aa 100644 --- a/pkg/templates/typescript/openai-computer-use/README.md +++ b/pkg/templates/typescript/openai-computer-use/README.md @@ -1,8 +1,28 @@ # Kernel TypeScript Sample App - OpenAI Computer Use -This is a Kernel application that demonstrates using the Computer Use Agent (CUA) from OpenAI. +This is a Kernel application that demonstrates using the Computer Use Agent (CUA) from OpenAI with Kernel's native browser control API. -It generally follows the [OpenAI CUA Sample App Reference](https://github.com/openai/openai-cua-sample-app) and uses Playwright via Kernel for browser automation. -Also makes use of the latest OpenAI SDK format, and has local equivalent to Kernel methods for local testing before deploying on Kernel. +It uses Kernel's computer control endpoints (screenshot, click, type, scroll, batch, etc.) and includes a `batch_computer_actions` tool that executes multiple actions in a single API call for lower latency. -See the [docs](https://www.kernel.sh/docs/quickstart) for information. +## Local testing + +You can test against a remote Kernel browser without deploying: + +```bash +cp .env.example .env +# Fill in OPENAI_API_KEY and KERNEL_API_KEY in .env +pnpm install +pnpm exec tsx run_local.ts +pnpm exec tsx run_local.ts --task "go to https://news.ycombinator.com and get the top 5 articles" +``` + +The local runner defaults to a built-in sample task. Pass `--task "..."` to run a custom prompt locally, and add `--debug` to include verbose in-flight events. + +## Deploy to Kernel + +```bash +kernel deploy index.ts --env-file .env +kernel invoke ts-openai-cua cua-task -p '{"task":"Go to https://news.ycombinator.com and get the top 5 articles"}' +``` + +See the [docs](https://www.kernel.sh/docs/quickstart) for more information. diff --git a/pkg/templates/typescript/openai-computer-use/index.ts b/pkg/templates/typescript/openai-computer-use/index.ts index 30c26477..e6a9a343 100644 --- a/pkg/templates/typescript/openai-computer-use/index.ts +++ b/pkg/templates/typescript/openai-computer-use/index.ts @@ -1,15 +1,28 @@ import { Kernel, type KernelContext } from '@onkernel/sdk'; -import 'dotenv/config'; +import * as dotenv from 'dotenv'; import type { ResponseItem, ResponseOutputMessage } from 'openai/resources/responses/responses'; import { Agent } from './lib/agent'; -import computers from './lib/computers'; +import { KernelComputer } from './lib/kernel-computer'; +import { maybeStartReplay, maybeStopReplay } from './lib/replay'; +import { + createEventLogger, + emitBrowserDeleteDone, + emitBrowserDeleteStarted, + emitBrowserNewDone, + emitBrowserNewStarted, + emitSessionState, +} from './lib/logging'; + +dotenv.config({ override: true, quiet: true }); interface CuaInput { task: string; + replay?: boolean; } interface CuaOutput { elapsed: number; answer: string | null; + replay_url?: string; logs?: ResponseItem[]; } @@ -38,18 +51,27 @@ app.action( async (ctx: KernelContext, payload?: CuaInput): Promise => { const start = Date.now(); if (!payload?.task) throw new Error('task is required'); + const onEvent = createEventLogger(); + emitBrowserNewStarted(onEvent); + const browserCreateStartedAt = Date.now(); const kb = await kernel.browsers.create({ invocation_id: ctx.invocation_id }); - console.log('> Kernel browser live view url:', kb.browser_live_view_url); + emitBrowserNewDone(onEvent, browserCreateStartedAt, kb.browser_live_view_url); + emitSessionState(onEvent, kb.session_id, kb.browser_live_view_url); - try { - const { computer } = await computers.create({ type: 'kernel', cdp_ws_url: kb.cdp_ws_url }); + const computer = new KernelComputer(kernel, kb.session_id, onEvent); + const replay = await maybeStartReplay(kernel, kb.session_id, { + enabled: payload.replay === true, + onEvent, + }); + let answer: string | null = null; + let replayUrl: string | null = null; - // Navigate to DuckDuckGo as starting page (less likely to trigger captchas than Google) + try { await computer.goto('https://duckduckgo.com'); const agent = new Agent({ - model: 'computer-use-preview', + model: 'gpt-5.4', computer, tools: [], acknowledge_safety_check_callback: (m: string): boolean => { @@ -58,7 +80,6 @@ app.action( }, }); - // run agent and get response const logs = await agent.runFullTurn({ messages: [ { @@ -75,13 +96,13 @@ app.action( }, ], print_steps: true, - debug: true, + debug: false, show_images: false, + onEvent, }); const elapsed = parseFloat(((Date.now() - start) / 1000).toFixed(2)); - // filter only LLM messages const messages = logs.filter( (item): item is ResponseOutputMessage => item.type === 'message' && @@ -91,22 +112,22 @@ app.action( const assistant = messages.find((m) => m.role === 'assistant'); const lastContentIndex = assistant?.content?.length ? assistant.content.length - 1 : -1; const lastContent = lastContentIndex >= 0 ? assistant?.content?.[lastContentIndex] : null; - const answer = lastContent && 'text' in lastContent ? lastContent.text : null; - - return { - // logs, // optionally, get the full agent run messages logs - elapsed, - answer, - }; + answer = lastContent && 'text' in lastContent ? lastContent.text : null; } catch (error) { - const elapsed = parseFloat(((Date.now() - start) / 1000).toFixed(2)); console.error('Error in cua-task:', error); - return { - elapsed, - answer: null, - }; + answer = null; } finally { - await kernel.browsers.deleteByID(kb.session_id); + emitBrowserDeleteStarted(onEvent); + const browserDeleteStartedAt = Date.now(); + try { + replayUrl = await maybeStopReplay(kernel, kb.session_id, replay, { onEvent }); + await kernel.browsers.deleteByID(kb.session_id); + } finally { + emitBrowserDeleteDone(onEvent, browserDeleteStartedAt); + } } + + const elapsed = parseFloat(((Date.now() - start) / 1000).toFixed(2)); + return replayUrl ? { elapsed, answer, replay_url: replayUrl } : { elapsed, answer }; }, ); diff --git a/pkg/templates/typescript/openai-computer-use/lib/agent.ts b/pkg/templates/typescript/openai-computer-use/lib/agent.ts index 97441654..b7234d21 100644 --- a/pkg/templates/typescript/openai-computer-use/lib/agent.ts +++ b/pkg/templates/typescript/openai-computer-use/lib/agent.ts @@ -6,57 +6,52 @@ import { type ResponseFunctionToolCallOutputItem, type ResponseComputerToolCall, type ResponseComputerToolCallOutputItem, - type ComputerTool, + type Tool, } from 'openai/resources/responses/responses'; import * as utils from './utils'; -import toolset from './toolset'; -import type { BasePlaywrightComputer } from './playwright/base'; -import type { LocalPlaywrightComputer } from './playwright/local'; -import type { KernelPlaywrightComputer } from './playwright/kernel'; +import type { AgentEvent } from './log-events'; +import { describeAction, describeBatchActions } from './log-events'; +import { batchInstructions, batchComputerTool, computerUseExtraTool } from './toolset'; +import type { CuaAction, KernelComputer } from './kernel-computer'; + +const BATCH_FUNC_NAME = 'batch_computer_actions'; +const EXTRA_FUNC_NAME = 'computer_use_extra'; +const POST_ACTION_SETTLE_MS = 300; +// Keep this shape aligned with CUA and current OpenAI Responses API. +const OPENAI_COMPUTER_TOOL = { type: 'computer' } as unknown as Tool; export class Agent { private model: string; - private computer: - | BasePlaywrightComputer - | LocalPlaywrightComputer - | KernelPlaywrightComputer - | undefined; - private tools: ComputerTool[]; + private computer: KernelComputer; + private tools: Tool[]; private print_steps = true; private debug = false; private show_images = false; private ackCb: (msg: string) => boolean; + private onEvent: ((event: AgentEvent) => void) | null = null; + private modelRequestStartedAt: number | null = null; constructor(opts: { model?: string; - computer?: - | BasePlaywrightComputer - | LocalPlaywrightComputer - | KernelPlaywrightComputer - | undefined; - tools?: ComputerTool[]; + computer: KernelComputer; + tools?: Tool[]; acknowledge_safety_check_callback?: (msg: string) => boolean; }) { - this.model = opts.model ?? 'computer-use-preview'; + this.model = opts.model ?? 'gpt-5.4'; this.computer = opts.computer; - this.tools = [...toolset.shared, ...(opts.tools ?? [])] as ComputerTool[]; this.ackCb = opts.acknowledge_safety_check_callback ?? ((): boolean => true); - if (this.computer) { - const [w, h] = this.computer.getDimensions(); - this.tools.push({ - type: 'computer_use_preview', - display_width: w, - display_height: h, - environment: this.computer.getEnvironment(), - }); - } + this.tools = [ + OPENAI_COMPUTER_TOOL, + batchComputerTool, + computerUseExtraTool, + ...(opts.tools ?? []), + ]; } private debugPrint(...args: unknown[]): void { if (this.debug) { - console.warn('--- debug:agent:debugPrint'); try { console.dir( args.map((msg) => utils.sanitizeMessage(msg as ResponseItem)), @@ -68,139 +63,296 @@ export class Agent { } } + private emit(event: AgentEvent['event'], data: Record): void { + if (this.print_steps) this.onEvent?.({ event, data }); + } + + private currentModelElapsedMs(): number | null { + return this.modelRequestStartedAt === null ? null : Date.now() - this.modelRequestStartedAt; + } + + private async capturePostActionScreenshot(): Promise { + await new Promise((resolve) => setTimeout(resolve, POST_ACTION_SETTLE_MS)); + return this.computer.screenshot(); + } + + private extractReasoningText(item: Record): string { + const summary = item.summary; + if (!Array.isArray(summary)) return ''; + const chunks = summary + .map((part) => { + if (!part || typeof part !== 'object') return ''; + const text = (part as { text?: unknown }).text; + return typeof text === 'string' ? text : ''; + }) + .filter(Boolean); + return chunks.join(' ').trim(); + } + + private extractUserPrompt(item: ResponseInputItem): string | null { + const message = item as unknown as { role?: unknown; content?: unknown }; + if (message.role !== 'user') return null; + if (typeof message.content === 'string') return message.content; + if (!Array.isArray(message.content)) return null; + const pieces = message.content + .map((entry) => { + if (!entry || typeof entry !== 'object') return ''; + const text = (entry as { text?: unknown }).text; + return typeof text === 'string' ? text : ''; + }) + .filter(Boolean); + return pieces.length > 0 ? pieces.join(' ') : null; + } + private async handleItem(item: ResponseItem): Promise { - if (item.type === 'message' && this.print_steps) { + const itemType = (item as { type?: string }).type; + if (itemType === 'reasoning') { + const text = this.extractReasoningText(item as unknown as Record); + if (text) this.emit('reasoning_delta', { text }); + } + + if (item.type === 'message') { const msg = item as ResponseOutputMessage; const c = msg.content; - if (Array.isArray(c) && c[0] && 'text' in c[0] && typeof c[0].text === 'string') - console.log(c[0].text); + if (msg.role === 'assistant' && Array.isArray(c)) { + for (const part of c) { + if (part && typeof part === 'object' && 'text' in part && typeof part.text === 'string') { + this.emit('text_delta', { text: part.text }); + } + } + this.emit('text_done', {}); + } } if (item.type === 'function_call') { const fc = item as ResponseFunctionToolCallItem; const argsObj = JSON.parse(fc.arguments) as Record; - if (this.print_steps) console.log(`${fc.name}(${JSON.stringify(argsObj)})`); - if (this.computer) { - const fn = (this.computer as unknown as Record)[fc.name]; - if (typeof fn === 'function') - await (fn as (...a: unknown[]) => unknown)(...Object.values(argsObj)); + if (fc.name === BATCH_FUNC_NAME && Array.isArray(argsObj.actions)) { + const actions = argsObj.actions.filter( + (action): action is Record => + typeof action === 'object' && action !== null, + ); + const elapsedMs = this.currentModelElapsedMs(); + this.emit('action', { + action_type: 'batch', + description: describeBatchActions(actions), + action: { type: 'batch', actions }, + ...(elapsedMs === null ? {} : { elapsed_ms: elapsedMs }), + }); + } else { + const elapsedMs = this.currentModelElapsedMs(); + this.emit('action', { + action_type: fc.name, + description: `${fc.name}(${JSON.stringify(argsObj)})`, + action: argsObj, + ...(elapsedMs === null ? {} : { elapsed_ms: elapsedMs }), + }); } + + if (fc.name === BATCH_FUNC_NAME) { + return this.handleBatchCall(fc.call_id, argsObj); + } + if (fc.name === EXTRA_FUNC_NAME) { + return this.handleExtraCall(fc.call_id, argsObj); + } + return [ { type: 'function_call_output', call_id: fc.call_id, - output: 'success', + output: `Unsupported function call: ${fc.name}`, } as unknown as ResponseFunctionToolCallOutputItem, ]; } if (item.type === 'computer_call') { - const cc = item as ResponseComputerToolCall; - const { type: actionType, ...actionArgs } = cc.action; - if (this.print_steps) console.log(`${actionType}(${JSON.stringify(actionArgs)})`); - if (this.computer) { - const fn = (this.computer as unknown as Record)[actionType as string]; - if (typeof fn === 'function') { - await (fn as (...a: unknown[]) => unknown)(...Object.values(actionArgs)); - const screenshot = await this.computer.screenshot(); - const pending = cc.pending_safety_checks ?? []; - for (const { message } of pending) - if (!this.ackCb(message)) throw new Error(`Safety check failed: ${message}`); - const out: Omit = { - type: 'computer_call_output', - call_id: cc.call_id, - // id: "?", // <---- omitting to work - need to determine id source, != call_id - acknowledged_safety_checks: pending, - output: { - type: 'computer_screenshot', - image_url: `data:image/webp;base64,${screenshot}`, - }, - }; - if (this.computer.getEnvironment() === 'browser') - utils.checkBlocklistedUrl(this.computer.getCurrentUrl()); - return [out as ResponseItem]; - } + const cc = item as ResponseComputerToolCall & { + action?: Record; + actions?: Array>; + }; + const actionList = Array.isArray(cc.actions) + ? cc.actions + : cc.action + ? [cc.action] + : []; + + const elapsedMs = this.currentModelElapsedMs(); + const actionType = + actionList.length === 1 ? String(actionList[0]?.type ?? 'unknown') : 'batch'; + const description = + actionList.length === 1 + ? describeAction(actionType, actionList[0] ?? {}) + : describeBatchActions(actionList); + const actionPayload = + actionList.length === 1 ? (actionList[0] ?? {}) : { type: 'batch', actions: actionList }; + this.emit('action', { + action_type: actionType, + description, + action: actionPayload, + ...(elapsedMs === null ? {} : { elapsed_ms: elapsedMs }), + }); + await this.computer.batchActions(actionList as CuaAction[]); + + const screenshot = await this.capturePostActionScreenshot(); + this.emit('screenshot', { captured: true, bytes_base64: screenshot.length }); + + const pending = cc.pending_safety_checks ?? []; + for (const check of pending) { + const msg = check.message ?? ''; + if (!this.ackCb(msg)) throw new Error(`Safety check failed: ${msg}`); } + + const currentUrl = await this.computer.getCurrentUrl(); + utils.checkBlocklistedUrl(currentUrl); + + const screenshotOutput = { + type: 'computer_screenshot', + image_url: `data:image/png;base64,${screenshot}`, + } as unknown as ResponseComputerToolCallOutputItem['output']; + + const out: Omit = { + type: 'computer_call_output', + call_id: cc.call_id, + acknowledged_safety_checks: pending, + output: screenshotOutput, + }; + return [out as ResponseItem]; } return []; } + private async handleBatchCall( + callId: string, + argsObj: Record, + ): Promise { + const actions = Array.isArray(argsObj.actions) ? (argsObj.actions as CuaAction[]) : []; + await this.computer.batchActions(actions); + + let statusText = 'Actions executed successfully.'; + const terminalReadAction = this.batchTerminalReadAction(actions); + if (terminalReadAction === 'url') { + try { + const currentUrl = await this.computer.getCurrentUrl(); + statusText = `Actions executed successfully. Current URL: ${currentUrl}`; + } catch (error) { + statusText = `Actions executed, but url() failed: ${error instanceof Error ? error.message : String(error)}`; + } + } + + const screenshot = await this.capturePostActionScreenshot(); + const outputItems: Array> = [{ type: 'input_text', text: statusText }]; + outputItems.push({ + type: 'input_image', + image_url: `data:image/png;base64,${screenshot}`, + detail: 'original', + }); + return [ + { + type: 'function_call_output', + call_id: callId, + output: outputItems, + } as unknown as ResponseFunctionToolCallOutputItem, + ]; + } + + private async handleExtraCall( + callId: string, + argsObj: Record, + ): Promise { + const action = typeof argsObj.action === 'string' ? argsObj.action : ''; + const url = typeof argsObj.url === 'string' ? argsObj.url : ''; + let statusText = ''; + if (action === 'goto') { + await this.computer.batchActions([{ type: 'goto', url }]); + statusText = 'goto executed successfully.'; + } else if (action === 'back') { + await this.computer.batchActions([{ type: 'back' }]); + statusText = 'back executed successfully.'; + } else if (action === 'url') { + const currentUrl = await this.computer.getCurrentUrl(); + statusText = `Current URL: ${currentUrl}`; + } else { + statusText = `unknown ${EXTRA_FUNC_NAME} action: ${action}`; + } + + const screenshot = await this.capturePostActionScreenshot(); + const outputItems: Array> = [{ type: 'input_text', text: statusText }]; + outputItems.push({ + type: 'input_image', + image_url: `data:image/png;base64,${screenshot}`, + detail: 'original', + }); + return [ + { + type: 'function_call_output', + call_id: callId, + output: outputItems, + } as unknown as ResponseFunctionToolCallOutputItem, + ]; + } + + private batchTerminalReadAction(actions: CuaAction[]): '' | 'url' | 'screenshot' { + if (actions.length === 0) return ''; + const lastType = actions[actions.length - 1]?.type; + if (lastType === 'url' || lastType === 'screenshot') return lastType; + return ''; + } + async runFullTurn(opts: { messages: ResponseInputItem[]; print_steps?: boolean; debug?: boolean; show_images?: boolean; + onEvent?: (event: AgentEvent) => void; }): Promise { this.print_steps = opts.print_steps ?? true; this.debug = opts.debug ?? false; this.show_images = opts.show_images ?? false; + this.onEvent = opts.onEvent ?? null; const newItems: ResponseItem[] = []; + let turns = 0; - while ( - newItems.length === 0 || - (newItems[newItems.length - 1] as ResponseItem & { role?: string }).role !== 'assistant' - ) { - // Add current URL to system message if in browser environment - const inputMessages = [...opts.messages]; - - if (this.computer?.getEnvironment() === 'browser') { - const current_url = this.computer.getCurrentUrl(); - // Find system message by checking if it has a role property with value 'system' - const sysIndex = inputMessages.findIndex((msg) => 'role' in msg && msg.role === 'system'); - - if (sysIndex >= 0) { - const msg = inputMessages[sysIndex]; - const urlInfo = `\n- Current URL: ${current_url}`; - - // Create a properly typed message based on the original - if (msg && 'content' in msg) { - if (typeof msg.content === 'string') { - // Create a new message with the updated content - const updatedMsg = { - ...msg, - content: msg.content + urlInfo, - }; - // Type assertion to ensure compatibility - inputMessages[sysIndex] = updatedMsg as typeof msg; - } else if (Array.isArray(msg.content) && msg.content.length > 0) { - // Handle array content case - const updatedContent = [...msg.content]; - - // Check if first item has text property - if (updatedContent[0] && 'text' in updatedContent[0]) { - updatedContent[0] = { - ...updatedContent[0], - text: updatedContent[0].text + urlInfo, - }; - } - - // Create updated message with new content - const updatedMsg = { - ...msg, - content: updatedContent, - }; - // Type assertion to ensure compatibility - inputMessages[sysIndex] = updatedMsg as typeof msg; - } - } - } - } + for (const message of opts.messages) { + const prompt = this.extractUserPrompt(message); + if (prompt) this.emit('prompt', { text: prompt }); + } - this.debugPrint(...inputMessages, ...newItems); - const response = await utils.createResponse({ - model: this.model, - input: [...inputMessages, ...newItems], - tools: this.tools, - truncation: 'auto', - }); - if (!response.output) throw new Error('No output from model'); - for (const msg of response.output as ResponseItem[]) { - newItems.push(msg, ...(await this.handleItem(msg))); + try { + while ( + newItems.length === 0 || + (newItems[newItems.length - 1] as ResponseItem & { role?: string }).role !== 'assistant' + ) { + turns += 1; + const inputMessages = [...opts.messages]; + + this.debugPrint(...inputMessages, ...newItems); + this.modelRequestStartedAt = Date.now(); + const response = await utils.createResponse({ + model: this.model, + input: [...inputMessages, ...newItems], + tools: this.tools, + truncation: 'auto', + reasoning: { + effort: 'low', + summary: 'concise', + }, + instructions: batchInstructions, + }); + if (!response.output) throw new Error('No output from model'); + for (const msg of response.output as ResponseItem[]) { + newItems.push(msg, ...(await this.handleItem(msg))); + } + this.modelRequestStartedAt = null; + this.emit('turn_done', { turn: turns }); } + } catch (error) { + this.modelRequestStartedAt = null; + this.emit('error', { message: error instanceof Error ? error.message : String(error) }); + throw error; } + this.emit('run_complete', { turns }); - // Return sanitized messages if show_images is false return !this.show_images ? newItems.map((msg) => utils.sanitizeMessage(msg) as ResponseItem) : newItems; diff --git a/pkg/templates/typescript/openai-computer-use/lib/computers.ts b/pkg/templates/typescript/openai-computer-use/lib/computers.ts deleted file mode 100644 index 5828fc8e..00000000 --- a/pkg/templates/typescript/openai-computer-use/lib/computers.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { KernelPlaywrightComputer } from './playwright/kernel'; -import { LocalPlaywrightComputer } from './playwright/local'; - -interface KernelConfig { - type: 'kernel'; - cdp_ws_url: string; -} -interface LocalConfig { - type: 'local'; - headless?: boolean; -} -type ComputerConfig = KernelConfig | LocalConfig; - -export default { - async create( - cfg: ComputerConfig, - ): Promise<{ computer: KernelPlaywrightComputer | LocalPlaywrightComputer }> { - if (cfg.type === 'kernel') { - const computer = new KernelPlaywrightComputer(cfg.cdp_ws_url); - await computer.enter(); - return { computer }; - } else { - const computer = new LocalPlaywrightComputer(cfg.headless ?? false); - await computer.enter(); - return { computer }; - } - }, -}; diff --git a/pkg/templates/typescript/openai-computer-use/lib/kernel-computer.ts b/pkg/templates/typescript/openai-computer-use/lib/kernel-computer.ts new file mode 100644 index 00000000..0e2e96c1 --- /dev/null +++ b/pkg/templates/typescript/openai-computer-use/lib/kernel-computer.ts @@ -0,0 +1,585 @@ +import { Kernel } from '@onkernel/sdk'; +import { describeAction, type AgentEvent } from './log-events'; + +// CUA model key names -> X11 keysym names for the Kernel computer API +const KEYSYM_MAP: Record = { + ENTER: 'Return', + Enter: 'Return', + RETURN: 'Return', + BACKSPACE: 'BackSpace', + Backspace: 'BackSpace', + DELETE: 'Delete', + TAB: 'Tab', + ESCAPE: 'Escape', + Escape: 'Escape', + ESC: 'Escape', + SPACE: 'space', + Space: 'space', + UP: 'Up', + DOWN: 'Down', + LEFT: 'Left', + RIGHT: 'Right', + HOME: 'Home', + END: 'End', + PAGEUP: 'Prior', + PAGE_UP: 'Prior', + PageUp: 'Prior', + PAGEDOWN: 'Next', + PAGE_DOWN: 'Next', + PageDown: 'Next', + CAPS_LOCK: 'Caps_Lock', + CapsLock: 'Caps_Lock', + CTRL: 'Control_L', + Ctrl: 'Control_L', + CONTROL: 'Control_L', + Control: 'Control_L', + ALT: 'Alt_L', + Alt: 'Alt_L', + SHIFT: 'Shift_L', + Shift: 'Shift_L', + META: 'Super_L', + Meta: 'Super_L', + SUPER: 'Super_L', + Super: 'Super_L', + CMD: 'Super_L', + COMMAND: 'Super_L', + F1: 'F1', + F2: 'F2', + F3: 'F3', + F4: 'F4', + F5: 'F5', + F6: 'F6', + F7: 'F7', + F8: 'F8', + F9: 'F9', + F10: 'F10', + F11: 'F11', + F12: 'F12', + INSERT: 'Insert', + Insert: 'Insert', + PRINT: 'Print', + SCROLLLOCK: 'Scroll_Lock', + PAUSE: 'Pause', + NUMLOCK: 'Num_Lock', +}; + +const MODIFIER_KEYSYMS = new Set([ + 'Control_L', + 'Control_R', + 'Alt_L', + 'Alt_R', + 'Shift_L', + 'Shift_R', + 'Super_L', + 'Super_R', + 'Meta_L', + 'Meta_R', +]); +const GOTO_CHORD_DELAY_MS = 200; + +function translateKeys(keys: string[]): string[] { + return keys.map((k) => KEYSYM_MAP[k] ?? k); +} + +function expandComboKeys(keys: string[]): string[] { + const out: string[] = []; + for (const raw of keys) { + if (typeof raw !== 'string') continue; + const parts = raw.includes('+') ? raw.split('+') : [raw]; + for (const part of parts) { + const trimmed = part.trim(); + if (trimmed) out.push(trimmed); + } + } + return out; +} + +function normalizeKeypressPayload( + keys: string[] = [], + holdKeys: string[] = [], +): { keys: string[]; holdKeys: string[] } { + const translatedHoldKeys = translateKeys(expandComboKeys(holdKeys)); + const translatedKeyEntries = translateKeys(expandComboKeys(keys)); + + const holdFromKeys: string[] = []; + const primaryKeys: string[] = []; + for (const key of translatedKeyEntries) { + if (MODIFIER_KEYSYMS.has(key)) holdFromKeys.push(key); + else primaryKeys.push(key); + } + + if (primaryKeys.length === 0) { + return { keys: translatedKeyEntries, holdKeys: translatedHoldKeys }; + } + + const holdMerged = [...translatedHoldKeys, ...holdFromKeys]; + const dedupedHold: string[] = []; + for (const key of holdMerged) { + if (!dedupedHold.includes(key)) dedupedHold.push(key); + } + return { keys: primaryKeys, holdKeys: dedupedHold }; +} + +function pixelsToScrollTicks(delta: number | undefined): number { + const value = typeof delta === 'number' && Number.isFinite(delta) ? delta : 0; + return Math.trunc(value); +} + +export interface CuaAction { + type: string; + x?: number; + y?: number; + text?: string; + url?: string; + keys?: string[]; + hold_keys?: string[]; + button?: string | number; + scroll_x?: number; + scroll_y?: number; + ms?: number; + path?: Array<{ x: number; y: number }>; + [key: string]: unknown; +} + +type BatchAction = { + type: 'click_mouse' | 'move_mouse' | 'type_text' | 'press_key' | 'scroll' | 'drag_mouse' | 'sleep'; + click_mouse?: { x: number; y: number; button?: string; num_clicks?: number }; + move_mouse?: { x: number; y: number }; + type_text?: { text: string }; + press_key?: { keys: string[]; hold_keys?: string[] }; + scroll?: { x: number; y: number; delta_x?: number; delta_y?: number }; + drag_mouse?: { path: number[][] }; + sleep?: { duration_ms: number }; +}; + +function normalizeButton(button?: string | number): string { + if (button === undefined || button === null) return 'left'; + if (typeof button === 'number') { + switch (button) { + case 1: return 'left'; + case 2: return 'middle'; + case 3: return 'right'; + default: return 'left'; + } + } + return button; +} + +function normalizeDragPath(path: unknown): number[][] { + if (!Array.isArray(path)) return []; + + const points: Array<[number, number]> = []; + for (const point of path) { + if (Array.isArray(point) && point.length >= 2) { + const [x, y] = point; + if (typeof x === 'number' && Number.isFinite(x) && typeof y === 'number' && Number.isFinite(y)) { + points.push([Math.trunc(x), Math.trunc(y)]); + } + continue; + } + + if ( + point && + typeof point === 'object' && + typeof (point as { x?: unknown }).x === 'number' && + Number.isFinite((point as { x: number }).x) && + typeof (point as { y?: unknown }).y === 'number' && + Number.isFinite((point as { y: number }).y) + ) { + points.push([ + Math.trunc((point as { x: number }).x), + Math.trunc((point as { y: number }).y), + ]); + } + } + + return points; +} + +function validateDragPath(path: number[][]): void { + if (path.length >= 2) return; + throw new Error(`drag action requires path with at least two points; got ${JSON.stringify(path)}`); +} + +function translateCuaAction(action: CuaAction): BatchAction { + switch (action.type) { + case 'click': { + if (action.button === 'back') { + return { type: 'press_key', press_key: { hold_keys: ['Alt'], keys: ['Left'] } }; + } + if (action.button === 'forward') { + return { type: 'press_key', press_key: { hold_keys: ['Alt'], keys: ['Right'] } }; + } + if (action.button === 'wheel') { + return { + type: 'scroll', + scroll: { + x: action.x ?? 0, + y: action.y ?? 0, + delta_x: pixelsToScrollTicks(action.scroll_x), + delta_y: pixelsToScrollTicks(action.scroll_y), + }, + }; + } + return { + type: 'click_mouse', + click_mouse: { x: action.x ?? 0, y: action.y ?? 0, button: normalizeButton(action.button) }, + }; + } + case 'double_click': + return { + type: 'click_mouse', + click_mouse: { x: action.x ?? 0, y: action.y ?? 0, num_clicks: 2 }, + }; + case 'type': + return { type: 'type_text', type_text: { text: action.text ?? '' } }; + case 'keypress': { + const normalized = normalizeKeypressPayload(action.keys ?? [], action.hold_keys ?? []); + return { + type: 'press_key', + press_key: { + keys: normalized.keys, + ...(normalized.holdKeys.length > 0 ? { hold_keys: normalized.holdKeys } : {}), + }, + }; + } + case 'scroll': + return { + type: 'scroll', + scroll: { + x: action.x ?? 0, + y: action.y ?? 0, + delta_x: pixelsToScrollTicks(action.scroll_x), + delta_y: pixelsToScrollTicks(action.scroll_y), + }, + }; + case 'move': + return { type: 'move_mouse', move_mouse: { x: action.x ?? 0, y: action.y ?? 0 } }; + case 'drag': { + const path = normalizeDragPath(action.path); + validateDragPath(path); + return { type: 'drag_mouse', drag_mouse: { path } }; + } + case 'wait': + return { type: 'sleep', sleep: { duration_ms: action.ms ?? 1000 } }; + default: + throw new Error(`Unknown CUA action type: ${action.type}`); + } +} + +function isBatchComputerActionType(actionType: string): boolean { + return ['click', 'double_click', 'type', 'keypress', 'scroll', 'move', 'drag', 'wait'].includes( + actionType, + ); +} + +function pressKeyAction(keys: string[], holdKeys?: string[]): BatchAction { + const normalized = normalizeKeypressPayload(keys, holdKeys); + return { + type: 'press_key', + press_key: { + keys: normalized.keys, + ...(normalized.holdKeys.length > 0 ? { hold_keys: normalized.holdKeys } : {}), + }, + }; +} + +function gotoBatchActions(url: string): BatchAction[] { + return [ + pressKeyAction(['l'], ['Ctrl']), + { type: 'sleep', sleep: { duration_ms: GOTO_CHORD_DELAY_MS } }, + pressKeyAction(['a'], ['Ctrl']), + { type: 'type_text', type_text: { text: url } }, + pressKeyAction(['Return']), + ]; +} + +function backBatchActions(): BatchAction[] { + return [pressKeyAction(['Left'], ['Alt'])]; +} + +function forwardBatchActions(): BatchAction[] { + return [pressKeyAction(['Right'], ['Alt'])]; +} + +function currentUrlBatchActions(): BatchAction[] { + return [ + pressKeyAction(['l'], ['Ctrl']), + pressKeyAction(['a'], ['Ctrl']), + pressKeyAction(['c'], ['Ctrl']), + pressKeyAction(['Escape']), + ]; +} + +function validateBatchTerminalReadActions(actions: CuaAction[]): void { + let readIdx = -1; + let readType = ''; + actions.forEach((action, idx) => { + if (action.type !== 'url' && action.type !== 'screenshot') return; + if (readIdx >= 0) { + throw new Error( + `batch can include at most one return-value action (${readType} or ${action.type}); found ${readType} at index ${readIdx} and ${action.type} at index ${idx}`, + ); + } + if (idx !== actions.length - 1) { + throw new Error(`return-value action "${action.type}" must be last in batch`); + } + readIdx = idx; + readType = action.type; + }); +} + +function buildPendingBatch(actions: CuaAction[]): BatchAction[] { + const pending: BatchAction[] = []; + for (const action of actions) { + const actionType = action.type; + if (isBatchComputerActionType(actionType)) { + pending.push(translateCuaAction(action)); + continue; + } + if (actionType === 'goto') { + pending.push(...gotoBatchActions(action.url ?? '')); + continue; + } + if (actionType === 'back') { + pending.push(...backBatchActions()); + continue; + } + if (actionType === 'url' || actionType === 'screenshot') { + continue; + } + throw new Error(`Unknown CUA action type: ${actionType}`); + } + return pending; +} + +function truncateText(text: string, max = 30): string { + if (text.length <= max) return text; + return `${text.slice(0, max - 3)}...`; +} + +function describeTranslatedBatch(actions: BatchAction[]): string { + const parts = actions.map((action) => { + switch (action.type) { + case 'click_mouse': { + const click = action.click_mouse; + if (!click) return action.type; + if ((click.num_clicks ?? 0) > 1) return `double_click(${click.x},${click.y})`; + return `click(${click.x},${click.y})`; + } + case 'type_text': { + const text = action.type_text?.text ?? ''; + return `type(${JSON.stringify(truncateText(text))})`; + } + case 'press_key': + return `key(hold=${JSON.stringify(action.press_key?.hold_keys ?? [])}, keys=${JSON.stringify(action.press_key?.keys ?? [])})`; + case 'scroll': + return 'scroll'; + case 'move_mouse': + return 'move'; + case 'drag_mouse': + return 'drag'; + case 'sleep': + return `sleep(${action.sleep?.duration_ms ?? 0}ms)`; + default: + return action.type; + } + }); + return `batch[${parts.join(' -> ')}]`; +} + +export class KernelComputer { + private client: Kernel; + private sessionId: string; + private width = 1920; + private height = 1080; + private onEvent: ((event: AgentEvent) => void) | null; + + constructor(client: Kernel, sessionId: string, onEvent?: (event: AgentEvent) => void) { + this.client = client; + this.sessionId = sessionId; + this.onEvent = onEvent ?? null; + } + + getEnvironment(): 'browser' { + return 'browser'; + } + + getDimensions(): [number, number] { + return [this.width, this.height]; + } + + private emitBackend(op: string, detail?: string, elapsedMs?: number): void { + const data: Record = { op }; + if (detail) data.detail = detail; + if (typeof elapsedMs === 'number') data.elapsed_ms = elapsedMs; + this.onEvent?.({ event: 'backend', data }); + } + + private async traceCall( + op: string, + fn: () => Promise, + detail?: string | ((result: T) => string | undefined), + ): Promise { + this.emitBackend(op); + const started = Date.now(); + let result!: T; + let completed = false; + try { + result = await fn(); + completed = true; + return result; + } finally { + const elapsedMs = Date.now() - started; + let resolvedDetail: string | undefined; + if (completed) { + resolvedDetail = + typeof detail === 'function' ? detail(result) : detail; + } + this.emitBackend(`${op}.done`, resolvedDetail, elapsedMs); + } + } + + async screenshot(): Promise { + return this.traceCall('screenshot', async () => { + const resp = await this.client.browsers.computer.captureScreenshot(this.sessionId); + const buf = Buffer.from(await resp.arrayBuffer()); + return buf.toString('base64'); + }); + } + + async click(x: number, y: number, button: string | number = 'left'): Promise { + if (button === 'back') { + await this.back(); + return; + } + if (button === 'forward') { + await this.forward(); + return; + } + if (button === 'wheel') { + await this.scroll(x, y, 0, 0); + return; + } + const normalizedButton = normalizeButton(button) as 'left' | 'right' | 'middle'; + const op = describeAction('click', { x, y, button: normalizedButton }); + await this.traceCall(op, async () => { + await this.client.browsers.computer.clickMouse(this.sessionId, { + x, + y, + button: normalizedButton, + }); + }); + } + + async doubleClick(x: number, y: number): Promise { + const op = describeAction('double_click', { x, y }); + await this.traceCall(op, async () => { + await this.client.browsers.computer.clickMouse(this.sessionId, { x, y, num_clicks: 2 }); + }); + } + + async type(text: string): Promise { + const op = describeAction('type', { text }); + await this.traceCall(op, async () => { + await this.client.browsers.computer.typeText(this.sessionId, { text }); + }); + } + + async keypress(keys: string[], holdKeys: string[] = []): Promise { + const normalized = normalizeKeypressPayload(keys, holdKeys); + const op = describeAction('keypress', { + keys: normalized.keys, + ...(normalized.holdKeys.length > 0 ? { hold_keys: normalized.holdKeys } : {}), + }); + await this.traceCall(op, async () => { + await this.client.browsers.computer.pressKey( + this.sessionId, + { + keys: normalized.keys, + ...(normalized.holdKeys.length > 0 ? { hold_keys: normalized.holdKeys } : {}), + } as Parameters[1], + ); + }); + } + + async scroll(x: number, y: number, scrollX: number, scrollY: number): Promise { + const op = describeAction('scroll', { x, y, scroll_x: scrollX, scroll_y: scrollY }); + const tickX = pixelsToScrollTicks(scrollX); + const tickY = pixelsToScrollTicks(scrollY); + await this.traceCall(op, async () => { + await this.client.browsers.computer.scroll(this.sessionId, { + x, + y, + delta_x: tickX, + delta_y: tickY, + }); + }); + } + + async move(x: number, y: number): Promise { + const op = describeAction('move', { x, y }); + await this.traceCall(op, async () => { + await this.client.browsers.computer.moveMouse(this.sessionId, { x, y }); + }); + } + + async drag(path: Array<{ x: number; y: number }>): Promise { + const op = describeAction('drag', { path }); + await this.traceCall(op, async () => { + const normalizedPath = normalizeDragPath(path); + validateDragPath(normalizedPath); + await this.client.browsers.computer.dragMouse(this.sessionId, { path: normalizedPath }); + }); + } + + async wait(ms = 1000): Promise { + await new Promise((resolve) => setTimeout(resolve, ms)); + } + + async batchActions(actions: CuaAction[]): Promise { + validateBatchTerminalReadActions(actions); + const pending = buildPendingBatch(actions); + const op = describeTranslatedBatch(pending); + await this.traceCall(op, async () => { + if (pending.length === 0) return; + await this.client.browsers.computer.batch(this.sessionId, { + actions: pending as Parameters[1]['actions'], + }); + }); + } + + async goto(url: string): Promise { + await this.batchActions([{ type: 'goto', url }]); + } + + async back(): Promise { + await this.batchActions([{ type: 'back' }]); + } + + async forward(): Promise { + const forwardActions = forwardBatchActions(); + await this.traceCall(describeTranslatedBatch(forwardActions), async () => { + await this.client.browsers.computer.batch(this.sessionId, { + actions: forwardActions as Parameters[1]['actions'], + }); + }); + } + + async getCurrentUrl(): Promise { + return this.traceCall('get_current_url()', async () => { + const copyActions = currentUrlBatchActions(); + await this.traceCall(describeTranslatedBatch(copyActions), async () => { + await this.client.browsers.computer.batch(this.sessionId, { + actions: copyActions as Parameters[1]['actions'], + }); + }); + const result = await this.client.browsers.computer.readClipboard(this.sessionId); + const currentUrl = (result.text ?? '').trim(); + if (!currentUrl) { + throw new Error('clipboard URL was empty'); + } + return currentUrl; + }); + } +} diff --git a/pkg/templates/typescript/openai-computer-use/lib/log-events.ts b/pkg/templates/typescript/openai-computer-use/lib/log-events.ts new file mode 100644 index 00000000..87643472 --- /dev/null +++ b/pkg/templates/typescript/openai-computer-use/lib/log-events.ts @@ -0,0 +1,84 @@ +export type AgentEventName = + | 'session_state' + | 'backend' + | 'prompt' + | 'reasoning_delta' + | 'text_delta' + | 'text_done' + | 'action' + | 'screenshot' + | 'turn_done' + | 'run_complete' + | 'error'; + +export interface AgentEvent { + event: AgentEventName; + data: Record; +} + +function toInt(value: unknown): number { + if (typeof value === 'number' && Number.isFinite(value)) return Math.trunc(value); + return 0; +} + +function truncate(text: string, max = 60): string { + return text.length > max ? `${text.slice(0, max - 3)}...` : text; +} + +export function describeAction(actionType: string, actionArgs: Record): string { + switch (actionType) { + case 'click': { + const x = toInt(actionArgs.x); + const y = toInt(actionArgs.y); + const button = typeof actionArgs.button === 'string' ? actionArgs.button : 'left'; + return button === 'left' ? `click(${x}, ${y})` : `click(${x}, ${y}, ${button})`; + } + case 'double_click': + return `double_click(${toInt(actionArgs.x)}, ${toInt(actionArgs.y)})`; + case 'type': { + const text = typeof actionArgs.text === 'string' ? actionArgs.text : ''; + return `type(${JSON.stringify(truncate(text))})`; + } + case 'keypress': { + const keys = Array.isArray(actionArgs.keys) ? actionArgs.keys : []; + const holdKeys = Array.isArray(actionArgs.hold_keys) ? actionArgs.hold_keys : []; + const serializedKeys = keys.filter((k): k is string => typeof k === 'string'); + const serializedHoldKeys = holdKeys.filter((k): k is string => typeof k === 'string'); + if (serializedHoldKeys.length > 0) { + return `keypress(hold=${JSON.stringify(serializedHoldKeys)}, keys=${JSON.stringify(serializedKeys)})`; + } + return `keypress(${JSON.stringify(serializedKeys)})`; + } + case 'scroll': + return `scroll(${toInt(actionArgs.x)}, ${toInt(actionArgs.y)}, dx=${toInt(actionArgs.scroll_x)}, dy=${toInt(actionArgs.scroll_y)})`; + case 'move': + return `move(${toInt(actionArgs.x)}, ${toInt(actionArgs.y)})`; + case 'drag': + return 'drag(...)'; + case 'wait': { + const ms = typeof actionArgs.ms === 'number' ? Math.trunc(actionArgs.ms) : 1000; + return `wait(${ms}ms)`; + } + case 'goto': { + const url = typeof actionArgs.url === 'string' ? actionArgs.url : ''; + return `goto(${JSON.stringify(url)})`; + } + case 'back': + return 'back()'; + case 'url': + return 'url()'; + case 'screenshot': + return 'screenshot()'; + default: + return actionType; + } +} + +export function describeBatchActions(actions: Array>): string { + const pieces = actions.map((action) => { + const actionType = typeof action.type === 'string' ? action.type : 'unknown'; + const { type: _ignored, ...actionArgs } = action; + return describeAction(actionType, actionArgs); + }); + return `batch[${pieces.join(' -> ')}]`; +} diff --git a/pkg/templates/typescript/openai-computer-use/lib/logging.ts b/pkg/templates/typescript/openai-computer-use/lib/logging.ts new file mode 100644 index 00000000..77eaf3c6 --- /dev/null +++ b/pkg/templates/typescript/openai-computer-use/lib/logging.ts @@ -0,0 +1,271 @@ +import type { AgentEvent } from './log-events'; + +const MAX_LINE_WIDTH = 120; + +function timestamp(): string { + return new Date().toISOString().slice(11, 23); +} + +function asString(value: unknown): string { + return typeof value === 'string' ? value : ''; +} + +function asNumber(value: unknown): number | null { + return typeof value === 'number' && Number.isFinite(value) ? value : null; +} + +function truncateOneLine(text: string, max = 90): string { + const singleLine = text.replace(/\s+/g, ' ').trim(); + return singleLine.length > max ? `${singleLine.slice(0, max - 3)}...` : singleLine; +} + +function formatKernelOp(op: string): string { + if (!op) return op; + if (op.includes('(') || op.includes('[')) return op; + return `${op}()`; +} + +export function emitBrowserNewStarted(onEvent: (event: AgentEvent) => void): void { + onEvent({ event: 'backend', data: { op: 'browsers.new' } }); +} + +export function emitBrowserNewDone( + onEvent: (event: AgentEvent) => void, + startedAtMs: number, + liveViewUrl?: string | null, +): void { + onEvent({ + event: 'backend', + data: { + op: 'browsers.new.done', + detail: liveViewUrl ?? '', + elapsed_ms: Date.now() - startedAtMs, + }, + }); +} + +export function emitSessionState( + onEvent: (event: AgentEvent) => void, + sessionId: string, + liveViewUrl?: string | null, +): void { + onEvent({ + event: 'session_state', + data: { session_id: sessionId, live_view_url: liveViewUrl ?? '' }, + }); +} + +export function emitBrowserDeleteStarted(onEvent: (event: AgentEvent) => void): void { + onEvent({ event: 'backend', data: { op: 'browsers.delete' } }); +} + +export function emitBrowserDeleteDone( + onEvent: (event: AgentEvent) => void, + startedAtMs: number, +): void { + onEvent({ + event: 'backend', + data: { + op: 'browsers.delete.done', + elapsed_ms: Date.now() - startedAtMs, + }, + }); +} + +class ThinkingSpinner { + private active = false; + private timer: NodeJS.Timeout | null = null; + private frame = 0; + private startAt = 0; + private startTS = ''; + private reasoning = ''; + + constructor(private readonly enabled: boolean) {} + + start(): void { + if (!this.enabled || this.active) return; + this.active = true; + this.frame = 0; + this.reasoning = ''; + this.startAt = Date.now(); + this.startTS = timestamp(); + this.timer = setInterval(() => this.redraw(), 100); + } + + addReasoning(text: string): void { + if (!this.active) return; + this.reasoning += text; + } + + stop(action?: string, opts?: { elapsedSeconds?: number }): void { + const elapsedSeconds = opts?.elapsedSeconds; + if (!this.active) { + if (action) { + const elapsedPrefix = + typeof elapsedSeconds === 'number' ? `[${elapsedSeconds.toFixed(3)}s] ` : ''; + process.stdout.write(`${timestamp()} agent> ${elapsedPrefix}${action}\n`); + } + return; + } + this.active = false; + if (this.timer) clearInterval(this.timer); + this.timer = null; + + const elapsed = + typeof elapsedSeconds === 'number' + ? elapsedSeconds.toFixed(3) + : ((Date.now() - this.startAt) / 1000).toFixed(3); + if (this.reasoning.trim()) { + const thinkingText = truncateOneLine(this.reasoning, 70); + const suffix = action ? ` -> ${action}` : ''; + process.stdout.write(`\r\x1b[2K${this.startTS} agent> [${elapsed}s] ${thinkingText}${suffix}\n`); + } else if (action) { + process.stdout.write(`\r\x1b[2K${this.startTS} agent> [${elapsed}s] ${action}\n`); + } else { + process.stdout.write(`\r\x1b[2K${this.startTS} agent> [${elapsed}s] thinking...\n`); + } + } + + private redraw(): void { + if (!this.active) return; + this.frame += 1; + const elapsed = ((Date.now() - this.startAt) / 1000).toFixed(3); + if (this.reasoning.trim()) { + const prefix = `${this.startTS} agent> [${elapsed}s] `; + const maxReasoningLen = Math.max(20, MAX_LINE_WIDTH - prefix.length); + const text = truncateOneLine(this.reasoning, maxReasoningLen); + process.stdout.write(`\r\x1b[2K${prefix}${text}`); + return; + } + const dots = '.'.repeat((this.frame % 3) + 1).padEnd(3, ' '); + process.stdout.write(`\r\x1b[2K${this.startTS} agent> [${elapsed}s] thinking${dots}`); + } +} + +export function createEventLogger(opts?: { verbose?: boolean }): (event: AgentEvent) => void { + const verbose = opts?.verbose ?? false; + + let inText = false; + let lastLiveViewUrl = ''; + const spinner = new ThinkingSpinner(process.stdout.isTTY); + + return (event: AgentEvent): void => { + const data = event.data; + switch (event.event) { + case 'session_state': { + const liveUrl = asString(data.live_view_url); + if (liveUrl && liveUrl !== lastLiveViewUrl) { + process.stdout.write(`${timestamp()} kernel> live view: ${liveUrl}\n`); + lastLiveViewUrl = liveUrl; + } + break; + } + case 'backend': { + const op = asString(data.op); + if (!op) break; + + if (inText) { + process.stdout.write('\n'); + inText = false; + } + + if (op === 'live_url') { + const detail = asString(data.detail); + if (detail && detail !== lastLiveViewUrl) { + process.stdout.write(`${timestamp()} kernel> live view: ${detail}\n`); + lastLiveViewUrl = detail; + } + break; + } + + if (op.endsWith('.done')) { + const baseOp = op.slice(0, -'.done'.length); + const displayOp = formatKernelOp(baseOp); + const detail = asString(data.detail); + const elapsedMs = asNumber(data.elapsed_ms); + const elapsed = elapsedMs === null ? '' : `[${(elapsedMs / 1000).toFixed(3)}s] `; + process.stdout.write( + `${timestamp()} kernel> ${elapsed}${displayOp}${detail ? ` ${detail}` : ''}\n`, + ); + if (baseOp === 'browsers.new' && detail) { + lastLiveViewUrl = detail; + } + break; + } + + if (verbose) process.stdout.write(`${timestamp()} kernel> ${op}\n`); + break; + } + case 'prompt': { + const text = asString(data.text); + if (text) process.stdout.write(`${timestamp()} user> ${text}\n`); + break; + } + case 'reasoning_delta': { + const text = asString(data.text); + if (process.stdout.isTTY) { + spinner.start(); + spinner.addReasoning(text); + } else if (verbose && text) { + process.stdout.write(`${timestamp()} agent> thinking: ${truncateOneLine(text)}\n`); + } + break; + } + case 'text_delta': { + spinner.stop(); + const text = asString(data.text); + if (!text) break; + if (!inText) { + process.stdout.write(`${timestamp()} agent> `); + inText = true; + } + process.stdout.write(text); + break; + } + case 'text_done': { + if (inText) { + process.stdout.write('\n'); + inText = false; + } + break; + } + case 'action': { + const actionType = asString(data.action_type); + const description = asString(data.description) || actionType; + const elapsedMs = asNumber(data.elapsed_ms); + const elapsedSeconds = elapsedMs === null ? undefined : elapsedMs / 1000; + if (inText) { + process.stdout.write('\n'); + inText = false; + } + spinner.stop(description, { elapsedSeconds }); + break; + } + case 'screenshot': { + if (verbose) process.stdout.write(`${timestamp()} debug> screenshot captured\n`); + break; + } + case 'turn_done': + case 'run_complete': { + spinner.stop(); + if (inText) { + process.stdout.write('\n'); + inText = false; + } + break; + } + case 'error': { + const message = asString(data.message) || 'unknown error'; + spinner.stop(); + if (inText) { + process.stdout.write('\n'); + inText = false; + } + process.stderr.write(`${timestamp()} error> ${message}\n`); + break; + } + default: + break; + } + }; +} diff --git a/pkg/templates/typescript/openai-computer-use/lib/playwright/base.ts b/pkg/templates/typescript/openai-computer-use/lib/playwright/base.ts deleted file mode 100644 index b43a7d2d..00000000 --- a/pkg/templates/typescript/openai-computer-use/lib/playwright/base.ts +++ /dev/null @@ -1,242 +0,0 @@ -import type { Browser, Page, Request, Response, Route } from 'playwright-core'; -import sharp from 'sharp'; -import utils from '../utils'; - -// CUA key -> Playwright key mapping -const KEY_MAP: Record = { - '/': '/', - '\\': '\\', - alt: 'Alt', - arrowdown: 'ArrowDown', - arrowleft: 'ArrowLeft', - arrowright: 'ArrowRight', - arrowup: 'ArrowUp', - backspace: 'Backspace', - capslock: 'CapsLock', - cmd: 'Meta', - ctrl: 'Control', - delete: 'Delete', - end: 'End', - enter: 'Enter', - esc: 'Escape', - home: 'Home', - insert: 'Insert', - option: 'Alt', - pagedown: 'PageDown', - pageup: 'PageUp', - shift: 'Shift', - space: ' ', - super: 'Meta', - tab: 'Tab', - win: 'Meta', -}; - -interface Point { - x: number; - y: number; -} - -export class BasePlaywrightComputer { - protected _browser: Browser | null = null; - protected _page: Page | null = null; - - constructor() { - this._browser = null; - this._page = null; - } - - /** - * Type guard to assert that this._page is present and is a Playwright Page. - * Throws an error if not present. - */ - protected _assertPage(): asserts this is { _page: Page } { - if (!this._page) { - throw new Error('Playwright Page is not initialized. Did you forget to call enter()?'); - } - } - - protected _handleNewPage = (page: Page): void => { - /** Handle the creation of a new page. */ - console.log('New page created'); - this._page = page; - page.on('close', this._handlePageClose.bind(this)); - }; - - protected _handlePageClose = (page: Page): void => { - /** Handle the closure of a page. */ - console.log('Page closed'); - try { - this._assertPage(); - } catch { - return; - } - if (this._page !== page) return; - - const browser = this._browser; - if (!browser || typeof browser.contexts !== 'function') { - console.log('Warning: Browser or context not available.'); - this._page = undefined as unknown as Page; - return; - } - - const contexts = browser.contexts(); - if (!contexts.length) { - console.log('Warning: No browser contexts available.'); - this._page = undefined as unknown as Page; - return; - } - - const context = contexts[0]; - if (!context || typeof context.pages !== 'function') { - console.log('Warning: Context pages not available.'); - this._page = undefined as unknown as Page; - return; - } - - const pages = context.pages(); - if (pages.length) { - this._page = pages[pages.length - 1] as Page; - } else { - console.log('Warning: All pages have been closed.'); - this._page = undefined as unknown as Page; - } - }; - - // Subclass hook - protected _getBrowserAndPage = async (): Promise<[Browser, Page]> => { - // Subclasses must implement, returning [Browser, Page] - throw new Error('Subclasses must implement _getBrowserAndPage()'); - }; - - getEnvironment = (): 'windows' | 'mac' | 'linux' | 'ubuntu' | 'browser' => { - return 'browser'; - }; - - getDimensions = (): [number, number] => { - return [1024, 768]; - }; - - enter = async (): Promise => { - // Call the subclass hook for getting browser/page - [this._browser, this._page] = await this._getBrowserAndPage(); - - // Set up network interception to flag URLs matching domains in BLOCKED_DOMAINS - const handleRoute = (route: Route, request: Request): void => { - const url = request.url(); - if (utils.checkBlocklistedUrl(url)) { - console.log(`Flagging blocked domain: ${url}`); - route.abort(); - } else { - route.continue(); - } - }; - - this._assertPage(); - await this._page.route('**/*', handleRoute); - return this; - }; - - exit = async (): Promise => { - if (this._browser) await this._browser.close(); - }; - - getCurrentUrl = (): string => { - this._assertPage(); - return this._page.url(); - }; - - screenshot = async (): Promise => { - this._assertPage(); - const buf = await this._page.screenshot({ fullPage: false }); - const webp = await sharp(buf).webp().toBuffer(); - return webp.toString('base64'); - }; - - click = async ( - button: 'left' | 'right' | 'back' | 'forward' | 'wheel', - x: number, - y: number, - ): Promise => { - this._assertPage(); - switch (button) { - case 'back': - await this.back(); - return; - case 'forward': - await this.forward(); - return; - case 'wheel': - await this._page.mouse.wheel(x, y); - return; - default: { - const btn = button === 'right' ? 'right' : 'left'; - await this._page.mouse.click(x, y, { button: btn }); - return; - } - } - }; - - doubleClick = async (x: number, y: number): Promise => { - this._assertPage(); - await this._page.mouse.dblclick(x, y); - }; - - scroll = async (x: number, y: number, scrollX: number, scrollY: number): Promise => { - this._assertPage(); - await this._page.mouse.move(x, y); - await this._page.evaluate( - (params: { dx: number; dy: number }) => window.scrollBy(params.dx, params.dy), - { dx: scrollX, dy: scrollY }, - ); - }; - - type = async (text: string): Promise => { - this._assertPage(); - await this._page.keyboard.type(text); - }; - - keypress = async (keys: string[]): Promise => { - this._assertPage(); - const mapped = keys.map((k) => KEY_MAP[k.toLowerCase()] ?? k); - for (const k of mapped) await this._page.keyboard.down(k); - for (const k of [...mapped].reverse()) await this._page.keyboard.up(k); - }; - - wait = async (ms = 1000): Promise => { - await new Promise((resolve) => setTimeout(resolve, ms)); - }; - - move = async (x: number, y: number): Promise => { - this._assertPage(); - await this._page.mouse.move(x, y); - }; - - drag = async (path: Point[]): Promise => { - this._assertPage(); - const first = path[0]; - if (!first) return; - await this._page.mouse.move(first.x, first.y); - await this._page.mouse.down(); - for (const pt of path.slice(1)) await this._page.mouse.move(pt.x, pt.y); - await this._page.mouse.up(); - }; - - goto = async (url: string): Promise => { - this._assertPage(); - try { - return await this._page.goto(url); - } catch { - return null; - } - }; - - back = async (): Promise => { - this._assertPage(); - return (await this._page.goBack()) || null; - }; - - forward = async (): Promise => { - this._assertPage(); - return (await this._page.goForward()) || null; - }; -} diff --git a/pkg/templates/typescript/openai-computer-use/lib/playwright/kernel.ts b/pkg/templates/typescript/openai-computer-use/lib/playwright/kernel.ts deleted file mode 100644 index 4dd0c869..00000000 --- a/pkg/templates/typescript/openai-computer-use/lib/playwright/kernel.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { chromium, type Browser, type Page } from 'playwright-core'; -import { BasePlaywrightComputer } from './base'; - -/** - * KernelPlaywrightComputer connects to a remote browser instance via CDP WebSocket URL. - * Similar to LocalPlaywrightComputer but uses an existing browser instance instead of launching one. - */ -export class KernelPlaywrightComputer extends BasePlaywrightComputer { - private cdp_ws_url: string; - - constructor(cdp_ws_url: string) { - super(); - this.cdp_ws_url = cdp_ws_url; - } - - _getBrowserAndPage = async (): Promise<[Browser, Page]> => { - const [width, height] = this.getDimensions(); - - // Connect to existing browser instance via CDP - const browser = await chromium.connectOverCDP(this.cdp_ws_url); - - // Get existing context or create new one - let context = browser.contexts()[0]; - if (!context) { - context = await browser.newContext(); - } - - // Add event listeners for page creation and closure - context.on('page', this._handleNewPage.bind(this)); - - // Get existing page or create new one - let page = context.pages()[0]; - if (!page) { - page = await context.newPage(); - } - - // Set viewport size - await page.setViewportSize({ width, height }); - page.on('close', this._handlePageClose.bind(this)); - - return [browser, page]; - }; -} diff --git a/pkg/templates/typescript/openai-computer-use/lib/playwright/local.ts b/pkg/templates/typescript/openai-computer-use/lib/playwright/local.ts deleted file mode 100644 index d0437801..00000000 --- a/pkg/templates/typescript/openai-computer-use/lib/playwright/local.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { chromium, type Browser, type Page } from 'playwright-core'; -import { BasePlaywrightComputer } from './base'; - -/** - * Launches a local Chromium instance using Playwright. - */ -export class LocalPlaywrightComputer extends BasePlaywrightComputer { - private headless: boolean; - - constructor(headless = false) { - super(); - this.headless = headless; - } - - _getBrowserAndPage = async (): Promise<[Browser, Page]> => { - const [width, height] = this.getDimensions(); - const launchArgs = [ - `--window-size=${width},${height}`, - '--disable-extensions', - '--disable-file-system', - ]; - - const browser = await chromium.launch({ - headless: this.headless, - args: launchArgs, - env: { DISPLAY: ':0' }, - }); - - const context = await browser.newContext(); - - // Add event listeners for page creation and closure - context.on('page', this._handleNewPage.bind(this)); - - const page = await context.newPage(); - await page.setViewportSize({ width, height }); - page.on('close', this._handlePageClose.bind(this)); - - await page.goto('https://duckduckgo.com'); - - // console.dir({debug_getBrowserAndPage: [browser, page]}); - return [browser, page]; - }; -} diff --git a/pkg/templates/typescript/openai-computer-use/lib/replay.ts b/pkg/templates/typescript/openai-computer-use/lib/replay.ts new file mode 100644 index 00000000..6858d9b7 --- /dev/null +++ b/pkg/templates/typescript/openai-computer-use/lib/replay.ts @@ -0,0 +1,114 @@ +import type { Kernel } from '@onkernel/sdk'; +import type { AgentEvent } from './log-events'; + +const DEFAULT_REPLAY_GRACE_MS = 5000; +const REPLAY_PROCESSING_DELAY_MS = 2000; +const REPLAY_POLL_TIMEOUT_MS = 60000; +const REPLAY_POLL_INTERVAL_MS = 1000; + +type EventLogger = (event: AgentEvent) => void; + +export interface ReplayState { + enabled: boolean; + replayId: string | null; + replayViewUrl: string | null; +} + +export async function maybeStartReplay( + kernel: Kernel, + sessionId: string, + opts?: { + enabled?: boolean; + onEvent?: EventLogger; + }, +): Promise { + const enabled = opts?.enabled ?? false; + const state: ReplayState = { + enabled, + replayId: null, + replayViewUrl: null, + }; + + if (!enabled) return state; + + const startedAtMs = Date.now(); + opts?.onEvent?.({ event: 'backend', data: { op: 'browsers.replays.start' } }); + try { + const replay = await kernel.browsers.replays.start(sessionId); + state.replayId = replay.replay_id ?? null; + opts?.onEvent?.({ + event: 'backend', + data: { + op: 'browsers.replays.start.done', + detail: state.replayId ?? '', + elapsed_ms: Date.now() - startedAtMs, + }, + }); + } catch (error) { + console.warn(`Warning: failed to start replay recording: ${String(error)}`); + console.warn('Continuing without replay recording.'); + state.enabled = false; + } + + return state; +} + +export async function maybeStopReplay( + kernel: Kernel, + sessionId: string, + replay: ReplayState, + opts?: { + onEvent?: EventLogger; + gracePeriodMs?: number; + }, +): Promise { + if (!replay.enabled || !replay.replayId) return replay.replayViewUrl; + + const gracePeriodMs = opts?.gracePeriodMs ?? DEFAULT_REPLAY_GRACE_MS; + if (gracePeriodMs > 0) { + await sleep(gracePeriodMs); + } + + const startedAtMs = Date.now(); + opts?.onEvent?.({ event: 'backend', data: { op: 'browsers.replays.stop' } }); + try { + await kernel.browsers.replays.stop(replay.replayId, { id: sessionId }); + await sleep(REPLAY_PROCESSING_DELAY_MS); + + const pollStartedAt = Date.now(); + while (Date.now() - pollStartedAt < REPLAY_POLL_TIMEOUT_MS) { + try { + const replays = await kernel.browsers.replays.list(sessionId); + const matchingReplay = replays.find((item) => item.replay_id === replay.replayId); + if (matchingReplay) { + replay.replayViewUrl = matchingReplay.replay_view_url ?? null; + break; + } + } catch { + // Ignore transient polling errors while the replay finishes processing. + } + await sleep(REPLAY_POLL_INTERVAL_MS); + } + + opts?.onEvent?.({ + event: 'backend', + data: { + op: 'browsers.replays.stop.done', + detail: replay.replayViewUrl ?? replay.replayId ?? '', + elapsed_ms: Date.now() - startedAtMs, + }, + }); + + if (!replay.replayViewUrl) { + console.warn('Warning: replay may still be processing.'); + } + } catch (error) { + console.warn(`Warning: failed to stop replay recording cleanly: ${String(error)}`); + } + + return replay.replayViewUrl; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} diff --git a/pkg/templates/typescript/openai-computer-use/lib/toolset.ts b/pkg/templates/typescript/openai-computer-use/lib/toolset.ts index 2999d0bd..aa43b9f1 100644 --- a/pkg/templates/typescript/openai-computer-use/lib/toolset.ts +++ b/pkg/templates/typescript/openai-computer-use/lib/toolset.ts @@ -1,40 +1,117 @@ -const shared = [ - { - type: 'function', - name: 'goto', - description: 'Go to a specific URL.', - parameters: { - type: 'object', - properties: { - url: { - type: 'string', - description: 'Fully qualified URL to navigate to.', +export const batchInstructions = `You have three ways to perform actions: +1. The standard computer tool — use for single actions when you need screenshot feedback after each step. +2. batch_computer_actions — use to execute multiple actions at once when you can predict the outcome. +3. computer_use_extra — use high-level browser actions: goto, back, and url. + +ALWAYS prefer batch_computer_actions when performing predictable sequences like: +- Clicking a text field, typing text, and pressing Enter +- Any sequence where you don't need to see intermediate results + +Use computer_use_extra for: +- action="goto" only when changing the page URL +- action="back" to go back in history +- action="url" to read the exact current URL + +When interacting with page content (search boxes, forms, chat inputs): +- Click the target input first, then type. +- Do not use URL-navigation actions for in-page text entry. + +For drag actions in batch_computer_actions: +- Always include a path field. +- path must be an array of at least two points. +- If one drag is likely to change the position, order, or layout of other targets, do not batch multiple drags together. +- In those cases, prefer one drag at a time and inspect the updated screenshot before planning the next drag. +- Each point must be an object like {"x": 123, "y": 456}.`; + +export const batchComputerTool = { + type: 'function' as const, + name: 'batch_computer_actions', + description: + 'Execute multiple computer actions in sequence without waiting for ' + + 'screenshots between them. Use this when you can predict the outcome of a ' + + 'sequence of actions without needing intermediate visual feedback. After all ' + + 'actions execute, a single screenshot is taken and returned.\n\n' + + 'PREFER this over individual computer actions when:\n' + + '- Typing text followed by pressing Enter\n' + + '- Clicking a field and then typing into it\n' + + "- Any sequence where intermediate screenshots aren't needed\n\n" + + 'Constraint: return-value actions (url, screenshot) can appear at most once ' + + 'and only as the final action in the batch.', + parameters: { + type: 'object', + properties: { + actions: { + type: 'array', + description: 'Ordered list of actions to execute', + items: { + type: 'object', + properties: { + type: { + type: 'string', + enum: [ + 'click', + 'double_click', + 'type', + 'keypress', + 'scroll', + 'move', + 'drag', + 'wait', + 'goto', + 'back', + 'url', + 'screenshot', + ], + }, + x: { type: 'number' }, + y: { type: 'number' }, + text: { type: 'string' }, + url: { type: 'string' }, + keys: { type: 'array', items: { type: 'string' } }, + hold_keys: { type: 'array', items: { type: 'string' } }, + button: { type: 'string' }, + scroll_x: { type: 'number' }, + scroll_y: { type: 'number' }, + path: { + type: 'array', + description: 'Required for drag actions. Provide at least two points as objects with x/y coordinates.', + items: { + type: 'object', + properties: { + x: { type: 'number' }, + y: { type: 'number' }, + }, + required: ['x', 'y'], + }, + }, + }, + required: ['type'], }, }, - additionalProperties: false, - required: ['url'], }, + required: ['actions'], }, - { - type: 'function', - name: 'back', - description: 'Navigate back in the browser history.', - parameters: { - type: 'object', - properties: {}, - additionalProperties: false, - }, - }, - { - type: 'function', - name: 'forward', - description: 'Navigate forward in the browser history.', - parameters: { - type: 'object', - properties: {}, - additionalProperties: false, + strict: false, +}; + +export const computerUseExtraTool = { + type: 'function' as const, + name: 'computer_use_extra', + description: 'High-level browser actions for navigation and URL retrieval.', + parameters: { + type: 'object', + properties: { + action: { + type: 'string', + enum: ['goto', 'back', 'url'], + description: 'Action to perform: goto, back, or url.', + }, + url: { + type: 'string', + description: 'Required when action is goto. Fully qualified URL to navigate to.', + }, }, + required: ['action'], }, -]; - -export default { shared }; + strict: false, +}; diff --git a/pkg/templates/typescript/openai-computer-use/lib/utils.ts b/pkg/templates/typescript/openai-computer-use/lib/utils.ts index f2dc0fd5..9a3134bf 100644 --- a/pkg/templates/typescript/openai-computer-use/lib/utils.ts +++ b/pkg/templates/typescript/openai-computer-use/lib/utils.ts @@ -1,7 +1,9 @@ -import 'dotenv/config'; -import sharp from 'sharp'; +import * as dotenv from 'dotenv'; import OpenAI from 'openai'; import { type ResponseItem } from 'openai/resources/responses/responses'; + +dotenv.config({ override: true, quiet: true }); + const openai = new OpenAI(); const BLOCKED_DOMAINS: readonly string[] = [ @@ -13,13 +15,6 @@ const BLOCKED_DOMAINS: readonly string[] = [ 'ilanbigio.com', ] as const; -export async function calculateImageDimensions( - base64Image: string, -): Promise<{ width: number; height: number }> { - const buf = Buffer.from(base64Image, 'base64'); - const meta = await sharp(buf).metadata(); - return { width: meta.width ?? 0, height: meta.height ?? 0 }; -} export function sanitizeMessage(msg: ResponseItem): ResponseItem { const sanitizedMsg = { ...msg } as ResponseItem; if ( @@ -33,28 +28,96 @@ export function sanitizeMessage(msg: ResponseItem): ResponseItem { output.image_url = '[omitted]'; } } + if ( + sanitizedMsg.type === 'function_call_output' && + Array.isArray((sanitizedMsg as { output?: unknown }).output) + ) { + const outputItems = (sanitizedMsg as unknown as { output: Array<{ type?: unknown; image_url?: unknown }> }).output; + sanitizedMsg.output = outputItems.map((item) => { + if (item.type === 'input_image' && typeof item.image_url === 'string') { + return { ...item, image_url: '[omitted]' }; + } + return item; + }) as typeof sanitizedMsg.output; + } return sanitizedMsg; } export async function createResponse( params: OpenAI.Responses.ResponseCreateParams, ): Promise<{ output?: OpenAI.Responses.ResponseOutputItem[] }> { - try { - const response = await openai.responses.create(params); - return 'output' in response ? response : { output: undefined }; - } catch (err: unknown) { - console.error((err as Error).message); - throw err; + const maxAttempts = Number(process.env.OPENAI_RETRY_MAX_ATTEMPTS ?? '4'); + const baseDelaySeconds = Number(process.env.OPENAI_RETRY_BASE_DELAY_SECONDS ?? '0.5'); + + for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { + try { + const response = await openai.responses.create(params); + return 'output' in response ? response : { output: undefined }; + } catch (err: unknown) { + const status = getErrorStatus(err); + const retryable = isRetryableError(err); + const message = getErrorMessage(err); + + if (!retryable || attempt >= maxAttempts) { + console.error(message); + throw err; + } + + const delayMs = baseDelaySeconds * 1000 * 2 ** (attempt - 1); + const label = status === null ? 'OpenAI request failed' : `OpenAI server error ${status}`; + console.warn( + `Warning: ${label}; retrying in ${(delayMs / 1000).toFixed(1)}s (${attempt}/${maxAttempts})`, + ); + await sleep(delayMs); + } } + throw new Error('OpenAI request failed unexpectedly'); +} + +function getErrorStatus(err: unknown): number | null { + if (typeof err !== 'object' || err === null) return null; + if (!('status' in err)) return null; + const status = (err as { status?: unknown }).status; + return typeof status === 'number' ? status : null; +} + +function getErrorMessage(err: unknown): string { + if (err instanceof Error && err.message) return err.message; + return String(err); +} + +function isRetryableError(err: unknown): boolean { + const status = getErrorStatus(err); + if (status !== null) return status >= 500; + + const msg = getErrorMessage(err).toLowerCase(); + return ( + msg.includes('fetch failed') || + msg.includes('network') || + msg.includes('econnreset') || + msg.includes('etimedout') || + msg.includes('timeout') + ); } -export function checkBlocklistedUrl(url: string): boolean { - const host = new URL(url).hostname; - return BLOCKED_DOMAINS.some((d) => host === d || host.endsWith(`.${d}`)); +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +export function checkBlocklistedUrl(url: string): void { + try { + const host = new URL(url).hostname; + if (BLOCKED_DOMAINS.some((d) => host === d || host.endsWith(`.${d}`))) { + throw new Error(`Blocked URL: ${url}`); + } + } catch (error) { + if (error instanceof Error && error.message.startsWith('Blocked URL:')) { + throw error; + } + } } export default { - calculateImageDimensions, sanitizeMessage, createResponse, checkBlocklistedUrl, diff --git a/pkg/templates/typescript/openai-computer-use/package.json b/pkg/templates/typescript/openai-computer-use/package.json index bdfa99dc..b9371e19 100644 --- a/pkg/templates/typescript/openai-computer-use/package.json +++ b/pkg/templates/typescript/openai-computer-use/package.json @@ -2,17 +2,17 @@ "type": "module", "private": true, "scripts": { - "build": "tsc" + "build": "tsc", + "test:local": "npx tsx run_local.ts" }, "dependencies": { - "@onkernel/sdk": "^0.23.0", + "@onkernel/sdk": "^0.43.0", "dotenv": "^17.2.3", - "openai": "^6.13.0", - "playwright-core": "^1.57.0", - "sharp": "^0.34.5" + "openai": "^6.13.0" }, "devDependencies": { "@types/node": "^22.15.17", + "tsx": "^4.19.0", "typescript": "^5.9.3" } } diff --git a/pkg/templates/typescript/openai-computer-use/pnpm-lock.yaml b/pkg/templates/typescript/openai-computer-use/pnpm-lock.yaml index c3737350..28304dd1 100644 --- a/pkg/templates/typescript/openai-computer-use/pnpm-lock.yaml +++ b/pkg/templates/typescript/openai-computer-use/pnpm-lock.yaml @@ -9,186 +9,208 @@ importers: .: dependencies: '@onkernel/sdk': - specifier: ^0.23.0 - version: 0.23.0 + specifier: ^0.43.0 + version: 0.43.0 dotenv: specifier: ^17.2.3 - version: 17.2.3 + version: 17.3.1 openai: specifier: ^6.13.0 - version: 6.13.0 - playwright-core: - specifier: ^1.57.0 - version: 1.57.0 - sharp: - specifier: ^0.34.5 - version: 0.34.5 + version: 6.25.0 devDependencies: '@types/node': specifier: ^22.15.17 - version: 22.19.3 + version: 22.19.11 + tsx: + specifier: ^4.19.0 + version: 4.21.0 typescript: specifier: ^5.9.3 version: 5.9.3 packages: - '@emnapi/runtime@1.7.1': - resolution: {integrity: sha512-PVtJr5CmLwYAU9PZDMITZoR5iAOShYREoR45EyyLrbntV50mdePTgUn4AmOw90Ifcj+x2kRjdzr1HP3RrNiHGA==} + '@esbuild/aix-ppc64@0.27.3': + resolution: {integrity: sha512-9fJMTNFTWZMh5qwrBItuziu834eOCUcEqymSH7pY+zoMVEZg3gcPuBNxH1EvfVYe9h0x/Ptw8KBzv7qxb7l8dg==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + + '@esbuild/android-arm64@0.27.3': + resolution: {integrity: sha512-YdghPYUmj/FX2SYKJ0OZxf+iaKgMsKHVPF1MAq/P8WirnSpCStzKJFjOjzsW0QQ7oIAiccHdcqjbHmJxRb/dmg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] - '@img/colour@1.0.0': - resolution: {integrity: sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==} + '@esbuild/android-arm@0.27.3': + resolution: {integrity: sha512-i5D1hPY7GIQmXlXhs2w8AWHhenb00+GxjxRncS2ZM7YNVGNfaMxgzSGuO8o8SJzRc/oZwU2bcScvVERk03QhzA==} engines: {node: '>=18'} + cpu: [arm] + os: [android] - '@img/sharp-darwin-arm64@0.34.5': - resolution: {integrity: sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + '@esbuild/android-x64@0.27.3': + resolution: {integrity: sha512-IN/0BNTkHtk8lkOM8JWAYFg4ORxBkZQf9zXiEOfERX/CzxW3Vg1ewAhU7QSWQpVIzTW+b8Xy+lGzdYXV6UZObQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [android] + + '@esbuild/darwin-arm64@0.27.3': + resolution: {integrity: sha512-Re491k7ByTVRy0t3EKWajdLIr0gz2kKKfzafkth4Q8A5n1xTHrkqZgLLjFEHVD+AXdUGgQMq+Godfq45mGpCKg==} + engines: {node: '>=18'} cpu: [arm64] os: [darwin] - '@img/sharp-darwin-x64@0.34.5': - resolution: {integrity: sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + '@esbuild/darwin-x64@0.27.3': + resolution: {integrity: sha512-vHk/hA7/1AckjGzRqi6wbo+jaShzRowYip6rt6q7VYEDX4LEy1pZfDpdxCBnGtl+A5zq8iXDcyuxwtv3hNtHFg==} + engines: {node: '>=18'} cpu: [x64] os: [darwin] - '@img/sharp-libvips-darwin-arm64@1.2.4': - resolution: {integrity: sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==} + '@esbuild/freebsd-arm64@0.27.3': + resolution: {integrity: sha512-ipTYM2fjt3kQAYOvo6vcxJx3nBYAzPjgTCk7QEgZG8AUO3ydUhvelmhrbOheMnGOlaSFUoHXB6un+A7q4ygY9w==} + engines: {node: '>=18'} cpu: [arm64] - os: [darwin] + os: [freebsd] - '@img/sharp-libvips-darwin-x64@1.2.4': - resolution: {integrity: sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==} + '@esbuild/freebsd-x64@0.27.3': + resolution: {integrity: sha512-dDk0X87T7mI6U3K9VjWtHOXqwAMJBNN2r7bejDsc+j03SEjtD9HrOl8gVFByeM0aJksoUuUVU9TBaZa2rgj0oA==} + engines: {node: '>=18'} cpu: [x64] - os: [darwin] + os: [freebsd] - '@img/sharp-libvips-linux-arm64@1.2.4': - resolution: {integrity: sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==} + '@esbuild/linux-arm64@0.27.3': + resolution: {integrity: sha512-sZOuFz/xWnZ4KH3YfFrKCf1WyPZHakVzTiqji3WDc0BCl2kBwiJLCXpzLzUBLgmp4veFZdvN5ChW4Eq/8Fc2Fg==} + engines: {node: '>=18'} cpu: [arm64] os: [linux] - '@img/sharp-libvips-linux-arm@1.2.4': - resolution: {integrity: sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==} + '@esbuild/linux-arm@0.27.3': + resolution: {integrity: sha512-s6nPv2QkSupJwLYyfS+gwdirm0ukyTFNl3KTgZEAiJDd+iHZcbTPPcWCcRYH+WlNbwChgH2QkE9NSlNrMT8Gfw==} + engines: {node: '>=18'} cpu: [arm] os: [linux] - '@img/sharp-libvips-linux-ppc64@1.2.4': - resolution: {integrity: sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==} + '@esbuild/linux-ia32@0.27.3': + resolution: {integrity: sha512-yGlQYjdxtLdh0a3jHjuwOrxQjOZYD/C9PfdbgJJF3TIZWnm/tMd/RcNiLngiu4iwcBAOezdnSLAwQDPqTmtTYg==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + + '@esbuild/linux-loong64@0.27.3': + resolution: {integrity: sha512-WO60Sn8ly3gtzhyjATDgieJNet/KqsDlX5nRC5Y3oTFcS1l0KWba+SEa9Ja1GfDqSF1z6hif/SkpQJbL63cgOA==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + + '@esbuild/linux-mips64el@0.27.3': + resolution: {integrity: sha512-APsymYA6sGcZ4pD6k+UxbDjOFSvPWyZhjaiPyl/f79xKxwTnrn5QUnXR5prvetuaSMsb4jgeHewIDCIWljrSxw==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + + '@esbuild/linux-ppc64@0.27.3': + resolution: {integrity: sha512-eizBnTeBefojtDb9nSh4vvVQ3V9Qf9Df01PfawPcRzJH4gFSgrObw+LveUyDoKU3kxi5+9RJTCWlj4FjYXVPEA==} + engines: {node: '>=18'} cpu: [ppc64] os: [linux] - '@img/sharp-libvips-linux-riscv64@1.2.4': - resolution: {integrity: sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==} + '@esbuild/linux-riscv64@0.27.3': + resolution: {integrity: sha512-3Emwh0r5wmfm3ssTWRQSyVhbOHvqegUDRd0WhmXKX2mkHJe1SFCMJhagUleMq+Uci34wLSipf8Lagt4LlpRFWQ==} + engines: {node: '>=18'} cpu: [riscv64] os: [linux] - '@img/sharp-libvips-linux-s390x@1.2.4': - resolution: {integrity: sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==} + '@esbuild/linux-s390x@0.27.3': + resolution: {integrity: sha512-pBHUx9LzXWBc7MFIEEL0yD/ZVtNgLytvx60gES28GcWMqil8ElCYR4kvbV2BDqsHOvVDRrOxGySBM9Fcv744hw==} + engines: {node: '>=18'} cpu: [s390x] os: [linux] - '@img/sharp-libvips-linux-x64@1.2.4': - resolution: {integrity: sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==} + '@esbuild/linux-x64@0.27.3': + resolution: {integrity: sha512-Czi8yzXUWIQYAtL/2y6vogER8pvcsOsk5cpwL4Gk5nJqH5UZiVByIY8Eorm5R13gq+DQKYg0+JyQoytLQas4dA==} + engines: {node: '>=18'} cpu: [x64] os: [linux] - '@img/sharp-libvips-linuxmusl-arm64@1.2.4': - resolution: {integrity: sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==} + '@esbuild/netbsd-arm64@0.27.3': + resolution: {integrity: sha512-sDpk0RgmTCR/5HguIZa9n9u+HVKf40fbEUt+iTzSnCaGvY9kFP0YKBWZtJaraonFnqef5SlJ8/TiPAxzyS+UoA==} + engines: {node: '>=18'} cpu: [arm64] - os: [linux] + os: [netbsd] - '@img/sharp-libvips-linuxmusl-x64@1.2.4': - resolution: {integrity: sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==} + '@esbuild/netbsd-x64@0.27.3': + resolution: {integrity: sha512-P14lFKJl/DdaE00LItAukUdZO5iqNH7+PjoBm+fLQjtxfcfFE20Xf5CrLsmZdq5LFFZzb5JMZ9grUwvtVYzjiA==} + engines: {node: '>=18'} cpu: [x64] - os: [linux] + os: [netbsd] - '@img/sharp-linux-arm64@0.34.5': - resolution: {integrity: sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + '@esbuild/openbsd-arm64@0.27.3': + resolution: {integrity: sha512-AIcMP77AvirGbRl/UZFTq5hjXK+2wC7qFRGoHSDrZ5v5b8DK/GYpXW3CPRL53NkvDqb9D+alBiC/dV0Fb7eJcw==} + engines: {node: '>=18'} cpu: [arm64] - os: [linux] + os: [openbsd] - '@img/sharp-linux-arm@0.34.5': - resolution: {integrity: sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [arm] - os: [linux] - - '@img/sharp-linux-ppc64@0.34.5': - resolution: {integrity: sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [ppc64] - os: [linux] - - '@img/sharp-linux-riscv64@0.34.5': - resolution: {integrity: sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [riscv64] - os: [linux] - - '@img/sharp-linux-s390x@0.34.5': - resolution: {integrity: sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [s390x] - os: [linux] - - '@img/sharp-linux-x64@0.34.5': - resolution: {integrity: sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + '@esbuild/openbsd-x64@0.27.3': + resolution: {integrity: sha512-DnW2sRrBzA+YnE70LKqnM3P+z8vehfJWHXECbwBmH/CU51z6FiqTQTHFenPlHmo3a8UgpLyH3PT+87OViOh1AQ==} + engines: {node: '>=18'} cpu: [x64] - os: [linux] + os: [openbsd] - '@img/sharp-linuxmusl-arm64@0.34.5': - resolution: {integrity: sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + '@esbuild/openharmony-arm64@0.27.3': + resolution: {integrity: sha512-NinAEgr/etERPTsZJ7aEZQvvg/A6IsZG/LgZy+81wON2huV7SrK3e63dU0XhyZP4RKGyTm7aOgmQk0bGp0fy2g==} + engines: {node: '>=18'} cpu: [arm64] - os: [linux] + os: [openharmony] - '@img/sharp-linuxmusl-x64@0.34.5': - resolution: {integrity: sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + '@esbuild/sunos-x64@0.27.3': + resolution: {integrity: sha512-PanZ+nEz+eWoBJ8/f8HKxTTD172SKwdXebZ0ndd953gt1HRBbhMsaNqjTyYLGLPdoWHy4zLU7bDVJztF5f3BHA==} + engines: {node: '>=18'} cpu: [x64] - os: [linux] + os: [sunos] - '@img/sharp-wasm32@0.34.5': - resolution: {integrity: sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [wasm32] - - '@img/sharp-win32-arm64@0.34.5': - resolution: {integrity: sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + '@esbuild/win32-arm64@0.27.3': + resolution: {integrity: sha512-B2t59lWWYrbRDw/tjiWOuzSsFh1Y/E95ofKz7rIVYSQkUYBjfSgf6oeYPNWHToFRr2zx52JKApIcAS/D5TUBnA==} + engines: {node: '>=18'} cpu: [arm64] os: [win32] - '@img/sharp-win32-ia32@0.34.5': - resolution: {integrity: sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + '@esbuild/win32-ia32@0.27.3': + resolution: {integrity: sha512-QLKSFeXNS8+tHW7tZpMtjlNb7HKau0QDpwm49u0vUp9y1WOF+PEzkU84y9GqYaAVW8aH8f3GcBck26jh54cX4Q==} + engines: {node: '>=18'} cpu: [ia32] os: [win32] - '@img/sharp-win32-x64@0.34.5': - resolution: {integrity: sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + '@esbuild/win32-x64@0.27.3': + resolution: {integrity: sha512-4uJGhsxuptu3OcpVAzli+/gWusVGwZZHTlS63hh++ehExkVT8SgiEf7/uC/PclrPPkLhZqGgCTjd0VWLo6xMqA==} + engines: {node: '>=18'} cpu: [x64] os: [win32] - '@onkernel/sdk@0.23.0': - resolution: {integrity: sha512-P/ez6HU8sO2QvqWATkvC+Wdv+fgto4KfBCHLl2T6EUpoU3LhgOZ/sJP2ZRf/vh5Vh7QR2Vf05RgMaFcIGBGD9Q==} - - '@types/node@22.19.3': - resolution: {integrity: sha512-1N9SBnWYOJTrNZCdh/yJE+t910Y128BoyY+zBLWhL3r0TYzlTmFdXrPwHL9DyFZmlEXNQQolTZh3KHV31QDhyA==} + '@onkernel/sdk@0.43.0': + resolution: {integrity: sha512-pvveMdVCzjtVqNeLI+yk+VBTMaIvRe/jevvKJqnHl2svlDxvT7Z0mNFeiAWsDLeh1TQL92aWEKZoyEVxRniO9w==} - detect-libc@2.1.2: - resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} - engines: {node: '>=8'} + '@types/node@22.19.11': + resolution: {integrity: sha512-BH7YwL6rA93ReqeQS1c4bsPpcfOmJasG+Fkr6Y59q83f9M1WcBRHR2vM+P9eOisYRcN3ujQoiZY8uk5W+1WL8w==} - dotenv@17.2.3: - resolution: {integrity: sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==} + dotenv@17.3.1: + resolution: {integrity: sha512-IO8C/dzEb6O3F9/twg6ZLXz164a2fhTnEWb95H23Dm4OuN+92NmEAlTrupP9VW6Jm3sO26tQlqyvyi4CsnY9GA==} engines: {node: '>=12'} - openai@6.13.0: - resolution: {integrity: sha512-yHbMo+EpNGPG3sRrXvmo0LhUPFN4bAURJw3G17bE+ax1G4tcTFCa9ZjvCWh3cvni0aHY0uWlk2IxcsPH4NR9Ow==} + esbuild@0.27.3: + resolution: {integrity: sha512-8VwMnyGCONIs6cWue2IdpHxHnAjzxnw2Zr7MkVxB2vjmQ2ivqGFb4LEG3SMnv0Gb2F/G/2yA8zUaiL1gywDCCg==} + engines: {node: '>=18'} + hasBin: true + + fsevents@2.3.3: + resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + + get-tsconfig@4.13.6: + resolution: {integrity: sha512-shZT/QMiSHc/YBLxxOkMtgSid5HFoauqCE3/exfsEcwg1WkeqjG+V40yBbBrsD+jW2HDXcs28xOfcbm2jI8Ddw==} + + openai@6.25.0: + resolution: {integrity: sha512-mEh6VZ2ds2AGGokWARo18aPISI1OhlgdEIC1ewhkZr8pSIT31dec0ecr9Nhxx0JlybyOgoAT1sWeKtwPZzJyww==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -199,23 +221,14 @@ packages: zod: optional: true - playwright-core@1.57.0: - resolution: {integrity: sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==} - engines: {node: '>=18'} - hasBin: true + resolve-pkg-maps@1.0.0: + resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==} - semver@7.7.3: - resolution: {integrity: sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==} - engines: {node: '>=10'} + tsx@4.21.0: + resolution: {integrity: sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==} + engines: {node: '>=18.0.0'} hasBin: true - sharp@0.34.5: - resolution: {integrity: sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - - tslib@2.8.1: - resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} - typescript@5.9.3: resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==} engines: {node: '>=14.17'} @@ -226,156 +239,138 @@ packages: snapshots: - '@emnapi/runtime@1.7.1': - dependencies: - tslib: 2.8.1 + '@esbuild/aix-ppc64@0.27.3': optional: true - '@img/colour@1.0.0': {} + '@esbuild/android-arm64@0.27.3': + optional: true - '@img/sharp-darwin-arm64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-darwin-arm64': 1.2.4 + '@esbuild/android-arm@0.27.3': optional: true - '@img/sharp-darwin-x64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-darwin-x64': 1.2.4 + '@esbuild/android-x64@0.27.3': optional: true - '@img/sharp-libvips-darwin-arm64@1.2.4': + '@esbuild/darwin-arm64@0.27.3': optional: true - '@img/sharp-libvips-darwin-x64@1.2.4': + '@esbuild/darwin-x64@0.27.3': optional: true - '@img/sharp-libvips-linux-arm64@1.2.4': + '@esbuild/freebsd-arm64@0.27.3': optional: true - '@img/sharp-libvips-linux-arm@1.2.4': + '@esbuild/freebsd-x64@0.27.3': optional: true - '@img/sharp-libvips-linux-ppc64@1.2.4': + '@esbuild/linux-arm64@0.27.3': optional: true - '@img/sharp-libvips-linux-riscv64@1.2.4': + '@esbuild/linux-arm@0.27.3': optional: true - '@img/sharp-libvips-linux-s390x@1.2.4': + '@esbuild/linux-ia32@0.27.3': optional: true - '@img/sharp-libvips-linux-x64@1.2.4': + '@esbuild/linux-loong64@0.27.3': optional: true - '@img/sharp-libvips-linuxmusl-arm64@1.2.4': + '@esbuild/linux-mips64el@0.27.3': optional: true - '@img/sharp-libvips-linuxmusl-x64@1.2.4': + '@esbuild/linux-ppc64@0.27.3': optional: true - '@img/sharp-linux-arm64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linux-arm64': 1.2.4 + '@esbuild/linux-riscv64@0.27.3': optional: true - '@img/sharp-linux-arm@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linux-arm': 1.2.4 + '@esbuild/linux-s390x@0.27.3': optional: true - '@img/sharp-linux-ppc64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linux-ppc64': 1.2.4 + '@esbuild/linux-x64@0.27.3': optional: true - '@img/sharp-linux-riscv64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linux-riscv64': 1.2.4 + '@esbuild/netbsd-arm64@0.27.3': optional: true - '@img/sharp-linux-s390x@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linux-s390x': 1.2.4 + '@esbuild/netbsd-x64@0.27.3': optional: true - '@img/sharp-linux-x64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linux-x64': 1.2.4 + '@esbuild/openbsd-arm64@0.27.3': optional: true - '@img/sharp-linuxmusl-arm64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linuxmusl-arm64': 1.2.4 + '@esbuild/openbsd-x64@0.27.3': optional: true - '@img/sharp-linuxmusl-x64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linuxmusl-x64': 1.2.4 + '@esbuild/openharmony-arm64@0.27.3': optional: true - '@img/sharp-wasm32@0.34.5': - dependencies: - '@emnapi/runtime': 1.7.1 + '@esbuild/sunos-x64@0.27.3': optional: true - '@img/sharp-win32-arm64@0.34.5': + '@esbuild/win32-arm64@0.27.3': optional: true - '@img/sharp-win32-ia32@0.34.5': + '@esbuild/win32-ia32@0.27.3': optional: true - '@img/sharp-win32-x64@0.34.5': + '@esbuild/win32-x64@0.27.3': optional: true - '@onkernel/sdk@0.23.0': {} + '@onkernel/sdk@0.43.0': {} - '@types/node@22.19.3': + '@types/node@22.19.11': dependencies: undici-types: 6.21.0 - detect-libc@2.1.2: {} + dotenv@17.3.1: {} - dotenv@17.2.3: {} + esbuild@0.27.3: + optionalDependencies: + '@esbuild/aix-ppc64': 0.27.3 + '@esbuild/android-arm': 0.27.3 + '@esbuild/android-arm64': 0.27.3 + '@esbuild/android-x64': 0.27.3 + '@esbuild/darwin-arm64': 0.27.3 + '@esbuild/darwin-x64': 0.27.3 + '@esbuild/freebsd-arm64': 0.27.3 + '@esbuild/freebsd-x64': 0.27.3 + '@esbuild/linux-arm': 0.27.3 + '@esbuild/linux-arm64': 0.27.3 + '@esbuild/linux-ia32': 0.27.3 + '@esbuild/linux-loong64': 0.27.3 + '@esbuild/linux-mips64el': 0.27.3 + '@esbuild/linux-ppc64': 0.27.3 + '@esbuild/linux-riscv64': 0.27.3 + '@esbuild/linux-s390x': 0.27.3 + '@esbuild/linux-x64': 0.27.3 + '@esbuild/netbsd-arm64': 0.27.3 + '@esbuild/netbsd-x64': 0.27.3 + '@esbuild/openbsd-arm64': 0.27.3 + '@esbuild/openbsd-x64': 0.27.3 + '@esbuild/openharmony-arm64': 0.27.3 + '@esbuild/sunos-x64': 0.27.3 + '@esbuild/win32-arm64': 0.27.3 + '@esbuild/win32-ia32': 0.27.3 + '@esbuild/win32-x64': 0.27.3 + + fsevents@2.3.3: + optional: true - openai@6.13.0: {} + get-tsconfig@4.13.6: + dependencies: + resolve-pkg-maps: 1.0.0 - playwright-core@1.57.0: {} + openai@6.25.0: {} - semver@7.7.3: {} + resolve-pkg-maps@1.0.0: {} - sharp@0.34.5: + tsx@4.21.0: dependencies: - '@img/colour': 1.0.0 - detect-libc: 2.1.2 - semver: 7.7.3 + esbuild: 0.27.3 + get-tsconfig: 4.13.6 optionalDependencies: - '@img/sharp-darwin-arm64': 0.34.5 - '@img/sharp-darwin-x64': 0.34.5 - '@img/sharp-libvips-darwin-arm64': 1.2.4 - '@img/sharp-libvips-darwin-x64': 1.2.4 - '@img/sharp-libvips-linux-arm': 1.2.4 - '@img/sharp-libvips-linux-arm64': 1.2.4 - '@img/sharp-libvips-linux-ppc64': 1.2.4 - '@img/sharp-libvips-linux-riscv64': 1.2.4 - '@img/sharp-libvips-linux-s390x': 1.2.4 - '@img/sharp-libvips-linux-x64': 1.2.4 - '@img/sharp-libvips-linuxmusl-arm64': 1.2.4 - '@img/sharp-libvips-linuxmusl-x64': 1.2.4 - '@img/sharp-linux-arm': 0.34.5 - '@img/sharp-linux-arm64': 0.34.5 - '@img/sharp-linux-ppc64': 0.34.5 - '@img/sharp-linux-riscv64': 0.34.5 - '@img/sharp-linux-s390x': 0.34.5 - '@img/sharp-linux-x64': 0.34.5 - '@img/sharp-linuxmusl-arm64': 0.34.5 - '@img/sharp-linuxmusl-x64': 0.34.5 - '@img/sharp-wasm32': 0.34.5 - '@img/sharp-win32-arm64': 0.34.5 - '@img/sharp-win32-ia32': 0.34.5 - '@img/sharp-win32-x64': 0.34.5 - - tslib@2.8.1: - optional: true + fsevents: 2.3.3 typescript@5.9.3: {} diff --git a/pkg/templates/typescript/openai-computer-use/run_local.ts b/pkg/templates/typescript/openai-computer-use/run_local.ts new file mode 100644 index 00000000..e57382a9 --- /dev/null +++ b/pkg/templates/typescript/openai-computer-use/run_local.ts @@ -0,0 +1,133 @@ +import * as dotenv from 'dotenv'; +import { Kernel } from '@onkernel/sdk'; +import { resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { Agent } from './lib/agent'; +import { KernelComputer } from './lib/kernel-computer'; +import { maybeStartReplay, maybeStopReplay } from './lib/replay'; +import { + createEventLogger, + emitBrowserDeleteDone, + emitBrowserDeleteStarted, + emitBrowserNewDone, + emitBrowserNewStarted, + emitSessionState, +} from './lib/logging'; + +dotenv.config({ override: true, quiet: true }); + +/** + * Local test script that creates a remote Kernel browser and runs the CUA agent. + * No Kernel app deployment needed. + * + * Usage: + * KERNEL_API_KEY=... OPENAI_API_KEY=... npx tsx run_local.ts --task "go to example.com and summarize it" + */ + +const DEFAULT_TASK = 'go to example.com and summarize what the page says'; + +export async function runLocalTest(args: string[] = process.argv.slice(2)): Promise { + if (!process.env.KERNEL_API_KEY) throw new Error('KERNEL_API_KEY is not set'); + if (!process.env.OPENAI_API_KEY) throw new Error('OPENAI_API_KEY is not set'); + + const client = new Kernel({ apiKey: process.env.KERNEL_API_KEY }); + const task = parseTask(args); + const replayEnabled = parseReplay(args); + const debug = args.includes('--debug'); + const onEvent = createEventLogger({ verbose: debug }); + + emitBrowserNewStarted(onEvent); + const browserCreateStartedAt = Date.now(); + const browser = await client.browsers.create({ timeout_seconds: 300 }); + emitBrowserNewDone(onEvent, browserCreateStartedAt, browser.browser_live_view_url); + emitSessionState(onEvent, browser.session_id, browser.browser_live_view_url); + + const computer = new KernelComputer(client, browser.session_id, onEvent); + const replay = await maybeStartReplay(client, browser.session_id, { + enabled: replayEnabled, + onEvent, + }); + + try { + await computer.goto('https://duckduckgo.com'); + + const agent = new Agent({ + model: 'gpt-5.4', + computer, + tools: [], + acknowledge_safety_check_callback: (m: string): boolean => { + console.log(`> safety check: ${m}`); + return true; + }, + }); + + await agent.runFullTurn({ + messages: [ + { + role: 'system', + content: `- Current date and time: ${new Date().toISOString()} (${new Date().toLocaleDateString( + 'en-US', + { weekday: 'long' }, + )})`, + }, + { + type: 'message', + role: 'user', + content: [ + { + type: 'input_text', + text: task, + }, + ], + }, + ], + print_steps: true, + debug, + show_images: false, + onEvent, + }); + } finally { + emitBrowserDeleteStarted(onEvent); + const browserDeleteStartedAt = Date.now(); + try { + const replayUrl = await maybeStopReplay(client, browser.session_id, replay, { onEvent }); + if (replayUrl) { + console.log(`> Replay URL: ${replayUrl}`); + } + await client.browsers.deleteByID(browser.session_id); + } finally { + emitBrowserDeleteDone(onEvent, browserDeleteStartedAt); + } + console.log('> Browser session deleted'); + } +} + +function parseTask(args: string[]): string { + const taskFromEquals = args.find((arg) => arg.startsWith('--task='))?.slice('--task='.length).trim(); + const taskFlagIndex = args.findIndex((arg) => arg === '--task'); + const nextArg = taskFlagIndex >= 0 ? args[taskFlagIndex + 1] : undefined; + const taskFromNext = nextArg && !nextArg.startsWith('--') ? nextArg.trim() : undefined; + const task = taskFromEquals || taskFromNext; + return task && task.length > 0 ? task : DEFAULT_TASK; +} + +function parseReplay(args: string[]): boolean { + const replayFromEquals = args.find((arg) => arg.startsWith('--replay='))?.slice('--replay='.length).trim(); + if (replayFromEquals) { + return !['0', 'false', 'no', 'off'].includes(replayFromEquals.toLowerCase()); + } + return args.includes('--replay'); +} + +function isDirectRun(): boolean { + const entry = process.argv[1]; + if (!entry) return false; + return resolve(entry) === resolve(fileURLToPath(import.meta.url)); +} + +if (isDirectRun()) { + runLocalTest().catch((error) => { + console.error(error); + process.exit(1); + }); +} diff --git a/pkg/templates/typescript/openai-computer-use/test.local.ts b/pkg/templates/typescript/openai-computer-use/test.local.ts deleted file mode 100644 index 23f9a5cc..00000000 --- a/pkg/templates/typescript/openai-computer-use/test.local.ts +++ /dev/null @@ -1,49 +0,0 @@ -import 'dotenv/config'; -import { Agent } from './lib/agent'; -import computers from './lib/computers'; - -/* - to run a local browser test before deploying to kernel -*/ - -async function test(): Promise { - const { computer } = await computers.create({ type: 'local' }); - const agent = new Agent({ - model: 'computer-use-preview', - computer, - tools: [], - acknowledge_safety_check_callback: (m: string): boolean => { - console.log(`> safety check: ${m}`); - return true; - }, - }); - - // run agent and get response - const logs = await agent.runFullTurn({ - messages: [ - { - role: 'system', - content: `- Current date and time: ${new Date().toISOString()} (${new Date().toLocaleDateString( - 'en-US', - { weekday: 'long' }, - )})`, - }, - { - type: 'message', - role: 'user', - content: [ - { - type: 'input_text', - text: 'go to ebay.com and look up oberheim ob-x prices and give me a report', - }, - ], - }, - ], - print_steps: true, - debug: true, - show_images: false, - }); - console.dir(logs, { depth: null }); -} - -test();