-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathmain.py
More file actions
92 lines (72 loc) · 2.57 KB
/
main.py
File metadata and controls
92 lines (72 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
from typing import Optional, TypedDict
import kernel
from loop import sampling_loop
from session import KernelBrowserSession
class _QueryInputRequired(TypedDict):
    """Keys that every ``cua-task`` payload must carry."""

    # The task/query string handed to the sampling loop.
    query: str


class QueryInput(_QueryInputRequired, total=False):
    """Payload accepted by the ``cua-task`` action.

    ``query`` is required. The remaining keys may be omitted entirely (the
    handler reads them with ``.get``) or set to ``None``; both are treated
    as "off".
    """

    # Forwarded to ``KernelBrowserSession(record_replay=...)``.
    record_replay: Optional[bool]
    # Forwarded as ``kiosk_mode`` to the browser session and the sampling loop.
    kiosk: Optional[bool]
class QueryOutput(TypedDict):
    """Result returned by the ``cua-task`` action."""

    # Final answer reported by the sampling loop, or the text of the last
    # assistant message when no final answer was produced.
    result: str
    # Value of the session's ``replay_view_url``; may be ``None``.
    replay_url: Optional[str]
# Yutori API key, read once when the module is imported. Raising here makes a
# missing key fail at load time instead of partway through a task.
api_key = os.getenv("YUTORI_API_KEY")
if not api_key:
    raise ValueError("YUTORI_API_KEY is not set")

# Kernel application object; actions in this module register on it via
# ``@app.action(...)``.
app = kernel.App("python-yutori-cua")
@app.action("cua-task")
async def cua_task(
    ctx: kernel.KernelContext,
    payload: QueryInput,
) -> QueryOutput:
    """
    Process a user query using Yutori n1 Computer Use with Kernel's browser automation.

    Args:
        ctx: Kernel context containing invocation information
        payload: An object containing:
            - query: The task/query string to process
            - record_replay: Optional boolean to enable video replay recording
            - kiosk: Optional boolean forwarded as ``kiosk_mode`` to both the
              browser session and the sampling loop

    Returns:
        A dictionary containing:
            - result: The result of the sampling loop as a string
            - replay_url: URL to view the replay (if recording was enabled)

    Raises:
        ValueError: If the payload is missing or carries no usable ``query``.
    """
    query = payload.get("query") if payload else None
    # A blank or non-string query cannot be acted on; reject it up front.
    if not isinstance(query, str) or not query.strip():
        raise ValueError("Query is required")

    # The flags may be absent or explicitly ``None``; collapse both to False
    # so downstream code always receives a real boolean.
    record_replay = bool(payload.get("record_replay"))
    kiosk_mode = bool(payload.get("kiosk"))

    async with KernelBrowserSession(
        invocation_id=ctx.invocation_id,
        stealth=True,
        record_replay=record_replay,
        kiosk_mode=kiosk_mode,
    ) as session:
        print("Kernel browser live view url:", session.live_view_url)

        loop_result = await sampling_loop(
            model="n1-latest",
            task=query,
            # ``api_key`` is validated at import time; ``str`` narrows the
            # Optional[str] returned by ``os.getenv``.
            api_key=str(api_key),
            kernel=session.kernel,
            session_id=str(session.session_id),
            viewport_width=session.viewport_width,
            viewport_height=session.viewport_height,
            kiosk_mode=kiosk_mode,
        )

        final_answer = loop_result.get("final_answer")
        messages = loop_result.get("messages", [])

        # Prefer the explicit final answer; otherwise fall back to the last
        # assistant message in the transcript.
        result = final_answer or _extract_last_assistant_message(messages)

    # NOTE(review): the return sits after the session context has exited, on
    # the assumption that ``replay_view_url`` is finalized when the session
    # shuts down - confirm against session.KernelBrowserSession.
    return {
        "result": result,
        "replay_url": session.replay_view_url,
    }
def _extract_last_assistant_message(messages: list) -> str:
for msg in reversed(messages):
if msg.get("role") == "assistant":
content = msg.get("content")
if isinstance(content, str) and content:
return content
return "Task completed"