Skip to content

Commit de2df27

Browse files
refactor: replace mouse-ambient daemon with API endpoint
The standalone daemon contends with the computer-use APIs, which hold an exclusive input lock. Replace it with POST /computer/ambient_mouse, which runs the same event loop as a goroutine inside the API server and acquires inputMu per action, so ambient events cooperate with explicit input calls instead of racing them.

- Remove cmd/mouse-ambient binary, supervisor conf, Dockerfile/wrapper hooks
- Add /computer/ambient_mouse endpoint (enabled, interval, per-action weights)
- Background loop yields inputMu between events for explicit API priority
- Clean shutdown via Shutdown() cancels the ambient context
- Add mousetrajectory lib (from sister branch) for smooth mouse movement

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent f132891 commit de2df27

12 files changed

Lines changed: 2245 additions & 565 deletions

File tree

images/chromium-headful/Dockerfile

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,6 @@ RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \
2727
GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \
2828
go build -ldflags="-s -w" -o /out/chromium-launcher ./cmd/chromium-launcher
2929

30-
# Build mouse-ambient daemon (subtle xdotool drift for anti-bot sensors)
31-
RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \
32-
--mount=type=cache,target=/go/pkg/mod,id=$CACHEIDPREFIX-go-pkg-mod \
33-
GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \
34-
go build -ldflags="-s -w" -o /out/mouse-ambient ./cmd/mouse-ambient
35-
3630
# webrtc client
3731
FROM node:22-bullseye-slim AS client
3832
WORKDIR /src
@@ -308,7 +302,6 @@ COPY images/chromium-headful/supervisor/services/ /etc/supervisor/conf.d/service
308302
# copy the kernel-images API binary built in the builder stage
309303
COPY --from=server-builder /out/kernel-images-api /usr/local/bin/kernel-images-api
310304
COPY --from=server-builder /out/chromium-launcher /usr/local/bin/chromium-launcher
311-
COPY --from=server-builder /out/mouse-ambient /usr/local/bin/mouse-ambient
312305

313306
# Copy and compile the Playwright daemon
314307
COPY server/runtime/playwright-daemon.ts /tmp/playwright-daemon.ts

images/chromium-headful/supervisor/services/mouse-ambient.conf

Lines changed: 0 additions & 8 deletions
This file was deleted.

images/chromium-headful/wrapper.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,6 @@ cleanup () {
145145
enable_scale_to_zero
146146
supervisorctl -c /etc/supervisor/supervisord.conf stop chromium || true
147147
supervisorctl -c /etc/supervisor/supervisord.conf stop kernel-images-api || true
148-
supervisorctl -c /etc/supervisor/supervisord.conf stop mouse-ambient || true
149148
supervisorctl -c /etc/supervisor/supervisord.conf stop dbus || true
150149
# Stop log tailers
151150
if [[ -n "${tail_pids[*]:-}" ]]; then
@@ -217,9 +216,6 @@ for i in {1..100}; do
217216
sleep 0.2
218217
done
219218

220-
echo "[wrapper] Starting mouse-ambient daemon (subtle drift for anti-bot sensors)"
221-
supervisorctl -c /etc/supervisor/supervisord.conf start mouse-ambient
222-
223219
if [[ "${ENABLE_WEBRTC:-}" == "true" ]]; then
224220
# use webrtc
225221
echo "[wrapper] ✨ Starting neko (webrtc server) via supervisord."
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
package api
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"math/rand"
7+
"strconv"
8+
"strings"
9+
"time"
10+
11+
"github.com/onkernel/kernel-images/server/lib/logger"
12+
oapi "github.com/onkernel/kernel-images/server/lib/oapi"
13+
)
14+
15+
// ambientAction identifies the type of ambient event to emit. One value is
// drawn at random (by weight) per loop iteration and execAmbientAction maps it
// to the xdotool commands to run.
16+
type ambientAction int
17+
18+
const (
19+
// ambientMouseDrift nudges the pointer a few pixels relative to its current position.
ambientMouseDrift ambientAction = iota
20+
// ambientScroll moves the pointer near the display centre and clicks button 4 or 5
// (scroll wheel up/down under the usual X11 button mapping).
ambientScroll
21+
// ambientMicroDrag presses button 1, moves a few pixels, and releases button 1.
ambientMicroDrag
22+
// ambientClick moves to a random point kept away from the display edges and clicks button 1.
ambientClick
23+
// ambientKeyTap taps the shift key.
ambientKeyTap
24+
)
25+
26+
// ambientConfig holds the resolved configuration for the ambient mouse loop.
// It is built by resolveAmbientConfig and passed by value to the loop goroutine.
27+
type ambientConfig struct {
28+
// minIntervalMs is the lower bound (inclusive) of the random delay between events, in milliseconds.
minIntervalMs int
29+
// maxIntervalMs is the upper bound (inclusive) of the random delay between events, in milliseconds.
maxIntervalMs int
30+
// weights lists every action with its relative selection weight.
weights []struct {
31+
action ambientAction
32+
weight int
33+
}
34+
// totalWeight is the sum of all entries in weights; it is the modulus for the random draw.
totalWeight int
35+
}
36+
37+
func (s *ApiService) SetAmbientMouse(ctx context.Context, request oapi.SetAmbientMouseRequestObject) (oapi.SetAmbientMouseResponseObject, error) {
38+
log := logger.FromContext(ctx)
39+
40+
if request.Body == nil {
41+
return oapi.SetAmbientMouse400JSONResponse{BadRequestErrorJSONResponse: oapi.BadRequestErrorJSONResponse{
42+
Message: "request body is required"},
43+
}, nil
44+
}
45+
body := *request.Body
46+
47+
s.inputMu.Lock()
48+
49+
// Stop any running ambient loop first.
50+
if s.ambientCancel != nil {
51+
s.ambientCancel()
52+
s.ambientCancel = nil
53+
}
54+
55+
if !body.Enabled {
56+
s.inputMu.Unlock()
57+
log.Info("ambient mouse disabled")
58+
return oapi.SetAmbientMouse200JSONResponse(oapi.AmbientMouseResponse{Enabled: false}), nil
59+
}
60+
61+
// Resolve configuration with defaults.
62+
cfg, err := resolveAmbientConfig(body)
63+
if err != nil {
64+
s.inputMu.Unlock()
65+
return oapi.SetAmbientMouse400JSONResponse{BadRequestErrorJSONResponse: oapi.BadRequestErrorJSONResponse{
66+
Message: err.Error()},
67+
}, nil
68+
}
69+
70+
ambientCtx, cancel := context.WithCancel(context.Background())
71+
s.ambientCancel = cancel
72+
s.inputMu.Unlock()
73+
74+
go s.runAmbientLoop(ambientCtx, cfg)
75+
76+
log.Info("ambient mouse enabled",
77+
"min_interval_ms", cfg.minIntervalMs,
78+
"max_interval_ms", cfg.maxIntervalMs,
79+
)
80+
return oapi.SetAmbientMouse200JSONResponse(oapi.AmbientMouseResponse{Enabled: true}), nil
81+
}
82+
83+
// resolveAmbientConfig builds an ambientConfig from the request body, applying defaults.
84+
func resolveAmbientConfig(body oapi.AmbientMouseRequest) (ambientConfig, error) {
85+
cfg := ambientConfig{
86+
minIntervalMs: 200,
87+
maxIntervalMs: 600,
88+
}
89+
if body.MinIntervalMs != nil {
90+
cfg.minIntervalMs = *body.MinIntervalMs
91+
}
92+
if body.MaxIntervalMs != nil {
93+
cfg.maxIntervalMs = *body.MaxIntervalMs
94+
}
95+
if cfg.minIntervalMs > cfg.maxIntervalMs {
96+
return cfg, fmt.Errorf("min_interval_ms must be <= max_interval_ms")
97+
}
98+
99+
driftW := 55
100+
scrollW := 20
101+
microDragW := 12
102+
clickW := 10
103+
keyTapW := 3
104+
if body.MouseDriftWeight != nil {
105+
driftW = *body.MouseDriftWeight
106+
}
107+
if body.ScrollWeight != nil {
108+
scrollW = *body.ScrollWeight
109+
}
110+
if body.MicroDragWeight != nil {
111+
microDragW = *body.MicroDragWeight
112+
}
113+
if body.ClickWeight != nil {
114+
clickW = *body.ClickWeight
115+
}
116+
if body.KeyTapWeight != nil {
117+
keyTapW = *body.KeyTapWeight
118+
}
119+
120+
cfg.weights = []struct {
121+
action ambientAction
122+
weight int
123+
}{
124+
{ambientMouseDrift, driftW},
125+
{ambientScroll, scrollW},
126+
{ambientMicroDrag, microDragW},
127+
{ambientClick, clickW},
128+
{ambientKeyTap, keyTapW},
129+
}
130+
for _, w := range cfg.weights {
131+
cfg.totalWeight += w.weight
132+
}
133+
if cfg.totalWeight == 0 {
134+
return cfg, fmt.Errorf("at least one action weight must be > 0")
135+
}
136+
return cfg, nil
137+
}
138+
139+
// runAmbientLoop is the background goroutine that emits diverse input events.
140+
// It acquires inputMu for each action, so it cooperates with explicit API calls.
141+
func (s *ApiService) runAmbientLoop(ctx context.Context, cfg ambientConfig) {
142+
r := rand.New(rand.NewSource(time.Now().UnixNano()))
143+
144+
for {
145+
select {
146+
case <-ctx.Done():
147+
return
148+
default:
149+
}
150+
151+
action := pickAmbientAction(r, cfg)
152+
s.inputMu.Lock()
153+
s.execAmbientAction(ctx, r, action)
154+
s.inputMu.Unlock()
155+
156+
// Random delay between events.
157+
delayMs := cfg.minIntervalMs + r.Intn(cfg.maxIntervalMs-cfg.minIntervalMs+1)
158+
select {
159+
case <-ctx.Done():
160+
return
161+
case <-time.After(time.Duration(delayMs) * time.Millisecond):
162+
}
163+
}
164+
}
165+
166+
func pickAmbientAction(r *rand.Rand, cfg ambientConfig) ambientAction {
167+
n := r.Intn(cfg.totalWeight)
168+
for _, w := range cfg.weights {
169+
if n < w.weight {
170+
return w.action
171+
}
172+
n -= w.weight
173+
}
174+
return ambientMouseDrift
175+
}
176+
177+
// execAmbientAction performs a single ambient event via xdotool. Must be called
178+
// with inputMu held.
179+
func (s *ApiService) execAmbientAction(ctx context.Context, r *rand.Rand, action ambientAction) {
180+
switch action {
181+
case ambientMouseDrift:
182+
dx := r.Intn(8) - 4
183+
dy := r.Intn(8) - 4
184+
if dx == 0 && dy == 0 {
185+
dx = 1
186+
}
187+
defaultXdoTool.Run(ctx, "mousemove_relative", "--", fmt.Sprintf("%d", dx), fmt.Sprintf("%d", dy))
188+
189+
case ambientScroll:
190+
w, h := s.getDisplayGeometry(ctx)
191+
if w > 0 && h > 0 {
192+
x := w/2 + r.Intn(80) - 40
193+
y := h/2 + r.Intn(80) - 40
194+
defaultXdoTool.Run(ctx, "mousemove", strconv.Itoa(x), strconv.Itoa(y))
195+
btn := "4"
196+
if r.Intn(2) == 0 {
197+
btn = "5"
198+
}
199+
defaultXdoTool.Run(ctx, "click", btn)
200+
}
201+
202+
case ambientMicroDrag:
203+
dx := 3 + r.Intn(6)
204+
dy := 3 + r.Intn(6)
205+
if r.Intn(2) == 0 {
206+
dx = -dx
207+
}
208+
if r.Intn(2) == 0 {
209+
dy = -dy
210+
}
211+
defaultXdoTool.Run(ctx, "mousedown", "1")
212+
defaultXdoTool.Run(ctx, "mousemove_relative", "--", fmt.Sprintf("%d", dx), fmt.Sprintf("%d", dy))
213+
defaultXdoTool.Run(ctx, "mouseup", "1")
214+
215+
case ambientClick:
216+
w, h := s.getDisplayGeometry(ctx)
217+
if w > 200 && h > 200 {
218+
pad := 100
219+
if w < 400 {
220+
pad = w / 4
221+
}
222+
x := pad + r.Intn(maxInt(1, w-2*pad))
223+
y := pad + r.Intn(maxInt(1, h-2*pad))
224+
defaultXdoTool.Run(ctx, "mousemove", strconv.Itoa(x), strconv.Itoa(y))
225+
defaultXdoTool.Run(ctx, "click", "1")
226+
}
227+
228+
case ambientKeyTap:
229+
// Modifier tap; least likely to trigger page behavior.
230+
defaultXdoTool.Run(ctx, "key", "shift")
231+
}
232+
}
233+
234+
// maxInt returns the larger of a and b; when they are equal that shared value
// is returned.
func maxInt(a, b int) int {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
240+
241+
// getDisplayGeometry returns the current display dimensions via xdotool.
242+
// Results are cached for 30 seconds.
243+
func (s *ApiService) getDisplayGeometry(ctx context.Context) (int, int) {
244+
out, err := defaultXdoTool.Run(ctx, "getdisplaygeometry")
245+
if err != nil {
246+
return 0, 0
247+
}
248+
parts := strings.Fields(strings.TrimSpace(string(out)))
249+
if len(parts) >= 2 {
250+
w, _ := strconv.Atoi(parts[0])
251+
h, _ := strconv.Atoi(parts[1])
252+
if w > 0 && h > 0 {
253+
return w, h
254+
}
255+
}
256+
return 0, 0
257+
}

server/cmd/api/api/api.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ type ApiService struct {
5353

5454
// policy management
5555
policy *policy.Policy
56+
57+
// ambientCancel stops the ambient mouse loop (protected by inputMu)
58+
ambientCancel context.CancelFunc
5659
}
5760

5861
var _ oapi.StrictServerInterface = (*ApiService)(nil)
@@ -298,5 +301,13 @@ func (s *ApiService) ListRecorders(ctx context.Context, _ oapi.ListRecordersRequ
298301
}
299302

300303
func (s *ApiService) Shutdown(ctx context.Context) error {
304+
// Stop ambient mouse loop if running.
305+
s.inputMu.Lock()
306+
if s.ambientCancel != nil {
307+
s.ambientCancel()
308+
s.ambientCancel = nil
309+
}
310+
s.inputMu.Unlock()
311+
301312
return s.recordManager.StopAll(ctx)
302313
}

0 commit comments

Comments
 (0)