diff --git a/extension/background.js b/extension/background.js index f336257..0e99005 100644 --- a/extension/background.js +++ b/extension/background.js @@ -98,13 +98,21 @@ async function connect() { desired = false; // no token yet — wait for Auto-connect to fetch one. return; } + let sock; try { - ws = new WebSocket(serverUrl); + sock = new WebSocket(serverUrl); } catch (e) { lastError = "Bad server URL: " + String(e && e.message ? e.message : e); scheduleReconnect(); return; } + ws = sock; + // Every handler below guards on `ws === sock`. Handlers are bound to this + // specific socket but mutate module-level state (`connected`, `lastError`); + // once a newer connect() has replaced the global `ws`, this socket is + // superseded and a late-firing event from it (especially onclose) must NOT + // clobber the live connection's state — that desync is what stranded the + // popup on "Reconnecting…" while the socket was actually up. // Connect-stall watchdog. When the extension lacks host access to the target // (e.g. 127.0.0.1 site access is off in edge://extensions), the WebSocket @@ -113,21 +121,23 @@ async function connect() { if (connectTimer) clearTimeout(connectTimer); connectTimer = setTimeout(() => { connectTimer = null; - if (!connected && ws && ws.readyState !== WebSocket.OPEN) { + if (ws === sock && !connected && sock.readyState !== WebSocket.OPEN) { lastError = "Connection stalled (no response from " + serverUrl + "). " + "Most likely the extension lacks access to this host — open the extensions page › this extension › " + "Site access and allow 127.0.0.1 / localhost (set to 'On all sites'). Then reload the extension and Auto-connect again."; - try { ws.close(); } catch {} + try { sock.close(); } catch {} } }, CONNECT_TIMEOUT_MS); - ws.onopen = () => { + sock.onopen = () => { + if (ws !== sock) return; lastError = ""; - ws.send(JSON.stringify({ type: "hello", token })); + sock.send(JSON.stringify({ type: "hello", token })); }; - ws.onmessage = async (ev) => { + sock.onmessage = async (ev) => { + if (ws !== sock) return; let msg; try { msg = JSON.parse(ev.data); } catch { return; } if (msg.type === "welcome") { @@ -150,21 +160,42 @@ async function connect() { stop(true); return; } + // Server keepalive: the inbound frame itself is what keeps this MV3 worker + // awake (Chrome resets the idle timer on any ws message); the pong lets the + // server confirm we're alive. Receiving a ping also proves we're connected, + // so self-heal the flag in case a stale event left it out of sync. + if (msg.type === "ping") { + if (!connected) { connected = true; chrome.action.setBadgeText({ text: "on" }); } + send({ type: "pong" }); + return; + } + if (msg.type === "pong") { return; } await handleEnvelope(msg); }; - ws.onclose = () => { + sock.onclose = () => { + if (ws !== sock) return; // superseded socket — leave the live one alone if (connectTimer) { clearTimeout(connectTimer); connectTimer = null; } + const wasConnected = connected; connected = false; - if (!lastError) { + chrome.action.setBadgeText({ text: "" }); + // Dropping a live connection is routine (MV3 worker nap, jcode restart) — + // don't cry wolf. Reconnect quietly and let the badge/pill show "Reconnecting…". + // Only surface the hard "can't reach" error once a few attempts in a row + // have failed, i.e. the server really is gone or on a stale port. + if (wasConnected) { + lastError = ""; + reconnectDelay = 1000; + attempts = 0; + } else if (!lastError && attempts >= 3) { lastError = "Could not reach the jcode server. Check that jcode is running and the URL/port is right."; } - chrome.action.setBadgeText({ text: "" }); scheduleReconnect(); }; - ws.onerror = () => { + sock.onerror = () => { + if (ws !== sock) return; lastError = "WebSocket error connecting to " + serverUrl + " — is jcode running there, and does the extension have site access to it?"; - try { ws.close(); } catch {} + try { sock.close(); } catch {} }; } diff --git a/extension/popup/popup.html b/extension/popup/popup.html index 3f112da..a4026f4 100644 --- a/extension/popup/popup.html +++ b/extension/popup/popup.html @@ -42,6 +42,7 @@ .msg { margin-top: 8px; font-size: 12px; line-height: 1.4; padding: 7px 9px; border-radius: 7px; } .msg.err { background: #fdeceb; color: var(--red); } .msg.ok { background: #e6f6ec; color: var(--green); } + .msg.info { background: #f2f0ed; color: var(--ink2); } diff --git a/extension/popup/popup.js b/extension/popup/popup.js index 7ba6c48..7292146 100644 --- a/extension/popup/popup.js +++ b/extension/popup/popup.js @@ -41,7 +41,12 @@ async function refresh() { pill.className = "pill off"; pill.innerHTML = 'Reconnecting…'; $("autoConnect").textContent = "Auto-connect to jcode"; - showMsg((st.lastError ? st.lastError + " " : "") + "Click Disconnect to stop trying.", "err"); + // Only go red on a real failure; a routine reconnect stays neutral. + if (st.lastError) { + showMsg(st.lastError + " Click Disconnect to stop trying.", "err"); + } else { + showMsg("Reconnecting to jcode…", "info"); + } } else { pill.className = "pill off"; pill.innerHTML = 'Offline'; diff --git a/internal/browser/bridge.go b/internal/browser/bridge.go index e9032e0..bd7bb79 100644 --- a/internal/browser/bridge.go +++ b/internal/browser/bridge.go @@ -98,6 +98,7 @@ func (b *Bridge) HandleWS(w http.ResponseWriter, r *http.Request) { b.mu.Unlock() config.Logger().Printf("[browser] extension connected") + go bc.keepAlive() bc.readLoop() b.mu.Lock() @@ -135,6 +136,19 @@ type bridgeEnvelope struct { URL string `json:"url,omitempty"` } +// Keepalive timing. Chrome/Edge kill an MV3 service worker after ~30s idle, and +// an inbound websocket message resets that timer — so a steady server→extension +// ping is what actually keeps the extension worker (and thus the whole bridge) +// alive between browser commands. Without it the worker naps, the socket drops, +// and the popup flaps to "Reconnecting…". keepAliveWait is the read side: if no +// frame (pong, alarm ping, or command reply) arrives within two ping periods, +// treat the extension as dead and tear the socket down. +// vars, not consts, so tests can shrink them. +var ( + keepAlivePing = 15 * time.Second + keepAliveWait = 40 * time.Second +) + type bridgeConn struct { ws *websocket.Conn writeMu sync.Mutex @@ -147,6 +161,35 @@ type bridgeConn struct { closeErr error } +// writeJSON serializes all writes to the socket (gorilla forbids concurrent +// writers) and bounds a stuck write so a wedged peer can't block a ping or a +// command forever. +func (c *bridgeConn) writeJSON(v any) error { + c.writeMu.Lock() + defer c.writeMu.Unlock() + _ = c.ws.SetWriteDeadline(time.Now().Add(10 * time.Second)) + return c.ws.WriteJSON(v) +} + +// keepAlive pings the extension on an interval so its service worker stays awake +// and the socket stays up between commands. It exits when the read loop closes +// the conn. +func (c *bridgeConn) keepAlive() { + t := time.NewTicker(keepAlivePing) + defer t.Stop() + for { + select { + case <-c.closed: + return + case <-t.C: + if err := c.writeJSON(bridgeEnvelope{Type: "ping"}); err != nil { + c.close() // wake the read loop so it tears the conn down + return + } + } + } +} + func newBridgeConn(ws *websocket.Conn) *bridgeConn { ws.SetReadLimit(256 << 20) return &bridgeConn{ @@ -158,6 +201,7 @@ func newBridgeConn(ws *websocket.Conn) *bridgeConn { } func (c *bridgeConn) readLoop() { + _ = c.ws.SetReadDeadline(time.Now().Add(keepAliveWait)) for { var env bridgeEnvelope if err := c.ws.ReadJSON(&env); err != nil { @@ -171,7 +215,12 @@ func (c *bridgeConn) readLoop() { close(c.closed) return } + // Any inbound frame proves the extension is alive; extend the window. + _ = c.ws.SetReadDeadline(time.Now().Add(keepAliveWait)) switch env.Type { + case "ping", "pong": + // Keepalive traffic (the extension's own alarm ping, or a pong to + // ours) — nothing to route. case "cdp.result", "cdp.error", "tabs.result", "tab.result": c.mu.Lock() ch := c.pending[env.ID] @@ -204,9 +253,7 @@ func (c *bridgeConn) request(ctx context.Context, env bridgeEnvelope) (bridgeEnv c.pending[id] = ch c.mu.Unlock() - c.writeMu.Lock() - err := c.ws.WriteJSON(env) - c.writeMu.Unlock() + err := c.writeJSON(env) if err != nil { c.mu.Lock() delete(c.pending, id) diff --git a/internal/browser/bridge_test.go b/internal/browser/bridge_test.go index b19fe06..8424700 100644 --- a/internal/browser/bridge_test.go +++ b/internal/browser/bridge_test.go @@ -144,6 +144,37 @@ func TestBridgeCDPForwarding(t *testing.T) { } } +// TestBridgeKeepAlivePing verifies the server proactively pings the extension so +// its MV3 service worker can't nap the socket shut. A silent extension (no alarm +// ping, no command traffic) must still receive server pings; and if it never +// answers, the read watchdog must eventually drop it. +func TestBridgeKeepAlivePing(t *testing.T) { + oldPing, oldWait := keepAlivePing, keepAliveWait + keepAlivePing, keepAliveWait = 20*time.Millisecond, 120*time.Millisecond + t.Cleanup(func() { keepAlivePing, keepAliveWait = oldPing, oldWait }) + + b, wsURL := bridgeServer(t) + token := b.IssueToken() + fe, ok := dialExtension(t, wsURL, map[string]any{"type": "hello", "token": token}) + if !ok { + t.Fatal("token auth failed") + } + + // Read the first frame the server sends after welcome; it must be a ping. + _ = fe.conn.SetReadDeadline(time.Now().Add(2 * time.Second)) + var env bridgeEnvelope + if err := fe.conn.ReadJSON(&env); err != nil { + t.Fatalf("expected a server ping, got read error: %v", err) + } + if env.Type != "ping" { + t.Fatalf("first server keepalive frame = %q, want \"ping\"", env.Type) + } + + // A silent extension (we stop reading/answering) must be dropped by the read + // watchdog rather than lingering forever. + waitUntil(t, func() bool { return !b.Connected() }) +} + func TestBridgeStableTokenIsStable(t *testing.T) { t.Setenv("HOME", t.TempDir()) b := NewBridge()