diff --git a/cmd/pyrycode-relay/main.go b/cmd/pyrycode-relay/main.go index ce25d35..b4a3109 100644 --- a/cmd/pyrycode-relay/main.go +++ b/cmd/pyrycode-relay/main.go @@ -45,10 +45,16 @@ func main() { startedAt := time.Now() reg := relay.NewRegistry() + // maxFrameBytes: 256 KiB per-frame read cap. Derivation: + // docs/specs/architecture/29-wsconn-read-limit.md (≤50-message + // message_chunk envelope + routing wrapper, headroom for outliers, + // four orders of magnitude below nhooyr's 32 MiB default). + const maxFrameBytes int64 = 256 * 1024 + mux := http.NewServeMux() mux.Handle("/healthz", relay.NewHealthzHandler(reg, Version, startedAt)) - mux.Handle("/v1/server", relay.ServerHandler(reg, logger, 30*time.Second)) - mux.Handle("/v1/client", relay.ClientHandler(reg, logger)) + mux.Handle("/v1/server", relay.ServerHandler(reg, logger, 30*time.Second, maxFrameBytes)) + mux.Handle("/v1/client", relay.ClientHandler(reg, logger, maxFrameBytes)) if *insecureListen != "" { logger.Info("starting", "version", Version, "mode", "insecure", "listen", *insecureListen) diff --git a/docs/PROJECT-MEMORY.md b/docs/PROJECT-MEMORY.md index 20fddbe..147440d 100644 --- a/docs/PROJECT-MEMORY.md +++ b/docs/PROJECT-MEMORY.md @@ -49,6 +49,7 @@ Stateless WebSocket router between mobile clients and pyry binaries. Internet-ex - **Define narrow read-side interfaces at the consumer, not on the adapter.** `phoneSource` and `binarySource` (`ConnID() + Read(ctx)`) live in `forward.go`, not on `WSConn`. Production passes `*WSConn`; tests substitute a fake without touching `WSConn`. The two interfaces are structurally identical but named distinctly so the `Start*Forwarder` signatures read self-describingly at call sites; promote to a shared interface only if a third caller emerges. Adopted in #25 and #26. - **Per-conn goroutines exit cleanly via LIFO defers, not by closing the conn themselves.** Pattern for any goroutine launched alongside an upgraded WS conn (heartbeat, future binary-side ack pump, etc.): derive `hbCtx, cancelHB := context.WithCancel(r.Context())`, register `defer cancelHB()` *after* the release/unregister defer, then `go runX(hbCtx, wsconn, ...)`. Under LIFO unwind `cancelHB` fires first → goroutine observes `ctx.Done()` and returns without touching the conn → release defer's `wsconn.Close()` runs unopposed. The goroutine owns ONLY the failure-path close (heartbeat: `CloseWithCode(1011, "heartbeat timeout")`); cleanup-path close is the handler defer's. `closeOnce` makes the two paths idempotent against each other regardless of which fires first. Adopted in heartbeat (#7); same shape applies to any future per-conn watchdog/timer goroutine. - **Active-conn application close codes go through `WSConn.CloseWithCode`; stillborn-conn close codes go through the underlying `*websocket.Conn`.** ADR-0005's stillborn-conn pattern (`c.Close(websocket.StatusCode(4409), reason)` directly on the underlying conn before any `Send` could have run) coexists with ADR-0007's active-conn pattern (`wsconn.CloseWithCode(1011, "heartbeat timeout")` post-claim, where `closeOnce`/`writeMu` invariants are in play). Both share the same `closeOnce` guard via `Close()` delegating to `CloseWithCode(StatusNormalClosure, "")`. The two patterns describe structurally distinct windows of a WSConn's lifecycle. Adopted: stillborn `4409`/`4404` (#16, #5); active `1011` (#7). +- **Input-bounding policy applied at the adapter constructor, not per-handler.** When a library default is dangerously generous (nhooyr's 32 MiB `SetReadLimit` default), set the policy at the single chokepoint *both* endpoints reach — the wrapping adapter's constructor — rather than at each handler entry. `NewWSConn(c, connID, maxFrameBytes)` calls `c.SetReadLimit(maxFrameBytes)` before returning the struct, which structurally discharges "before any `Read` is performed": no goroutine other than the constructor holds a reference at that point. Distinct from the "policy values live at the wiring site" pattern (which is about *literal placement* — the value still lives as a `const` in `main`); this one is about *enforcement placement* — apply at the choke point so neither handler can forget. Adopted in `WSConn` (#29). Same shape applies to any future "bound the adversarial input at the wrapping seam" policy (e.g. max-frames-per-second, if it lands). - **Capture process-state timestamps in `main` after `flag.Parse()`, not as package-level vars.** `startedAt := time.Now()` lives inside `main` and is passed into the handler factory. A package-level `var startedAt = time.Now()` would fire at import time — before flag parsing, before `--version` early-returns — and be wrong for short-lived test binaries and any future deferred-serve setup. Adopted in #10. ## Conventions diff --git a/docs/knowledge/codebase/29.md b/docs/knowledge/codebase/29.md new file mode 100644 index 0000000..b29ccad --- /dev/null +++ b/docs/knowledge/codebase/29.md @@ -0,0 +1,42 @@ +# Ticket #29 — `SetReadLimit` on `WSConn`: 256 KiB per-frame cap + +Retires the "no per-message size cap on `Read`" follow-up flagged in `ws-conn-adapter.md` since #15. `nhooyr.io/websocket`'s 32 MiB default read limit was two orders of magnitude above the largest legitimate pyrycode envelope (`message_chunk` at ≤50 messages, text-only payloads — worst case comfortably under 256 KiB with the routing wrapper). With both forwarders now landed (#25, #26), the cap is applied at the single chokepoint both reach: `NewWSConn`. + +## Implementation + +- **`internal/relay/ws_conn.go`** — `NewWSConn` gains `maxFrameBytes int64` and calls `c.SetReadLimit(maxFrameBytes)` as its first statement, *before* the struct is returned. This discharges the "before any `Read` is performed against it" guarantee structurally: no goroutine other than the constructor holds a reference at that point. Type is `int64` because that is the exact `*websocket.Conn.SetReadLimit` signature — no conversion at the call site. Doc-comment on `NewWSConn` names the on-overlimit contract (library closes with `StatusMessageTooBig` / 1009; next `Read` surfaces a non-nil error). +- **`internal/relay/server_endpoint.go`** — `ServerHandler` gains `maxFrameBytes int64` after `grace`, threaded into `NewWSConn`. Doc-comment points to the spec for derivation. +- **`internal/relay/client_endpoint.go`** — `ClientHandler` gains `maxFrameBytes int64` after `logger`, threaded into `NewWSConn`. Doc-comment points to the spec. +- **`cmd/pyrycode-relay/main.go`** — single `const maxFrameBytes int64 = 256 * 1024` at the composition root, with an inline comment summarising the derivation and pointing to `docs/specs/architecture/29-wsconn-read-limit.md`. Same shape as the `30*time.Second` grace literal (#21): one literal, one place, no package-level constant in `internal/relay`. Mux registrations pass the const into both handlers. +- **`internal/relay/ws_conn_test.go`** — `startEcho` gains a `maxFrameBytes int64` parameter and is rebuilt to echo received frames back to the client (in addition to the existing push-onto-channel behaviour). The echo path lets the cap tests round-trip an oversize frame through the server so the *client-side* cap surfaces on the receiving `WSConn.Read`. Four existing callers updated to pass the production literal (`256 * 1024`); cap tests pass tight values (64 / 256 bytes). New tests: + - `TestWSConn_Read_FrameExceedingCap_ReturnsError` — cap=64, send 256-byte payload; assert first `Read` returns non-nil error AND a subsequent `Read` (under a tight ctx) also returns non-nil. Error type is **not** asserted on (library-dependent wrapping; the spec's "subsequent reads fail" contract is structurally distinct from the close-error type). + - `TestWSConn_Read_FrameAtCap_DeliveredIntact` — cap=256, send exactly 256 bytes; assert it arrives byte-stable on the server-channel and the echoed frame returns to the client with `err == nil`. +- **`internal/relay/heartbeat_test.go`, `internal/relay/server_endpoint_test.go`, `internal/relay/client_endpoint_test.go`** — mechanical signature updates for the new `NewWSConn` / `ServerHandler` / `ClientHandler` parameter. Production-sized cap (`256 * 1024`) at every test wiring site; no test-only flag/hook. +- **`docs/knowledge/features/ws-conn-adapter.md`** — retired the "No per-message size cap on `Read`" bullet from § *What the adapter deliberately does NOT do*; added a "Per-frame read cap" entry to § *Adversarial framing* documenting the 256 KiB policy, the wiring-site literal, the library's 1009-close behaviour, and the spec link. + +## Cap derivation (256 KiB) + +From `pyrycode/pyrycode/docs/protocol-mobile.md` § *Backfill semantics* (l.501–510) — `message_chunk` envelopes are bounded at ≤50 messages, v1 is text-only (l.327). A generous-but-not-absurd assistant message at ≤4 KiB UTF-8 gives 50 × 4 KiB ≈ 200 KiB. Plus envelope keys, JSON whitespace, and the binary-side routing wrapper (`conn_id` + `frame`, ~80 bytes) stays comfortably under 256 KiB. All other envelopes (`hello`, `hello_ack`, `error`, `send_message`, `ack`, `register_push_token`, `backfill_since`, `backfill_done`) are <2 KiB. 256 KiB is one order of magnitude above the worst-case legitimate envelope, four orders below the library default. Single value covers both forwarders; differentiation is deferred until binary-side framing demonstrates a different bound in practice. + +## Concurrency + +No new concurrency. `SetReadLimit` mutates state on `*websocket.Conn` once, on the construction goroutine, before any reference escapes. `nhooyr.io/websocket` does not document `SetReadLimit` as needing concurrency protection, and the post-construction `WSConn` API never calls it again. + +## Failure semantics + +When a peer sends an over-cap frame, `nhooyr.io/websocket` closes the underlying conn with `StatusMessageTooBig` (1009) and the next `WSConn.Read` returns a non-nil error. The forwarder loop sees the error and returns; the handler's existing `defer { Unregister/ScheduleRelease; Close; log }` (#16/#21/#5) runs in the same LIFO chain it always has. No new defer, no new code path, no new log call. `wsconn.Close()` in the defer is idempotent under `closeOnce` against the library's already-performed close. + +## Deliberately out of scope + +- **Per-IP / per-server-id rate limiting.** Connection-count caps remain separate. +- **Slow-loris hardening on the WS upgrade handshake.** `http.Server.ReadHeaderTimeout` already covers the pre-upgrade phase; post-upgrade slow-drain is bounded by the forwarder's read context. +- **Differentiating phone vs. binary caps.** Single value covers both; revisit when binary-side framing demands a different bound. +- **Operator-configurable via flag.** Promote from `const` only when a second operator needs a different value (current "literal at wiring site" pattern, #21). + +## Cross-links + +- [Feature: WSConn adapter](../features/ws-conn-adapter.md) — § *Adversarial framing* documents the live policy; § *What the adapter deliberately does NOT do* no longer lists this as a follow-up. +- [Spec: 29-wsconn-read-limit](../../specs/architecture/29-wsconn-read-limit.md) — architect's derivation and security review. +- [ADR-0004](../decisions/0004-ws-library-and-adapter-context-strategy.md) — adapter-owned context strategy that `SetReadLimit` slots into structurally (one more piece of policy applied at constructor time). +- [Phone-side frame forwarder](../features/phone-forwarder.md) / [Binary-side frame forwarder](../features/binary-forwarder.md) — the two callers of `WSConn.Read` whose pre-#29 residual this retires. +- [Protocol spec § Backfill semantics](https://github.com/pyrycode/pyrycode/blob/main/docs/protocol-mobile.md#backfill-semantics) — the upper-bound source for the 256 KiB derivation. diff --git a/docs/knowledge/features/ws-conn-adapter.md b/docs/knowledge/features/ws-conn-adapter.md index 52650ea..bcb413e 100644 --- a/docs/knowledge/features/ws-conn-adapter.md +++ b/docs/knowledge/features/ws-conn-adapter.md @@ -11,7 +11,7 @@ Package `internal/relay` (`ws_conn.go`): ```go type WSConn struct { /* *websocket.Conn + connID + writeMu + closeOnce + closeCtx */ } -func NewWSConn(c *websocket.Conn, connID string) *WSConn +func NewWSConn(c *websocket.Conn, connID string, maxFrameBytes int64) *WSConn func (w *WSConn) ConnID() string func (w *WSConn) Send(msg []byte) error @@ -66,7 +66,6 @@ These are documented so the next contributor doesn't add defensive code that doe - **No handshake / header validation / subprotocol selection.** Lives at the upgrade boundary (#4/#16, #5). - **No heartbeat policy.** The adapter exposes `Ping(ctx)` as a pure forwarder; the *policy* (interval, timeout, what to do on failure) lives in `runHeartbeat` in `internal/relay/heartbeat.go` (#7). The adapter does not own the heartbeat goroutine, the ticker, or the close decision — it only provides the pinging primitive. - **No read-side frame loop or envelope wrap/unwrap.** `Read` is a single-frame primitive; the loop and envelope wrapping live in `internal/relay/forward.go` ([phone-forwarder.md](phone-forwarder.md)). -- **No per-message size cap on `Read`.** Inherited from `*websocket.Conn`'s default (nhooyr's 32 MiB read limit). A deliberate `SetReadLimit` policy is a follow-up so it covers both forwarders. - **No per-conn send queue / backpressure / rate limit.** `Send` writes synchronously and returns. None of those are in the registry's `Conn` contract. - **No close-on-`Send`-error.** Caller observes the error and chooses to call `Close`. - **No close-code semantics beyond `StatusNormalClosure`.** The adapter doesn't know why the registry asked it to close. Close-code mapping (`4401`/`4404`/`4409`) is the upgrade handler's job. @@ -86,12 +85,13 @@ The adapter is on the hot path: every routed frame passes through `WSConn.Send`. - **`Close` racing with mid-`Write` `Send`.** Library is documented to handle this; relying on that property is explicit. If the property regresses in a future library version, the race test surfaces a `DATA RACE`. - **Same `*websocket.Conn` wrapped by two `WSConn` constructors.** Each gets its own `writeMu`; serial-write guarantee is broken; the wire interleaves. Caller-invariant; not enforced. - **Adversarial `connID` bytes.** Stored and returned as-is; no interpretation. Charset/length is the conn-id-scheme ticket's job. +- **Per-frame read cap.** `NewWSConn` applies `*websocket.Conn.SetReadLimit(maxFrameBytes)` before returning. Production wires **256 KiB** as a `const` in `cmd/pyrycode-relay/main.go` (derivation: `pyrycode/pyrycode/docs/protocol-mobile.md` § *Backfill semantics* — `message_chunk` envelopes are bounded at ≤50 messages with text-only payloads, worst case comfortably under 256 KiB once routing-envelope overhead is added; see `docs/specs/architecture/29-wsconn-read-limit.md`). Setting the cap at the constructor — the single chokepoint both forwarders reach — discharges the "cover every reader" guarantee without per-handler code. On an over-cap frame the library closes with `StatusMessageTooBig` (1009); subsequent `Read` calls surface a non-nil error. The relay does not emit its own close code on this path. The `security-sensitive` label was applied because `Send` is on every routed-frame path. Verdict from review: PASS — one new code-level slow-loris mitigation, no widening of documented threat surface beyond the supply-chain cost the threat model already names for adding any WS library. ## Testing -`internal/relay/ws_conn_test.go`, `package relay`. End-to-end against a real `*websocket.Conn` via `httptest.NewServer` — no library mocks. The `startEcho` helper spins up a server whose handler runs a tiny read-loop pushing received frames onto a buffered channel; the test gets back a connected `*WSConn` (client side) and the channel. +`internal/relay/ws_conn_test.go`, `package relay`. End-to-end against a real `*websocket.Conn` via `httptest.NewServer` — no library mocks. The `startEcho` helper spins up a server whose handler runs a tiny read-loop pushing received frames onto a buffered channel *and* echoing them back to the client; the test gets back a connected `*WSConn` (client side, capped at the caller-supplied `maxFrameBytes`) and the channel. The echo path lets cap tests round-trip a frame so the client-side cap surfaces on `Read`. Tests (1:1 with the AC): @@ -99,6 +99,8 @@ Tests (1:1 with the AC): - `TestWSConn_ConcurrentSend_ProducesIntactFrames` — N=16 goroutines each `Send` a unique tagged payload; the test asserts N intact, distinct frames. Race detector under `-race` is the primary signal; interleaved frames would surface as malformed messages or corrupted writes. - `TestWSConn_DoubleClose_DoesNotPanic`. - `TestWSConn_SendAfterClose_ReturnsError` — non-nil error is the contract; the specific error type is library-dependent (any of `context.Canceled`, a closed-connection error, or a wrapped variant) and not asserted on. +- `TestWSConn_Read_FrameExceedingCap_ReturnsError` — round-trips an oversize frame and asserts the first AND a subsequent `Read` both return non-nil. The error type is not asserted on (library-dependent close-error wrapping). +- `TestWSConn_Read_FrameAtCap_DeliveredIntact` — round-trips a frame whose payload is exactly at the cap; asserts the server-side channel receives the bytes intact and the client-side `Read` of the echoed frame returns the same bytes with `err == nil`. What we deliberately do not test: the library's behaviour itself (we trust `Write` to honour `ctx` and `Close` to be safe with in-flight `Write`); unit-level mocks of the library; performance. diff --git a/docs/lessons.md b/docs/lessons.md index 27db8ce..0c7f3ea 100644 --- a/docs/lessons.md +++ b/docs/lessons.md @@ -78,6 +78,10 @@ RFC 7230 §5.4 allows it. When comparing to a configured hostname, strip the por The library's auto-pong machinery runs inline with `Read` (or `CloseRead`'s background drain). A peer that completes the handshake and then never reads — even just to discard — cannot respond to incoming pings, because the ping frame sits in the kernel's TCP buffer unobserved. This is exactly what makes RFC 6455 ping/pong a useful liveness signal: a wedged peer that has stopped processing the conn fails the heartbeat structurally, not just because it "chose not to" pong. Implication for tests: to test the *unresponsive-peer* path, the test client must NOT read; to test the *healthy-peer* path, the client must read (or `CloseRead`) so the library auto-pongs. Reverse the two and the assertions swap meanings. Source: `internal/relay/heartbeat_test.go` (#7). +## `nhooyr.io/websocket` ships with a 32 MiB default per-frame read limit — explicit cap, applied at the constructor + +The library's `*websocket.Conn` accepts frames up to ~32 MiB out of the box. For a relay routing typed envelopes (worst-case `message_chunk` ≈ 200 KiB), that default lets a misbehaving peer pin two orders of magnitude more read buffer than any legitimate message needs. `SetReadLimit(n)` adjusts the cap; on an over-cap frame the library closes with `StatusMessageTooBig` (1009) and surfaces a non-nil error on the next `Read`. Apply it inside the wrapping adapter's constructor *before the struct returns* — that closes the window where a `Read` could fire against an uncapped conn without each handler having to remember. The cap value belongs at the composition root (`cmd/.../main.go` as a `const`), threaded through the handler constructor: one literal, one place, no package-level constant in the relay package. Source: `NewWSConn(c, connID, maxFrameBytes)` (#29). + ## `autocert.Manager.TLSConfig()` doesn't set `MinVersion` `gosec` G402 fires on `make lint` if you use it raw. Wrap it in a helper that pins `MinVersion = tls.VersionTLS12` (or 1.3) before handing it to `http.Server`. Centralising the override means a future bump is a one-line change. Source: `relay.TLSConfig` (#9). diff --git a/docs/specs/architecture/29-wsconn-read-limit.md b/docs/specs/architecture/29-wsconn-read-limit.md new file mode 100644 index 0000000..0f3529b --- /dev/null +++ b/docs/specs/architecture/29-wsconn-read-limit.md @@ -0,0 +1,211 @@ +# Spec: SetReadLimit on WSConn — per-frame size cap (#29) + +## Files to read first + +- `internal/relay/ws_conn.go:44-52` — `NewWSConn` constructor; this is the single application site for `SetReadLimit`. +- `internal/relay/ws_conn.go:81-84` — `WSConn.Read`; the cap surfaces here as a non-nil error from the library. +- `internal/relay/server_endpoint.go:35-59` — `ServerHandler` signature + `NewWSConn` call; one of the two production wiring threads. +- `internal/relay/client_endpoint.go:27-51` — `ClientHandler` signature + `NewWSConn` call; the other thread. +- `cmd/pyrycode-relay/main.go:23-51` — the wiring site. Mirrors the `30*time.Second` grace literal pattern (`ServerHandler(reg, logger, 30*time.Second)`). +- `internal/relay/ws_conn_test.go:20-54` — `startEcho` harness; the cap tests reuse it. +- `internal/relay/heartbeat_test.go:30-48` — `NewWSConn` call inside `startHeartbeatPair`; mechanical signature update. +- `internal/relay/server_endpoint_test.go:21-28` — `startServer` calls `ServerHandler`; signature update. +- `internal/relay/client_endpoint_test.go:21-28` — `startClient` calls `ClientHandler`; signature update. +- `internal/relay/forward.go` — confirms `phoneSource.Read` / `binarySource.Read` are the sole `WSConn.Read` callers, so cap enforcement at construction time covers both forwarders without per-call-site changes. +- `docs/knowledge/features/ws-conn-adapter.md` § *Out of scope* (the "No per-message size cap on `Read`" bullet — this ticket retires that follow-up) and § *What the adapter deliberately does NOT do* (the bullet must move to *Adversarial framing* once shipped). +- `pyrycode/pyrycode/docs/protocol-mobile.md` § *Message envelope* (l.177–201) and § *`message_chunk`* (l.456–467) + *Backfill semantics* (l.501–510): the upper-bound derivation below leans on the `≤ 50 messages per envelope` rule. + +## Context + +`*websocket.Conn` from `nhooyr.io/websocket` ships with a default 32 MiB read limit. That is two orders of magnitude larger than the largest expected pyrycode envelope and gives a misbehaving peer a generous pin on relay read memory. Per [`ws-conn-adapter.md`](../../knowledge/features/ws-conn-adapter.md) § *Out of scope*, a deliberate `SetReadLimit` policy was deferred until both forwarders existed; #25 (phone-side) and #26 (binary-side) have landed, so the policy now covers a single chokepoint. + +`NewWSConn` is the only construction site through which either handler reaches the wire. Applying `SetReadLimit` there — rather than at each handler — is structurally simpler (one call, one literal, no forgetting one side) and is the seam the AC selects. + +## Design + +### Cap value: 256 KiB + +Derived from `pyrycode/pyrycode/docs/protocol-mobile.md`: + +- The relay frames inbound traffic on **both** sides as the inner envelope wrapped (by the receiving handler's *peer*) in either the routing envelope (binary → relay) or the bare inner envelope (phone → relay). The cap applies to the raw WS frame the relay reads, which is in all cases ≤ the largest envelope plus a small wrapper. +- The largest envelope type defined in the spec is `message_chunk`: § *Backfill semantics* fixes the default chunk size at **≤ 50 messages per envelope** (l.507). Each `message.payload` carries `conversation_id`, `message_id`, `role`, and `text`; v1 is text-only (no attachments, l.327). Treating a generous-but-not-absurd assistant message at ≤ 4 KiB UTF-8, 50 × 4 KiB ≈ 200 KiB. +- Adding JSON whitespace, envelope keys, and the routing wrapper (`conn_id` + `frame` keys, ~80 bytes) keeps the worst-case inbound frame comfortably under 256 KiB while leaving headroom for outliers (a single unusually long assistant message inside a smaller chunk). +- All other envelopes (`hello`, `hello_ack`, `error`, `send_message`, `ack`, `register_push_token`, `backfill_since`, `backfill_done`) are < 2 KiB by inspection of the spec examples. +- 256 KiB is **four orders of magnitude** below the default 32 MiB, and one order of magnitude above the protocol's worst-case legitimate envelope. The pad absorbs future text-bearing fields without a policy change. + +This is a single policy value: phone-side and binary-side share it, because the binary's outbound envelopes (routing-wrapped `message_chunk`) and the phone's outbound envelopes are both bounded by the same `message_chunk` worst case. If binary-side framing later demonstrates a different bound in practice (e.g. a future streaming chunk type with a larger envelope), it gets a separate ticket per the ticket's "Out of scope" note. + +### API changes + +`internal/relay/ws_conn.go` — `NewWSConn` gains a `maxFrameBytes int64` parameter and applies the cap before returning: + +```go +func NewWSConn(c *websocket.Conn, connID string, maxFrameBytes int64) *WSConn { + c.SetReadLimit(maxFrameBytes) + ctx, cancel := context.WithCancel(context.Background()) + return &WSConn{ + conn: c, + connID: connID, + closeCtx: ctx, + cancel: cancel, + } +} +``` + +The call must precede the first `WSConn.Read` (which, by the single-reader contract, only fires from inside the forwarder once the handler launches it). Calling it at the top of `NewWSConn` — before the struct is even returned — discharges the AC's "before any `Read` is performed against it." + +Type is `int64` because that is the exact signature of `nhooyr.io/websocket.Conn.SetReadLimit`. No conversion at the call site. + +`internal/relay/server_endpoint.go` — `ServerHandler` gains a `maxFrameBytes int64` parameter and threads it into `NewWSConn`: + +```go +func ServerHandler(reg *Registry, logger *slog.Logger, grace time.Duration, maxFrameBytes int64) http.Handler { + return http.HandlerFunc(func(...) { + ... + wsconn := NewWSConn(c, connID, maxFrameBytes) + ... + }) +} +``` + +`internal/relay/client_endpoint.go` — `ClientHandler` gains `maxFrameBytes int64` identically. Both handlers close over the captured value; the closure is created once per `mux.Handle` registration. + +`cmd/pyrycode-relay/main.go` — single literal at the wiring site, mirroring the existing `30*time.Second` pattern: + +```go +const maxFrameBytes = 256 * 1024 // 256 KiB; see docs/specs/architecture/29-wsconn-read-limit.md + +mux.Handle("/v1/server", relay.ServerHandler(reg, logger, 30*time.Second, maxFrameBytes)) +mux.Handle("/v1/client", relay.ClientHandler(reg, logger, maxFrameBytes)) +``` + +The literal lives in `main` as a `const` (not a `var`, not a flag) so it is compile-time and visible at the composition root. The inline comment points to this spec for the derivation. A separate package-level constant in `internal/relay` is explicitly NOT introduced (the AC forbids it and the established convention in [project-memory.md] § *Patterns established* — "Policy values live at the wiring site" — endorses keeping the literal here until a second wiring entry point needs the same value). + +### Data flow + +``` +phone WS frame ──► nhooyr Conn.Read ──► (library checks SetReadLimit) + │ + over cap ───┤ + ├──► close handshake w/ 1009 + │ (library; no relay code) + ▼ + WSConn.Read returns non-nil err + │ + ▼ + StartPhoneForwarder returns + │ + ▼ + handler defer { UnregisterPhone; Close; log } + +binary WS frame ──► same path through StartBinaryForwarder +``` + +No new code paths are introduced. The library performs the close-with-1009 itself; the relay observes only the surfaced `Read` error. The handler's existing LIFO defer chain (registered after the successful claim/register) runs unchanged. + +### Concurrency model + +No change. `SetReadLimit` mutates state on the `*websocket.Conn` and is called once, on the construction goroutine, before any other goroutine has a reference to the `WSConn`. The library does not document `SetReadLimit` as needing concurrency protection, and the post-construction `WSConn` API never calls it again — there is no race. + +### Error handling + +- `WSConn.Read` returns whatever non-nil error the library surfaces for an oversize frame. The current spec for `nhooyr.io/websocket` v1.8.x close-fires with `StatusMessageTooBig` (1009) and the next `Read` returns a `*websocket.CloseError`, but the AC explicitly does NOT assert on the specific type — only on the non-nil contract. Concretely the test only asserts `err != nil`. This protects the test against library wrapping changes between minor versions. +- After the overlimit `Read`, the underlying conn is closed by the library. The next `Read` call returns the same close error. The forwarder loop sees the first error and returns; no further `Read` is issued. The test still asserts that a *subsequent* `Read` also returns a non-nil error, because that is the contract the AC enumerates ("subsequent reads fail") and because if a future library version starts auto-reopening after a 1009 we want a test signal. +- Handler-side: the AC requires no defer additions in `client_endpoint.go`. The existing defer (`UnregisterPhone`; `wsconn.Close()`; log) is sufficient: the library has already torn down the conn, and `wsconn.Close()` is idempotent under `closeOnce`. `UnregisterPhone` runs regardless. + +### Testing strategy + +Two new tests in `internal/relay/ws_conn_test.go`, plus mechanical signature updates at all `NewWSConn` and `ServerHandler` / `ClientHandler` call sites. + +**`TestWSConn_Read_FrameExceedingCap_ReturnsError`** + +- Uses `startEcho` with a small cap (e.g. 64 bytes — small enough that the test runs fast and the failure is unambiguous; the cap is a parameter, not the production 256 KiB). +- The test calls `wc.Send()` so the echo handler receives it and echos it back; the **echo server** does not have the limit applied (only the client-side `WSConn` does), so the oversize frame round-trips and arrives at `wc.Read`. The receiving side surfaces the cap. +- Asserts: first `wc.Read` returns `err != nil`; second `wc.Read` (with a tight context, e.g. 500 ms) also returns `err != nil`. +- Discriminator: the test asserts *only* that errors are non-nil; the library's specific error type (close-error, library-wrapped-context-cancelled, etc.) is not asserted. + +`startEcho` already returns the **client-side** `WSConn`. The cap is being applied on the client-side. The echo path (client → server → echo back → client receives) means the cap is enforced when the client reads the echoed-back oversize frame. This is the simplest way to exercise the read-side cap with the existing harness — the alternative (sending raw oversize frames from the server side) would require a custom server harness and add complexity for no test-value gain. + +To make `startEcho` parametric on the cap, change its signature: + +```go +func startEcho(t *testing.T, maxFrameBytes int64) (*WSConn, <-chan []byte, func()) +``` + +Existing callers pass the production literal (`256 * 1024`); the over-cap test passes a tiny value (64). The change is a single-line edit at four existing call sites inside `ws_conn_test.go`. + +**`TestWSConn_Read_FrameAtCap_DeliveredIntact`** + +- `startEcho` with cap = 256 bytes; send a frame of exactly 256 bytes; assert it arrives on the channel intact (existing pattern) AND that a subsequent `wc.Read` against a freshly-sent in-cap frame returns the same bytes with `err == nil`. + +**Existing tests:** `TestWSConn_ConnID_ReturnsConstructorValue` currently calls `NewWSConn(nil, "abc")`. With the new signature `NewWSConn(c, connID, maxFrameBytes)` and `SetReadLimit` called inside the constructor, passing `nil` for `c` would NPE. Options: + +1. Keep the test by changing it to use `startEcho` and asserting `wc.ConnID() == "test-conn-id"`. +2. Delete the test — the assertion is then covered by `startEcho`'s constructor call and any test that uses `wc.ConnID()`. + +Option (1) is the minimal-disruption path: rename to `TestWSConn_ConnID_ReturnsConstructorValue` (unchanged), swap the body to use the echo harness with a unique connID. The test still locks the constructor's connID round-trip. + +**Forwarder tests:** the existing forwarder test pattern continues to work because `phoneSource` / `binarySource` are local interfaces (`internal/relay/forward.go`) and the existing forwarder tests substitute fakes that do not go through `NewWSConn` — those tests are unaffected by the signature change. + +**`make test -race`, `make vet`, `make build`**: required clean per AC. + +### Out of scope (reaffirming the ticket's notes) + +- Per-IP / per-server-id rate limiting. +- Slow-loris hardening on the upgrade handshake. +- Differentiating phone vs. binary caps. +- Operator-configurable cap via flag. +- Per-IP / per-server-id connection count caps. + +### Doc updates the developer must make + +After implementation, `docs/knowledge/features/ws-conn-adapter.md` needs two edits: + +1. Remove the "No per-message size cap on `Read`" bullet from § *What the adapter deliberately does NOT do*. +2. Add a "Per-frame read cap" entry to § *Adversarial framing* documenting the 256 KiB policy, the wiring-site literal, and the library's 1009 close behaviour. + +(These doc edits are part of "make the spec true," not a separate ticket.) + +### Open questions + +None. The cap value is justified inline; the signature plumbing is the natural seam the AC names; the test surface fits the existing harness with one parametrisation. + +--- + +## Security review (label: `security-sensitive`) + +### Trust boundaries this spec touches + +| Boundary | Trust posture | Where enforced | +|---|---|---| +| Phone WS frame → `WSConn.Read` on `/v1/client` | Adversarial. Phone is internet-facing, no authentication at the relay layer. | Library, via `SetReadLimit(256*1024)` set in `NewWSConn`. | +| Binary WS frame → `WSConn.Read` on `/v1/server` | Lower-trust-but-not-trusted. Binary completed the header gate; relay does not verify token. | Same chokepoint. | +| Handler defer chain after over-cap `Read` | Already trust-bounded by #5 / #16 / #21 / #25 / #26; this spec adds no new paths. | Existing `defer { Unregister/ScheduleRelease; Close; log }`. | + +### Adversarial walk + +**(1) DoS — read-side memory pinning.** Before: a peer can send up to ~32 MiB per frame. A handful of concurrent connections each sending a single 32 MiB frame ties up ~tens-of-MiB-per-conn of read buffer until the read completes or fails. With the 256 KiB cap, the equivalent attack pins ≤256 KiB per connection — 128× reduction. Connection-count caps remain out of scope (separate ticket), so the attack surface narrows but is not closed; this is acceptable because (a) the ticket scopes this defence narrowly and (b) the upstream HTTP server already has its own read timeouts (`ReadTimeout: 60s` in `main.go`). + +**(2) DoS — slow drain.** A peer could try to send 256 KiB very slowly to keep one connection pinned. Mitigation: the library's read context (passed by the forwarder) bounds total wait; `nhooyr.io/websocket.Conn.Read` honours ctx cancellation. No new code needed — this property is unchanged from pre-#29. + +**(3) Amplification across forwarders.** Pre-#29, an oversize phone frame translates (via `StartPhoneForwarder` → `Marshal` → `BinaryFor.Send`) into a Marshal allocation of similar size and a write of similar size to the binary. The 256 KiB read cap on the phone side therefore also caps the synthesised write to the binary, eliminating the cross-forwarder amplification path the ticket flags. The symmetric path (binary → phone) is identically capped via `StartBinaryForwarder`. + +**(4) Differentiated cap bypass.** Single policy value means no per-side mismatch can be exploited. Adding a separate larger cap on one side is explicitly deferred. + +**(5) `SetReadLimit` call placement.** The cap is applied inside `NewWSConn`, before the struct is returned, before any goroutine other than the constructor has a reference to the `WSConn`. There is no window in which a `Read` could fire against an uncapped conn — the construction site discharges the "before any `Read`" AC. + +**(6) Trust of the library's enforcement.** `nhooyr.io/websocket` v1.8.x is the same direct dep called out in `docs/threat-model.md` § *Supply chain — Go dependencies*. The supply-chain risk is already named. `make lint` (`govulncheck`) covers known CVEs. No new dependency. + +**(7) Error-string leakage.** The library's close error (or its message) is logged via the existing forwarder logging (`forward.go` logs the cause on return). No user-controlled bytes from the oversize payload appear in any log — the library surfaces a typed close-error, not the offending payload. No new log call is added. + +**(8) Test cap of 64 / 256 bytes.** The test parametrises the cap so production code paths under test use small values. The production literal remains 256 KiB. No test-only flag or hook is added; the cap is a constructor parameter, not a global. + +**(9) Idempotence of close.** Post-overlimit, the library closes the underlying conn. The handler's `wsconn.Close()` in the defer is idempotent under `closeOnce`. The active-conn close (the library's) and the handler's clean-close-on-exit collapse to a single `*websocket.Conn.Close` — no double-close anomaly. + +**(10) Behavioural drift from library upgrade.** If a future minor version changes the wrapped error type or the specific close code, the tests still pass (they assert only `err != nil`). If the library starts auto-reopening the conn (extremely unlikely), the "subsequent reads fail" assertion surfaces the regression. + +### Findings + +None. Verdict: **PASS**. + +The change is a one-line library call applied at the single existing chokepoint, with a literal threaded from one wiring site. No new code paths, no new logs, no new failure modes beyond the one the ticket exists to introduce. diff --git a/internal/relay/client_endpoint.go b/internal/relay/client_endpoint.go index 3c11e25..8273ff3 100644 --- a/internal/relay/client_endpoint.go +++ b/internal/relay/client_endpoint.go @@ -24,7 +24,10 @@ import ( // connection, registers the phone in reg under the requested server-id, and // holds the connection open until the phone closes it (or the registry tears // it down on binary-grace expiry). -func ClientHandler(reg *Registry, logger *slog.Logger) http.Handler { +// +// maxFrameBytes is the per-frame read cap threaded into NewWSConn; see +// docs/specs/architecture/29-wsconn-read-limit.md for the derivation. +func ClientHandler(reg *Registry, logger *slog.Logger, maxFrameBytes int64) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { serverID := r.Header.Get("X-Pyrycode-Server") token := r.Header.Get("X-Pyrycode-Token") @@ -48,7 +51,7 @@ func ClientHandler(reg *Registry, logger *slog.Logger) http.Handler { } connID := "client-" + serverID + "-" + randHex8() - wsconn := NewWSConn(c, connID) + wsconn := NewWSConn(c, connID, maxFrameBytes) if err := reg.RegisterPhone(serverID, wsconn); err != nil { if errors.Is(err, ErrNoServer) { diff --git a/internal/relay/client_endpoint_test.go b/internal/relay/client_endpoint_test.go index 60a28c1..2327a4d 100644 --- a/internal/relay/client_endpoint_test.go +++ b/internal/relay/client_endpoint_test.go @@ -22,7 +22,7 @@ func startClient(t *testing.T) (*Registry, string, func()) { t.Helper() reg := NewRegistry() logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - srv := httptest.NewServer(ClientHandler(reg, logger)) + srv := httptest.NewServer(ClientHandler(reg, logger, 256*1024)) wsURL := "ws" + strings.TrimPrefix(srv.URL, "http") return reg, wsURL, srv.Close } @@ -111,7 +111,7 @@ func TestClientEndpoint_HeaderGate_400(t *testing.T) { t.Run(tc.name, func(t *testing.T) { reg := NewRegistry() logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - srv := httptest.NewServer(ClientHandler(reg, logger)) + srv := httptest.NewServer(ClientHandler(reg, logger, 256*1024)) defer srv.Close() req, err := http.NewRequest(http.MethodGet, srv.URL+"/", nil) diff --git a/internal/relay/heartbeat_test.go b/internal/relay/heartbeat_test.go index 05b0053..ce8493a 100644 --- a/internal/relay/heartbeat_test.go +++ b/internal/relay/heartbeat_test.go @@ -45,7 +45,7 @@ func startHeartbeatPair(t *testing.T, interval, timeout time.Duration) ( close(handlerReady) return } - serverWS = NewWSConn(c, "test-hb") + serverWS = NewWSConn(c, "test-hb", 256*1024) // CloseRead drains control frames (so the server side auto- // processes incoming pongs and pings). Mirrors the production // /v1/server and /v1/client handler shape. diff --git a/internal/relay/server_endpoint.go b/internal/relay/server_endpoint.go index 2a7e828..585d57f 100644 --- a/internal/relay/server_endpoint.go +++ b/internal/relay/server_endpoint.go @@ -32,7 +32,10 @@ import ( // that window inherits the slot atomically (registry-side reclaim path, // see Registry.ScheduleReleaseServer). Production passes 30*time.Second // per protocol spec § Authentication → Binary → relay. -func ServerHandler(reg *Registry, logger *slog.Logger, grace time.Duration) http.Handler { +// +// maxFrameBytes is the per-frame read cap threaded into NewWSConn; see +// docs/specs/architecture/29-wsconn-read-limit.md for the derivation. +func ServerHandler(reg *Registry, logger *slog.Logger, grace time.Duration, maxFrameBytes int64) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { serverID := r.Header.Get("X-Pyrycode-Server") versionHeader := r.Header.Get("X-Pyrycode-Version") @@ -56,7 +59,7 @@ func ServerHandler(reg *Registry, logger *slog.Logger, grace time.Duration) http } connID := "server-" + serverID + "-" + randHex8() - wsconn := NewWSConn(c, connID) + wsconn := NewWSConn(c, connID, maxFrameBytes) if err := reg.ClaimServer(serverID, wsconn); err != nil { if errors.Is(err, ErrServerIDConflict) { diff --git a/internal/relay/server_endpoint_test.go b/internal/relay/server_endpoint_test.go index af4688a..8e20c67 100644 --- a/internal/relay/server_endpoint_test.go +++ b/internal/relay/server_endpoint_test.go @@ -22,7 +22,7 @@ func startServer(t *testing.T, grace time.Duration) (*Registry, string, func()) t.Helper() reg := NewRegistry() logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - srv := httptest.NewServer(ServerHandler(reg, logger, grace)) + srv := httptest.NewServer(ServerHandler(reg, logger, grace, 256*1024)) wsURL := "ws" + strings.TrimPrefix(srv.URL, "http") return reg, wsURL, srv.Close } @@ -103,7 +103,7 @@ func TestServerEndpoint_HeaderGate_400(t *testing.T) { t.Run(tc.name, func(t *testing.T) { reg := NewRegistry() logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - srv := httptest.NewServer(ServerHandler(reg, logger, 100*time.Millisecond)) + srv := httptest.NewServer(ServerHandler(reg, logger, 100*time.Millisecond, 256*1024)) defer srv.Close() req, err := http.NewRequest(http.MethodGet, srv.URL+"/", nil) @@ -336,7 +336,7 @@ func TestServerEndpoint_WrongMethod_NoPanic(t *testing.T) { t.Run(tc.name, func(t *testing.T) { reg := NewRegistry() logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - srv := httptest.NewServer(ServerHandler(reg, logger, 100*time.Millisecond)) + srv := httptest.NewServer(ServerHandler(reg, logger, 100*time.Millisecond, 256*1024)) defer srv.Close() resp, err := tc.do(srv.URL) diff --git a/internal/relay/ws_conn.go b/internal/relay/ws_conn.go index 7bff98a..e3322a9 100644 --- a/internal/relay/ws_conn.go +++ b/internal/relay/ws_conn.go @@ -33,15 +33,25 @@ type WSConn struct { cancel context.CancelFunc } -// NewWSConn wraps c with the relay-assigned connection id. The caller -// retains responsibility for the WebSocket handshake and for choosing -// connID; this constructor neither validates connID nor inspects c. +// NewWSConn wraps c with the relay-assigned connection id and applies a +// per-frame read-size cap. The caller retains responsibility for the +// WebSocket handshake and for choosing connID; this constructor neither +// validates connID nor inspects c. +// +// maxFrameBytes bounds every inbound WebSocket frame read through this +// WSConn. Applied before the constructor returns — i.e. before any +// goroutine other than the constructor holds a reference — so the first +// Read against the underlying *websocket.Conn already sees the cap. If a +// peer sends a frame whose payload exceeds the cap, the library closes +// the conn with StatusMessageTooBig (1009) and the next Read surfaces a +// non-nil error. // // After construction, the WSConn owns c: callers must reach the // connection only through WSConn methods. Calling c.Write or c.Close // directly defeats the adapter's serialisation and cancellation // guarantees. -func NewWSConn(c *websocket.Conn, connID string) *WSConn { +func NewWSConn(c *websocket.Conn, connID string, maxFrameBytes int64) *WSConn { + c.SetReadLimit(maxFrameBytes) ctx, cancel := context.WithCancel(context.Background()) return &WSConn{ conn: c, diff --git a/internal/relay/ws_conn_test.go b/internal/relay/ws_conn_test.go index e3c6652..fc5382d 100644 --- a/internal/relay/ws_conn_test.go +++ b/internal/relay/ws_conn_test.go @@ -1,6 +1,7 @@ package relay import ( + "bytes" "context" "fmt" "net/http" @@ -14,10 +15,15 @@ import ( ) // startEcho stands up an httptest server whose handler upgrades to a -// WebSocket and forwards every received frame to a buffered channel. -// It returns a connected WSConn (client side), the receive channel, -// and a cleanup function the caller defers. -func startEcho(t *testing.T) (*WSConn, <-chan []byte, func()) { +// WebSocket, forwards every received frame to a buffered channel, and +// echoes the frame back to the client. The echo lets tests exercise the +// client-side WSConn.Read (and its SetReadLimit cap) by round-tripping +// frames; the server side has no custom read cap, so oversize frames +// reach the client where the cap surfaces. +// +// Returns a connected client-side WSConn capped at maxFrameBytes, the +// server-side receive channel, and a cleanup function the caller defers. +func startEcho(t *testing.T, maxFrameBytes int64) (*WSConn, <-chan []byte, func()) { t.Helper() received := make(chan []byte, 1024) srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -33,6 +39,9 @@ func startEcho(t *testing.T) (*WSConn, <-chan []byte, func()) { return } received <- data + if err := c.Write(r.Context(), websocket.MessageBinary, data); err != nil { + return + } } })) @@ -45,7 +54,7 @@ func startEcho(t *testing.T) (*WSConn, <-chan []byte, func()) { t.Fatalf("dial: %v", err) } - wc := NewWSConn(client, "test-conn-id") + wc := NewWSConn(client, "test-conn-id", maxFrameBytes) cleanup := func() { wc.Close() srv.Close() @@ -54,14 +63,15 @@ func startEcho(t *testing.T) (*WSConn, <-chan []byte, func()) { } func TestWSConn_ConnID_ReturnsConstructorValue(t *testing.T) { - wc := NewWSConn(nil, "abc") - if got := wc.ConnID(); got != "abc" { - t.Fatalf("ConnID() = %q, want %q", got, "abc") + wc, _, cleanup := startEcho(t, 256*1024) + defer cleanup() + if got := wc.ConnID(); got != "test-conn-id" { + t.Fatalf("ConnID() = %q, want %q", got, "test-conn-id") } } func TestWSConn_ConcurrentSend_ProducesIntactFrames(t *testing.T) { - wc, received, cleanup := startEcho(t) + wc, received, cleanup := startEcho(t, 256*1024) defer cleanup() const n = 16 @@ -99,7 +109,7 @@ func TestWSConn_ConcurrentSend_ProducesIntactFrames(t *testing.T) { } func TestWSConn_DoubleClose_DoesNotPanic(t *testing.T) { - wc, _, cleanup := startEcho(t) + wc, _, cleanup := startEcho(t, 256*1024) defer cleanup() wc.Close() @@ -107,7 +117,7 @@ func TestWSConn_DoubleClose_DoesNotPanic(t *testing.T) { } func TestWSConn_SendAfterClose_ReturnsError(t *testing.T) { - wc, _, cleanup := startEcho(t) + wc, _, cleanup := startEcho(t, 256*1024) defer cleanup() wc.Close() @@ -115,3 +125,67 @@ func TestWSConn_SendAfterClose_ReturnsError(t *testing.T) { t.Fatal("Send after Close returned nil error, want non-nil") } } + +// TestWSConn_Read_FrameExceedingCap_ReturnsError verifies that a frame +// whose payload exceeds the per-frame read cap surfaces as a non-nil +// error on the receiving WSConn, and that subsequent reads also fail +// (the library closes the underlying conn per SetReadLimit contract). +// +// The test deliberately does NOT assert on the specific error type: the +// library is free to wrap the close error differently across versions. +// Only the non-nil contract from the AC is asserted. +func TestWSConn_Read_FrameExceedingCap_ReturnsError(t *testing.T) { + const maxBytes = int64(64) + wc, _, cleanup := startEcho(t, maxBytes) + defer cleanup() + + oversize := bytes.Repeat([]byte("x"), int(maxBytes)*4) + if err := wc.Send(oversize); err != nil { + t.Fatalf("Send oversize: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + if _, err := wc.Read(ctx); err == nil { + t.Fatal("Read on over-cap frame returned nil error, want non-nil") + } + + ctx2, cancel2 := context.WithTimeout(context.Background(), 500*time.Millisecond) + defer cancel2() + if _, err := wc.Read(ctx2); err == nil { + t.Fatal("subsequent Read after over-cap returned nil error, want non-nil") + } +} + +// TestWSConn_Read_FrameAtCap_DeliveredIntact verifies that a frame whose +// payload is exactly at the cap is delivered intact through WSConn.Read. +// Together with the over-cap test this pins the boundary behaviour. +func TestWSConn_Read_FrameAtCap_DeliveredIntact(t *testing.T) { + const maxBytes = int64(256) + wc, received, cleanup := startEcho(t, maxBytes) + defer cleanup() + + payload := bytes.Repeat([]byte("y"), int(maxBytes)) + if err := wc.Send(payload); err != nil { + t.Fatalf("Send at-cap: %v", err) + } + + select { + case got := <-received: + if !bytes.Equal(got, payload) { + t.Fatalf("server received %d bytes, want %d (and equal payload)", len(got), len(payload)) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for server to receive at-cap frame") + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + got, err := wc.Read(ctx) + if err != nil { + t.Fatalf("Read at-cap echo: %v", err) + } + if !bytes.Equal(got, payload) { + t.Fatalf("client Read returned %d bytes, want %d (and equal payload)", len(got), len(payload)) + } +}