diff --git a/cmd/pyrycode-relay/main.go b/cmd/pyrycode-relay/main.go index f075d43..db90eea 100644 --- a/cmd/pyrycode-relay/main.go +++ b/cmd/pyrycode-relay/main.go @@ -29,7 +29,11 @@ func main() { certCache = flag.String("cert-cache", defaultCertCache(), "Directory for autocert's TLS certificate cache.") insecureListen = flag.String("insecure-listen", "", "Listen address for plain HTTP (e.g. :8080). Disables autocert; use only when fronted by a reverse proxy.") metricsListen = flag.String("metrics-listen", "127.0.0.1:9090", "Listen address for the /metrics endpoint. Must be a loopback IP literal (e.g. 127.0.0.1:9090, [::1]:9090). Empty disables.") - showVersion = flag.Bool("version", false, "Print version and exit.") + trustXFF = flag.Bool("trust-x-forwarded-for", false, + "Trust the X-Forwarded-For header as the source IP for per-IP rate limiting. "+ + "WARNING: enabling this without a trusted reverse proxy in front of the relay "+ + "allows clients to spoof their source IP and bypass per-IP rate limits.") + showVersion = flag.Bool("version", false, "Print version and exit.") ) flag.Parse() @@ -121,12 +125,33 @@ func main() { // four orders of magnitude below nhooyr's 32 MiB default). const maxFrameBytes int64 = 256 * 1024 + // Per-IP rate-limit policy: ~10 attempts/IP/minute steady-state, burst + // 20. Derivation: docs/threat-model.md § DoS resistance future-hardening + // line names ~10/min/IP and burst headroom for retry storms; the 5-min + // eviction sweep keeps the bucket map's resident size bounded under + // address-space scanning (docs/specs/architecture/50-ip-rate-limiter.md + // § Adversarial walk). All three values must be positive — NewIPRateLimiter + // panics on a zero/negative evictionInterval via time.NewTicker. + const ( + rateLimitRefillEvery = 6 * time.Second + rateLimitBurst = 20 + rateLimitEvictionInterval = 5 * time.Minute + ) + limiter := relay.NewIPRateLimiter(rateLimitRefillEvery, rateLimitBurst, rateLimitEvictionInterval) + // Best-effort: the current listener block calls os.Exit on error, which + // skips defers. A real graceful-shutdown path (signal handler + server + // Shutdown) is out of scope per #47; this defer runs on clean returns + // (e.g. --version above does not reach here, but future test entry + // points might). + defer limiter.Close() + rateLimit := relay.NewRateLimitMiddleware(limiter, logger, *trustXFF) + mux := http.NewServeMux() mux.Handle("/healthz", relay.NewHealthzHandler(reg, Version, startedAt)) - mux.Handle("/v1/server", relay.ServerHandler(reg, logger, 30*time.Second, maxFrameBytes)) + mux.Handle("/v1/server", rateLimit(relay.ServerHandler(reg, logger, 30*time.Second, maxFrameBytes))) // maxPhones=16 caps phones per server-id; over-cap registrations are // rejected with WS close 4429. Per #30 architect spec. - mux.Handle("/v1/client", relay.ClientHandler(reg, logger, maxFrameBytes, 16)) + mux.Handle("/v1/client", rateLimit(relay.ClientHandler(reg, logger, maxFrameBytes, 16))) if *insecureListen != "" { logger.Info("starting", "version", Version, "mode", "insecure", "listen", *insecureListen) diff --git a/docs/knowledge/INDEX.md b/docs/knowledge/INDEX.md index a60ee3e..e915e9b 100644 --- a/docs/knowledge/INDEX.md +++ b/docs/knowledge/INDEX.md @@ -4,6 +4,7 @@ One-line pointers into the evergreen knowledge base. Newest entries at the top o ## Features +- [Per-IP rate-limit middleware (`/v1/server` + `/v1/client`)](features/rate-limit-middleware.md) — HTTP middleware that throttles WS upgrade attempts per source IP **before** `websocket.Accept` runs, before the registry is touched, before the mobile/binary token header is read. One new exported symbol: `relay.NewRateLimitMiddleware(limiter *IPRateLimiter, logger *slog.Logger, trustForwardedFor bool) func(http.Handler) http.Handler` — returns the conventional composition shape, matching `EnforceHost`. Per request: extract `ip := ClientIP(r, trustForwardedFor)`, deny on `ip == ""` (empty-IP guard runs **before** `Allow` because the limiter's `Allow("")` is a normal map key, not a deny — loud-failure rule: refuse un-attributable traffic rather than admit it un-throttled), deny on `!limiter.Allow(ip)`, otherwise `next.ServeHTTP`. Both deny paths emit one `logger.Warn("rate_limited", "remote", strconv.Quote(ip))` line — single allowlisted key, `strconv.Quote` defends against control-byte log injection, the empty-IP case renders `remote="\"\""` so operators can distinguish the two deny causes by the value. 429 body is empty (`http.Error(w, "", http.StatusTooManyRequests)` — matches the existing `BadRequest` shape in `server_endpoint.go` / `client_endpoint.go`, no internal state leaked). New `--trust-x-forwarded-for` CLI flag (default `false`, Usage carries the explicit spoofing warning) is threaded once into the factory. Policy lives at the wiring site in `cmd/pyrycode-relay/main.go`: `rateLimitRefillEvery = 6s`, `rateLimitBurst = 20`, `rateLimitEvictionInterval = 5min` → ~10 sustained attempts/IP/min with 20-attempt burst headroom; `5min` sweep is well above `burst*refillEvery = 120s`. One shared `*IPRateLimiter` is applied to both `mux.Handle` registrations — a misbehaving IP retrying against either endpoint shares one bucket; `/healthz` is intentionally NOT wrapped (must remain pollable from monitoring). `defer limiter.Close()` is best-effort (current `os.Exit` listener path skips defers; graceful shutdown out of scope). Concurrency safety fully delegated to the limiter; middleware is stateless, no new locks, no new goroutines. Mobile/binary `X-Pyrycode-Token` structurally not read on this code path. Why middleware not constructor injection: keeps frame-routing handlers ignorant of admission control, makes `/healthz`'s exemption fall out for free, mirrors `EnforceHost`. Closes the third leg of `docs/threat-model.md` § *DoS resistance* alongside #29 (per-frame size cap) and #30 (per-server-id phone cap); the "token-bucket rate limit on /v1/server and /v1/client" line moves from *Future hardening* to v1 mitigation. Out of scope: connection-count cap (different threat surface — *attempt rate* vs *resident count*), CIDR-aware trusted-proxy chain (today's flag is flat all-or-nothing), multi-instance shared-state (would need Redis), rate-limit metrics counter (future ticket parallel to #58), adaptive policy, graceful-shutdown signal handler (#47). - [Metrics listener (localhost-only)](features/metrics-listener.md) — separate `*http.Server` for `/metrics`, bound to a loopback IP literal (default `127.0.0.1:9090`); kept off the internet-exposed public listener that serves `/healthz` + `/v1/{server,client}` because metric values leak operational state. Two exports + one sentinel in `internal/relay/metrics_listen.go`: `ErrNonLoopbackBind` (branchable via `errors.Is`), `CheckLoopbackBind(addr)` (pure validator — `net.SplitHostPort` → `ListenerPort` for the port-0 / range / format rejects inherited from #81 → `net.ParseIP(host).IsLoopback()`; hostnames including `localhost:9090` are rejected even when they currently resolve to loopback — the DNS-time TOCTOU window is closed structurally, not by re-resolving), and `NewMetricsServer(addr, h)` (opt-out-aware: `addr == ""` → `(nil, nil)`, validator failure → `(nil, err)`, otherwise an `*http.Server` with the public listener's four timeouts duplicated literally so either listener can drift independently in a future ticket). Wired into both listener branches in `cmd/pyrycode-relay/main.go`: a goroutine launch mirroring the autocert mode's http-01 listener pattern (`os.Exit(1)` on `ListenAndServe` failure — loud-fail-over-silent because a relay booted with metrics enabled but silently not serving them would mislead operator scrapes), the metrics port joins **both** the `expected` and `actual` sets of #81's allowlist (declared secondary listener — must land on both sides of the asymmetric check). Empty-flag opt-out is structural via `metricsSrv != nil` guards at every reference site — no repeated `if *metricsListen == "" {}` branches. TLS / authn deliberately out of scope (loopback IS the defence); same-host adversary in scope but not a defence target; graceful shutdown deferred to #31. Three tests in `metrics_listen_test.go` — 12-row validator matrix, 3-row constructor matrix with timeouts pinned to literal values (not a shared constant), and an end-to-end happy-path that drives validator + constructor + `net.Listen("tcp", "127.0.0.1:0")` + actual `http.Get` round-trip by exploiting `http.Server.Serve(l)` ignoring `Addr` once a listener is supplied (#60). - [Listener port allowlist (boot-time refusal)](features/listener-port-allowlist.md) — relay refuses to start (exit 2) if the set of TCP ports it is *about to bind* (`http.Server.Addr` values) contains any port outside an explicit expected set derived from parsed flags: `{443, 80}` in autocert mode, `{}` in `--insecure-listen :` mode. Catches stray `net/http/pprof :6060` listeners, env-flipped debug ports, accidentally-enabled metrics exporters. Three exports from `internal/relay/listeners.go`: `ErrUnexpectedListener` sentinel (branchable via `errors.Is`; wrapped message names surplus + expected ports both ascending so the failure log is deterministic across `map`-iteration runs and grep-friendly), `ListenerPort(addr string) (uint16, error)` parsing `":443"` / `"127.0.0.1:8080"` / `"[::1]:443"` with an explicit reject of port 0 (the ephemeral-placeholder trap — would smuggle an unknown bound port past the actual-set construction), and `CheckListenerPorts(expected, actual map[uint16]struct{}) error` (pure set-difference). **Asymmetric by design**: surplus = error, missing = nil (a failure to bind an expected port surfaces as `ListenAndServe`'s own bind error at exit 1; duplicating the signal at boot would clutter logs). **Port-only**: interface binding (`127.0.0.1` vs `0.0.0.0`) intentionally out of scope on a single-instance internet-exposed deploy. **Reports all surplus in one error** so a manifest enabling several debug surfaces fails in one boot rather than N restart cycles. Wired into each listener branch separately in `cmd/pyrycode-relay/main.go` (different `http.Server` shapes; lifting a helper would invent surface area without a second consumer); structured log fields `unexpected_ports` + `expected_ports` recomputed in `main` via unexported `listenerPortLists` so `Check…` stays a single-error return. Paired with `TestBinaryDoesNotImportPprof` in `cmd/pyrycode-relay/deps_test.go` — shells out to `go list -deps -json`, catches the `import _ "net/http/pprof"` handler-registration variant that attaches `/debug/pprof/*` to `http.DefaultServeMux` *without* opening a new port (the runtime check would miss it). Belt-and-suspenders means different fabric: stochastic-ish runtime guard + deterministic compile-time test, neither alone is complete. Exit 2 = config-rejected-at-boot, harmonised across the sentinel family (architect overrode the AC's literal `os.Exit(1)`) (#81). - [Env-var config validator (boot-time refusal)](features/env-config-validator.md) — table-driven validation of every env var the relay reads at boot. Single source of truth is the unexported `envContracts []envContract` registry in `internal/relay/env_config.go`; each row carries `name`, `required` bool, and an inline `validate func(string) error`. `CheckEnvConfig(lookup func(string) (string, bool)) error` walks the registry and returns the structured `*ErrInvalidConfig{Key, Reason}` on the first failure (`Reason` is `"missing"` or `"malformed-value: "`); the package-level sentinel `ErrInvalidConfigSentinel` is matched via a custom `Is` method (not `Unwrap`, which would double-print the message prefix) so `errors.Is(err, ErrInvalidConfigSentinel)` and `errors.As(err, &cfgErr)` form a dual contract. The `func(string) (string, bool)` (= `os.LookupEnv` shape) getter coexists with #77's `func(string) string` getter — the presence bit is necessary here to distinguish "missing-but-required" from "present-but-empty", semantically inert for `IsProductionMode`'s exact-`"1"` match. **Ordering is load-bearing**: wired in `main.go` BEFORE `CheckInsecureListenInProduction` so a typo like `PYRYCODE_RELAY_PRODUCTION=true` cannot slip through `IsProductionMode`'s silent-non-production fallback and reach the insecure-listen guard with an unvalidated value. Today's registry has one row (`PYRYCODE_RELAY_PRODUCTION`, optional-but-format-validated); future env-var reads register here at code-review time. `checkEnvConfigWith(lookup, contracts)` is the parameterised inner used by the `required: true` test case (today's production table has no required entries). Exit 2 = config-rejected-at-boot, matching the sibling refusals (#9, #77, #79) (#80). diff --git a/docs/knowledge/codebase/47.md b/docs/knowledge/codebase/47.md new file mode 100644 index 0000000..29fbf48 --- /dev/null +++ b/docs/knowledge/codebase/47.md @@ -0,0 +1,69 @@ +# Ticket #47 — per-IP rate-limit middleware on `/v1/server` + `/v1/client` + +Wires the primitives from #50 (`IPRateLimiter`) and #51 (`ClientIP`) into the upgrade pipeline as middleware. Excess attempts from a single source IP are rejected with HTTP `429 Too Many Requests` **before** `websocket.Accept` runs, closing the third leg of `docs/threat-model.md` § *DoS resistance* (connection floods, fork-bomb retry) alongside #29's per-frame size cap and #30's max-phones cap. + +## Implementation + +- **`internal/relay/ratelimit_middleware.go`** — new exported `NewRateLimitMiddleware(limiter *IPRateLimiter, logger *slog.Logger, trustForwardedFor bool) func(http.Handler) http.Handler`. The returned `func(http.Handler) http.Handler` is the middleware factory. Per request: extract `ip := ClientIP(r, trustForwardedFor)`, then deny on `ip == ""` (empty-IP guard runs **before** `Allow`, per the limiter's "`Allow("")` is a normal map key" contract), then deny on `!limiter.Allow(ip)`, otherwise `next.ServeHTTP`. Both deny paths emit the same one-line log and write `http.Error(w, "", http.StatusTooManyRequests)`. +- **`internal/relay/ratelimit_middleware_test.go`** — 9 tests covering: burst-then-deny + sentinel-counter "handler not invoked on deny"; per-IP isolation; `trustForwardedFor=true` bucketing by forged XFF; `trustForwardedFor=false` ignoring XFF; empty-IP denies without consulting the limiter (asserted via `len(limiter.buckets) == 0`); deny emits exactly one `level=WARN msg=rate_limited remote="\"…\""` line with no other fields; empty-IP deny renders `remote="\"\""`; allow path emits no log line; registry left pristine on deny when wrapped over a real `ServerHandler`. +- **`cmd/pyrycode-relay/main.go`** — new `--trust-x-forwarded-for` boolean flag (default `false`, Usage string carries the explicit spoofing warning). New policy constants `rateLimitRefillEvery = 6 * time.Second`, `rateLimitBurst = 20`, `rateLimitEvictionInterval = 5 * time.Minute` at the wiring site. `relay.NewIPRateLimiter` is constructed once and shared across both endpoints; `defer limiter.Close()` placed before the listeners block. `mux.Handle("/v1/server", …)` and `mux.Handle("/v1/client", …)` are wrapped via the middleware; `/healthz` is intentionally NOT wrapped — it must remain pollable from monitoring without throttling. +- **`docs/threat-model.md`** § *DoS resistance* — v1 mitigation updated to reference the wiring; the "token-bucket rate limit on /v1/server and /v1/client" line removed from Future hardening (now delivered). Residual-risk paragraph names what the attempt-rate cap does NOT cover (concurrent-connection count, multi-instance shared state, CIDR-aware trusted-proxy chain). +- **No new dependencies.** Stdlib + `internal/relay` only. + +## Design choices + +### Why middleware, not constructor injection + +`ServerHandler` and `ClientHandler` know nothing about admission control; the limiter knows nothing about HTTP. Composition at the root keeps each layer single-purpose. `/healthz`'s exemption falls out for free — it simply isn't wrapped. This mirrors the existing `EnforceHost(*domain, mux)` middleware shape in `main.go`. + +### Empty-IP-guard placement (the load-bearing call-ordering choice) + +`ClientIP` returns `""` when no usable source is available (unparseable `RemoteAddr` and either XFF disabled or yielding nothing). The limiter's `Allow("")` is a normal map key — not a deny — so the empty-string check MUST live in the middleware. Loud-failure principle from `docs/PROJECT-MEMORY.md` § *Project-level conventions*: refuse to admit un-attributable traffic rather than silently routing it. The test `TestRateLimitMiddleware_EmptyIPDeniesWithoutTouchingLimiter` asserts both the 429 status and `len(limiter.buckets) == 0` to pin the call-ordering structurally. + +### 429 response shape + +`http.Error(w, "", http.StatusTooManyRequests)` — empty body, matching the existing `http.Error(w, "", http.StatusBadRequest)` shape at `internal/relay/server_endpoint.go:44` and `client_endpoint.go:42`. The empty message renders as `"\n"` with `Content-Type: text/plain; charset=utf-8` and `X-Content-Type-Options: nosniff`. No internal state leaked (`docs/threat-model.md` § *Error response leakage*). + +### Deny log shape + +`logger.Warn("rate_limited", "remote", strconv.Quote(ip))` — exactly one line per deny. + +- **Level `Warn`.** Operational signal for ops dashboards; not noisy enough for `Info`, not severe enough for `Error`. "This attempt was rejected but the relay is healthy." +- **Message `"rate_limited"`** — event-type-as-msg, matching `"server_claimed"`, `"server_id_conflict"`, etc. +- **Single key `"remote"`** — already in `allowedLogKeys` (`internal/relay/log_allowlist.go`); no allowlist edit needed. +- **`strconv.Quote(ip)`** — per `client_ip.go`'s log-injection note. The text handler escapes the inner quotes, so a denied `1.2.3.4` renders as `remote="\"1.2.3.4\""` and an empty-IP deny renders as `remote="\"\""` — operators can disambiguate empty-IP from limiter-deny by the value. +- **Mobile/binary token never read** by this middleware — `X-Pyrycode-Token` is not in the code path. Structural: no `r.Header.Get("X-Pyrycode-Token")` anywhere in `ratelimit_middleware.go`. + +The `TestLogKeysAreAllowlisted` AST walker enforces the literal-string-key requirement; both keys are literals. + +### Policy values + +`refillEvery=6s` × `burst=20` = ~10 sustained attempts per IP per minute with 20-attempt burst headroom for legitimate retry storms. `evictionInterval=5min` keeps the bucket map's resident size bounded under address-space scanning (`burst*refillEvery = 120s` is the eviction threshold; sweep every 5 min is well above it). All three positive — `NewIPRateLimiter` panics via `time.NewTicker` on a zero/negative `evictionInterval`, which is the right loud failure for a wiring bug. + +### Shared limiter across endpoints + +One `*IPRateLimiter`, one `rateLimit` middleware, applied to both `mux.Handle` registrations. A misbehaving IP retrying against either endpoint shares one bucket — splitting per endpoint would just halve the attacker's per-IP cost. Concurrency safety is fully delegated to the limiter (see #50's race-tested model). + +### `defer limiter.Close()` is best-effort + +The current `main.go` listener block calls `os.Exit` on error, which skips defers. A real graceful-shutdown path (signal handler + `Server.Shutdown`) is out of scope for #47 — explicitly listed in the issue's *Out of Scope*. The defer is in place for any future code path that returns normally (e.g. test entry points, future shutdown). + +## Trust model + +The `--trust-x-forwarded-for` flag is the entire trust-model surface. Default `false` is the safe stance: direct-internet-facing deployments ignore the header; an attacker setting `X-Forwarded-For: 127.0.0.1` keys against their real `RemoteAddr`. Operators fronting the relay with a trusted reverse proxy that sets XFF correctly opt in. The flag's Usage string is the operator-facing trust contract; if it is enabled without a proxy, every request is keyed against whatever string the client supplies in the header — that is the operator's deployment bug, not a relay defect. Test `TestRateLimitMiddleware_TrustXFF_BucketsByForgedIP` exists explicitly to document this behaviour. + +## Out of scope + +- Connection-count cap (per-IP and global) — different threat surface (this ticket addresses *attempt rate*, not *resident connection count*). Stays in `docs/threat-model.md` § *Future hardening*. +- CIDR-aware trusted-proxy chain for XFF — current bool is flat all-or-nothing (#51's design choice). +- Multi-instance shared-state rate limiting — v1 is single-instance; would need Redis or equivalent. +- Rate-limit metrics counter — a future ticket, parallel to #58's frame-forward / grace-expiry counters. +- Adaptive (load-aware) policy — fixed token-bucket is sufficient for v1. +- Graceful shutdown signal handler — pre-existing gap, explicitly deferred per AC; opens as a follow-up when WS lifecycle work justifies it. + +## Cross-links + +- [Spec: 47-wire-rate-limit-middleware](../../specs/architecture/47-wire-rate-limit-middleware.md) — architect's design and security review (verdict: PASS, no findings). +- [Codebase: #50 IP rate-limiter primitive](50.md) — the limiter consumed here. +- [Codebase: #51 client-IP extraction helper](51.md) — the IP source consumed here. +- [Threat model § DoS resistance](../../threat-model.md) — the threat-model entry this ticket converts from future hardening to v1 mitigation. diff --git a/docs/knowledge/features/rate-limit-middleware.md b/docs/knowledge/features/rate-limit-middleware.md new file mode 100644 index 0000000..60c3896 --- /dev/null +++ b/docs/knowledge/features/rate-limit-middleware.md @@ -0,0 +1,113 @@ +# Per-IP rate-limit middleware (`/v1/server` + `/v1/client`) + +HTTP middleware that throttles WebSocket upgrade attempts per source IP. Excess attempts from a single IP receive `429 Too Many Requests` **before** `websocket.Accept` runs, before the registry is touched, before the mobile/binary token header is read. Combined with the per-frame size cap (#29) and the per-server-id phone cap (#30), this closes the third leg of `docs/threat-model.md` § *DoS resistance* (connection floods, fork-bomb retry). + +`/healthz` is intentionally NOT wrapped — it must remain pollable from monitoring without throttling. + +## API + +Package `internal/relay`: + +```go +func NewRateLimitMiddleware( + limiter *IPRateLimiter, + logger *slog.Logger, + trustForwardedFor bool, +) func(http.Handler) http.Handler +``` + +The factory returns the conventional `func(http.Handler) http.Handler` composition shape (matches `EnforceHost` in `cmd/pyrycode-relay/main.go`). + +## Behaviour + +Per request: + +1. Extract the source IP via `relay.ClientIP(r, trustForwardedFor)` (#51). +2. If the result is empty, deny: write `429`, emit the deny log line, return. **`limiter.Allow` is never called on the empty key** — refusing un-attributable traffic rather than admitting it un-throttled (the loud-failure rule). +3. Otherwise call `limiter.Allow(ip)`. On `false`, deny the same way. +4. On `true`, `next.ServeHTTP(w, r)`. + +### 429 response shape + +`http.Error(w, "", http.StatusTooManyRequests)` — empty body, matching the existing `http.Error(w, "", http.StatusBadRequest)` shape in `server_endpoint.go` / `client_endpoint.go`. `Content-Type: text/plain; charset=utf-8`, `X-Content-Type-Options: nosniff`, body `"\n"`. No internal state leaked. + +### Deny log shape + +```go +logger.Warn("rate_limited", "remote", strconv.Quote(ip)) +``` + +Exactly one line per deny, same shape on both deny branches. + +- Level `Warn`: operational signal, not noisy enough for `Info`, not severe enough for `Error`. +- Message `"rate_limited"`: event-type-as-msg convention (`"server_claimed"`, `"server_id_conflict"`, …). +- Single key `"remote"`: already in `allowedLogKeys` (`internal/relay/log_allowlist.go`); no allowlist edit. +- Value `strconv.Quote(ip)`: defends against control-byte log injection per `client_ip.go`'s contract. The text handler escapes inner quotes, so a denied `1.2.3.4` renders as `remote="\"1.2.3.4\""`; an empty-IP deny renders as `remote="\"\""` — operators can disambiguate the two deny causes by the value. +- Mobile/binary `X-Pyrycode-Token` is never read on this path. Structural: no `r.Header.Get("X-Pyrycode-Token")` anywhere in the file. + +The `TestLogKeysAreAllowlisted` AST walker enforces the literal-string-key requirement; both keys are literals. + +## Trust model — `--trust-x-forwarded-for` + +A boolean CLI flag controls whether `X-Forwarded-For` is honoured. Default `false`. The flag's `Usage` carries the operator-facing warning that enabling it without a trusted reverse proxy in front allows clients to spoof their source IP and bypass per-IP rate limits. + +- `--trust-x-forwarded-for=false` (default): the bucket key is the host portion of `r.RemoteAddr`. An attacker setting `X-Forwarded-For: 127.0.0.1` keys against their real `RemoteAddr`. +- `--trust-x-forwarded-for=true`: the bucket key is the left-most `X-Forwarded-For` entry, falling back to `RemoteAddr`. Trust is binary (all-or-nothing); CIDR-aware proxy chains are out of scope. + +The flag's value is captured at process start and threaded once into `NewRateLimitMiddleware`. No per-request mode-flipping. + +## Wiring + +`cmd/pyrycode-relay/main.go`: + +```go +const ( + rateLimitRefillEvery = 6 * time.Second + rateLimitBurst = 20 + rateLimitEvictionInterval = 5 * time.Minute +) +limiter := relay.NewIPRateLimiter(rateLimitRefillEvery, rateLimitBurst, rateLimitEvictionInterval) +defer limiter.Close() +rateLimit := relay.NewRateLimitMiddleware(limiter, logger, *trustXFF) + +mux.Handle("/healthz", relay.NewHealthzHandler(reg, Version, startedAt)) +mux.Handle("/v1/server", rateLimit(relay.ServerHandler(reg, logger, 30*time.Second, maxFrameBytes))) +mux.Handle("/v1/client", rateLimit(relay.ClientHandler(reg, logger, maxFrameBytes, 16))) +``` + +Policy values live at the wiring site, matching the existing `maxFrameBytes` pattern. Derivation: `refillEvery=6s` × `burst=20` ≈ 10 sustained attempts/IP/minute with 20-attempt burst headroom for legitimate retry storms; `evictionInterval=5min` is well above `burst*refillEvery = 120s` (the bucket's at-capacity threshold), keeping the bucket map's resident size bounded under address-space scanning. + +All three values must be positive — `NewIPRateLimiter` panics on a zero/negative `evictionInterval` via `time.NewTicker`, the right loud failure for a wiring bug. + +### Shared limiter across endpoints + +One `*IPRateLimiter`, one middleware, applied to both `mux.Handle` registrations. A misbehaving IP retrying against either endpoint shares one bucket — splitting per endpoint would just halve the attacker's per-IP cost. + +### `defer limiter.Close()` is best-effort + +The current listener block calls `os.Exit` on bind/serve errors, which skips defers. A real graceful-shutdown path (signal handler + `Server.Shutdown`) is out of scope; the defer runs on clean returns and any future shutdown sequence picks it up for free. + +## Why middleware, not constructor injection + +`ServerHandler` and `ClientHandler` know nothing about admission control; the limiter knows nothing about HTTP. Composition at the root keeps each layer single-purpose, makes `/healthz`'s exemption fall out for free (just don't wrap it), and matches the existing `EnforceHost(*domain, mux)` shape. + +## Concurrency + +The middleware is stateless aside from the shared `*IPRateLimiter`. Concurrency safety is fully delegated to the limiter (race-tested under `-race` per #50's model). No new locks, no new goroutines. + +## Out of scope + +- **Connection-count cap** (per-IP and global): different threat surface — this ticket caps *attempt rate*, not *resident connection count*. Stays in `docs/threat-model.md` § *Future hardening*. +- **CIDR-aware trusted-proxy chain**: today's flag is flat all-or-nothing (#51's design choice). +- **Multi-instance shared-state rate limiting**: v1 is single-instance; multi-instance would need Redis or equivalent. +- **Rate-limit metrics counter**: a future ticket, parallel to #58's frame-forward / grace-expiry counters. +- **Adaptive (load-aware) policy**: fixed token-bucket is sufficient for v1. +- **Graceful-shutdown signal handler**: pre-existing gap, deferred until WS lifecycle work justifies it. + +## Related + +- [Codebase: #47 wiring](../codebase/47.md) — implementation notes for this ticket. +- [Codebase: #50 IP rate-limiter primitive](../codebase/50.md) — the limiter consumed here. +- [Codebase: #51 client-IP extraction helper](../codebase/51.md) — the IP source consumed here. +- [Threat model § DoS resistance](../../threat-model.md) — the threat-model entry this ticket converts from future hardening to v1 mitigation. +- [Log-key allowlist](../../../internal/relay/log_allowlist.go) — the closed set the deny log line conforms to. diff --git a/docs/specs/architecture/47-wire-rate-limit-middleware.md b/docs/specs/architecture/47-wire-rate-limit-middleware.md new file mode 100644 index 0000000..551c500 --- /dev/null +++ b/docs/specs/architecture/47-wire-rate-limit-middleware.md @@ -0,0 +1,268 @@ +# Spec: wire per-IP rate-limit middleware on /v1/server + /v1/client (#47) + +## Files to read first + +- `cmd/pyrycode-relay/main.go:122-129` — the existing wiring shape: `const maxFrameBytes` policy value at the call site, `mux.Handle(...)` registration of `relay.ServerHandler`/`relay.ClientHandler`. The new middleware composes around these two handler results (NOT `/healthz`). The `defer limiter.Close()` lands before the listeners block. +- `internal/relay/ratelimit.go:50-115` — `NewIPRateLimiter`, `Allow(ip string) bool`, `Close()` contracts. Note: `Allow("")` is treated as any other map key (not a deny) — the empty-string guard MUST live in the middleware. +- `internal/relay/client_ip.go:1-48` — `ClientIP(r, trustForwardedFor)`; returns empty string when no usable source is available. Doc comment explicitly says callers logging the string must `strconv.Quote` it. +- `internal/relay/server_endpoint.go:38-75` — the handler the middleware wraps. Note the existing `logger.Info("server_id_conflict", "server_id", ..., "remote", remoteHost(r))` shape — same key set the new log line uses, same message-string-as-event-type convention. +- `internal/relay/client_endpoint.go:37-...` — the other handler the middleware wraps. Same shape. +- `internal/relay/log_allowlist.go:9-17` — the closed set of permitted log keys. `"remote"` is already present; **no allowlist edit needed** for this ticket. +- `internal/relay/log_keys_test.go:59-180` — the AST walker that gates `slog.Info|Warn|Error|Debug` keys. The middleware's `Warn` call must use string-literal keys only (no `slog.String(...)`, no `With`/`LogAttrs`/`Log`). +- `docs/specs/architecture/50-ip-rate-limiter.md` — the limiter primitive spec, especially § *Concurrency model* and § *Parameter contracts (not enforced at runtime)* — the wiring ticket picks the policy values; the limiter doesn't validate them. +- `docs/specs/architecture/51-client-ip-helper.md` (if present in repo; otherwise infer from `internal/relay/client_ip.go`) — the extraction primitive spec; the `trustForwardedFor` semantics the new CLI flag controls. +- `docs/threat-model.md` § *DoS resistance — connection floods, slow-loris, fork-bomb retry* and § *Log hygiene* and § *Error response leakage* — the three threat-model entries this ticket exercises. The DoS entry's "future hardening" line is what this ticket converts to live mitigation; the wiring ticket updates that entry (architect's edit in this commit's doc-update list). +- `docs/PROJECT-MEMORY.md` § *Project-level conventions* — "Loud failure over silent correction" (empty-IP denies rather than admitting un-throttled) and "Credentials the relay does not validate are presence-checked then discarded" (the mobile/binary token is NOT read by the middleware). +- `go.mod` — confirms stdlib + `internal/relay`. No new dep introduced. + +## Context + +`docs/threat-model.md` § *DoS resistance* lists "a token-bucket rate limit on /v1/server and /v1/client" as the future-hardening item. #50 landed the limiter primitive; #51 landed the IP-extraction helper. This ticket wires both into the upgrade pipeline as **middleware**, so excess attempts are rejected before `websocket.Accept` runs. Combined with the per-frame size cap (#29) and the per-server-id phone cap (#30), this closes the third leg the threat-model names. + +The composition seam is middleware, not constructor arguments. The handler functions know nothing about rate limiting; the limiter knows nothing about HTTP. The wiring at the composition root is the only place both meet — matching the "Policy values live at the wiring site" rule (`docs/PROJECT-MEMORY.md`). + +## Design + +### File layout + +- **edit** `cmd/pyrycode-relay/main.go` — add CLI flag, policy constants, limiter construction, `defer limiter.Close()`, middleware composition at the two `mux.Handle` call sites. +- **new** `internal/relay/ratelimit_middleware.go` — middleware factory + the empty-IP guard + the 429 + the deny-log line. +- **new** `internal/relay/ratelimit_middleware_test.go` — burst/refill/per-IP-isolation through the middleware, the `--trust-x-forwarded-for=true` behaviour, the empty-IP-deny path, the "registry never touched" and "inner handler never called on deny" assertions, the log-line shape. +- **edit** `docs/threat-model.md` § *DoS resistance* — v1 mitigation updated to reference this wiring; "Residual risk" + "Future hardening" trimmed of the items this ticket delivers. (This is the doc update the wiring ticket owes per #50's spec.) + +Total production code: ~70 lines. No new exported types from `internal/relay` beyond the middleware factory. Below all six S red lines. + +### Middleware factory (the only new exported symbol) + +```go +// NewRateLimitMiddleware returns an http.Handler-wrapping middleware that +// rejects requests whose source IP has exhausted its token bucket. On a +// denied request, the middleware writes 429 Too Many Requests, emits one +// slog Warn line ("rate_limited", "remote", strconv.Quote(host)), and +// does NOT invoke the wrapped handler. Empty-IP requests are denied the +// same way (Loud failure: refusing to admit un-attributable traffic +// without throttling). +// +// trustForwardedFor is threaded into relay.ClientIP. The caller MUST NOT +// set this true unless a trusted reverse proxy fronts the relay — see the +// --trust-x-forwarded-for CLI flag's Usage string in cmd/pyrycode-relay. +func NewRateLimitMiddleware(limiter *IPRateLimiter, logger *slog.Logger, trustForwardedFor bool) func(http.Handler) http.Handler +``` + +Behaviour summary (one paragraph in lieu of a function body): + +The returned middleware extracts `ip := ClientIP(r, trustForwardedFor)`. If `ip == ""` it writes 429, emits the deny log line, and returns. Otherwise it calls `limiter.Allow(ip)`; on `false` it writes 429, emits the deny log line, and returns. Only on `true` does it call `next.ServeHTTP(w, r)`. The handler invocation order is the test contract — verified by a sentinel handler that fails the test if invoked on a deny path. + +### 429 response shape + +Match the existing `http.Error(w, "", http.StatusBadRequest)` shape used at `internal/relay/server_endpoint.go:44` and `client_endpoint.go:42`: + +```go +http.Error(w, "", http.StatusTooManyRequests) +``` + +Empty message → body is just `"\n"` with `Content-Type: text/plain; charset=utf-8` and `X-Content-Type-Options: nosniff`. No internal state leaked (`docs/threat-model.md` § *Error response leakage*). `websocket.Accept` is structurally unreachable because we never call `next.ServeHTTP`. + +### Log line shape + +Exactly one line per denied attempt, regardless of whether the cause is empty-IP or limiter-deny: + +```go +logger.Warn("rate_limited", "remote", strconv.Quote(ip)) +``` + +- **Level: `Warn`.** Operational signal for ops dashboards; not noisy enough to need `Info`, not severe enough for `Error`. Matches "this attempt was rejected but the relay is healthy." +- **Message string `"rate_limited"`** — the event type, per the codebase convention (e.g. `"server_claimed"`, `"server_id_conflict"`). +- **Single key `"remote"`** — already in `allowedLogKeys`; no edit to `log_allowlist.go` needed. +- **Value `strconv.Quote(ip)`** — per `internal/relay/client_ip.go`'s log-injection note. For empty-IP this renders `""` (an explicit empty quoted string, distinguishable from a missing field). +- **Mobile/binary token never read.** The middleware short-circuits before `next.ServeHTTP`, so `X-Pyrycode-Token` is not touched by this code path. Stated as a non-AC reminder, enforced structurally by not having a `r.Header.Get("X-Pyrycode-Token")` anywhere in `ratelimit_middleware.go`. + +The `log_keys_test.go` AST walker enforces (a) literal-string keys and (b) no `With`/`LogAttrs`/`Log` shapes. Both satisfied. + +### CLI flag + +Add to the `flag.X(...)` block in `cmd/pyrycode-relay/main.go`: + +```go +trustXFF = flag.Bool("trust-x-forwarded-for", false, + "Trust the X-Forwarded-For header for per-IP rate limiting. "+ + "WARNING: enabling this without a trusted reverse proxy in front of "+ + "the relay allows clients to spoof their source IP and bypass "+ + "per-IP rate limits.") +``` + +Default `false`. Threaded as the third argument to `relay.NewRateLimitMiddleware`. The warning in Usage is the operator-facing trust contract per AC. + +### Wiring at the composition root + +In `main.go`, after the existing `const maxFrameBytes` and before `mux := http.NewServeMux()`: + +```go +// Per-IP rate-limit policy: ~10 attempts/IP/minute steady-state, burst 20. +// Derivation: docs/threat-model.md § DoS resistance future-hardening line +// names ~10/min/IP and burst headroom for retry storms; eviction interval +// 5 min keeps the bucket map's resident size bounded under address-space +// scanning (see docs/specs/architecture/50-ip-rate-limiter.md § Adversarial +// walk #1). All three values are positive — NewIPRateLimiter panics on a +// zero/negative evictionInterval via time.NewTicker. +const ( + rateLimitRefillEvery = 6 * time.Second + rateLimitBurst = 20 + rateLimitEvictionInterval = 5 * time.Minute +) + +limiter := relay.NewIPRateLimiter(rateLimitRefillEvery, rateLimitBurst, rateLimitEvictionInterval) +defer limiter.Close() + +rateLimit := relay.NewRateLimitMiddleware(limiter, logger, *trustXFF) + +mux := http.NewServeMux() +mux.Handle("/healthz", relay.NewHealthzHandler(reg, Version, startedAt)) +mux.Handle("/v1/server", rateLimit(relay.ServerHandler(reg, logger, 30*time.Second, maxFrameBytes))) +mux.Handle("/v1/client", rateLimit(relay.ClientHandler(reg, logger, maxFrameBytes, 16))) +``` + +`/healthz` is NOT wrapped — per AC, and because healthz must remain pollable from anywhere (load-balancers, monitoring) without throttling. + +`defer limiter.Close()` placement is "before the listeners block" per AC. As the ticket body acknowledges, `main.go` currently `os.Exit`s on listener error, which does not run defers — this is best-effort only. A real graceful-shutdown path is out of scope. + +### Why middleware, not constructor injection + +Pushing the limiter into `ServerHandler`/`ClientHandler` constructors would: + +- Couple two unrelated concerns (frame routing + admission control) at the type level. +- Force the limiter and its policy values to be known one layer deeper than necessary. +- Make `/healthz`'s exemption awkward (the constructor would need an `enabled bool` or a nil-limiter check). + +Middleware composition keeps each layer single-purpose. The composition root in `main.go` is the only place that knows both. Matches the `EnforceHost(*domain, mux)` pattern already in `main.go:191` — host enforcement is also middleware-shape, not handler-internal. + +### Concurrency model + +The middleware is stateless aside from the shared `*IPRateLimiter`. Concurrency safety is fully delegated to the limiter (`internal/relay/ratelimit.go` § *Concurrency model*). No new locks, no new goroutines. + +The `defer limiter.Close()` synchronises with the limiter's eviction goroutine via the limiter's internal WaitGroup; the middleware itself owns no goroutine to shut down. + +### Error handling + +The middleware has two terminal branches (empty-IP deny, limiter deny) and one pass-through. None return errors — `http.Error` writes the response directly. The pass-through is just `next.ServeHTTP(w, r)`; any error from the inner handler is the inner handler's responsibility. + +`ClientIP` and `Allow` are both infallible by design. There is no third failure mode to plumb. + +### Testing strategy + +All tests live in `internal/relay/ratelimit_middleware_test.go`, `package relay` (matching the convention). + +Test infrastructure (one helper, reusing the limiter's fake clock): + +- A `sentinelHandler` that increments a counter when called. The middleware test asserts the counter for both pass and deny paths. +- A `bytes.Buffer`-backed `slog.NewTextHandler` for capturing log lines. The middleware factory accepts a `*slog.Logger` so the test wires its own. +- Use `httptest.NewRecorder()` + `http.NewRequest`; set `req.RemoteAddr` directly (e.g. `"1.2.3.4:5555"`) and `req.Header.Set("X-Forwarded-For", "9.9.9.9")` per test. + +Test cases (bullet-pointed scenarios; developer writes the test bodies in the project's idiom): + +1. **Burst, then deny.** With burst=2, `refillEvery=time.Hour` (effectively no refill during the test), three POSTs to a wrapped sentinel from `RemoteAddr="1.1.1.1:1234"`: first two reach the sentinel and return its status (200); third returns 429 and the sentinel is NOT called (counter still at 2). Asserts the AC "denied attempts return 429 *before* websocket.Accept runs." + +2. **Per-IP isolation.** Same limiter, burst=1. Request from `"1.1.1.1:1"` → 200, sentinel called. Second request from `"1.1.1.1:1"` → 429, sentinel not re-called. Third request from `"2.2.2.2:1"` → 200, sentinel called. Asserts the AC's "request from a second IP is unaffected." + +3. **Trust XFF on.** `trustForwardedFor=true`, burst=1. Two requests with `RemoteAddr="1.1.1.1:1"` but `X-Forwarded-For="9.9.9.9"`: first → 200, second → 429. A third request with same RemoteAddr but `X-Forwarded-For="8.8.8.8"` → 200. Asserts the AC's "with the flag, a forged XFF counts against the forged IP's bucket — documents the trust model." + +4. **Trust XFF off (default).** `trustForwardedFor=false`. Two requests with `RemoteAddr="1.1.1.1:1"` and `X-Forwarded-For="9.9.9.9"` (set but ignored): first → 200, second → 429 (keyed to `1.1.1.1`, not `9.9.9.9`). Sanity-check that the default ignores the header. + +5. **Empty IP denies.** Craft a request whose `RemoteAddr` is unparseable (e.g. `RemoteAddr=""` — `net.SplitHostPort` returns an error; `ClientIP` returns `""`). With `trustForwardedFor=false`, expect 429 and the sentinel NOT called, **without** calling `limiter.Allow`. Verification of "limiter.Allow not called" can be done by using a fresh limiter for this test and asserting the bucket map is still empty after the request via `len(limiter.buckets)` under the limiter's lock (package-internal access — same pattern as `ratelimit_test.go`). Asserts the AC's "when ClientIP returns empty, deny before calling Allow." + +6. **Deny emits exactly one log line with the right shape.** A buffer-backed logger; one denied request; assert (a) exactly one line written, (b) line contains `level=WARN`, (c) line contains `msg=rate_limited`, (d) line contains `remote="\"1.2.3.4\""` (the strconv.Quote-wrapped value — slog text handler will escape the inner quotes). Repeat with empty-RemoteAddr request and assert `remote="\"\""` (empty quoted string). Asserts the AC's log-shape constraints. No other keys present. + +7. **Allowed request emits no log line.** Pass-through case; assert the buffer is empty. + +8. **Registry never touched on deny.** Wire a real `relay.NewRegistry()` into a real `ServerHandler` (the wrapped handler under test), make a denied request, assert `len(reg.AllPhones())` (or equivalent registry-state inspection) is unchanged. This pins the "registry is never touched" AC clause structurally. The simpler form is the sentinel-counter assertion (test #1); this test adds the registry-state assertion as a regression guard. + +No new test infrastructure beyond `bytes.Buffer` + `httptest`. The limiter's own fake clock is not needed: `refillEvery=time.Hour` makes all tests effectively rate-time-independent within a sub-second test duration. + +`make vet`, `make test -race`, and `make build` must all pass per project convention. + +### Doc updates the developer must make + +1. **`docs/threat-model.md` § *DoS resistance*** — update v1 mitigation to reference the wiring (file:line anchor at `cmd/pyrycode-relay/main.go`'s new const block + middleware composition). Remove the "A token-bucket rate limit on /v1/server and /v1/client" line from § *Future hardening* — it's now v1. The connection-count cap (per-IP and global, separate from rate limiting) stays in *Future hardening*. + +2. **`docs/knowledge/codebase/47.md`** — new file per the 2026-05-10 convention. Implementation summary (files touched, public API of the middleware factory, the empty-IP guard rationale, the 429 shape, the deny-log shape, the policy-defaults derivation). Include a "Patterns established" bullet if any (likely none new — this ticket consumes existing patterns). + +3. **`docs/knowledge/INDEX.md`** — documentation phase appends. Architect does NOT edit (per the agent role's "Never Update" list). + +4. **No `docs/PROJECT-MEMORY.md` edit** — human-maintained. + +### Open questions + +None. The middleware shape is fully constrained by AC; the log-level choice (Warn over Info) is documented inline; the policy defaults are inside the AC-suggested ranges with derivation traced to the threat-model + the limiter spec. + +--- + +## Security review + +**Verdict:** PASS + +### Trust boundaries this spec touches + +| Boundary | Trust posture | Where enforced | +|---|---|---| +| `r.RemoteAddr` — TCP-layer source address | Trusted as far as TCP is honest. Can still be the empty/unparseable case (loopback unix-socket binds, or a misconfigured proxy). | `ClientIP` returns `""` on unparseable; middleware denies on empty before `Allow`. | +| `X-Forwarded-For` request header — attacker-controlled by default | Untrusted unless `--trust-x-forwarded-for` is set. CLI flag defaults `false`. Flag's Usage string carries the explicit "operator owns the trust" warning. | `relay.ClientIP(r, trustForwardedFor)`; flag default in `main.go`. | +| `IPRateLimiter` state — shared across `/v1/server` and `/v1/client` | Trusted (in-process). Sharing one limiter across both endpoints is deliberate: a single attacker IP retrying against either endpoint should share one bucket, not two. | One `*IPRateLimiter` constructed once in `main`, passed to one `rateLimit` middleware, applied to both `mux.Handle` registrations. | +| Mobile/binary `X-Pyrycode-Token` header | Untrusted, validated by the binary. The relay never reads it; the rate-limit middleware doubly never reads it (runs before any handler). | Structurally: no `r.Header.Get("X-Pyrycode-Token")` call in `ratelimit_middleware.go`. | + +### Adversarial walk + +**1. Trust boundaries.** The two boundaries (`RemoteAddr`, `XFF`) are crossed exactly once each, in `ClientIP`. The middleware never re-parses, never sees the raw header. The trust mode (XFF on/off) is set at process start and never changes — no per-request mode-flipping, no header self-asserting "trust me." The flag's default-false posture is the loud-failure choice (`docs/PROJECT-MEMORY.md` § *Loud failure over silent correction*): the operator must opt in to the spoofable mode. **No findings.** + +**2. Tokens, secrets, credentials.** The mobile/binary token is not read by this middleware. The 429 response body is empty (no leak). The deny log line carries only `remote` (allowlisted) — not the token, not the User-Agent, not the server-id, not any header. The threat-model § Log hygiene rule is satisfied by the closed-set allowlist already in place (`log_allowlist.go:9-17`) + the AST walker (`log_keys_test.go`). **No findings.** + +**3. File operations.** None. Middleware is pure-memory. **N/A.** + +**4. Subprocess / external command execution.** None. **N/A.** + +**5. Cryptographic primitives.** None. **N/A.** + +**6. Network & I/O.** +- *Input size limits.* The middleware runs before `websocket.Accept`. Per-frame caps (`#29`) and the http.Server header timeouts (`ReadHeaderTimeout: 10s`, `ReadTimeout: 60s`) bound everything upstream of this code path. No new I/O the middleware itself performs. +- *Header validation.* `XFF` parsing is in `ClientIP`'s scope (#51). The middleware treats the result as an opaque string for map-keying. +- *Resource exhaustion.* The limiter's eviction goroutine + the bounded bucket map (proof in #50's spec) cap the limiter's memory. The middleware adds no other state. +- *Connection-count cap (separate from rate-limit).* Out of scope for this ticket — explicitly listed in the issue's *Out of Scope* and still in `docs/threat-model.md` § *Future hardening* after the doc update. The per-IP rate limit does not subsume the per-IP / global connection-count caps. +- **No findings.** + +**7. Error messages, logs, telemetry.** +- *429 body.* Empty (`http.Error(w, "", 429)`). No internal state leaked (`docs/threat-model.md` § *Error response leakage*). +- *Deny log line.* Exactly one line per deny, with one allowlisted key + a `strconv.Quote`-wrapped value (defends against control-byte log injection per `client_ip.go`'s comment). Same shape regardless of empty-IP vs limiter-deny — operator can disambiguate by the `remote` value (`""` vs a real IP). +- *Allow path.* No log line. Allowed traffic flows silently; the wrapped handler may log normally. +- *No telemetry yet.* A future metrics ticket could add a "rate_limited_total" counter (cf. #58's metric pattern). Listed as **OUT OF SCOPE — separate metrics ticket**, not a finding. +- **No findings.** + +**8. Concurrency.** +- *Limiter shared across both endpoints.* Single `*IPRateLimiter` is goroutine-safe per #50's race-tested concurrency model. +- *Middleware itself stateless.* No new locks, no shared mutable state added by this layer. +- *Shutdown safety.* `defer limiter.Close()` in `main` is best-effort (current `os.Exit`-on-error path skips defers). On clean shutdown via Ctrl-C in dev, the defer runs; the limiter's `Close()` is synchronous (`wg.Wait()`) and the eviction goroutine exits cleanly. The current `main.go` has no signal-handler wired (not introduced by this ticket); a real graceful-shutdown path is out of scope per AC. **SHOULD FIX → deferred — listed in *Out of Scope*; opening a follow-up issue for "graceful shutdown + signal handling" is appropriate when WS support lands.** +- **No findings (the shutdown gap is pre-existing and explicitly deferred).** + +**9. Threat model alignment.** +- *`docs/threat-model.md` § DoS resistance.* This ticket converts the "future hardening: token-bucket rate limit on /v1/server and /v1/client" line to a v1 mitigation. The doc update is part of this ticket's deliverables. +- *§ Log hygiene.* Conformed via the existing allowlist + walker. +- *§ Error response leakage.* Empty 429 body honours the "public-facing handlers return generic messages" rule. +- *Protocol spec § Security model.* The protocol-level threats (server-id race, token leak, replay, MITM) are upstream of this middleware. The middleware short-circuits before any of those code paths can run — it does not weaken any existing mitigation. +- **No findings.** + +### Decision summary + +| Category | Verdict | +|---|---| +| Trust boundaries | No findings | +| Tokens / secrets | No findings | +| File operations | N/A | +| Subprocess | N/A | +| Cryptography | N/A | +| Network & I/O | No findings (connection-count cap explicitly OUT OF SCOPE) | +| Errors / logs | No findings (metrics counter OUT OF SCOPE, future ticket) | +| Concurrency | No findings (graceful-shutdown pre-existing gap, explicitly deferred) | +| Threat model alignment | No findings (doc update is part of this ticket) | + +No **MUST FIX**. No **SHOULD FIX**. Two **OUT OF SCOPE** items, both already named in the issue body's *Out of Scope* section: connection-count caps (separate threat surface), graceful-shutdown signal handling (orthogonal). Both stay in `docs/threat-model.md` § *Future hardening* after this ticket's doc update. + +**Reviewer:** architect (self-review per `architect/security-review.md`) +**Date:** 2026-05-13 diff --git a/docs/threat-model.md b/docs/threat-model.md index aeeb6f1..a75372b 100644 --- a/docs/threat-model.md +++ b/docs/threat-model.md @@ -33,11 +33,11 @@ Each threat below records four fields: **Severity:** `medium`. -**v1 mitigation:** Slow-loris is mitigated by the `http.Server` timeouts in `cmd/pyrycode-relay/main.go:53-95` (`ReadHeaderTimeout: 10s`, `ReadTimeout: 60s`, `WriteTimeout: 60s`, `IdleTimeout: 120s`), applied identically to the autocert HTTP listener and the TLS listener. Bandwidth amplification is structurally absent: the relay forwards 1:1, so there is no amplification factor an attacker can lever. Connection-count caps and per-IP rate limits are deferred — none in v1. +**v1 mitigation:** Slow-loris is mitigated by the `http.Server` timeouts in `cmd/pyrycode-relay/main.go:53-95` (`ReadHeaderTimeout: 10s`, `ReadTimeout: 60s`, `WriteTimeout: 60s`, `IdleTimeout: 120s`), applied identically to the autocert HTTP listener and the TLS listener. A per-IP token-bucket rate limit fronts the `/v1/server` and `/v1/client` upgrade paths (`cmd/pyrycode-relay/main.go` policy block, `internal/relay/ratelimit_middleware.go`): ~10 attempts/IP/minute steady-state, burst 20, eviction sweep every 5 minutes. Over-cap attempts receive `429 Too Many Requests` before `websocket.Accept` runs; the wrapped registry is never touched on a deny. The middleware extracts the source IP via `relay.ClientIP`; `X-Forwarded-For` is honoured only when the operator opts in with `--trust-x-forwarded-for` (default `false`). `/healthz` is intentionally unwrapped — it must remain pollable from monitoring without throttling. Bandwidth amplification is structurally absent: the relay forwards 1:1, so there is no amplification factor an attacker can lever. Connection-count caps (per-IP and global, distinct from the attempt-rate cap) are deferred. -**Residual risk:** A misbehaving or malicious client can open many concurrent WebSocket connections (once WS is built) and exhaust file descriptors or RAM. A runaway client retry loop is not throttled. A single attacker can saturate the public bandwidth. +**Residual risk:** The rate limit caps the *attempt rate* per source IP but not the *concurrent connection count*: a slow attacker that opens connections at or below the bucket refill rate can still pin file descriptors and RAM. The single-instance limiter is per-process — a future multi-instance deployment behind L4 load balancing would need shared state to converge on a global limit. `X-Forwarded-For` is binary trust (all-or-nothing) — a CIDR-aware trusted-proxy chain is not implemented. -**Future hardening:** A connection-count cap (per-IP and global) on the WebSocket upgrade path; a token-bucket rate limit on `/v1/server` and `/v1/client`; reverse-proxy fronting for L4 protections once a chosen WebSocket library is in place. Trigger: first observed flood, or first paying user. +**Future hardening:** A connection-count cap (per-IP and global) on the WebSocket upgrade path; a CIDR-allowlisted trusted-proxy chain for `X-Forwarded-For`; reverse-proxy fronting for L4 protections; shared rate-limit state for multi-instance deployments. Trigger: first observed flood, first multi-instance deployment, or first paying user. ## Log hygiene — what must not be logged diff --git a/internal/relay/ratelimit_middleware.go b/internal/relay/ratelimit_middleware.go new file mode 100644 index 0000000..e0a8e89 --- /dev/null +++ b/internal/relay/ratelimit_middleware.go @@ -0,0 +1,40 @@ +package relay + +import ( + "log/slog" + "net/http" + "strconv" +) + +// NewRateLimitMiddleware returns an http.Handler-wrapping middleware that +// rejects requests whose source IP has exhausted its token bucket. On a +// denied request, the middleware writes 429 Too Many Requests, emits one +// slog Warn line ("rate_limited", "remote", strconv.Quote(host)), and does +// NOT invoke the wrapped handler. Empty-IP requests (ClientIP returns "") +// are denied the same way before Allow is called — refusing un-attributable +// traffic without throttling matches the loud-failure rule in +// docs/PROJECT-MEMORY.md § Project-level conventions. +// +// trustForwardedFor is threaded into relay.ClientIP. Callers MUST NOT set +// this true unless a trusted reverse proxy fronts the relay — see the +// --trust-x-forwarded-for CLI flag's Usage string in cmd/pyrycode-relay. +func NewRateLimitMiddleware(limiter *IPRateLimiter, logger *slog.Logger, trustForwardedFor bool) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + ip := ClientIP(r, trustForwardedFor) + // Empty-IP guard must precede Allow: Allow("") is a normal map + // key in the limiter's contract, not a deny. + if ip == "" { + http.Error(w, "", http.StatusTooManyRequests) + logger.Warn("rate_limited", "remote", strconv.Quote(ip)) + return + } + if !limiter.Allow(ip) { + http.Error(w, "", http.StatusTooManyRequests) + logger.Warn("rate_limited", "remote", strconv.Quote(ip)) + return + } + next.ServeHTTP(w, r) + }) + } +} diff --git a/internal/relay/ratelimit_middleware_test.go b/internal/relay/ratelimit_middleware_test.go new file mode 100644 index 0000000..96cc247 --- /dev/null +++ b/internal/relay/ratelimit_middleware_test.go @@ -0,0 +1,280 @@ +package relay + +import ( + "bytes" + "log/slog" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" +) + +// sentinelHandler is a minimal http.Handler that increments a counter on +// every invocation. The middleware tests assert the counter to lock in the +// "deny short-circuits before the wrapped handler runs" contract. +type sentinelHandler struct { + calls atomic.Int64 +} + +func (s *sentinelHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + s.calls.Add(1) + w.WriteHeader(http.StatusOK) +} + +// newMiddlewareTestLimiter builds a limiter that never refills during the +// test (refillEvery=time.Hour) and registers cleanup. The middleware test +// suite is time-independent — burst values alone drive the allow/deny +// transitions. +func newMiddlewareTestLimiter(t *testing.T, burst int) *IPRateLimiter { + t.Helper() + l := NewIPRateLimiter(time.Hour, burst, time.Hour) + t.Cleanup(l.Close) + return l +} + +// newMiddlewareRequest builds a request with RemoteAddr set as given. XFF is +// set only when xff is non-empty. +func newMiddlewareRequest(remoteAddr, xff string) *http.Request { + r := httptest.NewRequest(http.MethodGet, "/", nil) + r.RemoteAddr = remoteAddr + if xff != "" { + r.Header.Set("X-Forwarded-For", xff) + } + return r +} + +func TestRateLimitMiddleware_BurstThenDeny(t *testing.T) { + t.Parallel() + l := newMiddlewareTestLimiter(t, 2) + sentinel := &sentinelHandler{} + mw := NewRateLimitMiddleware(l, discardLogger(), false) + h := mw(sentinel) + + for i := 0; i < 2; i++ { + rr := httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("1.1.1.1:1234", "")) + if rr.Code != http.StatusOK { + t.Fatalf("request %d: status = %d, want 200", i+1, rr.Code) + } + } + if got := sentinel.calls.Load(); got != 2 { + t.Fatalf("sentinel calls after burst: got %d, want 2", got) + } + + rr := httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("1.1.1.1:1234", "")) + if rr.Code != http.StatusTooManyRequests { + t.Fatalf("denied request: status = %d, want 429", rr.Code) + } + if got := sentinel.calls.Load(); got != 2 { + t.Fatalf("sentinel calls after deny: got %d, want 2 (handler must not run on deny)", got) + } +} + +func TestRateLimitMiddleware_PerIPIsolation(t *testing.T) { + t.Parallel() + l := newMiddlewareTestLimiter(t, 1) + sentinel := &sentinelHandler{} + h := NewRateLimitMiddleware(l, discardLogger(), false)(sentinel) + + rr := httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("1.1.1.1:1", "")) + if rr.Code != http.StatusOK { + t.Fatalf("first 1.1.1.1: status = %d, want 200", rr.Code) + } + + rr = httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("1.1.1.1:1", "")) + if rr.Code != http.StatusTooManyRequests { + t.Fatalf("second 1.1.1.1: status = %d, want 429", rr.Code) + } + + rr = httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("2.2.2.2:1", "")) + if rr.Code != http.StatusOK { + t.Fatalf("first 2.2.2.2: status = %d, want 200 (per-IP isolation)", rr.Code) + } + + if got := sentinel.calls.Load(); got != 2 { + t.Fatalf("sentinel calls: got %d, want 2", got) + } +} + +func TestRateLimitMiddleware_TrustXFF_BucketsByForgedIP(t *testing.T) { + t.Parallel() + l := newMiddlewareTestLimiter(t, 1) + sentinel := &sentinelHandler{} + h := NewRateLimitMiddleware(l, discardLogger(), true)(sentinel) + + rr := httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("1.1.1.1:1", "9.9.9.9")) + if rr.Code != http.StatusOK { + t.Fatalf("first XFF=9.9.9.9: status = %d, want 200", rr.Code) + } + + rr = httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("1.1.1.1:1", "9.9.9.9")) + if rr.Code != http.StatusTooManyRequests { + t.Fatalf("second XFF=9.9.9.9: status = %d, want 429 (bucket keyed by XFF)", rr.Code) + } + + // Same RemoteAddr but a different XFF lands in a different bucket — the + // trust-the-header model means the relay does not key by RemoteAddr. + rr = httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("1.1.1.1:1", "8.8.8.8")) + if rr.Code != http.StatusOK { + t.Fatalf("first XFF=8.8.8.8: status = %d, want 200", rr.Code) + } +} + +func TestRateLimitMiddleware_TrustXFFOff_IgnoresHeader(t *testing.T) { + t.Parallel() + l := newMiddlewareTestLimiter(t, 1) + sentinel := &sentinelHandler{} + h := NewRateLimitMiddleware(l, discardLogger(), false)(sentinel) + + rr := httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("1.1.1.1:1", "9.9.9.9")) + if rr.Code != http.StatusOK { + t.Fatalf("first request: status = %d, want 200", rr.Code) + } + + // Default mode keys by RemoteAddr; the second request from 1.1.1.1 + // exhausts the bucket regardless of XFF. + rr = httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("1.1.1.1:1", "9.9.9.9")) + if rr.Code != http.StatusTooManyRequests { + t.Fatalf("second request: status = %d, want 429 (XFF must be ignored)", rr.Code) + } +} + +func TestRateLimitMiddleware_EmptyIPDeniesWithoutTouchingLimiter(t *testing.T) { + t.Parallel() + l := newMiddlewareTestLimiter(t, 1) + sentinel := &sentinelHandler{} + h := NewRateLimitMiddleware(l, discardLogger(), false)(sentinel) + + // RemoteAddr=="" causes net.SplitHostPort to fail; ClientIP returns "". + rr := httptest.NewRecorder() + h.ServeHTTP(rr, newMiddlewareRequest("", "")) + if rr.Code != http.StatusTooManyRequests { + t.Fatalf("empty-IP request: status = %d, want 429", rr.Code) + } + if got := sentinel.calls.Load(); got != 0 { + t.Fatalf("sentinel calls: got %d, want 0 (handler must not run)", got) + } + + // The limiter must not have been consulted on the empty-IP path: an + // empty-key bucket would have been created had Allow("") run. This + // pins the AC's "deny before calling Allow" contract. + l.mu.Lock() + n := len(l.buckets) + l.mu.Unlock() + if n != 0 { + t.Fatalf("limiter buckets after empty-IP request: got %d, want 0 (Allow must not have been called)", n) + } +} + +func TestRateLimitMiddleware_DenyEmitsOneLogLine(t *testing.T) { + t.Parallel() + l := newMiddlewareTestLimiter(t, 1) + var buf bytes.Buffer + logger := slog.New(slog.NewTextHandler(&buf, nil)) + h := NewRateLimitMiddleware(l, logger, false)(&sentinelHandler{}) + + // Burn the bucket. + h.ServeHTTP(httptest.NewRecorder(), newMiddlewareRequest("1.2.3.4:5555", "")) + // Allowed request must not log. + if buf.Len() != 0 { + t.Fatalf("buffer after allowed request: got %q, want empty", buf.String()) + } + + // Denied request. + h.ServeHTTP(httptest.NewRecorder(), newMiddlewareRequest("1.2.3.4:5555", "")) + + out := buf.String() + lines := strings.Split(strings.TrimRight(out, "\n"), "\n") + if len(lines) != 1 { + t.Fatalf("log lines after one deny: got %d (%q), want 1", len(lines), out) + } + line := lines[0] + if !strings.Contains(line, "level=WARN") { + t.Errorf("log line missing level=WARN: %q", line) + } + if !strings.Contains(line, "msg=rate_limited") { + t.Errorf("log line missing msg=rate_limited: %q", line) + } + // slog text handler renders the strconv.Quote-wrapped value as a + // double-quoted string whose inner quotes are escaped: `remote="\"1.2.3.4\""`. + if !strings.Contains(line, `remote="\"1.2.3.4\""`) { + t.Errorf("log line missing quoted remote field: %q", line) + } + // Field set must be exactly {remote} — no token, no User-Agent, no + // server-id leaked from the request. + for _, forbidden := range []string{"server_id=", "binary_version=", "conn_id=", "device_name=", "user_agent="} { + if strings.Contains(line, forbidden) { + t.Errorf("log line contains forbidden field %q: %q", forbidden, line) + } + } +} + +func TestRateLimitMiddleware_EmptyIPDenyLogsQuotedEmptyString(t *testing.T) { + t.Parallel() + l := newMiddlewareTestLimiter(t, 1) + var buf bytes.Buffer + logger := slog.New(slog.NewTextHandler(&buf, nil)) + h := NewRateLimitMiddleware(l, logger, false)(&sentinelHandler{}) + + h.ServeHTTP(httptest.NewRecorder(), newMiddlewareRequest("", "")) + + line := buf.String() + if !strings.Contains(line, `remote="\"\""`) { + t.Errorf("empty-IP deny: log line missing remote=\"\\\"\\\"\": %q", line) + } +} + +func TestRateLimitMiddleware_AllowEmitsNoLogLine(t *testing.T) { + t.Parallel() + l := newMiddlewareTestLimiter(t, 2) + var buf bytes.Buffer + logger := slog.New(slog.NewTextHandler(&buf, nil)) + h := NewRateLimitMiddleware(l, logger, false)(&sentinelHandler{}) + + h.ServeHTTP(httptest.NewRecorder(), newMiddlewareRequest("1.2.3.4:1", "")) + h.ServeHTTP(httptest.NewRecorder(), newMiddlewareRequest("1.2.3.4:1", "")) + + if buf.Len() != 0 { + t.Fatalf("buffer after allowed requests: got %q, want empty", buf.String()) + } +} + +// TestRateLimitMiddleware_RegistryNotTouchedOnDeny pins the AC's structural +// guarantee that a denied attempt never reaches code paths that mutate the +// shared registry. The sentinel-counter assertion in the burst test already +// proves the wrapped handler does not run on deny; this is a belt-and- +// suspenders regression guard wired against a real ServerHandler. +func TestRateLimitMiddleware_RegistryNotTouchedOnDeny(t *testing.T) { + t.Parallel() + l := newMiddlewareTestLimiter(t, 1) + reg := NewRegistry() + logger := discardLogger() + wrapped := NewRateLimitMiddleware(l, logger, false)(ServerHandler(reg, logger, time.Second, 256*1024)) + + // Exhaust the bucket without going through the WS handler (no headers, + // so even if it did run it would 400 — but the deny must short-circuit + // before that, and the registry must remain pristine throughout). + wrapped.ServeHTTP(httptest.NewRecorder(), newMiddlewareRequest("1.1.1.1:1", "")) + // Denied attempt. + rr := httptest.NewRecorder() + wrapped.ServeHTTP(rr, newMiddlewareRequest("1.1.1.1:1", "")) + if rr.Code != http.StatusTooManyRequests { + t.Fatalf("denied attempt: status = %d, want 429", rr.Code) + } + + b, p := reg.Counts() + if b != 0 || p != 0 { + t.Fatalf("registry counts after denied attempt: binaries=%d phones=%d, want 0,0", b, p) + } +}