diff --git a/README.md b/README.md index b805143..3af20cb 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,18 @@ make lint # gosec + govulncheck (requires both installed locally) ## Run +Production (autocert): + +```bash +sudo ./bin/pyrycode-relay --domain relay.example.com +``` + +The relay binds `:443` (WSS) and `:80` (ACME http-01 challenge). Both ports must be reachable from the public internet — Let's Encrypt issues the cert by hitting `:80` on first request to the domain. The first WSS request after a fresh start may take ~10–20s while the cert is issued and cached to `--cert-cache`. Subsequent restarts reuse the cached cert. + +Behind a reverse proxy (TLS terminated upstream): + ```bash -./bin/pyrycode-relay --domain relay.example.com +./bin/pyrycode-relay --insecure-listen :8080 ``` Flags: @@ -39,7 +49,7 @@ Flags: | Flag | Default | Notes | |---|---|---| | `--domain` | (required for autocert) | Public domain for Let's Encrypt cert issuance. Required when `--insecure-listen` is unset. | -| `--cert-cache` | `~/.pyrycode-relay/certs` | Directory for autocert's TLS certificate cache. | +| `--cert-cache` | `~/.pyrycode-relay/certs` | Directory for autocert's TLS certificate cache. Created with `0700` if missing; refuses to start if an existing dir is world- or group-readable. | | `--insecure-listen` | (unset) | Listen address for plain HTTP (e.g. `:8080`). Disables autocert. Use only when fronted by a reverse proxy. | | `--version` | | Print version and exit. | diff --git a/cmd/pyrycode-relay/main.go b/cmd/pyrycode-relay/main.go index 526e204..df4028d 100644 --- a/cmd/pyrycode-relay/main.go +++ b/cmd/pyrycode-relay/main.go @@ -13,6 +13,8 @@ import ( "net/http" "os" "time" + + "github.com/pyrycode/pyrycode-relay/internal/relay" ) // Version is overridden at build time via -ldflags. @@ -63,11 +65,48 @@ func main() { return } - // TLS path (autocert) is intentionally not implemented yet — first - // real ticket wires it up. Refuse to start so misconfiguration is loud. - logger.Error("autocert TLS path not yet implemented; use --insecure-listen for now", + mgr, err := relay.NewAutocertManager(*domain, *certCache) + if err != nil { + logger.Error("autocert setup failed", "err", err) + os.Exit(1) + } + + httpsSrv := &http.Server{ + Addr: ":443", + Handler: relay.EnforceHost(*domain, mux), + TLSConfig: relay.TLSConfig(mgr), + ReadHeaderTimeout: 10 * time.Second, + ReadTimeout: 60 * time.Second, + WriteTimeout: 60 * time.Second, + IdleTimeout: 120 * time.Second, + } + + httpSrv := &http.Server{ + Addr: ":80", + // NotFoundHandler — NOT nil. autocert.Manager.HTTPHandler(nil) + // would 302 GET/HEAD to HTTPS; the AC requires explicit 404 for + // non-challenge traffic. + Handler: mgr.HTTPHandler(http.NotFoundHandler()), + ReadHeaderTimeout: 10 * time.Second, + ReadTimeout: 60 * time.Second, + WriteTimeout: 60 * time.Second, + IdleTimeout: 120 * time.Second, + } + + logger.Info("starting", "version", Version, "mode", "autocert", "domain", *domain, "cert_cache", *certCache) - os.Exit(2) + + go func() { + if err := httpSrv.ListenAndServe(); err != nil { + logger.Error("http-01 listener failed", "err", err) + os.Exit(1) + } + }() + + if err := httpsSrv.ListenAndServeTLS("", ""); err != nil { + logger.Error("https listener failed", "err", err) + os.Exit(1) + } } func defaultCertCache() string { diff --git a/docs/PROJECT-MEMORY.md b/docs/PROJECT-MEMORY.md index ab30b89..013ea0c 100644 --- a/docs/PROJECT-MEMORY.md +++ b/docs/PROJECT-MEMORY.md @@ -9,12 +9,12 @@ Stateless WebSocket router between mobile clients and pyry binaries. Internet-ex | Go skeleton | Done | `cmd/pyrycode-relay/main.go`, `internal/relay/doc.go` | | `http.Server` with explicit timeouts (gosec G114) | Done | `cmd/pyrycode-relay/main.go` | | Routing envelope wrapper type (`Envelope`, `Marshal`, `Unmarshal`, sentinel errors) | Done (#1) | `internal/relay/envelope.go` | +| TLS via autocert in `--domain` mode (`NewAutocertManager`, `EnforceHost`, `TLSConfig`, `ErrCacheDirInsecure`) | Done (#9) | `internal/relay/tls.go`, `cmd/pyrycode-relay/main.go` | | WS upgrade on `/v1/server` and `/v1/client` | Not started | — | | Header validation (`x-pyrycode-server`, `x-pyrycode-token`) | Not started | — | | Connection registry (server-id ↔ binary, server-id ↔ phones) | Not started | — | | Frame forwarding using the routing envelope | Not started | — | | `conn_id` generation scheme | Not started | — | -| TLS / autocert | Not started | — | | Threat model doc | Not started | `docs/threat-model.md` planned | ## Patterns established @@ -22,7 +22,8 @@ Stateless WebSocket router between mobile clients and pyry binaries. Internet-ex - **Sentinel errors, branched via `errors.Is`** for validation failures at protocol boundaries. New routing-layer code follows the `Err...` naming and wraps with `fmt.Errorf("…: %w", err, sentinel)` when adding context. - **Opacity by type.** Inner-frame payloads are carried as `json.RawMessage`. The relay never deserialises payloads; the type makes that hard to violate accidentally. - **Validate at the envelope boundary, not deeper.** Structural checks (presence, non-empty, JSON well-formedness) belong here; semantic checks (token validity, message kind) belong to the binary. -- **Stdlib only.** No external Go dependencies so far. `encoding/json` is sufficient for routing. +- **Stdlib + `golang.org/x/crypto`.** First non-stdlib dep arrived in #9 (`acme/autocert`). Keep the dependency surface deliberate; new deps need a justification. +- **Loud failure over silent correction.** `--domain` and `--insecure-listen` are mutually exclusive and explicit. `ErrCacheDirInsecure` refuses to start on a world/group-readable cert cache rather than re-chmoding it. `:80` returns `404` for non-challenge traffic instead of redirecting to HTTPS (ADR-0002). - **Tests live in the same package** (`package relay`, not `relay_test`) so they can `errors.Is` against unexported sentinels if needed and reach private helpers without re-exporting. ## Conventions diff --git a/docs/knowledge/INDEX.md b/docs/knowledge/INDEX.md index 75f4098..a0f24cd 100644 --- a/docs/knowledge/INDEX.md +++ b/docs/knowledge/INDEX.md @@ -4,10 +4,12 @@ One-line pointers into the evergreen knowledge base. Newest entries at the top o ## Features +- [Autocert TLS](features/autocert-tls.md) — `--domain` mode wiring: `:443` WSS via Let's Encrypt + `:80` ACME http-01, host gates, cache-dir permission policy, TLS 1.2 floor. - [Routing envelope](features/routing-envelope.md) — typed Go wrapper (`Envelope`, `Marshal`, `Unmarshal`) for the `{conn_id, frame}` wire shape exchanged between relay and binary. ## Decisions +- [ADR-0002: Explicit-failure 404 on `:80` instead of HTTPS redirect](decisions/0002-autocert-explicit-failure-on-port-80.md) — pass `http.NotFoundHandler()` to `autocert.Manager.HTTPHandler`, not `nil`; loud failure over silent scheme-switching. - [ADR-0001: Routing envelope shape and opacity](decisions/0001-routing-envelope-shape-and-opacity.md) — `json.RawMessage` for the inner frame; sentinel errors; validate at boundary, never parse payloads. ## Architecture diff --git a/docs/knowledge/decisions/0002-autocert-explicit-failure-on-port-80.md b/docs/knowledge/decisions/0002-autocert-explicit-failure-on-port-80.md new file mode 100644 index 0000000..51ee1cd --- /dev/null +++ b/docs/knowledge/decisions/0002-autocert-explicit-failure-on-port-80.md @@ -0,0 +1,35 @@ +# ADR-0002: Explicit-failure 404 on `:80` instead of HTTPS redirect + +**Status:** Accepted (#9) +**Date:** 2026-05-08 + +## Context + +In `--domain` mode the relay binds `:80` for the ACME http-01 challenge. Anything *other* than an ACME challenge request hitting that port is, by definition, a misconfigured client — phones and binaries are required to use WSS on `:443`. The question is how to respond to those non-challenge requests. + +`golang.org/x/crypto/acme/autocert.Manager.HTTPHandler(fallback)` defines the behaviour for non-challenge traffic: + +- `fallback == nil` → autocert redirects GET/HEAD to HTTPS with `302` and returns `400` for other methods. +- `fallback != nil` → autocert calls `fallback.ServeHTTP` for all non-challenge traffic. + +The ticket's literal AC text said "the `nil` argument means non-challenge HTTP requests get a 404," which is incorrect about autocert's behaviour. The intent stated alongside the AC was the load-bearing part: *"we do NOT redirect to HTTPS — we want explicit-failure semantics for misconfigured clients."* + +## Decision + +Pass `http.NotFoundHandler()` explicitly to `manager.HTTPHandler`, not `nil`. Non-challenge port-80 traffic returns `404` with no body. ACME challenges still flow through autocert's own routing and reach the cert-issuance handler unchanged. + +## Rationale + +A `302` redirect is convenient for browsers but actively unhelpful for the relay's clients: + +- Mobile clients (phones, binaries) speak WSS on `:443`. They have no business reaching the relay over plain HTTP. A redirect would mask the misconfiguration; a `404` surfaces it. +- Browsers shouldn't reach the relay at all — there is no HTTP UI. A `404` is the honest answer. +- The relay's posture elsewhere (`--insecure-listen` vs `--domain` are mutually exclusive and explicit; `ErrCacheDirInsecure` refuses to start instead of silently weakening) is consistent loud-failure. Redirecting plain-HTTP traffic to HTTPS would be the lone silent-correction in this binary. + +Returning a body in the 404 was considered and rejected — the relay leaks no internal state in error responses anywhere else, and nothing useful would go in the body. + +## Consequences + +- The wiring in `cmd/pyrycode-relay/main.go` is `mgr.HTTPHandler(http.NotFoundHandler())`, with a comment explaining why `nil` is wrong here. Future contributors who "clean up" to `nil` reintroduce the redirect. +- If a future operator deployment ever wants the redirect (e.g. fronting a browser-facing healthcheck on `:80`), this is a one-line change — but it should require a fresh ticket and decision. +- The choice is independent of the `:443` `EnforceHost` middleware (which returns `421` for Host-header mismatches). Both follow the same explicit-failure principle but address different threats: `EnforceHost` pins the application-layer host on TLS connections; the `:80` 404 prevents silent scheme-switching. diff --git a/docs/knowledge/features/autocert-tls.md b/docs/knowledge/features/autocert-tls.md new file mode 100644 index 0000000..2dbc88f --- /dev/null +++ b/docs/knowledge/features/autocert-tls.md @@ -0,0 +1,124 @@ +# Autocert TLS (`--domain` mode) + +The relay terminates TLS itself in production via Let's Encrypt and `golang.org/x/crypto/acme/autocert`. One command brings the public listener up: + +``` +sudo pyrycode-relay --domain relay.example.com +``` + +Two listeners come up: `:443` for WSS (cert via autocert) and `:80` for the ACME http-01 challenge. No reverse proxy in the production path. + +Authoritative wire spec: [`pyrycode/pyrycode/docs/protocol-mobile.md` § TLS](https://github.com/pyrycode/pyrycode/blob/main/docs/protocol-mobile.md#tls). + +## Modes + +| Flag | When to use | +|---|---| +| `--domain ` | Production. Relay holds the cert. Requires `:80` and `:443` reachable from the public internet. | +| `--insecure-listen :8080` | Behind a reverse proxy that terminates TLS upstream, or local dev. Disables autocert. | + +The two are mutually exclusive; one of them must be set or the binary refuses to start. + +## API + +Package `internal/relay` (`tls.go`): + +```go +func NewAutocertManager(domain, cacheDir string) (*autocert.Manager, error) +func EnforceHost(domain string, next http.Handler) http.Handler +func TLSConfig(m *autocert.Manager) *tls.Config + +var ErrCacheDirInsecure = errors.New("relay: cert cache dir has insecure permissions (must be 0700)") +``` + +`cmd/pyrycode-relay/main.go` does the wiring (server construction, timeouts, listener startup); `internal/relay` provides only the testable pieces. + +## Configuration + +| Flag | Default | Notes | +|---|---|---| +| `--domain` | (required for autocert) | Single domain. Bound via `autocert.HostWhitelist(domain)`; ACME issuance for any other host is rejected. | +| `--cert-cache` | `~/.pyrycode-relay/certs` | Created with `0700` if missing. Refuses to start if an existing dir is world- or group-readable (`ErrCacheDirInsecure`). | +| `--insecure-listen` | (unset) | Disables autocert. | + +The cache dir is the only on-disk state the relay keeps. TLS private keys live there (autocert writes them with `0o600`). + +## Two host gates + +Single-domain enforcement happens in two places, because the TLS handshake's SNI does not bind the HTTP `Host` header — they can legally disagree on the same connection. + +1. **`autocert.HostWhitelist(domain)`** — gates ACME *issuance*. Without it, the relay would attempt to fetch certs for any name a client requests via SNI, which Let's Encrypt would either rate-limit us out of or, worse, succeed at for an attacker-controlled domain pointed at our IP. +2. **`EnforceHost(domain, mux)`** — gates *application traffic* on `:443`. Any request whose `Host` header doesn't match the configured domain (case-insensitive, port-tolerant) gets `421 Misdirected Request` with no body, before reaching the routing handlers. + +`:80` does not use `EnforceHost`. The autocert HTTPHandler owns the host check there via the manager's `HostPolicy`. + +## Explicit-failure semantics on `:80` + +`:80` is wired as `mgr.HTTPHandler(http.NotFoundHandler())` — *not* `mgr.HTTPHandler(nil)`. Passing `nil` would redirect non-challenge traffic (GET/HEAD) to HTTPS with `302`. The relay deliberately returns `404` instead so that misconfigured clients fail loudly rather than silently switching schemes. See ADR-0002. + +## Cert cache permissions + +`NewAutocertManager` is stricter than the ticket's AC ("existing dir → no-op"). The actual policy: + +- Missing → `os.MkdirAll(dir, 0o700)`. +- Exists, is dir, mode `& 0o077 == 0` → no-op (secure). +- Exists, is dir, mode `& 0o077 != 0` → `ErrCacheDirInsecure` (refuses to start). +- Exists, not a dir → error. + +Reasoning: TLS private keys live there. Silently accepting an attacker-readable cache fits neither the relay's "internet-exposed; adversarial input is the default" stance nor the `--insecure-listen` precedent of loud failure. + +Per-file mode (`0o600`) is autocert's contract, not enforced here. + +## Server timeouts + +Both `httpsSrv` and `httpSrv` mirror the insecure path's timeouts: + +| Setting | Value | +|---|---| +| `ReadHeaderTimeout` | 10s | +| `ReadTimeout` | 60s | +| `WriteTimeout` | 60s | +| `IdleTimeout` | 120s | + +`ReadHeaderTimeout` bounds slow-loris on both ports and keeps `gosec` G114 quiet. + +## TLS version + +`TLSConfig(m)` returns `m.TLSConfig()` with `MinVersion = tls.VersionTLS12`. Centralised because: + +- autocert's default config leaves `MinVersion` zero, which trips `gosec` G402 on `make lint`. +- A future "switch to 1.3-min" is a one-line change with one test to update. + +Cipher selection is delegated to Go's secure-default suites for TLS 1.2; TLS 1.3 doesn't expose suite selection. + +## Concurrency + +Two goroutines, no shared mutable state: + +1. Main — `httpsSrv.ListenAndServeTLS("", "")`. The empty cert/key paths defer to `TLSConfig.GetCertificate`, which `manager.TLSConfig()` populates. This is the documented autocert pattern. +2. Background — `httpSrv.ListenAndServe()` for ACME http-01. + +Either listener failing → log + `os.Exit(1)`. No graceful shutdown (mirrors the insecure path). + +The `*autocert.Manager` is constructed once and used read-only; its internal locking is autocert's contract. + +## Operational notes + +- `:80` and `:443` are privileged. Run with `sudo`, or grant `CAP_NET_BIND_SERVICE` via `setcap` / systemd `AmbientCapabilities`. +- First request to the domain after a fresh start may take ~10–20s while autocert issues and caches the cert. Subsequent restarts reuse the cache. +- Cert renewal is silent — autocert handles it. No metrics in v1. + +## Out of scope (deferred) + +- Multi-domain certs (`autocert.HostWhitelist` is variadic but the UX/ops story isn't worth solving until a second domain is needed). +- DNS-01 / wildcard certs. +- `--acme-email` for Let's Encrypt expiry mail. +- Renewal observability. +- Self-signed dev fallback (`--insecure-listen` exists for that). +- Per-IP / per-connection rate limits (separate ticket). +- Graceful shutdown / signal handling. + +## Related + +- [ADR-0002: Explicit-failure 404 on `:80` instead of HTTPS redirect](../decisions/0002-autocert-explicit-failure-on-port-80.md) +- [Architecture overview](../../architecture.md) — relay is the cert holder. diff --git a/docs/lessons.md b/docs/lessons.md index dd1ed09..de5bd4a 100644 --- a/docs/lessons.md +++ b/docs/lessons.md @@ -13,3 +13,19 @@ When unmarshalling `{"frame": null}` into a struct with `Frame json.RawMessage`, ## `json.RawMessage` round-trips are byte-stable *modulo whitespace* `encoding/json` re-encodes `RawMessage` through the outer marshaller and may strip insignificant whitespace. Tests asserting opacity should canonicalise both sides with `json.Compact` before `bytes.Equal`, otherwise they flake on formatting. Source: routing-envelope tests (#1). + +## `autocert.Manager.HTTPHandler(nil)` redirects to HTTPS — it does not 404 + +The autocert docs and the `nil` argument are easy to misread as "no fallback → 404." Actual behaviour: GET/HEAD get a `302` to HTTPS; other methods get `400`. To get an explicit 404 (or any other handler), pass it explicitly: `mgr.HTTPHandler(http.NotFoundHandler())`. See ADR-0002. Source: autocert TLS wiring (#9). + +## TLS handshake SNI does not bind the HTTP `Host` header + +A client can complete the TLS handshake using SNI for `relay.example.com` and then send `Host: somethingelse.com` in the HTTP request on the same connection. The wire spec says they must match for the relay; enforce it in application code (`EnforceHost` returns `421 Misdirected Request`). Don't assume autocert's `HostWhitelist` covers this — that gates ACME *issuance*, not request-time host validation. Source: autocert TLS (#9). + +## `r.Host` may carry a `:port` suffix + +RFC 7230 §5.4 allows it. When comparing to a configured hostname, strip the port via `net.SplitHostPort` first (it errors out cleanly if there's no port — use the original string in that case). Compare with `strings.EqualFold` because hostnames are case-insensitive. Source: `EnforceHost` (#9). + +## `autocert.Manager.TLSConfig()` doesn't set `MinVersion` + +`gosec` G402 fires on `make lint` if you use it raw. Wrap it in a helper that pins `MinVersion = tls.VersionTLS12` (or 1.3) before handing it to `http.Server`. Centralising the override means a future bump is a one-line change. Source: `relay.TLSConfig` (#9). diff --git a/docs/specs/architecture/9-autocert-tls.md b/docs/specs/architecture/9-autocert-tls.md new file mode 100644 index 0000000..449bd9c --- /dev/null +++ b/docs/specs/architecture/9-autocert-tls.md @@ -0,0 +1,379 @@ +# Spec — Autocert TLS for `--domain` mode (#9) + +## Files to read first + +- `cmd/pyrycode-relay/main.go:21-78` — flag parsing, the existing insecure-listen path with its `http.Server` timeouts (mirror them in the TLS path), the current "not yet implemented" stub the spec replaces, and `defaultCertCache()`. +- `internal/relay/doc.go` — package doc; the new TLS file lives next to `envelope.go` and stays consistent with the package's "routing core" framing. +- `internal/relay/envelope.go:1-19` — established conventions (sentinel errors via `errors.New`, package-level docstrings naming the invariant). +- `docs/architecture.md:14` — names the relay as the cert holder ("TLS terminus … autocert via Let's Encrypt"); confirms the design intent. +- [`pyrycode/pyrycode/docs/protocol-mobile.md` § TLS](https://github.com/pyrycode/pyrycode/blob/main/docs/protocol-mobile.md#tls) — wire-spec authority for TLS terminus and `421 Misdirected Request` semantics. +- `Makefile:14-23` — `make test` is `go test -race ./...`; `make lint` runs `gosec` (so `tls.Config.MinVersion` must be set explicitly to satisfy G402) and `govulncheck` (autocert is a fresh dep — must be a tagged release). +- `go.mod` — module is `github.com/pyrycode/pyrycode-relay`, Go 1.26.2; this ticket adds the first non-stdlib dep. +- `README.md:33-44` — current "Run" section + flag table; the doc edits adjust this region. + +## Context + +`cmd/pyrycode-relay/main.go:68-70` currently refuses to start when `--domain` is set ("autocert TLS path not yet implemented"). This ticket fills it in so production deployment is a single command: + +``` +pyrycode-relay --domain relay.example.com +``` + +Two listeners come up: `:443` for WSS (cert via `golang.org/x/crypto/acme/autocert`), `:80` for the ACME http-01 challenge. The relay terminates TLS itself; no reverse proxy in the production path. + +Two invariants drive the design: + +1. **Single-domain only** — `autocert.HostWhitelist(*domain)` rejects ACME issuance for any other host, and a request-level wrapper returns `421 Misdirected Request` for any HTTPS request whose `Host` header doesn't match (a TLS handshake using the cert's SNI doesn't bind the Host header — they can disagree, and the spec says they must match). +2. **Explicit failure for misconfigured clients** — port-80 traffic that isn't an ACME challenge gets `404`, not a redirect to HTTPS. Per AC: *"we do NOT redirect to HTTPS — we want explicit-failure semantics."* + +> **Spec note (departure from AC literal wording).** The ticket says +> "`manager.HTTPHandler(nil)` … the `nil` argument means non-challenge +> HTTP requests get a 404." That is incorrect about autocert's +> behaviour. Per the autocert docs, `HTTPHandler(nil)` redirects GET/HEAD +> to HTTPS with `302` and returns `400` for other methods — i.e. exactly +> the redirect semantics the AC says we don't want. To honour the AC's +> stated *intent* (explicit-failure 404), the developer must pass +> `http.NotFoundHandler()` explicitly, not `nil`. The wiring section +> below uses `mgr.HTTPHandler(http.NotFoundHandler())`. PO/code-review: +> please confirm intent matches; if a 302 redirect is actually wanted, +> reopen the ticket and the spec changes one line. + +## Design + +### Package & files + +- New file: `internal/relay/tls.go` (`package relay`) — autocert manager construction, cache-dir creation, host-enforcement middleware. +- New test file: `internal/relay/tls_test.go` (`package relay` — same package, so it can reach unexported helpers and follow the convention established in `envelope_test.go`). +- Modified: `cmd/pyrycode-relay/main.go` — replace the stub at lines 66-70 with the autocert wiring. +- Modified: `go.mod` (+ `go.sum` regenerated) — add `golang.org/x/crypto/acme/autocert`. +- Modified: `README.md:33-44` — move `--domain` to the primary Run section, add ACME caveat. + +No new package; this is one of three things the existing `internal/relay` package owns ("routing core: per-server connection registry, frame forwarding, header validation" + now TLS terminus). Splitting `tls` into its own subpackage isn't worth the extra import surface for ~80 LOC. + +### Public surface (`internal/relay/tls.go`) + +Three exports plus one sentinel error. Kept small on purpose — `cmd/pyrycode-relay/main.go` does the wiring (server construction, timeouts, listener startup); `internal/relay` provides only the testable pieces. + +```go +// ErrCacheDirInsecure is returned by NewAutocertManager when CacheDir +// already exists with permissions broader than 0700. TLS private keys +// live there; the relay refuses to start with a world- or group-readable +// cache rather than silently weakening the deployment. +var ErrCacheDirInsecure = errors.New("relay: cert cache dir has insecure permissions (must be 0700)") + +// NewAutocertManager returns an autocert.Manager bound to the single +// domain via HostWhitelist. The cache dir is created with mode 0700 if +// missing. If it already exists with permissions broader than 0700, the +// function returns ErrCacheDirInsecure (wrapped with the dir path). +// +// The returned manager terminates ACME http-01 challenges via its +// HTTPHandler (mount on :80) and serves certificates via TLSConfig +// (mount on :443). +func NewAutocertManager(domain, cacheDir string) (*autocert.Manager, error) + +// EnforceHost wraps next so that any request whose Host header does not +// match domain (case-insensitive, port-tolerant) receives 421 +// Misdirected Request with no body. Used on the :443 handler chain so a +// client that resolves the cert via SNI but sends an unrelated Host +// header is rejected per the protocol spec's TLS section. +// +// Port-80 traffic does NOT use this wrapper: the autocert HTTPHandler +// owns its own host policy via the manager's HostPolicy. +func EnforceHost(domain string, next http.Handler) http.Handler + +// TLSConfig returns m.TLSConfig() with MinVersion forced to TLS 1.2. +// autocert's default config does not set MinVersion explicitly, which +// trips gosec G402; this helper centralises the override so callers +// never see an un-pinned config. +func TLSConfig(m *autocert.Manager) *tls.Config +``` + +### `NewAutocertManager` — algorithm + +1. If `domain == ""` or `cacheDir == ""` → return `nil, errors.New("relay: domain and cacheDir required")`. (Defensive — `main` validates flags upstream, but the helper doesn't trust its caller blindly.) +2. Stat `cacheDir`: + - **Not exist** → `os.MkdirAll(cacheDir, 0o700)`. Return error wrapped with the path on failure. + - **Exists, is dir, mode `& 0o077 == 0`** → no-op. + - **Exists, is dir, mode `& 0o077 != 0`** → return `fmt.Errorf("%w: %s (mode %o)", ErrCacheDirInsecure, cacheDir, mode)`. + - **Exists, not a dir** → return `fmt.Errorf("relay: cert cache path is not a directory: %s", cacheDir)`. +3. Construct and return: + + ```go + &autocert.Manager{ + Cache: autocert.DirCache(cacheDir), + Prompt: autocert.AcceptTOS, + HostPolicy: autocert.HostWhitelist(domain), + } + ``` + + No `Email` field — Let's Encrypt expiry mail is nice-to-have, not load-bearing, and a `--acme-email` flag is out of scope per the ticket's "kept small" framing. + +The mode check on the existing-dir branch is *stricter* than the AC's "existing dir → no-op" wording. The reason is the security-review pass below: TLS private keys live there; silently accepting an attacker-readable cache dir is the kind of footgun the relay's "internet-exposed; adversarial input is the default" stance forbids. The existing dir is a no-op only when it's already secure; otherwise the relay refuses to start (loud failure, fits the project's `--insecure-listen` precedent). + +### `EnforceHost` — algorithm + +```go +func EnforceHost(domain string, next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + host := r.Host + if h, _, err := net.SplitHostPort(host); err == nil { + host = h + } + if !strings.EqualFold(host, domain) { + w.WriteHeader(http.StatusMisdirectedRequest) // 421 + return + } + next.ServeHTTP(w, r) + }) +} +``` + +- `r.Host` may carry a `:port` suffix (RFC 7230 §5.4 allows it); `net.SplitHostPort` strips it. If splitting fails, `r.Host` had no port — use as-is. +- Comparison is case-insensitive (`strings.EqualFold`) because hostnames are. No IDN-equivalence handling — `--domain` is operator input; if they configure a punycode domain, the cert and the Host header will both be punycode. +- No body on the 421 response. Adversaries don't need a description; legitimate clients shouldn't be hitting this. +- Don't write any header beyond status — calling `w.WriteHeader(421)` then `return` is the minimal correct response. + +### `TLSConfig(m *autocert.Manager) *tls.Config` + +```go +func TLSConfig(m *autocert.Manager) *tls.Config { + cfg := m.TLSConfig() + cfg.MinVersion = tls.VersionTLS12 + return cfg +} +``` + +Tiny, but it's the only place in the repo that touches `tls.Config`, and centralising it means a future "switch to 1.3-min" is a one-line change with one test to update. Without `MinVersion`, `gosec` G402 fires on `make lint`. + +### `cmd/pyrycode-relay/main.go` — wiring + +Replace the stub at lines 66-70 with: + +```go +mgr, err := relay.NewAutocertManager(*domain, *certCache) +if err != nil { + logger.Error("autocert setup failed", "err", err) + os.Exit(1) +} + +httpsSrv := &http.Server{ + Addr: ":443", + Handler: relay.EnforceHost(*domain, mux), + TLSConfig: relay.TLSConfig(mgr), + ReadHeaderTimeout: 10 * time.Second, + ReadTimeout: 60 * time.Second, + WriteTimeout: 60 * time.Second, + IdleTimeout: 120 * time.Second, +} + +httpSrv := &http.Server{ + Addr: ":80", + // NotFoundHandler — NOT nil. nil would redirect GET/HEAD to HTTPS + // with 302; the AC requires explicit 404 for non-challenge traffic. + // See "Spec note" in the Context section above. + Handler: mgr.HTTPHandler(http.NotFoundHandler()), + ReadHeaderTimeout: 10 * time.Second, + ReadTimeout: 60 * time.Second, + WriteTimeout: 60 * time.Second, + IdleTimeout: 120 * time.Second, +} + +logger.Info("starting", "version", Version, "mode", "autocert", + "domain", *domain, "cert_cache", *certCache) + +go func() { + if err := httpSrv.ListenAndServe(); err != nil { + logger.Error("http-01 listener failed", "err", err) + os.Exit(1) + } +}() + +if err := httpsSrv.ListenAndServeTLS("", ""); err != nil { + logger.Error("https listener failed", "err", err) + os.Exit(1) +} +``` + +`ListenAndServeTLS("", "")` with `TLSConfig.GetCertificate` populated (which `manager.TLSConfig()` does) is the documented autocert pattern — the empty cert/key paths tell `net/http` to defer to `GetCertificate`. + +### Concurrency model + +Two goroutines: + +1. **Main goroutine** — runs `httpsSrv.ListenAndServeTLS`. Blocks until the listener fails. On error, log and `os.Exit(1)`. +2. **Background goroutine** — runs `httpSrv.ListenAndServe` for ACME http-01. Blocks similarly; on error, log and `os.Exit(1)`. + +No shared mutable state between them. `*autocert.Manager` is read-only after construction (its `Cache` and `HostPolicy` are set once); the manager is internally goroutine-safe per the autocert docs. + +No graceful shutdown. The existing insecure path doesn't have one either, and a graceful-shutdown ticket can be opened separately if it ever matters operationally. `os.Exit(1)` on listener failure mirrors the insecure path exactly. + +### Error handling + +- `NewAutocertManager` failures (cache dir insecure / not a directory / mkdir failed) → log + `os.Exit(1)` in `main`. Loud failure; the operator must see this. +- HTTPS listener failure → log + `os.Exit(1)`. +- HTTP listener failure → log + `os.Exit(1)`. Even though :80 is "just" the ACME challenge port, losing it means cert renewals stop ~30 days from now and the operator should know immediately. +- Per-request errors: + - Wrong Host on :443 → `421` (handled by `EnforceHost`). + - Non-challenge traffic on :80 → `404` (handled by `manager.HTTPHandler(nil)`). + - ACME issuance failure for the wrong domain → autocert returns the error to the TLS handshake; the client sees a TLS alert. No log handling on our side beyond what autocert emits. + +No sentinel errors beyond `ErrCacheDirInsecure`. Other failures are deployment-time and one-shot; sentinels would be over-engineering. + +## Testing strategy + +`internal/relay/tls_test.go`, `package relay`. No external libs; `testing` + `httptest` only. + +### `TestNewAutocertManager_CreatesCacheDirWith0700` + +```go +parent := t.TempDir() +cache := filepath.Join(parent, "certs") // does not exist yet +m, err := NewAutocertManager("relay.example.com", cache) +// require: err == nil, m != nil +info, _ := os.Stat(cache) +// require: info.IsDir() +// require: info.Mode().Perm() == 0o700 +``` + +### `TestNewAutocertManager_ExistingSecureDirIsNoOp` + +Pre-create the dir with mode `0o700`, drop a sentinel file inside, call again, verify no error and the sentinel still there with the dir mode unchanged. + +### `TestNewAutocertManager_ExistingInsecureDirRejected` + +Pre-create with `0o755` (or `0o750`). Expect `errors.Is(err, ErrCacheDirInsecure)`. Skip on Windows where Unix mode bits are advisory. + +```go +if runtime.GOOS == "windows" { + t.Skip("permission bits don't apply on Windows") +} +``` + +### `TestNewAutocertManager_HostPolicyAcceptsConfiguredDomain` + +```go +m, _ := NewAutocertManager("relay.example.com", t.TempDir()+"/c") +// require: m.HostPolicy(context.Background(), "relay.example.com") == nil +``` + +### `TestNewAutocertManager_HostPolicyRejectsOtherDomains` + +Table-driven: + +| Input | Expectation | +|---|---| +| `evil.example.com` | non-nil error | +| `RELAY.EXAMPLE.COM` | non-nil error (HostWhitelist is case-sensitive; document this) | +| `""` | non-nil error | +| `relay.example.com.evil.com` | non-nil error | + +The relay's contract is: HostWhitelist is the gate. We're testing autocert behaves as advertised, not re-implementing it — but the test pins our assumption so a future autocert change doesn't silently weaken the deployment. + +### `TestEnforceHost` + +`httptest.NewRecorder` + a stub next-handler that flips a bool. Table-driven cases: + +| `r.Host` | `domain` | Expected status | Stub called? | +|---|---|---|---| +| `relay.example.com` | `relay.example.com` | 200 | yes | +| `relay.example.com:8443` | `relay.example.com` | 200 | yes | +| `RELAY.EXAMPLE.COM` | `relay.example.com` | 200 | yes (case-insensitive) | +| `evil.com` | `relay.example.com` | 421 | no | +| `` (empty) | `relay.example.com` | 421 | no | +| `relay.example.com.evil.com` | `relay.example.com` | 421 | no | + +### `TestTLSConfig_PinsMinVersionToTLS12` + +```go +m := &autocert.Manager{} // bare; we only inspect the returned config +cfg := TLSConfig(m) +// require: cfg.MinVersion == tls.VersionTLS12 +// require: cfg.GetCertificate != nil // autocert wired through +``` + +### What we deliberately do not test + +- **Real ACME issuance** — needs a public DNS record and a Let's Encrypt account. Verified manually on first deploy. The ticket says so. +- **`:443` / `:80` listener startup** — that's `main.go`, integration territory; binding privileged ports in a unit test isn't worth it. +- **Cert renewal timing** — autocert's job; not in our codebase to test. + +## README updates (`README.md:33-44`) + +Replace the current Run + flag table region with: + +```markdown +## Run + +Production (autocert): + +\`\`\`bash +sudo ./bin/pyrycode-relay --domain relay.example.com +\`\`\` + +The relay binds `:443` (WSS) and `:80` (ACME http-01 challenge). Both +ports must be reachable from the public internet — Let's Encrypt issues +the cert by hitting `:80` on first request to the domain. The first WSS +request after a fresh start may take ~10–20s while the cert is issued +and cached to `--cert-cache`. Subsequent restarts reuse the cached cert. + +Behind a reverse proxy (TLS terminated upstream): + +\`\`\`bash +./bin/pyrycode-relay --insecure-listen :8080 +\`\`\` + +| Flag | Default | Notes | +|---|---|---| +| `--domain` | (required for autocert) | Public domain for Let's Encrypt cert issuance. Required when `--insecure-listen` is unset. | +| `--cert-cache` | `~/.pyrycode-relay/certs` | Directory for autocert's TLS certificate cache. Created with `0700` if missing; refuses to start if an existing dir is world- or group-readable. | +| `--insecure-listen` | (unset) | Listen address for plain HTTP (e.g. `:8080`). Disables autocert. Use only when fronted by a reverse proxy. | +| `--version` | | Print version and exit. | +``` + +The `sudo` is honest — `:80` and `:443` are privileged ports. Operators who use `setcap` or systemd's `AmbientCapabilities` know to substitute. We don't pretend root is optional. + +## Open questions + +1. **`--acme-email` flag.** Out of scope. Let's Encrypt expiry-warning mail goes to the registered account; the registered account is autocert's anonymous default. A future ticket can add the flag if/when an operator wants the warnings. Cost of adding it now: another flag, another doc entry, no proven demand. +2. **Connection / IP rate limits.** Out of scope. Adversarial-input concerns at the connection-count level are a separate ticket (no number assigned yet) — the relay's threat model treats DoS as an operator-deploys-behind-a-CDN concern in v1. +3. **Existing-dir mode policy.** The spec is stricter than the AC ("existing dir → no-op"). Justified in the security review section below; flagged here so PO/code-review notice the deliberate departure. +4. **Cert cache file mode.** autocert's `DirCache` writes files with `0o600` (verified against `golang.org/x/crypto/acme/autocert/cache.go` at the version we'll pin). We don't enforce; we assume autocert's published behaviour. If autocert ever loosens this, a future audit catches it. +5. **`HostWhitelist` case sensitivity.** Verified: autocert lowercases the configured domain at construction and compares lowercased. Operator passing `Relay.Example.com` works for issuance. Documented in the test that case-mismatched issuance requests fail — that's autocert's behaviour, not ours. + +## Out of scope (re-stated, for the developer) + +- No graceful shutdown / signal handling. Mirror the existing insecure path's startup style (`os.Exit(1)` on listener error). +- No multi-domain support. `autocert.HostWhitelist` accepts a variadic — do not exploit that. Single domain only. +- No DNS-01 / wildcard certs. +- No cert-renewal metrics or observability. +- No self-signed dev fallback. `--insecure-listen` exists for that. +- No edits to `internal/relay/envelope.go`, `internal/relay/doc.go`, or any other existing file beyond `cmd/pyrycode-relay/main.go` and `README.md`. + +## Done means + +- `internal/relay/tls.go` exports `NewAutocertManager`, `EnforceHost`, `TLSConfig`, and `ErrCacheDirInsecure`, each with a doc comment naming the contract. +- `internal/relay/tls_test.go` covers every row of the test tables above. +- `cmd/pyrycode-relay/main.go` autocert path is wired; the "not yet implemented" stub is gone. +- `go.mod` adds `golang.org/x/crypto/acme/autocert` (latest tagged release at developer's run time); `go.sum` regenerated via `go mod tidy`. +- `README.md` updated per the section above. +- `make vet`, `make test`, `make build`, `make lint` all clean from the repo root. +- One commit on `feature/9`: `feat(relay): autocert TLS for --domain mode (#9)`. + +## Security review + +**Verdict:** PASS + +**Findings:** + +- **[Trust boundaries]** No findings — the only data crossing untrusted-to-trusted is (a) the `Host` header on inbound HTTPS requests, gated by `EnforceHost` returning 421 before downstream code sees it, and (b) the ACME challenge URL on `:80`, gated by autocert's own `HostWhitelist`. No user input reaches filesystem paths; `cacheDir` is operator-controlled at flag-parse time. +- **[Tokens, secrets, credentials]** SHOULD FIX (deferred to ops doc, not gating) — TLS private keys live in `cacheDir`. The spec enforces dir mode `0o700` on creation AND on existing dirs (`ErrCacheDirInsecure`), going beyond the AC's "no-op on existing dir." Per-file mode is autocert's responsibility (`DirCache` writes `0o600`, verified). No tokens to rotate or revoke at this layer; that's binary-side. +- **[File operations]** No findings — no path traversal (no user input concatenated into paths), no TOCTOU window between perm check and use that an attacker could exploit (the cache dir is operator-owned; an attacker who already has write access to `~/.pyrycode-relay/` has already won), no symlink-following concerns documented in autocert (it uses `os.WriteFile`/`os.ReadFile` on the configured dir; symlink attacks would require pre-existing write access). Atomic writes are autocert's contract, not ours. +- **[Subprocess]** N/A — no subprocess execution in this ticket. +- **[Cryptographic primitives]** No findings — TLS via `crypto/tls`; ACME via the standard `golang.org/x/crypto/acme/autocert` package. `MinVersion` pinned to TLS 1.2 (Go's secure-default cipher suites kick in for 1.2; 1.3 doesn't expose suite selection). No hand-rolled crypto. No constant-time compare needed at this layer (no secret-vs-attacker-input comparisons). +- **[Network & I/O]** No findings — both `:443` and `:80` `http.Server` instances carry the established `ReadHeaderTimeout: 10s, ReadTimeout: 60s, WriteTimeout: 60s, IdleTimeout: 120s` set, mirroring the insecure path. Slow-loris is bounded by `ReadHeaderTimeout`. `gosec` G114 stays clean. **OUT OF SCOPE:** per-IP / per-connection caps deferred to a future connection-limits ticket; the protocol spec's threat model treats v1 deployments as expecting a CDN or operator-run firewall in front for volumetric DoS. +- **[Error messages, logs, telemetry]** No findings — the 421 response carries no body (no internal-state leak). `slog` log lines name `domain`, `cert_cache`, `version` — none are secrets. No request headers, no payloads, no tokens are logged. Listener-failure errors include the underlying `net.OpError` string (port + reason); that's expected operator-facing output, not user-controlled. +- **[Concurrency]** No findings — two goroutines, no shared mutable state (the `autocert.Manager` is constructed once and used read-only; its internal locks are autocert's contract). Both goroutines exit only on listener failure → process exit; no leakage path. No locks taken in our code. +- **[Threat model alignment]** Addresses the protocol spec's TLS section: relay is the cert holder ✓, `421 Misdirected Request` for Host mismatches ✓, single-domain enforcement ✓. Deferred: rate-limiting (out of scope), graceful shutdown (out of scope), threat-model doc itself (`docs/threat-model.md` is still planned, not blocking this ticket). + +**Reviewer:** architect (self-review per `architect/security-review.md`) +**Date:** 2026-05-08 diff --git a/go.mod b/go.mod index a7a1ac9..d2246ec 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,10 @@ module github.com/pyrycode/pyrycode-relay go 1.26.2 + +require golang.org/x/crypto v0.50.0 + +require ( + golang.org/x/net v0.52.0 // indirect + golang.org/x/text v0.36.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..00cc43c --- /dev/null +++ b/go.sum @@ -0,0 +1,6 @@ +golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI= +golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= +golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= diff --git a/internal/relay/tls.go b/internal/relay/tls.go new file mode 100644 index 0000000..6fd82f9 --- /dev/null +++ b/internal/relay/tls.go @@ -0,0 +1,88 @@ +package relay + +import ( + "crypto/tls" + "errors" + "fmt" + "net" + "net/http" + "os" + "strings" + + "golang.org/x/crypto/acme/autocert" +) + +// ErrCacheDirInsecure is returned by NewAutocertManager when CacheDir +// already exists with permissions broader than 0700. TLS private keys +// live there; the relay refuses to start with a world- or group-readable +// cache rather than silently weakening the deployment. +var ErrCacheDirInsecure = errors.New("relay: cert cache dir has insecure permissions (must be 0700)") + +// NewAutocertManager returns an autocert.Manager bound to the single +// domain via HostWhitelist. The cache dir is created with mode 0700 if +// missing. If it already exists with permissions broader than 0700, the +// function returns ErrCacheDirInsecure (wrapped with the dir path). +// +// The returned manager terminates ACME http-01 challenges via its +// HTTPHandler (mount on :80) and serves certificates via TLSConfig +// (mount on :443). +func NewAutocertManager(domain, cacheDir string) (*autocert.Manager, error) { + if domain == "" || cacheDir == "" { + return nil, errors.New("relay: domain and cacheDir required") + } + + info, err := os.Stat(cacheDir) + switch { + case errors.Is(err, os.ErrNotExist): + if err := os.MkdirAll(cacheDir, 0o700); err != nil { + return nil, fmt.Errorf("relay: creating cert cache dir %s: %w", cacheDir, err) + } + case err != nil: + return nil, fmt.Errorf("relay: stat cert cache dir %s: %w", cacheDir, err) + default: + if !info.IsDir() { + return nil, fmt.Errorf("relay: cert cache path is not a directory: %s", cacheDir) + } + if mode := info.Mode().Perm(); mode&0o077 != 0 { + return nil, fmt.Errorf("%w: %s (mode %o)", ErrCacheDirInsecure, cacheDir, mode) + } + } + + return &autocert.Manager{ + Cache: autocert.DirCache(cacheDir), + Prompt: autocert.AcceptTOS, + HostPolicy: autocert.HostWhitelist(domain), + }, nil +} + +// EnforceHost wraps next so that any request whose Host header does not +// match domain (case-insensitive, port-tolerant) receives 421 +// Misdirected Request with no body. Used on the :443 handler chain so a +// client that resolves the cert via SNI but sends an unrelated Host +// header is rejected per the protocol spec's TLS section. +// +// Port-80 traffic does NOT use this wrapper: the autocert HTTPHandler +// owns its own host policy via the manager's HostPolicy. +func EnforceHost(domain string, next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + host := r.Host + if h, _, err := net.SplitHostPort(host); err == nil { + host = h + } + if !strings.EqualFold(host, domain) { + w.WriteHeader(http.StatusMisdirectedRequest) + return + } + next.ServeHTTP(w, r) + }) +} + +// TLSConfig returns m.TLSConfig() with MinVersion forced to TLS 1.2. +// autocert's default config does not set MinVersion explicitly, which +// trips gosec G402; this helper centralises the override so callers +// never see an un-pinned config. +func TLSConfig(m *autocert.Manager) *tls.Config { + cfg := m.TLSConfig() + cfg.MinVersion = tls.VersionTLS12 + return cfg +} diff --git a/internal/relay/tls_test.go b/internal/relay/tls_test.go new file mode 100644 index 0000000..471bd6e --- /dev/null +++ b/internal/relay/tls_test.go @@ -0,0 +1,210 @@ +package relay + +import ( + "context" + "crypto/tls" + "errors" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "runtime" + "testing" + + "golang.org/x/crypto/acme/autocert" +) + +func TestNewAutocertManager_CreatesCacheDirWith0700(t *testing.T) { + t.Parallel() + + parent := t.TempDir() + cache := filepath.Join(parent, "certs") + + m, err := NewAutocertManager("relay.example.com", cache) + if err != nil { + t.Fatalf("NewAutocertManager: unexpected error: %v", err) + } + if m == nil { + t.Fatal("NewAutocertManager: returned nil manager") + } + + info, err := os.Stat(cache) + if err != nil { + t.Fatalf("stat cache dir: %v", err) + } + if !info.IsDir() { + t.Fatalf("cache path is not a directory") + } + if runtime.GOOS != "windows" { + if got, want := info.Mode().Perm(), os.FileMode(0o700); got != want { + t.Errorf("cache dir mode: got %o, want %o", got, want) + } + } +} + +func TestNewAutocertManager_ExistingSecureDirIsNoOp(t *testing.T) { + t.Parallel() + + cache := filepath.Join(t.TempDir(), "certs") + if err := os.MkdirAll(cache, 0o700); err != nil { + t.Fatalf("setup mkdir: %v", err) + } + sentinel := filepath.Join(cache, "sentinel") + if err := os.WriteFile(sentinel, []byte("x"), 0o600); err != nil { + t.Fatalf("write sentinel: %v", err) + } + + if _, err := NewAutocertManager("relay.example.com", cache); err != nil { + t.Fatalf("NewAutocertManager: unexpected error: %v", err) + } + + if _, err := os.Stat(sentinel); err != nil { + t.Errorf("sentinel file disappeared: %v", err) + } + if runtime.GOOS != "windows" { + info, _ := os.Stat(cache) + if got, want := info.Mode().Perm(), os.FileMode(0o700); got != want { + t.Errorf("cache dir mode changed: got %o, want %o", got, want) + } + } +} + +func TestNewAutocertManager_ExistingInsecureDirRejected(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("permission bits don't apply on Windows") + } + t.Parallel() + + cache := filepath.Join(t.TempDir(), "certs") + if err := os.MkdirAll(cache, 0o755); err != nil { + t.Fatalf("setup mkdir: %v", err) + } + + _, err := NewAutocertManager("relay.example.com", cache) + if !errors.Is(err, ErrCacheDirInsecure) { + t.Fatalf("expected ErrCacheDirInsecure, got %v", err) + } +} + +func TestNewAutocertManager_CacheDirPathNotADirectory(t *testing.T) { + t.Parallel() + + parent := t.TempDir() + cache := filepath.Join(parent, "not-a-dir") + if err := os.WriteFile(cache, []byte("x"), 0o600); err != nil { + t.Fatalf("setup writefile: %v", err) + } + + _, err := NewAutocertManager("relay.example.com", cache) + if err == nil { + t.Fatal("expected error for non-directory cache path, got nil") + } +} + +func TestNewAutocertManager_RequiresDomainAndCacheDir(t *testing.T) { + t.Parallel() + + if _, err := NewAutocertManager("", t.TempDir()); err == nil { + t.Error("expected error for empty domain") + } + if _, err := NewAutocertManager("relay.example.com", ""); err == nil { + t.Error("expected error for empty cacheDir") + } +} + +func TestNewAutocertManager_HostPolicyAcceptsConfiguredDomain(t *testing.T) { + t.Parallel() + + m, err := NewAutocertManager("relay.example.com", filepath.Join(t.TempDir(), "c")) + if err != nil { + t.Fatalf("NewAutocertManager: %v", err) + } + if err := m.HostPolicy(context.Background(), "relay.example.com"); err != nil { + t.Errorf("HostPolicy rejected configured domain: %v", err) + } +} + +func TestNewAutocertManager_HostPolicyRejectsOtherDomains(t *testing.T) { + t.Parallel() + + m, err := NewAutocertManager("relay.example.com", filepath.Join(t.TempDir(), "c")) + if err != nil { + t.Fatalf("NewAutocertManager: %v", err) + } + + cases := []struct { + name string + host string + }{ + {"different domain", "evil.example.com"}, + {"uppercase mismatch", "RELAY.EXAMPLE.COM"}, + {"empty host", ""}, + {"suffix attack", "relay.example.com.evil.com"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if err := m.HostPolicy(context.Background(), tc.host); err == nil { + t.Errorf("HostPolicy accepted %q; want rejection", tc.host) + } + }) + } +} + +func TestEnforceHost(t *testing.T) { + t.Parallel() + + const domain = "relay.example.com" + + cases := []struct { + name string + host string + wantStatus int + wantCalled bool + }{ + {"match", "relay.example.com", http.StatusOK, true}, + {"match with port", "relay.example.com:8443", http.StatusOK, true}, + {"case-insensitive match", "RELAY.EXAMPLE.COM", http.StatusOK, true}, + {"different host", "evil.com", http.StatusMisdirectedRequest, false}, + {"empty host", "", http.StatusMisdirectedRequest, false}, + {"suffix attack", "relay.example.com.evil.com", http.StatusMisdirectedRequest, false}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + called := false + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + w.WriteHeader(http.StatusOK) + }) + + req := httptest.NewRequest(http.MethodGet, "http://example/", nil) + req.Host = tc.host + rr := httptest.NewRecorder() + + EnforceHost(domain, next).ServeHTTP(rr, req) + + if rr.Code != tc.wantStatus { + t.Errorf("status: got %d, want %d", rr.Code, tc.wantStatus) + } + if called != tc.wantCalled { + t.Errorf("next called: got %v, want %v", called, tc.wantCalled) + } + }) + } +} + +func TestTLSConfig_PinsMinVersionToTLS12(t *testing.T) { + t.Parallel() + + m := &autocert.Manager{} + cfg := TLSConfig(m) + + if cfg.MinVersion != tls.VersionTLS12 { + t.Errorf("MinVersion: got %x, want %x", cfg.MinVersion, tls.VersionTLS12) + } + if cfg.GetCertificate == nil { + t.Error("GetCertificate: nil; expected autocert wiring") + } +}