From 1c6eae9991bfb03e118495040c371b5396c8282f Mon Sep 17 00:00:00 2001
From: callan fox
Date: Mon, 13 Apr 2026 17:32:26 -0400
Subject: [PATCH] Fix streaming response handler to forward full SSE stream and headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The streaming response handler was modifying the response in two ways
that caused Claude Code to send duplicate non-streaming requests:

1. Only 3 hardcoded headers (Content-Type, Cache-Control, Connection)
   were sent to the client. All upstream Anthropic headers (request-id,
   server-timing, rate-limit metadata, etc.) were dropped.

2. The SSE stream was filtered to only forward `data:` lines, stripping
   `event:` type prefixes and blank line separators.

Claude Code detects the incomplete response and fires a non-streaming
replay request to recover the full metadata. This doubles the request
count recorded in the database — every streaming request gets a
corresponding non-streaming twin with identical message content.

This was confirmed by running identical prompts through the proxy
before and after the fix:

  Before: 4 requests (2 streaming + 2 non-streaming replays)
  After:  2 requests (2 streaming, no replays)

The fix forwards all upstream response headers and the complete SSE
byte stream, while still parsing data: lines for DB storage.

Co-Authored-By: Claude Opus 4.6 (1M context) --- proxy/internal/handler/handlers.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/proxy/internal/handler/handlers.go b/proxy/internal/handler/handlers.go index 05d1360b..30a68e78 100644 --- a/proxy/internal/handler/handlers.go +++ b/proxy/internal/handler/handlers.go @@ -243,6 +243,14 @@ func (h *Handler) NotFound(w http.ResponseWriter, r *http.Request) { func (h *Handler) handleStreamingResponse(w http.ResponseWriter, resp *http.Response, requestLog *model.RequestLog, startTime time.Time) { + // Forward all upstream response headers so clients see the full + // Anthropic metadata (request-id, rate-limits, server-timing, etc.) + for key, values := range resp.Header { + for _, v := range values { + w.Header().Add(key, v) + } + } + // Ensure SSE essentials are set (may already be present from upstream) w.Header().Set("Content-Type", "text/event-stream") w.Header().Set("Cache-Control", "no-cache") w.Header().Set("Connection", "keep-alive") @@ -282,15 +290,20 @@ func (h *Handler) handleStreamingResponse(w http.ResponseWriter, resp *http.Resp scanner := bufio.NewScanner(resp.Body) for scanner.Scan() { line := scanner.Text() + + // Forward every line (including event: prefixes and blank + // separators) so the client sees the original SSE stream. + fmt.Fprintf(w, "%s\n", line) + if f, ok := w.(http.Flusher); ok { + f.Flush() + } + + // Only parse data: lines for DB storage if line == "" || !strings.HasPrefix(line, "data:") { continue } streamingChunks = append(streamingChunks, line) - fmt.Fprintf(w, "%s\n\n", line) - if f, ok := w.(http.Flusher); ok { - f.Flush() - } jsonData := strings.TrimPrefix(line, "data: ")