From 536fba4d8b88eaa29eb1dd2a48f5bf63f1435c91 Mon Sep 17 00:00:00 2001
From: Parker Henderson <parker@parkerhendo.com>
Date: Tue, 24 Mar 2026 16:56:21 -0700
Subject: [PATCH 1/2] feat: add cross-platform SSE listener support

---
 src/eval.rs | 195 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 163 insertions(+), 32 deletions(-)
diff --git a/src/eval.rs b/src/eval.rs
index d108c61..8244479 100644
--- a/src/eval.rs
+++ b/src/eval.rs
@@ -28,6 +28,9 @@ use serde::{Deserialize, Serialize};
 use serde_json::{json, Value};
 use strip_ansi_escapes::strip;
 use tokio::io::{AsyncBufReadExt, BufReader};
+#[cfg(not(unix))]
+use tokio::net::TcpListener;
+#[cfg(unix)]
 use tokio::net::UnixListener;
 use tokio::process::Command;
 use tokio::sync::mpsc;
@@ -71,7 +74,7 @@ struct EvalRunnerProcess {
     rx: mpsc::UnboundedReceiver<EvalEvent>,
     sse_task: tokio::task::JoinHandle<()>,
     sse_connected: Arc<AtomicBool>,
-    _socket_cleanup_guard: SocketCleanupGuard,
+    _socket_cleanup_guard: Option<SocketCleanupGuard>,
 }
 
 struct EvalProcessOutput {
@@ -210,6 +213,20 @@ impl Drop for SocketCleanupGuard {
     }
 }
 
+enum SseListener {
+    #[cfg(unix)]
+    Unix(UnixListener),
+    #[cfg(not(unix))]
+    Tcp(TcpListener),
+}
+
+struct BoundSseListener {
+    listener: SseListener,
+    env_key: &'static str,
+    env_value: String,
+    socket_cleanup_guard: Option<SocketCleanupGuard>,
+}
+
 #[derive(Debug, Copy, Clone, Eq, PartialEq, ValueEnum)]
 pub enum EvalLanguage {
     #[value(alias = "js")]
@@ -659,32 +676,60 @@ async fn spawn_eval_runner(
     let (js_runner, py_runner) = prepare_eval_runners()?;
     let force_esm = matches!(js_mode, JsMode::ForceEsm);
 
-    let (listener, socket_path, socket_cleanup_guard) = bind_sse_listener()?;
+    let BoundSseListener {
+        listener,
+        env_key,
+        env_value,
+        socket_cleanup_guard,
+    } = bind_sse_listener()?;
     let (tx, rx) = mpsc::unbounded_channel();
     let sse_connected = Arc::new(AtomicBool::new(false));
 
     let tx_sse = tx.clone();
     let sse_connected_for_task = Arc::clone(&sse_connected);
     let sse_task = tokio::spawn(async move {
-        match listener.accept().await {
-            Ok((stream, _)) => {
-                sse_connected_for_task.store(true, Ordering::Relaxed);
-                if let Err(err) = read_sse_stream(stream, tx_sse.clone()).await {
+        match listener {
+            #[cfg(unix)]
+            SseListener::Unix(listener) => match listener.accept().await {
+                Ok((stream, _)) => {
+                    sse_connected_for_task.store(true, Ordering::Relaxed);
+                    if let Err(err) = read_sse_stream(stream, tx_sse.clone()).await {
+                        let _ = tx_sse.send(EvalEvent::Error {
+                            message: format!("SSE stream error: {err}"),
+                            stack: None,
+                            status: None,
+                        });
+                    }
+                }
+                Err(err) => {
                     let _ = tx_sse.send(EvalEvent::Error {
-                        message: format!("SSE stream error: {err}"),
+                        message: format!("Failed to accept SSE connection: {err}"),
                         stack: None,
                         status: None,
                     });
                 }
-            }
-            Err(err) => {
-                let _ = tx_sse.send(EvalEvent::Error {
-                    message: format!("Failed to accept SSE connection: {err}"),
-                    stack: None,
-                    status: None,
-                });
-            }
-        };
+            },
+            #[cfg(not(unix))]
+            SseListener::Tcp(listener) => match listener.accept().await {
+                Ok((stream, _)) => {
+                    sse_connected_for_task.store(true, Ordering::Relaxed);
+                    if let Err(err) = read_sse_stream(stream, tx_sse.clone()).await {
+                        let _ = tx_sse.send(EvalEvent::Error {
+                            message: format!("SSE stream error: {err}"),
+                            stack: None,
+                            status: None,
+                        });
+                    }
+                }
+                Err(err) => {
+                    let _ = tx_sse.send(EvalEvent::Error {
+                        message: format!("Failed to accept SSE connection: {err}"),
+                        stack: None,
+                        status: None,
+                    });
+                }
+            },
+        }
     });
 
     let (mut cmd, runner_kind) = match language {
@@ -752,10 +797,7 @@ async fn spawn_eval_runner(
             serde_json::to_string(&options.extra_args).context("failed to serialize extra args")?;
         cmd.env("BT_EVAL_EXTRA_ARGS_JSON", serialized);
     }
-    cmd.env(
-        "BT_EVAL_SSE_SOCK",
-        socket_path.to_string_lossy().to_string(),
-    );
+    cmd.env(env_key, env_value);
     cmd.stdout(Stdio::piped());
     cmd.stderr(Stdio::piped());
 
@@ -2303,7 +2345,11 @@ fn prepare_js_runner_in_cwd() -> Result<PathBuf> {
 
 fn runner_bin_name(runner_command: &Path) -> Option<String> {
     let name = runner_command.file_name()?.to_str()?.to_ascii_lowercase();
-    Some(name.strip_suffix(".cmd").unwrap_or(&name).to_string())
+    let trimmed = name
+        .strip_suffix(".cmd")
+        .or_else(|| name.strip_suffix(".exe"))
+        .unwrap_or(&name);
+    Some(trimmed.to_string())
 }
 
 fn runner_kind_for_bin(runner_command: &Path) -> RunnerKind {
@@ -2341,9 +2387,15 @@ fn is_ts_node_runner(runner_command: &Path) -> bool {
 
 fn find_python_binary() -> Option<PathBuf> {
     if let Some(venv_root) = std::env::var_os("VIRTUAL_ENV") {
-        let candidate = PathBuf::from(venv_root).join("bin").join("python");
-        if candidate.is_file() {
-            return Some(candidate);
+        let venv_root = PathBuf::from(venv_root);
+        let unix = venv_root.join("bin").join("python");
+        if unix.is_file() {
+            return Some(unix);
+        }
+
+        let windows = venv_root.join("Scripts").join("python.exe");
+        if windows.is_file() {
+            return Some(windows);
         }
     }
     find_binary_in_path(&["python3", "python"])
@@ -2357,9 +2409,10 @@ fn find_node_module_bin(binary: &str, start: &Path) -> Option<PathBuf> {
             return Some(base);
         }
         if cfg!(windows) {
-            let cmd = base.with_extension("cmd");
-            if cmd.is_file() {
-                return Some(cmd);
+            for candidate in with_windows_extensions(&base) {
+                if candidate.is_file() {
+                    return Some(candidate);
+                }
             }
         }
         current = dir.parent();
@@ -2376,9 +2429,10 @@ fn find_binary_in_path(candidates: &[&str]) -> Option<PathBuf> {
                 return Some(path);
             }
             if cfg!(windows) {
-                let cmd = path.with_extension("cmd");
-                if cmd.is_file() {
-                    return Some(cmd);
+                for candidate_path in with_windows_extensions(&path) {
+                    if candidate_path.is_file() {
+                        return Some(candidate_path);
+                    }
                 }
             }
         }
@@ -2386,6 +2440,16 @@ fn find_binary_in_path(candidates: &[&str]) -> Option<PathBuf> {
     None
 }
 
+#[cfg(windows)]
+fn with_windows_extensions(path: &Path) -> [PathBuf; 2] {
+    [path.with_extension("exe"), path.with_extension("cmd")]
+}
+
+#[cfg(not(windows))]
+fn with_windows_extensions(_path: &Path) -> [PathBuf; 0] {
+    []
+}
+
 fn build_sse_socket_path() -> Result<PathBuf> {
     let pid = std::process::id();
     let serial = SSE_SOCKET_COUNTER.fetch_add(1, Ordering::Relaxed);
@@ -2396,14 +2460,22 @@ fn build_sse_socket_path() -> Result<PathBuf> {
     Ok(std::env::temp_dir().join(format!("bt-eval-{pid}-{now}-{serial}.sock")))
 }
 
-fn bind_sse_listener() -> Result<(UnixListener, PathBuf, SocketCleanupGuard)> {
+#[cfg(unix)]
+fn bind_sse_listener() -> Result<BoundSseListener> {
     let mut last_bind_err: Option<std::io::Error> = None;
     for _ in 0..SSE_SOCKET_BIND_MAX_ATTEMPTS {
         let socket_path = build_sse_socket_path()?;
         let socket_cleanup_guard = SocketCleanupGuard::new(socket_path.clone());
         let _ = std::fs::remove_file(&socket_path);
         match UnixListener::bind(&socket_path) {
-            Ok(listener) => return Ok((listener, socket_path, socket_cleanup_guard)),
+            Ok(listener) => {
+                return Ok(BoundSseListener {
+                    listener: SseListener::Unix(listener),
+                    env_key: "BT_EVAL_SSE_SOCK",
+                    env_value: socket_path.to_string_lossy().to_string(),
+                    socket_cleanup_guard: Some(socket_cleanup_guard),
+                })
+            }
             Err(err)
                 if matches!(
                     err.kind(),
@@ -2429,6 +2501,32 @@ fn bind_sse_listener() -> Result<(UnixListener, PathBuf, SocketCleanupGuard)> {
     ))
 }
 
+#[cfg(not(unix))]
+fn bind_sse_listener() -> Result<BoundSseListener> {
+    bind_sse_tcp_listener()
+}
+
+#[cfg(not(unix))]
+fn bind_sse_tcp_listener() -> Result<BoundSseListener> {
+    let std_listener = std::net::TcpListener::bind((std::net::Ipv4Addr::LOCALHOST, 0))
+        .context("failed to bind SSE TCP listener")?;
+    let addr = std_listener
+        .local_addr()
+        .context("failed to resolve SSE TCP listener address")?;
+    std_listener
+        .set_nonblocking(true)
+        .context("failed to set SSE TCP listener non-blocking mode")?;
+    let listener =
+        TcpListener::from_std(std_listener).context("failed to create tokio SSE TCP listener")?;
+
+    Ok(BoundSseListener {
+        listener: SseListener::Tcp(listener),
+        env_key: "BT_EVAL_SSE_ADDR",
+        env_value: format!("127.0.0.1:{}", addr.port()),
+        socket_cleanup_guard: None,
+    })
+}
+
 fn eval_runner_cache_dir() -> PathBuf {
     let root = std::env::var_os("XDG_CACHE_HOME")
         .map(PathBuf::from)
@@ -3952,10 +4050,43 @@ mod tests {
     #[test]
     fn runner_kind_for_bin_detects_bun() {
         assert_eq!(runner_kind_for_bin(Path::new("bun")), RunnerKind::Bun);
+        assert_eq!(runner_kind_for_bin(Path::new("bun.exe")), RunnerKind::Bun);
         assert_eq!(runner_kind_for_bin(Path::new("bunx")), RunnerKind::Bun);
+        assert_eq!(runner_kind_for_bin(Path::new("bunx.cmd")), RunnerKind::Bun);
         assert_eq!(runner_kind_for_bin(Path::new("deno")), RunnerKind::Other);
     }
 
+    #[test]
+    fn find_python_binary_uses_virtual_env_scripts_python_exe() {
+        let _guard = env_test_lock()
+            .lock()
+            .unwrap_or_else(|poisoned| poisoned.into_inner());
+        let dir = make_temp_dir("venv-scripts-python");
+        let scripts_dir = dir.join("Scripts");
+        fs::create_dir_all(&scripts_dir).expect("create scripts directory");
+        let scripts_python = scripts_dir.join("python.exe");
+        fs::write(&scripts_python, "").expect("write scripts python");
+
+        let previous = set_env_var("VIRTUAL_ENV", &dir.to_string_lossy());
+        let found = find_python_binary();
+        restore_env_var("VIRTUAL_ENV", previous);
+
+        assert_eq!(found, Some(scripts_python));
+        let _ = fs::remove_dir_all(&dir);
+    }
+
+    #[cfg(not(unix))]
+    #[test]
+    fn bind_sse_tcp_listener_sets_sse_addr_env() {
+        let bound = bind_sse_tcp_listener().expect("bind tcp sse listener");
+        assert_eq!(bound.env_key, "BT_EVAL_SSE_ADDR");
+        assert!(
+            bound.env_value.starts_with("127.0.0.1:"),
+            "expected loopback host in SSE address env"
+        );
+        assert!(bound.socket_cleanup_guard.is_none());
+    }
+
     #[test]
     fn set_node_heap_size_env_sets_default_when_absent() {
         let _guard = env_test_lock()

From 0471015df2808e062bc6008f30635584c4daf7c2 Mon Sep 17 00:00:00 2001
From: Parker Henderson <parker@parkerhendo.com>
Date: Wed, 25 Mar 2026 11:13:35 -0700
Subject: [PATCH 2/2] WIP

---
 README.md   |   6 +-
 plan.md     | 157 +++++++++++++++++++++++++++++++++
 research.md | 249 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/eval.rs |   6 ++
 4 files changed, 415 insertions(+), 3 deletions(-)
 create mode 100644 plan.md
 create mode 100644 research.md

diff --git a/README.md b/README.md
index 92a972e..6ea6a0f 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 ## Current Limitations
 
-- `bt eval` is currently Unix-only (Linux/macOS). Windows support is planned.
+- `bt eval` supports Linux, macOS, and Windows.
 
 ## Install
 
@@ -114,7 +114,7 @@ Remove-Item -Recurse -Force (Join-Path $env:APPDATA "bt") -ErrorAction SilentlyC
 | `bt auth`        | Authenticate with Braintrust                                       |
 | `bt switch`      | Switch org and project context                                     |
 | `bt status`      | Show current org and project context                               |
-| `bt eval`        | Run eval files (Unix only)                                         |
+| `bt eval`        | Run eval files                                                     |
 | `bt sql`         | Run SQL queries against Braintrust                                 |
 | `bt view`        | View logs, traces, and spans                                       |
 | `bt projects`    | Manage projects (list, create, view, delete)                       |
@@ -326,5 +326,5 @@ Skill smoke-test harness:
 
 - Add richer channel controls for self-update (for example pinned/branch canary selection).
 - Expand release verification and smoke tests for installer flows across more architectures/environments.
-- Add `bt eval` support on Windows (today, `bt eval` is Unix-only due to Unix socket usage).
+- Expand `bt eval` cross-platform CI coverage, especially on Windows.
 - Add signed artifact verification guidance (signature flow) in install and upgrade docs.
diff --git a/plan.md b/plan.md
new file mode 100644
index 0000000..8f846e5
--- /dev/null
+++ b/plan.md
@@ -0,0 +1,157 @@
+# Plan: Make `bt eval` Work Natively on Windows (CLI + SDK)
+
+## Goal
+
+Ship reliable native Windows support for `bt eval` (no WSL), with deterministic SSE transport, passing Windows build/tests, and no regressions on macOS/Linux.
+
+## Non-Goals
+
+- No protocol redesign (keep existing SSE event protocol).
+- No new ad-hoc runtime env configuration outside existing CLI-owned `BT_EVAL_*` contract.
+- No behavior changes to unrelated commands.
+
+## Current Failures to Address
+
+1. **SDK compile break on Windows**
+- `braintrust-sdk-rust` references `std::os::unix::net::UnixStream` unguarded in `src/eval/bt_runner.rs`.
+- Windows build fails at compile time before TCP fallback can run.
+
+2. **Transport ambiguity from inherited env**
+- Runners check `BT_EVAL_SSE_SOCK` before `BT_EVAL_SSE_ADDR`.
+- If both are present, runner may choose wrong transport and fail to stream.
+
+## Workstream A: `braintrust-sdk-rust` (must land first)
+
+Owner: SDK repo (`../braintrust-sdk-rust`)
+
+### A1. Make SSE writer platform-safe
+
+- Refactor `create_sse_writer()` in `src/eval/bt_runner.rs`:
+  - Unix-only Unix socket connect path behind `#[cfg(unix)]`.
+  - Keep TCP connect path available on all platforms.
+  - Ensure non-Unix builds never reference `std::os::unix::*`.
+
+### A2. Keep deterministic transport semantics
+
+- On Unix:
+  - Preserve current preference order (`BT_EVAL_SSE_SOCK`, then `BT_EVAL_SSE_ADDR`).
+- On non-Unix:
+  - Ignore `BT_EVAL_SSE_SOCK` (optionally log unsupported warning) and use `BT_EVAL_SSE_ADDR`.
+
+### A3. Add SDK tests
+
+- Unit tests for:
+  - env-driven selection logic by platform
+  - valid/invalid `BT_EVAL_SSE_ADDR` handling
+- Add at least one CI check that compiles SDK eval on Windows.
+
+### A4. Merge and pin
+
+- Merge SDK fix.
+- Record commit SHA to consume from `btcli`.
+
+## Workstream B: `btcli` Runtime Wiring
+
+Owner: CLI repo (`btcli`)
+
+### B1. Keep platform listener ownership in CLI
+
+- Retain current listener model in `src/eval.rs`:
+  - Unix binds local Unix socket and exports `BT_EVAL_SSE_SOCK`.
+  - Non-Unix binds localhost TCP and exports `BT_EVAL_SSE_ADDR`.
+
+### B2. Enforce one transport env in child process
+
+- Before spawn, set selected SSE env key and remove the conflicting key.
+- This avoids inherited-env collisions across Rust/JS/Python runners.
+
+Status: implemented in `src/eval.rs` (keep and test).
+
+### B3. Add CLI tests for transport env determinism
+
+- Add unit/integration coverage that verifies:
+  - chosen key is present in child env
+  - opposite key is removed
+  - Windows path uses `BT_EVAL_SSE_ADDR`
+  - Unix path uses `BT_EVAL_SSE_SOCK`
+
+## Workstream C: JS/Python Runner Hardening (recommended)
+
+Owner: CLI repo runner scripts
+
+### C1. Keep current compatibility behavior
+
+- JS and Python runners may keep env priority as-is, since CLI now sanitizes env.
+
+### C2. Optional defensive hardening
+
+- On non-Unix, if `BT_EVAL_SSE_SOCK` is present:
+  - skip Unix socket attempt (or emit explicit warning)
+  - continue with `BT_EVAL_SSE_ADDR` if provided
+
+This reduces dependency on perfect parent env hygiene and improves diagnostics.
+
+## Workstream D: Dependency Update in `btcli`
+
+Owner: CLI repo
+
+### D1. Bump SDK revision
+
+- Update `braintrust-sdk-rust` `rev` in `Cargo.toml`.
+- Refresh `Cargo.lock`.
+
+### D2. Verify compile and behavior
+
+- Ensure `cargo check` passes locally.
+- Ensure release-canary Windows artifact build compiles with new SDK rev.
+
+## Workstream E: CI + Validation Matrix
+
+Owner: CLI + SDK repos
+
+### E1. Required validation
+
+1. **Windows build path**
+- `x86_64-pc-windows-msvc` build succeeds in release-canary/release matrix.
+
+2. **Runtime eval path (Windows)**
+- JS eval run emits SSE (`start`, `summary`, `done`) and exits correctly.
+- Python eval run emits SSE and exits correctly.
+
+3. **Conflict env regression**
+- Simulate inherited `BT_EVAL_SSE_SOCK` while Windows listener selects TCP.
+- Verify child still uses TCP via `BT_EVAL_SSE_ADDR`.
+
+4. **Unix regression check**
+- Unix eval path continues to use Unix sockets with no behavior regressions.
+
+### E2. Nice-to-have validation
+
+- Add a dedicated Windows eval integration job (JS + Python smoke tests).
+
+## Sequencing (Execution Order)
+
+1. Implement and merge **Workstream A** in `braintrust-sdk-rust`.
+2. Land/keep **Workstream B** in `btcli` with tests.
+3. Apply **Workstream D** SDK rev bump in `btcli`.
+4. Run **Workstream E** validation gates.
+5. Optional **Workstream C** hardening if diagnostics still weak.
+
+## Risks and Mitigations
+
+1. Risk: SDK change lands but CLI still has env collisions.
+- Mitigation: keep CLI opposite-key removal as a hard requirement.
+
+2. Risk: CI only proves compile, not runtime SSE correctness.
+- Mitigation: add Windows eval smoke tests that assert SSE event flow.
+
+3. Risk: Unix regressions from transport refactor.
+- Mitigation: keep Unix ordering unchanged and run existing Unix eval tests.
+
+## Definition of Done
+
+- Windows artifact build succeeds without `std::os::unix` compile errors.
+- `bt eval` runs natively on Windows for JS and Python with working SSE progress/summary.
+- Conflicting inherited SSE env vars do not break transport selection.
+- macOS/Linux behavior remains unchanged.
+- `btcli` is pinned to an SDK commit that contains the Windows-safe eval transport fix.
diff --git a/research.md b/research.md
new file mode 100644
index 0000000..de3e961
--- /dev/null
+++ b/research.md
@@ -0,0 +1,249 @@
+# Eval SSE Transport Research (btcli + braintrust-sdk-rust)
+
+## Scope
+
+This document explains how eval execution and SSE event transport currently work across:
+
+- `btcli` (parent process and UI)
+- JavaScript runner script (`scripts/eval-runner.ts`)
+- Python runner script (`scripts/eval-runner.py`)
+- Rust SDK runner (`../braintrust-sdk-rust/src/eval/bt_runner.rs`)
+
+It focuses on the Windows-native issues we are fixing:
+
+1. incorrect/unsafe Unix-socket handling on Windows
+2. nondeterministic transport selection when both `BT_EVAL_SSE_SOCK` and `BT_EVAL_SSE_ADDR` are present
+
+## High-Level Architecture
+
+### Parent/child split
+
+`btcli` is the parent orchestrator. It:
+
+1. starts a local SSE listener
+2. chooses transport (`unix socket` or `tcp`)
+3. passes the selected endpoint to the child runner via env vars
+4. reads SSE events and maps them into `EvalEvent` for UI/output
+
+Child runners (JS/Python/Rust SDK) do not host the listener. They connect to what `btcli` exposes and emit SSE events.
+
+## btcli Flow
+
+### Listener binding and transport selection
+
+Entry path:
+
+- `spawn_eval_runner(...)` in `src/eval.rs`
+- calls `bind_sse_listener()`
+
+Transport chosen by platform in `src/eval.rs`:
+
+- Unix: `bind_sse_listener()` -> `UnixListener::bind(...)`, exports `BT_EVAL_SSE_SOCK`
+- Non-Unix (Windows): `bind_sse_listener()` -> `bind_sse_tcp_listener()`, exports `BT_EVAL_SSE_ADDR` with `127.0.0.1:<ephemeral_port>`
+
+Relevant code:
+
+- `src/eval.rs:2471` (unix binding)
+- `src/eval.rs:2512` (non-unix binding)
+- `src/eval.rs:2531` (env key for non-unix is `BT_EVAL_SSE_ADDR`)
+
+### Child env wiring
+
+After constructing the child command, `btcli` sets eval env vars and now clears the opposite SSE key:
+
+- sets chosen key/value
+- removes conflicting key (`BT_EVAL_SSE_SOCK` or `BT_EVAL_SSE_ADDR`)
+
+Relevant code:
+
+- `src/eval.rs:801`
+- `src/eval.rs:807`
+
+This makes transport selection deterministic for all runners.
+
+### SSE parsing and event mapping
+
+`btcli` reads SSE framing (`event:` / `data:` blocks) and maps events:
+
+- `start`
+- `summary`
+- `processing`
+- `progress`
+- `console`
+- `error`
+- `dependencies`
+- `done`
+
+Relevant code:
+
+- `read_sse_stream(...)` in `src/eval.rs:2715`
+- `handle_sse_event(...)` in `src/eval.rs:2748`
+
+## Child Runner Implementations
+
+### JavaScript runner (`scripts/eval-runner.ts`)
+
+Config/env reader:
+
+- `readRunnerConfig()` consumes `BT_EVAL_*` flags (`BT_EVAL_JSONL`, `BT_EVAL_LIST`, etc.)
+
+SSE transport connection order in `createSseWriter()`:
+
+1. if `BT_EVAL_SSE_SOCK` is set, connect with `node:net` path mode
+2. else if `BT_EVAL_SSE_ADDR` is set, connect host/port TCP
+
+Relevant code:
+
+- `scripts/eval-runner.ts:256`
+- `scripts/eval-runner.ts:678`
+- `scripts/eval-runner.ts:687`
+- `scripts/eval-runner.ts:721`
+
+Event emission:
+
+- sends `processing`, `summary`, `error`, `dependencies`, `done`, and progress/console events
+- finalization sends `dependencies` + `done` then closes socket
+
+Relevant code:
+
+- `scripts/eval-runner.ts:1908`
+- `scripts/eval-runner.ts:1958`
+
+### Python runner (`scripts/eval-runner.py`)
+
+Config/env reader:
+
+- `read_runner_config()` consumes `BT_EVAL_*` flags
+
+SSE transport connection order in `create_sse_writer()`:
+
+1. if `BT_EVAL_SSE_SOCK` is set, connect using `socket.AF_UNIX`
+2. else if `BT_EVAL_SSE_ADDR` is set, parse and connect TCP
+
+Relevant code:
+
+- `scripts/eval-runner.py:145`
+- `scripts/eval-runner.py:90`
+- `scripts/eval-runner.py:97`
+
+Event emission:
+
+- sends `dependencies` and `done` before exit, closes socket in `finally`
+
+Relevant code:
+
+- `scripts/eval-runner.py:958`
+- `scripts/eval-runner.py:985`
+
+## Rust SDK Eval Runner (`braintrust-sdk-rust`)
+
+### Core entry points
+
+- `BtEvalRunner::from_env()` reads config from env and initializes SSE writer
+- `eval(...)` runs evaluator, emits `start` and `summary` or `error`
+- `finish(...)` handles list mode output and returns pass/fail aggregate
+
+Relevant code:
+
+- `../braintrust-sdk-rust/src/eval/bt_runner.rs:223`
+- `../braintrust-sdk-rust/src/eval/bt_runner.rs:287`
+- `../braintrust-sdk-rust/src/eval/bt_runner.rs:364`
+
+### Current transport logic and Windows break
+
+In `create_sse_writer()`:
+
+1. checks `BT_EVAL_SSE_SOCK` first
+2. tries `std::os::unix::net::UnixStream::connect(...)`
+3. falls back to `BT_EVAL_SSE_ADDR` TCP
+
+Relevant code:
+
+- `../braintrust-sdk-rust/src/eval/bt_runner.rs:127`
+- `../braintrust-sdk-rust/src/eval/bt_runner.rs:131`
+- `../braintrust-sdk-rust/src/eval/bt_runner.rs:145`
+
+Problem: `std::os::unix` is not available on Windows. Because that symbol is referenced unguarded, Windows compile fails before runtime fallback is possible.
+
+Observed CI error pattern:
+
+- compiler error: `could not find unix in os`
+- location: `src/eval/bt_runner.rs:131`
+- downstream `cargo-dist` exits with `127` after failing to produce `bt.exe`
+
+## Why We Still Need Platform-Aware Logic in btcli
+
+Even if SDK/runners become fully cross-platform, `btcli` must still:
+
+- bind listener socket/port
+- choose endpoint type
+- pass endpoint env to child
+- parse SSE for UI/progress integration
+
+So the right boundary is:
+
+- `btcli`: transport host/binding + env provisioning + event consumption
+- SDK/runners: transport client/connection + event emission
+
+## Root Causes for the Current Regression Class
+
+### 1) Compile-time portability gap in Rust SDK
+
+`bt_runner.rs` references Unix APIs without `cfg(unix)` guards.
+
+### 2) Runtime transport ambiguity from inherited env
+
+All child runners currently check `BT_EVAL_SSE_SOCK` before `BT_EVAL_SSE_ADDR`.
+If both are present (for example inherited parent env), the runner can choose the wrong transport.
+
+This is why clearing the opposite key in `btcli` is necessary even after SDK fixes.
+
+## Fix Strategy for Native Windows Support (No WSL)
+
+### In `braintrust-sdk-rust`
+
+Refactor `create_sse_writer()` to be platform-safe and deterministic:
+
+1. Gate Unix socket connect behind `#[cfg(unix)]`.
+2. On non-Unix, never reference `std::os::unix`.
+3. Keep TCP path (`BT_EVAL_SSE_ADDR`) available on all platforms.
+4. Optional: on Unix, still prefer socket then TCP; on non-Unix, ignore `BT_EVAL_SSE_SOCK` entirely or log that it is unsupported.
+
+### In `btcli`
+
+Keep the opposite-key clearing behavior in `spawn_eval_runner()`:
+
+- set selected env key
+- remove the conflicting one
+
+This guarantees deterministic behavior across SDK, JS runner, and Python runner.
+
+### In JS/Python runners
+
+Current behavior is acceptable with CLI env cleanup, but optional hardening:
+
+- On non-Unix, ignore or warn on `BT_EVAL_SSE_SOCK` before attempting Unix connect.
+
+## Validation Matrix
+
+### Windows
+
+1. Build `bt` and confirm no compile error from `std::os::unix`.
+2. Run at least one JS eval and one Python eval via `bt eval`.
+3. Verify progress/summary stream appears (SSE connected through TCP).
+4. Verify behavior when parent env includes stale `BT_EVAL_SSE_SOCK`:
+   - child should still use `BT_EVAL_SSE_ADDR`.
+
+### Unix (macOS/Linux)
+
+1. Run eval and verify Unix socket path still works.
+2. Confirm no regression in progress/summary/error event handling.
+
+## Key Takeaway
+
+Native Windows support is achieved by using TCP SSE end-to-end on Windows, not by disabling eval features.
+The minimal robust model is:
+
+- `btcli` chooses and exports exactly one transport key
+- SDK/runners connect using platform-safe transport code
+- SSE event protocol remains unchanged
diff --git a/src/eval.rs b/src/eval.rs
index 8244479..24f5edb 100644
--- a/src/eval.rs
+++ b/src/eval.rs
@@ -797,7 +797,13 @@ async fn spawn_eval_runner(
             serde_json::to_string(&options.extra_args).context("failed to serialize extra args")?;
         cmd.env("BT_EVAL_EXTRA_ARGS_JSON", serialized);
     }
+    let conflicting_sse_env_key = if env_key == "BT_EVAL_SSE_SOCK" {
+        "BT_EVAL_SSE_ADDR"
+    } else {
+        "BT_EVAL_SSE_SOCK"
+    };
     cmd.env(env_key, env_value);
+    cmd.env_remove(conflicting_sse_env_key);
     cmd.stdout(Stdio::piped());
     cmd.stderr(Stdio::piped());