diff --git a/Cargo.lock b/Cargo.lock
index 078e1b29fa..7b6801958f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8746,6 +8746,14 @@ dependencies = [
  "tracing-subscriber",
 ]
 
+[[package]]
+name = "ruvector-bet4-ivf-bench"
+version = "0.1.0"
+dependencies = [
+ "rand 0.8.5",
+ "ruvector-rairs",
+]
+
 [[package]]
 name = "ruvector-cli"
 version = "2.2.3"
diff --git a/Cargo.toml b/Cargo.toml
index 38128585a2..d92de77db0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -233,6 +233,8 @@ members = [
     "crates/ruvllm_retrieval_diffusion",
     # RAIRS IVF: Redundant Assignment + Amplified Inverse Residual (ADR-193)
     "crates/ruvector-rairs",
+    # BET 4 (SepRAG #534): LB-B&B IVF probing vs plain IVF nprobe
+    "crates/ruvector-bet4-ivf-bench",
 ]
 resolver = "2"
 
diff --git a/crates/ruvector-bet4-ivf-bench/Cargo.toml b/crates/ruvector-bet4-ivf-bench/Cargo.toml
new file mode 100644
index 0000000000..fdc1e82776
--- /dev/null
+++ b/crates/ruvector-bet4-ivf-bench/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "ruvector-bet4-ivf-bench"
+version = "0.1.0"
+edition = "2021"
+license = "MIT"
+publish = false
+description = "BET 4 (SepRAG #534): LB-ordered branch-and-bound IVF probing vs plain IVF nprobe"
+
+[dependencies]
+ruvector-rairs = { path = "../ruvector-rairs" }
+rand = "0.8"
+
+[lib]
+crate-type = ["rlib"]
diff --git a/crates/ruvector-bet4-ivf-bench/examples/ivf_pruning_sweep.rs b/crates/ruvector-bet4-ivf-bench/examples/ivf_pruning_sweep.rs
new file mode 100644
index 0000000000..8691ccf4ac
--- /dev/null
+++ b/crates/ruvector-bet4-ivf-bench/examples/ivf_pruning_sweep.rs
@@ -0,0 +1,198 @@
+//! BET 4 matched-recall sweep (M2/M3): LB-ordered branch-and-bound IVF probing vs the tuned plain
+//! `IvfFlat` `nprobe` incumbent, on real 128-d arxiv embeddings AND a PCA-8 low-dim control.
+//!
+//! Three contenders share one index per `nclusters` (built once): plain `nprobe` (incumbent),
+//! B&B in **LB-order** (the faithful BET-2 `RegionPruneIvf` kernel), and the **steelman** B&B —
+//! centroid-distance order + LB-skip (the strongest version: if it can't beat `nprobe`, the bound
+//! doesn't pay). Reports the exact-regime pruning fraction, matched-recall cost, and checks the
+//! FROZEN gate (docs/plans/bet4-ivf-pruning/PRE-REGISTRATION.md) on the steelman ratio.
+//!
+//! Run: `cargo run --release -p ruvector-bet4-ivf-bench --example ivf_pruning_sweep -- [N]`
+
+use ruvector_bet4_ivf_bench::data::load_feat_csv;
+use ruvector_bet4_ivf_bench::kernel::BnBIvf;
+use ruvector_bet4_ivf_bench::oracle::{brute_force_topk, recall_at_k};
+use ruvector_bet4_ivf_bench::pca::project_topm;
+use ruvector_rairs::SearchResult;
+use std::time::Instant;
+
+const K: usize = 10;
+const R_TARGET: f64 = 0.95;
+const NCLUSTERS: [usize; 3] = [64, 256, 1024];
+
+/// Load up to `[N]` rows (default 20 000) from `$BET4_DATA`, then sweep the raw and PCA-8 regimes.
+fn main() {
+    let n_req: usize = std::env::args().nth(1).and_then(|s| s.parse().ok()).unwrap_or(20_000);
+    let data =
+        std::env::var("BET4_DATA").unwrap_or_else(|_| "target/m1-data/node-feat-100k.csv".into());
+    let corpus = load_feat_csv(&data, n_req).unwrap_or_else(|e| {
+        eprintln!("failed to load {data}: {e}");
+        std::process::exit(1);
+    });
+    if corpus.is_empty() { // `run_regime` indexes `corpus[0]`: exit cleanly instead of panicking
+        eprintln!("no feature rows loaded from {data} (requested n={n_req})");
+        std::process::exit(1);
+    }
+    let (n, dim) = (corpus.len(), corpus[0].len());
+    println!("# BET4 sweep  n={n} dim={dim} k={K} R_target={R_TARGET}  data={data}\n");
+    run_regime("128-d (real arxiv features)", &corpus);
+    println!("\n# Projecting to PCA-8 (low-dim control)…");
+    let t = Instant::now();
+    let corpus8 = project_topm(&corpus, 8, 60);
+    println!("# PCA done in {:?}\n", t.elapsed());
+    run_regime("PCA-8 (low-dim control — bound should be TIGHT, B&B should WIN)", &corpus8);
+}
+
+/// Sweep one corpus: per `NCLUSTERS` entry, build a shared index, measure the full-budget
+/// LB-order pruning fraction, find each contender's first knob reaching `R_TARGET`, print the
+/// rows, then print the regime verdict. The first `min(200, n)` corpus rows serve as queries.
+fn run_regime(label: &str, corpus: &[Vec<f32>]) {
+    let dim = corpus[0].len();
+    let nq = corpus.len().min(200);
+    let queries = Vec::from_iter(0..nq);
+    let mut truth: Vec<Vec<usize>> = Vec::with_capacity(nq);
+    for &q in &queries {
+        truth.push(brute_force_topk(corpus, &corpus[q], K));
+    }
+
+    println!("════ REGIME: {label}   (dim={dim}) ════");
+    let mut cells: Vec<Cell> = Vec::with_capacity(NCLUSTERS.len());
+    for nc in NCLUSTERS {
+        let build_timer = Instant::now();
+        let idx = BnBIvf::build(corpus, nc, 15, 42);
+        let nc_eff = idx.num_lists();
+        let build = build_timer.elapsed();
+
+        // Exact regime: mean share of clusters the full-budget LB-order search never probed.
+        let pruned = queries.iter().fold(0.0, |acc, &q| {
+            let (_, _, probed) = idx.search(&corpus[q], K, None);
+            acc + (nc_eff - probed) as f64 / nc_eff as f64
+        });
+        let prune_frac = pruned / nq as f64;
+
+        // Matched-recall cost of the three contenders over one shared knob grid.
+        let grid = knob_grid(nc_eff);
+        let plain = matched(&queries, corpus, &truth, &grid, |q, knob| {
+            let (hits, evals, _) = idx.search_nprobe(q, K, knob);
+            (ids(&hits), evals)
+        });
+        let bnb_lb = matched(&queries, corpus, &truth, &grid, |q, knob| {
+            let (hits, evals, _) = idx.search(q, K, Some(knob));
+            (ids(&hits), evals)
+        });
+        let bnb_skip = matched(&queries, corpus, &truth, &grid, |q, knob| {
+            let (hits, evals, _) = idx.search_bnb_skip(q, K, Some(knob));
+            (ids(&hits), evals)
+        });
+
+        // Ratios > 1 favour the steelman; its divisors are floored at 1 to avoid dividing by zero.
+        let eval_ratio = plain.evals / bnb_skip.evals.max(1.0);
+        let wall_ratio = plain.wall_ns as f64 / bnb_skip.wall_ns.max(1) as f64;
+
+        let pct = prune_frac * 100.0;
+        println!("\n## nclusters={nc_eff}  (build {build:?})  exact-regime prune={:.1}%", pct);
+        print_row("plain nprobe   (incumbent)", &plain);
+        print_row("B&B  LB-order  (BET-2 kernel)", &bnb_lb);
+        print_row("B&B  steelman  (cdist+LB-skip)", &bnb_skip);
+        println!("   steelman vs incumbent: eval {eval_ratio:.2}x   wall {wall_ratio:.2}x");
+        cells.push(Cell { nc: nc_eff, eval_ratio, wall_ratio, prune_frac });
+    }
+
+    verdict(label, cells.as_slice());
+}
+
+struct Cell { // one per-`nclusters` result row, consumed by `verdict`
+    nc: usize, // effective cluster count reported by the built index
+    eval_ratio: f64, // plain-nprobe evals/query ÷ steelman evals/query (divisor floored at 1)
+    wall_ratio: f64, // plain-nprobe wall ns/query ÷ steelman wall ns/query (divisor floored at 1)
+    prune_frac: f64, // mean fraction of clusters left unprobed by full-budget LB-order search
+}
+
+struct Matched { // stats of one contender at the knob `matched` settled on
+    knob: usize, // grid value (nprobe / max_probe) the stats below were measured at
+    recall: f64, // mean recall@K over the query set
+    evals: f64, // mean member distance-evals per query
+    wall_ns: u128, // mean wall-clock nanoseconds per query (integer division)
+}
+
+/// Print one contender's row: settled knob, mean recall, mean evals/query and wall µs/query.
+fn print_row(name: &str, m: &Matched) {
+    let wall_us = m.wall_ns / 1000; // integer division: the sub-microsecond remainder is dropped
+    let Matched { knob, recall, evals, .. } = m;
+    println!(
+        "   {name:<32} knob={:<4} recall={:.4} evals/q={:>8.0} wall/q={:>6}µs",
+        knob, recall, evals, wall_us
+    );
+}
+
+/// First knob (ascending) whose mean recall ≥ `R_TARGET`, with its mean member-evals and wall-time;
+/// falls back to the last knob if none reaches target. Only the `search` calls are timed (recall
+/// scoring runs after the clock stops); empty `queries` yields the all-zero `Matched`, no panic.
+fn matched<F>(
+    queries: &[usize],
+    corpus: &[Vec<f32>],
+    truth: &[Vec<usize>],
+    grid: &[usize],
+    search: F,
+) -> Matched
+where
+    F: Fn(&[f32], usize) -> (Vec<usize>, usize),
+{
+    let mut last = Matched { knob: 0, recall: 0.0, evals: 0.0, wall_ns: 0 };
+    let nq = queries.len();
+    if nq == 0 {
+        return last;
+    }
+    for &knob in grid {
+        let mut outcomes: Vec<(Vec<usize>, usize)> = Vec::with_capacity(nq);
+        let timer = Instant::now();
+        outcomes.extend(queries.iter().map(|&q| search(&corpus[q], knob)));
+        let wall_ns = timer.elapsed().as_nanos() / nq as u128;
+        let (mut rec, mut ev) = (0.0, 0usize); // scored outside the timed region
+        for (qi, (got, e)) in outcomes.iter().enumerate() {
+            rec += recall_at_k(&truth[qi], got, K);
+            ev += e;
+        }
+        last = Matched { knob, recall: rec / nq as f64, evals: ev as f64 / nq as f64, wall_ns };
+        if last.recall >= R_TARGET {
+            return last;
+        }
+    }
+    last
+}
+
+/// Geometric knob schedule: start at 1, multiply by 1.5 (rounded up) while strictly below
+/// `maxv`, and always finish on `maxv` itself.
+fn knob_grid(maxv: usize) -> Vec<usize> {
+    let grow = |x: &usize| Some(((*x as f64) * 1.5).ceil() as usize);
+    let mut grid: Vec<usize> = std::iter::successors(Some(1usize), grow)
+        .take_while(|&x| x < maxv)
+        .collect();
+    grid.push(maxv);
+    grid.dedup();
+    grid
+}
+
+fn ids(res: &[SearchResult]) -> Vec<usize> {
+    Vec::from_iter(res.iter().map(|hit| hit.id)) // ids only, result order preserved
+}
+
+/// Print the per-`nclusters` steelman ratios for `label` and classify the regime against the gate.
+fn verdict(label: &str, cells: &[Cell]) {
+    let wins = |c: &Cell| c.eval_ratio >= 2.0 && c.wall_ratio > 1.0;
+    let kills = |c: &Cell| c.eval_ratio < 1.5 || c.wall_ratio < 1.0;
+    // WIN needs every cell to clear the bar and is checked first; otherwise one bad cell is a KILL.
+    let v = match (cells.iter().all(wins), cells.iter().any(kills)) {
+        (true, _) => "WIN (≥2× evals AND wall-clock win across all nclusters)",
+        (false, true) => "KILL / NO-GO (<1.5× somewhere or wall reversed — bound too loose to pay)",
+        (false, false) => "QUALIFIED (1.5–2×, or mixed)",
+    };
+    println!("\n   ── verdict [{label}] ──");
+    for Cell { nc, eval_ratio, wall_ratio, prune_frac } in cells {
+        println!(
+            "      nclusters={:<5} steelman eval={:.2}x wall={:.2}x  exact-prune={:.1}%",
+            nc, eval_ratio, wall_ratio, prune_frac * 100.0
+        );
+    }
+    println!("      => {v}");
+}
diff --git a/crates/ruvector-bet4-ivf-bench/src/data.rs b/crates/ruvector-bet4-ivf-bench/src/data.rs
new file mode 100644
index 0000000000..2d2ec1184c
--- /dev/null
+++ b/crates/ruvector-bet4-ivf-bench/src/data.rs
@@ -0,0 +1,29 @@
+//! Loader for the aligned ogbn-arxiv 128-d node-feature CSV (row `i` = node `i`), the same
+//! public corpus used by ADR-201/202/204. Data lives under `target/m1-data/` (gitignored).
+
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+use std::path::Path;
+
+/// Load up to `limit` rows of comma-separated f32 features; blank lines are skipped. Each row is
+/// one node's feature vector (128 columns for the arxiv features). Errors: any I/O failure, or
+/// `InvalidData` naming the 1-based line of a field that is not a valid f32 (never zero-filled).
+pub fn load_feat_csv<P: AsRef<Path>>(path: P, limit: usize) -> std::io::Result<Vec<Vec<f32>>> {
+    let reader = BufReader::new(File::open(path)?);
+    let mut out = Vec::new(); // grown on demand: `limit` may far exceed the file's row count
+    for (idx, line) in reader.lines().enumerate() {
+        if out.len() >= limit {
+            break;
+        }
+        let line = line?;
+        if line.trim().is_empty() {
+            continue;
+        }
+        let parsed: Result<Vec<f32>, _> = line.split(',').map(|s| s.trim().parse()).collect();
+        let row = parsed.map_err(|e| {
+            std::io::Error::new(std::io::ErrorKind::InvalidData, format!("line {}: {e}", idx + 1))
+        })?;
+        out.push(row);
+    }
+    Ok(out)
+}
diff --git a/crates/ruvector-bet4-ivf-bench/src/kernel.rs b/crates/ruvector-bet4-ivf-bench/src/kernel.rs
new file mode 100644
index 0000000000..04a18addcc
--- /dev/null
+++ b/crates/ruvector-bet4-ivf-bench/src/kernel.rs
@@ -0,0 +1,234 @@
+//! `BnBIvf` — the BET 4 contender: an IVF index probed in **lower-bound order with
+//! branch-and-bound early termination**, over the same `ruvector-rairs` k-means substrate as
+//! the plain-`IvfFlat` incumbent.
+//!
+//! For a query `q` and cluster `c` with centroid `μ_c` and radius `r_c = max_{v∈c} ‖v−μ_c‖`,
+//! the triangle inequality gives a lower bound on the distance to *any* member of `c`:
+//! `LB(q,c) = max(0, ‖q−μ_c‖ − r_c)`. Probing clusters in ascending `LB` while tracking the
+//! running k-th-best distance `τ`, we may stop the instant `LB(c) ≥ τ`: every not-yet-probed
+//! cluster has an even larger `LB`, so none can contain a top-k point. That single break makes
+//! full-budget B&B **exact** (recall → 1.0) yet lets it skip clusters a fixed `nprobe` would
+//! scan. A `max_probe` cap turns it into an approximate knob (the analogue of `nprobe`) for the
+//! matched-recall comparison.
+
+use crate::oracle::l2;
+use ruvector_rairs::{kmeans, SearchResult};
+use std::cmp::Ordering;
+use std::collections::BinaryHeap;
+
+/// IVF index supporting lower-bound-ordered branch-and-bound probing.
+pub struct BnBIvf {
+    centroids: Vec<Vec<f32>>, // one k-means centroid per inverted list; index = cluster id
+    /// Per cluster: `(id, vector)` of its members.
+    lists: Vec<Vec<(usize, Vec<f32>)>>, // `id` is the row index into the corpus given to `build`
+    /// Per cluster: max member distance to its centroid (the B&B radius).
+    radii: Vec<f32>, // 0.0 for a cluster with no members (the fold in `build` starts at 0)
+}
+
+/// Top-k accumulator element. `BinaryHeap` is a max-heap, so the **worst** (largest distance)
+/// candidate sits on top and is the one evicted when a closer point arrives.
+struct Cand {
+    dist: f32,
+    id: usize,
+}
+impl PartialEq for Cand {
+    fn eq(&self, o: &Self) -> bool {
+        self.cmp(o) == Ordering::Equal // same total order as `Ord`: NaN == NaN, 0.0 != -0.0
+    }
+}
+impl Eq for Cand {}
+impl PartialOrd for Cand {
+    fn partial_cmp(&self, o: &Self) -> Option<Ordering> {
+        Some(self.cmp(o))
+    }
+}
+impl Ord for Cand {
+    fn cmp(&self, o: &Self) -> Ordering {
+        self.dist.total_cmp(&o.dist)
+    }
+}
+
+/// Offer candidate `(id, d)` to a bounded top-`k` max-heap: insert while under capacity, else
+/// replace the current worst iff `d` is closer. `k == 0` leaves the heap empty (no panic).
+/// Shared by all probe strategies, so they differ only in cluster-visit order/stopping.
+#[inline]
+fn consider(heap: &mut BinaryHeap<Cand>, k: usize, id: usize, d: f32) {
+    if heap.len() < k {
+        heap.push(Cand { dist: d, id });
+    } else if matches!(heap.peek(), Some(worst) if d < worst.dist) {
+        heap.pop();
+        heap.push(Cand { dist: d, id });
+    }
+}
+
+/// Turn the accumulated top-`k` heap into the caller-facing result list, ordered from the
+/// nearest neighbour to the farthest.
+fn finalize(heap: BinaryHeap<Cand>) -> Vec<SearchResult> {
+    let mut hits = Vec::with_capacity(heap.len());
+    // Heap iteration order is arbitrary, so collect first and order afterwards.
+    for Cand { dist, id } in heap {
+        hits.push(SearchResult { id, distance: dist });
+    }
+    // `total_cmp` is a total order on f32, so the comparator is well-defined even for NaN.
+    hits.sort_by(|lhs, rhs| lhs.distance.total_cmp(&rhs.distance));
+    hits
+}
+
+impl BnBIvf {
+    /// Build over `corpus` using `ruvector-rairs` k-means (`nclusters`, `max_iter`, `seed`).
+    /// Using the same `(corpus, nclusters, max_iter, seed)` as `IvfFlat::train` yields identical
+    /// centroids (the pre-registration's shared-index guarantee). Panics if `corpus` is empty.
+    pub fn build(corpus: &[Vec<f32>], nclusters: usize, max_iter: usize, seed: u64) -> Self {
+        assert!(!corpus.is_empty(), "empty corpus");
+        let k = nclusters.min(corpus.len()).max(1);
+        let (centroids, assignments) = kmeans::train(corpus, k, max_iter, seed);
+        let kc = centroids.len();
+        let mut lists: Vec<Vec<(usize, Vec<f32>)>> = vec![Vec::new(); kc];
+        for (i, v) in corpus.iter().enumerate() {
+            lists[assignments[i]].push((i, v.clone()));
+        }
+        let radii: Vec<f32> = (0..kc)
+            .map(|c| {
+                lists[c]
+                    .iter()
+                    .map(|(_, v)| l2(v, &centroids[c]))
+                    .fold(0.0f32, f32::max)
+            })
+            .collect();
+        Self {
+            centroids,
+            lists,
+            radii,
+        }
+    }
+
+    /// Number of inverted lists (clusters).
+    pub fn num_lists(&self) -> usize {
+        self.centroids.len()
+    }
+
+    /// Search for the top-`k` neighbours of `q`.
+    ///
+    /// `max_probe = None` runs full-budget B&B (**exact**); `Some(m)` probes at most `m`
+    /// clusters in lower-bound order (approximate, the `nprobe` analogue). Returns the top-k
+    /// (ascending distance), the number of **member** distance-evals charged, and the number of
+    /// clusters actually probed. The `nclusters` centroid evals (routing) are *not* counted: every
+    /// strategy here pays the same routing cost. `k == 0` returns `(vec![], 0, 0)` without probing.
+    pub fn search(
+        &self,
+        q: &[f32],
+        k: usize,
+        max_probe: Option<usize>,
+    ) -> (Vec<SearchResult>, usize, usize) {
+        if k == 0 {
+            return (Vec::new(), 0, 0); // nothing requested; an empty top-k has no k-th best
+        }
+        let nclusters = self.centroids.len();
+        // Routing: lower bound per cluster, then ascending-LB order.
+        let mut order: Vec<(f32, usize)> = (0..nclusters)
+            .map(|c| {
+                let lb = (l2(q, &self.centroids[c]) - self.radii[c]).max(0.0);
+                (lb, c)
+            })
+            .collect();
+        order.sort_by(|a, b| a.0.total_cmp(&b.0));
+
+        let cap = max_probe.unwrap_or(nclusters).min(nclusters);
+        let mut heap: BinaryHeap<Cand> = BinaryHeap::with_capacity(k + 1);
+        let mut member_evals = 0usize;
+        let mut probed = 0usize;
+
+        for (lb, c) in order {
+            if probed >= cap {
+                break;
+            }
+            // Branch-and-bound: once the heap is full and the best possible distance in this
+            // (and every later) cluster is no better than the current k-th best, stop.
+            if heap.len() == k && matches!(heap.peek(), Some(kth) if lb >= kth.dist) {
+                break;
+            }
+            for (id, v) in &self.lists[c] {
+                member_evals += 1;
+                consider(&mut heap, k, *id, l2(q, v));
+            }
+            probed += 1;
+        }
+
+        (finalize(heap), member_evals, probed)
+    }
+
+    /// The **steelman B&B**: visit clusters in centroid-distance order (the effective `nprobe`
+    /// ordering, so τ tightens fast), but **skip** scanning any cluster the lower bound proves
+    /// cannot hold a top-k point (`LB(q,c) ≥ τ`). Unlike [`search`](Self::search)'s global early
+    /// `break`, skipping is correctness-safe in *any* visit order (a skipped cluster genuinely
+    /// cannot contain a closer point); a global break would be unsound here because a later,
+    /// large-radius cluster can have a *smaller* LB than the current one.
+    ///
+    /// `max_probe` caps the number of clusters **considered** (the apples-to-apples budget against
+    /// `nprobe`); LB-skips save member scans within that budget. This is the strongest version of
+    /// the bet — if it cannot beat `nprobe`, the bound itself doesn't pay. Returns
+    /// `(top-k, member_evals, clusters_considered)`; `k == 0` returns `(vec![], 0, 0)`.
+    pub fn search_bnb_skip(
+        &self,
+        q: &[f32],
+        k: usize,
+        max_probe: Option<usize>,
+    ) -> (Vec<SearchResult>, usize, usize) {
+        if k == 0 {
+            return (Vec::new(), 0, 0); // nothing requested; an empty top-k has no k-th best
+        }
+        let nclusters = self.centroids.len();
+        let mut order: Vec<(f32, usize)> = (0..nclusters)
+            .map(|c| (l2(q, &self.centroids[c]), c))
+            .collect();
+        order.sort_by(|a, b| a.0.total_cmp(&b.0));
+        let cap = max_probe.unwrap_or(nclusters).min(nclusters);
+        let mut heap: BinaryHeap<Cand> = BinaryHeap::with_capacity(k + 1);
+        let mut member_evals = 0usize;
+        let mut considered = 0usize;
+        for (dc, c) in order {
+            if considered >= cap {
+                break;
+            }
+            considered += 1;
+            let lb = (dc - self.radii[c]).max(0.0);
+            if heap.len() == k && matches!(heap.peek(), Some(kth) if lb >= kth.dist) {
+                continue; // LB-skip: provably cannot improve the top-k
+            }
+            for (id, v) in &self.lists[c] {
+                member_evals += 1;
+                consider(&mut heap, k, *id, l2(q, v));
+            }
+        }
+        (finalize(heap), member_evals, considered)
+    }
+
+    /// The **plain-IVF incumbent** strategy on this same shared index: visit the `nprobe` nearest
+    /// centroids (by centroid distance; `nprobe` is clamped to `1..=nclusters`) and scan **all**
+    /// their members — no lower-bound ordering, no early termination. This mirrors
+    /// `ruvector-rairs::IvfFlat::search` (checked by `instrumented_nprobe_matches_rairs`) while
+    /// counting member evals on B&B's own centroids/lists. `k == 0` returns `(vec![], 0, 0)`.
+    pub fn search_nprobe(
+        &self,
+        q: &[f32],
+        k: usize,
+        nprobe: usize,
+    ) -> (Vec<SearchResult>, usize, usize) {
+        if k == 0 {
+            return (Vec::new(), 0, 0); // nothing requested: skip routing and scanning entirely
+        }
+        let nclusters = self.centroids.len();
+        let mut cd: Vec<_> = (0..nclusters).map(|c| (l2(q, &self.centroids[c]), c)).collect();
+        cd.sort_by(|a, b| a.0.total_cmp(&b.0));
+        let np = nprobe.clamp(1, nclusters);
+        let mut heap: BinaryHeap<Cand> = BinaryHeap::with_capacity(k + 1);
+        let mut member_evals = 0usize;
+        for &(_, c) in cd.iter().take(np) {
+            for (id, v) in &self.lists[c] {
+                member_evals += 1;
+                consider(&mut heap, k, *id, l2(q, v));
+            }
+        }
+        (finalize(heap), member_evals, np)
+    }
+}
diff --git a/crates/ruvector-bet4-ivf-bench/src/lib.rs b/crates/ruvector-bet4-ivf-bench/src/lib.rs
new file mode 100644
index 0000000000..c4cd77e46f
--- /dev/null
+++ b/crates/ruvector-bet4-ivf-bench/src/lib.rs
@@ -0,0 +1,17 @@
+//! BET 4 (SepRAG, ruvnet/RuVector #534): does **lower-bound-ordered branch-and-bound**
+//! IVF probing beat a tuned plain `IvfFlat` `nprobe` on unfiltered ANN over real 128-d
+//! embeddings, at matched recall@10?
+//!
+//! This closes the BET 4 caveat left open by ADR-201: the region-pruning IVF kernel was
+//! only ever run against ACORN (BET 2), never head-to-head against its natural incumbent —
+//! plain IVF `nprobe`. The B&B kernel is rebuilt self-contained here (BET 2's lives only on
+//! the #536 branch), over the same `ruvector-rairs` k-means substrate as the incumbent.
+//!
+//! Frozen gate: `docs/plans/bet4-ivf-pruning/PRE-REGISTRATION.md`.
+
+pub mod data;
+pub mod kernel;
+pub mod oracle;
+pub mod pca;
+
+pub use kernel::BnBIvf;
diff --git a/crates/ruvector-bet4-ivf-bench/src/oracle.rs b/crates/ruvector-bet4-ivf-bench/src/oracle.rs
new file mode 100644
index 0000000000..5ddef5ee80
--- /dev/null
+++ b/crates/ruvector-bet4-ivf-bench/src/oracle.rs
@@ -0,0 +1,39 @@
+//! Brute-force exact kNN ground truth + recall, and the shared L2 helper.
+//!
+//! The triangle-inequality lower bound the kernel relies on holds for the **metric** L2, not
+//! its square — so radii, centroid distances, and member distances all use true L2 (`sqrt`).
+//! Keeping one `l2` here guarantees the bound and the ranking use an identical metric.
+
+/// Euclidean (L2) distance — the true metric (with `sqrt`), not the squared form, so the
+/// triangle inequality applies to the returned value.
+///
+/// Inputs of unequal length are compared over their common prefix only, because `zip` stops at
+/// the shorter slice.
+#[inline]
+pub fn l2(a: &[f32], b: &[f32]) -> f32 {
+    let squared: f32 = std::iter::zip(a, b)
+        .map(|(&p, &q)| (p - q) * (p - q))
+        .sum();
+    squared.sqrt()
+}
+
+/// Exact top-`k` neighbour ids of `q` over `corpus` under L2, nearest first; equal distances
+/// keep corpus order because the sort is stable.
+///
+/// `q` may itself be a corpus point: it is **not** excluded, so it is returned at distance 0.
+pub fn brute_force_topk(corpus: &[Vec<f32>], q: &[f32], k: usize) -> Vec<usize> {
+    // One distance per corpus row, looked up by row index while ordering.
+    let dists: Vec<f32> = corpus.iter().map(|v| l2(q, v)).collect();
+    let mut order: Vec<usize> = (0..corpus.len()).collect();
+    order.sort_by(|&i, &j| dists[i].total_cmp(&dists[j]));
+    // Keep only the `k` nearest (or everything when the corpus has fewer than `k` rows).
+    order.truncate(k);
+    order
+}
+
+/// recall@k = |truth_k ∩ got_k| / k, order-insensitive; the divisor is `max(k, 1)`.
+pub fn recall_at_k(truth: &[usize], got: &[usize], k: usize) -> f64 {
+    let wanted: std::collections::HashSet<_> = truth.iter().take(k).collect();
+    let matched = got.iter().take(k).filter(|id| wanted.contains(id)).count();
+    matched as f64 / k.max(1) as f64
+}
diff --git a/crates/ruvector-bet4-ivf-bench/src/pca.rs b/crates/ruvector-bet4-ivf-bench/src/pca.rs
new file mode 100644
index 0000000000..c6358ffd97
--- /dev/null
+++ b/crates/ruvector-bet4-ivf-bench/src/pca.rs
@@ -0,0 +1,73 @@
+//! Minimal top-`m` PCA via power iteration + deflation — for BET 4's **low-dimensional control**.
+//!
+//! Projecting the real arxiv features onto their top principal components gives the *same data*
+//! at low intrinsic dimensionality, where the triangle-inequality cluster bound should be tight
+//! and the B&B kernel is expected to WIN — proving the kernel/harness are sound and isolating
+//! high-dimensional distance concentration as the cause of any 128-d NO-GO. No linalg dependency.
+
+/// Project `data` (n × dim) onto `min(m, dim)` directions found by power iteration with deflation
+/// (`iters` steps each) on the mean-centered rows, accumulating in f64. Returns one row of
+/// coordinates per input row (empty for empty input); rows shorter/longer than row 0 panic.
+pub fn project_topm(data: &[Vec<f32>], m: usize, iters: usize) -> Vec<Vec<f32>> {
+    let n = data.len();
+    if n == 0 {
+        return Vec::new();
+    }
+    let dim = data[0].len();
+
+    let mut mean = vec![0.0f64; dim];
+    for v in data {
+        for (d, &x) in v.iter().enumerate() {
+            mean[d] += x as f64;
+        }
+    }
+    for x in &mut mean {
+        *x /= n as f64;
+    }
+    let centered: Vec<Vec<f64>> = data
+        .iter()
+        .map(|v| (0..dim).map(|d| v[d] as f64 - mean[d]).collect())
+        .collect();
+
+    let mut comps: Vec<Vec<f64>> = Vec::with_capacity(m.min(dim));
+    for c in 0..m.min(dim) {
+        let mut v = vec![0.0f64; dim];
+        v[c % dim] = 1.0; // seed: unit vector along axis `c`
+        for _ in 0..iters {
+            // u = Σ_i (x_i · v) x_i — the (unnormalised) covariance applied to v, matrix-free.
+            let mut u = vec![0.0f64; dim];
+            for x in &centered {
+                let dot: f64 = x.iter().zip(&v).map(|(a, b)| a * b).sum();
+                for (d, &xd) in x.iter().enumerate() {
+                    u[d] += dot * xd;
+                }
+            }
+            // Remove from u its projection onto each already-found component (Gram–Schmidt step).
+            for prev in &comps {
+                let proj: f64 = u.iter().zip(prev).map(|(a, b)| a * b).sum();
+                for (d, &pd) in prev.iter().enumerate() {
+                    u[d] -= proj * pd;
+                }
+            }
+            let norm = u.iter().map(|x| x * x).sum::<f64>().sqrt();
+            if norm < 1e-12 { // nothing left to normalise: stop iterating this component
+                break; // NOTE(review): `v` stays as-is here (the raw seed on step 1) — confirm OK
+            }
+            for x in &mut u {
+                *x /= norm;
+            }
+            v = u;
+        }
+        comps.push(v);
+    }
+
+    centered
+        .iter()
+        .map(|x| {
+            comps
+                .iter()
+                .map(|comp| x.iter().zip(comp).map(|(a, b)| a * b).sum::<f64>() as f32)
+                .collect()
+        })
+        .collect()
+}
diff --git a/crates/ruvector-bet4-ivf-bench/tests/oracle_gate.rs b/crates/ruvector-bet4-ivf-bench/tests/oracle_gate.rs
new file mode 100644
index 0000000000..675dd0beb8
--- /dev/null
+++ b/crates/ruvector-bet4-ivf-bench/tests/oracle_gate.rs
@@ -0,0 +1,102 @@
+//! M0 gate: full-budget `BnBIvf` must be **exact** — its top-10 must match the brute-force
+//! oracle (recall ≈ 1.0) on a real arxiv slice. This certifies the branch-and-bound invariant
+//! (ascending-LB order + `break` when `LB ≥ τ`) on real data before any matched-recall claim.
+
+use ruvector_bet4_ivf_bench::data::load_feat_csv;
+use ruvector_bet4_ivf_bench::kernel::BnBIvf;
+use ruvector_bet4_ivf_bench::oracle::{brute_force_topk, recall_at_k};
+use ruvector_rairs::{AnnIndex, IvfFlat};
+
+/// Crate-dir-relative path (cargo runs tests from the package root) to the gitignored arxiv slice.
+const DATA: &str = "../../target/m1-data/node-feat-2000.csv";
+
+#[test]
+fn bnb_full_budget_is_exact() {
+    // The slice is gitignored, so a missing or too-small file means "skip", not "fail".
+    let corpus = load_feat_csv(DATA, 2000).unwrap_or_default();
+    if corpus.len() < 500 {
+        eprintln!("skipping bnb_full_budget_is_exact: {DATA} not available");
+        return;
+    }
+    let (k, nq) = (10, 100);
+    let idx = BnBIvf::build(&corpus, 64, 25, 42);
+
+    // Recall of one corpus row used as a query, against the brute-force oracle.
+    let recall_of = |query: &Vec<f32>| {
+        let truth = brute_force_topk(&corpus, query, k);
+        let (res, _evals, _probed) = idx.search(query, k, None); // None = full budget = exact
+        let got = Vec::from_iter(res.iter().map(|r| r.id));
+        recall_at_k(&truth, &got, k)
+    };
+    let total: f64 = corpus.iter().take(nq).map(recall_of).sum();
+    let recall = total / nq as f64;
+    assert!(
+        recall >= 0.999,
+        "full-budget B&B must be exact (B&B invariant broken): recall@10={recall:.4}"
+    );
+}
+
+#[test]
+fn capped_probe_reduces_member_evals() {
+    let corpus = load_feat_csv(DATA, 2000).unwrap_or_default();
+    if corpus.len() < 500 {
+        eprintln!("skipping capped_probe_reduces_member_evals: {DATA} not available");
+        return;
+    }
+    let idx = BnBIvf::build(&corpus, 64, 25, 42);
+    let query = &corpus[0];
+    // Same query, same k: once unbounded, once limited to four clusters.
+    let (_, evals_full, _) = idx.search(query, 10, None);
+    let (_, evals_cap, probed_cap) = idx.search(query, 10, Some(4));
+    assert!(probed_cap <= 4, "cap must bound clusters probed");
+    assert!(
+        evals_cap <= evals_full,
+        "capped probe should not cost more member-evals than full budget"
+    );
+}
+
+#[test]
+fn instrumented_nprobe_matches_rairs() {
+    // The cost-measured incumbent (BnBIvf::search_nprobe) is meant to be the same algorithm as the
+    // real ruvector-rairs::IvfFlat at the same (nclusters, max_iter, seed, nprobe). What is checked
+    // here is the observable proxy: mean recall@k of both over the first 100 corpus rows must agree
+    // to within 0.01 (the returned id lists themselves are not compared element by element).
+    let corpus = match load_feat_csv(DATA, 2000) {
+        Ok(c) if c.len() >= 500 => c,
+        _ => {
+            eprintln!("skipping instrumented_nprobe_matches_rairs: {DATA} not available");
+            return;
+        }
+    };
+    let (dim, k, nclusters, max_iter, seed, nprobe) = (corpus[0].len(), 10, 64, 25, 42u64, 8);
+
+    let mine = BnBIvf::build(&corpus, nclusters, max_iter, seed);
+    let mut rairs = IvfFlat::new(dim, nclusters, max_iter, seed);
+    rairs.train(&corpus).unwrap();
+    rairs.add(&corpus).unwrap();
+
+    let nq = 100;
+    let (mut r_mine, mut r_rairs) = (0.0, 0.0);
+    for q in 0..nq {
+        let truth = brute_force_topk(&corpus, &corpus[q], k);
+        let got_mine: Vec<usize> = mine
+            .search_nprobe(&corpus[q], k, nprobe)
+            .0
+            .iter()
+            .map(|r| r.id)
+            .collect();
+        let got_rairs: Vec<usize> = rairs
+            .search(&corpus[q], k, nprobe)
+            .unwrap()
+            .iter()
+            .map(|r| r.id)
+            .collect();
+        r_mine += recall_at_k(&truth, &got_mine, k);
+        r_rairs += recall_at_k(&truth, &got_rairs, k);
+    }
+    let (r_mine, r_rairs) = (r_mine / nq as f64, r_rairs / nq as f64);
+    assert!(
+        (r_mine - r_rairs).abs() < 0.01,
+        "instrumented incumbent must match rairs IvfFlat: mine={r_mine:.4} rairs={r_rairs:.4}"
+    );
+}
diff --git a/docs/adr/ADR-205-region-pruned-ivf-vs-plain-ivf-nprobe.md b/docs/adr/ADR-205-region-pruned-ivf-vs-plain-ivf-nprobe.md
new file mode 100644
index 0000000000..f55f422651
--- /dev/null
+++ b/docs/adr/ADR-205-region-pruned-ivf-vs-plain-ivf-nprobe.md
@@ -0,0 +1,146 @@
+---
+adr: 205
+title: "Triangle-Inequality Cluster Pruning vs Tuned Plain IVF nprobe — Structural NO-GO"
+status: proposed
+date: 2026-06-05
+authors: [ofershaal, claude-flow]
+related: [ADR-193, ADR-199, ADR-201]
+tags: [ruvector, retrieval, ann, ivf, rairs, pruning, branch-and-bound, no-go]
+---
+
+# ADR-205 — Triangle-Inequality Cluster Pruning vs Tuned Plain IVF `nprobe` (Structural NO-GO)
+
+## Status
+
+**Proposed — NO-GO (robust, structural), 2026-06-05.** Closes the BET 4 caveat left open by
+ADR-201: the region-pruning IVF kernel (`RegionPruneIvf`) was built and validated *exact* there but
+only ever run as BET 2's mechanism **against ACORN** — never head-to-head against its natural
+incumbent, **plain IVF `nprobe`**, on unfiltered ANN. This is that head-to-head. The gate was
+**pre-registered and frozen before any run** (`docs/plans/bet4-ivf-pruning/PRE-REGISTRATION.md`).
+
+**Lower-bound branch-and-bound IVF probing provides essentially zero benefit over a tuned plain
+`nprobe` — a flat 1.00× member-eval ratio in every cell, at both n=20k and n=50k, in both 128-d and
+a PCA-8 low-dim control.** The cause is **structural, not dimensional**: the triangle-inequality
+cluster bound can only prune *far* clusters, which a tuned `nprobe` already never visits — so the
+bound is **redundant** with `nprobe`'s centroid-distance cutoff. High dimensionality only makes the
+faithful BET-2 kernel (which probes in *LB order*) strictly **worse** (0.18–0.25×).
+
+## Context
+
+`ruvector-rairs::IvfFlat` (ADR-193) is plain IVF: k-means centroids + inverted lists;
+`search(q, k, nprobe)` scans all members of the `nprobe` nearest-centroid lists. BET 4 asked whether
+adding a triangle-inequality lower bound — `LB(q,c) = max(0, ‖q−μ_c‖ − r_c)`, `r_c` the cluster
+radius — and probing with branch-and-bound (skip/stop on clusters that provably cannot hold a
+top-k point) beats tuned `nprobe` at matched recall@10, on real 128-d arxiv embeddings.
+
+The kernel was rebuilt self-contained (`crates/ruvector-bet4-ivf-bench`), off clean `main`, over the
+same `ruvector-rairs` k-means substrate as the incumbent (BET 2's kernel lives only on the #536
+branch). Two correctness gates passed before any claim: full-budget B&B is **exact** (recall ≥ 0.999
+vs brute force), and the instrumented incumbent **matches `IvfFlat`** within 0.01 recall at matched
+params (so its measured cost is the real incumbent's).
+
+Three contenders share one index per `nclusters` (only the probe loop differs):
+- **plain `nprobe`** — the incumbent.
+- **B&B LB-order** — the faithful BET-2 `RegionPruneIvf`: probe in ascending `LB`, global `break`
+  when `LB ≥ τ` (exact at full budget).
+- **B&B steelman** — centroid-distance order (the effective `nprobe` ordering, so τ tightens fast)
+  + per-cluster **LB-skip** (correctness-safe in any order). The *strongest* cluster-level B&B: if
+  it cannot beat `nprobe`, the bound does not pay.
+
+## Decision / Finding
+
+**NO-GO.** Cost at matched recall@10 = 0.95, 200 queries; member distance-evals per query
+(steelman is the strongest contender, so it sets the verdict):
+
+**n = 50,000, 128-d (real arxiv features):**
+
+| nclusters | exact-prune | plain `nprobe` | B&B LB-order | **B&B steelman** | steelman ratio |
+|---|---|---|---|---|---|
+| 64   | 0.0%  | 11,102 ev | 49,182 (recall 0.99) | **11,102** | **1.00×** |
+| 256  | 4.7%  | 7,890 ev  | 49,979 (recall 1.00) | **7,890**  | **1.00×** |
+| 1024 | 13.1% | 5,682 ev  | 45,373 (recall 1.00) | **5,682**  | **1.00×** |
+
+**n = 50,000, PCA-8 (low-dim control — bound is tight here):**
+
+| nclusters | exact-prune | plain `nprobe` | **B&B steelman** | steelman ratio |
+|---|---|---|---|---|
+| 64   | 8.0%  | 4,393 ev | **4,393** | **1.00×** |
+| 256  | 45.1% | 1,835 ev | **1,835** | **1.00×** |
+| 1024 | 82.5% | 731 ev   | **731**   | **1.00×** |
+
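+n=20k reproduces the same verdict (steelman 1.00× in all six cells); n=50k stands in for the pre-registered
+100k under the pre-authorised scale-reduction deviation (≥ 50k). Wall-clock tracks the eval ratio (0.94–1.02×) — no reversal, but no win either.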
+
+**Mechanism (structural, the key result).** The true top-k neighbours live in the *nearest*
+clusters; any method must scan those members to find them. The LB bound only lets B&B *skip far
+clusters* — but a tuned `nprobe` already does not visit them. So at matched recall the steelman
+scans **exactly** the members `nprobe` scans (the near clusters all have `LB < τ`, so nothing is
+skipped inside the operating budget) → 1.00×, **in every dimension**. A win is not merely *hard* to
+obtain; it is **structurally impossible** against a tuned incumbent, because the bound and
+`nprobe`'s centroid-distance cutoff exploit the *same* locality.
+
+**Why the LB-order kernel is strictly worse (0.18–0.25×).** Ordering clusters by `LB = max(0, d − r_c)`,
+where `d = ‖q−μ_c‖`, pushes any *large-radius* cluster toward `LB ≈ 0` regardless of how far its
+centroid is, so B&B probes far, low-yield clusters early and needs nearly all clusters to reach recall
+0.95. LB-order is correct for *exact* early termination but a poor *priority* for approximate probing —
+centroid distance is better. High-dimensional concentration (large radii) makes this pathology severe.
+
+## The pre-registered low-dim control — an honest deviation
+
+The frozen pre-registration expected the **PCA-8 control to show B&B *winning*** ("tight bound ⇒
+B&B beats tuned `nprobe`; if it does not win even at 8-d, the implementation is suspect"). **It did
+not** — the steelman is 1.00× at PCA-8 too. That expectation was built on a **false premise**: a
+tight bound implies beating *full exact scan*, **not** beating *tuned `nprobe`*. The control still
+did its real job two ways, so the 128-d NO-GO is **interpretable, not voided**:
+
+1. **The kernel is sound.** The exact-regime pruning fraction scales correctly and strongly with
+   dimension — 0–13% at 128-d vs 8–82.5% at PCA-8 (n=50k). The bound *does* prune hard when it can;
+   the harness measures it correctly. The implementation is not suspect.
+2. **It replaced the predicted mechanism with a better one.** The control is what revealed the kill
+   is *structural redundancy* (dimension-independent), not *dimensional looseness*. The bound prunes
+   up to 82.5% of clusters vs full-scan at PCA-8 (n=50k, `nclusters`=1024) yet still ties `nprobe`,
+   because `nprobe`'s tuning already captures that same pruning.
+
+Recording the deviation — the control disproved my predicted sign and taught the real finding — is
+the point, per the prove-not-hype protocol (cf. ADR-203's three documented deviations).
+
+## Consequences
+
+**Positive (a clean, general kill).**
+- **Companion to ADR-199.** Classical exact-pruning structures do not pay on embedding retrieval:
+  graph separators/contraction there (high treewidth), triangle-inequality cluster bounds here
+  (redundant with `nprobe`). The kills keep sharpening *where* these ideas work — and IVF `nprobe`
+  is simply already near-optimal at exploiting cluster locality.
+- **No code to ship, and that is the right outcome.** `ruvector-rairs::IvfFlat` needs no B&B add-on;
+  the result protects it from a complexity-adding non-improvement.
+
+**Boundaries / honest caveats.**
+- **Scope: cluster-level bounds vs tuned `nprobe`, recall@10 ≈ 0.95.** This does **not** speak to
+  finer techniques — IVFADC / product-quantized asymmetric distance, per-member bounds, or learned
+  routing — which prune *within* lists by a different mechanism and are outside the frozen claim.
+- **The structural argument predicts the same sign at other recall targets** (neighbours still live
+  in the near clusters at R=0.99), but only R=0.95 was measured.
+- **`nprobe` is the right incumbent precisely because it is already tuned.** Against an *untuned*
+  full-exact-scan baseline the bound wins (that is the exact-prune fraction) — but that baseline is
+  not what anyone ships.
+
+## Scoreboard
+
+**2 WINS** (ADR-200/202 reuse+periodic; ADR-204 incremental high-recall tier) /
+**4 KILLS** (ADR-199 CCH-on-embeddings; ADR-201 filtered-ANN vs ACORN; ADR-203 KG-treewidth;
+ADR-205 IVF cluster-pruning vs `nprobe`).
+
+## Next steps
+
+1. If IVF acceleration is ever revisited, the open lever is **within-list** pruning
+   (PQ/IVFADC asymmetric distance), a different mechanism than the cluster-level bound killed here.
+2. None for this kernel — the structural redundancy is dimension-independent and reproduced at two
+   scales; further `n`/recall sweeps are expected only to reconfirm (only R = 0.95 was measured).
+
+## Alternatives considered
+
+- **B&B in LB order** (the faithful BET-2 kernel) — measured; strictly worse than `nprobe`
+  (0.18–0.25×) because LB is a poor approximate priority.
+- **B&B steelman** (centroid order + LB-skip) — the strongest cluster-level variant; ties `nprobe`
+  (1.00×). Retained as the verdict-setting contender.
+- **Within-list / PQ pruning** — not built; a different mechanism, noted as the only open lever.
diff --git a/docs/plans/bet4-ivf-pruning/PRE-REGISTRATION.md b/docs/plans/bet4-ivf-pruning/PRE-REGISTRATION.md
new file mode 100644
index 0000000000..706a7ad4ee
--- /dev/null
+++ b/docs/plans/bet4-ivf-pruning/PRE-REGISTRATION.md
@@ -0,0 +1,136 @@
+# BET 4 — Pre-Registration (FROZEN): LB-ordered branch-and-bound IVF probing vs tuned plain `IvfFlat`
+
+**Status: FROZEN (2026-06-05, user-confirmed).** No gate, threshold, metric, dataset, or
+control below may change after this commit. Deviations are limited to the explicitly
+pre-authorised list at the end; any other change voids the run.
+
+Thread: SepRAG (ruvnet/RuVector issue #534). This closes the BET 4 caveat left open by ADR-201
+(#536): the region-pruning IVF kernel was built and validated *exact* there, but only ever run as
+BET 2's mechanism **against ACORN** — never head-to-head against its own natural incumbent, **plain
+IVF `nprobe` probing**. This is that head-to-head, on **unfiltered** ANN (no predicate — the
+filtered question is BET 2, resolved NO-GO).
+
+Independent of #535/#537/#539: this branch (`feat/seprag-bet4-ivf-pruning`) is cut off **clean
+main**. The incumbent (`ruvector-rairs::IvfFlat`) is on main; the B&B kernel (which lives only on
+the BET 2 branch) is **rebuilt self-contained** here, so the result is valid regardless of any
+other PR's fate.
+
+## Claim (one claim, one number)
+
+> On unfiltered ANN over real **128-d** arxiv embeddings, **lower-bound-ordered branch-and-bound
+> IVF probing** scans **≥ 2× fewer member distance-evals** than a **tuned plain `IvfFlat`
+> `nprobe`**, at **matched recall@10**, **and wins on wall-clock**.
+
+## Incumbent (tuned, in-repo — no straw man)
+
+`ruvector-rairs::IvfFlat` (`crates/ruvector-rairs/src/ivf.rs`): k-means centroids + inverted lists;
+`search(query, k, nprobe)` scans **all** members of the `nprobe` nearest-centroid lists, then
+finalises top-k. Tuned = sweep `nclusters ∈ {64, 256, 1024}` × `nprobe ∈ [1, nclusters]` to its
+best (recall, cost) frontier. **Both contenders share the same k-means centroids and seed** — only
+the *probing strategy* differs, so the comparison isolates the strategy, not clustering luck.
+
+## Contender (the bet — rebuilt standalone)
+
+`BnBIvf` over the same centroids/lists:
+- Precompute per-cluster radius `r_c = max_{v ∈ list_c} ‖v − centroid_c‖`.
+- For a query `q`: compute `‖q − centroid_c‖` for all `c` (routing cost, charged); lower bound
+  `LB(q,c) = max(0, ‖q − centroid_c‖ − r_c)`.
+- Probe clusters in **ascending `LB`** order, maintaining a running k-th-best distance `τ`; scan a
+  cluster's members (each a charged distance-eval), update `τ`; **break when `LB(c) ≥ τ`** (no
+  unscanned cluster can contain a top-k point → provably done).
+- **Exact** at full budget (recall → 1.0). A `max_probe` cap (probe at most that many clusters) is
+  the approx knob used to hit a sub-1.0 recall target for the matched-recall comparison — the
+  analogue of `nprobe`.
+
+## Data
+
+`target/m1-data/node-feat-100k.csv` — ogbn-arxiv 128-d node features (public, aligned, the same
+corpus used by ADR-201/202/204). N-sweep at **20,000 and 100,000**. Queries: 200 held-out points.
+Ground truth: brute-force exact L2 kNN@10 recomputed on the corpus.
+
+## Metrics
+
+- **Primary: member distance-evals at matched recall@10.** The count of query↔member L2
+  evaluations (the dominant cost). Charged identically for both contenders. *Both* are additionally
+  charged the `nclusters` query↔centroid routing evals (equal for both); B&B's radius bookkeeping
+  is a build-time cost (reported separately, not hidden).
+- **Secondary (honesty guard): wall-clock per query.** An eval win that **reverses on wall-clock**
+  is reported as **"inconclusive," never WIN** (ADR-201 precedent).
+- **Reported regardless: exact-regime pruning fraction** — the mean % of clusters B&B skips at
+  recall → 1.0. The mechanistic explainer for whichever verdict lands.
+
+## Matched-recall protocol
+
+Pick recall target **R = 0.95**. Tune plain IVF `nprobe` (per `nclusters`) to the smallest value
+reaching mean recall@10 ≥ R; record its member-evals. Cap `BnBIvf`'s `max_probe` to the smallest
+value reaching ≥ R; record its member-evals. Compare. Repeat per `nclusters ∈ {64, 256, 1024}` and
+per N ∈ {20k, 100k}. (Also report the **exact** regime R → 1.0: B&B full-budget vs `nprobe =
+nclusters` full scan.)
+
+## Gate (FROZEN)
+
+| Verdict | Condition |
+|---|---|
+| **WIN** | member-scan reduction **≥ 2×** vs tuned `nprobe` at matched recall@10 (R = 0.95) **AND** wall-clock win **AND** holds across all three `nclusters` settings (at ≥ one N). |
+| **KILL (NO-GO)** | reduction **< 1.5×** at matched recall **OR** wall-clock reverses. Interpretation: the triangle-inequality bound is too loose in 128-d (distance concentration) to pay. |
+| **Qualified** | between 1.5× and 2×, or wins at some `nclusters`/N but not all → report as a **narrow/conditional edge** with the regime named (not a clean WIN). |
+| **Report always** | exact-regime pruning fraction; the full (recall, member-evals, wall-clock) frontier per cell. |
+
+## Controls (the teeth — both mandatory)
+
+1. **Exact-vs-exact probe** (R → 1.0): `BnBIvf` full-budget vs `IvfFlat` `nprobe = nclusters`
+   (full scan). Directly measures whether the LB bound prunes **at all** in 128-d. If ~0% of
+   clusters are pruned here, that *mechanistically* predicts the KILL — and would make any
+   matched-recall WIN suspect (must be reconciled).
+2. **Low-dimensional control:** rerun the entire protocol on a **low-intrinsic-dim** input —
+   PCA-project the arxiv features to **8-d** (retain the top-8 principal components). The bound is
+   expected to be tight here, so `BnBIvf` **should WIN** the low-d control. This proves the kernel
+   and harness are *sound* and isolates **high-d concentration** as the cause of any 128-d NO-GO —
+   BET 4's analogue of BET 3's roadNet control and BET 1's stale-index control. If the kernel does
+   **not** win even at 8-d, the implementation is suspect and the 128-d result is uninterpretable.
+
+## Adversarial checks (pre-committed)
+
+- **No free routing:** B&B is charged the `nclusters` centroid evals every query; the win must
+  survive that charge (it is identical for plain IVF, so it cancels, but it is *counted*, not
+  ignored).
+- **Wall-clock guard** (above): eval win must not reverse on wall-clock.
+- **Shared index:** identical centroids/seed/lists for both contenders; the *only* difference is
+  the probe loop. No re-clustering between contenders.
+- **Pruning-fraction reconciliation:** a matched-recall WIN with ~0% exact-regime pruning is
+  internally inconsistent and must be explained before being reported as a WIN.
+
+## Honest prior (stated before any run, per protocol)
+
+I lean **NO-GO at 128-d.** Under distance concentration the per-cluster radius `r_c` tends to be
+large relative to inter-centroid gaps, so `LB = max(0, d − r_c) ≈ 0` for most clusters → little
+pruning → proving exactness scans nearly everything, costing more than a tuned `nprobe` that
+accepts < 100% recall. That would be a clean kill, the IVF-level companion to ADR-199 (Euclidean
+embedding geometry defeats classical pruning structures — separators there, triangle-inequality
+cluster bounds here). A WIN would be a genuine shippable `IvfFlat` upgrade. Either outcome is a
+tidy, **consumer-independent** finding — the reason this is the chosen next bet.
+
+## Pre-authorised deviations (anything else voids the run)
+
+- Substitute PCA-to-8-d with a synthetic low-d clustered set **only if** PCA is impractical to
+  implement cleanly; the *role* (a tight-bound low-d control) is fixed.
+- Reduce N from 100k to a smaller second scale if 100k brute-force truth is prohibitively slow,
+  **provided** at least two distinct scales are reported and the larger is ≥ 50k.
+- Adjust query count upward (≥ 200) for noise control; never below 200.
+- Add `nclusters` settings; never drop one of {64, 256, 1024}.
+
+## Plan
+
+- **M0** — self-contained crate `crates/ruvector-bet4-ivf-bench` (deps: `ruvector-rairs`, `rand`):
+  data loader, `BnBIvf` kernel, brute-force oracle; **gate test** `BnBIvf` full-budget == oracle
+  (recall 1.0). clippy clean.
+- **M1** — instrument member-eval + wall-clock counting on both contenders (shared index).
+- **M2** — matched-recall sweep harness (`examples/ivf_pruning_sweep.rs`): the `nclusters` × N grid,
+  exact-regime probe, frontier print.
+- **M3** — low-d (PCA-8) control; adversarial reconciliation; verdict against this gate.
+- **M4** — ADR-205 (WIN, NO-GO, or qualified — honest, ADR-199/201 precedent); one PR at M4 linked
+  to #534; #534 scoreboard comment.
+
+---
+
+**Frozen.** Build starts at M0 against this document; the gate is not revisited.