ruvnet · shaal · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -115,6 +115,7 @@ members = [
     "crates/ruvector-solver",
     "crates/ruvector-solver-wasm",
     "crates/ruvector-solver-node",
+    "crates/ruvector-seprag",
     "examples/dna",
     "examples/OSpipe",
     "crates/ruvector-coherence",

diff --git a/crates/ruvector-seprag/Cargo.toml b/crates/ruvector-seprag/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+name = "ruvector-seprag"
+version.workspace = true
+edition.workspace = true
+rust-version.workspace = true
+license.workspace = true
+authors.workspace = true
+repository.workspace = true
+description = "SepRAG — CCH-inspired separator-tree retrieval for hybrid vector + graph memory (M0 correctness gate). See docs/plans/seprag-cch-retrieval/."
+keywords = ["retrieval", "contraction-hierarchies", "nested-dissection", "knn", "graph"]
+categories = ["algorithms", "data-structures"]
+
+[dependencies]
+thiserror = { workspace = true }
+
+[dev-dependencies]
+approx = "0.5"
+# Production DiskANN/Vamana index — used by the diskann_drift example to confirm
+# BET 1 (ADR-200) on the real index rather than the lite reference Vamana.
+ruvector-diskann = { path = "../ruvector-diskann" }
+
+[lints.rust]
+unexpected_cfgs = { level = "allow", priority = -1 }
+dead_code = "allow"
+
+[lints.clippy]
+all = { level = "warn", priority = -1 }
+correctness = { level = "deny", priority = 0 }
+suspicious = { level = "deny", priority = 0 }
+needless_range_loop = "allow"
diff --git a/crates/ruvector-seprag/examples/blowup_report.rs b/crates/ruvector-seprag/examples/blowup_report.rs
@@ -0,0 +1,52 @@
+//! M0→M1 diagnostic: print the metrics that become M1's go/no-go signal
+//! (ADR-199 §4) on synthetic graphs — shortcut-blowup ratio, elimination-tree
+//! height, and pruned-vs-unpruned search space.
+//!
+//! Run: `cargo run -p ruvector-seprag --example blowup_report`
+
+use ruvector_seprag::query::{elim_depth, KnnIndex, QueryStats};
+use ruvector_seprag::{gen, Graph, SepRag};
+
+/// Build SepRAG on `g`, run `knn(src, 10, …)` from sampled sources with the flag on and off, and
+/// print one row labelled `name`: blow-up ratio, max `elim_depth`, and per-query scan counters.
+fn report(name: &str, g: Graph) {
+    let n = g.n;
+    let m = g.edges().count();
+    let pois: Vec<u32> = gen::sample_pois(n, (n / 2).max(1), 1);
+    let srcs = gen::sample_pois(n, 32.min(n), 2);
+    let sr = SepRag::build(g);
+    let max_depth = (0..n as u32).map(|r| elim_depth(&sr.topo, r)).max().unwrap_or(0);
+    let idx = KnnIndex::build(&sr.topo, &sr.metric, &pois);
+    let (mut pruned, mut unpruned, mut anc_vis, mut anc_prune) = (0usize, 0usize, 0usize, 0usize);
+    for &src in &srcs {
+        let mut sp = QueryStats::default();
+        let _ = idx.knn(src, 10, true, &mut sp);
+        let mut su = QueryStats::default();
+        let _ = idx.knn(src, 10, false, &mut su);
+        pruned += sp.bucket_entries_scanned;
+        unpruned += su.bucket_entries_scanned;
+        anc_vis += sp.ancestors_visited;
+        anc_prune += sp.ancestors_pruned;
+    }
+    let q = srcs.len().max(1);
+    println!(
+        "{name:<14} n={n:<5} m={m:<6} blowup={:>5.2}x  elim_h={max_depth:<4} \
+         scans/q: pruned={:<5} unpruned={:<5} ({:.0}% saved)  anc_vis/q={} pruned/q={}",
+        sr.blowup_ratio(),
+        pruned / q,
+        unpruned / q,
+        if unpruned == 0 { 0.0 } else { 100.0 * (1.0 - pruned as f64 / unpruned as f64) }, // zero scans => nothing saved, not 100%
+        anc_vis / q,
+        anc_prune / q,
+    );
+}
+
+fn main() { // Prints a banner, one `report` row per synthetic graph, then a caveat note.
+    println!("SepRAG M0 diagnostic — synthetic graphs (lower blowup + more pruning = more road-like)\n");
+    report("grid-20x20", gen::grid(20, 20, 1));
+    report("grid-40x40", gen::grid(40, 40, 1));
+    report("sbm-clean", gen::sbm(8, 50, 0.25, 0.003, 1));
+    report("sbm-dense", gen::sbm(8, 50, 0.25, 0.05, 1)); // same call as sbm-clean except the 4th argument
+    report("path-1000", gen::path(1000, 1));
+    println!("\nNote: synthetic only. The real go/no-go is M1 on ogbn-arxiv (ADR-199).");
+}
diff --git a/crates/ruvector-seprag/examples/diskann_drift.rs b/crates/ruvector-seprag/examples/diskann_drift.rs
@@ -0,0 +1,135 @@
+//! BET 1 on the PRODUCTION index (ADR-200 next step): re-run the re-weight-vs-
+//! rebuild test on `ruvector-diskann`'s real Vamana graph, not the lite
+//! reference Vamana. This (a) confirms the result on the shipping index and
+//! (b) firms up the rebuild baseline (the lite Vamana showed build variance).
+//!
+//! The reuse trick is native to `VamanaGraph`: the graph stores only topology;
+//! `greedy_search(vectors, query, beam)` takes the vectors externally. So drift
+//! = pass the *transformed* vectors to a graph built on the *original* ones.
+//!
+//! Run: cargo run --release -p ruvector-seprag --example diskann_drift -- <feat.csv> <N>
+
+use ruvector_diskann::distance::FlatVectors;
+use ruvector_diskann::graph::VamanaGraph;
+use ruvector_seprag::ann::{
+    apply_linear, brute_topk, identity, l2, lerp_mat, read_vectors, recall, target_rot, Rng, Vec32,
+};
+use std::time::Instant;
+
+const R: usize = 32; // 2nd argument to `VamanaGraph::new`; logged as "R"
+const BUILD_BEAM: usize = 64; // 3rd argument to `VamanaGraph::new`; logged as "L"
+const SEARCH_BEAM: usize = 64; // beam passed to `greedy_search` in `topk`
+const ALPHA: f32 = 1.2; // 4th argument to `VamanaGraph::new`; logged as "alpha"
+const K: usize = 10; // neighbours kept per query; the tables report recall@K
+
+/// Pack `vecs` into a new `FlatVectors` of dimension `dim`, pushing the rows in slice order.
+/// Capacity is reserved up front for `vecs.len()` rows.
+fn flat(vecs: &[Vec32], dim: usize) -> FlatVectors {
+    let mut packed = FlatVectors::with_capacity(dim, vecs.len());
+    vecs.iter().for_each(|row| { packed.push(row); });
+    packed
+}
+
+/// Build a `VamanaGraph` configured with `R`, `BUILD_BEAM`, `ALPHA` over `vecs`; panics if the build fails.
+fn build_graph(vecs: &[Vec32], dim: usize) -> VamanaGraph {
+    let mut graph = VamanaGraph::new(vecs.len(), R, BUILD_BEAM, ALPHA);
+    graph.build(&flat(vecs, dim)).expect("vamana build");
+    graph
+}
+
+/// Graph-search `g` for `vecs[q]`, drop `q` itself, and return the `K` candidates closest by exact `l2`.
+fn topk(g: &VamanaGraph, vecs: &[Vec32], f: &FlatVectors, q: usize) -> Vec<u32> {
+    let (found, _) = g.greedy_search(f, &vecs[q], SEARCH_BEAM);
+    let mut ranked: Vec<(f32, u32)> = found.iter().filter(|&&id| id as usize != q).map(|&id| (l2(&vecs[id as usize], &vecs[q]), id)).collect();
+    ranked.sort_by(|lhs, rhs| lhs.0.total_cmp(&rhs.0)); // stable sort, so filtering first keeps the same order
+    ranked.into_iter().take(K).map(|(_, id)| id).collect()
+}
+
+/// Runs the global and region-local drift experiments and prints recall tables; args: `<feat.csv> <N>`.
+fn main() {
+    let args: Vec<String> = std::env::args().collect();
+    let path = args.get(1).cloned().unwrap_or_else(|| "target/m1-data/node-feat-100k.csv".into());
+    let n: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(20000);
+    let vecs = read_vectors(&path, n);
+    if vecs.is_empty() {
+        eprintln!("[diskann] no vectors could be read from {path}; nothing to index");
+        std::process::exit(1);
+    }
+    let n = vecs.len();
+    let dim = vecs[0].len();
+
+    eprintln!("[diskann] n={n} dim={dim}; ruvector-diskann Vamana R={R} L={BUILD_BEAM} alpha={ALPHA}");
+    let t0 = Instant::now();
+    let g0 = build_graph(&vecs, dim);
+    eprintln!("[diskann] base graph built in {:.1}s\n", t0.elapsed().as_secs_f64());
+    let id = identity(dim);
+    let rot = target_rot(dim, &mut Rng::new(54321));
+
+    // ---- Part 1: global rotational drift ----
+    println!("=== diskann BET 1: GLOBAL rotational drift (recall@{K}) ===");
+    println!("{:>5} {:>7} | {:>8} {:>8} | {:>9}", "t", "churn", "A reuse", "B rebld", "B build s");
+    println!("{}", "-".repeat(46));
+    let mut qrng = Rng::new(999);
+    let queries: Vec<usize> = (0..100).map(|_| qrng.below(n)).collect();
+    let base_truth: Vec<Vec<u32>> = queries.iter().map(|&q| brute_topk(&vecs, q, K)).collect();
+    for &t in &[0.0f32, 0.25, 0.5, 1.0] {
+        let vt = apply_linear(&lerp_mat(&id, &rot, t), &vecs, dim);
+        let ft = flat(&vt, dim);
+        let truth: Vec<Vec<u32>> = queries.iter().map(|&q| brute_topk(&vt, q, K)).collect();
+        let churn: f64 = truth.iter().zip(&base_truth).map(|(a, b)| 1.0 - recall(a, b)).sum::<f64>() / queries.len() as f64;
+        let ra: f64 = queries.iter().zip(&truth).map(|(&q, tr)| recall(&topk(&g0, &vt, &ft, q), tr)).sum::<f64>() / queries.len() as f64; // A: reuse g0 (built on the original vectors)
+        let tb = Instant::now();
+        let gt = build_graph(&vt, dim); // B: rebuild on the drifted vectors (timed)
+        let bt = tb.elapsed().as_secs_f64();
+        let rb: f64 = queries.iter().zip(&truth).map(|(&q, tr)| recall(&topk(&gt, &vt, &ft, q), tr)).sum::<f64>() / queries.len() as f64;
+        println!("{:>5.2} {:>6.0}% | {:>7.1}% {:>7.1}% | {:>9.2}", t, churn * 100.0, ra * 100.0, rb * 100.0, bt);
+    }
+
+    // ---- Part 2: region-local drift (does the lite-Vamana t=0.25 dip reproduce?) ----
+    println!("\n=== diskann BET 1: REGION-LOCAL drift (warp 15% cluster, recall@{K}) ===");
+    let region_frac = 0.15f32;
+    let mut rng = Rng::new(2024);
+    let centre = vecs[rng.below(n)].clone();
+    let mut by_dist: Vec<(f32, usize)> = (0..n).map(|i| (l2(&vecs[i], &centre), i)).collect();
+    by_dist.sort_by(|a, b| a.0.total_cmp(&b.0));
+    let region_size = (n as f32 * region_frac) as usize;
+    if region_size == 0 {
+        // An empty region would make the `region_ids[..]` query sampling below index out of bounds.
+        eprintln!("[diskann] n={n} is too small for a non-empty warp region; skipping region-local drift");
+        return;
+    }
+    let mut in_region = vec![false; n];
+    by_dist.iter().take(region_size).for_each(|&(_, i)| in_region[i] = true);
+    let region_ids: Vec<usize> = (0..n).filter(|&i| in_region[i]).collect();
+    let outside_ids: Vec<usize> = (0..n).filter(|&i| !in_region[i]).collect();
+    let mut qr = Rng::new(77);
+    let q_in: Vec<usize> = (0..100).map(|_| region_ids[qr.below(region_ids.len())]).collect();
+    let q_out: Vec<usize> = (0..100).map(|_| outside_ids[qr.below(outside_ids.len())]).collect();
+    println!("{:>5} | {:>7} {:>7} {:>7} | {:>7} {:>7}", "t", "chrnIn", "A_in", "B_in", "A_out", "B_out");
+    println!("{}", "-".repeat(54));
+    for &t in &[0.0f32, 0.25, 0.5, 1.0] {
+        let a = lerp_mat(&id, &rot, t);
+        let mut vt = vecs.clone();
+        for &i in &region_ids {
+            vt[i] = (0..dim).map(|r| { let row = &a[r * dim..(r + 1) * dim]; row.iter().zip(&vecs[i]).map(|(x, y)| x * y).sum() }).collect();
+        }
+        let ft = flat(&vt, dim);
+        let gt = build_graph(&vt, dim);
+        let eval = |qs: &[usize]| -> (f64, f64, f64) {
+            let (mut churn, mut ra, mut rb) = (0.0, 0.0, 0.0);
+            for &q in qs {
+                let truth = brute_topk(&vt, q, K);
+                let truth0 = brute_topk(&vecs, q, K);
+                churn += 1.0 - recall(&truth, &truth0);
+                ra += recall(&topk(&g0, &vt, &ft, q), &truth);
+                rb += recall(&topk(&gt, &vt, &ft, q), &truth);
+            }
+            let m = qs.len() as f64;
+            (churn / m * 100.0, ra / m * 100.0, rb / m * 100.0)
+        };
+        let (ci, ai, bi) = eval(&q_in);
+        let (_co, ao, bo) = eval(&q_out);
+        println!("{:>5.2} | {:>6.0}% {:>6.1}% {:>6.1}% | {:>6.1}% {:>6.1}%", t, ci, ai, bi, ao, bo);
+    }
+    println!("\nGate: A within 2% of B (overall and in-region). Production-index confirmation of ADR-200.");
+}