Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
35756c9
docs: SepRAG (CCH-inspired retrieval) ADRs 196-199 + milestone plans
shaal Jun 4, 2026
f1d215d
feat(seprag): M0 correctness gate — CCH separator-tree k-NN on toy gr…
shaal Jun 4, 2026
80c1aef
feat(seprag): M1 first-pass harness — SepRAG on real ogbn-arxiv citat…
shaal Jun 4, 2026
4377c1c
feat(seprag): balanced separator + backbone-sparsify knob; M1 attribu…
shaal Jun 4, 2026
bf1a310
feat(seprag): road-network control + feature-manifold backbone tests
shaal Jun 4, 2026
7189a7a
docs(seprag): record empirical NO-GO outcome in ADRs 196/197/199 + plan
shaal Jun 4, 2026
3918163
feat(seprag): BET 1 — customizable re-weight vs rebuild (ADR-200, WIN…
shaal Jun 4, 2026
8ceab24
feat(seprag): BET 1 complete — non-linear drift + query-cost check (t…
shaal Jun 4, 2026
11bef01
chore(seprag): idiomatic char-array split (clippy clean)
shaal Jun 4, 2026
413db9f
refactor(seprag): shared ann engine + scale harness for BET 1
shaal Jun 4, 2026
9ec8de4
feat(seprag): BET 1 scale result to n=100k (ADR-200) — win holds, gap…
shaal Jun 4, 2026
96e9160
feat(seprag): region-local drift test (ADR-200) — reuse holds locally…
shaal Jun 4, 2026
5526e34
feat(seprag): port BET 1 to production ruvector-diskann (ADR-200) — c…
shaal Jun 4, 2026
79b57a0
feat(seprag): hybrid re-weight+periodic-rebuild policy (ADR-200) — sh…
shaal Jun 4, 2026
44ee4db
chore(seprag): add ruvector-seprag to Cargo.lock
shaal Jun 4, 2026
7ba3d49
docs(bet3): pre-register treewidth-probe gate (FROZEN) on curated KGs
shaal Jun 5, 2026
e52670f
feat(bet3): curated-KG treewidth probe — reuses seprag kernel + road …
shaal Jun 5, 2026
14e1c62
docs(bet3): ADR-203 NO-GO — curated KGs are high-treewidth (3rd KILL)
shaal Jun 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ members = [
"crates/ruvector-solver",
"crates/ruvector-solver-wasm",
"crates/ruvector-solver-node",
"crates/ruvector-seprag",
"examples/dna",
"examples/OSpipe",
"crates/ruvector-coherence",
Expand Down
30 changes: 30 additions & 0 deletions crates/ruvector-seprag/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[package]
name = "ruvector-seprag"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
repository.workspace = true
description = "SepRAG — CCH-inspired separator-tree retrieval for hybrid vector + graph memory (M0 correctness gate). See docs/plans/seprag-cch-retrieval/."
keywords = ["retrieval", "contraction-hierarchies", "nested-dissection", "knn", "graph"]
categories = ["algorithms", "data-structures"]

[dependencies]
thiserror = { workspace = true }

[dev-dependencies]
approx = "0.5"
# Production DiskANN/Vamana index — used by the diskann_drift example to confirm
# BET 1 (ADR-200) on the real index rather than the lite reference Vamana.
ruvector-diskann = { path = "../ruvector-diskann" }

[lints.rust]
unexpected_cfgs = { level = "allow", priority = -1 }
dead_code = "allow"

[lints.clippy]
all = { level = "warn", priority = -1 }
correctness = { level = "deny", priority = 0 }
suspicious = { level = "deny", priority = 0 }
needless_range_loop = "allow"
52 changes: 52 additions & 0 deletions crates/ruvector-seprag/examples/blowup_report.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//! M0→M1 diagnostic: print the metrics that become M1's go/no-go signal
//! (ADR-199 §4) on synthetic graphs — shortcut-blowup ratio, elimination-tree
//! height, and pruned-vs-unpruned search space.
//!
//! Run: `cargo run -p ruvector-seprag --example blowup_report`

use ruvector_seprag::query::{elim_depth, KnnIndex, QueryStats};
use ruvector_seprag::{gen, Graph, SepRag};

/// Builds SepRAG on `g` and prints a single diagnostic line labelled `name`.
///
/// The line reports the vertex count, edge count, shortcut-blowup ratio,
/// maximum elimination depth, and per-query averages of bucket scans and
/// ancestor visits gathered from k-NN queries (k = 10) issued from a sample of
/// source vertices. Every source is queried twice, once with the boolean
/// `knn` flag set and once cleared, so the two scan counts can be compared.
fn report(name: &str, g: Graph) {
    // Read the sizes up front: `g` is moved into `SepRag::build` below.
    let n = g.n;
    let m = g.edges().count();
    let poi_count = (n / 2).max(1);
    let pois: Vec<u32> = gen::sample_pois(n, poi_count, 1);
    let sources = gen::sample_pois(n, 32.min(n), 2);

    let built = SepRag::build(g);
    let max_depth = (0..n as u32)
        .map(|v| elim_depth(&built.topo, v))
        .max()
        .unwrap_or(0);
    let index = KnnIndex::build(&built.topo, &built.metric, &pois);

    let mut scans_with_flag = 0usize;
    let mut scans_without_flag = 0usize;
    let mut ancestors_seen = 0usize;
    let mut ancestors_cut = 0usize;
    for &src in sources.iter() {
        let mut stats_on = QueryStats::default();
        let _ = index.knn(src, 10, true, &mut stats_on);
        let mut stats_off = QueryStats::default();
        let _ = index.knn(src, 10, false, &mut stats_off);

        scans_with_flag += stats_on.bucket_entries_scanned;
        scans_without_flag += stats_off.bucket_entries_scanned;
        ancestors_seen += stats_on.ancestors_visited;
        ancestors_cut += stats_on.ancestors_pruned;
    }

    // Clamp both divisors to 1 so an empty sample or zero scans cannot divide by zero.
    let per_query = sources.len().max(1);
    let saved_pct = 100.0 * (1.0 - scans_with_flag as f64 / scans_without_flag.max(1) as f64);
    println!(
        "{name:<14} n={n:<5} m={m:<6} blowup={:>5.2}x elim_h={max_depth:<4} \
         scans/q: pruned={:<5} unpruned={:<5} ({:.0}% saved) anc_vis/q={} pruned/q={}",
        built.blowup_ratio(),
        scans_with_flag / per_query,
        scans_without_flag / per_query,
        saved_pct,
        ancestors_seen / per_query,
        ancestors_cut / per_query,
    );
}

fn main() {
println!("SepRAG M0 diagnostic — synthetic graphs (lower blowup + more pruning = more road-like)\n");
report("grid-20x20", gen::grid(20, 20, 1));
report("grid-40x40", gen::grid(40, 40, 1));
report("sbm-clean", gen::sbm(8, 50, 0.25, 0.003, 1));
report("sbm-dense", gen::sbm(8, 50, 0.25, 0.05, 1));
report("path-1000", gen::path(1000, 1));
println!("\nNote: synthetic only. The real go/no-go is M1 on ogbn-arxiv (ADR-199).");
}
135 changes: 135 additions & 0 deletions crates/ruvector-seprag/examples/diskann_drift.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
//! BET 1 on the PRODUCTION index (ADR-200 next step): re-run the re-weight-vs-
//! rebuild test on `ruvector-diskann`'s real Vamana graph, not the lite
//! reference Vamana. This (a) confirms the result on the shipping index and
//! (b) firms the rebuild baseline (the lite Vamana showed build variance).
//!
//! The reuse trick is native to `VamanaGraph`: the graph stores only topology;
//! `greedy_search(vectors, query, beam)` takes the vectors externally. So drift
//! = pass the *transformed* vectors to a graph built on the *original* ones.
//!
//! Run: cargo run --release -p ruvector-seprag --example diskann_drift -- <feat.csv> <N>

use ruvector_diskann::distance::FlatVectors;
use ruvector_diskann::graph::VamanaGraph;
use ruvector_seprag::ann::{
apply_linear, brute_topk, identity, l2, lerp_mat, read_vectors, recall, target_rot, Rng, Vec32,
};
use std::time::Instant;

const R: usize = 32;
const BUILD_BEAM: usize = 64;
const SEARCH_BEAM: usize = 64;
const ALPHA: f32 = 1.2;
const K: usize = 10;

/// Copies `vecs` into a `FlatVectors` store created for dimension `dim`,
/// pushing the vectors in their original order.
fn flat(vecs: &[Vec32], dim: usize) -> FlatVectors {
    let mut store = FlatVectors::with_capacity(dim, vecs.len());
    vecs.iter().for_each(|row| {
        store.push(row);
    });
    store
}

/// Builds a `VamanaGraph` over `vecs` using the file-level `R`, `BUILD_BEAM`
/// and `ALPHA` settings.
///
/// # Panics
/// Panics with "vamana build" if the underlying build reports an error.
fn build_graph(vecs: &[Vec32], dim: usize) -> VamanaGraph {
    let storage = flat(vecs, dim);
    let point_count = vecs.len();
    let mut graph = VamanaGraph::new(point_count, R, BUILD_BEAM, ALPHA);
    graph.build(&storage).expect("vamana build");
    graph
}

/// Returns up to `K` neighbours of point `q`: candidates come from a greedy
/// graph search with `vecs[q]` as the query, are re-scored with `l2` against
/// `vecs[q]`, sorted by ascending distance, and `q` itself is dropped.
fn topk(g: &VamanaGraph, vecs: &[Vec32], f: &FlatVectors, q: usize) -> Vec<u32> {
    let query = &vecs[q];
    let (cands, _) = g.greedy_search(f, query, SEARCH_BEAM);

    let mut ranked: Vec<(f32, u32)> = Vec::new();
    for &cand in cands.iter() {
        ranked.push((l2(&vecs[cand as usize], query), cand));
    }
    ranked.sort_by(|lhs, rhs| lhs.0.total_cmp(&rhs.0));

    let mut best: Vec<u32> = Vec::new();
    for (_, cand) in ranked {
        // The query point is never reported as its own neighbour.
        if cand as usize == q {
            continue;
        }
        if best.len() == K {
            break;
        }
        best.push(cand);
    }
    best
}

fn main() {
let args: Vec<String> = std::env::args().collect();
let path = args.get(1).cloned().unwrap_or_else(|| "target/m1-data/node-feat-100k.csv".into());
let n: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(20000);
let vecs = read_vectors(&path, n);
let n = vecs.len();
let dim = vecs[0].len();

eprintln!("[diskann] n={n} dim={dim}; ruvector-diskann Vamana R={R} L={BUILD_BEAM} alpha={ALPHA}");
let t0 = Instant::now();
let g0 = build_graph(&vecs, dim);
eprintln!("[diskann] base graph built in {:.1}s\n", t0.elapsed().as_secs_f64());

let id = identity(dim);
let rot = target_rot(dim, &mut Rng::new(54321));

// ---- Part 1: global rotational drift ----
println!("=== diskann BET 1: GLOBAL rotational drift (recall@{K}) ===");
println!("{:>5} {:>7} | {:>8} {:>8} | {:>9}", "t", "churn", "A reuse", "B rebld", "B build s");
println!("{}", "-".repeat(46));
let mut qrng = Rng::new(999);
let queries: Vec<usize> = (0..100).map(|_| qrng.below(n)).collect();
let base_truth: Vec<Vec<u32>> = queries.iter().map(|&q| brute_topk(&vecs, q, K)).collect();

for &t in &[0.0f32, 0.25, 0.5, 1.0] {
let vt = apply_linear(&lerp_mat(&id, &rot, t), &vecs, dim);
let ft = flat(&vt, dim);
let truth: Vec<Vec<u32>> = queries.iter().map(|&q| brute_topk(&vt, q, K)).collect();
let churn: f64 = truth.iter().zip(&base_truth).map(|(a, b)| 1.0 - recall(a, b)).sum::<f64>() / queries.len() as f64;

let ra: f64 = queries.iter().zip(&truth).map(|(&q, tr)| recall(&topk(&g0, &vt, &ft, q), tr)).sum::<f64>() / queries.len() as f64;

let tb = Instant::now();
let gt = build_graph(&vt, dim);
let bt = tb.elapsed().as_secs_f64();
let rb: f64 = queries.iter().zip(&truth).map(|(&q, tr)| recall(&topk(&gt, &vt, &ft, q), tr)).sum::<f64>() / queries.len() as f64;

println!("{:>5.2} {:>6.0}% | {:>7.1}% {:>7.1}% | {:>9.2}", t, churn * 100.0, ra * 100.0, rb * 100.0, bt);
}

// ---- Part 2: region-local drift (does the lite-Vamana t=0.25 dip reproduce?) ----
println!("\n=== diskann BET 1: REGION-LOCAL drift (warp 15% cluster, recall@{K}) ===");
let region_frac = 0.15f32;
let mut rng = Rng::new(2024);
let centre = vecs[rng.below(n)].clone();
let mut by_dist: Vec<(f32, usize)> = (0..n).map(|i| (l2(&vecs[i], &centre), i)).collect();
by_dist.sort_by(|a, b| a.0.total_cmp(&b.0));
let region_size = (n as f32 * region_frac) as usize;
let mut in_region = vec![false; n];
for &(_, i) in by_dist.iter().take(region_size) {
in_region[i] = true;
}
let region_ids: Vec<usize> = (0..n).filter(|&i| in_region[i]).collect();
let outside_ids: Vec<usize> = (0..n).filter(|&i| !in_region[i]).collect();
let mut qr = Rng::new(77);
let q_in: Vec<usize> = (0..100).map(|_| region_ids[qr.below(region_ids.len())]).collect();
let q_out: Vec<usize> = (0..100).map(|_| outside_ids[qr.below(outside_ids.len())]).collect();

println!("{:>5} | {:>7} {:>7} {:>7} | {:>7} {:>7}", "t", "chrnIn", "A_in", "B_in", "A_out", "B_out");
println!("{}", "-".repeat(54));
for &t in &[0.0f32, 0.25, 0.5, 1.0] {
let a = lerp_mat(&id, &rot, t);
let mut vt = vecs.clone();
for &i in &region_ids {
vt[i] = (0..dim).map(|r| { let row = &a[r * dim..(r + 1) * dim]; row.iter().zip(&vecs[i]).map(|(x, y)| x * y).sum() }).collect();
}
let ft = flat(&vt, dim);
let gt = build_graph(&vt, dim);

let eval = |qs: &[usize]| -> (f64, f64, f64) {
let (mut churn, mut ra, mut rb) = (0.0, 0.0, 0.0);
for &q in qs {
let truth = brute_topk(&vt, q, K);
let truth0 = brute_topk(&vecs, q, K);
churn += 1.0 - recall(&truth, &truth0);
ra += recall(&topk(&g0, &vt, &ft, q), &truth);
rb += recall(&topk(&gt, &vt, &ft, q), &truth);
}
let m = qs.len() as f64;
(churn / m * 100.0, ra / m * 100.0, rb / m * 100.0)
};
let (ci, ai, bi) = eval(&q_in);
let (_co, ao, bo) = eval(&q_out);
println!("{:>5.2} | {:>6.0}% {:>6.1}% {:>6.1}% | {:>6.1}% {:>6.1}%", t, ci, ai, bi, ao, bo);
}

println!("\nGate: A within 2% of B (overall and in-region). Production-index confirmation of ADR-200.");
}
Loading
Loading