From 23615ae998e038e595b5be771c23b1cee83379f4 Mon Sep 17 00:00:00 2001 From: Ofer Shaal Date: Thu, 4 Jun 2026 14:44:27 -0400 Subject: [PATCH 1/8] =?UTF-8?q?feat(filtered-bench):=20pre-register=20BET?= =?UTF-8?q?=202=E2=8A=974=20+=20M0=20substrate/oracle=20gate=20(issue=20#5?= =?UTF-8?q?34)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Region-pruned filtered ANN vs tuned ACORN. New self-contained crate ruvector-filtered-bench, depending only on ruvector-acorn (incumbent + oracle) and ruvector-rairs (IVF) — independent of ruvector-seprag/PR #535. Pre-registration (docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md) freezes a selectivity-shaped win/kill gate before any contender runs: at correlation rho>=0.7, contender A within 2% filtered-recall@10 of tuned ACORN at >=5x fewer distance-evals/query at sel<=1% (>=2x at sel=5%), monotonic in selectivity; graceful-degradation and wall-clock honesty guards; rho=0 recall-collapse kill control. M0 (plumbing, pre-freeze-safe): - data.rs: aligned ogbn-arxiv feat/label/year loader. - predicate.rs: rho-correlation knob holding selectivity exactly constant across rho, plus natural label/year predicate families. - tests/oracle_gate.rs: exact_filtered_knn cross-checked against an independent brute force on a real arxiv slice (sel x rho grid). 5 tests green, clippy clean. 
--- Cargo.lock | 9 + Cargo.toml | 2 + crates/ruvector-filtered-bench/Cargo.toml | 15 ++ crates/ruvector-filtered-bench/src/data.rs | 114 ++++++++++ crates/ruvector-filtered-bench/src/lib.rs | 26 +++ .../ruvector-filtered-bench/src/predicate.rs | 197 ++++++++++++++++++ .../tests/oracle_gate.rs | 85 ++++++++ .../bet2-filtered-ann/PRE-REGISTRATION.md | 120 +++++++++++ 8 files changed, 568 insertions(+) create mode 100644 crates/ruvector-filtered-bench/Cargo.toml create mode 100644 crates/ruvector-filtered-bench/src/data.rs create mode 100644 crates/ruvector-filtered-bench/src/lib.rs create mode 100644 crates/ruvector-filtered-bench/src/predicate.rs create mode 100644 crates/ruvector-filtered-bench/tests/oracle_gate.rs create mode 100644 docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md diff --git a/Cargo.lock b/Cargo.lock index 078e1b29fa..234c7f4b4b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9309,6 +9309,15 @@ dependencies = [ "uuid", ] +[[package]] +name = "ruvector-filtered-bench" +version = "0.1.0" +dependencies = [ + "rand 0.8.5", + "ruvector-acorn", + "ruvector-rairs", +] + [[package]] name = "ruvector-fpga-transformer" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 38128585a2..7e1fe37464 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -233,6 +233,8 @@ members = [ "crates/ruvllm_retrieval_diffusion", # RAIRS IVF: Redundant Assignment + Amplified Inverse Residual (ADR-193) "crates/ruvector-rairs", + # BET 2 ⊗ BET 4: region-pruned filtered ANN vs ACORN (SepRAG issue #534, off main) + "crates/ruvector-filtered-bench", ] resolver = "2" diff --git a/crates/ruvector-filtered-bench/Cargo.toml b/crates/ruvector-filtered-bench/Cargo.toml new file mode 100644 index 0000000000..00f5260f41 --- /dev/null +++ b/crates/ruvector-filtered-bench/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "ruvector-filtered-bench" +version = "0.1.0" +edition = "2021" +description = "BET 2 ⊗ BET 4: region-pruned filtered ANN (IVF cluster-skip) vs tuned ACORN — pre-registered 
head-to-head on ogbn-arxiv. Self-contained; independent of ruvector-seprag/PR #535." +authors = ["ofershaal", "claude-flow"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/ruvnet/ruvector" +keywords = ["ann", "filtered-search", "ivf", "acorn", "benchmark"] +categories = ["algorithms", "data-structures"] + +[dependencies] +ruvector-acorn = { path = "../ruvector-acorn" } +ruvector-rairs = { path = "../ruvector-rairs" } +rand = "0.8" diff --git a/crates/ruvector-filtered-bench/src/data.rs b/crates/ruvector-filtered-bench/src/data.rs new file mode 100644 index 0000000000..c280d512da --- /dev/null +++ b/crates/ruvector-filtered-bench/src/data.rs @@ -0,0 +1,114 @@ +//! M0 — load aligned ogbn-arxiv features / labels / years. +//! +//! Row `i` of every file is node `i` (ogbn-arxiv node-index order), so the three +//! arrays align by position. Features are pre-extracted to plain CSV (128 +//! comma-separated f32 per line); labels/years are the gunzipped single-column files. +//! +//! One-time extraction (already done in `target/m1-data/`): +//! ```text +//! gunzip -kc target/m1-data/arxiv/raw/node-label.csv.gz > target/m1-data/node-label.csv +//! gunzip -kc target/m1-data/arxiv/raw/node_year.csv.gz > target/m1-data/node-year.csv +//! # features: target/m1-data/node-feat-100k.csv (first 100k rows already extracted) +//! ``` + +use std::path::Path; + +/// Default in-repo paths (relative to workspace root; NOTE(review): `cargo test` cwd is the crate dir — verify these resolve). +pub const FEAT_100K: &str = "target/m1-data/node-feat-100k.csv"; +pub const LABELS: &str = "target/m1-data/node-label.csv"; +pub const YEARS: &str = "target/m1-data/node-year.csv"; + +/// An aligned ogbn-arxiv slice: `feats[i]`, `labels[i]`, `years[i]` all describe node `i`. 
+#[derive(Clone)] +pub struct Dataset { + pub feats: Vec>, + pub labels: Vec, + pub years: Vec, + pub dim: usize, +} + +impl Dataset { + pub fn len(&self) -> usize { + self.feats.len() + } + pub fn is_empty(&self) -> bool { + self.feats.is_empty() + } + + /// Load `max_n` aligned rows (capped by the shortest file). Panics on malformed + /// input — this is a benchmark harness, not a service; failing loud is correct. + pub fn load( + feat_path: impl AsRef, + label_path: impl AsRef, + year_path: impl AsRef, + max_n: usize, + ) -> Dataset { + let feats = read_feats(feat_path.as_ref(), max_n); + let labels = read_ints(label_path.as_ref(), max_n); + let years = read_ints(year_path.as_ref(), max_n); + + // Truncate all three to the common minimum so alignment is exact. + let n = feats.len().min(labels.len()).min(years.len()); + let dim = feats.first().map(|v| v.len()).unwrap_or(0); + assert!(n > 0, "empty dataset after load"); + assert!( + feats.iter().take(n).all(|v| v.len() == dim), + "ragged feature rows — dim must be constant" + ); + + Dataset { + feats: feats.into_iter().take(n).collect(), + labels: labels.into_iter().take(n).map(|v| v as u32).collect(), + years: years.into_iter().take(n).map(|v| v as i32).collect(), + dim, + } + } + + /// Convenience: load the standard in-repo 100k arxiv slice. 
+ pub fn load_arxiv(max_n: usize) -> Dataset { + Dataset::load(FEAT_100K, LABELS, YEARS, max_n) + } +} + +fn read_feats(path: &Path, max_n: usize) -> Vec> { + let raw = std::fs::read_to_string(path) + .unwrap_or_else(|e| panic!("read features {}: {e}", path.display())); + raw.lines() + .take(max_n) + .map(|line| { + line.split(',') + .map(|f| f.trim().parse::().expect("parse feature f32")) + .collect::>() + }) + .collect() +} + +fn read_ints(path: &Path, max_n: usize) -> Vec { + let raw = std::fs::read_to_string(path) + .unwrap_or_else(|e| panic!("read ints {}: {e}", path.display())); + raw.lines() + .take(max_n) + .map(|line| line.trim().parse::().expect("parse int")) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn loads_aligned_slice() { + // Small slice keeps the test fast; skips cleanly if data isn't extracted. + if !Path::new(FEAT_100K).exists() { + eprintln!("skip: {FEAT_100K} not extracted"); + return; + } + let ds = Dataset::load_arxiv(2000); + assert_eq!(ds.len(), 2000); + assert_eq!(ds.labels.len(), 2000); + assert_eq!(ds.years.len(), 2000); + assert_eq!(ds.dim, 128); + assert!(ds.labels.iter().all(|&l| l < 40), "arxiv has 40 subject labels"); + assert!(ds.years.iter().all(|&y| (1900..=2025).contains(&y))); + } +} diff --git a/crates/ruvector-filtered-bench/src/lib.rs b/crates/ruvector-filtered-bench/src/lib.rs new file mode 100644 index 0000000000..b3206e97aa --- /dev/null +++ b/crates/ruvector-filtered-bench/src/lib.rs @@ -0,0 +1,26 @@ +//! BET 2 ⊗ BET 4 — Region-pruned filtered ANN vs tuned ACORN. +//! +//! Pre-registered head-to-head (see `docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md`): +//! does IVF **cluster-skip** pruning beat predicate-agnostic ACORN on *correlated* +//! predicates at low selectivity, by ≥5× distance-evals/query at equal (±2%) recall? +//! +//! This crate is **self-contained**: it depends only on `ruvector-acorn` (the incumbent and +//! 
the `exact_filtered_knn` oracle) and `ruvector-rairs` (the IVF substrate). It has no +//! dependency on `ruvector-seprag` (PR #535), so it ships as an independent PR. +//! +//! ## Module map (filled across milestones) +//! - `data` (M0) — load ogbn-arxiv features / labels / years. +//! - `predicate` (M0) — predicate families + ρ-correlation knob + selectivity targeting. +//! - `prune` (M2) — contender A: region-pruned IVF filtered search + eval counters. + +pub mod data; +pub mod predicate; + +// Re-export the substrate + incumbent + oracle so the head-to-head harness has one entry +// point and the dependency graph is exercised at build time. +pub use ruvector_acorn::{recall_at_k, AcornIndexGamma, FilteredIndex, FlatFilteredIndex}; +pub use ruvector_rairs::ivf::IvfFlat; + +/// Exact filtered k-NN oracle (brute force) — ground truth for every contender. +/// Thin re-export of the in-repo incumbent's oracle to keep one source of truth. +pub use ruvector_acorn::graph::exact_filtered_knn; diff --git a/crates/ruvector-filtered-bench/src/predicate.rs b/crates/ruvector-filtered-bench/src/predicate.rs new file mode 100644 index 0000000000..0abe48be58 --- /dev/null +++ b/crates/ruvector-filtered-bench/src/predicate.rs @@ -0,0 +1,197 @@ +//! M0 — predicate families, the ρ-correlation knob, and selectivity targeting. +//! +//! A [`Predicate`] is a boolean membership mask over node ids `[0, n)`. The harness +//! passes it to ACORN / the oracle as `|id| pred.test(id)`. +//! +//! ## The ρ-knob (the controlled instrument) +//! +//! [`correlated`] builds a predicate of an *exact* target selectivity whose correlation +//! with embedding geometry is tunable: ρ=1 is a tight, structurally-clustered set (built +//! from subject-label classes, which occupy regions of the embedding space); ρ=0 is a +//! random set of the same size (ACORN's home turf, the kill control); intermediate ρ +//! replaces a fraction `1−ρ` of structured members with random non-members. Selectivity is +//! 
held fixed across ρ so cost differences are attributable to correlation, not set size. + +use rand::seq::SliceRandom; +use rand::Rng; + +/// A predicate over node ids, plus the construction parameters that produced it. +#[derive(Clone)] +pub struct Predicate { + mask: Vec, + /// Number of matching nodes (`mask` trues). + pub n_match: usize, + /// Requested selectivity (matches/n); see [`Predicate::selectivity`] for the realized value. + pub target_sel: f64, + /// Construction correlation knob in `[0,1]` (1 = structured, 0 = random). `NaN` for + /// natural-family predicates where ρ is not a construction parameter. + pub rho: f64, +} + +impl Predicate { + #[inline] + pub fn test(&self, id: u32) -> bool { + self.mask[id as usize] + } + + /// `Fn(u32) -> bool` view for ACORN / oracle APIs. + pub fn as_fn(&self) -> impl Fn(u32) -> bool + Copy + '_ { + move |id| self.mask[id as usize] + } + + /// Realized selectivity = matches / n. + pub fn selectivity(&self) -> f64 { + self.n_match as f64 / self.mask.len() as f64 + } + + pub fn len(&self) -> usize { + self.mask.len() + } + pub fn is_empty(&self) -> bool { + self.mask.is_empty() + } + + fn from_mask(mask: Vec, target_sel: f64, rho: f64) -> Predicate { + let n_match = mask.iter().filter(|&&b| b).count(); + Predicate { mask, n_match, target_sel, rho } + } +} + +/// Natural categorical predicate: nodes whose subject label equals `class`. +pub fn from_label(labels: &[u32], class: u32) -> Predicate { + let mask = labels.iter().map(|&l| l == class).collect::>(); + let sel = mask.iter().filter(|&&b| b).count() as f64 / labels.len() as f64; + Predicate::from_mask(mask, sel, f64::NAN) +} + +/// Natural ordinal predicate: nodes with `year >= y`. 
+pub fn year_ge(years: &[i32], y: i32) -> Predicate { + let mask = years.iter().map(|&yr| yr >= y).collect::>(); + let sel = mask.iter().filter(|&&b| b).count() as f64 / years.len() as f64; + Predicate::from_mask(mask, sel, f64::NAN) +} + +/// The controlled instrument: a predicate of exact selectivity `target_sel` with tunable +/// geometric correlation `rho ∈ [0,1]`. +/// +/// - `seed_class_rank` selects which size-ranked label class seeds the structured set +/// (0 = largest); rotating it lets M3 average over several regions to remove +/// region-specific bias. +/// - The structured pool is the union of label classes (in size order from the seed), +/// truncated to exactly `m = round(target_sel · n)` members. `keep = round(rho · m)` of +/// those are retained; the remaining `m − keep` are drawn at random from all unselected +/// nodes (dropped structured members included), so |set| = m for every ρ. +pub fn correlated( + labels: &[u32], + target_sel: f64, + rho: f64, + seed_class_rank: usize, + rng: &mut impl Rng, +) -> Predicate { + let n = labels.len(); + let m = ((target_sel * n as f64).round() as usize).clamp(1, n); + let rho = rho.clamp(0.0, 1.0); + + // Label classes sorted by descending size; rotate by seed_class_rank. + let n_classes = (labels.iter().copied().max().unwrap_or(0) as usize) + 1; + let mut counts = vec![0usize; n_classes]; + for &l in labels { + counts[l as usize] += 1; + } + let mut class_order: Vec = (0..n_classes as u32).collect(); + class_order.sort_by_key(|&c| std::cmp::Reverse(counts[c as usize])); + if !class_order.is_empty() { + let rot = seed_class_rank % class_order.len(); + class_order.rotate_left(rot); + } + + // Accumulate node ids class-by-class until the pool reaches m, then truncate. 
+ let mut structured: Vec = Vec::with_capacity(m); + 'fill: for &c in &class_order { + for (id, &l) in labels.iter().enumerate() { + if l == c { + structured.push(id as u32); + if structured.len() >= m { + break 'fill; + } + } + } + } + + let keep = ((rho * m as f64).round() as usize).min(structured.len()); + let mut mask = vec![false; n]; + for &id in &structured[..keep] { + mask[id as usize] = true; + } + + // Fill the rest with random non-members so realized selectivity == m/n exactly. + let need = m - keep; + if need > 0 { + let mut pool: Vec = (0..n as u32).filter(|&id| !mask[id as usize]).collect(); + let (picked, _) = pool.partial_shuffle(rng, need); + for &id in picked.iter() { + mask[id as usize] = true; + } + } + + Predicate::from_mask(mask, target_sel, rho) +} + +#[cfg(test)] +mod tests { + use super::*; + use rand::rngs::StdRng; + use rand::SeedableRng; + + fn synth_labels(n: usize, n_classes: u32) -> Vec { + // Deterministic block labels: class regions are contiguous id ranges (a proxy for + // geometric clustering, sufficient to test the ρ mechanism). + (0..n).map(|i| (i as u32 * n_classes / n as u32).min(n_classes - 1)).collect() + } + + #[test] + fn selectivity_is_exact_across_rho() { + let labels = synth_labels(10_000, 8); + let mut rng = StdRng::seed_from_u64(1); + for &rho in &[0.0, 0.3, 0.7, 1.0] { + let p = correlated(&labels, 0.05, rho, 0, &mut rng); + assert_eq!(p.n_match, 500, "exact selectivity must hold for ρ={rho}"); + assert!((p.selectivity() - 0.05).abs() < 1e-9); + } + } + + #[test] + fn rho1_is_structured_rho0_is_spread() { + // ρ=1 concentrates in few classes; ρ=0 spreads across all. Use distinct-class + // count of the matched set as a cheap structure proxy. 
+ let labels = synth_labels(10_000, 8); + let mut rng = StdRng::seed_from_u64(2); + let distinct = |p: &Predicate| { + let mut s = std::collections::HashSet::new(); + for id in 0..labels.len() as u32 { + if p.test(id) { + s.insert(labels[id as usize]); + } + } + s.len() + }; + let p1 = correlated(&labels, 0.05, 1.0, 0, &mut rng); + let p0 = correlated(&labels, 0.05, 0.0, 0, &mut rng); + assert!( + distinct(&p1) < distinct(&p0), + "ρ=1 should span fewer classes ({}) than ρ=0 ({})", + distinct(&p1), + distinct(&p0) + ); + } + + #[test] + fn from_label_matches_count() { + let labels = vec![0u32, 1, 1, 2, 1]; + let p = from_label(&labels, 1); + assert_eq!(p.n_match, 3); + // labels = [0,1,1,2,1] → ids 1,2,4 match; 0,3 do not. + assert!(p.test(1) && p.test(2) && p.test(4)); + assert!(!p.test(0) && !p.test(3)); + } +} diff --git a/crates/ruvector-filtered-bench/tests/oracle_gate.rs b/crates/ruvector-filtered-bench/tests/oracle_gate.rs new file mode 100644 index 0000000000..89c6412a45 --- /dev/null +++ b/crates/ruvector-filtered-bench/tests/oracle_gate.rs @@ -0,0 +1,85 @@ +//! M0 gate — "trust the oracle." +//! +//! Every contender (A/B/C/D) is scored against `ruvector-acorn::exact_filtered_knn`. If that +//! oracle is wrong, every downstream recall number is meaningless. This test cross-checks it +//! against a **fully independent** brute-force filtered k-NN (separate distance code, separate +//! sort) on a real ogbn-arxiv slice, exercising the whole data → predicate → oracle path. +//! +//! Skips cleanly when the arxiv data isn't extracted (CI without the dataset). + +use ruvector_filtered_bench::data::{Dataset, FEAT_100K}; +use ruvector_filtered_bench::exact_filtered_knn; +use ruvector_filtered_bench::predicate; + +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; +use std::path::Path; + +/// Independent brute force: no shared code with the oracle. 
Plain scalar L2, stable sort by +/// (distance, id) so ties (which don't occur on real float embeddings) are still deterministic. +fn independent_filtered_knn( + feats: &[Vec], + labels_mask: &dyn Fn(u32) -> bool, + query: &[f32], + k: usize, +) -> Vec { + let mut scored: Vec<(f64, u32)> = (0..feats.len() as u32) + .filter(|&id| labels_mask(id)) + .map(|id| { + let d: f64 = feats[id as usize] + .iter() + .zip(query) + .map(|(a, b)| { + let diff = (*a - *b) as f64; + diff * diff + }) + .sum(); + (d, id) + }) + .collect(); + scored.sort_by(|a, b| a.0.total_cmp(&b.0).then(a.1.cmp(&b.1))); + scored.into_iter().take(k).map(|(_, id)| id).collect() +} + +#[test] +fn oracle_matches_independent_brute_force() { + if !Path::new(FEAT_100K).exists() { + eprintln!("skip: arxiv data not extracted ({FEAT_100K})"); + return; + } + let ds = Dataset::load_arxiv(3000); + let k = 10; + let mut rng = StdRng::seed_from_u64(42); + + // Sweep a few selectivities; each must keep #matches >= k (the M0 selectivity floor). + for &sel in &[0.02_f64, 0.05, 0.20] { + for &rho in &[0.0_f64, 1.0] { + let pred = predicate::correlated(&ds.labels, sel, rho, 0, &mut rng); + assert!( + pred.n_match >= k, + "selectivity floor violated: sel={sel} ρ={rho} → only {} matches < k={k}", + pred.n_match + ); + let pf = pred.as_fn(); + + // 8 random queries drawn from the corpus. + for _ in 0..8 { + let qi = rng.gen_range(0..ds.len()); + let q = &ds.feats[qi]; + + let oracle = exact_filtered_knn(&ds.feats, q, k, pf); + let truth = independent_filtered_knn(&ds.feats, &pf, q, k); + + assert_eq!( + oracle, truth, + "oracle disagrees with independent brute force (sel={sel} ρ={rho} q={qi})" + ); + // Every returned id must actually satisfy the predicate. 
+ assert!( + oracle.iter().all(|&id| pf(id)), + "oracle returned a non-matching id (sel={sel} ρ={rho})" + ); + } + } + } +} diff --git a/docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md b/docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md new file mode 100644 index 0000000000..b0c0309b40 --- /dev/null +++ b/docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md @@ -0,0 +1,120 @@ +# BET 2 ⊗ BET 4 — Region-Pruned Filtered ANN vs tuned ACORN + +**Status:** Pre-registered (gate frozen before any run) · **Date:** 2026-06-04 · +**Research line:** SepRAG (ruvnet/RuVector issue #534) · **Self-contained:** depends only on +crates already on `main` (`ruvector-acorn`, `ruvector-rairs`) — **independent of PR #535.** · +**Builds on (by reference, not by compile):** ADR-200 (BET 1 WIN), ADR-193 (`ruvector-rairs` +IVF), ADR-199 (CCH NO-GO → why IVF, not separators) · +**Outcome ADR:** ADR-201 (written from the result — WIN *or* NO-GO). + +> This document is the **pre-registration**, committed before the harness runs. A loss is an +> acceptable, reportable outcome (cf. ADR-199). Editing the gate after seeing results voids +> the bet. Plumbing (M0) may be built before freeze; contender runs (M1+) may not. + +## Prove-not-hype protocol (mandatory — all five) + +1. **One claim, one number.** 2. **Beat the strongest in-repo incumbent, tuned.** +3. **Public data + ground truth.** 4. **Pre-register WIN *and* KILL.** 5. **Adversarial check.** + +## Thesis (one claim, one number) + +> For predicates whose membership **correlates with embedding-cluster structure** (ρ ≥ 0.7) at +> **selectivity ≤ 1%**, IVF **region-pruned** filtered search reaches **filtered-recall@10 +> within 2%** of tuned ACORN at **≥ 5× fewer distance-evaluations per query** — and the cost +> advantage **grows monotonically as selectivity falls** (the mechanism signature). + +Primary cost = **distance-evals/query** (hardware-independent, as ADR-200). Wall-clock is +reported and acts as an honesty guard (below). 
+ +## Why this scope is the honest one (central insight) + +ACORN (SIGMOD 2024, arXiv:2403.04871; `ruvector-acorn::AcornIndexGamma`) is +**predicate-agnostic by design**: a denser γ·M graph + expand-all-neighbors traversal stay +navigable *through* predicate-failing nodes, computing a distance for every expanded node, +pass or fail. So ACORN's per-query distance count is **flat-to-rising as selectivity drops** — +and ACORN **owns** the uncorrelated case. Attacking it there is a guaranteed loss. + +Region-pruning wins the opposite case: when the predicate correlates with cluster membership, +whole clusters with zero matches are skipped, and a cheap O(1) predicate test gates the +expensive 128-d distance — so A pays distance-evals only for `routing (≈√n centroids) + +actual matches in probed clusters`, which **shrinks as selectivity drops**. That asymmetry is +the entire bet, and it is the production-RAG metadata-filter case (`tenant_id`, `doc_type`, +`language`, `year≥Y`, `category=X`). + +On embeddings the pruning kernel **cannot** live on graph separators (ADR-199: embedding +graphs are high-treewidth → CCH contraction blew up). Its only viable, treewidth-immune +substrate is the **IVF hierarchy** (`ruvector-rairs`) — i.e. BET 4. **BET 2 (benchmark + +incumbent) and BET 4 (mechanism) are one experiment.** + +## Data & predicates (real, public — ogbn-arxiv) + +n ≈ 169,343, 128-d features (`target/m1-data/arxiv/raw/`, in hand). Oracle = +`ruvector-acorn::exact_filtered_knn`. + +| Predicate | Correlation ρ | Source | +|---|---|---| +| Subject-area label = c (one of 40) | **high** | `node-label.csv.gz` | +| Year ≥ Y / year ∈ [a,b] | **medium** | `node_year.csv.gz` | +| Random Bernoulli(p), equal selectivity | **ρ = 0 (kill control)** | synthetic | + +**Correlation knob ρ:** interpolate a real label predicate toward a random one of equal +selectivity by shuffling a fraction `1−ρ` of membership. Sweep ρ ∈ {0, 0.3, 0.5, 0.7, 1.0}. 
+**Selectivity sweep:** {0.1, 0.5, 1, 5, 10, 30}% (sub-10% is where post-filter collapses). + +## Contenders + +| ID | Index | Role | +|---|---|---| +| **A** | IVF region-pruned filtered search (`rairs::IvfFlat` + per-cluster match-count pruning, predicate-gated distance) | **the bet** | +| **B** | `AcornIndexGamma`, tuned (γ∈{2,3}, ef∈{64,128,200}; best cost at equal recall) | strong incumbent | +| **D** | ACORN + predicate-aware entry points | adversarial "tune harder" (rule #5) | +| **C** | flat / post-filter | floor — proves benchmark teeth (recall collapse at low sel) | + +All scored against `exact_filtered_knn` ground truth. + +## Pre-registered gate + +- **WIN** — at **ρ ≥ 0.7**: A within **2%** filtered-recall@10 of best{B, D} **and** the + distance-eval ratio is **≥ 5× at sel ≤ 1%** and **≥ 2× at sel = 5%**, **monotonically + increasing as selectivity falls** (the mechanism must be visible, not a single lucky cell). +- **Graceful-degradation guard** — in ACORN's regime (sel ≥ 10% **or** ρ ≤ 0.3) A may lose, + but by **≤ 1.5×** in distance-evals (no catastrophic blowup). Cost-axis analogue of the + recall-collapse control. +- **Wall-clock honesty guard** — wall-clock reported alongside; a distance-eval win that + **reverses on wall-clock → "inconclusive," not WIN** (IVF cluster scans vs ACORN's graph + walk have different cache behavior; the win must survive both). +- **KILL (reportable NO-GO)** — *either* A's recall **collapses** at the ρ=0 control (must + degrade *safely* to ≈ the floor, not catastrophically), *or* no (selectivity, ρ) cell meets + the WIN bar. +- **Reported regardless:** the crossover correlation **ρ\*** (and crossover selectivity) where + ACORN overtakes A on cost — the iso-cost frontier is itself a publishable result. + +**Named live risk (not a formality):** ACORN on correlated subgraphs may already be cheap +enough that ≥5× is unreachable → that is a clean, reportable KILL, written up like ADR-199. 
+ +## Where it lives (self-contained off main) + +New crate **`crates/ruvector-filtered-bench`**, depending only on `ruvector-acorn` + +`ruvector-rairs` (+ `rand`). Contender A and the predicate / ρ-knob / selectivity generators +live in `src/`; the harness is `examples/filtered_ann_pruning.rs`. No dependency on +`ruvector-seprag` (PR #535) — this PR stands alone. + +## Milestones + +- **M0 — substrate + oracle wiring.** Load arxiv feat+label+year; build `IvfFlat`; confirm + `exact_filtered_knn` + `recall_at_k` on a slice (use a selectivity floor so #matches ≥ k=10). + Predicate + ρ-knob + selectivity generators. *Gate: oracle matches brute-force exactly.* +- **M1 — contenders B/C/D.** Tuned ACORN sweep + post-filter floor; reproduce the documented + low-selectivity post-filter recall collapse (proves teeth). +- **M2 — contender A.** Cluster probe order (match-count, then centroid distance); zero-match + cluster skip; predicate-gated distance; per-query distance-eval + wall-clock counters. +- **M3 — full sweep + gate eval.** selectivity × ρ grid; emit WIN/KILL table; find ρ\*; + apply the wall-clock honesty guard. +- **M4 — ADR-201.** Write the outcome (WIN or NO-GO) with ADR-199/200 honesty. + +## Out of scope (named, not silently assumed) + +- The uncorrelated/agnostic regime as a *target* (kill control only — ACORN owns it). +- Multi-predicate conjunctions, streaming updates, the live-GNN metric (BET 1's frontier). +- Disk-resident / billion-scale (in-memory ogbn-arxiv is the stage). 
+ From d56ff201b94d5affd2ba553cfdb6fa646ed04371 Mon Sep 17 00:00:00 2001 From: Ofer Shaal Date: Thu, 4 Jun 2026 14:56:52 -0400 Subject: [PATCH 2/8] feat(filtered-bench): M1 incumbents + post-filter teeth + tuned ACORN baseline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instrument ruvector-acorn with additive, result-preserving counted-search variants (acorn_search_counted, flat_filtered_search_counted) so distance-evals — the pre-registered primary cost metric — are measured exactly on ACORN-as-shipped. 13 acorn tests pass incl. a counted==uncounted + flat-evals==#matches invariant. filtered-bench contenders (src/contenders.rs): - B: ACORN predicate-agnostic search (the incumbent), exact eval counts. - C: classic post-filter (retrieve top-pool unfiltered, then filter) — the floor. M1 findings (n=20k arxiv, ρ=1, k=10): - TEETH (examples/teeth.rs): at the gate-relevant low selectivity, post-filter collapses while ACORN holds — sel=0.1%: 73.7% vs 22.7%; sel=0.5%: 90.4% vs 59.7%; sel=1%: 92.6% vs 79.3%. At sel>=5% post-filter is fine (as theory predicts). Benchmark is demonstrably sensitive (50+ pt recall swing) — the negative control. - TUNED ACORN (examples/acorn_tune.rs): ACORN reaches ~92.6% recall at sel=1% with gamma=2, ef=512, at ~1622 evals/query; evals are ~flat in ef (early-termination bound), so "tuned" = crank ef for recall at near-constant cost. This is the fair incumbent baseline for the M3 gate, and it validates the >=5x bar: contender A must reach >=90.6% recall at <=~324 evals/query to win. 
--- crates/ruvector-acorn/src/search.rs | 96 +++++++++++++- .../examples/acorn_tune.rs | 78 +++++++++++ .../ruvector-filtered-bench/examples/teeth.rs | 96 ++++++++++++++ .../ruvector-filtered-bench/src/contenders.rs | 124 ++++++++++++++++++ crates/ruvector-filtered-bench/src/lib.rs | 1 + 5 files changed, 389 insertions(+), 6 deletions(-) create mode 100644 crates/ruvector-filtered-bench/examples/acorn_tune.rs create mode 100644 crates/ruvector-filtered-bench/examples/teeth.rs create mode 100644 crates/ruvector-filtered-bench/src/contenders.rs diff --git a/crates/ruvector-acorn/src/search.rs b/crates/ruvector-acorn/src/search.rs index 98e2ee61a8..1983600cc2 100644 --- a/crates/ruvector-acorn/src/search.rs +++ b/crates/ruvector-acorn/src/search.rs @@ -33,6 +33,34 @@ pub fn acorn_search( k: usize, ef: usize, predicate: impl Fn(u32) -> bool, +) -> Vec<(u32, f32)> { + let mut evals = 0u64; + acorn_search_impl(graph, query, k, ef, predicate, &mut evals) +} + +/// Like [`acorn_search`] but also returns the exact number of distance +/// evaluations (`l2_sq` calls) performed — the hardware-independent cost metric +/// used by the filtered-ANN benchmark (`ruvector-filtered-bench`). Results are +/// identical to [`acorn_search`]; only the eval counter is added. +pub fn acorn_search_counted( + graph: &AcornGraph, + query: &[f32], + k: usize, + ef: usize, + predicate: impl Fn(u32) -> bool, +) -> (Vec<(u32, f32)>, u64) { + let mut evals = 0u64; + let out = acorn_search_impl(graph, query, k, ef, predicate, &mut evals); + (out, evals) +} + +fn acorn_search_impl( + graph: &AcornGraph, + query: &[f32], + k: usize, + ef: usize, + predicate: impl Fn(u32) -> bool, + evals: &mut u64, ) -> Vec<(u32, f32)> { if graph.is_empty() { return vec![]; @@ -44,12 +72,19 @@ pub fn acorn_search( // point. O(probes × D) overhead vs O(n × D) for flat — negligible. 
let n_probes = (n as f64).sqrt().ceil() as usize; let n_probes = n_probes.clamp(4, 64); - let entry = (0..n_probes) - .map(|i| (i * n / n_probes) as u32) - .min_by(|&a, &b| { - l2_sq(query, graph.row(a as usize)).total_cmp(&l2_sq(query, graph.row(b as usize))) - }) - .unwrap_or(0); + // Count each probe distance once (result-identical to the min_by form, which + // recomputed l2_sq inside the comparator — the count reflects fundamental work). + let mut entry = 0u32; + let mut best = f32::INFINITY; + for i in 0..n_probes { + let cand = (i * n / n_probes) as u32; + let d = l2_sq(query, graph.row(cand as usize)); + *evals += 1; + if d < best { + best = d; + entry = cand; + } + } let mut visited: Vec<bool> = vec![false; n]; // Min-heap by distance — pop closest unexplored candidate first. @@ -61,6 +96,7 @@ pub fn acorn_search( let mut farthest_in_beam: BinaryHeap<OrdF32> = BinaryHeap::with_capacity(ef + 1); let d0 = l2_sq(query, graph.row(entry as usize)); + *evals += 1; candidates.push(Reverse((OrdF32(d0), entry))); farthest_in_beam.push(OrdF32(d0)); visited[entry as usize] = true; @@ -93,6 +129,7 @@ pub fn acorn_search( } visited[ni] = true; let nd = l2_sq(query, graph.row(ni)); + *evals += 1; // Bounded beam: only admit if there's room or the new candidate // is closer than the worst pending one. @@ -129,6 +166,30 @@ pub fn flat_filtered_search( query: &[f32], k: usize, predicate: impl Fn(u32) -> bool, +) -> Vec<(u32, f32)> { + let mut evals = 0u64; + flat_filtered_search_impl(data, query, k, predicate, &mut evals) +} + +/// Like [`flat_filtered_search`] but also returns the exact distance-eval count +/// (one `l2_sq` per predicate-passing vector). Results identical.
+pub fn flat_filtered_search_counted( + data: &[Vec<f32>], + query: &[f32], + k: usize, + predicate: impl Fn(u32) -> bool, +) -> (Vec<(u32, f32)>, u64) { + let mut evals = 0u64; + let out = flat_filtered_search_impl(data, query, k, predicate, &mut evals); + (out, evals) +} + +fn flat_filtered_search_impl( + data: &[Vec<f32>], + query: &[f32], + k: usize, + predicate: impl Fn(u32) -> bool, + evals: &mut u64, ) -> Vec<(u32, f32)> { let mut heap: BinaryHeap<(OrdF32, u32)> = BinaryHeap::with_capacity(k + 1); @@ -137,6 +198,7 @@ pub fn flat_filtered_search( continue; } let d = l2_sq(v, query); + *evals += 1; if heap.len() < k { heap.push((OrdF32(d), i as u32)); } else if let Some(&(OrdF32(worst), _)) = heap.peek() { @@ -199,6 +261,28 @@ mod tests { } } + #[test] + fn counted_variants_match_uncounted_and_count_evals() { + // The benchmark depends on this invariant: *_counted returns identical + // results to the plain fn, plus a positive, finite eval count. + let data = unit_data(40); + let graph = AcornGraph::build(data.clone(), 8).unwrap(); + let query = vec![17.0_f32, 0.0]; + let pred = |id: u32| id % 3 == 0; + + let plain = acorn_search(&graph, &query, 5, 60, pred); + let (counted, evals) = acorn_search_counted(&graph, &query, 5, 60, pred); + assert_eq!(plain, counted, "counted search must match plain search"); + assert!(evals > 0, "must record at least the entry probes"); + + let fplain = flat_filtered_search(&data, &query, 5, pred); + let (fcounted, fevals) = flat_filtered_search_counted(&data, &query, 5, pred); + assert_eq!(fplain, fcounted); + // Flat does exactly one eval per predicate-passing vector.
+ let n_pass = (0..data.len() as u32).filter(|&i| pred(i)).count() as u64; + assert_eq!(fevals, n_pass, "flat evals == #matches"); + } + #[test] fn acorn_search_half_predicate() { let data = unit_data(30); diff --git a/crates/ruvector-filtered-bench/examples/acorn_tune.rs b/crates/ruvector-filtered-bench/examples/acorn_tune.rs new file mode 100644 index 0000000000..a5325269ff --- /dev/null +++ b/crates/ruvector-filtered-bench/examples/acorn_tune.rs @@ -0,0 +1,78 @@ +//! M1 — find ACORN's *tuned* operating point (rule #2: beat the incumbent tuned). +//! +//! Sweeps ef × γ for filtered recall@10 at a representative low selectivity (ρ=1), so the +//! later head-to-head compares against ACORN at its best, not an under-tuned strawman. +//! +//! Run: cargo run --release -p ruvector-filtered-bench --example acorn_tune -- [N] [Q] [sel] + +use ruvector_acorn::graph::exact_filtered_knn; +use ruvector_filtered_bench::contenders::{recall, Acorn}; +use ruvector_filtered_bench::data::{Dataset, FEAT_100K}; +use ruvector_filtered_bench::predicate; + +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; +use std::path::Path; + +fn main() { + let args: Vec = std::env::args().collect(); + let n: usize = args.get(1).and_then(|s| s.parse().ok()).unwrap_or(20_000); + let q_count: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(200); + let sel: f64 = args.get(3).and_then(|s| s.parse().ok()).unwrap_or(0.01); + + if !Path::new(FEAT_100K).exists() { + eprintln!("data not extracted ({FEAT_100K}); skipping."); + return; + } + + let k = 10; + let ds = Dataset::load_arxiv(n); + let n = ds.len(); + let mut rng = StdRng::seed_from_u64(7); + let pred = predicate::correlated(&ds.labels, sel, 1.0, 0, &mut rng); + let pf = pred.as_fn(); + let queries: Vec = (0..q_count).map(|_| rng.gen_range(0..n)).collect(); + + // Precompute truth once per query (independent of ef/γ). 
+ let truths: Vec> = queries + .iter() + .map(|&qi| { + exact_filtered_knn(&ds.feats, &ds.feats[qi], k + 1, pf) + .into_iter() + .filter(|&id| id as usize != qi) + .take(k) + .collect() + }) + .collect(); + + println!( + "\n=== ACORN tuning: filtered recall@{k} (n={n}, sel={sel}, #match={}, Q={q_count}) ===", + pred.n_match + ); + println!("{:>5} {:>6} | {:>10} {:>11}", "γ", "ef", "recall", "evals/q"); + println!("{}", "-".repeat(40)); + + for &gamma in &[2usize, 3] { + let acorn = Acorn::build(&ds.feats, gamma, 64); // ef field unused; we pass ef below + for &ef in &[64usize, 128, 256, 512, 1024] { + let (mut rec, mut ev) = (0.0, 0u64); + for (qi, truth) in queries.iter().zip(&truths) { + let (got, evals) = + ruvector_acorn::search::acorn_search_counted(&acorn.graph, &ds.feats[*qi], k, ef, pf); + let got: Vec = got + .into_iter() + .map(|(id, _)| id) + .filter(|&id| id as usize != *qi) + .collect(); + rec += recall(truth, &got); + ev += evals; + } + let nq = queries.len() as f64; + println!( + "{gamma:>5} {ef:>6} | {:>9.1}% {:>11}", + 100.0 * rec / nq, + ev / queries.len() as u64 + ); + } + } +} diff --git a/crates/ruvector-filtered-bench/examples/teeth.rs b/crates/ruvector-filtered-bench/examples/teeth.rs new file mode 100644 index 0000000000..acf7b63d95 --- /dev/null +++ b/crates/ruvector-filtered-bench/examples/teeth.rs @@ -0,0 +1,96 @@ +//! M1 — "the benchmark has teeth." +//! +//! Before claiming contender A beats ACORN, we must show the problem is real: at low +//! selectivity, the classic **post-filter** baseline (retrieve top-`pool` ignoring the +//! predicate, then filter) collapses, while ACORN's predicate-agnostic search holds recall. +//! Both run on the *same* ACORN-γ graph, so the only variable is the traversal policy — +//! isolating post-filter as the cause of the collapse (not graph density). +//! +//! This is the negative-control analogue of ADR-200's stale-index control: if post-filter +//! 
did *not* collapse, the benchmark would be insensitive and any later "win" meaningless. +//! +//! Run: cargo run --release -p ruvector-filtered-bench --example teeth -- [N] [Q] [seed] + +use ruvector_acorn::graph::exact_filtered_knn; +use ruvector_filtered_bench::contenders::{recall, Acorn}; +use ruvector_filtered_bench::data::{Dataset, FEAT_100K}; +use ruvector_filtered_bench::predicate; + +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; +use std::path::Path; + +fn main() { + let args: Vec = std::env::args().collect(); + let n: usize = args.get(1).and_then(|s| s.parse().ok()).unwrap_or(20_000); + let q_count: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(200); + let seed: u64 = args.get(3).and_then(|s| s.parse().ok()).unwrap_or(7); + + if !Path::new(FEAT_100K).exists() { + eprintln!("data not extracted ({FEAT_100K}); see src/data.rs header. skipping."); + return; + } + + let k = 10; + let ef = 512; // tuned operating point (see acorn_tune: ~92% recall at sel=1%, n=20k) + let pool = 512; // post-filter retrieval pool == ef (generous; not a strawman k-only pool) + let gamma = 2; + + eprintln!("[teeth] loading arxiv slice n={n}…"); + let ds = Dataset::load_arxiv(n); + let n = ds.len(); + eprintln!("[teeth] building ACORN-γ (γ={gamma}, {} edges/node, ef={ef})…", 16 * gamma); + let t0 = std::time::Instant::now(); + let acorn = Acorn::build(&ds.feats, gamma, ef); + eprintln!("[teeth] graph built in {:.1}s", t0.elapsed().as_secs_f64()); + + let mut rng = StdRng::seed_from_u64(seed); + let queries: Vec = (0..q_count).map(|_| rng.gen_range(0..n)).collect(); + + println!("\n=== M1 teeth: post-filter collapse vs ACORN-agnostic (ρ=1, n={n}, k={k}, Q={q_count}) ==="); + println!( + "{:>7} {:>8} | {:>10} {:>10} | {:>11} {:>11}", + "sel", "#match", "B_recall", "C_recall", "B_evals", "C_evals" + ); + println!("{}", "-".repeat(66)); + + for &sel in &[0.001_f64, 0.005, 0.01, 0.05, 0.10, 0.30] { + let pred = predicate::correlated(&ds.labels, sel, 1.0, 0, &mut 
rng); + if pred.n_match < k { + println!("{sel:>7.3} {:>8} | (skipped: #match < k)", pred.n_match); + continue; + } + let pf = pred.as_fn(); + + let (mut b_rec, mut c_rec, mut b_ev, mut c_ev) = (0.0, 0.0, 0u64, 0u64); + for &qi in &queries { + let q = &ds.feats[qi]; + // Exclude the query's own id so the trivial self-match (distance 0) can't + // inflate either contender. + let truth: Vec = exact_filtered_knn(&ds.feats, q, k + 1, pf) + .into_iter() + .filter(|&id| id as usize != qi) + .take(k) + .collect(); + + let b = acorn.search(q, k, pf); + let c = acorn.postfilter(q, k, pool, pf); + let strip = |ids: Vec| ids.into_iter().filter(|&id| id as usize != qi).collect::>(); + + b_rec += recall(&truth, &strip(b.ids)); + c_rec += recall(&truth, &strip(c.ids)); + b_ev += b.evals; + c_ev += c.evals; + } + let nq = queries.len() as f64; + println!( + "{sel:>7.3} {:>8} | {:>9.1}% {:>9.1}% | {:>11} {:>11}", + pred.n_match, + 100.0 * b_rec / nq, + 100.0 * c_rec / nq, + b_ev / queries.len() as u64, + c_ev / queries.len() as u64, + ); + } + println!("\nExpected (teeth): C_recall falls sharply as sel→0 while B_recall stays high."); +} diff --git a/crates/ruvector-filtered-bench/src/contenders.rs b/crates/ruvector-filtered-bench/src/contenders.rs new file mode 100644 index 0000000000..58cf491d22 --- /dev/null +++ b/crates/ruvector-filtered-bench/src/contenders.rs @@ -0,0 +1,124 @@ +//! M1 — incumbents (B/D) and the post-filter floor (C), each reporting exact +//! distance-evals via the instrumented `ruvector-acorn` search. +//! +//! All three drive the **real** `AcornGraph` + `acorn_search_counted` — not a +//! re-implementation — so the head-to-head measures ACORN as shipped (protocol +//! rule #2). Contender A (region-pruned IVF) arrives in M2 (`prune` module). + +use ruvector_acorn::graph::AcornGraph; +use ruvector_acorn::search::acorn_search_counted; + +/// ACORN edge budget base (γ·M neighbors/node); matches `AcornIndexGamma::M`. 
+pub const ACORN_M: usize = 16; + +/// Outcome of one filtered query: the returned ids (nearest-first) and the exact +/// number of distance evaluations spent — the pre-registered primary cost metric. +pub struct QueryResult { + pub ids: Vec, + pub evals: u64, +} + +/// A real ACORN-γ graph. Drives **B** (predicate-agnostic search) and **C** (the +/// post-filter floor) off one graph, so the only variable between them is the +/// traversal policy — the cleanest demonstration that post-filter, not graph +/// density, is what collapses at low selectivity. +pub struct Acorn { + pub graph: AcornGraph, + pub gamma: usize, + pub ef: usize, +} + +impl Acorn { + /// Build the incumbent graph. `gamma` = 2 is `AcornIndexGamma`'s default + /// (32 edges/node); `gamma` = 3 is the "tune harder" variant (D's denser graph). + pub fn build(feats: &[Vec], gamma: usize, ef: usize) -> Self { + let graph = AcornGraph::build(feats.to_vec(), ACORN_M * gamma) + .expect("acorn graph build"); + Acorn { graph, gamma, ef } + } + + /// **Contender B** — ACORN predicate-agnostic search (expands all neighbors). + pub fn search(&self, query: &[f32], k: usize, predicate: impl Fn(u32) -> bool) -> QueryResult { + let (got, evals) = acorn_search_counted(&self.graph, query, k, self.ef, predicate); + QueryResult { ids: got.into_iter().map(|(id, _)| id).collect(), evals } + } + + /// **Contender C** — classic post-filter: retrieve the `pool` nearest neighbors + /// *ignoring* the predicate, then keep the first `k` that pass. At low + /// selectivity the unfiltered pool is almost all non-matching, so few (or zero) + /// survive → recall collapses. This is the floor ACORN was designed to beat; + /// reproducing the collapse proves the benchmark has teeth. + pub fn postfilter( + &self, + query: &[f32], + k: usize, + pool: usize, + predicate: impl Fn(u32) -> bool, + ) -> QueryResult { + let pool = pool.max(k); + // Unfiltered retrieval (predicate = always-true); cost is the search's evals. 
+ let (cands, evals) = acorn_search_counted(&self.graph, query, pool, self.ef, |_| true); + let ids = cands + .into_iter() + .map(|(id, _)| id) + .filter(|&id| predicate(id)) + .take(k) + .collect(); + QueryResult { ids, evals } + } +} + +/// Recall@k against an exact filtered-kNN truth set: fraction of the true top-k +/// that the contender returned. `truth` may be shorter than k when matches < k. +pub fn recall(truth: &[u32], got: &[u32]) -> f64 { + if truth.is_empty() { + return 1.0; + } + let got_set: std::collections::HashSet = got.iter().copied().collect(); + let hit = truth.iter().filter(|id| got_set.contains(id)).count(); + hit as f64 / truth.len() as f64 +} + +#[cfg(test)] +mod tests { + use super::*; + use ruvector_acorn::graph::exact_filtered_knn; + + fn ramp(n: usize) -> Vec> { + (0..n).map(|i| vec![i as f32, (i % 7) as f32]).collect() + } + + #[test] + fn agnostic_beats_postfilter_when_selective() { + // A predicate matching only every 11th node (~9%) should let ACORN's + // agnostic search keep recall while post-filter (pool=k) starves. 
+ let feats = ramp(600); + let acorn = Acorn::build(&feats, 2, 80); + let k = 5; + let pred = |id: u32| id.is_multiple_of(11); + + let (mut agn_hits, mut pf_hits, mut n) = (0.0, 0.0, 0.0); + for qi in (0..600).step_by(97) { + let truth = exact_filtered_knn(&feats, &feats[qi], k, pred); + let agn = acorn.search(&feats[qi], k, pred); + let pf = acorn.postfilter(&feats[qi], k, k, pred); // tight pool → starves + agn_hits += recall(&truth, &agn.ids); + pf_hits += recall(&truth, &pf.ids); + n += 1.0; + } + assert!( + agn_hits / n >= pf_hits / n, + "agnostic recall {:.2} should be >= post-filter recall {:.2}", + agn_hits / n, + pf_hits / n + ); + } + + #[test] + fn evals_are_recorded() { + let feats = ramp(300); + let acorn = Acorn::build(&feats, 2, 64); + let r = acorn.search(&feats[10], 5, |_| true); + assert!(r.evals > 0); + } +} diff --git a/crates/ruvector-filtered-bench/src/lib.rs b/crates/ruvector-filtered-bench/src/lib.rs index b3206e97aa..a9b2eb4ea2 100644 --- a/crates/ruvector-filtered-bench/src/lib.rs +++ b/crates/ruvector-filtered-bench/src/lib.rs @@ -13,6 +13,7 @@ //! - `predicate` (M0) — predicate families + ρ-correlation knob + selectivity targeting. //! - `prune` (M2) — contender A: region-pruned IVF filtered search + eval counters. +pub mod contenders; pub mod data; pub mod predicate; From 57b27b3c22bfcaaf45ce0b5bf52e0e645c263f00 Mon Sep 17 00:00:00 2001 From: Ofer Shaal Date: Thu, 4 Jun 2026 16:07:27 -0400 Subject: [PATCH 3/8] =?UTF-8?q?feat(filtered-bench):=20M2=20contender=20A?= =?UTF-8?q?=20=E2=80=94=20region-pruned=20IVF=20(exact=20B&B)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit src/prune.rs: RegionPruneIvf, built on ruvector-rairs k-means (ADR-193 substrate). Two stacked prunings realizing the salvaged SepRAG kernel on the treewidth-immune IVF hierarchy: 1. predicate pruning — skip clusters with zero matching members (the BET-2 win). 2. 
branch-and-bound distance pruning — triangle-inequality lower bound (dist(q,centroid) - radius); once the top-k heap is full, clusters whose LB exceeds the worst result are skipped. Probe in LB order so the bound lets us break, not just skip — a strict improvement over the M2-sketch's match-count ordering, and it yields EXACT filtered top-k. Cost metric = nclusters (routing) + matching members scanned; the O(1) predicate gates the expensive distance, so non-matching points cost nothing (the asymmetry vs ACORN, which evaluates a distance per expanded node regardless of predicate). max_probe knob: None = exact B&B (recall 1.0); Some(p) caps match-clusters probed (trades recall for fewer evals, mirroring ACORN's ef) for equal-recall comparison. Tests: exact_bb_matches_oracle (recall 1.0 vs exact_filtered_knn on 20 queries) and zero_match_clusters_are_skipped (1% selectivity → <1000 evals vs 4000 full scan). 8 unit + 1 integration green, clippy clean. --- crates/ruvector-filtered-bench/src/lib.rs | 1 + crates/ruvector-filtered-bench/src/prune.rs | 200 ++++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 crates/ruvector-filtered-bench/src/prune.rs diff --git a/crates/ruvector-filtered-bench/src/lib.rs b/crates/ruvector-filtered-bench/src/lib.rs index a9b2eb4ea2..2e892f05f1 100644 --- a/crates/ruvector-filtered-bench/src/lib.rs +++ b/crates/ruvector-filtered-bench/src/lib.rs @@ -16,6 +16,7 @@ pub mod contenders; pub mod data; pub mod predicate; +pub mod prune; // Re-export the substrate + incumbent + oracle so the head-to-head harness has one entry // point and the dependency graph is exercised at build time. diff --git a/crates/ruvector-filtered-bench/src/prune.rs b/crates/ruvector-filtered-bench/src/prune.rs new file mode 100644 index 0000000000..78819a2462 --- /dev/null +++ b/crates/ruvector-filtered-bench/src/prune.rs @@ -0,0 +1,200 @@ +//! M2 — contender A: region-pruned IVF filtered search. +//! +//! 
Built on `ruvector-rairs` k-means (the ADR-193 IVF substrate). Two stacked prunings, +//! both realizing the salvaged SepRAG kernel on a treewidth-immune cluster hierarchy: +//! +//! 1. **Predicate pruning** — skip every cluster with zero predicate-matching members. +//! This is the BET-2 win: a correlated metadata filter concentrates matches in a few +//! clusters, so most of the corpus is never touched. +//! 2. **Branch-and-bound distance pruning** — by the triangle inequality, the nearest +//! possible point in cluster `c` is `dist(q, centroid_c) − radius_c`. Once the top-k +//! heap is full, clusters whose lower bound exceeds the current k-th distance cannot +//! improve the result and are skipped. With a valid lower bound this is **exact**. +//! +//! Cost (the pre-registered metric) = `#centroids routed (= nclusters)` + `#matching +//! members for which a distance was computed`. The O(1) predicate test gates the +//! expensive distance, so non-matching points cost nothing — the asymmetry vs ACORN +//! (which evaluates a distance per expanded node regardless of predicate). + +use ruvector_rairs::kmeans; + +use crate::contenders::QueryResult; + +#[inline] +fn l2_sq(a: &[f32], b: &[f32]) -> f32 { + a.iter().zip(b).map(|(x, y)| (x - y) * (x - y)).sum() +} + +/// Region-pruned IVF index (contender A). +pub struct RegionPruneIvf { + centroids: Vec<Vec<f32>>, + /// `members[c]` = node ids assigned to cluster `c`. + members: Vec<Vec<u32>>, + /// `radius[c]` = max **L2** distance (not squared) from centroid `c` to any member — + /// the triangle-inequality slack for the branch-and-bound lower bound. + radius: Vec<f32>, + pub nclusters: usize, +} + +impl RegionPruneIvf { + /// Partition `feats` into `nclusters` k-means cells (rairs clustering).
+ pub fn build(feats: &[Vec<f32>], nclusters: usize, max_iter: usize, seed: u64) -> Self { + let (centroids, assign) = kmeans::train(feats, nclusters, max_iter, seed); + let k = centroids.len(); + let mut members = vec![Vec::new(); k]; + for (id, &c) in assign.iter().enumerate() { + members[c].push(id as u32); + } + let radius = (0..k) + .map(|c| { + members[c] + .iter() + .map(|&id| l2_sq(&centroids[c], &feats[id as usize]).sqrt()) + .fold(0.0_f32, f32::max) + }) + .collect(); + RegionPruneIvf { centroids, members, radius, nclusters: k } + } + + /// Region-pruned filtered top-k search. + /// + /// `max_probe = None` runs exact branch-and-bound (recall 1.0); `Some(p)` caps the + /// number of *match-containing* clusters probed (the approximate knob that trades + /// recall for fewer distance-evals, mirroring ACORN's `ef`). + pub fn search( + &self, + feats: &[Vec<f32>], + query: &[f32], + k: usize, + predicate: impl Fn(u32) -> bool, + max_probe: Option<usize>, + ) -> QueryResult { + let mut evals = 0u64; + + // 1. Route: distance to every centroid (the fixed routing cost). + let mut clusters: Vec<(f32, usize)> = (0..self.nclusters) + .map(|c| { + evals += 1; + (l2_sq(query, &self.centroids[c]), c) + }) + .collect(); + + // Lower bound per cluster (squared L2): (max(0, sqrt(d_qc) - radius))^2. + // Sorting by LB lets us *break* (not just skip) once LB exceeds the worst result. + let lb_sq = |d_qc_sq: f32, c: usize| { + let lb = (d_qc_sq.sqrt() - self.radius[c]).max(0.0); + lb * lb + }; + clusters.sort_by(|&(da, ca), &(db, cb)| { + lb_sq(da, ca).total_cmp(&lb_sq(db, cb)) + }); + + // 2. Probe in lower-bound order, skipping zero-match clusters; B&B early-out. + // Max-heap on squared distance — peek = current worst of the top-k. + let mut heap: std::collections::BinaryHeap<(ordered::Of, u32)> = + std::collections::BinaryHeap::with_capacity(k + 1); + let mut probed = 0usize; + + for &(d_qc_sq, c) in &clusters { + // B&B: once the heap is full, no later cluster (sorted by LB) can help.
+ if heap.len() >= k { + if let Some(&(ordered::Of(worst), _)) = heap.peek() { + if lb_sq(d_qc_sq, c) >= worst { + break; + } + } + } + // Does this cluster contain any match? (cheap O(1) tests, not distance-evals) + let mut any = false; + for &id in &self.members[c] { + if !predicate(id) { + continue; + } + any = true; + let d = l2_sq(query, &feats[id as usize]); + evals += 1; + if heap.len() < k { + heap.push((ordered::Of(d), id)); + } else if let Some(&(ordered::Of(worst), _)) = heap.peek() { + if d < worst { + heap.pop(); + heap.push((ordered::Of(d), id)); + } + } + } + if any { + probed += 1; + if let Some(cap) = max_probe { + if probed >= cap { + break; + } + } + } + } + + let mut out: Vec<(u32, f32)> = + heap.into_iter().map(|(ordered::Of(d), id)| (id, d)).collect(); + out.sort_by(|a, b| a.1.total_cmp(&b.1)); + QueryResult { ids: out.into_iter().map(|(id, _)| id).collect(), evals } + } +} + +/// Minimal total-ordered f32 wrapper for the binary heap (NaN-free distances). +mod ordered { + #[derive(Clone, Copy, PartialEq)] + pub struct Of(pub f32); + impl Eq for Of {} + impl PartialOrd for Of { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + impl Ord for Of { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.total_cmp(&other.0) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use ruvector_acorn::graph::exact_filtered_knn; + use rand::rngs::StdRng; + use rand::{Rng, SeedableRng}; + + fn gauss(n: usize, dim: usize, seed: u64) -> Vec> { + let mut rng = StdRng::seed_from_u64(seed); + (0..n) + .map(|_| (0..dim).map(|_| rng.gen_range(-1.0_f32..1.0)).collect()) + .collect() + } + + #[test] + fn exact_bb_matches_oracle() { + // max_probe = None must return the exact filtered top-k (recall 1.0). 
+ let feats = gauss(2000, 16, 1); + let idx = RegionPruneIvf::build(&feats, 48, 10, 7); + let k = 10; + let pred = |id: u32| id.is_multiple_of(4); + let mut rng = StdRng::seed_from_u64(99); + for _ in 0..20 { + let qi = rng.gen_range(0..feats.len()); + let truth = exact_filtered_knn(&feats, &feats[qi], k, pred); + let got = idx.search(&feats, &feats[qi], k, pred, None); + assert_eq!(got.ids, truth, "exact B&B must equal the oracle"); + } + } + + #[test] + fn zero_match_clusters_are_skipped() { + // A predicate matching a tiny fraction must cost far fewer evals than scanning all. + let feats = gauss(4000, 16, 2); + let idx = RegionPruneIvf::build(&feats, 64, 10, 7); + let pred = |id: u32| id < 40; // 1% selectivity + let r = idx.search(&feats, &feats[0], 10, pred, None); + // evals = nclusters routing + matches scanned; must be << full scan (4000). + assert!(r.evals < 1000, "pruning failed: {} evals", r.evals); + assert!(r.evals >= idx.nclusters as u64, "must at least route to all centroids"); + } +} From 48e994c0c47492b6db7fddcee3edd325de7b379c Mon Sep 17 00:00:00 2001 From: Ofer Shaal Date: Thu, 4 Jun 2026 16:13:48 -0400 Subject: [PATCH 4/8] =?UTF-8?q?feat(filtered-bench):=20M3=20sweep=20+=20ga?= =?UTF-8?q?te=20eval=20=E2=80=94=20partial=20WIN=20(boundary=20at=20sel=0.7: region-pruned IVF beats tuned ACORN by 6.1-48x evals and 4.7-26x wall-clock at equal-or-better recall (A's exact B&B recall >= ACORN). e.g. rho=1 sel=1%: ACORN 92.6%@1622 evals vs A 99.9%@264 evals = 6.1x (4.7x wall). - MISS at sel=5%: best 1.5x (gate wanted >=2x). The win is a low-selectivity (<=1%) phenomenon — the dominant production metadata-filter regime, but a real boundary, not the full pre-registered claim. - Mechanism partly refuted: A also wins at rho=0 (low sel), so the eval advantage is selectivity-driven (few matches -> cheap exact B&B) more than correlation- driven; correlation governs recall, not cost. Reported, not buried. 
- rho=0 kill control: A does NOT collapse (recall-safe); high-sel (>=10%) A loses as expected (ACORN's regime). Wall-clock guard: PASS (win survives the clock). nclusters is A's tuning knob (parallel to ACORN's ef): 64 beats 128 in the win regime (cheaper routing); both confirm the same boundary. --- .../ruvector-filtered-bench/examples/sweep.rs | 159 ++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 crates/ruvector-filtered-bench/examples/sweep.rs diff --git a/crates/ruvector-filtered-bench/examples/sweep.rs b/crates/ruvector-filtered-bench/examples/sweep.rs new file mode 100644 index 0000000000..8a8ebe5d28 --- /dev/null +++ b/crates/ruvector-filtered-bench/examples/sweep.rs @@ -0,0 +1,159 @@ +//! M3 — the frozen-gate run: tuned ACORN vs region-pruned IVF over a selectivity × ρ grid. +//! +//! Compares **cost at matched recall** (the honest framing): contender A's exact B&B has +//! recall 1.0 ≥ ACORN, so we tune A's probe cap down until its recall ≈ ACORN's, then +//! compare distance-evals/query. Reports the ratio against the pre-registered gate +//! (≥5× at sel≤1%, ≥2× at sel=5%, ρ≥0.7), the ρ=0 kill control, and wall-clock (the +//! honesty guard — a distance-eval win that reverses on wall-clock is "inconclusive"). +//! +//! 
Run: cargo run --release -p ruvector-filtered-bench --example sweep -- [N] [Q] [nclusters] [ef] [seed] + +use ruvector_acorn::graph::exact_filtered_knn; +use ruvector_acorn::search::acorn_search_counted; +use ruvector_filtered_bench::contenders::{recall, Acorn}; +use ruvector_filtered_bench::data::{Dataset, FEAT_100K}; +use ruvector_filtered_bench::predicate; +use ruvector_filtered_bench::prune::RegionPruneIvf; + +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; +use std::path::Path; +use std::time::Instant; + +const K: usize = 10; +const GATE: f64 = 0.02; // recall match tolerance + +fn main() { + let a: Vec = std::env::args().collect(); + let n: usize = a.get(1).and_then(|s| s.parse().ok()).unwrap_or(20_000); + let q_count: usize = a.get(2).and_then(|s| s.parse().ok()).unwrap_or(200); + let nclusters: usize = a.get(3).and_then(|s| s.parse().ok()).unwrap_or(128); + let ef: usize = a.get(4).and_then(|s| s.parse().ok()).unwrap_or(512); + let seed: u64 = a.get(5).and_then(|s| s.parse().ok()).unwrap_or(7); + + if !Path::new(FEAT_100K).exists() { + eprintln!("data not extracted ({FEAT_100K}); skipping."); + return; + } + + let ds = Dataset::load_arxiv(n); + let n = ds.len(); + eprintln!("[sweep] n={n} Q={q_count} nclusters={nclusters} ef={ef}"); + eprintln!("[sweep] building ACORN-γ2 + region-prune IVF…"); + let t = Instant::now(); + let acorn = Acorn::build(&ds.feats, 2, ef); + let ivf = RegionPruneIvf::build(&ds.feats, nclusters, 15, seed); + eprintln!("[sweep] built in {:.1}s (ivf nclusters={})", t.elapsed().as_secs_f64(), ivf.nclusters); + + let mut rng = StdRng::seed_from_u64(seed); + let queries: Vec = (0..q_count).map(|_| rng.gen_range(0..n)).collect(); + + let sels = [0.001_f64, 0.005, 0.01, 0.05, 0.10, 0.30]; + let rhos = [0.0_f64, 0.3, 0.5, 0.7, 1.0]; + let probe_caps = [1usize, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128]; + + println!("\n=== M3 sweep (n={n}, k={K}, ACORN γ2 ef={ef}, IVF nclusters={}) ===", ivf.nclusters); + println!( + "{:>4} {:>6} 
{:>7} | {:>7} {:>8} | {:>7} {:>8} | {:>7} {:>8} {:>6} {:>6} | verdict", + "ρ", "sel", "#match", "B_rec", "B_evals", "Aex_rec", "Aex_ev", "Am_rec", "Am_evals", "ev×", "wc×" + ); + println!("{}", "-".repeat(104)); + + for &rho in &rhos { + for &sel in &sels { + let pred = predicate::correlated(&ds.labels, sel, rho, 0, &mut rng); + if pred.n_match < K { + continue; + } + let pf = pred.as_fn(); + + // Truth per query (exclude self to avoid trivial distance-0 inflation). + let truths: Vec> = queries + .iter() + .map(|&qi| { + exact_filtered_knn(&ds.feats, &ds.feats[qi], K + 1, pf) + .into_iter() + .filter(|&id| id as usize != qi) + .take(K) + .collect() + }) + .collect(); + + // ACORN (B). + let (b_rec, b_ev, b_ms) = measure(&queries, &truths, |qi| { + let (got, ev) = acorn_search_counted(&acorn.graph, &ds.feats[qi], K, ef, pf); + (got.into_iter().map(|(id, _)| id).collect(), ev) + }); + + // A exact (B&B, recall ~1.0). + let (aex_rec, aex_ev, _) = measure(&queries, &truths, |qi| { + let r = ivf.search(&ds.feats, &ds.feats[qi], K, pf, None); + (r.ids, r.evals) + }); + + // A matched: smallest probe cap with recall >= b_rec - GATE. + let mut am_rec = aex_rec; + let mut am_ev = aex_ev; + let mut am_ms = 0.0; + for &cap in &probe_caps { + let (r, ev, ms) = measure(&queries, &truths, |qi| { + let res = ivf.search(&ds.feats, &ds.feats[qi], K, pf, Some(cap)); + (res.ids, res.evals) + }); + if r >= b_rec - GATE { + am_rec = r; + am_ev = ev; + am_ms = ms; + break; + } + } + + let ratio = if am_ev > 0 { b_ev as f64 / am_ev as f64 } else { 0.0 }; + // Wall-clock honesty guard: a distance-eval win that reverses on the clock is + // not a real win. wc_ratio > 1 means A is also faster in wall time. 
+ let wc_ratio = if am_ms > 0.0 { b_ms / am_ms } else { 0.0 }; + let target = if sel <= 0.01 { 5.0 } else if sel <= 0.05 { 2.0 } else { 0.0 }; + let verdict = if rho >= 0.7 && target > 0.0 { + if ratio >= target { "WIN" } else { "miss" } + } else if rho <= 0.3 { + // graceful-degradation guard: A must not lose by >1.5x + if ratio >= 1.0 / 1.5 { "ok(ctrl)" } else { "DEGRADE" } + } else { + "—" + }; + + println!( + "{rho:>4.1} {sel:>6.3} {:>7} | {:>6.1}% {:>8} | {:>6.1}% {:>8} | {:>6.1}% {:>8} {:>5.1}× {:>5.1}× | {verdict}", + pred.n_match, + 100.0 * b_rec, + b_ev, + 100.0 * aex_rec, + aex_ev, + 100.0 * am_rec, + am_ev, + ratio, + wc_ratio, + ); + } + println!(); + } +} + +/// Mean recall, mean distance-evals, mean wall-clock(µs) over the query set. +fn measure( + queries: &[usize], + truths: &[Vec], + mut run: impl FnMut(usize) -> (Vec, u64), +) -> (f64, u64, f64) { + let mut rec = 0.0; + let mut ev = 0u64; + let t = Instant::now(); + for (&qi, truth) in queries.iter().zip(truths) { + let (ids, e) = run(qi); + let ids: Vec = ids.into_iter().filter(|&id| id as usize != qi).collect(); + rec += recall(truth, &ids); + ev += e; + } + let nq = queries.len() as f64; + (rec / nq, ev / queries.len() as u64, t.elapsed().as_secs_f64() * 1e6 / nq) +} From 43d62c07809a051026886cbae84810c54e06580f Mon Sep 17 00:00:00 2001 From: Ofer Shaal Date: Thu, 4 Jun 2026 16:34:44 -0400 Subject: [PATCH 5/8] =?UTF-8?q?feat(filtered-bench):=20M3=20adversarial=20?= =?UTF-8?q?check=20(contender=20D)=20=E2=80=94=20win=20largely=20fails=20t?= =?UTF-8?q?he=20gate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds predicate-aware-entry ACORN (the rule-#5 "tune harder" adversary): - ruvector-acorn: acorn_search_seeded_counted (beam starts from caller seeds instead of multi-probe entry); acorn_search_impl refactored to take Option, existing fns pass None — 13 acorn tests still green (behavior preserved). 
- contenders.rs: Acorn::search_predicate_entry — stride-sample probes, predicate-test free, distance-eval only matching probes, seed the beam from the nearest matches. - examples/adversarial.rs: A vs best-of(vanilla-B, predicate-entry-D) at matched recall. FINDING (rule #5 changed the verdict): predicate-aware entry slashes ACORN's cost at HIGH correlation (rho=1 sel=0.1%: 3753 -> 203 evals), collapsing A's advantage from 44.7x (vs vanilla) to 2.4x — BELOW the pre-registered 5x bar. A vs best ACORN: rho=1.0: 2.4x / 2.3x / 1.9x (sel .001/.005/.01) — MISS at the 5x bar. rho=0.7: 38.8x / 14.6x / 6.5x — WIN (D's seeding is weak at moderate correlation, where matches are scattered so a seeded walk still wanders). So A and predicate-entry-ACORN exploit the SAME structure and converge (~2x) at high correlation; A's clean win is NOT robust to a properly-tuned ACORN. Honest verdict: largely a KILL at the pre-registered bar, with a narrower conditional edge at rho~0.7. Caveat favoring A: D's seeding leans on ~16k "free" predicate tests (the eval metric ignores the O(1) predicate scan); at scale that scan isn't free, restoring some edge. 
--- crates/ruvector-acorn/src/search.rs | 80 +++++++--- .../examples/adversarial.rs | 144 ++++++++++++++++++ .../ruvector-filtered-bench/src/contenders.rs | 49 +++++- 3 files changed, 247 insertions(+), 26 deletions(-) create mode 100644 crates/ruvector-filtered-bench/examples/adversarial.rs diff --git a/crates/ruvector-acorn/src/search.rs b/crates/ruvector-acorn/src/search.rs index 1983600cc2..64e2d8879c 100644 --- a/crates/ruvector-acorn/src/search.rs +++ b/crates/ruvector-acorn/src/search.rs @@ -35,7 +35,7 @@ pub fn acorn_search( predicate: impl Fn(u32) -> bool, ) -> Vec<(u32, f32)> { let mut evals = 0u64; - acorn_search_impl(graph, query, k, ef, predicate, &mut evals) + acorn_search_impl(graph, query, k, ef, predicate, &mut evals, None) } /// Like [`acorn_search`] but also returns the exact number of distance @@ -50,7 +50,24 @@ pub fn acorn_search_counted( predicate: impl Fn(u32) -> bool, ) -> (Vec<(u32, f32)>, u64) { let mut evals = 0u64; - let out = acorn_search_impl(graph, query, k, ef, predicate, &mut evals); + let out = acorn_search_impl(graph, query, k, ef, predicate, &mut evals, None); + (out, evals) +} + +/// ACORN search seeded from caller-supplied entry nodes instead of the default +/// multi-probe entry — the substrate for contender D (predicate-aware entry). The beam +/// starts from `seeds` (each costs one distance-eval, counted); everything else is the +/// identical predicate-agnostic traversal. Returns results + exact eval count. 
+pub fn acorn_search_seeded_counted( + graph: &AcornGraph, + query: &[f32], + k: usize, + ef: usize, + predicate: impl Fn(u32) -> bool, + seeds: &[u32], +) -> (Vec<(u32, f32)>, u64) { + let mut evals = 0u64; + let out = acorn_search_impl(graph, query, k, ef, predicate, &mut evals, Some(seeds)); (out, evals) } @@ -61,6 +78,7 @@ fn acorn_search_impl( ef: usize, predicate: impl Fn(u32) -> bool, evals: &mut u64, + seeds: Option<&[u32]>, ) -> Vec<(u32, f32)> { if graph.is_empty() { return vec![]; @@ -68,24 +86,6 @@ fn acorn_search_impl( let n = graph.len(); let ef = ef.max(k); - // Multi-probe entry: sample evenly-spaced nodes to find a good starting - // point. O(probes × D) overhead vs O(n × D) for flat — negligible. - let n_probes = (n as f64).sqrt().ceil() as usize; - let n_probes = n_probes.clamp(4, 64); - // Count each probe distance once (result-identical to the min_by form, which - // recomputed l2_sq inside the comparator — the count reflects fundamental work). - let mut entry = 0u32; - let mut best = f32::INFINITY; - for i in 0..n_probes { - let cand = (i * n / n_probes) as u32; - let d = l2_sq(query, graph.row(cand as usize)); - *evals += 1; - if d < best { - best = d; - entry = cand; - } - } - let mut visited: Vec = vec![false; n]; // Min-heap by distance — pop closest unexplored candidate first. let mut candidates: BinaryHeap> = BinaryHeap::with_capacity(ef + 1); @@ -95,11 +95,41 @@ fn acorn_search_impl( // candidate, used to gate eviction when the frontier exceeds ef. let mut farthest_in_beam: BinaryHeap = BinaryHeap::with_capacity(ef + 1); - let d0 = l2_sq(query, graph.row(entry as usize)); - *evals += 1; - candidates.push(Reverse((OrdF32(d0), entry))); - farthest_in_beam.push(OrdF32(d0)); - visited[entry as usize] = true; + // Initial frontier: caller-supplied predicate-aware seeds (contender D), else the + // standard multi-probe entry. 
Multi-probe distances are counted once (result-identical + // to the original min_by form, which recomputed l2_sq inside the comparator). + let seed_ids: Vec = match seeds { + Some(s) if !s.is_empty() => s.iter().copied().filter(|&id| (id as usize) < n).collect(), + _ => { + let n_probes = (n as f64).sqrt().ceil() as usize; + let n_probes = n_probes.clamp(4, 64); + let mut entry = 0u32; + let mut best = f32::INFINITY; + for i in 0..n_probes { + let cand = (i * n / n_probes) as u32; + let d = l2_sq(query, graph.row(cand as usize)); + *evals += 1; + if d < best { + best = d; + entry = cand; + } + } + vec![entry] + } + }; + for &s in &seed_ids { + if visited[s as usize] { + continue; + } + let d = l2_sq(query, graph.row(s as usize)); + *evals += 1; + candidates.push(Reverse((OrdF32(d), s))); + farthest_in_beam.push(OrdF32(d)); + visited[s as usize] = true; + } + if candidates.is_empty() { + return vec![]; + } while let Some(Reverse((OrdF32(curr_d), curr))) = candidates.pop() { // Pop curr's mirror entry from the farthest-tracker. Since the two diff --git a/crates/ruvector-filtered-bench/examples/adversarial.rs b/crates/ruvector-filtered-bench/examples/adversarial.rs new file mode 100644 index 0000000000..b362391fff --- /dev/null +++ b/crates/ruvector-filtered-bench/examples/adversarial.rs @@ -0,0 +1,144 @@ +//! M3 adversarial check (protocol rule #5) — does predicate-aware-entry ACORN (contender D) +//! erase region-pruning's win in its own regime? +//! +//! For the win cells (ρ≥0.7, sel≤5%) it reports vanilla ACORN (B), the best +//! predicate-aware-entry ACORN (D, over a probe-budget sweep, tuned to match B's recall at +//! fewest evals), and contender A matched to the same recall. The headline ratio is A vs the +//! **cheaper** ACORN variant — so the win must survive the strongest ACORN we can build. +//! +//! 
Run: cargo run --release -p ruvector-filtered-bench --example adversarial -- [N] [Q] [nclusters] + +use ruvector_acorn::graph::exact_filtered_knn; +use ruvector_acorn::search::acorn_search_counted; +use ruvector_filtered_bench::contenders::{recall, Acorn}; +use ruvector_filtered_bench::data::{Dataset, FEAT_100K}; +use ruvector_filtered_bench::predicate; +use ruvector_filtered_bench::prune::RegionPruneIvf; + +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; +use std::path::Path; + +const K: usize = 10; +const GATE: f64 = 0.02; + +fn main() { + let a: Vec = std::env::args().collect(); + let n: usize = a.get(1).and_then(|s| s.parse().ok()).unwrap_or(20_000); + let q_count: usize = a.get(2).and_then(|s| s.parse().ok()).unwrap_or(200); + let nclusters: usize = a.get(3).and_then(|s| s.parse().ok()).unwrap_or(64); + let ef = 512; + let seed = 7; + + if !Path::new(FEAT_100K).exists() { + eprintln!("data not extracted ({FEAT_100K}); skipping."); + return; + } + + let ds = Dataset::load_arxiv(n); + let n = ds.len(); + let acorn = Acorn::build(&ds.feats, 2, ef); + let ivf = RegionPruneIvf::build(&ds.feats, nclusters, 15, seed); + let mut rng = StdRng::seed_from_u64(seed); + let queries: Vec = (0..q_count).map(|_| rng.gen_range(0..n)).collect(); + + let probe_budgets = [256usize, 1024, 4096, 16384]; + let a_caps = [1usize, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128]; + + println!("\n=== M3 adversarial: A vs best-of(vanilla-B, predicate-entry-D) (n={n}, nclusters={}) ===", ivf.nclusters); + println!( + "{:>4} {:>6} | {:>7} {:>7} | {:>7} {:>7} {:>7} | {:>7} {:>7} | {:>6} {:>6} | verdict", + "ρ", "sel", "B_rec", "B_ev", "D_rec", "D_ev", "D_pb", "Am_rec", "Am_ev", "vsB", "vsBest" + ); + println!("{}", "-".repeat(100)); + + for &rho in &[0.7_f64, 1.0] { + for &sel in &[0.001_f64, 0.005, 0.01, 0.05] { + let pred = predicate::correlated(&ds.labels, sel, rho, 0, &mut rng); + if pred.n_match < K { + continue; + } + let pf = pred.as_fn(); + let truths: Vec> = queries + .iter() 
+ .map(|&qi| { + exact_filtered_knn(&ds.feats, &ds.feats[qi], K + 1, pf) + .into_iter() + .filter(|&id| id as usize != qi) + .take(K) + .collect() + }) + .collect(); + + // B — vanilla ACORN. + let (b_rec, b_ev) = mean(&queries, &truths, |qi| { + let (g, e) = acorn_search_counted(&acorn.graph, &ds.feats[qi], K, ef, pf); + (g.into_iter().map(|(id, _)| id).collect(), e) + }); + + // D — predicate-aware entry; pick cheapest probe budget reaching B's recall. + let (mut d_rec, mut d_ev, mut d_pb) = (0.0, u64::MAX, 0usize); + for &pb in &probe_budgets { + let (r, e) = mean(&queries, &truths, |qi| { + let res = acorn.search_predicate_entry(&ds.feats[qi], K, pf, pb, 4); + (res.ids, res.evals) + }); + if r >= b_rec - GATE && e < d_ev { + d_rec = r; + d_ev = e; + d_pb = pb; + } + } + if d_ev == u64::MAX { + // none matched B's recall; report the highest-budget point. + let (r, e) = mean(&queries, &truths, |qi| { + let res = acorn.search_predicate_entry(&ds.feats[qi], K, pf, 16384, 4); + (res.ids, res.evals) + }); + d_rec = r; + d_ev = e; + d_pb = 16384; + } + + // A — matched to vanilla B's recall. 
+ let (mut am_rec, mut am_ev) = (1.0, u64::MAX); + for &cap in &a_caps { + let (r, e) = mean(&queries, &truths, |qi| { + let res = ivf.search(&ds.feats, &ds.feats[qi], K, pf, Some(cap)); + (res.ids, res.evals) + }); + if r >= b_rec - GATE { + am_rec = r; + am_ev = e; + break; + } + } + + let best_acorn = b_ev.min(d_ev); + let vs_b = b_ev as f64 / am_ev as f64; + let vs_best = best_acorn as f64 / am_ev as f64; + let target = if sel <= 0.01 { 5.0 } else { 2.0 }; + let verdict = if vs_best >= target { "WIN" } else { "miss" }; + + println!( + "{rho:>4.1} {sel:>6.3} | {:>6.1}% {:>7} | {:>6.1}% {:>7} {:>7} | {:>6.1}% {:>7} | {:>5.1}× {:>5.1}× | {verdict}", + 100.0 * b_rec, b_ev, + 100.0 * d_rec, d_ev, d_pb, + 100.0 * am_rec, am_ev, + vs_b, vs_best, + ); + } + } +} + +fn mean(queries: &[usize], truths: &[Vec], mut run: impl FnMut(usize) -> (Vec, u64)) -> (f64, u64) { + let mut rec = 0.0; + let mut ev = 0u64; + for (&qi, truth) in queries.iter().zip(truths) { + let (ids, e) = run(qi); + let ids: Vec = ids.into_iter().filter(|&id| id as usize != qi).collect(); + rec += recall(truth, &ids); + ev += e; + } + (rec / queries.len() as f64, ev / queries.len() as u64) +} diff --git a/crates/ruvector-filtered-bench/src/contenders.rs b/crates/ruvector-filtered-bench/src/contenders.rs index 58cf491d22..b76daa43ee 100644 --- a/crates/ruvector-filtered-bench/src/contenders.rs +++ b/crates/ruvector-filtered-bench/src/contenders.rs @@ -6,11 +6,16 @@ //! rule #2). Contender A (region-pruned IVF) arrives in M2 (`prune` module). use ruvector_acorn::graph::AcornGraph; -use ruvector_acorn::search::acorn_search_counted; +use ruvector_acorn::search::{acorn_search_counted, acorn_search_seeded_counted}; /// ACORN edge budget base (γ·M neighbors/node); matches `AcornIndexGamma::M`. 
pub const ACORN_M: usize = 16; +#[inline] +fn l2_sq(a: &[f32], b: &[f32]) -> f32 { + a.iter().zip(b).map(|(x, y)| (x - y) * (x - y)).sum() +} + /// Outcome of one filtered query: the returned ids (nearest-first) and the exact /// number of distance evaluations spent — the pre-registered primary cost metric. pub struct QueryResult { @@ -66,6 +71,48 @@ impl Acorn { .collect(); QueryResult { ids, evals } } + + /// **Contender D** — ACORN with *predicate-aware entry* (the adversarial "tune harder" + /// variant, rule #5). Stride-samples `max_entry_probes` nodes, tests the predicate on + /// each (O(1), uncounted — symmetric with how contender A gates distances), and + /// distance-evaluates only the *matching* probes to pick the `n_seeds` nearest matching + /// seeds. The agnostic beam then starts inside the matching region instead of walking to + /// it from a random entry. Falls back to standard ACORN if the sample finds no match. + /// + /// Cost = (matching probes distance-evaluated) + seeded-search evals. At very low + /// selectivity a bounded sample usually finds no match → D degenerates to B. + pub fn search_predicate_entry( + &self, + query: &[f32], + k: usize, + predicate: impl Fn(u32) -> bool, + max_entry_probes: usize, + n_seeds: usize, + ) -> QueryResult { + let n = self.graph.len(); + let probes = max_entry_probes.clamp(1, n); + let mut evals = 0u64; + let mut seeds: Vec<(f32, u32)> = Vec::new(); + for i in 0..probes { + let id = (i * n / probes) as u32; + if !predicate(id) { + continue; + } + let d = l2_sq(query, self.graph.row(id as usize)); + evals += 1; + seeds.push((d, id)); + } + if seeds.is_empty() { + // No matching seed in the sample → standard ACORN entry. 
+ let (got, ev) = acorn_search_counted(&self.graph, query, k, self.ef, predicate); + return QueryResult { ids: got.into_iter().map(|(id, _)| id).collect(), evals: evals + ev }; + } + seeds.sort_by(|a, b| a.0.total_cmp(&b.0)); + seeds.truncate(n_seeds.max(1)); + let seed_ids: Vec = seeds.iter().map(|&(_, id)| id).collect(); + let (got, ev) = acorn_search_seeded_counted(&self.graph, query, k, self.ef, predicate, &seed_ids); + QueryResult { ids: got.into_iter().map(|(id, _)| id).collect(), evals: evals + ev } + } } /// Recall@k against an exact filtered-kNN truth set: fraction of the true top-k From 5523706553c384704a31d2458f04e8066fd9103e Mon Sep 17 00:00:00 2001 From: Ofer Shaal Date: Thu, 4 Jun 2026 16:37:28 -0400 Subject: [PATCH 6/8] =?UTF-8?q?docs(adr):=20ADR-201=20=E2=80=94=20region-p?= =?UTF-8?q?runed=20filtered=20ANN=20vs=20ACORN,=20qualified=20NO-GO=20(M4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Writes up the BET 2 ⊗ BET 4 outcome with ADR-199/200 honesty. Verdict: region-pruned IVF beats VANILLA ACORN 6-48x evals (4.7-26x wall-clock) at sel<=1%, but the pre-registered >=5x WIN does NOT survive the rule-#5 adversarial check — giving ACORN a predicate-aware entry collapses the gap to ~2x at high correlation (rho=1), below the bar. A retains a narrow conditional edge at moderate correlation (rho~0.7, 6-39x) plus an at-scale caveat (D's seeding leans on a ~full predicate scan the eval metric treats as free). Net: the bet does not cleanly pay; the clean win was an artifact of an under-equipped incumbent. Central lesson: a filtered-ANN cost claim is meaningless without a predicate-aware-entry baseline. Also strips a stray tag from the pre-registration doc (non-semantic). 
--- ...201-region-pruned-filtered-ann-vs-acorn.md | 156 ++++++++++++++++++ .../bet2-filtered-ann/PRE-REGISTRATION.md | 1 - 2 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md diff --git a/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md b/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md new file mode 100644 index 0000000000..5f29465e38 --- /dev/null +++ b/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md @@ -0,0 +1,156 @@ +--- +adr: 201 +title: "Region-Pruned IVF for Filtered ANN vs ACORN: Qualified NO-GO" +status: proposed +date: 2026-06-04 +authors: [ofershaal, claude-flow] +related: [ADR-193, ADR-196, ADR-199, ADR-200] +tags: [ruvector, retrieval, ann, filtered-search, acorn, ivf, region-pruning, no-go] +--- + +# ADR-201 — Region-Pruned IVF for Filtered ANN vs ACORN: Qualified NO-GO + +## Status + +**Proposed — qualified NO-GO at the pre-registered bar (2026-06-04).** BET 2 ⊗ BET 4 of the +SepRAG exploration (issue #534): does region-pruned IVF search beat the in-repo `ruvector-acorn` +incumbent on *correlated* filtered queries? Pre-registration: +[`docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md`](../plans/bet2-filtered-ann/PRE-REGISTRATION.md). + +Region-pruning beats *vanilla* ACORN by 6–48× distance-evals (and 4.7–26× wall-clock) at +selectivity ≤ 1%. **But the pre-registered ≥5× WIN does not survive the mandatory adversarial +check (protocol rule #5):** giving ACORN a *predicate-aware entry* — a simple, known enhancement +— collapses the advantage to **~2× at high correlation (ρ=1), below the 5× bar.** A retains a +real but **narrow, conditional** edge at *moderate* correlation (ρ≈0.7, 6–39×) and very low +selectivity, plus an at-scale metric caveat that favours it. Net: the bet **does not cleanly +pay**; the clean win was an artifact of an under-equipped incumbent. 
+ +## Context + +Filtered ANN ("nearest among items matching predicate X") is a real flat-ANN weakness: a +post-filter graph walk starves at low selectivity. `ruvector-acorn` (SIGMOD 2024, +arXiv:2403.04871) fixes this with a denser γ·M graph + predicate-agnostic traversal, and is the +strong in-repo incumbent. The hypothesis (BET 2 ⊗ BET 4): when the predicate **correlates** with +embedding-cluster structure (the production metadata-filter case — `tenant`, `doc_type`, `year`, +`category`), an IVF hierarchy can **skip whole clusters with zero matches** and beat ACORN on +cost. On embeddings the pruning kernel cannot use graph separators (high treewidth, [ADR-199]), +so the substrate is the treewidth-immune IVF hierarchy (`ruvector-rairs`, [ADR-193]) — BET 4 is +the mechanism, BET 2 the benchmark. + +## Method + +Self-contained crate `ruvector-filtered-bench` (depends only on `ruvector-acorn` + +`ruvector-rairs`; independent of [ADR-200]/PR #535). Real ogbn-arxiv (n=20k slice, 128-d, 40 +subject labels). Ground truth = `ruvector-acorn::exact_filtered_knn`. Cost = **distance-evals/ +query** (hardware-independent), with wall-clock as an honesty guard. Predicates built by a +ρ-correlation knob holding selectivity *exactly* constant across ρ (shuffle a fraction 1−ρ of a +structured label-class set), so cost deltas are attributable to correlation, not set size. + +Contenders, all scored against the same oracle, all reporting **exact** distance-evals (ACORN +was instrumented with additive, result-preserving `*_counted` search variants): +- **A** — region-pruned IVF (`prune::RegionPruneIvf`): k-means partition + two stacked prunings + — skip zero-match clusters (predicate) and a triangle-inequality branch-and-bound on cluster + radius (exact). The salvaged separator-tree B&B kernel ([ADR-196]) on the IVF hierarchy. +- **B** — tuned vanilla ACORN (γ=2, ef swept; ef=512 ≈ 92% recall at sel=1%). +- **C** — post-filter floor (retrieve top-pool unfiltered, then filter). 
+- **D** — ACORN with predicate-aware entry (the rule-#5 "tune harder" adversary): sample probes, + predicate-test free, distance-eval only matching probes, seed the beam from the nearest match. + +## Evidence + +### The benchmark has teeth (negative control, M1) + +Post-filter (C) vs agnostic ACORN (B) on the *same* graph, ρ=1, recall@10: + +| sel | B (agnostic) | C (post-filter) | +|---|---|---| +| 0.1% | 73.7% | **22.7%** | +| 0.5% | 90.4% | **59.7%** | +| 1% | 92.6% | 79.3% | +| ≥5% | (converge) | (fine) | + +A 50+ point swing at low selectivity → the benchmark can distinguish methods (it is not +insensitive). Tuned ACORN reaches ~92.6% recall @ ~1622 evals/query at sel=1%; its eval count is +~flat in ef (early-termination-bound), so "tuned" = crank ef for recall at near-constant cost. + +### A vs vanilla ACORN — large win (M3 sweep, nclusters=64, cost at matched recall) + +| ρ | sel | ACORN-B evals | A evals | ev-ratio | wall-clock ratio | +|---|---|---|---|---|---| +| 1.0 | 0.1% | 3753 | 145 | 25.9× | 22.5× | +| 1.0 | 0.5% | 2152 | 164 | 13.1× | 8.3× | +| 1.0 | 1% | 1622 | 264 | 6.1× | 4.7× | +| 1.0 | 5% | 955 | 628 | 1.5× | 1.6× | +| 0.7 | 1% | 1710 | 189 | 9.0× | 6.4× | + +A's exact B&B has recall ≥ ACORN (≈1.0). Win is monotonic in selectivity and **selectivity- +driven** (it also holds at ρ=0 in the sparse regime — partially refuting the pre-registered +*correlation* mechanism: correlation governs recall quality, not the eval win). sel=5% already +misses the ≥2× sub-bar. 
+ +### A vs **predicate-aware-entry** ACORN — the win collapses (M3 adversarial, rule #5) + +| ρ | sel | vanilla B | **tuned D** | A | A vs **best ACORN** | +|---|---|---|---|---|---| +| 1.0 | 0.1% | 3753 | **203** | 84 | **2.4× — MISS** | +| 1.0 | 0.5% | 2152 | **377** | 164 | **2.3× — MISS** | +| 1.0 | 1% | 1622 | **508** | 264 | **1.9× — MISS** | +| 0.7 | 0.1% | 4009 | 3100 | 80 | 38.8× — WIN | +| 0.7 | 1% | 1769 | 1388 | 214 | 6.5× — WIN | + +**Predicate-aware entry cuts ACORN's cost up to ~18× at high correlation** (3753→203 evals), +because seeding the beam at any matching node lands it inside the tight match cluster, finishing +in a few hops. A and D then exploit the *same* structure and converge to within ~2×. The win +**inverts with correlation**: A beats D decisively (6–39×) only at *moderate* ρ=0.7, where D's +sampled seed often lands on a scattered random match and the walk still wanders. + +## Decision / Finding + +**Qualified NO-GO at the pre-registered ≥5× bar.** Region-pruned IVF does *not* cleanly beat a +properly-tuned ACORN. The headline 6–48× win is against *vanilla* ACORN; once ACORN is given a +predicate-aware entry (a simple, standard enhancement), the gap at high correlation falls to +~2×, below the bar. The pre-registered WIN required ≥5× at sel≤1% for ρ≥0.7 — met at ρ=0.7, +**failed at ρ=1.0** — so the conjunction does not hold. + +What *did* hold, honestly: +- A's **exact** recall (1.0) dominates ACORN's ~92% — a quality, not cost, advantage. +- A retains a **6–39× cost edge at moderate correlation (ρ≈0.7) and sel≤1%**, where ACORN's + predicate-aware seeding is ineffective. +- **At-scale caveat (favours A):** D's seeding leans on predicate-testing ~16k nodes that the + distance-eval metric counts as free (O(1) predicate vs 128-d distance). At billion-scale a near- + full predicate scan per query is *not* free; that cost would partially restore A's edge. The + metric flatters D in exactly the regime where D wins. 
+ +## Consequences + +- **Do not productionize region-pruned IVF as a general ACORN replacement.** The clean win was an + artifact of benchmarking an under-equipped incumbent — caught only by the rule-#5 adversarial + check, which is the central lesson: *a filtered-ANN cost claim is meaningless without a + predicate-aware-entry baseline.* +- The B&B region-pruning kernel is **correct and exact** (validated vs the oracle) and remains a + reusable asset; its cost advantage is real but narrow and regime-dependent. +- The honest open question worth a follow-up: at **large n**, where D's per-query predicate scan + is genuinely costly, does A's edge re-open? That is the only condition under which this bet + could flip to a WIN, and it is not yet tested. + +## Boundaries / not proven + +- Single dataset (ogbn-arxiv), n=20k, k=10, 200 queries (per-point noise ~±1%). +- Label-derived correlation as a proxy for production metadata filters. +- ACORN's lite single-layer graph is weak in dense regions (recall non-monotonic at high + selectivity); the comparison is fair (both use it) but absolute recalls are modest there. +- D's seed-finding is one realization of predicate-aware entry; a smarter one could differ. +- The at-scale (large-n) regime — where the verdict might flip — is unmeasured. + +## Next steps + +1. **Large-n re-test** (n ≥ 10⁵–10⁶, ≥500 queries): the one condition that could flip the + verdict (D's predicate scan stops being free). If A's edge re-opens there, revisit. +2. Otherwise, close BET 2 ⊗ BET 4 as a qualified NO-GO and retain the exact B&B kernel as a + validated asset for the narrow ρ≈0.7 / very-low-selectivity regime. + +## Alternatives considered + +- **Region-pruning on graph separators** (not IVF) — rejected upstream ([ADR-199]: embedding + graphs are high-treewidth). +- **Believing the vanilla-ACORN win** — rejected: it does not survive the adversarial check. 
diff --git a/docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md b/docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md index b0c0309b40..affe25ec2d 100644 --- a/docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md +++ b/docs/plans/bet2-filtered-ann/PRE-REGISTRATION.md @@ -117,4 +117,3 @@ live in `src/`; the harness is `examples/filtered_ann_pruning.rs`. No dependency - The uncorrelated/agnostic regime as a *target* (kill control only — ACORN owns it). - Multi-predicate conjunctions, streaming updates, the live-GNN metric (BET 1's frontier). - Disk-resident / billion-scale (in-memory ogbn-arxiv is the stage). - From dfb15d3737f00f143c05d0c964a40ca58afcb573 Mon Sep 17 00:00:00 2001 From: Ofer Shaal Date: Thu, 4 Jun 2026 16:47:03 -0400 Subject: [PATCH 7/8] =?UTF-8?q?docs(adr):=20ADR-201=20next-steps=20?= =?UTF-8?q?=E2=80=94=20multi-predicate=20conjunction=20hypothesis?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The experiment's own evidence points to two flip conditions (conjunctions where ACORN's predicate-seeding degrades but cluster-skip composes; large-n where the predicate scan stops being free) and the open BET 4 standalone baseline. --- ...201-region-pruned-filtered-ann-vs-acorn.md | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md b/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md index 5f29465e38..0c5cbf94e0 100644 --- a/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md +++ b/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md @@ -144,10 +144,24 @@ What *did* hold, honestly: ## Next steps -1. **Large-n re-test** (n ≥ 10⁵–10⁶, ≥500 queries): the one condition that could flip the - verdict (D's predicate scan stops being free). If A's edge re-opens there, revisit. -2. 
Otherwise, close BET 2 ⊗ BET 4 as a qualified NO-GO and retain the exact B&B kernel as a - validated asset for the narrow ρ≈0.7 / very-low-selectivity regime. +Two conditions, both surfaced by this experiment's own evidence, could flip the verdict to a +scoped WIN — they are the honest follow-ups, not the result: + +1. **Multi-predicate conjunctions (the strongest lead).** Under `X ∧ Y ∧ Z`, region-pruning's + cluster-skip **composes** (skip clusters with zero conjunction-matches), while ACORN's + predicate-aware entry (contender D) **degrades sharply** — a *sampled* seed satisfying *all* + conjuncts becomes exponentially unlikely as the conjunction tightens, so D regresses toward + vanilla ACORN's cost while A stays cheap. This is precisely the regime where A could beat a + tuned ACORN *even at high correlation*. This ADR's experiments hold one predicate fixed; + conjunctions were out of scope. Highest-leverage next bet, and it reuses this harness. +2. **Large-n re-test** (n ≥ 10⁵–10⁶, ≥500 queries): D's seeding leans on a ~full predicate scan + the distance-eval metric treats as free; at scale that scan is genuinely costly, which could + re-open A's edge. Add a predicate-scan cost term and/or measure wall-clock at n=10⁶. +3. **(Lower priority) BET 4 standalone:** the IVF region-pruning kernel was validated as BET 2's + *mechanism* but never run vs the original *plain-IVF-probe* baseline. The kernel is exact; the + standalone "beats plain IVF" head-to-head is technically still open. +4. If none of the above re-open it, close BET 2 ⊗ BET 4 as a qualified NO-GO and retain the + exact B&B kernel as a validated asset for the narrow ρ≈0.7 / very-low-selectivity regime. 
## Alternatives considered From 7c10ade929e9cb0a0c58f8f737b122cbc9a01f81 Mon Sep 17 00:00:00 2001 From: Ofer Shaal Date: Thu, 4 Jun 2026 17:03:59 -0400 Subject: [PATCH 8/8] =?UTF-8?q?docs(adr):=20retract=20conjunction=20lead?= =?UTF-8?q?=20in=20ADR-201=20(scrutiny=20=E2=80=94=20it=20doesn't=20hold)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A conjunction is a single O(1) boolean predicate of selectivity = product; in the distance-eval metric it reduces to (selectivity, scatter) — both already swept. The 'exponentially-unlikely seed' reasoning was wrong (testing a conjunction is O(1)). Residual leads downgraded to narrow/speculative (predicate-eval cost, large-n). Recommend closing BET 2 ⊗ BET 4; thread value is BET 1 productionization + BET 3. --- ...201-region-pruned-filtered-ann-vs-acorn.md | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md b/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md index 0c5cbf94e0..6d68671aec 100644 --- a/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md +++ b/docs/adr/ADR-201-region-pruned-filtered-ann-vs-acorn.md @@ -144,24 +144,36 @@ What *did* hold, honestly: ## Next steps -Two conditions, both surfaced by this experiment's own evidence, could flip the verdict to a -scoped WIN — they are the honest follow-ups, not the result: - -1. **Multi-predicate conjunctions (the strongest lead).** Under `X ∧ Y ∧ Z`, region-pruning's - cluster-skip **composes** (skip clusters with zero conjunction-matches), while ACORN's - predicate-aware entry (contender D) **degrades sharply** — a *sampled* seed satisfying *all* - conjuncts becomes exponentially unlikely as the conjunction tightens, so D regresses toward - vanilla ACORN's cost while A stays cheap. This is precisely the regime where A could beat a - tuned ACORN *even at high correlation*. 
This ADR's experiments hold one predicate fixed; - conjunctions were out of scope. Highest-leverage next bet, and it reuses this harness. +**Retraction (2026-06-04, post-verdict scrutiny).** An earlier draft of this section named +*multi-predicate conjunctions* as "the strongest lead," on the reasoning that a seed satisfying +all conjuncts is "exponentially unlikely" to sample. **That reasoning is wrong and is retracted.** +A conjunction `X ∧ Y ∧ Z` is a single boolean predicate of selectivity ≈ the product, evaluated +**O(1)** by both A and ACORN-D. In the distance-eval metric a conjunction is therefore invisible +*as* a conjunction — only its **selectivity** and **geometric scatter** matter, and both axes are +already swept here (the selectivity sweep × the ρ-knob). ACORN-D finds a conjunction-seed by +sampling at exactly the rate it finds any seed of that selectivity. The multi-modal rescue also +fails: top-k nearest matches are almost always local to one mode, so D's seed lands correctly. +**Conjunctions do not favour region-pruning in this cost model.** + +The honest residual leads (both narrow): + +1. **Predicate-evaluation cost** (a *different* cost axis, excluded here). ACORN's agnostic + traversal tests the predicate on **every** expanded node (~1600/query); A tests it on far + fewer (probed-cluster members) and can precompute per-attribute per-cluster bitmaps. When + predicate evaluation is *expensive* (many attributes, costly lookups — and conjunctions + amplify this), A's asymmetry could matter. But for cheap metadata predicates this term is + small vs a 128-d distance, so the regime is narrow. Would require a predicate-eval cost model. 2. **Large-n re-test** (n ≥ 10⁵–10⁶, ≥500 queries): D's seeding leans on a ~full predicate scan - the distance-eval metric treats as free; at scale that scan is genuinely costly, which could - re-open A's edge. Add a predicate-scan cost term and/or measure wall-clock at n=10⁶. 
+ this metric treats as free; at scale that scan is genuinely costly, which *could* re-open A's + edge. The most concrete remaining check. 3. **(Lower priority) BET 4 standalone:** the IVF region-pruning kernel was validated as BET 2's *mechanism* but never run vs the original *plain-IVF-probe* baseline. The kernel is exact; the standalone "beats plain IVF" head-to-head is technically still open. -4. If none of the above re-open it, close BET 2 ⊗ BET 4 as a qualified NO-GO and retain the - exact B&B kernel as a validated asset for the narrow ρ≈0.7 / very-low-selectivity regime. + +**Recommendation:** treat BET 2 ⊗ BET 4 as **closed** (qualified NO-GO). The residual leads are +narrow/speculative; the SepRAG thread's remaining value is productionizing BET 1 (the proven WIN, +[ADR-200]) and exploring BET 3 (multi-hop KG, a different mechanism). Retain the exact B&B kernel +as a validated asset for the narrow ρ≈0.7 / very-low-selectivity regime. ## Alternatives considered