diff --git a/src/registry.rs b/src/registry.rs
index e7d494d..c7c7551 100644
--- a/src/registry.rs
+++ b/src/registry.rs
@@ -87,7 +87,7 @@ impl USearchIndexConfig {
             .map_err(|e| DataFusionError::Execution(format!("USearch Index::new failed: {e}")))
     }
 
-    /// Load a previously saved index from `path`.
+    /// Load a previously saved index from `path` into memory.
     ///
     /// Uses the same `IndexOptions` as `build_index()`. The options must
     /// match those used when the index was originally built — passing wrong
@@ -101,6 +101,31 @@ impl USearchIndexConfig {
         Ok(index)
     }
 
+    /// Memory-map a previously saved index from `path`.
+    ///
+    /// Unlike [`load_index`], this does not copy the index into RAM. The OS
+    /// pages data in on demand, keeping resident memory proportional to the
+    /// working set rather than the full index size. Prefer this for the
+    /// reload-from-disk path where the index file is already local.
+    ///
+    /// The returned [`Index`] is fully functional for search; the backing
+    /// file must remain on disk for the lifetime of the index.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`DataFusionError::Execution`] if `Index::new` fails or if
+    /// viewing the file at `path` fails.
+    ///
+    /// [`load_index`]: Self::load_index
+    pub fn view_index(&self, path: &str) -> Result<Index> {
+        let index = Index::new(&self.to_index_options())
+            .map_err(|e| DataFusionError::Execution(format!("USearch Index::new failed: {e}")))?;
+        index
+            .view(path)
+            .map_err(|e| DataFusionError::Execution(format!("USearch index view failed: {e}")))?;
+        Ok(index)
+    }
+
     fn to_index_options(&self) -> IndexOptions {
         IndexOptions {
             dimensions: self.dimensions,
diff --git a/src/sqlite_provider.rs b/src/sqlite_provider.rs
index 6fa723f..6b767ab 100644
--- a/src/sqlite_provider.rs
+++ b/src/sqlite_provider.rs
@@ -252,6 +252,12 @@ impl SqliteLookupProvider {
             )?;
         }
 
+        // Checkpoint WAL so the data is flushed to the main database file.
+        // Without this, data written during build may only exist in the WAL
+        // and can be lost if the process exits before a passive checkpoint.
+        conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")
+            .map_err(|e| DataFusionError::Execution(format!("WAL checkpoint failed: {e}")))?;
+
         let mut conns = vec![conn];
         for _ in 1..pool_size {
             conns.push(open_conn(db_path)?);
diff --git a/tests/optimizer_rule.rs b/tests/optimizer_rule.rs
index ec98fe8..de0d5fe 100644
--- a/tests/optimizer_rule.rs
+++ b/tests/optimizer_rule.rs
@@ -419,3 +419,52 @@ async fn test_qualified_ref_where_clause_rewrites() {
         "qualified ref + WHERE → filter absorbed, rule must fire\nPlan: {plan:?}"
     );
 }
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// SELECT only distance — no base columns projected
+// ═══════════════════════════════════════════════════════════════════════════════
+//
+// When the SELECT list contains only the distance UDF (no base table columns),
+// the Projection node has a single computed expression. The optimizer must still
+// recognise the pattern and rewrite to USearchNode.
+
+/// Bare table, SELECT only distance alias, ORDER BY alias — rule must fire.
+#[tokio::test]
+async fn test_select_only_distance_bare_rewrites() {
+    let ctx = make_ctx(MetricKind::L2sq).await;
+    let sql =
+        format!("SELECT l2_distance(vector, {Q}) AS dist FROM items ORDER BY dist ASC LIMIT 5");
+    let plan = optimized_plan(&ctx, &sql).await;
+    assert!(
+        contains_usearch_node(&plan),
+        "SELECT only distance (bare) → rule must fire\nPlan: {plan:?}"
+    );
+}
+
+/// Qualified table, SELECT only distance alias, ORDER BY alias — rule must fire.
+#[tokio::test]
+async fn test_select_only_distance_qualified_rewrites() {
+    let ctx = make_ctx_qualified(MetricKind::L2sq).await;
+    let sql = format!(
+        "SELECT l2_distance(vector, {Q}) AS dist FROM datafusion.public.items ORDER BY dist ASC LIMIT 5"
+    );
+    let plan = optimized_plan(&ctx, &sql).await;
+    assert!(
+        contains_usearch_node(&plan),
+        "SELECT only distance (qualified) → rule must fire\nPlan: {plan:?}"
+    );
+}
+
+/// Bare table, SELECT only distance (no alias), ORDER BY the UDF directly.
+#[tokio::test]
+async fn test_select_only_distance_no_alias_rewrites() {
+    let ctx = make_ctx(MetricKind::L2sq).await;
+    let sql = format!(
+        "SELECT l2_distance(vector, {Q}) FROM items ORDER BY l2_distance(vector, {Q}) ASC LIMIT 5"
+    );
+    let plan = optimized_plan(&ctx, &sql).await;
+    assert!(
+        contains_usearch_node(&plan),
+        "SELECT only distance (no alias, ORDER BY UDF) → rule must fire\nPlan: {plan:?}"
+    );
+}