fix(core): restore semantic vector skip and benchmark gating

phernandez · phernandez · commit 16fc164b4ee0 · 2026-04-07T12:58:13.000-05:00
Signed-off-by: phernandez <paul@basicmachines.co>
diff --git a/src/basic_memory/repository/search_repository_base.py b/src/basic_memory/repository/search_repository_base.py
@@ -1236,8 +1236,8 @@ async def _prepare_entity_vector_jobs(self, entity_id: int) -> _PreparedEntityVe
                                     "embedding_model": current_embedding_model,
                                 },
                             )
-                    skipped_chunks_count += 1
-                    continue
+                        skipped_chunks_count += 1
+                        continue
 
                 pending_records.append(record)
 
diff --git a/test-int/semantic/conftest.py b/test-int/semantic/conftest.py
@@ -65,6 +65,11 @@ class SearchCombo:
     SearchCombo("postgres-openai", DatabaseBackend.POSTGRES, "openai", 1536),
 ]
 
+# Benchmark queries compare ranking quality across providers rather than enforcing
+# the stricter production retrieval cutoff. OpenAI paraphrase matches cluster near
+# ~0.37 in this corpus, so the default 0.55 filter hides otherwise-correct results.
+BENCHMARK_MIN_SIMILARITY = 0.3
+
 
 # --- Skip guards ---
 
@@ -229,6 +234,7 @@ async def create_search_service(
         default_project="bench-project",
         database_backend=combo.backend,
         semantic_search_enabled=semantic_enabled,
+        semantic_min_similarity=BENCHMARK_MIN_SIMILARITY,
     )
 
     # Create search repository (backend-specific)
diff --git a/test-int/semantic/test_semantic_quality.py b/test-int/semantic/test_semantic_quality.py
@@ -51,8 +51,9 @@
     ("sqlite-fastembed", "paraphrase", "hybrid"): 0.25,
     ("postgres-fastembed", "lexical", "hybrid"): 0.37,
     ("postgres-fastembed", "paraphrase", "hybrid"): 0.25,
-    # OpenAI metrics are still recorded, but we do not gate on them yet.
-    # The current benchmark corpus is too small to make that combo stable.
+    # OpenAI hybrid should handle paraphrases better than FastEmbed.
+    ("postgres-openai", "lexical", "hybrid"): 0.37,
+    ("postgres-openai", "paraphrase", "hybrid"): 0.25,
 }
 
 

Original file line number	Diff line number	Diff line change
`@@ -1236,8 +1236,8 @@ async def _prepare_entity_vector_jobs(self, entity_id: int) -> _PreparedEntityVe`
`1236`	`1236`	`"embedding_model": current_embedding_model,`
`1237`	`1237`	`},`
`1238`	`1238`	`)`
`1239`		`- skipped_chunks_count += 1`
`1240`		`- continue`
	`1239`	`+ skipped_chunks_count += 1`
	`1240`	`+ continue`
`1241`	`1241`
`1242`	`1242`	`pending_records.append(record)`
`1243`	`1243`