Skip to content

Commit 16fc164

Browse files
committed
fix(core): restore semantic vector skip and benchmark gating
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 4529bfa commit 16fc164

3 files changed

Lines changed: 11 additions & 4 deletions

File tree

src/basic_memory/repository/search_repository_base.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1236,8 +1236,8 @@ async def _prepare_entity_vector_jobs(self, entity_id: int) -> _PreparedEntityVe
12361236
"embedding_model": current_embedding_model,
12371237
},
12381238
)
1239-
skipped_chunks_count += 1
1240-
continue
1239+
skipped_chunks_count += 1
1240+
continue
12411241

12421242
pending_records.append(record)
12431243

test-int/semantic/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,11 @@ class SearchCombo:
6565
SearchCombo("postgres-openai", DatabaseBackend.POSTGRES, "openai", 1536),
6666
]
6767

68+
# Benchmark queries compare ranking quality across providers rather than enforcing
69+
# the stricter production retrieval cutoff. OpenAI paraphrase matches cluster near
70+
# ~0.37 in this corpus, so the default 0.55 filter hides otherwise-correct results.
71+
BENCHMARK_MIN_SIMILARITY = 0.3
72+
6873

6974
# --- Skip guards ---
7075

@@ -229,6 +234,7 @@ async def create_search_service(
229234
default_project="bench-project",
230235
database_backend=combo.backend,
231236
semantic_search_enabled=semantic_enabled,
237+
semantic_min_similarity=BENCHMARK_MIN_SIMILARITY,
232238
)
233239

234240
# Create search repository (backend-specific)

test-int/semantic/test_semantic_quality.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,9 @@
5151
("sqlite-fastembed", "paraphrase", "hybrid"): 0.25,
5252
("postgres-fastembed", "lexical", "hybrid"): 0.37,
5353
("postgres-fastembed", "paraphrase", "hybrid"): 0.25,
54-
# OpenAI metrics are still recorded, but we do not gate on them yet.
55-
# The current benchmark corpus is too small to make that combo stable.
54+
# OpenAI hybrid should handle paraphrases better than FastEmbed.
55+
("postgres-openai", "lexical", "hybrid"): 0.37,
56+
("postgres-openai", "paraphrase", "hybrid"): 0.25,
5657
}
5758

5859

0 commit comments

Comments
 (0)