Conversation
📝 WalkthroughWalkthroughThis PR caches dense vector sizes in CollectionTopology, consolidates topology resolution to a single fetch per operation, and updates executor flows and validation to use the cached topology; tests assert exactly one metadata fetch per high-level operation. ChangesTopology Metadata Caching
Estimated code review effort🎯 4 (Complex) | ⏱️ ~45 minutes Poem
🚥 Pre-merge checks | ✅ 3 | ❌ 2❌ Failed checks (1 warning, 1 inconclusive)
✅ Passed checks (3 passed)
✏️ Tip: You can configure your own custom pre-merge checks in the settings. ✨ Finishing Touches📝 Generate docstrings
🧪 Generate unit tests (beta)
Comment |
|
The change is in the right direction: caching dense vector sizes in Before merging, I think we should complete the scope of issue #42 in this PR:
This would turn the current partial optimization into the full fix: one resolved topology object reused for both routing and validation. |
There was a problem hiding this comment.
Actionable comments posted: 1
🤖 Prompt for all review comments with AI agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.
Inline comments:
In `@tests/test_executor.py`:
- Around line 3524-3613: The tests (e.g.,
test_insert_existing_dense_collection_fetches_metadata_once,
test_insert_existing_hybrid_collection_fetches_metadata_once,
test_dimension_mismatch_fetches_metadata_once,
test_search_existing_collection_fetches_metadata_once,
test_insert_nonexistent_collection_issues_one_get_collection_call) currently
assert get_collection() was called once but do not assert collection_exists()
was not called; update each single-fetch-path test to also assert
mock_client.collection_exists.assert_not_called() (or the equivalent negative
assertion) after executor.execute(...) so the tests enforce the Issue `#42`
contract that no extra collection_exists() round-trip occurs when metadata is
fetched once.
🪄 Autofix (Beta)
Fix all unresolved CodeRabbit comments on this PR:
- Push a commit to this branch (recommended)
- Create a new PR with the fixes
ℹ️ Review info
⚙️ Run configuration
Configuration used: Repository UI
Review profile: CHILL
Plan: Pro Plus
Run ID: 45760e95-f3fe-4a6b-839f-dcab74eb54ff
📒 Files selected for processing (2)
src/qql/executor.pytests/test_executor.py
| def test_insert_existing_dense_collection_fetches_metadata_once( | ||
| self, executor, mock_client | ||
| ): | ||
| """Dense INSERT to an existing collection must call get_collection() exactly once.""" | ||
| mock_client.collection_exists.return_value = True | ||
| mock_client.get_collection.return_value.config.params.vectors.size = 384 | ||
| # Use a plain unnamed-dense mock (non-dict vectors) | ||
| mock_client.get_collection.return_value.config.params.sparse_vectors = None | ||
|
|
||
| node = InsertStmt(collection="docs", values={"text": "hello"}, model=None) | ||
| executor.execute(node) | ||
|
|
||
| mock_client.get_collection.assert_called_once() | ||
|
|
||
| def test_insert_existing_hybrid_collection_fetches_metadata_once( | ||
| self, executor, mock_client | ||
| ): | ||
| """Hybrid auto-detect INSERT must call get_collection() exactly once.""" | ||
| from qdrant_client.models import Distance, SparseVectorParams, VectorParams | ||
|
|
||
| mock_client.collection_exists.return_value = True | ||
| mock_client.get_collection.return_value.config.params.vectors = { | ||
| "dense": VectorParams(size=384, distance=Distance.COSINE) | ||
| } | ||
| mock_client.get_collection.return_value.config.params.sparse_vectors = { | ||
| "sparse": SparseVectorParams() | ||
| } | ||
|
|
||
| node = InsertStmt(collection="docs", values={"text": "hello"}, model=None) | ||
| executor.execute(node) | ||
|
|
||
| mock_client.get_collection.assert_called_once() | ||
|
|
||
| def test_dimension_mismatch_fetches_metadata_once(self, executor, mock_client): | ||
| """A dimension-mismatch error must be raised without a second metadata fetch.""" | ||
| from qdrant_client.models import Distance, VectorParams | ||
|
|
||
| mock_client.collection_exists.return_value = True | ||
| mock_client.get_collection.return_value.config.params.vectors = { | ||
| "dense": VectorParams(size=768, distance=Distance.COSINE) | ||
| } | ||
| mock_client.get_collection.return_value.config.params.sparse_vectors = None | ||
|
|
||
| node = InsertStmt( | ||
| collection="docs", | ||
| values={"text": "hello"}, | ||
| model=None, | ||
| dense_vector="dense", | ||
| ) | ||
| from qql.exceptions import QQLRuntimeError | ||
|
|
||
| with pytest.raises(QQLRuntimeError, match="dimension mismatch"): | ||
| executor.execute(node) | ||
|
|
||
| # Only one metadata call despite the error | ||
| mock_client.get_collection.assert_called_once() | ||
|
|
||
| def test_insert_nonexistent_collection_issues_one_get_collection_call( | ||
| self, executor, mock_client | ||
| ): | ||
| """Inserting into a brand-new collection calls get_collection() once | ||
| (which returns 404 and is handled as exists=False) then creates the collection.""" | ||
| mock_client.collection_exists.return_value = False | ||
|
|
||
| node = InsertStmt(collection="new_col", values={"text": "hello"}, model=None) | ||
| executor.execute(node) | ||
|
|
||
| # _fetch_collection_info() must have been called once (got 404 → new collection) | ||
| mock_client.get_collection.assert_called_once() | ||
| mock_client.create_collection.assert_called_once() | ||
|
|
||
| def test_search_existing_collection_fetches_metadata_once( | ||
| self, executor, mock_client | ||
| ): | ||
| """SEARCH on an existing dense collection must call get_collection() exactly once.""" | ||
| from qdrant_client.models import Distance, VectorParams | ||
|
|
||
| mock_client.collection_exists.return_value = True | ||
| mock_client.get_collection.return_value.config.params.vectors = { | ||
| "dense": VectorParams(size=384, distance=Distance.COSINE) | ||
| } | ||
| mock_client.get_collection.return_value.config.params.sparse_vectors = None | ||
| mock_client.query_points.return_value.points = [] | ||
|
|
||
| node = SearchStmt( | ||
| collection="docs", query_text="hello", limit=5, model=None | ||
| ) | ||
| executor.execute(node) | ||
|
|
||
| mock_client.get_collection.assert_called_once() |
There was a problem hiding this comment.
Coverage gap: assert that collection_exists() is not called in the single-fetch path tests.
These tests currently prove one get_collection() call, but they still allow an extra collection_exists() round-trip. That misses the explicit Issue #42 contract.
✅ Suggested test hardening
class TestCollectionMetadataFetchCount:
@@
def test_insert_existing_dense_collection_fetches_metadata_once(
self, executor, mock_client
):
@@
executor.execute(node)
mock_client.get_collection.assert_called_once()
+ mock_client.collection_exists.assert_not_called()
@@
def test_insert_existing_hybrid_collection_fetches_metadata_once(
self, executor, mock_client
):
@@
executor.execute(node)
mock_client.get_collection.assert_called_once()
+ mock_client.collection_exists.assert_not_called()
@@
def test_dimension_mismatch_fetches_metadata_once(self, executor, mock_client):
@@
with pytest.raises(QQLRuntimeError, match="dimension mismatch"):
executor.execute(node)
# Only one metadata call despite the error
mock_client.get_collection.assert_called_once()
+ mock_client.collection_exists.assert_not_called()
@@
def test_insert_nonexistent_collection_issues_one_get_collection_call(
self, executor, mock_client
):
@@
# _fetch_collection_info() must have been called once (got 404 → new collection)
mock_client.get_collection.assert_called_once()
+ mock_client.collection_exists.assert_not_called()
mock_client.create_collection.assert_called_once()
@@
def test_search_existing_collection_fetches_metadata_once(
self, executor, mock_client
):
@@
executor.execute(node)
mock_client.get_collection.assert_called_once()
+ mock_client.collection_exists.assert_not_called()🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.
In `@tests/test_executor.py` around lines 3524 - 3613, The tests (e.g.,
test_insert_existing_dense_collection_fetches_metadata_once,
test_insert_existing_hybrid_collection_fetches_metadata_once,
test_dimension_mismatch_fetches_metadata_once,
test_search_existing_collection_fetches_metadata_once,
test_insert_nonexistent_collection_issues_one_get_collection_call) currently
assert get_collection() was called once but do not assert collection_exists()
was not called; update each single-fetch-path test to also assert
mock_client.collection_exists.assert_not_called() (or the equivalent negative
assertion) after executor.execute(...) so the tests enforce the Issue `#42`
contract that no extra collection_exists() round-trip occurs when metadata is
fetched once.
| return None | ||
| raise QQLRuntimeError( | ||
| f"Qdrant error fetching collection '{name}': {e}" | ||
| ) from e |
There was a problem hiding this comment.
except ValueError as e:
if f"Collection {name} not found" in str(e):
return None
raise
add something like this .. so that get_collection() raises ValueError instead of HTTP UnexpectedResponse(404)
| executor.execute(node) | ||
|
|
||
| mock_client.get_collection.assert_called_once() | ||
|
|
There was a problem hiding this comment.
mock_client.collection_exists.assert_not_called()
| executor.execute(node) | ||
|
|
||
| mock_client.get_collection.assert_called_once() | ||
|
|
There was a problem hiding this comment.
mock_client.collection_exists.assert_not_called()
|
|
||
| # Only one metadata call despite the error | ||
| mock_client.get_collection.assert_called_once() | ||
|
|
There was a problem hiding this comment.
mock_client.collection_exists.assert_not_called()
| mock_client.get_collection.assert_called_once() | ||
|
|
||
| def test_insert_nonexistent_collection_issues_one_get_collection_call( | ||
| self, executor, mock_client |
There was a problem hiding this comment.
Change it to
self, executor, mock_client, mocker
| """Inserting into a brand-new collection calls get_collection() once | ||
| (which returns 404 and is handled as exists=False) then creates the collection.""" | ||
| mock_client.collection_exists.return_value = False | ||
|
|
There was a problem hiding this comment.
create_collection = mocker.patch.object(executor, "_create_collection_and_wait")
| # _fetch_collection_info() must have been called once (got 404 → new collection) | ||
| mock_client.get_collection.assert_called_once() | ||
| mock_client.create_collection.assert_called_once() | ||
|
|
There was a problem hiding this comment.
mock_client.collection_exists.assert_not_called()
| ) | ||
| executor.execute(node) | ||
|
|
||
| mock_client.get_collection.assert_called_once() |
There was a problem hiding this comment.
mock_client.collection_exists.assert_not_called()
| ) | ||
| executor.execute(node) | ||
|
|
||
| mock_client.get_collection.assert_called_once() |
There was a problem hiding this comment.
def test_local_qdrant_missing_collection_resolves_as_absent(self, cfg):
"""Qdrant local mode raises ValueError for missing collections, not HTTP 404."""
from qdrant_client import QdrantClient
topology = Executor(QdrantClient(":memory:"), cfg)._resolve_topology("ghost")
assert topology.exists is False
Summary by CodeRabbit