Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 74 additions & 25 deletions src/qql/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,18 @@ class CollectionTopology:
has_unnamed_dense: bool = False
dense_names: tuple[str, ...] = ()
sparse_names: tuple[str, ...] = ()
# Sizes fetched once in _resolve_topology() so _ensure_collection() never
# needs a second get_collection() call.
dense_sizes: tuple[tuple[str, int], ...] = ()

def dense_size_map(self) -> dict[str, int]:
"""Return {vector_name: size} for every dense vector whose size was fetched.

Unnamed single-vector collections appear under the ``""`` key, matching
``dense_config_key()``. Returns an empty dict when ``exists`` is False or
sizes were not available (e.g. when a mock omits the size attribute).
"""
return dict(self.dense_sizes)

@property
def has_dense(self) -> bool:
Expand Down Expand Up @@ -228,25 +240,51 @@ def execute(self, node: ASTNode) -> ExecutionResult:

# ── Statement executors ───────────────────────────────────────────────

def _resolve_topology(self, name: str) -> CollectionTopology:
if not self._client.collection_exists(name):
return CollectionTopology(exists=False, is_named_dense=False)
def _fetch_collection_info(self, name: str):
"""Fetch full CollectionInfo for *name* in a single API call.

info = self._client.get_collection(name)
Returns the CollectionInfo object when the collection exists, or
``None`` when the collection is not found (HTTP 404). Any other
Qdrant error is re-raised as :class:`QQLRuntimeError`.
"""
try:
return self._client.get_collection(name)
except UnexpectedResponse as e:
if e.status_code == 404:
return None
raise QQLRuntimeError(
f"Qdrant error fetching collection '{name}': {e}"
) from e
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

        except ValueError as e:
            if f"Collection {name} not found" in str(e):
                return None
            raise

add something like this .. so that get_collection() raises ValueError instead of HTTP UnexpectedResponse(404)


def _topology_from_collection_info(self, info: Any) -> CollectionTopology:
"""Parse a CollectionInfo object into a :class:`CollectionTopology`.

Separates API access (handled by :meth:`_fetch_collection_info`) from
topology parsing so each concern can be tested independently.
"""
params = info.config.params
vectors = params.vectors # type: ignore[union-attr]
sparse_vectors = params.sparse_vectors or {}

if isinstance(vectors, dict):
dense_names = tuple(vectors.keys())
dense_sizes: tuple[tuple[str, int], ...] = tuple(
(k, v.size)
for k, v in vectors.items()
if getattr(v, "size", None) is not None
)
has_unnamed_dense = False
is_named_dense = True
elif vectors is None:
dense_names = ()
dense_sizes = ()
has_unnamed_dense = False
is_named_dense = False
else:
# Single unnamed dense vector
dense_names = ()
unnamed_size = getattr(vectors, "size", None)
dense_sizes = (("", unnamed_size),) if unnamed_size is not None else ()
has_unnamed_dense = True
is_named_dense = False

Expand All @@ -259,8 +297,20 @@ def _resolve_topology(self, name: str) -> CollectionTopology:
has_unnamed_dense=has_unnamed_dense,
dense_names=dense_names,
sparse_names=sparse_names,
dense_sizes=dense_sizes,
)

def _resolve_topology(self, name: str) -> CollectionTopology:
"""Return the topology for *name* using exactly one Qdrant API call.

Calls :meth:`_fetch_collection_info` once. A 404 response is treated
as ``exists=False``; any other error is propagated.
"""
info = self._fetch_collection_info(name)
if info is None:
return CollectionTopology(exists=False, is_named_dense=False)
return self._topology_from_collection_info(info)

def _default_dense_vector_name(self) -> str:
return self._config.default_dense_vector_name

Expand Down Expand Up @@ -565,9 +615,9 @@ def _execute_create(self, node: CreateCollectionStmt) -> ExecutionResult:
)

def _execute_alter_collection(self, node: AlterCollectionStmt) -> ExecutionResult:
if not self._client.collection_exists(node.collection):
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
topology = self._resolve_topology(node.collection)
if not topology.exists:
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")

update_kwargs: dict[str, Any] = {"collection_name": node.collection}
vectors_config = self._build_vectors_config_diff(topology, node.config)
Expand Down Expand Up @@ -660,10 +710,9 @@ def _execute_show(self, node: ShowCollectionsStmt) -> ExecutionResult:
)

def _execute_show_collection(self, node: ShowCollectionStmt) -> ExecutionResult:
if not self._client.collection_exists(node.collection):
info = self._fetch_collection_info(node.collection)
if info is None:
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")

info = self._client.get_collection(node.collection)
config = info.config
params = config.params

Expand Down Expand Up @@ -828,9 +877,9 @@ def _execute_select(self, node: SelectStmt) -> ExecutionResult:
)

def _execute_search(self, node: SearchStmt) -> ExecutionResult:
if not self._client.collection_exists(node.collection):
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
topology = self._resolve_topology(node.collection)
if not topology.exists:
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")

# Build WHERE filter (shared by both hybrid and dense-only paths)
qdrant_filter: Filter | None = None
Expand Down Expand Up @@ -1711,9 +1760,9 @@ def _execute_search_groups(

def _execute_update_vector(self, node: UpdateVectorStmt) -> ExecutionResult:
"""Execute UPDATE ... SET VECTOR using update_vectors()."""
if not self._client.collection_exists(node.collection):
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
topology = self._resolve_topology(node.collection)
if not topology.exists:
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
vector_name = topology.dense_payload_name(node.vector_name)
vector_struct: Any = (
{vector_name: list(node.vector)} if vector_name else list(node.vector)
Expand Down Expand Up @@ -1915,33 +1964,33 @@ def _ensure_collection(
topology: CollectionTopology,
explicit_vector: str | None,
) -> None:
"""Create the collection if it doesn't exist. Raises on dimension mismatch.
"""Create the collection if needed, or validate dimension compatibility.

QQL-created dense collections use the configured dense vector name.
Externally created unnamed collections still accept plain dense vectors.
All validation is done against pre-fetched ``topology`` data; no extra
Qdrant API calls are made.
"""
if topology.exists:
info = self._client.get_collection(name)
vectors = info.config.params.vectors # type: ignore[union-attr]
if isinstance(vectors, dict):
sizes = topology.dense_size_map()
if topology.is_named_dense:
# dense_using() raises QQLRuntimeError on bad/ambiguous names,
# and always returns a non-None string in the named-dense branch.
vector_name = topology.dense_using(explicit_vector)
if vector_name is None:
raise QQLRuntimeError("Collection has no dense vector")
vector_config = vectors[vector_name]
expected_size = getattr(vector_config, "size", None)
expected_size = sizes.get(vector_name) # type: ignore[arg-type]
if expected_size is not None and expected_size != vector_size:
raise QQLRuntimeError(
f"Vector dimension mismatch: collection '{name}' vector "
f"'{vector_name}' expects {expected_size} dims, but "
f"model produces {vector_size} dims. Specify a compatible "
"model with USING MODEL '<model>'."
)
elif vectors is not None:
# Unnamed single-vector collection: validate dimensions
if vectors.size != vector_size:
elif topology.has_unnamed_dense:
expected_size = sizes.get("")
if expected_size is not None and expected_size != vector_size:
raise QQLRuntimeError(
f"Vector dimension mismatch: collection '{name}' expects "
f"{vectors.size} dims, but model produces {vector_size} dims. "
f"{expected_size} dims, but model produces {vector_size} dims. "
f"Specify a compatible model with USING MODEL '<model>'."
)
else:
Expand Down
119 changes: 119 additions & 0 deletions tests/test_executor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import pytest
from unittest.mock import DEFAULT

from qdrant_client.http.exceptions import UnexpectedResponse

from qql.ast_nodes import (
AlterCollectionStmt,
Expand Down Expand Up @@ -52,8 +55,20 @@ def collection_exists(_name):
def create_collection(**_kwargs):
state["exists"] = True

def get_collection(_name):
if not (state["exists"] or bool(client.collection_exists.return_value)):
raise UnexpectedResponse(
status_code=404,
reason_phrase="Not Found",
content=b"Not Found",
headers={},
)
# Collection exists — fall through to the configured return_value
return DEFAULT

client.collection_exists.side_effect = collection_exists
client.create_collection.side_effect = create_collection
client.get_collection.side_effect = get_collection
return client


Expand Down Expand Up @@ -3492,3 +3507,107 @@ def test_grouped_hybrid_search_uses_build_hybrid_vectors(self, executor, mock_cl
)
executor.execute(node)
helper.assert_called_once()


# ── Regression: single metadata fetch path ────────────────────────────────────


class TestCollectionMetadataFetchCount:
"""Verify that every high-level operation issues exactly ONE get_collection()
call (Issue #42 — eliminate duplicate Qdrant API calls).

_resolve_topology() must delegate to _fetch_collection_info() which calls
get_collection() once and handles 404 as exists=False without a prior
collection_exists() round-trip.
"""

def test_insert_existing_dense_collection_fetches_metadata_once(
self, executor, mock_client
):
"""Dense INSERT to an existing collection must call get_collection() exactly once."""
mock_client.collection_exists.return_value = True
mock_client.get_collection.return_value.config.params.vectors.size = 384
# Use a plain unnamed-dense mock (non-dict vectors)
mock_client.get_collection.return_value.config.params.sparse_vectors = None

node = InsertStmt(collection="docs", values={"text": "hello"}, model=None)
executor.execute(node)

mock_client.get_collection.assert_called_once()

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mock_client.collection_exists.assert_not_called()

def test_insert_existing_hybrid_collection_fetches_metadata_once(
self, executor, mock_client
):
"""Hybrid auto-detect INSERT must call get_collection() exactly once."""
from qdrant_client.models import Distance, SparseVectorParams, VectorParams

mock_client.collection_exists.return_value = True
mock_client.get_collection.return_value.config.params.vectors = {
"dense": VectorParams(size=384, distance=Distance.COSINE)
}
mock_client.get_collection.return_value.config.params.sparse_vectors = {
"sparse": SparseVectorParams()
}

node = InsertStmt(collection="docs", values={"text": "hello"}, model=None)
executor.execute(node)

mock_client.get_collection.assert_called_once()

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mock_client.collection_exists.assert_not_called()

def test_dimension_mismatch_fetches_metadata_once(self, executor, mock_client):
"""A dimension-mismatch error must be raised without a second metadata fetch."""
from qdrant_client.models import Distance, VectorParams

mock_client.collection_exists.return_value = True
mock_client.get_collection.return_value.config.params.vectors = {
"dense": VectorParams(size=768, distance=Distance.COSINE)
}
mock_client.get_collection.return_value.config.params.sparse_vectors = None

node = InsertStmt(
collection="docs",
values={"text": "hello"},
model=None,
dense_vector="dense",
)
from qql.exceptions import QQLRuntimeError

with pytest.raises(QQLRuntimeError, match="dimension mismatch"):
executor.execute(node)

# Only one metadata call despite the error
mock_client.get_collection.assert_called_once()

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mock_client.collection_exists.assert_not_called()

def test_insert_nonexistent_collection_issues_one_get_collection_call(
self, executor, mock_client
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change it to
self, executor, mock_client, mocker

):
"""Inserting into a brand-new collection calls get_collection() once
(which returns 404 and is handled as exists=False) then creates the collection."""
mock_client.collection_exists.return_value = False

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

create_collection = mocker.patch.object(executor, "_create_collection_and_wait")

node = InsertStmt(collection="new_col", values={"text": "hello"}, model=None)
executor.execute(node)

# _fetch_collection_info() must have been called once (got 404 → new collection)
mock_client.get_collection.assert_called_once()
mock_client.create_collection.assert_called_once()

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mock_client.collection_exists.assert_not_called()

def test_search_existing_collection_fetches_metadata_once(
self, executor, mock_client
):
"""SEARCH on an existing dense collection must call get_collection() exactly once."""
from qdrant_client.models import Distance, VectorParams

mock_client.collection_exists.return_value = True
mock_client.get_collection.return_value.config.params.vectors = {
"dense": VectorParams(size=384, distance=Distance.COSINE)
}
mock_client.get_collection.return_value.config.params.sparse_vectors = None
mock_client.query_points.return_value.points = []

node = SearchStmt(
collection="docs", query_text="hello", limit=5, model=None
)
executor.execute(node)

mock_client.get_collection.assert_called_once()
Comment on lines +3524 to +3613
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Coverage gap: assert that collection_exists() is not called in the single-fetch path tests.

These tests currently prove one get_collection() call, but they still allow an extra collection_exists() round-trip. That misses the explicit Issue #42 contract.

✅ Suggested test hardening
 class TestCollectionMetadataFetchCount:
@@
     def test_insert_existing_dense_collection_fetches_metadata_once(
         self, executor, mock_client
     ):
@@
         executor.execute(node)

         mock_client.get_collection.assert_called_once()
+        mock_client.collection_exists.assert_not_called()
@@
     def test_insert_existing_hybrid_collection_fetches_metadata_once(
         self, executor, mock_client
     ):
@@
         executor.execute(node)

         mock_client.get_collection.assert_called_once()
+        mock_client.collection_exists.assert_not_called()
@@
     def test_dimension_mismatch_fetches_metadata_once(self, executor, mock_client):
@@
         with pytest.raises(QQLRuntimeError, match="dimension mismatch"):
             executor.execute(node)

         # Only one metadata call despite the error
         mock_client.get_collection.assert_called_once()
+        mock_client.collection_exists.assert_not_called()
@@
     def test_insert_nonexistent_collection_issues_one_get_collection_call(
         self, executor, mock_client
     ):
@@
         # _fetch_collection_info() must have been called once (got 404 → new collection)
         mock_client.get_collection.assert_called_once()
+        mock_client.collection_exists.assert_not_called()
         mock_client.create_collection.assert_called_once()
@@
     def test_search_existing_collection_fetches_metadata_once(
         self, executor, mock_client
     ):
@@
         executor.execute(node)

         mock_client.get_collection.assert_called_once()
+        mock_client.collection_exists.assert_not_called()
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tests/test_executor.py` around lines 3524 - 3613, The tests (e.g.,
test_insert_existing_dense_collection_fetches_metadata_once,
test_insert_existing_hybrid_collection_fetches_metadata_once,
test_dimension_mismatch_fetches_metadata_once,
test_search_existing_collection_fetches_metadata_once,
test_insert_nonexistent_collection_issues_one_get_collection_call) currently
assert get_collection() was called once but do not assert collection_exists()
was not called; update each single-fetch-path test to also assert
mock_client.collection_exists.assert_not_called() (or the equivalent negative
assertion) after executor.execute(...) so the tests enforce the Issue `#42`
contract that no extra collection_exists() round-trip occurs when metadata is
fetched once.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mock_client.collection_exists.assert_not_called()

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

def test_local_qdrant_missing_collection_resolves_as_absent(self, cfg):
"""Qdrant local mode raises ValueError for missing collections, not HTTP 404."""
from qdrant_client import QdrantClient

    topology = Executor(QdrantClient(":memory:"), cfg)._resolve_topology("ghost")

    assert topology.exists is False

Loading