Skip to content

Commit fb5d117

Browse files
committed
perf: use deque for iterator object cache
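Both _ObjectIterator and _ObjectAIterator consume cached objects front-to-back via list.pop(0), which is O(n) per removal because every remaining element has to be shifted down by one. Draining a batch of n cached objects (n being the cache size: the default ITERATOR_CACHE_SIZE, typically 100) therefore costs O(n²) in total. Switch to collections.deque with popleft() for O(1) front removal, so each batch drain becomes O(n).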
1 parent c8ec05b commit fb5d117

1 file changed

Lines changed: 9 additions & 8 deletions

File tree

weaviate/collections/iterator.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from collections import deque
12
from dataclasses import dataclass
23
from typing import (
34
Any,
@@ -54,14 +55,14 @@ def __init__(
5455
self.__query = query
5556
self.__inputs = inputs
5657

57-
self.__iter_object_cache: List[Object[TProperties, TReferences]] = []
58+
self.__iter_object_cache: deque[Object[TProperties, TReferences]] = deque()
5859
self.__iter_object_last_uuid: Optional[UUID] = _parse_after(self.__inputs.after)
5960
self.__iter_cache_size = cache_size or ITERATOR_CACHE_SIZE
6061

6162
def __iter__(
6263
self,
6364
) -> Iterator[Object[TProperties, TReferences]]:
64-
self.__iter_object_cache = []
65+
self.__iter_object_cache = deque()
6566
self.__iter_object_last_uuid = _parse_after(self.__inputs.after)
6667
return self
6768

@@ -75,11 +76,11 @@ def __next__(self) -> Object[TProperties, TReferences]:
7576
return_properties=self.__inputs.return_properties,
7677
return_references=self.__inputs.return_references,
7778
)
78-
self.__iter_object_cache = res.objects # type: ignore
79+
self.__iter_object_cache = deque(res.objects) # type: ignore
7980
if len(self.__iter_object_cache) == 0:
8081
raise StopIteration
8182

82-
ret_object = self.__iter_object_cache.pop(0)
83+
ret_object = self.__iter_object_cache.popleft()
8384
self.__iter_object_last_uuid = ret_object.uuid
8485
assert (
8586
self.__iter_object_last_uuid is not None
@@ -100,14 +101,14 @@ def __init__(
100101
self.__query = query
101102
self.__inputs = inputs
102103

103-
self.__iter_object_cache: List[Object[TProperties, TReferences]] = []
104+
self.__iter_object_cache: deque[Object[TProperties, TReferences]] = deque()
104105
self.__iter_object_last_uuid: Optional[UUID] = _parse_after(self.__inputs.after)
105106
self.__iter_cache_size = cache_size or ITERATOR_CACHE_SIZE
106107

107108
def __aiter__(
108109
self,
109110
) -> AsyncIterator[Object[TProperties, TReferences]]:
110-
self.__iter_object_cache = []
111+
self.__iter_object_cache = deque()
111112
self.__iter_object_last_uuid = _parse_after(self.__inputs.after)
112113
return self
113114

@@ -123,11 +124,11 @@ async def __anext__(
123124
return_properties=self.__inputs.return_properties,
124125
return_references=self.__inputs.return_references,
125126
)
126-
self.__iter_object_cache = res.objects # type: ignore
127+
self.__iter_object_cache = deque(res.objects) # type: ignore
127128
if len(self.__iter_object_cache) == 0:
128129
raise StopAsyncIteration
129130

130-
ret_object = self.__iter_object_cache.pop(0)
131+
ret_object = self.__iter_object_cache.popleft()
131132
self.__iter_object_last_uuid = ret_object.uuid
132133
assert (
133134
self.__iter_object_last_uuid is not None

0 commit comments

Comments
 (0)