Skip to content

Commit d696fb0

Browse files
authored
Merge branch 'main' into feat/metal-simdgroup-kernels
2 parents b441924 + 83c03ed commit d696fb0

56 files changed

Lines changed: 608 additions & 388 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/main.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,7 @@ jobs:
8686
arch_flag: "" # ARM64 uses auto-detection
8787
- os: ubuntu-24.04
8888
platform: linux-x64
89-
# FIXME: ENABLE_ZEN3 is hardcoded for the current GitHub-hosted runner (AMD EPYC 7T83).
90-
# This should be removed once #101 is resolved.
91-
arch_flag: "--config-settings='cmake.define.ENABLE_ZEN3=\"ON\"'"
89+
arch_flag: "" # Use native CPU microarchitecture
9290

9391
steps:
9492
- name: Checkout code
@@ -162,4 +160,4 @@ jobs:
162160
./db-example
163161
./core-example
164162
./ailego-example
165-
shell: bash
163+
shell: bash

CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ message(STATUS "BUILD_PYTHON_BINDINGS:${BUILD_PYTHON_BINDINGS}")
2828
option(BUILD_TOOLS "Build tools" ON)
2929
message(STATUS "BUILD_TOOLS:${BUILD_TOOLS}")
3030

31+
option(USE_OSS_MIRROR "Use OSS mirror for faster third-party downloads" ON)
32+
message(STATUS "USE_OSS_MIRROR:${USE_OSS_MIRROR}")
33+
3134
cc_directory(thirdparty)
3235
cc_directories(src)
3336
cc_directories(tests)
@@ -56,6 +59,6 @@ if(BUILD_PYTHON_BINDINGS)
5659
set(ZVEC_PY_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}")
5760
endif()
5861

59-
MESSAGE(STATUS "Zvec install path: ${ZVEC_PY_INSTALL_DIR}")
62+
message(STATUS "Zvec install path: ${ZVEC_PY_INSTALL_DIR}")
6063
install(TARGETS _zvec LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR})
61-
endif()
64+
endif()

cmake/option.cmake

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ option(ENABLE_SAPPHIRERAPIDS "Enable Intel Sapphire Rapids Server CPU microarchi
1313
option(ENABLE_EMERALDRAPIDS "Enable Intel Emerald Rapids Server CPU microarchitecture" OFF)
1414
option(ENABLE_GRANITERAPIDS "Enable Intel Granite Rapids Server CPU microarchitecture" OFF)
1515

16+
option(ENABLE_NATIVE "Enable native CPU microarchitecture" ON)
17+
1618
## AMD Microarchitectures
1719
option(ENABLE_ZEN1 "Enable AMD Zen+ Family 17h CPU microarchitecture" OFF)
1820
option(ENABLE_ZEN2 "Enable AMD Zen 2 Family 17h CPU microarchitecture" OFF)
@@ -36,9 +38,10 @@ set(ARCH_OPTIONS
3638
ENABLE_ZEN1 ENABLE_ZEN2 ENABLE_ZEN3
3739
ENABLE_ARMV8A ENABLE_ARMV8.1A ENABLE_ARMV8.2A ENABLE_ARMV8.3A ENABLE_ARMV8.4A
3840
ENABLE_ARMV8.5A ENABLE_ARMV8.6A
41+
ENABLE_NATIVE
3942
)
4043

41-
set(AUTO_DETECT_ARCH ON)
44+
option(AUTO_DETECT_ARCH "Auto detect CPU microarchitecture" ON)
4245
foreach(opt IN LISTS ARCH_OPTIONS)
4346
if(${opt})
4447
set(AUTO_DETECT_ARCH OFF)
@@ -122,8 +125,11 @@ if(MSVC)
122125
return()
123126
endif()
124127

125-
126128
if(NOT AUTO_DETECT_ARCH)
129+
if(ENABLE_NATIVE)
130+
add_arch_flag("-march=native" NATIVE ENABLE_NATIVE)
131+
endif()
132+
127133
if(ENABLE_ZEN3)
128134
add_arch_flag("-march=znver3" ZNVER3 ENABLE_ZEN3)
129135
endif()

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ classifiers = [
2727
"Programming Language :: Python :: 3.10",
2828
"Programming Language :: Python :: 3.11",
2929
"Programming Language :: Python :: 3.12",
30+
"Programming Language :: Python :: 3.13",
31+
"Programming Language :: Python :: 3.14",
3032
"Topic :: Database",
3133
"Topic :: Scientific/Engineering :: Artificial Intelligence",
3234
"Topic :: Software Development :: Libraries :: Python Modules",
@@ -161,6 +163,8 @@ build = [
161163
"cp310-*",
162164
"cp311-*",
163165
"cp312-*",
166+
"cp313-*",
167+
"cp314-*",
164168
]
165169
build-frontend = "build"
166170
test-requires = ["pytest", "numpy"]

python/zvec/extension/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from .bm25_embedding_function import BM25EmbeddingFunction
1717
from .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction
18+
from .http_embedding_function import HTTPDenseEmbedding
1819
from .jina_embedding_function import JinaDenseEmbedding
1920
from .jina_function import JinaFunctionBase
2021
from .multi_vector_reranker import RrfReRanker, WeightedReRanker
@@ -37,6 +38,7 @@
3738
"DefaultLocalReRanker",
3839
"DefaultLocalSparseEmbedding",
3940
"DenseEmbeddingFunction",
41+
"HTTPDenseEmbedding",
4042
"JinaDenseEmbedding",
4143
"JinaFunctionBase",
4244
"OpenAIDenseEmbedding",
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
# Copyright 2025-present the zvec project
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from __future__ import annotations
15+
16+
import json
17+
import os
18+
import urllib.request
19+
from functools import lru_cache
20+
from typing import Optional
21+
22+
from ..common.constants import TEXT, DenseVectorType
23+
from .embedding_function import DenseEmbeddingFunction
24+
25+
26+
class HTTPDenseEmbedding(DenseEmbeddingFunction[TEXT]):
27+
"""Dense text embedding function using any OpenAI-compatible HTTP endpoint.
28+
29+
This class calls any server that implements the ``/v1/embeddings`` API
30+
(LM Studio, Ollama, vLLM, LocalAI, etc.) using only the Python standard
31+
library — no extra dependencies are required.
32+
33+
The embedding dimension is detected automatically from the first server
34+
response.
35+
36+
Args:
37+
base_url (str, optional): Base URL of the embedding server.
38+
Defaults to ``"http://localhost:1234"`` (LM Studio).
39+
Common values:
40+
41+
- ``"http://localhost:1234"`` — LM Studio
42+
- ``"http://localhost:11434"`` — Ollama
43+
model (str, optional): Model identifier as expected by the server.
44+
Defaults to ``"text-embedding-nomic-embed-text-v1.5@f16"``.
45+
api_key (Optional[str], optional): Bearer token for authenticated
46+
endpoints. Falls back to the ``OPENAI_API_KEY`` environment
47+
variable. Leave as ``None`` for local servers that do not
48+
require authentication.
49+
timeout (int, optional): HTTP request timeout in seconds.
50+
Defaults to 30.
51+
52+
Attributes:
53+
dimension (int): Embedding vector dimensionality (auto-detected).
54+
55+
Raises:
56+
TypeError: If ``embed()`` receives a non-string input.
57+
ValueError: If input is empty/whitespace-only or the server returns
58+
an unexpected response format.
59+
RuntimeError: If the HTTP request fails or the server is unreachable.
60+
61+
Examples:
62+
>>> from zvec.extension import HTTPDenseEmbedding
63+
>>>
64+
>>> # LM Studio (default)
65+
>>> emb = HTTPDenseEmbedding()
66+
>>> vector = emb.embed("Hello, world!")
67+
>>> len(vector)
68+
768
69+
>>>
70+
>>> # Ollama
71+
>>> emb = HTTPDenseEmbedding(
72+
... base_url="http://localhost:11434",
73+
... model="nomic-embed-text",
74+
... )
75+
>>> vector = emb.embed("Semantic search with local models")
76+
77+
See Also:
78+
- ``DenseEmbeddingFunction``: Protocol for dense embeddings.
79+
- ``OpenAIDenseEmbedding``: Cloud embedding via the OpenAI API.
80+
"""
81+
82+
ENDPOINT = "/v1/embeddings"
83+
84+
def __init__(
85+
self,
86+
base_url: str = "http://localhost:1234",
87+
model: str = "text-embedding-nomic-embed-text-v1.5@f16",
88+
api_key: Optional[str] = None,
89+
timeout: int = 30,
90+
) -> None:
91+
self._base_url = base_url.rstrip("/")
92+
self._model = model
93+
self._api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
94+
self._timeout = timeout
95+
self._dimension: Optional[int] = None
96+
97+
@property
98+
def dimension(self) -> int:
99+
"""int: Embedding vector dimensionality (auto-detected on first call)."""
100+
if self._dimension is None:
101+
self._dimension = len(self.embed("dimension probe"))
102+
return self._dimension
103+
104+
def __call__(self, input: TEXT) -> DenseVectorType:
105+
"""Make the embedding function callable."""
106+
return self.embed(input)
107+
108+
@lru_cache(maxsize=256)
109+
def embed(self, input: TEXT) -> DenseVectorType:
110+
"""Generate a dense embedding vector for the input text.
111+
112+
Results are cached (LRU, up to 256 entries) so repeated strings
113+
do not trigger extra HTTP requests.
114+
115+
Args:
116+
input (TEXT): Input text string to embed. Must be non-empty
117+
after stripping whitespace.
118+
119+
Returns:
120+
DenseVectorType: A list of floats representing the embedding.
121+
122+
Raises:
123+
TypeError: If *input* is not a string.
124+
ValueError: If *input* is empty/whitespace-only or the server
125+
returns an unexpected response format.
126+
RuntimeError: If the HTTP request fails.
127+
"""
128+
if not isinstance(input, TEXT):
129+
raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")
130+
131+
input = input.strip()
132+
if not input:
133+
raise ValueError("Input text cannot be empty or whitespace only")
134+
135+
url = self._base_url + self.ENDPOINT
136+
payload = json.dumps({"model": self._model, "input": input}).encode()
137+
138+
headers: dict[str, str] = {"Content-Type": "application/json"}
139+
if self._api_key:
140+
headers["Authorization"] = f"Bearer {self._api_key}"
141+
142+
req = urllib.request.Request(url, data=payload, headers=headers, method="POST")
143+
try:
144+
with urllib.request.urlopen(req, timeout=self._timeout) as resp:
145+
body = json.loads(resp.read())
146+
except urllib.error.HTTPError as exc:
147+
raise RuntimeError(
148+
f"Embedding server returned HTTP {exc.code}: {exc.read().decode()}"
149+
) from exc
150+
except OSError as exc:
151+
raise RuntimeError(
152+
f"Could not reach embedding server at {url}: {exc}"
153+
) from exc
154+
155+
try:
156+
vector: list[float] = body["data"][0]["embedding"]
157+
except (KeyError, IndexError) as exc:
158+
raise ValueError(
159+
f"Unexpected response format from embedding server: {body}"
160+
) from exc
161+
162+
return vector

src/ailego/algorithm/lloyd_cluster.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include <algorithm>
1818
#include <random>
19+
#include <vector>
1920
#include <ailego/parallel/lock.h>
2021
#include <zvec/ailego/parallel/thread_pool.h>
2122
#include <zvec/ailego/utility/type_helper.h>
@@ -247,7 +248,7 @@ class LloydCluster {
247248
protected:
248249
//! Cluster the cache features
249250
void cluster_cache_features(void) {
250-
float scores[BatchCount];
251+
std::vector<float> scores(BatchCount);
251252

252253
for (size_t i = 0, n = feature_cache_.count(); i != n; ++i) {
253254
size_t count = centroids_matrix_.count() / BatchCount * BatchCount;
@@ -258,7 +259,7 @@ class LloydCluster {
258259
for (size_t j = 0; j != count; j += BatchCount) {
259260
ContextType::template BatchDistance<1>(centroids_matrix_[j], feature,
260261
centroids_matrix_.dimension(),
261-
scores);
262+
scores.data());
262263

263264
for (size_t k = 0; k < BatchCount; ++k) {
264265
if (scores[k] < nearest_score) {
@@ -271,7 +272,7 @@ class LloydCluster {
271272
for (size_t j = count, total = centroids_matrix_.count(); j != total;
272273
++j) {
273274
ContextType::Distance(centroids_matrix_[j], feature,
274-
centroids_matrix_.dimension(), scores);
275+
centroids_matrix_.dimension(), scores.data());
275276

276277
if (scores[0] < nearest_score) {
277278
nearest_score = scores[0];
@@ -295,23 +296,23 @@ class LloydCluster {
295296
return i < j;
296297
};
297298

298-
float nearest_scores[BatchCount];
299-
size_t nearest_indexes[BatchCount];
299+
std::vector<float> nearest_scores(BatchCount);
300+
std::vector<size_t> nearest_indexes(BatchCount);
300301

301302
rows.resize(BatchCount);
302303
for (size_t i = first * BatchCount; i != last * BatchCount;
303304
i += BatchCount) {
304305
size_t count = centroids_matrix_.count() / BatchCount * BatchCount;
305306
const StoreType *block = feature_matrix_[i];
306307

307-
std::fill(nearest_indexes, nearest_indexes + BatchCount, 0);
308-
std::fill(nearest_scores, nearest_scores + BatchCount,
308+
std::fill(nearest_indexes.data(), nearest_indexes.data() + BatchCount, 0);
309+
std::fill(nearest_scores.data(), nearest_scores.data() + BatchCount,
309310
std::numeric_limits<float>::max());
310311

311312
for (size_t j = 0; j != count; j += BatchCount) {
312313
ContextType::template BatchDistance<BatchCount>(
313314
centroids_matrix_[j], block, centroids_matrix_.dimension(),
314-
&scores[0]);
315+
scores.data());
315316

316317
for (size_t k = 0; k < BatchCount; ++k) {
317318
const float *start = &scores[k * BatchCount];
@@ -328,7 +329,7 @@ class LloydCluster {
328329
++j) {
329330
ContextType::template BatchDistance<1>(block, centroids_matrix_[j],
330331
centroids_matrix_.dimension(),
331-
&scores[0]);
332+
scores.data());
332333

333334
for (size_t k = 0; k < BatchCount; ++k) {
334335
float score = scores[k];

0 commit comments

Comments (0)