Skip to content

Commit d696fb0

Browse files
authored
Merge branch 'main' into feat/metal-simdgroup-kernels
2 parents b441924 + 83c03ed commit d696fb0

56 files changed

Lines changed: 608 additions & 388 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/main.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,7 @@ jobs:
8686
arch_flag: "" # ARM64 uses auto-detection
8787
- os: ubuntu-24.04
8888
platform: linux-x64
89-
# FIXME: ENABLE_ZEN3 is hardcoded for the current GitHub-hosted runner (AMD EPYC 7T83).
90-
# This should be removed once #101 is resolved.
91-
arch_flag: "--config-settings='cmake.define.ENABLE_ZEN3=\"ON\"'"
89+
arch_flag: "" # Use native CPU microarchitecture
9290

9391
steps:
9492
- name: Checkout code
@@ -162,4 +160,4 @@ jobs:
162160
./db-example
163161
./core-example
164162
./ailego-example
165-
shell: bash
163+
shell: bash

CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ message(STATUS "BUILD_PYTHON_BINDINGS:${BUILD_PYTHON_BINDINGS}")
2828
option(BUILD_TOOLS "Build tools" ON)
2929
message(STATUS "BUILD_TOOLS:${BUILD_TOOLS}")
3030

31+
option(USE_OSS_MIRROR "Use OSS mirror for faster third-party downloads" ON)
32+
message(STATUS "USE_OSS_MIRROR:${USE_OSS_MIRROR}")
33+
3134
cc_directory(thirdparty)
3235
cc_directories(src)
3336
cc_directories(tests)
@@ -56,6 +59,6 @@ if(BUILD_PYTHON_BINDINGS)
5659
set(ZVEC_PY_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}")
5760
endif()
5861

59-
MESSAGE(STATUS "Zvec install path: ${ZVEC_PY_INSTALL_DIR}")
62+
message(STATUS "Zvec install path: ${ZVEC_PY_INSTALL_DIR}")
6063
install(TARGETS _zvec LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR})
61-
endif()
64+
endif()

cmake/option.cmake

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ option(ENABLE_SAPPHIRERAPIDS "Enable Intel Sapphire Rapids Server CPU microarchi
1313
option(ENABLE_EMERALDRAPIDS "Enable Intel Emerald Rapids Server CPU microarchitecture" OFF)
1414
option(ENABLE_GRANITERAPIDS "Enable Intel Granite Rapids Server CPU microarchitecture" OFF)
1515

16+
option(ENABLE_NATIVE "Enable native CPU microarchitecture" ON)
17+
1618
## AMD Microarchitectures
1719
option(ENABLE_ZEN1 "Enable AMD Zen+ Family 17h CPU microarchitecture" OFF)
1820
option(ENABLE_ZEN2 "Enable AMD Zen 2 Family 17h CPU microarchitecture" OFF)
@@ -36,9 +38,10 @@ set(ARCH_OPTIONS
3638
ENABLE_ZEN1 ENABLE_ZEN2 ENABLE_ZEN3
3739
ENABLE_ARMV8A ENABLE_ARMV8.1A ENABLE_ARMV8.2A ENABLE_ARMV8.3A ENABLE_ARMV8.4A
3840
ENABLE_ARMV8.5A ENABLE_ARMV8.6A
41+
ENABLE_NATIVE
3942
)
4043

41-
set(AUTO_DETECT_ARCH ON)
44+
option(AUTO_DETECT_ARCH "Auto detect CPU microarchitecture" ON)
4245
foreach(opt IN LISTS ARCH_OPTIONS)
4346
if(${opt})
4447
set(AUTO_DETECT_ARCH OFF)
@@ -122,8 +125,11 @@ if(MSVC)
122125
return()
123126
endif()
124127

125-
126128
if(NOT AUTO_DETECT_ARCH)
129+
if(ENABLE_NATIVE)
130+
add_arch_flag("-march=native" NATIVE ENABLE_NATIVE)
131+
endif()
132+
127133
if(ENABLE_ZEN3)
128134
add_arch_flag("-march=znver3" ZNVER3 ENABLE_ZEN3)
129135
endif()

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ classifiers = [
2727
"Programming Language :: Python :: 3.10",
2828
"Programming Language :: Python :: 3.11",
2929
"Programming Language :: Python :: 3.12",
30+
"Programming Language :: Python :: 3.13",
31+
"Programming Language :: Python :: 3.14",
3032
"Topic :: Database",
3133
"Topic :: Scientific/Engineering :: Artificial Intelligence",
3234
"Topic :: Software Development :: Libraries :: Python Modules",
@@ -161,6 +163,8 @@ build = [
161163
"cp310-*",
162164
"cp311-*",
163165
"cp312-*",
166+
"cp313-*",
167+
"cp314-*",
164168
]
165169
build-frontend = "build"
166170
test-requires = ["pytest", "numpy"]

python/zvec/extension/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from .bm25_embedding_function import BM25EmbeddingFunction
1717
from .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction
18+
from .http_embedding_function import HTTPDenseEmbedding
1819
from .jina_embedding_function import JinaDenseEmbedding
1920
from .jina_function import JinaFunctionBase
2021
from .multi_vector_reranker import RrfReRanker, WeightedReRanker
@@ -37,6 +38,7 @@
3738
"DefaultLocalReRanker",
3839
"DefaultLocalSparseEmbedding",
3940
"DenseEmbeddingFunction",
41+
"HTTPDenseEmbedding",
4042
"JinaDenseEmbedding",
4143
"JinaFunctionBase",
4244
"OpenAIDenseEmbedding",
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
# Copyright 2025-present the zvec project
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from __future__ import annotations
15+
16+
import json
17+
import os
18+
import urllib.request
19+
from functools import lru_cache
20+
from typing import Optional
21+
22+
from ..common.constants import TEXT, DenseVectorType
23+
from .embedding_function import DenseEmbeddingFunction
24+
25+
26+
class HTTPDenseEmbedding(DenseEmbeddingFunction[TEXT]):
27+
"""Dense text embedding function using any OpenAI-compatible HTTP endpoint.
28+
29+
This class calls any server that implements the ``/v1/embeddings`` API
30+
(LM Studio, Ollama, vLLM, LocalAI, etc.) using only the Python standard
31+
library — no extra dependencies are required.
32+
33+
The embedding dimension is detected automatically from the first server
34+
response.
35+
36+
Args:
37+
base_url (str, optional): Base URL of the embedding server.
38+
Defaults to ``"http://localhost:1234"`` (LM Studio).
39+
Common values:
40+
41+
- ``"http://localhost:1234"`` — LM Studio
42+
- ``"http://localhost:11434"`` — Ollama
43+
model (str, optional): Model identifier as expected by the server.
44+
Defaults to ``"text-embedding-nomic-embed-text-v1.5@f16"``.
45+
api_key (Optional[str], optional): Bearer token for authenticated
46+
endpoints. Falls back to the ``OPENAI_API_KEY`` environment
47+
variable. Leave as ``None`` for local servers that do not
48+
require authentication.
49+
timeout (int, optional): HTTP request timeout in seconds.
50+
Defaults to 30.
51+
52+
Attributes:
53+
dimension (int): Embedding vector dimensionality (auto-detected).
54+
55+
Raises:
56+
TypeError: If ``embed()`` receives a non-string input.
57+
ValueError: If input is empty/whitespace-only or the server returns
58+
an unexpected response format.
59+
RuntimeError: If the HTTP request fails or the server is unreachable.
60+
61+
Examples:
62+
>>> from zvec.extension import HTTPDenseEmbedding
63+
>>>
64+
>>> # LM Studio (default)
65+
>>> emb = HTTPDenseEmbedding()
66+
>>> vector = emb.embed("Hello, world!")
67+
>>> len(vector)
68+
768
69+
>>>
70+
>>> # Ollama
71+
>>> emb = HTTPDenseEmbedding(
72+
... base_url="http://localhost:11434",
73+
... model="nomic-embed-text",
74+
... )
75+
>>> vector = emb.embed("Semantic search with local models")
76+
77+
See Also:
78+
- ``DenseEmbeddingFunction``: Protocol for dense embeddings.
79+
- ``OpenAIDenseEmbedding``: Cloud embedding via the OpenAI API.
80+
"""
81+
82+
ENDPOINT = "/v1/embeddings"
83+
84+
def __init__(
85+
self,
86+
base_url: str = "http://localhost:1234",
87+
model: str = "text-embedding-nomic-embed-text-v1.5@f16",
88+
api_key: Optional[str] = None,
89+
timeout: int = 30,
90+
) -> None:
91+
self._base_url = base_url.rstrip("/")
92+
self._model = model
93+
self._api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
94+
self._timeout = timeout
95+
self._dimension: Optional[int] = None
96+
97+
@property
98+
def dimension(self) -> int:
99+
"""int: Embedding vector dimensionality (auto-detected on first call)."""
100+
if self._dimension is None:
101+
self._dimension = len(self.embed("dimension probe"))
102+
return self._dimension
103+
104+
def __call__(self, input: TEXT) -> DenseVectorType:
105+
"""Make the embedding function callable."""
106+
return self.embed(input)
107+
108+
@lru_cache(maxsize=256)
109+
def embed(self, input: TEXT) -> DenseVectorType:
110+
"""Generate a dense embedding vector for the input text.
111+
112+
Results are cached (LRU, up to 256 entries) so repeated strings
113+
do not trigger extra HTTP requests.
114+
115+
Args:
116+
input (TEXT): Input text string to embed. Must be non-empty
117+
after stripping whitespace.
118+
119+
Returns:
120+
DenseVectorType: A list of floats representing the embedding.
121+
122+
Raises:
123+
TypeError: If *input* is not a string.
124+
ValueError: If *input* is empty/whitespace-only or the server
125+
returns an unexpected response format.
126+
RuntimeError: If the HTTP request fails.
127+
"""
128+
if not isinstance(input, TEXT):
129+
raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")
130+
131+
input = input.strip()
132+
if not input:
133+
raise ValueError("Input text cannot be empty or whitespace only")
134+
135+
url = self._base_url + self.ENDPOINT
136+
payload = json.dumps({"model": self._model, "input": input}).encode()
137+
138+
headers: dict[str, str] = {"Content-Type": "application/json"}
139+
if self._api_key:
140+
headers["Authorization"] = f"Bearer {self._api_key}"
141+
142+
req = urllib.request.Request(url, data=payload, headers=headers, method="POST")
143+
try:
144+
with urllib.request.urlopen(req, timeout=self._timeout) as resp:
145+
body = json.loads(resp.read())
146+
except urllib.error.HTTPError as exc:
147+
raise RuntimeError(
148+
f"Embedding server returned HTTP {exc.code}: {exc.read().decode()}"
149+
) from exc
150+
except OSError as exc:
151+
raise RuntimeError(
152+
f"Could not reach embedding server at {url}: {exc}"
153+
) from exc
154+
155+
try:
156+
vector: list[float] = body["data"][0]["embedding"]
157+
except (KeyError, IndexError) as exc:
158+
raise ValueError(
159+
f"Unexpected response format from embedding server: {body}"
160+
) from exc
161+
162+
return vector

src/ailego/algorithm/lloyd_cluster.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include <algorithm>
1818
#include <random>
19+
#include <vector>
1920
#include <ailego/parallel/lock.h>
2021
#include <zvec/ailego/parallel/thread_pool.h>
2122
#include <zvec/ailego/utility/type_helper.h>
@@ -247,7 +248,7 @@ class LloydCluster {
247248
protected:
248249
//! Cluster the cache features
249250
void cluster_cache_features(void) {
250-
float scores[BatchCount];
251+
std::vector<float> scores(BatchCount);
251252

252253
for (size_t i = 0, n = feature_cache_.count(); i != n; ++i) {
253254
size_t count = centroids_matrix_.count() / BatchCount * BatchCount;
@@ -258,7 +259,7 @@ class LloydCluster {
258259
for (size_t j = 0; j != count; j += BatchCount) {
259260
ContextType::template BatchDistance<1>(centroids_matrix_[j], feature,
260261
centroids_matrix_.dimension(),
261-
scores);
262+
scores.data());
262263

263264
for (size_t k = 0; k < BatchCount; ++k) {
264265
if (scores[k] < nearest_score) {
@@ -271,7 +272,7 @@ class LloydCluster {
271272
for (size_t j = count, total = centroids_matrix_.count(); j != total;
272273
++j) {
273274
ContextType::Distance(centroids_matrix_[j], feature,
274-
centroids_matrix_.dimension(), scores);
275+
centroids_matrix_.dimension(), scores.data());
275276

276277
if (scores[0] < nearest_score) {
277278
nearest_score = scores[0];
@@ -295,23 +296,23 @@ class LloydCluster {
295296
return i < j;
296297
};
297298

298-
float nearest_scores[BatchCount];
299-
size_t nearest_indexes[BatchCount];
299+
std::vector<float> nearest_scores(BatchCount);
300+
std::vector<size_t> nearest_indexes(BatchCount);
300301

301302
rows.resize(BatchCount);
302303
for (size_t i = first * BatchCount; i != last * BatchCount;
303304
i += BatchCount) {
304305
size_t count = centroids_matrix_.count() / BatchCount * BatchCount;
305306
const StoreType *block = feature_matrix_[i];
306307

307-
std::fill(nearest_indexes, nearest_indexes + BatchCount, 0);
308-
std::fill(nearest_scores, nearest_scores + BatchCount,
308+
std::fill(nearest_indexes.data(), nearest_indexes.data() + BatchCount, 0);
309+
std::fill(nearest_scores.data(), nearest_scores.data() + BatchCount,
309310
std::numeric_limits<float>::max());
310311

311312
for (size_t j = 0; j != count; j += BatchCount) {
312313
ContextType::template BatchDistance<BatchCount>(
313314
centroids_matrix_[j], block, centroids_matrix_.dimension(),
314-
&scores[0]);
315+
scores.data());
315316

316317
for (size_t k = 0; k < BatchCount; ++k) {
317318
const float *start = &scores[k * BatchCount];
@@ -328,7 +329,7 @@ class LloydCluster {
328329
++j) {
329330
ContextType::template BatchDistance<1>(block, centroids_matrix_[j],
330331
centroids_matrix_.dimension(),
331-
&scores[0]);
332+
scores.data());
332333

333334
for (size_t k = 0; k < BatchCount; ++k) {
334335
float score = scores[k];

0 commit comments

Comments (0)