|
| 1 | +# Copyright 2025-present the zvec project |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | +from __future__ import annotations |
| 15 | + |
| 16 | +import json |
| 17 | +import os |
| 18 | +import urllib.request |
| 19 | +from functools import lru_cache |
| 20 | +from typing import Optional |
| 21 | + |
| 22 | +from ..common.constants import TEXT, DenseVectorType |
| 23 | +from .embedding_function import DenseEmbeddingFunction |
| 24 | + |
| 25 | + |
class HTTPDenseEmbedding(DenseEmbeddingFunction[TEXT]):
    """Dense text embedding function using any OpenAI-compatible HTTP endpoint.

    This class calls any server that implements the ``/v1/embeddings`` API
    (LM Studio, Ollama, vLLM, LocalAI, etc.) using only the Python standard
    library — no extra dependencies are required.

    The embedding dimension is detected automatically from the first server
    response.

    Results are memoized per instance (LRU, up to 256 distinct stripped
    input strings), so repeated strings do not trigger extra HTTP requests
    and the cache is released together with the instance.

    Args:
        base_url (str, optional): Base URL of the embedding server.
            Defaults to ``"http://localhost:1234"`` (LM Studio).
            Common values:

            - ``"http://localhost:1234"`` — LM Studio
            - ``"http://localhost:11434"`` — Ollama
        model (str, optional): Model identifier as expected by the server.
            Defaults to ``"text-embedding-nomic-embed-text-v1.5@f16"``.
        api_key (Optional[str], optional): Bearer token for authenticated
            endpoints. Falls back to the ``OPENAI_API_KEY`` environment
            variable. Leave as ``None`` for local servers that do not
            require authentication.
        timeout (int, optional): HTTP request timeout in seconds.
            Defaults to 30.

    Attributes:
        dimension (int): Embedding vector dimensionality (auto-detected).

    Raises:
        TypeError: If ``embed()`` receives a non-string input.
        ValueError: If input is empty/whitespace-only or the server returns
            an unexpected response format.
        RuntimeError: If the HTTP request fails or the server is unreachable.

    Examples:
        >>> from zvec.extension import HTTPDenseEmbedding
        >>>
        >>> # LM Studio (default)
        >>> emb = HTTPDenseEmbedding()
        >>> vector = emb.embed("Hello, world!")
        >>> len(vector)
        768
        >>>
        >>> # Ollama
        >>> emb = HTTPDenseEmbedding(
        ...     base_url="http://localhost:11434",
        ...     model="nomic-embed-text",
        ... )
        >>> vector = emb.embed("Semantic search with local models")

    See Also:
        - ``DenseEmbeddingFunction``: Protocol for dense embeddings.
        - ``OpenAIDenseEmbedding``: Cloud embedding via the OpenAI API.
    """

    ENDPOINT = "/v1/embeddings"

    # Maximum number of distinct input strings memoized per instance.
    _CACHE_SIZE = 256

    def __init__(
        self,
        base_url: str = "http://localhost:1234",
        model: str = "text-embedding-nomic-embed-text-v1.5@f16",
        api_key: Optional[str] = None,
        timeout: int = 30,
    ) -> None:
        # Drop trailing slashes so ``base_url + ENDPOINT`` never yields "//".
        self._base_url = base_url.rstrip("/")
        self._model = model
        # An explicit key wins; otherwise fall back to the environment.
        # An empty string means "send no Authorization header".
        self._api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        self._timeout = timeout
        self._dimension: Optional[int] = None
        # The cache is built per instance rather than by decorating the
        # method: a decorated method shares one class-wide cache keyed on
        # ``self``, which keeps every instance alive and lets instances
        # evict each other's entries.
        self._cached_request = lru_cache(maxsize=self._CACHE_SIZE)(
            self._request_embedding
        )

    @property
    def dimension(self) -> int:
        """int: Embedding vector dimensionality (auto-detected on first call)."""
        if self._dimension is None:
            # No embedding has been produced yet: probe the server once.
            self._dimension = len(self.embed("dimension probe"))
        return self._dimension

    def __call__(self, input: TEXT) -> DenseVectorType:
        """Make the embedding function callable."""
        return self.embed(input)

    def embed(self, input: TEXT) -> DenseVectorType:
        """Generate a dense embedding vector for the input text.

        Results are cached per instance (LRU, up to 256 entries, keyed on
        the whitespace-stripped text) so repeated strings do not trigger
        extra HTTP requests.

        Args:
            input (TEXT): Input text string to embed. Must be non-empty
                after stripping whitespace.

        Returns:
            DenseVectorType: A list of floats representing the embedding.
            Each call returns a fresh list, so callers may mutate it
            without affecting cached results.

        Raises:
            TypeError: If *input* is not a string.
            ValueError: If *input* is empty/whitespace-only or the server
                returns an unexpected response format.
            RuntimeError: If the HTTP request fails.
        """
        if not isinstance(input, TEXT):
            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")

        text = input.strip()
        if not text:
            raise ValueError("Input text cannot be empty or whitespace only")

        # Validation runs before the cache lookup, and the key is the
        # stripped text, so "a" and " a " share a single entry.
        vector = self._cached_request(text)

        # Record the dimension from the first real response so that
        # ``dimension`` does not need a separate probe request.
        if self._dimension is None:
            self._dimension = len(vector)

        return list(vector)

    def _request_embedding(self, text: str) -> tuple[float, ...]:
        """POST *text* to the embeddings endpoint and return its vector.

        The vector is returned as a tuple so the cached value is immutable.

        Raises:
            ValueError: If the response is not JSON or lacks
                ``data[0].embedding`` as a list.
            RuntimeError: If the HTTP request fails or the server is
                unreachable.
        """
        url = self._base_url + self.ENDPOINT
        payload = json.dumps({"model": self._model, "input": text}).encode()

        headers: dict[str, str] = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"

        req = urllib.request.Request(url, data=payload, headers=headers, method="POST")
        try:
            with urllib.request.urlopen(req, timeout=self._timeout) as resp:
                raw = resp.read()
        except urllib.error.HTTPError as exc:
            # HTTPError must be handled before OSError (it is a subclass).
            # Decode leniently so a non-UTF-8 error body cannot raise a
            # second exception that hides the HTTP status.
            detail = exc.read().decode(errors="replace")
            raise RuntimeError(
                f"Embedding server returned HTTP {exc.code}: {detail}"
            ) from exc
        except OSError as exc:
            # Covers URLError, connection failures and socket timeouts.
            raise RuntimeError(
                f"Could not reach embedding server at {url}: {exc}"
            ) from exc

        try:
            body = json.loads(raw)
        except ValueError as exc:
            # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            raise ValueError(
                f"Embedding server returned a non-JSON response: {exc}"
            ) from exc

        try:
            vector = body["data"][0]["embedding"]
        except (KeyError, IndexError, TypeError) as exc:
            # TypeError covers a top-level JSON list or ``"data": null``.
            raise ValueError(
                f"Unexpected response format from embedding server: {body}"
            ) from exc

        if not isinstance(vector, list):
            raise ValueError(
                f"Unexpected response format from embedding server: {body}"
            )

        return tuple(vector)
0 commit comments