Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ build/
venv/
.venv/
.idea/
results/
logs/

# Worktrees
Expand Down
2 changes: 1 addition & 1 deletion install/requirements_py3.11.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ psutil
polars
plotly
environs
pydantic<v2
pydantic>=2.0,<3
scikit-learn
pymilvus
clickhouse_connect
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dependencies = [
"polars",
"plotly",
"environs",
"pydantic<v2",
"pydantic>=2.0,<3",
"scikit-learn",
"pymilvus", # with pandas, numpy
"hdrhistogram>=0.10.1",
Expand Down
4 changes: 2 additions & 2 deletions vectordb_bench/backend/clients/alisql/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def parse_metric(self) -> str:


class AliSQLHNSWConfig(AliSQLIndexConfig, DBCaseConfig):
M: int | None
ef_search: int | None
M: int | None = None
ef_search: int | None = None
index: IndexType = IndexType.HNSW

def index_param(self) -> dict:
Expand Down
22 changes: 11 additions & 11 deletions vectordb_bench/backend/clients/alloydb/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ class AlloyDBIndexParam(TypedDict):
metric: str
index_type: str
index_creation_with_options: Sequence[dict[str, Any]]
maintenance_work_mem: str | None
max_parallel_workers: int | None
maintenance_work_mem: str | None = None
max_parallel_workers: int | None = None


class AlloyDBSearchParam(TypedDict):
Expand Down Expand Up @@ -120,15 +120,15 @@ def _optionally_build_set_options(

class AlloyDBScaNNConfig(AlloyDBIndexConfig):
index: IndexType = IndexType.SCANN
num_leaves: int | None
quantizer: str | None
enable_pca: str | None
max_num_levels: int | None
num_leaves_to_search: int | None
max_top_neighbors_buffer_size: int | None
pre_reordering_num_neighbors: int | None
num_search_threads: int | None
max_num_prefetch_datasets: int | None
num_leaves: int | None = None
quantizer: str | None = None
enable_pca: str | None = None
max_num_levels: int | None = None
num_leaves_to_search: int | None = None
max_top_neighbors_buffer_size: int | None = None
pre_reordering_num_neighbors: int | None = None
num_search_threads: int | None = None
max_num_prefetch_datasets: int | None = None
maintenance_work_mem: str | None = None
max_parallel_workers: int | None = None

Expand Down
24 changes: 16 additions & 8 deletions vectordb_bench/backend/clients/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from contextlib import contextmanager
from enum import StrEnum

from pydantic import BaseModel, SecretStr, validator
from pydantic import BaseModel, model_validator

from vectordb_bench.backend.filter import Filter, FilterOp

Expand Down Expand Up @@ -43,6 +43,9 @@ class IndexType(StrEnum):
GPU_CAGRA = "GPU_CAGRA"
SCANN = "scann"
SCANN_MILVUS = "SCANN_MILVUS"
SVS_VAMANA = "SVS_VAMANA"
SVS_VAMANA_LVQ = "SVS_VAMANA_LVQ"
SVS_VAMANA_LEANVEC = "SVS_VAMANA_LEANVEC"
Hologres_HGraph = "HGraph"
Hologres_Graph = "Graph"
NONE = "NONE"
Expand Down Expand Up @@ -90,13 +93,18 @@ def common_long_configs() -> list[str]:
def to_dict(self) -> dict:
raise NotImplementedError

@validator("*")
def not_empty_field(cls, v: any, field: any):
if field.name in cls.common_short_configs() or field.name in cls.common_long_configs():
return v
if not v and isinstance(v, str | SecretStr):
raise ValueError("Empty string!")
return v
@model_validator(mode="before")
@classmethod
def not_empty_field(cls, data: any) -> any:
if not isinstance(data, dict):
return data
skip = set(cls.common_short_configs()) | set(cls.common_long_configs())
for field_name, v in data.items():
if field_name in skip:
continue
if isinstance(v, str) and not v:
raise ValueError("Empty string!")
return data


class DBCaseConfig(ABC):
Expand Down
25 changes: 13 additions & 12 deletions vectordb_bench/backend/clients/aws_opensearch/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from enum import Enum

from pydantic import BaseModel, SecretStr, validator
from pydantic import BaseModel, SecretStr, model_validator

from ..api import DBCaseConfig, DBConfig, MetricType

Expand Down Expand Up @@ -32,17 +32,18 @@ def to_dict(self) -> dict:
"timeout": 600,
}

@validator("*")
def not_empty_field(cls, v: any, field: any):
if (
field.name in cls.common_short_configs()
or field.name in cls.common_long_configs()
or field.name in ["user", "password", "host"]
):
return v
if isinstance(v, str | SecretStr) and len(v) == 0:
raise ValueError("Empty string!")
return v
@model_validator(mode="before")
@classmethod
def not_empty_field(cls, data: any) -> any:
if not isinstance(data, dict):
return data
skip = set(cls.common_short_configs()) | set(cls.common_long_configs()) | {"user", "password", "host"}
for field_name, v in data.items():
if field_name in skip:
continue
if isinstance(v, str) and not v:
raise ValueError("Empty string!")
return data


class AWSOS_Engine(Enum):
Expand Down
2 changes: 1 addition & 1 deletion vectordb_bench/backend/clients/chroma/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

class ChromaConfig(DBConfig):
user: str | None = None
password: SecretStr | None
password: SecretStr | None = None
host: SecretStr = "localhost"
port: int = 8000

Expand Down
8 changes: 4 additions & 4 deletions vectordb_bench/backend/clients/cockroachdb/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,16 @@ class CockroachDBIndexParam(TypedDict):

metric: str
index_creation_with_options: Sequence[dict[str, Any]]
min_partition_size: int | None
max_partition_size: int | None
build_beam_size: int | None
min_partition_size: int | None = None
max_partition_size: int | None = None
build_beam_size: int | None = None


class CockroachDBSearchParam(TypedDict):
"""Search parameters for CockroachDB vector queries."""

metric_fun_op: LiteralString
vector_search_beam_size: int | None
vector_search_beam_size: int | None = None


class CockroachDBSessionCommands(TypedDict):
Expand Down
9 changes: 5 additions & 4 deletions vectordb_bench/backend/clients/doris/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from pydantic import BaseModel, SecretStr, validator
from pydantic import BaseModel, SecretStr, model_validator

from ..api import DBCaseConfig, DBConfig, MetricType

Expand All @@ -17,9 +17,10 @@ class DorisConfig(DBConfig):
db_name: str = "test"
ssl: bool = False

@validator("*")
def not_empty_field(cls, v: any, field: any):
return v
@model_validator(mode="before")
@classmethod
def not_empty_field(cls, data: any) -> any:
return data

def to_dict(self) -> dict:
pwd_str = self.password.get_secret_value()
Expand Down
28 changes: 14 additions & 14 deletions vectordb_bench/backend/clients/lindorm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ def parse_metric(self) -> str:

class HNSWConfig(LindormIndexConfig, DBCaseConfig):
index: IndexType = IndexType.HNSW
M: int | None
efConstruction: int | None
efSearch: int | None
M: int | None = None
efConstruction: int | None = None
efSearch: int | None = None
filter_type: str | None = "efficient_filter"
k_expand_scope: int | None = 1000

Expand All @@ -72,12 +72,12 @@ def search_param(self, do_filter: bool = False) -> dict:
# first layer searching for cluster centroids is hnsw
class IVFPQConfig(LindormIndexConfig, DBCaseConfig):
index: IndexType = IndexType.IVFPQ
nlist: int | None
nprobe: int | None
nlist: int | None = None
nprobe: int | None = None
# search parameters
centroids_hnsw_M: int | None
centroids_hnsw_efConstruction: int | None
centroids_hnsw_efSearch: int | None
centroids_hnsw_M: int | None = None
centroids_hnsw_efConstruction: int | None = None
centroids_hnsw_efSearch: int | None = None
filter_type: str | None = "efficient_filter"

reorder_factor: int | None = 10
Expand Down Expand Up @@ -116,13 +116,13 @@ def search_param(self, do_filter: bool = False) -> dict:

class IVFBQConfig(LindormIndexConfig, DBCaseConfig):
index: IndexType = IndexType.IVFBQ
nlist: int | None
exbits: int | None
nprobe: int | None
nlist: int | None = None
exbits: int | None = None
nprobe: int | None = None
# search parameters
centroids_hnsw_M: int | None
centroids_hnsw_efConstruction: int | None
centroids_hnsw_efSearch: int | None
centroids_hnsw_M: int | None = None
centroids_hnsw_efConstruction: int | None = None
centroids_hnsw_efSearch: int | None = None
filter_type: str | None = "efficient_filter"

reorder_factor: int | None = 10
Expand Down
6 changes: 3 additions & 3 deletions vectordb_bench/backend/clients/mariadb/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ def parse_metric(self) -> str:


class MariaDBHNSWConfig(MariaDBIndexConfig, DBCaseConfig):
M: int | None
ef_search: int | None
M: int | None = None
ef_search: int | None = None
index: IndexType = IndexType.HNSW
storage_engine: str = "InnoDB"
max_cache_size: int | None
max_cache_size: int | None = None

def index_param(self) -> dict:
return {
Expand Down
Loading
Loading