Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ All the database client supported

| Optional database client | install command |
|--------------------------|---------------------------------------------|
| pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
| pymilvus, zilliz_cloud (*default*), aliyunmilvus | `pip install vectordb-bench` |
| qdrant | `pip install vectordb-bench[qdrant]` |
| pinecone | `pip install vectordb-bench[pinecone]` |
| weaviate | `pip install vectordb-bench[weaviate]` |
Expand Down
183 changes: 183 additions & 0 deletions tests/test_aliyun_milvus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
import pytest
from click.testing import CliRunner

from vectordb_bench.backend.clients import DB
from vectordb_bench.backend.clients.aliyun_milvus import cli as aliyun_milvus_cli
from vectordb_bench.backend.clients.api import IndexType
from vectordb_bench.backend.clients.milvus.config import MilvusConfig
from vectordb_bench.backend.clients.milvus.milvus import Milvus
from vectordb_bench.frontend.config.dbCaseConfigs import (
AliyunMilvusLoadConfig,
AliyunMilvusPerformanceConfig,
)
from vectordb_bench.models import CaseConfigParamType


def _diskann_config_cls():
    """Return the case-config class that ``DB.AliyunMilvus`` maps to DISKANN."""
    aliyun_db = DB.AliyunMilvus
    return aliyun_db.case_config_cls(index_type=IndexType.DISKANN)


def test_aliyun_milvus_reuses_upstream_milvus_client_and_config():
    """The AliyunMilvus enum member resolves to the upstream Milvus client and config classes."""
    aliyun_db = DB.AliyunMilvus

    assert aliyun_db.init_cls is Milvus
    assert aliyun_db.config_cls is MilvusConfig


def test_aliyun_milvus_search_params_opt_in_by_default():
    """When no optional knob is given, the search params carry only ``search_list``."""
    config_cls = _diskann_config_cls()
    sent_params = config_cls(search_list=200).search_param()["params"]

    assert sent_params == {"search_list": 200}


def test_aliyun_milvus_search_params_injected_when_set():
    """Every knob that is explicitly set shows up in the search params next to ``search_list``."""
    knobs = {
        "rerank_topk_multiplier": 0,
        "early_termination_threshold": 0,
        "cross_segment_rerank": True,
    }
    case_config = _diskann_config_cls()(search_list=200, **knobs)

    # The expected payload is the base param plus exactly the knobs passed in.
    assert case_config.search_param()["params"] == {"search_list": 200, **knobs}


def test_aliyun_milvus_zero_is_a_meaningful_value_not_unset():
    """A value of 0 is a real setting and must still be present in the search params."""
    config_cls = _diskann_config_cls()
    sent_params = config_cls(search_list=200, rerank_topk_multiplier=0).search_param()["params"]

    assert sent_params["rerank_topk_multiplier"] == 0


@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        # Strings recognised as "true", in several spellings.
        ("true", True),
        ("True", True),
        ("1", True),
        ("yes", True),
        ("on", True),
        # Any other string coerces to False.
        ("false", False),
        ("0", False),
        ("anything", False),
        # Plain ints follow normal truthiness.
        (1, True),
        (0, False),
    ],
)
def test_aliyun_milvus_cross_segment_rerank_normalizes_non_bool_inputs(raw, expected):
    """String and int inputs for the bool knob are coerced to a real ``bool``."""
    config_cls = _diskann_config_cls()
    case_config = config_cls(search_list=200, cross_segment_rerank=raw)

    # Identity checks (``is``) make sure the value is an actual bool, not merely truthy/falsy.
    assert case_config.cross_segment_rerank is expected
    sent_params = case_config.search_param()["params"]
    assert sent_params["cross_segment_rerank"] is expected


def test_aliyun_milvus_ui_sentinels_normalize_to_unset():
    """The "unset" sentinels (-1 for the ints, 'DEFAULT' for the bool) are dropped from the search params."""
    sentinels = {
        "rerank_topk_multiplier": -1,
        "early_termination_threshold": -1,
        "cross_segment_rerank": "DEFAULT",
    }
    case_config = _diskann_config_cls()(search_list=200, **sentinels)

    assert case_config.search_param()["params"] == {"search_list": 200}


def test_aliyun_milvus_frontend_exposes_search_params_in_performance_only():
    """Each of the three knobs is listed in the Performance config and absent from the Load config."""
    search_only_labels = (
        CaseConfigParamType.rerank_topk_multiplier,
        CaseConfigParamType.early_termination_threshold,
        CaseConfigParamType.cross_segment_rerank,
    )
    labels_in_load = [entry.label for entry in AliyunMilvusLoadConfig]
    labels_in_performance = [entry.label for entry in AliyunMilvusPerformanceConfig]

    for knob_label in search_only_labels:
        assert knob_label in labels_in_performance
        assert knob_label not in labels_in_load


def test_aliyun_milvus_load_config_has_no_search_list():
    """The Load config must not list ``SearchList``, which is a query-time parameter."""
    labels_in_load = tuple(entry.label for entry in AliyunMilvusLoadConfig)

    assert CaseConfigParamType.SearchList not in labels_in_load


def test_aliyun_milvus_only_diskann_supported():
    """Only DISKANN resolves to a case config; HNSW and IVFFlat resolve to ``None``."""
    resolve = DB.AliyunMilvus.case_config_cls

    assert resolve(index_type=IndexType.DISKANN) is not None
    for unsupported in (IndexType.HNSW, IndexType.IVFFlat):
        assert resolve(index_type=unsupported) is None


def test_aliyun_milvus_cli_omitting_knobs_sends_only_search_list(monkeypatch: pytest.MonkeyPatch):
    """Invoking the command without the optional flags leaves all three knobs as ``None``."""
    captured = {}

    def record_run(**kwargs):
        # Stand-in for the real ``run``: just remember what the command passed.
        captured.update(kwargs)

    monkeypatch.setattr(aliyun_milvus_cli, "run", record_run)

    cli_args = ["--uri", "http://localhost:19530", "--search-list", "200"]
    result = CliRunner().invoke(aliyun_milvus_cli.AliyunMilvusDISKANN, cli_args)

    assert result.exit_code == 0, result.output
    case_config = captured["db_case_config"]
    assert case_config.rerank_topk_multiplier is None
    assert case_config.early_termination_threshold is None
    assert case_config.cross_segment_rerank is None
    assert case_config.search_param()["params"] == {"search_list": 200}


def test_aliyun_milvus_cli_passes_knobs_when_specified(monkeypatch: pytest.MonkeyPatch):
    """Flags given on the command line reach the case config with the values supplied."""
    captured = {}

    def record_run(**kwargs):
        # Stand-in for the real ``run``: just remember what the command passed.
        captured.update(kwargs)

    monkeypatch.setattr(aliyun_milvus_cli, "run", record_run)

    cli_args = [
        "--uri", "http://localhost:19530",
        "--search-list", "200",
        "--rerank-topk-multiplier", "0",
        "--early-termination-threshold", "0",
        "--cross-segment-rerank",
    ]
    result = CliRunner().invoke(aliyun_milvus_cli.AliyunMilvusDISKANN, cli_args)

    assert result.exit_code == 0, result.output
    case_config = captured["db_case_config"]
    assert case_config.rerank_topk_multiplier == 0
    assert case_config.early_termination_threshold == 0
    assert case_config.cross_segment_rerank is True


def test_aliyun_milvus_cli_no_cross_segment_rerank_sends_false(monkeypatch: pytest.MonkeyPatch):
    """``--no-cross-segment-rerank`` is an explicit choice: ``False`` is sent rather than omitted."""
    captured = {}

    def record_run(**kwargs):
        # Stand-in for the real ``run``: just remember what the command passed.
        captured.update(kwargs)

    monkeypatch.setattr(aliyun_milvus_cli, "run", record_run)

    cli_args = [
        "--uri", "http://localhost:19530",
        "--search-list", "200",
        "--no-cross-segment-rerank",
    ]
    result = CliRunner().invoke(aliyun_milvus_cli.AliyunMilvusDISKANN, cli_args)

    assert result.exit_code == 0, result.output
    case_config = captured["db_case_config"]
    assert case_config.cross_segment_rerank is False

    expected_params = {"search_list": 200, "cross_segment_rerank": False}
    assert case_config.search_param()["params"] == expected_params
18 changes: 18 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class DB(Enum):
PolarDB = "PolarDB"
Pinot = "Pinot"
SeekDB = "SeekDB"
AliyunMilvus = "AliyunMilvus"

@property
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
Expand Down Expand Up @@ -269,6 +270,13 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915

return SeekDB

if self == DB.AliyunMilvus:
# AliyunMilvus reuses the upstream Milvus client; only the DISKANN
# case config differs (three opt-in search params).
from .milvus.milvus import Milvus

return Milvus

msg = f"Unknown DB: {self.name}"
raise ValueError(msg)

Expand Down Expand Up @@ -477,6 +485,11 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915

return SeekDBConfig

if self == DB.AliyunMilvus:
from .milvus.config import MilvusConfig

return MilvusConfig

msg = f"Unknown DB: {self.name}"
raise ValueError(msg)

Expand Down Expand Up @@ -667,6 +680,11 @@ def case_config_cls( # noqa: C901, PLR0911, PLR0912, PLR0915

return _seekdb_case_config.get(index_type)

if self == DB.AliyunMilvus:
from .aliyun_milvus.config import _aliyun_milvus_case_config

return _aliyun_milvus_case_config.get(index_type)

# DB.Pinecone, DB.Redis
return EmptyDBCaseConfig

Expand Down
82 changes: 82 additions & 0 deletions vectordb_bench/backend/clients/aliyun_milvus/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from typing import Annotated, Unpack

import click
from pydantic import SecretStr

from vectordb_bench.backend.clients import DB
from vectordb_bench.cli.cli import (
cli,
click_parameter_decorators_from_typed_dict,
run,
)

from ..milvus.cli import MilvusDISKANNTypedDict, _with_partition_key

DBTYPE = DB.AliyunMilvus


class AliyunMilvusDISKANNTypedDict(MilvusDISKANNTypedDict):
    """CLI parameters for AliyunMilvus DISKANN.

    Extends the Milvus DISKANN parameters with three optional search-time
    options. Every one of them defaults to ``None``, i.e. "not specified".
    """

    # Integer knob; ``None`` when the flag is not given.
    rerank_topk_multiplier: Annotated[
        int | None,
        click.option(
            "--rerank-topk-multiplier",
            type=int,
            default=None,
            required=False,
            help="Search param: topk multiplier for rerank budget (0 disables rerank read). Omit to not send it.",
        ),
    ]
    # Integer knob; ``None`` when the flag is not given.
    early_termination_threshold: Annotated[
        int | None,
        click.option(
            "--early-termination-threshold",
            type=int,
            default=None,
            required=False,
            help="Search param: early termination threshold (0 disables). Omit to not send it.",
        ),
    ]
    # On/off flag pair; the explicit ``default=None`` keeps "flag absent" distinct from ``False``.
    cross_segment_rerank: Annotated[
        bool | None,
        click.option(
            "--cross-segment-rerank/--no-cross-segment-rerank",
            "cross_segment_rerank",
            default=None,
            help="Search param: enable cross-segment rerank. Omit to not send it.",
        ),
    ]


@cli.command(name="aliyunmilvusdiskann")
@click_parameter_decorators_from_typed_dict(AliyunMilvusDISKANNTypedDict)
def AliyunMilvusDISKANN(**parameters: Unpack[AliyunMilvusDISKANNTypedDict]):
    # Build the Milvus connection config and the AliyunMilvus DISKANN case
    # config from the parsed CLI parameters, then hand both to ``run``.
    from ..milvus.config import MilvusConfig
    from .config import AliyunMilvusDISKANNConfig

    connection_config = MilvusConfig(
        db_label=parameters["db_label"],
        uri=SecretStr(parameters["uri"]),
        user=parameters["user_name"],
        # An empty/missing password is passed as None rather than wrapped in SecretStr.
        password=SecretStr(parameters["password"]) if parameters["password"] else None,
        num_shards=int(parameters["num_shards"]),
        replica_number=int(parameters["replica_number"]),
    )

    # The three knobs are forwarded untouched; None means "not specified",
    # which the case config omits from the search params.
    optional_knobs = {
        knob: parameters[knob]
        for knob in (
            "rerank_topk_multiplier",
            "early_termination_threshold",
            "cross_segment_rerank",
        )
    }
    diskann_config = AliyunMilvusDISKANNConfig(
        search_list=parameters["search_list"],
        **optional_knobs,
    )

    run(
        db=DBTYPE,
        db_config=connection_config,
        db_case_config=_with_partition_key(diskann_config, parameters),
        **parameters,
    )
59 changes: 59 additions & 0 deletions vectordb_bench/backend/clients/aliyun_milvus/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from pydantic import field_validator

from ..api import DBCaseConfig, IndexType
from ..milvus.config import DISKANNConfig


class AliyunMilvusDISKANNConfig(DISKANNConfig):
    """Milvus DISKANN plus three opt-in Aliyun search-time params.

    Everything is inherited from the upstream Milvus ``DISKANNConfig``; this
    class only adds three extra search params that are injected into the
    per-query search params **only when explicitly specified**. ``None`` means
    "not specified" (the param is omitted from the search params).

    Note ``0`` is a meaningful value (e.g. ``rerank_topk_multiplier=0`` disables
    rerank reads), so "unset" must be ``None``, not ``0``. The web UI passes a
    negative number / ``"DEFAULT"`` sentinel which is normalized to ``None``.
    String sentinels are matched after stripping whitespace and ignoring case,
    so ``"default"`` and ``" DEFAULT "`` are treated the same as ``"DEFAULT"``.
    """

    rerank_topk_multiplier: int | None = None
    early_termination_threshold: int | None = None
    cross_segment_rerank: bool | None = None

    @field_validator("rerank_topk_multiplier", "early_termination_threshold", mode="before")
    @classmethod
    def _normalize_optional_int(cls, v: object) -> int | None:
        """Coerce a raw int-knob value to a non-negative ``int`` or ``None``.

        Returns ``None`` for ``None``, a blank string, the ``"DEFAULT"``
        sentinel (any case, surrounding whitespace ignored) and any negative
        number. Every other value goes through ``int()``, which raises for
        input it cannot convert.
        """
        if v is None:
            return None
        if isinstance(v, str):
            # Normalize before comparing so the sentinel check agrees with the
            # case/whitespace-insensitive handling used for the bool knob.
            v = v.strip()
            if not v or v.upper() == "DEFAULT":
                return None
        iv = int(v)
        # negative value is the UI "unset" sentinel; 0 stays a real value
        return None if iv < 0 else iv

    @field_validator("cross_segment_rerank", mode="before")
    @classmethod
    def _normalize_optional_bool(cls, v: object) -> bool | None:
        """Coerce a raw bool-knob value to ``True``, ``False`` or ``None``.

        Returns ``None`` for ``None``, a blank string and the ``"DEFAULT"``
        sentinel (any case, surrounding whitespace ignored). Strings are
        ``True`` only for "true", "1", "yes" or "on" (case-insensitive); any
        other string is ``False``. Non-string values use ``bool()``.
        """
        if v is None:
            return None
        if isinstance(v, bool):
            return v
        if isinstance(v, str):
            text = v.strip().lower()
            # Check the sentinel on the normalized text; otherwise "default"
            # or " DEFAULT " would be sent as an explicit False.
            if text in ("", "default"):
                return None
            return text in ("true", "1", "yes", "on")
        return bool(v)

    def search_param(self) -> dict:
        """Return the base DISKANN search params plus any knob that is not ``None``."""
        sp = super().search_param()
        params = sp["params"]
        for knob in (
            "rerank_topk_multiplier",
            "early_termination_threshold",
            "cross_segment_rerank",
        ):
            value = getattr(self, knob)
            # ``is not None`` (not truthiness): 0 and False are real values to send.
            if value is not None:
                params[knob] = value
        return sp


_aliyun_milvus_case_config: dict[IndexType, type[DBCaseConfig]] = {
IndexType.DISKANN: AliyunMilvusDISKANNConfig,
}
2 changes: 2 additions & 0 deletions vectordb_bench/cli/vectordbbench.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ..backend.clients.alisql.cli import AliSQLHNSW
from ..backend.clients.aliyun_milvus.cli import AliyunMilvusDISKANN
from ..backend.clients.alloydb.cli import AlloyDBScaNN
from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
from ..backend.clients.chroma.cli import Chroma
Expand Down Expand Up @@ -98,6 +99,7 @@
cli.add_command(PolarDBHNSWPQ)
cli.add_command(PolarDBHNSWSQ)
cli.add_command(SeekDBHNSW)
cli.add_command(AliyunMilvusDISKANN)


if __name__ == "__main__":
Expand Down
Loading