From a6235138bd6e34de0edb0dfd01d58d4db0a19cb0 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Simhadri Date: Sun, 10 May 2026 10:22:53 -0700 Subject: [PATCH 1/5] add config examples for wiki-1M with Cohere embeddings --- ...aph-index-product-quantization-wiki1M.json | 55 +++++++++++++++++++ ...-index-spherical-quantization-wiki1M.json} | 0 2 files changed, 55 insertions(+) create mode 100644 diskann-benchmark/example/graph-index-product-quantization-wiki1M.json rename diskann-benchmark/example/{spherical-exhaustive.json => graph-index-spherical-quantization-wiki1M.json} (100%) diff --git a/diskann-benchmark/example/graph-index-product-quantization-wiki1M.json b/diskann-benchmark/example/graph-index-product-quantization-wiki1M.json new file mode 100644 index 000000000..07f649135 --- /dev/null +++ b/diskann-benchmark/example/graph-index-product-quantization-wiki1M.json @@ -0,0 +1,55 @@ +{ + "search_directories": [ + "../big-ann-benchmarks/data/wikipedia_cohere" ], + "jobs": [ + { + "type": "async-index-build-pq", + "content": { + "index_operation": { + "source":{ + "index-source": "Build", + "data_type": "float32", + "data": "wikipedia_base.bin.crop_nb_1000000", + "distance": "inner_product", + "start_point_strategy": "medoid", + "max_degree": 32, + "l_build": 100, + "alpha": 1.2, + "backedge_ratio": 1.0, + "num_threads": 32, + "multi_insert": { + "batch_size": 128, + "batch_parallelism": 32, + "intra_batch_candidates": "all" + } + }, + "search_phase": { + "search-type": "topk", + "queries": "wikipedia_query.bin", + "groundtruth": "wikipedia-1M", + "reps": 1, + "num_threads": [ + 32 + ], + "runs": [ + { + "search_n": 10, + "search_l": [ + 100, + 150, + 200, + 300 + ], + "recall_k": 10 + } + ] + } + }, + "num_pq_chunks": 192, + "seed": 13076402859301299683, + "max_fp_vecs_per_prune": 0, + "use_fp_for_search": false + } + } + ] +} diff --git a/diskann-benchmark/example/spherical-exhaustive.json b/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json similarity index 100% rename from diskann-benchmark/example/spherical-exhaustive.json rename to diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json From bd3e73fa05ed75efe84d2f43992b01b5433c62fe Mon Sep 17 00:00:00 2001 From: Harsha Simhadri Date: Sun, 10 May 2026 15:48:15 -0700 Subject: [PATCH 2/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../example/graph-index-product-quantization-wiki1M.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diskann-benchmark/example/graph-index-product-quantization-wiki1M.json b/diskann-benchmark/example/graph-index-product-quantization-wiki1M.json index 07f649135..423199c10 100644 --- a/diskann-benchmark/example/graph-index-product-quantization-wiki1M.json +++ b/diskann-benchmark/example/graph-index-product-quantization-wiki1M.json @@ -3,7 +3,7 @@ "../big-ann-benchmarks/data/wikipedia_cohere" ], "jobs": [ { - "type": "async-index-build-pq", + "type": "graph-index-build-pq", "content": { "index_operation": { "source":{ From fdcbff1b9236948a58b9bc03ab22eca56212e169 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Simhadri Date: Sun, 10 May 2026 17:21:17 -0700 Subject: [PATCH 3/5] fix: restore spherical-exhaustive.json and update wiki1M SQ config The PR accidentally renamed spherical-exhaustive.json to graph-index-spherical-quantization-wiki1M.json, breaking the spherical_quantization_intergration test which references the old name. - Restore spherical-exhaustive.json with its original content - Update graph-index-spherical-quantization-wiki1M.json with proper content Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ...h-index-spherical-quantization-wiki1M.json | 563 +++++++----------- .../example/spherical-exhaustive.json | 447 ++++++++++++++ 2 files changed, 652 insertions(+), 358 deletions(-) create mode 100644 diskann-benchmark/example/spherical-exhaustive.json diff --git a/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json b/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json index e48e3ad42..695b1d041 100644 --- a/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json +++ b/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json @@ -1,443 +1,290 @@ { - "search_directories": [ + "search_directories":[ "test_data/disk_index_search" ], "jobs": [ { - "type": "exhaustive-spherical-quantization", + "type": "graph-index-build-spherical-quantization", "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { + "build": { + "data_type": "float32", + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "distance": "squared_l2", + "start_point_strategy": "medoid", + "max_degree": 32, + "l_build": 50, + "alpha": 1.2, + "backedge_ratio": 1.0, + "num_threads": 8, + "multi_insert": null + }, + "search_phase": { + "search-type": "topk", "queries": "disk_index_sample_query_10pts.fbin", "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } + "reps": 5, + "num_threads": [ + 8 + ], + "runs": [ + { + "search_n": 10, + "search_l": [ + 10, + 20, + 30, + 40, + 50, + 60, + 70, + 80, + 90, + 100 + ], + "recall_k": 10 + } + ] }, - "query_layouts": [ - "same_as_data", - "four_bit_transposed" - ], - "compression_threads": 1, - "num_bits": 1, - "seed": 7831252621480178695, + "seed": 12648430, "transform_kind": { "padding_hadamard": "same" }, - "pre_scale": { - "some": 0.00390625 - } - } - }, - { - "type": "exhaustive-spherical-quantization", - "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } - }, "query_layouts": [ + "four_bit_transposed", "same_as_data", - "four_bit_transposed" + "full_precision" ], - "compression_threads": 1, "num_bits": 1, - "seed": 7831252621480178695, - "transform_kind": { - "padding_hadamard": "natural" - }, "pre_scale": { "some": 0.00390625 } } }, { - "type": "exhaustive-spherical-quantization", + "type": "graph-index-build-spherical-quantization", "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { + "build": { + "data_type": "float32", + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "distance": "squared_l2", + "start_point_strategy": "medoid", + "max_degree": 32, + "l_build": 50, + "alpha": 1.2, + "backedge_ratio": 1.0, + "num_threads": 8, + "multi_insert": null + }, + "search_phase": { + "search-type": "topk", "queries": "disk_index_sample_query_10pts.fbin", "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } + "reps": 5, + "num_threads": [ + 8 + ], + "runs": [ + { + "search_n": 10, + "search_l": [ + 10, + 20, + 30, + 40, + 50, + 60, + 70, + 80, + 90, + 100 + ], + "recall_k": 10 + } + ] }, - "query_layouts": [ - "same_as_data", - "four_bit_transposed" - ], - "compression_threads": 1, - "num_bits": 1, - "seed": 7831252621480178695, + "seed": 12648430, "transform_kind": { - "padding_hadamard": { - "override": 100 - } - }, - "pre_scale": { - "some": 0.00390625 - } - } - }, - { - "type": "exhaustive-spherical-quantization", - "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } + "padding_hadamard": "natural" }, "query_layouts": [ + "four_bit_transposed", "same_as_data", - "four_bit_transposed" + "full_precision" ], - "compression_threads": 1, "num_bits": 1, - "seed": 7831252621480178695, - "transform_kind": { - "random_rotation": "same" - }, "pre_scale": { "some": 0.00390625 } } }, { - "type": "exhaustive-spherical-quantization", + "type": "graph-index-build-spherical-quantization", "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { + "build": { + "data_type": "float32", + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "distance": "squared_l2", + "start_point_strategy": "medoid", + "max_degree": 32, + "l_build": 50, + "alpha": 1.2, + "backedge_ratio": 1.0, + "num_threads": 8, + "multi_insert": null + }, + "search_phase": { + "search-type": "topk", "queries": "disk_index_sample_query_10pts.fbin", "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } + "reps": 5, + "num_threads": [ + 8 + ], + "runs": [ + { + "search_n": 10, + "search_l": [ + 10, + 20, + 30, + 40, + 50, + 60, + 70, + 80, + 90, + 100 + ], + "recall_k": 10 + } + ] }, - "query_layouts": [ - "same_as_data", - "four_bit_transposed" - ], - "compression_threads": 1, - "num_bits": 1, - "seed": 7831252621480178695, + "seed": 12648430, "transform_kind": { - "random_rotation": "natural" - }, - "pre_scale": { - "some": 0.00390625 - } - } - }, - { - "type": "exhaustive-spherical-quantization", - "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] + "padding_hadamard": { + "override": 64 } }, "query_layouts": [ + "four_bit_transposed", "same_as_data", - "four_bit_transposed" + "full_precision" ], - "compression_threads": 1, "num_bits": 1, - "seed": 7831252621480178695, - "transform_kind": { - "random_rotation": { - "override": 100 - } - }, "pre_scale": { "some": 0.00390625 } } }, { - "type": "exhaustive-spherical-quantization", + "type": "graph-index-build-spherical-quantization", "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } - }, - "query_layouts": [ - "same_as_data", - "scalar_quantized" - ], - "compression_threads": 1, - "num_bits": 4, - "seed": 7831252621480178695, - "transform_kind": { - "padding_hadamard": "same" + "build": { + "data_type": "float32", + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "distance": "squared_l2", + "start_point_strategy": "medoid", + "max_degree": 32, + "l_build": 50, + "alpha": 1.2, + "backedge_ratio": 1.0, + "num_threads": 8, + "multi_insert": null }, - "pre_scale": { - "some": 0.00390625 - } - } - }, - { - "type": "exhaustive-spherical-quantization", - "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { + "search_phase": { + "search-type": "topk", "queries": "disk_index_sample_query_10pts.fbin", "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } + "reps": 5, + "num_threads": [ + 8 + ], + "runs": [ + { + "search_n": 10, + "search_l": [ + 10, + 20, + 30, + 40, + 50, + 60, + 70, + 80, + 90, + 100 + ], + "recall_k": 10 + } + ] }, - "query_layouts": [ - "same_as_data", - "scalar_quantized" - ], - "compression_threads": 1, - "num_bits": 4, - "seed": 7831252621480178695, + "seed": 12648430, "transform_kind": { "padding_hadamard": "natural" }, - "pre_scale": { - "some": 0.00390625 - } - } - }, - { - "type": "exhaustive-spherical-quantization", - "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } - }, "query_layouts": [ + "scalar_quantized", "same_as_data", - "scalar_quantized" + "full_precision" ], - "compression_threads": 1, - "num_bits": 4, - "seed": 7831252621480178695, - "transform_kind": { - "padding_hadamard": { - "override": 100 - } - }, + "num_bits": 2, "pre_scale": { "some": 0.00390625 } } }, { - "type": "exhaustive-spherical-quantization", + "type": "graph-index-build-spherical-quantization", "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } + "build": { + "data_type": "float32", + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "distance": "squared_l2", + "start_point_strategy": "medoid", + "max_degree": 32, + "l_build": 50, + "alpha": 1.2, + "backedge_ratio": 1.0, + "num_threads": 8, + "multi_insert": null }, - "query_layouts": [ - "same_as_data", - "scalar_quantized" - ], - "compression_threads": 1, - "num_bits": 4, - "seed": 7831252621480178695, - "transform_kind": { - "random_rotation": { - "override": 100 - } - }, - "pre_scale": { - "some": 0.00390625 - } - } - }, - { - "type": "exhaustive-spherical-quantization", - "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { + "search_phase": { + "search-type": "topk", "queries": "disk_index_sample_query_10pts.fbin", "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } + "reps": 5, + "num_threads": [ + 8 + ], + "runs": [ + { + "search_n": 10, + "search_l": [ + 10, + 20, + 30, + 40, + 50, + 60, + 70, + 80, + 90, + 100 + ], + "recall_k": 10 + } + ] }, - "query_layouts": [ - "same_as_data", - "scalar_quantized" - ], - "compression_threads": 1, - "num_bits": 8, - "seed": 7831252621480178695, + "seed": 12648430, "transform_kind": { - "padding_hadamard": "same" - }, - "pre_scale": { - "some": 0.00390625 - } - } - }, - { - "type": "exhaustive-spherical-quantization", - "content": { - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "data_type": "float32", - "distance": "squared_l2", - "search": { - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "num_threads": 1, - "recalls": { - "recall_k": [ - 5, - 10 - ], - "recall_n": [ - 5, - 10 - ] - } + "random_rotation": "same" }, "query_layouts": [ + "scalar_quantized", "same_as_data", - "scalar_quantized" + "full_precision" ], - "compression_threads": 1, - "num_bits": 8, - "seed": 7831252621480178695, - "transform_kind": { - "random_rotation": "natural" - }, + "num_bits": 4, "pre_scale": { "some": 0.00390625 } diff --git a/diskann-benchmark/example/spherical-exhaustive.json b/diskann-benchmark/example/spherical-exhaustive.json new file mode 100644 index 000000000..e48e3ad42 --- /dev/null +++ b/diskann-benchmark/example/spherical-exhaustive.json @@ -0,0 +1,447 @@ +{ + "search_directories": [ + "test_data/disk_index_search" + ], + "jobs": [ + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "four_bit_transposed" + ], + "compression_threads": 1, + "num_bits": 1, + "seed": 7831252621480178695, + "transform_kind": { + "padding_hadamard": "same" + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "four_bit_transposed" + ], + "compression_threads": 1, + "num_bits": 1, + "seed": 7831252621480178695, + "transform_kind": { + "padding_hadamard": "natural" + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "four_bit_transposed" + ], + "compression_threads": 1, + "num_bits": 1, + "seed": 7831252621480178695, + "transform_kind": { + "padding_hadamard": { + "override": 100 + } + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "four_bit_transposed" + ], + "compression_threads": 1, + "num_bits": 1, + "seed": 7831252621480178695, + "transform_kind": { + "random_rotation": "same" + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "four_bit_transposed" + ], + "compression_threads": 1, + "num_bits": 1, + "seed": 7831252621480178695, + "transform_kind": { + "random_rotation": "natural" + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "four_bit_transposed" + ], + "compression_threads": 1, + "num_bits": 1, + "seed": 7831252621480178695, + "transform_kind": { + "random_rotation": { + "override": 100 + } + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "scalar_quantized" + ], + "compression_threads": 1, + "num_bits": 4, + "seed": 7831252621480178695, + "transform_kind": { + "padding_hadamard": "same" + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "scalar_quantized" + ], + "compression_threads": 1, + "num_bits": 4, + "seed": 7831252621480178695, + "transform_kind": { + "padding_hadamard": "natural" + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "scalar_quantized" + ], + "compression_threads": 1, + "num_bits": 4, + "seed": 7831252621480178695, + "transform_kind": { + "padding_hadamard": { + "override": 100 + } + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "scalar_quantized" + ], + "compression_threads": 1, + "num_bits": 4, + "seed": 7831252621480178695, + "transform_kind": { + "random_rotation": { + "override": 100 + } + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "scalar_quantized" + ], + "compression_threads": 1, + "num_bits": 8, + "seed": 7831252621480178695, + "transform_kind": { + "padding_hadamard": "same" + }, + "pre_scale": { + "some": 0.00390625 + } + } + }, + { + "type": "exhaustive-spherical-quantization", + "content": { + "data": "disk_index_siftsmall_learn_256pts_data.fbin", + "data_type": "float32", + "distance": "squared_l2", + "search": { + "queries": "disk_index_sample_query_10pts.fbin", + "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "num_threads": 1, + "recalls": { + "recall_k": [ + 5, + 10 + ], + "recall_n": [ + 5, + 10 + ] + } + }, + "query_layouts": [ + "same_as_data", + "scalar_quantized" + ], + "compression_threads": 1, + "num_bits": 8, + "seed": 7831252621480178695, + "transform_kind": { + "random_rotation": "natural" + }, + "pre_scale": { + "some": 0.00390625 + } + } + } + ] +} From 10c4f6f4f3c035a79e45ba7cc87d582c49a85104 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Simhadri Date: Sun, 10 May 2026 17:22:58 -0700 Subject: [PATCH 4/5] update sq --- ...h-index-spherical-quantization-wiki1M.json | 261 +----------------- 1 file changed, 12 insertions(+), 249 deletions(-) diff --git a/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json b/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json index 695b1d041..653e1a4f1 100644 --- a/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json +++ b/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json @@ -1,45 +1,39 @@ { "search_directories":[ - "test_data/disk_index_search" + "../big-ann-benchmarks/data/wikipedia_cohere" ], "jobs": [ { - "type": "graph-index-build-spherical-quantization", + "type": "async-index-build-spherical-quantization", "content": { "build": { "data_type": "float32", - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "distance": "squared_l2", + "data": "wikipedia_base.bin.crop_nb_1000000", + "distance": "inner_product", "start_point_strategy": "medoid", "max_degree": 32, - "l_build": 50, + "l_build": 100, "alpha": 1.2, "backedge_ratio": 1.0, - "num_threads": 8, + "num_threads": 32, "multi_insert": null }, "search_phase": { "search-type": "topk", - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", + "queries": "wikipedia_query.bin", + "groundtruth": "wikipedia-1M", "reps": 5, "num_threads": [ - 8 + 32 ], "runs": [ { "search_n": 10, "search_l": [ - 10, - 20, - 30, - 40, 50, - 60, - 70, - 80, - 90, - 100 + 100, + 150, + 200,300 ], "recall_k": 10 } @@ -50,180 +44,6 @@ "padding_hadamard": "same" }, "query_layouts": [ - "four_bit_transposed", - "same_as_data", - "full_precision" - ], - "num_bits": 1, - "pre_scale": { - "some": 0.00390625 - } - } - }, - { - "type": "graph-index-build-spherical-quantization", - "content": { - "build": { - "data_type": "float32", - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "distance": "squared_l2", - "start_point_strategy": "medoid", - "max_degree": 32, - "l_build": 50, - "alpha": 1.2, - "backedge_ratio": 1.0, - "num_threads": 8, - "multi_insert": null - }, - "search_phase": { - "search-type": "topk", - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "reps": 5, - "num_threads": [ - 8 - ], - "runs": [ - { - "search_n": 10, - "search_l": [ - 10, - 20, - 30, - 40, - 50, - 60, - 70, - 80, - 90, - 100 - ], - "recall_k": 10 - } - ] - }, - "seed": 12648430, - "transform_kind": { - "padding_hadamard": "natural" - }, - "query_layouts": [ - "four_bit_transposed", - "same_as_data", - "full_precision" - ], - "num_bits": 1, - "pre_scale": { - "some": 0.00390625 - } - } - }, - { - "type": "graph-index-build-spherical-quantization", - "content": { - "build": { - "data_type": "float32", - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "distance": "squared_l2", - "start_point_strategy": "medoid", - "max_degree": 32, - "l_build": 50, - "alpha": 1.2, - "backedge_ratio": 1.0, - "num_threads": 8, - "multi_insert": null - }, - "search_phase": { - "search-type": "topk", - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "reps": 5, - "num_threads": [ - 8 - ], - "runs": [ - { - "search_n": 10, - "search_l": [ - 10, - 20, - 30, - 40, - 50, - 60, - 70, - 80, - 90, - 100 - ], - "recall_k": 10 - } - ] - }, - "seed": 12648430, - "transform_kind": { - "padding_hadamard": { - "override": 64 - } - }, - "query_layouts": [ - "four_bit_transposed", - "same_as_data", - "full_precision" - ], - "num_bits": 1, - "pre_scale": { - "some": 0.00390625 - } - } - }, - { - "type": "graph-index-build-spherical-quantization", - "content": { - "build": { - "data_type": "float32", - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "distance": "squared_l2", - "start_point_strategy": "medoid", - "max_degree": 32, - "l_build": 50, - "alpha": 1.2, - "backedge_ratio": 1.0, - "num_threads": 8, - "multi_insert": null - }, - "search_phase": { - "search-type": "topk", - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "reps": 5, - "num_threads": [ - 8 - ], - "runs": [ - { - "search_n": 10, - "search_l": [ - 10, - 20, - 30, - 40, - 50, - 60, - 70, - 80, - 90, - 100 - ], - "recall_k": 10 - } - ] - }, - "seed": 12648430, - "transform_kind": { - "padding_hadamard": "natural" - }, - "query_layouts": [ - "scalar_quantized", "same_as_data", "full_precision" ], @@ -232,63 +52,6 @@ "some": 0.00390625 } } - }, - { - "type": "graph-index-build-spherical-quantization", - "content": { - "build": { - "data_type": "float32", - "data": "disk_index_siftsmall_learn_256pts_data.fbin", - "distance": "squared_l2", - "start_point_strategy": "medoid", - "max_degree": 32, - "l_build": 50, - "alpha": 1.2, - "backedge_ratio": 1.0, - "num_threads": 8, - "multi_insert": null - }, - "search_phase": { - "search-type": "topk", - "queries": "disk_index_sample_query_10pts.fbin", - "groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin", - "reps": 5, - "num_threads": [ - 8 - ], - "runs": [ - { - "search_n": 10, - "search_l": [ - 10, - 20, - 30, - 40, - 50, - 60, - 70, - 80, - 90, - 100 - ], - "recall_k": 10 - } - ] - }, - "seed": 12648430, - "transform_kind": { - "random_rotation": "same" - }, - "query_layouts": [ - "scalar_quantized", - "same_as_data", - "full_precision" - ], - "num_bits": 4, - "pre_scale": { - "some": 0.00390625 - } - } } ] } From b32f1adfadcb391f9f19640bd4738f1e88b12001 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Simhadri Date: Sun, 10 May 2026 17:29:13 -0700 Subject: [PATCH 5/5] update sq --- .../example/graph-index-spherical-quantization-wiki1M.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json b/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json index 653e1a4f1..7c3b95efc 100644 --- a/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json +++ b/diskann-benchmark/example/graph-index-spherical-quantization-wiki1M.json @@ -4,7 +4,7 @@ ], "jobs": [ { - "type": "async-index-build-spherical-quantization", + "type": "graph-index-build-spherical-quantization", "content": { "build": { "data_type": "float32",