From 40124bb84ccfb735ccf730ca06f51b6ccbaeb71d Mon Sep 17 00:00:00 2001 From: goodnight Date: Sat, 23 May 2026 05:11:10 +0100 Subject: [PATCH] update default cross-encoder model to Xenova/ms-marco-MiniLM-L-6-v2 across docs --- docs/reference.md | 6 +++--- docs/search.md | 8 ++++---- resources/Features.md | 2 +- tests/test_parser.py | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/reference.md b/docs/reference.md index 55d61d9..1aa91ff 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -79,13 +79,13 @@ SEARCH docs SIMILAR TO 'hello' LIMIT 5 ### Cross-encoder reranking (RERANK default) ``` -cross-encoder/ms-marco-MiniLM-L-6-v2 +Xenova/ms-marco-MiniLM-L-6-v2 ``` | Model | Notes | |---|---| -| `cross-encoder/ms-marco-MiniLM-L-6-v2` | Default. Fast passage reranker | -| `cross-encoder/ms-marco-MiniLM-L-12-v2` | Larger, higher quality | +| `Xenova/ms-marco-MiniLM-L-6-v2` | Default. Fast passage reranker | +| `Xenova/ms-marco-MiniLM-L-12-v2` | Larger, higher quality | | `BAAI/bge-reranker-base` | Strong general-purpose reranker | | `BAAI/bge-reranker-large` | Highest quality, slower | diff --git a/docs/search.md b/docs/search.md index 8daa2d4..0c77dae 100644 --- a/docs/search.md +++ b/docs/search.md @@ -370,15 +370,15 @@ SEARCH articles SIMILAR TO 'attention mechanism in transformers' LIMIT 10 USING Custom cross-encoder model: ```sql SEARCH articles SIMILAR TO 'semantic search' LIMIT 5 - RERANK MODEL 'cross-encoder/ms-marco-MiniLM-L-6-v2' + RERANK MODEL 'Xenova/ms-marco-MiniLM-L-6-v2' ``` -**Default cross-encoder model:** `cross-encoder/ms-marco-MiniLM-L-6-v2` +**Default cross-encoder model:** `Xenova/ms-marco-MiniLM-L-6-v2` | Model | Notes | |---|---| -| `cross-encoder/ms-marco-MiniLM-L-6-v2` | Default. Fast and accurate for passage reranking | -| `cross-encoder/ms-marco-MiniLM-L-12-v2` | Larger, higher quality, slower | +| `Xenova/ms-marco-MiniLM-L-6-v2` | Default. Fast and accurate for passage reranking | +| `Xenova/ms-marco-MiniLM-L-12-v2` | Larger, higher quality, slower | | `BAAI/bge-reranker-base` | BGE reranker, strong general-purpose performance | | `BAAI/bge-reranker-large` | Highest quality BGE reranker, slower | diff --git a/resources/Features.md b/resources/Features.md index 0272124..0c52da5 100644 --- a/resources/Features.md +++ b/resources/Features.md @@ -431,7 +431,7 @@ SEARCH medical_records SIMILAR TO 'neuromuscular junction antibody acetylcholine AND sub_specialty IN ('neuromuscular', 'neuroimmunology') AND severity IN ('high', 'critical') AND year >= 2022 - RERANK MODEL 'cross-encoder/ms-marco-MiniLM-L-6-v2' + RERANK MODEL 'Xenova/ms-marco-MiniLM-L-6-v2' ``` --- diff --git a/tests/test_parser.py b/tests/test_parser.py index 9c1bcca..ec9e051 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1003,10 +1003,10 @@ def test_rerank_flag_set(self): def test_rerank_with_model(self): node = parse( - "SEARCH col SIMILAR TO 'q' LIMIT 5 RERANK MODEL 'cross-encoder/ms-marco-MiniLM-L-6-v2'" + "SEARCH col SIMILAR TO 'q' LIMIT 5 RERANK MODEL 'Xenova/ms-marco-MiniLM-L-6-v2'" ) assert node.rerank is True - assert node.rerank_model == "cross-encoder/ms-marco-MiniLM-L-6-v2" + assert node.rerank_model == "Xenova/ms-marco-MiniLM-L-6-v2" def test_rerank_default_false(self): node = parse("SEARCH col SIMILAR TO 'q' LIMIT 5") @@ -1032,12 +1032,12 @@ def test_rerank_with_where(self): def test_rerank_with_hybrid_where_and_model(self): node = parse( "SEARCH col SIMILAR TO 'q' LIMIT 5 USING HYBRID WHERE year > 2020 " - "RERANK MODEL 'cross-encoder/ms-marco-MiniLM-L-6-v2'" + "RERANK MODEL 'Xenova/ms-marco-MiniLM-L-6-v2'" ) assert node.hybrid is True assert node.query_filter is not None assert node.rerank is True - assert node.rerank_model == "cross-encoder/ms-marco-MiniLM-L-6-v2" + assert node.rerank_model == "Xenova/ms-marco-MiniLM-L-6-v2" def test_rerank_lowercase(self): node = parse("SEARCH col SIMILAR TO 'q' LIMIT 5 rerank")