From e4266465f822111cadcf8c2c3755c16cd6623143 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Fri, 13 Mar 2026 09:49:28 -0700 Subject: [PATCH 1/5] Adding changes for local and global full statistics improvements --- .../aio/hybrid_search_aggregator.py | 8 +- .../hybrid_search_aggregator.py | 8 +- .../azure/cosmos/aio/_container.py | 21 ++++ .../azure-cosmos/azure/cosmos/container.py | 16 +++ .../tests/test_query_hybrid_search.py | 106 ++++++++++++++++++ .../tests/test_query_hybrid_search_async.py | 100 +++++++++++++++++ 6 files changed, 255 insertions(+), 4 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py index 0080fc3c02b8..87506b2dea67 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py @@ -71,7 +71,11 @@ def __init__(self, client, resource_link, options, partitioned_query_execution_i async def _run_hybrid_search(self): # pylint: disable=too-many-branches, too-many-statements # Check if we need to run global statistics queries, and if so do for every partition in the container if self._hybrid_search_query_info['requiresGlobalStatistics']: - target_partition_key_ranges = await self._get_target_partition_key_range(target_all_ranges=True) + # When FullTextScoreScope is "Local", use only target ranges for statistics. + # When "Global" (default), use all ranges. 
+ full_text_score_scope = self._options.get("fullTextScoreScope", "Global") + use_all_ranges = full_text_score_scope != "Local" + target_partition_key_ranges = await self._get_target_partition_key_range(target_all_ranges=use_all_ranges) global_statistics_doc_producers = [] global_statistics_query = self._attach_parameters(self._hybrid_search_query_info['globalStatisticsQuery']) @@ -100,7 +104,7 @@ async def _run_hybrid_search(self): # pylint: disable=too-many-branches, too-ma if exceptions._partition_range_is_gone(e): # repairing document producer context on partition split global_statistics_doc_producers = await self._repair_document_producer(global_statistics_query, - target_all_ranges=True) + target_all_ranges=use_all_ranges) else: raise except StopAsyncIteration: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py index a738dd94ddbe..bf39fc0c453b 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py @@ -206,7 +206,11 @@ def __init__(self, client, resource_link, options, def _run_hybrid_search(self): # pylint: disable=too-many-branches, too-many-statements # Check if we need to run global statistics queries, and if so do for every partition in the container if self._hybrid_search_query_info['requiresGlobalStatistics']: - target_partition_key_ranges = self._get_target_partition_key_range(target_all_ranges=True) + # When FullTextScoreScope is "Local", use only target ranges for statistics. + # When "Global" (default), use all ranges. 
+ full_text_score_scope = self._options.get("fullTextScoreScope", "Global") + use_all_ranges = full_text_score_scope != "Local" + target_partition_key_ranges = self._get_target_partition_key_range(target_all_ranges=use_all_ranges) global_statistics_doc_producers = [] global_statistics_query = self._attach_parameters(self._hybrid_search_query_info['globalStatisticsQuery']) partitioned_query_execution_context_list = [] @@ -234,7 +238,7 @@ def _run_hybrid_search(self): # pylint: disable=too-many-branches, too-many-sta if exceptions._partition_range_is_gone(e): # repairing document producer context on partition split global_statistics_doc_producers = self._repair_document_producer(global_statistics_query, - target_all_ranges=True) + target_all_ranges=use_all_ranges) else: raise except StopIteration: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index ba1c293151a3..be3039192a53 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -547,6 +547,7 @@ def query_items( max_item_count: Optional[int] = None, parameters: Optional[list[dict[str, object]]] = None, partition_key: PartitionKeyType, + full_text_score_scope: Optional[Literal["Local", "Global"]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, populate_query_advice: Optional[bool] = None, @@ -587,6 +588,10 @@ def query_items( None, it will perform a cross partition query. To learn more about using partition keys, see `here `_. :paramtype partition_key: ~azure.cosmos.partition_key.PartitionKeyType + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed + across all documents in the container. 
When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. :keyword bool populate_index_metrics: Used to obtain the index metrics to understand how the query engine used existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. @@ -638,6 +643,7 @@ def query_items( initial_headers: Optional[dict[str, str]] = None, max_integrated_cache_staleness_in_ms: Optional[int] = None, max_item_count: Optional[int] = None, + full_text_score_scope: Optional[Literal["Local", "Global"]] = None, parameters: Optional[list[dict[str, object]]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, @@ -672,6 +678,10 @@ def query_items( milliseconds. For accounts configured to use the integrated cache, using Session or Eventual consistency, responses are guaranteed to be no staler than this value. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed + across all documents in the container. When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. :keyword parameters: Optional array of parameters to the query. Each parameter is a dict() with 'name' and 'value' keys. Ignored if no query is provided. 
@@ -727,6 +737,7 @@ def query_items( max_integrated_cache_staleness_in_ms: Optional[int] = None, max_item_count: Optional[int] = None, parameters: Optional[list[dict[str, object]]] = None, + full_text_score_scope: Optional[Literal["Local", "Global"]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, populate_query_advice: Optional[bool] = None, @@ -763,6 +774,10 @@ def query_items( Each parameter is a dict() with 'name' and 'value' keys. Ignored if no query is provided. :paramtype parameters: [List[Dict[str, object]]] + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed + across all documents in the container. When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. :keyword bool populate_index_metrics: Used to obtain the index metrics to understand how the query engine used existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. @@ -843,6 +858,10 @@ def query_items( None, it will perform a cross partition query. To learn more about using partition keys, see `here `_. :paramtype partition_key: ~azure.cosmos.partition_key.PartitionKeyType + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed + across all documents in the container. When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. 
:keyword bool populate_index_metrics: Used to obtain the index metrics to understand how the query engine used existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. @@ -902,6 +921,8 @@ def query_items( feed_options["maxIntegratedCacheStaleness"] = max_integrated_cache_staleness_in_ms if utils.valid_key_value_exist(kwargs, "continuation_token_limit"): feed_options["responseContinuationTokenLimitInKb"] = kwargs.pop("continuation_token_limit") + if utils.valid_key_value_exist(kwargs, "full_text_score_scope"): + feed_options["fullTextScoreScope"] = kwargs.pop("full_text_score_scope") # populate availability_strategy if (Constants.Kwargs.AVAILABILITY_STRATEGY in feed_options diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index f23588870f2e..05a907b8a998 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -746,6 +746,7 @@ def query_items( enable_cross_partition_query: Optional[bool] = None, max_item_count: Optional[int] = None, enable_scan_in_query: Optional[bool] = None, + full_text_score_scope: Optional[Literal["Local", "Global"]] = None, populate_query_metrics: Optional[bool] = None, *, continuation_token_limit: Optional[int] = None, @@ -782,6 +783,10 @@ def query_items( :param int max_item_count: Max number of items to be returned in the enumeration operation. :param bool enable_scan_in_query: Allow scan on the queries which couldn't be served as indexing was opted out on the requested paths. + :param Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed + across all documents in the container. 
When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. :param bool populate_query_metrics: Enable returning query metrics in response headers. :keyword bool populate_index_metrics: Used to obtain the index metrics to understand how the query engine used existing indexes and how it could use potential new indexes. Please note that this option will incur @@ -846,6 +851,7 @@ def query_items( initial_headers: Optional[dict[str, str]] = None, max_integrated_cache_staleness_in_ms: Optional[int] = None, max_item_count: Optional[int] = None, + full_text_score_scope: Optional[Literal["Local", "Global"]] = None, parameters: Optional[list[dict[str, object]]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, @@ -883,6 +889,10 @@ def query_items( milliseconds. For accounts configured to use the integrated cache, using Session or Eventual consistency, responses are guaranteed to be no staler than this value. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed + across all documents in the container. When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. :keyword parameters: Optional array of parameters to the query. Each parameter is a dict() with 'name' and 'value' keys. Ignored if no query is provided. @@ -968,6 +978,10 @@ def query_items( # pylint:disable=docstring-missing-param None, it will perform a cross partition query. To learn more about using partition keys, see `here `_. 
:paramtype partition_key: ~azure.cosmos.partition_key.PartitionKeyType + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed + across all documents in the container. When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. :keyword bool populate_index_metrics: Used to obtain the index metrics to understand how the query engine used existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. @@ -1035,6 +1049,8 @@ def query_items( # pylint:disable=docstring-missing-param feed_options["maxIntegratedCacheStaleness"] = max_integrated_cache_staleness_in_ms if utils.valid_key_value_exist(kwargs, "continuation_token_limit"): feed_options["responseContinuationTokenLimitInKb"] = kwargs.pop("continuation_token_limit") + if utils.valid_key_value_exist(kwargs, "full_text_score_scope"): + feed_options["fullTextScoreScope"] = kwargs.pop("full_text_score_scope") # populate availability_strategy if (Constants.Kwargs.AVAILABILITY_STRATEGY in feed_options diff --git a/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search.py b/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search.py index 39ef0525719b..168829766b42 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search.py +++ b/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search.py @@ -471,6 +471,112 @@ def test_hybrid_and_non_hybrid_param_queries_equivalence(self): assert len(literal_simple_indices) == len(param_simple_indices) == 5 assert literal_simple_indices == param_simple_indices + def test_hybrid_search_with_full_text_score_scope_global(self): + """Test that full_text_score_scope='Global' returns the same results as the default (no scope set).""" + query = "SELECT 
TOP 10 c.index, c.title FROM c WHERE FullTextContains(c.title, 'John') OR " \ + "FullTextContains(c.text, 'John') ORDER BY RANK FullTextScore(c.title, 'John')" + + # Default (no scope) + results_default = self.test_container.query_items(query, enable_cross_partition_query=True) + result_list_default = [res['index'] for res in results_default] + + # Explicit Global scope + results_global = self.test_container.query_items(query, enable_cross_partition_query=True, + full_text_score_scope="Global") + result_list_global = [res['index'] for res in results_global] + + assert len(result_list_default) == len(result_list_global) == 3 + assert set(result_list_default) == set(result_list_global) == {2, 85, 57} + + def test_hybrid_search_with_full_text_score_scope_local(self): + """Test that full_text_score_scope='Local' returns valid results for a cross-partition query.""" + query = "SELECT TOP 10 c.index, c.title FROM c WHERE FullTextContains(c.title, 'John') OR " \ + "FullTextContains(c.text, 'John') ORDER BY RANK FullTextScore(c.title, 'John')" + + results_local = self.test_container.query_items(query, enable_cross_partition_query=True, + full_text_score_scope="Local") + result_list_local = [res['index'] for res in results_local] + assert len(result_list_local) == 3 + for res in result_list_local: + assert res in [2, 85, 57] + + def test_hybrid_search_with_full_text_score_scope_local_partition_key(self): + """Test that full_text_score_scope='Local' works with a specific partition key.""" + query = "SELECT TOP 10 c.index, c.title FROM c WHERE FullTextContains(c.title, 'John') OR " \ + "FullTextContains(c.text, 'John') ORDER BY RANK FullTextScore(c.title, 'John')" + + # With Local scope and partition key, statistics are scoped to just that partition + results_local = self.test_container.query_items(query, partition_key='1', + full_text_score_scope="Local") + result_list_local = list(results_local) + # Should return results only from partition key '1' + assert 
len(result_list_local) > 0 + for res in result_list_local: + assert res['index'] in [2, 85, 57] + + def test_hybrid_search_rrf_with_full_text_score_scope_local(self): + """Test RRF hybrid search with Local scope returns valid results.""" + query = "SELECT TOP 10 c.index, c.title, c.text FROM c WHERE " \ + "FullTextContains(c.title, 'John') OR FullTextContains(c.text, 'John') OR " \ + "FullTextContains(c.text, 'United States') ORDER BY RANK RRF(FullTextScore(c.title, 'John')," \ + " FullTextScore(c.text, 'United States'))" + + results_local = self.test_container.query_items(query, enable_cross_partition_query=True, + full_text_score_scope="Local") + result_list_local = list(results_local) + assert len(result_list_local) == 10 + for res in result_list_local: + assert res['index'] in [61, 51, 49, 54, 75, 24, 77, 76, 80, 25, 22, 2, 66, 57, 85] + + def test_hybrid_search_local_vs_global_scope_both_return_results(self): + """Verify both Local and Global scopes return valid, non-empty results for the same query.""" + query = "SELECT TOP 10 c.index, c.title FROM c WHERE FullTextContains(c.title, 'John') OR " \ + "FullTextContains(c.text, 'John') ORDER BY RANK FullTextScore(c.title, 'John')" + + for scope in ["Local", "Global"]: + results = self.test_container.query_items(query, enable_cross_partition_query=True, + full_text_score_scope=scope) + result_list = list(results) + assert len(result_list) > 0, f"Expected results for scope={scope}, got none." 
+ for res in result_list: + assert res['index'] in [2, 85, 57] + + def test_weighted_rrf_with_full_text_score_scope_local(self): + """Test weighted RRF with Local scope returns valid results.""" + query = """ + SELECT TOP 15 c.index AS Index, c.title AS Title, c.text AS Text + FROM c + WHERE FullTextContains(c.title, 'John') OR FullTextContains(c.text, 'John') + OR FullTextContains(c.text, 'United States') + ORDER BY RANK RRF(FullTextScore(c.title, 'John'), + FullTextScore(c.text, 'United States'), [1, 1]) + """ + + for scope in ["Local", "Global"]: + results = self.test_container.query_items(query, enable_cross_partition_query=True, + full_text_score_scope=scope) + result_list = [res['Index'] for res in results] + assert len(result_list) > 0, f"Expected results for scope={scope}, got none." + for result in result_list: + assert result in [61, 51, 49, 54, 75, 24, 77, 76, 80, 25, 22, 2, 66, 57, 85] + + def test_hybrid_search_parameterized_with_full_text_score_scope(self): + """Test parameterized hybrid search with full_text_score_scope.""" + param_query = ( + "SELECT TOP 10 c.index, c.title FROM c " + "WHERE FullTextContains(c.title, @term) OR FullTextContains(c.text, @term) " + "ORDER BY RANK FullTextScore(c.title, @term)" + ) + params = [{"name": "@term", "value": "John"}] + + results_local = self.test_container.query_items( + param_query, parameters=params, enable_cross_partition_query=True, + full_text_score_scope="Local" + ) + result_list_local = [res['index'] for res in results_local] + assert len(result_list_local) == 3 + assert set(result_list_local) == {2, 85, 57} + if __name__ == "__main__": unittest.main() diff --git a/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search_async.py b/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search_async.py index da68ae5b384c..40156301139a 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search_async.py +++ b/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search_async.py @@ -477,6 +477,106 @@ async def 
test_hybrid_and_non_hybrid_param_queries_equivalence_async(self): assert len(literal_simple_indices) == len(param_simple_indices) == 5 assert literal_simple_indices == param_simple_indices + async def test_hybrid_search_with_full_text_score_scope_global_async(self): + """Test that full_text_score_scope='Global' returns the same results as the default (no scope set).""" + query = "SELECT TOP 10 c.index, c.title FROM c WHERE FullTextContains(c.title, 'John') OR " \ + "FullTextContains(c.text, 'John') ORDER BY RANK FullTextScore(c.title, 'John')" + + # Default (no scope) + results_default = self.test_container.query_items(query) + result_list_default = [res['index'] async for res in results_default] + + # Explicit Global scope + results_global = self.test_container.query_items(query, full_text_score_scope="Global") + result_list_global = [res['index'] async for res in results_global] + + assert len(result_list_default) == len(result_list_global) == 3 + assert set(result_list_default) == set(result_list_global) == {2, 85, 57} + + async def test_hybrid_search_with_full_text_score_scope_local_async(self): + """Test that full_text_score_scope='Local' returns valid results for a cross-partition query.""" + query = "SELECT TOP 10 c.index, c.title FROM c WHERE FullTextContains(c.title, 'John') OR " \ + "FullTextContains(c.text, 'John') ORDER BY RANK FullTextScore(c.title, 'John')" + + results_local = self.test_container.query_items(query, full_text_score_scope="Local") + result_list_local = [res['index'] async for res in results_local] + assert len(result_list_local) == 3 + for res in result_list_local: + assert res in [2, 85, 57] + + async def test_hybrid_search_with_full_text_score_scope_local_partition_key_async(self): + """Test that full_text_score_scope='Local' works with a specific partition key.""" + query = "SELECT TOP 10 c.index, c.title FROM c WHERE FullTextContains(c.title, 'John') OR " \ + "FullTextContains(c.text, 'John') ORDER BY RANK FullTextScore(c.title, 
'John')" + + # With Local scope and partition key, statistics are scoped to just that partition + results_local = self.test_container.query_items(query, partition_key='1', + full_text_score_scope="Local") + result_list_local = [res async for res in results_local] + # Should return results only from partition key '1' + assert len(result_list_local) > 0 + for res in result_list_local: + assert res['index'] in [2, 85, 57] + + async def test_hybrid_search_rrf_with_full_text_score_scope_local_async(self): + """Test RRF hybrid search with Local scope returns valid results.""" + query = "SELECT TOP 10 c.index, c.title, c.text FROM c WHERE " \ + "FullTextContains(c.title, 'John') OR FullTextContains(c.text, 'John') OR " \ + "FullTextContains(c.text, 'United States') ORDER BY RANK RRF(FullTextScore(c.title, 'John')," \ + " FullTextScore(c.text, 'United States'))" + + results_local = self.test_container.query_items(query, full_text_score_scope="Local") + result_list_local = [res async for res in results_local] + assert len(result_list_local) == 10 + for res in result_list_local: + assert res['index'] in [61, 51, 49, 54, 75, 24, 77, 76, 80, 25, 22, 2, 66, 57, 85] + + async def test_hybrid_search_local_vs_global_scope_both_return_results_async(self): + """Verify both Local and Global scopes return valid, non-empty results for the same query.""" + query = "SELECT TOP 10 c.index, c.title FROM c WHERE FullTextContains(c.title, 'John') OR " \ + "FullTextContains(c.text, 'John') ORDER BY RANK FullTextScore(c.title, 'John')" + + for scope in ["Local", "Global"]: + results = self.test_container.query_items(query, full_text_score_scope=scope) + result_list = [res async for res in results] + assert len(result_list) > 0, f"Expected results for scope={scope}, got none." 
+ for res in result_list: + assert res['index'] in [2, 85, 57] + + async def test_weighted_rrf_with_full_text_score_scope_local_async(self): + """Test weighted RRF with Local scope returns valid results.""" + query = """ + SELECT TOP 15 c.index AS Index, c.title AS Title, c.text AS Text + FROM c + WHERE FullTextContains(c.title, 'John') OR FullTextContains(c.text, 'John') + OR FullTextContains(c.text, 'United States') + ORDER BY RANK RRF(FullTextScore(c.title, 'John'), + FullTextScore(c.text, 'United States'), [1, 1]) + """ + + for scope in ["Local", "Global"]: + results = self.test_container.query_items(query, full_text_score_scope=scope) + result_list = [res['Index'] async for res in results] + assert len(result_list) > 0, f"Expected results for scope={scope}, got none." + for result in result_list: + assert result in [61, 51, 49, 54, 75, 24, 77, 76, 80, 25, 22, 2, 66, 57, 85] + + async def test_hybrid_search_parameterized_with_full_text_score_scope_async(self): + """Test parameterized hybrid search with full_text_score_scope.""" + param_query = ( + "SELECT TOP 10 c.index, c.title FROM c " + "WHERE FullTextContains(c.title, @term) OR FullTextContains(c.text, @term) " + "ORDER BY RANK FullTextScore(c.title, @term)" + ) + params = [{"name": "@term", "value": "John"}] + + results_local = self.test_container.query_items( + param_query, parameters=params, full_text_score_scope="Local" + ) + result_list_local = [res['index'] async for res in results_local] + assert len(result_list_local) == 3 + assert set(result_list_local) == {2, 85, 57} + if __name__ == "__main__": unittest.main() From 5cb2e38564e82f16ad67e78483c1b5461e731c64 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Fri, 13 Mar 2026 10:13:01 -0700 Subject: [PATCH 2/5] Updating some code --- .../aio/hybrid_search_aggregator.py | 8 ++--- .../hybrid_search_aggregator.py | 7 ++-- .../azure/cosmos/aio/_container.py | 35 ++++++++++--------- .../azure-cosmos/azure/cosmos/container.py | 27 +++++++------- 4 files 
changed, 43 insertions(+), 34 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py index 87506b2dea67..e63db38c2ef9 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py @@ -6,12 +6,12 @@ from azure.cosmos._execution_context.aio.base_execution_context import _QueryExecutionContextBase from azure.cosmos._execution_context.aio import document_producer from azure.cosmos._execution_context.hybrid_search_aggregator import _retrieve_component_scores, _rewrite_query_infos, \ - _compute_rrf_scores, _compute_ranks, _coalesce_duplicate_rids, _attach_parameters + _compute_rrf_scores, _compute_ranks, _coalesce_duplicate_rids, _attach_parameters, \ + _FULL_TEXT_SCORE_SCOPE_KEY, _FULL_TEXT_SCORE_SCOPE_LOCAL, _FULL_TEXT_SCORE_SCOPE_DEFAULT from azure.cosmos._routing import routing_range from azure.cosmos import exceptions # pylint: disable=protected-access -RRF_CONSTANT = 60 class _Placeholders: @@ -73,8 +73,8 @@ async def _run_hybrid_search(self): # pylint: disable=too-many-branches, too-ma if self._hybrid_search_query_info['requiresGlobalStatistics']: # When FullTextScoreScope is "Local", use only target ranges for statistics. # When "Global" (default), use all ranges. 
- full_text_score_scope = self._options.get("fullTextScoreScope", "Global") - use_all_ranges = full_text_score_scope != "Local" + full_text_score_scope = self._options.get(_FULL_TEXT_SCORE_SCOPE_KEY, _FULL_TEXT_SCORE_SCOPE_DEFAULT) + use_all_ranges = full_text_score_scope != _FULL_TEXT_SCORE_SCOPE_LOCAL target_partition_key_ranges = await self._get_target_partition_key_range(target_all_ranges=use_all_ranges) global_statistics_doc_producers = [] global_statistics_query = self._attach_parameters(self._hybrid_search_query_info['globalStatisticsQuery']) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py index bf39fc0c453b..6c1a938bc879 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py @@ -11,6 +11,9 @@ # pylint: disable=protected-access RRF_CONSTANT = 60 +_FULL_TEXT_SCORE_SCOPE_KEY = "fullTextScoreScope" +_FULL_TEXT_SCORE_SCOPE_LOCAL = "Local" +_FULL_TEXT_SCORE_SCOPE_DEFAULT = "Global" class _Placeholders: @@ -208,8 +211,8 @@ def _run_hybrid_search(self): # pylint: disable=too-many-branches, too-many-sta if self._hybrid_search_query_info['requiresGlobalStatistics']: # When FullTextScoreScope is "Local", use only target ranges for statistics. # When "Global" (default), use all ranges. 
- full_text_score_scope = self._options.get("fullTextScoreScope", "Global") - use_all_ranges = full_text_score_scope != "Local" + full_text_score_scope = self._options.get(_FULL_TEXT_SCORE_SCOPE_KEY, _FULL_TEXT_SCORE_SCOPE_DEFAULT) + use_all_ranges = full_text_score_scope != _FULL_TEXT_SCORE_SCOPE_LOCAL target_partition_key_ranges = self._get_target_partition_key_range(target_all_ranges=use_all_ranges) global_statistics_doc_producers = [] global_statistics_query = self._attach_parameters(self._hybrid_search_query_info['globalStatisticsQuery']) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index be3039192a53..8eca009a4f5b 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -542,12 +542,12 @@ def query_items( *, continuation_token_limit: Optional[int] = None, enable_scan_in_query: Optional[bool] = None, + full_text_score_scope: Optional[Literal["Local", "Global"]] = None, initial_headers: Optional[dict[str, str]] = None, max_integrated_cache_staleness_in_ms: Optional[int] = None, max_item_count: Optional[int] = None, parameters: Optional[list[dict[str, object]]] = None, partition_key: PartitionKeyType, - full_text_score_scope: Optional[Literal["Local", "Global"]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, populate_query_advice: Optional[bool] = None, @@ -575,6 +575,10 @@ def query_items( in this list are specified as the names of the Azure Cosmos locations like, 'West US', 'East US' and so on. If all preferred locations were excluded, primary/hub location will be used. This excluded_location will override existing excluded_locations in client level. + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. 
When set to "Global" (default), BM25 statistics are computed + across all documents in the container. When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. :keyword dict[str, str] initial_headers: Initial headers to be sent as part of the request. :keyword int max_integrated_cache_staleness_in_ms: The max cache staleness for the integrated cache in milliseconds. For accounts configured to use the integrated cache, using Session or Eventual consistency, @@ -588,10 +592,6 @@ def query_items( None, it will perform a cross partition query. To learn more about using partition keys, see `here `_. :paramtype partition_key: ~azure.cosmos.partition_key.PartitionKeyType - :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used - by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed - across all documents in the container. When set to "Local", statistics are computed only over the subset - of documents within the partition key values specified in the query. :keyword bool populate_index_metrics: Used to obtain the index metrics to understand how the query engine used existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. 
@@ -640,10 +640,10 @@ def query_items( continuation_token_limit: Optional[int] = None, enable_scan_in_query: Optional[bool] = None, feed_range: dict[str, Any], + full_text_score_scope: Optional[Literal["Local", "Global"]] = None, initial_headers: Optional[dict[str, str]] = None, max_integrated_cache_staleness_in_ms: Optional[int] = None, max_item_count: Optional[int] = None, - full_text_score_scope: Optional[Literal["Local", "Global"]] = None, parameters: Optional[list[dict[str, object]]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, @@ -673,15 +673,15 @@ def query_items( If all preferred locations were excluded, primary/hub location will be used. This excluded_location will override existing excluded_locations in client level. :keyword dict[str, Any] feed_range: The feed range that is used to define the scope. + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed + across all documents in the container. When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. :keyword dict[str, str] initial_headers: Initial headers to be sent as part of the request. :keyword int max_integrated_cache_staleness_in_ms: The max cache staleness for the integrated cache in milliseconds. For accounts configured to use the integrated cache, using Session or Eventual consistency, responses are guaranteed to be no staler than this value. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used - by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed - across all documents in the container. 
When set to "Local", statistics are computed only over the subset - of documents within the partition key values specified in the query. :keyword parameters: Optional array of parameters to the query. Each parameter is a dict() with 'name' and 'value' keys. Ignored if no query is provided. @@ -733,11 +733,11 @@ def query_items( *, continuation_token_limit: Optional[int] = None, enable_scan_in_query: Optional[bool] = None, + full_text_score_scope: Optional[Literal["Local", "Global"]] = None, initial_headers: Optional[dict[str, str]] = None, max_integrated_cache_staleness_in_ms: Optional[int] = None, max_item_count: Optional[int] = None, parameters: Optional[list[dict[str, object]]] = None, - full_text_score_scope: Optional[Literal["Local", "Global"]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, populate_query_advice: Optional[bool] = None, @@ -765,6 +765,10 @@ def query_items( in this list are specified as the names of the Azure Cosmos locations like, 'West US', 'East US' and so on. If all preferred locations were excluded, primary/hub location will be used. This excluded_location will override existing excluded_locations in client level. + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed + across all documents in the container. When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. :keyword Dict[str, str] initial_headers: Initial headers to be sent as part of the request. :keyword int max_integrated_cache_staleness_in_ms: The max cache staleness for the integrated cache in milliseconds. 
For accounts configured to use the integrated cache, using Session or Eventual consistency, @@ -774,10 +778,6 @@ def query_items( Each parameter is a dict() with 'name' and 'value' keys. Ignored if no query is provided. :paramtype parameters: [List[Dict[str, object]]] - :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used - by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed - across all documents in the container. When set to "Local", statistics are computed only over the subset - of documents within the partition key values specified in the query. :keyword bool populate_index_metrics: Used to obtain the index metrics to understand how the query engine used existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. @@ -922,7 +922,10 @@ def query_items( if utils.valid_key_value_exist(kwargs, "continuation_token_limit"): feed_options["responseContinuationTokenLimitInKb"] = kwargs.pop("continuation_token_limit") if utils.valid_key_value_exist(kwargs, "full_text_score_scope"): - feed_options["fullTextScoreScope"] = kwargs.pop("full_text_score_scope") + scope = kwargs.pop("full_text_score_scope") + if scope not in ("Local", "Global"): + raise ValueError(f"full_text_score_scope must be 'Local' or 'Global', got '{scope}'") + feed_options["fullTextScoreScope"] = scope # populate availability_strategy if (Constants.Kwargs.AVAILABILITY_STRATEGY in feed_options diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 05a907b8a998..4ec488be4e5e 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -746,10 +746,10 @@ def query_items( enable_cross_partition_query: Optional[bool] = None, max_item_count: Optional[int] = None, 
enable_scan_in_query: Optional[bool] = None, - full_text_score_scope: Optional[Literal["Local", "Global"]] = None, populate_query_metrics: Optional[bool] = None, *, continuation_token_limit: Optional[int] = None, + full_text_score_scope: Optional[Literal["Local", "Global"]] = None, initial_headers: Optional[dict[str, str]] = None, max_integrated_cache_staleness_in_ms: Optional[int] = None, populate_index_metrics: Optional[bool] = None, @@ -783,20 +783,20 @@ def query_items( :param int max_item_count: Max number of items to be returned in the enumeration operation. :param bool enable_scan_in_query: Allow scan on the queries which couldn't be served as indexing was opted out on the requested paths. - :param Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + :param bool populate_query_metrics: Enable returning query metrics in response headers. + :keyword int continuation_token_limit: The size limit in kb of the response continuation token in the query + response. Valid values are positive integers. + A value of 0 is the same as not passing a value (default no limit). + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed across all documents in the container. When set to "Local", statistics are computed only over the subset of documents within the partition key values specified in the query. - :param bool populate_query_metrics: Enable returning query metrics in response headers. :keyword bool populate_index_metrics: Used to obtain the index metrics to understand how the query engine used existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. 
:keyword bool populate_query_advice: Used to obtain the query advice to understand aspects of the query that can be optimized. Please note that this option will incur additional latency overhead, so it should be enabled when debugging queries. - :keyword int continuation_token_limit: The size limit in kb of the response continuation token in the query - response. Valid values are positive integers. - A value of 0 is the same as not passing a value (default no limit). :keyword Sequence[str] excluded_locations: Excluded locations to be skipped from preferred locations. The locations in this list are specified as the names of the Azure Cosmos locations like, 'West US', 'East US' and so on. If all preferred locations were excluded, primary/hub location will be used. @@ -848,10 +848,10 @@ def query_items( enable_cross_partition_query: Optional[bool] = None, enable_scan_in_query: Optional[bool] = None, feed_range: dict[str, Any], + full_text_score_scope: Optional[Literal["Local", "Global"]] = None, initial_headers: Optional[dict[str, str]] = None, max_integrated_cache_staleness_in_ms: Optional[int] = None, max_item_count: Optional[int] = None, - full_text_score_scope: Optional[Literal["Local", "Global"]] = None, parameters: Optional[list[dict[str, object]]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, @@ -884,15 +884,15 @@ def query_items( If all preferred locations were excluded, primary/hub location will be used. This excluded_location will override existing excluded_locations in client level. :keyword dict[str, Any] feed_range: The feed range that is used to define the scope. + :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used + by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed + across all documents in the container. 
When set to "Local", statistics are computed only over the subset + of documents within the partition key values specified in the query. :keyword dict[str, str] initial_headers: Initial headers to be sent as part of the request. :keyword int max_integrated_cache_staleness_in_ms: The max cache staleness for the integrated cache in milliseconds. For accounts configured to use the integrated cache, using Session or Eventual consistency, responses are guaranteed to be no staler than this value. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword Literal["Local", "Global"] full_text_score_scope: Sets the scope for computing BM25 statistics used - by FullTextScore in hybrid search queries. When set to "Global" (default), BM25 statistics are computed - across all documents in the container. When set to "Local", statistics are computed only over the subset - of documents within the partition key values specified in the query. :keyword parameters: Optional array of parameters to the query. Each parameter is a dict() with 'name' and 'value' keys. Ignored if no query is provided. 
@@ -1050,7 +1050,10 @@ def query_items( # pylint:disable=docstring-missing-param if utils.valid_key_value_exist(kwargs, "continuation_token_limit"): feed_options["responseContinuationTokenLimitInKb"] = kwargs.pop("continuation_token_limit") if utils.valid_key_value_exist(kwargs, "full_text_score_scope"): - feed_options["fullTextScoreScope"] = kwargs.pop("full_text_score_scope") + scope = kwargs.pop("full_text_score_scope") + if scope not in ("Local", "Global"): + raise ValueError(f"full_text_score_scope must be 'Local' or 'Global', got '{scope}'") + feed_options["fullTextScoreScope"] = scope # populate availability_strategy if (Constants.Kwargs.AVAILABILITY_STRATEGY in feed_options From 5096d525b4ad13b80a515dc9ed65aafd7b5f286d Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Fri, 13 Mar 2026 10:15:05 -0700 Subject: [PATCH 3/5] Updating changelog --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 011365c27ffc..707e060c5ada 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -6,6 +6,7 @@ * Added support for Query Advisor feature - See [PR 45331](https://github.com/Azure/azure-sdk-for-python/pull/45331) * Added `get_response_headers()` and `get_last_response_headers()` methods to the `CosmosItemPaged` and `CosmosAsyncItemPaged` objects returned by `query_items()`, allowing access to response headers from query operations. See [PR 44593](https://github.com/Azure/azure-sdk-for-python/pull/44593) * Added InferenceRequestTimeout property for HttpTimeout Policy to Reranking API. See [45469](https://github.com/Azure/azure-sdk-for-python/pull/45469) +* Added `full_text_score_scope` parameter to `query_items()` for controlling BM25 statistics scope in hybrid search queries. Supports "Local" and "Global" (default) scopes. 
#### Breaking Changes From 1074c270bc42dd4481880e5d0efa4621ef15853c Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Fri, 13 Mar 2026 10:20:55 -0700 Subject: [PATCH 4/5] Updating changelog --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 707e060c5ada..a36162364840 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -6,7 +6,7 @@ * Added support for Query Advisor feature - See [PR 45331](https://github.com/Azure/azure-sdk-for-python/pull/45331) * Added `get_response_headers()` and `get_last_response_headers()` methods to the `CosmosItemPaged` and `CosmosAsyncItemPaged` objects returned by `query_items()`, allowing access to response headers from query operations. See [PR 44593](https://github.com/Azure/azure-sdk-for-python/pull/44593) * Added InferenceRequestTimeout property for HttpTimeout Policy to Reranking API. See [45469](https://github.com/Azure/azure-sdk-for-python/pull/45469) -* Added `full_text_score_scope` parameter to `query_items()` for controlling BM25 statistics scope in hybrid search queries. Supports "Local" and "Global" (default) scopes. +* Added `full_text_score_scope` parameter to `query_items()` for controlling BM25 statistics scope in hybrid search queries. Supports "Local" and "Global" (default) scopes. 
See [45686](https://github.com/Azure/azure-sdk-for-python/pull/45686) #### Breaking Changes From ce0cf102f8f5d156d60b26e9d4e3fae6069a8347 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Fri, 13 Mar 2026 16:39:22 -0700 Subject: [PATCH 5/5] Fixing build issues, and resolving copilot comments --- .../aio/hybrid_search_aggregator.py | 6 ++++-- .../_execution_context/hybrid_search_aggregator.py | 6 ++++-- sdk/cosmos/azure-cosmos/azure/cosmos/container.py | 2 +- .../azure-cosmos/tests/test_query_hybrid_search.py | 11 ++++++----- .../tests/test_query_hybrid_search_async.py | 11 ++++++----- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py index e63db38c2ef9..f6b664e33d39 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/hybrid_search_aggregator.py @@ -103,8 +103,10 @@ async def _run_hybrid_search(self): # pylint: disable=too-many-branches, too-ma except exceptions.CosmosHttpResponseError as e: if exceptions._partition_range_is_gone(e): # repairing document producer context on partition split - global_statistics_doc_producers = await self._repair_document_producer(global_statistics_query, - target_all_ranges=use_all_ranges) + global_statistics_doc_producers = await self._repair_document_producer( + global_statistics_query, + target_all_ranges=use_all_ranges + ) else: raise except StopAsyncIteration: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py index 6c1a938bc879..db739bc93c91 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py +++ 
b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/hybrid_search_aggregator.py @@ -240,8 +240,10 @@ def _run_hybrid_search(self): # pylint: disable=too-many-branches, too-many-sta except exceptions.CosmosHttpResponseError as e: if exceptions._partition_range_is_gone(e): # repairing document producer context on partition split - global_statistics_doc_producers = self._repair_document_producer(global_statistics_query, - target_all_ranges=use_all_ranges) + global_statistics_doc_producers = self._repair_document_producer( + global_statistics_query, + target_all_ranges=use_all_ranges + ) else: raise except StopIteration: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 4ec488be4e5e..3ec1b346b7b8 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -939,7 +939,7 @@ def query_items( ... @distributed_trace - def query_items( # pylint:disable=docstring-missing-param + def query_items( # pylint:disable=docstring-missing-param,too-many-statements self, *args: Any, **kwargs: Any diff --git a/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search.py b/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search.py index 168829766b42..14dae19edb8a 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search.py +++ b/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search.py @@ -502,17 +502,18 @@ def test_hybrid_search_with_full_text_score_scope_local(self): def test_hybrid_search_with_full_text_score_scope_local_partition_key(self): """Test that full_text_score_scope='Local' works with a specific partition key.""" - query = "SELECT TOP 10 c.index, c.title FROM c WHERE FullTextContains(c.title, 'John') OR " \ + query = "SELECT TOP 10 c.index, c.title, c.pk FROM c WHERE FullTextContains(c.title, 'John') OR " \ "FullTextContains(c.text, 'John') ORDER BY RANK FullTextScore(c.title, 'John')" # With Local scope and partition key, 
statistics are scoped to just that partition - results_local = self.test_container.query_items(query, partition_key='1', + results_local = self.test_container.query_items(query, partition_key='2', full_text_score_scope="Local") result_list_local = list(results_local) - # Should return results only from partition key '1' - assert len(result_list_local) > 0 + # Only index=2 has pk='2' among the 'John' matches (57 and 85 are in a different partition) + assert len(result_list_local) == 1 for res in result_list_local: - assert res['index'] in [2, 85, 57] + assert res['pk'] == '2' + assert res['index'] == 2 def test_hybrid_search_rrf_with_full_text_score_scope_local(self): """Test RRF hybrid search with Local scope returns valid results.""" diff --git a/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search_async.py b/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search_async.py index 40156301139a..3a81e5e9d825 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search_async.py +++ b/sdk/cosmos/azure-cosmos/tests/test_query_hybrid_search_async.py @@ -506,17 +506,18 @@ async def test_hybrid_search_with_full_text_score_scope_local_async(self): async def test_hybrid_search_with_full_text_score_scope_local_partition_key_async(self): """Test that full_text_score_scope='Local' works with a specific partition key.""" - query = "SELECT TOP 10 c.index, c.title FROM c WHERE FullTextContains(c.title, 'John') OR " \ + query = "SELECT TOP 10 c.index, c.title, c.pk FROM c WHERE FullTextContains(c.title, 'John') OR " \ "FullTextContains(c.text, 'John') ORDER BY RANK FullTextScore(c.title, 'John')" # With Local scope and partition key, statistics are scoped to just that partition - results_local = self.test_container.query_items(query, partition_key='1', + results_local = self.test_container.query_items(query, partition_key='2', full_text_score_scope="Local") result_list_local = [res async for res in results_local] - # Should return results only from partition key '1' - assert
len(result_list_local) > 0 + # Only index=2 has pk='2' among the 'John' matches (57 and 85 are in a different partition) + assert len(result_list_local) == 1 for res in result_list_local: - assert res['index'] in [2, 85, 57] + assert res['pk'] == '2' + assert res['index'] == 2 async def test_hybrid_search_rrf_with_full_text_score_scope_local_async(self): """Test RRF hybrid search with Local scope returns valid results."""