-
Notifications
You must be signed in to change notification settings - Fork 3.3k
feed range query fix #46692
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feed range query fix #46692
Changes from all commits
5266479
cc412f0
42292ed
d77b332
1c57e67
936b10c
85a900b
8426ee4
175e3b8
490fa5b
266f28f
d158291
ea824cf
97afca3
1d21b60
2e5ae47
147d9cd
478624f
9923ac6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -256,7 +256,7 @@ extends: | |
| service: ml | ||
| in_batch: ${{ parameters.release_azure_ai_ml }} | ||
| channels: | ||
| - conda-forge | ||
| - https://prefix.dev/conda-forge | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's the reason for this change?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The 1ES build agents block conda.anaconda.org (DNS sinkhole 192.0.2.11) due to network isolation, so the per-package channel override `-c conda-forge` was failing to resolve when conda-build invoked it.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @tvaron3 did you see this failure though? I would expect any of the existing pipelines to be grandfathered in to the correct network isolation. |
||
| checkout: | ||
| - package: azure-ai-ml | ||
| version: 1.28.1 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| # The MIT License (MIT) | ||
| # The MIT License (MIT) | ||
| # Copyright (c) 2014 Microsoft Corporation | ||
|
|
||
| # Permission is hereby granted, free of charge, to any person obtaining a copy | ||
|
|
@@ -39,6 +39,11 @@ | |
| from . import documents | ||
| from . import http_constants | ||
| from . import _runtime_constants | ||
| from ._query_aggregate_utils import ( | ||
| _AggregatePartialClassification, | ||
| _classify_aggregate_partial, | ||
| _get_select_value_aggregate_function, | ||
| ) | ||
| from ._constants import _Constants as Constants | ||
| from .auth import _get_authorization_header | ||
| from .offer import ThroughputProperties | ||
|
|
@@ -124,6 +129,7 @@ def build_options(kwargs: dict[str, Any]) -> dict[str, Any]: | |
| options['accessCondition'] = {'type': 'IfNoneMatch', 'condition': if_none_match} | ||
| return options | ||
|
|
||
|
|
||
| def _merge_query_results( | ||
| results: dict[str, Any], | ||
| partial_result: dict[str, Any], | ||
|
|
@@ -163,22 +169,13 @@ def _merge_query_results( | |
|
|
||
| results_docs = results.get("Documents") | ||
|
|
||
| # Check if both results are aggregate queries | ||
| is_partial_agg = ( | ||
| isinstance(partial_docs, list) | ||
| and len(partial_docs) == 1 | ||
| and isinstance(partial_docs[0], dict) | ||
| and partial_docs[0].get("_aggregate") is not None | ||
| ) | ||
| is_results_agg = ( | ||
| results_docs | ||
| and isinstance(results_docs, list) | ||
| and len(results_docs) == 1 | ||
| and isinstance(results_docs[0], dict) | ||
| and results_docs[0].get("_aggregate") is not None | ||
| ) | ||
| partial_aggregate_class = _classify_aggregate_partial(partial_docs, query) | ||
| results_aggregate_class = _classify_aggregate_partial(results_docs, query) | ||
|
|
||
| if is_partial_agg and is_results_agg: | ||
| if ( | ||
| partial_aggregate_class == _AggregatePartialClassification.OBJECT | ||
| and results_aggregate_class == _AggregatePartialClassification.OBJECT | ||
| ): | ||
| agg_results = results_docs[0]["_aggregate"] # type: ignore[index] | ||
| agg_partial = partial_docs[0]["_aggregate"] | ||
| for key in agg_partial: | ||
|
|
@@ -196,33 +193,26 @@ def _merge_query_results( | |
| agg_results[key] += agg_partial[key] | ||
| return results | ||
|
|
||
| # Check if both are VALUE aggregate queries | ||
| is_partial_value_agg = ( | ||
| isinstance(partial_docs, list) | ||
| and len(partial_docs) == 1 | ||
| and isinstance(partial_docs[0], (int, float)) | ||
| ) | ||
| is_results_value_agg = ( | ||
| results_docs | ||
| and isinstance(results_docs, list) | ||
| and len(results_docs) == 1 | ||
| and isinstance(results_docs[0], (int, float)) | ||
| ) | ||
|
|
||
| if is_partial_value_agg and is_results_value_agg: | ||
| query_text = query.get("query") if isinstance(query, dict) else query | ||
| if query_text: | ||
| query_upper = query_text.upper() | ||
| # For MIN/MAX, we find the min/max of the partial results. | ||
| # For COUNT/SUM, we sum the partial results. | ||
| # Without robust query parsing, we can't distinguish them reliably. | ||
| # Defaulting to sum for COUNT/SUM. MIN/MAX VALUE queries are not fully supported client-side. | ||
| if " SELECT VALUE MIN" in query_upper: | ||
| results_docs[0] = min(results_docs[0], partial_docs[0]) # type: ignore[index] | ||
| elif " SELECT VALUE MAX" in query_upper: | ||
| results_docs[0] = max(results_docs[0], partial_docs[0]) # type: ignore[index] | ||
| else: # For COUNT/SUM, we sum the partial results | ||
| results_docs[0] += partial_docs[0] # type: ignore[index] | ||
| if ( | ||
| partial_aggregate_class == _AggregatePartialClassification.VALUE | ||
| and results_aggregate_class == _AggregatePartialClassification.VALUE | ||
| ): | ||
| aggregate_fn = _get_select_value_aggregate_function(query) | ||
| if aggregate_fn is None: | ||
| raise ValueError( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🟡 Recommendation — Maintainability: Internal invariant violation should use an assertion-style error rather than `ValueError`. This raise guards an internal code invariant — it should only fire if the classification step reports a VALUE aggregate while `_get_select_value_aggregate_function` fails to recognize the aggregate function. Using `ValueError` makes an internal bug look like a user-input error. Suggested fix: Use `AssertionError` (or an explicit internal-error type) to signal the broken invariant. |
||
| "Invariant violation: VALUE aggregate classification requires a recognized aggregate function." | ||
| ) | ||
| if aggregate_fn == "MIN": | ||
| results_docs[0] = min(results_docs[0], partial_docs[0]) # type: ignore[index] | ||
| elif aggregate_fn == "MAX": | ||
| results_docs[0] = max(results_docs[0], partial_docs[0]) # type: ignore[index] | ||
| elif aggregate_fn == "AVG": | ||
| raise ValueError( | ||
| "VALUE AVG aggregate merge across partitions is not supported client-side." | ||
| ) | ||
| else: | ||
| # COUNT/SUM are additive. | ||
| results_docs[0] += partial_docs[0] # type: ignore[index] | ||
| return results | ||
|
|
||
| # Standard query, append documents | ||
|
|
@@ -234,6 +224,29 @@ def _merge_query_results( | |
| return results | ||
|
|
||
|
|
||
| def _raise_query_merge_value_error(merge_error: ValueError) -> None: | ||
| """Raise a clearer user-facing error for unsupported VALUE aggregate merges. | ||
|
|
||
| ``SELECT VALUE AVG(...)`` partials cannot be merged correctly client-side | ||
| across multiple partition/range responses. We fail loudly instead of | ||
| falling back to list concatenation (which would silently produce | ||
| mathematically incorrect results). | ||
|
|
||
| :param merge_error: ValueError raised while merging partial query results. | ||
| :type merge_error: ValueError | ||
| :raises ValueError: Always re-raises, potentially with a clearer message. | ||
| """ | ||
| merge_message = str(merge_error) | ||
| if "VALUE AVG aggregate merge across partitions is not supported client-side." in merge_message: | ||
| raise ValueError( | ||
| "Unsupported query shape for range-scoped pagination: " | ||
| "SELECT VALUE AVG(...) cannot be merged client-side when the query " | ||
| "scope spans multiple physical partitions." | ||
| ) from merge_error | ||
| raise merge_error | ||
|
|
||
|
|
||
|
|
||
| def GetHeaders( # pylint: disable=too-many-statements,too-many-branches | ||
| cosmos_client_connection: Union["CosmosClientConnection", "AsyncClientConnection"], | ||
| default_headers: Mapping[str, Any], | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see this is necessary because of a pipeline failure due to pkg_resources usage? Can we leave a comment here to explain that this change, and the addition of
`setuptools` and `wheel` to host reqs, should be reverted whenever this fix is no longer necessary?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also, I'm seeing that the Conda pipeline is using packages from version
2025.09.01 when the most recent is from this past March — is this branch up to date with main? The change to azure-ai-ml's meta.yaml is already in main, actually: https://github.com/Azure/azure-sdk-for-python/blob/main/conda/conda-recipes/azure-ai-ml/meta.yaml
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, that's the reason for the changes outside of cosmos — we are hotfixing an old release, but the pipeline has had changes since then.