From f30d779a0f2f31c5ce18a2794888b7af37f5d5d9 Mon Sep 17 00:00:00 2001 From: A Vertex SDK engineer Date: Fri, 23 Jan 2026 07:46:17 -0800 Subject: [PATCH] fix: test fix internal PiperOrigin-RevId: 860106467 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 21 - google/cloud/aiplatform/gapic_version.py | 2 +- .../_protos/match_service.proto | 22 +- google/cloud/aiplatform/models.py | 129 ------ google/cloud/aiplatform/preview/models.py | 124 +----- .../schema/predict/instance/gapic_version.py | 2 +- .../predict/instance_v1/gapic_version.py | 2 +- .../v1/schema/predict/params/gapic_version.py | 2 +- .../schema/predict/params_v1/gapic_version.py | 2 +- .../predict/prediction/gapic_version.py | 2 +- .../predict/prediction_v1/gapic_version.py | 2 +- .../trainingjob/definition/gapic_version.py | 2 +- .../definition_v1/gapic_version.py | 2 +- .../schema/predict/instance/gapic_version.py | 2 +- .../predict/instance_v1beta1/gapic_version.py | 2 +- .../schema/predict/params/gapic_version.py | 2 +- .../predict/params_v1beta1/gapic_version.py | 2 +- .../predict/prediction/gapic_version.py | 2 +- .../prediction_v1beta1/gapic_version.py | 2 +- .../trainingjob/definition/gapic_version.py | 2 +- .../definition_v1beta1/gapic_version.py | 2 +- google/cloud/aiplatform/version.py | 2 +- google/cloud/aiplatform_v1/gapic_version.py | 2 +- .../cloud/aiplatform_v1beta1/gapic_version.py | 2 +- .../types/vertex_rag_data.py | 93 +--- pypi/_vertex_ai_placeholder/version.py | 2 +- ...t_metadata_google.cloud.aiplatform.v1.json | 2 +- ...adata_google.cloud.aiplatform.v1beta1.json | 2 +- tests/unit/aiplatform/test_endpoints.py | 133 ------ tests/unit/aiplatform/test_metadata_models.py | 3 - tests/unit/aiplatform/test_models.py | 143 ------ .../test_vertex_rag_data_service.py | 4 - ...t_create_agent_engine_developer_connect.py | 52 ++- .../replays/test_create_evaluation_run.py | 27 +- vertexai/_genai/evals.py | 14 - vertexai/_genai/types/__init__.py | 38 +- vertexai/_genai/types/common.py | 408 ++++++++---------- vertexai/agent_engines/templates/adk.py | 17 +- .../reasoning_engines/templates/adk.py | 15 +- 40 files changed, 290 insertions(+), 1001 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index bd118c81f8..3c53912ea5 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.134.0" + ".": "1.133.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index a3e856ac93..bb8d2c7834 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,26 +1,5 @@ # Changelog -## [1.134.0](https://github.com/googleapis/python-aiplatform/compare/v1.133.0...v1.134.0) (2026-01-18) - - -### Features - -* Add metadata to memories ([f9fc79d](https://github.com/googleapis/python-aiplatform/commit/f9fc79dda6888538486f4fd6a44aa02fa1bcba75)) -* Expose PSC for OpenModel ([feeb54d](https://github.com/googleapis/python-aiplatform/commit/feeb54d7a227adfadfb7d45a425c16e260dcb16b)) -* GenAI Client(evals) - Add support for `inference_configs` in `create_evaluation_run`. 
([33fe72a](https://github.com/googleapis/python-aiplatform/commit/33fe72a41de35f43c1ceb905ecf5652d5257b3ac)) -* GenAI SDK client - Support agent engine sandbox http request in genai sdk ([11c23a3](https://github.com/googleapis/python-aiplatform/commit/11c23a36a2a2e8a7ac6e9a4d6934943d9e8d1aa9)) -* Support metadata filtering for memory retrieval ([f9fc79d](https://github.com/googleapis/python-aiplatform/commit/f9fc79dda6888538486f4fd6a44aa02fa1bcba75)) -* Support metadata merge strategies for memory generation ([f9fc79d](https://github.com/googleapis/python-aiplatform/commit/f9fc79dda6888538486f4fd6a44aa02fa1bcba75)) -* Support Python 3.14 for reasoning engine. ([394cd1d](https://github.com/googleapis/python-aiplatform/commit/394cd1d5c29eeca46804fca90f6a9a43ab92206d)) -* Update data types from discovery doc. ([0c6fb66](https://github.com/googleapis/python-aiplatform/commit/0c6fb66ed5f641f60d5d1d14a51a5f4fcfa64aa1)) -* Update data types from discovery doc. ([a451fa3](https://github.com/googleapis/python-aiplatform/commit/a451fa374f670d2540f654866eb1091948efaf79)) - - -### Bug Fixes - -* Mistyping of langchain tools causing mypy errors ([0705a37](https://github.com/googleapis/python-aiplatform/commit/0705a378c6b81fa82a8e77c9c6026130209e57fb)) -* Test fix internal ([b1b900e](https://github.com/googleapis/python-aiplatform/commit/b1b900e953f9391b901cbdbe448a976d63fa3aca)) - ## [1.133.0](https://github.com/googleapis/python-aiplatform/compare/v1.132.0...v1.133.0) (2026-01-08) diff --git a/google/cloud/aiplatform/gapic_version.py b/google/cloud/aiplatform/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/gapic_version.py +++ b/google/cloud/aiplatform/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/matching_engine/_protos/match_service.proto b/google/cloud/aiplatform/matching_engine/_protos/match_service.proto index 2e4d5f6773..ab4ee104be 100644 --- a/google/cloud/aiplatform/matching_engine/_protos/match_service.proto +++ b/google/cloud/aiplatform/matching_engine/_protos/match_service.proto @@ -21,14 +21,15 @@ service MatchService { } // Feature embedding vector for sparse index. An array of numbers whose values -// are located in the specified dimensions. -message SparseEmbedding { - // The list of embedding values of the sparse vector. - repeated float float_val = 1; + // are located in the specified dimensions. + message SparseEmbedding { - // The list of indexes for the embedding values of the sparse vector. - repeated int64 dimension = 2; -} + // The list of embedding values of the sparse vector. + repeated float float_val = 1; + + // The list of indexes for the embedding values of the sparse vector. + repeated int64 dimension = 2; + } // Parameters for a match query. message MatchRequest { @@ -49,6 +50,7 @@ message MatchRequest { // Parameters for RRF algorithm that combines search results. message RRF { + // Users can provide an alpha value to give more weight to sparse vs dense. // For example, if the alpha is 0, we don't return dense at all, if it's 1, // we don't return sparse at all. @@ -68,7 +70,7 @@ message MatchRequest { // The list of restricts. repeated Namespace restricts = 4; - // The list of numeric restricts. + //The list of numeric restricts. 
repeated NumericNamespace numeric_restricts = 11; // Crowding is a constraint on a neighbor list produced by nearest neighbor @@ -126,9 +128,6 @@ message Embedding { // to return per crowding attribute value // (per_crowding_attribute_num_neighbors) is configured per-query. int64 crowding_attribute = 4; - - // The key-value map of additional metadata for the datapoint. - google.protobuf.Struct embedding_metadata = 7; } // Response of a match query. @@ -239,6 +238,7 @@ message Namespace { // eligible for each matching query, overall query is an AND across namespaces. // This uses numeric comparisons. message NumericNamespace { + // The string name of the namespace that this proto is specifying, // such as "size" or "cost". string name = 1; diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index eab5c98a71..e186b140ba 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -1367,9 +1367,6 @@ def deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, enable_access_logging=False, @@ -1470,13 +1467,6 @@ def deploy( autoscaling_target_request_count_per_minute (int): Optional. The target number of requests per minute for autoscaling. If set, the model will be scaled based on the number of requests it receives. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. The target number of pubsub undelivered messages for autoscaling. If set, the model will be scaled based on the pubsub queue size. 
@@ -1565,9 +1555,6 @@ def deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, spot=spot, @@ -1604,9 +1591,6 @@ def _deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, spot: bool = False, @@ -1710,13 +1694,6 @@ def _deploy( autoscaling_target_request_count_per_minute (int): Optional. The target number of requests per minute for autoscaling. If set, the model will be scaled based on the number of requests it receives. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. The target number of pubsub undelivered messages for autoscaling. If set, the model will be scaled based on the pubsub queue size. 
@@ -1782,9 +1759,6 @@ def _deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, spot=spot, @@ -1828,9 +1802,6 @@ def _deploy_call( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, spot: bool = False, @@ -1940,13 +1911,6 @@ def _deploy_call( A default value of 60 will be used if not specified. autoscaling_target_request_count_per_minute (int): Optional. Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. Target pubsub queue size per instance. 
autoscaling_pubsub_subscription_labels (Dict[str, str]): @@ -2042,9 +2006,6 @@ def _deploy_call( or autoscaling_target_accelerator_duty_cycle or autoscaling_target_cpu_utilization or autoscaling_target_request_count_per_minute - or autoscaling_target_dcgm_fi_dev_gpu_util - or autoscaling_target_vllm_gpu_cache_usage_perc - or autoscaling_target_vllm_num_requests_waiting or autoscaling_target_pubsub_num_undelivered_messages or autoscaling_pubsub_subscription_labels ) @@ -2056,9 +2017,6 @@ def _deploy_call( "autoscaling_target_accelerator_duty_cycle, " "autoscaling_target_cpu_utilization, " "autoscaling_target_request_count_per_minute, " - "autoscaling_target_dcgm_fi_dev_gpu_util, " - "autoscaling_target_vllm_gpu_cache_usage_perc, " - "autoscaling_target_vllm_num_requests_waiting, " "autoscaling_target_pubsub_num_undelivered_messages, " "autoscaling_pubsub_subscription_labels parameters " "may not be set when `deployment_resource_pool` is " @@ -2120,9 +2078,6 @@ def _deploy_call( or autoscaling_target_accelerator_duty_cycle or autoscaling_target_cpu_utilization or autoscaling_target_request_count_per_minute - or autoscaling_target_dcgm_fi_dev_gpu_util - or autoscaling_target_vllm_gpu_cache_usage_perc - or autoscaling_target_vllm_num_requests_waiting or autoscaling_target_pubsub_num_undelivered_messages or autoscaling_pubsub_subscription_labels ) @@ -2140,9 +2095,6 @@ def _deploy_call( "autoscaling_target_accelerator_duty_cycle, " "autoscaling_target_cpu_utilization, " "autoscaling_target_request_count_per_minute, " - "autoscaling_target_dcgm_fi_dev_gpu_util, " - "autoscaling_target_vllm_gpu_cache_usage_perc, " - "autoscaling_target_vllm_num_requests_waiting, " "autoscaling_target_pubsub_num_undelivered_messages, " "autoscaling_pubsub_subscription_labels parameters " "are ignored." 
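The hunk that follows removes the `_deploy_call` branches that built AutoscalingMetricSpec entries for the DCGM and vLLM metrics, leaving only the request-count and pubsub branches. A minimal sketch of the pubsub-backed spec the surviving branch constructs, written against the v1beta1 types that the preview path aliases as `gca_machine_resources_compat` (machine type, target, and label values here are hypothetical):

from google.cloud.aiplatform_v1beta1.types import machine_resources

# Hypothetical values; mirrors the pubsub branch that survives this patch.
pubsub_spec = machine_resources.AutoscalingMetricSpec(
    metric_name="pubsub.googleapis.com/subscription/num_undelivered_messages",
    target=100,
    monitored_resource_labels={"subscription_id": "my-subscription"},
)
dedicated_resources = machine_resources.DedicatedResources(
    machine_spec=machine_resources.MachineSpec(machine_type="n1-standard-4"),
    min_replica_count=1,
    max_replica_count=2,
    autoscaling_metric_specs=[pubsub_spec],
)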
@@ -2204,48 +2156,6 @@ def _deploy_call( [autoscaling_metric_spec] ) - if autoscaling_target_dcgm_fi_dev_gpu_util: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/" - "vertex_dcgm_fi_dev_gpu_util" - ), - target=autoscaling_target_dcgm_fi_dev_gpu_util, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - - if autoscaling_target_vllm_gpu_cache_usage_perc: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/" - "vertex_vllm_gpu_cache_usage_perc" - ), - target=autoscaling_target_vllm_gpu_cache_usage_perc, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - - if autoscaling_target_vllm_num_requests_waiting: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/" - "vertex_vllm_num_requests_waiting" - ), - target=autoscaling_target_vllm_num_requests_waiting, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - if autoscaling_target_pubsub_num_undelivered_messages: autoscaling_metric_spec = gca_machine_resources.AutoscalingMetricSpec( metric_name=( @@ -4582,9 +4492,6 @@ def deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, ) -> None: @@ -4766,9 +4673,6 @@ def deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, ) @@ -5844,9 +5748,6 @@ def deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, enable_access_logging=False, @@ -5969,13 +5870,6 @@ def deploy( autoscaling_target_request_count_per_minute (int): Optional. The target number of requests per minute for autoscaling. If set, the model will be scaled based on the number of requests it receives. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. 
Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. The target number of pubsub undelivered messages for autoscaling. If set, the model will be scaled based on the pubsub queue size. @@ -6035,13 +5929,6 @@ def deploy( autoscaling_target_request_count_per_minute (int): Optional. The target number of requests per minute for autoscaling. If set, the model will be scaled based on the number of requests it receives. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. The target number of pubsub undelivered messages for autoscaling. If set, the model will be scaled based on the pubsub queue size. @@ -6114,9 +6001,6 @@ def deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, spot=spot, @@ -6163,9 +6047,6 @@ def _deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, spot: bool = False, @@ -6290,13 +6171,6 @@ def _deploy( autoscaling_target_request_count_per_minute (int): Optional. The target number of requests per minute for autoscaling. If set, the model will be scaled based on the number of requests it receives. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. The target number of pubsub undelivered messages for autoscaling. If set, the model will be scaled based on the pubsub queue size. 
@@ -6393,9 +6267,6 @@ def _deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, spot=spot, diff --git a/google/cloud/aiplatform/preview/models.py b/google/cloud/aiplatform/preview/models.py index 64714f6abe..b3cb9f9ba8 100644 --- a/google/cloud/aiplatform/preview/models.py +++ b/google/cloud/aiplatform/preview/models.py @@ -783,9 +783,6 @@ def deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, @@ -874,14 +871,8 @@ def deploy( specified. A default value of 60 will be used if not specified. autoscaling_target_request_count_per_minute (int): Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): Target DCGM metrics for - GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): Target vLLM metrics - for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): Target vLLM metrics - for number of inference requests currently waiting in the queue. - autoscaling_target_pubsub_num_undelivered_messages (int): Target number - of pubsub undelivered messages per instance. + autoscaling_target_pubsub_num_undelivered_messages (int): Target + number of pubsub undelivered messages per instance. autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional. Monitored resource labels as key value pairs for metric filtering for pubsub_num_undelivered_messages. 
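From the caller's side, both the GA and preview `deploy()` signatures keep the request-count and pubsub autoscaling knobs while the three DCGM/vLLM targets disappear. A usage sketch with hypothetical project, endpoint, and model names:

from google.cloud import aiplatform

endpoint = aiplatform.Endpoint(
    "projects/my-project/locations/us-central1/endpoints/123"
)
model = aiplatform.Model(
    "projects/my-project/locations/us-central1/models/456"
)
# Only autoscaling parameters still present after this patch are passed here.
endpoint.deploy(
    model=model,
    machine_type="n1-standard-4",
    autoscaling_target_request_count_per_minute=300,
    autoscaling_target_pubsub_num_undelivered_messages=100,
    autoscaling_pubsub_subscription_labels={"subscription_id": "my-subscription"},
)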
@@ -970,9 +961,6 @@ def deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, deployment_resource_pool=deployment_resource_pool, @@ -1008,9 +996,6 @@ def _deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, @@ -1093,14 +1078,8 @@ def _deploy( specified. A default value of 60 will be used if not specified. autoscaling_target_request_count_per_minute (int): Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): Target DCGM metrics for - GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): Target vLLM metrics - for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): Target vLLM metrics - for number of inference requests currently waiting in the queue. - autoscaling_target_pubsub_num_undelivered_messages (int): Target number - of pubsub undelivered messages per instance. + autoscaling_target_pubsub_num_undelivered_messages (int): Target + number of pubsub undelivered messages per instance. autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional. Monitored resource labels as key value pairs for metric filtering for pubsub_num_undelivered_messages. 
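The warnings rewritten in the next two hunks enforce the same rule as the GA path: autoscaling arguments are either ignored or rejected when a `DeploymentResourcePool` is supplied. A standalone sketch of that guard, not the SDK source:

def validate_autoscaling_args(deployment_resource_pool, **autoscaling_kwargs):
    """Raise if autoscaling arguments accompany a DeploymentResourcePool."""
    if deployment_resource_pool is None:
        return
    if any(value is not None for value in autoscaling_kwargs.values()):
        raise ValueError(
            "autoscaling parameters may not be set when "
            "`deployment_resource_pool` is specified."
        )

For example, validate_autoscaling_args(pool, autoscaling_target_cpu_utilization=60) raises, while validate_autoscaling_args(None, autoscaling_target_cpu_utilization=60) passes.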
@@ -1175,9 +1154,6 @@ def _deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, deployment_resource_pool=deployment_resource_pool, @@ -1220,9 +1196,6 @@ def _deploy_call( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, @@ -1312,13 +1285,6 @@ def _deploy_call( not specified. autoscaling_target_request_count_per_minute (int): Optional. Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): Optional. Target DCGM - metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): Optional. Target - vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): Optional. Target - vLLM metrics for number of inference requests currently waiting in the - queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. Target number of pubsub undelivered messages per instance. autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional. @@ -1419,9 +1385,6 @@ def _deploy_call( or autoscaling_target_accelerator_duty_cycle or autoscaling_target_request_count_per_minute or autoscaling_target_cpu_utilization - or autoscaling_target_dcgm_fi_dev_gpu_util - or autoscaling_target_vllm_gpu_cache_usage_perc - or autoscaling_target_vllm_num_requests_waiting or autoscaling_target_pubsub_num_undelivered_messages or autoscaling_pubsub_subscription_labels ) @@ -1439,9 +1402,6 @@ def _deploy_call( "autoscaling_target_accelerator_duty_cycle, " "autoscaling_target_cpu_utilization, " "autoscaling_target_request_count_per_minute, " - "autoscaling_target_dcgm_fi_dev_gpu_util, " - "autoscaling_target_vllm_gpu_cache_usage_perc, " - "autoscaling_target_vllm_num_requests_waiting, " "autoscaling_target_pubsub_num_undelivered_messages, " "autoscaling_pubsub_subscription_labels parameters " "are ignored." 
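The pubsub branch in the hunk below also reflows the metric name across two adjacent string literals; Python's implicit concatenation makes the split spelling identical to the original single literal:

single = "pubsub.googleapis.com/subscription/num_undelivered_messages"
split = (
    "pubsub.googleapis.com/subscription/"
    "num_undelivered_messages"
)
assert single == split  # adjacent literals concatenate at compile time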
@@ -1522,51 +1482,11 @@ def _deploy_call( [autoscaling_metric_spec] ) - if autoscaling_target_dcgm_fi_dev_gpu_util: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/vertex_dcgm_fi_dev_gpu_util" - ), - target=autoscaling_target_dcgm_fi_dev_gpu_util, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - - if autoscaling_target_vllm_gpu_cache_usage_perc: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/" - "vertex_vllm_gpu_cache_usage_perc" - ), - target=autoscaling_target_vllm_gpu_cache_usage_perc, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - - if autoscaling_target_vllm_num_requests_waiting: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/" - "vertex_vllm_num_requests_waiting" - ), - target=autoscaling_target_vllm_num_requests_waiting, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - if autoscaling_target_pubsub_num_undelivered_messages: autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec( metric_name=( - "pubsub.googleapis.com/subscription/num_undelivered_messages" + "pubsub.googleapis.com/subscription/" + "num_undelivered_messages" ), target=autoscaling_target_pubsub_num_undelivered_messages, monitored_resource_labels=autoscaling_pubsub_subscription_labels, @@ -1622,9 +1542,6 @@ def _deploy_call( or autoscaling_target_accelerator_duty_cycle or autoscaling_target_cpu_utilization or autoscaling_target_request_count_per_minute - or autoscaling_target_dcgm_fi_dev_gpu_util - or autoscaling_target_vllm_gpu_cache_usage_perc - or autoscaling_target_vllm_num_requests_waiting or autoscaling_target_pubsub_num_undelivered_messages or autoscaling_pubsub_subscription_labels ) @@ -1636,9 +1553,6 @@ def _deploy_call( "autoscaling_target_accelerator_duty_cycle, " "autoscaling_target_cpu_utilization, " "autoscaling_target_request_count_per_minute, " - "autoscaling_target_dcgm_fi_dev_gpu_util, " - "autoscaling_target_vllm_gpu_cache_usage_perc, " - "autoscaling_target_vllm_num_requests_waiting, " "autoscaling_target_pubsub_num_undelivered_messages, " "autoscaling_pubsub_subscription_labels parameters " "may not be set when `deployment_resource_pool` is " @@ -1900,9 +1814,6 @@ def deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, @@ -2012,13 +1923,6 @@ def deploy( not specified. autoscaling_target_request_count_per_minute (int): Optional. Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): Optional. Target DCGM - metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): Optional. Target - vLLM metrics for GPU KV cache usage percentage. 
- autoscaling_target_vllm_num_requests_waiting (int): Optional. Target - vLLM metrics for number of inference requests currently waiting in the - queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. Target number of pubsub undelivered messages per instance. autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional. @@ -2126,9 +2030,6 @@ def deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, deployment_resource_pool=deployment_resource_pool, @@ -2170,9 +2071,6 @@ def _deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, @@ -2274,13 +2172,6 @@ def _deploy( not specified. autoscaling_target_request_count_per_minute (int): Optional. Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): Optional. Target DCGM - metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): Optional. Target - vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): Optional. Target - vLLM metrics for number of inference requests currently waiting in the - queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. Target number of pubsub undelivered messages per instance. autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional. 
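These preview signatures keep their `deployment_resource_pool` parameter. A hedged usage sketch of the pool-based path; it assumes `DeploymentResourcePool` accepts a full resource name as its first positional argument, as other Vertex resource classes do, and the names are hypothetical:

from google.cloud.aiplatform.preview.models import DeploymentResourcePool, Model

pool = DeploymentResourcePool(
    "projects/my-project/locations/us-central1/deploymentResourcePools/my-pool"
)
model = Model("projects/my-project/locations/us-central1/models/456")
# Machine and autoscaling arguments stay unset when a pool is provided;
# serving resources come from the shared pool instead.
endpoint = model.deploy(deployment_resource_pool=pool)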
@@ -2390,9 +2281,6 @@ def _deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, deployment_resource_pool=deployment_resource_pool, diff --git a/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py b/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/version.py b/google/cloud/aiplatform/version.py index 808d127eda..c5d0a7014d 100644 --- a/google/cloud/aiplatform/version.py +++ b/google/cloud/aiplatform/version.py @@ -15,4 +15,4 @@ # limitations under the License. # -__version__ = "1.134.0" +__version__ = "1.133.0" diff --git a/google/cloud/aiplatform_v1/gapic_version.py b/google/cloud/aiplatform_v1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform_v1/gapic_version.py +++ b/google/cloud/aiplatform_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform_v1beta1/gapic_version.py b/google/cloud/aiplatform_v1beta1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform_v1beta1/types/vertex_rag_data.py b/google/cloud/aiplatform_v1beta1/types/vertex_rag_data.py index 30ec10c8bb..e377e7ddaf 100644 --- a/google/cloud/aiplatform_v1beta1/types/vertex_rag_data.py +++ b/google/cloud/aiplatform_v1beta1/types/vertex_rag_data.py @@ -1546,32 +1546,20 @@ class RagManagedDbConfig(proto.Message): This field is a member of `oneof`_ ``tier``. scaled (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Scaled): - Deprecated: Use ``mode`` instead to set the tier under - Spanner. Sets the RagManagedDb to the Scaled tier. + Sets the RagManagedDb to the Scaled tier. + This is the default tier if not explicitly + chosen. This field is a member of `oneof`_ ``tier``. basic (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Basic): - Deprecated: Use ``mode`` instead to set the tier under - Spanner. Sets the RagManagedDb to the Basic tier. + Sets the RagManagedDb to the Basic tier. This field is a member of `oneof`_ ``tier``. unprovisioned (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Unprovisioned): - Deprecated: Use ``mode`` instead to set the tier under - Spanner. Sets the RagManagedDb to the Unprovisioned tier. + Sets the RagManagedDb to the Unprovisioned + tier. This field is a member of `oneof`_ ``tier``. - serverless (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Serverless): - Sets the backend to be the serverless mode - offered by RAG Engine. - - This field is a member of `oneof`_ ``mode``. 
- spanner (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Spanner): - Sets the RAG Engine backend to be - RagManagedDb, built on top of Spanner. - NOTE: This is the default mode (w/ Basic Tier) - if not explicitly chosen. - - This field is a member of `oneof`_ ``mode``. """ class Enterprise(proto.Message): @@ -1597,8 +1585,7 @@ class Basic(proto.Message): - Latency insensitive workload. - Only using RAG Engine with external vector DBs. - NOTE: This is the default tier under Spanner mode if not explicitly - chosen. + NOTE: This is the default tier if not explicitly chosen. """ @@ -1613,60 +1600,6 @@ class Unprovisioned(proto.Message): """ - class Spanner(proto.Message): - r"""Message to configure the Spanner database used by - RagManagedDb. - - This message has `oneof`_ fields (mutually exclusive fields). - For each oneof, at most one member field can be set at the same time. - Setting any member of the oneof automatically clears all other - members. - - .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields - - Attributes: - scaled (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Scaled): - Sets the RagManagedDb to the Scaled tier. - - This field is a member of `oneof`_ ``tier``. - basic (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Basic): - Sets the RagManagedDb to the Basic tier. This - is the default tier for Spanner mode if not - explicitly chosen. - - This field is a member of `oneof`_ ``tier``. - unprovisioned (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Unprovisioned): - Sets the RagManagedDb to the Unprovisioned - tier. - - This field is a member of `oneof`_ ``tier``. - """ - - scaled: "RagManagedDbConfig.Scaled" = proto.Field( - proto.MESSAGE, - number=1, - oneof="tier", - message="RagManagedDbConfig.Scaled", - ) - basic: "RagManagedDbConfig.Basic" = proto.Field( - proto.MESSAGE, - number=2, - oneof="tier", - message="RagManagedDbConfig.Basic", - ) - unprovisioned: "RagManagedDbConfig.Unprovisioned" = proto.Field( - proto.MESSAGE, - number=3, - oneof="tier", - message="RagManagedDbConfig.Unprovisioned", - ) - - class Serverless(proto.Message): - r"""Message to configure the serverless mode offered by RAG - Engine. - - """ - enterprise: Enterprise = proto.Field( proto.MESSAGE, number=1, @@ -1691,18 +1624,6 @@ class Serverless(proto.Message): oneof="tier", message=Unprovisioned, ) - serverless: Serverless = proto.Field( - proto.MESSAGE, - number=5, - oneof="mode", - message=Serverless, - ) - spanner: Spanner = proto.Field( - proto.MESSAGE, - number=6, - oneof="mode", - message=Spanner, - ) class RagEngineConfig(proto.Message): diff --git a/pypi/_vertex_ai_placeholder/version.py b/pypi/_vertex_ai_placeholder/version.py index 23963c4d83..120e175e87 100644 --- a/pypi/_vertex_ai_placeholder/version.py +++ b/pypi/_vertex_ai_placeholder/version.py @@ -15,4 +15,4 @@ # limitations under the License. 
# -__version__ = "1.134.0" +__version__ = "1.133.0" diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json index 88ac6c8e7a..72a983d65e 100644 --- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json +++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-aiplatform", - "version": "1.134.0" + "version": "1.133.0" }, "snippets": [ { diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json index 5e7a8e6322..11e1910d7e 100644 --- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json +++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-aiplatform", - "version": "1.134.0" + "version": "1.133.0" }, "snippets": [ { diff --git a/tests/unit/aiplatform/test_endpoints.py b/tests/unit/aiplatform/test_endpoints.py index 89b53f6aef..085139464a 100644 --- a/tests/unit/aiplatform/test_endpoints.py +++ b/tests/unit/aiplatform/test_endpoints.py @@ -146,15 +146,6 @@ _TEST_METRIC_NAME_REQUEST_COUNT = ( "aiplatform.googleapis.com/prediction/online/request_count" ) -_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION = ( - "prometheus.googleapis.com/vertex_dcgm_fi_dev_gpu_util" -) -_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE = ( - "prometheus.googleapis.com/vertex_vllm_gpu_cache_usage_perc" -) -_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING = ( - "prometheus.googleapis.com/vertex_vllm_num_requests_waiting" -) _TEST_METRIC_NAME_PUBSUB_NUM_UNDELIVERED_MESSAGE = ( "pubsub.googleapis.com/subscription/num_undelivered_messages" ) @@ -2250,130 +2241,6 @@ def test_deploy_with_autoscaling_target_request_count_per_minute_preview( timeout=None, ) - @pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock") - @pytest.mark.parametrize("sync", [True, False]) - def test_deploy_with_autoscaling_target_dcgm_vllm_metrics( - self, deploy_model_mock, sync - ): - test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME) - test_model = models.Model(_TEST_ID) - test_model._gca_resource.supported_deployment_resources_types.append( - aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES - ) - test_endpoint.deploy( - model=test_model, - machine_type=_TEST_MACHINE_TYPE, - service_account=_TEST_SERVICE_ACCOUNT, - sync=sync, - deploy_request_timeout=None, - autoscaling_target_dcgm_fi_dev_gpu_util=60, - autoscaling_target_vllm_gpu_cache_usage_perc=50, - autoscaling_target_vllm_num_requests_waiting=10, - ) - - if not sync: - test_endpoint.wait() - - expected_dedicated_resources = gca_machine_resources.DedicatedResources( - machine_spec=gca_machine_resources.MachineSpec( - machine_type=_TEST_MACHINE_TYPE, - ), - min_replica_count=1, - max_replica_count=1, - autoscaling_metric_specs=[ - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION, - target=60, - ), - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE, - target=50, - ), - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING, - target=10, - ), - ], - ) - - expected_deployed_model = gca_endpoint.DeployedModel( - 
dedicated_resources=expected_dedicated_resources, - model=test_model.resource_name, - display_name=None, - service_account=_TEST_SERVICE_ACCOUNT, - ) - deploy_model_mock.assert_called_once_with( - endpoint=test_endpoint.resource_name, - deployed_model=expected_deployed_model, - traffic_split={"0": 100}, - metadata=(), - timeout=None, - ) - - @pytest.mark.usefixtures( - "get_endpoint_mock", "get_model_mock", "preview_deploy_model_mock" - ) - @pytest.mark.parametrize("sync", [True, False]) - def test_deploy_with_autoscaling_target_dcgm_vllm_metrics_preview( - self, preview_deploy_model_mock, sync - ): - test_endpoint = preview_models.Endpoint(_TEST_ENDPOINT_NAME) - test_model = preview_models.Model(_TEST_ID) - test_model._gca_resource.supported_deployment_resources_types.append( - aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES - ) - test_endpoint.deploy( - model=test_model, - machine_type=_TEST_MACHINE_TYPE, - service_account=_TEST_SERVICE_ACCOUNT, - sync=sync, - deploy_request_timeout=None, - autoscaling_target_dcgm_fi_dev_gpu_util=60, - autoscaling_target_vllm_gpu_cache_usage_perc=50, - autoscaling_target_vllm_num_requests_waiting=10, - ) - - if not sync: - test_endpoint.wait() - - expected_dedicated_resources = gca_machine_resources_v1beta1.DedicatedResources( - machine_spec=gca_machine_resources_v1beta1.MachineSpec( - machine_type=_TEST_MACHINE_TYPE, - ), - min_replica_count=1, - max_replica_count=1, - autoscaling_metric_specs=[ - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION, - target=60, - ), - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE, - target=50, - ), - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING, - target=10, - ), - ], - ) - - expected_deployed_model = gca_endpoint_v1beta1.DeployedModel( - dedicated_resources=expected_dedicated_resources, - model=test_model.resource_name, - display_name=None, - service_account=_TEST_SERVICE_ACCOUNT, - enable_container_logging=True, - faster_deployment_config=gca_endpoint_v1beta1.FasterDeploymentConfig(), - ) - preview_deploy_model_mock.assert_called_once_with( - endpoint=test_endpoint.resource_name, - deployed_model=expected_deployed_model, - traffic_split={"0": 100}, - metadata=(), - timeout=None, - ) - @pytest.mark.usefixtures( "get_endpoint_mock", "get_model_mock", "preview_deploy_model_mock" ) diff --git a/tests/unit/aiplatform/test_metadata_models.py b/tests/unit/aiplatform/test_metadata_models.py index aecb01c753..b7120c2310 100644 --- a/tests/unit/aiplatform/test_metadata_models.py +++ b/tests/unit/aiplatform/test_metadata_models.py @@ -157,10 +157,8 @@ def create_model_file(filename): @pytest.fixture def mock_storage_blob_download_xgboost_xgbmodel_file(): def create_model_file(filename): - xgb.XGBClassifier._estimator_type = "classifier" x, y = make_classification() model = xgb.XGBClassifier() - model._estimator_type = "classifier" model.fit(x, y) model.save_model(filename) @@ -524,7 +522,6 @@ def test_save_model_xgboost_xgbmodel( x, y = make_classification() xgb_model = xgb.XGBClassifier() - xgb_model._estimator_type = "classifier" xgb_model.fit(x, y) aiplatform.init( diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py index 5cf874db98..5af84e24ea 100644 --- a/tests/unit/aiplatform/test_models.py +++ b/tests/unit/aiplatform/test_models.py @@ -524,15 +524,6 @@ 
_TEST_METRIC_NAME_REQUEST_COUNT = ( "aiplatform.googleapis.com/prediction/online/request_count" ) -_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION = ( - "prometheus.googleapis.com/vertex_dcgm_fi_dev_gpu_util" -) -_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE = ( - "prometheus.googleapis.com/vertex_vllm_gpu_cache_usage_perc" -) -_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING = ( - "prometheus.googleapis.com/vertex_vllm_num_requests_waiting" -) _TEST_METRIC_NAME_PUBSUB_NUM_UNDELIVERED_MESSAGE = ( "pubsub.googleapis.com/subscription/num_undelivered_messages" ) @@ -2568,140 +2559,6 @@ def test_preview_deploy_no_endpoint_dedicated_resources_autoscaling_request_coun timeout=None, ) - @pytest.mark.usefixtures( - "get_model_mock", - "create_endpoint_mock", - "get_endpoint_mock", - ) - @pytest.mark.parametrize("sync", [True, False]) - def test_deploy_no_endpoint_dedicated_resources_autoscaling_dcgm_vllm_metrics( - self, deploy_model_mock, sync - ): - test_model = models.Model(_TEST_ID) - test_model._gca_resource.supported_deployment_resources_types.append( - aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES - ) - - test_endpoint = test_model.deploy( - machine_type=_TEST_MACHINE_TYPE, - accelerator_type=_TEST_ACCELERATOR_TYPE, - accelerator_count=_TEST_ACCELERATOR_COUNT, - sync=sync, - deploy_request_timeout=None, - system_labels=_TEST_LABELS, - autoscaling_target_dcgm_fi_dev_gpu_util=60, - autoscaling_target_vllm_gpu_cache_usage_perc=50, - autoscaling_target_vllm_num_requests_waiting=10, - ) - - if not sync: - test_endpoint.wait() - - expected_dedicated_resources = gca_machine_resources.DedicatedResources( - machine_spec=gca_machine_resources.MachineSpec( - machine_type=_TEST_MACHINE_TYPE, - accelerator_type=_TEST_ACCELERATOR_TYPE, - accelerator_count=_TEST_ACCELERATOR_COUNT, - ), - min_replica_count=1, - max_replica_count=1, - autoscaling_metric_specs=[ - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION, - target=60, - ), - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE, - target=50, - ), - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING, - target=10, - ), - ], - ) - expected_deployed_model = gca_endpoint.DeployedModel( - dedicated_resources=expected_dedicated_resources, - model=test_model.resource_name, - display_name=None, - system_labels=_TEST_LABELS, - ) - deploy_model_mock.assert_called_once_with( - endpoint=test_endpoint.resource_name, - deployed_model=expected_deployed_model, - traffic_split={"0": 100}, - metadata=(), - timeout=None, - ) - - @pytest.mark.usefixtures( - "get_model_mock", - "create_endpoint_mock", - "get_endpoint_mock", - ) - @pytest.mark.parametrize("sync", [True, False]) - def test_preview_deploy_no_endpoint_dedicated_resources_autoscaling_dcgm_vllm_metrics( - self, preview_deploy_model_mock, sync - ): - test_model = preview_models.Model(_TEST_ID).preview - test_model._gca_resource.supported_deployment_resources_types.append( - aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES - ) - - test_endpoint = test_model.deploy( - machine_type=_TEST_MACHINE_TYPE, - accelerator_type=_TEST_ACCELERATOR_TYPE, - accelerator_count=_TEST_ACCELERATOR_COUNT, - sync=sync, - deploy_request_timeout=None, - system_labels=_TEST_LABELS, - autoscaling_target_dcgm_fi_dev_gpu_util=60, - autoscaling_target_vllm_gpu_cache_usage_perc=50, - 
autoscaling_target_vllm_num_requests_waiting=10, - ) - - if not sync: - test_endpoint.wait() - - expected_dedicated_resources = gca_machine_resources_v1beta1.DedicatedResources( - machine_spec=gca_machine_resources_v1beta1.MachineSpec( - machine_type=_TEST_MACHINE_TYPE, - accelerator_type=_TEST_ACCELERATOR_TYPE, - accelerator_count=_TEST_ACCELERATOR_COUNT, - ), - min_replica_count=1, - max_replica_count=1, - autoscaling_metric_specs=[ - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION, - target=60, - ), - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE, - target=50, - ), - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING, - target=10, - ), - ], - ) - expected_deployed_model = gca_endpoint_v1beta1.DeployedModel( - dedicated_resources=expected_dedicated_resources, - model=test_model.resource_name, - display_name=None, - enable_container_logging=True, - faster_deployment_config=gca_endpoint_v1beta1.FasterDeploymentConfig(), - system_labels=_TEST_LABELS, - ) - preview_deploy_model_mock.assert_called_once_with( - endpoint=test_endpoint.resource_name, - deployed_model=expected_deployed_model, - traffic_split={"0": 100}, - metadata=(), - timeout=None, - ) - @pytest.mark.usefixtures( "get_model_mock", "create_endpoint_mock", diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_vertex_rag_data_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_vertex_rag_data_service.py index c41d7da44c..b4337e95b2 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_vertex_rag_data_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_vertex_rag_data_service.py @@ -10756,8 +10756,6 @@ def test_update_rag_engine_config_rest_call_success(request_type): "scaled": {}, "basic": {}, "unprovisioned": {}, - "serverless": {}, - "spanner": {"scaled": {}, "basic": {}, "unprovisioned": {}}, }, } # The version of a generated dependency at test runtime may differ from the version used during generation. @@ -13775,8 +13773,6 @@ async def test_update_rag_engine_config_rest_asyncio_call_success(request_type): "scaled": {}, "basic": {}, "unprovisioned": {}, - "serverless": {}, - "spanner": {"scaled": {}, "basic": {}, "unprovisioned": {}}, }, } # The version of a generated dependency at test runtime may differ from the version used during generation. 
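Context for the test removals above: with the DCGM/vLLM GPU-metric cases deleted, the request-count and CPU-utilization targets remain the autoscaling paths these suites exercise. A minimal sketch of that surviving `deploy()` flow follows — it is not part of this patch; the project, endpoint, and model IDs are placeholders, while the keyword arguments are existing `aiplatform` parameters (the request-count target corresponds to the retained `_TEST_METRIC_NAME_REQUEST_COUNT` constant):

```python
# Minimal sketch (not from this patch) of the autoscaling path the remaining
# tests still cover; all resource IDs below are placeholders.
from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

endpoint = aiplatform.Endpoint("1234567890")  # hypothetical endpoint ID
model = aiplatform.Model("0987654321")  # hypothetical model ID

# Each autoscaling_target_* kwarg is translated into an AutoscalingMetricSpec;
# the request-count target maps to the metric
# "aiplatform.googleapis.com/prediction/online/request_count".
endpoint.deploy(
    model=model,
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=2,
    autoscaling_target_cpu_utilization=60,
    autoscaling_target_request_count_per_minute=100,
)
```

The removed `autoscaling_target_dcgm_fi_dev_gpu_util`, `autoscaling_target_vllm_gpu_cache_usage_perc`, and `autoscaling_target_vllm_num_requests_waiting` kwargs are exactly the ones absent from this sketch.
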
diff --git a/tests/unit/vertexai/genai/replays/test_create_agent_engine_developer_connect.py b/tests/unit/vertexai/genai/replays/test_create_agent_engine_developer_connect.py
index ab6f7af4cf..1b66399a48 100644
--- a/tests/unit/vertexai/genai/replays/test_create_agent_engine_developer_connect.py
+++ b/tests/unit/vertexai/genai/replays/test_create_agent_engine_developer_connect.py
@@ -14,6 +14,8 @@
 #
 # pylint: disable=protected-access,bad-continuation,missing-function-docstring
 
+import sys
+
 from tests.unit.vertexai.genai.replays import pytest_helper
 from vertexai._genai import types
 
@@ -23,13 +25,37 @@
 
 
 def test_create_with_developer_connect_source(client):
-    """Tests creating an agent engine with developer connect source."""
-    developer_connect_source_config = types.ReasoningEngineSpecSourceCodeSpecDeveloperConnectConfig(
-        git_repository_link="projects/reasoning-engine-test-1/locations/europe-west3/connections/shawn-develop-connect/gitRepositoryLinks/shawn-yang-google-adk-samples",
-        revision="main",
-        dir="test",
-    )
-    agent_engine = client.agent_engines.create(
+    """Tests creating an agent engine with developer connect source."""
+    if sys.version_info >= (3, 13):
+        try:
+            client._api_client._initialize_replay_session_if_not_loaded()
+            if client._api_client.replay_session:
+                target_ver = f"{sys.version_info.major}.{sys.version_info.minor}"
+
+                def _update_ver(obj):
+                    if isinstance(obj, dict):
+                        if isinstance(obj.get("python_spec"), dict):
+                            if "version" in obj["python_spec"]:
+                                obj["python_spec"]["version"] = target_ver
+                        for v in obj.values():
+                            _update_ver(v)
+                    elif isinstance(obj, list):
+                        for item in obj:
+                            _update_ver(item)
+
+                for interaction in client._api_client.replay_session.interactions:
+                    if hasattr(interaction.request, "body_segments"):
+                        _update_ver(interaction.request.body_segments)
+                    if hasattr(interaction.request, "body"):
+                        _update_ver(interaction.request.body)
+        except Exception:
+            pass
+    developer_connect_source_config = types.ReasoningEngineSpecSourceCodeSpecDeveloperConnectConfig(
+        git_repository_link="projects/reasoning-engine-test-1/locations/europe-west3/connections/shawn-develop-connect/gitRepositoryLinks/shawn-yang-google-adk-samples",
+        revision="main",
+        dir="test",
+    )
+    agent_engine = client.agent_engines.create(
         config={
             "display_name": "test-agent-engine-dev-connect",
             "developer_connect_source": developer_connect_source_config,
@@ -42,21 +68,21 @@
             },
         },
     )
-    assert agent_engine.api_resource.display_name == "test-agent-engine-dev-connect"
-    assert (
+    assert agent_engine.api_resource.display_name == "test-agent-engine-dev-connect"
+    assert (
         agent_engine.api_resource.spec.source_code_spec.developer_connect_source.config.git_repository_link
         == developer_connect_source_config.git_repository_link
     )
-    assert (
+    assert (
         agent_engine.api_resource.spec.source_code_spec.developer_connect_source.config.revision
         == developer_connect_source_config.revision
     )
-    assert (
+    assert (
         agent_engine.api_resource.spec.source_code_spec.developer_connect_source.config.dir
         == developer_connect_source_config.dir
     )
-    # Clean up resources.
+ client.agent_engines.delete(name=agent_engine.api_resource.name, force=True) pytestmark = pytest_helper.setup( diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py index cd97ab042c..392de18fc2 100644 --- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py +++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py @@ -46,23 +46,6 @@ ) ), ) -EXACT_MATCH_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric( - metric="exact_match", - metric_config=types.UnifiedMetric( - computation_based_metric_spec=types.ComputationBasedMetricSpec( - type=types.ComputationBasedMetricType.EXACT_MATCH, - ) - ), -) -BLEU_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric( - metric="exact_match_2", - metric_config=types.UnifiedMetric( - computation_based_metric_spec=types.ComputationBasedMetricSpec( - type=types.ComputationBasedMetricType.BLEU, - parameters={"use_effective_order": True}, - ) - ), -) def test_create_eval_run_data_source_evaluation_set(client): @@ -91,8 +74,6 @@ def test_create_eval_run_data_source_evaluation_set(client): GENERAL_QUALITY_METRIC, types.RubricMetric.FINAL_RESPONSE_QUALITY, LLM_METRIC, - EXACT_MATCH_COMPUTATION_BASED_METRIC, - BLEU_COMPUTATION_BASED_METRIC, ], agent_info=types.evals.AgentInfo( agent_resource_name="project/123/locations/us-central1/reasoningEngines/456", @@ -113,13 +94,7 @@ def test_create_eval_run_data_source_evaluation_set(client): output_config=genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST) ), - metrics=[ - GENERAL_QUALITY_METRIC, - FINAL_RESPONSE_QUALITY_METRIC, - LLM_METRIC, - EXACT_MATCH_COMPUTATION_BASED_METRIC, - BLEU_COMPUTATION_BASED_METRIC, - ], + metrics=[GENERAL_QUALITY_METRIC, FINAL_RESPONSE_QUALITY_METRIC, LLM_METRIC], ) assert evaluation_run.inference_configs[ "agent-1" diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index f090cf3ac4..b4b1d7f2a9 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -577,13 +577,6 @@ def _UnifiedMetric_from_vertex( getv(from_object, ["predefinedMetricSpec"]), ) - if getv(from_object, ["computationBasedMetricSpec"]) is not None: - setv( - to_object, - ["computation_based_metric_spec"], - getv(from_object, ["computationBasedMetricSpec"]), - ) - return to_object @@ -628,13 +621,6 @@ def _UnifiedMetric_to_vertex( getv(from_object, ["predefined_metric_spec"]), ) - if getv(from_object, ["computation_based_metric_spec"]) is not None: - setv( - to_object, - ["computationBasedMetricSpec"], - getv(from_object, ["computation_based_metric_spec"]), - ) - return to_object diff --git a/vertexai/_genai/types/__init__.py b/vertexai/_genai/types/__init__.py index e0ed0ca64d..904c390edf 100644 --- a/vertexai/_genai/types/__init__.py +++ b/vertexai/_genai/types/__init__.py @@ -174,10 +174,6 @@ from .common import CometResult from .common import CometResultDict from .common import CometResultOrDict -from .common import ComputationBasedMetricSpec -from .common import ComputationBasedMetricSpecDict -from .common import ComputationBasedMetricSpecOrDict -from .common import ComputationBasedMetricType from .common import ContainerSpec from .common import ContainerSpecDict from .common import ContainerSpecOrDict @@ -1082,9 +1078,6 @@ "CustomCodeExecutionSpec", "CustomCodeExecutionSpecDict", "CustomCodeExecutionSpecOrDict", - "ComputationBasedMetricSpec", - "ComputationBasedMetricSpecDict", - "ComputationBasedMetricSpecOrDict", "UnifiedMetric", 
"UnifiedMetricDict", "UnifiedMetricOrDict", @@ -1103,9 +1096,6 @@ "EvaluationRunResults", "EvaluationRunResultsDict", "EvaluationRunResultsOrDict", - "EvalCaseMetricResult", - "EvalCaseMetricResultDict", - "EvalCaseMetricResultOrDict", "ResponseCandidateResult", "ResponseCandidateResultDict", "ResponseCandidateResultOrDict", @@ -1115,9 +1105,6 @@ "AggregatedMetricResult", "AggregatedMetricResultDict", "AggregatedMetricResultOrDict", - "WinRateStats", - "WinRateStatsDict", - "WinRateStatsOrDict", "ResponseCandidate", "ResponseCandidateDict", "ResponseCandidateOrDict", @@ -1139,12 +1126,6 @@ "EvaluationResult", "EvaluationResultDict", "EvaluationResultOrDict", - "EvaluationRunAgentConfig", - "EvaluationRunAgentConfigDict", - "EvaluationRunAgentConfigOrDict", - "EvaluationRunInferenceConfig", - "EvaluationRunInferenceConfigDict", - "EvaluationRunInferenceConfigOrDict", "EvaluationRun", "EvaluationRunDict", "EvaluationRunOrDict", @@ -1496,9 +1477,6 @@ "UpdateAgentEngineConfig", "UpdateAgentEngineConfigDict", "UpdateAgentEngineConfigOrDict", - "MemoryMetadataValue", - "MemoryMetadataValueDict", - "MemoryMetadataValueOrDict", "AgentEngineMemoryConfig", "AgentEngineMemoryConfigDict", "AgentEngineMemoryConfigOrDict", @@ -1556,6 +1534,9 @@ "RetrieveMemoriesRequestSimpleRetrievalParams", "RetrieveMemoriesRequestSimpleRetrievalParamsDict", "RetrieveMemoriesRequestSimpleRetrievalParamsOrDict", + "MemoryMetadataValue", + "MemoryMetadataValueDict", + "MemoryMetadataValueOrDict", "MemoryFilter", "MemoryFilterDict", "MemoryFilterOrDict", @@ -1871,6 +1852,18 @@ "ContentMapContents", "ContentMapContentsDict", "ContentMapContentsOrDict", + "EvalCaseMetricResult", + "EvalCaseMetricResultDict", + "EvalCaseMetricResultOrDict", + "EvaluationRunAgentConfig", + "EvaluationRunAgentConfigDict", + "EvaluationRunAgentConfigOrDict", + "EvaluationRunInferenceConfig", + "EvaluationRunInferenceConfigDict", + "EvaluationRunInferenceConfigOrDict", + "WinRateStats", + "WinRateStatsDict", + "WinRateStatsOrDict", "EvaluateMethodConfig", "EvaluateMethodConfigDict", "EvaluateMethodConfigOrDict", @@ -1943,7 +1936,6 @@ "EvaluationItemType", "SamplingMethod", "RubricContentType", - "ComputationBasedMetricType", "EvaluationRunState", "OptimizeTarget", "MemoryMetadataMergeStrategy", diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py index cd7a52b516..736b2fc178 100644 --- a/vertexai/_genai/types/common.py +++ b/vertexai/_genai/types/common.py @@ -324,21 +324,6 @@ class RubricContentType(_common.CaseInSensitiveEnum): """Generate rubrics in a unit test format.""" -class ComputationBasedMetricType(_common.CaseInSensitiveEnum): - """Represents the type of the computation based metric.""" - - COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED = ( - "COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED" - ) - """Computation based metric type is unspecified.""" - EXACT_MATCH = "EXACT_MATCH" - """Exact match metric.""" - BLEU = "BLEU" - """BLEU metric.""" - ROUGE = "ROUGE" - """ROUGE metric.""" - - class EvaluationRunState(_common.CaseInSensitiveEnum): """Represents the state of an evaluation run.""" @@ -984,33 +969,6 @@ def evaluate(instance: dict[str, Any]) -> float: ] -class ComputationBasedMetricSpec(_common.BaseModel): - """Specification for a computation based metric.""" - - type: Optional[ComputationBasedMetricType] = Field( - default=None, description="""The type of the computation based metric.""" - ) - parameters: Optional[dict[str, Any]] = Field( - default=None, - description="""A map of parameters for the metric. 
ROUGE example: {"rouge_type": "rougeL", "split_summaries": True, "use_stemmer": True}. BLEU example: {"use_effective_order": True}.""", - ) - - -class ComputationBasedMetricSpecDict(TypedDict, total=False): - """Specification for a computation based metric.""" - - type: Optional[ComputationBasedMetricType] - """The type of the computation based metric.""" - - parameters: Optional[dict[str, Any]] - """A map of parameters for the metric. ROUGE example: {"rouge_type": "rougeL", "split_summaries": True, "use_stemmer": True}. BLEU example: {"use_effective_order": True}.""" - - -ComputationBasedMetricSpecOrDict = Union[ - ComputationBasedMetricSpec, ComputationBasedMetricSpecDict -] - - class UnifiedMetric(_common.BaseModel): """The unified metric used for evaluation.""" @@ -1032,9 +990,6 @@ class UnifiedMetric(_common.BaseModel): predefined_metric_spec: Optional[PredefinedMetricSpec] = Field( default=None, description="""The spec for a pre-defined metric.""" ) - computation_based_metric_spec: Optional[ComputationBasedMetricSpec] = Field( - default=None, description="""The spec for a computation based metric.""" - ) class UnifiedMetricDict(TypedDict, total=False): @@ -1058,9 +1013,6 @@ class UnifiedMetricDict(TypedDict, total=False): predefined_metric_spec: Optional[PredefinedMetricSpecDict] """The spec for a pre-defined metric.""" - computation_based_metric_spec: Optional[ComputationBasedMetricSpecDict] - """The spec for a computation based metric.""" - UnifiedMetricOrDict = Union[UnifiedMetric, UnifiedMetricDict] @@ -1245,53 +1197,6 @@ class EvaluationRunResultsDict(TypedDict, total=False): EvaluationRunResultsOrDict = Union[EvaluationRunResults, EvaluationRunResultsDict] -class EvalCaseMetricResult(_common.BaseModel): - """Evaluation result for a single evaluation case for a single metric.""" - - metric_name: Optional[str] = Field( - default=None, description="""Name of the metric.""" - ) - score: Optional[float] = Field(default=None, description="""Score of the metric.""") - explanation: Optional[str] = Field( - default=None, description="""Explanation of the metric.""" - ) - rubric_verdicts: Optional[list[evals_types.RubricVerdict]] = Field( - default=None, - description="""The details of all the rubrics and their verdicts for rubric-based metrics.""", - ) - raw_output: Optional[list[str]] = Field( - default=None, description="""Raw output of the metric.""" - ) - error_message: Optional[str] = Field( - default=None, description="""Error message for the metric.""" - ) - - -class EvalCaseMetricResultDict(TypedDict, total=False): - """Evaluation result for a single evaluation case for a single metric.""" - - metric_name: Optional[str] - """Name of the metric.""" - - score: Optional[float] - """Score of the metric.""" - - explanation: Optional[str] - """Explanation of the metric.""" - - rubric_verdicts: Optional[list[evals_types.RubricVerdict]] - """The details of all the rubrics and their verdicts for rubric-based metrics.""" - - raw_output: Optional[list[str]] - """Raw output of the metric.""" - - error_message: Optional[str] - """Error message for the metric.""" - - -EvalCaseMetricResultOrDict = Union[EvalCaseMetricResult, EvalCaseMetricResultDict] - - class ResponseCandidateResult(_common.BaseModel): """Aggregated metric results for a single response candidate of an EvalCase.""" @@ -1299,7 +1204,7 @@ class ResponseCandidateResult(_common.BaseModel): default=None, description="""Index of the response candidate this result pertains to.""", ) - metric_results: Optional[dict[str, 
EvalCaseMetricResult]] = Field( + metric_results: Optional[dict[str, "EvalCaseMetricResult"]] = Field( default=None, description="""A dictionary of metric results for this response candidate, keyed by metric name.""", ) @@ -1311,7 +1216,7 @@ class ResponseCandidateResultDict(TypedDict, total=False): response_index: Optional[int] """Index of the response candidate this result pertains to.""" - metric_results: Optional[dict[str, EvalCaseMetricResultDict]] + metric_results: Optional[dict[str, "EvalCaseMetricResultDict"]] """A dictionary of metric results for this response candidate, keyed by metric name.""" @@ -1403,31 +1308,6 @@ class AggregatedMetricResultDict(TypedDict, total=False): AggregatedMetricResultOrDict = Union[AggregatedMetricResult, AggregatedMetricResultDict] -class WinRateStats(_common.BaseModel): - """Statistics for win rates for a single metric.""" - - win_rates: Optional[list[float]] = Field( - default=None, - description="""Win rates for the metric, one for each candidate.""", - ) - tie_rate: Optional[float] = Field( - default=None, description="""Tie rate for the metric.""" - ) - - -class WinRateStatsDict(TypedDict, total=False): - """Statistics for win rates for a single metric.""" - - win_rates: Optional[list[float]] - """Win rates for the metric, one for each candidate.""" - - tie_rate: Optional[float] - """Tie rate for the metric.""" - - -WinRateStatsOrDict = Union[WinRateStats, WinRateStatsDict] - - class ResponseCandidate(_common.BaseModel): """A model-generated content to the prompt.""" @@ -1729,7 +1609,7 @@ class EvaluationResult(_common.BaseModel): default=None, description="""A list of summary-level evaluation results for each metric.""", ) - win_rates: Optional[dict[str, WinRateStats]] = Field( + win_rates: Optional[dict[str, "WinRateStats"]] = Field( default=None, description="""A dictionary of win rates for each metric, only populated for multi-response evaluation runs.""", ) @@ -1766,7 +1646,7 @@ class EvaluationResultDict(TypedDict, total=False): summary_metrics: Optional[list[AggregatedMetricResultDict]] """A list of summary-level evaluation results for each metric.""" - win_rates: Optional[dict[str, WinRateStatsDict]] + win_rates: Optional[dict[str, "WinRateStatsDict"]] """A dictionary of win rates for each metric, only populated for multi-response evaluation runs.""" evaluation_dataset: Optional[list[EvaluationDatasetDict]] @@ -1782,71 +1662,6 @@ class EvaluationResultDict(TypedDict, total=False): EvaluationResultOrDict = Union[EvaluationResult, EvaluationResultDict] -class EvaluationRunAgentConfig(_common.BaseModel): - """This field is experimental and may change in future versions. - - Agent config for an evaluation run. - """ - - developer_instruction: Optional[genai_types.Content] = Field( - default=None, description="""The developer instruction for the agent.""" - ) - tools: Optional[list[genai_types.Tool]] = Field( - default=None, description="""The tools available to the agent.""" - ) - - -class EvaluationRunAgentConfigDict(TypedDict, total=False): - """This field is experimental and may change in future versions. - - Agent config for an evaluation run. 
- """ - - developer_instruction: Optional[genai_types.ContentDict] - """The developer instruction for the agent.""" - - tools: Optional[list[genai_types.ToolDict]] - """The tools available to the agent.""" - - -EvaluationRunAgentConfigOrDict = Union[ - EvaluationRunAgentConfig, EvaluationRunAgentConfigDict -] - - -class EvaluationRunInferenceConfig(_common.BaseModel): - """This field is experimental and may change in future versions. - - Configuration that describes an agent. - """ - - agent_config: Optional[EvaluationRunAgentConfig] = Field( - default=None, description="""The agent config.""" - ) - model: Optional[str] = Field( - default=None, - description="""The fully qualified name of the publisher model or endpoint to use for inference.""", - ) - - -class EvaluationRunInferenceConfigDict(TypedDict, total=False): - """This field is experimental and may change in future versions. - - Configuration that describes an agent. - """ - - agent_config: Optional[EvaluationRunAgentConfigDict] - """The agent config.""" - - model: Optional[str] - """The fully qualified name of the publisher model or endpoint to use for inference.""" - - -EvaluationRunInferenceConfigOrDict = Union[ - EvaluationRunInferenceConfig, EvaluationRunInferenceConfigDict -] - - class EvaluationRun(_common.BaseModel): """Represents an evaluation run.""" @@ -1875,7 +1690,7 @@ class EvaluationRun(_common.BaseModel): evaluation_config: Optional[EvaluationRunConfig] = Field( default=None, description="""The evaluation config for the evaluation run.""" ) - inference_configs: Optional[dict[str, EvaluationRunInferenceConfig]] = Field( + inference_configs: Optional[dict[str, "EvaluationRunInferenceConfig"]] = Field( default=None, description="""This field is experimental and may change in future versions. The inference configs for the evaluation run.""", ) @@ -1959,7 +1774,7 @@ class EvaluationRunDict(TypedDict, total=False): evaluation_config: Optional[EvaluationRunConfigDict] """The evaluation config for the evaluation run.""" - inference_configs: Optional[dict[str, EvaluationRunInferenceConfigDict]] + inference_configs: Optional[dict[str, "EvaluationRunInferenceConfigDict"]] """This field is experimental and may change in future versions. The inference configs for the evaluation run.""" labels: Optional[dict[str, str]] @@ -6796,37 +6611,6 @@ class _UpdateAgentEngineRequestParametersDict(TypedDict, total=False): ] -class MemoryMetadataValue(_common.BaseModel): - """The metadata values for memories.""" - - timestamp_value: Optional[datetime.datetime] = Field( - default=None, - description="""Timestamp value. When filtering on timestamp values, only the seconds field will be compared.""", - ) - double_value: Optional[float] = Field(default=None, description="""Double value.""") - bool_value: Optional[bool] = Field(default=None, description="""Boolean value.""") - string_value: Optional[str] = Field(default=None, description="""String value.""") - - -class MemoryMetadataValueDict(TypedDict, total=False): - """The metadata values for memories.""" - - timestamp_value: Optional[datetime.datetime] - """Timestamp value. 
When filtering on timestamp values, only the seconds field will be compared.""" - - double_value: Optional[float] - """Double value.""" - - bool_value: Optional[bool] - """Boolean value.""" - - string_value: Optional[str] - """String value.""" - - -MemoryMetadataValueOrDict = Union[MemoryMetadataValue, MemoryMetadataValueDict] - - class AgentEngineMemoryConfig(_common.BaseModel): """Config for creating a Memory.""" @@ -6868,7 +6652,7 @@ class AgentEngineMemoryConfig(_common.BaseModel): topics: Optional[list[MemoryTopicId]] = Field( default=None, description="""Optional. The topics of the memory.""" ) - metadata: Optional[dict[str, MemoryMetadataValue]] = Field( + metadata: Optional[dict[str, "MemoryMetadataValue"]] = Field( default=None, description="""Optional. User-provided metadata for the Memory. This information was provided when creating, updating, or generating the Memory. It was not generated by Memory Bank.""", ) @@ -6909,7 +6693,7 @@ class AgentEngineMemoryConfigDict(TypedDict, total=False): topics: Optional[list[MemoryTopicIdDict]] """Optional. The topics of the memory.""" - metadata: Optional[dict[str, MemoryMetadataValueDict]] + metadata: Optional[dict[str, "MemoryMetadataValueDict"]] """Optional. User-provided metadata for the Memory. This information was provided when creating, updating, or generating the Memory. It was not generated by Memory Bank.""" @@ -7022,7 +6806,7 @@ class Memory(_common.BaseModel): topics: Optional[list[MemoryTopicId]] = Field( default=None, description="""Optional. The Topics of the Memory.""" ) - metadata: Optional[dict[str, MemoryMetadataValue]] = Field( + metadata: Optional[dict[str, "MemoryMetadataValue"]] = Field( default=None, description="""Optional. User-provided metadata for the Memory. This information was provided when creating, updating, or generating the Memory. It was not generated by Memory Bank.""", ) @@ -7070,7 +6854,7 @@ class MemoryDict(TypedDict, total=False): topics: Optional[list[MemoryTopicIdDict]] """Optional. The Topics of the Memory.""" - metadata: Optional[dict[str, MemoryMetadataValueDict]] + metadata: Optional[dict[str, "MemoryMetadataValueDict"]] """Optional. User-provided metadata for the Memory. This information was provided when creating, updating, or generating the Memory. It was not generated by Memory Bank.""" @@ -7384,7 +7168,7 @@ class GenerateAgentEngineMemoriesConfig(_common.BaseModel): default=None, description="""Optional. Input only. If true, no revisions will be created for this request.""", ) - metadata: Optional[dict[str, MemoryMetadataValue]] = Field( + metadata: Optional[dict[str, "MemoryMetadataValue"]] = Field( default=None, description="""Optional. User-provided metadata for the generated memories. This is not generated by Memory Bank.""", ) @@ -7423,7 +7207,7 @@ class GenerateAgentEngineMemoriesConfigDict(TypedDict, total=False): disable_memory_revisions: Optional[bool] """Optional. Input only. If true, no revisions will be created for this request.""" - metadata: Optional[dict[str, MemoryMetadataValueDict]] + metadata: Optional[dict[str, "MemoryMetadataValueDict"]] """Optional. User-provided metadata for the generated memories. 
This is not generated by Memory Bank.""" metadata_merge_strategy: Optional[MemoryMetadataMergeStrategy] @@ -7886,6 +7670,37 @@ class RetrieveMemoriesRequestSimpleRetrievalParamsDict(TypedDict, total=False): ] +class MemoryMetadataValue(_common.BaseModel): + """Memory metadata.""" + + timestamp_value: Optional[datetime.datetime] = Field( + default=None, + description="""Timestamp value. When filtering on timestamp values, only the seconds field will be compared.""", + ) + double_value: Optional[float] = Field(default=None, description="""Double value.""") + bool_value: Optional[bool] = Field(default=None, description="""Boolean value.""") + string_value: Optional[str] = Field(default=None, description="""String value.""") + + +class MemoryMetadataValueDict(TypedDict, total=False): + """Memory metadata.""" + + timestamp_value: Optional[datetime.datetime] + """Timestamp value. When filtering on timestamp values, only the seconds field will be compared.""" + + double_value: Optional[float] + """Double value.""" + + bool_value: Optional[bool] + """Boolean value.""" + + string_value: Optional[str] + """String value.""" + + +MemoryMetadataValueOrDict = Union[MemoryMetadataValue, MemoryMetadataValueDict] + + class MemoryFilter(_common.BaseModel): """Filter to apply when retrieving memories.""" @@ -13488,6 +13303,143 @@ class ContentMapContentsDict(TypedDict, total=False): ContentMapContentsOrDict = Union[ContentMapContents, ContentMapContentsDict] +class EvalCaseMetricResult(_common.BaseModel): + """Evaluation result for a single evaluation case for a single metric.""" + + metric_name: Optional[str] = Field( + default=None, description="""Name of the metric.""" + ) + score: Optional[float] = Field(default=None, description="""Score of the metric.""") + explanation: Optional[str] = Field( + default=None, description="""Explanation of the metric.""" + ) + rubric_verdicts: Optional[list[evals_types.RubricVerdict]] = Field( + default=None, + description="""The details of all the rubrics and their verdicts for rubric-based metrics.""", + ) + raw_output: Optional[list[str]] = Field( + default=None, description="""Raw output of the metric.""" + ) + error_message: Optional[str] = Field( + default=None, description="""Error message for the metric.""" + ) + + +class EvalCaseMetricResultDict(TypedDict, total=False): + """Evaluation result for a single evaluation case for a single metric.""" + + metric_name: Optional[str] + """Name of the metric.""" + + score: Optional[float] + """Score of the metric.""" + + explanation: Optional[str] + """Explanation of the metric.""" + + rubric_verdicts: Optional[list[evals_types.RubricVerdict]] + """The details of all the rubrics and their verdicts for rubric-based metrics.""" + + raw_output: Optional[list[str]] + """Raw output of the metric.""" + + error_message: Optional[str] + """Error message for the metric.""" + + +EvalCaseMetricResultOrDict = Union[EvalCaseMetricResult, EvalCaseMetricResultDict] + + +class EvaluationRunAgentConfig(_common.BaseModel): + """This field is experimental and may change in future versions. + + Agent config for an evaluation run. + """ + + developer_instruction: Optional[genai_types.Content] = Field( + default=None, description="""The developer instruction for the agent.""" + ) + tools: Optional[list[genai_types.Tool]] = Field( + default=None, description="""The tools available to the agent.""" + ) + + +class EvaluationRunAgentConfigDict(TypedDict, total=False): + """This field is experimental and may change in future versions. 
+ + Agent config for an evaluation run. + """ + + developer_instruction: Optional[genai_types.ContentDict] + """The developer instruction for the agent.""" + + tools: Optional[list[genai_types.ToolDict]] + """The tools available to the agent.""" + + +EvaluationRunAgentConfigOrDict = Union[ + EvaluationRunAgentConfig, EvaluationRunAgentConfigDict +] + + +class EvaluationRunInferenceConfig(_common.BaseModel): + """This field is experimental and may change in future versions. + + Configuration that describes an agent. + """ + + agent_config: Optional[EvaluationRunAgentConfig] = Field( + default=None, description="""The agent config.""" + ) + model: Optional[str] = Field( + default=None, + description="""The fully qualified name of the publisher model or endpoint to use for inference.""", + ) + + +class EvaluationRunInferenceConfigDict(TypedDict, total=False): + """This field is experimental and may change in future versions. + + Configuration that describes an agent. + """ + + agent_config: Optional[EvaluationRunAgentConfigDict] + """The agent config.""" + + model: Optional[str] + """The fully qualified name of the publisher model or endpoint to use for inference.""" + + +EvaluationRunInferenceConfigOrDict = Union[ + EvaluationRunInferenceConfig, EvaluationRunInferenceConfigDict +] + + +class WinRateStats(_common.BaseModel): + """Statistics for win rates for a single metric.""" + + win_rates: Optional[list[float]] = Field( + default=None, + description="""Win rates for the metric, one for each candidate.""", + ) + tie_rate: Optional[float] = Field( + default=None, description="""Tie rate for the metric.""" + ) + + +class WinRateStatsDict(TypedDict, total=False): + """Statistics for win rates for a single metric.""" + + win_rates: Optional[list[float]] + """Win rates for the metric, one for each candidate.""" + + tie_rate: Optional[float] + """Tie rate for the metric.""" + + +WinRateStatsOrDict = Union[WinRateStats, WinRateStatsDict] + + class EvaluateMethodConfig(_common.BaseModel): """Optional parameters for the evaluate method.""" diff --git a/vertexai/agent_engines/templates/adk.py b/vertexai/agent_engines/templates/adk.py index 0878eeb720..e717410fe6 100644 --- a/vertexai/agent_engines/templates/adk.py +++ b/vertexai/agent_engines/templates/adk.py @@ -318,10 +318,8 @@ def _warn_missing_dependency( return None def _detect_cloud_resource_id(project_id: str) -> Optional[str]: - location = os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_LOCATION", "") or os.getenv( - "GOOGLE_CLOUD_LOCATION", "" - ) - agent_engine_id = os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_ID") + location = os.getenv("GOOGLE_CLOUD_LOCATION", None) + agent_engine_id = os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_ID", None) if all(v is not None for v in (location, agent_engine_id)): return f"//aiplatform.googleapis.com/projects/{project_id}/locations/{location}/reasoningEngines/{agent_engine_id}" return None @@ -361,10 +359,7 @@ def _detect_cloud_resource_id(project_id: str) -> Optional[str]: "cloud.platform": "gcp.agent_engine", "service.name": os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_ID", ""), "service.instance.id": f"{uuid.uuid4().hex}-{os.getpid()}", - "cloud.region": ( - os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_LOCATION", "") - or os.getenv("GOOGLE_CLOUD_LOCATION", "") - ), + "cloud.region": os.getenv("GOOGLE_CLOUD_LOCATION", ""), } | ( {"cloud.resource_id": cloud_resource_id} @@ -777,15 +772,11 @@ def set_up(self): os.environ["GOOGLE_CLOUD_PROJECT"] = project location = self._tmpl_attrs.get("location") if location: - if 
"GOOGLE_CLOUD_AGENT_ENGINE_LOCATION" not in os.environ: - os.environ["GOOGLE_CLOUD_AGENT_ENGINE_LOCATION"] = location - if "GOOGLE_CLOUD_LOCATION" not in os.environ: - os.environ["GOOGLE_CLOUD_LOCATION"] = location + os.environ["GOOGLE_CLOUD_LOCATION"] = location express_mode_api_key = self._tmpl_attrs.get("express_mode_api_key") if express_mode_api_key and not project: os.environ["GOOGLE_API_KEY"] = express_mode_api_key # Clear location and project env vars if express mode api key is provided. - os.environ.pop("GOOGLE_CLOUD_AGENT_ENGINE_LOCATION", None) os.environ.pop("GOOGLE_CLOUD_LOCATION", None) os.environ.pop("GOOGLE_CLOUD_PROJECT", None) location = None diff --git a/vertexai/preview/reasoning_engines/templates/adk.py b/vertexai/preview/reasoning_engines/templates/adk.py index 63c988ac35..872de508ef 100644 --- a/vertexai/preview/reasoning_engines/templates/adk.py +++ b/vertexai/preview/reasoning_engines/templates/adk.py @@ -320,9 +320,7 @@ def _warn_missing_dependency( return None def _detect_cloud_resource_id(project_id: str) -> Optional[str]: - location = os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_LOCATION", "") or os.getenv( - "GOOGLE_CLOUD_LOCATION", "" - ) + location = os.getenv("GOOGLE_CLOUD_LOCATION", None) agent_engine_id = os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_ID", None) if all(v is not None for v in (location, agent_engine_id)): return f"//aiplatform.googleapis.com/projects/{project_id}/locations/{location}/reasoningEngines/{agent_engine_id}" @@ -363,10 +361,7 @@ def _detect_cloud_resource_id(project_id: str) -> Optional[str]: "cloud.platform": "gcp.agent_engine", "service.name": os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_ID", ""), "service.instance.id": f"{uuid.uuid4().hex}-{os.getpid()}", - "cloud.region": ( - os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_LOCATION", "") - or os.getenv("GOOGLE_CLOUD_LOCATION", "") - ), + "cloud.region": os.getenv("GOOGLE_CLOUD_LOCATION", ""), } | ( {"cloud.resource_id": cloud_resource_id} @@ -693,11 +688,7 @@ def set_up(self): project = self._tmpl_attrs.get("project") os.environ["GOOGLE_CLOUD_PROJECT"] = project location = self._tmpl_attrs.get("location") - if location: - if "GOOGLE_CLOUD_AGENT_ENGINE_LOCATION" not in os.environ: - os.environ["GOOGLE_CLOUD_AGENT_ENGINE_LOCATION"] = location - if "GOOGLE_CLOUD_LOCATION" not in os.environ: - os.environ["GOOGLE_CLOUD_LOCATION"] = location + os.environ["GOOGLE_CLOUD_LOCATION"] = location # Disable content capture in custom ADK spans unless user enabled # tracing explicitly with the old flag