From f30d779a0f2f31c5ce18a2794888b7af37f5d5d9 Mon Sep 17 00:00:00 2001 From: A Vertex SDK engineer Date: Fri, 23 Jan 2026 07:46:17 -0800 Subject: [PATCH] fix: test fix internal PiperOrigin-RevId: 860106467 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 21 - google/cloud/aiplatform/gapic_version.py | 2 +- .../_protos/match_service.proto | 22 +- google/cloud/aiplatform/models.py | 129 ------ google/cloud/aiplatform/preview/models.py | 124 +----- .../schema/predict/instance/gapic_version.py | 2 +- .../predict/instance_v1/gapic_version.py | 2 +- .../v1/schema/predict/params/gapic_version.py | 2 +- .../schema/predict/params_v1/gapic_version.py | 2 +- .../predict/prediction/gapic_version.py | 2 +- .../predict/prediction_v1/gapic_version.py | 2 +- .../trainingjob/definition/gapic_version.py | 2 +- .../definition_v1/gapic_version.py | 2 +- .../schema/predict/instance/gapic_version.py | 2 +- .../predict/instance_v1beta1/gapic_version.py | 2 +- .../schema/predict/params/gapic_version.py | 2 +- .../predict/params_v1beta1/gapic_version.py | 2 +- .../predict/prediction/gapic_version.py | 2 +- .../prediction_v1beta1/gapic_version.py | 2 +- .../trainingjob/definition/gapic_version.py | 2 +- .../definition_v1beta1/gapic_version.py | 2 +- google/cloud/aiplatform/version.py | 2 +- google/cloud/aiplatform_v1/gapic_version.py | 2 +- .../cloud/aiplatform_v1beta1/gapic_version.py | 2 +- .../types/vertex_rag_data.py | 93 +--- pypi/_vertex_ai_placeholder/version.py | 2 +- ...t_metadata_google.cloud.aiplatform.v1.json | 2 +- ...adata_google.cloud.aiplatform.v1beta1.json | 2 +- tests/unit/aiplatform/test_endpoints.py | 133 ------ tests/unit/aiplatform/test_metadata_models.py | 3 - tests/unit/aiplatform/test_models.py | 143 ------ .../test_vertex_rag_data_service.py | 4 - ...t_create_agent_engine_developer_connect.py | 52 ++- .../replays/test_create_evaluation_run.py | 27 +- vertexai/_genai/evals.py | 14 - vertexai/_genai/types/__init__.py | 38 +- vertexai/_genai/types/common.py | 408 ++++++++---------- vertexai/agent_engines/templates/adk.py | 17 +- .../reasoning_engines/templates/adk.py | 15 +- 40 files changed, 290 insertions(+), 1001 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index bd118c81f8..3c53912ea5 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.134.0" + ".": "1.133.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index a3e856ac93..bb8d2c7834 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,26 +1,5 @@ # Changelog -## [1.134.0](https://github.com/googleapis/python-aiplatform/compare/v1.133.0...v1.134.0) (2026-01-18) - - -### Features - -* Add metadata to memories ([f9fc79d](https://github.com/googleapis/python-aiplatform/commit/f9fc79dda6888538486f4fd6a44aa02fa1bcba75)) -* Expose PSC for OpenModel ([feeb54d](https://github.com/googleapis/python-aiplatform/commit/feeb54d7a227adfadfb7d45a425c16e260dcb16b)) -* GenAI Client(evals) - Add support for `inference_configs` in `create_evaluation_run`. 
([33fe72a](https://github.com/googleapis/python-aiplatform/commit/33fe72a41de35f43c1ceb905ecf5652d5257b3ac)) -* GenAI SDK client - Support agent engine sandbox http request in genai sdk ([11c23a3](https://github.com/googleapis/python-aiplatform/commit/11c23a36a2a2e8a7ac6e9a4d6934943d9e8d1aa9)) -* Support metadata filtering for memory retrieval ([f9fc79d](https://github.com/googleapis/python-aiplatform/commit/f9fc79dda6888538486f4fd6a44aa02fa1bcba75)) -* Support metadata merge strategies for memory generation ([f9fc79d](https://github.com/googleapis/python-aiplatform/commit/f9fc79dda6888538486f4fd6a44aa02fa1bcba75)) -* Support Python 3.14 for reasoning engine. ([394cd1d](https://github.com/googleapis/python-aiplatform/commit/394cd1d5c29eeca46804fca90f6a9a43ab92206d)) -* Update data types from discovery doc. ([0c6fb66](https://github.com/googleapis/python-aiplatform/commit/0c6fb66ed5f641f60d5d1d14a51a5f4fcfa64aa1)) -* Update data types from discovery doc. ([a451fa3](https://github.com/googleapis/python-aiplatform/commit/a451fa374f670d2540f654866eb1091948efaf79)) - - -### Bug Fixes - -* Mistyping of langchain tools causing mypy errors ([0705a37](https://github.com/googleapis/python-aiplatform/commit/0705a378c6b81fa82a8e77c9c6026130209e57fb)) -* Test fix internal ([b1b900e](https://github.com/googleapis/python-aiplatform/commit/b1b900e953f9391b901cbdbe448a976d63fa3aca)) - ## [1.133.0](https://github.com/googleapis/python-aiplatform/compare/v1.132.0...v1.133.0) (2026-01-08) diff --git a/google/cloud/aiplatform/gapic_version.py b/google/cloud/aiplatform/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/gapic_version.py +++ b/google/cloud/aiplatform/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/matching_engine/_protos/match_service.proto b/google/cloud/aiplatform/matching_engine/_protos/match_service.proto index 2e4d5f6773..ab4ee104be 100644 --- a/google/cloud/aiplatform/matching_engine/_protos/match_service.proto +++ b/google/cloud/aiplatform/matching_engine/_protos/match_service.proto @@ -21,14 +21,15 @@ service MatchService { } // Feature embedding vector for sparse index. An array of numbers whose values -// are located in the specified dimensions. -message SparseEmbedding { - // The list of embedding values of the sparse vector. - repeated float float_val = 1; + // are located in the specified dimensions. + message SparseEmbedding { - // The list of indexes for the embedding values of the sparse vector. - repeated int64 dimension = 2; -} + // The list of embedding values of the sparse vector. + repeated float float_val = 1; + + // The list of indexes for the embedding values of the sparse vector. + repeated int64 dimension = 2; + } // Parameters for a match query. message MatchRequest { @@ -49,6 +50,7 @@ message MatchRequest { // Parameters for RRF algorithm that combines search results. message RRF { + // Users can provide an alpha value to give more weight to sparse vs dense. // For example, if the alpha is 0, we don't return dense at all, if it's 1, // we don't return sparse at all. @@ -68,7 +70,7 @@ message MatchRequest { // The list of restricts. repeated Namespace restricts = 4; - // The list of numeric restricts. + //The list of numeric restricts. 
repeated NumericNamespace numeric_restricts = 11; // Crowding is a constraint on a neighbor list produced by nearest neighbor @@ -126,9 +128,6 @@ message Embedding { // to return per crowding attribute value // (per_crowding_attribute_num_neighbors) is configured per-query. int64 crowding_attribute = 4; - - // The key-value map of additional metadata for the datapoint. - google.protobuf.Struct embedding_metadata = 7; } // Response of a match query. @@ -239,6 +238,7 @@ message Namespace { // eligible for each matching query, overall query is an AND across namespaces. // This uses numeric comparisons. message NumericNamespace { + // The string name of the namespace that this proto is specifying, // such as "size" or "cost". string name = 1; diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index eab5c98a71..e186b140ba 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -1367,9 +1367,6 @@ def deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, enable_access_logging=False, @@ -1470,13 +1467,6 @@ def deploy( autoscaling_target_request_count_per_minute (int): Optional. The target number of requests per minute for autoscaling. If set, the model will be scaled based on the number of requests it receives. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. The target number of pubsub undelivered messages for autoscaling. If set, the model will be scaled based on the pubsub queue size. 
@@ -1565,9 +1555,6 @@ def deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, spot=spot, @@ -1604,9 +1591,6 @@ def _deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, spot: bool = False, @@ -1710,13 +1694,6 @@ def _deploy( autoscaling_target_request_count_per_minute (int): Optional. The target number of requests per minute for autoscaling. If set, the model will be scaled based on the number of requests it receives. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. The target number of pubsub undelivered messages for autoscaling. If set, the model will be scaled based on the pubsub queue size. 
@@ -1782,9 +1759,6 @@ def _deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, spot=spot, @@ -1828,9 +1802,6 @@ def _deploy_call( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, spot: bool = False, @@ -1940,13 +1911,6 @@ def _deploy_call( A default value of 60 will be used if not specified. autoscaling_target_request_count_per_minute (int): Optional. Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. Target pubsub queue size per instance. 
autoscaling_pubsub_subscription_labels (Dict[str, str]): @@ -2042,9 +2006,6 @@ def _deploy_call( or autoscaling_target_accelerator_duty_cycle or autoscaling_target_cpu_utilization or autoscaling_target_request_count_per_minute - or autoscaling_target_dcgm_fi_dev_gpu_util - or autoscaling_target_vllm_gpu_cache_usage_perc - or autoscaling_target_vllm_num_requests_waiting or autoscaling_target_pubsub_num_undelivered_messages or autoscaling_pubsub_subscription_labels ) @@ -2056,9 +2017,6 @@ def _deploy_call( "autoscaling_target_accelerator_duty_cycle, " "autoscaling_target_cpu_utilization, " "autoscaling_target_request_count_per_minute, " - "autoscaling_target_dcgm_fi_dev_gpu_util, " - "autoscaling_target_vllm_gpu_cache_usage_perc, " - "autoscaling_target_vllm_num_requests_waiting, " "autoscaling_target_pubsub_num_undelivered_messages, " "autoscaling_pubsub_subscription_labels parameters " "may not be set when `deployment_resource_pool` is " @@ -2120,9 +2078,6 @@ def _deploy_call( or autoscaling_target_accelerator_duty_cycle or autoscaling_target_cpu_utilization or autoscaling_target_request_count_per_minute - or autoscaling_target_dcgm_fi_dev_gpu_util - or autoscaling_target_vllm_gpu_cache_usage_perc - or autoscaling_target_vllm_num_requests_waiting or autoscaling_target_pubsub_num_undelivered_messages or autoscaling_pubsub_subscription_labels ) @@ -2140,9 +2095,6 @@ def _deploy_call( "autoscaling_target_accelerator_duty_cycle, " "autoscaling_target_cpu_utilization, " "autoscaling_target_request_count_per_minute, " - "autoscaling_target_dcgm_fi_dev_gpu_util, " - "autoscaling_target_vllm_gpu_cache_usage_perc, " - "autoscaling_target_vllm_num_requests_waiting, " "autoscaling_target_pubsub_num_undelivered_messages, " "autoscaling_pubsub_subscription_labels parameters " "are ignored." 
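The hunk that follows removes the `_deploy_call` branches that built AutoscalingMetricSpec entries for the DCGM and vLLM metrics, leaving only the request-count and pubsub branches. A minimal sketch of the pubsub-backed spec the surviving branch constructs, written against the v1beta1 types that the preview path aliases as `gca_machine_resources_compat` (machine type, target, and label values here are hypothetical):

from google.cloud.aiplatform_v1beta1.types import machine_resources

# Hypothetical values; mirrors the pubsub branch that survives this patch.
pubsub_spec = machine_resources.AutoscalingMetricSpec(
    metric_name="pubsub.googleapis.com/subscription/num_undelivered_messages",
    target=100,
    monitored_resource_labels={"subscription_id": "my-subscription"},
)
dedicated_resources = machine_resources.DedicatedResources(
    machine_spec=machine_resources.MachineSpec(machine_type="n1-standard-4"),
    min_replica_count=1,
    max_replica_count=2,
    autoscaling_metric_specs=[pubsub_spec],
)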
@@ -2204,48 +2156,6 @@ def _deploy_call( [autoscaling_metric_spec] ) - if autoscaling_target_dcgm_fi_dev_gpu_util: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/" - "vertex_dcgm_fi_dev_gpu_util" - ), - target=autoscaling_target_dcgm_fi_dev_gpu_util, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - - if autoscaling_target_vllm_gpu_cache_usage_perc: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/" - "vertex_vllm_gpu_cache_usage_perc" - ), - target=autoscaling_target_vllm_gpu_cache_usage_perc, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - - if autoscaling_target_vllm_num_requests_waiting: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/" - "vertex_vllm_num_requests_waiting" - ), - target=autoscaling_target_vllm_num_requests_waiting, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - if autoscaling_target_pubsub_num_undelivered_messages: autoscaling_metric_spec = gca_machine_resources.AutoscalingMetricSpec( metric_name=( @@ -4582,9 +4492,6 @@ def deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, ) -> None: @@ -4766,9 +4673,6 @@ def deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, ) @@ -5844,9 +5748,6 @@ def deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, enable_access_logging=False, @@ -5969,13 +5870,6 @@ def deploy( autoscaling_target_request_count_per_minute (int): Optional. The target number of requests per minute for autoscaling. If set, the model will be scaled based on the number of requests it receives. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. 
Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. The target number of pubsub undelivered messages for autoscaling. If set, the model will be scaled based on the pubsub queue size. @@ -6035,13 +5929,6 @@ def deploy( autoscaling_target_request_count_per_minute (int): Optional. The target number of requests per minute for autoscaling. If set, the model will be scaled based on the number of requests it receives. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. The target number of pubsub undelivered messages for autoscaling. If set, the model will be scaled based on the pubsub queue size. @@ -6114,9 +6001,6 @@ def deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, spot=spot, @@ -6163,9 +6047,6 @@ def _deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, spot: bool = False, @@ -6290,13 +6171,6 @@ def _deploy( autoscaling_target_request_count_per_minute (int): Optional. The target number of requests per minute for autoscaling. If set, the model will be scaled based on the number of requests it receives. - autoscaling_target_dcgm_fi_dev_gpu_util (int): - Optional. Target DCGM metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): - Optional. Target vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): - Optional. Target vLLM metrics for number of inference requests - currently waiting in the queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. The target number of pubsub undelivered messages for autoscaling. If set, the model will be scaled based on the pubsub queue size. 
@@ -6393,9 +6267,6 @@ def _deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, spot=spot, diff --git a/google/cloud/aiplatform/preview/models.py b/google/cloud/aiplatform/preview/models.py index 64714f6abe..b3cb9f9ba8 100644 --- a/google/cloud/aiplatform/preview/models.py +++ b/google/cloud/aiplatform/preview/models.py @@ -783,9 +783,6 @@ def deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, @@ -874,14 +871,8 @@ def deploy( specified. A default value of 60 will be used if not specified. autoscaling_target_request_count_per_minute (int): Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): Target DCGM metrics for - GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): Target vLLM metrics - for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): Target vLLM metrics - for number of inference requests currently waiting in the queue. - autoscaling_target_pubsub_num_undelivered_messages (int): Target number - of pubsub undelivered messages per instance. + autoscaling_target_pubsub_num_undelivered_messages (int): Target + number of pubsub undelivered messages per instance. autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional. Monitored resource labels as key value pairs for metric filtering for pubsub_num_undelivered_messages. 
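From the caller's side, both the GA and preview `deploy()` signatures keep the request-count and pubsub autoscaling knobs while the three DCGM/vLLM targets disappear. A usage sketch with hypothetical project, endpoint, and model names:

from google.cloud import aiplatform

endpoint = aiplatform.Endpoint(
    "projects/my-project/locations/us-central1/endpoints/123"
)
model = aiplatform.Model(
    "projects/my-project/locations/us-central1/models/456"
)
# Only autoscaling parameters still present after this patch are passed here.
endpoint.deploy(
    model=model,
    machine_type="n1-standard-4",
    autoscaling_target_request_count_per_minute=300,
    autoscaling_target_pubsub_num_undelivered_messages=100,
    autoscaling_pubsub_subscription_labels={"subscription_id": "my-subscription"},
)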
@@ -970,9 +961,6 @@ def deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, deployment_resource_pool=deployment_resource_pool, @@ -1008,9 +996,6 @@ def _deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, @@ -1093,14 +1078,8 @@ def _deploy( specified. A default value of 60 will be used if not specified. autoscaling_target_request_count_per_minute (int): Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): Target DCGM metrics for - GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): Target vLLM metrics - for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): Target vLLM metrics - for number of inference requests currently waiting in the queue. - autoscaling_target_pubsub_num_undelivered_messages (int): Target number - of pubsub undelivered messages per instance. + autoscaling_target_pubsub_num_undelivered_messages (int): Target + number of pubsub undelivered messages per instance. autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional. Monitored resource labels as key value pairs for metric filtering for pubsub_num_undelivered_messages. 
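The warnings rewritten in the next two hunks enforce the same rule as the GA path: autoscaling arguments are either ignored or rejected when a `DeploymentResourcePool` is supplied. A standalone sketch of that guard, not the SDK source:

def validate_autoscaling_args(deployment_resource_pool, **autoscaling_kwargs):
    """Raise if autoscaling arguments accompany a DeploymentResourcePool."""
    if deployment_resource_pool is None:
        return
    if any(value is not None for value in autoscaling_kwargs.values()):
        raise ValueError(
            "autoscaling parameters may not be set when "
            "`deployment_resource_pool` is specified."
        )

For example, validate_autoscaling_args(pool, autoscaling_target_cpu_utilization=60) raises, while validate_autoscaling_args(None, autoscaling_target_cpu_utilization=60) passes.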
@@ -1175,9 +1154,6 @@ def _deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, deployment_resource_pool=deployment_resource_pool, @@ -1220,9 +1196,6 @@ def _deploy_call( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, @@ -1312,13 +1285,6 @@ def _deploy_call( not specified. autoscaling_target_request_count_per_minute (int): Optional. Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): Optional. Target DCGM - metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): Optional. Target - vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): Optional. Target - vLLM metrics for number of inference requests currently waiting in the - queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. Target number of pubsub undelivered messages per instance. autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional. @@ -1419,9 +1385,6 @@ def _deploy_call( or autoscaling_target_accelerator_duty_cycle or autoscaling_target_request_count_per_minute or autoscaling_target_cpu_utilization - or autoscaling_target_dcgm_fi_dev_gpu_util - or autoscaling_target_vllm_gpu_cache_usage_perc - or autoscaling_target_vllm_num_requests_waiting or autoscaling_target_pubsub_num_undelivered_messages or autoscaling_pubsub_subscription_labels ) @@ -1439,9 +1402,6 @@ def _deploy_call( "autoscaling_target_accelerator_duty_cycle, " "autoscaling_target_cpu_utilization, " "autoscaling_target_request_count_per_minute, " - "autoscaling_target_dcgm_fi_dev_gpu_util, " - "autoscaling_target_vllm_gpu_cache_usage_perc, " - "autoscaling_target_vllm_num_requests_waiting, " "autoscaling_target_pubsub_num_undelivered_messages, " "autoscaling_pubsub_subscription_labels parameters " "are ignored." 
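The pubsub branch in the hunk below also reflows the metric name across two adjacent string literals; Python's implicit concatenation makes the split spelling identical to the original single literal:

single = "pubsub.googleapis.com/subscription/num_undelivered_messages"
split = (
    "pubsub.googleapis.com/subscription/"
    "num_undelivered_messages"
)
assert single == split  # adjacent literals concatenate at compile time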
@@ -1522,51 +1482,11 @@ def _deploy_call( [autoscaling_metric_spec] ) - if autoscaling_target_dcgm_fi_dev_gpu_util: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/vertex_dcgm_fi_dev_gpu_util" - ), - target=autoscaling_target_dcgm_fi_dev_gpu_util, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - - if autoscaling_target_vllm_gpu_cache_usage_perc: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/" - "vertex_vllm_gpu_cache_usage_perc" - ), - target=autoscaling_target_vllm_gpu_cache_usage_perc, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - - if autoscaling_target_vllm_num_requests_waiting: - autoscaling_metric_spec = ( - gca_machine_resources_compat.AutoscalingMetricSpec( - metric_name=( - "prometheus.googleapis.com/" - "vertex_vllm_num_requests_waiting" - ), - target=autoscaling_target_vllm_num_requests_waiting, - ) - ) - dedicated_resources.autoscaling_metric_specs.extend( - [autoscaling_metric_spec] - ) - if autoscaling_target_pubsub_num_undelivered_messages: autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec( metric_name=( - "pubsub.googleapis.com/subscription/num_undelivered_messages" + "pubsub.googleapis.com/subscription/" + "num_undelivered_messages" ), target=autoscaling_target_pubsub_num_undelivered_messages, monitored_resource_labels=autoscaling_pubsub_subscription_labels, @@ -1622,9 +1542,6 @@ def _deploy_call( or autoscaling_target_accelerator_duty_cycle or autoscaling_target_cpu_utilization or autoscaling_target_request_count_per_minute - or autoscaling_target_dcgm_fi_dev_gpu_util - or autoscaling_target_vllm_gpu_cache_usage_perc - or autoscaling_target_vllm_num_requests_waiting or autoscaling_target_pubsub_num_undelivered_messages or autoscaling_pubsub_subscription_labels ) @@ -1636,9 +1553,6 @@ def _deploy_call( "autoscaling_target_accelerator_duty_cycle, " "autoscaling_target_cpu_utilization, " "autoscaling_target_request_count_per_minute, " - "autoscaling_target_dcgm_fi_dev_gpu_util, " - "autoscaling_target_vllm_gpu_cache_usage_perc, " - "autoscaling_target_vllm_num_requests_waiting, " "autoscaling_target_pubsub_num_undelivered_messages, " "autoscaling_pubsub_subscription_labels parameters " "may not be set when `deployment_resource_pool` is " @@ -1900,9 +1814,6 @@ def deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, @@ -2012,13 +1923,6 @@ def deploy( not specified. autoscaling_target_request_count_per_minute (int): Optional. Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): Optional. Target DCGM - metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): Optional. Target - vLLM metrics for GPU KV cache usage percentage. 
- autoscaling_target_vllm_num_requests_waiting (int): Optional. Target - vLLM metrics for number of inference requests currently waiting in the - queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. Target number of pubsub undelivered messages per instance. autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional. @@ -2126,9 +2030,6 @@ def deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, deployment_resource_pool=deployment_resource_pool, @@ -2170,9 +2071,6 @@ def _deploy( autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, - autoscaling_target_dcgm_fi_dev_gpu_util: Optional[int] = None, - autoscaling_target_vllm_gpu_cache_usage_perc: Optional[int] = None, - autoscaling_target_vllm_num_requests_waiting: Optional[int] = None, autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None, autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, @@ -2274,13 +2172,6 @@ def _deploy( not specified. autoscaling_target_request_count_per_minute (int): Optional. Target request count per minute per instance. - autoscaling_target_dcgm_fi_dev_gpu_util (int): Optional. Target DCGM - metrics for GPU utilization. - autoscaling_target_vllm_gpu_cache_usage_perc (int): Optional. Target - vLLM metrics for GPU KV cache usage percentage. - autoscaling_target_vllm_num_requests_waiting (int): Optional. Target - vLLM metrics for number of inference requests currently waiting in the - queue. autoscaling_target_pubsub_num_undelivered_messages (int): Optional. Target number of pubsub undelivered messages per instance. autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional. 
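These preview signatures keep their `deployment_resource_pool` parameter. A hedged usage sketch of the pool-based path; it assumes `DeploymentResourcePool` accepts a full resource name as its first positional argument, as other Vertex resource classes do, and the names are hypothetical:

from google.cloud.aiplatform.preview.models import DeploymentResourcePool, Model

pool = DeploymentResourcePool(
    "projects/my-project/locations/us-central1/deploymentResourcePools/my-pool"
)
model = Model("projects/my-project/locations/us-central1/models/456")
# Machine and autoscaling arguments stay unset when a pool is provided;
# serving resources come from the shared pool instead.
endpoint = model.deploy(deployment_resource_pool=pool)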
@@ -2390,9 +2281,6 @@ def _deploy( autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute, - autoscaling_target_dcgm_fi_dev_gpu_util=autoscaling_target_dcgm_fi_dev_gpu_util, - autoscaling_target_vllm_gpu_cache_usage_perc=autoscaling_target_vllm_gpu_cache_usage_perc, - autoscaling_target_vllm_num_requests_waiting=autoscaling_target_vllm_num_requests_waiting, autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages, autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels, deployment_resource_pool=deployment_resource_pool, diff --git a/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py b/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/version.py b/google/cloud/aiplatform/version.py index 808d127eda..c5d0a7014d 100644 --- a/google/cloud/aiplatform/version.py +++ b/google/cloud/aiplatform/version.py @@ -15,4 +15,4 @@ # limitations under the License. # -__version__ = "1.134.0" +__version__ = "1.133.0" diff --git a/google/cloud/aiplatform_v1/gapic_version.py b/google/cloud/aiplatform_v1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform_v1/gapic_version.py +++ b/google/cloud/aiplatform_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform_v1beta1/gapic_version.py b/google/cloud/aiplatform_v1beta1/gapic_version.py index 069510e593..35edacdb89 100644 --- a/google/cloud/aiplatform_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.134.0" # {x-release-please-version} +__version__ = "1.133.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform_v1beta1/types/vertex_rag_data.py b/google/cloud/aiplatform_v1beta1/types/vertex_rag_data.py index 30ec10c8bb..e377e7ddaf 100644 --- a/google/cloud/aiplatform_v1beta1/types/vertex_rag_data.py +++ b/google/cloud/aiplatform_v1beta1/types/vertex_rag_data.py @@ -1546,32 +1546,20 @@ class RagManagedDbConfig(proto.Message): This field is a member of `oneof`_ ``tier``. scaled (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Scaled): - Deprecated: Use ``mode`` instead to set the tier under - Spanner. Sets the RagManagedDb to the Scaled tier. + Sets the RagManagedDb to the Scaled tier. + This is the default tier if not explicitly + chosen. This field is a member of `oneof`_ ``tier``. basic (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Basic): - Deprecated: Use ``mode`` instead to set the tier under - Spanner. Sets the RagManagedDb to the Basic tier. + Sets the RagManagedDb to the Basic tier. This field is a member of `oneof`_ ``tier``. unprovisioned (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Unprovisioned): - Deprecated: Use ``mode`` instead to set the tier under - Spanner. Sets the RagManagedDb to the Unprovisioned tier. + Sets the RagManagedDb to the Unprovisioned + tier. This field is a member of `oneof`_ ``tier``. - serverless (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Serverless): - Sets the backend to be the serverless mode - offered by RAG Engine. - - This field is a member of `oneof`_ ``mode``. 
- spanner (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Spanner): - Sets the RAG Engine backend to be - RagManagedDb, built on top of Spanner. - NOTE: This is the default mode (w/ Basic Tier) - if not explicitly chosen. - - This field is a member of `oneof`_ ``mode``. """ class Enterprise(proto.Message): @@ -1597,8 +1585,7 @@ class Basic(proto.Message): - Latency insensitive workload. - Only using RAG Engine with external vector DBs. - NOTE: This is the default tier under Spanner mode if not explicitly - chosen. + NOTE: This is the default tier if not explicitly chosen. """ @@ -1613,60 +1600,6 @@ class Unprovisioned(proto.Message): """ - class Spanner(proto.Message): - r"""Message to configure the Spanner database used by - RagManagedDb. - - This message has `oneof`_ fields (mutually exclusive fields). - For each oneof, at most one member field can be set at the same time. - Setting any member of the oneof automatically clears all other - members. - - .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields - - Attributes: - scaled (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Scaled): - Sets the RagManagedDb to the Scaled tier. - - This field is a member of `oneof`_ ``tier``. - basic (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Basic): - Sets the RagManagedDb to the Basic tier. This - is the default tier for Spanner mode if not - explicitly chosen. - - This field is a member of `oneof`_ ``tier``. - unprovisioned (google.cloud.aiplatform_v1beta1.types.RagManagedDbConfig.Unprovisioned): - Sets the RagManagedDb to the Unprovisioned - tier. - - This field is a member of `oneof`_ ``tier``. - """ - - scaled: "RagManagedDbConfig.Scaled" = proto.Field( - proto.MESSAGE, - number=1, - oneof="tier", - message="RagManagedDbConfig.Scaled", - ) - basic: "RagManagedDbConfig.Basic" = proto.Field( - proto.MESSAGE, - number=2, - oneof="tier", - message="RagManagedDbConfig.Basic", - ) - unprovisioned: "RagManagedDbConfig.Unprovisioned" = proto.Field( - proto.MESSAGE, - number=3, - oneof="tier", - message="RagManagedDbConfig.Unprovisioned", - ) - - class Serverless(proto.Message): - r"""Message to configure the serverless mode offered by RAG - Engine. - - """ - enterprise: Enterprise = proto.Field( proto.MESSAGE, number=1, @@ -1691,18 +1624,6 @@ class Serverless(proto.Message): oneof="tier", message=Unprovisioned, ) - serverless: Serverless = proto.Field( - proto.MESSAGE, - number=5, - oneof="mode", - message=Serverless, - ) - spanner: Spanner = proto.Field( - proto.MESSAGE, - number=6, - oneof="mode", - message=Spanner, - ) class RagEngineConfig(proto.Message): diff --git a/pypi/_vertex_ai_placeholder/version.py b/pypi/_vertex_ai_placeholder/version.py index 23963c4d83..120e175e87 100644 --- a/pypi/_vertex_ai_placeholder/version.py +++ b/pypi/_vertex_ai_placeholder/version.py @@ -15,4 +15,4 @@ # limitations under the License. 
# -__version__ = "1.134.0" +__version__ = "1.133.0" diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json index 88ac6c8e7a..72a983d65e 100644 --- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json +++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-aiplatform", - "version": "1.134.0" + "version": "1.133.0" }, "snippets": [ { diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json index 5e7a8e6322..11e1910d7e 100644 --- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json +++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-aiplatform", - "version": "1.134.0" + "version": "1.133.0" }, "snippets": [ { diff --git a/tests/unit/aiplatform/test_endpoints.py b/tests/unit/aiplatform/test_endpoints.py index 89b53f6aef..085139464a 100644 --- a/tests/unit/aiplatform/test_endpoints.py +++ b/tests/unit/aiplatform/test_endpoints.py @@ -146,15 +146,6 @@ _TEST_METRIC_NAME_REQUEST_COUNT = ( "aiplatform.googleapis.com/prediction/online/request_count" ) -_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION = ( - "prometheus.googleapis.com/vertex_dcgm_fi_dev_gpu_util" -) -_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE = ( - "prometheus.googleapis.com/vertex_vllm_gpu_cache_usage_perc" -) -_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING = ( - "prometheus.googleapis.com/vertex_vllm_num_requests_waiting" -) _TEST_METRIC_NAME_PUBSUB_NUM_UNDELIVERED_MESSAGE = ( "pubsub.googleapis.com/subscription/num_undelivered_messages" ) @@ -2250,130 +2241,6 @@ def test_deploy_with_autoscaling_target_request_count_per_minute_preview( timeout=None, ) - @pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock") - @pytest.mark.parametrize("sync", [True, False]) - def test_deploy_with_autoscaling_target_dcgm_vllm_metrics( - self, deploy_model_mock, sync - ): - test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME) - test_model = models.Model(_TEST_ID) - test_model._gca_resource.supported_deployment_resources_types.append( - aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES - ) - test_endpoint.deploy( - model=test_model, - machine_type=_TEST_MACHINE_TYPE, - service_account=_TEST_SERVICE_ACCOUNT, - sync=sync, - deploy_request_timeout=None, - autoscaling_target_dcgm_fi_dev_gpu_util=60, - autoscaling_target_vllm_gpu_cache_usage_perc=50, - autoscaling_target_vllm_num_requests_waiting=10, - ) - - if not sync: - test_endpoint.wait() - - expected_dedicated_resources = gca_machine_resources.DedicatedResources( - machine_spec=gca_machine_resources.MachineSpec( - machine_type=_TEST_MACHINE_TYPE, - ), - min_replica_count=1, - max_replica_count=1, - autoscaling_metric_specs=[ - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION, - target=60, - ), - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE, - target=50, - ), - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING, - target=10, - ), - ], - ) - - expected_deployed_model = gca_endpoint.DeployedModel( - 
dedicated_resources=expected_dedicated_resources, - model=test_model.resource_name, - display_name=None, - service_account=_TEST_SERVICE_ACCOUNT, - ) - deploy_model_mock.assert_called_once_with( - endpoint=test_endpoint.resource_name, - deployed_model=expected_deployed_model, - traffic_split={"0": 100}, - metadata=(), - timeout=None, - ) - - @pytest.mark.usefixtures( - "get_endpoint_mock", "get_model_mock", "preview_deploy_model_mock" - ) - @pytest.mark.parametrize("sync", [True, False]) - def test_deploy_with_autoscaling_target_dcgm_vllm_metrics_preview( - self, preview_deploy_model_mock, sync - ): - test_endpoint = preview_models.Endpoint(_TEST_ENDPOINT_NAME) - test_model = preview_models.Model(_TEST_ID) - test_model._gca_resource.supported_deployment_resources_types.append( - aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES - ) - test_endpoint.deploy( - model=test_model, - machine_type=_TEST_MACHINE_TYPE, - service_account=_TEST_SERVICE_ACCOUNT, - sync=sync, - deploy_request_timeout=None, - autoscaling_target_dcgm_fi_dev_gpu_util=60, - autoscaling_target_vllm_gpu_cache_usage_perc=50, - autoscaling_target_vllm_num_requests_waiting=10, - ) - - if not sync: - test_endpoint.wait() - - expected_dedicated_resources = gca_machine_resources_v1beta1.DedicatedResources( - machine_spec=gca_machine_resources_v1beta1.MachineSpec( - machine_type=_TEST_MACHINE_TYPE, - ), - min_replica_count=1, - max_replica_count=1, - autoscaling_metric_specs=[ - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION, - target=60, - ), - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE, - target=50, - ), - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING, - target=10, - ), - ], - ) - - expected_deployed_model = gca_endpoint_v1beta1.DeployedModel( - dedicated_resources=expected_dedicated_resources, - model=test_model.resource_name, - display_name=None, - service_account=_TEST_SERVICE_ACCOUNT, - enable_container_logging=True, - faster_deployment_config=gca_endpoint_v1beta1.FasterDeploymentConfig(), - ) - preview_deploy_model_mock.assert_called_once_with( - endpoint=test_endpoint.resource_name, - deployed_model=expected_deployed_model, - traffic_split={"0": 100}, - metadata=(), - timeout=None, - ) - @pytest.mark.usefixtures( "get_endpoint_mock", "get_model_mock", "preview_deploy_model_mock" ) diff --git a/tests/unit/aiplatform/test_metadata_models.py b/tests/unit/aiplatform/test_metadata_models.py index aecb01c753..b7120c2310 100644 --- a/tests/unit/aiplatform/test_metadata_models.py +++ b/tests/unit/aiplatform/test_metadata_models.py @@ -157,10 +157,8 @@ def create_model_file(filename): @pytest.fixture def mock_storage_blob_download_xgboost_xgbmodel_file(): def create_model_file(filename): - xgb.XGBClassifier._estimator_type = "classifier" x, y = make_classification() model = xgb.XGBClassifier() - model._estimator_type = "classifier" model.fit(x, y) model.save_model(filename) @@ -524,7 +522,6 @@ def test_save_model_xgboost_xgbmodel( x, y = make_classification() xgb_model = xgb.XGBClassifier() - xgb_model._estimator_type = "classifier" xgb_model.fit(x, y) aiplatform.init( diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py index 5cf874db98..5af84e24ea 100644 --- a/tests/unit/aiplatform/test_models.py +++ b/tests/unit/aiplatform/test_models.py @@ -524,15 +524,6 @@ 
_TEST_METRIC_NAME_REQUEST_COUNT = ( "aiplatform.googleapis.com/prediction/online/request_count" ) -_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION = ( - "prometheus.googleapis.com/vertex_dcgm_fi_dev_gpu_util" -) -_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE = ( - "prometheus.googleapis.com/vertex_vllm_gpu_cache_usage_perc" -) -_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING = ( - "prometheus.googleapis.com/vertex_vllm_num_requests_waiting" -) _TEST_METRIC_NAME_PUBSUB_NUM_UNDELIVERED_MESSAGE = ( "pubsub.googleapis.com/subscription/num_undelivered_messages" ) @@ -2568,140 +2559,6 @@ def test_preview_deploy_no_endpoint_dedicated_resources_autoscaling_request_coun timeout=None, ) - @pytest.mark.usefixtures( - "get_model_mock", - "create_endpoint_mock", - "get_endpoint_mock", - ) - @pytest.mark.parametrize("sync", [True, False]) - def test_deploy_no_endpoint_dedicated_resources_autoscaling_dcgm_vllm_metrics( - self, deploy_model_mock, sync - ): - test_model = models.Model(_TEST_ID) - test_model._gca_resource.supported_deployment_resources_types.append( - aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES - ) - - test_endpoint = test_model.deploy( - machine_type=_TEST_MACHINE_TYPE, - accelerator_type=_TEST_ACCELERATOR_TYPE, - accelerator_count=_TEST_ACCELERATOR_COUNT, - sync=sync, - deploy_request_timeout=None, - system_labels=_TEST_LABELS, - autoscaling_target_dcgm_fi_dev_gpu_util=60, - autoscaling_target_vllm_gpu_cache_usage_perc=50, - autoscaling_target_vllm_num_requests_waiting=10, - ) - - if not sync: - test_endpoint.wait() - - expected_dedicated_resources = gca_machine_resources.DedicatedResources( - machine_spec=gca_machine_resources.MachineSpec( - machine_type=_TEST_MACHINE_TYPE, - accelerator_type=_TEST_ACCELERATOR_TYPE, - accelerator_count=_TEST_ACCELERATOR_COUNT, - ), - min_replica_count=1, - max_replica_count=1, - autoscaling_metric_specs=[ - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION, - target=60, - ), - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE, - target=50, - ), - gca_machine_resources.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING, - target=10, - ), - ], - ) - expected_deployed_model = gca_endpoint.DeployedModel( - dedicated_resources=expected_dedicated_resources, - model=test_model.resource_name, - display_name=None, - system_labels=_TEST_LABELS, - ) - deploy_model_mock.assert_called_once_with( - endpoint=test_endpoint.resource_name, - deployed_model=expected_deployed_model, - traffic_split={"0": 100}, - metadata=(), - timeout=None, - ) - - @pytest.mark.usefixtures( - "get_model_mock", - "create_endpoint_mock", - "get_endpoint_mock", - ) - @pytest.mark.parametrize("sync", [True, False]) - def test_preview_deploy_no_endpoint_dedicated_resources_autoscaling_dcgm_vllm_metrics( - self, preview_deploy_model_mock, sync - ): - test_model = preview_models.Model(_TEST_ID).preview - test_model._gca_resource.supported_deployment_resources_types.append( - aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES - ) - - test_endpoint = test_model.deploy( - machine_type=_TEST_MACHINE_TYPE, - accelerator_type=_TEST_ACCELERATOR_TYPE, - accelerator_count=_TEST_ACCELERATOR_COUNT, - sync=sync, - deploy_request_timeout=None, - system_labels=_TEST_LABELS, - autoscaling_target_dcgm_fi_dev_gpu_util=60, - autoscaling_target_vllm_gpu_cache_usage_perc=50, - 
autoscaling_target_vllm_num_requests_waiting=10, - ) - - if not sync: - test_endpoint.wait() - - expected_dedicated_resources = gca_machine_resources_v1beta1.DedicatedResources( - machine_spec=gca_machine_resources_v1beta1.MachineSpec( - machine_type=_TEST_MACHINE_TYPE, - accelerator_type=_TEST_ACCELERATOR_TYPE, - accelerator_count=_TEST_ACCELERATOR_COUNT, - ), - min_replica_count=1, - max_replica_count=1, - autoscaling_metric_specs=[ - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_DCGM_METRICS_GPU_UTILIZATION, - target=60, - ), - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_GPU_CACHE_USAGE_PERCENTAGE, - target=50, - ), - gca_machine_resources_v1beta1.AutoscalingMetricSpec( - metric_name=_TEST_METRIC_NAME_VLLM_METRICS_NUM_REQUESTS_WAITING, - target=10, - ), - ], - ) - expected_deployed_model = gca_endpoint_v1beta1.DeployedModel( - dedicated_resources=expected_dedicated_resources, - model=test_model.resource_name, - display_name=None, - enable_container_logging=True, - faster_deployment_config=gca_endpoint_v1beta1.FasterDeploymentConfig(), - system_labels=_TEST_LABELS, - ) - preview_deploy_model_mock.assert_called_once_with( - endpoint=test_endpoint.resource_name, - deployed_model=expected_deployed_model, - traffic_split={"0": 100}, - metadata=(), - timeout=None, - ) - @pytest.mark.usefixtures( "get_model_mock", "create_endpoint_mock", diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_vertex_rag_data_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_vertex_rag_data_service.py index c41d7da44c..b4337e95b2 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_vertex_rag_data_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_vertex_rag_data_service.py @@ -10756,8 +10756,6 @@ def test_update_rag_engine_config_rest_call_success(request_type): "scaled": {}, "basic": {}, "unprovisioned": {}, - "serverless": {}, - "spanner": {"scaled": {}, "basic": {}, "unprovisioned": {}}, }, } # The version of a generated dependency at test runtime may differ from the version used during generation. @@ -13775,8 +13773,6 @@ async def test_update_rag_engine_config_rest_asyncio_call_success(request_type): "scaled": {}, "basic": {}, "unprovisioned": {}, - "serverless": {}, - "spanner": {"scaled": {}, "basic": {}, "unprovisioned": {}}, }, } # The version of a generated dependency at test runtime may differ from the version used during generation. 
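Context for the test removals above: with the DCGM/vLLM GPU-metric cases deleted, the request-count and CPU-utilization targets remain the autoscaling paths these suites exercise. A minimal sketch of that surviving `deploy()` flow follows — it is not part of this patch; the project, endpoint, and model IDs are placeholders, while the keyword arguments are existing `aiplatform` parameters (the request-count target corresponds to the retained `_TEST_METRIC_NAME_REQUEST_COUNT` constant):

```python
# Minimal sketch (not from this patch) of the autoscaling path the remaining
# tests still cover; all resource IDs below are placeholders.
from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

endpoint = aiplatform.Endpoint("1234567890")  # hypothetical endpoint ID
model = aiplatform.Model("0987654321")  # hypothetical model ID

# Each autoscaling_target_* kwarg is translated into an AutoscalingMetricSpec;
# the request-count target maps to the metric
# "aiplatform.googleapis.com/prediction/online/request_count".
endpoint.deploy(
    model=model,
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=2,
    autoscaling_target_cpu_utilization=60,
    autoscaling_target_request_count_per_minute=100,
)
```

The removed `autoscaling_target_dcgm_fi_dev_gpu_util`, `autoscaling_target_vllm_gpu_cache_usage_perc`, and `autoscaling_target_vllm_num_requests_waiting` kwargs are exactly the ones absent from this sketch.
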
diff --git a/tests/unit/vertexai/genai/replays/test_create_agent_engine_developer_connect.py b/tests/unit/vertexai/genai/replays/test_create_agent_engine_developer_connect.py
index ab6f7af4cf..1b66399a48 100644
--- a/tests/unit/vertexai/genai/replays/test_create_agent_engine_developer_connect.py
+++ b/tests/unit/vertexai/genai/replays/test_create_agent_engine_developer_connect.py
@@ -14,6 +14,8 @@
 #
 # pylint: disable=protected-access,bad-continuation,missing-function-docstring
 
+import sys
+
 from tests.unit.vertexai.genai.replays import pytest_helper
 from vertexai._genai import types
 
@@ -23,13 +25,37 @@
 
 
 def test_create_with_developer_connect_source(client):
-    """Tests creating an agent engine with developer connect source."""
-    developer_connect_source_config = types.ReasoningEngineSpecSourceCodeSpecDeveloperConnectConfig(
-        git_repository_link="projects/reasoning-engine-test-1/locations/europe-west3/connections/shawn-develop-connect/gitRepositoryLinks/shawn-yang-google-adk-samples",
-        revision="main",
-        dir="test",
-    )
-    agent_engine = client.agent_engines.create(
+    """Tests creating an agent engine with developer connect source."""
+    if sys.version_info >= (3, 13):
+        try:
+            client._api_client._initialize_replay_session_if_not_loaded()
+            if client._api_client.replay_session:
+                target_ver = f"{sys.version_info.major}.{sys.version_info.minor}"
+
+                def _update_ver(obj):
+                    if isinstance(obj, dict):
+                        if isinstance(obj.get("python_spec"), dict):
+                            if "version" in obj["python_spec"]:
+                                obj["python_spec"]["version"] = target_ver
+                        for v in obj.values():
+                            _update_ver(v)
+                    elif isinstance(obj, list):
+                        for item in obj:
+                            _update_ver(item)
+
+                for interaction in client._api_client.replay_session.interactions:
+                    if hasattr(interaction.request, "body_segments"):
+                        _update_ver(interaction.request.body_segments)
+                    if hasattr(interaction.request, "body"):
+                        _update_ver(interaction.request.body)
+        except Exception:
+            pass
+    developer_connect_source_config = types.ReasoningEngineSpecSourceCodeSpecDeveloperConnectConfig(
+        git_repository_link="projects/reasoning-engine-test-1/locations/europe-west3/connections/shawn-develop-connect/gitRepositoryLinks/shawn-yang-google-adk-samples",
+        revision="main",
+        dir="test",
+    )
+    agent_engine = client.agent_engines.create(
         config={
             "display_name": "test-agent-engine-dev-connect",
             "developer_connect_source": developer_connect_source_config,
@@ -42,21 +68,21 @@
             },
         },
     )
-    assert agent_engine.api_resource.display_name == "test-agent-engine-dev-connect"
-    assert (
+    assert agent_engine.api_resource.display_name == "test-agent-engine-dev-connect"
+    assert (
         agent_engine.api_resource.spec.source_code_spec.developer_connect_source.config.git_repository_link
         == developer_connect_source_config.git_repository_link
     )
-    assert (
+    assert (
         agent_engine.api_resource.spec.source_code_spec.developer_connect_source.config.revision
         == developer_connect_source_config.revision
     )
-    assert (
+    assert (
         agent_engine.api_resource.spec.source_code_spec.developer_connect_source.config.dir
         == developer_connect_source_config.dir
     )
-    # Clean up resources.
+ client.agent_engines.delete(name=agent_engine.api_resource.name, force=True) pytestmark = pytest_helper.setup( diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py index cd97ab042c..392de18fc2 100644 --- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py +++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py @@ -46,23 +46,6 @@ ) ), ) -EXACT_MATCH_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric( - metric="exact_match", - metric_config=types.UnifiedMetric( - computation_based_metric_spec=types.ComputationBasedMetricSpec( - type=types.ComputationBasedMetricType.EXACT_MATCH, - ) - ), -) -BLEU_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric( - metric="exact_match_2", - metric_config=types.UnifiedMetric( - computation_based_metric_spec=types.ComputationBasedMetricSpec( - type=types.ComputationBasedMetricType.BLEU, - parameters={"use_effective_order": True}, - ) - ), -) def test_create_eval_run_data_source_evaluation_set(client): @@ -91,8 +74,6 @@ def test_create_eval_run_data_source_evaluation_set(client): GENERAL_QUALITY_METRIC, types.RubricMetric.FINAL_RESPONSE_QUALITY, LLM_METRIC, - EXACT_MATCH_COMPUTATION_BASED_METRIC, - BLEU_COMPUTATION_BASED_METRIC, ], agent_info=types.evals.AgentInfo( agent_resource_name="project/123/locations/us-central1/reasoningEngines/456", @@ -113,13 +94,7 @@ def test_create_eval_run_data_source_evaluation_set(client): output_config=genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST) ), - metrics=[ - GENERAL_QUALITY_METRIC, - FINAL_RESPONSE_QUALITY_METRIC, - LLM_METRIC, - EXACT_MATCH_COMPUTATION_BASED_METRIC, - BLEU_COMPUTATION_BASED_METRIC, - ], + metrics=[GENERAL_QUALITY_METRIC, FINAL_RESPONSE_QUALITY_METRIC, LLM_METRIC], ) assert evaluation_run.inference_configs[ "agent-1" diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index f090cf3ac4..b4b1d7f2a9 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -577,13 +577,6 @@ def _UnifiedMetric_from_vertex( getv(from_object, ["predefinedMetricSpec"]), ) - if getv(from_object, ["computationBasedMetricSpec"]) is not None: - setv( - to_object, - ["computation_based_metric_spec"], - getv(from_object, ["computationBasedMetricSpec"]), - ) - return to_object @@ -628,13 +621,6 @@ def _UnifiedMetric_to_vertex( getv(from_object, ["predefined_metric_spec"]), ) - if getv(from_object, ["computation_based_metric_spec"]) is not None: - setv( - to_object, - ["computationBasedMetricSpec"], - getv(from_object, ["computation_based_metric_spec"]), - ) - return to_object diff --git a/vertexai/_genai/types/__init__.py b/vertexai/_genai/types/__init__.py index e0ed0ca64d..904c390edf 100644 --- a/vertexai/_genai/types/__init__.py +++ b/vertexai/_genai/types/__init__.py @@ -174,10 +174,6 @@ from .common import CometResult from .common import CometResultDict from .common import CometResultOrDict -from .common import ComputationBasedMetricSpec -from .common import ComputationBasedMetricSpecDict -from .common import ComputationBasedMetricSpecOrDict -from .common import ComputationBasedMetricType from .common import ContainerSpec from .common import ContainerSpecDict from .common import ContainerSpecOrDict @@ -1082,9 +1078,6 @@ "CustomCodeExecutionSpec", "CustomCodeExecutionSpecDict", "CustomCodeExecutionSpecOrDict", - "ComputationBasedMetricSpec", - "ComputationBasedMetricSpecDict", - "ComputationBasedMetricSpecOrDict", "UnifiedMetric", 
"UnifiedMetricDict", "UnifiedMetricOrDict", @@ -1103,9 +1096,6 @@ "EvaluationRunResults", "EvaluationRunResultsDict", "EvaluationRunResultsOrDict", - "EvalCaseMetricResult", - "EvalCaseMetricResultDict", - "EvalCaseMetricResultOrDict", "ResponseCandidateResult", "ResponseCandidateResultDict", "ResponseCandidateResultOrDict", @@ -1115,9 +1105,6 @@ "AggregatedMetricResult", "AggregatedMetricResultDict", "AggregatedMetricResultOrDict", - "WinRateStats", - "WinRateStatsDict", - "WinRateStatsOrDict", "ResponseCandidate", "ResponseCandidateDict", "ResponseCandidateOrDict", @@ -1139,12 +1126,6 @@ "EvaluationResult", "EvaluationResultDict", "EvaluationResultOrDict", - "EvaluationRunAgentConfig", - "EvaluationRunAgentConfigDict", - "EvaluationRunAgentConfigOrDict", - "EvaluationRunInferenceConfig", - "EvaluationRunInferenceConfigDict", - "EvaluationRunInferenceConfigOrDict", "EvaluationRun", "EvaluationRunDict", "EvaluationRunOrDict", @@ -1496,9 +1477,6 @@ "UpdateAgentEngineConfig", "UpdateAgentEngineConfigDict", "UpdateAgentEngineConfigOrDict", - "MemoryMetadataValue", - "MemoryMetadataValueDict", - "MemoryMetadataValueOrDict", "AgentEngineMemoryConfig", "AgentEngineMemoryConfigDict", "AgentEngineMemoryConfigOrDict", @@ -1556,6 +1534,9 @@ "RetrieveMemoriesRequestSimpleRetrievalParams", "RetrieveMemoriesRequestSimpleRetrievalParamsDict", "RetrieveMemoriesRequestSimpleRetrievalParamsOrDict", + "MemoryMetadataValue", + "MemoryMetadataValueDict", + "MemoryMetadataValueOrDict", "MemoryFilter", "MemoryFilterDict", "MemoryFilterOrDict", @@ -1871,6 +1852,18 @@ "ContentMapContents", "ContentMapContentsDict", "ContentMapContentsOrDict", + "EvalCaseMetricResult", + "EvalCaseMetricResultDict", + "EvalCaseMetricResultOrDict", + "EvaluationRunAgentConfig", + "EvaluationRunAgentConfigDict", + "EvaluationRunAgentConfigOrDict", + "EvaluationRunInferenceConfig", + "EvaluationRunInferenceConfigDict", + "EvaluationRunInferenceConfigOrDict", + "WinRateStats", + "WinRateStatsDict", + "WinRateStatsOrDict", "EvaluateMethodConfig", "EvaluateMethodConfigDict", "EvaluateMethodConfigOrDict", @@ -1943,7 +1936,6 @@ "EvaluationItemType", "SamplingMethod", "RubricContentType", - "ComputationBasedMetricType", "EvaluationRunState", "OptimizeTarget", "MemoryMetadataMergeStrategy", diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py index cd7a52b516..736b2fc178 100644 --- a/vertexai/_genai/types/common.py +++ b/vertexai/_genai/types/common.py @@ -324,21 +324,6 @@ class RubricContentType(_common.CaseInSensitiveEnum): """Generate rubrics in a unit test format.""" -class ComputationBasedMetricType(_common.CaseInSensitiveEnum): - """Represents the type of the computation based metric.""" - - COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED = ( - "COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED" - ) - """Computation based metric type is unspecified.""" - EXACT_MATCH = "EXACT_MATCH" - """Exact match metric.""" - BLEU = "BLEU" - """BLEU metric.""" - ROUGE = "ROUGE" - """ROUGE metric.""" - - class EvaluationRunState(_common.CaseInSensitiveEnum): """Represents the state of an evaluation run.""" @@ -984,33 +969,6 @@ def evaluate(instance: dict[str, Any]) -> float: ] -class ComputationBasedMetricSpec(_common.BaseModel): - """Specification for a computation based metric.""" - - type: Optional[ComputationBasedMetricType] = Field( - default=None, description="""The type of the computation based metric.""" - ) - parameters: Optional[dict[str, Any]] = Field( - default=None, - description="""A map of parameters for the metric. 
ROUGE example: {"rouge_type": "rougeL", "split_summaries": True, "use_stemmer": True}. BLEU example: {"use_effective_order": True}.""", - ) - - -class ComputationBasedMetricSpecDict(TypedDict, total=False): - """Specification for a computation based metric.""" - - type: Optional[ComputationBasedMetricType] - """The type of the computation based metric.""" - - parameters: Optional[dict[str, Any]] - """A map of parameters for the metric. ROUGE example: {"rouge_type": "rougeL", "split_summaries": True, "use_stemmer": True}. BLEU example: {"use_effective_order": True}.""" - - -ComputationBasedMetricSpecOrDict = Union[ - ComputationBasedMetricSpec, ComputationBasedMetricSpecDict -] - - class UnifiedMetric(_common.BaseModel): """The unified metric used for evaluation.""" @@ -1032,9 +990,6 @@ class UnifiedMetric(_common.BaseModel): predefined_metric_spec: Optional[PredefinedMetricSpec] = Field( default=None, description="""The spec for a pre-defined metric.""" ) - computation_based_metric_spec: Optional[ComputationBasedMetricSpec] = Field( - default=None, description="""The spec for a computation based metric.""" - ) class UnifiedMetricDict(TypedDict, total=False): @@ -1058,9 +1013,6 @@ class UnifiedMetricDict(TypedDict, total=False): predefined_metric_spec: Optional[PredefinedMetricSpecDict] """The spec for a pre-defined metric.""" - computation_based_metric_spec: Optional[ComputationBasedMetricSpecDict] - """The spec for a computation based metric.""" - UnifiedMetricOrDict = Union[UnifiedMetric, UnifiedMetricDict] @@ -1245,53 +1197,6 @@ class EvaluationRunResultsDict(TypedDict, total=False): EvaluationRunResultsOrDict = Union[EvaluationRunResults, EvaluationRunResultsDict] -class EvalCaseMetricResult(_common.BaseModel): - """Evaluation result for a single evaluation case for a single metric.""" - - metric_name: Optional[str] = Field( - default=None, description="""Name of the metric.""" - ) - score: Optional[float] = Field(default=None, description="""Score of the metric.""") - explanation: Optional[str] = Field( - default=None, description="""Explanation of the metric.""" - ) - rubric_verdicts: Optional[list[evals_types.RubricVerdict]] = Field( - default=None, - description="""The details of all the rubrics and their verdicts for rubric-based metrics.""", - ) - raw_output: Optional[list[str]] = Field( - default=None, description="""Raw output of the metric.""" - ) - error_message: Optional[str] = Field( - default=None, description="""Error message for the metric.""" - ) - - -class EvalCaseMetricResultDict(TypedDict, total=False): - """Evaluation result for a single evaluation case for a single metric.""" - - metric_name: Optional[str] - """Name of the metric.""" - - score: Optional[float] - """Score of the metric.""" - - explanation: Optional[str] - """Explanation of the metric.""" - - rubric_verdicts: Optional[list[evals_types.RubricVerdict]] - """The details of all the rubrics and their verdicts for rubric-based metrics.""" - - raw_output: Optional[list[str]] - """Raw output of the metric.""" - - error_message: Optional[str] - """Error message for the metric.""" - - -EvalCaseMetricResultOrDict = Union[EvalCaseMetricResult, EvalCaseMetricResultDict] - - class ResponseCandidateResult(_common.BaseModel): """Aggregated metric results for a single response candidate of an EvalCase.""" @@ -1299,7 +1204,7 @@ class ResponseCandidateResult(_common.BaseModel): default=None, description="""Index of the response candidate this result pertains to.""", ) - metric_results: Optional[dict[str, 
EvalCaseMetricResult]] = Field( + metric_results: Optional[dict[str, "EvalCaseMetricResult"]] = Field( default=None, description="""A dictionary of metric results for this response candidate, keyed by metric name.""", ) @@ -1311,7 +1216,7 @@ class ResponseCandidateResultDict(TypedDict, total=False): response_index: Optional[int] """Index of the response candidate this result pertains to.""" - metric_results: Optional[dict[str, EvalCaseMetricResultDict]] + metric_results: Optional[dict[str, "EvalCaseMetricResultDict"]] """A dictionary of metric results for this response candidate, keyed by metric name.""" @@ -1403,31 +1308,6 @@ class AggregatedMetricResultDict(TypedDict, total=False): AggregatedMetricResultOrDict = Union[AggregatedMetricResult, AggregatedMetricResultDict] -class WinRateStats(_common.BaseModel): - """Statistics for win rates for a single metric.""" - - win_rates: Optional[list[float]] = Field( - default=None, - description="""Win rates for the metric, one for each candidate.""", - ) - tie_rate: Optional[float] = Field( - default=None, description="""Tie rate for the metric.""" - ) - - -class WinRateStatsDict(TypedDict, total=False): - """Statistics for win rates for a single metric.""" - - win_rates: Optional[list[float]] - """Win rates for the metric, one for each candidate.""" - - tie_rate: Optional[float] - """Tie rate for the metric.""" - - -WinRateStatsOrDict = Union[WinRateStats, WinRateStatsDict] - - class ResponseCandidate(_common.BaseModel): """A model-generated content to the prompt.""" @@ -1729,7 +1609,7 @@ class EvaluationResult(_common.BaseModel): default=None, description="""A list of summary-level evaluation results for each metric.""", ) - win_rates: Optional[dict[str, WinRateStats]] = Field( + win_rates: Optional[dict[str, "WinRateStats"]] = Field( default=None, description="""A dictionary of win rates for each metric, only populated for multi-response evaluation runs.""", ) @@ -1766,7 +1646,7 @@ class EvaluationResultDict(TypedDict, total=False): summary_metrics: Optional[list[AggregatedMetricResultDict]] """A list of summary-level evaluation results for each metric.""" - win_rates: Optional[dict[str, WinRateStatsDict]] + win_rates: Optional[dict[str, "WinRateStatsDict"]] """A dictionary of win rates for each metric, only populated for multi-response evaluation runs.""" evaluation_dataset: Optional[list[EvaluationDatasetDict]] @@ -1782,71 +1662,6 @@ class EvaluationResultDict(TypedDict, total=False): EvaluationResultOrDict = Union[EvaluationResult, EvaluationResultDict] -class EvaluationRunAgentConfig(_common.BaseModel): - """This field is experimental and may change in future versions. - - Agent config for an evaluation run. - """ - - developer_instruction: Optional[genai_types.Content] = Field( - default=None, description="""The developer instruction for the agent.""" - ) - tools: Optional[list[genai_types.Tool]] = Field( - default=None, description="""The tools available to the agent.""" - ) - - -class EvaluationRunAgentConfigDict(TypedDict, total=False): - """This field is experimental and may change in future versions. - - Agent config for an evaluation run. 
- """ - - developer_instruction: Optional[genai_types.ContentDict] - """The developer instruction for the agent.""" - - tools: Optional[list[genai_types.ToolDict]] - """The tools available to the agent.""" - - -EvaluationRunAgentConfigOrDict = Union[ - EvaluationRunAgentConfig, EvaluationRunAgentConfigDict -] - - -class EvaluationRunInferenceConfig(_common.BaseModel): - """This field is experimental and may change in future versions. - - Configuration that describes an agent. - """ - - agent_config: Optional[EvaluationRunAgentConfig] = Field( - default=None, description="""The agent config.""" - ) - model: Optional[str] = Field( - default=None, - description="""The fully qualified name of the publisher model or endpoint to use for inference.""", - ) - - -class EvaluationRunInferenceConfigDict(TypedDict, total=False): - """This field is experimental and may change in future versions. - - Configuration that describes an agent. - """ - - agent_config: Optional[EvaluationRunAgentConfigDict] - """The agent config.""" - - model: Optional[str] - """The fully qualified name of the publisher model or endpoint to use for inference.""" - - -EvaluationRunInferenceConfigOrDict = Union[ - EvaluationRunInferenceConfig, EvaluationRunInferenceConfigDict -] - - class EvaluationRun(_common.BaseModel): """Represents an evaluation run.""" @@ -1875,7 +1690,7 @@ class EvaluationRun(_common.BaseModel): evaluation_config: Optional[EvaluationRunConfig] = Field( default=None, description="""The evaluation config for the evaluation run.""" ) - inference_configs: Optional[dict[str, EvaluationRunInferenceConfig]] = Field( + inference_configs: Optional[dict[str, "EvaluationRunInferenceConfig"]] = Field( default=None, description="""This field is experimental and may change in future versions. The inference configs for the evaluation run.""", ) @@ -1959,7 +1774,7 @@ class EvaluationRunDict(TypedDict, total=False): evaluation_config: Optional[EvaluationRunConfigDict] """The evaluation config for the evaluation run.""" - inference_configs: Optional[dict[str, EvaluationRunInferenceConfigDict]] + inference_configs: Optional[dict[str, "EvaluationRunInferenceConfigDict"]] """This field is experimental and may change in future versions. The inference configs for the evaluation run.""" labels: Optional[dict[str, str]] @@ -6796,37 +6611,6 @@ class _UpdateAgentEngineRequestParametersDict(TypedDict, total=False): ] -class MemoryMetadataValue(_common.BaseModel): - """The metadata values for memories.""" - - timestamp_value: Optional[datetime.datetime] = Field( - default=None, - description="""Timestamp value. When filtering on timestamp values, only the seconds field will be compared.""", - ) - double_value: Optional[float] = Field(default=None, description="""Double value.""") - bool_value: Optional[bool] = Field(default=None, description="""Boolean value.""") - string_value: Optional[str] = Field(default=None, description="""String value.""") - - -class MemoryMetadataValueDict(TypedDict, total=False): - """The metadata values for memories.""" - - timestamp_value: Optional[datetime.datetime] - """Timestamp value. 
When filtering on timestamp values, only the seconds field will be compared.""" - - double_value: Optional[float] - """Double value.""" - - bool_value: Optional[bool] - """Boolean value.""" - - string_value: Optional[str] - """String value.""" - - -MemoryMetadataValueOrDict = Union[MemoryMetadataValue, MemoryMetadataValueDict] - - class AgentEngineMemoryConfig(_common.BaseModel): """Config for creating a Memory.""" @@ -6868,7 +6652,7 @@ class AgentEngineMemoryConfig(_common.BaseModel): topics: Optional[list[MemoryTopicId]] = Field( default=None, description="""Optional. The topics of the memory.""" ) - metadata: Optional[dict[str, MemoryMetadataValue]] = Field( + metadata: Optional[dict[str, "MemoryMetadataValue"]] = Field( default=None, description="""Optional. User-provided metadata for the Memory. This information was provided when creating, updating, or generating the Memory. It was not generated by Memory Bank.""", ) @@ -6909,7 +6693,7 @@ class AgentEngineMemoryConfigDict(TypedDict, total=False): topics: Optional[list[MemoryTopicIdDict]] """Optional. The topics of the memory.""" - metadata: Optional[dict[str, MemoryMetadataValueDict]] + metadata: Optional[dict[str, "MemoryMetadataValueDict"]] """Optional. User-provided metadata for the Memory. This information was provided when creating, updating, or generating the Memory. It was not generated by Memory Bank.""" @@ -7022,7 +6806,7 @@ class Memory(_common.BaseModel): topics: Optional[list[MemoryTopicId]] = Field( default=None, description="""Optional. The Topics of the Memory.""" ) - metadata: Optional[dict[str, MemoryMetadataValue]] = Field( + metadata: Optional[dict[str, "MemoryMetadataValue"]] = Field( default=None, description="""Optional. User-provided metadata for the Memory. This information was provided when creating, updating, or generating the Memory. It was not generated by Memory Bank.""", ) @@ -7070,7 +6854,7 @@ class MemoryDict(TypedDict, total=False): topics: Optional[list[MemoryTopicIdDict]] """Optional. The Topics of the Memory.""" - metadata: Optional[dict[str, MemoryMetadataValueDict]] + metadata: Optional[dict[str, "MemoryMetadataValueDict"]] """Optional. User-provided metadata for the Memory. This information was provided when creating, updating, or generating the Memory. It was not generated by Memory Bank.""" @@ -7384,7 +7168,7 @@ class GenerateAgentEngineMemoriesConfig(_common.BaseModel): default=None, description="""Optional. Input only. If true, no revisions will be created for this request.""", ) - metadata: Optional[dict[str, MemoryMetadataValue]] = Field( + metadata: Optional[dict[str, "MemoryMetadataValue"]] = Field( default=None, description="""Optional. User-provided metadata for the generated memories. This is not generated by Memory Bank.""", ) @@ -7423,7 +7207,7 @@ class GenerateAgentEngineMemoriesConfigDict(TypedDict, total=False): disable_memory_revisions: Optional[bool] """Optional. Input only. If true, no revisions will be created for this request.""" - metadata: Optional[dict[str, MemoryMetadataValueDict]] + metadata: Optional[dict[str, "MemoryMetadataValueDict"]] """Optional. User-provided metadata for the generated memories. 
This is not generated by Memory Bank.""" metadata_merge_strategy: Optional[MemoryMetadataMergeStrategy] @@ -7886,6 +7670,37 @@ class RetrieveMemoriesRequestSimpleRetrievalParamsDict(TypedDict, total=False): ] +class MemoryMetadataValue(_common.BaseModel): + """Memory metadata.""" + + timestamp_value: Optional[datetime.datetime] = Field( + default=None, + description="""Timestamp value. When filtering on timestamp values, only the seconds field will be compared.""", + ) + double_value: Optional[float] = Field(default=None, description="""Double value.""") + bool_value: Optional[bool] = Field(default=None, description="""Boolean value.""") + string_value: Optional[str] = Field(default=None, description="""String value.""") + + +class MemoryMetadataValueDict(TypedDict, total=False): + """Memory metadata.""" + + timestamp_value: Optional[datetime.datetime] + """Timestamp value. When filtering on timestamp values, only the seconds field will be compared.""" + + double_value: Optional[float] + """Double value.""" + + bool_value: Optional[bool] + """Boolean value.""" + + string_value: Optional[str] + """String value.""" + + +MemoryMetadataValueOrDict = Union[MemoryMetadataValue, MemoryMetadataValueDict] + + class MemoryFilter(_common.BaseModel): """Filter to apply when retrieving memories.""" @@ -13488,6 +13303,143 @@ class ContentMapContentsDict(TypedDict, total=False): ContentMapContentsOrDict = Union[ContentMapContents, ContentMapContentsDict] +class EvalCaseMetricResult(_common.BaseModel): + """Evaluation result for a single evaluation case for a single metric.""" + + metric_name: Optional[str] = Field( + default=None, description="""Name of the metric.""" + ) + score: Optional[float] = Field(default=None, description="""Score of the metric.""") + explanation: Optional[str] = Field( + default=None, description="""Explanation of the metric.""" + ) + rubric_verdicts: Optional[list[evals_types.RubricVerdict]] = Field( + default=None, + description="""The details of all the rubrics and their verdicts for rubric-based metrics.""", + ) + raw_output: Optional[list[str]] = Field( + default=None, description="""Raw output of the metric.""" + ) + error_message: Optional[str] = Field( + default=None, description="""Error message for the metric.""" + ) + + +class EvalCaseMetricResultDict(TypedDict, total=False): + """Evaluation result for a single evaluation case for a single metric.""" + + metric_name: Optional[str] + """Name of the metric.""" + + score: Optional[float] + """Score of the metric.""" + + explanation: Optional[str] + """Explanation of the metric.""" + + rubric_verdicts: Optional[list[evals_types.RubricVerdict]] + """The details of all the rubrics and their verdicts for rubric-based metrics.""" + + raw_output: Optional[list[str]] + """Raw output of the metric.""" + + error_message: Optional[str] + """Error message for the metric.""" + + +EvalCaseMetricResultOrDict = Union[EvalCaseMetricResult, EvalCaseMetricResultDict] + + +class EvaluationRunAgentConfig(_common.BaseModel): + """This field is experimental and may change in future versions. + + Agent config for an evaluation run. + """ + + developer_instruction: Optional[genai_types.Content] = Field( + default=None, description="""The developer instruction for the agent.""" + ) + tools: Optional[list[genai_types.Tool]] = Field( + default=None, description="""The tools available to the agent.""" + ) + + +class EvaluationRunAgentConfigDict(TypedDict, total=False): + """This field is experimental and may change in future versions. 
+ + Agent config for an evaluation run. + """ + + developer_instruction: Optional[genai_types.ContentDict] + """The developer instruction for the agent.""" + + tools: Optional[list[genai_types.ToolDict]] + """The tools available to the agent.""" + + +EvaluationRunAgentConfigOrDict = Union[ + EvaluationRunAgentConfig, EvaluationRunAgentConfigDict +] + + +class EvaluationRunInferenceConfig(_common.BaseModel): + """This field is experimental and may change in future versions. + + Configuration that describes an agent. + """ + + agent_config: Optional[EvaluationRunAgentConfig] = Field( + default=None, description="""The agent config.""" + ) + model: Optional[str] = Field( + default=None, + description="""The fully qualified name of the publisher model or endpoint to use for inference.""", + ) + + +class EvaluationRunInferenceConfigDict(TypedDict, total=False): + """This field is experimental and may change in future versions. + + Configuration that describes an agent. + """ + + agent_config: Optional[EvaluationRunAgentConfigDict] + """The agent config.""" + + model: Optional[str] + """The fully qualified name of the publisher model or endpoint to use for inference.""" + + +EvaluationRunInferenceConfigOrDict = Union[ + EvaluationRunInferenceConfig, EvaluationRunInferenceConfigDict +] + + +class WinRateStats(_common.BaseModel): + """Statistics for win rates for a single metric.""" + + win_rates: Optional[list[float]] = Field( + default=None, + description="""Win rates for the metric, one for each candidate.""", + ) + tie_rate: Optional[float] = Field( + default=None, description="""Tie rate for the metric.""" + ) + + +class WinRateStatsDict(TypedDict, total=False): + """Statistics for win rates for a single metric.""" + + win_rates: Optional[list[float]] + """Win rates for the metric, one for each candidate.""" + + tie_rate: Optional[float] + """Tie rate for the metric.""" + + +WinRateStatsOrDict = Union[WinRateStats, WinRateStatsDict] + + class EvaluateMethodConfig(_common.BaseModel): """Optional parameters for the evaluate method.""" diff --git a/vertexai/agent_engines/templates/adk.py b/vertexai/agent_engines/templates/adk.py index 0878eeb720..e717410fe6 100644 --- a/vertexai/agent_engines/templates/adk.py +++ b/vertexai/agent_engines/templates/adk.py @@ -318,10 +318,8 @@ def _warn_missing_dependency( return None def _detect_cloud_resource_id(project_id: str) -> Optional[str]: - location = os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_LOCATION", "") or os.getenv( - "GOOGLE_CLOUD_LOCATION", "" - ) - agent_engine_id = os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_ID") + location = os.getenv("GOOGLE_CLOUD_LOCATION", None) + agent_engine_id = os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_ID", None) if all(v is not None for v in (location, agent_engine_id)): return f"//aiplatform.googleapis.com/projects/{project_id}/locations/{location}/reasoningEngines/{agent_engine_id}" return None @@ -361,10 +359,7 @@ def _detect_cloud_resource_id(project_id: str) -> Optional[str]: "cloud.platform": "gcp.agent_engine", "service.name": os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_ID", ""), "service.instance.id": f"{uuid.uuid4().hex}-{os.getpid()}", - "cloud.region": ( - os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_LOCATION", "") - or os.getenv("GOOGLE_CLOUD_LOCATION", "") - ), + "cloud.region": os.getenv("GOOGLE_CLOUD_LOCATION", ""), } | ( {"cloud.resource_id": cloud_resource_id} @@ -777,15 +772,11 @@ def set_up(self): os.environ["GOOGLE_CLOUD_PROJECT"] = project location = self._tmpl_attrs.get("location") if location: - if 
"GOOGLE_CLOUD_AGENT_ENGINE_LOCATION" not in os.environ: - os.environ["GOOGLE_CLOUD_AGENT_ENGINE_LOCATION"] = location - if "GOOGLE_CLOUD_LOCATION" not in os.environ: - os.environ["GOOGLE_CLOUD_LOCATION"] = location + os.environ["GOOGLE_CLOUD_LOCATION"] = location express_mode_api_key = self._tmpl_attrs.get("express_mode_api_key") if express_mode_api_key and not project: os.environ["GOOGLE_API_KEY"] = express_mode_api_key # Clear location and project env vars if express mode api key is provided. - os.environ.pop("GOOGLE_CLOUD_AGENT_ENGINE_LOCATION", None) os.environ.pop("GOOGLE_CLOUD_LOCATION", None) os.environ.pop("GOOGLE_CLOUD_PROJECT", None) location = None diff --git a/vertexai/preview/reasoning_engines/templates/adk.py b/vertexai/preview/reasoning_engines/templates/adk.py index 63c988ac35..872de508ef 100644 --- a/vertexai/preview/reasoning_engines/templates/adk.py +++ b/vertexai/preview/reasoning_engines/templates/adk.py @@ -320,9 +320,7 @@ def _warn_missing_dependency( return None def _detect_cloud_resource_id(project_id: str) -> Optional[str]: - location = os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_LOCATION", "") or os.getenv( - "GOOGLE_CLOUD_LOCATION", "" - ) + location = os.getenv("GOOGLE_CLOUD_LOCATION", None) agent_engine_id = os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_ID", None) if all(v is not None for v in (location, agent_engine_id)): return f"//aiplatform.googleapis.com/projects/{project_id}/locations/{location}/reasoningEngines/{agent_engine_id}" @@ -363,10 +361,7 @@ def _detect_cloud_resource_id(project_id: str) -> Optional[str]: "cloud.platform": "gcp.agent_engine", "service.name": os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_ID", ""), "service.instance.id": f"{uuid.uuid4().hex}-{os.getpid()}", - "cloud.region": ( - os.getenv("GOOGLE_CLOUD_AGENT_ENGINE_LOCATION", "") - or os.getenv("GOOGLE_CLOUD_LOCATION", "") - ), + "cloud.region": os.getenv("GOOGLE_CLOUD_LOCATION", ""), } | ( {"cloud.resource_id": cloud_resource_id} @@ -693,11 +688,7 @@ def set_up(self): project = self._tmpl_attrs.get("project") os.environ["GOOGLE_CLOUD_PROJECT"] = project location = self._tmpl_attrs.get("location") - if location: - if "GOOGLE_CLOUD_AGENT_ENGINE_LOCATION" not in os.environ: - os.environ["GOOGLE_CLOUD_AGENT_ENGINE_LOCATION"] = location - if "GOOGLE_CLOUD_LOCATION" not in os.environ: - os.environ["GOOGLE_CLOUD_LOCATION"] = location + os.environ["GOOGLE_CLOUD_LOCATION"] = location # Disable content capture in custom ADK spans unless user enabled # tracing explicitly with the old flag