diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
index 76b7f36f74..2988553a9f 100644
--- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
+++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
@@ -15,7 +15,7 @@
 # pylint: disable=protected-access,bad-continuation,missing-function-docstring
 
 from tests.unit.vertexai.genai.replays import pytest_helper
-from vertexai import types
+from vertexai._genai import types
 from google.genai import types as genai_types
 import pytest
 import pandas as pd
@@ -238,6 +238,33 @@ def test_create_eval_run_with_inference_configs(client):
     assert evaluation_run.error is None
 
 
+def test_create_eval_run_with_metric_resource_name(client):
+    """Tests create_evaluation_run with metric_resource_name."""
+    metric_resource_name = "projects/977012026409/locations/us-central1/evaluationMetrics/6048334299558576128"
+    contents = pd.DataFrame(
+        [
+            {
+                "prompt": "How do I ensure data residency in Vertex AI?",
+                "response": "Vertex AI provides regional endpoints...",
+            }
+        ]
+    )
+    metric = types.Metric(
+        name="my_custom_metric", metric_resource_name=metric_resource_name
+    )
+    evaluation_run = client.evals.create_evaluation_run(
+        dataset=types.EvaluationDataset(eval_dataset_df=contents),
+        metrics=[metric],
+        dest=GCS_DEST,
+    )
+    assert isinstance(evaluation_run, types.EvaluationRun)
+    assert evaluation_run.evaluation_config.metrics[0].metric == "my_custom_metric"
+    assert (
+        evaluation_run.evaluation_config.metrics[0].metric_resource_name
+        == metric_resource_name
+    )
+
+
 # Dataframe tests fail in replay mode because of UUID generation mismatch.
 # def test_create_eval_run_data_source_evaluation_dataset(client):
 #     """Tests that create_evaluation_run() creates a correctly structured
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index 97e56c9e19..4c3721ba9e 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -45,6 +45,7 @@
 from . import _gcs_utils
 from . import evals
 from . import types
+from . import _transformers as t
 
 logger = logging.getLogger(__name__)
 
@@ -1328,7 +1329,7 @@ def _resolve_dataset_inputs(
 
 
 def _resolve_evaluation_run_metrics(
-    metrics: list[types.EvaluationRunMetric], api_client: Any
+    metrics: list[types.EvaluationRunMetric] | list[types.Metric], api_client: Any
 ) -> list[types.EvaluationRunMetric]:
     """Resolves a list of evaluation run metric instances, loading RubricMetric if necessary."""
     if not metrics:
@@ -1361,6 +1362,16 @@ def _resolve_evaluation_run_metrics(
                     e,
                 )
                 raise
+        elif isinstance(metric_instance, types.Metric):
+            config_dict = t.t_metrics([metric_instance])[0]
+            res_name = config_dict.pop("metric_resource_name", None)
+            resolved_metrics_list.append(
+                types.EvaluationRunMetric(
+                    metric=metric_instance.name,
+                    metric_config=config_dict if config_dict else None,
+                    metric_resource_name=res_name,
+                )
+            )
         else:
             try:
                 metric_name_str = str(metric_instance)
diff --git a/vertexai/_genai/_transformers.py b/vertexai/_genai/_transformers.py
index a7bad99bba..14d1353534 100644
--- a/vertexai/_genai/_transformers.py
+++ b/vertexai/_genai/_transformers.py
@@ -38,6 +38,8 @@ def t_metrics(
 
     for metric in metrics:
         metric_payload_item: dict[str, Any] = {}
+        if hasattr(metric, "metric_resource_name") and metric.metric_resource_name:
+            metric_payload_item["metric_resource_name"] = metric.metric_resource_name
 
         metric_name = getv(metric, ["name"]).lower()
 
@@ -79,6 +81,9 @@ def t_metrics(
                     "return_raw_output": return_raw_output
                 }
             metric_payload_item["pointwise_metric_spec"] = pointwise_spec
+        elif "metric_resource_name" in metric_payload_item:
+            # Valid case: Metric is identified by resource name; no inline spec required.
+            pass
         else:
             raise ValueError(
                 f"Unsupported metric type or invalid metric name: {metric_name}"
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index 1088df664a..842db3412f 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -392,6 +392,13 @@ def _EvaluationRunMetric_from_vertex(
     if getv(from_object, ["metric"]) is not None:
         setv(to_object, ["metric"], getv(from_object, ["metric"]))
 
+    if getv(from_object, ["metricResourceName"]) is not None:
+        setv(
+            to_object,
+            ["metric_resource_name"],
+            getv(from_object, ["metricResourceName"]),
+        )
+
     if getv(from_object, ["metricConfig"]) is not None:
         setv(
             to_object,
@@ -410,6 +417,13 @@ def _EvaluationRunMetric_to_vertex(
     if getv(from_object, ["metric"]) is not None:
         setv(to_object, ["metric"], getv(from_object, ["metric"]))
 
+    if getv(from_object, ["metric_resource_name"]) is not None:
+        setv(
+            to_object,
+            ["metricResourceName"],
+            getv(from_object, ["metric_resource_name"]),
+        )
+
     if getv(from_object, ["metric_config"]) is not None:
         setv(
             to_object,
diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py
index 4a7512d0fe..ef1549ea75 100644
--- a/vertexai/_genai/types/common.py
+++ b/vertexai/_genai/types/common.py
@@ -2479,6 +2479,10 @@ class EvaluationRunMetric(_common.BaseModel):
     metric: Optional[str] = Field(
         default=None, description="""The name of the metric."""
     )
+    metric_resource_name: Optional[str] = Field(
+        default=None,
+        description="""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}""",
+    )
     metric_config: Optional[UnifiedMetric] = Field(
         default=None, description="""The unified metric used for evaluation run."""
     )
@@ -2490,6 +2494,9 @@ class EvaluationRunMetricDict(TypedDict, total=False):
     metric: Optional[str]
     """The name of the metric."""
 
+    metric_resource_name: Optional[str]
+    """The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}"""
+
     metric_config: Optional[UnifiedMetricDict]
     """The unified metric used for evaluation run."""
 
@@ -4439,6 +4446,10 @@ class Metric(_common.BaseModel):
         default=None,
         description="""Optional steering instruction parameters for the automated predefined metric.""",
     )
+    metric_resource_name: Optional[str] = Field(
+        default=None,
+        description="""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}""",
+    )
 
     # Allow extra fields to support metric-specific config fields.
     model_config = ConfigDict(extra="allow")
@@ -4643,6 +4654,9 @@ class MetricDict(TypedDict, total=False):
     metric_spec_parameters: Optional[dict[str, Any]]
     """Optional steering instruction parameters for the automated predefined metric."""
 
+    metric_resource_name: Optional[str]
+    """The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}"""
+
 
 MetricOrDict = Union[Metric, MetricDict]
 