Commit 7460632

vertex-sdk-bot authored and copybara-github committed
feat: Allow using registered metric resource names in evaluation
PiperOrigin-RevId: 880868820
1 parent eacc86c commit 7460632

5 files changed: 119 additions & 18 deletions

vertexai/_genai/_evals_common.py

Lines changed: 18 additions & 0 deletions
@@ -1329,6 +1329,15 @@ def _resolve_evaluation_run_metrics(
     for metric_instance in metrics:
         if isinstance(metric_instance, types.EvaluationRunMetric):
             resolved_metrics_list.append(metric_instance)
+        elif isinstance(metric_instance, str) and metric_instance.startswith(
+            "projects/"
+        ):
+            resolved_metrics_list.append(
+                types.EvaluationRunMetric(
+                    metric=metric_instance.split("/")[-1],
+                    metric_resource_name=metric_instance,
+                )
+            )
         elif isinstance(
             metric_instance, _evals_metric_loaders.LazyLoadedPrebuiltMetric
         ):
@@ -1409,6 +1418,15 @@ def _resolve_metrics(
             raise
         elif isinstance(metric_instance, types.Metric):
             resolved_metrics_list.append(metric_instance)
+        elif isinstance(metric_instance, str) and metric_instance.startswith(
+            "projects/"
+        ):
+            # Wrap the string in a Metric object to satisfy Pydantic validation
+            # and extract the ID for the 'name' property
+            metric_id = metric_instance.split("/")[-1]
+            resolved_metrics_list.append(
+                types.Metric(name=metric_id, metric_resource_name=metric_instance)
+            )
         else:
             try:
                 metric_name_str = str(metric_instance)
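Note: the net effect of both branches is that any plain string beginning with "projects/" is now accepted wherever a metric is expected, with the metric ID derived from the final path segment of the resource name. A minimal standalone sketch of that wrapping logic (the helper name and the resource name below are illustrative, not SDK API):

def wrap_registered_metric(metric: str) -> dict:
    # Mirror the new branch: keep the full resource name and derive
    # the metric ID from the last path segment.
    if not metric.startswith("projects/"):
        raise ValueError(f"Not a metric resource name: {metric!r}")
    return {"name": metric.split("/")[-1], "metric_resource_name": metric}

print(wrap_registered_metric(
    "projects/my-project/locations/us-central1/metrics/fluency_v1"
))
# {'name': 'fluency_v1', 'metric_resource_name': 'projects/my-project/...'}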

vertexai/_genai/_evals_metric_handlers.py

Lines changed: 14 additions & 2 deletions
@@ -1027,7 +1027,7 @@ def get_metric_result(
         for attempt in range(_MAX_RETRIES):
             try:
                 api_response = self.module._evaluate_instances(
-                    metrics=[self.metric],
+                    metrics_sources=[self.metric],
                     instance=payload.get("instance"),
                     autorater_config=payload.get("autorater_config"),
                 )
@@ -1164,7 +1164,7 @@ def get_metric_result(
         for attempt in range(_MAX_RETRIES):
             try:
                 api_response = self.module._evaluate_instances(
-                    metrics=[self.metric],
+                    metrics_sources=[self.metric],
                     instance=payload.get("instance"),
                 )
                 break
@@ -1242,6 +1242,14 @@ def aggregate(
         )
 
 
+class RegisteredMetricHandler(PredefinedMetricHandler):
+    """Metric handler for registered metrics."""
+
+    def __init__(self, module: "evals.Evals", metric: types.Metric):
+        # Skip the parent check for SUPPORTED_PREDEFINED_METRICS
+        MetricHandler.__init__(self, module=module, metric=metric)
+
+
 _METRIC_HANDLER_MAPPING = [
     (
         lambda m: hasattr(m, "remote_custom_function") and m.remote_custom_function,
@@ -1251,6 +1259,10 @@ def aggregate(
         lambda m: m.custom_function and isinstance(m.custom_function, Callable),
         CustomMetricHandler,
     ),
+    (
+        lambda m: getattr(m, "metric_resource_name", None) is not None,
+        RegisteredMetricHandler,
+    ),
     (
         lambda m: m.name in ComputationMetricHandler.SUPPORTED_COMPUTATION_METRICS,
         ComputationMetricHandler,
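Note: _METRIC_HANDLER_MAPPING is scanned in order, so the new metric_resource_name predicate is placed ahead of the name-based checks; a registered metric is routed to RegisteredMetricHandler even if its derived name happens to match a computation or predefined metric. A rough sketch of that first-match dispatch (select_handler and the stub classes are illustrative, not SDK API):

from typing import Any

class RegisteredHandlerStub: ...
class ComputationHandlerStub: ...

def select_handler(metric: Any, mapping: list) -> type:
    # First predicate that matches wins, as in _METRIC_HANDLER_MAPPING.
    for predicate, handler_cls in mapping:
        if predicate(metric):
            return handler_cls
    raise ValueError(f"No handler found for metric: {metric!r}")

class StubMetric:
    name = "exact_match"
    metric_resource_name = "projects/p/locations/l/metrics/exact_match"

mapping = [
    (lambda m: getattr(m, "metric_resource_name", None) is not None, RegisteredHandlerStub),
    (lambda m: m.name == "exact_match", ComputationHandlerStub),
]
# The resource-name check wins even though the name also matches:
assert select_handler(StubMetric(), mapping) is RegisteredHandlerStub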

vertexai/_genai/_transformers.py

Lines changed: 26 additions & 1 deletion
@@ -31,12 +31,19 @@ def t_metrics(
     Args:
         metrics: A list of metrics used for evaluation.
         set_default_aggregation_metrics: Whether to set default aggregation metrics.
+
     Returns:
         A list of resolved metric payloads for the evaluation request.
     """
     metrics_payload = []
 
     for metric in metrics:
+        # Case 1: Registered Metric Resource Name
+        if isinstance(metric, str) and metric.startswith("projects/"):
+            metrics_payload.append({"metric_resource_name": metric})
+            continue
+
+        # Case 2: Inline Metric Configuration
         metric_payload_item: dict[str, Any] = {}
 
         metric_name = getv(metric, ["name"]).lower()
@@ -83,5 +90,23 @@
             raise ValueError(
                 f"Unsupported metric type or invalid metric name: {metric_name}"
             )
-        metrics_payload.append(metric_payload_item)
+        metrics_payload.append({"metric": metric_payload_item})
     return metrics_payload
+
+
+def t_metric_sources(metrics: list[Any]) -> list[dict[str, Any]]:
+    """Prepares the MetricSource payload for the evaluation request."""
+    sources_payload = []
+    for metric in metrics:
+        # Check if the 'metric' is a resource name string or contains one
+        resource_name = getattr(metric, "metric_resource_name", None)
+        if not resource_name and isinstance(metric, str) and metric.startswith("projects/"):
+            resource_name = metric
+
+        if resource_name:
+            sources_payload.append({"metric_resource_name": resource_name})
+        else:
+            # Fallback to existing Metric spec transformation
+            metric_payload = t_metrics([metric])[0]
+            sources_payload.append({"metric": metric_payload})
+    return sources_payload
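Note: t_metric_sources emits one of two payload shapes per entry — a bare resource-name reference for registered metrics, or an inline spec wrapped under a "metric" key for everything else. An illustrative example of the resulting payload (field values are hypothetical):

# One registered metric plus one inline metric might serialize to:
sources_payload = [
    # Registered metric: referenced by resource name only.
    {"metric_resource_name": "projects/my-project/locations/us-central1/metrics/fluency_v1"},
    # Inline metric: full spec carried in the request; the inner
    # fields depend on what t_metrics produces for the metric.
    {"metric": {"name": "exact_match"}},
]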

vertexai/_genai/evals.py

Lines changed: 32 additions & 7 deletions
@@ -238,11 +238,11 @@ def _EvaluateInstancesRequestParameters_to_vertex(
     if getv(from_object, ["autorater_config"]) is not None:
         setv(to_object, ["autoraterConfig"], getv(from_object, ["autorater_config"]))
 
-    if getv(from_object, ["metrics"]) is not None:
+    if getv(from_object, ["metrics_sources"]) is not None:
         setv(
             to_object,
-            ["metrics"],
-            [item for item in t.t_metrics(getv(from_object, ["metrics"]))],
+            ["metricSources"],
+            t.t_metric_sources(getv(from_object, ["metrics_sources"])),
         )
 
     if getv(from_object, ["instance"]) is not None:
@@ -399,6 +399,13 @@ def _EvaluationRunMetric_from_vertex(
         _UnifiedMetric_from_vertex(getv(from_object, ["metricConfig"]), to_object),
     )
 
+    if getv(from_object, ["metricResourceName"]) is not None:
+        setv(
+            to_object,
+            ["metric_resource_name"],
+            getv(from_object, ["metricResourceName"]),
+        )
+
     return to_object
@@ -417,6 +424,13 @@ def _EvaluationRunMetric_to_vertex(
         _UnifiedMetric_to_vertex(getv(from_object, ["metric_config"]), to_object),
     )
 
+    if getv(from_object, ["metric_resource_name"]) is not None:
+        setv(
+            to_object,
+            ["metricResourceName"],
+            getv(from_object, ["metric_resource_name"]),
+        )
+
     return to_object
@@ -512,6 +526,13 @@ def _GenerateInstanceRubricsRequest_to_vertex(
         ),
     )
 
+    if getv(from_object, ["metric_resource_name"]) is not None:
+        setv(
+            to_object,
+            ["metricResourceName"],
+            getv(from_object, ["metric_resource_name"]),
+        )
+
     if getv(from_object, ["config"]) is not None:
         setv(to_object, ["config"], getv(from_object, ["config"]))
 
@@ -976,7 +997,7 @@ def _evaluate_instances(
         ] = None,
         rubric_based_metric_input: Optional[types.RubricBasedMetricInputOrDict] = None,
         autorater_config: Optional[genai_types.AutoraterConfigOrDict] = None,
-        metrics: Optional[list[types.MetricOrDict]] = None,
+        metrics_sources: Optional[list[types.MetricOrDict]] = None,
         instance: Optional[types.EvaluationInstanceOrDict] = None,
         config: Optional[types.EvaluateInstancesConfigOrDict] = None,
     ) -> types.EvaluateInstancesResponse:
@@ -996,7 +1017,7 @@ def _evaluate_instances(
             tool_parameter_kv_match_input=tool_parameter_kv_match_input,
             rubric_based_metric_input=rubric_based_metric_input,
             autorater_config=autorater_config,
-            metrics=metrics,
+            metrics_sources=metrics_sources,
             instance=instance,
             config=config,
         )
@@ -1049,6 +1070,7 @@ def _generate_rubrics(
             types.PredefinedMetricSpecOrDict
         ] = None,
         rubric_generation_spec: Optional[types.RubricGenerationSpecOrDict] = None,
+        metric_resource_name: Optional[str] = None,
         config: Optional[types.RubricGenerationConfigOrDict] = None,
     ) -> types.GenerateInstanceRubricsResponse:
         """
@@ -1059,6 +1081,7 @@ def _generate_rubrics(
             contents=contents,
             predefined_rubric_generation_spec=predefined_rubric_generation_spec,
             rubric_generation_spec=rubric_generation_spec,
+            metric_resource_name=metric_resource_name,
            config=config,
         )
 
@@ -2230,7 +2253,7 @@ async def _evaluate_instances(
         ] = None,
         rubric_based_metric_input: Optional[types.RubricBasedMetricInputOrDict] = None,
         autorater_config: Optional[genai_types.AutoraterConfigOrDict] = None,
-        metrics: Optional[list[types.MetricOrDict]] = None,
+        metrics_sources: Optional[list[types.MetricOrDict]] = None,
         instance: Optional[types.EvaluationInstanceOrDict] = None,
         config: Optional[types.EvaluateInstancesConfigOrDict] = None,
     ) -> types.EvaluateInstancesResponse:
@@ -2250,7 +2273,7 @@ async def _evaluate_instances(
             tool_parameter_kv_match_input=tool_parameter_kv_match_input,
             rubric_based_metric_input=rubric_based_metric_input,
             autorater_config=autorater_config,
-            metrics=metrics,
+            metrics_sources=metrics_sources,
             instance=instance,
             config=config,
         )
@@ -2305,6 +2328,7 @@ async def _generate_rubrics(
             types.PredefinedMetricSpecOrDict
         ] = None,
         rubric_generation_spec: Optional[types.RubricGenerationSpecOrDict] = None,
+        metric_resource_name: Optional[str] = None,
         config: Optional[types.RubricGenerationConfigOrDict] = None,
     ) -> types.GenerateInstanceRubricsResponse:
         """
@@ -2315,6 +2339,7 @@ async def _generate_rubrics(
             contents=contents,
             predefined_rubric_generation_spec=predefined_rubric_generation_spec,
             rubric_generation_spec=rubric_generation_spec,
+            metric_resource_name=metric_resource_name,
             config=config,
         )
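Note: with the parameter renamed, internal callers pass metric sources via metrics_sources, and a Metric carrying metric_resource_name flows through t_metric_sources as a resource-name reference. A hedged usage sketch of the private method (the evals_module variable, resource name, and instance payload fields are hypothetical):

# Assumes an initialized Evals module bound to `evals_module`.
response = evals_module._evaluate_instances(
    metrics_sources=[
        types.Metric(
            name="fluency_v1",
            metric_resource_name=(
                "projects/my-project/locations/us-central1/metrics/fluency_v1"
            ),
        ),
    ],
    instance={"prompt": "What is 2 + 2?", "response": "4"},
)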

vertexai/_genai/types/common.py

Lines changed: 29 additions & 8 deletions
@@ -2326,6 +2326,9 @@ class LLMBasedMetricSpec(_common.BaseModel):
         default=None,
         description="""Dynamically generate rubrics using this specification.""",
     )
+    metric_resource_name: Optional[str] = Field(
+        default=None, description="""The resource name of the metric definition."""
+    )
 
 
 class LLMBasedMetricSpecDict(TypedDict, total=False):
@@ -2350,6 +2353,9 @@ class LLMBasedMetricSpecDict(TypedDict, total=False):
     rubric_generation_spec: Optional[RubricGenerationSpecDict]
     """Dynamically generate rubrics using this specification."""
 
+    metric_resource_name: Optional[str]
+    """The resource name of the metric definition."""
+
 
 LLMBasedMetricSpecOrDict = Union[LLMBasedMetricSpec, LLMBasedMetricSpecDict]
 
@@ -2482,6 +2488,9 @@ class EvaluationRunMetric(_common.BaseModel):
     metric_config: Optional[UnifiedMetric] = Field(
         default=None, description="""The unified metric used for evaluation run."""
     )
+    metric_resource_name: Optional[str] = Field(
+        default=None, description="""The resource name of the metric definition."""
+    )
 
 
 class EvaluationRunMetricDict(TypedDict, total=False):
@@ -2493,6 +2502,9 @@ class EvaluationRunMetricDict(TypedDict, total=False):
     metric_config: Optional[UnifiedMetricDict]
     """The unified metric used for evaluation run."""
 
+    metric_resource_name: Optional[str]
+    """The resource name of the metric definition."""
+
 
 EvaluationRunMetricOrDict = Union[EvaluationRunMetric, EvaluationRunMetricDict]
 
@@ -4439,6 +4451,9 @@ class Metric(_common.BaseModel):
         default=None,
         description="""Optional steering instruction parameters for the automated predefined metric.""",
     )
+    metric_resource_name: Optional[str] = Field(
+        default=None, description="""The resource name of the metric definition."""
+    )
 
     # Allow extra fields to support metric-specific config fields.
     model_config = ConfigDict(extra="allow")
@@ -4643,6 +4658,9 @@ class MetricDict(TypedDict, total=False):
     metric_spec_parameters: Optional[dict[str, Any]]
     """Optional steering instruction parameters for the automated predefined metric."""
 
+    metric_resource_name: Optional[str]
+    """The resource name of the metric definition."""
+
 
 MetricOrDict = Union[Metric, MetricDict]
 
@@ -4679,11 +4697,9 @@ class _EvaluateInstancesRequestParameters(_common.BaseModel):
     autorater_config: Optional[genai_types.AutoraterConfig] = Field(
         default=None, description=""""""
     )
-    metrics: Optional[list[Metric]] = Field(
+    metrics_sources: Optional[list[Metric]] = Field(
         default=None,
-        description="""The metrics used for evaluation.
-      Currently, we only support evaluating a single metric. If multiple metrics
-      are provided, only the first one will be evaluated.""",
+        description="""The metrics used for evaluation. Each can be an inline configuration or a registered metric resource name.""",
     )
     instance: Optional[EvaluationInstance] = Field(
         default=None, description="""The instance to be evaluated."""
@@ -4727,10 +4743,8 @@ class _EvaluateInstancesRequestParametersDict(TypedDict, total=False):
     autorater_config: Optional[genai_types.AutoraterConfigDict]
     """"""
 
-    metrics: Optional[list[MetricDict]]
-    """The metrics used for evaluation.
-      Currently, we only support evaluating a single metric. If multiple metrics
-      are provided, only the first one will be evaluated."""
+    metrics_sources: Optional[list[MetricDict]]
+    """The metrics used for evaluation. Each can be an inline configuration or a registered metric resource name."""
 
     instance: Optional[EvaluationInstanceDict]
     """The instance to be evaluated."""
@@ -5354,6 +5368,10 @@ class _GenerateInstanceRubricsRequest(_common.BaseModel):
         default=None,
         description="""Specification for how the rubrics should be generated.""",
     )
+    metric_resource_name: Optional[str] = Field(
+        default=None,
+        description="""Registered metric resource name. If this field is set, the configuration provided in this field is used for rubric generation. The `predefined_rubric_generation_spec` and `rubric_generation_spec` fields will be ignored.""",
+    )
     config: Optional[RubricGenerationConfig] = Field(default=None, description="""""")
 
 
@@ -5374,6 +5392,9 @@ class _GenerateInstanceRubricsRequestDict(TypedDict, total=False):
     rubric_generation_spec: Optional[RubricGenerationSpecDict]
     """Specification for how the rubrics should be generated."""
 
+    metric_resource_name: Optional[str]
+    """Registered metric resource name. If this field is set, the configuration provided in this field is used for rubric generation. The `predefined_rubric_generation_spec` and `rubric_generation_spec` fields will be ignored."""
+
     config: Optional[RubricGenerationConfigDict]
     """"""
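Note: the new field is mirrored on the Pydantic models and their TypedDict counterparts, so either spelling can carry a registered metric. A small construction sketch (the resource name is hypothetical; assumes the types module above is importable as vertexai._genai.types):

from vertexai._genai import types

resource = "projects/my-project/locations/us-central1/metrics/fluency_v1"

# Pydantic model form, as produced by _resolve_evaluation_run_metrics:
run_metric = types.EvaluationRunMetric(
    metric="fluency_v1",
    metric_resource_name=resource,
)

# TypedDict form, accepted wherever MetricOrDict is allowed:
metric_dict: types.MetricDict = {
    "name": "fluency_v1",
    "metric_resource_name": resource,
}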
