Commit 1c10430

vertex-sdk-bot authored and copybara-github committed
feat: Add support for referencing registered metrics by resource name in evaluation APIs
PiperOrigin-RevId: 878604099
1 parent b3bae32 commit 1c10430

5 files changed: 106 additions & 12 deletions
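
In practice, this lets callers reference a registered metric by its full resource name anywhere the evals APIs previously required an inline metric. A minimal usage sketch, assuming a `vertexai.Client` entry point and an `evals.evaluate`-style call; the project, location, and metric names below are placeholders, not values from this commit:

```python
import vertexai
from vertexai._genai import types

# Placeholder project/location; any authenticated client works here.
client = vertexai.Client(project="my-project", location="us-central1")

metrics = [
    # Inline metric config, as before.
    types.Metric(name="exact_match"),
    # New: reference a registered metric by its full resource name.
    "projects/my-project/locations/us-central1/metrics/my-registered-metric",
]
# The list can then be passed wherever metrics are accepted, e.g.
# client.evals.evaluate(dataset=my_dataset, metrics=metrics)  # assumed call shape
```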


vertexai/_genai/_evals_common.py

Lines changed: 14 additions & 5 deletions
```diff
@@ -1389,15 +1389,17 @@ def _resolve_evaluation_run_metrics(
 
 
 def _resolve_metrics(
-    metrics: list[types.Metric], api_client: Any
-) -> list[types.Metric]:
+    metrics: list[Any], api_client: Any
+) -> list[types.MetricSource]:
     """Resolves a list of metric instances, loading RubricMetric if necessary."""
     resolved_metrics_list = []
     for metric_instance in metrics:
         if isinstance(metric_instance, _evals_metric_loaders.LazyLoadedPrebuiltMetric):
             try:
                 resolved_metrics_list.append(
-                    metric_instance.resolve(api_client=api_client)
+                    types.MetricSource(
+                        metric=metric_instance.resolve(api_client=api_client)
+                    )
                 )
             except Exception as e:
                 logger.error(
@@ -1408,18 +1410,25 @@ def _resolve_metrics(
                 )
                 raise
         elif isinstance(metric_instance, types.Metric):
-            resolved_metrics_list.append(metric_instance)
+            resolved_metrics_list.append(types.MetricSource(metric=metric_instance))
         else:
             try:
                 metric_name_str = str(metric_instance)
+                if metric_name_str.lower().startswith("projects/"):
+                    resolved_metrics_list.append(
+                        types.MetricSource(metric_resource_name=metric_name_str)
+                    )
+                    continue
                 lazy_metric_instance = getattr(
                     _evals_metric_loaders.RubricMetric, metric_name_str.upper()
                 )
                 if isinstance(
                     lazy_metric_instance, _evals_metric_loaders.LazyLoadedPrebuiltMetric
                 ):
                     resolved_metrics_list.append(
-                        lazy_metric_instance.resolve(api_client=api_client)
+                        types.MetricSource(
+                            metric=lazy_metric_instance.resolve(api_client=api_client)
+                        )
                     )
                 else:
                     raise TypeError(
```
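
The net effect in `_resolve_metrics`: every input is normalized to a `types.MetricSource`. Strings are now checked for a `projects/` prefix (case-insensitively, via `.lower().startswith(...)`) before falling through to the prebuilt `RubricMetric` lookup. A sketch of the three input shapes; the resource name, the prebuilt metric name, and the `api_client` handle are placeholders/assumptions, not values from this diff:

```python
# Placeholder inputs; only the dispatch behavior is the point here.
sources = _resolve_metrics(
    metrics=[
        types.Metric(name="exact_match"),  # -> MetricSource(metric=...)
        "projects/123/locations/us-central1/metrics/my-metric",  # -> resource name
        "general_quality",  # assumed prebuilt name; resolved via RubricMetric, then wrapped
    ],
    api_client=api_client,
)
assert sources[1].metric_resource_name is not None
```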

vertexai/_genai/_transformers.py

Lines changed: 10 additions & 2 deletions
```diff
@@ -23,20 +23,28 @@
 
 
 def t_metrics(
-    metrics: list["types.MetricSubclass"],
+    metrics: list["types.MetricSource"],
     set_default_aggregation_metrics: bool = False,
 ) -> list[dict[str, Any]]:
     """Prepares the metric payload for the evaluation request.
 
     Args:
       metrics: A list of metrics used for evaluation.
       set_default_aggregation_metrics: Whether to set default aggregation metrics.
+
     Returns:
       A list of resolved metric payloads for the evaluation request.
     """
     metrics_payload = []
 
-    for metric in metrics:
+    for metric_source in metrics:
+        if metric_source.metric_resource_name:
+            metrics_payload.append(
+                {"metric_resource_name": metric_source.metric_resource_name}
+            )
+            continue
+
+        metric = metric_source.metric
         metric_payload_item: dict[str, Any] = {}
 
         metric_name = getv(metric, ["name"]).lower()
```
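
`t_metrics` therefore emits one of two payload shapes per source: a registered metric collapses to a single-key dict and skips the inline handling entirely. A quick sketch, grounded directly in the branch above (the resource name is a placeholder):

```python
from vertexai._genai import _transformers, types

name = "projects/123/locations/us-central1/metrics/my-metric"
payload = _transformers.t_metrics([types.MetricSource(metric_resource_name=name)])
assert payload == [{"metric_resource_name": name}]
```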

vertexai/_genai/evals.py

Lines changed: 26 additions & 5 deletions
```diff
@@ -232,11 +232,7 @@ def _EvaluateInstancesRequestParameters_to_vertex(
     setv(to_object, ["autoraterConfig"], getv(from_object, ["autorater_config"]))
 
     if getv(from_object, ["metrics"]) is not None:
-        setv(
-            to_object,
-            ["metrics"],
-            [item for item in t.t_metrics(getv(from_object, ["metrics"]))],
-        )
+        setv(to_object, ["metricSources"], t.t_metrics(getv(from_object, ["metrics"])))
 
     if getv(from_object, ["instance"]) is not None:
         setv(
@@ -346,6 +342,13 @@ def _EvaluationRunMetric_from_vertex(
         _UnifiedMetric_from_vertex(getv(from_object, ["metricConfig"]), to_object),
     )
 
+    if getv(from_object, ["metricResourceName"]) is not None:
+        setv(
+            to_object,
+            ["metric_resource_name"],
+            getv(from_object, ["metricResourceName"]),
+        )
+
     return to_object
 
 
@@ -364,6 +367,13 @@ def _EvaluationRunMetric_to_vertex(
         _UnifiedMetric_to_vertex(getv(from_object, ["metric_config"]), to_object),
     )
 
+    if getv(from_object, ["metric_resource_name"]) is not None:
+        setv(
+            to_object,
+            ["metricResourceName"],
+            getv(from_object, ["metric_resource_name"]),
+        )
+
     return to_object
 
 
@@ -440,6 +450,13 @@ def _GenerateInstanceRubricsRequest_to_vertex(
     if getv(from_object, ["contents"]) is not None:
         setv(to_object, ["contents"], getv(from_object, ["contents"]))
 
+    if getv(from_object, ["metric_resource_name"]) is not None:
+        setv(
+            to_object,
+            ["metricResourceName"],
+            getv(from_object, ["metric_resource_name"]),
+        )
+
     if getv(from_object, ["predefined_rubric_generation_spec"]) is not None:
         setv(
             to_object,
@@ -989,6 +1006,7 @@ def _generate_rubrics(
         self,
         *,
         contents: list[genai_types.ContentOrDict],
+        metric_resource_name: Optional[str] = None,
         predefined_rubric_generation_spec: Optional[
             types.PredefinedMetricSpecOrDict
         ] = None,
@@ -1001,6 +1019,7 @@ def _generate_rubrics(
 
         parameter_model = types._GenerateInstanceRubricsRequest(
             contents=contents,
+            metric_resource_name=metric_resource_name,
            predefined_rubric_generation_spec=predefined_rubric_generation_spec,
             rubric_generation_spec=rubric_generation_spec,
             config=config,
@@ -2245,6 +2264,7 @@ async def _generate_rubrics(
         self,
         *,
         contents: list[genai_types.ContentOrDict],
+        metric_resource_name: Optional[str] = None,
         predefined_rubric_generation_spec: Optional[
             types.PredefinedMetricSpecOrDict
         ] = None,
@@ -2257,6 +2277,7 @@ async def _generate_rubrics(
 
         parameter_model = types._GenerateInstanceRubricsRequest(
            contents=contents,
+            metric_resource_name=metric_resource_name,
             predefined_rubric_generation_spec=predefined_rubric_generation_spec,
             rubric_generation_spec=rubric_generation_spec,
             config=config,
```
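
These transformers pin down the wire mapping: the request-level `metrics` list is now serialized under `metricSources`, and `metric_resource_name` round-trips to the camelCase `metricResourceName`. A sketch of a rubric-generation request using the new field (a private request type; the contents and resource name are placeholders):

```python
from vertexai._genai import types

request = types._GenerateInstanceRubricsRequest(
    contents=[{"role": "user", "parts": [{"text": "Summarize the report."}]}],
    metric_resource_name="projects/123/locations/us-central1/metrics/my-metric",
)
# _GenerateInstanceRubricsRequest_to_vertex then places the value under the
# "metricResourceName" key of the JSON request body.
```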

vertexai/_genai/types/__init__.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -690,6 +690,9 @@
 from .common import Metric
 from .common import MetricDict
 from .common import MetricOrDict
+from .common import MetricSource
+from .common import MetricSourceDict
+from .common import MetricSourceOrDict
 from .common import MetricPromptBuilder
 from .common import MetricResult
 from .common import MetricResultDict
@@ -1505,6 +1508,9 @@
     "Metric",
     "MetricDict",
     "MetricOrDict",
+    "MetricSource",
+    "MetricSourceDict",
+    "MetricSourceOrDict",
     "MetricResult",
     "MetricResultDict",
     "MetricResultOrDict",
```

vertexai/_genai/types/common.py

Lines changed: 50 additions & 0 deletions
```diff
@@ -2326,6 +2326,9 @@ class LLMBasedMetricSpec(_common.BaseModel):
         default=None,
         description="""Dynamically generate rubrics using this specification.""",
     )
+    metric_resource_name: Optional[str] = Field(
+        default=None, description="""The resource name of the metric definition."""
+    )
 
 
 class LLMBasedMetricSpecDict(TypedDict, total=False):
@@ -2350,6 +2353,9 @@ class LLMBasedMetricSpecDict(TypedDict, total=False):
     rubric_generation_spec: Optional[RubricGenerationSpecDict]
     """Dynamically generate rubrics using this specification."""
 
+    metric_resource_name: Optional[str]
+    """The resource name of the metric definition."""
+
 
 LLMBasedMetricSpecOrDict = Union[LLMBasedMetricSpec, LLMBasedMetricSpecDict]
 
@@ -2482,6 +2488,9 @@ class EvaluationRunMetric(_common.BaseModel):
     metric_config: Optional[UnifiedMetric] = Field(
         default=None, description="""The unified metric used for evaluation run."""
     )
+    metric_resource_name: Optional[str] = Field(
+        default=None, description="""The resource name of the metric definition."""
+    )
 
 
 class EvaluationRunMetricDict(TypedDict, total=False):
@@ -2493,6 +2502,9 @@ class EvaluationRunMetricDict(TypedDict, total=False):
     metric_config: Optional[UnifiedMetricDict]
     """The unified metric used for evaluation run."""
 
+    metric_resource_name: Optional[str]
+    """The resource name of the metric definition."""
+
 
 EvaluationRunMetricOrDict = Union[EvaluationRunMetric, EvaluationRunMetricDict]
 
@@ -4640,6 +4652,29 @@ class MetricDict(TypedDict, total=False):
 MetricOrDict = Union[Metric, MetricDict]
 
 
+class MetricSource(_common.BaseModel):
+    """The metric source used for evaluation."""
+
+    metric: Optional[Metric] = Field(
+        default=None,
+        description="""Inline metric config.""",
+    )
+    metric_resource_name: Optional[str] = Field(
+        default=None,
+        description="""Resource name for registered metric.""",
+    )
+
+
+class MetricSourceDict(TypedDict, total=False):
+    """The metric source used for evaluation."""
+
+    metric: Optional[MetricDict]
+    metric_resource_name: Optional[str]
+
+
+MetricSourceOrDict = Union[MetricSource, MetricSourceDict]
+
+
 class _EvaluateInstancesRequestParameters(_common.BaseModel):
     """Parameters for evaluating instances."""
 
@@ -5334,6 +5369,14 @@ class _GenerateInstanceRubricsRequest(_common.BaseModel):
         default=None,
         description="""The prompt to generate rubrics from. For single-turn queries, this is a single instance. For multi-turn queries, this is a repeated field that contains conversation history + latest request.""",
     )
+    metric_resource_name: Optional[str] = Field(
+        default=None,
+        description="""The resource name of a registered metric. Rubric generation using
+      predefined metric spec or LLMBasedMetricSpec is supported. If this field is
+      set, the configuration provided in this field is used for rubric
+      generation. The `predefined_rubric_generation_spec` and
+      `rubric_generation_spec` fields will be ignored.""",
+    )
     predefined_rubric_generation_spec: Optional[PredefinedMetricSpec] = Field(
         default=None,
         description="""Specification for using the rubric generation configs of a pre-defined
@@ -5356,6 +5399,13 @@ class _GenerateInstanceRubricsRequestDict(TypedDict, total=False):
     contents: Optional[list[genai_types.ContentDict]]
     """The prompt to generate rubrics from. For single-turn queries, this is a single instance. For multi-turn queries, this is a repeated field that contains conversation history + latest request."""
 
+    metric_resource_name: Optional[str]
+    """The resource name of a registered metric. Rubric generation using
+    predefined metric spec or LLMBasedMetricSpec is supported. If this field is
+    set, the configuration provided in this field is used for rubric
+    generation. The `predefined_rubric_generation_spec` and
+    `rubric_generation_spec` fields will be ignored."""
+
     predefined_rubric_generation_spec: Optional[PredefinedMetricSpecDict]
     """Specification for using the rubric generation configs of a pre-defined
     metric, e.g. "generic_quality_v1" and "instruction_following_v1".
```
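
Note that `MetricSource` does not enforce that exactly one of its two fields is set: both are `Optional` with no validator, and `t_metrics` simply prefers `metric_resource_name` when present. Similarly, per the field docstring above, a set `metric_resource_name` on the rubric-generation request overrides both generation-spec fields. Both construction styles the `MetricSourceOrDict` union accepts, with a placeholder resource name:

```python
from vertexai._genai.types import MetricSource, MetricSourceDict

name = "projects/123/locations/us-central1/metrics/my-metric"

as_model = MetricSource(metric_resource_name=name)          # pydantic model form
as_dict: MetricSourceDict = {"metric_resource_name": name}  # TypedDict form
```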
