From 2b0a98cc06ddaa40dab52359ade3dafa0ab07062 Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer
Date: Tue, 10 Mar 2026 00:09:11 -0700
Subject: [PATCH] feat: GenAI Client(evals): Add validation for `agent_data` in
 EvaluationDataset in create_evaluation_run

PiperOrigin-RevId: 881253576
---
 tests/unit/vertexai/genai/test_evals.py | 152 ++++++++++++++++++++++++
 vertexai/_genai/_evals_constant.py      |   3 +
 vertexai/_genai/_evals_utils.py         |  91 ++++++++++++++
 vertexai/_genai/evals.py                |   4 +
 4 files changed, 250 insertions(+)

diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
index 4a8745672f..0fa05aa2cc 100644
--- a/tests/unit/vertexai/genai/test_evals.py
+++ b/tests/unit/vertexai/genai/test_evals.py
@@ -3292,6 +3292,158 @@ def my_search_tool(query: str) -> str:
         mock_from_callable.assert_called_once_with(callable=my_search_tool)
 
 
+class TestValidateDatasetAgentData:
+    """Unit tests for the _validate_dataset_agent_data function."""
+
+    def test_valid_agent_data_in_df(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame(
+                [
+                    {
+                        "agent_data": {
+                            "turns": [{"turn_index": 0, "turn_id": "1", "events": []}]
+                        }
+                    },
+                    {
+                        "agent_data": '{"turns": [{"turn_index": 0, "turn_id": "2", "events": []}]}'
+                    },
+                    {
+                        "agent_data": vertexai_genai_types.evals.AgentData(
+                            turns=[{"turn_index": 0, "turn_id": "3", "events": []}]
+                        )
+                    },
+                ]
+            )
+        )
+        _evals_utils._validate_dataset_agent_data(dataset)
+
+    def test_valid_agent_data_in_eval_cases(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_cases=[
+                vertexai_genai_types.EvalCase(
+                    agent_data={
+                        "turns": [{"turn_index": 0, "turn_id": "1", "events": []}]
+                    }
+                ),
+                vertexai_genai_types.EvalCase(
+                    agent_data=json.loads(
+                        '{"turns": [{"turn_index": 0, "turn_id": "2", "events": []}]}'
+                    )
+                ),
+                vertexai_genai_types.EvalCase(
+                    agent_data=vertexai_genai_types.evals.AgentData(
+                        turns=[{"turn_index": 0, "turn_id": "3", "events": []}]
+                    )
+                ),
+            ]
+        )
+        _evals_utils._validate_dataset_agent_data(dataset)
+
+    def test_invalid_json_string_raises_error(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame([{"agent_data": '{"turns":'}])
+        )
+        with pytest.raises(ValueError, match="is not valid JSON"):
+            _evals_utils._validate_dataset_agent_data(dataset)
+
+    def test_invalid_dict_raises_error(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame([{"agent_data": {"agents": 123}}])
+        )
+        with pytest.raises(ValueError, match="is inconsistent with AgentData type"):
+            _evals_utils._validate_dataset_agent_data(dataset)
+
+    def test_valid_agent_data_with_error_in_dict(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame(
+                [{"agent_data": {"error": "some error message"}}]
+            )
+        )
+        _evals_utils._validate_dataset_agent_data(dataset)
+
+    def test_valid_agent_data_with_error_in_string(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame(
+                [{"agent_data": '{"error": "some error message"}'}]
+            )
+        )
+        _evals_utils._validate_dataset_agent_data(dataset)
+
+    def test_invalid_agent_data_type_raises_error(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame([{"agent_data": 123}])
+        )
+        with pytest.raises(ValueError, match="is inconsistent with AgentData type"):
+            _evals_utils._validate_dataset_agent_data(dataset)
+
+    def test_invalid_json_string_schema_raises_error(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame([{"agent_data": '{"agents": 123}'}])
+        )
+        with pytest.raises(ValueError, match="is inconsistent with AgentData type"):
+            _evals_utils._validate_dataset_agent_data(dataset)
+
+    def test_non_object_json_string_raises_error(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame([{"agent_data": "123"}])
+        )
+        with pytest.raises(ValueError, match="is inconsistent with AgentData type"):
+            _evals_utils._validate_dataset_agent_data(dataset)
+
+    def test_missing_agent_data_in_df_row_is_skipped(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame(
+                [{"agent_data": {"turns": []}}, {"agent_data": float("nan")}]
+            )
+        )
+        _evals_utils._validate_dataset_agent_data(dataset)
+
+    def test_conflict_with_inference_configs_raises_error(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame(
+                [
+                    {
+                        "agent_data": {
+                            "agents": {"agent1": {"agent_id": "agent1"}},
+                            "turns": [],
+                        }
+                    }
+                ]
+            )
+        )
+        inference_configs = {
+            "cand1": {"agent_configs": {"agent1": {"agent_id": "agent1"}}}
+        }
+        with pytest.raises(
+            ValueError,
+            match="Cannot provide 'agents' in the dataset's 'agent_data'",
+        ):
+            _evals_utils._validate_dataset_agent_data(dataset, inference_configs)
+
+    def test_no_conflict_with_inference_configs(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame([{"agent_data": {"turns": []}}])
+        )
+        inference_configs = {"cand1": {"agent_configs": {"agent1": {"name": "agent1"}}}}
+        _evals_utils._validate_dataset_agent_data(dataset, inference_configs)
+
+    def test_no_conflict_if_inference_configs_has_no_agent_configs(self):
+        dataset = vertexai_genai_types.EvaluationDataset(
+            eval_dataset_df=pd.DataFrame(
+                [
+                    {
+                        "agent_data": {
+                            "agents": {"agent1": {"agent_id": "agent1"}},
+                            "turns": [],
+                        }
+                    }
+                ]
+            )
+        )
+        inference_configs = {"cand1": {"model": "gemini-pro"}}
+        _evals_utils._validate_dataset_agent_data(dataset, inference_configs)
+
+
 class TestEvent:
     """Unit tests for the Event class."""
 
diff --git a/vertexai/_genai/_evals_constant.py b/vertexai/_genai/_evals_constant.py
index 7f46ac88b2..f825820f09 100644
--- a/vertexai/_genai/_evals_constant.py
+++ b/vertexai/_genai/_evals_constant.py
@@ -69,5 +69,8 @@
         SESSION_INPUT,
         CONTEXT,
         HISTORY,
+        STARTING_PROMPT,
+        CONVERSATION_PLAN,
+        AGENT_DATA,
     }
 )
diff --git a/vertexai/_genai/_evals_utils.py b/vertexai/_genai/_evals_utils.py
index 4a338d34b0..fba82280f9 100644
--- a/vertexai/_genai/_evals_utils.py
+++ b/vertexai/_genai/_evals_utils.py
@@ -17,6 +17,7 @@
 import abc
 import logging
 import os
+import json
 from typing import Any, Optional, Union
 
 from google.genai import types as genai_types
@@ -370,3 +371,93 @@ def _postprocess_user_scenarios_response(
     return types.EvaluationDataset(
         eval_cases=eval_cases, eval_dataset_df=eval_dataset_df
     )
+
+
+def _validate_dataset_agent_data(
+    dataset: types.EvaluationDataset,
+    inference_configs: Optional[dict[str, Any]] = None,
+) -> None:
+    """Validates agent_data in the EvaluationDataset.
+
+    Checks every `agent_data` value in the dataset's DataFrame column and in
+    its eval cases. A value may be an `AgentData` instance, a dict, or a JSON
+    string encoding a dict. Missing values (None, NaN, empty) and payloads
+    that contain an "error" key are skipped.
+
+    Args:
+      dataset: The dataset whose `agent_data` entries are validated.
+      inference_configs: Optional mapping of candidate name to inference
+        config (dict or object). If any config has non-empty `agent_configs`,
+        no `agent_data` entry may define `agents`.
+
+    Raises:
+      ValueError: If an `agent_data` value is not valid JSON, is inconsistent
+        with the AgentData type, or defines `agents` while `inference_configs`
+        also provides `agent_configs`.
+    """
+    has_inference_agent_configs = any(
+        (
+            cand_config.get("agent_configs")
+            if isinstance(cand_config, dict)
+            else getattr(cand_config, "agent_configs", None)
+        )
+        for cand_config in (inference_configs or {}).values()
+    )
+
+    def _validate_single_agent_data(agent_data_val: Any, identifier: str) -> None:
+        # A missing DataFrame cell is float NaN, which is truthy; treat it like
+        # None so that partially populated `agent_data` columns are accepted.
+        if isinstance(agent_data_val, float) and agent_data_val != agent_data_val:
+            return
+        if not agent_data_val:
+            return
+
+        if isinstance(agent_data_val, str):
+            try:
+                agent_data_val = json.loads(agent_data_val)
+            except json.JSONDecodeError as e:
+                raise ValueError(
+                    f"{identifier}: 'agent_data' is not valid JSON: {e}"
+                ) from e
+
+        # From here on a decoded JSON string is handled exactly like a dict, so
+        # schema errors are reported with the same identifier-prefixed message.
+        if isinstance(agent_data_val, types.evals.AgentData):
+            agent_data_obj = agent_data_val
+        elif isinstance(agent_data_val, dict):
+            if "error" in agent_data_val:
+                return
+            try:
+                agent_data_obj = types.evals.AgentData.model_validate(agent_data_val)
+            except Exception as e:
+                raise ValueError(
+                    f"{identifier}: 'agent_data' "
+                    f"is inconsistent with AgentData type: {e}"
+                ) from e
+        else:
+            # Covers non-JSON-object payloads such as numbers, lists or null.
+            raise ValueError(
+                f"{identifier}: 'agent_data' is inconsistent with AgentData type. "
+                f"Got {type(agent_data_val)}"
+            )
+
+        if agent_data_obj and agent_data_obj.agents and has_inference_agent_configs:
+            raise ValueError(
+                f"{identifier}: Cannot provide 'agents' in the dataset's 'agent_data' "
+                "and 'agent_configs' in inference_configs at the same time."
+            )
+
+    if (
+        dataset.eval_dataset_df is not None
+        and "agent_data" in dataset.eval_dataset_df.columns
+    ):
+        for idx, row in dataset.eval_dataset_df.iterrows():
+            _validate_single_agent_data(row.get("agent_data"), f"Row {idx}")
+
+    if dataset.eval_cases:
+        for idx, eval_case in enumerate(dataset.eval_cases):
+            if isinstance(eval_case, dict):
+                agent_data = eval_case.get("agent_data")
+            else:
+                agent_data = getattr(eval_case, "agent_data", None)
+            _validate_single_agent_data(agent_data, f"EvalCase {idx}")
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index ebff5e6366..1088df664a 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -1830,6 +1830,8 @@ def create_evaluation_run(
             if isinstance(agent_info, dict)
             else (agent_info or evals_types.AgentInfo())
         )
+        if isinstance(dataset, types.EvaluationDataset):
+            _evals_utils._validate_dataset_agent_data(dataset, inference_configs)
         resolved_dataset = _evals_common._resolve_dataset(
             self._api_client, dataset, dest, agent_info_pydantic
         )
@@ -2758,6 +2760,8 @@ async def create_evaluation_run(
             if isinstance(agent_info, dict)
             else (agent_info or evals_types.AgentInfo())
         )
+        if isinstance(dataset, types.EvaluationDataset):
+            _evals_utils._validate_dataset_agent_data(dataset, inference_configs)
         resolved_dataset = _evals_common._resolve_dataset(
             self._api_client, dataset, dest, agent_info_pydantic
         )