From 8ba4707cab8c33aa63bacf1c56bb6b61a74aea70 Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer
Date: Wed, 4 Mar 2026 17:44:26 -0800
Subject: [PATCH] feat: GenAI Client(evals): Patch agent config map to agent
 data

PiperOrigin-RevId: 878749353
---
 tests/unit/vertexai/genai/test_evals.py | 34 +++++++++++++++++++++++++
 vertexai/_genai/_evals_common.py        | 15 ++++++++---
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
index b0b468fec1..b5a32d9c15 100644
--- a/tests/unit/vertexai/genai/test_evals.py
+++ b/tests/unit/vertexai/genai/test_evals.py
@@ -1387,6 +1387,11 @@ def test_run_inference_with_local_agent(
         )
 
         mock_agent_instance = mock.Mock()
+        mock_agent_instance.name = "mock_agent"
+        mock_agent_instance.description = "mock description"
+        mock_agent_instance.instruction = "mock instruction"
+        mock_agent_instance.tools = []
+        mock_agent_instance.sub_agents = []
         mock_llm_agent.return_value = mock_agent_instance
         mock_session_service.return_value.create_session = mock.AsyncMock()
         mock_runner_instance = mock_runner.return_value
@@ -1914,6 +1919,35 @@ def test_run_agent_internal_multi_turn_success(self, mock_run_agent):
             {"turn_index": 1, "turn_id": "t2", "events": []},
         ]
 
+    @mock.patch.object(_evals_common, "_run_agent")
+    def test_run_agent_internal_multi_turn_with_agent(self, mock_run_agent):
+        mock_run_agent.return_value = [
+            [
+                {"turn_index": 0, "turn_id": "t1", "events": []},
+            ]
+        ]
+        prompt_dataset = pd.DataFrame({"prompt": ["p1"], "conversation_plan": ["plan"]})
+        mock_agent = mock.Mock()
+        mock_agent.name = "mock_agent"
+        mock_agent.description = "mock description"
+        mock_agent.instruction = "mock instruction"
+        mock_agent.tools = []
+        mock_agent.sub_agents = []
+        mock_api_client = mock.Mock()
+        result_df = _evals_common._run_agent_internal(
+            api_client=mock_api_client,
+            agent_engine=None,
+            agent=mock_agent,
+            prompt_dataset=prompt_dataset,
+        )
+
+        assert "agent_data" in result_df.columns
+        agent_data = result_df["agent_data"][0]
+        assert agent_data["turns"] == [
+            {"turn_index": 0, "turn_id": "t1", "events": []},
+        ]
+        assert "mock_agent" in agent_data["agents"]
+
     @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")  # fmt: skip
     @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")  # fmt: skip
     @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")  # fmt: skip
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index 0eb3fe71d7..31bac8b747 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -1603,18 +1603,25 @@ def _run_agent_internal(
     processed_intermediate_events = []
     processed_responses = []
     processed_agent_data = []
+    agent_data_agents = None
+    if agent:
+        agent_data_agents = types.evals.AgentData._get_agents_map(agent)
 
     for resp_item in raw_responses:
         intermediate_events_row: list[dict[str, Any]] = []
-        response_row = None
-        agent_data_row = None
+        response_row: Optional[Union[str, dict[str, Any]]] = None
+        agent_data_row: Optional[Union[str, dict[str, Any]]] = None
 
         if _is_multi_turn_agent_run(user_simulator_config, prompt_dataset):
             if isinstance(resp_item, dict) and "error" in resp_item:
-                response_row = json.dumps(resp_item)
+                agent_data_row = json.dumps(resp_item)
             else:
                 # TODO: Migrate single turn agent run result to AgentData.
-                agent_data_row = types.evals.AgentData(turns=resp_item).model_dump()
+                agent_data_row = types.evals.AgentData(
+                    turns=resp_item,
+                    agents=agent_data_agents,
+                ).model_dump()
+
         else:
             if isinstance(resp_item, list):
                 try: