Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions tests/unit/vertexai/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3292,6 +3292,136 @@ def my_search_tool(query: str) -> str:
mock_from_callable.assert_called_once_with(callable=my_search_tool)


class TestValidateDatasetAgentData:
    """Unit tests for the _validate_dataset_agent_data function."""

    @staticmethod
    def _df_dataset(records):
        """Builds an EvaluationDataset backed by a DataFrame of `records`."""
        return vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame(records)
        )

    def test_valid_agent_data_in_df(self):
        """Dict, JSON-string, and AgentData representations all validate."""
        records = [
            {
                "agent_data": {
                    "turns": [{"turn_index": 0, "turn_id": "1", "events": []}]
                }
            },
            {
                "agent_data": '{"turns": [{"turn_index": 0, "turn_id": "2", "events": []}]}'
            },
            {
                "agent_data": vertexai_genai_types.evals.AgentData(
                    turns=[{"turn_index": 0, "turn_id": "3", "events": []}]
                )
            },
        ]
        _evals_utils._validate_dataset_agent_data(self._df_dataset(records))

    def test_valid_agent_data_in_eval_cases(self):
        """The same three representations validate when given as eval_cases."""
        cases = [
            vertexai_genai_types.EvalCase(
                agent_data={
                    "turns": [{"turn_index": 0, "turn_id": "1", "events": []}]
                }
            ),
            vertexai_genai_types.EvalCase(
                agent_data=json.loads(
                    '{"turns": [{"turn_index": 0, "turn_id": "2", "events": []}]}'
                )
            ),
            vertexai_genai_types.EvalCase(
                agent_data=vertexai_genai_types.evals.AgentData(
                    turns=[{"turn_index": 0, "turn_id": "3", "events": []}]
                )
            ),
        ]
        dataset = vertexai_genai_types.EvaluationDataset(eval_cases=cases)
        _evals_utils._validate_dataset_agent_data(dataset)

    def test_invalid_json_string_raises_error(self):
        """A truncated JSON string is rejected with a JSON error."""
        dataset = self._df_dataset([{"agent_data": '{"turns":'}])
        with pytest.raises(ValueError, match="is not valid JSON"):
            _evals_utils._validate_dataset_agent_data(dataset)

    def test_invalid_dict_raises_error(self):
        """A dict that violates the AgentData schema is rejected."""
        dataset = self._df_dataset([{"agent_data": {"agents": 123}}])
        with pytest.raises(ValueError, match="is inconsistent with AgentData type"):
            _evals_utils._validate_dataset_agent_data(dataset)

    def test_valid_agent_data_with_error_in_dict(self):
        """An error-payload dict is skipped rather than schema-validated."""
        dataset = self._df_dataset([{"agent_data": {"error": "some error message"}}])
        _evals_utils._validate_dataset_agent_data(dataset)

    def test_valid_agent_data_with_error_in_string(self):
        """An error payload encoded as a JSON string is also skipped."""
        dataset = self._df_dataset(
            [{"agent_data": '{"error": "some error message"}'}]
        )
        _evals_utils._validate_dataset_agent_data(dataset)

    def test_invalid_agent_data_type_raises_error(self):
        """A non-str/dict/AgentData value is rejected."""
        dataset = self._df_dataset([{"agent_data": 123}])
        with pytest.raises(ValueError, match="is inconsistent with AgentData type"):
            _evals_utils._validate_dataset_agent_data(dataset)

    def test_conflict_with_inference_configs_raises_error(self):
        """'agents' in agent_data conflicts with agent_configs in configs."""
        records = [
            {
                "agent_data": {
                    "agents": {"agent1": {"agent_id": "agent1"}},
                    "turns": [],
                }
            }
        ]
        dataset = self._df_dataset(records)
        inference_configs = {
            "cand1": {"agent_configs": {"agent1": {"agent_id": "agent1"}}}
        }
        with pytest.raises(
            ValueError,
            match="Cannot provide 'agents' in the dataset's 'agent_data'",
        ):
            _evals_utils._validate_dataset_agent_data(dataset, inference_configs)

    def test_no_conflict_with_inference_configs(self):
        """agent_configs alone (no 'agents' in agent_data) is acceptable."""
        dataset = self._df_dataset([{"agent_data": {"turns": []}}])
        inference_configs = {"cand1": {"agent_configs": {"agent1": {"name": "agent1"}}}}
        _evals_utils._validate_dataset_agent_data(dataset, inference_configs)

    def test_no_conflict_if_inference_configs_has_no_agent_configs(self):
        """'agents' in agent_data is fine when configs lack agent_configs."""
        records = [
            {
                "agent_data": {
                    "agents": {"agent1": {"agent_id": "agent1"}},
                    "turns": [],
                }
            }
        ]
        dataset = self._df_dataset(records)
        inference_configs = {"cand1": {"model": "gemini-pro"}}
        _evals_utils._validate_dataset_agent_data(dataset, inference_configs)


class TestEvent:
"""Unit tests for the Event class."""

Expand Down
3 changes: 3 additions & 0 deletions vertexai/_genai/_evals_constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,8 @@
SESSION_INPUT,
CONTEXT,
HISTORY,
STARTING_PROMPT,
CONVERSATION_PLAN,
AGENT_DATA,
}
)
75 changes: 75 additions & 0 deletions vertexai/_genai/_evals_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import abc
import logging
import os
import json
from typing import Any, Optional, Union

from google.genai import types as genai_types
Expand Down Expand Up @@ -370,3 +371,77 @@ def _postprocess_user_scenarios_response(
return types.EvaluationDataset(
eval_cases=eval_cases, eval_dataset_df=eval_dataset_df
)


def _validate_dataset_agent_data(
    dataset: types.EvaluationDataset,
    inference_configs: Optional[dict[str, Any]] = None,
) -> None:
    """Validates agent_data in the EvaluationDataset.

    Each ``agent_data`` value (found in the ``agent_data`` column of
    ``eval_dataset_df`` and/or on each entry of ``eval_cases``) may be a JSON
    string, a dict, or an ``AgentData`` instance. It is checked against the
    AgentData schema, and 'agents' must not be defined in both the dataset's
    agent_data and inference_configs.

    Args:
        dataset: The evaluation dataset whose agent_data entries are validated.
        inference_configs: Optional mapping of candidate name to inference
            config; only consulted to detect a conflicting 'agent_configs'.

    Raises:
        ValueError: If an agent_data value is not valid JSON, is inconsistent
            with the AgentData type, or defines 'agents' while
            inference_configs also supplies 'agent_configs'.
    """
    # One candidate config with agent_configs is enough to flag the conflict,
    # so stop scanning as soon as we find one.
    has_inference_agent_configs = False
    if inference_configs:
        for cand_config in inference_configs.values():
            if isinstance(cand_config, dict) and cand_config.get("agent_configs"):
                has_inference_agent_configs = True
                break
            if hasattr(cand_config, "agent_configs") and cand_config.agent_configs:
                has_inference_agent_configs = True
                break

    def _validate_single_agent_data(agent_data_val: Any, identifier: str) -> None:
        """Validates one agent_data value; `identifier` prefixes error text."""
        if not agent_data_val:
            return

        # Normalize JSON strings to Python objects first, then validate the
        # result through the same path as dict input. This keeps the error
        # message consistent ("is inconsistent with AgentData type") for valid
        # JSON with an invalid schema, and avoids a TypeError when the JSON
        # decodes to a non-dict scalar.
        if isinstance(agent_data_val, str):
            try:
                agent_data_val = json.loads(agent_data_val)
            except json.JSONDecodeError as e:
                raise ValueError(
                    f"{identifier}: 'agent_data' is not valid JSON: {e}"
                ) from e

        agent_data_obj = None
        if isinstance(agent_data_val, dict):
            # A bare {"error": ...} payload records a failed upstream step and
            # is deliberately skipped rather than schema-validated.
            if "error" in agent_data_val:
                return
            try:
                agent_data_obj = types.evals.AgentData.model_validate(agent_data_val)
            except Exception as e:
                raise ValueError(
                    f"{identifier}: 'agent_data' "
                    f"is inconsistent with AgentData type: {e}"
                ) from e
        elif isinstance(agent_data_val, types.evals.AgentData):
            agent_data_obj = agent_data_val
        else:
            raise ValueError(
                f"{identifier}: 'agent_data' is inconsistent with AgentData type. "
                f"Got {type(agent_data_val)}"
            )

        if agent_data_obj and agent_data_obj.agents and has_inference_agent_configs:
            raise ValueError(
                f"{identifier}: Cannot provide 'agents' in the dataset's 'agent_data' "
                "and 'agent_configs' in inference_configs at the same time."
            )

    if (
        dataset.eval_dataset_df is not None
        and "agent_data" in dataset.eval_dataset_df.columns
    ):
        for idx, row in dataset.eval_dataset_df.iterrows():
            _validate_single_agent_data(row.get("agent_data"), f"Row {idx}")

    if dataset.eval_cases:
        for idx, eval_case in enumerate(dataset.eval_cases):
            # eval_cases may hold raw dicts or EvalCase-like objects.
            agent_data = None
            if isinstance(eval_case, dict):
                agent_data = eval_case.get("agent_data", None)
            elif hasattr(eval_case, "agent_data"):
                agent_data = eval_case.agent_data
            _validate_single_agent_data(agent_data, f"EvalCase {idx}")
4 changes: 4 additions & 0 deletions vertexai/_genai/evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1830,6 +1830,8 @@ def create_evaluation_run(
if isinstance(agent_info, dict)
else (agent_info or evals_types.AgentInfo())
)
if isinstance(dataset, types.EvaluationDataset):
_evals_utils._validate_dataset_agent_data(dataset, inference_configs)
resolved_dataset = _evals_common._resolve_dataset(
self._api_client, dataset, dest, agent_info_pydantic
)
Expand Down Expand Up @@ -2758,6 +2760,8 @@ async def create_evaluation_run(
if isinstance(agent_info, dict)
else (agent_info or evals_types.AgentInfo())
)
if isinstance(dataset, types.EvaluationDataset):
_evals_utils._validate_dataset_agent_data(dataset, inference_configs)
resolved_dataset = _evals_common._resolve_dataset(
self._api_client, dataset, dest, agent_info_pydantic
)
Expand Down
Loading