# test_agent_evaluator.py
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import sys
from unittest.mock import MagicMock
from unittest.mock import patch

from google.adk.evaluation.agent_evaluator import _EvalMetricResultWithInvocation
from google.adk.evaluation.agent_evaluator import AgentEvaluator
from google.adk.evaluation.eval_case import Invocation
from google.adk.evaluation.eval_metrics import EvalMetricResult
from google.adk.evaluation.eval_metrics import EvalStatus
from google.genai import types as genai_types


def _make_actual_invocation(
    query: str = "user query", response: str = "agent response"
) -> Invocation:
  """Builds an actual Invocation from a user query and an agent response."""
  return Invocation(
      user_content=genai_types.Content(
          parts=[genai_types.Part(text=query)], role="user"
      ),
      final_response=genai_types.Content(
          parts=[genai_types.Part(text=response)], role="model"
      ),
  )


def _make_eval_metric_result(
    score: float = 0.9, status: EvalStatus = EvalStatus.PASSED
) -> EvalMetricResult:
  """Builds an EvalMetricResult for a test metric with the given score and status."""
  return EvalMetricResult(
      metric_name="test_metric",
      threshold=0.8,
      score=score,
      eval_status=status,
  )


def _call_print_details(
    items: list[_EvalMetricResultWithInvocation],
) -> MagicMock:
  """Calls _print_details with mocked pandas/tabulate, returns the mock DataFrame class."""
  mock_pandas = MagicMock()
  mock_tabulate_module = MagicMock()
  mock_tabulate_module.tabulate = MagicMock(return_value="table")
  with patch.dict(
      sys.modules,
      {"pandas": mock_pandas, "tabulate": mock_tabulate_module},
  ):
    AgentEvaluator._print_details(
        eval_metric_result_with_invocations=items,
        overall_eval_status=EvalStatus.PASSED,
        overall_score=0.9,
        metric_name="test_metric",
        threshold=0.8,
    )
  return mock_pandas.pandas.DataFrame
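

# A note on the mocking technique above (explanatory, not part of the original
# tests): patch.dict on sys.modules temporarily replaces the cached module
# entries, so any `import pandas` or `import tabulate` executed while the
# context is active resolves to the MagicMock instead of the real package.
# For example:
#
#   with patch.dict(sys.modules, {"pandas": MagicMock()}):
#       import pandas  # -> the MagicMock, even if pandas isn't installed
#
# This lets _print_details run without the optional eval dependencies.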


class TestPrintDetailsWithNoExpectedInvocation:
  """Tests for _print_details when expected_invocation is None."""

  def test_does_not_raise(self):
    items = [
        _EvalMetricResultWithInvocation(
            actual_invocation=_make_actual_invocation(),
            expected_invocation=None,
            eval_metric_result=_make_eval_metric_result(),
        )
    ]
    _call_print_details(items)  # should not raise

  def test_multiple_invocations_all_without_expected(self):
    items = [
        _EvalMetricResultWithInvocation(
            actual_invocation=_make_actual_invocation(response=f"response {i}"),
            expected_invocation=None,
            eval_metric_result=_make_eval_metric_result(),
        )
        for i in range(3)
    ]
    mock_df_cls = _call_print_details(items)
    data = mock_df_cls.call_args[0][0]
    assert len(data) == 3
    for row in data:
      assert row["prompt"] == ""
      assert row["expected_response"] == ""
      assert row["expected_tool_calls"] == ""
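

# A minimal sketch for running this module directly (an addition, not part of
# the original file; tests are normally discovered by the project's test
# runner). Assumes pytest is importable in the current environment.
if __name__ == "__main__":
  import pytest

  raise SystemExit(pytest.main([__file__, "-v"]))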