Skip to content

Commit 6568aed

Browse files
committed
feat(telemetry): capture tool execution errors in OpenTelemetry spans
Introduces ToolExecutionError and ToolErrorType (HTTP status codes) to standardize error reporting for tool failures. Updates trace_tool_call and function execution handlers to extract and record error.type semantics.
1 parent 36f0be7 commit 6568aed

1 file changed

Lines changed: 354 additions & 2 deletions

File tree

tests/unittests/telemetry/test_spans.py

Lines changed: 354 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,359 @@ async def test_generate_content_span(
812812
assert choice_log.attributes == {GEN_AI_SYSTEM: 'test_system'}
813813

814814

815+
def _mock_callable_tool():
  """Description of some tool."""
  # The docstring text above is matched verbatim by the expected tool
  # description in the experimental-semconv test; do not reword it.
  outcome = 'result'
  return outcome
818+
819+
820+
def _mock_mcp_client_session() -> McpClientSession:
  """Build an autospec'd MCP client session exposing one tool.

  Returns:
    A mock created with ``create_autospec(McpClientSession, instance=True)``
    whose ``list_tools`` is an ``AsyncMock`` returning a mocked
    ``McpListToolsResult`` that contains a single ``McpTool`` named
    'mcp_tool' taking a string ``query``.
  """
  session_tool = McpTool(
      name='mcp_tool',
      description='Tool from session',
      inputSchema={
          'type': 'object',
          'properties': {'query': {'type': 'string'}},
      },
  )

  # The listing result is itself a spec'd mock; only ``tools`` is populated.
  listing = mock.create_autospec(McpListToolsResult, instance=True)
  listing.tools = [session_tool]

  session = mock.create_autospec(spec=McpClientSession, instance=True)
  session.list_tools = mock.AsyncMock(return_value=listing)
  return session
837+
838+
839+
def _mock_mcp_tool():
  """Return a standalone ``McpTool`` named 'mcp_tool' with an integer ``id`` input."""
  input_schema = {
      'type': 'object',
      'properties': {'id': {'type': 'integer'}},
  }
  return McpTool(
      name='mcp_tool',
      description='A standalone mcp tool',
      inputSchema=input_schema,
  )
848+
849+
850+
def _mock_tool_dict() -> types.ToolDict:
  """Return a ``ToolDict`` with one function declaration plus a Google Maps tool."""
  declaration = types.FunctionDeclarationDict(
      name='mock_tool', description='Description of mock tool.'
  )
  return types.ToolDict(
      function_declarations=[declaration],
      google_maps=types.GoogleMaps(),
  )
859+
860+
861+
@pytest.mark.asyncio
@mock.patch('google.adk.telemetry.tracing.otel_logger')
@mock.patch('google.adk.telemetry.tracing.tracer')
@mock.patch(
    'google.adk.telemetry.tracing._guess_gemini_system_name',
    return_value='test_system',
)
@pytest.mark.parametrize(
    'capture_content',
    ['SPAN_AND_EVENT', 'EVENT_ONLY', 'SPAN_ONLY', 'NO_CONTENT'],
)
async def test_generate_content_span_with_experimental_semconv(
    mock_guess_system_name,
    mock_tracer,
    mock_otel_logger,
    monkeypatch,
    capture_content,
):
  """Test native generate_content span creation with attributes and logs with experimental semconv enabled.

  The test is parametrized over the content-capture mode, which is exported
  (lower-cased) through ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT``:

  * ``SPAN_AND_EVENT`` / ``SPAN_ONLY``: system instructions, input/output
    messages and full tool definitions are expected as JSON span attributes.
  * ``SPAN_AND_EVENT`` / ``EVENT_ONLY``: the same content is expected as
    structured values on the operation-details log record.
  * Otherwise the message content must be absent, while tool definitions are
    still expected, with ``parameters`` set to ``None`` for the MCP tools.

  Args:
    mock_guess_system_name: Patch of ``_guess_gemini_system_name``.
    mock_tracer: Patch of the tracing module's ``tracer``.
    mock_otel_logger: Patch of the tracing module's ``otel_logger``.
    monkeypatch: pytest fixture used to set env vars and module attributes.
    capture_content: Content-capture mode under test.
  """
  # Arrange
  monkeypatch.setenv(
      'OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT',
      str(capture_content).lower(),
  )
  monkeypatch.setenv(
      'OTEL_SEMCONV_STABILITY_OPT_IN',
      'gen_ai_latest_experimental',
  )
  monkeypatch.setattr(
      'google.adk.telemetry.tracing._instrumented_with_opentelemetry_instrumentation_google_genai',
      lambda: False,
  )

  agent = LlmAgent(name='test_agent', model='not-a-gemini-model')
  invocation_context = await _create_invocation_context(agent)

  system_instruction = types.Content(
      parts=[types.Part.from_text(text='You are a helpful assistant.')],
  )

  user_content1 = types.Content(role='user', parts=[types.Part(text='Hello')])
  user_content2 = types.Content(role='user', parts=[types.Part(text='World')])

  model_content = types.Content(
      role='model', parts=[types.Part(text='Response')]
  )

  # One tool of each supported flavour: plain callable, ToolDict, MCP client
  # session and standalone MCP tool.
  tools = [
      _mock_callable_tool,
      _mock_tool_dict(),
      _mock_mcp_client_session(),
      _mock_mcp_tool(),
  ]

  llm_request = LlmRequest(
      model='some-model',
      contents=[user_content1, user_content2],
      config=types.GenerateContentConfig(
          system_instruction=system_instruction, tools=tools
      ),
  )
  llm_response = LlmResponse(
      content=model_content,
      finish_reason=types.FinishReason.STOP,
      usage_metadata=types.GenerateContentResponseUsageMetadata(
          prompt_token_count=10,
          candidates_token_count=20,
      ),
  )

  model_response_event = mock.MagicMock()
  model_response_event.id = 'event-123'

  mock_span = (
      mock_tracer.start_as_current_span.return_value.__enter__.return_value
  )

  # Act
  # NOTE(review): the response is recorded while the span context is still
  # open, mirroring how the span object is obtained; confirm against the
  # upstream file if the original indentation matters.
  async with use_inference_span(
      llm_request,
      invocation_context,
      model_response_event,
  ) as gc_span:
    assert gc_span.span is mock_span

    trace_inference_result(gc_span, llm_response)

  # Expected attributes
  expected_system_instructions = [
      {
          'content': 'You are a helpful assistant.',
          'type': 'text',
      },
  ]
  expected_input_messages = [
      {
          'role': 'user',
          'parts': [
              {'content': 'Hello', 'type': 'text'},
          ],
      },
      {
          'role': 'user',
          'parts': [
              {'content': 'World', 'type': 'text'},
          ],
      },
  ]
  expected_output_messages = [{
      'role': 'assistant',
      'parts': [
          {'content': 'Response', 'type': 'text'},
      ],
      'finish_reason': 'stop',
  }]
  expected_tool_definitions = [
      {
          'name': '_mock_callable_tool',
          'description': 'Description of some tool.',
          'parameters': None,
          'type': 'function',
      },
      {
          'name': 'mock_tool',
          'description': 'Description of mock tool.',
          'parameters': None,
          'type': 'function',
      },
      {
          'name': 'google_maps',
          'type': 'google_maps',
      },
      {
          'name': 'mcp_tool',
          'description': 'Tool from session',
          'parameters': {
              'type': 'object',
              'properties': {'query': {'type': 'string'}},
          },
          'type': 'function',
      },
      {
          'name': 'mcp_tool',
          'description': 'A standalone mcp tool',
          'parameters': {
              'type': 'object',
              'properties': {'id': {'type': 'integer'}},
          },
          'type': 'function',
      },
  ]
  expected_tool_definitions_no_content = [
      {
          'name': '_mock_callable_tool',
          'description': 'Description of some tool.',
          'parameters': None,
          'type': 'function',
      },
      {
          'name': 'mock_tool',
          'description': 'Description of mock tool.',
          'parameters': None,
          'type': 'function',
      },
      {
          'name': 'google_maps',
          'type': 'google_maps',
      },
      {
          'name': 'mcp_tool',
          'description': 'Tool from session',
          'parameters': None,
          'type': 'function',
      },
      {
          'name': 'mcp_tool',
          'description': 'A standalone mcp tool',
          'parameters': None,
          'type': 'function',
      },
  ]
  expected_tool_definitions_json = (
      '[{"name":"_mock_callable_tool","description":"Description of some'
      ' tool.","parameters":null,"type":"function"},{"name":"mock_tool","description":"Description'
      ' of mock'
      ' tool.","parameters":null,"type":"function"},{"name":"google_maps","type":"google_maps"},{"name":"mcp_tool","description":"Tool'
      ' from'
      ' session","parameters":{"type":"object","properties":{"query":{"type":"string"}}},"type":"function"},{"name":"mcp_tool","description":"A'
      ' standalone mcp'
      ' tool","parameters":{"type":"object","properties":{"id":{"type":"integer"}}},"type":"function"}]'
  )

  expected_tool_definitions_no_content_json = (
      '[{"name":"_mock_callable_tool","description":"Description of some'
      ' tool.","parameters":null,"type":"function"},{"name":"mock_tool","description":"Description'
      ' of mock'
      ' tool.","parameters":null,"type":"function"},{"name":"google_maps","type":"google_maps"},{"name":"mcp_tool","description":"Tool'
      ' from'
      ' session","parameters":null,"type":"function"},{"name":"mcp_tool","description":"A'
      ' standalone mcp tool","parameters":null,"type":"function"}]'
  )
  # Assert Span
  mock_tracer.start_as_current_span.assert_called_once_with(
      'generate_content some-model'
  )

  mock_span.set_attribute.assert_any_call(
      GEN_AI_OPERATION_NAME, 'generate_content'
  )
  mock_span.set_attribute.assert_any_call(GEN_AI_REQUEST_MODEL, 'some-model')
  mock_span.set_attribute.assert_any_call(
      GEN_AI_RESPONSE_FINISH_REASONS, ['stop']
  )
  mock_span.set_attribute.assert_any_call(GEN_AI_USAGE_INPUT_TOKENS, 10)
  mock_span.set_attribute.assert_any_call(GEN_AI_USAGE_OUTPUT_TOKENS, 20)

  mock_span.set_attributes.assert_called_once_with({
      GEN_AI_AGENT_NAME: invocation_context.agent.name,
      GEN_AI_CONVERSATION_ID: invocation_context.session.id,
      USER_ID: invocation_context.session.user_id,
      'gcp.vertex.agent.event_id': 'event-123',
      'gcp.vertex.agent.invocation_id': invocation_context.invocation_id,
  })

  if capture_content in ['SPAN_AND_EVENT', 'SPAN_ONLY']:
    mock_span.set_attribute.assert_any_call(
        GEN_AI_SYSTEM_INSTRUCTIONS,
        '[{"content":"You are a helpful assistant.","type":"text"}]',
    )
    mock_span.set_attribute.assert_any_call(
        GEN_AI_INPUT_MESSAGES,
        '[{"role":"user","parts":[{"content":"Hello","type":"text"}]},{"role":"user","parts":[{"content":"World","type":"text"}]}]',
    )
    mock_span.set_attribute.assert_any_call(
        GEN_AI_OUTPUT_MESSAGES,
        '[{"role":"assistant","parts":[{"content":"Response","type":"text"}],"finish_reason":"stop"}]',
    )
    mock_span.set_attribute.assert_any_call(
        GEN_AI_TOOL_DEFINITIONS, expected_tool_definitions_json
    )
  else:
    # call_args_list holds mock ``call`` objects, so checking a bare attribute
    # name for membership in it can never match and the assertion would pass
    # vacuously. Compare against the attribute names actually recorded.
    recorded_attribute_names = {
        recorded.args[0]
        for recorded in mock_span.set_attribute.call_args_list
        if recorded.args
    }
    assert GEN_AI_SYSTEM_INSTRUCTIONS not in recorded_attribute_names
    assert GEN_AI_INPUT_MESSAGES not in recorded_attribute_names
    assert GEN_AI_OUTPUT_MESSAGES not in recorded_attribute_names
    mock_span.set_attribute.assert_any_call(
        GEN_AI_TOOL_DEFINITIONS, expected_tool_definitions_no_content_json
    )

  # Assert Logs
  assert mock_otel_logger.emit.call_count == 1

  log_records: list[LogRecord] = [
      call.args[0] for call in mock_otel_logger.emit.call_args_list
  ]

  operation_details_log = next(
      (
          lr
          for lr in log_records
          if lr.event_name == 'gen_ai.client.inference.operation.details'
      ),
      None,
  )

  assert operation_details_log is not None
  assert operation_details_log.attributes is not None

  attributes = operation_details_log.attributes

  if capture_content in ['SPAN_AND_EVENT', 'EVENT_ONLY']:
    assert GEN_AI_SYSTEM_INSTRUCTIONS in attributes
    assert (
        attributes[GEN_AI_SYSTEM_INSTRUCTIONS] == expected_system_instructions
    )
    assert GEN_AI_INPUT_MESSAGES in attributes
    assert attributes[GEN_AI_INPUT_MESSAGES] == expected_input_messages
    assert GEN_AI_OUTPUT_MESSAGES in attributes
    assert attributes[GEN_AI_OUTPUT_MESSAGES] == expected_output_messages
    assert GEN_AI_TOOL_DEFINITIONS in attributes
    assert attributes[GEN_AI_TOOL_DEFINITIONS] == expected_tool_definitions
  else:
    assert GEN_AI_SYSTEM_INSTRUCTIONS not in attributes
    assert GEN_AI_INPUT_MESSAGES not in attributes
    assert GEN_AI_OUTPUT_MESSAGES not in attributes
    assert GEN_AI_TOOL_DEFINITIONS in attributes
    assert (
        attributes[GEN_AI_TOOL_DEFINITIONS]
        == expected_tool_definitions_no_content
    )

  # These attributes are expected on the log record in every capture mode.
  assert GEN_AI_USAGE_INPUT_TOKENS in attributes
  assert attributes[GEN_AI_USAGE_INPUT_TOKENS] == 10
  assert GEN_AI_USAGE_OUTPUT_TOKENS in attributes
  assert attributes[GEN_AI_USAGE_OUTPUT_TOKENS] == 20
  assert 'gcp.vertex.agent.event_id' in attributes
  assert attributes['gcp.vertex.agent.event_id'] == 'event-123'
  assert 'gcp.vertex.agent.invocation_id' in attributes
  assert (
      attributes['gcp.vertex.agent.invocation_id']
      == invocation_context.invocation_id
  )
  assert GEN_AI_AGENT_NAME in attributes
  assert attributes[GEN_AI_AGENT_NAME] == invocation_context.agent.name
  assert GEN_AI_CONVERSATION_ID in attributes
  assert attributes[GEN_AI_CONVERSATION_ID] == invocation_context.session.id
1167+
8151168
def test_trace_tool_call_with_tool_execution_error(
8161169
monkeypatch, mock_span_fixture, mock_tool_fixture
8171170
):
@@ -890,5 +1243,4 @@ def test_trace_tool_call_with_standard_error(
8901243
error=test_error,
8911244
)
8921245

893-
assert mock.call('error.type', 'ValueError') in mock_span_fixture.set_attribute.call_args_list
894-
1246+
assert mock.call('error.type', 'ValueError') in mock_span_fixture.set_attribute.call_args_list

0 commit comments

Comments
 (0)