Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions src/strands/telemetry/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,16 @@ class ToolMetrics:
"""Metrics for a specific tool's usage.

Attributes:
tool: The tool being tracked.
tool: The most recent tool invocation being tracked (for backwards compatibility).
invocations: List of all tool invocations with their inputs.
call_count: Number of times the tool has been called.
success_count: Number of successful tool calls.
error_count: Number of failed tool calls.
total_time: Total execution time across all calls in seconds.
"""

tool: ToolUse
invocations: list[ToolUse] = field(default_factory=list)
call_count: int = 0
success_count: int = 0
error_count: int = 0
Expand All @@ -139,7 +141,8 @@ def add_call(
metrics_client: The metrics client for recording the metrics.
attributes: attributes of the metrics.
"""
self.tool = tool # Update with latest tool state
self.tool = tool # Update with latest tool state for backwards compatibility
self.invocations.append(tool)
self.call_count += 1
self.total_time += duration
metrics_client.tool_call_count.add(1, attributes=attributes)
Expand Down Expand Up @@ -377,6 +380,13 @@ def get_summary(self) -> dict[str, Any]:
"name": metrics.tool.get("name", "unknown"),
"input_params": metrics.tool.get("input", {}),
},
"invocations": [
{
"tool_use_id": inv.get("toolUseId", "N/A"),
"input_params": inv.get("input", {}),
}
for inv in metrics.invocations
],
"execution_stats": {
"call_count": metrics.call_count,
"success_count": metrics.success_count,
Expand Down
52 changes: 52 additions & 0 deletions tests/strands/telemetry/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ def test_tool_metrics_add_call(success, tool, tool_metrics, mock_get_meter_provi
tru_attrs = dataclasses.asdict(tool_metrics)
exp_attrs = {
"tool": tool,
"invocations": [tool],
"call_count": 1,
"success_count": success,
"error_count": not success,
Expand Down Expand Up @@ -309,6 +310,7 @@ def test_event_loop_metrics_add_tool_usage(mock_time, trace, tool, event_loop_me
"tool_metrics": {
"tool1": strands.telemetry.metrics.ToolMetrics(
tool=tool,
invocations=[tool],
call_count=1,
success_count=1,
error_count=0,
Expand Down Expand Up @@ -411,6 +413,12 @@ def test_event_loop_metrics_get_summary(trace, tool, event_loop_metrics, mock_ge
"name": "tool1",
"tool_use_id": "123",
},
"invocations": [
{
"tool_use_id": "123",
"input_params": {},
},
],
},
},
"total_cycles": 0,
Expand Down Expand Up @@ -566,3 +574,47 @@ def test_reset_usage_metrics(usage, event_loop_metrics, mock_get_meter_provider)

# Verify accumulated_usage is NOT cleared
assert event_loop_metrics.accumulated_usage["inputTokens"] == 11


def test_tool_metrics_tracks_all_invocations(mock_get_meter_provider):
    """ToolMetrics.invocations should contain all tool calls, not just the latest."""
    calls = [
        {"name": "weather", "toolUseId": "id1", "input": {"city": "Berlin"}},
        {"name": "weather", "toolUseId": "id2", "input": {"city": "Paris"}},
        {"name": "weather", "toolUseId": "id3", "input": {"city": "Rome"}},
    ]
    durations = [0.1, 0.2, 0.15]
    successes = [True, True, False]

    client = MetricsClient()
    metrics = strands.telemetry.metrics.ToolMetrics(tool=calls[0])
    for call, duration, ok in zip(calls, durations, successes):
        metrics.add_call(call, duration, ok, client)

    # Counters reflect all three calls, with exactly one failure.
    assert metrics.call_count == 3
    assert metrics.success_count == 2
    assert metrics.error_count == 1

    # Every invocation is retained, in call order.
    assert len(metrics.invocations) == 3
    for recorded, city in zip(metrics.invocations, ["Berlin", "Paris", "Rome"]):
        assert recorded["input"] == {"city": city}

    # Backwards compatibility: .tool still points to the latest call.
    assert metrics.tool["input"] == {"city": "Rome"}
    assert metrics.tool["toolUseId"] == "id3"


def test_tool_metrics_invocations_in_summary(event_loop_metrics, mock_get_meter_provider):
    """get_summary() should include all invocations for repeatedly called tools.

    Two calls to the same tool name ("search") with different inputs must both
    appear in the summary's ``invocations`` list, in call order.

    Note: the unused ``tool`` fixture parameter was removed — the test builds
    its own tool-use dicts and never read the fixture value.
    """
    first = {"name": "search", "toolUseId": "s1", "input": {"query": "python"}}
    second = {"name": "search", "toolUseId": "s2", "input": {"query": "rust"}}

    trace = strands.telemetry.metrics.Trace("test")
    msg = {"role": "user", "content": [{"toolResult": {"toolUseId": "s1", "tool_name": "search"}}]}

    event_loop_metrics.add_tool_usage(first, 0.1, trace, True, msg)
    event_loop_metrics.add_tool_usage(second, 0.2, trace, True, msg)

    summary = event_loop_metrics.get_summary()
    tool_usage = summary["tool_usage"]["search"]

    # Both calls are counted and both inputs are preserved, in order.
    assert tool_usage["execution_stats"]["call_count"] == 2
    assert len(tool_usage["invocations"]) == 2
    assert tool_usage["invocations"][0]["input_params"] == {"query": "python"}
    assert tool_usage["invocations"][1]["input_params"] == {"query": "rust"}