diff --git a/packages/uipath/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py b/packages/uipath/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py
index 17b69d0d0..cff0e8788 100644
--- a/packages/uipath/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py
+++ b/packages/uipath/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py
@@ -12,13 +12,13 @@
 
 from ..._utils.constants import COMMUNITY_agents_SUFFIX
 from .._execution_context import eval_set_run_id_context
+from .._helpers.evaluators_helpers import trace_to_str
 from .._helpers.helpers import is_empty_value
 from ..models import EvaluationResult
 from ..models.models import (
     AgentExecution,
     LLMResponse,
     NumericEvaluationResult,
-    TrajectoryEvaluationTrace,
     UiPathEvaluationError,
     UiPathEvaluationErrorCategory,
 )
@@ -140,10 +140,7 @@ def _create_evaluation_prompt(
             and agent_run_history
             and isinstance(agent_run_history[0], ReadableSpan)
         ):
-            trajectory_trace = TrajectoryEvaluationTrace.from_readable_spans(
-                agent_run_history
-            )
-            agent_run_history = str(trajectory_trace.spans)
+            agent_run_history = trace_to_str(agent_run_history)
         else:
             agent_run_history = str(agent_run_history)
 
diff --git a/packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py b/packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py
new file mode 100644
index 000000000..f7121d800
--- /dev/null
+++ b/packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py
@@ -0,0 +1,57 @@
+import uuid
+
+from opentelemetry.sdk.trace import ReadableSpan
+
+from uipath.eval.evaluators import LegacyTrajectoryEvaluator
+from uipath.eval.models.models import LegacyEvaluatorCategory, LegacyEvaluatorType
+
+
+def _legacy_trajectory_evaluator() -> LegacyTrajectoryEvaluator:
+    return LegacyTrajectoryEvaluator(
+        id=str(uuid.uuid4()),
+        name="Legacy trajectory",
+        category=LegacyEvaluatorCategory.Trajectory,
+        type=LegacyEvaluatorType.Trajectory,
+        prompt="History:\n{{AgentRunHistory}}\nExpected:\n{{ExpectedAgentBehavior}}",
+        createdAt="2026-05-14T00:00:00Z",
+        updatedAt="2026-05-14T00:00:00Z",
+    )
+
+
+def test_legacy_trajectory_prompt_uses_compact_tool_history() -> None:
+    long_prompt = "SYSTEM_PROMPT_" + ("x" * 10_000)
+    spans = [
+        ReadableSpan(
+            name="agent_llm_call",
+            start_time=0,
+            end_time=1,
+            attributes={
+                "openinference.span.kind": "LLM",
+                "input.value": f'{{"messages": [{{"role": "system", "content": "{long_prompt}"}}]}}',
+                "output.value": '{"generations": []}',
+            },
+        ),
+        ReadableSpan(
+            name="search_profiles",
+            start_time=1,
+            end_time=2,
+            attributes={
+                "openinference.span.kind": "TOOL",
+                "tool.name": "search_profiles",
+                "input.value": '{"query": "mentor"}',
+                "output.value": '{"content": "found mentor profile"}',
+                "metadata": f'{{"agent_prompt": "{long_prompt}"}}',
+            },
+        ),
+    ]
+
+    prompt = _legacy_trajectory_evaluator()._create_evaluation_prompt(
+        expected_agent_behavior="The agent should search matching profiles.",
+        agent_run_history=spans,
+    )
+
+    assert "SYSTEM_PROMPT_" not in prompt
+    assert "Tool: search_profiles" in prompt
+    assert '{"query": "mentor"}' in prompt
+    assert "found mentor profile" in prompt
+    assert "agent_llm_call" not in prompt