|
29 | 29 | from humanloop.eval_utils.context import ( |
30 | 30 | EvaluationContext, |
31 | 31 | get_evaluation_context, |
| 32 | + get_prompt_utility_context, |
| 33 | + in_prompt_utility_context, |
32 | 34 | log_belongs_to_evaluated_file, |
33 | 35 | set_evaluation_context, |
34 | 36 | ) |
|
37 | 39 | # We use TypedDicts for requests, which is consistent with the rest of the SDK |
38 | 40 | from humanloop.evaluators.client import EvaluatorsClient |
39 | 41 | from humanloop.flows.client import FlowsClient |
| 42 | +from humanloop.otel.constants import HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE, HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME |
| 43 | +from humanloop.otel.helpers import write_to_opentelemetry_span |
40 | 44 | from humanloop.prompts.client import PromptsClient |
41 | 45 | from humanloop.requests import CodeEvaluatorRequestParams as CodeEvaluatorDict |
42 | 46 | from humanloop.requests import ExternalEvaluatorRequestParams as ExternalEvaluator |
|
62 | 66 | from humanloop.types.datapoint_response import DatapointResponse |
63 | 67 | from humanloop.types.dataset_response import DatasetResponse |
64 | 68 | from humanloop.types.evaluation_run_response import EvaluationRunResponse |
| 69 | +from humanloop.types.prompt_call_response import PromptCallResponse |
65 | 70 | from humanloop.types.run_stats_response import RunStatsResponse |
66 | 71 | from pydantic import ValidationError |
67 | 72 |
|
|
94 | 99 | CLIENT_TYPE = TypeVar("CLIENT_TYPE", PromptsClient, ToolsClient, FlowsClient, EvaluatorsClient) |
95 | 100 |
|
96 | 101 |
|
class HumanloopUtilitySyntaxError(Exception):
    """Raised when a Humanloop utility call fails because of a malformed request.

    Wraps the underlying client/backend error so callers can catch a single,
    SDK-specific exception type instead of the transport's exception.
    """

    def __init__(self, message):
        # Delegate to Exception.__init__ so `args` is populated — without this,
        # repr(), pickling, and generic `str(exc.args)` handling all break.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
| 109 | + |
def prompt_call_evaluation_aware(client: PromptsClient) -> PromptsClient:
    """Wrap the `call` method of the provided PromptsClient.

    When executing inside a prompt-utility context, the wrapped `call`:
      * forces `save=False` so the backend does not persist the Log itself
        (the surrounding utility is responsible for logging), and
      * records the intercepted response on an OpenTelemetry span so the
        utility decorator can pick it up and attribute it correctly.

    Outside a prompt-utility context the original `call` is invoked unchanged.

    :param client: PromptsClient whose `call` method is monkey-patched in place.
    :returns: The same client instance, with `call` wrapped.
    :raises HumanloopUtilitySyntaxError: if the underlying call fails while in
        a prompt-utility context.
    """
    # Keep a handle on the original implementation so the wrapper can delegate.
    client._call = client.call

    def _overload_call(self, **kwargs) -> PromptCallResponse:
        if in_prompt_utility_context():
            # Do not persist the call server-side; the utility context owns logging.
            kwargs = {**kwargs, "save": False}

            try:
                response = self._call(**kwargs)
                response = typing.cast(PromptCallResponse, response)
            except Exception as e:
                # TODO: Bug found in backend: not specifying a model 400s but creates a File
                raise HumanloopUtilitySyntaxError(message=str(e)) from e

            prompt_utility_context = get_prompt_utility_context()

            # Stash the intercepted response on a span so the decorator up the
            # stack can retrieve it and associate it with the evaluated File.
            with prompt_utility_context.tracer.start_as_current_span(HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME) as span:
                write_to_opentelemetry_span(
                    span=span,
                    key=HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE,
                    value=response.dict(),
                )
            return response
        else:
            # BUG FIX: forward keyword arguments properly; the original passed
            # the kwargs dict as a single positional argument (`self._call(kwargs)`),
            # which breaks every call made outside a prompt-utility context.
            return self._call(**kwargs)

    # Replace the original call method with the overloaded one
    client.call = types.MethodType(_overload_call, client)
    # Return the client with the overloaded call method
    # (message corrected: this function wraps .call, not .log)
    logger.debug("Overloaded the .call method of %s", client)
    return client
| 141 | + |
| 142 | + |
97 | 143 | def log_with_evaluation_context(client: CLIENT_TYPE) -> CLIENT_TYPE: |
98 | 144 | """ |
99 | 145 | Wrap the `log` method of the provided Humanloop client to use EVALUATION_CONTEXT. |
@@ -142,7 +188,7 @@ def _overload_log( |
142 | 188 | # Replace the original log method with the overloaded one |
143 | 189 | client.log = types.MethodType(_overload_log, client) |
144 | 190 | # Return the client with the overloaded log method |
145 | | - logger.debug("Overloaded the .log method of %s", client) |
| 191 | + logger.debug("Overloaded the .call method of %s", client) |
146 | 192 | return client |
147 | 193 |
|
148 | 194 |
|
|
0 commit comments