from functools import partial
from logging import INFO
from typing import Callable, Dict, List, Literal, Optional, Sequence, Tuple, TypeVar, Union
-import warnings

from humanloop import EvaluatorResponse, FlowResponse, PromptResponse, ToolResponse
from humanloop.core.api_error import ApiError
-from humanloop.eval_utils.context import (
-    EvaluationContext,
-    get_evaluation_context,
-    get_prompt_utility_context,
-    in_prompt_utility_context,
-    log_belongs_to_evaluated_file,
-    set_evaluation_context,
-)
+from humanloop.context_variables import EvaluationContext, set_evaluation_context
from humanloop.eval_utils.types import Dataset, Evaluator, EvaluatorCheck, File

# We use TypedDicts for requests, which is consistent with the rest of the SDK
-from humanloop.evaluators.client import EvaluatorsClient
-from humanloop.flows.client import FlowsClient
-from humanloop.otel.constants import HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE, HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME
-from humanloop.otel.helpers import write_to_opentelemetry_span
-from humanloop.prompts.client import PromptsClient
from humanloop.requests import CodeEvaluatorRequestParams as CodeEvaluatorDict
from humanloop.requests import ExternalEvaluatorRequestParams as ExternalEvaluator
from humanloop.requests import FlowKernelRequestParams as FlowDict
from humanloop.requests import HumanEvaluatorRequestParams as HumanEvaluatorDict
from humanloop.requests import LlmEvaluatorRequestParams as LLMEvaluatorDict
from humanloop.requests import PromptKernelRequestParams as PromptDict
from humanloop.requests import ToolKernelRequestParams as ToolDict
-from humanloop.tools.client import ToolsClient
from humanloop.types import BooleanEvaluatorStatsResponse as BooleanStats
from humanloop.types import DatapointResponse as Datapoint
from humanloop.types import EvaluationResponse, EvaluationStats
from humanloop.types import NumericEvaluatorStatsResponse as NumericStats
from humanloop.types import PromptKernelRequest as Prompt
from humanloop.types import ToolKernelRequest as Tool
-from humanloop.types.create_evaluator_log_response import CreateEvaluatorLogResponse
-from humanloop.types.create_flow_log_response import CreateFlowLogResponse
-from humanloop.types.create_prompt_log_response import CreatePromptLogResponse
-from humanloop.types.create_tool_log_response import CreateToolLogResponse
from humanloop.types.datapoint_response import DatapointResponse
from humanloop.types.dataset_response import DatasetResponse
from humanloop.types.evaluation_run_response import EvaluationRunResponse
-from humanloop.types.prompt_call_response import PromptCallResponse
from humanloop.types.run_stats_response import RunStatsResponse
from pydantic import ValidationError

RESET = "\033[0m"


-CLIENT_TYPE = TypeVar("CLIENT_TYPE", PromptsClient, ToolsClient, FlowsClient, EvaluatorsClient)
-
-
class HumanloopUtilityError(Exception):
    def __init__(self, message):
        self.message = message
@@ -108,101 +86,6 @@ def __str__(self):
        return self.message


-def prompt_call_evaluation_aware(client: PromptsClient) -> PromptsClient:
-    client._call = client.call
-
-    def _overload_call(self, **kwargs) -> PromptCallResponse:
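-        # When called inside a prompt utility context, suppress saving the Log and
-        # record the response on a span before returning it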
-        if in_prompt_utility_context():
-            kwargs = {**kwargs, "save": False}
-
-            try:
-                response = self._call(**kwargs)
-                response = typing.cast(PromptCallResponse, response)
-            except Exception as e:
-                # TODO: Bug found in backend: not specifying a model 400s but creates a File
-                raise HumanloopUtilityError(message=str(e)) from e
-
-            response_copy = response.dict()
-            prompt_utility_context = get_prompt_utility_context()
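-            # Strip timestamp fields from the copied response before it is written to the span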
-            for idx, _ in enumerate(response_copy.get("logs", [])):
-                del response_copy["logs"][idx]["created_at"]
-            for idx, _ in enumerate(response_copy["prompt"].get("environments", [])):
-                del response_copy["prompt"]["environments"][idx]["created_at"]
-            del response_copy["prompt"]["last_used_at"]
-            del response_copy["prompt"]["updated_at"]
-            del response_copy["prompt"]["created_at"]
-            del response_copy["start_time"]
-            del response_copy["end_time"]
-
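-            # Record the intercepted call response on a dedicated OTel span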
-            with prompt_utility_context.tracer.start_as_current_span(HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME) as span:
-                write_to_opentelemetry_span(
-                    span=span,
-                    key=HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE,
-                    value=response_copy,
-                )
-            return response
-        else:
-            return self._call(**kwargs)
-
-    # Replace the original call method with the overloaded one
-    client.call = types.MethodType(_overload_call, client)
-    # Return the client with the overloaded call method
-    logger.debug("Overloaded the .call method of %s", client)
-    return client
-
-
-def log_with_evaluation_context(client: CLIENT_TYPE) -> CLIENT_TYPE:
-    """
-    Wrap the `log` method of the provided Humanloop client to use EVALUATION_CONTEXT.
-
-    This makes the overloaded log calls aware of whether the created Log is
-    part of an Evaluation (e.g. one started by eval_utils.run_eval).
-    """
-    # Keep the original log method in a hidden attribute
-    client._log = client.log
-
-    def _overload_log(
-        self, **kwargs
-    ) -> Union[
-        CreatePromptLogResponse,
-        CreateToolLogResponse,
-        CreateFlowLogResponse,
-        CreateEvaluatorLogResponse,
-    ]:
-        if log_belongs_to_evaluated_file(log_args=kwargs):
-            evaluation_context = get_evaluation_context()
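-            # Default source_datapoint_id and run_id to the values from the active EvaluationContext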
-            for attribute in ["source_datapoint_id", "run_id"]:
-                if attribute not in kwargs or kwargs[attribute] is None:
-                    kwargs[attribute] = getattr(evaluation_context, attribute)
-
-            logger.debug(
-                "Logging %s with evaluation context %s inside _overloaded_log on Thread %s",
-                kwargs,
-                evaluation_context,
-                threading.get_ident(),
-            )
-
-        # Call the original .log method
-        try:
-            response = self._log(**kwargs)
-        except Exception as e:
-            logger.error(f"Failed to log: {e}")
-            raise e
-
-        # Notify the run_eval utility about one Log being created
-        if log_belongs_to_evaluated_file(log_args=kwargs):
-            evaluation_context = get_evaluation_context()
-            evaluation_context.upload_callback(log_id=response.id)
-
-        return response
-
-    # Replace the original log method with the overloaded one
-    client.log = types.MethodType(_overload_log, client)
-    # Return the client with the overloaded log method
-    logger.debug("Overloaded the .log method of %s", client)
-    return client
-
-
def run_eval(
    client: "BaseHumanloop",
    file: File,