from functools import partial
from logging import INFO
from typing import Callable, Dict, List, Literal, Optional, Sequence, Tuple, TypeVar, Union
-import warnings

from humanloop import EvaluatorResponse, FlowResponse, PromptResponse, ToolResponse
from humanloop.core.api_error import ApiError
-from humanloop.eval_utils.context import (
-    EvaluationContext,
-    get_evaluation_context,
-    get_prompt_utility_context,
-    in_prompt_utility_context,
-    log_belongs_to_evaluated_file,
-    set_evaluation_context,
-)
+from humanloop.context_variables import EvaluationContext, set_evaluation_context
from humanloop.eval_utils.types import Dataset, Evaluator, EvaluatorCheck, File

# We use TypedDicts for requests, which is consistent with the rest of the SDK
-from humanloop.evaluators.client import EvaluatorsClient
-from humanloop.flows.client import FlowsClient
-from humanloop.otel.constants import HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE, HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME
-from humanloop.otel.helpers import write_to_opentelemetry_span
-from humanloop.prompts.client import PromptsClient
from humanloop.requests import CodeEvaluatorRequestParams as CodeEvaluatorDict
from humanloop.requests import ExternalEvaluatorRequestParams as ExternalEvaluator
from humanloop.requests import FlowKernelRequestParams as FlowDict
from humanloop.requests import HumanEvaluatorRequestParams as HumanEvaluatorDict
from humanloop.requests import LlmEvaluatorRequestParams as LLMEvaluatorDict
from humanloop.requests import PromptKernelRequestParams as PromptDict
from humanloop.requests import ToolKernelRequestParams as ToolDict
-from humanloop.tools.client import ToolsClient
from humanloop.types import BooleanEvaluatorStatsResponse as BooleanStats
from humanloop.types import DatapointResponse as Datapoint
from humanloop.types import EvaluationResponse, EvaluationStats
from humanloop.types import NumericEvaluatorStatsResponse as NumericStats
from humanloop.types import PromptKernelRequest as Prompt
from humanloop.types import ToolKernelRequest as Tool
-from humanloop.types.create_evaluator_log_response import CreateEvaluatorLogResponse
-from humanloop.types.create_flow_log_response import CreateFlowLogResponse
-from humanloop.types.create_prompt_log_response import CreatePromptLogResponse
-from humanloop.types.create_tool_log_response import CreateToolLogResponse
from humanloop.types.datapoint_response import DatapointResponse
from humanloop.types.dataset_response import DatasetResponse
from humanloop.types.evaluation_run_response import EvaluationRunResponse
-from humanloop.types.prompt_call_response import PromptCallResponse
from humanloop.types.run_stats_response import RunStatsResponse
from pydantic import ValidationError

RESET = "\033[0m"


-CLIENT_TYPE = TypeVar("CLIENT_TYPE", PromptsClient, ToolsClient, FlowsClient, EvaluatorsClient)
-
-
class HumanloopUtilityError(Exception):
    def __init__(self, message):
        self.message = message
@@ -108,101 +86,6 @@ def __str__(self):
        return self.message


-def prompt_call_evaluation_aware(client: PromptsClient) -> PromptsClient:
-    client._call = client.call
-
-    def _overload_call(self, **kwargs) -> PromptCallResponse:
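-        # When called inside a prompt utility context, suppress saving the Log and
-        # record the response on a span before returning it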
-        if in_prompt_utility_context():
-            kwargs = {**kwargs, "save": False}
-
-            try:
-                response = self._call(**kwargs)
-                response = typing.cast(PromptCallResponse, response)
-            except Exception as e:
-                # TODO: Bug found in backend: not specifying a model 400s but creates a File
-                raise HumanloopUtilityError(message=str(e)) from e
-
-            response_copy = response.dict()
-            prompt_utility_context = get_prompt_utility_context()
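-            # Strip timestamp fields from the copied response before it is written to the span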
-            for idx, _ in enumerate(response_copy.get("logs", [])):
-                del response_copy["logs"][idx]["created_at"]
-            for idx, _ in enumerate(response_copy["prompt"].get("environments", [])):
-                del response_copy["prompt"]["environments"][idx]["created_at"]
-            del response_copy["prompt"]["last_used_at"]
-            del response_copy["prompt"]["updated_at"]
-            del response_copy["prompt"]["created_at"]
-            del response_copy["start_time"]
-            del response_copy["end_time"]
-
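-            # Record the intercepted call response on a dedicated OTel span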
-            with prompt_utility_context.tracer.start_as_current_span(HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME) as span:
-                write_to_opentelemetry_span(
-                    span=span,
-                    key=HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE,
-                    value=response_copy,
-                )
-            return response
-        else:
-            return self._call(**kwargs)
-
-    # Replace the original call method with the overloaded one
-    client.call = types.MethodType(_overload_call, client)
-    # Return the client with the overloaded call method
-    logger.debug("Overloaded the .call method of %s", client)
-    return client
-
-
-def log_with_evaluation_context(client: CLIENT_TYPE) -> CLIENT_TYPE:
-    """
-    Wrap the `log` method of the provided Humanloop client to use EVALUATION_CONTEXT.
-
-    This makes the overloaded log calls aware of whether the created Log is
-    part of an Evaluation (e.g. one started by eval_utils.run_eval).
-    """
-    # Keep the original log method in a hidden attribute
-    client._log = client.log
-
-    def _overload_log(
-        self, **kwargs
-    ) -> Union[
-        CreatePromptLogResponse,
-        CreateToolLogResponse,
-        CreateFlowLogResponse,
-        CreateEvaluatorLogResponse,
-    ]:
-        if log_belongs_to_evaluated_file(log_args=kwargs):
-            evaluation_context = get_evaluation_context()
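-            # Default source_datapoint_id and run_id to the values from the active EvaluationContext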
-            for attribute in ["source_datapoint_id", "run_id"]:
-                if attribute not in kwargs or kwargs[attribute] is None:
-                    kwargs[attribute] = getattr(evaluation_context, attribute)
-
-            logger.debug(
-                "Logging %s with evaluation context %s inside _overloaded_log on Thread %s",
-                kwargs,
-                evaluation_context,
-                threading.get_ident(),
-            )
-
-        # Call the original .log method
-        try:
-            response = self._log(**kwargs)
-        except Exception as e:
-            logger.error(f"Failed to log: {e}")
-            raise e
-
-        # Notify the run_eval utility about one Log being created
-        if log_belongs_to_evaluated_file(log_args=kwargs):
-            evaluation_context = get_evaluation_context()
-            evaluation_context.upload_callback(log_id=response.id)
-
-        return response
-
-    # Replace the original log method with the overloaded one
-    client.log = types.MethodType(_overload_log, client)
-    # Return the client with the overloaded log method
-    logger.debug("Overloaded the .log method of %s", client)
-    return client
-
-
def run_eval(
    client: "BaseHumanloop",
    file: File,