|
29 | 29 | from humanloop.eval_utils.context import ( |
30 | 30 | EvaluationContext, |
31 | 31 | get_evaluation_context, |
| 32 | + get_prompt_utility_context, |
| 33 | + in_prompt_utility_context, |
32 | 34 | log_belongs_to_evaluated_file, |
33 | 35 | set_evaluation_context, |
34 | 36 | ) |
|
37 | 39 | # We use TypedDicts for requests, which is consistent with the rest of the SDK |
38 | 40 | from humanloop.evaluators.client import EvaluatorsClient |
39 | 41 | from humanloop.flows.client import FlowsClient |
| 42 | +from humanloop.otel.constants import HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE, HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME |
| 43 | +from humanloop.otel.helpers import write_to_opentelemetry_span |
40 | 44 | from humanloop.prompts.client import PromptsClient |
41 | 45 | from humanloop.requests import CodeEvaluatorRequestParams as CodeEvaluatorDict |
42 | 46 | from humanloop.requests import ExternalEvaluatorRequestParams as ExternalEvaluator |
|
62 | 66 | from humanloop.types.datapoint_response import DatapointResponse |
63 | 67 | from humanloop.types.dataset_response import DatasetResponse |
64 | 68 | from humanloop.types.evaluation_run_response import EvaluationRunResponse |
| 69 | +from humanloop.types.prompt_call_response import PromptCallResponse |
65 | 70 | from humanloop.types.run_stats_response import RunStatsResponse |
66 | 71 | from pydantic import ValidationError |
67 | 72 |
|
|
94 | 99 | CLIENT_TYPE = TypeVar("CLIENT_TYPE", PromptsClient, ToolsClient, FlowsClient, EvaluatorsClient) |
95 | 100 |
|
96 | 101 |
|
class HumanloopUtilitySyntaxError(Exception):
    """Raised when a Humanloop utility call fails because of a malformed request.

    Wraps the underlying client/backend error so callers can catch a single,
    SDK-specific exception type instead of the transport's exception.
    """

    def __init__(self, message):
        # Delegate to Exception.__init__ so `args` is populated — without this,
        # repr(), pickling, and generic `str(exc.args)` handling all break.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
| 109 | + |
def prompt_call_evaluation_aware(client: PromptsClient) -> PromptsClient:
    """Wrap the `call` method of the provided PromptsClient.

    When executing inside a prompt-utility context, the wrapped `call`:
      * forces `save=False` so the backend does not persist the Log itself
        (the surrounding utility is responsible for logging), and
      * records the intercepted response on an OpenTelemetry span so the
        utility decorator can pick it up and attribute it correctly.

    Outside a prompt-utility context the original `call` is invoked unchanged.

    :param client: PromptsClient whose `call` method is monkey-patched in place.
    :returns: The same client instance, with `call` wrapped.
    :raises HumanloopUtilitySyntaxError: if the underlying call fails while in
        a prompt-utility context.
    """
    # Keep a handle on the original implementation so the wrapper can delegate.
    client._call = client.call

    def _overload_call(self, **kwargs) -> PromptCallResponse:
        if in_prompt_utility_context():
            # Do not persist the call server-side; the utility context owns logging.
            kwargs = {**kwargs, "save": False}

            try:
                response = self._call(**kwargs)
                response = typing.cast(PromptCallResponse, response)
            except Exception as e:
                # TODO: Bug found in backend: not specifying a model 400s but creates a File
                raise HumanloopUtilitySyntaxError(message=str(e)) from e

            prompt_utility_context = get_prompt_utility_context()

            # Stash the intercepted response on a span so the decorator up the
            # stack can retrieve it and associate it with the evaluated File.
            with prompt_utility_context.tracer.start_as_current_span(HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME) as span:
                write_to_opentelemetry_span(
                    span=span,
                    key=HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE,
                    value=response.dict(),
                )
            return response
        else:
            # BUG FIX: forward keyword arguments properly; the original passed
            # the kwargs dict as a single positional argument (`self._call(kwargs)`),
            # which breaks every call made outside a prompt-utility context.
            return self._call(**kwargs)

    # Replace the original call method with the overloaded one
    client.call = types.MethodType(_overload_call, client)
    # Return the client with the overloaded call method
    # (message corrected: this function wraps .call, not .log)
    logger.debug("Overloaded the .call method of %s", client)
    return client
| 141 | + |
| 142 | + |
97 | 143 | def log_with_evaluation_context(client: CLIENT_TYPE) -> CLIENT_TYPE: |
98 | 144 | """ |
99 | 145 | Wrap the `log` method of the provided Humanloop client to use EVALUATION_CONTEXT. |
@@ -142,7 +188,7 @@ def _overload_log( |
142 | 188 | # Replace the original log method with the overloaded one |
143 | 189 | client.log = types.MethodType(_overload_log, client) |
144 | 190 | # Return the client with the overloaded log method |
145 | | - logger.debug("Overloaded the .log method of %s", client) |
| 191 | + logger.debug("Overloaded the .call method of %s", client) |
146 | 192 | return client |
147 | 193 |
|
148 | 194 |
|
|
0 commit comments