
Commit 2492bb0

Author: Andrei Bratu
Feature/eng 1574 eval_run utility regressions (#44)
* Use prompt.call inside prompt utility
* Fix flow log completions in eval_run context
* Refactored code in processor/ exporter
1 parent edce541 commit 2492bb0

File tree

31 files changed · +3424 −1709 lines changed


.github/workflows/ci.yml

Lines changed: 3 additions & 2 deletions
@@ -6,7 +6,7 @@ jobs:
    runs-on: ubuntu-20.04
    strategy:
      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.9", "3.10", "3.11"]
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
@@ -25,7 +25,7 @@ jobs:
    runs-on: ubuntu-20.04
    strategy:
      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.9", "3.10", "3.11"]
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
@@ -47,6 +47,7 @@ jobs:
      REPLICATE_API_KEY: ${{ secrets.REPLICATE_API_KEY }}
      GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
      COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
+      HUMANLOOP_API_KEY: ${{ secrets.HUMANLOOP_API_KEY }}

  publish:
    needs: [compile, test]
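Both test matrices drop 3.12/3.13 and run only on Python 3.9–3.11, and the test job now receives a HUMANLOOP_API_KEY secret alongside the other provider keys. A minimal sketch of how a test suite might guard on that variable; the fixture and file name are illustrative, not part of this commit:

```python
# conftest.py (illustrative) - skip integration tests when the CI secret is absent
import os

import pytest


@pytest.fixture(scope="session")
def humanloop_api_key() -> str:
    """Return the HUMANLOOP_API_KEY injected by CI, or skip dependent tests."""
    key = os.environ.get("HUMANLOOP_API_KEY")
    if not key:
        pytest.skip("HUMANLOOP_API_KEY is not set")
    return key
```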

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -5,3 +5,5 @@ poetry.toml
.ruff_cache/
.vscode
.env
+tests/assets/*.jsonl
+tests/assets/*.parquet

poetry.lock

Lines changed: 1223 additions & 1260 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 6 additions & 3 deletions
@@ -49,7 +49,8 @@ parse = ">=1"
pydantic = ">= 1.9.2"
pydantic-core = "^2.18.2"
typing_extensions = ">= 4.0.0"
-chromadb = "<0.3.7"
+deepdiff = {extras = ["murmur"], version = "^8.2.0"}
+mmh3 = "^5.1.0"

[tool.poetry.dev-dependencies]
mypy = "1.0.1"
@@ -68,9 +69,11 @@ python-dotenv = "^1.0.1"
replicate = "^1.0.3"
ruff = "^0.5.6"
types-jsonschema = "^4.23.0.20240813"
-chromadb="<0.3.5"
-pandas = "<2.2.0"
+onnxruntime = "<=1.19.2"
+chromadb = "^0.6.3"
+pandas = "^2.2.0"
pyarrow = "^19.0.0"
+numpy = "<2.0.0"

[tool.pytest.ini_options]
testpaths = [ "tests" ]
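chromadb and pandas move to current releases, onnxruntime and numpy are pinned to keep the dev environment resolvable, and deepdiff (with its murmur extra) plus mmh3 become runtime dependencies. A rough sketch of the structural diffing/hashing deepdiff offers; the dictionaries below are made up for illustration:

```python
from deepdiff import DeepDiff, DeepHash

# Two versions of a config-like dict (illustrative values only)
old_kernel = {"model": "gpt-4o", "temperature": 0.7}
new_kernel = {"model": "gpt-4o", "temperature": 0.5}

# Structural diff: reports the changed temperature value
print(DeepDiff(old_kernel, new_kernel))

# Content hash of a nested object; the murmur extra / mmh3 supply a fast hash backend
print(DeepHash(new_kernel)[new_kernel])
```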

src/humanloop/client.py

Lines changed: 7 additions & 28 deletions
@@ -1,4 +1,3 @@
-from contextvars import ContextVar
import os
import typing
from typing import List, Optional, Sequence
@@ -10,8 +9,8 @@
from opentelemetry.trace import Tracer

from humanloop.core.client_wrapper import SyncClientWrapper
+from humanloop.eval_utils.run import prompt_call_evaluation_aware
from humanloop.utilities.types import DecoratorPromptKernelRequestParams
-from humanloop.eval_utils.context import EVALUATION_CONTEXT_VARIABLE_NAME, EvaluationContext

from humanloop.eval_utils import log_with_evaluation_context, run_eval
from humanloop.eval_utils.types import Dataset, Evaluator, EvaluatorCheck, File
@@ -38,10 +37,8 @@ def __init__(
        self,
        *,
        client_wrapper: SyncClientWrapper,
-        evaluation_context_variable: ContextVar[Optional[EvaluationContext]],
    ):
        super().__init__(client_wrapper=client_wrapper)
-        self._evaluation_context_variable = evaluation_context_variable

    def run(
        self,
@@ -70,7 +67,6 @@ def run(
            dataset=dataset,
            evaluators=evaluators,
            workers=workers,
-            evaluation_context_variable=self._evaluation_context_variable,
        )


@@ -118,31 +114,15 @@ def __init__(
            httpx_client=httpx_client,
        )

-        self.evaluation_context_variable: ContextVar[Optional[EvaluationContext]] = ContextVar(
-            EVALUATION_CONTEXT_VARIABLE_NAME
-        )
-
-        eval_client = ExtendedEvalsClient(
-            client_wrapper=self._client_wrapper,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
+        eval_client = ExtendedEvalsClient(client_wrapper=self._client_wrapper)
        eval_client.client = self
        self.evaluations = eval_client
        self.prompts = ExtendedPromptsClient(client_wrapper=self._client_wrapper)

        # Overload the .log method of the clients to be aware of Evaluation Context
-        # TODO: Overload the log for Evaluators and Tools once run_id is added
-        # to them.
-        self.prompts = log_with_evaluation_context(
-            client=self.prompts,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
-        # self.evaluators = log_with_evaluation_context(client=self.evaluators)
-        # self.tools = log_with_evaluation_context(client=self.tools)
-        self.flows = log_with_evaluation_context(
-            client=self.flows,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
+        self.prompts = log_with_evaluation_context(client=self.prompts)
+        self.prompts = prompt_call_evaluation_aware(client=self.prompts)
+        self.flows = log_with_evaluation_context(client=self.flows)

        if opentelemetry_tracer_provider is not None:
            self._tracer_provider = opentelemetry_tracer_provider
@@ -157,9 +137,8 @@ def __init__(
        instrument_provider(provider=self._tracer_provider)
        self._tracer_provider.add_span_processor(
            HumanloopSpanProcessor(
-                exporter=HumanloopSpanExporter(
-                    client=self,
-                )
+                client=self,
+                exporter=HumanloopSpanExporter(client=self),
            ),
        )
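With the ContextVar now owned by humanloop.eval_utils.context, the client no longer threads evaluation_context_variable through constructors; it simply wraps the prompts and flows sub-clients with log_with_evaluation_context (and prompts additionally with prompt_call_evaluation_aware). A rough sketch of the wrapping pattern, assuming .log accepts a run_id keyword; this is not the actual eval_utils implementation:

```python
# Illustrative sketch only; the real log_with_evaluation_context lives in
# humanloop/eval_utils and is not shown in this diff.
import functools

from humanloop.eval_utils.context import (
    get_evaluation_context,
    log_belongs_to_evaluated_file,
)


def log_with_evaluation_context_sketch(client):
    """Wrap client.log so Logs created during an Evaluation attach to the active Run."""
    original_log = client.log

    @functools.wraps(original_log)
    def log(**kwargs):
        # Only Logs for the File under evaluation (matched by id/path) are redirected.
        if log_belongs_to_evaluated_file(kwargs):
            # run_id is a field on EvaluationContext (see context.py below); passing
            # it straight through to .log is an assumption made for this sketch.
            kwargs.setdefault("run_id", get_evaluation_context().run_id)
        return original_log(**kwargs)

    client.log = log
    return client
```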

src/humanloop/eval_utils/context.py

Lines changed: 100 additions & 3 deletions
@@ -1,7 +1,11 @@
-from typing import Callable, TypedDict
+from contextvars import ContextVar
+from dataclasses import dataclass
+from typing import Any, Callable
+from opentelemetry.trace import Tracer


-class EvaluationContext(TypedDict):
+@dataclass
+class EvaluationContext:
    """Context Log to Humanloop.

    Per datapoint state that is set when an Evaluation is ran.
@@ -23,4 +27,97 @@ class EvaluationContext(TypedDict):
    run_id: str


-EVALUATION_CONTEXT_VARIABLE_NAME = "__EVALUATION_CONTEXT"
+_EVALUATION_CONTEXT_VAR: ContextVar[EvaluationContext] = ContextVar("__EVALUATION_CONTEXT")
+
+_UnsafeContextRead = RuntimeError("Attempting to read from thread Context when variable was not set.")
+
+
+def set_evaluation_context(context: EvaluationContext):
+    _EVALUATION_CONTEXT_VAR.set(context)
+
+
+def get_evaluation_context() -> EvaluationContext:
+    try:
+        return _EVALUATION_CONTEXT_VAR.get()
+    except LookupError:
+        raise _UnsafeContextRead
+
+
+def evaluation_context_set() -> bool:
+    try:
+        _EVALUATION_CONTEXT_VAR.get()
+        return True
+    except LookupError:
+        return False
+
+
+def log_belongs_to_evaluated_file(log_args: dict[str, Any]) -> bool:
+    try:
+        evaluation_context: EvaluationContext = _EVALUATION_CONTEXT_VAR.get()
+        return evaluation_context.file_id == log_args.get("id") or evaluation_context.path == log_args.get("path")
+    except LookupError:
+        # Not in an evaluation context
+        return False
+
+
+def is_evaluated_file(file_path) -> bool:
+    try:
+        evaluation_context = _EVALUATION_CONTEXT_VAR.get()
+        return evaluation_context.path == file_path
+    except LookupError:
+        raise _UnsafeContextRead
+
+
+@dataclass
+class PromptUtilityContext:
+    tracer: Tracer
+    _in_prompt_utility: int
+
+    @property
+    def in_prompt_utility(self) -> bool:
+        return self._in_prompt_utility > 0
+
+
+_PROMPT_UTILITY_CONTEXT_VAR: ContextVar[PromptUtilityContext] = ContextVar("__PROMPT_UTILITY_CONTEXT")
+
+
+def in_prompt_utility_context() -> bool:
+    try:
+        return _PROMPT_UTILITY_CONTEXT_VAR.get().in_prompt_utility
+    except LookupError:
+        return False
+
+
+def set_prompt_utility_context(tracer: Tracer):
+    global _PROMPT_UTILITY_CONTEXT_VAR
+    try:
+        prompt_utility_context = _PROMPT_UTILITY_CONTEXT_VAR.get()
+        # Already set, push another context
+        prompt_utility_context._in_prompt_utility += 1
+        _PROMPT_UTILITY_CONTEXT_VAR.set(prompt_utility_context)
+    except LookupError:
+        _PROMPT_UTILITY_CONTEXT_VAR.set(
+            PromptUtilityContext(
+                tracer=tracer,
+                _in_prompt_utility=1,
+            )
+        )
+
+
+def get_prompt_utility_context() -> PromptUtilityContext:
+    try:
+        return _PROMPT_UTILITY_CONTEXT_VAR.get()
+    except LookupError:
+        raise _UnsafeContextRead
+
+
+def unset_prompt_utility_context():
+    global _PROMPT_UTILITY_CONTEXT_VAR_TOKEN
+    try:
+        prompt_utility_context = _PROMPT_UTILITY_CONTEXT_VAR.get()
+        if prompt_utility_context._in_prompt_utility >= 1:
+            prompt_utility_context._in_prompt_utility -= 1
+        else:
+            raise ValueError("No matching unset_prompt_utility_context() call.")
+    except LookupError:
+        raise _UnsafeContextRead
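The module now holds the ContextVars itself and exposes small helpers around them, including a reference-counted PromptUtilityContext so nested prompt-utility calls share one tracer. A short usage sketch; the tracer name and path are illustrative, and in normal use run_eval and the prompt utility manage this state rather than user code:

```python
from opentelemetry import trace

from humanloop.eval_utils.context import (
    evaluation_context_set,
    in_prompt_utility_context,
    log_belongs_to_evaluated_file,
    set_prompt_utility_context,
    unset_prompt_utility_context,
)

# Outside an Evaluation nothing is set, so the guard helpers are safe no-ops.
assert not evaluation_context_set()
assert not log_belongs_to_evaluated_file({"path": "My Project/My Prompt"})  # made-up path

# The prompt-utility context counts nested entries instead of acting as a plain flag.
tracer = trace.get_tracer("humanloop.sdk")  # illustrative tracer name
set_prompt_utility_context(tracer=tracer)   # outermost entry, counter -> 1
set_prompt_utility_context(tracer=tracer)   # nested entry, counter -> 2
assert in_prompt_utility_context()
unset_prompt_utility_context()              # counter -> 1
unset_prompt_utility_context()              # counter -> 0, context considered inactive
```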
