
Commit 2492bb0

Author: Andrei Bratu
Feature/eng 1574 eval_run utility regressions (#44)
* Use prompt.call inside prompt utility
* Fix flow log completions in eval_run context
* Refactored code in processor/ exporter
1 parent edce541 commit 2492bb0

File tree

31 files changed · +3424 −1709 lines changed


.github/workflows/ci.yml

Lines changed: 3 additions & 2 deletions
@@ -6,7 +6,7 @@ jobs:
    runs-on: ubuntu-20.04
    strategy:
      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.9", "3.10", "3.11"]
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
@@ -25,7 +25,7 @@ jobs:
    runs-on: ubuntu-20.04
    strategy:
      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.9", "3.10", "3.11"]
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
@@ -47,6 +47,7 @@ jobs:
      REPLICATE_API_KEY: ${{ secrets.REPLICATE_API_KEY }}
      GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
      COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
+      HUMANLOOP_API_KEY: ${{ secrets.HUMANLOOP_API_KEY }}

  publish:
    needs: [compile, test]
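Both test matrices drop 3.12/3.13 and run only on Python 3.9–3.11, and the test job now receives a HUMANLOOP_API_KEY secret alongside the other provider keys. A minimal sketch of how a test suite might guard on that variable; the fixture and file name are illustrative, not part of this commit:

```python
# conftest.py (illustrative) - skip integration tests when the CI secret is absent
import os

import pytest


@pytest.fixture(scope="session")
def humanloop_api_key() -> str:
    """Return the HUMANLOOP_API_KEY injected by CI, or skip dependent tests."""
    key = os.environ.get("HUMANLOOP_API_KEY")
    if not key:
        pytest.skip("HUMANLOOP_API_KEY is not set")
    return key
```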

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -5,3 +5,5 @@ poetry.toml
.ruff_cache/
.vscode
.env
+tests/assets/*.jsonl
+tests/assets/*.parquet

poetry.lock

Lines changed: 1223 additions & 1260 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 6 additions & 3 deletions
@@ -49,7 +49,8 @@ parse = ">=1"
pydantic = ">= 1.9.2"
pydantic-core = "^2.18.2"
typing_extensions = ">= 4.0.0"
-chromadb = "<0.3.7"
+deepdiff = {extras = ["murmur"], version = "^8.2.0"}
+mmh3 = "^5.1.0"

[tool.poetry.dev-dependencies]
mypy = "1.0.1"
@@ -68,9 +69,11 @@ python-dotenv = "^1.0.1"
replicate = "^1.0.3"
ruff = "^0.5.6"
types-jsonschema = "^4.23.0.20240813"
-chromadb="<0.3.5"
-pandas = "<2.2.0"
+onnxruntime = "<=1.19.2"
+chromadb = "^0.6.3"
+pandas = "^2.2.0"
pyarrow = "^19.0.0"
+numpy = "<2.0.0"

[tool.pytest.ini_options]
testpaths = [ "tests" ]
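chromadb and pandas move to current releases, onnxruntime and numpy are pinned to keep the dev environment resolvable, and deepdiff (with its murmur extra) plus mmh3 become runtime dependencies. A rough sketch of the structural diffing/hashing deepdiff offers; the dictionaries below are made up for illustration:

```python
from deepdiff import DeepDiff, DeepHash

# Two versions of a config-like dict (illustrative values only)
old_kernel = {"model": "gpt-4o", "temperature": 0.7}
new_kernel = {"model": "gpt-4o", "temperature": 0.5}

# Structural diff: reports the changed temperature value
print(DeepDiff(old_kernel, new_kernel))

# Content hash of a nested object; the murmur extra / mmh3 supply a fast hash backend
print(DeepHash(new_kernel)[new_kernel])
```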

src/humanloop/client.py

Lines changed: 7 additions & 28 deletions
@@ -1,4 +1,3 @@
-from contextvars import ContextVar
import os
import typing
from typing import List, Optional, Sequence
@@ -10,8 +9,8 @@
from opentelemetry.trace import Tracer

from humanloop.core.client_wrapper import SyncClientWrapper
+from humanloop.eval_utils.run import prompt_call_evaluation_aware
from humanloop.utilities.types import DecoratorPromptKernelRequestParams
-from humanloop.eval_utils.context import EVALUATION_CONTEXT_VARIABLE_NAME, EvaluationContext

from humanloop.eval_utils import log_with_evaluation_context, run_eval
from humanloop.eval_utils.types import Dataset, Evaluator, EvaluatorCheck, File
@@ -38,10 +37,8 @@ def __init__(
        self,
        *,
        client_wrapper: SyncClientWrapper,
-        evaluation_context_variable: ContextVar[Optional[EvaluationContext]],
    ):
        super().__init__(client_wrapper=client_wrapper)
-        self._evaluation_context_variable = evaluation_context_variable

    def run(
        self,
@@ -70,7 +67,6 @@ def run(
            dataset=dataset,
            evaluators=evaluators,
            workers=workers,
-            evaluation_context_variable=self._evaluation_context_variable,
        )


@@ -118,31 +114,15 @@ def __init__(
            httpx_client=httpx_client,
        )

-        self.evaluation_context_variable: ContextVar[Optional[EvaluationContext]] = ContextVar(
-            EVALUATION_CONTEXT_VARIABLE_NAME
-        )
-
-        eval_client = ExtendedEvalsClient(
-            client_wrapper=self._client_wrapper,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
+        eval_client = ExtendedEvalsClient(client_wrapper=self._client_wrapper)
        eval_client.client = self
        self.evaluations = eval_client
        self.prompts = ExtendedPromptsClient(client_wrapper=self._client_wrapper)

        # Overload the .log method of the clients to be aware of Evaluation Context
-        # TODO: Overload the log for Evaluators and Tools once run_id is added
-        # to them.
-        self.prompts = log_with_evaluation_context(
-            client=self.prompts,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
-        # self.evaluators = log_with_evaluation_context(client=self.evaluators)
-        # self.tools = log_with_evaluation_context(client=self.tools)
-        self.flows = log_with_evaluation_context(
-            client=self.flows,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
+        self.prompts = log_with_evaluation_context(client=self.prompts)
+        self.prompts = prompt_call_evaluation_aware(client=self.prompts)
+        self.flows = log_with_evaluation_context(client=self.flows)

        if opentelemetry_tracer_provider is not None:
            self._tracer_provider = opentelemetry_tracer_provider
@@ -157,9 +137,8 @@ def __init__(
        instrument_provider(provider=self._tracer_provider)
        self._tracer_provider.add_span_processor(
            HumanloopSpanProcessor(
-                exporter=HumanloopSpanExporter(
-                    client=self,
-                )
+                client=self,
+                exporter=HumanloopSpanExporter(client=self),
            ),
        )
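With the ContextVar now owned by humanloop.eval_utils.context, the client no longer threads evaluation_context_variable through constructors; it simply wraps the prompts and flows sub-clients with log_with_evaluation_context (and prompts additionally with prompt_call_evaluation_aware). A rough sketch of the wrapping pattern, assuming .log accepts a run_id keyword; this is not the actual eval_utils implementation:

```python
# Illustrative sketch only; the real log_with_evaluation_context lives in
# humanloop/eval_utils and is not shown in this diff.
import functools

from humanloop.eval_utils.context import (
    get_evaluation_context,
    log_belongs_to_evaluated_file,
)


def log_with_evaluation_context_sketch(client):
    """Wrap client.log so Logs created during an Evaluation attach to the active Run."""
    original_log = client.log

    @functools.wraps(original_log)
    def log(**kwargs):
        # Only Logs for the File under evaluation (matched by id/path) are redirected.
        if log_belongs_to_evaluated_file(kwargs):
            # run_id is a field on EvaluationContext (see context.py below); passing
            # it straight through to .log is an assumption made for this sketch.
            kwargs.setdefault("run_id", get_evaluation_context().run_id)
        return original_log(**kwargs)

    client.log = log
    return client
```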

src/humanloop/eval_utils/context.py

Lines changed: 100 additions & 3 deletions
@@ -1,7 +1,11 @@
-from typing import Callable, TypedDict
+from contextvars import ContextVar
+from dataclasses import dataclass
+from typing import Any, Callable
+from opentelemetry.trace import Tracer


-class EvaluationContext(TypedDict):
+@dataclass
+class EvaluationContext:
    """Context Log to Humanloop.

    Per datapoint state that is set when an Evaluation is ran.
@@ -23,4 +27,97 @@ class EvaluationContext(TypedDict):
    run_id: str


-EVALUATION_CONTEXT_VARIABLE_NAME = "__EVALUATION_CONTEXT"
+_EVALUATION_CONTEXT_VAR: ContextVar[EvaluationContext] = ContextVar("__EVALUATION_CONTEXT")
+
+_UnsafeContextRead = RuntimeError("Attempting to read from thread Context when variable was not set.")
+
+
+def set_evaluation_context(context: EvaluationContext):
+    _EVALUATION_CONTEXT_VAR.set(context)
+
+
+def get_evaluation_context() -> EvaluationContext:
+    try:
+        return _EVALUATION_CONTEXT_VAR.get()
+    except LookupError:
+        raise _UnsafeContextRead
+
+
+def evaluation_context_set() -> bool:
+    try:
+        _EVALUATION_CONTEXT_VAR.get()
+        return True
+    except LookupError:
+        return False
+
+
+def log_belongs_to_evaluated_file(log_args: dict[str, Any]) -> bool:
+    try:
+        evaluation_context: EvaluationContext = _EVALUATION_CONTEXT_VAR.get()
+        return evaluation_context.file_id == log_args.get("id") or evaluation_context.path == log_args.get("path")
+    except LookupError:
+        # Not in an evaluation context
+        return False
+
+
+def is_evaluated_file(file_path) -> bool:
+    try:
+        evaluation_context = _EVALUATION_CONTEXT_VAR.get()
+        return evaluation_context.path == file_path
+    except LookupError:
+        raise _UnsafeContextRead
+
+
+@dataclass
+class PromptUtilityContext:
+    tracer: Tracer
+    _in_prompt_utility: int
+
+    @property
+    def in_prompt_utility(self) -> bool:
+        return self._in_prompt_utility > 0
+
+
+_PROMPT_UTILITY_CONTEXT_VAR: ContextVar[PromptUtilityContext] = ContextVar("__PROMPT_UTILITY_CONTEXT")
+
+
+def in_prompt_utility_context() -> bool:
+    try:
+        return _PROMPT_UTILITY_CONTEXT_VAR.get().in_prompt_utility
+    except LookupError:
+        return False
+
+
+def set_prompt_utility_context(tracer: Tracer):
+    global _PROMPT_UTILITY_CONTEXT_VAR
+    try:
+        prompt_utility_context = _PROMPT_UTILITY_CONTEXT_VAR.get()
+        # Already set, push another context
+        prompt_utility_context._in_prompt_utility += 1
+        _PROMPT_UTILITY_CONTEXT_VAR.set(prompt_utility_context)
+    except LookupError:
+        _PROMPT_UTILITY_CONTEXT_VAR.set(
+            PromptUtilityContext(
+                tracer=tracer,
+                _in_prompt_utility=1,
+            )
+        )
+
+
+def get_prompt_utility_context() -> PromptUtilityContext:
+    try:
+        return _PROMPT_UTILITY_CONTEXT_VAR.get()
+    except LookupError:
+        raise _UnsafeContextRead
+
+
+def unset_prompt_utility_context():
+    global _PROMPT_UTILITY_CONTEXT_VAR_TOKEN
+    try:
+        prompt_utility_context = _PROMPT_UTILITY_CONTEXT_VAR.get()
+        if prompt_utility_context._in_prompt_utility >= 1:
+            prompt_utility_context._in_prompt_utility -= 1
+        else:
+            raise ValueError("No matching unset_prompt_utility_context() call.")
+    except LookupError:
+        raise _UnsafeContextRead
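The module now holds the ContextVars itself and exposes small helpers around them, including a reference-counted PromptUtilityContext so nested prompt-utility calls share one tracer. A short usage sketch; the tracer name and path are illustrative, and in normal use run_eval and the prompt utility manage this state rather than user code:

```python
from opentelemetry import trace

from humanloop.eval_utils.context import (
    evaluation_context_set,
    in_prompt_utility_context,
    log_belongs_to_evaluated_file,
    set_prompt_utility_context,
    unset_prompt_utility_context,
)

# Outside an Evaluation nothing is set, so the guard helpers are safe no-ops.
assert not evaluation_context_set()
assert not log_belongs_to_evaluated_file({"path": "My Project/My Prompt"})  # made-up path

# The prompt-utility context counts nested entries instead of acting as a plain flag.
tracer = trace.get_tracer("humanloop.sdk")  # illustrative tracer name
set_prompt_utility_context(tracer=tracer)   # outermost entry, counter -> 1
set_prompt_utility_context(tracer=tracer)   # nested entry, counter -> 2
assert in_prompt_utility_context()
unset_prompt_utility_context()              # counter -> 1
unset_prompt_utility_context()              # counter -> 0, context considered inactive
```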
