
Commit ce96aaf

Author: Andrei Bratu
Refactoring code

1 parent c2fb326 commit ce96aaf

6 files changed: +148 -129 lines


.fernignore

Lines changed: 2 additions & 0 deletions

@@ -3,6 +3,8 @@
 src/humanloop/eval_utils
 src/humanloop/prompt_utils.py
 src/humanloop/client.py
+src/humanloop/overload.py
+src/humanloop/context_variables.py
 mypy.ini
 README.md

src/humanloop/context_variables.py

Lines changed: 21 additions & 9 deletions

@@ -4,6 +4,18 @@
 from opentelemetry.trace import Tracer
 
 
+_UnsafeContextRead = RuntimeError("Attempting to read from a Context when variable was not set.")
+
+
+class _UnsafeContextRead(RuntimeError):
+    message: str
+
+    def __init__(self, context_variable_name: str):
+        super().__init__(
+            f"Attempting to read from a Context when variable {context_variable_name} was not set."
+        )
+
+
 @dataclass
 class EvaluationContext:
     """Context Log to Humanloop.
@@ -27,9 +39,8 @@ class EvaluationContext:
     run_id: str
 
 
-_EVALUATION_CONTEXT_VAR: ContextVar[EvaluationContext] = ContextVar("__EVALUATION_CONTEXT")
-
-_UnsafeContextRead = RuntimeError("Attempting to read from thread Context when variable was not set.")
+_EVALUATION_CONTEXT_VAR_NAME = "__EVALUATION_CONTEXT"
+_EVALUATION_CONTEXT_VAR: ContextVar[EvaluationContext] = ContextVar(_EVALUATION_CONTEXT_VAR_NAME)
 
 
 def set_evaluation_context(context: EvaluationContext):
@@ -40,7 +51,7 @@ def get_evaluation_context() -> EvaluationContext:
     try:
         return _EVALUATION_CONTEXT_VAR.get()
     except LookupError:
-        raise _UnsafeContextRead
+        raise _UnsafeContextRead(_EVALUATION_CONTEXT_VAR_NAME)
 
 
 def evaluation_context_set() -> bool:
@@ -60,12 +71,12 @@ def log_belongs_to_evaluated_file(log_args: dict[str, Any]) -> bool:
         return False
 
 
-def is_evaluated_file(file_path) -> bool:
+def is_evaluated_file(file_path: str) -> bool:
     try:
         evaluation_context = _EVALUATION_CONTEXT_VAR.get()
         return evaluation_context.path == file_path
     except LookupError:
-        raise _UnsafeContextRead
+        raise _UnsafeContextRead(_EVALUATION_CONTEXT_VAR_NAME)
 
 
 @dataclass
@@ -78,7 +89,8 @@ def in_prompt_utility(self) -> bool:
         return self._in_prompt_utility > 0
 
 
-_PROMPT_UTILITY_CONTEXT_VAR: ContextVar[PromptUtilityContext] = ContextVar("__PROMPT_UTILITY_CONTEXT")
+_PROMPT_UTILITY_CONTEXT_VAR_NAME = "__PROMPT_UTILITY_CONTEXT"
+_PROMPT_UTILITY_CONTEXT_VAR: ContextVar[PromptUtilityContext] = ContextVar(_PROMPT_UTILITY_CONTEXT_VAR_NAME)
 
 
 def in_prompt_utility_context() -> bool:
@@ -108,7 +120,7 @@ def get_prompt_utility_context() -> PromptUtilityContext:
     try:
         return _PROMPT_UTILITY_CONTEXT_VAR.get()
    except LookupError:
-        raise _UnsafeContextRead
+        raise _UnsafeContextRead(_PROMPT_UTILITY_CONTEXT_VAR_NAME)
 
 
 def unset_prompt_utility_context():
@@ -120,4 +132,4 @@ def unset_prompt_utility_context():
         else:
             raise ValueError("No matching unset_prompt_utility_context() call.")
     except LookupError:
-        raise _UnsafeContextRead
+        raise _UnsafeContextRead(_PROMPT_UTILITY_CONTEXT_VAR_NAME)
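
The refactor's central pattern: reading a ContextVar that was never set raises LookupError, which these helpers convert into an _UnsafeContextRead naming the offending variable. A minimal standalone sketch of that behavior, assuming nothing from the SDK (_GREETING_VAR and read_greeting are illustrative names):

from contextvars import ContextVar

_GREETING_VAR_NAME = "__GREETING"
_GREETING_VAR: ContextVar[str] = ContextVar(_GREETING_VAR_NAME)


class _UnsafeContextRead(RuntimeError):
    def __init__(self, context_variable_name: str):
        # RuntimeError takes positional args only
        super().__init__(f"Attempting to read from a Context when variable {context_variable_name} was not set.")


def read_greeting() -> str:
    try:
        return _GREETING_VAR.get()
    except LookupError:
        # Re-raise with the variable's name instead of a bare LookupError
        raise _UnsafeContextRead(_GREETING_VAR_NAME)


_GREETING_VAR.set("hello")
assert read_greeting() == "hello"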

src/humanloop/eval_utils/run.py

Lines changed: 1 addition & 118 deletions

@@ -23,34 +23,20 @@
 from functools import partial
 from logging import INFO
 from typing import Callable, Dict, List, Literal, Optional, Sequence, Tuple, TypeVar, Union
-import warnings
 
 from humanloop import EvaluatorResponse, FlowResponse, PromptResponse, ToolResponse
 from humanloop.core.api_error import ApiError
-from humanloop.eval_utils.context import (
-    EvaluationContext,
-    get_evaluation_context,
-    get_prompt_utility_context,
-    in_prompt_utility_context,
-    log_belongs_to_evaluated_file,
-    set_evaluation_context,
-)
+from humanloop.context_variables import EvaluationContext, set_evaluation_context
 from humanloop.eval_utils.types import Dataset, Evaluator, EvaluatorCheck, File
 
 # We use TypedDicts for requests, which is consistent with the rest of the SDK
-from humanloop.evaluators.client import EvaluatorsClient
-from humanloop.flows.client import FlowsClient
-from humanloop.otel.constants import HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE, HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME
-from humanloop.otel.helpers import write_to_opentelemetry_span
-from humanloop.prompts.client import PromptsClient
 from humanloop.requests import CodeEvaluatorRequestParams as CodeEvaluatorDict
 from humanloop.requests import ExternalEvaluatorRequestParams as ExternalEvaluator
 from humanloop.requests import FlowKernelRequestParams as FlowDict
 from humanloop.requests import HumanEvaluatorRequestParams as HumanEvaluatorDict
 from humanloop.requests import LlmEvaluatorRequestParams as LLMEvaluatorDict
 from humanloop.requests import PromptKernelRequestParams as PromptDict
 from humanloop.requests import ToolKernelRequestParams as ToolDict
-from humanloop.tools.client import ToolsClient
 from humanloop.types import BooleanEvaluatorStatsResponse as BooleanStats
 from humanloop.types import DatapointResponse as Datapoint
 from humanloop.types import EvaluationResponse, EvaluationStats
@@ -60,14 +46,9 @@
 from humanloop.types import NumericEvaluatorStatsResponse as NumericStats
 from humanloop.types import PromptKernelRequest as Prompt
 from humanloop.types import ToolKernelRequest as Tool
-from humanloop.types.create_evaluator_log_response import CreateEvaluatorLogResponse
-from humanloop.types.create_flow_log_response import CreateFlowLogResponse
-from humanloop.types.create_prompt_log_response import CreatePromptLogResponse
-from humanloop.types.create_tool_log_response import CreateToolLogResponse
 from humanloop.types.datapoint_response import DatapointResponse
 from humanloop.types.dataset_response import DatasetResponse
 from humanloop.types.evaluation_run_response import EvaluationRunResponse
-from humanloop.types.prompt_call_response import PromptCallResponse
 from humanloop.types.run_stats_response import RunStatsResponse
 from pydantic import ValidationError
 
@@ -97,9 +78,6 @@
 RESET = "\033[0m"
 
 
-CLIENT_TYPE = TypeVar("CLIENT_TYPE", PromptsClient, ToolsClient, FlowsClient, EvaluatorsClient)
-
-
 class HumanloopUtilityError(Exception):
     def __init__(self, message):
         self.message = message
@@ -108,101 +86,6 @@ def __str__(self):
         return self.message
 
 
-def prompt_call_evaluation_aware(client: PromptsClient) -> PromptsClient:
-    client._call = client.call
-
-    def _overload_call(self, **kwargs) -> PromptCallResponse:
-        if in_prompt_utility_context():
-            kwargs = {**kwargs, "save": False}
-
-            try:
-                response = self._call(**kwargs)
-                response = typing.cast(PromptCallResponse, response)
-            except Exception as e:
-                # TODO: Bug found in backend: not specifying a model 400s but creates a File
-                raise HumanloopUtilityError(message=str(e)) from e
-
-            response_copy = response.dict()
-            prompt_utility_context = get_prompt_utility_context()
-            for idx, _ in enumerate(response_copy.get("logs", [])):
-                del response_copy["logs"][idx]["created_at"]
-            for idx, _ in enumerate(response_copy["prompt"].get("environments", [])):
-                del response_copy["prompt"]["environments"][idx]["created_at"]
-            del response_copy["prompt"]["last_used_at"]
-            del response_copy["prompt"]["updated_at"]
-            del response_copy["prompt"]["created_at"]
-            del response_copy["start_time"]
-            del response_copy["end_time"]
-
-            with prompt_utility_context.tracer.start_as_current_span(HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME) as span:
-                write_to_opentelemetry_span(
-                    span=span,
-                    key=HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE,
-                    value=response_copy,
-                )
-            return response
-        else:
-            return self._call(**kwargs)
-
-    # Replace the original log method with the overloaded one
-    client.call = types.MethodType(_overload_call, client)
-    # Return the client with the overloaded log method
-    logger.debug("Overloaded the .log method of %s", client)
-    return client
-
-
-def log_with_evaluation_context(client: CLIENT_TYPE) -> CLIENT_TYPE:
-    """
-    Wrap the `log` method of the provided Humanloop client to use EVALUATION_CONTEXT.
-
-    This makes the overloaded log actions be aware of whether the created Log is
-    part of an Evaluation (e.g. one started by eval_utils.run_eval).
-    """
-    # Copy the original log method in a hidden attribute
-    client._log = client.log
-
-    def _overload_log(
-        self, **kwargs
-    ) -> Union[
-        CreatePromptLogResponse,
-        CreateToolLogResponse,
-        CreateFlowLogResponse,
-        CreateEvaluatorLogResponse,
-    ]:
-        if log_belongs_to_evaluated_file(log_args=kwargs):
-            evaluation_context = get_evaluation_context()
-            for attribute in ["source_datapoint_id", "run_id"]:
-                if attribute not in kwargs or kwargs[attribute] is None:
-                    kwargs[attribute] = getattr(evaluation_context, attribute)
-
-            # Call the original .log method
-            logger.debug(
-                "Logging %s inside _overloaded_log on Thread %s",
-                kwargs,
-                evaluation_context,
-                threading.get_ident(),
-            )
-
-        try:
-            response = self._log(**kwargs)
-        except Exception as e:
-            logger.error(f"Failed to log: {e}")
-            raise e
-
-        # Notify the run_eval utility about one Log being created
-        if log_belongs_to_evaluated_file(log_args=kwargs):
-            evaluation_context = get_evaluation_context()
-            evaluation_context.upload_callback(log_id=response.id)
-
-        return response
-
-    # Replace the original log method with the overloaded one
-    client.log = types.MethodType(_overload_log, client)
-    # Return the client with the overloaded log method
-    logger.debug("Overloaded the .call method of %s", client)
-    return client
-
-
 def run_eval(
     client: "BaseHumanloop",
     file: File,
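
The CLIENT_TYPE TypeVar removed here reappears in the new overload.py, constrained to just PromptsClient and FlowsClient. A constrained TypeVar lets a wrapper return the same concrete client type it receives instead of a Union; a minimal sketch with hypothetical stand-in classes (PromptsLike and FlowsLike are not SDK types):

from typing import TypeVar


class PromptsLike:
    pass


class FlowsLike:
    pass


# A constrained TypeVar: CLIENT may only ever be one of these two classes
CLIENT = TypeVar("CLIENT", PromptsLike, FlowsLike)


def wrap(client: CLIENT) -> CLIENT:
    # Type checkers resolve the return type to the argument's concrete class,
    # so wrapping does not widen the type
    return client


prompts: PromptsLike = wrap(PromptsLike())
flows: FlowsLike = wrap(FlowsLike())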

src/humanloop/otel/exporter.py

Lines changed: 1 addition & 1 deletion

@@ -11,7 +11,7 @@
 from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
 
 from humanloop.core import ApiError as HumanloopApiError
-from humanloop.eval_utils.context import (
+from humanloop.context_variables import (
     EvaluationContext,
     evaluation_context_set,
     get_evaluation_context,

src/humanloop/overload.py

Lines changed: 120 additions & 0 deletions

@@ -0,0 +1,120 @@
+import logging
+import threading
+import types
+from typing import TypeVar, Union
+import typing
+
+from humanloop.context_variables import (
+    get_evaluation_context,
+    get_prompt_utility_context,
+    in_prompt_utility_context,
+    log_belongs_to_evaluated_file,
+)
+from humanloop.eval_utils.run import HumanloopUtilityError
+from humanloop.flows.client import FlowsClient
+from humanloop.otel.constants import HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE, HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME
+from humanloop.otel.helpers import write_to_opentelemetry_span
+from humanloop.prompts.client import PromptsClient
+from humanloop.types.create_evaluator_log_response import CreateEvaluatorLogResponse
+from humanloop.types.create_flow_log_response import CreateFlowLogResponse
+from humanloop.types.create_prompt_log_response import CreatePromptLogResponse
+from humanloop.types.create_tool_log_response import CreateToolLogResponse
+from humanloop.types.prompt_call_response import PromptCallResponse
+
+logger = logging.getLogger("humanloop.sdk")
+
+
+CLIENT_TYPE = TypeVar("CLIENT_TYPE", PromptsClient, FlowsClient)
+
+
+def overload_log(client: CLIENT_TYPE) -> CLIENT_TYPE:
+    """
+    Wrap the `log` method of the provided Humanloop client to use EVALUATION_CONTEXT.
+
+    This makes the overloaded log actions be aware of whether the created Log is
+    part of an Evaluation (e.g. one started by eval_utils.run_eval).
+    """
+    # Copy the original log method in a hidden attribute
+    client._log = client.log
+
+    def _overload_log(
+        self, **kwargs
+    ) -> Union[
+        CreatePromptLogResponse,
+        CreateToolLogResponse,
+        CreateFlowLogResponse,
+        CreateEvaluatorLogResponse,
+    ]:
+        if log_belongs_to_evaluated_file(log_args=kwargs):
+            evaluation_context = get_evaluation_context()
+            for attribute in ["source_datapoint_id", "run_id"]:
+                if attribute not in kwargs or kwargs[attribute] is None:
+                    kwargs[attribute] = getattr(evaluation_context, attribute)
+
+            # Call the original .log method
+            logger.debug(
+                "Logging %s inside _overloaded_log with context %s on Thread %s",
+                kwargs,
+                evaluation_context,
+                threading.get_ident(),
+            )
+
+        try:
+            response = self._log(**kwargs)
+        except Exception as e:
+            logger.error(f"Failed to log: {e}")
+            raise e
+
+        # Notify the run_eval utility about one Log being created
+        if log_belongs_to_evaluated_file(log_args=kwargs):
+            evaluation_context = get_evaluation_context()
+            evaluation_context.upload_callback(log_id=response.id)
+
+        return response
+
+    # Replace the original log method with the overloaded one
+    client.log = types.MethodType(_overload_log, client)
+    # Return the client with the overloaded log method
+    logger.debug("Overloaded the .log method of %s", client)
+    return client
+
+
+def overload_prompt_call(client: PromptsClient) -> PromptsClient:
+    client._call = client.call
+
+    def _overload_call(self, **kwargs) -> PromptCallResponse:
+        if in_prompt_utility_context():
+            try:
+                response = self._call(**kwargs)
+                response = typing.cast(PromptCallResponse, response)
+            except Exception as e:
+                # TODO: Bug found in backend: not specifying a model 400s but creates a File
+                raise HumanloopUtilityError(message=str(e)) from e
+
+            response_copy = response.dict()
+            prompt_utility_context = get_prompt_utility_context()
+            for idx, _ in enumerate(response_copy.get("logs", [])):
+                del response_copy["logs"][idx]["created_at"]
+            for idx, _ in enumerate(response_copy["prompt"].get("environments", [])):
+                del response_copy["prompt"]["environments"][idx]["created_at"]
+            del response_copy["prompt"]["last_used_at"]
+            del response_copy["prompt"]["updated_at"]
+            del response_copy["prompt"]["created_at"]
+            del response_copy["start_time"]
+            del response_copy["end_time"]
+
+            with prompt_utility_context.tracer.start_as_current_span(HUMANLOOP_INTERCEPTED_HL_CALL_SPAN_NAME) as span:
+                write_to_opentelemetry_span(
+                    span=span,
+                    key=HUMANLOOP_INTERCEPTED_HL_CALL_RESPONSE,
+                    value=response_copy,
+                )
+            return response
+        else:
+            return self._call(**kwargs)
+
+    # Replace the original call method with the overloaded one
+    client.call = types.MethodType(_overload_call, client)
+    # Return the client with the overloaded call method
+    logger.debug("Overloaded the .call method of %s", client)
+    return client
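
Both helpers patch one client instance rather than subclassing: the original bound method is stashed on a hidden attribute, and types.MethodType binds the replacement to that single object. A self-contained sketch of the technique (Client, greet, and _wrapped are hypothetical names, not SDK code):

import types


class Client:
    def greet(self, name: str) -> str:
        return f"hello {name}"


client = Client()
# Stash the original bound method, mirroring client._log = client.log above
client._greet = client.greet


def _wrapped(self, name: str) -> str:
    # Adjust the arguments, then delegate to the stashed original
    return self._greet(name.upper())


# Bind the wrapper to this instance only; other Client objects keep the class method
client.greet = types.MethodType(_wrapped, client)

assert client.greet("world") == "hello WORLD"
assert Client().greet("world") == "hello world"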

src/humanloop/utilities/prompt.py

Lines changed: 3 additions & 1 deletion

@@ -6,7 +6,7 @@
 from opentelemetry.trace import Tracer
 from typing_extensions import Unpack
 
-from humanloop.eval_utils.context import set_prompt_utility_context, unset_prompt_utility_context
+from humanloop.context_variables import set_prompt_utility_context, unset_prompt_utility_context
 from humanloop.eval_utils.run import HumanloopUtilityError
 from humanloop.utilities.helpers import bind_args
 from humanloop.utilities.types import DecoratorPromptKernelRequestParams
@@ -60,6 +60,8 @@ def wrapper(*args: Sequence[Any], **kwargs: Mapping[str, Any]) -> Any:
         except Exception as e:
             logger.error(f"Error calling {func.__name__}: {e}")
             output = None
+            # TODO: output ought to be None on errors, check
+            # all decorators
         output_stringified = jsonify_if_not_string(
             func=func,
             output=output,
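
The new TODO lands in the decorator's error path; around it, the wrapper pairs set_prompt_utility_context with unset_prompt_utility_context, and the context file guards balance with a nesting counter (in_prompt_utility is true while _in_prompt_utility > 0, and an unmatched unset raises ValueError). A standalone sketch of that counter discipline, with illustrative names (enter and leave are not SDK functions):

from contextvars import ContextVar
from dataclasses import dataclass


@dataclass
class _UtilityContext:
    _depth: int = 0

    @property
    def active(self) -> bool:
        return self._depth > 0


_CTX: ContextVar[_UtilityContext] = ContextVar("__UTILITY_CONTEXT")


def enter() -> None:
    try:
        ctx = _CTX.get()
    except LookupError:
        ctx = _UtilityContext()
        _CTX.set(ctx)
    ctx._depth += 1


def leave() -> None:
    ctx = _CTX.get()
    if ctx._depth > 0:
        ctx._depth -= 1
    else:
        # Mirrors "No matching unset_prompt_utility_context() call."
        raise ValueError("No matching leave() call.")


enter()
enter()  # nested decorated calls share one counter
assert _CTX.get().active
leave()
leave()
assert not _CTX.get().active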
