Skip to content

Commit 12d3666

Browse files
Eval utilities tweaks (#20)
Extends the base Evaluations client with a `run` method to avoid type errors, updates the local runner to use the datapoint dict instead of the Pydantic object, and tweaks some of the warning wording. --------- Co-authored-by: fern-api <115122769+fern-api[bot]@users.noreply.github.com>
1 parent 271d3cf commit 12d3666

File tree

6 files changed

+59
-22
lines changed

6 files changed

+59
-22
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ client.prompts.log(
4141
messages=[{"role": "user", "content": "What really happened at Roswell?"}],
4242
inputs={"person": "Trump"},
4343
created_at=datetime.datetime.fromisoformat(
44-
"2024-07-19 00:29:35.178000+00:00",
44+
"2024-07-18 23:29:35.178000+00:00",
4545
),
4646
provider_latency=6.5931549072265625,
4747
output_message={
@@ -88,7 +88,7 @@ async def main() -> None:
8888
],
8989
inputs={"person": "Trump"},
9090
created_at=datetime.datetime.fromisoformat(
91-
"2024-07-19 00:29:35.178000+00:00",
91+
"2024-07-18 23:29:35.178000+00:00",
9292
),
9393
provider_latency=6.5931549072265625,
9494
output_message={

reference.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ client.prompts.log(
5656
messages=[{"role": "user", "content": "What really happened at Roswell?"}],
5757
inputs={"person": "Trump"},
5858
created_at=datetime.datetime.fromisoformat(
59-
"2024-07-19 00:29:35.178000+00:00",
59+
"2024-07-18 23:29:35.178000+00:00",
6060
),
6161
provider_latency=6.5931549072265625,
6262
output_message={
@@ -6258,10 +6258,10 @@ client.flows.log(
62586258
output="The patient is likely experiencing a myocardial infarction. Immediate medical attention is required.",
62596259
trace_status="incomplete",
62606260
start_time=datetime.datetime.fromisoformat(
6261-
"2024-07-08 22:40:35+00:00",
6261+
"2024-07-08 21:40:35+00:00",
62626262
),
62636263
end_time=datetime.datetime.fromisoformat(
6264-
"2024-07-08 22:40:39+00:00",
6264+
"2024-07-08 21:40:39+00:00",
62656265
),
62666266
)
62676267

src/humanloop/client.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,46 @@
11
import typing
2+
from typing import Optional, List, Sequence
23
import os
34
import httpx
4-
from functools import partial
55

66
from .base_client import BaseHumanloop, AsyncBaseHumanloop
77
from .environment import HumanloopEnvironment
8-
from .eval_utils import _run_eval
8+
from .eval_utils import _run_eval, Dataset, File, Evaluator, EvaluatorCheck
9+
from .base_client import EvaluationsClient
10+
11+
class ExtendedEvalsClient(EvaluationsClient):
12+
13+
client: BaseHumanloop
14+
15+
def run(
16+
self,
17+
file: File,
18+
name: Optional[str],
19+
dataset: Dataset,
20+
evaluators: Optional[Sequence[Evaluator]] = None,
21+
# logs: typing.Sequence[dict] | None = None,
22+
workers: int = 4,
23+
) -> List[EvaluatorCheck]:
24+
"""
25+
Evaluate your function for a given `Dataset` and set of `Evaluators`.
26+
:param file: the Humanloop file being evaluated, including a function to run over the dataset.
27+
:param name: the name of the Evaluation to run. If it does not exist, a new Evaluation will be created under your File.
28+
:param dataset: the dataset to map your function over to produce the outputs required by the Evaluation.
29+
:param evaluators: define how judgments are provided for this Evaluation.
30+
:param workers: the number of threads to process datapoints using your function concurrently.
31+
:return: per Evaluator checks.
32+
"""
33+
if self.client is None:
34+
raise ValueError("Need Humanloop client defined to run evals")
35+
36+
return _run_eval(
37+
client=self.client,
38+
file=file,
39+
name=name,
40+
dataset=dataset,
41+
evaluators=evaluators,
42+
workers=workers,
43+
)
944

1045

1146
class Humanloop(BaseHumanloop):
@@ -37,7 +72,9 @@ def __init__(
3772
follow_redirects=follow_redirects,
3873
httpx_client=httpx_client,
3974
)
40-
self.evaluations.run = partial(_run_eval, client=self) # type: ignore[attr-defined]
75+
eval_client = ExtendedEvalsClient(client_wrapper=self._client_wrapper)
76+
eval_client.client = self
77+
self.evaluations = eval_client
4178

4279

4380
class AsyncHumanloop(AsyncBaseHumanloop):

src/humanloop/eval_utils.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,6 @@ def _run_eval(
205205
try:
206206
Flow.parse_obj(version)
207207
except ValidationError:
208-
logger.warning("Invalid Flow `version` in your `file` request. Setting your version payload as flow `attributes`.")
209208
flow_version = {"attributes": version}
210209
file_dict = {**file, **flow_version}
211210
hl_file = client.flows.upsert(**file_dict)
@@ -307,11 +306,12 @@ def _run_eval(
307306
# Define the function to execute your function in parallel and Log to Humanloop
308307
def process_datapoint(datapoint: Datapoint):
309308
start_time = datetime.now()
309+
datapoint_dict = datapoint.dict()
310310
try:
311-
if datapoint.messages:
312-
output = function_(**datapoint.inputs, messages=datapoint.messages)
311+
if "messages" in datapoint_dict:
312+
output = function_(**datapoint_dict["inputs"], messages=datapoint_dict["messages"])
313313
else:
314-
output = function_(**datapoint.inputs)
314+
output = function_(**datapoint_dict["inputs"])
315315
if custom_logger:
316316
log = function_(client=client, output=output)
317317
else:
@@ -340,7 +340,7 @@ def process_datapoint(datapoint: Datapoint):
340340
start_time = datetime.now()
341341
eval_function = local_evaluator["callable"]
342342
if local_evaluator["args_type"] == "target_required":
343-
judgment = eval_function(log.dict(), datapoint.target)
343+
judgment = eval_function(log.dict(), datapoint_dict["target"])
344344
else:
345345
judgment = eval_function(log.dict())
346346

@@ -375,7 +375,7 @@ def process_datapoint(datapoint: Datapoint):
375375

376376
# Generate locally if a file `callable` is provided
377377
if function_:
378-
logger.info(f"{CYAN}\nRunning {hl_file.name} {type_} callable over {hl_dataset.name}{RESET} Dataset using {workers} workers")
378+
logger.info(f"{CYAN}\nRunning {hl_file.name} over the Dataset {hl_dataset.name} using {workers} workers{RESET} ")
379379
completed_tasks = 0
380380
with ThreadPoolExecutor(max_workers=workers) as executor:
381381
futures = [
@@ -387,7 +387,7 @@ def process_datapoint(datapoint: Datapoint):
387387
_progress_bar(total_datapoints, completed_tasks)
388388
else:
389389
# TODO: trigger run when updated API is available
390-
logger.info(f"{CYAN}\nRunning {type_} {hl_file.name} over the Dataset {hl_dataset.name}{RESET}")
390+
logger.info(f"{CYAN}\nRunning {hl_file.name} over the Dataset {hl_dataset.name}{RESET}")
391391

392392
# Wait for the Evaluation to complete then print the results
393393
complete = False
@@ -471,7 +471,7 @@ def get_score_from_evaluator_stat(stat: Union[NumericStats, BooleanStats]) -> Un
471471
elif isinstance(stat, NumericStats):
472472
score = round(stat.mean, 2)
473473
else:
474-
raise ValueError("Invalid Evaluator Stat type.")
474+
pass
475475
return score
476476

477477

src/humanloop/flows/client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,10 +197,10 @@ def log(
197197
output="The patient is likely experiencing a myocardial infarction. Immediate medical attention is required.",
198198
trace_status="incomplete",
199199
start_time=datetime.datetime.fromisoformat(
200-
"2024-07-08 22:40:35+00:00",
200+
"2024-07-08 21:40:35+00:00",
201201
),
202202
end_time=datetime.datetime.fromisoformat(
203-
"2024-07-08 22:40:39+00:00",
203+
"2024-07-08 21:40:39+00:00",
204204
),
205205
)
206206
"""
@@ -1366,10 +1366,10 @@ async def main() -> None:
13661366
output="The patient is likely experiencing a myocardial infarction. Immediate medical attention is required.",
13671367
trace_status="incomplete",
13681368
start_time=datetime.datetime.fromisoformat(
1369-
"2024-07-08 22:40:35+00:00",
1369+
"2024-07-08 21:40:35+00:00",
13701370
),
13711371
end_time=datetime.datetime.fromisoformat(
1372-
"2024-07-08 22:40:39+00:00",
1372+
"2024-07-08 21:40:39+00:00",
13731373
),
13741374
)
13751375

src/humanloop/prompts/client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def log(
236236
messages=[{"role": "user", "content": "What really happened at Roswell?"}],
237237
inputs={"person": "Trump"},
238238
created_at=datetime.datetime.fromisoformat(
239-
"2024-07-19 00:29:35.178000+00:00",
239+
"2024-07-18 23:29:35.178000+00:00",
240240
),
241241
provider_latency=6.5931549072265625,
242242
output_message={
@@ -2117,7 +2117,7 @@ async def main() -> None:
21172117
],
21182118
inputs={"person": "Trump"},
21192119
created_at=datetime.datetime.fromisoformat(
2120-
"2024-07-19 00:29:35.178000+00:00",
2120+
"2024-07-18 23:29:35.178000+00:00",
21212121
),
21222122
provider_latency=6.5931549072265625,
21232123
output_message={

0 commit comments

Comments
 (0)