Skip to content

Commit 39b862e

Browse files
Sean Smith and stainless-app[bot]
authored and committed
feat: Add to_dataframe method to BinaryAPIResponse (#56)
Signed-off-by: Sean Smith <sean.smith@contextual.ai>
1 parent 59bb1ab commit 39b862e

File tree

5 files changed

+88
-0
lines changed

5 files changed

+88
-0
lines changed

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ dependencies = [
1414
"anyio>=3.5.0, <5",
1515
"distro>=1.7.0, <2",
1616
"sniffio",
17+
"pandas==2.2.3",
18+
"numpy==2.0.2",
1719
]
1820
requires-python = ">= 3.8"
1921
classifiers = [
@@ -55,6 +57,8 @@ dev-dependencies = [
5557
"importlib-metadata>=6.7.0",
5658
"rich>=13.7.1",
5759
"nest_asyncio==1.6.0",
60+
"pandas==2.2.3",
61+
"numpy==2.0.2",
5862
]
5963

6064
[tool.rye.scripts]

requirements-dev.lock

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,7 @@ virtualenv==20.24.5
102102
# via nox
103103
zipp==3.17.0
104104
# via importlib-metadata
105+
pandas==2.2.3
106+
# via contextual-client
107+
numpy==2.0.2
108+
# via contextual-client

requirements.lock

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,7 @@ typing-extensions==4.12.2
4343
# via contextual-client
4444
# via pydantic
4545
# via pydantic-core
46+
pandas==2.2.3
47+
# via contextual-client
48+
numpy==2.0.2
49+
# via contextual-client

src/contextual/_response.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from __future__ import annotations
22

33
import os
4+
import ast
5+
import json
46
import inspect
57
import logging
68
import datetime
@@ -23,6 +25,7 @@
2325
import anyio
2426
import httpx
2527
import pydantic
28+
from pandas import DataFrame # type: ignore[import]
2629

2730
from ._types import NoneType
2831
from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type, extract_type_var_from_base
@@ -479,6 +482,61 @@ class BinaryAPIResponse(APIResponse[bytes]):
479482
the API request, e.g. `.with_streaming_response.get_binary_response()`
480483
"""
481484

485+
def to_dataframe(self) -> DataFrame:
    """Convert a JSON Lines response body into a pandas DataFrame.

    The body is read in full, decoded as UTF-8 and split on newlines. Every
    non-blank line is parsed as one JSON document and becomes one row.

    When a row carries a ``results`` field, that field is expanded into
    top-level columns. ``results`` may already be a mapping, or a string
    holding either JSON or a Python literal (parsed with
    ``ast.literal_eval``, which never executes code). Nested mappings are
    flattened up to three levels deep, joining the keys with underscores
    (e.g. ``equivalence_score_score``); anything nested deeper is stored
    as-is. A flattened key that matches an existing field overwrites it.
    A ``results`` value that does not resolve to a mapping is left in place
    untouched instead of being discarded.

    Lines that cannot be processed are logged and skipped so that a single
    malformed record does not abort the whole conversion.

    Note: This method requires the `pandas` library to be installed.

    Returns:
        DataFrame: One row per successfully processed line; empty when the
        body contains no usable lines.
    """
    text = self.read().decode("utf-8").strip()

    rows = []
    # Split on "\n" only: str.splitlines() would also break on separators
    # such as U+2028, which may appear unescaped inside JSON strings.
    for line in text.split("\n"):
        if not line.strip():
            # An empty body or a blank separator line is not a record, so it
            # must not be reported as a parse failure.
            continue
        try:
            entry = json.loads(line)
            if "results" in entry:
                results = entry["results"]
                if isinstance(results, str):
                    try:
                        results = json.loads(results)
                    except json.JSONDecodeError:
                        # Not JSON (e.g. a single-quoted Python repr): fall
                        # back to safe literal parsing.
                        results = ast.literal_eval(results)

                if isinstance(results, dict):
                    # The nested field is replaced by its flattened columns.
                    del entry["results"]
                    for key, value in results.items():  # type: ignore
                        if not isinstance(value, dict):
                            entry[key] = value
                            continue
                        for subkey, subvalue in value.items():  # type: ignore
                            if not isinstance(subvalue, dict):
                                entry[f"{key}_{subkey}"] = subvalue
                                continue
                            # Handle one more level of nesting
                            for subsubkey, subsubvalue in subvalue.items():  # type: ignore
                                entry[f"{key}_{subkey}_{subsubkey}"] = subsubvalue

            rows.append(entry)  # type: ignore
        except Exception as e:
            # Deliberate best-effort boundary: report the bad line and move on.
            log.error("Error processing line: %s", e)
            log.error("Problematic line: %s...", line[:200])  # first 200 chars of the line
            continue
    return DataFrame(rows)
539+
482540
def write_to_file(
483541
self,
484542
file: str | os.PathLike[str],

tests/test_response.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,24 @@ def test_response_parse_mismatched_basemodel(client: ContextualAI) -> None:
7373
response.parse(to=PydanticModel)
7474

7575

76+
def test_response_binary_response_to_dataframe(client: ContextualAI) -> None:
    """A single JSON line whose `results` field is a Python-literal string
    becomes one row with the nested results flattened into columns."""
    payload = b'{"prompt": "What was Apple\'s total net sales for 2022?", "reference": "...", "response": "...", "guideline": "", "knowledge": "[]", "results": "{\'equivalence_score\': {\'score\': 0.0, \'metadata\': \\"The generated response does not provide any information about Apple\'s total net sales for 2022, whereas the reference response provides the specific figure.\\"}, \'factuality_v4.5_score\': {\'score\': 0.0, \'metadata\': {\'description\': \'There are claims but no knowledge so response is ungrounded.\'}}}", "status": "completed"}\r\n'
    binary_response = BinaryAPIResponse(
        raw=httpx.Response(200, content=payload),
        client=client,
        stream=False,
        stream_cls=None,
        cast_to=bytes,
        options=FinalRequestOptions.construct(method="get", url="/foo"),
    )

    frame = binary_response.to_dataframe()

    # Six plain fields plus four flattened result columns.
    assert frame.shape == (1, 10)
    prompt_value = frame["prompt"].astype(str).iloc[0]  # type: ignore
    assert prompt_value == "What was Apple's total net sales for 2022?"
    equivalence_score = frame["equivalence_score_score"].astype(float).iloc[0]  # type: ignore
    assert equivalence_score == 0.0
92+
93+
7694
@pytest.mark.asyncio
7795
async def test_async_response_parse_mismatched_basemodel(async_client: AsyncContextualAI) -> None:
7896
response = AsyncAPIResponse(

0 commit comments

Comments
 (0)