From 763469ea7a5b673d8f9a67c52d15aa75eeaff4ca Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 28 Feb 2025 12:40:57 -0900 Subject: [PATCH 1/5] Add to_dataframe method to BinaryAPIResponse Signed-off-by: Sean Smith --- src/contextual/_response.py | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/contextual/_response.py b/src/contextual/_response.py index 51fc249d..07b16642 100644 --- a/src/contextual/_response.py +++ b/src/contextual/_response.py @@ -1,6 +1,8 @@ from __future__ import annotations import os +import ast +import json import inspect import logging import datetime @@ -22,6 +24,7 @@ import anyio import httpx +import pandas as pd import pydantic from ._types import NoneType @@ -479,6 +482,43 @@ class BinaryAPIResponse(APIResponse[bytes]): the API request, e.g. `.with_streaming_response.get_binary_response()` """ + def to_dataframe(self) -> pd.DataFrame: + """Convert the response data to a pandas DataFrame. + + Note: This method requires the `pandas` library to be installed. 
+ + Returns: + pd.DataFrame: Processed evaluation data + """ + # Read the binary content + content = self.read() + + # Now decode the content + lines = content.decode("utf-8").strip().split("\n") + + # Parse each line and flatten the results + data = [] + for line in lines: + try: + entry = json.loads(line) + # Parse the results string if it exists + if "results" in entry: + results = ast.literal_eval(entry["results"]) + del entry["results"] + if isinstance(results, dict): + for key, value in results.items(): + if isinstance(value, dict): + for subkey, subvalue in value.items(): + entry[f"{key}_{subkey}"] = subvalue + else: + entry[key] = value + + data.append(entry) + except Exception as e: + log.info(f"Error processing line: {e}") + continue + return pd.DataFrame(data) + def write_to_file( self, file: str | os.PathLike[str], From b58336c6c522c211c6de115ccfc81d498c77cd45 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 28 Feb 2025 13:10:01 -0900 Subject: [PATCH 2/5] add pandas to requirements Signed-off-by: Sean Smith --- pyproject.toml | 2 ++ requirements-dev.lock | 2 ++ requirements.lock | 2 ++ src/contextual/_response.py | 14 +++++++------- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cd4ac761..f3e2ac6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "anyio>=3.5.0, <5", "distro>=1.7.0, <2", "sniffio", + "pandas>=2.1.0, <3", ] requires-python = ">= 3.8" classifiers = [ @@ -55,6 +56,7 @@ dev-dependencies = [ "importlib-metadata>=6.7.0", "rich>=13.7.1", "nest_asyncio==1.6.0", + "pandas", ] [tool.rye.scripts] diff --git a/requirements-dev.lock b/requirements-dev.lock index 83d02e00..b786c0c7 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -102,3 +102,5 @@ virtualenv==20.24.5 # via nox zipp==3.17.0 # via importlib-metadata +pandas==2.1.0 + # via contextual-client diff --git a/requirements.lock b/requirements.lock index bc4698e1..fea64e28 100644 --- 
a/requirements.lock +++ b/requirements.lock @@ -43,3 +43,5 @@ typing-extensions==4.12.2 # via contextual-client # via pydantic # via pydantic-core +pandas==2.1.0 + # via contextual-client \ No newline at end of file diff --git a/src/contextual/_response.py b/src/contextual/_response.py index 07b16642..e23fbead 100644 --- a/src/contextual/_response.py +++ b/src/contextual/_response.py @@ -24,8 +24,8 @@ import anyio import httpx -import pandas as pd import pydantic +from pandas import DataFrame # type: ignore[import] from ._types import NoneType from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type, extract_type_var_from_base @@ -482,13 +482,13 @@ class BinaryAPIResponse(APIResponse[bytes]): the API request, e.g. `.with_streaming_response.get_binary_response()` """ - def to_dataframe(self) -> pd.DataFrame: + def to_dataframe(self) -> DataFrame: """Convert the response data to a pandas DataFrame. Note: This method requires the `pandas` library to be installed. 
Returns: - pd.DataFrame: Processed evaluation data + DataFrame: Processed evaluation data """ # Read the binary content content = self.read() @@ -506,18 +506,18 @@ def to_dataframe(self) -> pd.DataFrame: results = ast.literal_eval(entry["results"]) del entry["results"] if isinstance(results, dict): - for key, value in results.items(): + for key, value in results.items(): # type: ignore if isinstance(value, dict): - for subkey, subvalue in value.items(): + for subkey, subvalue in value.items(): # type: ignore entry[f"{key}_{subkey}"] = subvalue else: entry[key] = value - data.append(entry) + data.append(entry) # type: ignore except Exception as e: log.info(f"Error processing line: {e}") continue - return pd.DataFrame(data) + return DataFrame(data) def write_to_file( self, From 9cc1b516c681d51fa96504bbb77d5be09b0fb385 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 28 Feb 2025 13:37:32 -0900 Subject: [PATCH 3/5] Fix numpy version discrepancy Signed-off-by: Sean Smith --- pyproject.toml | 6 ++++-- requirements-dev.lock | 4 +++- requirements.lock | 4 +++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f3e2ac6c..c0aa3fa2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,8 @@ dependencies = [ "anyio>=3.5.0, <5", "distro>=1.7.0, <2", "sniffio", - "pandas>=2.1.0, <3", + "pandas==2.2.3", + "numpy==2.0.2", ] requires-python = ">= 3.8" classifiers = [ @@ -56,7 +57,8 @@ dev-dependencies = [ "importlib-metadata>=6.7.0", "rich>=13.7.1", "nest_asyncio==1.6.0", - "pandas", + "pandas==2.2.3", + "numpy==2.0.2", ] [tool.rye.scripts] diff --git a/requirements-dev.lock b/requirements-dev.lock index b786c0c7..19dcb392 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -102,5 +102,7 @@ virtualenv==20.24.5 # via nox zipp==3.17.0 # via importlib-metadata -pandas==2.1.0 +pandas==2.2.3 # via contextual-client +numpy==2.0.2 + # via contextual-client \ No newline at end of file diff --git a/requirements.lock 
b/requirements.lock index fea64e28..3b833e41 100644 --- a/requirements.lock +++ b/requirements.lock @@ -43,5 +43,7 @@ typing-extensions==4.12.2 # via contextual-client # via pydantic # via pydantic-core -pandas==2.1.0 +pandas==2.2.3 + # via contextual-client +numpy==2.0.2 # via contextual-client \ No newline at end of file From c8684a0dffe99f2108ec6fbae35f15b185e2395d Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Mon, 3 Mar 2025 17:16:24 -0800 Subject: [PATCH 4/5] added tests Signed-off-by: Sean Smith --- tests/test_response.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_response.py b/tests/test_response.py index cedd75ba..d4cb409f 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -73,6 +73,24 @@ def test_response_parse_mismatched_basemodel(client: ContextualAI) -> None: response.parse(to=PydanticModel) +def test_response_binary_response_to_dataframe(client: ContextualAI) -> None: + response = BinaryAPIResponse( + raw=httpx.Response( + 200, + content=b'{"prompt": "What was Apple\'s total net sales for 2022?", "reference": "...", "response": "...", "guideline": "", "knowledge": "[]", "results": "{\'equivalence_score\': {\'score\': 0.0, \'metadata\': \\"The generated response does not provide any information about Apple\'s total net sales for 2022, whereas the reference response provides the specific figure.\\"}, \'factuality_v4.5_score\': {\'score\': 0.0, \'metadata\': {\'description\': \'There are claims but no knowledge so response is ungrounded.\'}}}", "status": "completed"}\r\n', + ), + client=client, + stream=False, + stream_cls=None, + cast_to=bytes, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + df = response.to_dataframe() + assert df.shape == (1, 10) + assert df["prompt"].astype(str).iloc[0] == "What was Apple's total net sales for 2022?" 
# type: ignore + assert df["equivalence_score_score"].astype(float).iloc[0] == 0.0 # type: ignore + + @pytest.mark.asyncio async def test_async_response_parse_mismatched_basemodel(async_client: AsyncContextualAI) -> None: response = AsyncAPIResponse( From b2f6efeeef08a211928e8b98c334a624fe628b89 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Mon, 10 Mar 2025 16:00:56 -0800 Subject: [PATCH 5/5] Changed logic of DataFrame parsing Signed-off-by: Sean Smith --- src/contextual/_response.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/src/contextual/_response.py b/src/contextual/_response.py index e23fbead..6d4a7bb1 100644 --- a/src/contextual/_response.py +++ b/src/contextual/_response.py @@ -501,21 +501,39 @@ def to_dataframe(self) -> DataFrame: for line in lines: try: entry = json.loads(line) - # Parse the results string if it exists - if "results" in entry: - results = ast.literal_eval(entry["results"]) - del entry["results"] + # Parse the results field directly from JSON + if 'results' in entry: + if isinstance(entry['results'], str): + # Try to handle string representations that are valid JSON + try: + results = json.loads(entry['results']) + except Exception as e: + # If not valid JSON, fall back to safer processing + results = ast.literal_eval(entry['results']) + else: + # Already a dictionary + results = entry['results'] + + # Remove the original results field + del entry['results'] + # Flatten the nested dictionary structure if isinstance(results, dict): for key, value in results.items(): # type: ignore if isinstance(value, dict): for subkey, subvalue in value.items(): # type: ignore - entry[f"{key}_{subkey}"] = subvalue + if isinstance(subvalue, dict): + # Handle one more level of nesting + for subsubkey, subsubvalue in subvalue.items(): # type: ignore + entry[f'{key}_{subkey}_{subsubkey}'] = subsubvalue + else: + entry[f'{key}_{subkey}'] = subvalue else: entry[key] = value - data.append(entry) # type: 
ignore + data.append(entry) # type: ignore except Exception as e: - log.info(f"Error processing line: {e}") + log.error(f"Error processing line: {e}") + log.error(f"Problematic line: {line[:200]}...") # Print first 200 chars of the line continue return DataFrame(data)