From 763469ea7a5b673d8f9a67c52d15aa75eeaff4ca Mon Sep 17 00:00:00 2001
From: Sean Smith <sean.smith@contextual.ai>
Date: Fri, 28 Feb 2025 12:40:57 -0900
Subject: [PATCH 1/5] Add to_dataframe method to BinaryAPIResponse

Signed-off-by: Sean Smith <sean.smith@contextual.ai>
---
 src/contextual/_response.py | 40 +++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/src/contextual/_response.py b/src/contextual/_response.py
index 51fc249d..07b16642 100644
--- a/src/contextual/_response.py
+++ b/src/contextual/_response.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import os
+import ast
+import json
 import inspect
 import logging
 import datetime
@@ -22,6 +24,7 @@
 
 import anyio
 import httpx
+import pandas as pd
 import pydantic
 
 from ._types import NoneType
@@ -479,6 +482,43 @@ class BinaryAPIResponse(APIResponse[bytes]):
     the API request, e.g. `.with_streaming_response.get_binary_response()`
     """
 
+    def to_dataframe(self) -> pd.DataFrame:
+        """Convert the response data to a pandas DataFrame.
+
+        Note: This method requires the `pandas` library to be installed.
+
+        Returns:
+            pd.DataFrame: Processed evaluation data
+        """
+        # Read the binary content
+        content = self.read()
+
+        # Now decode the content
+        lines = content.decode("utf-8").strip().split("\n")
+
+        # Parse each line and flatten the results
+        data = []
+        for line in lines:
+            try:
+                entry = json.loads(line)
+                # Parse the results string if it exists
+                if "results" in entry:
+                    results = ast.literal_eval(entry["results"])
+                    del entry["results"]
+                    if isinstance(results, dict):
+                        for key, value in results.items():
+                            if isinstance(value, dict):
+                                for subkey, subvalue in value.items():
+                                    entry[f"{key}_{subkey}"] = subvalue
+                            else:
+                                entry[key] = value
+
+                data.append(entry)
+            except Exception as e:
+                log.info(f"Error processing line: {e}")
+                continue
+        return pd.DataFrame(data)
+
     def write_to_file(
         self,
         file: str | os.PathLike[str],

From b58336c6c522c211c6de115ccfc81d498c77cd45 Mon Sep 17 00:00:00 2001
From: Sean Smith <sean.smith@contextual.ai>
Date: Fri, 28 Feb 2025 13:10:01 -0900
Subject: [PATCH 2/5] add pandas to requirements

Signed-off-by: Sean Smith <sean.smith@contextual.ai>
---
 pyproject.toml              |  2 ++
 requirements-dev.lock       |  2 ++
 requirements.lock           |  2 ++
 src/contextual/_response.py | 14 +++++++-------
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index cd4ac761..f3e2ac6c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
     "anyio>=3.5.0, <5",
     "distro>=1.7.0, <2",
     "sniffio",
+    "pandas>=2.1.0, <3",
 ]
 requires-python = ">= 3.8"
 classifiers = [
@@ -55,6 +56,7 @@ dev-dependencies = [
     "importlib-metadata>=6.7.0",
     "rich>=13.7.1",
     "nest_asyncio==1.6.0",
+    "pandas",
 ]
 
 [tool.rye.scripts]
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 83d02e00..b786c0c7 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -102,3 +102,5 @@ virtualenv==20.24.5
     # via nox
 zipp==3.17.0
     # via importlib-metadata
+pandas==2.1.0
+    # via contextual-client
diff --git a/requirements.lock b/requirements.lock
index bc4698e1..fea64e28 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -43,3 +43,5 @@ typing-extensions==4.12.2
     # via contextual-client
     # via pydantic
     # via pydantic-core
+pandas==2.1.0
+    # via contextual-client
\ No newline at end of file
diff --git a/src/contextual/_response.py b/src/contextual/_response.py
index 07b16642..e23fbead 100644
--- a/src/contextual/_response.py
+++ b/src/contextual/_response.py
@@ -24,8 +24,8 @@
 
 import anyio
 import httpx
-import pandas as pd
 import pydantic
+from pandas import DataFrame  # type: ignore[import]
 
 from ._types import NoneType
 from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type, extract_type_var_from_base
@@ -482,13 +482,13 @@ class BinaryAPIResponse(APIResponse[bytes]):
     the API request, e.g. `.with_streaming_response.get_binary_response()`
     """
 
-    def to_dataframe(self) -> pd.DataFrame:
+    def to_dataframe(self) -> DataFrame:
         """Convert the response data to a pandas DataFrame.
 
         Note: This method requires the `pandas` library to be installed.
 
         Returns:
-            pd.DataFrame: Processed evaluation data
+            DataFrame: Processed evaluation data
         """
         # Read the binary content
         content = self.read()
@@ -506,18 +506,18 @@ def to_dataframe(self) -> pd.DataFrame:
                     results = ast.literal_eval(entry["results"])
                     del entry["results"]
                     if isinstance(results, dict):
-                        for key, value in results.items():
+                        for key, value in results.items():  # type: ignore
                             if isinstance(value, dict):
-                                for subkey, subvalue in value.items():
+                                for subkey, subvalue in value.items():  # type: ignore
                                     entry[f"{key}_{subkey}"] = subvalue
                             else:
                                 entry[key] = value
 
-                data.append(entry)
+                data.append(entry)  # type: ignore
             except Exception as e:
                 log.info(f"Error processing line: {e}")
                 continue
-        return pd.DataFrame(data)
+        return DataFrame(data)
 
     def write_to_file(
         self,

From 9cc1b516c681d51fa96504bbb77d5be09b0fb385 Mon Sep 17 00:00:00 2001
From: Sean Smith <sean.smith@contextual.ai>
Date: Fri, 28 Feb 2025 13:37:32 -0900
Subject: [PATCH 3/5] Fix numpy version discrepancy

Signed-off-by: Sean Smith <sean.smith@contextual.ai>
---
 pyproject.toml        | 6 ++++--
 requirements-dev.lock | 4 +++-
 requirements.lock     | 4 +++-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f3e2ac6c..c0aa3fa2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,8 @@ dependencies = [
     "anyio>=3.5.0, <5",
     "distro>=1.7.0, <2",
     "sniffio",
-    "pandas>=2.1.0, <3",
+    "pandas==2.2.3",
+    "numpy==2.0.2",
 ]
 requires-python = ">= 3.8"
 classifiers = [
@@ -56,7 +57,8 @@ dev-dependencies = [
     "importlib-metadata>=6.7.0",
     "rich>=13.7.1",
     "nest_asyncio==1.6.0",
-    "pandas",
+    "pandas==2.2.3",
+    "numpy==2.0.2",
 ]
 
 [tool.rye.scripts]
diff --git a/requirements-dev.lock b/requirements-dev.lock
index b786c0c7..19dcb392 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -102,5 +102,7 @@ virtualenv==20.24.5
     # via nox
 zipp==3.17.0
     # via importlib-metadata
-pandas==2.1.0
+pandas==2.2.3
     # via contextual-client
+numpy==2.0.2
+    # via contextual-client
\ No newline at end of file
diff --git a/requirements.lock b/requirements.lock
index fea64e28..3b833e41 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -43,5 +43,7 @@ typing-extensions==4.12.2
     # via contextual-client
     # via pydantic
     # via pydantic-core
-pandas==2.1.0
+pandas==2.2.3
+    # via contextual-client
+numpy==2.0.2
     # via contextual-client
\ No newline at end of file

From c8684a0dffe99f2108ec6fbae35f15b185e2395d Mon Sep 17 00:00:00 2001
From: Sean Smith <sean.smith@contextual.ai>
Date: Mon, 3 Mar 2025 17:16:24 -0800
Subject: [PATCH 4/5] added tests

Signed-off-by: Sean Smith <sean.smith@contextual.ai>
---
 tests/test_response.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/test_response.py b/tests/test_response.py
index cedd75ba..d4cb409f 100644
--- a/tests/test_response.py
+++ b/tests/test_response.py
@@ -73,6 +73,24 @@ def test_response_parse_mismatched_basemodel(client: ContextualAI) -> None:
         response.parse(to=PydanticModel)
 
 
+def test_response_binary_response_to_dataframe(client: ContextualAI) -> None:
+    response = BinaryAPIResponse(
+        raw=httpx.Response(
+            200,
+            content=b'{"prompt": "What was Apple\'s total net sales for 2022?", "reference": "...", "response": "...", "guideline": "", "knowledge": "[]", "results": "{\'equivalence_score\': {\'score\': 0.0, \'metadata\': \\"The generated response does not provide any information about Apple\'s total net sales for 2022, whereas the reference response provides the specific figure.\\"}, \'factuality_v4.5_score\': {\'score\': 0.0, \'metadata\': {\'description\': \'There are claims but no knowledge so response is ungrounded.\'}}}", "status": "completed"}\r\n',
+        ),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=bytes,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+    df = response.to_dataframe()
+    assert df.shape == (1, 10)
+    assert df["prompt"].astype(str).iloc[0] == "What was Apple's total net sales for 2022?"  # type: ignore
+    assert df["equivalence_score_score"].astype(float).iloc[0] == 0.0  # type: ignore
+
+
 @pytest.mark.asyncio
 async def test_async_response_parse_mismatched_basemodel(async_client: AsyncContextualAI) -> None:
     response = AsyncAPIResponse(

From b2f6efeeef08a211928e8b98c334a624fe628b89 Mon Sep 17 00:00:00 2001
From: Sean Smith <sean.smith@contextual.ai>
Date: Mon, 10 Mar 2025 16:00:56 -0800
Subject: [PATCH 5/5] Changed logic of DataFrame parsing

Signed-off-by: Sean Smith <sean.smith@contextual.ai>
---
 src/contextual/_response.py | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/contextual/_response.py b/src/contextual/_response.py
index e23fbead..6d4a7bb1 100644
--- a/src/contextual/_response.py
+++ b/src/contextual/_response.py
@@ -501,21 +501,39 @@ def to_dataframe(self) -> DataFrame:
         for line in lines:
             try:
                 entry = json.loads(line)
-                # Parse the results string if it exists
-                if "results" in entry:
-                    results = ast.literal_eval(entry["results"])
-                    del entry["results"]
+                # Parse the results field directly from JSON
+                if 'results' in entry:
+                    if isinstance(entry['results'], str):
+                        # Try to handle string representations that are valid JSON
+                        try:
+                            results = json.loads(entry['results'])
+                        except Exception as e:
+                            # Not valid JSON: fall back to parsing it as a Python literal (e.g. a single-quoted dict repr)
+                            results = ast.literal_eval(entry['results'])
+                    else:
+                        # Not a string: json.loads already decoded it, so use the value as-is
+                        results = entry['results']
+
+                    # Remove the original results field
+                    del entry['results']
+                    # Flatten the nested dictionary structure
                     if isinstance(results, dict):
                         for key, value in results.items():  # type: ignore
                             if isinstance(value, dict):
                                 for subkey, subvalue in value.items():  # type: ignore
-                                    entry[f"{key}_{subkey}"] = subvalue
+                                    if isinstance(subvalue, dict):
+                                        # Handle one more level of nesting
+                                        for subsubkey, subsubvalue in subvalue.items():  # type: ignore
+                                            entry[f'{key}_{subkey}_{subsubkey}'] = subsubvalue
+                                    else:
+                                        entry[f'{key}_{subkey}'] = subvalue
                             else:
                                 entry[key] = value
 
-                data.append(entry)  # type: ignore
+                data.append(entry) # type: ignore
             except Exception as e:
-                log.info(f"Error processing line: {e}")
+                log.error(f"Error processing line: {e}")
+                log.error(f"Problematic line: {line[:200]}...")  # Log only the first 200 chars of the line
                 continue
         return DataFrame(data)