|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | 3 | import os |
| 4 | +import ast |
| 5 | +import json |
4 | 6 | import inspect |
5 | 7 | import logging |
6 | 8 | import datetime |
|
23 | 25 | import anyio |
24 | 26 | import httpx |
25 | 27 | import pydantic |
| 28 | +from pandas import DataFrame # type: ignore[import] |
26 | 29 |
|
27 | 30 | from ._types import NoneType |
28 | 31 | from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type, extract_type_var_from_base |
@@ -479,6 +482,61 @@ class BinaryAPIResponse(APIResponse[bytes]): |
479 | 482 | the API request, e.g. `.with_streaming_response.get_binary_response()` |
480 | 483 | """ |
481 | 484 |
|
def to_dataframe(self) -> DataFrame:
    """Convert a JSON Lines response body into a pandas DataFrame.

    The body is read in full, decoded as UTF-8 and split on newlines; every
    non-blank line is parsed as one JSON value and becomes one row. When a
    row has a ``results`` field holding a dictionary (either directly or as
    a string containing JSON / a Python literal), that dictionary is
    flattened into the row, up to three levels deep, using ``_``-joined
    keys (``key``, ``key_subkey``, ``key_subkey_subsubkey``). Values nested
    deeper than that are stored as-is. Flattened keys overwrite existing
    row keys of the same name.

    A ``results`` value that is not a dictionary is kept in the ``results``
    column (in parsed form if it was a string) rather than discarded.

    Lines that cannot be processed are logged and skipped; blank lines are
    skipped silently.

    Note: This method requires the `pandas` library to be installed.

    Returns:
        DataFrame: One row per successfully processed line.
    """
    text = self.read().decode("utf-8")

    rows = []
    # Split on "\n" only: str.splitlines() would also break on characters
    # such as U+2028 that may legally appear unescaped inside JSON strings.
    for raw_line in text.split("\n"):
        line = raw_line.strip()
        if not line:
            # Empty body, trailing newline or blank separator line: nothing to parse.
            continue
        try:
            entry = json.loads(line)
            if "results" in entry:
                results = entry["results"]
                if isinstance(results, str):
                    try:
                        results = json.loads(results)
                    except ValueError:
                        # Not valid JSON (e.g. a Python-repr dict with single
                        # quotes). literal_eval only evaluates literals, so no
                        # code from the payload is executed.
                        results = ast.literal_eval(results)

                if isinstance(results, dict):
                    # Replace the nested field with its flattened columns.
                    del entry["results"]
                    for key, value in results.items():  # type: ignore
                        if not isinstance(value, dict):
                            entry[key] = value
                            continue
                        for subkey, subvalue in value.items():  # type: ignore
                            if not isinstance(subvalue, dict):
                                entry[f"{key}_{subkey}"] = subvalue
                                continue
                            # Third and last level that gets flattened.
                            for subsubkey, subsubvalue in subvalue.items():  # type: ignore
                                entry[f"{key}_{subkey}_{subsubkey}"] = subsubvalue
                else:
                    # Nothing to flatten: keep the value instead of dropping it.
                    entry["results"] = results

            rows.append(entry)  # type: ignore
        except Exception as exc:
            # Best-effort conversion: one bad line must not abort the whole frame.
            log.error("Error processing line: %s", exc)
            log.error("Problematic line: %s...", line[:200])  # first 200 chars only
            continue
    return DataFrame(rows)
| 539 | + |
482 | 540 | def write_to_file( |
483 | 541 | self, |
484 | 542 | file: str | os.PathLike[str], |
|
0 commit comments