-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathprocess_csv_openai.py
More file actions
126 lines (107 loc) · 5.08 KB
/
process_csv_openai.py
File metadata and controls
126 lines (107 loc) · 5.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import csv
import json
import os
import re
import sys

from openai import OpenAI
# Markdown code-fence markers that models often wrap around JSON replies.
# The opening fence may or may not carry a "json" language tag.
_FENCE_OPEN_RE = re.compile(r'^```(?:json)?\s*', re.IGNORECASE)
_FENCE_CLOSE_RE = re.compile(r'\s*```\s*$')


def _extract_output_text(response_data):
    """Return the text of the first ``output_text`` part in a response dict, or None."""
    if not isinstance(response_data, dict):
        return None
    output = response_data.get('output')
    if not isinstance(output, list):
        return None
    # Scan every output item, not just the first: the message item is not
    # guaranteed to be at index 0 (e.g. a reasoning item may precede it, and
    # such items carry no list-valued ``content``).
    for output_item in output:
        if not isinstance(output_item, dict):
            continue
        content = output_item.get('content')
        if not isinstance(content, list):
            continue
        for part in content:
            if isinstance(part, dict) and part.get('type') == 'output_text':
                return part.get('text')
    return None


def _parse_model_json(extracted_text):
    """Parse the model's reply as JSON and return the decoded value (first element of a list), or None."""
    if not extracted_text:
        return None
    # Remove markdown code block markers if present.
    cleaned_text = _FENCE_OPEN_RE.sub('', extracted_text.strip())
    cleaned_text = _FENCE_CLOSE_RE.sub('', cleaned_text)
    try:
        parsed_json = json.loads(cleaned_text)
    except json.JSONDecodeError as e:
        print(f"Warning: Could not parse JSON from response: {e}")
        return None
    # Handle both single object and array formats.
    if isinstance(parsed_json, list) and parsed_json:
        return parsed_json[0]
    if isinstance(parsed_json, dict):
        return parsed_json
    return None


def process_csv_with_openai(csv_file_path, output_file_path='output.json', client=None):
    """
    Load a CSV file and process each row through the OpenAI Responses API.

    Each row's ``guest`` and ``gift`` columns are passed as variables to a
    stored prompt. The reply is expected to be JSON (optionally wrapped in a
    markdown code fence) with ``reasoning`` and ``message`` fields; when it
    cannot be parsed, the raw reply text is stored under ``response_text``
    instead. A row whose API call fails is recorded with an ``error`` field
    and processing continues with the next row.

    Args:
        csv_file_path: Path to the CSV file to process
        output_file_path: Path to save the output JSON file
        client: Optional pre-configured client exposing ``responses.create``.
            When omitted, an ``OpenAI`` client is created using the
            ``OPENAI_API_KEY`` environment variable, falling back to the
            placeholder key embedded in this script.

    Raises:
        OSError: If the CSV file cannot be read or the output file cannot be
            written.
    """
    if client is None:
        # Prefer the environment so the secret does not have to live in source.
        client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "YOUR OPENAI_API_KEY"))

    results = []

    # Read the CSV file. ``newline=''`` is what the csv module requires for
    # correct handling of quoted fields containing line breaks; ``utf-8-sig``
    # drops a leading BOM (common in spreadsheet exports) that would otherwise
    # become part of the first header name, and still reads plain UTF-8.
    with open(csv_file_path, 'r', encoding='utf-8-sig', newline='') as file:
        csv_reader = csv.DictReader(file)

        # Process each row
        for row_number, row in enumerate(csv_reader, start=1):
            print(f"Processing row {row_number}...")

            # Extract guest and gift from the CSV row. DictReader fills cells
            # missing from short rows with None, so normalise those to ''.
            guest = row.get('guest') or ''
            print(f"Guest: {guest}")
            gift = row.get('gift') or ''
            print(f"Gift: {gift}")

            # Only the API call and response decoding are guarded. The broad
            # catch is a deliberate per-row boundary: one bad row must not
            # abort the whole batch.
            try:
                response = client.responses.create(
                    prompt={
                        "id": "YOUR_PROMPT_ID",
                        "version": "3",
                        "variables": {
                            "guest": guest,
                            "gift": gift
                        }
                    }
                )
                response_data = response.model_dump() if hasattr(response, 'model_dump') else response
                extracted_text = _extract_output_text(response_data)
                parsed_data = _parse_model_json(extracted_text)
            except Exception as e:
                print(f"Error processing row {row_number}: {e}")
                results.append({
                    "guest": guest,
                    "gift": gift,
                    "response_text": None,
                    "error": str(e)
                })
                continue

            # Store the result with parsed fields, or the raw text as fallback.
            if isinstance(parsed_data, dict) and parsed_data:
                result_entry = {
                    "guest": guest,
                    "gift": gift,
                    "reasoning": parsed_data.get('reasoning', ''),
                    "message": parsed_data.get('message', '')
                }
            else:
                result_entry = {
                    "guest": guest,
                    "gift": gift,
                    "response_text": extracted_text
                }
            results.append(result_entry)
            print(f"Row {row_number} processed successfully")

            if result_entry.get('reasoning'):
                # The model may return null or non-string values; convert
                # before slicing so the preview can never fail the row.
                reasoning_preview = str(result_entry['reasoning'])[:100]
                message = result_entry['message']
                message_preview = ('' if message is None else str(message))[:100]
                print(f"Reasoning: {reasoning_preview}...")
                print(f"Message: {message_preview}...")
            print("-" * 80)

    # Save results to JSON file
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        json.dump(results, output_file, indent=2, ensure_ascii=False)

    print(f"\nProcessing complete! Results saved to {output_file_path}")
    print(f"Total rows processed: {len(results)}")
if __name__ == "__main__":
    # Command-line entry point: one required CSV path, one optional output path.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python process_csv_openai.py <path_to_csv_file> [output_file.json]")
        sys.exit(1)

    source_csv = cli_args[0]
    # Fall back to the default output name when no second argument is given.
    destination_json = cli_args[1] if len(cli_args) > 1 else 'output.json'
    process_csv_with_openai(source_csv, destination_json)