-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest_ai_summary.py
More file actions
119 lines (102 loc) · 5.15 KB
/
test_ai_summary.py
File metadata and controls
119 lines (102 loc) · 5.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python3
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/backend')
import sqlite3
import json
from pathlib import Path
from backend.github_cache import github_cache
from datetime import datetime
def clean_ai_fields(paper):
    """Normalize the AI-generated fields of a paper record for the frontend.

    The record is modified in place and the same object is returned.

    For each of ``task_reasoning``, ``github_reasoning``, ``ai_summary``,
    ``ai_key_contributions``, ``ai_methodology`` and ``ai_significance``
    that is present and holds a string:

    * blank strings (empty or whitespace-only) and the literal ``'[]'``
      become ``None``;
    * ``ai_key_contributions`` is decoded as JSON; an empty/falsy result
      becomes ``None``, a non-JSON string is kept with surrounding
      whitespace removed;
    * an ``ai_summary`` that begins with a Markdown ```json fence is
      replaced by the ``summary`` entry of the embedded JSON object when
      one can be extracted, otherwise by the stripped original text.

    Non-string values (including ``None``) are left untouched.

    ``task_confidence``, ``github_confidence`` and ``summary_confidence``
    are set to ``None`` when they equal ``0.0`` and the matching flag
    (``is_ai_classified``, ``is_github_ai_matched``, ``is_ai_summarized``)
    is missing or falsy.

    Args:
        paper: Mutable mapping describing one paper (e.g. ``dict(row)``).

    Returns:
        The ``paper`` object that was passed in. Cleaning is best effort:
        any error is reported on stdout and the record is returned in
        whatever state it reached.
    """
    ai_fields = (
        'task_reasoning', 'github_reasoning', 'ai_summary',
        'ai_key_contributions', 'ai_methodology', 'ai_significance',
    )
    # (confidence column, flag that says the score came from a real AI run)
    confidence_flags = (
        ('task_confidence', 'is_ai_classified'),
        ('github_confidence', 'is_github_ai_matched'),
        ('summary_confidence', 'is_ai_summarized'),
    )

    try:
        for field in ai_fields:
            if field not in paper:
                continue
            value = paper[field]
            if not isinstance(value, str):
                # None and already-structured values need no cleaning.
                continue

            text = value.strip()
            if text in ('', '[]'):
                paper[field] = None
            elif field == 'ai_key_contributions':
                paper[field] = _parse_key_contributions(text)
            elif field == 'ai_summary' and text.startswith('```json'):
                paper[field] = _extract_fenced_summary(field, text)

        # A 0.0 score without the corresponding flag means "no AI data",
        # not "zero confidence".
        for score_key, flag_key in confidence_flags:
            if paper.get(score_key) == 0.0 and not paper.get(flag_key):
                paper[score_key] = None

    except Exception as e:
        # If cleaning fails, log the error but don't break the response.
        # The handler itself must not raise, so only call .get() when the
        # object actually provides it.
        getter = getattr(paper, 'get', None)
        paper_id = getter('id', 'unknown') if callable(getter) else 'unknown'
        print(f"Warning: Error cleaning AI fields for paper {paper_id}: {e}")

    return paper


def _parse_key_contributions(text):
    """Decode a non-blank key-contributions string; return None if it decodes to nothing."""
    try:
        parsed = json.loads(text)
    except (json.JSONDecodeError, TypeError):
        # Not valid JSON: keep the (already stripped) free-form text.
        return text
    return parsed if parsed else None


def _extract_fenced_summary(field, text):
    """Return the ``summary`` entry of the JSON object embedded in fenced *text*, else *text*."""
    print(f"DEBUG: Processing markdown JSON for {field}")
    print(f"DEBUG: Value starts with: {text[:100]}")

    # Everything from the first '{' through the last '}' is taken as the payload.
    json_start = text.find('{')
    json_end = text.rfind('}') + 1
    print(f"DEBUG: json_start={json_start}, json_end={json_end}")
    if json_start == -1 or json_end <= json_start:
        return text

    json_content = text[json_start:json_end]
    print(f"DEBUG: Extracted JSON: {json_content[:200]}...")
    try:
        parsed = json.loads(json_content)
    except (json.JSONDecodeError, TypeError) as e:
        print(f"DEBUG: JSON parsing failed: {e}")
        return text

    if not (isinstance(parsed, dict) and 'summary' in parsed):
        print("DEBUG: No summary field found, keeping original")
        return text

    print("DEBUG: Found summary in parsed JSON")
    summary = parsed['summary']
    if isinstance(summary, str) and not summary.strip():
        # A blank extracted summary is "no summary", same as a blank field.
        return None
    return summary
def test_paper(paper_id: str):
    """Print the AI-summary fields of one paper before and after cleaning.

    Opens ``backend/paperswithcode.db`` next to this file, loads the row of
    the ``papers`` table whose ``id`` equals ``paper_id``, prints
    ``ai_summary``, ``summary_confidence`` and ``is_ai_summarized`` as
    stored, runs ``clean_ai_fields`` on the row and prints the same fields
    again. Prints "Paper not found" when no row matches.

    Args:
        paper_id: Value of the ``id`` column to look up.
    """
    db_path = Path(__file__).resolve().parent / "backend" / "paperswithcode.db"
    shown_fields = ('summary_confidence', 'is_ai_summarized')

    conn = sqlite3.connect(db_path, timeout=30.0, check_same_thread=False)
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        print(f"Testing paper {paper_id}")
        cursor.execute("SELECT * FROM papers WHERE id = ?", (paper_id,))
        paper_row = cursor.fetchone()
        if not paper_row:
            print("Paper not found")
            return

        # Work on a plain dict: the row object is read-only.
        paper = dict(paper_row)

        print("Raw data:")
        print(f" ai_summary: {repr(paper.get('ai_summary', 'N/A'))}")
        for key in shown_fields:
            print(f" {key}: {paper.get(key, 'N/A')}")

        # Clean the fields
        paper = clean_ai_fields(paper)

        print("After cleaning:")
        print(f" ai_summary: {repr(paper.get('ai_summary', 'N/A'))}")
        for key in shown_fields:
            print(f" {key}: {paper.get(key, 'N/A')}")
    finally:
        # Close on every exit path, including the early "not found" return
        # and any error raised by the query or the cleaning step.
        conn.close()
if __name__ == "__main__":
    # Each entry pairs the banner to print with the paper id to inspect.
    # NOTE(review): the ids are presumably rows in the local database, one
    # without and one with an AI summary -- confirm against the data.
    runs = (
        ("=== Testing paper without AI summary ===", 'c1abeab1e1f57af7'),
        ("\n=== Testing paper with AI summary ===", '8fc1f0031aa7853e'),
    )
    for banner, target_id in runs:
        print(banner)
        test_paper(target_id)