|
9 | 9 |
|
@router.post("/queue/import/linkedin")
def import_linkedin_csv() -> dict:
    """POST /queue/import/linkedin: import linkedin.csv into the queue table.

    The file is streamed: lines are skipped until the expected column header
    is found, then the remaining rows are inserted in batches so the whole
    file is never held in memory. All batches are committed together at the
    end, so a failure part-way through leaves nothing committed by this call.

    Returns:
        A dict with a ``meta`` entry (built by ``make_meta``) and an
        ``imported`` entry holding the number of rows inserted.

    Raises:
        HTTPException: 404 if linkedin.csv does not exist, 400 if the
            expected header line is not found in the file, 500 for any
            other failure (the detail carries the underlying error text).
    """
    csv_path = os.path.join(os.path.dirname(__file__), "../csv/linkedin/linkedin.csv")
    if not os.path.exists(csv_path):
        raise HTTPException(status_code=404, detail="linkedin.csv not found")

    header_prefix = "First Name,Last Name,URL,Email Address,Company,Position,Connected On"
    # "group" is double-quoted in the column list, as in the original statement.
    insert_sql = '''INSERT INTO queue (first_name, last_name, linkedin, email, company, position, connected_on, created, updated, hidden, collection, "group")
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
    batch_size = 500

    conn = None
    try:
        conn = get_db_connection_direct()
        cursor = conn.cursor()
        # utf-8-sig drops a leading byte-order mark if one is present (and is
        # otherwise identical to utf-8), so a BOM cannot hide the header.
        with open(csv_path, newline='', encoding='utf-8-sig') as csvfile:
            # Skip everything before the header line; preceding lines are
            # discarded rather than buffered.
            header_line = None
            for line in csvfile:
                stripped = line.strip()
                if stripped.startswith(header_prefix):
                    header_line = stripped
                    break
            if header_line is None:
                raise HTTPException(status_code=400, detail="CSV header not found.")

            # Parse the header with the csv module so quoting is handled the
            # same way as for the data rows that follow.
            fieldnames = next(csv.reader([header_line]))
            reader = csv.DictReader(csvfile, fieldnames=fieldnames)

            now = int(time.time())
            batch = []
            imported_count = 0
            for row in reader:
                # Skip rows in which every field is empty.
                if not any(row.values()):
                    continue
                batch.append([
                    row.get('First Name'),     # first_name
                    row.get('Last Name'),      # last_name
                    row.get('URL'),            # linkedin
                    row.get('Email Address'),  # email
                    row.get('Company'),        # company
                    row.get('Position'),       # position
                    row.get('Connected On'),   # connected_on
                    now,                       # created
                    now,                       # updated
                    False,                     # hidden
                    'prospects',               # collection
                    'linkedin',                # group
                ])
                imported_count += 1
                if len(batch) >= batch_size:
                    cursor.executemany(insert_sql, batch)
                    batch = []
            # Flush the final, partially filled batch.
            if batch:
                cursor.executemany(insert_sql, batch)

        conn.commit()
        return {
            "meta": make_meta("success", f"LinkedIn CSV imported (batched): {imported_count} records imported"),
            "imported": imported_count,
        }
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must keep their status
        # code instead of being re-wrapped as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Close the connection on every path so a failed import does not
        # leak it.
        if conn is not None:
            conn.close()
0 commit comments