-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess_real_hr_emails.py
More file actions
93 lines (78 loc) · 2.89 KB
/
process_real_hr_emails.py
File metadata and controls
93 lines (78 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env python3
"""
Process the real HR emails CSV and create a clean email list
"""
import csv
import re
# Lower-cased first domain label -> canonical company display name.
# Built once at import time instead of on every call.
_COMPANY_NAMES = {
    'ibm': 'IBM',
    'wipro': 'Wipro',
    'infosys': 'Infosys',
    'oracle': 'Oracle',
    'cisco': 'Cisco',
    'siemens': 'Siemens',
    'hp': 'HP',
    'intel': 'Intel',
    'ericsson': 'Ericsson',
    'nokia': 'Nokia',
    'lucent': 'Lucent',
    'sap': 'SAP',
    'ge': 'General Electric',
    'honeywell': 'Honeywell',
    'philips': 'Philips',
    'igate': 'iGATE',
    'cgi': 'CGI',
    'tcs': 'TCS',
    'accenture': 'Accenture',
}


def extract_company_from_email(email):
    """Return a display name for the company owning the email's domain.

    The first label of the domain (the text between the last ``@`` and the
    next ``.``) is looked up case-insensitively in a table of known
    companies. Labels that are not in the table are returned title-cased,
    e.g. ``someone@acme.com`` -> ``"Acme"``.

    Args:
        email: Address containing at least one ``@``.

    Returns:
        The mapped company name, or the title-cased first domain label.

    Raises:
        IndexError: If ``email`` contains no ``@``.
    """
    # Split on the LAST '@': a quoted local part may itself contain '@',
    # and the domain is always whatever follows the final one.
    domain = email.rsplit('@', 1)[1]
    label = domain.split('.')[0]
    return _COMPANY_NAMES.get(label.lower(), label.title())
def extract_name_from_email(email):
    """Guess a ``(first_name, last_name)`` pair from an email's local part.

    The text before the first ``@`` is split into words on ``.``, ``_``,
    ``-`` and whitespace. The first two words, title-cased, become the
    first and last name; any further words are ignored.

    Args:
        email: Email address (or bare local part) to derive a name from.

    Returns:
        A 2-tuple ``(first_name, last_name)``:

        * two or more words -> the first two words, title-cased;
        * exactly one word  -> ``(word.title(), "HR")``;
        * no words at all   -> ``("HR", "Team")``.
    """
    local_part = email.split('@')[0]
    # Treat the usual mailbox separators as spaces, then split into words.
    words = local_part.translate(str.maketrans('._-', '   ')).split()

    if not words:
        # Empty or separator-only local part (e.g. "@ibm.com"): previously
        # this raised IndexError on parts[0]; use a generic placeholder.
        return "HR", "Team"

    first_name = words[0].title()
    last_name = words[1].title() if len(words) >= 2 else "HR"
    return first_name, last_name
def process_real_hr_emails(
    input_path='/Users/tarandeepsinghjuneja/Downloads/hr_emails_full.csv',
    output_path='/Users/tarandeepsinghjuneja/email/hr_contacts_real.csv',
):
    """Convert a raw HR email CSV into a cleaned contact CSV.

    Reads ``input_path`` as a CSV with a header row and takes the address
    from its ``Email`` column. For every usable address a contact record
    is built with the name and company derived from the address itself
    (via ``extract_name_from_email`` and ``extract_company_from_email``)
    and the fixed values ``'HR Professional'`` / ``'India'`` for job title
    and location. The records are written to ``output_path`` and a short
    summary (count plus the first ten contacts) is printed.

    Rows are skipped when the ``Email`` value is missing, blank, has no
    ``@``, or has nothing before/after the ``@``.

    Args:
        input_path: CSV file to read. Defaults to the original hard-coded
            location.
        output_path: CSV file to (over)write. Defaults to the original
            hard-coded location.

    Raises:
        OSError: If either file cannot be opened.
    """
    hr_contacts = []

    # 'utf-8-sig' transparently drops a leading BOM (common in spreadsheet
    # exports); with plain 'utf-8' the header would be '\ufeffEmail' and
    # every row would be silently skipped. newline='' is what the csv
    # module expects for files it reads.
    with open(input_path, 'r', newline='', encoding='utf-8-sig') as f:
        for row in csv.DictReader(f):
            # DictReader fills short rows with None, so .get()'s default
            # is not enough to guarantee a string here.
            email = (row.get('Email') or '').strip()
            if '@' not in email:
                continue
            # No local part or no domain: not a deliverable address.
            if email.startswith('@') or email.endswith('@'):
                continue

            first_name, last_name = extract_name_from_email(email)
            hr_contacts.append({
                'email': email,
                'first_name': first_name,
                'last_name': last_name,
                'company': extract_company_from_email(email),
                'job_title': 'HR Professional',
                'location': 'India',
            })

    # Write clean CSV
    fieldnames = ['email', 'first_name', 'last_name', 'company', 'job_title', 'location']
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(hr_contacts)

    print(f"Processed {len(hr_contacts)} real HR contacts")
    print("Sample contacts:")
    for i, contact in enumerate(hr_contacts[:10], start=1):
        print(f" {i}. {contact['first_name']} {contact['last_name']} - {contact['email']} - {contact['company']}")
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    process_real_hr_emails()