-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgenerate_bios_json.py
More file actions
200 lines (184 loc) · 7.99 KB
/
generate_bios_json.py
File metadata and controls
200 lines (184 loc) · 7.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/bin/env python3
"""
Generates JSON blobs of bios.
The bios CSV should contain at least the following columns (? means optional):
email | name | role | course | preferred name? | pronouns | photo url | bio | website url?
The single roster CSV should contain the following columns in order, with no header row:
name | email | role | course
The exec CSV should contain the following columns in order, again with no header row:
name | email | role
It is recommended to first do some preprocessing within Google Sheets to obtain
the desired columns or filter any unwanted entries.
"""
import csv
import json
# Semester tag; interpolated into the exec team JSON filename when the
# script is run.
CURR_SEMESTER = "sp23"  # CHANGE ME

# Input CSVs.
BIOS_PATH = "./csvs/bios.csv"
ROSTER_PATH = "./csvs/roster.csv"
EXEC_ROLE_PATH = "./csvs/exec_roles.csv"

# Output JSON holding the mentor bios.
DEST_PATH = "./src/data/bios/mentors.json"
class Cols:
    """
    Header text of each bios CSV column, exactly as it appears in the file.

    These values are used as keys into the rows produced by csv.DictReader,
    so they must match the CSV header row character for character.
    """
    # Identity
    EMAIL = "Berkeley Email"
    NAME = "Name"
    PREF_NAME = "Preferred Name"
    PRONOUNS = "Pronouns"

    # Position being accepted/rejected
    ROLE = "For which position are you accepting/rejecting?"
    COURSE = "COURSE"

    # Bio content
    IMG_URL = "Photo"
    BIO = "Biography"
    WEB_URL = "(Optional) Website"
# Normalized (lowercased, spaces removed) course answers that mean the
# respondent rejected the position; bios rows whose course matches one of
# these are skipped.
NORMALIZED_REJECTIONS = {
    "iamrejectingallampositionsthatididnotexplicitlyaccept",
    "iamrejectingallcmpositionsthatididnotexplicitlyaccept",
    "iamrejectingallpositionsthatididnotexplicitlyaccept",
}

# Module-level state populated by parse_bios(), keyed by normalized email.
exec_roles = dict()  # Written into src/data/team/[SEMESTER].json
exec_bios = dict()  # Written into src/data/bios/exec.json
def _normalize_email(email):
    """Return the lookup key for *email*: periods removed, lowercased, stripped."""
    return email.replace(".", "").lower().strip()


def _normalize_course(course):
    """Return *course* lowercased with spaces and surrounding whitespace removed."""
    return course.lower().replace(" ", "").strip()


def _is_blank(value):
    """Return True when *value* is None, empty, or whitespace only."""
    return not value or value.isspace()


def _merge_bio(person, pref_name, pronouns, course, role, photo_url, bio, web_url):
    """
    Overlay the non-blank fields of one bios row onto an existing *person* entry.

    Blank fields never overwrite existing data, and the name is only replaced
    when a preferred name was supplied. Later rows win over earlier ones.
    """
    if not _is_blank(pref_name):
        person["name"] = pref_name
    if not _is_blank(pronouns):
        person["pronouns"] = pronouns
    if not _is_blank(course):
        person.setdefault("courses", {})[course] = role
    if not _is_blank(photo_url):
        person["imgUrl"] = photo_url
    if not _is_blank(bio):
        person["details"] = bio
    if not _is_blank(web_url):
        person["webUrl"] = web_url


def parse_bios(csv_path, master_roster_path):
    """
    Reads bios from the given CSV, returning a dictionary of data keyed by emails.

    Three files are read, in order:

    1. EXEC_ROLE_PATH (no header; name, email, role) seeds the module-level
       ``exec_roles`` and ``exec_bios`` dictionaries.
    2. ``master_roster_path`` (no header; name, email, role, course) seeds the
       returned mapping with each person's name and ``{course: role}`` entries.
       Rows with an empty course and rows whose role is "coordinator" are
       skipped.
    3. ``csv_path`` (header row named by ``Cols``) supplies the bio details,
       which are merged into the exec dictionaries and/or the returned mapping.

    Args:
        csv_path: Path to the bios CSV.
        master_roster_path: Path to the roster CSV.

    Returns:
        A dict mapping normalized email (periods removed, lowercased) to that
        person's data. Any "exec" course entry is removed before returning.

    Raises:
        KeyError: If a required column is missing from the bios CSV. The
            preferred-name and website columns are optional.
        ValueError: If a non-empty exec or roster row does not have exactly
            the expected number of columns.

    Side effects:
        Mutates the module-level ``exec_roles`` and ``exec_bios`` dicts and
        prints warnings for suspicious rows.
    """
    # Start by keying on email without periods so we can find duplicates easily
    people_by_email = {}

    # newline="" is what the csv module requires so that quoted fields with
    # embedded newlines (e.g. multi-paragraph bios) are parsed correctly.
    with open(EXEC_ROLE_PATH, newline="", encoding="utf-8") as f:
        for row in csv.reader(f):
            if not row:
                continue  # tolerate blank lines
            name, email, role = row
            key = _normalize_email(email)
            # We'll assume nobody is in multiple exec roles
            exec_roles[key] = {
                "name": name,
                "imgUrl": "",
                "position": role,
            }
            exec_bios[key] = {
                "name": name,
                "role": role,
                "imgUrl": "",
            }

    with open(master_roster_path, newline="", encoding="utf-8") as f:
        for row in csv.reader(f):
            if not row:
                continue  # tolerate blank lines
            name, email, role, preproc_course = row
            course = _normalize_course(preproc_course)
            key = _normalize_email(email)
            if not role:
                print(f"=== WARNING: EMPTY ROLE IN MASTER ROSTER FOR {email.strip()} ===")
            if not course:
                # Exec members legitimately have no course (they're already in
                # the exec roster); anyone else with no course is suspicious.
                if key not in exec_bios:
                    print(f"=== WARNING: EMPTY COURSE IN MASTER ROSTER FOR {email.strip()} AS {role} ===")
                continue
            if role.lower() == "coordinator":
                continue  # also skip coords because they're already in the exec roster
            person = people_by_email.setdefault(key, {"name": name, "courses": {}})
            person["name"] = name
            person["courses"][course] = role

    with open(csv_path, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            key = _normalize_email(row[Cols.EMAIL])
            # The preferred-name and website columns are optional, so a
            # missing column (or missing cell) is treated as blank.
            pref_name = row.get(Cols.PREF_NAME) or ""
            web_url = row.get(Cols.WEB_URL) or ""
            name = row[Cols.NAME] if _is_blank(pref_name) else pref_name
            photo_url = row[Cols.IMG_URL]
            bio = row[Cols.BIO]
            course = _normalize_course(row[Cols.COURSE])
            role = row[Cols.ROLE]
            pronouns = row[Cols.PRONOUNS]

            if course in NORMALIZED_REJECTIONS:
                continue

            in_exec_roster = key in exec_bios
            if role == "Exec" or in_exec_roster:
                if in_exec_roster:
                    exec_roles[key]["imgUrl"] = photo_url
                    exec_roles[key]["pronouns"] = pronouns
                    exec_bios[key]["imgUrl"] = photo_url
                    exec_bios[key]["pronouns"] = pronouns
                    exec_bios[key]["details"] = bio
                    exec_bios[key]["webUrl"] = web_url
                else:
                    # Someone answered "Exec" but is absent from the exec
                    # roster; there is no exec entry to fill in, so warn
                    # instead of raising KeyError.
                    print(f"=== WARNING: {name} SUBMITTED AN EXEC BIO BUT IS NOT IN THE EXEC ROSTER ===")
                # Exec members who also mentor get their mentor entry updated.
                if key in people_by_email:
                    _merge_bio(people_by_email[key], pref_name, pronouns,
                               course, role, photo_url, bio, web_url)
                continue

            if key in people_by_email:
                # Assume the latest version of the bio is correct
                _merge_bio(people_by_email[key], pref_name, pronouns,
                           course, role, photo_url, bio, web_url)
                continue

            people_by_email[key] = {
                "name": name,
                "pronouns": pronouns,
                "details": bio,
                "imgUrl": photo_url,
                "webUrl": web_url,
            }
            if _is_blank(course):
                print(f"=== NO COURSE FOUND FOR {name} ===")
            else:
                people_by_email[key]["courses"] = {course: role}

    # # 61B is doing its own form so I'm just hacking in a snippet here
    # with open("csvs/bios-61b.csv") as f:
    #     reader = csv.DictReader(f)
    #     for row in reader:
    #         email = row["Email Address"]
    #         email_no_dot = email.replace(".", "").lower().strip()
    #         name = row["Preferred Name"]
    #         photo_url = row["Photo"]
    #         bio = row["Biography"]
    #         # hardcode coords I guess
    #         if name in ["Samantha Adams", "Ryan Nuqui"]:
    #             exec_roles[email_no_dot]["imgUrl"] = photo_url
    #             exec_bios[email_no_dot]["imgUrl"] = photo_url
    #             exec_bios[email_no_dot]["details"] = bio
    #         elif email_no_dot not in people_by_email:
    #             print(f"=== NO ROLE WAS FOUND FOR 61B MENTOR {name}, SKIPPING FOR NOW ===")
    #         else:
    #             obj = people_by_email[email_no_dot]
    #             obj["name"] = name
    #             obj["imgUrl"] = photo_url
    #             obj["details"] = bio

    # filter exec from people_by_email; entries created from a bios row with
    # no course have no "courses" key at all, so don't index it blindly.
    for person in people_by_email.values():
        courses = person.get("courses")
        if courses:
            courses.pop("exec", None)
    return people_by_email
if __name__ == "__main__":
    print("Parsing bios...")
    people_by_email = parse_bios(BIOS_PATH, ROSTER_PATH)

    print("Dumping jsons...")
    # Each destination gets the values of one email-keyed dict as a JSON list:
    # mentor bios first, then the exec team roles, then the exec bios.
    # exec_roles and exec_bios were filled in by parse_bios() above.
    dump_targets = (
        (DEST_PATH, people_by_email),
        (f"src/data/team/{CURR_SEMESTER}.json", exec_roles),
        ("src/data/bios/exec.json", exec_bios),
    )
    for target_path, records in dump_targets:
        with open(target_path, "w") as out_file:
            json.dump(list(records.values()), out_file, indent=4)

    print("Done!")