-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCalculateCountChanges.py
More file actions
84 lines (71 loc) · 3.24 KB
/
CalculateCountChanges.py
File metadata and controls
84 lines (71 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import unicodecsv as csv
# Column positions (0-based) and file naming shared by the velocity functions in this file.
# index: column holding the count whose change is computed (1 = the 2nd column).
index = 1
# monthInd: column read as the month number when computing elapsed months.
monthInd = 3
# yearInd: column read as the year number when computing elapsed months.
yearInd = 2
# inputFilename: placeholder; not referenced by the code visible in this file.
inputFilename = ""
# filetype: extension appended to both input and output file names.
filetype = ".csv"
def findVelocity(row, previousCount, curVel, column=None):
    """Return ``row`` with its count replaced by the change since ``previousCount``.

    Args:
        row: Current record, a list of strings.
        previousCount: Record holding the count to compare against.
        curVel: Unused; kept so existing positional callers keep working.
        column: Position of the count column. When omitted, the module-level
            ``index`` is used.

    Returns:
        ``row`` itself (same object, unmodified) when either count is not
        made up of digits or when the count is equal to the previous one.
        Otherwise a copy of ``row`` whose count column holds
        ``int(current) - int(previous)`` as a string.
    """
    col = index if column is None else column
    # Non-numeric counts cannot be subtracted; hand the row back untouched.
    if not row[col].isdigit() or not previousCount[col].isdigit():
        return row
    # An unchanged count is also returned as-is (not a copy).
    if row[col] == previousCount[col]:
        return row
    # Copy only when a modified row is actually produced.
    newRow = row.copy()
    newRow[col] = str(int(row[col]) - int(previousCount[col]))
    return newRow
def findVelocityRaw(row, previousCount, column=None):
    """Return a copy of ``row`` carrying the change in count, or ``None`` if unchanged.

    Args:
        row: Current record, a list of strings.
        previousCount: Record holding the count to compare against.
        column: Position of the count column. When omitted, the module-level
            ``index`` is used.

    Returns:
        * an unmodified copy of ``row`` when either count is not made up of
          digits;
        * ``None`` when the count equals the previous one;
        * otherwise a copy of ``row`` whose count column holds
          ``int(current) - int(previous)`` as a string.
    """
    col = index if column is None else column
    # Non-numeric counts cannot be subtracted; pass the record through as a copy.
    if not row[col].isdigit() or not previousCount[col].isdigit():
        return row.copy()
    # No change in count: signal "nothing to report" to the caller.
    if row[col] == previousCount[col]:
        return None
    newRow = row.copy()
    newRow[col] = str(int(row[col]) - int(previousCount[col]))
    return newRow
def calculateAllVelocities(inputFile):
    """Write average per-month count changes to ``<inputFile>_velocities<filetype>``.

    Rows are read from ``inputFile + filetype``. Each time the count column
    (``index``) changes, the row that started the previous run and every
    unchanged row that followed it are written out with their count replaced
    by ``change / months``. Here ``change`` is the difference to the new
    count and ``months`` is the number of months between the two changed
    rows, taken from the ``monthInd`` and ``yearInd`` columns.

    Rows after the last change are not written, because no later change
    exists to derive a value from.

    Args:
        inputFile: Input path without the ``filetype`` extension.

    Raises:
        ValueError: If a count, month, or year that has to be converted is
            not an integer string.
        ZeroDivisionError: If two consecutive changed rows carry the same
            month and year.
    """
    outputFilename = inputFile + "_velocities"
    # Open the input first so a missing input file does not truncate an
    # already existing output file.
    with open(inputFile + filetype, "rb") as csvfile, \
            open(outputFilename + filetype, 'wb') as f:
        csvreader = csv.reader(csvfile, encoding='utf-8-sig')
        # NOTE(review): escapechar=' ' is unusual for a CSV writer -- confirm it is intended.
        w = csv.writer(f, delimiter=',', escapechar=' ', quoting=csv.QUOTE_MINIMAL)
        previousRow = ['0', '0', '0', '0', '0']
        rowStorage = []
        curVel = 0
        for curRow, row in enumerate(csvreader):
            foundRow = findVelocity(row, previousRow, curVel)
            # Compare the raw counts. Comparing the computed difference with
            # the previous count misfires whenever the count exactly doubles
            # (difference == previous count).
            if row[index] == previousRow[index]:  # missing data?
                rowStorage.append(foundRow)
                continue

            curVel = int(foundRow[index])
            if curRow == 0:
                # The first row has no predecessor to measure elapsed time from.
                monthCount = 1
            else:
                monthCount = (
                    int(foundRow[monthInd]) - int(previousRow[monthInd])
                    + 12 * (int(foundRow[yearInd]) - int(previousRow[yearInd]))
                )
            previousRow = row.copy()
            # Spread the change evenly over every row stored since the last change.
            for adjustRow in rowStorage:
                adjustRow[index] = str(curVel / monthCount)
                w.writerow(adjustRow)
            # The changed row starts the next run of stored rows.
            rowStorage = [row]
def calculateAllVelocitiesRaw(inputFile):
    """Write row-to-row count changes to ``<inputFile>_velocitiesRaw<filetype>``.

    Rows are read from ``inputFile + filetype`` and each one is compared with
    the row directly before it using ``findVelocityRaw``. Rows whose count
    did not change are skipped; every other row is written with whatever
    ``findVelocityRaw`` returned for it.

    Args:
        inputFile: Input path without the ``filetype`` extension.
    """
    outputFilename = inputFile + "_velocitiesRaw"
    # Open the input first so a missing input file does not truncate an
    # already existing output file.
    with open(inputFile + filetype, "rb") as csvfile, \
            open(outputFilename + filetype, 'wb') as f:
        csvreader = csv.reader(csvfile, encoding='utf-8-sig')
        # NOTE(review): escapechar=' ' is unusual for a CSV writer -- confirm it is intended.
        w = csv.writer(f, delimiter=',', escapechar=' ', quoting=csv.QUOTE_MINIMAL)
        # Placeholder predecessor for the first row: every column is '0'.
        previousRow = ['0', '0', '0', '0', '0']
        for row in csvreader:
            foundRow = findVelocityRaw(row, previousRow)
            if foundRow is not None:
                w.writerow(foundRow)
            previousRow = row
# Example single-file runs (disabled):
# calculateAllVelocities("FFArchiveHarryPotter")
# calculateAllVelocitiesRaw("FFArchiveHarryPotter")

# Fandom display names; not referenced by the code visible in this file.
fandoms = ["Supernatural", "Avatar: Last Airbender", "Naruto", "Wrestling", "Twilight", "Star Wars", "Pokémon", "Pokemon", "NCIS", "Harry Potter", "My Little Pony"]
# Input files (without extension) to process when this module runs.
fandomFiles = ["FFArchiveNaruto_cleaned", "FFArchivePokémon_cleaned", "FFArchiveWrestling_cleaned", "FFArchiveMyLittlePony_cleaned"]

# Produce a "<name>_velocities" output file for every listed input.
for fandom in fandomFiles:
    calculateAllVelocities(fandom)