-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathIMPROVE_tools_v1.py
More file actions
90 lines (70 loc) · 3.15 KB
/
IMPROVE_tools_v1.py
File metadata and controls
90 lines (70 loc) · 3.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def improve_reader(filepath):
    """Read an IMPROVE ASCII (CSV) export into a pandas DataFrame.

    The file is parsed with its second and third columns (zero-based
    positions 1 and 2) as a two-level index, and pandas is asked to parse
    the index values as dates.  The 'POC' and 'Dataset' columns are dropped
    and the element/mass value columns are renamed to short labels that are
    easier to use in plots.

    Parameters
    ----------
    filepath : str
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        The parsed data with the simplified column names.

    Raises
    ------
    KeyError
        If the file has no 'POC' or 'Dataset' column.
    """
    import pandas as pan

    # A parenthesised single argument prints identically on Python 2 and 3.
    print('Processing ' + filepath)

    df = pan.read_csv(filepath, index_col=[1, 2], parse_dates=True)

    # These two columns are not used in any later processing step.
    df = df.drop(['POC', 'Dataset'], axis=1)

    # Columns that are absent from the file are simply not renamed.
    short_names = {
        'ALf:Value': 'Al',
        'CAf:Value': 'Ca',
        'FEf:Value': 'Fe',
        'MF:Value': 'PM 2.5',
        'Kf:Value': 'K',
        'SIf:Value': 'Si',
        'TIf:Value': 'Ti',
    }
    return df.rename(columns=short_names)
def dayfilter(df, start, end):
    """Return the rows of an IMPROVE dataframe collected within a date window.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a hierarchical index whose level 1 holds the sample dates.
    start, end : datetime.datetime
        Inclusive bounds of the window; a row is kept when
        ``start <= date <= end``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows inside the window, with the
        original index, column order and dtypes.  It is empty when no row
        falls inside the window; the input frame is not modified.
    """
    import numpy as np

    sample_dates = df.index.get_level_values(1)

    # Build the mask as an explicit boolean array: an empty plain list would
    # be interpreted by pandas as an (empty) column selection, not a row mask.
    keep = np.array([start <= stamp <= end for stamp in sample_dates],
                    dtype=bool)

    # Copy so that callers can modify the result without touching `df`.
    return df[keep].copy()
if __name__ == '__main__':
    # Script entry point: read every IMPROVE .txt export in the data folder,
    # combine them, keep only each station's date window of interest, and
    # pickle both the full and the filtered tables.
    import os
    import pandas as pan
    from datetime import datetime

    # Raw string: the backslashes are Windows path separators, not escapes
    # (a plain literal containing "\U" is a syntax error on Python 3).
    os.chdir(r'C:\Users\dashamstyr\Dropbox\TransPAC2010\IMPROVE Data')

    # splitext copes with names that have no dot or several dots, which a
    # two-way unpack of f.split('.') does not.
    frames = [improve_reader(f) for f in os.listdir(os.getcwd())
              if os.path.splitext(f)[1] == '.txt']
    if not frames:
        raise SystemExit('No .txt files found in ' + os.getcwd())
    df = pan.concat(frames)

    # Station code -> (first day, last day) of the window to keep.
    # Integer literals are written without leading zeros so that they are
    # valid on both Python 2 and Python 3.
    stat_dat = {'HACR1': (datetime(2010, 3, 24), datetime(2010, 4, 29)),
                'DENA1': (datetime(2010, 3, 21), datetime(2010, 4, 29)),
                'REDW1': (datetime(2010, 3, 24), datetime(2010, 4, 29)),
                'SNPA1': (datetime(2010, 3, 24), datetime(2010, 4, 29)),
                'GLAC1': (datetime(2010, 3, 24), datetime(2010, 4, 29)),
                'BADL1': (datetime(2010, 3, 27), datetime(2010, 4, 29)),
                'VOYA2': (datetime(2010, 3, 31), datetime(2010, 4, 29)),
                'EGBE1': (datetime(2010, 4, 2), datetime(2010, 4, 29)),
                'LYBR1': (datetime(2010, 4, 2), datetime(2010, 5, 2)),
                'ACAD1': (datetime(2010, 4, 5), datetime(2010, 5, 5))}

    # Group on index level 0 (the station code) and filter each group by
    # its own window.  A station missing from stat_dat raises KeyError.
    filtered = []
    for site, site_df in df.groupby(level=0):
        first_day, last_day = stat_dat[site]
        filtered.append(dayfilter(site_df, first_day, last_day))
    df_filt = pan.concat(filtered)

    # DataFrame.save() was renamed to to_pickle() and later removed.
    df.to_pickle('IMPROVE_data_all.pickle')
    df_filt.to_pickle('IMPROVE_data_April.pickle')