-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathLNC_tools_v1.py
More file actions
99 lines (77 loc) · 3.47 KB
/
LNC_tools_v1.py
File metadata and controls
99 lines (77 loc) · 3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def lnc_reader(filepath):
    """Read an ASCII table written by the LNC program into a pandas DataFrame.

    The input must be either a 1-D temporal average or a "Single Table with
    Time in the X Axis vs Altitude in the Y Axis"; a "Series of consecutively
    listed profiles" is NOT supported.

    Layout this reader relies on:
      * first line  - product designation, returned untouched as metadata;
      * blank lines - skipped;
      * header row  - a label (the word 'Altitude') followed by one
                      date/time string per column;
      * data rows   - an altitude followed by one value per date/time.
    Columns are separated by runs of three or more whitespace characters, so
    one or two spaces inside a field (e.g. between a date and a time) do not
    split it.

    Side effect: a comma-separated copy of the table is written next to the
    input file as ``<name>_proc<ext>``. Every row of that copy ends with a
    trailing comma, matching the files this function has always produced.

    Parameters
    ----------
    filepath : str
        Path of the LNC ASCII file to read.

    Returns
    -------
    df : pandas.DataFrame
        Float values indexed by parsed date/time (index name 'Date Time'),
        one column per altitude (columns named after the header label).
        Entries flagged as -1.#INF, 1.#INF, -1.#IND or 1.#IND become NaN.
    product : list
        One-element list holding the first line of the file (empty if the
        file itself is empty, in which case ValueError is raised instead).

    Raises
    ------
    ValueError
        If the file holds no table rows or the rows differ in column count.
    """
    import io
    import os
    import re

    import numpy as np
    import pandas as pan
    from dateutil.parser import parse

    # splitext only peels off the final extension, so directories or file
    # names containing extra dots (or no extension at all) no longer break
    # the unpacking the way filepath.split('.') did.
    fname, fext = os.path.splitext(filepath)
    print('Processing ' + fname)
    fout_name = fname + '_proc' + fext

    column_gap = re.compile(r'\s{3,}')
    product = []
    rows = []

    # io.open behaves the same on Python 2 and 3; newline='' keeps the
    # original line ending on the product line exactly as it is in the file.
    with io.open(filepath, 'r', newline='') as fin, \
            io.open(fout_name, 'w') as fout:
        for line in fin:
            if not product:
                # first line is the product designation, kept as metadata
                product.append(line)
                continue
            # Dropping trailing whitespace makes blank-line detection
            # independent of the line-ending convention and removes the
            # need for every row to carry trailing padding.
            content = line.rstrip()
            if not content:
                continue
            fields = column_gap.split(content)
            rows.append(fields)
            # u'' literals keep the write valid for io.open on Python 2.
            fout.write(u','.join(fields) + u',\n')

    if not rows:
        raise ValueError('no data table found in ' + filepath)
    if len(set(len(fields) for fields in rows)) != 1:
        raise ValueError('rows of the data table in ' + filepath +
                         ' do not all have the same number of columns')

    # Transpose so that each date/time heads a row of the array, which is
    # the orientation needed for a time-indexed DataFrame.
    table = np.array(rows).T

    # First column (minus the header label) holds the date/time strings.
    index = pan.Index([parse(stamp) for stamp in table[1:, 0]],
                      name='Date Time')
    # First row (minus the header label) holds the altitudes.
    columns = pan.Index(table[0, 1:].astype(float), name=str(table[0, 0]))

    # Everything else is data; swap bad-result flags for NaN before the
    # float conversion, which cannot parse them.
    data = table[1:, 1:]
    clean_data = data.copy()
    for flag in ('-1.#INF', '1.#INF', '-1.#IND', '1.#IND'):
        clean_data[data == flag] = 'NaN'

    df = pan.DataFrame(clean_data.astype(float), index=index, columns=columns)
    return df, product
def BR_mask(backscatter, data):
    """Zero out entries of *data* where the backscatter ratio is exactly 1.

    A boolean mask is built from *backscatter* (False where the ratio is
    identically 1, True elsewhere, including NaN) and multiplied into
    *data*, so masked entries become 0 and all others keep their value.
    Neither input is modified.

    Parameters
    ----------
    backscatter : pandas.DataFrame
        Table of backscatter ratios.
    data : pandas.DataFrame
        Table to be masked; combined with the mask by ordinary pandas
        multiplication.

    Returns
    -------
    pandas.DataFrame
        The product of the mask and *data*.
    """
    print('masking data')
    # A vectorised comparison gives the same mask as applying
    # "not x == 1" to every element, without the per-element Python call
    # and without relying on DataFrame.applymap.
    mask = backscatter != 1
    masked_data = mask * data
    return masked_data
if __name__ == '__main__':
    # Example run: read a backscatter-ratio table and a data table, mask the
    # data with the ratio table, and store the transposed result in an HDF5
    # file.
    import pandas as pan

    # Raw strings keep the Windows backslashes literal; the runtime value is
    # unchanged, but sequences such as \U are no longer open to being read
    # as escapes.
    BR_filepath = r'C:\Users\User\Dropbox\UBC_03242010_BR1064.txt'
    data_filepath = r'C:\Users\User\Dropbox\UBC_03242010_PR532.txt'

    maskout, maskprod = lnc_reader(BR_filepath)
    dataout, dataprod = lnc_reader(data_filepath)

    dfmasked = BR_mask(maskout, dataout)

    store = pan.HDFStore('testdat.h5')
    try:
        store['dftest'] = dfmasked.T
    finally:
        # Close explicitly so the table is flushed to disk and the file
        # handle is released even if the write fails.
        store.close()