TransPAC_code/LNC_tools_v1.py at master · dashamstyr/TransPAC_code · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


def lnc_reader(filepath):
    #---------------------------------------------------------------------------
    #This program opens an ASCII file generated by the LNC program and outputs a
    #Pandas timeseries dataframe with the data and relevant metadata
    #Note: Input file must be either a 1-D temporal average or a
    #"Single Table with Time in the X Axis vs Altitude in the Y Axis"
    #NOT A "Series of consecutively listed profiles"
    #---------------------------------------------------------------------------
    import pandas as pan
    import numpy as np
    import re
    import csv
    from dateutil.parser import parse

    product = []

    #generate a new filename with _proc attached to the end and open both files
    [fname,fext] = filepath.split('.')
    print 'Processing '+fname
    fout_name = fname+'_proc'
    fout_name = fout_name+'.'+fext
    fin = open(filepath, 'rb')
    fout = open(fout_name, 'w')

    #copy data table to new *_proc file line by line and replace all spaces
    #between data columns with a comma, empty lines have len(2) & are skipped
    #first line is the product designation, preserved as metadata
    bigspace = re.compile('\s\s\s+')
    for line in fin:
        if not product:
            product.append(line)
        elif len(line) == 2:
            continue
        else:
            line = bigspace.sub(',',line)+'\n'
            fout.write(line)

    #close both files
    fin.close()
    fout.close()

    #use csv.reader to read processed file into a list of lists
    temp = []
    for row in csv.reader(open(fout_name,'rb'), delimiter=','):
        temp.append(row)

    #convert to numpy array and transpose list to put datetime entries in first
    #column, which will facilitate conversion to pandas timeseries dataframe
    temparray = np.array(temp).T

    #generate pandas dataframe index by parsing strings into datetime objects
    #note: first entry is the word 'Altitude', last entry is an empty space
    indexdat = []
    for i in temparray[1:-1,0]: indexdat.append(parse(i))
    index = pan.Index(indexdat,name = 'Date Time')
    #generate column headers from altitudes (not including the word 'Altitude'
    coldat = np.array(temparray[0,1:],dtype='float')
    columns = pan.Index(coldat,name = temparray[0,0])
    #data for dataframe consists of remaining rows and columns
    data = temparray[1:-1,1:]

    #check data for flags indicating bad results and substitute with NaN

    flags = ['-1.#INF','1.#INF','-1.#IND','1.#IND']

    clean_data = np.copy(data)

    for f in flags: clean_data[data == f] = 'NaN'

    #convert data to pandas dataframe
    df = pan.DataFrame(clean_data,index=index,columns=columns,dtype='float')

    return df, product


def BR_mask(backscatter, data):
    #this function takes a pandas timeseries dataframe representing a table of
    #backscatter ratios and produces a masking dataframe with values of 0 where
    #ratio is identically 1, and 1 elsewhere then applies it to data
    print 'masking data'
    mask = backscatter.applymap(lambda x: not x == 1)

    masked_data = mask*data

    return masked_data

if __name__=='__main__':
    import pandas as pan

    BR_filepath = 'C:\Users\User\Dropbox\UBC_03242010_BR1064.txt'
    data_filepath = 'C:\Users\User\Dropbox\UBC_03242010_PR532.txt'
    maskout, maskprod = lnc_reader(BR_filepath)
    dataout, dataprod = lnc_reader(data_filepath)
    dfmasked = BR_mask(maskout, dataout)
    store = pan.HDFStore('testdat.h5')
    store['dftest'] = dfmasked.T