-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels.py
More file actions
90 lines (64 loc) · 3.19 KB
/
models.py
File metadata and controls
90 lines (64 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""Module containing models representing catchment data.
The Model layer is responsible for the 'business logic' part of the software.
Catchment data is held in a Pandas dataframe (2D array) where each column contains
data for a single measurement site, and each row represents a single measurement
time across all sites.
"""
import pandas as pd
import numpy as np
def read_variable_from_csv(filename, variable='Rainfall (mm)'):
    """Reads a named variable from a CSV file, and returns a
    pandas dataframe containing that variable. The CSV file must contain
    a column of dates, a column of site ID's, and (one or more) columns
    of data - only one of which will be read.

    :param filename: Filename of CSV to load
    :param variable: Name of the data column to read
                     (default 'Rainfall (mm)', preserving previous behaviour)
    :return: 2D array of given variable. Index will be dates,
             Columns will be the individual sites
    """
    dataset = pd.read_csv(filename, usecols=['Date', 'Site', variable])
    # Parse the whole date column in one vectorised call instead of a
    # per-row list comprehension; dayfirst=True because dates are DD/MM/YYYY.
    dataset['Date'] = pd.to_datetime(dataset['Date'], dayfirst=True)

    # Pivot: one column per site, indexed by measurement date.
    newdataset = pd.DataFrame(index=dataset['Date'].unique())
    for site in dataset['Site'].unique():
        newdataset[site] = dataset[dataset['Site'] == site].set_index('Date')[variable]
    return newdataset.sort_index()
def read_variable_from_xml(filename, variable='Rainfall (mm)'):
    """Reads a named variable from a XML file, and returns a
    pandas dataframe containing that variable. The XML file must contain
    a column of dates, a column of site ID's, and (one or more) columns
    of data - only one of which will be read.

    :param filename: Filename of XML to load
    :param variable: Name of the data column to read, using the renamed
                     (human-readable) column name
                     (default 'Rainfall (mm)', preserving previous behaviour)
    :return: 2D array of given variable. Index will be dates,
             Columns will be the individual sites
    """
    dataset = pd.read_xml(filename)
    # Map the XML-safe element names back to the human-readable column
    # names used elsewhere in this module.
    dataset = dataset.rename(
        {'Date': 'OldDate', 'Site_Name': 'Site Name', 'Rainfall_mm': 'Rainfall (mm)'},
        axis='columns')
    # Parse all dates in one vectorised call; dayfirst=True for DD/MM/YYYY.
    dataset['Date'] = pd.to_datetime(dataset['OldDate'], dayfirst=True)
    dataset = dataset.drop('OldDate', axis='columns')

    # Pivot: one column per site, indexed by measurement date.
    # NOTE(review): assumes the XML also carries a 'Site' ID element,
    # distinct from 'Site_Name' — confirm against the data files.
    newdataset = pd.DataFrame(index=dataset['Date'].unique())
    for site in dataset['Site'].unique():
        newdataset[site] = dataset[dataset['Site'] == site].set_index('Date')[variable]
    return newdataset.sort_index()
def daily_total(data):
    """Sum the measurements for each calendar day.

    :param data: 2D data array whose index must be np.datetime64 compatible.
    :return: per-day column totals.
    """
    by_day = data.groupby(data.index.date)
    return by_day.sum()
def daily_mean(data):
    """Average the measurements for each calendar day.

    :param data: 2D data array whose index must be np.datetime64 compatible.
    :return: per-day column means.
    """
    return data.groupby(data.index.date).agg("mean")
def daily_max(data):
    """Take the largest measurement of each calendar day.

    :param data: 2D data array whose index must be np.datetime64 compatible.
    :return: per-day column maxima.
    """
    calendar_days = data.index.date
    return data.groupby(calendar_days).max()
def daily_min(data):
    """Take the smallest measurement of each calendar day.

    :param data: 2D data array whose index must be np.datetime64 compatible.
    :return: per-day column minima.
    """
    calendar_days = data.index.date
    return data.groupby(calendar_days).min()
def data_normalise(data):
    """Normalise a 2D data array by dividing each column by its maximum.

    :param data: 2D data array (DataFrame or ndarray).
    :return: array of the same shape with each column scaled to a max of 1.
    """
    # A 1-D (ncols,) array broadcasts across the rows of a 2D array,
    # so no explicit np.newaxis is needed.
    column_maxima = np.asarray(np.max(data, axis=0))
    return data / column_maxima