-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathWeek04.03DeepLearning.07RegressionWithDeepLearning.R
More file actions
84 lines (66 loc) · 3.12 KB
/
Week04.03DeepLearning.07RegressionWithDeepLearning.R
File metadata and controls
84 lines (66 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
library(h2o)
# Start (or connect to) an H2O cluster; nthreads = -1 is passed through to H2O
# unchanged (H2O documents -1 as "use all available CPUs").
h2o.init(nthreads = -1)

# Import the airlines sample data set directly from the public H2O test bucket.
data <- h2o.importFile(
  "http://h2o-public-test-data.s3.amazonaws.com/smalldata/airlines/allyears2k_headers.zip"
)

# Split into three frames: 80% train, 10% validation, remainder for test.
# The seed is fixed so the split is repeatable.
parts <- h2o.splitFrame(data, ratios = c(0.8, 0.1), seed = 69)
train <- parts[[1]]
valid <- parts[[2]]
test <- parts[[3]]

# Row counts of each partition (values recorded by the original author).
nrow(train) # 35255
nrow(valid) # 4272
nrow(test)  # 4451

# Response column for the regression.
y <- "ArrDelay"

# Predictors: every column except the ones listed below.
xWithDep <- setdiff(
  colnames(data),
  c(
    "ArrDelay",          # the response itself
    "IsArrDelayed",      # excluded alongside the response
    "ActualElapsedTime", # excluded; the scheduled CRSElapsedTime is kept
    "ArrTime",           # excluded; the scheduled CRSArrTime is kept
    "TailNum"            # high cardinality, (presumed) low information
  )
)
# Baseline model: deep learning regression with all hyper-parameters left at
# their defaults. system.time() prints the elapsed fit time (the original
# author noted 17 to 18s).
system.time({
  m_DLR_def <- h2o.deeplearning(
    x = xWithDep,
    y = y,
    training_frame = train,
    validation_frame = valid,
    model_id = "DLR_def",
    variable_importances = TRUE
  )
})

# Metrics computed on the validation frame.
h2o.performance(m_DLR_def, valid = TRUE)

# Scoring history plot for the baseline model.
plot(m_DLR_def)

# Variable importances: full table, then a chart of the top 30 features.
h2o.varimp(m_DLR_def)
h2o.varimp_plot(m_DLR_def, num_of_features = 30)
# Second model: allow up to 200 epochs, with early stopping on the deviance
# metric (stopping_rounds = 5, stopping_tolerance = 0.0).
#
# NOTE: the model_id must be unique. The original reused "DLR_def", which
# makes H2O overwrite the baseline model stored under that key in the cluster,
# so the earlier R handle would silently refer to this model on the server.
#
# NOTE(review): the original timing comment ("17 to 18s") was copied from the
# baseline run and has not been re-measured for this configuration.
system.time(
  m_DLR_200_epochs <- h2o.deeplearning(
    x = xWithDep,
    y = y,
    training_frame = train,
    validation_frame = valid,
    model_id = "DLR_200_epochs",
    variable_importances = TRUE,
    epochs = 200,
    stopping_rounds = 5,
    stopping_tolerance = 0.0,
    stopping_metric = "deviance"
  )
)

# Metrics computed on the validation frame.
h2o.performance(m_DLR_200_epochs, valid = TRUE)

# Scoring history plot.
plot(m_DLR_200_epochs)

# Variable importances: full table, then a chart of the top 30 features.
h2o.varimp(m_DLR_200_epochs)
h2o.varimp_plot(m_DLR_200_epochs, num_of_features = 30)

# Per-scoring-event history, useful for seeing where early stopping kicked in.
h2o.scoreHistory(m_DLR_200_epochs)
# Histogram of the response on the training frame; the original author judged
# its shape to look like a Laplace distribution, which motivates the model
# below.
h2o.hist(train$ArrDelay)

# Third model: same epoch / early-stopping settings as the 200-epoch run, but
# with distribution = "laplace".
#
# NOTE: the model_id must be unique. The original reused "DLR_def", which
# makes H2O overwrite whichever model is already stored under that key in the
# cluster.
#
# NOTE(review): the original timing comment ("17 to 18s") was copied from the
# baseline run and has not been re-measured for this configuration.
system.time(
  m_DLR_laplace <- h2o.deeplearning(
    x = xWithDep,
    y = y,
    training_frame = train,
    validation_frame = valid,
    model_id = "DLR_laplace",
    variable_importances = TRUE,
    epochs = 200,
    stopping_rounds = 5,
    stopping_tolerance = 0.0,
    stopping_metric = "deviance",
    distribution = "laplace"
  )
)

# Metrics computed on the validation frame, then the scoring history plot.
h2o.performance(m_DLR_laplace, valid = TRUE)
plot(m_DLR_laplace)
# Gather the three fitted models in an explicit list, in the order the
# summaries below print them. list() is used rather than c(): c() on S4 model
# objects only produces a list implicitly.
allModels <- list(
  defaults = m_DLR_def,
  `200 epochs` = m_DLR_200_epochs,
  laplace = m_DLR_laplace
)

# Apply a metric accessor (e.g. h2o.mae, h2o.rmse) to each model's validation
# metrics and round to 5 significant digits. vapply() enforces that every
# model yields exactly one numeric value, where sapply() would silently change
# its return type on unexpected input.
validation_metric <- function(models, metric_fn) {
  signif(
    vapply(models, function(m) metric_fn(m, valid = TRUE), numeric(1)),
    5
  )
}

# Mean absolute error on the validation frame, one value per model.
mae <- validation_metric(allModels, h2o.mae)
cat(" defaults:", mae[1], "\n 200 epochs:", mae[2], "\n laplace:", mae[3], "\n")

# Root mean squared error on the validation frame, one value per model.
rmse <- validation_metric(allModels, h2o.rmse)
cat(" defaults:", rmse[1], "\n 200 epochs:", rmse[2], "\n laplace:", rmse[3], "\n")