##
## H2O is not running yet, starting it now...
##
## Note: In case of errors look at the following log files:
## /tmp/RtmpurvVPC/h2o_akshaj_started_from_r.out
## /tmp/RtmpurvVPC/h2o_akshaj_started_from_r.err
##
##
## Starting H2O JVM and connecting: . Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 seconds 20 milliseconds
## H2O cluster timezone: Asia/Kolkata
## H2O data parsing timezone: UTC
## H2O cluster version: 3.26.0.1
## H2O cluster version age: 1 month and 6 days
## H2O cluster name: H2O_started_from_R_akshaj_qdt621
## H2O cluster total nodes: 1
## H2O cluster total memory: 3.46 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 4
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## H2O API Extensions: Amazon S3, XGBoost, Algos, AutoML, Core V3, Core V4
## R Version: R version 3.6.1 (2019-07-05)
NN Train Function
## Return the hyper-parameters used to train the neural network.
##
## optim_method: name of the optimisation method; only "gradientDescent"
##               is currently supported (any other value stops with an error).
##
## Returns a list with:
##   hidden_activation: activation function for the hidden layer ("Tanh")
##   iter:              maximum number of training epochs (10000)
##   params:            label string encoding the settings, used in the timer log
hyperParams <- function(optim_method) {
  ## Validate early; extend this vector when more methods are supported.
  supported_methods <- c("gradientDescent")
  if (!is.element(optim_method, supported_methods)) stop("Invalid Parameters.")
  hidden_activation <- "Tanh"
  iter <- 10000
  params <- paste0("method=", optim_method, "_",
                   "hidden_activation=", hidden_activation)
  list(hidden_activation = hidden_activation, iter = iter, params = params)
}
## Train a deep-learning regression model with h2o.
##
## x, y         : kept for interface compatibility with the other package
##                wrappers in this benchmark; the response is taken from the
##                "y" column of train_data, so they are not used directly.
## train_data   : an H2OFrame holding the predictors and a "y" response column.
## hidden_neur  : neuron count specification for the hidden layer(s).
## optim_method : optimisation method label, validated by hyperParams().
##
## Returns the fitted h2o deep-learning model object.
NNtrain <- function(x, y, train_data, hidden_neur, optim_method) {
  hp <- hyperParams(optim_method)
  fit <- h2o::h2o.deeplearning(
    y = "y",
    training_frame = train_data,
    overwrite_with_best_model = TRUE,
    standardize = FALSE,
    activation = hp$hidden_activation,
    # adaptive_rate = TRUE,
    rate = 0.01,
    hidden = hidden_neur,
    epochs = hp$iter,
    train_samples_per_iteration = -1,
    initial_weight_distribution = "Normal",
    initial_weight_scale = 0.1,
    loss = "Quadratic",
    distribution = "gaussian",
    stopping_rounds = 500,
    stopping_metric = "RMSE",
    stopping_tolerance = 1e-5,
    ## fresh random seed on every call, so the nrep runs are independent
    seed = as.integer(runif(1) * 10000000),
    verbose = FALSE
  )
  fit
}
Main Loop
## Loop over every benchmark dataset: train nrep independent h2o models per
## optimisation method, record RMSE/MAE and timings, and plot predictions.
for (dset in names(NNdatasets)) {
  ## =============================================
  ## EXTRACT INFORMATION FROM THE SELECTED DATASET
  ## =============================================
  ds <- NNdatasets[[dset]]$ds
  Z <- NNdatasets[[dset]]$Z
  neur <- NNdatasets[[dset]]$neur
  nparNN <- NNdatasets[[dset]]$nparNN
  fmlaNN <- NNdatasets[[dset]]$fmlaNN
  donotremove <- c("dset", "dsets", "ds", "Z", "neur", "TF", "nrep", "timer",
                   "donotremove", "donotremove2")
  donotremove2 <- c("dset", "dsets")
  ## ===================================================
  ## SELECT THE FORMAT REQUIRED BY THE PACKAGE/ALGORITHM
  ## d = data.frame, m = matrix, v = vector/numeric
  ## ATTACH THE OBJECTS CREATED (x, y, Zxy, ... )
  ## ===================================================
  ZZ <- prepareZZ(Z, xdmv = "m", ydmv = "v", zdm = "d", scale = TRUE)
  ## NOTE(review): attach() is generally discouraged; it is kept here to
  ## provide the x/y/Zxy/ym0/ysd0/y0/xory/uni variables that the shared
  ## benchmark template expects in the search path.
  attach(ZZ)
  ## =============================================
  ## SELECT THE PACKAGE USED FOR TRAINING
  ## nrep => SELECT THE NUMBER OF INDEPENDANT RUNS
  ## iter => SELECT THE MAX NUMBER OF ITERATIONS
  ## TF => PLOT THE RESULTS
  ## =============================================
  nrep <- 10
  TF <- TRUE
  h_Z <- h2o::as.h2o(Z)
  method <- c("gradientDescent")
  for (m in method) {
    descr <- paste(dset, "h2o::h2o.deeplearning", m, sep = "_")
    ## AUTO
    Ypred <- list()
    Rmse <- numeric(length = nrep)
    Mae <- numeric(length = nrep)
    for (i in seq_len(nrep)) {
      event <- paste0(descr, sprintf("_%.2d", i))
      timer$start(event)
      #### ADJUST THE FOLLOWING LINES TO THE PACKAGE::ALGORITHM
      hyper_params <- hyperParams(optim_method = m)
      ## Fall back to a null model if training fails, so the run still
      ## produces a (degenerate) prediction and the timer entry is complete.
      NNreg <- tryCatch(
        NNtrain(x = x, y = y, train_data = h_Z, hidden_neur = neur, optim_method = m),
        error = function(e) lm(y ~ 0, data = Zxy)
      )
      predictions <- h2o::h2o.predict(NNreg, h_Z)
      ## Un-scale the predictions back to the original response units.
      ## BUG FIX: tryCatch handlers must be functions; the original
      ## `error = ym0` would itself fail instead of returning the fallback.
      y_pred <- tryCatch(
        ym0 + ysd0 * as.data.frame(predictions)$predict,
        error = function(e) ym0
      )
      ####
      Ypred[[i]] <- y_pred
      Rmse[i] <- funRMSE(y_pred, y0)
      Mae[i] <- funMAE(y_pred, y0)
      timer$stop(event, RMSE = Rmse[i], MAE = Mae[i], params = hyper_params$params, printmsg = FALSE)
    }
    ## Index of the best run (ties broken by the first occurrence).
    best <- which(Rmse == min(Rmse, na.rm = TRUE))[1]
    best ; Rmse[[best]]
    ## ================================================
    ## PLOT ALL MODELS AND THE MODEL WITH THE BEST RMSE
    ## par OPTIONS CAN BE IMPROVED FOR A BETTER DISPLAY
    ## ================================================
    op <- par(mfcol = c(1, 2))
    plotNN(xory, y0, uni, TF, main = descr)
    for (i in seq_len(nrep)) lipoNN(xory, Ypred[[i]], uni, TF, col = i, lwd = 1)
    plotNN(xory, y0, uni, TF, main = descr)
    lipoNN(xory, Ypred[[best]], uni, TF, col = 4, lwd = 4)
    par(op)
  }
  ## ===========================
  ## DETACH ZZ - END OF THE LOOP
  ## ===========================
  detach(ZZ)
}
Results
## Collect the timer log into a tidy results data.frame. Event names have
## the shape "<dataset>_<pkg::fun>_<method>_<NN>" where NN is the run number.
dfr0 <- getTimer(timer)
dfr <- data.frame(
  ## identifier without the trailing "_NN" run suffix
  ds_pkg.fun_algo = stringr::str_sub(dfr0[, 1], 1, -4),
  ## two-digit run number at the end of the event name
  run = stringr::str_sub(dfr0[, 1], -2, -1),
  dfr0[, c("RMSE", "MAE")],
  ## leading "<dataset>_" token with the underscore stripped.
  ## BUG FIX: fixed() must be namespace-qualified — stringr is not attached
  ## in this script (every other call uses stringr::), so a bare fixed("_")
  ## would fail with "could not find function".
  dataset = stringr::str_replace_all(
    stringr::str_extract(dfr0[, 1], pattern = "^\\w*_"),
    stringr::fixed("_"), ""),
  ## "_<method>_" token with the underscores stripped
  method = stringr::str_replace_all(
    stringr::str_extract(dfr0[, 1], pattern = "_\\w*_"),
    stringr::fixed("_"), ""),
  Elapsed = round(dfr0[, 4], 5),
  params = dfr0$params
)
dfr
Best Results
| dataset   | method          | minRMSE   | meanRMSE   | meanTime |
|-----------|-----------------|-----------|------------|----------|
| mDette    | gradientDescent | 106.3830  | 109.43602  | 10.024   |
| mFriedman | gradientDescent | 0.2227    | 0.22467    | 9.758    |
| mIshigami | gradientDescent | 14.8019   | 15.32325   | 15.360   |
| mRef153   | gradientDescent | 1529.9687 | 1577.48936 | 4.431    |
| uDmod1    | gradientDescent | 0.2489    | 0.29730    | 3.405    |
| uDmod2    | gradientDescent | 0.2488    | 0.26639    | 3.422    |
| uDreyfus1 | gradientDescent | 1.0815    | 1.09136    | 3.100    |
| uDreyfus2 | gradientDescent | 1.0915    | 1.10119    | 2.603    |
| uGauss1   | gradientDescent | 2520.1770 | 2535.90451 | 5.718    |
| uGauss2   | gradientDescent | 2287.8936 | 2290.10483 | 4.519    |
| uGauss3   | gradientDescent | 2431.6924 | 2433.90897 | 4.639    |
| uNeuroOne | gradientDescent | 0.9572    | 1.25564    | 2.394    |
## [1] "A shutdown has been triggered. "