ParBayesianOptimization/R/bayesOpt.R at e0fbcdccd64d5d8da8ca1b77ddb4cfb02813348b · AnotherSamWilson/ParBayesianOptimization · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
#' Bayesian Optimization with Gaussian Processes
#'
#' Maximizes a user defined function within a set of bounds. After the function
#' is sampled a pre-determined number of times, a Gaussian process is fit to
#' the results. An acquisition function is then maximized to determine the most
#' likely location of the global maximum of the user defined function.  This
#' process is repeated for a set number of iterations.
#'
#' @param FUN the function to be maximized. This function should return a
#'   named list with at least 1 component. The first component must be named
#'   \code{Score} and should contain the metric to be maximized. You may
#'   return other named scalar elements that you wish to include in the final
#'   summary table.
#' @param bounds named list of lower and upper bounds for each \code{FUN} input.
#'   The names of the list should be arguments passed to \code{FUN}.
#'   Use "L" suffix to indicate integers.
#' @param saveFile character filepath (including file name and extension, .RDS) that
#'   specifies the location to save results as they are obtained. A \code{bayesOpt}
#'   object is saved to the file after each epoch.
#' @param initGrid user specified points to sample the scoring function, should
#'   be a \code{data.frame} or \code{data.table} with identical column names as bounds.
#' @param initPoints Number of points to initialize the process with. Points are
#'   chosen with latin hypercube sampling within the bounds supplied.
#' @param iters.n The total number of times FUN will be run after initialization.
#' @param iters.k integer that specifies the number of times to sample FUN
#'   at each Epoch (optimization step). If running in parallel, good practice
#'   is to set \code{iters.k} to some multiple of the number of cores you have designated
#'   for this process. Must be lower than, and preferrably some multiple of \code{iters.n}.
#' @param otherHalting A list of other halting specifications. The process will stop if any of
#'   the following is true. These checks are only performed in between optimization steps:
#' \itemize{
#'   \item The elapsed seconds is greater than the list element \code{timeLimit}.
#'   \item The utility expected from the Gaussian process is less than the list element
#'   \code{minUtility}.
#' }
#' @param acq acquisition function type to be used. Can be "ucb", "ei", "eips" or "poi".
#' \itemize{
#'   \item \code{ucb}   Upper Confidence Bound
#'   \item \code{ei}    Expected Improvement
#'   \item \code{eips}  Expected Improvement Per Second
#'   \item \code{poi}   Probability of Improvement
#' }
#' @param kappa tunable parameter kappa of the upper confidence bound.
#'   Adjusts exploitation/exploration. Increasing kappa will increase the
#'   importance that uncertainty (unexplored space) has, therefore incentivising
#'   exploration. This number represents the standard deviations above 0 of your upper
#'   confidence bound. Default is 2.56, which corresponds to the ~99th percentile.
#' @param eps tunable parameter epsilon of ei, eips and poi. Adjusts exploitation/exploration.
#'   This value is added to y_max after the scaling, so should between -0.1 and 0.1.
#'   Increasing eps will make the "improvement" threshold for new points higher, therefore
#'   incentivising exploitation.
#' @param parallel should the process run in parallel? If TRUE, several criteria must be met:
#' \itemize{
#'   \item A parallel backend must be registered
#'   \item Objects required by \code{FUN} must be loaded into each cluster.
#'   \item Packages required by \code{FUN} must be loaded into each cluster. See vignettes.
#'   \item \code{FUN} must be thread safe.
#' }
#' @param gsPoints integer that specifies how many initial points to try when
#'   searching for the optimum of the acquisition function. Increase this for a higher
#'   chance to find global optimum, at the expense of more time.
#' @param convThresh convergence threshold passed to \code{factr} when the
#'   \code{optim} function (L-BFGS-B) is called. Lower values will take longer
#'   to converge, but may be more accurate.
#' @param acqThresh number 0-1. Represents the minimum percentage
#'   of the global optimal utility required for a local optimum to
#'   be included as a candidate parameter set in the next scoring function.
#'   If 1.0, only the global optimum will be used as a candidate
#'   parameter set. If 0.5, only local optimums with 50 percent of the utility
#'   of the global optimum will be used.
#' @param errorHandling If FUN returns an error, how to proceed. All errors are
#'   stored in \code{scoreSummary}. Can be one of 3 options: "stop" stops the
#'   function running and returns results. "continue" keeps the process running.
#'   Passing an integer will allow the process to continue until that many errors
#'   have occured, after which the results will be returned.
#' @param plotProgress Should the progress of the Bayesian optimization be
#'   printed? Top graph shows the score(s) obtained at each iteration.
#'   The bottom graph shows the estimated utility of each point.
#'   This is useful to display how much utility the Gaussian Process is
#'   assuming still exists. If your utility is approaching 0, then you
#'   can be confident you are close to an optimal parameter set.
#' @param verbose Whether or not to print progress to the console.
#'   If 0, nothing will be printed. If 1, progress will be printed.
#'   If 2, progress and information about new parameter-score pairs will be printed.
#' @param ... Other parameters passed to \code{DiceKriging::km()}. All FUN inputs and scores
#' are scaled from 0-1 before being passed to km. FUN inputs are scaled within \code{bounds},
#' and scores are scaled by 0 = min(scores), 1 = max(scores).
#' @return An object of class \code{bayesOpt} containing information about the process.
#' \itemize{
#'   \item \code{FUN}          The scoring function.
#'   \item \code{bounds}       The bounds originally supplied.
#'   \item \code{iters}        The total iterations that have been run.
#'   \item \code{initPars}     The initialization parameters.
#'   \item \code{optPars}      The optimization parameters.
#'   \item \code{GauProList}   A list containing information on the Gaussian Processes used in optimization.
#'   \item \code{scoreSummary} A \code{data.table} with results from the execution of \code{FUN}
#'   at different inputs. Includes information on the epoch, iteration, function inputs, score, and any other
#'   information returned by \code{FUN}.
#'   \item \code{stopStatus}   Information on what caused the function to stop running. Possible explenations are
#'   time limit, minimum utility not met, errors in \code{FUN}, iters.n was reached, or the Gaussian Process encountered
#'   an error.
#'   \item \code{elapsedTime} The total time in seconds the function was executing.
#' }
#' @references Jasper Snoek, Hugo Larochelle, Ryan P. Adams (2012) \emph{Practical Bayesian Optimization of Machine Learning Algorithms}
#'
#' @section Vignettes:
#'
#' It is highly recommended to read the \href{https://github.com/AnotherSamWilson/ParBayesianOptimization}{GitHub} for examples.
#' There are also several vignettes available from the official \href{https://CRAN.R-project.org/package=ParBayesianOptimization}{CRAN Listing}.
#'
#' @examples
#' # Example 1 - Optimization of a continuous single parameter function
#' scoringFunction <- function(x) {
#'   a <- exp(-(2-x)^2)*1.5
#'   b <- exp(-(4-x)^2)*2
#'   c <- exp(-(6-x)^2)*1
#'   return(list(Score = a+b+c))
#' }
#'
#' bounds <- list(x = c(0,8))
#'
#' Results <- bayesOpt(
#'     FUN = scoringFunction
#'   , bounds = bounds
#'   , initPoints = 3
#'   , iters.n = 2
#'   , gsPoints = 10
#' )
#'
#' \dontrun{
#' # Example 2 - Hyperparameter Tuning in xgboost
#' if (requireNamespace('xgboost', quietly = TRUE)) {
#'   library("xgboost")
#'
#'   data(agaricus.train, package = "xgboost")
#'
#'   Folds <- list(
#'       Fold1 = as.integer(seq(1,nrow(agaricus.train$data),by = 3))
#'     , Fold2 = as.integer(seq(2,nrow(agaricus.train$data),by = 3))
#'     , Fold3 = as.integer(seq(3,nrow(agaricus.train$data),by = 3))
#'   )
#'
#'   scoringFunction <- function(max_depth, min_child_weight, subsample) {
#'
#'     dtrain <- xgb.DMatrix(agaricus.train$data,label = agaricus.train$label)
#'
#'     Pars <- list(
#'         booster = "gbtree"
#'       , eta = 0.01
#'       , max_depth = max_depth
#'       , min_child_weight = min_child_weight
#'       , subsample = subsample
#'       , objective = "binary:logistic"
#'       , eval_metric = "auc"
#'     )
#'
#'     xgbcv <- xgb.cv(
#'          params = Pars
#'        , data = dtrain
#'        , nround = 100
#'        , folds = Folds
#'        , prediction = TRUE
#'        , showsd = TRUE
#'        , early_stopping_rounds = 5
#'        , maximize = TRUE
#'        , verbose = 0
#'     )
#'
#'     return(
#'       list(
#'           Score = max(xgbcv$evaluation_log$test_auc_mean)
#'         , nrounds = xgbcv$best_iteration
#'       )
#'     )
#'   }
#'
#'   bounds <- list(
#'       max_depth = c(2L, 10L)
#'     , min_child_weight = c(1, 100)
#'     , subsample = c(0.25, 1)
#'   )
#'
#'   ScoreResult <- bayesOpt(
#'       FUN = scoringFunction
#'     , bounds = bounds
#'     , initPoints = 3
#'     , iters.n = 2
#'     , iters.k = 1
#'     , acq = "ei"
#'     , gsPoints = 10
#'     , parallel = FALSE
#'     , verbose = 1
#'   )
#' }
#' }
#' @importFrom data.table data.table setDT setcolorder := as.data.table copy .I setnames is.data.table rbindlist
#' @importFrom utils head tail
#' @export
bayesOpt <- function(
    FUN
  , bounds
  , saveFile = NULL
  , initGrid
  , initPoints = 4
  , iters.n = 3
  , iters.k = 1
  , otherHalting = list(timeLimit = Inf,minUtility = 0)
  , acq = "ucb"
  , kappa = 2.576
  , eps = 0.0
  , parallel = FALSE
  , gsPoints = pmax(100,length(bounds)^3)
  , convThresh = 1e8
  , acqThresh = 1.000
  , errorHandling = "stop"
  , plotProgress = FALSE
  , verbose = 1
  , ...
) {

  startT <- Sys.time()

  # look for defaults that are literally the same symbol as the arg‐name
  argNames <- names(formals(FUN))[names(formals(FUN)) == as.character(formals(FUN))]
  for (argName in argNames) {
    formals(FUN)[[argName]] <- get(argName)
  }

  # Construct bayesOpt list
  optObj <- list()
  class(optObj) <- "bayesOpt"
  optObj$FUN <- FUN
  optObj$bounds <- bounds
  optObj$iters <- 0
  optObj$initPars <- list()
  optObj$optPars <- list()
  optObj$GauProList <- list()

  # See if saveFile can be written to, and store saveFile if necessary.
  optObj <- changeSaveFile(optObj,saveFile)

  # Check the parameters
  checkParameters(
      bounds
    , iters.n
    , iters.k
    , otherHalting
    , acq
    , acqThresh
    , errorHandling
    , plotProgress
    , parallel
    , verbose
  )

  # Formatting
  boundsDT <- boundsToDT(bounds)
  otherHalting <- formatOtherHalting(otherHalting)

  # Initialization Setup
  if (missing(initGrid) + missing(initPoints) != 1) stop("Please provide 1 of initGrid or initPoints, but not both.")
  if (!missing(initGrid)) {
    setDT(initGrid)
    inBounds <- checkBounds(initGrid,bounds)
    inBounds <- as.logical(apply(inBounds,1,prod))
    if (any(!inBounds)) stop("initGrid not within bounds.")
    optObj$initPars$initialSample <- "User Provided Grid"
    initPoints <- nrow(initGrid)
  } else {
    initGrid <- randParams(boundsDT, initPoints)
    optObj$initPars$initialSample <- "Latin Hypercube Sampling"
  }
  optObj$initPars$initGrid <- initGrid
  if (nrow(initGrid) <= 2) stop("Cannot initialize with less than 3 samples.")
  optObj$initPars$initPoints <- nrow(initGrid)
  if (initPoints <= length(bounds)) stop("initPoints must be greater than the number of FUN inputs.")

  # Output from FUN is sunk into a temporary file.
  sinkFile <- file()
  on.exit(
    {
      while (sink.number() > 0) sink()
      close(sinkFile)
    }
  )

  # Define processing function
  `%op%` <- ParMethod(parallel)
  if(parallel) Workers <- getDoParWorkers() else Workers <- 1

  # Run initialization
  if (verbose > 0) cat("\nRunning initial scoring function",nrow(initGrid),"times in",Workers,"thread(s)...")
  sink(file = sinkFile)

  optObj$initPars$scoreSummary = list()
  listAndSave <- function(already, newRow) {
    # already:    the accumulated result so far (a list of data.tables)
    # newRow:     the one data.table returned by a freshly completed iteration
    if(!is.null(already) && already$Iteration == 1) {
      optObj$initPars$scoreSummary[[1]] <<- already
    }
    nextIdx <- newRow$Iteration
    optObj$initPars$scoreSummary[[nextIdx]] <<- newRow
    optObj$scoreSummary <<- rbindlist(optObj$initPars$scoreSummary, fill = TRUE)
    saveSoFar(optObj, verbose = verbose)
    NULL
  }

  tm <- system.time(
    scoreSummary <- foreach(
        iter = 1:nrow(initGrid)
      , .options.multicore = list(preschedule=FALSE)
      , .combine = listAndSave
      , .multicombine = FALSE
      , .inorder = FALSE
      , .errorhandling = 'pass'
      #, .packages ='data.table'
      , .verbose = FALSE
    ) %op% {

      Params <- initGrid[get("iter"),]
      Elapsed <- system.time(
        Result <- tryCatch(
          {
            do.call(what = FUN, args = as.list(Params))
          }
          , error = function(e) e
        )
      )

      # Make sure everything was returned in the correct format. Any errors here will be passed.
      if (any(class(Result) %in% c("simpleError","error","condition"))) return(
        data.table(Iteration = get("iter"), Params, errorMessage = Result$message))

      if (!inherits(x = Result, what = "list")) return(
        data.table(Iteration = get("iter"), Params, errorMessage = "Object returned from FUN was not a list."))

      resLengths <- lengths(Result)
      if (!any(names(Result) == "Score")) return(
        data.table(Iteration = get("iter"), Params, errorMessage = "FUN must return list with element 'Score' at a minimum."))

      if (!is.numeric(Result$Score)) return(
          data.table(Iteration = get("iter"), Params, errorMessage = "Score returned from FUN was not numeric."))

      if(any(resLengths != 1)) {
        badReturns <- names(Result)[which(resLengths != 1)]
        return(data.table(Iteration = get("iter"), Params, errorMessage = paste0("FUN returned these elements with length > 1: ",paste(badReturns,collapse = ","))))
      }

      data.table(Iteration = get("iter"), Params,Elapsed = Elapsed[[3]],as.data.table(Result))
    }
  )[[3]]
  while (sink.number() > 0) sink()
  if (verbose > 0) cat(" ",tm,"seconds\n")

  scoreSummary = optObj$scoreSummary
  # Scan our list for any simpleErrors. If any exist, stop the process and return the errors.
  if(any(!is.null(scoreSummary$errorMessage))) {
    stop("Errors encountered in initialization are listed above.")
  }

  # Format scoreSummary table. Initial iteration is set to 0
  scoreSummary[,("gpUtility") := rep(as.numeric(NA),nrow(scoreSummary))]
  scoreSummary[,("acqOptimum") := rep(FALSE,nrow(scoreSummary))]
  scoreSummary[,("Epoch") := rep(0,nrow(scoreSummary))]
  scoreSummary[,("Iteration") := 1:nrow(scoreSummary)]
  scoreSummary[,("inBounds") := rep(TRUE,nrow(scoreSummary))]
  scoreSummary[,("errorMessage") := rep(NA,nrow(scoreSummary))]
  extraRet <- setdiff(names(scoreSummary),c("Epoch","Iteration",boundsDT$N,"inBounds","Elapsed","Score","gpUtility","acqOptimum"))
  setcolorder(scoreSummary,c("Epoch","Iteration",boundsDT$N,"gpUtility","acqOptimum","inBounds","Elapsed","Score",extraRet))

  # System.time function is not terribly precise for very small elapsed times.
  if(any(scoreSummary$Elapsed < 1) & acq == "eips") {
    cat("\n   FUN elapsed time is too low to be precise. Switching acq to 'ei'.\n")
    acq <- 'ei'
  }

  # This is the final list returned. It is updated whenever possible.
  # If an error occurs, it is returned in its latest configuration.
  optObj$optPars$acq <- acq
  optObj$optPars$kappa <- kappa
  optObj$optPars$eps <- eps
  optObj$optPars$parallel <- parallel
  optObj$optPars$gsPoints <- gsPoints
  optObj$optPars$convThresh <- convThresh
  optObj$optPars$acqThresh <- acqThresh
  optObj$scoreSummary <- scoreSummary
  optObj$GauProList$gpUpToDate <- FALSE
  optObj$iters <- nrow(scoreSummary)
  optObj$stopStatus <- "OK"
  optObj$elapsedTime <- as.numeric(difftime(Sys.time(),startT,units = "secs"))

  # Save Intermediary Output
  saveSoFar(optObj,0)

  optObj <- addIterations(
      optObj
    , otherHalting = otherHalting
    , iters.n = iters.n
    , iters.k = iters.k
    , parallel = parallel
    , plotProgress = plotProgress
    , errorHandling = errorHandling
    , saveFile = saveFile
    , verbose = verbose
    , ...
  )

  return(optObj)

}
utils::globalVariables(c("."))