context("Basic Tests") test_that("steepest descent with constant step size", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(), step_size = constant_step_size( value = 0.0001 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 19.18, 15.52, 12.81) g2ns <- c(232.87, 198.56, 170.43, 147.11) steps <- c(0, 0.0233, 0.0199, 0.017) # step size is multiplied by gradient! par <- c(-1.144, 1.023) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) expect_equal(res$f, min(res$progress$f)) expect_equal(res$g2n, min(res$progress$g2n)) expect_equal(res$nf, nfs[length(nfs)]) expect_equal(res$ng, ngs[length(ngs)]) }) test_that("counting result fun grad calls increases counts", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(), step_size = constant_step_size( value = 0.0001 ) ), verbose = FALSE ) ) res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) # extra f and g calls are recorded corresponding to generating progress # data (including during iteration 0) nfs <- c(1, 2, 3, 4) ngs <- c(1, 2, 3, 4) fs <- c(24.2, 19.18, 15.52, 12.81) g2ns <- c(232.87, 198.56, 170.43, 147.11) steps <- c(0, 0.0233, 0.0199, 0.017) par <- c(-1.144, 1.023) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) expect_equal(res$f, min(res$progress$f)) expect_equal(res$g2n, min(res$progress$g2n)) expect_equal(res$nf, nfs[length(nfs)]) expect_equal(res$ng, ngs[length(ngs)]) }) test_that("can check convergence less often and get fewer fn/gr calls", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(), step_size = constant_step_size( value = 0.0001 ) ), verbose = FALSE ) ) res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5, check_conv_every = 1000 ) # fewer stored progress values first and last iter only # same number of gradient evaluations, but fewer total function evaluations progress_iters <- c("0", "3") nfs <- c(1, 2) ngs <- c(1, 4) fs <- c(24.2, 12.81) g2ns <- c(232.87, 147.11) steps <- c(0, 0.017) par <- c(-1.144, 1.023) expect_equal(rownames(res$progress), progress_iters) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) expect_equal(res$f, min(res$progress$f)) expect_equal(res$g2n, min(res$progress$g2n)) expect_equal(res$nf, nfs[length(nfs)]) expect_equal(res$ng, ngs[length(ngs)]) }) test_that("no grad norm returned or stored in progress when grad_tol is NULL", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(), step_size = constant_step_size( value = 0.0001 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = NULL ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 19.18, 15.52, 12.81) steps <- c(0, 0.0233, 0.0199, 0.017) par <- c(-1.144, 1.023) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_true(is.null(res$progress$g2n)) expect_true(is.null(res$g2n)) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("steepest descent with constant step size and normalized direction", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = constant_step_size( value = 0.0001 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 24.18, 24.15, 24.13) g2ns <- c(232.87, 232.72, 232.57, 232.42) steps <- c(0, 1e-4, 1e-4, 1e-4) par <- c(-1.2, 1.0) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("steepest descent with bold driver", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver( init_step_size = 1 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 4, 7, 12) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 6.32, 4.12, 4.11) g2ns <- c(232.87, 64.72, 2.90, 2.39) steps <- c(0, 0.25, 0.069, 0.0047) par <- c(-1.024, 1.060) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with 0 step size should be like using no momentum", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = constant_step_size( value = 0.01 ) ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 21.95, 19.84, 17.88) g2ns <- c(232.87, 217.96, 203.31, 188.93) steps <- c(0, 1e-2, 1e-2, 1e-2) mus <- c(0, 0, 0, 0) par <- c(-1.172, 1.011) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with constant step size", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = constant_step_size( value = 0.01 ) ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 21.95, 19.44, 17.06) g2ns <- c(232.87, 217.96, 200.42, 182.69) steps <- c(0, 0.01, 0.012, 0.0124) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-1.168, 1.013) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("eager classical momentum with constant step size should give same results as non-eager", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = constant_step_size( value = 0.01 ) ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE opt$eager_update <- TRUE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 21.95, 19.44, 17.06) g2ns <- c(232.87, 217.96, 200.42, 182.69) steps <- c(0, 0.01, 0.012, 0.0124) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-1.168, 1.013) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with bold driver", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 4, 8, 10) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 6.32, 5.25, 4.62) g2ns <- c(232.87, 64.72, 47.19, 33.88) steps <- c(0, 0.25, 0.020, 0.0795) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-1.051, 1.040) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("eager classical momentum with bold driver same as 'lazy' result", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), verbose = FALSE ) ) opt$eager_update <- TRUE opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 4, 8, 10) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 6.32, 5.25, 4.62) g2ns <- c(232.87, 64.72, 47.19, 33.88) steps <- c(0, 0.25, 0.020, 0.0795) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-1.051, 1.040) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("linear weighted classical momentum with bold driver", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE opt <- append_stage(opt, momentum_correction_stage()) res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 4, 10, 12) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 4.27, 5.04, 4.67) g2ns <- c(232.87, 17.16, 42.78, 33.27) steps <- c(0, 0.2, 0.027, 0.01) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-0.998, 1.078) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("linear weighted eager classical momentum with bold driver", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), verbose = FALSE ) ) opt <- append_stage(opt, momentum_correction_stage()) opt$count_res_fg <- FALSE opt$eager_update <- TRUE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 4, 10, 12) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 4.27, 5.04, 4.67) g2ns <- c(232.87, 17.16, 42.78, 33.27) steps <- c(0, 0.2, 0.027, 0.01) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-0.998, 1.078) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("bold classical momentum with bold driver", { # Not a very good idea - momentum component easily shrinks to zero # so you waste a lot of time trying to find a non-existent acceptable step # size - but tests that you can use the same step size method in different # stages and they don't interfere with each other. opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(normalize = TRUE), step_size = bold_driver() ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 6, 19, 45) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 6.32, 4.12, 4.10) g2ns <- c(232.87, 64.72, 2.81, 2.41) steps <- c(0, 0.25, 0.064, 0.0047) mus <- c(1, 1.1, 4.73e-3, 1.64e-8) par <- c(-1.027, 1.059) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("bold classical momentum with bold driver without cache gives same results, but requires extra work", { # Checks that the caching of function calls works correctly opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(normalize = TRUE), step_size = bold_driver() ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, invalidate_cache = TRUE, grad_tol = 1e-5 ) nfs <- c(0, 6, 20, 47) # extra function evaluations ngs <- c(0, 1, 2, 3) fs <- c(24.2, 6.32, 4.12, 4.10) g2ns <- c(232.87, 64.72, 2.81, 2.41) steps <- c(0, 0.25, 0.064, 0.0047) mus <- c(1, 1.1, 4.73e-3, 1.64e-8) par <- c(-1.027, 1.059) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with bold driver and fn adaptive restart, same results as without when everything is ok", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE opt <- adaptive_restart(opt, "fn") res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) # Have to carry out one extra fn evaluation when doing the first validation # After that, bold driver doesn't need to do a calculation for f0 on the # subsequent step nfs <- c(0, 5, 9, 11) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 6.32, 5.25, 4.62) g2ns <- c(232.87, 64.72, 47.19, 33.88) steps <- c(0, 0.25, 0.020, 0.0795) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-1.051, 1.040) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with bold driver and gr adaptive restart, same results as without when everything is ok", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE opt <- adaptive_restart(opt, "gr") res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) # Get adaptive update check for free if using gradient! nfs <- c(0, 4, 8, 10) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 6.32, 5.25, 4.62) g2ns <- c(232.87, 64.72, 47.19, 33.88) steps <- c(0, 0.25, 0.020, 0.0795) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-1.051, 1.040) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with bold driver aggressive momentum can cause cost increase", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.4 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 4, 8, 10) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 6.32, 8.71, 4.69) g2ns <- c(232.87, 64.72, 91.13, 34.39) steps <- c(0, 0.25, 0.033, 0.064) mus <- c(0.4, 0.4, 0.4, 0.4) par <- c(-0.989, 1.064) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with bold driver adaptive gr momentum prevents cost increase", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.4 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE opt <- adaptive_restart(opt, "gr") res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 4, 8, 10) ngs <- c(0, 1, 2, 2) # no grad calc needed on repeated step fs <- c(24.2, 6.32, 6.32, 4.12) g2ns <- c(232.87, 64.72, 64.72, 2.90) steps <- c(0, 0.25, 0, 0.069) mus <- c(0.4, 0.4, 0.4, 0.4) par <- c(-1.029, 1.061) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with bold driver adaptive fn momentum prevents cost increase", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.4 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE opt <- adaptive_restart(opt, "fn") res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 5, 9, 11) ngs <- c(0, 1, 2, 2) # no grad calc needed on repeated step fs <- c(24.2, 6.32, 6.32, 4.12) g2ns <- c(232.87, 64.72, 64.72, 2.90) steps <- c(0, 0.25, 0, 0.069) mus <- c(0.4, 0.4, 0.4, 0.4) par <- c(-1.029, 1.061) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with constant function factory", { # should give same results as using constant step size opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = constant_step_size( value = 0.01 ) ), momentum_stage( direction = momentum_direction(), step_size = make_momentum_step( make_constant( value = 0.2 ), use_init_mom = TRUE ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 21.95, 19.44, 17.06) g2ns <- c(232.87, 217.96, 200.42, 182.69) steps <- c(0, 0.01, 0.012, 0.0124) mus <- c(0, 0.2, 0.2, 0.2) par <- c(-1.168, 1.013) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with ramp function", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = constant_step_size( value = 0.01 ) ), momentum_stage( direction = momentum_direction(), step_size = make_momentum_step( make_ramp( init_value = 0.1, final_value = 0.3, wait = 0 ), use_init_mom = TRUE ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 21.95, 19.44, 16.84) g2ns <- c(232.87, 217.96, 200.42, 181.00) steps <- c(0, 0.01, 0.012, 0.0136) mus <- c(0, 0.1, 0.2, 0.3) par <- c(-1.167, 1.014) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("classical momentum with switch function", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = constant_step_size( value = 0.01 ) ), momentum_stage( direction = momentum_direction(), step_size = make_momentum_step( make_switch( init_value = 0.5, final_value = 0.8, switch_iter = 2 ), use_init_mom = TRUE ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 21.95, 18.26, 14.00) g2ns <- c(232.87, 217.96, 191.79, 157.67) steps <- c(0, 0.01, 0.018, 0.0244) mus <- c(0, 0.5, 0.8, 0.8) par <- c(-1.152, 1.020) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("sutskever nesterov momentum with bold driver", { opt <- make_opt( make_stages( momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), verbose = FALSE ) ) opt$count_res_fg <- FALSE opt$eager_update <- TRUE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 4, 7, 10) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 6.32, 4.33, 4.78) g2ns <- c(232.87, 64.72, 22.22, 36.89) steps <- c(0, 0.25, 0.088, 0.0584) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-1.042, 1.046) # best par, not last par! expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) expect_equal(res$f, min(fs), tol = 1e-3) }) test_that("nesterov momentum with bold driver and adaptive fn", { opt <- make_opt( make_stages( momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), gradient_stage( direction = sd_direction(normalize = TRUE), step_size = bold_driver() ), verbose = FALSE ) ) opt$count_res_fg <- FALSE opt$eager_update <- TRUE opt <- adaptive_restart(opt, "fn") res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 5, 8, 11) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 6.32, 4.33, 4.33) g2ns <- c(232.87, 64.72, 22.22, 22.22) steps <- c(0, 0.25, 0.088, 0) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-1.042, 1.046) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("nesterov accelerated gradient with wolfe line search", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(), step_size = more_thuente_ls(c2 = 1.e-9) ), momentum_stage( direction = nesterov_momentum_direction(), step_size = nesterov_convex_step() ), verbose = FALSE ) ) opt$count_res_fg <- FALSE opt$eager_update <- TRUE # opt$depends <- c(opt$depends, 'log_vals') opt$depends <- c(opt$depends, "keep_stage_fs") res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, ret_opt = TRUE, grad_tol = 1e-5 ) nfs <- c(0, 9, 17, 22) ngs <- c(0, 9, 17, 22) fs <- c(24.2, 4.128, 3.913, 3.558) g2ns <- c(232.87, 1.777, 23.908, 7.200) steps <- c(0, 0.184, 0.301, 0.048) mus <- c(0, 0, 0.282, 0.434) par <- c(-0.869, 0.781) # also records fn after gradient stage - compare with momentum-first # and see that these have the same values all_fs <- c(4.128, 4.128, 3.886, 3.913, 3.583, 3.558) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) expect_equal(res$opt$all_fs, all_fs, tol = 1e-3) }) test_that("nesterov momentum with wolfe line search is like NAG", { # Add burn_in = 2 so the first two updates are gradient-only, like NAG # But even then, won't give exactly same result as "real" NAG # because parameters are returned half-way between steps compared to real NAG # Uncomment the 'log_vals' hook in this and the test above to see that # you do get the same values for each stage opt <- make_opt( make_stages( momentum_stage( direction = momentum_direction(), step_size = nesterov_convex_step(burn_in = 2) ), gradient_stage( direction = sd_direction(), step_size = more_thuente_ls(c2 = 1.e-9) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE opt$eager_update <- TRUE # Uncomment this to see the grad and mom values logged to screen # opt$depends <- c(opt$depends, 'log_vals') opt$depends <- c(opt$depends, "keep_stage_fs") res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, ret_opt = TRUE, grad_tol = 1e-5 ) nfs <- c(0, 9, 17, 22) ngs <- c(0, 9, 17, 22) fs <- c(24.2, 4.128, 3.886, 3.582) g2ns <- c(232.87, 1.777, 18.114, 1.858) steps <- c(0, 0.184, 0.235, 0.0709) mus <- c(0, 0, 0, 0.282) par <- c(-0.891, 0.802) # compare with real NAG all_fns, after first stage, consecutive values are # the same all_fs <- c(24.2, 4.128, 4.128, 3.886, 3.913, 3.583) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) expect_equal(res$opt$all_fs, all_fs, tol = 1e-3) }) test_that("NAG with q = 1 is steepest descent", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(), step_size = more_thuente_ls(c2 = 1.e-9) ), momentum_stage( direction = nesterov_momentum_direction(), step_size = nesterov_convex_step(q = 1) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 9, 17, 22) ngs <- c(0, 9, 17, 22) fs <- c(24.2, 4.128, 3.886, 3.704) g2ns <- c(232.87, 1.777, 18.114, 1.843) steps <- c(0, 0.184, 0.235, 0.020) mus <- c(0, 0, 0, 0) par <- c(-0.923, 0.860) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("NAG with q close to 0 is the same as == 0", { # This should be the same as NAG with Wolfe Line Search test # If is exactly zero we use the slightly simpler momentum expression # given by Sutskever in the appendix, otherwise use the expression # in the Candes paper which is more complex but allows q to vary from zero opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(), step_size = more_thuente_ls(c2 = 1.e-9) ), momentum_stage( direction = nesterov_momentum_direction(), step_size = nesterov_convex_step(q = 1e-8) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 9, 17, 22) ngs <- c(0, 9, 17, 22) fs <- c(24.2, 4.128, 3.913, 3.558) g2ns <- c(232.87, 1.777, 23.908, 7.200) steps <- c(0, 0.184, 0.301, 0.048) mus <- c(0, 0, 0.282, 0.434) par <- c(-0.869, 0.781) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("NAG with q = 0.5 between full NAG and SD", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(), step_size = more_thuente_ls(c2 = 1.e-9) ), momentum_stage( direction = nesterov_momentum_direction(), step_size = nesterov_convex_step(q = 0.5) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 9, 17, 21) ngs <- c(0, 9, 17, 21) fs <- c(24.2, 4.128, 3.891, 3.664) g2ns <- c(232.87, 1.777, 20.711, 3.442) steps <- c(0, 0.184, 0.265, 0.028) mus <- c(0, 0, 0.128, 0.159) par <- c(-0.903, 0.831) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("NAG with approximate convex momentum", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(), step_size = more_thuente_ls(c2 = 1.e-9) ), momentum_stage( direction = nesterov_momentum_direction(), step_size = nesterov_convex_approx_step(use_init_mu = FALSE) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 9, 17, 22) ngs <- c(0, 9, 17, 22) fs <- c(24.2, 4.128, 4.004, 3.337) g2ns <- c(232.87, 1.777, 30.137, 7.777) steps <- c(0, 0.184, 0.369, 0.0999) # From reading the Sutskever paper, it seems to be the case that mu = 0.4 # (from t = 0) is not intended to ever be used, so default is that mu = 0 mus <- c(0, 0, 0.571, 0.625) par <- c(-0.805, 0.676) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("NAG with approximate convex momentum and mu = 0.5 at t = 1", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(), step_size = more_thuente_ls(c2 = 1.e-9, max_alpha_mult = 10) ), momentum_stage( direction = nesterov_momentum_direction(), step_size = nesterov_convex_approx_step(use_init_mu = TRUE) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 9, 15, 26) ngs <- c(0, 9, 15, 26) fs <- c(24.2, 8.223, 4.097, 15.55) g2ns <- c(232.87, 86.69, 1.791, 77.44) steps <- c(0, 0.275, 0.0926, 1.703) # Only difference from previous test is that mu = 0.5 at t = 1 # But affects length of gradient descent step even though velocity is 0 for # momentum step mus <- c(0, 0.5, 0.571, 0.625) par <- c(-0.09, -0.371) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) # Wolfe line search test_that("Polak Ribiere CG with Rasmussen LS", { opt <- make_opt( make_stages( gradient_stage( direction = cg_direction(cg_update = pr_update), step_size = rasmussen_ls( initial_step_length = "r", max_alpha_mult = 10 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 6, 10, 12) ngs <- c(0, 6, 10, 12) fs <- c(24.2, 4.13, 3.84, 3.52) g2ns <- c(232.87, 1.87, 19.06, 25.24) steps <- c(0, 0.184, 0.273, 0.311) par <- c(-0.777, 0.543) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("BFGS with More-Thuente LS", { opt <- make_opt( make_stages( gradient_stage( direction = bfgs_direction(), step_size = more_thuente_ls( c2 = 1e-9, initial_step_length = "sci", initializer = "q", try_newton_step = TRUE ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosen_no_hess, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 6, 11, 17) ngs <- c(0, 6, 11, 17) fs <- c(24.2, 4.13, 3.85, 3.53) g2ns <- c(232.87, 1.78, 18.62, 24.98) steps <- c(0, 0.184, 0.261, 0.307) par <- c(-0.785, 0.558) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("L-BFGS with More-Thuente LS gives same results as BFGS with sufficiently high memory", { opt <- make_opt( make_stages( gradient_stage( direction = lbfgs_direction(), step_size = more_thuente_ls( c2 = 1e-9, initial_step_length = "sci", initializer = "q", try_newton_step = TRUE ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosen_no_hess, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 6, 11, 17) ngs <- c(0, 6, 11, 17) fs <- c(24.2, 4.13, 3.85, 3.53) g2ns <- c(232.87, 1.78, 18.62, 24.98) steps <- c(0, 0.184, 0.261, 0.307) par <- c(-0.785, 0.558) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("delta bar delta adaptive learning rate", { opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = delta_bar_delta( epsilon = 0.1, kappa_fun = `*`, kappa = 1.1, phi = 0.5, theta = 0.2 ) ) ), verbose = FALSE ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 7.10, 5.28, 4.21) g2ns <- c(232.87, 83.18, 47.48, 13.66) steps <- c(0, 0.11, 0.121, 0.0605) par <- c(-1.040, 1.060) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("delta bar delta adaptive learning rate using momentum", { # similar to van der Maaten formulation in t-SNE matlab code opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = TRUE), step_size = delta_bar_delta( epsilon = 0.1, use_momentum = TRUE, kappa_fun = `*`, kappa = 1.1, phi = 0.5 ) ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.2 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 7.10, 6.53, 4.84) g2ns <- c(232.87, 83.18, 67.62, 37.87) steps <- c(0, 0.11, 0.143, 0.032) mus <- c(0.2, 0.2, 0.2, 0.2) par <- c(-0.993, 1.080) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) }) test_that("delta bar delta adaptive learning rate using momentum and additive increase", { # even more similar to van der Maaten formulation in t-SNE matlab code # in that the step length is increased with a fixed value rather than a # percent; also don't normalize direction vector opt <- make_opt( make_stages( gradient_stage( direction = sd_direction(normalize = FALSE), step_size = delta_bar_delta( epsilon = 0.001, use_momentum = TRUE, kappa_fun = `+`, kappa = 0.02, phi = 0.8 ) ), momentum_stage( direction = momentum_direction(), step_size = constant_step_size( value = 0.4 ) ), verbose = FALSE ) ) opt$count_res_fg <- FALSE res <- opt_loop(opt, rb0, rosenbrock_fg, 3, store_progress = TRUE, verbose = FALSE, grad_tol = 1e-5 ) nfs <- c(0, 0, 0, 0) ngs <- c(0, 1, 2, 3) fs <- c(24.2, 5.59, 9.45, 6.01) g2ns <- c(232.87, 53.36, 97.69, 60.21) steps <- c(0, 0.238, 0.052, 0.044) mus <- c(0.4, 0.4, 0.4, 0.4) par <- c(-0.966, 1.079) expect_equal(res$progress$nf, nfs) expect_equal(res$progress$ng, ngs) expect_equal(res$progress$f, fs, tol = 1e-3) expect_equal(res$progress$g2n, g2ns, tol = 1e-3) expect_equal(res$progress$step, steps, tol = 1e-3) expect_equal(res$progress$mu, mus, tol = 1e-3) expect_equal(res$par, par, tol = 1e-3) })