context("API tests")

# Repeat some of the basic tests, using the consumer API
test_that("steepest descent with constant step size", {
  res <- mize(rb0, rosenbrock_fg,
    method = "SD", max_iter = 3,
    line_search = "const", step0 = 0.0001, grad_tol = 1e-5,
    check_conv_every = NULL
  )

  expect_equal(res$nf, 1)
  expect_equal(res$ng, 4)
  expect_equal(res$f, 12.81, tol = 1e-3)
  expect_equal(res$g2n, 147.11, tol = 1e-3)
  expect_equal(res$par, c(-1.144, 1.023), tol = 1e-3)
})

test_that("grad norm not returned (or calculated) if grad tol is NULL", {
  res <- mize(rb0, rosenbrock_fg,
    method = "SD", max_iter = 3,
    line_search = "const", step0 = 0.0001, grad_tol = NULL,
    check_conv_every = NULL
  )

  expect_equal(res$nf, 1)
  expect_equal(res$ng, 3)
  expect_equal(res$f, 12.81, tol = 1e-3)
  expect_true(is.null(res$g2n))
  expect_equal(res$par, c(-1.144, 1.023), tol = 1e-3)
})


test_that("L-BFGS with More-Thuente LS", {
  # can abbreviate line search name and initializer
  res <- mize(rb0, rosen_no_hess,
    method = "L-BFGS", max_iter = 3,
    line_search = "mo", c1 = 5e-10, c2 = 1e-9, step0 = "scipy",
    step_next_init = "q", scale_hess = FALSE, grad_tol = 1e-5
  )

  expect_equal(res$nf, 17)
  expect_equal(res$ng, 17)
  expect_equal(res$f, 3.53, tol = 1e-3)
  expect_equal(res$g2n, 24.98, tol = 1e-3)
  expect_equal(res$par, c(-0.785, 0.558), tol = 1e-3)
})

test_that("L-BFGS with More-Thuente LS and inv Hess initial guess", {
  res <- mize(rb0, rosenbrock_fg,
    method = "L-BFGS", max_iter = 3,
    line_search = "mo", c1 = 5e-10, c2 = 1e-9, step0 = "scipy",
    step_next_init = "q", scale_hess = FALSE, grad_tol = 1e-5
  )

  expect_equal(res$nf, 19)
  expect_equal(res$ng, 19)
  expect_equal(res$f, 3.71, tol = 1e-3)
  expect_equal(res$g2n, 27.40, tol = 1e-3)
  expect_equal(res$par, c(-0.820, 0.610), tol = 1e-3)
})


test_that("L-BFGS with More-Thuente LS and max alpha guess increase", {
  # can abbreviate line search name and initializer
  res <- mize(rb0, rosen_no_hess,
    method = "L-BFGS", max_iter = 3,
    line_search = "mo", c1 = 5e-10, c2 = 1e-9, step0 = "scipy",
    step_next_init = "q", scale_hess = FALSE, grad_tol = 1e-5,
    ls_max_alpha_mult = 2
  )
  # Get to the same result as without ls_max_alpha_mult but more evaluations
  expect_equal(res$nf, 21)
  expect_equal(res$ng, 21)
  expect_equal(res$f, 3.53, tol = 1e-3)
  expect_equal(res$g2n, 24.98, tol = 1e-3)
  expect_equal(res$par, c(-0.785, 0.558), tol = 1e-3)
})


test_that("BFGS with Hessian inverse diagonal", {
  res <- mize(rb0, rosenbrock_fg,
    method = "BFGS", max_iter = 3,
    line_search = "more-thuente", c1 = 5e-10, c2 = 1e-9,
    step0 = "sci", ls_max_alpha = 0.5,
    step_next_init = "quad", scale_hess = FALSE, grad_tol = 1e-5
  )

  expect_equal(res$nf, 4)
  expect_equal(res$ng, 4)
  expect_equal(res$f, 4.461, tol = 1e-3)
  expect_equal(res$g2n, 6.524, tol = 1e-3)
  expect_equal(res$par, c(-1.112, 1.231), tol = 1e-3)
})

test_that("BFGS with Hessian inverse matrix", {
  rb_hi <- rosenbrock_fg
  rb_hi$hi <- function(par) {
    solve(rosenbrock_fg$hs(par))
  }
  res <- mize(rb0, rb_hi,
    method = "BFGS", max_iter = 3,
    line_search = "more-thuente", c1 = 5e-10, c2 = 1e-9,
    step0 = "sci", ls_max_alpha = 0.5,
    step_next_init = "quad", scale_hess = FALSE, grad_tol = 1e-5
  )

  expect_equal(res$nf, 4)
  expect_equal(res$ng, 4)
  expect_equal(res$f, 4.968, tol = 1e-3)
  expect_equal(res$g2n, 31.81, tol = 1e-3)
  expect_equal(res$par, c(-1.160, 1.290), tol = 1e-3)
})

test_that("BFGS with More-Thuente LS and max alpha", {
  res <- mize(rb0, rosen_no_hess,
    method = "BFGS", max_iter = 3,
    line_search = "more-thuente", c1 = 5e-10, c2 = 1e-9,
    step0 = "sci", ls_max_alpha = 0.5,
    step_next_init = "quad", scale_hess = FALSE, grad_tol = 1e-5
  )

  expect_equal(res$nf, 14)
  expect_equal(res$ng, 14)
  expect_equal(res$f, 3.726, tol = 1e-3)
  expect_equal(res$g2n, 18.83, tol = 1e-3)
  expect_equal(res$par, c(-0.893, 0.760), tol = 1e-3)
})

test_that("BFGS with More-Thuente LS and fixed initial alpha guess", {
  res <- mize(rb0, rosen_no_hess,
    method = "BFGS", max_iter = 3,
    line_search = "more-thuente", c1 = 5e-10, c2 = 1e-9,
    step0 = "sci",
    step_next_init = 0.1, scale_hess = FALSE, grad_tol = 1e-5
  )
  # Get to the same result as without step_next_init but more evaluations
  expect_equal(res$nf, 21)
  expect_equal(res$ng, 21)
  expect_equal(res$f, 3.53, tol = 1e-3)
  expect_equal(res$g2n, 24.98, tol = 1e-3)
  expect_equal(res$par, c(-0.785, 0.558), tol = 1e-3)
})

test_that("SR1 with More-Thuente LS", {
  res <- mize(rb0, rosen_no_hess,
    method = "SR1", max_iter = 3,
    line_search = "more-thuente", c1 = 1e-4, c2 = 0.9,
    step0 = "sci",
    step_next_init = "quad", scale_hess = FALSE, grad_tol = 1e-5
  )

  expect_equal(res$nf, 6)
  expect_equal(res$ng, 6)
  expect_equal(res$f, 3.47, tol = 1e-3)
  expect_equal(res$g2n, 17.87, tol = 1e-3)
  expect_equal(res$par, c(-0.824, 0.641), tol = 1e-3)
})

test_that("SR1 with approx Hessian init", {
  res <- mize(rb0, rosenbrock_fg,
    method = "SR1", max_iter = 3,
    line_search = "more-thuente", c1 = 1e-4, c2 = 0.9,
    step0 = "sci",
    step_next_init = "quad", scale_hess = FALSE, grad_tol = 1e-5
  )

  expect_equal(res$nf, 4)
  expect_equal(res$ng, 4)
  expect_equal(res$f, 4.432, tol = 1e-3)
  expect_equal(res$g2n, 5.289, tol = 1e-3)
  expect_equal(res$par, c(-1.105, 1.219), tol = 1e-3)
})

test_that("CG with Schmidt LS", {
  # lower case names should be ok for method, cg_update, step0 etc.
  res <- mize(rb0, rosenbrock_fg,
    method = "cg",
    cg_update = "pr+",
    max_iter = 3,
    line_search = "schmidt", c1 = 1e-4, c2 = 0.1, step0 = "schmidt",
    step_next_init = "slope", ls_max_alpha_mult = 10,
    grad_tol = 1e-5
  )

  expect_equal(res$nf, 10)
  expect_equal(res$ng, 10)
  expect_equal(res$f, 2.859, tol = 1e-3)
  expect_equal(res$g2n, 3.650, tol = 1e-3)
  expect_equal(res$par, c(-0.682, 0.483), tol = 1e-3)
})

# Tests error referenced in https://github.com/jlmelville/mize/pull/1
test_that("ls_max_fn", {
  res <- mize(rb0, rosenbrock_fg,
    method = "cg",
    cg_update = "pr+",
    max_iter = 3,
    line_search = "schmidt", c1 = 1e-4, c2 = 0.1, step0 = "schmidt",
    step_next_init = "slope", ls_max_alpha_mult = 10, ls_max_fn = 2,
    grad_tol = 1e-5
  )

  expect_equal(res$nf, 7)
  expect_equal(res$ng, 7)
  expect_equal(res$f, 3.947, tol = 1e-3)
  expect_equal(res$g2n, 1.872, tol = 1e-3)
  expect_equal(res$par, c(-0.986, 0.978), tol = 1e-3)
})

test_that("CG with Rasmussen LS", {
  # lower case names should be ok for method, cg_update, step0 etc.
  res <- mize(rb0, rosenbrock_fg,
    method = "cg",
    cg_update = "pr+",
    max_iter = 3,
    line_search = "ras", c1 = 5e-10, c2 = 1e-9, step0 = "ras",
    step_next_init = "slope", ls_max_alpha_mult = 10,
    grad_tol = 1e-5
  )

  expect_equal(res$nf, 27)
  expect_equal(res$ng, 27)
  expect_equal(res$f, 3.53, tol = 1e-3)
  expect_equal(res$g2n, 24.98, tol = 1e-3)
  expect_equal(res$par, c(-0.785, 0.558), tol = 1e-3)
})

test_that("HZ CG with HZ LS", {
  # Also use HZ suggestions for initial step guess and next step guess
  # (the latter of which costs an extra fn evaluation per iteration)
  res <- mize(rb0, rosenbrock_fg,
    method = "cg",
    cg_update = "hz+",
    max_iter = 3,
    line_search = "hz", c1 = 5e-10, c2 = 1e-9, step0 = "hz",
    step_next_init = "hz", grad_tol = 1e-5
  )

  expect_equal(res$nf, 10)
  expect_equal(res$ng, 8)
  expect_equal(res$f, 4.09, tol = 1e-3)
  expect_equal(res$g2n, 1.789, tol = 1e-3)
  expect_equal(res$par, c(-1.020, 1.050), tol = 1e-3)
})


test_that("CG with Rasmussen LS and max_fn termination", {
  res <- mize(rb0, rosenbrock_fg,
    method = "cg",
    cg_update = "pr+",
    max_iter = 3, max_fn = 20, ls_max_alpha_mult = 10,
    line_search = "ras", c1 = 5e-10, c2 = 1e-9, step0 = "ras",
    step_next_init = "slope", grad_tol = 1e-5
  )

  expect_equal(res$nf, 20)
  expect_equal(res$ng, 20)
  expect_equal(res$f, 3.54, tol = 1e-3)
  expect_equal(res$g2n, 23.23, tol = 1e-3)
  expect_equal(res$par, c(-0.806, 0.596), tol = 1e-3)
})


test_that("NAG with Rasmussen LS", {
  res <- mize(rb0, rosenbrock_fg,
    method = "NAG",
    nest_convex_approx = FALSE, nest_q = 0, nest_burn_in = 0,
    max_iter = 3,
    line_search = "rasmussen", c1 = 5e-10, c2 = 1e-9,
    step0 = "rasmussen",
    step_next_init = "slope", ls_max_alpha_mult = 10,
    grad_tol = 1e-5
  )

  expect_equal(res$nf, 29)
  expect_equal(res$ng, 29)
  expect_equal(res$f, 3.56, tol = 1e-3)
  expect_equal(res$g2n, 7.2, tol = 1e-3)
  expect_equal(res$par, c(-0.869, 0.781), tol = 1e-3)
})

test_that("bold driver SD and classical momentum", {
  res <- mize(rb0, rosenbrock_fg,
    method = "SD", norm_direction = TRUE,
    line_search = "bold",
    mom_type = "classical",
    mom_schedule = "ramp", mom_init = 0.1, mom_final = 0.3,
    max_iter = 3, grad_tol = 1e-5
  )

  expect_equal(res$nf, 12)
  expect_equal(res$ng, 5) # extra grad eval needed to get data for best found
  expect_equal(res$f, 4.38, tol = 1e-3)
  expect_equal(res$g2n, 23.21, tol = 1e-3)
  expect_equal(res$par, c(-1.006, 1.071), tol = 1e-3)
})

test_that("bold driver SD and nesterov momentum", {
  res <- mize(rb0, rosenbrock_fg,
    method = "SD", norm_direction = TRUE,
    line_search = "bold",
    mom_type = "nesterov",
    mom_schedule = "ramp", mom_init = 0.1, mom_final = 0.3,
    max_iter = 3, grad_tol = 1e-5
  )

  expect_equal(res$nf, 12)
  expect_equal(res$ng, 5)
  expect_equal(res$f, 4.38, tol = 1e-3)
  expect_equal(res$g2n, 23.21, tol = 1e-3)
  expect_equal(res$par, c(-1.006, 1.071), tol = 1e-3)
})

test_that("Delta bar delta adaptive learning rate and nesterov momentum", {
  res <- mize(rb0, rosenbrock_fg,
    method = "DBD", norm_direction = TRUE,
    step0 = 0.1,
    mom_type = "nesterov",
    mom_schedule = 0.2,
    max_iter = 3, grad_tol = 1e-5, rel_tol = NULL, abs_tol = NULL
  )

  expect_equal(res$nf, 1)
  expect_equal(res$ng, 4)
  expect_equal(res$f, 4.84, tol = 1e-3)
  expect_equal(res$g2n, 37.85, tol = 1e-3)
  expect_equal(res$par, c(-0.993, 1.079), tol = 1e-3)
})

test_that("Terminates semi-gracefully if function value is non-finite", {
  res <- mize(rb0, rosenbrock_fg, "DBD", step0 = 1, check_conv_every = 1)
  expect_equal(res$terminate$what, "fn_inf")
  expect_equal(res$iter, 4)
})

test_that("Terminates semi-gracefully if gradient is non-finite", {
  # If we don't check convergence often enough, solution can diverge
  # in between checks. If NaN is detected in a gradient calculation, we
  # terminate early even if not on a convergence check iteration
  res <- mize(rb0, rosenbrock_fg, "DBD", step0 = 1, check_conv_every = 10)
  expect_equal(res$terminate$what, "gr_inf")
  expect_equal(res$iter, 6)
})

test_that("Step tolerance is triggered when progress stalls", {
  # NULL abs_tol to stop it from triggering before step_tol
  res <- mize(rb0, rosen_no_hess, "L-BFGS",
    memory = 5, abs_tol = NULL,
    step_tol = 1e-7, step_next_init = "quad",
    step0 = 1
  )
  expect_equal(res$nf, 57)
  expect_equal(res$ng, 57)
  expect_equal(res$f, 0, tol = 1e-3)
  expect_equal(res$par, c(1, 1))
  expect_equal(res$terminate$what, "step_tol")
})

test_that("Step tolerance is not triggered when restarting", {
  res <- mize(rb0, rosenbrock_fg,
    method = "NAG", max_iter = 55, restart = "fn",
    store_progress = TRUE
  )
  expect_equal(res$iter, 55)
  expect_equal(res$progress["52", "step"], 0)
  expect_equal(res$terminate$what, "max_iter")
})

test_that("max_fn errs on the side of caution", {
  # In this test we ask for 15 function evaluations, but only get 14
  # this is because we need one function evaluation spare to calculate
  # f for the return value and mize has determined it isn't available
  # for "free" by being calculated during the iteration
  res <- mize(rb0, rosenbrock_fg,
    method = "NAG", max_fn = 15,
    step_next_init = "slope",
    ls_max_alpha_mult = 10
  )
  expect_equal(res$terminate$what, "max_fn")
  expect_equal(res$terminate$val, 14)
  expect_equal(res$nf, 14)
  expect_equal(res$f, 4.08, tol = 1e-3)
})

test_that("max_fg also errs on the side of caution", {
  res <- mize(rb0, rosenbrock_fg,
    method = "NAG", max_fg = 30,
    step_next_init = "slope", ls_max_alpha_mult = 10
  )
  expect_equal(res$terminate$what, "max_fg")
  expect_equal(res$terminate$val, 29)
  expect_equal(res$nf, 15)
  expect_equal(res$ng, 14)
  expect_equal(res$f, 4.08, tol = 1e-3)
})

test_that("max_fn with DBD", {
  # Don't leave one function evaluation spare with DBD because it
  # doesn't use them during its iteration
  res <- mize(rb0, rosenbrock_fg, method = "DBD", max_fn = 30)
  expect_equal(res$terminate$what, "max_fn")
  expect_equal(res$terminate$val, 30)
  expect_equal(res$nf, 30)
  expect_equal(res$ng, 30)
  expect_equal(res$f, 4.133, tol = 1e-3)
})

test_that("max_fg with DBD", {
  res <- mize(rb0, rosenbrock_fg, method = "DBD", max_fg = 30)
  expect_equal(res$terminate$what, "max_fg")
  expect_equal(res$terminate$val, 30)
  expect_equal(res$nf, 15)
  expect_equal(res$ng, 15)
  expect_equal(res$f, 7.914, tol = 1e-3)
})

test_that("max functions per line search", {
  # This starts at the minimum and probably due to a lack of smoothness
  # at the minimum due to floating point issues rather than the function itself,
  # doesn't make a lot of progress - but at least stops after 20 steps
  # without a max on the line search, this can take 100s of steps to give up
  # (or worse)
  res <- mize(c(3, 0.5), tricky_fg(), method = "SD", ls_max_fn = 20)
  expect_equal(res$terminate$what, "step_tol")
  expect_equal(res$terminate$val, 0)
  expect_equal(res$nf, 21)
  expect_equal(res$ng, 21)
  expect_equal(res$f, 0, tol = 1e-3)
})

test_that("backtracking line search", {
  res <- mize(rb0, rosenbrock_fg,
    method = "NAG", line_search = "BACK",
    max_iter = 3, step_next_init = "slope"
  )
  expect_equal(res$nf, 7)
  expect_equal(res$ng, 6)
  expect_equal(res$f, 20.44, tol = 1e-3)
  expect_equal(res$par, c(-1.184, 1.006), tol = 1e-3)
})

test_that("MT safeguard cubic", {
  # Chosen only because I couldn't find a simpler example that yielded a
  # difference
  res <- mize(rb0, rosenbrock_fg,
    method = "CG", c2 = 0.9, grad_tol = 0.1,
    max_iter = 11, ls_safe_cubic = TRUE
  )
  # These three should be different from ls_safe_cubic = FALSE
  expect_equal(res$nf, 25) # 24 otherwise
  expect_equal(res$ng, 25)
  expect_equal(res$g2n, 4.604, tol = 1e-3) # approx 4.627 otherwise

  # These are the same within tolerance
  expect_equal(res$f, 2.8, tol = 1e-3)
  expect_equal(res$par, c(-0.6605, 0.4568), tol = 1e-3)
})

test_that("Truncated Newton with constant step size", {
  res <- mize(rb0, rosenbrock_fg,
    method = "TN", max_iter = 3,
    line_search = "const", step0 = 1, grad_tol = 1e-5,
    check_conv_every = NULL
  )

  expect_equal(res$nf, 1)
  expect_equal(res$ng, 8)
  expect_equal(res$f, 4.118, tol = 1e-3)
  expect_equal(res$g2n, 4.219, tol = 1e-3)
  expect_equal(res$par, c(-1.023, 1.062), tol = 1e-3)
})

# Ensure TN direction can't exceed gr budget
test_that("Truncated Newton with max_gr", {
  res <- mize(rb0, rosenbrock_fg,
    method = "TN", max_iter = 3,
    check_conv_every = NULL, line_search = "const", step0 = 1,
    max_gr = 6
  )

  # Should give the same f/par results as without max_gr, as we would quit with -ve
  # curvature anyway
  expect_equal(res$nf, 1)
  # If grad_tol or ginf_tol was calculated we would get max_gr + 1
  expect_equal(res$ng, 6)
  expect_equal(res$f, 4.118, tol = 1e-3)
  expect_equal(res$par, c(-1.023, 1.062), tol = 1e-3)
})


test_that("Report ng correctly with simple backtracking line search", {
  res <- mize(rb0, rosenbrock_fg, method = "L-BFGS", 
              line_search = "backtracking", step_next_init = 1, max_iter = 2, 
              step_down = 0.5)
  expect_equal(res$nf, 3)
  expect_equal(res$ng, 2)

  res <- mize(rb0, rosenbrock_fg, method = "L-BFGS", 
              line_search = "backtracking", step_next_init = 1, max_iter = 2)
  expect_equal(res$nf, 3)
  expect_equal(res$ng, 3)
})