test_that("qn_polish is disabled by default", { rosen <- function(x) (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2 rosen_gr <- function(x) { c(-2 * (1 - x[1]) - 400 * x[1] * (x[2] - x[1]^2), 200 * (x[2] - x[1]^2)) } rosen_hess <- function(x) { matrix(c(1200 * x[1]^2 - 400 * x[2] + 2, -400 * x[1], -400 * x[1], 200), 2, 2) } res <- arcopt(c(-1.2, 1), rosen, rosen_gr, rosen_hess, control = list(maxit = 200, trace = 0)) expect_true(res$converged) expect_equal(res$diagnostics$qn_polish_switches, 0L) expect_equal(res$diagnostics$qn_polish_reverts, 0L) expect_equal(res$diagnostics$solver_mode_final, "cubic") }) test_that("arcopt result includes qn_polish fields", { fn <- function(x) 0.5 * sum(x^2) gr <- function(x) x hess_fn <- function(x) diag(length(x)) res <- arcopt(c(1, 1), fn, gr, hess_fn, control = list(maxit = 50, trace = 0)) for (f in c("qn_polish_switches", "qn_polish_reverts", "hess_evals_at_polish_switch")) { expect_true(f %in% names(res$diagnostics), info = paste("missing diagnostics field:", f)) } expect_equal(res$diagnostics$qn_polish_switches, 0L) expect_equal(res$diagnostics$qn_polish_reverts, 0L) }) test_that("qn_polish fires on smooth convex with quartic tail", { # f(x) = sum(0.5 * x^2 + x^4 / 12), PD everywhere, well-conditioned in # basin. Newton takes ~8 iterations from x0=10*rep(1,n); cubic/Newton # transitional iterations don't occur because H is PD throughout. fn <- function(x) sum(0.5 * x^2 + x^4 / 12) gr <- function(x) x + x^3 / 3 hess_fn <- function(x) diag(1 + x^2) x0 <- rep(10, 5) res <- arcopt(x0, fn, gr, hess_fn, control = list(maxit = 50, trace = 0, qn_polish_enabled = TRUE)) expect_true(res$converged) expect_true(max(abs(res$par)) < 1e-4) expect_gte(res$diagnostics$qn_polish_switches, 1L) expect_equal(res$diagnostics$solver_mode_final, "qn_polish") }) test_that("qn_polish saves >= 30% Hessian evaluations on smooth convex", { fn <- function(x) sum(0.5 * x^2 + x^4 / 12) gr <- function(x) x + x^3 / 3 hess_fn <- function(x) diag(1 + x^2) x0 <- rep(10, 5) res_off <- arcopt(x0, fn, gr, hess_fn, control = list(maxit = 50, trace = 0, qn_polish_enabled = FALSE)) evals_off <- res_off$evaluations$hess res_on <- arcopt(x0, fn, gr, hess_fn, control = list(maxit = 50, trace = 0, qn_polish_enabled = TRUE)) evals_on <- res_on$evaluations$hess expect_true(res_off$converged) expect_true(res_on$converged) expect_gte(res_on$diagnostics$qn_polish_switches, 1L) # Saves >= 30% Hessian evals on this problem with default thresholds expect_true(evals_on <= 0.7 * evals_off, info = sprintf("evals_off=%d evals_on=%d", evals_off, evals_on)) }) test_that("qn_polish saves >= 50% Hessian evaluations with loose thresholds", { fn <- function(x) sum(0.5 * x^2 + x^4 / 12) gr <- function(x) x + x^3 / 3 hess_fn <- function(x) diag(1 + x^2) x0 <- rep(10, 5) res_off <- arcopt(x0, fn, gr, hess_fn, control = list(maxit = 50, trace = 0, qn_polish_enabled = FALSE)) res_on <- arcopt(x0, fn, gr, hess_fn, control = list(maxit = 50, trace = 0, qn_polish_enabled = TRUE, qn_polish_window = 3, qn_polish_rho = 0.3, qn_polish_g_decay = 0.95)) expect_true(res_off$converged) expect_true(res_on$converged) expect_gte(res_on$diagnostics$qn_polish_switches, 1L) expect_true(res_on$evaluations$hess <= 0.5 * res_off$evaluations$hess, info = sprintf("evals_off=%d evals_on=%d", res_off$evaluations$hess, res_on$evaluations$hess)) }) test_that("qn_polish does not fire on pure quadratic (Newton done in 1 iter)", { # Pure quadratic: Newton converges in one step -- detector window never # fills. This is a test that the detector doesn't spuriously fire. A <- diag(c(1, 2, 4, 8, 16)) b <- c(1, 1, 1, 1, 1) fn <- function(x) 0.5 * as.numeric(t(x) %*% A %*% x) - sum(b * x) gr <- function(x) as.vector(A %*% x) - b hess_fn <- function(x) A res <- arcopt(rep(0, 5), fn, gr, hess_fn, control = list(maxit = 50, trace = 0, qn_polish_enabled = TRUE)) expect_true(res$converged) expect_equal(res$diagnostics$qn_polish_switches, 0L) expect_equal(res$diagnostics$solver_mode_final, "cubic") expect_equal(res$par, solve(A, b), tolerance = 1e-6) }) test_that("qn_polish and tr_fallback do not collide on well-behaved problems", { # Neither should fire on Rosenbrock with default settings rosen <- function(x) (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2 rosen_gr <- function(x) { c(-2 * (1 - x[1]) - 400 * x[1] * (x[2] - x[1]^2), 200 * (x[2] - x[1]^2)) } rosen_hess <- function(x) { matrix(c(1200 * x[1]^2 - 400 * x[2] + 2, -400 * x[1], -400 * x[1], 200), 2, 2) } res <- arcopt(c(-1.2, 1), rosen, rosen_gr, rosen_hess, control = list(maxit = 200, trace = 0, tr_fallback_enabled = TRUE, qn_polish_enabled = TRUE)) expect_true(res$converged) expect_equal(res$par, c(1, 1), tolerance = 1e-4) expect_equal(res$diagnostics$ridge_switches, 0L) }) test_that("qn_polish branch produces finite outputs on near-minimum start", { # Starting very close to the minimum: polish detector should fire fast fn <- function(x) 0.5 * sum(x^2) + 0.01 * sum(x^4) gr <- function(x) x + 0.04 * x^3 hess_fn <- function(x) diag(1 + 0.12 * x^2) x0 <- rep(0.01, 5) # very close to the minimum res <- arcopt(x0, fn, gr, hess_fn, control = list(maxit = 50, trace = 0, qn_polish_enabled = TRUE)) expect_true(all(is.finite(res$par))) expect_true(is.finite(res$value)) }) # ---- Phase 4: bidirectional logic (revert + cooldown) ---- test_that("qn_polish reverts to cubic when line search fails repeatedly", { # Force line search to fail by giving it a single iteration budget plus # a very tight curvature condition c2 = 0.01. After # qn_polish_max_fail = 2 consecutive failures, polish should revert to # cubic and the revert counter should reflect this. fn <- function(x) sum(0.5 * x^2 + x^4 / 12) gr <- function(x) x + x^3 / 3 hess_fn <- function(x) diag(1 + x^2) x0 <- rep(10, 5) res <- arcopt(x0, fn, gr, hess_fn, control = list(maxit = 50, trace = 0, qn_polish_enabled = TRUE, qn_polish_window = 3, qn_polish_rho = 0.3, qn_polish_g_decay = 0.95, qn_polish_c2 = 0.01, qn_polish_max_ls_iter = 1, qn_polish_max_fail = 2)) # Polish should fire, then revert, possibly multiple times. # Must still converge. expect_true(res$converged) expect_gte(res$diagnostics$qn_polish_switches, 1L, label = "polish fired at least once") expect_gte(res$diagnostics$qn_polish_reverts, 1L, label = "revert happened at least once under tight c2") }) test_that("qn_polish cooldown prevents immediate re-entry after revert", { # Same setup as above, but check that reverts don't happen every # single iteration (cooldown spreads them out). If the cooldown weren't # respected, we'd see reverts == switches - 0 or 1 with each iteration # reverting. With cooldown, reverts should be bounded even on a # LS-hostile problem. fn <- function(x) sum(0.5 * x^2 + x^4 / 12) gr <- function(x) x + x^3 / 3 hess_fn <- function(x) diag(1 + x^2) x0 <- rep(10, 5) res <- arcopt(x0, fn, gr, hess_fn, control = list(maxit = 50, trace = 0, qn_polish_enabled = TRUE, qn_polish_window = 3, qn_polish_rho = 0.3, qn_polish_g_decay = 0.95, qn_polish_c2 = 0.01, qn_polish_max_ls_iter = 1, qn_polish_max_fail = 2, qn_polish_reenter_delay = 5)) expect_true(res$converged) # With reenter_delay = 5, between consecutive polish->cubic->polish # cycles there must be at least 5 cubic iterations. Over maxit = 50, # this bounds the number of switches. expect_true(res$diagnostics$qn_polish_switches <= ceiling(50 / 5), info = sprintf("switches = %d", res$diagnostics$qn_polish_switches)) }) test_that("qn_polish revert leaves cubic state usable", { # After a revert, cubic mode must resume correctly and converge fn <- function(x) sum(0.5 * x^2 + x^4 / 12) gr <- function(x) x + x^3 / 3 hess_fn <- function(x) diag(1 + x^2) x0 <- rep(10, 5) res <- arcopt(x0, fn, gr, hess_fn, control = list(maxit = 100, trace = 0, qn_polish_enabled = TRUE, qn_polish_window = 3, qn_polish_rho = 0.3, qn_polish_g_decay = 0.95, qn_polish_c2 = 0.01, qn_polish_max_ls_iter = 1, qn_polish_max_fail = 2)) expect_true(res$converged) expect_true(max(abs(res$par)) < 1e-3, info = "post-revert cubic mode must still find minimum") expect_true(all(is.finite(res$gradient))) })