# Tests for Quasi-Newton ARC Optimizer (Algorithm 4) # ================================================== # # arcopt_qn() is no longer publicly exported in v0.2.0. The shim below # routes every call site in this file through the public dispatch path # `arcopt(control = list(use_qn = TRUE, ...))`, so the existing tests # continue to exercise the same code path without per-call rewrites. arcopt_qn <- function(x0, fn, gr, hess = NULL, ..., lower = rep(-Inf, length(x0)), upper = rep(Inf, length(x0)), control = list()) { control$use_qn <- TRUE arcopt(x0 = x0, fn = fn, gr = gr, hess = hess, ..., lower = lower, upper = upper, control = control) } # ============================================================================= # Test Functions # ============================================================================= # Sphere function (convex quadratic) sphere_fn <- function(x) sum(x^2) sphere_gr <- function(x) 2 * x sphere_hess <- function(x) 2 * diag(length(x)) # Rosenbrock function (n-dimensional) rosenbrock_fn <- function(x) { n <- length(x) sum(100 * (x[2:n] - x[1:(n - 1)]^2)^2 + (1 - x[1:(n - 1)])^2) } rosenbrock_gr <- function(x) { n <- length(x) g <- numeric(n) for (i in 1:(n - 1)) { g[i] <- g[i] - 400 * x[i] * (x[i + 1] - x[i]^2) - 2 * (1 - x[i]) g[i + 1] <- g[i + 1] + 200 * (x[i + 1] - x[i]^2) } g } rosenbrock_hess <- function(x) { n <- length(x) h <- matrix(0, n, n) for (i in 1:(n - 1)) { h[i, i] <- h[i, i] + 1200 * x[i]^2 - 400 * x[i + 1] + 2 h[i, i + 1] <- h[i, i + 1] - 400 * x[i] h[i + 1, i] <- h[i + 1, i] - 400 * x[i] h[i + 1, i + 1] <- h[i + 1, i + 1] + 200 } h } # Simple quadratic with known minimum quadratic_fn <- function(x) { 0.5 * (x[1]^2 + 10 * x[2]^2) # Ill-conditioned } quadratic_gr <- function(x) c(x[1], 10 * x[2]) quadratic_hess <- function(x) diag(c(1, 10)) # ============================================================================= # Basic Functionality Tests # ============================================================================= test_that("arcopt_qn forwards ... to fn and gr", { # Scaled sphere: f(x; s) = s * sum(x^2) scaled_fn <- function(x, scale) scale * sum(x^2) scaled_gr <- function(x, scale) scale * 2 * x result <- arcopt_qn( x0 = c(5, 5), fn = scaled_fn, gr = scaled_gr, scale = 2, control = list(qn_method = "sr1", trace = 0) ) expect_true(result$converged) expect_equal(result$par, c(0, 0), tolerance = 1e-4) }) test_that("arcopt_qn forwards ... to hess in hybrid mode", { # Scaled sphere with Hessian scaled_fn <- function(x, scale) scale * sum(x^2) scaled_gr <- function(x, scale) scale * 2 * x scaled_hess <- function(x, scale) scale * 2 * diag(length(x)) result <- arcopt_qn( x0 = c(5, 5), fn = scaled_fn, gr = scaled_gr, hess = scaled_hess, scale = 2, control = list(qn_method = "sr1", trace = 0) ) expect_true(result$converged) expect_equal(result$par, c(0, 0), tolerance = 1e-4) # Hessian should have been evaluated once for initialization expect_equal(result$evaluations$hess, 1) }) test_that("arcopt_qn validates inputs correctly", { # Invalid x0 expect_error( arcopt_qn(c(1, NA), sphere_fn, sphere_gr), "x0 must be a finite numeric vector" ) # Missing gradient expect_error( arcopt_qn(c(1, 1), sphere_fn, gr = NULL), "gr" ) # Invalid QN method expect_error( arcopt_qn(c(1, 1), sphere_fn, sphere_gr, control = list(qn_method = "invalid")), "qn_method must be one of" ) }) test_that("arcopt_qn returns correct output structure", { result <- arcopt_qn( c(2, 2), sphere_fn, sphere_gr, control = list(maxit = 10, trace = 0) ) expect_true(is.list(result)) expect_true("par" %in% names(result)) expect_true("value" %in% names(result)) expect_true("gradient" %in% names(result)) expect_true("converged" %in% names(result)) expect_true("iterations" %in% names(result)) expect_true("evaluations" %in% names(result)) expect_true("qn_updates" %in% names(result$diagnostics)) expect_true("qn_skips" %in% names(result$diagnostics)) expect_true("qn_restarts" %in% names(result$diagnostics)) expect_equal(length(result$par), 2) expect_true(is.numeric(result$value)) expect_equal(length(result$gradient), 2) }) # ============================================================================= # Pure QN Mode Tests (no Hessian provided) # ============================================================================= test_that("arcopt_qn converges on sphere function with SR1", { x0 <- c(5, -3, 2) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "sr1", trace = 0) ) expect_true(result$converged) expect_equal(result$par, rep(0, 3), tolerance = 1e-4) expect_equal(result$value, 0, tolerance = 1e-8) }) test_that("arcopt_qn converges on sphere function with BFGS", { x0 <- c(5, -3, 2) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "bfgs", trace = 0) ) expect_true(result$converged) expect_equal(result$par, rep(0, 3), tolerance = 1e-4) }) test_that("arcopt_qn converges on ill-conditioned quadratic", { x0 <- c(10, 10) result <- arcopt_qn( x0, quadratic_fn, quadratic_gr, control = list(qn_method = "sr1", trace = 0) ) expect_true(result$converged) expect_equal(result$par, c(0, 0), tolerance = 1e-4) }) # ============================================================================= # Hybrid Mode Tests (with Hessian) # ============================================================================= test_that("arcopt_qn uses provided Hessian for initialization", { x0 <- c(2, 2) # With Hessian - should use exact initialization result_hybrid <- arcopt_qn( x0, sphere_fn, sphere_gr, sphere_hess, control = list(qn_method = "sr1", trace = 0) ) # Without Hessian - uses scaled identity result_pure <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "sr1", trace = 0) ) # Both should converge expect_true(result_hybrid$converged) expect_true(result_pure$converged) # Hybrid should use Hessian evaluation expect_equal(result_hybrid$evaluations$hess, 1) expect_equal(result_pure$evaluations$hess, 0) }) test_that("arcopt_qn hybrid mode converges on Rosenbrock 2D", { x0 <- c(-1.2, 1) result <- arcopt_qn( x0, rosenbrock_fn, rosenbrock_gr, rosenbrock_hess, control = list(qn_method = "sr1", maxit = 500, trace = 0) ) expect_true(result$converged) expect_equal(result$par, c(1, 1), tolerance = 1e-3) }) # ============================================================================= # QN Update Tracking Tests # ============================================================================= test_that("arcopt_qn tracks QN updates correctly", { x0 <- c(5, 5) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "sr1", trace = 0) ) # Should have some updates expect_true(result$diagnostics$qn_updates >= 0) expect_true(result$diagnostics$qn_skips >= 0) expect_true(result$diagnostics$qn_restarts >= 0) # Total should be related to iterations # (each successful step attempts an update) expect_true(result$diagnostics$qn_updates + result$diagnostics$qn_skips <= result$iterations) }) test_that("BFGS counts updates and skips", { # BFGS may skip when curvature condition violated x0 <- c(5, 5) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "bfgs", trace = 0) ) expect_true(result$diagnostics$qn_updates >= 0) expect_true(result$diagnostics$qn_skips >= 0) }) # ============================================================================= # Box Constraint Tests # ============================================================================= test_that("arcopt_qn respects box constraints", { x0 <- c(0.5, 0.5) lower <- c(0.1, 0.1) upper <- c(0.9, 0.9) result <- arcopt_qn( x0, sphere_fn, sphere_gr, lower = lower, upper = upper, control = list(qn_method = "sr1", maxit = 500, trace = 0) ) # Solution should respect bounds expect_true(all(result$par >= lower - 1e-8)) expect_true(all(result$par <= upper + 1e-8)) # Solution should be at lower bounds (minimum of sphere in [0.1, 0.9]^2) # Allow some tolerance since QN may not converge exactly at boundary expect_equal(result$par, lower, tolerance = 1e-3) }) test_that("arcopt_qn validates bound dimensions", { expect_error( arcopt_qn(c(1, 1), sphere_fn, sphere_gr, lower = c(0), upper = c(2, 2)), "lower and upper must have same length" ) expect_error( arcopt_qn(c(1, 1), sphere_fn, sphere_gr, lower = c(0, 0, 0), upper = c(2, 2, 2)), "lower and upper must have same length" ) }) test_that("arcopt_qn validates bound ordering", { expect_error( arcopt_qn(c(1, 1), sphere_fn, sphere_gr, lower = c(2, 0), upper = c(1, 2)), "lower must be strictly less than upper" ) }) # ============================================================================= # Rosenbrock Convergence Tests # ============================================================================= test_that("arcopt_qn converges on Rosenbrock 2D (full-matrix methods)", { x0 <- c(-1.2, 1) methods <- c("sr1", "bfgs") for (method in methods) { result <- arcopt_qn( x0, rosenbrock_fn, rosenbrock_gr, control = list(qn_method = method, maxit = 1000, trace = 0) ) expect_true( result$converged || sqrt(sum(result$gradient^2)) < 1e-3, info = paste("Method:", method) ) # Should be close to (1, 1) expect_equal( result$par, c(1, 1), tolerance = 0.1, info = paste("Method:", method) ) } }) # ============================================================================= # Comparison with Standard ARC # ============================================================================= test_that("arcopt routes correctly with use_qn", { x0 <- c(2, 2) # Standard ARC (requires Hessian) result_std <- arcopt( x0, sphere_fn, sphere_gr, sphere_hess, control = list(trace = 0) ) # QN-ARC via router result_qn <- arcopt( x0, sphere_fn, sphere_gr, sphere_hess, control = list(use_qn = TRUE, qn_method = "sr1", trace = 0) ) # Both should converge expect_true(result_std$converged) expect_true(result_qn$converged) # Both should find the same minimum expect_equal(result_std$par, result_qn$par, tolerance = 1e-4) }) # ============================================================================= # Edge Cases # ============================================================================= test_that("arcopt_qn handles starting at optimum", { x0 <- c(0, 0, 0) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "sr1", trace = 0) ) expect_true(result$converged) expect_equal(result$par, c(0, 0, 0), tolerance = 1e-10) expect_equal(result$iterations, 0) # Should converge immediately }) test_that("arcopt_qn handles single dimension", { fn <- function(x) x^2 gr <- function(x) 2 * x result <- arcopt_qn( 5, fn, gr, control = list(qn_method = "sr1", trace = 0) ) expect_true(result$converged) expect_equal(result$par, 0, tolerance = 1e-6) }) test_that("arcopt_qn stops at maxit", { # Use very restrictive tolerance x0 <- c(100, 100) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "sr1", maxit = 5, gtol_abs = 1e-50, trace = 0) ) expect_false(result$converged) expect_equal(result$iterations, 5) expect_equal(result$message, "maxit") }) # ============================================================================= # Numerical Robustness # ============================================================================= test_that("arcopt_qn handles NaN in objective", { bad_fn <- function(x) { if (x[1] < 0.1) return(NaN) sum(x^2) } bad_gr <- function(x) 2 * x x0 <- c(1, 1) # Should handle NaN by increasing sigma and rejecting step result <- arcopt_qn( x0, bad_fn, bad_gr, control = list(qn_method = "sr1", maxit = 100, trace = 0) ) # May or may not converge, but shouldn't crash expect_true(is.list(result)) expect_true(all(is.finite(result$par))) }) test_that("arcopt_qn handles very small sigma_min", { x0 <- c(2, 2) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list( qn_method = "sr1", sigma_min = 1e-12, trace = 0 ) ) expect_true(result$converged) }) test_that("arcopt_qn handles large sigma_max", { x0 <- c(2, 2) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list( qn_method = "sr1", sigma_max = 1e20, trace = 0 ) ) expect_true(result$converged) }) # ============================================================================= # Higher Dimensional Tests # ============================================================================= test_that("arcopt_qn handles moderate dimensions with full-matrix SR1", { n <- 20 x0 <- rep(5, n) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "sr1", trace = 0) ) expect_true(result$converged) expect_equal(result$par, rep(0, n), tolerance = 1e-4) }) # ============================================================================= # Nesterov Acceleration Tests (Algorithm 4b) # ============================================================================= test_that("arcopt_qn with acceleration finds near-optimal on sphere", { x0 <- c(5, -3, 2) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "bfgs", use_accel_qn = TRUE, maxit = 2000, trace = 0) ) # Check that solution is near optimal (may not fully converge due to # acceleration extrapolation, but should be close) expect_equal(result$par, rep(0, 3), tolerance = 1e-3) expect_true(sqrt(sum(result$gradient^2)) < 0.01) }) test_that("arcopt_qn with acceleration finds near-optimal on quadratic", { x0 <- c(10, 10) result <- arcopt_qn( x0, quadratic_fn, quadratic_gr, control = list(qn_method = "bfgs", use_accel_qn = TRUE, maxit = 2000, trace = 0) ) # Check that solution is near optimal expect_equal(result$par, c(0, 0), tolerance = 1e-3) expect_true(sqrt(sum(result$gradient^2)) < 0.01) }) test_that("arcopt_qn acceleration reaches similar solution as non-accelerated", { x0 <- c(10, 10) # Non-accelerated result_normal <- arcopt_qn( x0, quadratic_fn, quadratic_gr, control = list(qn_method = "bfgs", use_accel_qn = FALSE, trace = 0) ) # Accelerated result_accel <- arcopt_qn( x0, quadratic_fn, quadratic_gr, control = list(qn_method = "bfgs", use_accel_qn = TRUE, maxit = 2000, trace = 0) ) # Normal should converge expect_true(result_normal$converged) # Both should reach similar solution expect_equal(result_normal$par, result_accel$par, tolerance = 1e-2) }) test_that("arcopt_qn acceleration works with SR1", { x0 <- c(5, 5, 5) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "sr1", use_accel_qn = TRUE, maxit = 2000, trace = 0) ) expect_equal(result$par, rep(0, 3), tolerance = 1e-3) }) test_that("arcopt_qn acceleration respects box constraints", { x0 <- c(0.5, 0.5) lower <- c(0.1, 0.1) upper <- c(0.9, 0.9) result <- arcopt_qn( x0, sphere_fn, sphere_gr, lower = lower, upper = upper, control = list(qn_method = "bfgs", use_accel_qn = TRUE, maxit = 1000, trace = 0) ) # Solution should respect bounds expect_true(all(result$par >= lower - 1e-8)) expect_true(all(result$par <= upper + 1e-8)) }) test_that("arcopt_qn acceleration handles single dimension", { fn <- function(x) x^2 gr <- function(x) 2 * x result <- arcopt_qn( 5, fn, gr, control = list(qn_method = "bfgs", use_accel_qn = TRUE, maxit = 2000, trace = 0) ) expect_equal(result$par, 0, tolerance = 1e-3) }) # ============================================================================= # Hybrid Method Tests # ============================================================================= test_that("hybrid is the default qn_method", { # Verify default by checking result without specifying method x0 <- c(2, 2) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(trace = 0) ) expect_true(result$converged) expect_equal(result$par, c(0, 0), tolerance = 1e-4) }) test_that("arcopt_qn converges on sphere with hybrid method", { x0 <- c(5, -3, 2) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "hybrid", trace = 0) ) expect_true(result$converged) expect_equal(result$par, rep(0, 3), tolerance = 1e-4) }) test_that("arcopt_qn hybrid converges on Rosenbrock", { x0 <- c(-1.2, 1) result <- arcopt_qn( x0, rosenbrock_fn, rosenbrock_gr, control = list(qn_method = "hybrid", maxit = 1000, trace = 0) ) # Rosenbrock minimum is at (1, 1) expect_true(result$converged || sqrt(sum(result$gradient^2)) < 1e-3) expect_equal(result$par, c(1, 1), tolerance = 0.05) }) test_that("arcopt_qn hybrid handles ill-conditioned quadratic", { x0 <- c(10, 10) result <- arcopt_qn( x0, quadratic_fn, quadratic_gr, control = list(qn_method = "hybrid", trace = 0) ) expect_true(result$converged) expect_equal(result$par, c(0, 0), tolerance = 1e-4) }) test_that("arcopt_qn hybrid respects bfgs_tol parameter", { x0 <- c(5, 5) # With very low bfgs_tol, BFGS should be used more often result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "hybrid", bfgs_tol = 1e-15, trace = 0) ) expect_true(result$converged) }) test_that("arcopt_qn hybrid respects box constraints", { x0 <- c(0.5, 0.5) lower <- c(0.1, 0.1) upper <- c(0.9, 0.9) result <- arcopt_qn( x0, sphere_fn, sphere_gr, lower = lower, upper = upper, control = list(qn_method = "hybrid", trace = 0) ) # Solution should respect bounds expect_true(all(result$par >= lower - 1e-8)) expect_true(all(result$par <= upper + 1e-8)) # Minimum is at (0,0) but constrained, so should be at (0.1, 0.1) expect_equal(result$par, c(0.1, 0.1), tolerance = 1e-4) }) test_that("arcopt_qn hybrid tracks QN updates correctly", { x0 <- c(5, 5) result <- arcopt_qn( x0, sphere_fn, sphere_gr, control = list(qn_method = "hybrid", trace = 0) ) # Should have some updates expect_true(result$diagnostics$qn_updates > 0) # Hybrid method should rarely skip since Powell is a fallback expect_true(result$diagnostics$qn_skips <= result$diagnostics$qn_updates) }) # ============================================================================= # FD-Hessian Seeding and State-Aware Hybrid Routing Tests (v0.1.1) # ============================================================================= # A small two-component Gaussian mixture creates a saddle at the # symmetric starting point (mu_1 = mu_2 = 0). Identity-initialized QN # methods reliably stay on the symmetric axis; FD-seeded methods can # escape it. mix_data <- local({ set.seed(1) c(rnorm(100, -2, 1), rnorm(100, 2, 1)) }) mix_nll <- function(theta) { mu1 <- theta[1] mu2 <- theta[2] -sum(log(0.5 * dnorm(mix_data, mu1, 1) + 0.5 * dnorm(mix_data, mu2, 1))) } mix_gr <- function(theta) { mu1 <- theta[1] mu2 <- theta[2] p1 <- 0.5 * dnorm(mix_data, mu1, 1) p2 <- 0.5 * dnorm(mix_data, mu2, 1) w1 <- p1 / (p1 + p2) w2 <- p2 / (p1 + p2) c(-sum(w1 * (mix_data - mu1)), -sum(w2 * (mix_data - mu2))) } test_that("arcopt_qn FD-seeds B_0 by default (no hess supplied)", { # FD seeding at n-dim x_0 costs 2*n gradient evaluations before the # main loop. For the symmetric mixture saddle this is what allows # the QN iteration to escape at all, so convergence itself is the # test. result <- arcopt_qn( x0 = c(0.01, 0.01), fn = mix_nll, gr = mix_gr, control = list(qn_method = "sr1", maxit = 500, trace = 0) ) # Should escape the saddle to distinct mus expect_gt(abs(result$par[1] - result$par[2]), 0.5) }) test_that("arcopt_qn respects identity opt-out via hess = diag", { # Passing hess = function(x) diag(length(x)) recovers the classical # identity-initialized behavior. On the symmetric mixture saddle, # the BFGS QN update should then stay on the symmetric axis (or at # least not escape as cleanly as the FD-seeded default). result <- arcopt_qn( x0 = c(0.01, 0.01), fn = mix_nll, gr = mix_gr, hess = function(x) diag(length(x)), control = list(qn_method = "bfgs", maxit = 500, trace = 0) ) expect_type(result$par, "double") expect_length(result$par, 2L) }) test_that("arcopt_qn returns qn_fd_refreshes in the result list", { result <- arcopt_qn( x0 = c(0.01, 0.01), fn = mix_nll, gr = mix_gr, control = list(qn_method = "hybrid", maxit = 500, trace = 0) ) expect_true("qn_fd_refreshes" %in% names(result$diagnostics)) expect_true(is.numeric(result$diagnostics$qn_fd_refreshes)) expect_gte(result$diagnostics$qn_fd_refreshes, 0) }) test_that("arcopt_qn hybrid accepts routing control parameters", { # Should run without error when the new routing controls are set. result <- arcopt_qn( x0 = c(0.01, 0.01), fn = mix_nll, gr = mix_gr, control = list( qn_method = "hybrid", qn_route_demote_rho = 0.3, qn_route_promote_rho = 0.6, qn_route_demote_k = 3L, qn_route_promote_k = 2L, qn_fd_refresh_k = 4L, qn_stuck_refresh_k = 50L, maxit = 500, trace = 0 ) ) expect_type(result$par, "double") }) test_that("arcopt_qn FD refresh is active only in hybrid mode", { # Non-hybrid modes should not perform FD refreshes. for (method in c("bfgs", "sr1")) { result <- arcopt_qn( x0 = c(0.01, 0.01), fn = mix_nll, gr = mix_gr, control = list(qn_method = method, maxit = 200, trace = 0) ) expect_identical(result$diagnostics$qn_fd_refreshes, 0L, info = paste("method =", method)) } })