# Tests for Quasi-Newton ARC Optimizer (Algorithm 4)
# ==================================================
#
# arcopt_qn() is no longer publicly exported in v0.2.0. The shim below
# routes every call site in this file through the public dispatch path
# `arcopt(control = list(use_qn = TRUE, ...))`, so the existing tests
# continue to exercise the same code path without per-call rewrites.
arcopt_qn <- function(x0, fn, gr, hess = NULL, ...,
                      lower = rep(-Inf, length(x0)),
                      upper = rep(Inf, length(x0)),
                      control = list()) {
  control$use_qn <- TRUE
  arcopt(x0 = x0, fn = fn, gr = gr, hess = hess, ...,
         lower = lower, upper = upper, control = control)
}

# =============================================================================
# Test Functions
# =============================================================================

# Sphere function (convex quadratic)
sphere_fn <- function(x) sum(x^2)
sphere_gr <- function(x) 2 * x
sphere_hess <- function(x) 2 * diag(length(x))

# Rosenbrock function (n-dimensional)
rosenbrock_fn <- function(x) {
  n <- length(x)
  sum(100 * (x[2:n] - x[1:(n - 1)]^2)^2 + (1 - x[1:(n - 1)])^2)
}

rosenbrock_gr <- function(x) {
  n <- length(x)
  g <- numeric(n)
  for (i in 1:(n - 1)) {
    g[i] <- g[i] - 400 * x[i] * (x[i + 1] - x[i]^2) - 2 * (1 - x[i])
    g[i + 1] <- g[i + 1] + 200 * (x[i + 1] - x[i]^2)
  }
  g
}

rosenbrock_hess <- function(x) {
  n <- length(x)
  h <- matrix(0, n, n)
  for (i in 1:(n - 1)) {
    h[i, i] <- h[i, i] + 1200 * x[i]^2 - 400 * x[i + 1] + 2
    h[i, i + 1] <- h[i, i + 1] - 400 * x[i]
    h[i + 1, i] <- h[i + 1, i] - 400 * x[i]
    h[i + 1, i + 1] <- h[i + 1, i + 1] + 200
  }
  h
}

# Simple quadratic with known minimum
quadratic_fn <- function(x) {
  0.5 * (x[1]^2 + 10 * x[2]^2)  # Ill-conditioned
}
quadratic_gr <- function(x) c(x[1], 10 * x[2])
quadratic_hess <- function(x) diag(c(1, 10))


# =============================================================================
# Basic Functionality Tests
# =============================================================================

test_that("arcopt_qn forwards ... to fn and gr", {
  # Scaled sphere: f(x; s) = s * sum(x^2)
  scaled_fn <- function(x, scale) scale * sum(x^2)
  scaled_gr <- function(x, scale) scale * 2 * x

  result <- arcopt_qn(
    x0 = c(5, 5),
    fn = scaled_fn,
    gr = scaled_gr,
    scale = 2,
    control = list(qn_method = "sr1", trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, c(0, 0), tolerance = 1e-4)
})

test_that("arcopt_qn forwards ... to hess in hybrid mode", {
  # Scaled sphere with Hessian
  scaled_fn <- function(x, scale) scale * sum(x^2)
  scaled_gr <- function(x, scale) scale * 2 * x
  scaled_hess <- function(x, scale) scale * 2 * diag(length(x))

  result <- arcopt_qn(
    x0 = c(5, 5),
    fn = scaled_fn,
    gr = scaled_gr,
    hess = scaled_hess,
    scale = 2,
    control = list(qn_method = "sr1", trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, c(0, 0), tolerance = 1e-4)
  # Hessian should have been evaluated once for initialization
  expect_equal(result$evaluations$hess, 1)
})

test_that("arcopt_qn validates inputs correctly", {
  # Invalid x0

expect_error(
    arcopt_qn(c(1, NA), sphere_fn, sphere_gr),
    "x0 must be a finite numeric vector"
  )

  # Missing gradient
  expect_error(
    arcopt_qn(c(1, 1), sphere_fn, gr = NULL),
    "gr"
  )

  # Invalid QN method
  expect_error(
    arcopt_qn(c(1, 1), sphere_fn, sphere_gr,
              control = list(qn_method = "invalid")),
    "qn_method must be one of"
  )
})

test_that("arcopt_qn returns correct output structure", {
  result <- arcopt_qn(
    c(2, 2), sphere_fn, sphere_gr,
    control = list(maxit = 10, trace = 0)
  )

  expect_true(is.list(result))
  expect_true("par" %in% names(result))
  expect_true("value" %in% names(result))
  expect_true("gradient" %in% names(result))
  expect_true("converged" %in% names(result))
  expect_true("iterations" %in% names(result))
  expect_true("evaluations" %in% names(result))
  expect_true("qn_updates" %in% names(result$diagnostics))
  expect_true("qn_skips" %in% names(result$diagnostics))
  expect_true("qn_restarts" %in% names(result$diagnostics))

  expect_equal(length(result$par), 2)
  expect_true(is.numeric(result$value))
  expect_equal(length(result$gradient), 2)
})


# =============================================================================
# Pure QN Mode Tests (no Hessian provided)
# =============================================================================

test_that("arcopt_qn converges on sphere function with SR1", {
  x0 <- c(5, -3, 2)
  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "sr1", trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, rep(0, 3), tolerance = 1e-4)
  expect_equal(result$value, 0, tolerance = 1e-8)
})

test_that("arcopt_qn converges on sphere function with BFGS", {
  x0 <- c(5, -3, 2)
  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "bfgs", trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, rep(0, 3), tolerance = 1e-4)
})

test_that("arcopt_qn converges on ill-conditioned quadratic", {
  x0 <- c(10, 10)
  result <- arcopt_qn(
    x0, quadratic_fn, quadratic_gr,
    control = list(qn_method = "sr1", trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, c(0, 0), tolerance = 1e-4)
})


# =============================================================================
# Hybrid Mode Tests (with Hessian)
# =============================================================================

test_that("arcopt_qn uses provided Hessian for initialization", {
  x0 <- c(2, 2)

  # With Hessian - should use exact initialization
  result_hybrid <- arcopt_qn(
    x0, sphere_fn, sphere_gr, sphere_hess,
    control = list(qn_method = "sr1", trace = 0)
  )

  # Without Hessian - uses scaled identity
  result_pure <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "sr1", trace = 0)
  )

  # Both should converge
  expect_true(result_hybrid$converged)
  expect_true(result_pure$converged)

  # Hybrid should use Hessian evaluation
  expect_equal(result_hybrid$evaluations$hess, 1)
  expect_equal(result_pure$evaluations$hess, 0)
})

test_that("arcopt_qn hybrid mode converges on Rosenbrock 2D", {
  x0 <- c(-1.2, 1)
  result <- arcopt_qn(
    x0, rosenbrock_fn, rosenbrock_gr, rosenbrock_hess,
    control = list(qn_method = "sr1", maxit = 500, trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, c(1, 1), tolerance = 1e-3)
})


# =============================================================================
# QN Update Tracking Tests
# =============================================================================

test_that("arcopt_qn tracks QN updates correctly", {
  x0 <- c(5, 5)
  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "sr1", trace = 0)
  )

  # Should have some updates
  expect_true(result$diagnostics$qn_updates >= 0)
  expect_true(result$diagnostics$qn_skips >= 0)
  expect_true(result$diagnostics$qn_restarts >= 0)

  # Total should be related to iterations
  # (each successful step attempts an update)
  expect_true(result$diagnostics$qn_updates + result$diagnostics$qn_skips <= result$iterations)
})

test_that("BFGS counts updates and skips", {
  # BFGS may skip when curvature condition violated
  x0 <- c(5, 5)
  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "bfgs", trace = 0)
  )

  expect_true(result$diagnostics$qn_updates >= 0)
  expect_true(result$diagnostics$qn_skips >= 0)
})


# =============================================================================
# Box Constraint Tests
# =============================================================================

test_that("arcopt_qn respects box constraints", {
  x0 <- c(0.5, 0.5)
  lower <- c(0.1, 0.1)
  upper <- c(0.9, 0.9)

  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    lower = lower, upper = upper,
    control = list(qn_method = "sr1", maxit = 500, trace = 0)
  )

  # Solution should respect bounds
  expect_true(all(result$par >= lower - 1e-8))
  expect_true(all(result$par <= upper + 1e-8))

  # Solution should be at lower bounds (minimum of sphere in [0.1, 0.9]^2)
  # Allow some tolerance since QN may not converge exactly at boundary
  expect_equal(result$par, lower, tolerance = 1e-3)
})

test_that("arcopt_qn validates bound dimensions", {
  expect_error(
    arcopt_qn(c(1, 1), sphere_fn, sphere_gr,
              lower = c(0), upper = c(2, 2)),
    "lower and upper must have same length"
  )

  expect_error(
    arcopt_qn(c(1, 1), sphere_fn, sphere_gr,
              lower = c(0, 0, 0), upper = c(2, 2, 2)),
    "lower and upper must have same length"
  )
})

test_that("arcopt_qn validates bound ordering", {
  expect_error(
    arcopt_qn(c(1, 1), sphere_fn, sphere_gr,
              lower = c(2, 0), upper = c(1, 2)),
    "lower must be strictly less than upper"
  )
})


# =============================================================================
# Rosenbrock Convergence Tests
# =============================================================================

test_that("arcopt_qn converges on Rosenbrock 2D (full-matrix methods)", {
  x0 <- c(-1.2, 1)

  methods <- c("sr1", "bfgs")

  for (method in methods) {
    result <- arcopt_qn(
      x0, rosenbrock_fn, rosenbrock_gr,
      control = list(qn_method = method, maxit = 1000, trace = 0)
    )

    expect_true(
      result$converged || sqrt(sum(result$gradient^2)) < 1e-3,
      info = paste("Method:", method)
    )

    # Should be close to (1, 1)
    expect_equal(
      result$par, c(1, 1), tolerance = 0.1,
      info = paste("Method:", method)
    )
  }
})


# =============================================================================
# Comparison with Standard ARC
# =============================================================================

test_that("arcopt routes correctly with use_qn", {
  x0 <- c(2, 2)

  # Standard ARC (requires Hessian)
  result_std <- arcopt(
    x0, sphere_fn, sphere_gr, sphere_hess,
    control = list(trace = 0)
  )

  # QN-ARC via router
  result_qn <- arcopt(
    x0, sphere_fn, sphere_gr, sphere_hess,
    control = list(use_qn = TRUE, qn_method = "sr1", trace = 0)
  )

  # Both should converge
  expect_true(result_std$converged)
  expect_true(result_qn$converged)

  # Both should find the same minimum
  expect_equal(result_std$par, result_qn$par, tolerance = 1e-4)
})


# =============================================================================
# Edge Cases
# =============================================================================

test_that("arcopt_qn handles starting at optimum", {
  x0 <- c(0, 0, 0)

  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "sr1", trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, c(0, 0, 0), tolerance = 1e-10)
  expect_equal(result$iterations, 0)  # Should converge immediately
})

test_that("arcopt_qn handles single dimension", {
  fn <- function(x) x^2
  gr <- function(x) 2 * x

  result <- arcopt_qn(
    5, fn, gr,
    control = list(qn_method = "sr1", trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, 0, tolerance = 1e-6)
})

test_that("arcopt_qn stops at maxit", {
  # Use very restrictive tolerance
  x0 <- c(100, 100)

  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "sr1", maxit = 5, gtol_abs = 1e-50, trace = 0)
  )

  expect_false(result$converged)
  expect_equal(result$iterations, 5)
  expect_equal(result$message, "maxit")
})


# =============================================================================
# Numerical Robustness
# =============================================================================

test_that("arcopt_qn handles NaN in objective", {
  bad_fn <- function(x) {
    if (x[1] < 0.1) return(NaN)
    sum(x^2)
  }
  bad_gr <- function(x) 2 * x

  x0 <- c(1, 1)

  # Should handle NaN by increasing sigma and rejecting step
  result <- arcopt_qn(
    x0, bad_fn, bad_gr,
    control = list(qn_method = "sr1", maxit = 100, trace = 0)
  )

  # May or may not converge, but shouldn't crash
  expect_true(is.list(result))
  expect_true(all(is.finite(result$par)))
})

test_that("arcopt_qn handles very small sigma_min", {
  x0 <- c(2, 2)

  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(
      qn_method = "sr1",
      sigma_min = 1e-12,
      trace = 0
    )
  )

  expect_true(result$converged)
})

test_that("arcopt_qn handles large sigma_max", {
  x0 <- c(2, 2)

  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(
      qn_method = "sr1",
      sigma_max = 1e20,
      trace = 0
    )
  )

  expect_true(result$converged)
})


# =============================================================================
# Higher Dimensional Tests
# =============================================================================

test_that("arcopt_qn handles moderate dimensions with full-matrix SR1", {
  n <- 20
  x0 <- rep(5, n)

  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "sr1", trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, rep(0, n), tolerance = 1e-4)
})


# =============================================================================
# Nesterov Acceleration Tests (Algorithm 4b)
# =============================================================================

test_that("arcopt_qn with acceleration finds near-optimal on sphere", {
  x0 <- c(5, -3, 2)
  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "bfgs", use_accel_qn = TRUE, maxit = 2000, trace = 0)
  )

  # Check that solution is near optimal (may not fully converge due to
  # acceleration extrapolation, but should be close)
  expect_equal(result$par, rep(0, 3), tolerance = 1e-3)
  expect_true(sqrt(sum(result$gradient^2)) < 0.01)
})

test_that("arcopt_qn with acceleration finds near-optimal on quadratic", {
  x0 <- c(10, 10)
  result <- arcopt_qn(
    x0, quadratic_fn, quadratic_gr,
    control = list(qn_method = "bfgs", use_accel_qn = TRUE, maxit = 2000, trace = 0)
  )

  # Check that solution is near optimal
  expect_equal(result$par, c(0, 0), tolerance = 1e-3)
  expect_true(sqrt(sum(result$gradient^2)) < 0.01)
})

test_that("arcopt_qn acceleration reaches similar solution as non-accelerated", {
  x0 <- c(10, 10)

  # Non-accelerated
  result_normal <- arcopt_qn(
    x0, quadratic_fn, quadratic_gr,
    control = list(qn_method = "bfgs", use_accel_qn = FALSE, trace = 0)
  )

  # Accelerated
  result_accel <- arcopt_qn(
    x0, quadratic_fn, quadratic_gr,
    control = list(qn_method = "bfgs", use_accel_qn = TRUE, maxit = 2000, trace = 0)
  )

  # Normal should converge
  expect_true(result_normal$converged)

  # Both should reach similar solution
  expect_equal(result_normal$par, result_accel$par, tolerance = 1e-2)
})

test_that("arcopt_qn acceleration works with SR1", {
  x0 <- c(5, 5, 5)
  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "sr1", use_accel_qn = TRUE, maxit = 2000, trace = 0)
  )

  expect_equal(result$par, rep(0, 3), tolerance = 1e-3)
})

test_that("arcopt_qn acceleration respects box constraints", {
  x0 <- c(0.5, 0.5)
  lower <- c(0.1, 0.1)
  upper <- c(0.9, 0.9)

  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    lower = lower, upper = upper,
    control = list(qn_method = "bfgs", use_accel_qn = TRUE, maxit = 1000, trace = 0)
  )

  # Solution should respect bounds
  expect_true(all(result$par >= lower - 1e-8))
  expect_true(all(result$par <= upper + 1e-8))
})

test_that("arcopt_qn acceleration handles single dimension", {
  fn <- function(x) x^2
  gr <- function(x) 2 * x

  result <- arcopt_qn(
    5, fn, gr,
    control = list(qn_method = "bfgs", use_accel_qn = TRUE, maxit = 2000, trace = 0)
  )

  expect_equal(result$par, 0, tolerance = 1e-3)
})


# =============================================================================
# Hybrid Method Tests
# =============================================================================

test_that("hybrid is the default qn_method", {
  # Verify default by checking result without specifying method
  x0 <- c(2, 2)
  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, c(0, 0), tolerance = 1e-4)
})

test_that("arcopt_qn converges on sphere with hybrid method", {
  x0 <- c(5, -3, 2)
  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "hybrid", trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, rep(0, 3), tolerance = 1e-4)
})

test_that("arcopt_qn hybrid converges on Rosenbrock", {
  x0 <- c(-1.2, 1)
  result <- arcopt_qn(
    x0, rosenbrock_fn, rosenbrock_gr,
    control = list(qn_method = "hybrid", maxit = 1000, trace = 0)
  )

  # Rosenbrock minimum is at (1, 1)
  expect_true(result$converged || sqrt(sum(result$gradient^2)) < 1e-3)
  expect_equal(result$par, c(1, 1), tolerance = 0.05)
})

test_that("arcopt_qn hybrid handles ill-conditioned quadratic", {
  x0 <- c(10, 10)
  result <- arcopt_qn(
    x0, quadratic_fn, quadratic_gr,
    control = list(qn_method = "hybrid", trace = 0)
  )

  expect_true(result$converged)
  expect_equal(result$par, c(0, 0), tolerance = 1e-4)
})

test_that("arcopt_qn hybrid respects bfgs_tol parameter", {
  x0 <- c(5, 5)

  # With very low bfgs_tol, BFGS should be used more often
  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "hybrid", bfgs_tol = 1e-15, trace = 0)
  )

  expect_true(result$converged)
})

test_that("arcopt_qn hybrid respects box constraints", {
  x0 <- c(0.5, 0.5)
  lower <- c(0.1, 0.1)
  upper <- c(0.9, 0.9)

  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    lower = lower, upper = upper,
    control = list(qn_method = "hybrid", trace = 0)
  )

  # Solution should respect bounds
  expect_true(all(result$par >= lower - 1e-8))
  expect_true(all(result$par <= upper + 1e-8))
  # Minimum is at (0,0) but constrained, so should be at (0.1, 0.1)
  expect_equal(result$par, c(0.1, 0.1), tolerance = 1e-4)
})

test_that("arcopt_qn hybrid tracks QN updates correctly", {
  x0 <- c(5, 5)
  result <- arcopt_qn(
    x0, sphere_fn, sphere_gr,
    control = list(qn_method = "hybrid", trace = 0)
  )

  # Should have some updates
  expect_true(result$diagnostics$qn_updates > 0)
  # Hybrid method should rarely skip since Powell is a fallback
  expect_true(result$diagnostics$qn_skips <= result$diagnostics$qn_updates)
})


# =============================================================================
# FD-Hessian Seeding and State-Aware Hybrid Routing Tests (v0.1.1)
# =============================================================================

# A small two-component Gaussian mixture creates a saddle at the
# symmetric starting point (mu_1 = mu_2 = 0). Identity-initialized QN
# methods reliably stay on the symmetric axis; FD-seeded methods can
# escape it.
mix_data <- local({
  set.seed(1)
  c(rnorm(100, -2, 1), rnorm(100, 2, 1))
})
mix_nll <- function(theta) {
  mu1 <- theta[1]
  mu2 <- theta[2]
  -sum(log(0.5 * dnorm(mix_data, mu1, 1) + 0.5 * dnorm(mix_data, mu2, 1)))
}
mix_gr <- function(theta) {
  mu1 <- theta[1]
  mu2 <- theta[2]
  p1 <- 0.5 * dnorm(mix_data, mu1, 1)
  p2 <- 0.5 * dnorm(mix_data, mu2, 1)
  w1 <- p1 / (p1 + p2)
  w2 <- p2 / (p1 + p2)
  c(-sum(w1 * (mix_data - mu1)), -sum(w2 * (mix_data - mu2)))
}

test_that("arcopt_qn FD-seeds B_0 by default (no hess supplied)", {
  # FD seeding at n-dim x_0 costs 2*n gradient evaluations before the
  # main loop. For the symmetric mixture saddle this is what allows
  # the QN iteration to escape at all, so convergence itself is the
  # test.
  result <- arcopt_qn(
    x0 = c(0.01, 0.01),
    fn = mix_nll, gr = mix_gr,
    control = list(qn_method = "sr1", maxit = 500, trace = 0)
  )
  # Should escape the saddle to distinct mus
  expect_gt(abs(result$par[1] - result$par[2]), 0.5)
})

test_that("arcopt_qn respects identity opt-out via hess = diag", {
  # Passing hess = function(x) diag(length(x)) recovers the classical
  # identity-initialized behavior. On the symmetric mixture saddle,
  # the BFGS QN update should then stay on the symmetric axis (or at
  # least not escape as cleanly as the FD-seeded default).
  result <- arcopt_qn(
    x0 = c(0.01, 0.01),
    fn = mix_nll, gr = mix_gr,
    hess = function(x) diag(length(x)),
    control = list(qn_method = "bfgs", maxit = 500, trace = 0)
  )
  expect_type(result$par, "double")
  expect_length(result$par, 2L)
})

test_that("arcopt_qn returns qn_fd_refreshes in the result list", {
  result <- arcopt_qn(
    x0 = c(0.01, 0.01),
    fn = mix_nll, gr = mix_gr,
    control = list(qn_method = "hybrid", maxit = 500, trace = 0)
  )
  expect_true("qn_fd_refreshes" %in% names(result$diagnostics))
  expect_true(is.numeric(result$diagnostics$qn_fd_refreshes))
  expect_gte(result$diagnostics$qn_fd_refreshes, 0)
})

test_that("arcopt_qn hybrid accepts routing control parameters", {
  # Should run without error when the new routing controls are set.
  result <- arcopt_qn(
    x0 = c(0.01, 0.01),
    fn = mix_nll, gr = mix_gr,
    control = list(
      qn_method = "hybrid",
      qn_route_demote_rho = 0.3,
      qn_route_promote_rho = 0.6,
      qn_route_demote_k = 3L,
      qn_route_promote_k = 2L,
      qn_fd_refresh_k = 4L,
      qn_stuck_refresh_k = 50L,
      maxit = 500,
      trace = 0
    )
  )
  expect_type(result$par, "double")
})

test_that("arcopt_qn FD refresh is active only in hybrid mode", {
  # Non-hybrid modes should not perform FD refreshes.
  for (method in c("bfgs", "sr1")) {
    result <- arcopt_qn(
      x0 = c(0.01, 0.01),
      fn = mix_nll, gr = mix_gr,
      control = list(qn_method = method, maxit = 200, trace = 0)
    )
    expect_identical(result$diagnostics$qn_fd_refreshes, 0L,
                     info = paste("method =", method))
  }
})