local_null_coalesce <- function(x, y) {
  if (is.null(x)) y else x
}

build_tuning_env_from_fit <- function(fit,
                                      tuning_criterion = "loo",
                                      gcv_gamma = 1.4) {
  X_std <- lapply(fit$X, fit$std_X)
  smoothing_spline_penalty <- lgspline:::get_2ndDerivPenalty(
    colnm_expansions = fit$raw_expansion_names,
    C = fit$X[[1]],
    power1_cols = fit$power1_cols,
    power2_cols = fit$power2_cols,
    power3_cols = fit$power3_cols,
    power4_cols = fit$power4_cols,
    interaction_single_cols = fit$interaction_single_cols,
    interaction_quad_cols = fit$interaction_quad_cols,
    triplet_cols = fit$triplet_cols,
    p_expansions = fit$p
  )
  y_vec <- fit$y
  y_list <- lapply(fit$order_list, function(idx) y_vec[idx])
  X_gram <- lapply(X_std, crossprod)
  Xy <- lapply(seq_along(X_std), function(k) crossprod(X_std[[k]], cbind(y_list[[k]])))
  delta <- fit$.fit_call_args$delta
  if (is.null(delta) && paste0(fit$family)[2] != "identity") {
    delta <- lgspline:::.compute_tuning_delta(
      family = fit$family,
      unl_y = y_vec,
      N_obs = fit$N,
      observation_weights = NULL,
      opt = TRUE
    )
  }
  if (is.null(delta)) {
    delta <- 0
  }

  A <- fit$A
  if (is.null(A)) {
    A <- cbind(rep(0, (fit$K + 1) * fit$p))
    A <- cbind(A, A)
  }
  R_constraints <- ncol(A)

  observation_weights <- fit$weights
  if (is.null(observation_weights)) {
    observation_weights <- rep(1, fit$N)
  }
  observation_weights_list <- lapply(fit$order_list, function(idx) {
    observation_weights[idx]
  })
  homogenous_weights <- length(unique(observation_weights)) == 1

  flat_cols <- c()
  flat_terms <- local_null_coalesce(
    fit$.fit_call_args$just_linear_without_interactions,
    c()
  )
  if (length(flat_terms) > 0) {
    flat_cols <- which(fit$raw_expansion_names %in% paste0("_", flat_terms, "_"))
  }
  use_blockfit <- isTRUE(fit$.fit_call_args$blockfit) &&
    length(flat_cols) > 0 && fit$K > 0

  lgspline:::.build_tuning_env(
    y = y_list,
    X = X_std,
    X_gram = X_gram,
    Xy = Xy,
    smoothing_spline_penalty = smoothing_spline_penalty,
    A = A,
    R_constraints = R_constraints,
    K = fit$K,
    p_expansions = fit$p,
    N_obs = fit$N,
    custom_penalty_mat = fit$.fit_call_args$custom_penalty_mat,
    colnm_expansions = fit$raw_expansion_names,
    unique_penalty_per_predictor = fit$.fit_call_args$unique_penalty_per_predictor,
    unique_penalty_per_partition = fit$.fit_call_args$unique_penalty_per_partition,
    meta_penalty = fit$.fit_call_args$meta_penalty,
    family = fit$family,
    delta = delta,
    order_list = fit$order_list,
    observation_weights = observation_weights_list,
    homogenous_weights = homogenous_weights,
    parallel = FALSE,
    parallel_eigen = FALSE,
    parallel_trace = FALSE,
    parallel_aga = FALSE,
    parallel_matmult = FALSE,
    parallel_unconstrained = FALSE,
    cl = NULL,
    chunk_size = 1,
    num_chunks = 0,
    rem_chunks = 0,
    unconstrained_fit_fxn = fit$.fit_call_args$unconstrained_fit_fxn,
    keep_weighted_Lambda = fit$.fit_call_args$keep_weighted_Lambda,
    iterate = fit$.fit_call_args$iterate_tune,
    qp_score_function = fit$.fit_call_args$qp_score_function,
    quadprog = FALSE,
    qp_Amat = NULL,
    qp_bvec = NULL,
    qp_meq = 0,
    tol = fit$.fit_call_args$tol,
    sd_y = fit$sd_y,
    tuning_criterion = tuning_criterion,
    gcv_gamma = gcv_gamma,
    constraint_value_vectors = local_null_coalesce(fit$constraint_values, cbind()),
    glm_weight_function = fit$.fit_call_args$glm_weight_function,
    schur_correction_function = fit$.fit_call_args$schur_correction_function,
    need_dispersion_for_estimation = fit$.fit_call_args$need_dispersion_for_estimation,
    dispersion_function = fit$.fit_call_args$dispersion_function,
    blockfit = fit$.fit_call_args$blockfit,
    just_linear_without_interactions = flat_terms,
    Vhalf = fit$Vhalf,
    VhalfInv = fit$VhalfInv,
    verbose = FALSE,
    include_warnings = FALSE,
    flat_cols = flat_cols,
    use_blockfit = use_blockfit
  )
}

make_gaussian_linear_fit <- function(wiggle_penalty = 1e-3,
                                     flat_ridge_penalty = 0.2) {
  t <- seq(-2, 2, length.out = 8)
  y <- 1 + 0.5 * t + c(0.1, -0.05, 0.02, -0.03, 0.04, -0.02, 0.01, -0.04)

  fit <- lgspline(
    cbind(t),
    y,
    K = 0,
    opt = FALSE,
    wiggle_penalty = wiggle_penalty,
    flat_ridge_penalty = flat_ridge_penalty,
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = FALSE,
    just_linear_without_interactions = 1,
    standardize_response = FALSE,
    include_warnings = FALSE
  )

  list(
    t = t,
    y = y,
    fit = fit,
    wiggle_penalty = wiggle_penalty,
    flat_ridge_penalty = flat_ridge_penalty
  )
}

test_that(".compute_loocv matches explicit Gaussian refits at fixed penalties", {
  case <- make_gaussian_linear_fit()
  env <- build_tuning_env_from_fit(case$fit, tuning_criterion = "loo")
  par <- log(c(case$wiggle_penalty, case$flat_ridge_penalty))

  loo_obj <- lgspline:::.compute_loocv(par, c(), env)

  loo_refit <- sapply(seq_along(case$y), function(i) {
    refit <- lgspline(
      cbind(case$t[-i]),
      case$y[-i],
      K = 0,
      opt = FALSE,
      wiggle_penalty = exp(par[1]),
      flat_ridge_penalty = exp(par[2]),
      unique_penalty_per_predictor = FALSE,
      unique_penalty_per_partition = FALSE,
      just_linear_without_interactions = 1,
      standardize_response = FALSE,
      include_warnings = FALSE
    )
    as.numeric(predict(refit, cbind(case$t[i])))
  })

  loo_refit_value <- mean((case$y - loo_refit)^2)

  expect_equal(loo_obj$LOO_u, loo_refit_value, tolerance = 1e-4)
  expect_equal(loo_obj$criterion_value, loo_obj$LOO_u, tolerance = 1e-10)
})

test_that(".compute_loocv_gradient matches finite differences in a Gaussian case", {
  case <- make_gaussian_linear_fit(wiggle_penalty = 5e-3, flat_ridge_penalty = 0.3)
  env <- build_tuning_env_from_fit(case$fit, tuning_criterion = "loo")
  par <- log(c(5e-3, 0.3))

  analytic <- lgspline:::.compute_loocv_gradient(par, c(), env = env)$gradient[1:2]

  h <- 1e-5
  numeric <- vapply(seq_along(par), function(j) {
    step <- rep(0, length(par))
    step[j] <- h
    up <- lgspline:::.compute_loocv(par + step, c(), env)$criterion_value
    down <- lgspline:::.compute_loocv(par - step, c(), env)$criterion_value
    (up - down) / (2 * h)
  }, numeric(1))

  expect_equal(analytic, numeric, tolerance = 1e-4)
})

test_that("default tuning is LOO and gcv_gamma affects only the GCV objective", {
  t <- seq(-2, 2, length.out = 30)
  y <- sin(t) + 0.1 * t^2
  fit_gcv_case <- lgspline(
    cbind(t), y,
    K = 1,
    opt = FALSE,
    wiggle_penalty = 5e-3,
    flat_ridge_penalty = 0.3,
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = FALSE,
    standardize_response = FALSE,
    include_warnings = FALSE
  )
  env_loo_1 <- build_tuning_env_from_fit(fit_gcv_case, tuning_criterion = "loo", gcv_gamma = 1)
  env_loo_9 <- build_tuning_env_from_fit(fit_gcv_case, tuning_criterion = "loo", gcv_gamma = 9)
  env_gcv_1 <- build_tuning_env_from_fit(fit_gcv_case, tuning_criterion = "gcv", gcv_gamma = 1)
  env_gcv_9 <- build_tuning_env_from_fit(fit_gcv_case, tuning_criterion = "gcv", gcv_gamma = 9)
  par <- log(c(5e-3, 0.3))

  loo_1 <- lgspline:::.compute_loocv(par, c(), env_loo_1)$criterion_value
  loo_9 <- lgspline:::.compute_loocv(par, c(), env_loo_9)$criterion_value
  gcv_1 <- lgspline:::.compute_gcvu(par, c(), env_gcv_1)$criterion_value
  gcv_9 <- lgspline:::.compute_gcvu(par, c(), env_gcv_9)$criterion_value

  expect_equal(loo_1, loo_9, tolerance = 1e-10)
  expect_false(isTRUE(all.equal(gcv_1, gcv_9, tolerance = 1e-10)))

  t <- seq(-1, 1, length.out = 20)
  y <- sin(t) + 0.1 * cos(3 * t)
  fit_default <- lgspline(
    cbind(t), y,
    K = 1,
    opt = TRUE,
    use_custom_bfgs = FALSE,
    gcv_gamma = 9,
    initial_wiggle = c(1e-4, 5e-4),
    initial_flat = c(0.2, 0.5),
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = FALSE,
    just_linear_without_interactions = 1,
    standardize_response = FALSE,
    include_warnings = FALSE
  )
  fit_explicit_loo <- lgspline(
    cbind(t), y,
    K = 1,
    opt = TRUE,
    use_custom_bfgs = FALSE,
    tuning_criterion = "loo",
    gcv_gamma = 1,
    initial_wiggle = c(1e-4, 5e-4),
    initial_flat = c(0.2, 0.5),
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = FALSE,
    just_linear_without_interactions = 1,
    standardize_response = FALSE,
    include_warnings = FALSE
  )

  expect_equal(fit_default$penalties$Lambda,
               fit_explicit_loo$penalties$Lambda,
               tolerance = 1e-8)
})

test_that("LOO and GCV tuning helpers stay finite for a non-Gaussian case", {
  set.seed(2)
  x <- seq(-1, 1, length.out = 40)
  eta <- -0.3 + 1.2 * x
  p <- plogis(eta)
  y <- rbinom(length(x), 1, p)

  fit <- lgspline(
    cbind(x),
    y,
    family = binomial(),
    K = 1,
    opt = FALSE,
    wiggle_penalty = 1e-2,
    flat_ridge_penalty = 0.5,
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = FALSE,
    standardize_response = FALSE,
    include_warnings = FALSE
  )

  env_loo <- build_tuning_env_from_fit(fit, tuning_criterion = "loo")
  env_gcv <- build_tuning_env_from_fit(fit, tuning_criterion = "gcv", gcv_gamma = 1.4)
  par <- log(c(1e-2, 0.5))

  loo_obj <- lgspline:::.compute_loocv(par, c(), env_loo)
  loo_grad <- lgspline:::.compute_loocv_gradient(par, c(), env = env_loo)
  gcv_obj <- lgspline:::.compute_gcvu(par, c(), env_gcv)
  gcv_grad <- lgspline:::.compute_gcvu_gradient(par, c(), env = env_gcv)

  expect_true(is.finite(loo_obj$criterion_value))
  expect_true(all(is.finite(loo_grad$gradient)))
  expect_true(is.finite(gcv_obj$criterion_value))
  expect_true(all(is.finite(gcv_grad$gradient)))
})

test_that("use_custom_bfgs = FALSE works for both tuning criteria", {
  x <- seq(-1, 1, length.out = 12)
  y <- x^2 + 0.1 * x

  fit_loo <- lgspline(
    cbind(x), y,
    K = 0,
    opt = TRUE,
    use_custom_bfgs = FALSE,
    tuning_criterion = "loo",
    initial_wiggle = c(1e-4, 5e-4),
    initial_flat = c(0.2, 0.5),
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = FALSE,
    just_linear_without_interactions = 1,
    standardize_response = FALSE,
    include_warnings = FALSE
  )
  fit_gcv <- lgspline(
    cbind(x), y,
    K = 0,
    opt = TRUE,
    use_custom_bfgs = FALSE,
    tuning_criterion = "gcv",
    gcv_gamma = 1.4,
    initial_wiggle = c(1e-4, 5e-4),
    initial_flat = c(0.2, 0.5),
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = FALSE,
    just_linear_without_interactions = 1,
    standardize_response = FALSE,
    include_warnings = FALSE
  )

  expect_true(all(is.finite(fit_loo$penalties$Lambda)))
  expect_true(all(is.finite(fit_gcv$penalties$Lambda)))
})

test_that("post-optimization sample-size adjustment shrinks tuned penalties for both criteria", {
  x <- seq(-1, 1, length.out = 14)
  y <- sin(pi * x) + 0.05 * x^2
  initial_wiggle <- c(1e-4, 5e-4)
  initial_flat <- c(0.2, 0.5)
  sample_size_adjustment <- ((length(x) - 2) / (length(x) + 2))^2

  base_fit <- lgspline(
    cbind(x), y,
    K = 1,
    opt = FALSE,
    wiggle_penalty = 5e-3,
    flat_ridge_penalty = 0.3,
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = FALSE,
    just_linear_without_interactions = 1,
    standardize_response = FALSE,
    include_warnings = FALSE
  )

  for (criterion in c("loo", "gcv")) {
    env <- build_tuning_env_from_fit(
      base_fit,
      tuning_criterion = criterion,
      gcv_gamma = 1.4
    )
    criterion_fxn <- if (criterion == "loo") {
      lgspline:::.compute_loocv
    } else {
      lgspline:::.compute_gcvu
    }
    best_start <- lgspline:::.tune_grid_search(
      log(initial_wiggle),
      log(initial_flat),
      c(),
      criterion_fxn,
      env,
      include_warnings = FALSE
    )
    opt_res <- optim(
      c(best_start, c()),
      fn = function(par) criterion_fxn(par, c(), env)$criterion_value,
      method = "BFGS"
    )
    raw_penalties <- exp(opt_res$par[1:2])

    fit_tuned <- lgspline(
      cbind(x), y,
      K = 1,
      opt = TRUE,
      use_custom_bfgs = FALSE,
      tuning_criterion = criterion,
      gcv_gamma = 1.4,
      initial_wiggle = initial_wiggle,
      initial_flat = initial_flat,
      unique_penalty_per_predictor = FALSE,
      unique_penalty_per_partition = FALSE,
      just_linear_without_interactions = 1,
      standardize_response = FALSE,
      include_warnings = FALSE
    )

    expect_equal(
      fit_tuned$penalties$wiggle_penalty,
      raw_penalties[1] * sample_size_adjustment,
      tolerance = 1e-8
    )
    expect_equal(
      fit_tuned$penalties$flat_ridge_penalty,
      raw_penalties[2] * sample_size_adjustment,
      tolerance = 1e-8
    )
    expect_lt(fit_tuned$penalties$wiggle_penalty, raw_penalties[1])
    expect_lt(fit_tuned$penalties$flat_ridge_penalty, raw_penalties[2])
  }
})

test_that(".damped_bfgs never returns a GCV solution worse than the grid start", {
  set.seed(123)
  x <- seq(-2, 2, length.out = 30)
  y <- sin(2 * x) + 0.15 * x^2

  fit <- lgspline(
    cbind(x), y,
    K = 1,
    opt = FALSE,
    wiggle_penalty = 5e-3,
    flat_ridge_penalty = 0.3,
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = FALSE,
    just_linear_without_interactions = 1,
    standardize_response = FALSE,
    include_warnings = FALSE
  )

  env <- build_tuning_env_from_fit(fit, tuning_criterion = "gcv", gcv_gamma = 1.4)
  log_penalty_vec <- c()
  log_initial_wiggle <- log(c(1e-4, 5e-4, 5e-3))
  log_initial_flat <- log(c(0.2, 0.5, 1))

  best_start <- lgspline:::.tune_grid_search(
    log_initial_wiggle,
    log_initial_flat,
    log_penalty_vec,
    lgspline:::.compute_gcvu,
    env,
    include_warnings = FALSE
  )

  start_value <- lgspline:::.compute_gcvu(best_start, log_penalty_vec, env)$criterion_value
  res <- lgspline:::.damped_bfgs(
    c(best_start, log_penalty_vec),
    log_penalty_vec,
    lgspline:::.compute_gcvu,
    lgspline:::.compute_gcvu_gradient,
    env,
    tol = 1e-6
  )

  expect_lte(res$criterion_value, start_value + 1e-10)
})

test_that(".damped_bfgs preserves the start point when the first step is worse", {
  env <- list()
  start <- c(0, 0)

  criterion_fxn <- function(par, log_penalty_vec, env, ...) {
    list(criterion_value = sum(par^2))
  }
  gr_fxn <- function(par, log_penalty_vec, outlist, env, ...) {
    list(
      gradient = c(-10, -10),
      outlist = list(criterion_value = sum(par^2))
    )
  }

  res <- lgspline:::.damped_bfgs(
    par = start,
    log_penalty_vec = c(),
    criterion_fxn = criterion_fxn,
    gr_fxn = gr_fxn,
    env = env,
    tol = 1e-8,
    max_iter = 2
  )

  expect_equal(res$par, start, tolerance = 1e-12)
  expect_equal(res$criterion_value, 0, tolerance = 1e-12)
})

test_that("custom GCV tuning does not inflate partition penalties relative to finite differences", {
  set.seed(1234)
  x <- sort(runif(80, -3, 3))
  eta <- sin(x) - 0.2 * x^2
  y <- rbinom(length(x), 1, plogis(eta))
  dat <- data.frame(x = x, y = y)

  fit_fd <- lgspline(
    y ~ spl(x),
    K = 1,
    dat,
    tuning_criterion = "gcv",
    use_custom_bfgs = FALSE,
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = TRUE,
    include_warnings = FALSE
  )
  fit_custom <- lgspline(
    y ~ spl(x),
    K = 1,
    dat,
    tuning_criterion = "gcv",
    use_custom_bfgs = TRUE,
    unique_penalty_per_predictor = FALSE,
    unique_penalty_per_partition = TRUE,
    include_warnings = FALSE
  )

  ratio <- max(fit_custom$penalties$other_penalties) /
    max(fit_fd$penalties$other_penalties)

  expect_lte(ratio, 10)
})