test_that("cv_ranking_stability returns expected structure", {
  models <- list(
    m1 = mpg ~ wt + hp,
    m2 = mpg ~ wt + hp + disp
  )
  cv <- cv_ranking_stability(models, mtcars, v = 5, R = 10, seed = 1)

  expect_type(cv, "list")
  expect_named(cv, c("settings", "rmse_mat", "rank_mat", "summary"))
  expect_equal(nrow(cv$rmse_mat), 10L)
  expect_equal(ncol(cv$rmse_mat), 2L)
  expect_equal(nrow(cv$summary), 2L)
  expect_named(cv$summary,
               c("model", "mean_rmse", "sd_rmse", "mean_rank", "top1_frequency"))
})

test_that("top1_frequency values sum to approximately 1", {
  models <- list(m1 = mpg ~ wt, m2 = mpg ~ wt + hp)
  cv <- cv_ranking_stability(models, mtcars, v = 5, R = 20, seed = 42)
  expect_equal(sum(cv$summary$top1_frequency), 1, tolerance = 1e-6)
})

test_that("mean_rank values sum to M*(M+1)/2 / M = (M+1)/2", {
  models <- list(m1 = mpg ~ wt, m2 = mpg ~ wt + hp, m3 = mpg ~ wt + hp + disp)
  cv <- cv_ranking_stability(models, mtcars, v = 5, R = 10, seed = 1)
  # For M models, sum of mean ranks = M*(M+1)/2 / R * R = M*(M+1)/2 but
  # colMeans of ranks: each row sums to M*(M+1)/2, so colMeans sum to (M+1)/2 * M
  # More simply: sum(colMeans(rank_mat)) should equal M*(M+1)/2
  expect_equal(sum(cv$summary$mean_rank), 3 * (3 + 1) / 2, tolerance = 1e-6)
})

test_that("rmse values are non-negative", {
  models <- list(m1 = mpg ~ wt, m2 = mpg ~ wt + hp)
  cv <- cv_ranking_stability(models, mtcars, v = 5, R = 10, seed = 1)
  expect_true(all(cv$summary$mean_rmse >= 0))
  expect_true(all(cv$summary$sd_rmse >= 0))
})

test_that("unnamed formulas get auto-names", {
  models <- list(mpg ~ wt, mpg ~ wt + hp)
  cv <- cv_ranking_stability(models, mtcars, v = 5, R = 5, seed = 1)
  expect_true(all(grepl("model_", cv$summary$model)))
})

test_that("non-list formulas error", {
  expect_error(cv_ranking_stability(mpg ~ wt, mtcars))
})

test_that("v > n errors", {
  models <- list(m1 = mpg ~ wt)
  expect_error(cv_ranking_stability(models, mtcars, v = 100))
})

test_that("v < 2 errors", {
  models <- list(m1 = mpg ~ wt)
  expect_error(cv_ranking_stability(models, mtcars, v = 1))
})

test_that("R < 1 errors", {
  models <- list(m1 = mpg ~ wt, m2 = mpg ~ wt + hp)
  expect_error(cv_ranking_stability(models, mtcars, v = 5, R = 0))
})

test_that("glm backend with logloss works", {
  models <- list(
    m1 = am ~ wt + hp,
    m2 = am ~ wt + hp + qsec
  )
  # GLM convergence warnings on small CV folds are expected and suppressed here
  cv <- suppressWarnings(
    cv_ranking_stability(models, mtcars, v = 5, R = 10, seed = 1,
                         family = stats::binomial(), metric = "logloss")
  )
  expect_equal(cv$settings$metric, "logloss")
  expect_equal(cv$settings$backend, "glm")
  expect_true(all(cv$summary$mean_rmse > 0))
})

test_that("auto metric selects logloss for binomial glm", {
  models <- list(m1 = am ~ wt, m2 = am ~ wt + hp)
  cv <- suppressWarnings(
    cv_ranking_stability(models, mtcars, v = 5, R = 5, seed = 1,
                         family = stats::binomial(), metric = "auto")
  )
  expect_equal(cv$settings$metric, "logloss")
})

test_that("auto metric selects rmse for lm", {
  models <- list(m1 = mpg ~ wt, m2 = mpg ~ wt + hp)
  cv <- cv_ranking_stability(models, mtcars, v = 5, R = 5, seed = 1)
  expect_equal(cv$settings$metric, "rmse")
})

test_that("rlm backend works if MASS available", {
  skip_if_not_installed("MASS")
  models <- list(m1 = mpg ~ wt + hp, m2 = mpg ~ wt + hp + disp)
  cv <- cv_ranking_stability(models, mtcars, v = 5, R = 5, seed = 1,
                              backend = "rlm")
  expect_equal(cv$settings$backend, "rlm")
  expect_equal(nrow(cv$summary), 2L)
})

test_that("plot_cv_stability runs without error", {
  models <- list(m1 = mpg ~ wt, m2 = mpg ~ wt + hp)
  cv <- cv_ranking_stability(models, mtcars, v = 5, R = 10, seed = 1)
  expect_silent(plot_cv_stability(cv, metric = "top1_frequency"))
  expect_silent(plot_cv_stability(cv, metric = "mean_rank"))
})

test_that("summary is ordered by mean_rank", {
  models <- list(m1 = mpg ~ wt, m2 = mpg ~ wt + hp, m3 = mpg ~ wt + hp + disp)
  cv <- cv_ranking_stability(models, mtcars, v = 5, R = 10, seed = 1)
  expect_true(all(diff(cv$summary$mean_rank) >= 0))
})