# tests/testthat/test-misl.R
# Run with: devtools::test()

# ── Shared test fixtures ──────────────────────────────────────────────────────

make_cont_data <- function(n = 50, seed = 1) {
  set.seed(seed)
  df <- data.frame(x1 = rnorm(n), x2 = rnorm(n), y = rnorm(n))
  df[sample(n, 10), "y"] <- NA
  df
}

make_bin_data <- function(n = 50, seed = 2) {
  set.seed(seed)
  df <- data.frame(x1 = rnorm(n), x2 = rnorm(n), y = rbinom(n, 1, 0.5))
  df[sample(n, 10), "y"] <- NA
  df
}

make_cat_data <- function(n = 50, seed = 3) {
  set.seed(seed)
  df <- data.frame(
    x1 = rnorm(n),
    x2 = rnorm(n),
    y  = factor(sample(c("a", "b", "c"), n, replace = TRUE))
  )
  df[sample(n, 10), "y"] <- NA
  df
}

make_mixed_data <- function(n = 80, seed = 4) {
  set.seed(seed)
  df <- data.frame(
    cont = rnorm(n),
    bin  = rbinom(n, 1, 0.5),
    cat  = factor(sample(c("x", "y", "z"), n, replace = TRUE)),
    pred = rnorm(n)
  )
  df[sample(n, 10), "cont"] <- NA
  df[sample(n, 10), "bin"]  <- NA
  df[sample(n, 10), "cat"]  <- NA
  df
}

# ── check_dataset() ───────────────────────────────────────────────────────────

test_that("check_dataset() errors on non-data-frame input", {
  expect_error(check_dataset("a string"),    "data frame or matrix")
  expect_error(check_dataset(1:10),          "data frame or matrix")
  expect_error(check_dataset(list(a = 1:5)), "data frame or matrix")
})

test_that("check_dataset() errors on empty input", {
  expect_error(check_dataset(data.frame()),              "at least one row")
  expect_error(check_dataset(data.frame(a = integer())), "at least one row")
})

test_that("check_dataset() errors on complete dataset", {
  expect_error(check_dataset(datasets::iris), "complete")
})

test_that("check_dataset() passes silently on valid incomplete data", {
  df <- make_cont_data()
  expect_invisible(check_dataset(df))
})

# ── check_datatype() ──────────────────────────────────────────────────────────

test_that("check_datatype() identifies categorical columns", {
  expect_equal(check_datatype(factor(c("a", "b", "a"))), "categorical")
})

test_that("check_datatype() identifies binomial columns", {
  expect_equal(check_datatype(c(0, 1, 0, 1, NA)), "binomial")
  expect_equal(check_datatype(c(0, 0, 0)),         "binomial")
})

test_that("check_datatype() identifies continuous columns", {
  expect_equal(check_datatype(c(1.5, 2.3, 3.7)), "continuous")
  expect_equal(check_datatype(c(1L, 2L, 3L)),     "continuous")
  expect_equal(check_datatype(c(0, 1, 2)),         "continuous")
})

# ── list_learners() ───────────────────────────────────────────────────────────

test_that("list_learners() returns a tibble with expected columns", {
  result <- list_learners()
  expect_s3_class(result, "tbl_df")
  expect_true(all(c("learner", "description", "package", "installed") %in%
                    colnames(result)))
})

test_that("list_learners() filters by outcome type", {
  cont <- list_learners("continuous")
  bin  <- list_learners("binomial")
  cat  <- list_learners("categorical")

  expect_true("glm"  %in% cont$learner)
  expect_true("glm"  %in% bin$learner)
  expect_false("glm" %in% cat$learner)

  expect_true("multinom_reg"  %in% cat$learner)
  expect_false("multinom_reg" %in% cont$learner)
  expect_false("multinom_reg" %in% bin$learner)

  expect_false("continuous"  %in% colnames(cont))
  expect_false("binomial"    %in% colnames(bin))
  expect_false("categorical" %in% colnames(cat))
})

test_that("list_learners() installed_only returns only installed learners", {
  result <- list_learners(installed_only = TRUE)
  expect_true(all(result$installed))
})

test_that("list_learners() errors on invalid outcome_type", {
  expect_error(list_learners("invalid"), "should be one of")
})

# ── misl() -- output structure ────────────────────────────────────────────────

test_that("misl() returns a list of length m", {
  result <- misl(make_cont_data(), m = 2, maxit = 1, con_method = "glm", seed = 1)
  expect_type(result, "list")
  expect_length(result, 2)
})

test_that("misl() each element has $datasets and $trace", {
  result <- misl(make_cont_data(), m = 2, maxit = 1, con_method = "glm", seed = 1)
  for (i in seq_along(result)) {
    expect_named(result[[i]], c("datasets", "trace"))
  }
})

test_that("misl() imputed datasets have same dimensions as input", {
  df     <- make_cont_data()
  result <- misl(df, m = 2, maxit = 1, con_method = "glm", seed = 1)
  for (i in seq_along(result)) {
    expect_equal(dim(result[[i]]$datasets), dim(df))
  }
})

# ── misl() -- no remaining missingness ───────────────────────────────────────

test_that("misl() produces no NAs for continuous outcome", {
  result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1)
  expect_false(anyNA(result[[1]]$datasets))
  expect_false(anyNA(result[[2]]$datasets))
})

test_that("misl() produces no NAs for binomial outcome", {
  result <- misl(make_bin_data(), m = 2, maxit = 2, bin_method = "glm", seed = 2)
  expect_false(anyNA(result[[1]]$datasets))
})

test_that("misl() produces no NAs for categorical outcome", {
  result <- misl(make_cat_data(), m = 2, maxit = 2,
                 cat_method = "rand_forest", seed = 3)
  expect_false(anyNA(result[[1]]$datasets))
})

test_that("misl() produces no NAs for mixed outcome types", {
  result <- misl(
    make_mixed_data(), m = 2, maxit = 2,
    con_method = "glm", bin_method = "glm", cat_method = "rand_forest",
    seed = 4
  )
  expect_false(anyNA(result[[1]]$datasets))
})

# ── misl() -- imputed values are plausible ────────────────────────────────────

test_that("misl() binary imputations are only 0 or 1", {
  df     <- make_bin_data()
  result <- misl(df, m = 2, maxit = 2, bin_method = "glm", seed = 2)
  for (i in seq_along(result)) {
    expect_true(all(result[[i]]$datasets$y %in% c(0, 1)))
  }
})

test_that("misl() categorical imputations stay within observed levels", {
  df     <- make_cat_data()
  result <- misl(df, m = 2, maxit = 2, cat_method = "rand_forest", seed = 3)
  for (i in seq_along(result)) {
    expect_true(all(result[[i]]$datasets$y %in% levels(df$y)))
  }
})

test_that("misl() does not alter observed values", {
  df      <- make_cont_data()
  result  <- misl(df, m = 2, maxit = 2, con_method = "glm", seed = 1)
  obs_idx <- !is.na(df$y)
  for (i in seq_along(result)) {
    expect_equal(result[[i]]$datasets$y[obs_idx], df$y[obs_idx])
  }
})

# ── misl() -- trace plot ──────────────────────────────────────────────────────

test_that("misl() trace has expected columns", {
  result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1)
  expect_named(
    result[[1]]$trace,
    c("statistic", "variable", "m", "iteration", "value")
  )
})

test_that("misl() trace has mean and sd rows for continuous columns", {
  result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1)
  trace  <- result[[1]]$trace
  y_rows <- trace[trace$variable == "y", ]
  expect_true("mean" %in% y_rows$statistic)
  expect_true("sd"   %in% y_rows$statistic)
})

# ── misl() -- cv_folds parameter ─────────────────────────────────────────────

test_that("misl() runs with cv_folds = 3", {
  skip_if_not_installed("ranger")
  result <- misl(
    make_cont_data(), m = 2, maxit = 1,
    con_method = c("glm", "rand_forest"),
    cv_folds = 3, seed = 1
  )
  expect_false(anyNA(result[[1]]$datasets))
})

test_that("misl() cv_folds is ignored for single learner", {
  result <- misl(
    make_cont_data(), m = 2, maxit = 1,
    con_method = "glm", cv_folds = 3, seed = 1
  )
  expect_false(anyNA(result[[1]]$datasets))
})

test_that("misl() errors on invalid cv_folds", {
  df <- make_cont_data()
  expect_error(misl(df, m = 1, maxit = 1, con_method = "glm", cv_folds = 1),   "'cv_folds' must be an integer >= 2.")
  expect_error(misl(df, m = 1, maxit = 1, con_method = "glm", cv_folds = 1.5), "'cv_folds' must be an integer >= 2.")
  expect_error(misl(df, m = 1, maxit = 1, con_method = "glm", cv_folds = "3"), "'cv_folds' must be an integer >= 2.")
})

# ── misl() -- binomial bootstrap guard ───────────────────────────────────────

test_that("misl() handles imbalanced binary columns without error", {
  set.seed(99)
  n  <- 100
  df <- data.frame(
    x  = rnorm(n),
    y  = c(rep(1, 95), rep(0, 5))   # heavily imbalanced
  )
  df[sample(which(df$y == 1), 5), "y"] <- NA
  expect_no_error(
    misl(df, m = 2, maxit = 1, bin_method = "glm", seed = 99)
  )
})

# ── misl() -- ignore_predictors ──────────────────────────────────────────────

test_that("misl() respects ignore_predictors", {
  df <- make_cont_data()
  expect_no_error(
    misl(df, m = 1, maxit = 1, con_method = "glm",
         ignore_predictors = "x1", seed = 1)
  )
})

# ── misl() -- single vs multiple learners ────────────────────────────────────

test_that("misl() runs with a single learner (no stacking)", {
  result <- misl(make_cont_data(), m = 2, maxit = 1, con_method = "glm", seed = 1)
  expect_false(anyNA(result[[1]]$datasets))
})

test_that("misl() runs with multiple learners (stacking)", {
  skip_if_not_installed("ranger")
  result <- misl(
    make_cont_data(), m = 2, maxit = 1,
    con_method = c("glm", "rand_forest"), seed = 1
  )
  expect_false(anyNA(result[[1]]$datasets))
})

# ── misl() -- multinom_reg learner ───────────────────────────────────────────

test_that("misl() runs with multinom_reg for categorical outcome", {
  result <- misl(
    make_cat_data(), m = 2, maxit = 1,
    cat_method = "multinom_reg", seed = 3
  )
  expect_false(anyNA(result[[1]]$datasets))
  expect_true(all(result[[1]]$datasets$y %in% levels(make_cat_data()$y)))
})

# ── misl() -- error handling ──────────────────────────────────────────────────

test_that("misl() errors with a helpful message for unknown learner", {
  suppressWarnings(
    expect_error(
      misl(make_cont_data(), m = 1, maxit = 1, con_method = "lasso"),
      "Unknown learner"
    )
  )
})

test_that("misl() errors with install hint for missing backend package", {
  local_mocked_bindings(
    requireNamespace = function(pkg, ...) if (pkg == "xgboost") FALSE else TRUE,
    .package = "base"
  )
  suppressWarnings(
    expect_error(
      misl(make_cont_data(), m = 1, maxit = 1, con_method = "boost_tree"),
      "install.packages"
    )
  )
})