# tests/testthat/test-misl.R # Run with: devtools::test() # ── Shared test fixtures ────────────────────────────────────────────────────── make_cont_data <- function(n = 50, seed = 1) { set.seed(seed) df <- data.frame(x1 = rnorm(n), x2 = rnorm(n), y = rnorm(n)) df[sample(n, 10), "y"] <- NA df } make_bin_data <- function(n = 50, seed = 2) { set.seed(seed) df <- data.frame(x1 = rnorm(n), x2 = rnorm(n), y = rbinom(n, 1, 0.5)) df[sample(n, 10), "y"] <- NA df } make_cat_data <- function(n = 50, seed = 3) { set.seed(seed) df <- data.frame( x1 = rnorm(n), x2 = rnorm(n), y = factor(sample(c("a", "b", "c"), n, replace = TRUE)) ) df[sample(n, 10), "y"] <- NA df } make_mixed_data <- function(n = 80, seed = 4) { set.seed(seed) df <- data.frame( cont = rnorm(n), bin = rbinom(n, 1, 0.5), cat = factor(sample(c("x", "y", "z"), n, replace = TRUE)), pred = rnorm(n) ) df[sample(n, 10), "cont"] <- NA df[sample(n, 10), "bin"] <- NA df[sample(n, 10), "cat"] <- NA df } # ── check_dataset() ─────────────────────────────────────────────────────────── test_that("check_dataset() errors on non-data-frame input", { expect_error(check_dataset("a string"), "data frame or matrix") expect_error(check_dataset(1:10), "data frame or matrix") expect_error(check_dataset(list(a = 1:5)), "data frame or matrix") }) test_that("check_dataset() errors on empty input", { expect_error(check_dataset(data.frame()), "at least one row") expect_error(check_dataset(data.frame(a = integer())), "at least one row") }) test_that("check_dataset() errors on complete dataset", { expect_error(check_dataset(datasets::iris), "complete") }) test_that("check_dataset() passes silently on valid incomplete data", { df <- make_cont_data() expect_invisible(check_dataset(df)) }) # ── check_datatype() ────────────────────────────────────────────────────────── test_that("check_datatype() identifies categorical columns", { expect_equal(check_datatype(factor(c("a", "b", "a"))), "categorical") }) test_that("check_datatype() identifies binomial columns", { expect_equal(check_datatype(c(0, 1, 0, 1, NA)), "binomial") expect_equal(check_datatype(c(0, 0, 0)), "binomial") }) test_that("check_datatype() identifies continuous columns", { expect_equal(check_datatype(c(1.5, 2.3, 3.7)), "continuous") expect_equal(check_datatype(c(1L, 2L, 3L)), "continuous") expect_equal(check_datatype(c(0, 1, 2)), "continuous") }) # ── list_learners() ─────────────────────────────────────────────────────────── test_that("list_learners() returns a tibble with expected columns", { result <- list_learners() expect_s3_class(result, "tbl_df") expect_true(all(c("learner", "description", "package", "installed") %in% colnames(result))) }) test_that("list_learners() filters by outcome type", { cont <- list_learners("continuous") bin <- list_learners("binomial") cat <- list_learners("categorical") expect_true("glm" %in% cont$learner) expect_true("glm" %in% bin$learner) expect_false("glm" %in% cat$learner) expect_true("multinom_reg" %in% cat$learner) expect_false("multinom_reg" %in% cont$learner) expect_false("multinom_reg" %in% bin$learner) expect_false("continuous" %in% colnames(cont)) expect_false("binomial" %in% colnames(bin)) expect_false("categorical" %in% colnames(cat)) }) test_that("list_learners() installed_only returns only installed learners", { result <- list_learners(installed_only = TRUE) expect_true(all(result$installed)) }) test_that("list_learners() errors on invalid outcome_type", { expect_error(list_learners("invalid"), "should be one of") }) # ── misl() -- output structure ──────────────────────────────────────────────── test_that("misl() returns a list of length m", { result <- misl(make_cont_data(), m = 2, maxit = 1, con_method = "glm", seed = 1) expect_type(result, "list") expect_length(result, 2) }) test_that("misl() each element has $datasets and $trace", { result <- misl(make_cont_data(), m = 2, maxit = 1, con_method = "glm", seed = 1) for (i in seq_along(result)) { expect_named(result[[i]], c("datasets", "trace")) } }) test_that("misl() imputed datasets have same dimensions as input", { df <- make_cont_data() result <- misl(df, m = 2, maxit = 1, con_method = "glm", seed = 1) for (i in seq_along(result)) { expect_equal(dim(result[[i]]$datasets), dim(df)) } }) # ── misl() -- no remaining missingness ─────────────────────────────────────── test_that("misl() produces no NAs for continuous outcome", { result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1) expect_false(anyNA(result[[1]]$datasets)) expect_false(anyNA(result[[2]]$datasets)) }) test_that("misl() produces no NAs for binomial outcome", { result <- misl(make_bin_data(), m = 2, maxit = 2, bin_method = "glm", seed = 2) expect_false(anyNA(result[[1]]$datasets)) }) test_that("misl() produces no NAs for categorical outcome", { result <- misl(make_cat_data(), m = 2, maxit = 2, cat_method = "rand_forest", seed = 3) expect_false(anyNA(result[[1]]$datasets)) }) test_that("misl() produces no NAs for mixed outcome types", { result <- misl( make_mixed_data(), m = 2, maxit = 2, con_method = "glm", bin_method = "glm", cat_method = "rand_forest", seed = 4 ) expect_false(anyNA(result[[1]]$datasets)) }) # ── misl() -- imputed values are plausible ──────────────────────────────────── test_that("misl() binary imputations are only 0 or 1", { df <- make_bin_data() result <- misl(df, m = 2, maxit = 2, bin_method = "glm", seed = 2) for (i in seq_along(result)) { expect_true(all(result[[i]]$datasets$y %in% c(0, 1))) } }) test_that("misl() categorical imputations stay within observed levels", { df <- make_cat_data() result <- misl(df, m = 2, maxit = 2, cat_method = "rand_forest", seed = 3) for (i in seq_along(result)) { expect_true(all(result[[i]]$datasets$y %in% levels(df$y))) } }) test_that("misl() does not alter observed values", { df <- make_cont_data() result <- misl(df, m = 2, maxit = 2, con_method = "glm", seed = 1) obs_idx <- !is.na(df$y) for (i in seq_along(result)) { expect_equal(result[[i]]$datasets$y[obs_idx], df$y[obs_idx]) } }) # ── misl() -- trace plot ────────────────────────────────────────────────────── test_that("misl() trace has expected columns", { result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1) expect_named( result[[1]]$trace, c("statistic", "variable", "m", "iteration", "value") ) }) test_that("misl() trace has mean and sd rows for continuous columns", { result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1) trace <- result[[1]]$trace y_rows <- trace[trace$variable == "y", ] expect_true("mean" %in% y_rows$statistic) expect_true("sd" %in% y_rows$statistic) }) # ── misl() -- cv_folds parameter ───────────────────────────────────────────── test_that("misl() runs with cv_folds = 3", { skip_if_not_installed("ranger") result <- misl( make_cont_data(), m = 2, maxit = 1, con_method = c("glm", "rand_forest"), cv_folds = 3, seed = 1 ) expect_false(anyNA(result[[1]]$datasets)) }) test_that("misl() cv_folds is ignored for single learner", { result <- misl( make_cont_data(), m = 2, maxit = 1, con_method = "glm", cv_folds = 3, seed = 1 ) expect_false(anyNA(result[[1]]$datasets)) }) test_that("misl() errors on invalid cv_folds", { df <- make_cont_data() expect_error(misl(df, m = 1, maxit = 1, con_method = "glm", cv_folds = 1), "'cv_folds' must be an integer >= 2.") expect_error(misl(df, m = 1, maxit = 1, con_method = "glm", cv_folds = 1.5), "'cv_folds' must be an integer >= 2.") expect_error(misl(df, m = 1, maxit = 1, con_method = "glm", cv_folds = "3"), "'cv_folds' must be an integer >= 2.") }) # ── misl() -- binomial bootstrap guard ─────────────────────────────────────── test_that("misl() handles imbalanced binary columns without error", { set.seed(99) n <- 100 df <- data.frame( x = rnorm(n), y = c(rep(1, 95), rep(0, 5)) # heavily imbalanced ) df[sample(which(df$y == 1), 5), "y"] <- NA expect_no_error( misl(df, m = 2, maxit = 1, bin_method = "glm", seed = 99) ) }) # ── misl() -- ignore_predictors ────────────────────────────────────────────── test_that("misl() respects ignore_predictors", { df <- make_cont_data() expect_no_error( misl(df, m = 1, maxit = 1, con_method = "glm", ignore_predictors = "x1", seed = 1) ) }) # ── misl() -- single vs multiple learners ──────────────────────────────────── test_that("misl() runs with a single learner (no stacking)", { result <- misl(make_cont_data(), m = 2, maxit = 1, con_method = "glm", seed = 1) expect_false(anyNA(result[[1]]$datasets)) }) test_that("misl() runs with multiple learners (stacking)", { skip_if_not_installed("ranger") result <- misl( make_cont_data(), m = 2, maxit = 1, con_method = c("glm", "rand_forest"), seed = 1 ) expect_false(anyNA(result[[1]]$datasets)) }) # ── misl() -- multinom_reg learner ─────────────────────────────────────────── test_that("misl() runs with multinom_reg for categorical outcome", { result <- misl( make_cat_data(), m = 2, maxit = 1, cat_method = "multinom_reg", seed = 3 ) expect_false(anyNA(result[[1]]$datasets)) expect_true(all(result[[1]]$datasets$y %in% levels(make_cat_data()$y))) }) # ── misl() -- error handling ────────────────────────────────────────────────── test_that("misl() errors with a helpful message for unknown learner", { suppressWarnings( expect_error( misl(make_cont_data(), m = 1, maxit = 1, con_method = "lasso"), "Unknown learner" ) ) }) test_that("misl() errors with install hint for missing backend package", { local_mocked_bindings( requireNamespace = function(pkg, ...) if (pkg == "xgboost") FALSE else TRUE, .package = "base" ) suppressWarnings( expect_error( misl(make_cont_data(), m = 1, maxit = 1, con_method = "boost_tree"), "install.packages" ) ) })