# Out-of-sample (OOS) Application iris1 <- generateNA(iris, p = c(Sepal.Width = 0.2, Species = 0.1), seed = 1L) iris2 <- generateNA(iris, p = 0.2, seed = 1L) test_that("OOS can work in some cases without forests", { imp <- missRanger(iris1, verbose = 0, seed = 1L, num.trees = 10, data_only = FALSE) # There are no missings expect_equal(predict(imp, head(iris)), head(iris)) # We use univariate imputation at prediction stage expect_message( pred <- predict(imp, tail(iris1), iter = 0L), "Only univariate imputations done" ) expect_true(!anyNA(pred)) # We use univariate imputation at missRanger() stage imp <- missRanger( iris1, . ~ 1, verbose = 0, seed = 1L, num.trees = 10, data_only = FALSE ) expect_message( pred <- predict(imp, tail(iris1)), "Only univariate imputations done" ) expect_true(!anyNA(pred)) # But usually, it gives an error imp <- missRanger(iris1, verbose = 0, seed = 1L, num.trees = 10, data_only = FALSE) # There are no missings expect_error(predict(imp, tail(iris1)), "No random forests") }) test_that("OOS removes all missings with and without PMM", { imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE) expect_true(!anyNA(predict(imp, iris2))) expect_true(!anyNA(predict(imp, iris2, pmm.k = 1L))) imp <- missRanger(iris1, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE) expect_true(!anyNA(predict(imp, iris1))) expect_true(!anyNA(predict(imp, iris1, pmm.k = 1L))) expect_error(predict(imp, iris2), "not allowed.") }) test_that("OOS is picky about wrong input", { imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE) one_row <- iris2[1L, ] one_row[, "Species"] <- "new species" expect_error(predict(imp, one_row), "Inconsistency between.") one_row <- iris2[1L, ] one_row[, "Sepal.Length"] <- "new species" expect_error(predict(imp, one_row), "Inconsistency between.") }) test_that("OOS expects columns to be present, but extra columns are okay", { imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE) expect_error(predict(imp, head(iris2[, -5])), "Variables not present.") expect_no_error(predict(imp, iris2 |> transform(additional = 1))) }) test_that("OOS can deal with single-iteration fits, when one forest is missing", { imp <- missRanger( iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE, maxiter = 1L ) expect_message(pred <- predict(imp, iris2), "No random forest") expect_no_message(pred <- predict(imp, iris2, verbose = FALSE)) }) test_that("OOS seed works", { imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE) x1 <- predict(imp, iris2, seed = 1L) x2 <- predict(imp, iris2, seed = 1L) x3 <- predict(imp, iris2, seed = 2L) expect_equal(x1, x2) expect_false(identical(x1, x3)) }) test_that("OOS PMM works", { imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE) x1 <- predict(imp, iris2, seed = 1L) x2 <- predict(imp, iris2, seed = 1L, pmm.k = 5) expect_false(identical(x1, x2)) # Fitted with PMM imp <- missRanger( iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE, pmm.k = 5L ) x1 <- predict(imp, iris2, seed = 1L, pmm.k = 0) x2 <- predict(imp, iris2, seed = 1L, pmm.k = 5) expect_false(identical(x1, x2)) }) test_that("OOS iter works", { imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE) x1 <- predict(imp, iris2, seed = 1L, iter = 1, pmm.k = 4L) x2 <- predict(imp, iris2, seed = 1L, iter = 4, pmm.k = 4L) expect_false(identical(x1, x2)) }) test_that("OOS is not working when there are missings in impute_by", { imp <- missRanger( iris1, Species ~ Sepal.Length, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE ) new_row <- iris[1L, ] new_row[, c("Sepal.Length", "Species")] <- NA expect_error(predict(imp, new_row), "not allowed") }) test_that("OOS does 1 iter if only easy case", { imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE) new_row <- iris[1L, ] new_row[, "Sepal.Length"] <- NA x1 <- predict(imp, new_row, seed = 1L, pmm.k = 5L, iter = 10L) x2 <- predict(imp, new_row, seed = 1L, pmm.k = 5L, iter = 1L) expect_equal(x1, x2) }) test_that("OOS does not fail if there is all missings, even of wrong type", { imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE) new_rows <- iris[1:2, ] new_rows[] <- NA expect_no_error(x1 <- predict(imp, new_rows, seed = 1L)) }) test_that("case works where easy case fills a complete column", { imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE) X <- data.frame( Sepal.Length = c(5.1, NA), Sepal.Width = c(3.5, 3.4), Petal.Length = c(NA, 1.4), Petal.Width = c(NA, 0.3), Species = factor("setosa") ) expect_no_error(predict(imp, X)) }) n <- 200L X <- data.frame( int = seq_len(n), double = log(seq_len(n)), char = rep(LETTERS[1:4], n %/% 4), fact = factor(rep(LETTERS[1:2], n %/% 2)), logi = seq_len(n) > n %/% 3 ) X_NA <- generateNA(X[1:5], p = 0.2, seed = 1L) test_that("non-syntactic column names work", { X_NA2 <- X_NA colnames(X_NA2)[1:2] <- c("1bad name", "2 also bad") imp1 <- missRanger(X_NA2, num.trees = 20L, verbose = 0L, seed = 1L, keep_forests = TRUE) expect_equal(colnames(predict(imp1, head(X_NA2))), colnames(X_NA2)) }) test_that("OOS does not fail if there is all missings, even of wrong type (MORE TYPES)", { imp1 <- missRanger( X_NA, num.trees = 20L, verbose = 0L, seed = 1L, keep_forests = TRUE ) new_rows <- X[1:2, ] new_rows[] <- NA expect_no_error(x1 <- predict(imp1, new_rows, seed = 1L, pmm.k = 1L)) expect_equal(lapply(x1, class), lapply(X, class)) # Without PMM, ints might turn into doubles expect_no_error(x1 <- predict(imp1, new_rows, seed = 1L, pmm.k = 0L)) xp <- list(int = "numeric", double = "numeric", char = "character", fact = "factor", logi = "logical") expect_equal(lapply(x1, class), xp) })