# varsel() ----------------------------------------------------------------
# Tests for projpred's varsel() (variable selection without cross-validation).
# NOTE(review): relies on fixtures defined in the test setup files (`vss`,
# `args_vs`, `refmods`, `args_ref`, `args_fit`, `fits`, `run_vs`, `run_more`,
# helper functions such as vsel_tester(), excl_nonargs(), ...) — confirm
# against tests/testthat/setup.R.

context("varsel()")

test_that(paste(
  "`object` of class `refmodel` and arguments `method`, `nterms_max`,",
  "`nclusters`, and `nclusters_pred` work"
), {
  skip_if_not(run_vs)
  for (tstsetup in names(vss)) {
    tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref
    mod_crr <- args_vs[[tstsetup]]$mod_nm
    fam_crr <- args_vs[[tstsetup]]$fam_nm
    prj_crr <- args_vs[[tstsetup]]$prj_nm
    # Default search method is "forward" when not specified explicitly:
    meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      vss[[tstsetup]],
      refmod_expected = refmods[[tstsetup_ref]],
      prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      search_terms_expected = args_vs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_vs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})

test_that("invalid `object` fails", {
  expect_error(varsel(rnorm(5), verbose = FALSE), "no applicable method")
})

test_that("invalid `method` fails", {
  for (tstsetup in names(refmods)) {
    expect_error(varsel(refmods[[tstsetup]], method = "k-fold"),
                 "^Unexpected value for argument `method`\\.$",
                 info = tstsetup)
    if (args_ref[[tstsetup]]$mod_nm != "glm") {
      expect_error(varsel(refmods[[tstsetup]], method = "L1"),
                   paste("^L1 search is only supported for reference models",
                         "without multilevel and without additive",
                         "\\(\"smoothing\"\\) terms\\.$"),
                   info = tstsetup)
    }
    if (args_ref[[tstsetup]]$mod_nm == "glm" &&
        args_ref[[tstsetup]]$prj_nm == "augdat") {
      expect_error(varsel(refmods[[tstsetup]], method = "L1"),
                   paste("^Currently, the augmented-data projection may not be",
                         "combined with an L1 search\\.$"),
                   info = tstsetup)
    }
  }
})

test_that("`seed` works (and restores the RNG state afterwards)", {
  skip_if_not(run_vs)
  # To save time:
  tstsetups <- grep("\\.glm\\.gauss\\.", names(vss), value = TRUE)
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    vs_orig <- vss[[tstsetup]]
    rand_orig <- runif(1) # Just to advance `.Random.seed[2]`.
    .Random.seed_repr1 <- .Random.seed
    vs_repr <- do.call(varsel, c(
      list(object = refmods[[args_vs_i$tstsetup_ref]]),
      excl_nonargs(args_vs_i)
    ))
    .Random.seed_repr2 <- .Random.seed
    rand_new <- runif(1) # Just to advance `.Random.seed[2]`.
    # Expected equality:
    expect_equal(vs_repr, vs_orig, info = tstsetup)
    expect_equal(.Random.seed_repr2, .Random.seed_repr1, info = tstsetup)
    # Expected inequality:
    expect_false(isTRUE(all.equal(rand_new, rand_orig)), info = tstsetup)
  }
})

## d_test -----------------------------------------------------------------

test_that(paste(
  "`d_test` set to the training data gives the same results as its default"
), {
  skip_if_not(run_vs)
  tstsetups <- names(vss)
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    tstsetup_ref <- args_vs_i$tstsetup_ref
    pkg_crr <- args_vs_i$pkg_nm
    mod_crr <- args_vs_i$mod_nm
    fam_crr <- args_vs_i$fam_nm
    prj_crr <- args_vs_i$prj_nm
    # Rebuild the training-data offsets and observation weights exactly as
    # the reference model stores them:
    if (!all(refmods[[tstsetup_ref]]$offset == 0)) {
      offs_crr <- offs_tst
    } else {
      offs_crr <- rep(0, nobsv)
    }
    if (!all(refmods[[tstsetup_ref]]$wobs == 1)) {
      wobs_crr <- wobs_tst
    } else {
      wobs_crr <- rep(1, nobsv)
    }
    formul_fit_crr <- args_fit[[args_vs_i$tstsetup_fit]]$formula
    dat_crr <- get_dat_formul(formul_crr = formul_fit_crr,
                              needs_adj = grepl("\\.spclformul", tstsetup))
    d_test_crr <- list(
      data = dat,
      offset = offs_crr,
      weights = wobs_crr,
      y = dat_crr[[stdize_lhs(formul_fit_crr)$y_nm]]
    )
    y_oscale_crr <- d_test_crr$y
    if (prj_crr %in% c("latent", "augdat")) {
      if (use_fac) {
        yunqs <- yunq_chr
      } else {
        yunqs <- as.character(yunq_num)
      }
      if (!(prj_crr == "latent" && fam_crr == "brnll")) {
        # Response levels: prefer the user-specified category sets, falling
        # back to the observed unique values:
        lvls_crr <- args_ref[[args_vs_i$tstsetup_ref]]$augdat_y_unqs
        lvls_crr <- lvls_crr %||%
          args_ref[[args_vs_i$tstsetup_ref]]$latent_y_unqs
        lvls_crr <- lvls_crr %||% yunqs
        y_oscale_crr <- factor(as.character(y_oscale_crr), levels = lvls_crr,
                               ordered = is.ordered(y_oscale_crr))
      }
      if (prj_crr == "augdat") {
        d_test_crr$y <- y_oscale_crr
      } else if (prj_crr == "latent") {
        d_test_crr$y <- colMeans(
          unname(posterior_linpred(fits[[args_vs_i$tstsetup_fit]]))
        )
      }
    }
    d_test_crr$y_oscale <- y_oscale_crr
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    vs_repr <- suppressWarnings(do.call(varsel, c(
      list(object = refmods[[tstsetup_ref]], d_test = d_test_crr),
      excl_nonargs(args_vs_i)
    )))
    meth_exp_crr <- args_vs_i$method %||% "forward"
    vsel_tester(
      vs_repr,
      refmod_expected = refmods[[tstsetup_ref]],
      ywtest_expected = setNames(
        as.data.frame(d_test_crr[nms_y_wobs_test(wobs_nm = "weights")]),
        nms_y_wobs_test()
      ),
      prd_trms_len_expected = args_vs_i$nterms_max,
      method_expected = meth_exp_crr,
      search_terms_expected = args_vs_i$search_terms,
      search_trms_empty_size = length(args_vs_i$search_terms) &&
        all(grepl("\\+", args_vs_i$search_terms)),
      search_control_expected = args_vs_i[c("avoid.increase")],
      info_str = tstsetup
    )
    expect_equal(vs_repr[setdiff(names(vs_repr),
                                 c("type_test", "y_wobs_test"))],
                 vss[[tstsetup]][setdiff(names(vss[[tstsetup]]),
                                         c("type_test", "y_wobs_test"))],
                 info = tstsetup)
    y_wobs_test_orig <- vss[[tstsetup]]$y_wobs_test
    if (pkg_crr == "brms") {
      # brms seems to set argument `contrasts`, but this is not important for
      # projpred, so ignore it in the comparison:
      attr(y_wobs_test_orig$y, "contrasts") <- NULL
      attr(y_wobs_test_orig$y_oscale, "contrasts") <- NULL
    }
    expect_equal(vs_repr$y_wobs_test, y_wobs_test_orig, info = tstsetup)
  }
})

test_that(paste(
  "`d_test` set to actual test data gives a `$summaries$sub`",
  "object that can be reproduced by proj_linpred() and a",
  "`$summaries$ref` object that can be reproduced by",
  "posterior_epred() and log_lik()"
), {
  skip_if_not(run_vs)
  # Save the RNG state so it can be restored at the end of this test:
  if (exists(".Random.seed", envir = .GlobalEnv)) {
    rng_old <- get(".Random.seed", envir = .GlobalEnv)
  }
  tstsetups <- names(vss)
  ### TODO (GAMMs): Currently, the following test setups (can) lead to the error
  ### ```
  ### Error in t(as.matrix(b$reTrms$Zt[ii, ])) %*%
  ###   as.matrix(c(as.matrix(ranef[[i]]))) :
  ###   non-conformable arguments
  ### ```
  ### thrown by predict.gamm4(). This needs to be fixed. For now, exclude these
  ### test setups:
  tstsetups <- grep("\\.gamm\\.", tstsetups, value = TRUE, invert = TRUE)
  ###
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    tstsetup_ref <- args_vs_i$tstsetup_ref
    pkg_crr <- args_vs_i$pkg_nm
    mod_crr <- args_vs_i$mod_nm
    fam_crr <- args_vs_i$fam_nm
    prj_crr <- args_vs_i$prj_nm
    # Offsets and weights for the *independent* test data:
    if (!all(refmods[[tstsetup_ref]]$offset == 0)) {
      offs_crr <- offs_indep
    } else {
      offs_crr <- rep(0, nobsv_indep)
    }
    if (!all(refmods[[tstsetup_ref]]$wobs == 1)) {
      wobs_crr <- wobs_indep
    } else {
      wobs_crr <- rep(1, nobsv_indep)
    }
    formul_fit_crr <- args_fit[[args_vs_i$tstsetup_fit]]$formula
    y_nm_crr <- stdize_lhs(formul_fit_crr)$y_nm
    dat_indep_crr <- get_dat_formul(
      formul_crr = formul_fit_crr,
      needs_adj = grepl("\\.spclformul", tstsetup),
      dat_crr = dat_indep
    )
    d_test_crr <- list(
      data = dat_indep,
      offset = offs_crr,
      weights = wobs_crr,
      y = dat_indep_crr[[y_nm_crr]]
    )
    y_oscale_crr <- d_test_crr$y
    if (prj_crr %in% c("latent", "augdat")) {
      if (use_fac) {
        yunqs <- yunq_chr
      } else {
        yunqs <- as.character(yunq_num)
      }
      if (!(prj_crr == "latent" && fam_crr == "brnll")) {
        lvls_crr <- args_ref[[args_vs_i$tstsetup_ref]]$augdat_y_unqs
        lvls_crr <- lvls_crr %||%
          args_ref[[args_vs_i$tstsetup_ref]]$latent_y_unqs
        lvls_crr <- lvls_crr %||% yunqs
        y_oscale_crr <- factor(as.character(y_oscale_crr), levels = lvls_crr,
                               ordered = is.ordered(y_oscale_crr))
      }
      if (prj_crr == "augdat") {
        d_test_crr$y <- y_oscale_crr
      } else if (prj_crr == "latent") {
        # For the latent projection, the "response" is the expected linear
        # predictor on the test data (rstanarm needs the offsets passed
        # explicitly; brms picks them up from `newdata`):
        if (pkg_crr == "rstanarm") {
          post_linpred <- posterior_linpred(fits[[args_vs_i$tstsetup_fit]],
                                            newdata = dat_indep,
                                            offset = d_test_crr$offset)
        } else {
          post_linpred <- posterior_linpred(fits[[args_vs_i$tstsetup_fit]],
                                            newdata = dat_indep)
        }
        d_test_crr$y <- colMeans(unname(post_linpred))
      }
    }
    d_test_crr$y_oscale <- y_oscale_crr
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    vs_indep <- suppressWarnings(do.call(varsel, c(
      list(object = refmods[[tstsetup_ref]], d_test = d_test_crr),
      excl_nonargs(args_vs_i)
    )))
    meth_exp_crr <- args_vs_i$method %||% "forward"
    vsel_tester(
      vs_indep,
      refmod_expected = refmods[[tstsetup_ref]],
      ywtest_expected = setNames(
        as.data.frame(d_test_crr[nms_y_wobs_test(wobs_nm = "weights")]),
        nms_y_wobs_test()
      ),
      prd_trms_len_expected = args_vs_i$nterms_max,
      method_expected = meth_exp_crr,
      search_terms_expected = args_vs_i$search_terms,
      search_trms_empty_size = length(args_vs_i$search_terms) &&
        all(grepl("\\+", args_vs_i$search_terms)),
      search_control_expected = args_vs_i[c("avoid.increase")],
      info_str = tstsetup
    )
    ### Summaries for the submodels -------------------------------------------
    if (!(getOption("projpred.mlvl_pred_new", FALSE) &&
          mod_crr %in% c("glmm", "gamm") &&
          any(grepl("\\|", vs_indep$predictor_ranking)))) {
      # In the negation of this case (i.e., multilevel models with option
      # `projpred.mlvl_pred_new` being set to `TRUE`), proj_linpred() can't be
      # used to calculate the reference model's performance statistics because
      # proj_linpred()'s argument `.seed` cannot be set such that the
      # .Random.seed from inside proj_linpred() at the place where the new
      # group-level effects are drawn coincides with .Random.seed from inside
      # varsel() at the place where the new group-level effects are drawn (not
      # even `.seed = NA` with an appropriate preparation is possible).
      # For getting the correct seed in proj_linpred():
      set.seed(args_vs_i$seed)
      p_sel_dummy <- get_refdist(refmods[[tstsetup_ref]],
                                 nclusters = vs_indep$nprjdraws_search)
      # Use suppressWarnings() because test_that() somehow redirects stderr()
      # and so throws warnings that projpred wants to capture internally:
      pl_indep <- suppressWarnings(proj_linpred(
        vs_indep,
        newdata = dat_indep_crr,
        offsetnew = d_test_crr$offset,
        weightsnew = d_test_crr$weights,
        transform = TRUE,
        integrated = TRUE,
        .seed = NA,
        nterms = c(0L, seq_along(vs_indep$predictor_ranking)),
        nclusters = args_vs_i$nclusters_pred,
        seed = NA
      ))
      # Rename proj_linpred() output elements to the names used in
      # `$summaries$sub` (`pred` -> `mu`, `lpd` -> `lppd`):
      summ_sub_ch <- lapply(pl_indep, function(pl_indep_k) {
        names(pl_indep_k)[names(pl_indep_k) == "pred"] <- "mu"
        names(pl_indep_k)[names(pl_indep_k) == "lpd"] <- "lppd"
        pl_indep_k$mu <- unname(drop(pl_indep_k$mu))
        pl_indep_k$lppd <- drop(pl_indep_k$lppd)
        if (!is.null(refmods[[tstsetup_ref]]$family$cats)) {
          pl_indep_k$mu <- structure(
            as.vector(pl_indep_k$mu),
            ndiscrete = length(refmods[[tstsetup_ref]]$family$cats),
            class = "augvec"
          )
        }
        return(pl_indep_k)
      })
      if (prj_crr == "latent") {
        # For getting the correct seed in proj_linpred():
        set.seed(args_vs_i$seed)
        p_sel_dummy <- get_refdist(refmods[[tstsetup_ref]],
                                   nclusters = vs_indep$nprjdraws_search)
        dat_indep_crr[[paste0(".", y_nm_crr)]] <- d_test_crr$y
        expect_warning(
          pl_indep_lat <- proj_linpred(
            vs_indep,
            newdata = dat_indep_crr,
            offsetnew = d_test_crr$offset,
            weightsnew = d_test_crr$weights,
            transform = FALSE,
            integrated = TRUE,
            .seed = NA,
            nterms = c(0L, seq_along(vs_indep$predictor_ranking)),
            nclusters = args_vs_i$nclusters_pred,
            seed = NA
          ),
          get_warn_wrhs_orhs(tstsetup, weightsnew = d_test_crr$weights,
                             offsetnew = d_test_crr$offset),
          info = tstsetup
        )
        y_lat_mat <- matrix(d_test_crr$y, nrow = args_vs_i$nclusters_pred,
                            ncol = nobsv_indep, byrow = TRUE)
        summ_sub_ch_lat <- lapply(seq_along(pl_indep_lat), function(k_idx) {
          pl_indep_k <- pl_indep_lat[[k_idx]]
          names(pl_indep_k)[names(pl_indep_k) == "pred"] <- "mu"
          names(pl_indep_k)[names(pl_indep_k) == "lpd"] <- "lppd"
          pl_indep_k$mu <- unname(drop(pl_indep_k$mu))
          pl_indep_k$lppd <- drop(pl_indep_k$lppd)
          return(pl_indep_k)
        })
        # Latent-scale summaries hold the top level; response-scale
        # summaries are nested under `oscale`:
        summ_sub_ch <- lapply(seq_along(summ_sub_ch), function(k_idx) {
          c(summ_sub_ch_lat[[k_idx]], list("oscale" = summ_sub_ch[[k_idx]]))
        })
      }
      names(summ_sub_ch) <- NULL
      expect_equal(vs_indep$summaries$sub, summ_sub_ch,
                   tolerance = .Machine$double.eps, info = tstsetup)
    }
    ### Summaries for the reference model -------------------------------------
    if (getOption("projpred.mlvl_pred_new", FALSE)) {
      dat_indep_crr$z.1 <- as.factor(paste0("NEW_", dat_indep_crr$z.1))
    }
    if (pkg_crr == "rstanarm") {
      mu_new <- rstantools::posterior_epred(refmods[[tstsetup_ref]]$fit,
                                            newdata = dat_indep_crr,
                                            offset = d_test_crr$offset)
      if (fam_crr == "cumul") {
        eta_new <- rstantools::posterior_linpred(refmods[[tstsetup_ref]]$fit,
                                                 newdata = dat_indep_crr,
                                                 offset = d_test_crr$offset)
        # The following shows that in case of an rstanarm::stan_polr() fit,
        # rstantools::posterior_epred() returns the linear predictors with a
        # threshold of zero, transformed to response scale (which is not really
        # helpful):
        mu_new_ch <- augdat_ilink_cumul(
          array(eta_new, dim = c(dim(eta_new), 1L)),
          link = link_str
        )
        stopifnot(isTRUE(all.equal(unname(mu_new), mu_new_ch[, , 1],
                                   tolerance = .Machine$double.eps)))
        # Therefore, `mu_new` has to be adapted to incorporate the correct
        # thresholds:
        drws <- as.matrix(refmods[[tstsetup_ref]]$fit)
        drws_thres <- drws[, grep("\\|", colnames(drws))]
        mu_new <- apply(drws_thres, 2, function(thres_vec) {
          thres_vec - eta_new
        }, simplify = FALSE)
        mu_new <- abind::abind(mu_new, rev.along = 0)
        mu_new <- augdat_ilink_cumul(mu_new, link = link_str)
      }
      if (grepl("\\.without_wobs", tstsetup)) {
        lppd_new <- rstantools::log_lik(refmods[[tstsetup_ref]]$fit,
                                        newdata = dat_indep_crr,
                                        offset = d_test_crr$offset)
      } else {
        # Currently, rstanarm issue #567 causes an error to be thrown when
        # calling log_lik(). Therefore, use the following dummy which
        # guarantees test success:
        lppd_new <- matrix(vs_indep$summaries$ref$lppd, nrow = nrefdraws,
                           ncol = nobsv_indep, byrow = TRUE)
      }
      if (prj_crr == "latent") {
        mu_new_lat <- rstantools::posterior_linpred(
          refmods[[tstsetup_ref]]$fit, newdata = dat_indep_crr,
          offset = d_test_crr$offset
        )
      }
    } else if (pkg_crr == "brms") {
      # brms draws new group-level effects; replay the seed sequence that
      # projpred uses internally so the draws coincide:
      expr_seed <- expression({
        set.seed(seed2_tst)
        kfold_seed_dummy <- sample.int(.Machine$integer.max, 1)
        refprd_seed_dummy <- sample.int(.Machine$integer.max, 1)
        set.seed(refprd_seed_dummy)
      })
      eval(expr_seed)
      mu_new <- rstantools::posterior_epred(refmods[[tstsetup_ref]]$fit,
                                            newdata = dat_indep_crr,
                                            allow_new_levels = TRUE,
                                            sample_new_levels = "gaussian")
      if (fam_crr == "binom") {
        # Compared to rstanarm, brms uses a different convention for the
        # binomial family: The values returned by posterior_epred() are not
        # probabilities, but the expected values on the scale of the response
        # (so the probabilities multiplied by the number of trials). Thus, we
        # have to revert this here:
        mu_new <- mu_new / matrix(wobs_indep, nrow = nrow(mu_new),
                                  ncol = ncol(mu_new), byrow = TRUE)
      }
      eval(expr_seed)
      lppd_new <- rstantools::log_lik(refmods[[tstsetup_ref]]$fit,
                                      newdata = dat_indep_crr,
                                      allow_new_levels = TRUE,
                                      sample_new_levels = "gaussian")
      if (prj_crr == "latent") {
        eval(expr_seed)
        mu_new_lat <- rstantools::posterior_linpred(
          refmods[[tstsetup_ref]]$fit, newdata = dat_indep_crr,
          allow_new_levels = TRUE, sample_new_levels = "gaussian"
        )
      }
    }
    if (length(dim(mu_new)) == 2) {
      mu_new <- colMeans(mu_new)
    } else if (length(dim(mu_new)) == 3) {
      # In fact, we have `identical(colMeans(mu_new), apply(mu_new, c(2, 3),
      # mean))` giving `TRUE`, but it's better to be explicit:
      ncat_crr <- dim(mu_new)[3]
      mu_new <- apply(mu_new, c(2, 3), mean)
      mu_new <- structure(as.vector(mu_new), ndiscrete = ncat_crr,
                          class = "augvec")
    } else {
      stop("Unexpected number of margins for `mu_new`.")
    }
    summ_ref_ch <- list(
      mu = unname(mu_new),
      lppd = unname(apply(lppd_new, 2, log_sum_exp) - log(nrefdraws))
    )
    if (prj_crr == "augdat" && fam_crr %in% c("brnll", "binom")) {
      # Binary augmented-data case: expand to the two-category augmented
      # vector (P(cat 1) = 1 - mu, P(cat 2) = mu):
      summ_ref_ch$mu <- structure(c(1 - summ_ref_ch$mu, summ_ref_ch$mu),
                                  ndiscrete = 2, class = "augvec")
    }
    if (prj_crr == "latent") {
      y_lat_mat <- matrix(d_test_crr$y, nrow = nrefdraws, ncol = nobsv_indep,
                          byrow = TRUE)
      lppd_new_lat <- dnorm(y_lat_mat, mean = mu_new_lat,
                            sd = refmods[[tstsetup_ref]]$dis, log = TRUE)
      summ_ref_ch_lat <- list(
        mu = unname(colMeans(mu_new_lat)),
        lppd = unname(apply(lppd_new_lat, 2, log_sum_exp) - log(nrefdraws))
      )
      summ_ref_ch <- c(summ_ref_ch_lat, list("oscale" = summ_ref_ch))
    }
    expect_equal(vs_indep$summaries$ref, summ_ref_ch,
                 tolerance = 1e3 * .Machine$double.eps, info = tstsetup)
    # Cross-check the reference model's lppd via loo::elpd():
    lppd_ref_ch2 <- unname(loo::elpd(lppd_new)$pointwise[, "elpd"])
    if (prj_crr == "latent") {
      lppd_ref_ch2_oscale <- lppd_ref_ch2
      expect_equal(vs_indep$summaries$ref$oscale$lppd, lppd_ref_ch2_oscale,
                   tolerance = 1e1 * .Machine$double.eps, info = tstsetup)
      lppd_ref_ch2 <- loo::elpd(lppd_new_lat)$pointwise[, "elpd"]
    }
    expect_equal(vs_indep$summaries$ref$lppd, lppd_ref_ch2,
                 tolerance = 1e2 * .Machine$double.eps, info = tstsetup)
  }
  # Restore the RNG state saved at the beginning of this test:
  if (exists("rng_old")) assign(".Random.seed", rng_old, envir = .GlobalEnv)
})

## refit_prj --------------------------------------------------------------

test_that("`refit_prj` works", {
  skip_if_not(run_vs)
  if (run_more) {
    tstsetups <- names(vss)
  } else {
    tstsetups <- head(grep("\\.glm\\.", names(vss), value = TRUE), 1)
  }
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    args_vs_i$refit_prj <- FALSE
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    vs_reuse <- suppressWarnings(do.call(varsel, c(
      list(object = refmods[[args_vs_i$tstsetup_ref]]),
      excl_nonargs(args_vs_i)
    )))
    mod_crr <- args_vs_i$mod_nm
    fam_crr <- args_vs_i$fam_nm
    prj_crr <- args_vs_i$prj_nm
    meth_exp_crr <- args_vs_i$method %||% "forward"
    extra_tol_crr <- 1.1
    if (meth_exp_crr == "L1" &&
        any(grepl(":", ranking(vs_reuse)[["fulldata"]]))) {
      ### Testing for non-increasing element `ce` (for increasing model size)
      ### doesn't make sense if the ranking of predictors involved in
      ### interactions has been changed, so we choose a higher `extra_tol`:
      extra_tol_crr <- 1.2
      ###
    }
    vsel_tester(
      vs_reuse,
      refmod_expected = refmods[[args_vs_i$tstsetup_ref]],
      prd_trms_len_expected = args_vs_i$nterms_max,
      method_expected = meth_exp_crr,
      refit_prj_expected = FALSE,
      search_terms_expected = args_vs_i$search_terms,
      search_trms_empty_size = length(args_vs_i$search_terms) &&
        all(grepl("\\+", args_vs_i$search_terms)),
      search_control_expected = args_vs_i[c("avoid.increase")],
      extra_tol = extra_tol_crr,
      info_str = tstsetup
    )
  }
})

## Warning for full Gaussian multilevel models ----------------------------

if (run_more) {
  test_that(paste(
    "the warning for full Gaussian multilevel models is thrown correctly"
  ), {
    skip_if_not(run_vs)
    # Reset the option so the default warning behavior is tested; restored
    # below via options(warn_instable_orig):
    warn_instable_orig <- options(projpred.warn_instable_projections = NULL)
    tstsetups <- names(vss)
    pick_these <- sapply(tstsetups, function(tstsetup) {
      refmod_i <- refmods[[args_vs[[tstsetup]]$tstsetup_ref]]
      return(
        (refmod_i$family$family == "gaussian" ||
           refmod_i$family$for_latent) &&
          is.null(args_vs[[tstsetup]]$search_terms)
      )
    })
    tstsetups <- tstsetups[pick_these]
    skip_if_not(length(tstsetups) > 0)
    for (tstsetup in tstsetups) {
      args_vs_i <- args_vs[[tstsetup]]
      # The warning is only expected when the search reaches the full model
      # and the full model contains group-level (`|`) terms:
      if (identical(args_vs_i$nterms_max,
                    unname(ntermss[args_vs_i$mod_nm])) &&
          any(grepl("\\|", labels(terms(
            args_fit[[args_vs_i$tstsetup_fit]]$formula
          ))))) {
        warn_expected <- "the projection onto the full model can be instable"
      } else {
        warn_expected <- NA
      }
      args_vs_i$refit_prj <- FALSE
      expect_warning(
        vs_tmp <- do.call(varsel, c(
          list(object = refmods[[args_vs_i$tstsetup_ref]]),
          excl_nonargs(args_vs_i)
        )),
        warn_expected,
        info = tstsetup
      )
    }
    options(warn_instable_orig)
  })
}

## Message when cutting off the search ------------------------------------

if (run_more) {
  test_that("the message when cutting off the search is thrown correctly", {
    skip_if_not(run_vs)
    skip_if_not_installed("rstanarm")
    mssg_cut_search_orig <- options(projpred.mssg_cut_search = NULL)
    dat_gauss <- data.frame(y = df_gaussian$y, df_gaussian$x)
    stopifnot(sum(grepl("^X", names(dat_gauss))) > 19)
    # A fit with more than 19 predictors (exceeds the default search cutoff):
    fit_exceed <- suppressWarnings(rstanarm::stan_glm(
      y ~ ., family = gaussian(), data = dat_gauss, QR = TRUE,
      chains = 1, iter = 500, refresh = 0, seed = 9876
    ))
    # A fit with one predictor fewer (does not exceed the cutoff):
    fit_not_exceed <- suppressWarnings(rstanarm::stan_glm(
      y ~ . - X20, family = gaussian(), data = dat_gauss, QR = TRUE,
      chains = 1, iter = 500, refresh = 0, seed = 9876
    ))
    for (nterms_max_i in list(NULL, 0, 20)) {
      # Only the default `nterms_max = NULL` triggers the cut-off message:
      if (is.null(nterms_max_i)) {
        mssg_expected <- "Cutting off the search at size 19\\."
      } else {
        mssg_expected <- NA
      }
      expect_message(
        vs_tmp <- varsel(
          fit_exceed, method = "forward", nterms_max = nterms_max_i,
          nclusters = 1, refit_prj = FALSE, seed = 5555, verbose = FALSE
        ),
        mssg_expected,
        info = paste("fit_exceed, nterms_max =", nterms_max_i %||% "NULL")
      )
      expect_message(
        vs_tmp <- varsel(
          fit_not_exceed, method = "forward", nterms_max = nterms_max_i,
          nclusters = 1, refit_prj = FALSE, seed = 5555, verbose = FALSE
        ),
        NA,
        info = paste("fit_not_exceed, nterms_max =", nterms_max_i %||% "NULL")
      )
    }
    options(mssg_cut_search_orig)
  })
}

## Regularization ---------------------------------------------------------

# In fact, `regul` is already checked in `test_project.R`, so the `regul`
# tests could be omitted here since varsel() and cv_varsel() also pass `regul`
# to proj_to_submodl() (usually via perf_eval(), just like project()). This
# doesn't hold for L1 search, though. So for L1 search, the `regul` tests are
# still needed.

test_that(paste(
  "for GLMs with L1 search, `regul` only has an effect on prediction, not on",
  "selection"
), {
  skip_if_not(run_vs)
  regul_tst <- c(regul_default, 1e-1, 1e2)
  stopifnot(regul_tst[1] == regul_default)
  stopifnot(all(diff(regul_tst) > 0))
  # Output elements of `vsel` objects that may be influenced by `regul`:
  vsel_nms_regul <- c("summaries", "ce")
  vsel_nms_regul_opt <- character()
  # The setups that should be tested:
  tstsetups <- grep("\\.glm\\..*\\.L1\\.", names(vss), value = TRUE)
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    m_max <- args_vs_i$nterms_max + 1L
    ssq_regul_prd <- array(dim = c(length(regul_tst), m_max))
    for (j in seq_along(regul_tst)) {
      if (regul_tst[j] == regul_default) {
        vs_regul <- vss[[tstsetup]]
      } else {
        vs_regul <- do.call(varsel, c(
          list(object = refmods[[args_vs_i$tstsetup_ref]],
               regul = regul_tst[j]),
          excl_nonargs(args_vs_i)
        ))
        vsel_tester(
          vs_regul,
          refmod_expected = refmods[[args_vs_i$tstsetup_ref]],
          prd_trms_len_expected = args_vs_i$nterms_max,
          method_expected = "L1",
          info_str = tstsetup
        )
        # Expect equality for all components not related to `regul`:
        expect_equal(vs_regul[setdiff(vsel_nms, vsel_nms_regul)],
                     vss[[tstsetup]][setdiff(vsel_nms, vsel_nms_regul)],
                     info = paste(tstsetup, j, sep = "__"))
        # Expect inequality for the elements related to `regul` (the elements
        # from `vsel_nms_regul_opt` can be, but don't need to be differing):
        for (vsel_nm in setdiff(vsel_nms_regul, vsel_nms_regul_opt)) {
          expect_false(isTRUE(all.equal(vs_regul[[vsel_nm]],
                                        vss[[tstsetup]][[vsel_nm]])),
                       info = paste(tstsetup, j, vsel_nm, sep = "__"))
        }
      }
      # Check the inequality of the prediction components in detail: Expect a
      # reduction of the sum of the squared coefficients (excluding the
      # intercept) for increasing `regul`:
      for (m in seq_len(m_max)) {
        # Since varsel() doesn't output object `p_sub`, use the linear
        # predictor here (instead of the coefficients themselves, which would
        # only be accessible from `p_sub`):
        mu_jm_regul <- vs_regul$refmodel$family$linkfun(
          vs_regul$summaries$sub[[m]]$mu
        )
        if (grepl("\\.with_offs", tstsetup)) {
          mu_jm_regul <- mu_jm_regul - offs_tst
        }
        # In fact, `sum((mu - offset - intercept)^2)` would make more sense
        # than `var(mu - offset) = sum((mu - offset - mean(mu - offset))^2)`
        # but since varsel() doesn't output object `p_sub`, the intercept from
        # the prediction is not accessible here.
        ssq_regul_prd[j, m] <- var(mu_jm_regul)
      }
    }
    # For the intercept-only model, the linear predictor consists only
    # of the intercept, so we expect no variation in `mu_jm_regul`:
    expect_true(all(ssq_regul_prd[, 1] <= 1e-5), info = tstsetup)
    # All other (i.e., not intercept-only) models:
    for (j in seq_len(dim(ssq_regul_prd)[1])[-1]) {
      for (m in seq_len(dim(ssq_regul_prd)[2])[-1]) {
        expect_lt(ssq_regul_prd[!!j, !!m], ssq_regul_prd[j - 1, m])
      }
    }
  }
})

test_that(paste(
  "for GLMs with forward search, `regul` has an expected effect on selection",
  "as well as on prediction"
), {
  skip_if_not(run_vs)
  regul_tst <- c(regul_default, 1e-1, 1e2)
  stopifnot(regul_tst[1] == regul_default)
  stopifnot(all(diff(regul_tst) > 0))
  tstsetups <- setdiff(grep("\\.glm\\.", names(vss), value = TRUE),
                       grep("\\.glm\\..*\\.L1\\.", names(vss), value = TRUE))
  tstsetups <- grep(fam_nms_aug_regex, tstsetups, value = TRUE, invert = TRUE)
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    m_max <- args_vs_i$nterms_max + 1L
    if (length(args_vs_i$search_terms) &&
        all(grepl("\\+", args_vs_i$search_terms))) {
      # This is the "empty_size" setting, so we have to subtract the skipped
      # model size (see issue #307):
      m_max <- m_max - 1L
    }
    ncl_crr <- args_vs_i$nclusters
    ssq_regul_sel_alpha <- array(dim = c(length(regul_tst), m_max, ncl_crr))
    ssq_regul_sel_beta <- array(dim = c(length(regul_tst), m_max, ncl_crr))
    ssq_regul_prd <- array(dim = c(length(regul_tst), m_max))
    for (j in seq_along(regul_tst)) {
      if (regul_tst[j] == regul_default) {
        vs_regul <- vss[[tstsetup]]
      } else {
        vs_regul <- do.call(varsel, c(
          list(object = refmods[[args_vs_i$tstsetup_ref]],
               regul = regul_tst[j]),
          excl_nonargs(args_vs_i)
        ))
        vsel_tester(
          vs_regul,
          refmod_expected = refmods[[args_vs_i$tstsetup_ref]],
          prd_trms_len_expected = args_vs_i$nterms_max,
          method_expected = "forward",
          search_terms_expected = args_vs_i$search_terms,
          search_trms_empty_size = length(args_vs_i$search_terms) &&
            all(grepl("\\+", args_vs_i$search_terms)),
          search_control_expected = c(args_vs_i[c("avoid.increase")],
                                      list(regul = regul_tst[j])),
          info_str = tstsetup
        )
      }
      for (m in seq_len(m_max)) {
        # Selection:
        outdmin_jm_regul <- vs_regul$search_path$outdmins[[m]]
        if (ncl_crr == 1) {
          outdmin_jm_regul <- list(outdmin_jm_regul)
        } else {
          stopifnot(identical(ncl_crr, length(outdmin_jm_regul)))
        }
        for (nn in seq_len(ncl_crr)) {
          stopifnot(length(outdmin_jm_regul[[nn]]$alpha) == 1)
          ssq_regul_sel_alpha[j, m, nn] <- outdmin_jm_regul[[nn]]$alpha^2
          if (length(outdmin_jm_regul[[nn]]$beta) > 0) {
            ssq_regul_sel_beta[j, m, nn] <- sum(outdmin_jm_regul[[nn]]$beta^2)
          }
        }
        # Prediction:
        # Since varsel() doesn't output object `p_sub`, use the linear
        # predictor here (instead of the coefficients themselves, which would
        # only be accessible from `p_sub`):
        mu_jm_regul <- vs_regul$summaries$sub[[m]]$mu
        if (args_vs_i$prj_nm == "augdat") {
          mu_jm_regul <- augvec2augmat(mu_jm_regul)
        }
        mu_jm_regul <- vs_regul$refmodel$family$linkfun(mu_jm_regul)
        if (grepl("\\.with_offs", tstsetup)) {
          mu_jm_regul <- mu_jm_regul - offs_tst
        }
        # In fact, `sum((mu - offset - intercept)^2)` would make more sense
        # than `var(mu - offset) = sum((mu - offset - mean(mu - offset))^2)`
        # but since varsel() doesn't output object `p_sub`, the intercept from
        # the prediction is not accessible here.
        if (args_vs_i$prj_nm == "augdat") {
          # Take the maximum variance across the response categories (i.e.,
          # the worst-case scenario):
          mu_jm_regul <- augmat2arr(mu_jm_regul)
          mu_jm_regul <- matrix(mu_jm_regul, nrow = dim(mu_jm_regul)[1],
                                ncol = dim(mu_jm_regul)[2])
          var_jm_regul <- max(apply(mu_jm_regul, 2, var))
        } else {
          var_jm_regul <- var(mu_jm_regul)
        }
        ssq_regul_prd[j, m] <- var_jm_regul
      }
    }
    # Selection:
    # For the intercept-only model:
    for (nn in seq_len(dim(ssq_regul_sel_alpha)[3])) {
      expect_length(unique(ssq_regul_sel_alpha[, 1, !!nn]), 1)
    }
    expect_true(all(is.na(ssq_regul_sel_beta[, 1, ])), info = tstsetup)
    # All other (i.e., not intercept-only) models (note: as discussed at issue
    # #169, the intercept is not tested here to stay the same):
    ssq_regul_sel_beta_cond <- array(
      dim = dim(ssq_regul_sel_beta) + c(-1L, -1L, 0L)
    )
    for (j in seq_len(dim(ssq_regul_sel_beta)[1])[-1]) {
      for (m in seq_len(dim(ssq_regul_sel_beta)[2])[-1]) {
        for (nn in seq_len(dim(ssq_regul_sel_beta)[3])) {
          ssq_regul_sel_beta_cond[j - 1, m - 1, nn] <-
            ssq_regul_sel_beta[j, m, nn] < ssq_regul_sel_beta[j - 1, m, nn]
        }
      }
    }
    expect_true(all(ssq_regul_sel_beta_cond), info = tstsetup)
    # Prediction:
    # For the intercept-only model, the linear predictor consists only
    # of the intercept, so we expect no variation in `mu_jm_regul`:
    expect_true(all(ssq_regul_prd[, 1] <= 1e-12), info = tstsetup)
    # All other (i.e., not intercept-only) models:
    for (j in seq_len(dim(ssq_regul_prd)[1])[-1]) {
      for (m in seq_len(dim(ssq_regul_prd)[2])[-1]) {
        expect_lt(ssq_regul_prd[!!j, !!m], ssq_regul_prd[j - 1, m])
      }
    }
  }
})

## Penalty ----------------------------------------------------------------

test_that("`penalty` of invalid length fails", {
  skip_if_not(run_vs)
  tstsetups <- grep("\\.L1\\.", names(vss), value = TRUE)
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    formul_crr <- get_formul_from_fit(fits[[args_vs_i$tstsetup_fit]])
    formul_crr <- rm_addresp(formul_crr)
    penal_possbl <- get_penal_possbl(formul_crr)
    len_penal <- length(penal_possbl)
    # The `penalty` objects to be tested (one element too many, one too few):
    penal_tst <- list(rep(1, len_penal + 1),
                      rep(1, len_penal - 1))
    for (penal_crr in penal_tst) {
      expect_error(
        do.call(varsel, c(
          list(object = refmods[[args_vs_i$tstsetup_ref]],
               penalty = penal_crr),
          excl_nonargs(args_vs_i)
        )),
        paste0("^Incorrect length of penalty vector \\(should be ",
               len_penal, "\\)\\.$"),
        info = paste(tstsetup,
                     which(sapply(penal_tst, identical, penal_crr)),
                     sep = "__")
      )
    }
  }
})

test_that("for forward search, `penalty` has no effect", {
  skip_if_not(run_vs)
  penal_tst <- 2
  tstsetups <- grep("\\.L1\\.", names(vss), value = TRUE, invert = TRUE)
  # To save time:
  if (!run_more) {
    tstsetups <- head(tstsetups, 1)
  }
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    # Use suppressWarnings() because test_that() somehow redirects stderr()
    # and so throws warnings that projpred wants to capture internally:
    vs_penal <- suppressWarnings(do.call(varsel, c(
      list(object = refmods[[args_vs_i$tstsetup_ref]], penalty = penal_tst),
      excl_nonargs(args_vs_i)
    )))
    # `penalty` is stored in `args_search`; blank it out so the comparison
    # with the default-`penalty` result can succeed:
    vs_penal$args_search["penalty"] <- list(NULL)
    expect_equal(vs_penal, vss[[tstsetup]], info = tstsetup)
  }
})

test_that("for L1 search, `penalty` has an expected effect", {
  skip_if_not(run_vs)
  tstsetups <- grep("\\.L1\\.", names(vss), value = TRUE)
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    formul_crr <- get_formul_from_fit(fits[[args_vs_i$tstsetup_fit]])
    formul_crr <- rm_addresp(formul_crr)
    penal_possbl <- get_penal_possbl(formul_crr)
    len_penal <- length(penal_possbl)
    penal_crr <- rep(1, len_penal)
    stopifnot(len_penal >= 3)
    # TODO: This test should be extended to also test the case where a
    # categorical predictor (more precisely, one of its dummy variables) or a
    # poly() term (more precisely, one of its lower-order terms resulting from
    # the expansion of the poly() term) gets zero or infinite penalty. For
    # now, the following code ensures that no categorical predictors and no
    # poly() terms get zero or infinite penalty.
    idx_cat <- grep("xca\\.", penal_possbl)
    idx_poly <- grep("poly[m]*\\(", penal_possbl)
    # Two predictors without cost:
    idx_penal_0 <- head(setdiff(seq_along(penal_crr),
                                c(idx_cat, idx_poly)), 2)
    stopifnot(length(idx_penal_0) == 2)
    # One predictor with infinite penalty:
    idx_penal_Inf <- head(setdiff(seq_along(penal_crr),
                                  c(idx_penal_0, idx_cat, idx_poly)), 1)
    stopifnot(length(idx_penal_Inf) == 1)
    penal_crr[idx_penal_0] <- 0
    penal_crr[idx_penal_Inf] <- Inf
    vs_penal <- do.call(varsel, c(
      list(object = refmods[[args_vs_i$tstsetup_ref]], penalty = penal_crr),
      excl_nonargs(args_vs_i, nms_excl_add = "nterms_max")
    ))
    nterms_max_crr <- count_terms_in_formula(formul_crr) - 1L
    vsel_tester(
      vs_penal,
      refmod_expected = refmods[[args_vs_i$tstsetup_ref]],
      prd_trms_len_expected = nterms_max_crr,
      method_expected = "L1",
      penalty_expected = penal_crr,
      info_str = tstsetup
    )
    # Check that the variables with no cost are selected first and the ones
    # with infinite penalty last:
    prd_trms_penal <- vs_penal$predictor_ranking
    prd_trms_penal <- sub("(I\\(.*as\\.logical\\(.*\\)\\))", "\\1TRUE",
                          prd_trms_penal)
    expect_identical(prd_trms_penal[seq_along(idx_penal_0)],
                     penal_possbl[idx_penal_0],
                     info = tstsetup)
    expect_identical(rev(prd_trms_penal)[seq_along(idx_penal_Inf)],
                     rev(penal_possbl[idx_penal_Inf]),
                     info = tstsetup)
  }
})

## L1 search and interactions ---------------------------------------------

test_that("L1 search handles three-way (second-order) interactions correctly", {
  skip_if_not(run_vs)
  skip_if_not_installed("rstanarm")
  warn_L1_ia_orig <- options(projpred.warn_L1_interactions = TRUE)
  main_terms_in_ia <- c("xca.2", "xco.3", "xco.1")
  # All interaction terms (of all orders and in all term-order permutations)
  # that can be formed from the main terms above:
  all_ias_split <- lapply(seq_along(main_terms_in_ia), combn,
                          x = main_terms_in_ia, simplify = FALSE)
  all_ias <- unlist(lapply(all_ias_split, function(ia_split) {
    lapply(ia_split, all_ia_perms, is_split = TRUE)
  }))
  # Temporarily extend the shared term universe (restored at the end):
  trms_universe_split_bu <- trms_universe_split
  trms_universe_split <<- union(trms_universe_split, all_ias)
  tstsetup <- head(grep("^rstanarm\\.glm\\.", names(fits), value = TRUE), 1)
  args_fit_i <- args_fit[[tstsetup]]
  stopifnot(!(args_fit_i$pkg_nm == "rstanarm" &&
                args_fit_i$fam_nm == "cumul"))
  fit_fun_nm <- get_fit_fun_nm(args_fit_i)
  args_fit_i$formula <- update(args_fit_i$formula,
                               . ~ . + xca.2 * xco.3 * xco.1)
  fit <- suppressWarnings(do.call(
    get(fit_fun_nm, asNamespace(args_fit_i$pkg_nm)),
    excl_nonargs(args_fit_i)
  ))
  args_ref_i <- args_ref[[paste0(tstsetup, ".trad")]]
  refmod <- do.call(get_refmodel, c(
    list(object = fit),
    excl_nonargs(args_ref_i)
  ))
  args_vs_i <- args_vs[[paste0(tstsetup, ".trad.L1.default_search_trms")]]
  args_vs_i$refit_prj <- FALSE
  args_vs_i$nterms_max <- NULL
  expect_warning(
    vs <- do.call(varsel, c(
      list(object = refmod),
      excl_nonargs(args_vs_i)
    )),
    "was selected before all.+lower-order interaction terms have been selected",
    info = tstsetup
  )
  vsel_tester(
    vs,
    refmod_expected = refmod,
    prd_trms_len_expected = count_terms_in_formula(refmod$formula) - 1L,
    method_expected = "L1",
    refit_prj_expected = FALSE,
    ### Testing for non-increasing element `ce` (for increasing model size)
    ### doesn't make sense if the ranking of predictors involved in
    ### interactions has been changed, so we choose a higher `extra_tol` than
    ### by default:
    extra_tol = 1.2,
    ###
    info_str = tstsetup
  )
  # Every interaction term must be ranked after all of its lower-order
  # (sub-)interaction and main-effect terms:
  rk <- ranking(vs)[["fulldata"]]
  expect_true(
    all(sapply(grep(":", rk), function(ia_idx) {
      main_terms_in_ia <- strsplit(rk[ia_idx], ":")[[1]]
      all_ias_split <- lapply(seq_len(length(main_terms_in_ia) - 1L), combn,
                              x = main_terms_in_ia, simplify = FALSE)
      ias_lower <- unlist(lapply(all_ias_split, function(ia_split) {
        lapply(ia_split, all_ia_perms, is_split = TRUE)
      }))
      return(all(which(rk %in% ias_lower) < ia_idx))
    })),
    info = tstsetup
  )
  trms_universe_split <<- trms_universe_split_bu
  options(warn_L1_ia_orig)
})

## search_terms -----------------------------------------------------------
test_that(paste( "including 
all terms in `search_terms` gives the same results as the default", "`search_terms`" ), { skip_if_not(run_vs) tstsetups <- grep("\\.alltrms", names(vss), value = TRUE) for (tstsetup in tstsetups) { tstsetup_default <- sub("\\.alltrms", "\\.default_search_trms", tstsetup) if (!tstsetup_default %in% names(vss)) next vs_search_terms <- vss[[tstsetup]] vs_search_terms$args_search["search_terms"] <- list(NULL) expect_identical(vs_search_terms, vss[[tstsetup_default]], info = tstsetup) } }) test_that(paste( "forcing the inclusion of a term in the candidate models via `search_terms`", "works as expected" ), { skip_if_not(run_vs) tstsetups <- grep("\\.fixed", names(vss), value = TRUE) for (tstsetup in tstsetups) { # In principle, `search_trms_tst$fixed$search_terms[1]` could be used # instead of `"xco.1"`, but that would seem like the forced term always has # to come first in `search_terms` (which is not the case): expect_identical(vss[[tstsetup]]$predictor_ranking[1], "xco.1", info = tstsetup) } }) test_that(paste( "forcing the exclusion of a term in the candidate models via `search_terms`", "works as expected" ), { skip_if_not(run_vs) tstsetups <- grep("\\.excluded", names(vss), value = TRUE) for (tstsetup in tstsetups) { expect_false("xco.1" %in% vss[[tstsetup]]$predictor_ranking, info = tstsetup) } }) test_that(paste( "forcing the skipping of a model size via `search_terms` works as expected" ), { skip_if_not(run_vs) tstsetups <- grep("\\.empty_size", names(vss), value = TRUE) for (tstsetup in tstsetups) { rk_fulldata_out <- vss[[tstsetup]]$predictor_ranking expect_true(grepl("\\+", rk_fulldata_out[1]) && !any(grepl("\\+", rk_fulldata_out[-1])), info = tstsetup) } }) ## varsel.vsel() ---------------------------------------------------------- test_that("varsel.vsel() works for `vsel` objects from varsel()", { skip_if_not(run_vs) tstsetups <- names(vss) if (!run_more) { tstsetups <- head(tstsetups, 1) } for (tstsetup in tstsetups) { vs_eval <- varsel(vss[[tstsetup]], 
                      refit_prj = FALSE, verbose = FALSE, seed = seed2_tst)
    tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward"
    extra_tol_crr <- 1.1
    if (meth_exp_crr == "L1" &&
        any(grepl(":", ranking(vs_eval)[["fulldata"]]))) {
      ### Testing for non-increasing element `ce` (for increasing model size)
      ### doesn't make sense if the ranking of predictors involved in
      ### interactions has been changed, so we choose a higher `extra_tol`:
      extra_tol_crr <- 1.2
      ###
    }
    vsel_tester(
      vs_eval,
      refmod_expected = refmods[[tstsetup_ref]],
      prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      refit_prj_expected = FALSE,
      search_terms_expected = args_vs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_vs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      extra_tol = extra_tol_crr,
      info_str = tstsetup
    )
  }
})

test_that("varsel.vsel() works for `vsel` objects from cv_varsel()", {
  skip_if_not(run_vs)
  skip_if_not(run_cvvs)
  tstsetup_counter <- 0L
  for (tstsetup in names(cvvss)) {
    # Only the first setup is run with `refit_prj = FALSE`; under `run_more`,
    # the later setups re-project with one cluster less than originally used.
    if (!run_more && tstsetup_counter > 0L) {
      next
    } else if (run_more && tstsetup_counter > 0L) {
      refit_prj_crr <- TRUE
      nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    } else {
      refit_prj_crr <- FALSE
      nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred
    }
    fam_crr <- args_cvvs[[tstsetup]]$fam_nm
    prj_crr <- args_cvvs[[tstsetup]]$prj_nm
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    vs_eval <- suppressWarnings(varsel(
      cvvss[[tstsetup]],
      refit_prj = refit_prj_crr,
      nclusters_pred = nclusters_pred_crr,
      verbose = FALSE,
      seed = seed2_tst
    ))
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      vs_eval,
      refmod_expected = refmods[[tstsetup_ref]],
      prd_trms_len_expected =
        args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      refit_prj_expected = refit_prj_crr,
      # Expected number of projected draws used for the performance evaluation:
      # an L1 search without re-projection uses a single draw; a forward search
      # without re-projection re-uses the `nclusters_tst` search draws;
      # otherwise `nclusters_pred_crr` clusters are used.
      nprjdraws_eval_expected = if (!refit_prj_crr && meth_exp_crr == "L1") {
        1L
      } else if (!refit_prj_crr) {
        nclusters_tst
      } else {
        nclusters_pred_crr
      },
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    tstsetup_counter <- tstsetup_counter + 1L
  }
})

# cv_varsel() -------------------------------------------------------------

context("cv_varsel()")

test_that(paste(
  "`object` of class `refmodel` and arguments `method`, `cv_method`,",
  "`nterms_max`, `nclusters`, and `nclusters_pred` work"
), {
  skip_if_not(run_cvvs)
  for (tstsetup in names(cvvss)) {
    mod_crr <- args_cvvs[[tstsetup]]$mod_nm
    fam_crr <- args_cvvs[[tstsetup]]$fam_nm
    prj_crr <- args_cvvs[[tstsetup]]$prj_nm
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvss[[tstsetup]],
      with_cv = TRUE,
      refmod_expected = refmods[[args_cvvs[[tstsetup]]$tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs[[tstsetup]]$cv_method,
                                      "kfold")) {
        cvfitss[[args_cvvs[[tstsetup]]$tstsetup_ref]]
      } else {
        refmods[[args_cvvs[[tstsetup]]$tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = args_cvvs[[tstsetup]]$cv_method,
      nloo_expected = args_cvvs[[tstsetup]]$nloo,
      valsearch_expected = args_cvvs[[tstsetup]]$validate_search,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})

test_that("invalid `object` fails", {
  expect_error(cv_varsel(rnorm(5)), "^no applicable method for")
})

test_that("invalid `method` fails", {
  for (tstsetup in names(refmods)) {
    expect_error(cv_varsel(refmods[[tstsetup]], method = "k-fold"),
                 "^Unexpected value for argument `method`\\.$",
                 info = tstsetup)
    # An L1 search must be rejected for any non-GLM reference model:
    if (args_ref[[tstsetup]]$mod_nm != "glm") {
      expect_error(cv_varsel(refmods[[tstsetup]], method = "L1"),
                   paste("^L1 search is only supported for reference models",
                         "without multilevel and without additive",
                         "\\(\"smoothing\"\\) terms\\.$"),
                   info = tstsetup)
    }
    if (args_ref[[tstsetup]]$mod_nm == "glm" &&
        args_ref[[tstsetup]]$prj_nm == "augdat") {
      expect_error(cv_varsel(refmods[[tstsetup]], method = "L1"),
                   paste("^Currently, the augmented-data projection may not be",
                         "combined with an L1 search\\.$"),
                   info = tstsetup)
    }
  }
})

test_that("invalid `cv_method` fails", {
  for (tstsetup in names(refmods)) {
    expect_error(cv_varsel(refmods[[tstsetup]], cv_method = "k-fold"),
                 "^Unknown `cv_method`\\.$",
                 info = tstsetup)
  }
})

test_that("`seed` works (and restores the RNG state afterwards)", {
  skip_if_not(run_cvvs)
  # To save time:
  tstsetups <- union(
    grep("\\.glm\\.gauss.*\\.default_search_trms", names(cvvss), value = TRUE),
    # Important for testing get_refmodel.brmsfit()'s internal `kfold_seed` (and
    # also `refprd_seed` if we are lucky and get a fold which separates out at
    # least one group):
    grep("^brms\\.(glmm|gamm)\\..*\\.kfold", names(cvvss), value = TRUE)
  )
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    cvvs_orig <- cvvss[[tstsetup]]
    rand_orig <- runif(1) # Just to advance `.Random.seed[2]`.

    # Snapshot the RNG state so we can verify below that cv_varsel() restores
    # it:
    .Random.seed_repr1 <- .Random.seed
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_repr <- suppressWarnings(do.call(cv_varsel, c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]],
           cvfits = if (identical(args_cvvs_i$cv_method, "kfold")) {
             cvfitss[[args_cvvs_i$tstsetup_ref]]
           } else {
             refmods[[args_cvvs_i$tstsetup_ref]]$cvfits # should be `NULL`
           }),
      excl_nonargs(args_cvvs_i)
    )))
    .Random.seed_repr2 <- .Random.seed
    rand_new <- runif(1) # Just to advance `.Random.seed[2]`.
    # Expected equality:
    expect_equal(cvvs_repr, cvvs_orig, info = tstsetup)
    expect_equal(.Random.seed_repr2, .Random.seed_repr1, info = tstsetup)
    # Expected inequality:
    expect_false(isTRUE(all.equal(rand_new, rand_orig)), info = tstsetup)
  }
})

## refit_prj --------------------------------------------------------------

test_that("`refit_prj` works", {
  skip_if_not(run_cvvs)
  if (run_more) {
    tstsetups <- names(cvvss)
  } else {
    tstsetups <- head(grep("\\.glm\\.", names(cvvss), value = TRUE), 1)
  }
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    if (identical(args_cvvs_i$cv_method, "kfold") &&
        isFALSE(args_cvvs_i$validate_search)) {
      # K-fold CV with `validate_search = FALSE` does not allow to specify
      # `refit_prj = FALSE`:
      next
    }
    args_cvvs_i$refit_prj <- FALSE
    cvvs_reuse <- suppressWarnings(do.call(cv_varsel, c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]],
           cvfits = if (identical(args_cvvs_i$cv_method, "kfold")) {
             cvfitss[[args_cvvs_i$tstsetup_ref]]
           } else {
             refmods[[args_cvvs_i$tstsetup_ref]]$cvfits # should be `NULL`
           }),
      excl_nonargs(args_cvvs_i)
    )))
    mod_crr <- args_cvvs_i$mod_nm
    fam_crr <- args_cvvs_i$fam_nm
    prj_crr <- args_cvvs_i$prj_nm
    meth_exp_crr <- args_cvvs_i$method %||% "forward"
    vsel_tester(
      cvvs_reuse,
      with_cv = TRUE,
      refmod_expected = refmods[[args_cvvs_i$tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs_i$cv_method, "kfold")) {
        cvfitss[[args_cvvs_i$tstsetup_ref]]
      } else {

        refmods[[args_cvvs_i$tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs_i$nterms_max,
      method_expected = meth_exp_crr,
      refit_prj_expected = FALSE,
      cv_method_expected = args_cvvs_i$cv_method,
      nloo_expected = args_cvvs_i$nloo,
      valsearch_expected = args_cvvs_i$validate_search,
      search_terms_expected = args_cvvs_i$search_terms,
      search_trms_empty_size =
        length(args_cvvs_i$search_terms) &&
        all(grepl("\\+", args_cvvs_i$search_terms)),
      search_control_expected = args_cvvs_i[c("avoid.increase")],
      info_str = tstsetup
    )
  }
})

## Message when cutting off the search ------------------------------------

if (run_more) {
  test_that("the message when cutting off the search is thrown correctly", {
    skip_if_not(run_vs)
    skip_if_not_installed("rstanarm")
    mssg_cut_search_orig <- options(projpred.mssg_cut_search = NULL)
    dat_gauss <- data.frame(y = df_gaussian$y, df_gaussian$x)
    stopifnot(sum(grepl("^X", names(dat_gauss))) > 19)
    # `fit_exceed` uses all predictors and is expected to trigger the cut-off
    # message (only when `nterms_max` is `NULL`, see below); `fit_not_exceed`
    # drops `X20`, so no message is expected for it:
    fit_exceed <- suppressWarnings(rstanarm::stan_glm(
      y ~ ., family = gaussian(), data = dat_gauss,
      QR = TRUE, chains = 1, iter = 500, refresh = 0, seed = 9876
    ))
    fit_not_exceed <- suppressWarnings(rstanarm::stan_glm(
      y ~ . - X20, family = gaussian(), data = dat_gauss,
      QR = TRUE, chains = 1, iter = 500, refresh = 0, seed = 9876
    ))
    for (nterms_max_i in list(NULL, 0, 20)) {
      if (is.null(nterms_max_i)) {
        mssg_expected <- "Cutting off the search at size 19\\."

      } else {
        mssg_expected <- NA
      }
      expect_message(
        vs_tmp <- suppressWarnings(cv_varsel(
          fit_exceed, validate_search = FALSE, method = "forward",
          nterms_max = nterms_max_i, nclusters = 2, refit_prj = FALSE,
          seed = 5555, verbose = FALSE
        )),
        mssg_expected,
        info = paste("fit_exceed, nterms_max =", nterms_max_i %||% "NULL")
      )
      expect_message(
        vs_tmp <- suppressWarnings(cv_varsel(
          fit_not_exceed, validate_search = FALSE, method = "forward",
          nterms_max = nterms_max_i, nclusters = 2, refit_prj = FALSE,
          seed = 5555, verbose = FALSE
        )),
        NA,
        info = paste("fit_not_exceed, nterms_max =", nterms_max_i %||% "NULL")
      )
    }
    options(mssg_cut_search_orig)
  })
}

## nloo -------------------------------------------------------------------

test_that("invalid `nloo` fails", {
  skip_if_not(run_cvvs)
  # `nloo` is only relevant for non-K-fold setups with a validated search:
  tstsetups_nonkfold <- grep("\\.kfold", names(cvvss), value = TRUE,
                             invert = TRUE)
  valsearch_arg <- lapply(args_cvvs[tstsetups_nonkfold], "[[",
                          "validate_search")
  tstsetups_nonkfold <- tstsetups_nonkfold[!sapply(valsearch_arg, isFALSE)]
  skip_if(length(tstsetups_nonkfold) == 0)
  for (tstsetup in head(tstsetups_nonkfold, 1)) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    expect_error(
      suppressWarnings(do.call(cv_varsel, c(
        list(object = refmods[[args_cvvs_i$tstsetup_ref]], nloo = -1),
        excl_nonargs(args_cvvs_i, nms_excl_add = "nloo")
      ))),
      "^nloo must be at least 1$",
      info = tstsetup
    )
  }
})

test_that(paste(
  "setting `nloo` at least as large as the number of observations doesn't",
  "change results"
), {
  skip_if_not(run_cvvs)
  nloo_tst <- nobsv + 1L
  tstsetups <- grep(
    "\\.glm\\.gauss\\..*\\.default_cvmeth\\.default_search_trms\\.default_nloo",
    names(cvvss), value = TRUE
  )
  valsearch_arg <- lapply(args_cvvs[tstsetups], "[[", "validate_search")
  tstsetups <- tstsetups[!sapply(valsearch_arg, isFALSE)]
  skip_if(length(tstsetups) == 0)
  for (tstsetup in tstsetups) {
    args_cvvs_i <-

      args_cvvs[[tstsetup]]
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_nloo <- suppressWarnings(do.call(cv_varsel, c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]], nloo = nloo_tst),
      excl_nonargs(args_cvvs_i)
    )))
    # An `nloo` exceeding the number of observations must reproduce the
    # default results exactly:
    expect_equal(cvvs_nloo, cvvss[[tstsetup]], info = tstsetup)
  }
})

test_that("setting `nloo` smaller than the number of observations works", {
  skip_if_not(run_cvvs)
  # Output elements of `vsel` objects that may be influenced by `nloo`:
  vsel_nms_nloo <- c("summaries", "summaries_fast", "predictor_ranking_cv",
                     "nloo", "loo_inds", "ce")
  # In general, element `ce` is affected as well (because the PRNG state when
  # doing the clustering for the performance evaluation is different when `nloo`
  # is smaller than the number of observations compared to when `nloo` is equal
  # to the number of observations):
  vsel_nms_nloo_opt <- c("ce")
  # The setups that should be tested:
  tstsetups <- grep(
    "\\.default_cvmeth\\.default_search_trms\\.default_nloo",
    names(cvvss), value = TRUE
  )
  valsearch_arg <- lapply(args_cvvs[tstsetups], "[[", "validate_search")
  tstsetups <- tstsetups[!sapply(valsearch_arg, isFALSE)]
  skip_if(length(tstsetups) == 0)
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    tstsetup_nloo <- sub("\\.default_nloo", "\\.subsmpl", tstsetup)
    stopifnot(tstsetup_nloo %in% names(cvvss))
    cvvs_nloo <- cvvss[[tstsetup_nloo]]
    # Expected equality for most elements with a few exceptions:
    vsel_nms_nloo_crr <- vsel_nms_nloo
    if (isFALSE(args_cvvs_i$validate_search)) {
      vsel_nms_nloo_crr <- setdiff(vsel_nms_nloo_crr, "predictor_ranking_cv")
    }
    expect_equal(cvvs_nloo[setdiff(vsel_nms, vsel_nms_nloo_crr)],
                 cvvss[[tstsetup]][setdiff(vsel_nms, vsel_nms_nloo_crr)],
                 info = tstsetup)
    # Expected inequality for the exceptions (the elements from
    # `vsel_nms_nloo_opt` can be, but don't need to be differing):
    for (vsel_nm in setdiff(vsel_nms_nloo_crr, vsel_nms_nloo_opt)) {

      expect_false(isTRUE(all.equal(cvvs_nloo[[vsel_nm]],
                                    cvvss[[tstsetup]][[vsel_nm]])),
                   info = paste(tstsetup, vsel_nm, sep = "__"))
    }
  }
})

## validate_search --------------------------------------------------------

test_that("`validate_search` works", {
  skip_if_not(run_cvvs)
  # Output elements of `vsel` objects that may be influenced by
  # `validate_search`:
  vsel_nms_valsearch <- c("validate_search", "summaries", "ce",
                          "predictor_ranking_cv")
  # The setups that should be tested:
  tstsetups <- names(cvvss)
  # Keep only setups with a validated search and a full-`nloo` LOO CV:
  if (!run_valsearch_always) {
    has_valsearch_true <- sapply(tstsetups, function(tstsetup_cvvs) {
      !isFALSE(args_cvvs[[tstsetup_cvvs]]$validate_search) &&
        (args_cvvs[[tstsetup_cvvs]]$nloo %||% nobsv) == nobsv
    })
    tstsetups <- tstsetups[has_valsearch_true]
  }
  suggsize_cond <- setNames(rep(NA, length(tstsetups)), nm = tstsetups)
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    stopifnot(is.null(args_cvvs_i$validate_search) ||
                isTRUE(args_cvvs_i$validate_search))
    tstsetup_ref <- args_cvvs_i$tstsetup_ref
    mod_crr <- args_cvvs_i$mod_nm
    fam_crr <- args_cvvs_i$fam_nm
    prj_crr <- args_cvvs_i$prj_nm
    meth_exp_crr <- args_cvvs_i$method %||% "forward"
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_valsearch <- suppressWarnings(do.call(cv_varsel, c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]],
           validate_search = FALSE,
           cvfits = if (identical(args_cvvs_i$cv_method, "kfold")) {
             cvfitss[[args_cvvs_i$tstsetup_ref]]
           } else {
             refmods[[args_cvvs_i$tstsetup_ref]]$cvfits # should be `NULL`
           }),
      excl_nonargs(args_cvvs_i, nms_excl_add = "validate_search")
    )))
    vsel_tester(
      cvvs_valsearch,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs_i$cv_method, "kfold")) {
        cvfitss[[args_cvvs_i$tstsetup_ref]]
      } else {
        refmods[[args_cvvs_i$tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs_i$nterms_max,
      method_expected = meth_exp_crr,

      cv_method_expected = args_cvvs_i$cv_method,
      valsearch_expected = FALSE,
      search_terms_expected = args_cvvs_i$search_terms,
      search_trms_empty_size =
        length(args_cvvs_i$search_terms) &&
        all(grepl("\\+", args_cvvs_i$search_terms)),
      search_control_expected = args_cvvs_i[c("avoid.increase")],
      info_str = tstsetup
    )
    # Expected equality for most elements with a few exceptions:
    vsel_nms_valsearch_crr <- vsel_nms_valsearch
    if (identical(args_cvvs_i$cv_method, "kfold")) {
      vsel_nms_valsearch_crr <- setdiff(vsel_nms_valsearch_crr, "ce")
    }
    expect_equal(cvvs_valsearch[setdiff(vsel_nms, vsel_nms_valsearch_crr)],
                 cvvss[[tstsetup]][setdiff(vsel_nms, vsel_nms_valsearch_crr)],
                 info = tstsetup)
    # The reference model's summaries must not depend on `validate_search`:
    expect_identical(cvvs_valsearch$summaries$ref,
                     cvvss[[tstsetup]]$summaries$ref,
                     info = tstsetup)
    # Expected inequality for the exceptions:
    for (vsel_nm in vsel_nms_valsearch_crr) {
      expect_false(isTRUE(all.equal(cvvs_valsearch[[vsel_nm]],
                                    cvvss[[tstsetup]][[vsel_nm]])),
                   info = paste(tstsetup, vsel_nm, sep = "__"))
    }
    # Check the expected inequalities more specifically:
    # Without a validated search, we expect increased LPPDs (and consequently
    # also an increased ELPD) in the submodels (due to overfitting):
    tol_crr <- 2e-1
    # Allow for just a small proportion of extreme differences:
    prop_as_expected <- if (identical(args_cvvs_i$cv_method, "kfold")) {
      0.8
    } else {
      0.9
    }
    for (j in seq_along(cvvs_valsearch$summaries$sub)) {
      expect_true(mean(cvvs_valsearch$summaries$sub[[j]]$lppd >=
                         cvvss[[tstsetup]]$summaries$sub[[j]]$lppd - tol_crr) >=
                    prop_as_expected,
                  info = paste(tstsetup, j, sep = "__"))
    }
    # Again allow for just a small proportion of extreme differences:
    prop_sizes_as_expected <- if (identical(args_cvvs_i$cv_method, "kfold")) {
      0.5
    } else {
      1
    }
    expect_true(
      mean(
        summary(cvvs_valsearch)$perf_sub$elpd >=
          summary(cvvss[[tstsetup]])$perf_sub$elpd
      ) >= prop_sizes_as_expected,
      info = tstsetup
    )
    # Without a validated search, we expect overfitting in the suggested size:
    sgg_size_valsearch <-

      suggest_size(cvvs_valsearch, warnings = FALSE)
    sgg_size <- suggest_size(cvvss[[tstsetup]], warnings = FALSE)
    if (!is.na(sgg_size_valsearch) & !is.na(sgg_size)) {
      suggsize_cond[tstsetup] <- sgg_size_valsearch >= sgg_size
    }
  }
  # At most a quarter of the setups may violate the suggested-size condition:
  if (!all(is.na(suggsize_cond))) {
    expect_true(mean(!suggsize_cond, na.rm = TRUE) <= 0.25)
  }
})

## Arguments specific to K-fold CV ----------------------------------------

test_that("invalid `K` fails", {
  skip_if_not(length(fits) > 0)
  # `K` must be a single integer between 2 and the number of observations:
  expect_error(cv_varsel(refmods[[1]], cv_method = "kfold", K = 1),
               "^`K` must be at least 2\\.$")
  expect_error(cv_varsel(refmods[[1]], cv_method = "kfold", K = 1000),
               "^`K` cannot exceed the number of observations\\.$")
  expect_error(cv_varsel(refmods[[1]], cv_method = "kfold", K = c(4, 9)),
               "^`K` must be a single integer value\\.$")
  expect_error(cv_varsel(refmods[[1]], cv_method = "kfold", K = "a"),
               "^`K` must be a single integer value\\.$")
  expect_error(cv_varsel(refmods[[1]], cv_method = "kfold", K = dat),
               "^`K` must be a single integer value\\.$")
})

test_that(paste(
  "`cvfits` included in the `refmodel` object works for rstanarm reference",
  "models"
), {
  skip_if_not(run_cvvs)
  tstsetups <- grep("^rstanarm\\..*\\.kfold", names(cvvss), value = TRUE)
  if (!run_cvfits_all) {
    tstsetups_tmp <- head(grep("\\.glmm\\.", tstsetups, value = TRUE), 1)
    if (length(tstsetups_tmp) == 0) {
      tstsetups_tmp <- head(tstsetups, 1)
    }
    tstsetups <- tstsetups_tmp
  }
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    tstsetup_fit <- args_cvvs_i$tstsetup_fit
    mod_crr <- args_cvvs_i$mod_nm
    fam_crr <- args_cvvs_i$fam_nm
    prj_crr <- args_cvvs_i$prj_nm
    meth_exp_crr <- args_cvvs_i$method %||% "forward"
    fit_crr <- fits[[tstsetup_fit]]
    K_crr <- K_tst
    # Refit `K_crr` times (note: below, the seed for constructing `folds_vec`
    # had to be changed in some cases to avoid unfavorable PRNG situations,
    # leading to technical issues such as nonconvergence of the submodel fitter;
    # this is also tied to the value of `seed_tst`):
    if

    (grepl("\\.glmm\\.", tstsetup)) {
      # Perform a grouped K-fold CV to test an edge case where all observations
      # belonging to the same level of a variable with group-level effects are
      # in the same fold, so prediction is performed for new levels (see, e.g.,
      # brms's GitHub issue #1286):
      if (exists(".Random.seed", envir = .GlobalEnv)) {
        rng_old <- get(".Random.seed", envir = .GlobalEnv)
      }
      # Make the construction of the CV folds reproducible:
      set.seed(seed2_tst * 3L)
      folds_vec <- loo::kfold_split_grouped(K = K_crr, x = dat$z.1)
      if (exists("rng_old")) assign(".Random.seed", rng_old, envir = .GlobalEnv)
    } else {
      folds_vec <- cv_folds(nobsv, K = K_crr, seed = seed2_tst)
    }
    # Additionally to suppressWarnings(), suppressMessages() could be used here
    # (but is not necessary since messages seem to be suppressed within
    # test_that()'s `code`); furthermore, try() is used because rstanarm
    # sometimes fails to refit:
    kfold_obj <- try(suppressWarnings(kfold(fit_crr,
                                            K = K_crr,
                                            folds = folds_vec,
                                            save_fits = TRUE,
                                            cores = 1)),
                     silent = TRUE)
    if (inherits(kfold_obj, "try-error")) {
      message("Could not test `tstsetup = \"", tstsetup, "\"` in the rstanarm ",
              "`cvfits` test. Error message: \"",
              attr(kfold_obj, "condition")$message, "\"")
      next
    }
    kfold_obj <- structure(kfold_obj$fits[, "fit"], folds = folds_vec)
    # Create `refmodel` object with `cvfits`:
    refmod_crr <- do.call(get_refmodel, c(
      list(object = fit_crr, cvfits = kfold_obj),
      excl_nonargs(args_ref[[args_cvvs_i$tstsetup_ref]])
    ))
    # Run cv_varsel():
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_cvfits <- suppressWarnings(do.call(cv_varsel, c(
      list(object = refmod_crr),
      excl_nonargs(args_cvvs_i, nms_excl_add = "K")
    )))
    # Checks:
    # NOTE(review): unlike the analogous brms `cvfits` test below, this
    # vsel_tester() call does not check `search_control_expected` — possibly an
    # oversight; confirm whether it should be added here, too.
    vsel_tester(
      cvvs_cvfits,
      with_cv = TRUE,
      refmod_expected = refmod_crr,
      cvfits_expected = kfold_obj,
      prd_trms_len_expected = args_cvvs_i$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      valsearch_expected = args_cvvs_i$validate_search,
      search_terms_expected = args_cvvs_i$search_terms,
      search_trms_empty_size =
        length(args_cvvs_i$search_terms) &&
        all(grepl("\\+", args_cvvs_i$search_terms)),
      info_str = tstsetup
    )
    # Expected equality for most elements with a few exceptions:
    # TODO: Currently, `check.environment = FALSE` is needed. The reason is
    # probably that in the divergence minimizers, the projpred-extended family
    # is passed to argument `family` of the external model fitting functions
    # like lme4::glmer(). This should be fixed and then `check.environment =
    # FALSE` should be removed.

    expect_equal(cvvs_cvfits[setdiff(vsel_nms, vsel_nms_cvfits)],
                 cvvss[[tstsetup]][setdiff(vsel_nms, vsel_nms_cvfits)],
                 check.environment = FALSE, info = tstsetup)
    # Expected inequality for the exceptions (the elements from
    # `vsel_nms_cvfits_opt` can be, but don't need to be differing):
    for (vsel_nm in setdiff(vsel_nms_cvfits, vsel_nms_cvfits_opt)) {
      # Suppress warning
      # "longer object length is not a multiple of shorter object length":
      suppressWarnings(
        expect_false(isTRUE(all.equal(cvvs_cvfits[[vsel_nm]],
                                      cvvss[[tstsetup]][[vsel_nm]])),
                     info = paste(tstsetup, vsel_nm, sep = "__"))
      )
    }
  }
})

test_that(paste(
  "`cvfits` included in the `refmodel` object works for brms reference models"
), {
  skip_if_not(run_cvvs)
  skip_if_not(packageVersion("brms") >= "2.16.4")
  tstsetups <- grep("^brms\\..*\\.kfold", names(cvvss), value = TRUE)
  if (!run_cvfits_all) {
    tstsetups_tmp <- head(grep("\\.glmm\\.", tstsetups, value = TRUE), 1)
    if (length(tstsetups_tmp) == 0) {
      tstsetups_tmp <- head(tstsetups, 1)
    }
    tstsetups <- tstsetups_tmp
  }
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    tstsetup_fit <- args_cvvs_i$tstsetup_fit
    mod_crr <- args_cvvs_i$mod_nm
    fam_crr <- args_cvvs_i$fam_nm
    prj_crr <- args_cvvs_i$prj_nm
    meth_exp_crr <- args_cvvs_i$method %||% "forward"
    fit_crr <- fits[[tstsetup_fit]]
    K_crr <- K_tst
    # Refit `K_crr` times (note: below, the seed for constructing `folds_vec`
    # had to be changed in some cases to avoid unfavorable PRNG situations,
    # leading to technical issues such as nonconvergence of the submodel fitter;
    # this is also tied to the value of `seed_tst`):
    if (grepl("\\.glmm\\.", tstsetup)) {
      # Perform a grouped K-fold CV to test an edge case where all observations
      # belonging to the same level of a variable with group-level effects are
      # in the same fold, so prediction is performed for new levels (see, e.g.,
      # brms's GitHub issue #1286):
      if (exists(".Random.seed", envir = .GlobalEnv)) {
        rng_old <- get(".Random.seed", envir = .GlobalEnv)
      }
      # Make the construction of the CV folds reproducible:
      set.seed(seed2_tst + 10L)
      folds_vec <- loo::kfold_split_grouped(K = K_crr, x = dat$z.1)
      if (exists("rng_old")) assign(".Random.seed", rng_old, envir = .GlobalEnv)
    } else if (grepl("\\.gam\\.", tstsetup)) {
      folds_vec <- cv_folds(nobsv, K = K_crr, seed = seed2_tst + 10L)
    } else {
      folds_vec <- cv_folds(nobsv, K = K_crr, seed = seed2_tst)
    }
    # NOTE(review): in contrast to the rstanarm test above, kfold() is not
    # wrapped in try() here; failures are instead caught around cv_varsel()
    # below.
    kfold_obj <- kfold(fit_crr, K = K_crr, folds = folds_vec, save_fits = TRUE,
                       seed = seed_fit)
    kfold_obj <- structure(kfold_obj$fits[, "fit"], folds = folds_vec)
    # Create `refmodel` object with `cvfits`:
    refmod_crr <- do.call(get_refmodel, c(
      list(object = fit_crr, cvfits = kfold_obj),
      excl_nonargs(args_ref[[args_cvvs_i$tstsetup_ref]])
    ))
    # Run cv_varsel():
    cvvs_cvfits <- try(
      do.call(cv_varsel, c(
        list(object = refmod_crr),
        excl_nonargs(args_cvvs_i, nms_excl_add = "K")
      )),
      silent = TRUE
    )
    if (inherits(cvvs_cvfits, "try-error")) {
      message("Failure for `tstsetup = \"", tstsetup, "\"` in the brms ",
              "`cvfits` test. Error message: \"",
              attr(cvvs_cvfits, "condition")$message, "\"")
      # Check that this is a "pwrssUpdate" failure in lme4, so for solving this,
      # we would either need to tweak the lme4 tuning parameters manually (via
      # `...`) or change the data-generating mechanism here in the tests (to
      # obtain less extreme or more data):
      expect_true(grepl("pwrssUpdate", attr(cvvs_cvfits, "condition")$message),
                  info = tstsetup)
      # Furthermore, this should only occur in the `run_more = TRUE` case, so it
      # can be skipped (because there are enough other `tstsetups` for which
      # this works):
      expect_true(run_more, info = tstsetup)
      next
    }
    # Test the reproducibility of ref_predfun() when applied to new observations
    # (should be ensured by get_refmodel.brmsfit()'s internal `refprd_seed`):
    runif(1)
    cvvs_cvfits_repr <- do.call(cv_varsel, c(
      list(object = refmod_crr),
      excl_nonargs(args_cvvs_i, nms_excl_add = "K")
    ))
    # Checks:
    expect_equal(cvvs_cvfits, cvvs_cvfits_repr, info = tstsetup)
    vsel_tester(
      cvvs_cvfits,

      with_cv = TRUE,
      refmod_expected = refmod_crr,
      cvfits_expected = kfold_obj,
      prd_trms_len_expected = args_cvvs_i$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      valsearch_expected = args_cvvs_i$validate_search,
      search_terms_expected = args_cvvs_i$search_terms,
      search_trms_empty_size =
        length(args_cvvs_i$search_terms) &&
        all(grepl("\\+", args_cvvs_i$search_terms)),
      search_control_expected = args_cvvs_i[c("avoid.increase")],
      info_str = tstsetup
    )
    # Expected equality for most elements with a few exceptions:
    # TODO: Currently, `check.environment = FALSE` is needed. The reason is
    # probably that in the divergence minimizers, the projpred-extended family
    # is passed to argument `family` of the external model fitting functions
    # like lme4::glmer(). This should be fixed and then `check.environment =
    # FALSE` should be removed.
    expect_equal(cvvs_cvfits[setdiff(vsel_nms, vsel_nms_cvfits)],
                 cvvss[[tstsetup]][setdiff(vsel_nms, vsel_nms_cvfits)],
                 check.environment = FALSE, info = tstsetup)
    # Expected inequality for the exceptions (the elements from
    # `vsel_nms_cvfits_opt` can be, but don't need to be differing):
    for (vsel_nm in setdiff(vsel_nms_cvfits, vsel_nms_cvfits_opt)) {
      # Suppress warning
      # "longer object length is not a multiple of shorter object length":
      suppressWarnings(
        expect_false(isTRUE(all.equal(cvvs_cvfits[[vsel_nm]],
                                      cvvss[[tstsetup]][[vsel_nm]])),
                     info = paste(tstsetup, vsel_nm, sep = "__"))
      )
    }
  }
})

test_that("`cvfun` included in the `refmodel` object works", {
  skip_if_not(run_cvvs)
  tstsetups <- c(
    head(grep("^rstanarm\\..*\\.kfold", names(cvvss), value = TRUE), 1),
    head(grep("^brms\\..*\\.kfold", names(cvvss), value = TRUE), 1)
  )
  for (tstsetup in tstsetups) {
    # No `cvfits` are supplied here (note `cvfits_expected = NULL` below), so
    # K-fold CV presumably falls back to the reference model's `cvfun` — the
    # behavior this test is named after.
    cvvs_cvfun <- do.call(cv_varsel, c(
      list(object = refmods[[args_cvvs[[tstsetup]]$tstsetup_ref]], K = K_tst),
      excl_nonargs(args_cvvs[[tstsetup]])
    ))
    vsel_tester(
      cvvs_cvfun,
      with_cv = TRUE,
      refmod_expected = refmods[[args_cvvs[[tstsetup]]$tstsetup_ref]],
      cvfits_expected = NULL,

      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = args_cvvs[[tstsetup]]$method %||% "forward",
      cv_method_expected = "kfold",
      valsearch_expected = args_cvvs[[tstsetup]]$validate_search,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})

## cv_varsel.vsel() -------------------------------------------------------

test_that(paste(
  "cv_varsel.vsel() (in particular, re-using `cv_method` and",
  "`validate_search`) works for `vsel` objects from cv_varsel()"
), {
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 2)
  }
  for (tstsetup in tstsetups) {
    # `refit_prj = FALSE` is only exercised for PSIS-LOO CV with
    # `validate_search = FALSE`; all other setups re-project:
    refit_prj_crr <-
      !identical(args_cvvs[[tstsetup]]$validate_search, FALSE) ||
      identical(args_cvvs[[tstsetup]]$cv_method, "kfold")
    nclusters_pred_crr <- nclusters_pred_tst - if (refit_prj_crr) 1L else 0L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_eval <- suppressWarnings(cv_varsel(
      cvvss[[tstsetup]],
      refit_prj = refit_prj_crr,
      nclusters_pred = nclusters_pred_crr,
      verbose = FALSE,
      seed = seed2_tst
    ))
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs[[tstsetup]]$cv_method,
                                      "kfold")) {
        cvfitss[[tstsetup_ref]]
      } else {
        refmods[[tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = args_cvvs[[tstsetup]]$cv_method,
      nloo_expected = args_cvvs[[tstsetup]]$nloo,
      valsearch_expected = args_cvvs[[tstsetup]]$validate_search,
      refit_prj_expected = refit_prj_crr,
      nprjdraws_eval_expected = if
(!refit_prj_crr && meth_exp_crr == "L1") { 1L } else if (!refit_prj_crr) { nclusters_tst } else { nclusters_pred_crr }, search_terms_expected = args_cvvs[[tstsetup]]$search_terms, search_trms_empty_size = length(args_cvvs[[tstsetup]]$search_terms) && all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)), search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")], info_str = tstsetup ) } }) test_that(paste( "cv_varsel.vsel() with PSIS-LOO CV and `validate_search = FALSE` works for", "`vsel` objects from varsel()" ), { skip_if_not(run_vs) skip_if_not(run_cvvs) tstsetup_counter <- 0L for (tstsetup in names(vss)) { if (!run_more && tstsetup_counter > 0L) { next } else if (run_more && tstsetup_counter > 0L) { refit_prj_crr <- TRUE nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred - 1L } else { refit_prj_crr <- FALSE nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred } # Use suppressWarnings() because test_that() somehow redirects stderr() and # so throws warnings that projpred wants to capture internally: cvvs_eval <- suppressWarnings(cv_varsel( vss[[tstsetup]], validate_search = FALSE, refit_prj = refit_prj_crr, nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst )) tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward" vsel_tester( cvvs_eval, with_cv = TRUE, refmod_expected = refmods[[tstsetup_ref]], prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max, method_expected = meth_exp_crr, cv_method_expected = "LOO", valsearch_expected = FALSE, refit_prj_expected = refit_prj_crr, nprjdraws_eval_expected = if (!refit_prj_crr && meth_exp_crr == "L1") { 1L } else if (!refit_prj_crr) { nclusters_tst } else { nclusters_pred_crr }, search_terms_expected = args_vs[[tstsetup]]$search_terms, search_trms_empty_size = length(args_vs[[tstsetup]]$search_terms) && all(grepl("\\+", args_vs[[tstsetup]]$search_terms)), search_control_expected = args_vs[[tstsetup]][c("avoid.increase")], info_str 
= tstsetup ) tstsetup_counter <- tstsetup_counter + 1L } }) test_that(paste( "cv_varsel.vsel() with K-fold CV and `validate_search = FALSE` works for", "`vsel` objects from varsel()" ), { skip_if_not(run_vs) skip_if_not(run_cvvs) tstsetup_counter <- 0L for (tstsetup in names(vss)) { tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref if (grepl("\\.with_wobs", tstsetup_ref) && args_vs[[tstsetup]]$pkg_nm == "rstanarm") { # Currently, rstanarm:::kfold.stanreg() doesn't support observation # weights: next } else if (args_vs[[tstsetup]]$fam_nm == "cumul" && args_vs[[tstsetup]]$pkg_nm == "rstanarm") { # Currently, rstanarm:::kfold.stanreg() doesn't work for # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564: next } else if (!tstsetup_ref %in% names(cvfitss)) { # We would need to call run_cvfun() for this reference model object: next } if (!run_more && tstsetup_counter > 0L) { next } else if (run_more && tstsetup_counter > 0L) { nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred - 1L } else { nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred } # Use suppressWarnings() because test_that() somehow redirects stderr() and # so throws warnings that projpred wants to capture internally: cvvs_eval <- suppressWarnings(cv_varsel( vss[[tstsetup]], cv_method = "kfold", cvfits = cvfitss[[tstsetup_ref]], validate_search = FALSE, nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst )) meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward" vsel_tester( cvvs_eval, with_cv = TRUE, refmod_expected = refmods[[tstsetup_ref]], cvfits_expected = cvfitss[[tstsetup_ref]], prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max, method_expected = meth_exp_crr, cv_method_expected = "kfold", nloo_expected = NULL, valsearch_expected = FALSE, nprjdraws_eval_expected = nclusters_pred_crr, search_terms_expected = args_vs[[tstsetup]]$search_terms, search_trms_empty_size = length(args_vs[[tstsetup]]$search_terms) && all(grepl("\\+", 
                  args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    tstsetup_counter <- tstsetup_counter + 1L
  }
})

test_that(paste(
  "cv_varsel.vsel() with PSIS-LOO CV and `validate_search = FALSE` works for",
  "`vsel` objects from cv_varsel() created with `validate_search = TRUE`"
), {
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 1)
  }
  for (tstsetup in tstsetups) {
    # Only setups originally created with a validated search are relevant here:
    if (isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    }
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]],
        cv_method = "LOO",
        validate_search = FALSE,
        refit_prj = FALSE,
        verbose = FALSE,
        seed = seed2_tst
      ))
    } else {
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]],
        nloo = nobsv,
        validate_search = FALSE,
        refit_prj = FALSE,
        verbose = FALSE,
        seed = seed2_tst
      ))
    }
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    extra_tol_crr <- 1.1
    if (meth_exp_crr == "L1" &&
        any(grepl(":", ranking(cvvs_eval)[["fulldata"]]))) {
      ### Testing for non-increasing element `ce` (for increasing model size)
      ### doesn't make sense if the ranking of predictors involved in
      ### interactions has been changed, so we choose a higher `extra_tol`:
      extra_tol_crr <- 1.2
      ###
    }
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs[[tstsetup]]$cv_method,
                                      "kfold")) {
        cvfitss[[tstsetup_ref]]
      } else {
        refmods[[tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      valsearch_expected = FALSE,
      refit_prj_expected = FALSE,
      K_expected = if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
        K_tst
      } else {
        NULL
      },
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      extra_tol = extra_tol_crr,
      info_str = tstsetup
    )
  }
})

test_that(paste(
  "cv_varsel.vsel() with K-fold CV and `validate_search = FALSE` works for",
  "`vsel` objects from cv_varsel() created with `validate_search = TRUE`"
), {
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 11)
  }
  for (tstsetup in tstsetups) {
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    if (isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    } else if (grepl("\\.with_wobs", tstsetup_ref) &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_cvvs[[tstsetup]]$fam_nm == "cumul" &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    }
    nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]],
        validate_search = FALSE,
        nclusters_pred = nclusters_pred_crr,
        verbose = FALSE,
        seed = seed2_tst
      ))
    } else {
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]],
        cv_method = "kfold",
        K = K_tst,
        cvfits = cvfitss[[tstsetup_ref]],
        validate_search = FALSE,
        nclusters_pred = nclusters_pred_crr,
        verbose = FALSE,
        seed = seed2_tst
      ))
    }
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      valsearch_expected = FALSE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})

test_that(paste(
  "switching the CV method in cv_varsel.vsel() works for `vsel` objects from",
  "cv_varsel() created with `validate_search = FALSE`"
), {
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 3)
  }
  for (tstsetup in tstsetups) {
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    # Only setups originally created with `validate_search = FALSE` are
    # relevant here:
    if (!isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    } else if (grepl("\\.with_wobs", tstsetup_ref) &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_cvvs[[tstsetup]]$fam_nm == "cumul" &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    }
    nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      # Switch from K-fold to PSIS-LOO CV:
      cv_meth_crr <- "LOO"
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]],
        cv_method = cv_meth_crr,
        nclusters_pred = nclusters_pred_crr,
        verbose = FALSE,
        seed = seed2_tst
      ))
    } else {
      # Switch from PSIS-LOO to K-fold CV:
      cv_meth_crr <- "kfold"
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]],
        cv_method = cv_meth_crr,
        K = K_tst,
        cvfits = cvfitss[[tstsetup_ref]],
        nclusters_pred = nclusters_pred_crr,
        verbose = FALSE,
        seed = seed2_tst
      ))
    }
    meth_exp_crr <-
      args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = cv_meth_crr,
      valsearch_expected = FALSE,
      K_expected = K_tst,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})

test_that(paste(
  "switching the CV method in cv_varsel.vsel() works for `vsel` objects from",
  "cv_varsel() created with `validate_search = TRUE`"
), {
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 11)
  }
  for (tstsetup in tstsetups) {
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    # Only setups originally created with a validated search are relevant here:
    if (isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    } else if (grepl("\\.with_wobs", tstsetup_ref) &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_cvvs[[tstsetup]]$fam_nm == "cumul" &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    }
    nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      # Switch from K-fold to PSIS-LOO CV (with subsampled LOO):
      cv_meth_crr <- "LOO"
      nloo_crr <- nloo_tst[["subsmpl"]][["nloo"]]
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]],
        cv_method = cv_meth_crr,
        nloo = nloo_crr,
        nclusters_pred = nclusters_pred_crr,
        verbose = FALSE,
        seed = seed2_tst
      ))
    } else {
      # Switch from PSIS-LOO to K-fold CV (`nloo` does not apply there):
      cv_meth_crr <- "kfold"
      nloo_crr <- NULL
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]],
        cv_method = cv_meth_crr,
        K = K_tst,
        cvfits = cvfitss[[tstsetup_ref]],
        nclusters_pred = nclusters_pred_crr,
        verbose = FALSE,
        seed = seed2_tst
      ))
    }
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = cv_meth_crr,
      nloo_expected = nloo_crr,
      valsearch_expected = TRUE,
      K_expected = K_tst,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})

test_that(paste(
  "cv_varsel.vsel() with PSIS-LOO CV and `validate_search = TRUE` works for",
  "`vsel` objects from varsel()"
), {
  skip_if_not(run_vs)
  skip_if_not(run_cvvs)
  tstsetup_counter <- 0L
  for (tstsetup in names(vss)) {
    # Quick run: only the first setup; full run: restrict to GLMs and GAMs
    # (to save time), varying `nclusters_pred` after the first setup:
    if ((!run_more && tstsetup_counter > 0L) ||
        (run_more && !args_vs[[tstsetup]]$mod_nm %in% c("glm", "gam"))) {
      next
    } else if (run_more && tstsetup_counter > 0L) {
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred - 1L
    } else {
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred
    }
    nloo_crr <- nloo_tst[["subsmpl"]][["nloo"]]
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_eval <- try(
      suppressWarnings(cv_varsel(
        vss[[tstsetup]],
        nloo = nloo_crr,
        nclusters_pred = nclusters_pred_crr,
        verbose = FALSE,
        seed = seed2_tst
      )),
      silent = TRUE
    )
    if (inherits(cvvs_eval, "try-error")) {
      message("Failure for `tstsetup = \"", tstsetup,
              "\"` in a cv_varsel.vsel() ",
              "test. Error message: \"",
              attr(cvvs_eval, "condition")$message, "\"")
      # Check that this is a "pwrssUpdate" failure in lme4, so for solving this,
      # we would either need to tweak the lme4 tuning parameters manually (via
      # `...`) or change the data-generating mechanism here in the tests (to
      # obtain less extreme or more data):
      expect_true(grepl("pwrssUpdate", attr(cvvs_eval, "condition")$message),
                  info = tstsetup)
      # Furthermore, this should only occur in the `run_more = TRUE` case, so it
      # can be skipped (because there are enough other `tstsetups` for which
      # this works):
      expect_true(run_more, info = tstsetup)
      next
    }
    tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      nloo_expected = nloo_crr,
      valsearch_expected = TRUE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_vs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_vs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    tstsetup_counter <- tstsetup_counter + 1L
  }
})

test_that(paste(
  "cv_varsel.vsel() with K-fold CV and `validate_search = TRUE` works for",
  "`vsel` objects from varsel()"
), {
  skip_if_not(run_vs)
  skip_if_not(run_cvvs)
  tstsetup_counter <- 0L
  for (tstsetup in names(vss)) {
    tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref
    if (grepl("\\.with_wobs", tstsetup_ref) &&
        args_vs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_vs[[tstsetup]]$fam_nm == "cumul" &&
               args_vs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    } else if (!tstsetup_ref %in% names(cvfitss)) {
      # We would need to call run_cvfun() for this reference model object:
      next
    }
    if (!run_more && tstsetup_counter > 0L) {
      next
    } else if (run_more && tstsetup_counter > 0L) {
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred - 1L
    } else {
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred
    }
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_eval <- try(
      suppressWarnings(cv_varsel(
        vss[[tstsetup]],
        cv_method = "kfold",
        cvfits = cvfitss[[tstsetup_ref]],
        nclusters_pred = nclusters_pred_crr,
        verbose = FALSE,
        seed = seed2_tst
      )),
      silent = TRUE
    )
    if (inherits(cvvs_eval, "try-error")) {
      message("Failure for `tstsetup = \"", tstsetup,
              "\"` in a cv_varsel.vsel() ",
              "test. Error message: \"",
              attr(cvvs_eval, "condition")$message, "\"")
      # Check that this is a "pwrssUpdate" failure in lme4, so for solving this,
      # we would either need to tweak the lme4 tuning parameters manually (via
      # `...`) or change the data-generating mechanism here in the tests (to
      # obtain less extreme or more data):
      expect_true(grepl("pwrssUpdate", attr(cvvs_eval, "condition")$message),
                  info = tstsetup)
      # Furthermore, this should only occur in the `run_more = TRUE` case, so it
      # can be skipped (because there are enough other `tstsetups` for which
      # this works):
      expect_true(run_more, info = tstsetup)
      next
    }
    meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      valsearch_expected = TRUE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_vs[[tstsetup]]$search_terms,
      search_trms_empty_size = length(args_vs[[tstsetup]]$search_terms) &&
        all(grepl("\\+",
                  args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    tstsetup_counter <- tstsetup_counter + 1L
  }
})

test_that(paste(
  "cv_varsel.vsel() with PSIS-LOO CV and `validate_search = TRUE` works for",
  "`vsel` objects from cv_varsel() created with `validate_search = FALSE`"
), {
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 3)
  }
  for (tstsetup in tstsetups) {
    if (run_more && !args_cvvs[[tstsetup]]$mod_nm %in% c("glm", "gam")) {
      # Save some time:
      next
    }
    # Only setups originally created with `validate_search = FALSE` are
    # relevant here:
    if (!isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    }
    nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      nloo_crr <- nloo_tst[["subsmpl"]][["nloo"]]
      cvvs_eval <- try(
        suppressWarnings(cv_varsel(
          cvvss[[tstsetup]],
          cv_method = "LOO",
          nloo = nloo_crr,
          validate_search = TRUE,
          nclusters_pred = nclusters_pred_crr,
          verbose = FALSE,
          seed = seed2_tst
        )),
        silent = TRUE
      )
    } else {
      # Keep PSIS-LOO CV; re-use the existing object's `nloo`:
      nloo_crr <- cvvss[[tstsetup]][["nloo"]]
      cvvs_eval <- try(
        suppressWarnings(cv_varsel(
          cvvss[[tstsetup]],
          validate_search = TRUE,
          nclusters_pred = nclusters_pred_crr,
          verbose = FALSE,
          seed = seed2_tst
        )),
        silent = TRUE
      )
    }
    if (inherits(cvvs_eval, "try-error")) {
      message("Failure for `tstsetup = \"", tstsetup,
              "\"` in a cv_varsel.vsel() ",
              "test. Error message: \"",
              attr(cvvs_eval, "condition")$message, "\"")
      # Check that this is a "pwrssUpdate" failure in lme4, so for solving this,
      # we would either need to tweak the lme4 tuning parameters manually (via
      # `...`) or change the data-generating mechanism here in the tests (to
      # obtain less extreme or more data):
      expect_true(grepl("pwrssUpdate", attr(cvvs_eval, "condition")$message),
                  info = tstsetup)
      # Furthermore, this should only occur in the `run_more = TRUE` case, so it
      # can be skipped (because there are enough other `tstsetups` for which
      # this works):
      expect_true(run_more, info = tstsetup)
      next
    }
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    extra_tol_crr <- 1.1
    if (meth_exp_crr == "L1" &&
        any(grepl(":", ranking(cvvs_eval)[["fulldata"]]))) {
      ### Testing for non-increasing element `ce` (for increasing model size)
      ### doesn't make sense if the ranking of predictors involved in
      ### interactions has been changed, so we choose a higher `extra_tol`:
      extra_tol_crr <- 1.2
      ###
    }
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs[[tstsetup]]$cv_method,
                                      "kfold")) {
        cvfitss[[tstsetup_ref]]
      } else {
        refmods[[tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      nloo_expected = nloo_crr,
      valsearch_expected = TRUE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      K_expected = if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
        K_tst
      } else {
        NULL
      },
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      extra_tol = extra_tol_crr,
      info_str = tstsetup
    )
  }
})

test_that(paste(
  "cv_varsel.vsel() with K-fold CV and `validate_search = TRUE` works for",
  "`vsel` objects from cv_varsel() created with `validate_search = FALSE`"
), {
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 3)
  }
  for (tstsetup in tstsetups) {
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    # Only setups originally created with `validate_search = FALSE` are
    # relevant here:
    if (!isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    } else if (grepl("\\.with_wobs", tstsetup_ref) &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_cvvs[[tstsetup]]$fam_nm == "cumul" &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    }
    nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      cvvs_eval <- try(
        suppressWarnings(cv_varsel(
          cvvss[[tstsetup]],
          validate_search = TRUE,
          nclusters_pred = nclusters_pred_crr,
          verbose = FALSE,
          seed = seed2_tst
        )),
        silent = TRUE
      )
    } else {
      cvvs_eval <- try(
        suppressWarnings(cv_varsel(
          cvvss[[tstsetup]],
          cv_method = "kfold",
          K = K_tst,
          cvfits = cvfitss[[tstsetup_ref]],
          validate_search = TRUE,
          nclusters_pred = nclusters_pred_crr,
          verbose = FALSE,
          seed = seed2_tst
        )),
        silent = TRUE
      )
    }
    if (inherits(cvvs_eval, "try-error")) {
      message("Failure for `tstsetup = \"", tstsetup,
              "\"` in a cv_varsel.vsel() ",
              "test. Error message: \"",
              attr(cvvs_eval, "condition")$message, "\"")
      # Check that this is a "pwrssUpdate" failure in lme4, so for solving this,
      # we would either need to tweak the lme4 tuning parameters manually (via
      # `...`) or change the data-generating mechanism here in the tests (to
      # obtain less extreme or more data):
      expect_true(grepl("pwrssUpdate", attr(cvvs_eval, "condition")$message),
                  info = tstsetup)
      # Furthermore, this should only occur in the `run_more = TRUE` case, so it
      # can be skipped (because there are enough other `tstsetups` for which
      # this works):
      expect_true(run_more, info = tstsetup)
      next
    }
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      valsearch_expected = TRUE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})

# run_cvfun() -------------------------------------------------------------

test_that("argument `folds` of run_cvfun() works", {
  skip_if_not(run_cvvs)
  tstsetups <- names(cvfitss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 1)
  }
  # Save the RNG state so it can be restored after the set.seed() calls below:
  if (exists(".Random.seed", envir = .GlobalEnv)) {
    rng_old <- get(".Random.seed", envir = .GlobalEnv)
  }
  for (tstsetup in tstsetups) {
    set.seed(seed3_tst)
    folds_sep <- cv_folds(nobsv, K = K_tst)
    # Passing the pre-computed `folds` explicitly must reproduce the stored
    # `cvfitss` results (compared via as.matrix() to ignore other attributes
    # -- NOTE(review): presumably; confirm why as.matrix() is needed here):
    cvfits_sep <- run_cvfun(object = refmods[[tstsetup]], folds = folds_sep)
    expect_identical(lapply(cvfits_sep, as.matrix),
                     lapply(cvfitss[[tstsetup]], as.matrix),
                     info = tstsetup)
  }
  # Restore the RNG state saved above:
  if (exists("rng_old")) assign(".Random.seed", rng_old, envir = .GlobalEnv)
})