# A helper function for testing the structure of an expected extended `family` # object. # # @param extfam An object of class `family` (at least expected so) which has # been extended by projpred. # @param fam_orig The original object of class `family` which has been used as # input to extend_family(). # @param from_brms A single logical value indicating whether the extended family # was created for a reference model based on a `brmsfit` (`TRUE`) or not # (`FALSE`). # @param augdat_expected A single logical value indicating whether the extended # family is expected to be for augmented-data projection (`TRUE`) or not # (`FALSE`). # @param latent_expected A single logical value indicating whether the reference # model is expected to be for latent projection (`TRUE`) or not (`FALSE`). # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). extfam_tester <- function(extfam, fam_orig, from_brms = FALSE, augdat_expected = FALSE, latent_expected = FALSE, info_str) { # General structure tests ------------------------------------------------- ## For `fam_orig` --------------------------------------------------------- expect_s3_class(fam_orig, "family") expect_type(fam_orig, "list") # Basic names of `fam_orig` (will be adapted later if necessary): fam_orig_nms <- c("family", "link") # Check for element `family`: expect_true("family" %in% names(fam_orig), info = info_str) expect_true(is.vector(fam_orig$family, "character"), info = info_str) expect_length(fam_orig$family, 1) # Family "cumulative_rstanarm" is an artificial family, so it lacks some # elements present in actual families: if (fam_orig$family != "cumulative_rstanarm") { fam_orig_nms <- c(fam_orig_nms, "linkfun", "linkinv") } # Families from brms which require the augmented-data projection: bfam_nms <- c("categorical", "cumulative", "cratio", "sratio", "acat") # Check the names of `fam_orig`: if (fam_orig$family %in% bfam_nms) { # Just check that the basic names exist (to avoid depending too strongly on # brms internals): expect_true(all(fam_orig_nms %in% names(fam_orig)), info = info_str) } else { if (fam_orig$family != "cumulative_rstanarm") { fam_orig_nms <- c(fam_orig_nms, "variance", "dev.resids", "aic", "mu.eta", "initialize", "validmu", "valideta") if (fam_orig$family %in% c("binomial", "poisson")) { fam_orig_nms <- c(fam_orig_nms, "simulate") } } expect_true(all(fam_orig_nms %in% names(fam_orig)), info = info_str) } ## For `extfam` ----------------------------------------------------------- expect_s3_class(extfam, "family") expect_type(extfam, "list") expect_true("for_augdat" %in% names(extfam), info = info_str) expect_true(isTRUE(extfam$for_augdat) || isFALSE(extfam$for_augdat), info = info_str) expect_identical(extfam$for_augdat, augdat_expected, info = info_str) expect_true("for_latent" %in% names(extfam), info = info_str) expect_true(isTRUE(extfam$for_latent) || isFALSE(extfam$for_latent), info = info_str) expect_identical(extfam$for_latent, latent_expected, info = info_str) extfam_nms_add <- c("ce", "dis_fun", "predvar", "ll_fun", "deviance", "ppd", "for_latent", "for_augdat", "is_extended") if (extfam$for_augdat) { extfam_nms_add <- setdiff(extfam_nms_add, "deviance") extfam_nms_add <- c(extfam_nms_add, "cats", "ce_ptwise") if (extfam$family == "categorical") { extfam_nms_add <- c(extfam_nms_add, "refcat") } if (extfam$family == "cumulative_rstanarm") { extfam_nms_add <- c(extfam_nms_add, "linkfun", "linkinv") } } else if (extfam$for_latent) { extfam_nms_add <- c(extfam_nms_add, "family_oscale", "link_oscale", "latent_ilink", "latent_ll_oscale", "latent_ppd_oscale") if (extfam$family_oscale != "binomial") { extfam_nms_add <- c(extfam_nms_add, "cats") } } extfam_nms <- c(fam_orig_nms, extfam_nms_add) expect_true(all(extfam_nms %in% names(extfam)), info = info_str) # Detailed tests ---------------------------------------------------------- ## For `fam_orig` --------------------------------------------------------- fam_orig_ch <- structure( extfam[setdiff(names(fam_orig), c("dpars", "multi_dpars"))], class = if (fam_orig$family %in% bfam_nms) { c("brmsfamily", "family") } else { "family" } ) if (extfam$family == "binomial") { fam_orig_ch$initialize <- fam_orig$initialize if (extfam$for_augdat) { expect_identical( get("augdat_link", envir = environment(fam_orig_ch$linkfun)), augdat_link_binom, info = info_str ) expect_identical( get("augdat_ilink", envir = environment(fam_orig_ch$linkinv)), augdat_ilink_binom, info = info_str ) fam_orig_ch$linkfun <- fam_orig$linkfun fam_orig_ch$linkinv <- fam_orig$linkinv } } else if (extfam$for_augdat) { fam_orig_ch$linkfun <- fam_orig$linkfun fam_orig_ch$linkinv <- fam_orig$linkinv } if (!from_brms) { expect_identical(fam_orig_ch, fam_orig, ignore.environment = extfam$for_latent, info = info_str) } else if (extfam$family %in% bfam_nms) { expect_identical( fam_orig_ch, structure(fam_orig[setdiff(names(fam_orig), c("dpars", "multi_dpars"))], class = class(fam_orig)), ignore.environment = TRUE, info = info_str ) } else { expect_identical(fam_orig_ch, fam_orig, ignore.environment = TRUE, info = info_str) } ## For `extfam` ----------------------------------------------------------- if ("family_oscale" %in% names(extfam)) { expect_true(extfam$family_oscale %in% fam_nms_long, info = info_str) } if ("link_oscale" %in% names(extfam)) { expect_true(extfam$link_oscale %in% link_str, info = info_str) } if ("latent_ll_oscale" %in% names(extfam)) { if (extfam$family_oscale == "binomial") { expect_identical(extfam$latent_ll_oscale, latent_ll_oscale_binom_nocats, info = info_str) } else { expect_identical(extfam$latent_ll_oscale, latent_ll_oscale_cats, info = info_str) } } if ("latent_ppd_oscale" %in% names(extfam)) { if (extfam$family_oscale == "binomial") { expect_identical(extfam$latent_ppd_oscale, latent_ppd_oscale_binom_nocats, info = info_str) } else { expect_identical(extfam$latent_ppd_oscale, latent_ppd_oscale_cats, info = info_str) } } if ("refcat" %in% names(extfam)) { expect_true(is.vector(extfam$refcat, "character"), info = info_str) expect_length(extfam$refcat, 1) } if ("cats" %in% names(extfam)) { expect_true(is.vector(extfam$cats, "character"), info = info_str) } expect_true(extfam$is_extended, info = info_str) el_nms_clos <- setdiff( extfam_nms_add, c("family_oscale", "link_oscale", "refcat", "cats", "for_latent", "for_augdat", "is_extended") ) for (el_nm in el_nms_clos) { expect_type(extfam[[el_nm]], "closure") } if (extfam$for_augdat) { arr_pr <- abind::abind(array(c(0.7, 0.6, 0.3, 0.4), dim = c(2, 1, 2)), array(c(0.1, 0.2, 0.9, 0.8), dim = c(2, 1, 2)), along = 2) augm_pr <- arr2augmat(arr_pr, margin_draws = 1) expect_equal(extfam$linkinv(extfam$linkfun(augm_pr)), augm_pr, info = info_str) # We expect an N x S matrix: ce_pt <- extfam$ce_ptwise(mu_ref = augm_pr, mu_sub = augm_pr) expect_identical(dim(ce_pt), c(2L, 2L), info = info_str) expect_equal(ce_pt, unname(t(-apply(arr_pr * log(arr_pr), c(1, 2), sum))), info = info_str) # We expect a vector of length S: ce_summed <- extfam$ce(pref = list(mu = augm_pr), data = list(weights = rep(1, 2)), psub = list(mu = augm_pr)) expect_length(ce_summed, 2) expect_equal(ce_summed, colMeans(ce_pt), info = info_str) # We expect a vector of length S: expect_equal(extfam$dis_fun(pref = list(mu = augm_pr), psub = list(mu = augm_pr)), rep(NA, 2), info = info_str) # We expect a vector of length N_augcat = nrow(augm_pr): expect_equal(extfam$predvar(mu = augm_pr, dis = NA), rep(NA, 4), info = info_str) # We expect an N x S matrix: expect_equal(extfam$ll_fun(mu = augm_pr, y = factor(head(letters, 2)[c(2, 1)])), matrix(log(c(0.3, 0.1, 0.4, 0.2)), ncol = 2), info = info_str) # We expect a vector of length N: if (exists(".Random.seed", envir = .GlobalEnv)) { rng_state_old <- get(".Random.seed", envir = .GlobalEnv) on.exit(assign(".Random.seed", rng_state_old, envir = .GlobalEnv)) } set.seed(seed2_tst) ppd_draws <- extfam$ppd(mu = augm_pr[, 1]) expect_true(is.vector(ppd_draws, "integer"), info = info_str) expect_length(ppd_draws, 2) } # TODO: For the traditional (and latent) projection, add some mathematical # checks (i.e., check that the calculations for the objects listed in # `extfam_nms_add` are mathematically correct). # Output ------------------------------------------------------------------ return(invisible(TRUE)) } # A helper function for testing the structure of an expected `refmodel` object. # # @param refmod An object of class `refmodel` (at least expected so). # @param is_datafit A single logical value indicating whether the reference # model is expected to be a `datafit` (`TRUE`) or not (`FALSE`). # @param pkg_nm A single character string specifying the name of the package # upon whose fit the reference model (or `datafit`) is based. # @param fit_expected The expected `refmod$fit` object. # @param formul_expected The expected `refmod$formula` object. A cbind() # expression on the left-hand side of the formula is handled automatically. # @param data_expected The original dataset used for the reference model fit or # as input to get_refmodel() or init_refmodel(). Internal changes (i.e., # inside of projpred) of this dataset are performed automatically. # @param with_spclformul A single logical value indicating whether the reference # model has a special formula (`TRUE`) or not (`FALSE`). # @param nobsv_expected A single integer value giving the expected number of # observations. # @param wobs_expected The expected numeric vector of observation weights. # @param offs_expected The expected numeric vector of offsets. # @param nrefdraws_expected A single integer value giving the expected number of # posterior draws in the reference model. # @param fam_orig The original object of class `family` which has been used as # input to extend_family(). # @param mod_nm A single character string specifying the type of model (see # object `mod_nms`. # @param fam_nm A single character string specifying the family (see object # `fam_nms`. # @param augdat_expected A single logical value indicating whether the reference # model is expected to be for augmented-data projection (`TRUE`) or not # (`FALSE`). # @param latent_expected A single logical value indicating whether the reference # model is expected to be for latent projection (`TRUE`) or not (`FALSE`). # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). refmodel_tester <- function( refmod, is_datafit = FALSE, pkg_nm, fit_expected, formul_expected = get_formul_from_fit(fit_expected), data_expected = dat, with_spclformul = FALSE, nobsv_expected = nobsv, wobs_expected = wobs_tst, offs_expected = offs_tst, nrefdraws_expected = chains_tst * (iter_tst %/% 2L), fam_orig, mod_nm, fam_nm, augdat_expected = FALSE, latent_expected = FALSE, info_str ) { # Preparations: needs_wobs_added <- !is_datafit && pkg_nm == "rstanarm" && ((length(refmod$fit$weights) > 0 && !all(refmod$fit$weights == 1)) || fam_nm == "binom") if (needs_wobs_added) { data_expected$projpred_internal_wobs_stanreg <- wobs_expected } needs_offs_added <- !is_datafit && pkg_nm == "rstanarm" && length(refmod$fit$offset) > 0 && !all(refmod$fit$offset == 0) if (needs_offs_added) { data_expected$projpred_internal_offs_stanreg <- refmod$fit$offset } if (refmod$family$for_latent) { formul_expected[[2]] <- str2lang( paste0(".", as.character(formul_expected[[2]])) ) } if (!is.null(attr(terms(formul_expected), "offset"))) { # In the reference model, the offset() term is placed last: formul_expected <- update(formul_expected, . ~ . - offset(offs_col) + offset(offs_col)) } stdized_lhs <- stdize_lhs(formul_expected) formul_expected <- stdized_lhs$fml if (with_spclformul) { # Reference models take arithmetic expressions on the left-hand side of # the formula into account: y_spclformul <- stdized_lhs$y_nm_orig y_spclformul_new <- stdized_lhs$y_nm formul_expected <- update( formul_expected, as.formula(paste(y_spclformul_new, "~ .")) ) data_expected <- within(data_expected, { assign(y_spclformul_new, eval(str2lang(y_spclformul))) }) } if (!is_datafit && pkg_nm == "rstanarm" && refmod$fit$stan_function == "stan_gamm4" && refmod$family$family_oscale %||% refmod$family$family == "binomial") { # A column added internally by rstanarm which is not relevant for projpred: data_expected$temp_y <- 1 } has_grp <- mod_nm %in% c("glmm", "gamm") has_add <- mod_nm %in% c("gam", "gamm") is_gamm <- mod_nm == "gamm" # Test the general structure of the object: refmod_nms <- c( "fit", "formula", "div_minimizer", "family", "eta", "mu", "mu_offs", "dis", "y", "fetch_data", "wobs", "wdraws_ref", "offset", "cvfun", "cvfits", "extract_model_data", "ref_predfun", "mu_fun", "cvrefbuilder", "y_oscale", "nobs" ) refmod_class_expected <- "refmodel" if (is_datafit) { refmod_class_expected <- c("datafit", refmod_class_expected) } expect_s3_class(refmod, refmod_class_expected, exact = TRUE) expect_type(refmod, "list") expect_named(refmod, refmod_nms, info = info_str) # fit if (!is_datafit && pkg_nm == "rstanarm" && mod_nm == "gamm" && !all(wobs_expected == 1)) { fit_expected$weights <- wobs_expected } expect_identical(refmod$fit, fit_expected, info = info_str) # formula if (!is_gamm) { # TODO (GAMMs): Adapt the expected formula to GAMMs. if (is_datafit && pkg_nm == "brms") { expect_equal(refmod$formula, formul_expected, info = info_str) } else { expect_identical(refmod$formula, formul_expected, info = info_str) } } # div_minimizer expect_type(refmod$div_minimizer, "closure") # family extfam_tester(refmod$family, fam_orig = fam_orig, from_brms = (pkg_nm == "brms"), augdat_expected = augdat_expected, latent_expected = latent_expected, info_str = info_str) # eta # In principle, it would be desirable to compare `refmod$eta` to # `refmod$family$linkfun(refmod$mu)`, but numerical underflow and overflow can # make this problematic. (Here in the unit tests, we generate rather extreme # linear predictors, which should be avoided in the first place, but doesn't # seem to be that simple in the context of these specific unit tests.) if (refmod$family$family %in% c("binomial")) { eta_cut <- refmod$eta mu_cut <- refmod$mu tol_ex <- 1e-12 eta_cut[eta_cut < f_binom$linkfun(tol_ex)] <- f_binom$linkfun(tol_ex) eta_cut[eta_cut > f_binom$linkfun(1 - tol_ex)] <- f_binom$linkfun(1 - tol_ex) mu_cut[mu_cut < tol_ex] <- tol_ex mu_cut[mu_cut > 1 - tol_ex] <- 1 - tol_ex expect_equal(eta_cut, refmod$family$linkfun(mu_cut), info = info_str) } else if (refmod$family$family %in% fam_nms_aug_long && (any(abs(refmod$mu - 0) <= .Machine$double.eps) || any(abs(refmod$mu - 1) <= .Machine$double.eps))) { # The degenerate probabilities in `refmod$mu` are probably due to numerical # underflow and overflow (for zeros and ones, respectively), so applying the # link function would lead to infinite values. Thus, the only sensible (and # quickly feasible) check is: expect_equal(refmod$mu, refmod$family$linkinv(refmod$eta), info = info_str) } else { expect_equal(refmod$eta, refmod$family$linkfun(refmod$mu), info = info_str) } # mu ### Not needed because of the more precise test below: # expect_true(is.matrix(refmod$mu), info = info_str) # expect_type(refmod$mu, "double") # expect_identical(dim(refmod$mu), c(nobsv_expected, nrefdraws_expected), # info = info_str) ### if (!is_datafit) { ### Helpful for debugging: # mu_expected_ch <- unname(t(posterior_linpred(refmod$fit))) ### if (pkg_nm == "rstanarm") { # In this case, the linear predictors are calculated manually because of # the offset issues in rstanarm. drws <- as.matrix(refmod$fit) if ("(Intercept)" %in% colnames(drws)) { drws_icpt <- drws[, "(Intercept)"] } else { drws_icpt <- numeric(nrow(drws)) drws_thres <- drws[, grep("\\|", colnames(drws))] } drws_beta_cont <- drws[ , setdiff(grep("xco\\.", colnames(drws), value = TRUE), grep("z\\.", colnames(drws), value = TRUE)), drop = FALSE ] predictors_cont <- colnames(drws_beta_cont) predictors_cont <- sub("(I\\(.*as\\.logical\\(.*\\)\\))TRUE", "\\1", predictors_cont) predictors_cont <- unique(sub("(poly[m]*\\(.*\\))[[:digit:]]+", "\\1", predictors_cont)) mm_cont <- model.matrix( as.formula(paste("~", paste(predictors_cont, collapse = " + "))), data = data_expected ) stopifnot(identical(c("(Intercept)", colnames(drws_beta_cont)), colnames(mm_cont))) mu_expected <- cbind(drws_icpt, drws_beta_cont) %*% t(mm_cont) cate_post <- lapply(names(x_cate_list), function(x_cate_idx) { sapply(x_cate_list[[x_cate_idx]]$x_cate, function(lvl_obs_i) { if (lvl_obs_i != "lvl1") { return(drws[, paste0("xca.", x_cate_idx, lvl_obs_i)]) } else { return(matrix(0, nrow = nrow(drws))) } }) }) mu_expected <- mu_expected + do.call("+", cate_post) if (has_grp) { r_post <- lapply(names(z_list), function(z_nm) { unname( drws[, paste0("b[(Intercept) ", z_nm, ":", z_list[[z_nm]]$z, "]")] + drws[, paste0("b[xco.1 ", z_nm, ":", z_list[[z_nm]]$z, "]")] %*% diag(x_cont[, 1]) ) }) mu_expected <- mu_expected + do.call("+", r_post) } if (has_add) { drws_beta_s <- drws[ , grep("^s\\(", colnames(drws), value = TRUE), drop = FALSE ] ### TODO (GAMs and GAMMs): Do this manually: mm_s <- rstanarm:::pp_data(refmod$fit)$x mm_s <- mm_s[, grep("^s\\(", colnames(mm_s), value = TRUE), drop = FALSE] ### mu_expected <- mu_expected + drws_beta_s %*% t(mm_s) } mu_expected <- unname(mu_expected) } else if (pkg_nm == "brms") { mu_expected <- sweep(posterior_linpred(refmod$fit), 2L, offs_expected) if (fam_nm %in% fam_nms_ordin) { drws <- as.matrix(refmod$fit) drws_thres <- drws[, grep("b_Intercept\\[", colnames(drws))] } } if (refmod$family$family != "gaussian") { if (refmod$family$family %in% fam_nms_aug_long) { if (refmod$family$family %in% fam_nms_ordin_long) { if (refmod$family$family %in% c("cumulative", "cumulative_rstanarm", "sratio")) { mu_expected <- apply(drws_thres, 2, function(thres_vec) { thres_vec - mu_expected }, simplify = FALSE) } else if (refmod$family$family %in% c("cratio", "acat")) { mu_expected <- apply(drws_thres, 2, function(thres_vec) { mu_expected - thres_vec }, simplify = FALSE) } mu_expected <- do.call(abind::abind, c(mu_expected, rev.along = 0)) } mu_expected <- arr2augmat(mu_expected, margin_draws = 1) mu_expected <- refmod$family$linkinv(mu_expected) } else { mu_expected <- fam_orig$linkinv(mu_expected) } } if (refmod$family$for_augdat && refmod$family$family == "binomial") { mu_expected <- cbind(1 - mu_expected, mu_expected) } if (!refmod$family$family %in% fam_nms_aug_long) { mu_expected <- t(mu_expected) } if (refmod$family$for_augdat && refmod$family$family == "binomial") { mu_expected <- structure(mu_expected, ndiscrete = 2L, class = "augmat") } expect_equal(refmod$mu, mu_expected, info = info_str) } else { if (refmod$family$family != "binomial") { expect_identical(refmod$mu, as.matrix(refmod$y), info = info_str) } else { expect_identical(refmod$mu, as.matrix(refmod$y / refmod$wobs), info = info_str) } } # mu_offs expect_equal( refmod$mu_offs, refmod$family$linkinv( refmod$eta + ifelse(refmod$family$family %in% fams_neg_linpred(), -1, 1) * refmod$offset ), info = info_str ) if (!is_datafit) { expect_equal( refmod$mu_offs, refmod$family$linkinv(refmod$ref_predfun( refmod$fit, excl_offs = FALSE, mlvl_allrandom = getOption("projpred.mlvl_proj_ref_new", FALSE) )), info = info_str ) } # dis if (refmod$family$family == "gaussian") { if (is_datafit) { expect_identical(refmod$dis, 0, info = info_str) } else if (latent_expected) { expect_identical(refmod$dis, rep(1.6, nrefdraws_expected), info = info_str) } else { expect_true(is.vector(refmod$dis, "double"), info = info_str) expect_length(refmod$dis, nrefdraws_expected) expect_true(all(refmod$dis > 0), info = info_str) } } else { expect_identical(refmod$dis, rep(NA, nrefdraws_expected), info = info_str) } # y ### Not needed because of the more precise test below: # expect_true(is.vector(refmod$y, "numeric"), info = info_str) # expect_length(refmod$y, nobsv_expected) ### if (!with_spclformul) { y_expected <- dat[[paste("y", mod_nm, fam_nm, sep = "_")]] if (!is_datafit && pkg_nm == "brms" && packageVersion("brms") < "2.16.11" && fam_nm == "brnll") { # Fixed (as a side effect) by brms PR #1314: y_expected <- as.numeric(y_expected) } if (latent_expected) { y_expected <- unname(colMeans(posterior_linpred(fit_expected))) } } else { y_expected <- data_expected[[y_spclformul_new]] } if (refmod$family$for_augdat) { y_expected <- as.factor(y_expected) if (fam_nm %in% fam_nms_aug && pkg_nm == "brms") { # brms seems to set argument `contrasts`, but this is not important for # projpred, so ignore it in the comparison: attr(y_expected, "contrasts") <- attr(refmod$y, "contrasts") } } expect_identical(refmod$y, y_expected, info = info_str) # fetch_data expect_type(refmod$fetch_data, "closure") if (!is_gamm) { # TODO (GAMMs): Adapt this to GAMMs. if (latent_expected) { data_expected[[stdized_lhs$y_nm]] <- y_expected } if ((!is_datafit && pkg_nm != "brms") || (is_datafit && (pkg_nm == "brms" || fam_nm != "binom"))) { if (pkg_nm != "brms" && fam_nm == "binom" && needs_wobs_added) { data_expected <- data_expected[, c( setdiff(names(data_expected), "projpred_internal_wobs_stanreg"), "projpred_internal_wobs_stanreg" )] } expect_identical(refmod$fetch_data(), data_expected, info = info_str) } else if (!is_datafit && pkg_nm == "brms") { if (!with_spclformul) { refdat_colnms <- as.character( attr(terms(formul_expected), "variables") )[-1] refdat_colnms <- sub(".*\\|[[:blank:]]*", "", refdat_colnms) refdat_colnms <- sub("s\\((.*)\\)", "\\1", refdat_colnms) if (!all(wobs_expected == 1)) { refdat_colnms <- c(head(refdat_colnms, 1), "wobs_col", tail(refdat_colnms, -1)) } refdat_colnms <- sub("^offset\\((.*)\\)$", "\\1", refdat_colnms) if (latent_expected) { # Re-order: refdat_colnms <- c(sub("^\\.", "", stdized_lhs$y_nm), setdiff(refdat_colnms, stdized_lhs$y_nm), stdized_lhs$y_nm) } refdat_ch <- data_expected[, refdat_colnms, drop = FALSE] expect_equal(refmod$fetch_data(), refdat_ch, check.attributes = FALSE, info = info_str) } else { # TODO: The check in this case is not optimal yet because it subsets # `refmod$fetch_data()` to only those columns which also exist in the # rebuilt dataset. It would be more desirable to check *all* columns of # `refmod$fetch_data()`. refdat_ch <- model.frame(formul_expected, data = data_expected) if (!all(wobs_expected == 1)) { refdat_ch <- cbind(refdat_ch, wobs_col = data_expected$wobs_col) } names(refdat_ch) <- sub("^offset\\((.*)\\)$", "\\1", names(refdat_ch)) expect_equal(refmod$fetch_data()[, names(refdat_ch), drop = FALSE], refdat_ch, check.attributes = FALSE, info = info_str) } } else if (is_datafit && pkg_nm != "brms" && fam_nm == "binom") { refdat_ch <- data_expected y_nm <- paste("y", mod_nm, fam_nm, sep = "_") refdat_ch$dummy_nm <- refdat_ch$wobs_col - refdat_ch[, y_nm] names(refdat_ch)[names(refdat_ch) == "dummy_nm"] <- paste("wobs_col -", y_nm) expect_identical(refmod$fetch_data(), refdat_ch, info = info_str) } else { stop("This case should not occur.") } } # wobs ### Not needed because of the more precise test below: # expect_true(is.vector(refmod$wobs, "numeric"), info = info_str) # expect_length(refmod$wobs, nobsv_expected) # expect_true(all(refmod$wobs > 0), info = info_str) ### if (!is_gamm) { # TODO (GAMMs): Adapt the expected observation weights to GAMMs. expect_identical(refmod$wobs, wobs_expected, info = info_str) } # wdraws_ref expect_true(is.vector(refmod$wdraws_ref, "double"), info = info_str) expect_length(refmod$wdraws_ref, nrefdraws_expected) expect_true(all(refmod$wdraws_ref > 0), info = info_str) # offset ### Not needed because of the more precise test below: # expect_true(is.vector(refmod$offset, "double"), info = info_str) # expect_length(refmod$offset, nobsv_expected) ### expect_identical(refmod$offset, offs_expected, info = info_str) # cvfun expect_type(refmod$cvfun, "closure") # cvfits expect_null(refmod$cvfits, info = info_str) expect_false(is.null(refmod$cvfun) && is.null(refmod$cvfits), info = info_str) # extract_model_data expect_type(refmod$extract_model_data, "closure") # ref_predfun expect_type(refmod$ref_predfun, "closure") # mu_fun expect_type(refmod$mu_fun, "closure") # cvrefbuilder expect_type(refmod$cvrefbuilder, "closure") # y_oscale if (latent_expected) { y_oscale_expected <- data_expected[[sub("^\\.", "", stdized_lhs$y_nm)]] if (!is.null(refmod$family$cats) && !is.factor(y_oscale_expected)) { y_oscale_expected <- as.factor(y_oscale_expected) } if (pkg_nm == "brms") { # brms seems to set argument `contrasts`, but this is not important for # projpred, so ignore it in the comparison: attr(y_oscale_expected, "contrasts") <- attr(refmod$y_oscale, "contrasts") } } else { y_oscale_expected <- y_expected } expect_identical(refmod$y_oscale, y_oscale_expected, info = info_str) # nobs expect_identical(refmod$nobs, nrow(refmod$fetch_data()), info = info_str) return(invisible(TRUE)) } # A helper function for testing the structure of a list of fits for the same # single submodel, with that submodel coming from a traditional projection. # # @inheritParams outdmin_tester # @param seq_extensive_tests A numeric vector of indexes from # `seq_along(outdmin_totest)` indicating those elements of `outdmin_totest` # for which more extensive tests should be conducted. # @param nobsv The number of (possibly augmented) observations. # # @return `TRUE` (invisible). outdmin_tester_trad <- function( outdmin_totest, nprjdraws_expected, sub_formul, sub_data, sub_fam, has_grp = formula_contains_group_terms(sub_formul[[1]]), has_add = formula_contains_additive_terms(sub_formul[[1]]), wobs_expected = wobs_tst, prd_trms_vsel_L1_search = NULL, with_offs = FALSE, augdat_cats = NULL, allow_w_zero = FALSE, check_y_from_resp = TRUE, seq_extensive_tests = seq_extensive_tests, nobsv = nobsv, info_str ) { from_vsel_L1_search <- !is.null(prd_trms_vsel_L1_search) if (!has_grp && !has_add) { sub_x_expected <- model.matrix(sub_formul[[1]], data = sub_data) sub_x_expected <- sub_x_expected[ , colnames(sub_x_expected) != "(Intercept)", drop = FALSE ] I_logic_excl <- grep("I\\(.*as\\.logical\\(.*\\)\\)FALSE", colnames(sub_x_expected), value = TRUE) if (length(I_logic_excl) && from_vsel_L1_search) { sub_x_expected <- sub_x_expected[ , setdiff(colnames(sub_x_expected), I_logic_excl), drop = FALSE ] } ncoefs <- ncol(sub_x_expected) if (from_vsel_L1_search) { # Unfortunately, model.matrix() uses terms() and there seems to be no way # to set `keep.order = TRUE` in that internal terms() call. Thus, we have # to reorder the columns manually: if (length(prd_trms_vsel_L1_search)) { terms_contr_expd <- lapply(prd_trms_vsel_L1_search, function(term_crr) { term_crr <- strsplit(term_crr, ":")[[1]] main_terms_expand <- lapply(term_crr, function(main_term_crr) { if (!is.factor(sub_data[[main_term_crr]])) { main_term_crr <- sub("(I\\(.*as\\.logical\\(.*\\)\\))", "\\1TRUE", main_term_crr) main_term_crr <- expand_poly( main_term_crr, info_str = paste0(info_str, "__", main_term_crr) ) return(main_term_crr) } else { return(paste0(main_term_crr, levels(sub_data[[main_term_crr]])[-1])) } }) return(apply(expand.grid(main_terms_expand), 1, paste, collapse = ":")) }) terms_contr_expd <- unlist(terms_contr_expd) colnames(sub_x_expected) <- reorder_ias(colnames(sub_x_expected), terms_contr_expd) sub_x_expected <- sub_x_expected[ , colnames(sub_x_expected)[order(match(colnames(sub_x_expected), terms_contr_expd))], drop = FALSE ] } } subfit_nms <- c("alpha", "beta", "w", "formula", "x", "y", "xlvls") if (from_vsel_L1_search) { subfit_nms <- setdiff(subfit_nms, "y") } for (j in seq_along(outdmin_totest)) { expect_s3_class(outdmin_totest[[!!j]], "subfit") expect_type(outdmin_totest[[!!j]], "list") expect_named(outdmin_totest[[!!j]], subfit_nms, info = info_str) if (j %in% seq_extensive_tests) { expect_true(is.vector(outdmin_totest[[!!j]]$alpha, "double"), info = info_str) expect_length(outdmin_totest[[!!j]]$alpha, 1) if (ncoefs > 0 || !from_vsel_L1_search) { expect_true(is.matrix(outdmin_totest[[!!j]]$beta), info = info_str) expect_true(is.numeric(outdmin_totest[[!!j]]$beta), info = info_str) expect_identical(dim(outdmin_totest[[!!j]]$beta), c(ncoefs, 1L), info = info_str) expect_identical(rownames(outdmin_totest[[!!j]]$beta), colnames(outdmin_totest[[!!j]]$x), info = info_str) } else { expect_null(outdmin_totest[[!!j]]$beta, info = info_str) } if (!from_vsel_L1_search) { expect_true(is.matrix(outdmin_totest[[!!j]]$w), info = info_str) expect_type(outdmin_totest[[!!j]]$w, "double") expect_identical(dim(outdmin_totest[[!!j]]$w), c(nobsv, 1L), info = info_str) } else { expect_true(is.vector(outdmin_totest[[!!j]]$w, "double"), info = info_str) expect_length(outdmin_totest[[!!j]]$w, nobsv) } if (allow_w_zero) { expect_true(all(outdmin_totest[[!!j]]$w >= 0), info = info_str) } else { expect_true(all(outdmin_totest[[!!j]]$w > 0), info = info_str) } if (sub_fam == "gaussian") { # Note: For non-Gaussian families, a comparison of # `outdmin_totest[[j]]$w` with `wobs_expected` doesn't make sense # since `glm_ridge(<...>)$w` contains the weights of the # pseudo-Gaussian observations as calculated in pseudo_data(). expect_equal(as.vector(outdmin_totest[[!!j]]$w), wobs_expected %||% rep(1, nobsv), info = info_str) } expect_s3_class(outdmin_totest[[!!j]]$formula, "formula") if (!grepl(":", as.character(outdmin_totest[[j]]$formula)[3])) { expect_equal(outdmin_totest[[!!j]]$formula, sub_formul[[!!j]], info = info_str) } else { # The order of terms as well as the order of individual terms within # ":" interaction terms might be changed in the reference model: expect_equal(outdmin_totest[[!!j]]$formula[[2]], sub_formul[[!!j]][[2]], info = info_str) trms_to_test <- labels(terms(outdmin_totest[[j]]$formula)) trms_ch <- labels(terms(sub_formul[[j]])) trms_ch <- reorder_ias(trms_ch, trms_to_test) expect_identical(trms_to_test, trms_ch, info = info_str) } x_to_test <- outdmin_totest[[j]]$x x_ch <- sub_x_expected dimnames(x_ch)[[2]] <- reorder_ias(dimnames(x_ch)[[2]], dimnames(x_to_test)[[2]]) expect_identical(x_to_test, x_ch, info = info_str) if (!from_vsel_L1_search) { y_ch <- setNames(eval(str2lang(as.character(sub_formul[[j]])[2]), sub_data), seq_len(nobsv)) if (!is.null(augdat_cats)) { stopifnot(is.factor(y_ch), identical(levels(y_ch), c("0", "1"))) y_ch <- setNames(as.integer(y_ch) - 1L, names(y_ch)) } expect_identical(outdmin_totest[[!!j]]$y, y_ch, info = info_str) } # TODO: For now, we don't need to take `character` predictors into # account because in the tests, we are currently always using `factor`s # in such cases. In the future, however, we should extend the tests to # `character` predictors as well and then the following needs to be # adapted. nms_fac <- grep("xca", labels(terms(outdmin_totest[[j]]$formula)), value = TRUE) nms_fac <- grep(":", nms_fac, value = TRUE, invert = TRUE) if (length(nms_fac)) { xlvls_ch <- lapply(setNames(nm = nms_fac), function(nm_fac) { levels(droplevels(sub_data[[nm_fac]])) }) expect_setequal(names(outdmin_totest[[!!j]]$xlvls), names(xlvls_ch)) xlvls_ch <- xlvls_ch[names(outdmin_totest[[j]]$xlvls)] expect_identical(outdmin_totest[[!!j]]$xlvls, xlvls_ch, info = info_str) } else { expect_null(outdmin_totest[[!!j]]$xlvls, info = info_str) } } } } else if (has_grp && !has_add) { if (sub_fam == "gaussian") { for (j in seq_along(outdmin_totest)) { expect_s4_class(outdmin_totest[[!!j]], "lmerMod") } } else { for (j in seq_along(outdmin_totest)) { expect_s4_class(outdmin_totest[[!!j]], "glmerMod") } } sub_trms_for_mf <- labels(terms(sub_formul[[1]])) sub_trms_for_mf <- sub( "^[[:blank:]]*(.*)[[:blank:]]*\\|[[:blank:]]*(.*)[[:blank:]]*$", "\\1 + \\2", sub_trms_for_mf ) sub_trms_for_mf <- unique(sub_trms_for_mf) sub_formul_for_mf <- as.formula(paste( "~", paste(sub_trms_for_mf, collapse = " + ") )) sub_mf_expected <- model.frame(sub_formul_for_mf, data = sub_data) sub_formul_for_mm <- grep("\\|", labels(terms(sub_formul[[1]])), value = TRUE, invert = TRUE) if (length(sub_formul_for_mm) == 0) { sub_formul_for_mm <- "1" } sub_formul_for_mm <- as.formula(paste( "~", paste(sub_formul_for_mm, collapse = " + ") )) mm_expected <- model.matrix(sub_formul_for_mm, data = sub_mf_expected) attr(mm_expected, "msgScaleX") <- character() for (j in seq_extensive_tests) { # formula if (!with_offs) { sub_formul_expected <- flatten_formula(sub_formul[[j]]) } else { sub_formul_expected <- sub_formul[[j]] } expect_equal(outdmin_totest[[!!j]]@call[["formula"]], sub_formul_expected, info = info_str) # resp if (!with_offs) { offs_expected <- numeric(nobsv) } else { offs_expected <- offs_tst } expect_identical(outdmin_totest[[!!j]]@resp$offset, offs_expected, info = info_str) if (!is.null(wobs_expected)) { if (is.matrix(wobs_expected)) { wobs_expected_j <- wobs_expected[, j] } else { wobs_expected_j <- wobs_expected } expect_equal(outdmin_totest[[!!j]]@resp$weights, wobs_expected_j, info = info_str) } else { expect_equal(outdmin_totest[[!!j]]@resp$weights, rep(1, nobsv), info = info_str) } y_from_resp <- outdmin_totest[[j]]@resp$y if (!is.null(augdat_cats)) { y_from_resp <- as.factor(y_from_resp) } if (check_y_from_resp) { expect_equal(y_from_resp, eval(str2lang(as.character(sub_formul[[!!j]])[2]), sub_data), info = info_str) } # frame expect_identical(outdmin_totest[[!!j]]@frame, model.frame(outdmin_totest[[!!j]]), info = info_str) if (check_y_from_resp) { expect_equal( outdmin_totest[[!!j]]@frame[[ grep("y_|ybinprop", names(outdmin_totest[[!!j]]@frame), value = TRUE) ]], y_from_resp, info = info_str ) } if (!is.null(wobs_expected)) { expect_equal(structure(outdmin_totest[[!!j]]@frame$`(weights)`, ndiscrete = NULL, class = NULL), outdmin_totest[[!!j]]@resp$weights, info = info_str) } else { expect_null(outdmin_totest[[!!j]]@frame$`(weights)`, info = info_str) } if (with_offs) { expect_equal(outdmin_totest[[!!j]]@frame$`offset(offs_col)`, offs_expected, info = info_str) } frame_nms <- grep("y_|ybinprop|^\\(weights\\)$|^offset\\(.*\\)$", names(outdmin_totest[[j]]@frame), value = TRUE, invert = TRUE) expect_setequal(frame_nms, names(sub_mf_expected)) expect_equal( outdmin_totest[[!!j]]@frame[frame_nms], sub_mf_expected[frame_nms], info = info_str ) # model.matrix() expect_identical(model.matrix(outdmin_totest[[!!j]]), mm_expected, info = info_str) # flist expect_type(outdmin_totest[[!!j]]@flist, "list") expect_length(outdmin_totest[[!!j]]@flist, length(nlvl_ran)) z_nms <- intersect(names(outdmin_totest[[j]]@flist), names(outdmin_totest[[j]]@frame)) expect_identical(outdmin_totest[[!!j]]@flist[z_nms], as.list(outdmin_totest[[!!j]]@frame[z_nms]), info = info_str) # coef() coefs_crr <- coef(outdmin_totest[[j]]) expect_type(coefs_crr, "list") expect_length(coefs_crr, length(nlvl_ran)) for (zz in seq_len(length(nlvl_ran))) { expect_true(is.data.frame(coefs_crr[[zz]]), info = paste(info_str, j, zz, sep = "__")) expect_identical(nrow(coefs_crr[[zz]]), unname(nlvl_ran[zz]), info = paste(info_str, j, zz, sep = "__")) expect_true(all(sapply(coefs_crr[[zz]], is.numeric)), info = paste(info_str, j, zz, sep = "__")) } } } else if (!has_grp && has_add) { for (j in seq_along(outdmin_totest)) { expect_s3_class(outdmin_totest[[!!j]], "gam") } # TODO (GAMs): Add more expectations for GAMs. } else if (has_grp && has_add) { for (j in seq_along(outdmin_totest)) { expect_s3_class(outdmin_totest[[!!j]], "gamm4") } # TODO (GAMMs): Add more expectations for GAMMs. } return(invisible(TRUE)) } # A helper function for testing the structure of a list of fits for the same # single submodel, with that submodel coming from an augmented-data projection. # # @inheritParams outdmin_tester_trad # # @return `TRUE` (invisible). outdmin_tester_aug <- function( outdmin_totest, nprjdraws_expected, sub_formul, sub_data, sub_fam, has_grp = formula_contains_group_terms(sub_formul[[1]]), has_add = formula_contains_additive_terms(sub_formul[[1]]), wobs_expected = wobs_tst, with_offs = FALSE, augdat_cats = NULL, allow_w_zero = FALSE, check_y_from_resp = TRUE, seq_extensive_tests = seq_extensive_tests, nobsv = nobsv, info_str ) { sub_formul <- sub_formul[[1]] if (has_add) { stop("This case should not occur (yet).") } else if (!has_grp) { if (sub_fam %in% c("cumulative", "cumulative_rstanarm")) { for (j in seq_along(outdmin_totest)) { expect_s3_class(outdmin_totest[[!!j]], "polr") } for (j in seq_extensive_tests) { # coef() coefs_crr <- coef(outdmin_totest[[j]]) expect_true(is.vector(coefs_crr, "numeric"), info = info_str) expect_named( coefs_crr, grep("Intercept", colnames(model.matrix(sub_formul, data = sub_data)), value = TRUE, invert = TRUE), info = info_str ) # zeta zeta_crr <- outdmin_totest[[j]]$zeta expect_true(is.vector(zeta_crr, "numeric"), info = info_str) expect_named( zeta_crr, paste(head(augdat_cats, -1), tail(augdat_cats, -1), sep = "|"), info = info_str ) # lev expect_identical(outdmin_totest[[!!j]]$lev, augdat_cats, info = info_str) # method expect_identical(outdmin_totest[[!!j]]$method, "logistic", info = info_str) } } else if (sub_fam == "categorical") { for (j in seq_along(outdmin_totest)) { expect_s3_class(outdmin_totest[[!!j]], c("multinom", "nnet")) } for (j in seq_extensive_tests) { # coef() coefs_crr <- coef(outdmin_totest[[j]]) expect_true(is.matrix(coefs_crr), info = info_str) expect_true(is.numeric(coefs_crr), info = info_str) expect_identical( dimnames(coefs_crr), list(tail(augdat_cats, -1), colnames(model.matrix(sub_formul, data = sub_data))), info = info_str ) # lev expect_identical(outdmin_totest[[!!j]]$lev, augdat_cats, info = info_str) } } else { stop("Unexpected `sub_fam` value of `", sub_fam, "`. Info: ", info_str) } } else if (has_grp) { grp_trms_for_coef <- extract_terms_response(sub_formul)$group_terms grp_trms_for_coef <- sub("[[:blank:]]*\\|.*$", "", grp_trms_for_coef) coef_nms <- strsplit(grp_trms_for_coef, "[[:blank:]]*\\+[[:blank:]]*") coef_nms <- union("1", coef_nms) coef_nms <- sub("^1$", "(Intercept)", unlist(coef_nms)) if (sub_fam %in% c("cumulative", "cumulative_rstanarm")) { for (j in seq_along(outdmin_totest)) { expect_s3_class(outdmin_totest[[!!j]], "clmm") } for (j in seq_extensive_tests) { # alpha alpha_crr <- outdmin_totest[[j]]$alpha expect_true(is.vector(alpha_crr, "numeric"), info = info_str) expect_named( alpha_crr, paste(head(augdat_cats, -1), tail(augdat_cats, -1), sep = "|"), info = info_str ) # beta coefs_crr <- outdmin_totest[[j]]$beta expect_true(is.vector(coefs_crr, "numeric"), info = info_str) ### A quick-and-dirty workaround to get rid of group-level terms: stopifnot(identical(trms_grp, c("(xco.1 | z.1)"))) sub_formul_no_grp <- update(sub_formul, . ~ . - (1 | z.1) - (xco.1 | z.1)) ### coefs_nms_expected <- grep( "Intercept", colnames(model.matrix(sub_formul_no_grp, data = sub_data)), value = TRUE, invert = TRUE ) if (length(coefs_nms_expected)) { expect_named(coefs_crr, coefs_nms_expected, info = info_str) } else { expect_length(coefs_crr, 0) } # ordinal::ranef() ranef_crr <- ordinal::ranef(outdmin_totest[[j]]) expect_type(ranef_crr, "list") expect_named(ranef_crr, "z.1", info = info_str) expect_true(is.data.frame(ranef_crr[["z.1"]]), info = info_str) expect_named(ranef_crr[["z.1"]], coef_nms, info = info_str) expect_true(is.vector(ranef_crr[["z.1"]][["(Intercept)"]], "numeric"), info = info_str) expect_length(ranef_crr[["z.1"]][["(Intercept)"]], nlevels(dat$z.1)) if ("xco.1" %in% coef_nms) { expect_true(is.vector(ranef_crr[["z.1"]][["xco.1"]], "numeric"), info = info_str) expect_length(ranef_crr[["z.1"]][["xco.1"]], nlevels(dat$z.1)) } # ordinal::VarCorr() VarCorr_crr <- ordinal::VarCorr(outdmin_totest[[j]]) expect_type(VarCorr_crr, "list") expect_named(VarCorr_crr, "z.1", info = info_str) expect_true(is.matrix(VarCorr_crr[["z.1"]]), info = info_str) expect_true(is.numeric(VarCorr_crr[["z.1"]]), info = info_str) expect_identical(dimnames(VarCorr_crr[["z.1"]]), replicate(2, coef_nms, simplify = FALSE), info = info_str) expect_true(is.vector(attr(VarCorr_crr[["z.1"]], "stddev"), "numeric"), info = info_str) expect_named(attr(VarCorr_crr[["z.1"]], "stddev"), coef_nms, info = info_str) if ("xco.1" %in% coef_nms) { expect_true(is.matrix(attr(VarCorr_crr[["z.1"]], "correlation")), info = info_str) expect_true(is.numeric(attr(VarCorr_crr[["z.1"]], "correlation")), info = info_str) expect_identical(dimnames(attr(VarCorr_crr[["z.1"]], "correlation")), replicate(2, coef_nms, simplify = FALSE), info = info_str) } # formula() formula_crr <- formula(outdmin_totest[[j]]) # Unfortunately, there are some minor caveats to take care of when # comparing `formula_crr` with `sub_formul`: (i) the intercept (`1`) # needs to included explicitly, (ii) group-level terms need to be # "flattened" (but flatten_formula() would omit offset terms; adding an # argument `incl_offs` to flatten_formula() could solve this, but the # internal update() call would then still move offset terms to the end): sub_trms <- labels(terms(sub_formul)) nongrp_trms <- grep("\\|", sub_trms, value = TRUE, invert = TRUE) grp_trms <- grep("\\|", sub_trms, value = TRUE) grp_trms_flat <- flatten_group_terms(grp_trms) offs_trms <- if (with_offs) "offset(offs_col)" else NULL expect_identical( formula_crr, # Add the intercept explicitly and use the same environment: as.formula(paste(as.character(sub_formul[[2]]), "~ 1 +", paste(c(nongrp_trms, offs_trms, grp_trms_flat), collapse = " + ")), env = environment(formula_crr)), info = info_str ) # xlevels xlevels_crr <- outdmin_totest[[j]]$xlevels xca_nms <- grep("^xca\\.", labels(terms(sub_formul)), value = TRUE) if (length(xca_nms)) { expect_identical(xlevels_crr, lapply(dat[xca_nms], levels), info = info_str) } else { expect_null(xlevels_crr, info = info_str) } } } else if (sub_fam == "categorical") { for (j in seq_along(outdmin_totest)) { expect_s3_class(outdmin_totest[[!!j]], c("mmblogit", "mblogit", "mmclogit", "mclogit", "lm")) } coef_nms <- sub("^\\(Intercept\\)$", "1", coef_nms) for (j in seq_extensive_tests) { # coefmat coefs_crr <- outdmin_totest[[j]]$coefmat expect_true(is.matrix(coefs_crr), info = info_str) expect_true(is.numeric(coefs_crr), info = info_str) ### A quick-and-dirty workaround to get rid of group-level terms: stopifnot(identical(trms_grp, c("(xco.1 | z.1)"))) sub_formul_no_grp <- update(sub_formul, . ~ . - (1 | z.1) - (xco.1 | z.1)) ### expect_identical( dimnames(coefs_crr), list("Response categories" = tail(augdat_cats, -1), "Predictors" = colnames(model.matrix(sub_formul_no_grp, data = sub_data))), info = info_str ) # random.effects ranef_crr <- outdmin_totest[[j]]$random.effects expect_type(ranef_crr, "list") expect_length(ranef_crr, 1) expect_named(ranef_crr, NULL, info = info_str) expect_true(is.matrix(ranef_crr[[1]]), info = info_str) expect_true(is.numeric(ranef_crr[[1]]), info = info_str) if (packageVersion("Matrix") >= "1.5-0") { expect_null(dimnames(ranef_crr[[1]]), info = info_str) } else { expect_identical(dimnames(ranef_crr[[1]]), replicate(2, NULL, simplify = FALSE), info = info_str) } expect_identical(dim(ranef_crr[[1]]), c(nthres * length(coef_nms) * nlevels(dat$z.1), 1L), info = info_str) # VarCov VarCorr_crr <- outdmin_totest[[j]]$VarCov expect_type(VarCorr_crr, "list") expect_named(VarCorr_crr, "z.1", info = info_str) expect_true(is.matrix(VarCorr_crr[["z.1"]]), info = info_str) expect_true(is.numeric(VarCorr_crr[["z.1"]]), info = info_str) coef_nms_y <- unlist(lapply(coef_nms, function(coef_nm) { paste(tail(augdat_cats, -1), coef_nm, sep = "~") })) expect_identical(dimnames(VarCorr_crr[["z.1"]]), replicate(2, coef_nms_y, simplify = FALSE), info = info_str) # D D_crr <- outdmin_totest[[j]]$D expect_identical(D_crr, contrasts(as.factor(augdat_cats)), info = info_str) # random random_crr <- outdmin_totest[[j]]$random expect_type(random_crr, "list") expect_length(random_crr, 1) expect_named(random_crr, NULL, info = info_str) expect_named(random_crr[[1]], c("formula", "groups"), info = info_str) coef_nms_no_icpt <- setdiff(coef_nms, "1") if (length(coef_nms_no_icpt)) { expect_equal( random_crr[[1]]$formula, as.formula(paste("~", paste(coef_nms_no_icpt, collapse = "+"))), info = info_str ) } else { expect_equal(random_crr[[1]]$formula, ~ 1, info = info_str) } expect_identical(random_crr[[1]]$groups, "z.1", info = info_str) # groups groups_crr <- outdmin_totest[[j]]$groups expect_type(groups_crr, "list") expect_named(groups_crr, "z.1", info = info_str) expect_identical(levels(groups_crr[["z.1"]]), levels(dat$z.1), info = info_str) # xlevels xlevels_crr <- outdmin_totest[[j]]$xlevels xca_nms <- grep("^xca\\.", labels(terms(sub_formul)), value = TRUE) if (length(xca_nms)) { expect_identical(xlevels_crr, lapply(dat[xca_nms], levels), info = info_str) } else { expect_null(xlevels_crr, info = info_str) } } } else { stop("Unexpected `sub_fam` value of `", sub_fam, "`. Info: ", info_str) } } return(invisible(TRUE)) } # A helper function for testing the structure of a list of fits for the same # single submodel. # # @param outdmin_totest The `outdmin` object (a list of fits for a single # submodel, with one fit per projected draw) to test. # @param nprjdraws_expected A single numeric value giving the expected number of # projected draws. # @param sub_formul A list of formulas for the submodel (with one element per # projected draw). # @param sub_data The dataset used for fitting the submodel. # @param sub_fam A single character string giving the submodel's family. # @param has_grp A single logical value indicating whether the fits in # `outdmin_totest` are expected to be of class `lmerMod` or `glmerMod` (if, at # the same time, `has_add` is `FALSE`). # @param has_add A single logical value indicating whether the fits in # `outdmin_totest` are expected to be of class `gam` or `gamm4` (depending on # whether the submodel is non-multilevel or multilevel, respectively). # @param wobs_expected The expected numeric vector of observation weights. # @param prd_trms_vsel_L1_search If `outdmin_totest` comes from the L1 # `search_path` of an object of class `vsel`, provide here the predictor # ranking. Otherwise, use `NULL`. # @param with_offs A single logical value indicating whether `outdmin_totest` is # expected to include offsets (`TRUE`) or not (`FALSE`). # @param augdat_cats A character vector of response levels in case of the # augmented-data projection. Needs to be `NULL` for the traditional and the # latent projection. # @param allow_w_zero A single logical value indicating whether observation # weights are allowed to have a value of zero (`TRUE`) or not (`FALSE`). # @param check_y_from_resp A single logical value indicating whether to check # elements `outdmin_totest[[j]]@resp$y` for GLMMs (`TRUE`) or not (`FALSE`). # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). outdmin_tester <- function( outdmin_totest, nprjdraws_expected, sub_formul, sub_data, sub_fam, has_grp = formula_contains_group_terms(sub_formul[[1]]), has_add = formula_contains_additive_terms(sub_formul[[1]]), wobs_expected = wobs_tst, prd_trms_vsel_L1_search = NULL, with_offs = FALSE, augdat_cats = NULL, allow_w_zero = FALSE, check_y_from_resp = TRUE, info_str ) { expect_type(outdmin_totest, "list") expect_length(outdmin_totest, nprjdraws_expected) seq_extensive_tests <- unique(round( seq(1, length(outdmin_totest), length.out = min(length(outdmin_totest), nclusters_pred_tst)) )) if (!is.null(augdat_cats)) { nobsv <- nobsv * length(augdat_cats) expect_length(sub_formul, 1) sub_formul <- replicate(nprjdraws_expected, sub_formul[[1]], simplify = FALSE) } if (!is.null(augdat_cats) && sub_fam %in% fam_nms_aug_long) { outdmin_tester_aug( outdmin_totest = outdmin_totest, nprjdraws_expected = nprjdraws_expected, sub_formul = sub_formul, sub_data = sub_data, sub_fam = sub_fam, has_grp = has_grp, has_add = has_add, wobs_expected = wobs_expected, with_offs = with_offs, augdat_cats = augdat_cats, allow_w_zero = allow_w_zero, check_y_from_resp = check_y_from_resp, seq_extensive_tests = seq_extensive_tests, nobsv = nobsv, info_str = info_str ) } else { outdmin_tester_trad( outdmin_totest = outdmin_totest, nprjdraws_expected = nprjdraws_expected, sub_formul = sub_formul, sub_data = sub_data, sub_fam = sub_fam, has_grp = has_grp, has_add = has_add, wobs_expected = wobs_expected, prd_trms_vsel_L1_search = prd_trms_vsel_L1_search, with_offs = with_offs, augdat_cats = augdat_cats, allow_w_zero = allow_w_zero, check_y_from_resp = check_y_from_resp, seq_extensive_tests = seq_extensive_tests, nobsv = nobsv, info_str = info_str ) } return(invisible(TRUE)) } # A helper function for testing the structure of get_refdist()'s output. # # @param refd Output of get_refdist(). # @param nprjdraws_expected A single numeric value giving the expected number of # projected draws. # @param nrefdraws_expected A single numeric value giving the expected number of # posterior draws in the reference model. # @param clust_expected A single logical value giving the expected value for # `refd$clust_used`. # @param aug_expected A single logical value indicating whether get_refdist() is # assumed to have been applied for an augmented-data projection. # @param fam_expected The name of the expected `family` (as a character string). # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). refdist_tester <- function(refd, nobsv_expected = nobsv, nprjdraws_expected = nclusters_pred_tst, nrefdraws_expected = nrefdraws, clust_expected = TRUE, aug_expected = FALSE, fam_expected, info_str) { # General structure: expect_type(refd, "list") refd_nms <- c("mu", "mu_offs", "var", "dis", "wdraws_prj", "const_wdraws_prj", "nprjdraws", "cl", "wdraws_orig", "clust_used") expect_named(refd, refd_nms, info = info_str) # mu expect_equal(dim(refd$mu), c(nobsv_expected, nprjdraws_expected), info = info_str) expect_true(is.numeric(refd$mu), info = info_str) if (aug_expected) { expect_s3_class(refd$mu, "augmat") } # mu_offs expect_equal(dim(refd$mu_offs), c(nobsv_expected, nprjdraws_expected), info = info_str) expect_true(is.numeric(refd$mu_offs), info = info_str) if (aug_expected) { expect_s3_class(refd$mu_offs, "augmat") } # var expect_equal(dim(refd$var), c(nobsv_expected, nprjdraws_expected), info = info_str) if (fam_expected == "gaussian") { expect_true(is.numeric(refd$var), info = info_str) } else { expect_true(all(is.na(refd$var)), info = info_str) } if (aug_expected) { expect_s3_class(refd$var, "augmat") } # dis if (fam_expected == "gaussian") { expect_true(is.vector(refd$dis, "numeric"), info = info_str) } else { expect_true(all(is.na(refd$dis)), info = info_str) } expect_length(refd$dis, nprjdraws_expected) # wdraws_prj expect_true(is.vector(refd$wdraws_prj, "numeric"), info = info_str) expect_length(refd$wdraws_prj, nprjdraws_expected) # const_wdraws_prj expect_identical(refd$const_wdraws_prj, length(unique(refd$wdraws_prj)) == 1, info = info_str) # nprjdraws expect_equal(refd$nprjdraws, nprjdraws_expected, info = info_str) # cl expect_true(is.vector(refd$cl, "numeric"), info = info_str) expect_length(refd$cl, nrefdraws_expected) # wdraws_orig expect_identical(refd$wdraws_orig, rep(1, nrefdraws_expected), info = info_str) # clust_used expect_identical(refd$clust_used, clust_expected, info = info_str) return(invisible(TRUE)) } # A helper function for testing the structure of an expected `projection` # object. # # @param p An object of class `projection` (at least expected so). # @param prd_trms_expected Either a single numeric value giving the expected # length of the predictor ranking (not counting the intercept, even for the # intercept-only model), a character vector giving the expected predictor # ranking, or `NULL` for not testing the predictor ranking at all. # @param nprjdraws_expected A single numeric value giving the expected number of # projected draws. # @param with_clusters A single logical value indicating whether clustering was # used (`TRUE`) or not (`FALSE`). # @param seed_expected The seed which was used for clustering the posterior # draws of the reference model. # @param prjdraw_weights_expected The expected weights for the projected draws # or `NULL` for not testing these weights at all. # @param from_vsel_L1_search A single logical value indicating whether `p` uses # the L1 `search_path` of an object of class `vsel` for extracting the # subfit(s) (`TRUE`) or not (`FALSE`). # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). projection_tester <- function(p, refmod_expected, prd_trms_expected, nprjdraws_expected, with_clusters, seed_expected = seed_tst, prjdraw_weights_expected = NULL, from_vsel_L1_search = FALSE, info_str = "") { expect_s3_class(p, "projection") expect_type(p, "list") # Check the names using `ignore.order = FALSE` because an incorrect # order would mean that the documentation of project()'s return value # would have to be updated: expect_named( p, c("dis", "ce", "wdraws_prj", "const_wdraws_prj", "predictor_terms", "outdmin", "cl_ref", "wdraws_ref", "clust_used", "nprjdraws", "refmodel"), info = info_str ) # refmodel # Note: Extensive tests for `refmodel`s and `datafit`s may be run via # refmodel_tester(). expect_identical(p$refmodel, refmod_expected, info = info_str) # predictor_terms if (is.numeric(prd_trms_expected)) { expect_length(p$predictor_terms, prd_trms_expected) # Same check, but using count_terms_chosen(): expect_equal(count_terms_chosen(p$predictor_terms), prd_trms_expected + 1, info = info_str) } else if (is.character(prd_trms_expected)) { expect_identical(p$predictor_terms, prd_trms_expected, info = info_str) } # outdmin sub_trms_crr <- p$predictor_terms if (length(sub_trms_crr) == 0) { sub_trms_crr <- "1" } if (!from_vsel_L1_search) { y_nm <- as.character(p$refmodel$formula)[2] prd_trms_vsel_L1_search_crr <- NULL } else { y_nm <- "" prd_trms_vsel_L1_search_crr <- p$predictor_terms } y_nms <- y_nm # For checking for the augmented-data projection case, we use the "unsafer" # `p$refmodel$family$for_augdat` here instead of an extra argument such as # `augdat_expected` because such an argument already exists in # extfam_tester(): if (!p$refmodel$family$for_augdat) { y_nms <- paste0(".", y_nms) } # A preliminary check for `nprjdraws_expected` (doesn't work for `datafit`s # and, because of issue #131, for submodels which are GAMMs): sub_formul_crr_rhs <- as.formula(paste( "~", paste(sub_trms_crr, collapse = " + ") )) if (all(grepl("\\+", sub_trms_crr))) { # Avoid duplicated terms in the "empty_size" `search_terms` setting: sub_formul_crr_rhs <- update(sub_formul_crr_rhs, . ~ .) } if (!inherits(p$refmodel, "datafit") && !(formula_contains_additive_terms(sub_formul_crr_rhs) && formula_contains_group_terms(sub_formul_crr_rhs))) { # Number of projected draws in as.matrix.projection() (note that more # extensive tests for as.matrix.projection() may be found in # "test_as_matrix.R"): expect_identical( nrow(as.matrix(p, allow_nonconst_wdraws_prj = TRUE)), nprjdraws_expected, info = info_str ) } if (!p$refmodel$family$for_augdat && nprjdraws_expected > 1) { y_nms <- paste0(y_nms, ".", seq_len(nprjdraws_expected)) } sub_formul_crr <- lapply(y_nms, function(y_nm_i) { fml_tmp <- as.formula(paste( y_nm_i, "~", paste(sub_trms_crr, collapse = " + ") )) if (all(grepl("\\+", sub_trms_crr))) { # Avoid duplicated terms in the "empty_size" `search_terms` setting: fml_tmp <- update(fml_tmp, . ~ .) } return(fml_tmp) }) sub_data_crr <- p$refmodel$fetch_data() if (with_clusters) { if (exists(".Random.seed", envir = .GlobalEnv)) { rng_state_old <- get(".Random.seed", envir = .GlobalEnv) on.exit(assign(".Random.seed", rng_state_old, envir = .GlobalEnv)) } set.seed(seed_expected) clust_ref <- get_refdist(p$refmodel, nclusters = nprjdraws_expected) } else { clust_ref <- get_refdist(p$refmodel, ndraws = nprjdraws_expected) } if (p$refmodel$family$for_augdat) { # Create the augmented dataset: y_unqs <- p$refmodel$family$cats sub_data_crr <- do.call(rbind, lapply(y_unqs, function(y_unq) { sub_data_crr_j <- sub_data_crr sub_data_crr_j[[y_nms]] <- y_unq return(sub_data_crr_j) })) sub_data_crr[[y_nms]] <- factor(sub_data_crr[[y_nms]], levels = y_unqs) } else { for (i in seq_len(nprjdraws_expected)) { sub_data_crr[[y_nms[i]]] <- clust_ref$mu[, i] } } if (p$refmodel$family$for_augdat) { wobs_expected_crr <- unclass(clust_ref$mu) } else { wobs_expected_crr <- p$refmodel$wobs } if (p$refmodel$family$for_augdat) { augdat_cats_crr <- p$refmodel$family$cats } else { augdat_cats_crr <- NULL } outdmin_tester(p$outdmin, nprjdraws_expected = nprjdraws_expected, sub_formul = sub_formul_crr, sub_data = sub_data_crr, sub_fam = p$refmodel$family$family, wobs_expected = wobs_expected_crr, prd_trms_vsel_L1_search = prd_trms_vsel_L1_search_crr, augdat_cats = augdat_cats_crr, info_str = info_str) # dis expect_length(p$dis, nprjdraws_expected) # ce expect_type(p$ce, "double") expect_length(p$ce, 1) expect_true(!is.na(p$ce), info = info_str) # wdraws_prj expect_length(p$wdraws_prj, nprjdraws_expected) if (!is.null(prjdraw_weights_expected)) { expect_identical(p$wdraws_prj, prjdraw_weights_expected, info = info_str) } if (nprjdraws_expected == 1) { expect_identical(p$wdraws_prj, 1, info = info_str) } # cl_ref expect_true(is.vector(p$cl_ref, "numeric"), info = info_str) expect_length(p$cl_ref, length(p$refmodel$wdraws_ref)) # wdraws_ref expect_identical(p$wdraws_ref, rep(1, length(p$refmodel$wdraws_ref)), info = info_str) # clust_used expect_identical(p$clust_used, with_clusters, info = info_str) # nprjdraws expect_equal(p$nprjdraws, nprjdraws_expected, info = info_str) # const_wdraws_prj expect_identical(p$const_wdraws_prj, length(unique(p$wdraws_prj)) == 1, info = info_str) return(invisible(TRUE)) } # A helper function for testing the structure of an expected `proj_list` # object. # # @param p An object of (informal) class `proj_list` (at least expected so). # @param len_expected The expected length of `p`. # @param is_seq A single logical value indicating whether `p` is expected to be # sequential (i.e., the number of predictor terms increases by 1 from one # element of `p` to the next). # @param extra_tol A single numeric value giving the relative tolerance when # checking the monotonicity of the KL divergences. Because this is a # *relative* tolerance, 1 is the neutral value. # @param info_str A single character string giving information to be printed in # case of failure. # @param ... Arguments passed to projection_tester(), apart from # projection_tester()'s arguments `p`, `prd_trms_expected`, and `info_str`. # # @return `TRUE` (invisible). proj_list_tester <- function(p, len_expected = nterms_max_tst + 1L, is_seq = TRUE, extra_tol = 1.1, info_str = "", ...) { expect_type(p, "list") expect_length(p, len_expected) expect_true(is_proj_list(p), info = info_str) for (j in seq_along(p)) { if (is_seq) { # The j-th element should have j predictor terms (not counting the # intercept, even for the intercept-only model): prd_trms_expected_crr <- j - 1 } else { prd_trms_expected_crr <- NULL } projection_tester(p[[j]], prd_trms_expected = prd_trms_expected_crr, info_str = paste(info_str, j, sep = "__"), ...) } if (is_seq) { # For a sequential `proj_list` object and training data, `ce` should be # non-increasing for increasing model size: ceseq <- sapply(p, function(x) sum(x$ce)) expect_true(all(ifelse(sign(head(ceseq, -1)) == 1, tail(ceseq, -1) <= extra_tol * head(ceseq, -1), tail(ceseq, -1) <= 1 / extra_tol * head(ceseq, -1))), info = info_str) } return(invisible(TRUE)) } # A helper function for testing the structure of an object returned by # proj_linpred(). # # @param pl An object resulting from a call to proj_linpred(). # @param len_expected The number of `projection` objects used for `pl`. # @param nprjdraws_expected The expected number of projected draws in `pl`. # @param wdraws_prj_expected The expected attribute `wdraws_prj` (containing the # weights of the projected draws) of the `pred` and `lpd` elements. # @param nobsv_expected The expected number of observations in `pl`. # @param lpd_null_expected A single logical value indicating whether output # element `lpd` is expected to be `NULL` (`TRUE`) or not (`FALSE`). # @param ncats_nlats_expected A list of length `len_expected`. If the # augmented-data projection is expected to have been applied in case of # element `j`, then element `j` has to be a single integer value giving the # number of response categories or latent response categories (depending on # whether the linear predictor was transformed to response scale or not). # Else, element `j` has to be `integer()`. # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). pl_tester <- function(pl, len_expected = 1, nprjdraws_expected = nclusters_pred_tst, wdraws_prj_expected = NULL, nobsv_expected = nobsv, lpd_null_expected = FALSE, ncats_nlats_expected = replicate(len_expected, integer(), simplify = FALSE), info_str) { if (len_expected == 1) { pl <- list(pl) } else { expect_type(pl, "list") expect_length(pl, len_expected) } for (j in seq_along(pl)) { expect_named(pl[[!!j]], c("pred", "lpd"), info = info_str) expect_identical( dim(pl[[!!j]]$pred), c(nprjdraws_expected, nobsv_expected, ncats_nlats_expected[[!!j]]), info = info_str ) expect_identical(attr(pl[[!!j]]$pred, "wdraws_prj"), wdraws_prj_expected, info = info_str) if (!lpd_null_expected) { expect_identical(dim(pl[[!!j]]$lpd), c(nprjdraws_expected, nobsv_expected), info = info_str) expect_identical(attr(pl[[!!j]]$lpd, "wdraws_prj"), wdraws_prj_expected, info = info_str) } else { expect_null(pl[[!!j]]$lpd, info = info_str) } } return(invisible(TRUE)) } # A helper function for testing the structure of an object returned by # proj_predict(). # # @param pp An object resulting from a call to proj_predict(). # @param len_expected The number of `projection` objects used for `pp`. # @param nprjdraws_out_expected The expected number of projected draws in `pp`. # In contrast to argument `nprjdraws_expected` of pl_tester(), this also needs # to take proj_predict()'s argument `nresample_clusters` into account. # @param nobsv_expected The expected number of observations in `pp`. # @param lpd_null_expected A single logical value indicating whether output # element `lpd` is expected to be `NULL` (`TRUE`) or not (`FALSE`). # @param cats_expected A list of length `len_expected`. If the # augmented-data projection is expected to have been applied in case of # element `j`, then element `j` has to be a character vector giving the # response categories. Else, element `j` has to be `NULL`. # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). pp_tester <- function(pp, len_expected = 1, nprjdraws_out_expected = nresample_clusters_default, nobsv_expected = nobsv, cats_expected = replicate(len_expected, NULL, simplify = FALSE), info_str) { if (len_expected == 1) { pp <- list(pp) } else { expect_type(pp, "list") expect_length(pp, len_expected) } for (j in seq_along(pp)) { expect_identical(dim(pp[[!!j]]), c(nprjdraws_out_expected, nobsv_expected), info = info_str) expect_identical(attr(pp[[!!j]], "cats"), cats_expected[[j]], info = info_str) } return(invisible(TRUE)) } # A helper function for testing the structure of an expected `vsel` object. # # @param vs An object of class `vsel` (at least expected so). # @param with_cv A single logical value indicating whether `vs` was created by # cv_varsel() (`TRUE`) or not (`FALSE`). # @param refmod_expected The expected `vs$refmodel` object. # @param ywtest_expected If `vs` was created with a non-`NULL` argument `d_test` # (which is only possible for varsel()), then this needs to be the expected # `vs$y_wobs_test` object. Otherwise, this needs to be `NULL`. # @param prd_trms_len_expected A single numeric value giving the expected length # of the predictor ranking (not counting the intercept, even for the # intercept-only model). # @param method_expected The expected `vs$method` object. # @param cv_method_expected The expected `vs$cv_method` object. # @param valsearch_expected The expected `vs$validate_search` object. # @param refit_prj_expected A single logical value indicating whether argument # `refit_prj` was set to `TRUE` when calling varsel() or cv_varsel() for # creating `vs` (`TRUE`) or not (`FALSE`). # @param cl_search_expected The expected `vs$clust_used_search` object. # @param cl_eval_expected The expected `vs$clust_used_eval` object. # @param nprjdraws_search_expected The expected `vs$nprjdraws_search` object. # @param nprjdraws_eval_expected The expected `vs$nprjdraws_eval` object. # @param seed_expected The seed which was used for clustering the posterior # draws of the reference model. # @param nloo_expected Only relevant if `with_cv` is `TRUE`. The value which was # used for argument `nloo` of cv_varsel(). # @param search_trms_empty_size A single logical value indicating whether # `search_terms` was constructed in a way that causes a model size to be # without candidate models. # @param extra_tol A single numeric value giving the relative tolerance when # checking the monotonicity of the KL divergences. Because this is a # *relative* tolerance, 1 is the neutral value. # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). vsel_tester <- function( vs, with_cv = FALSE, from_datafit = FALSE, refmod_expected, ywtest_expected = NULL, cvfits_expected = refmod_expected$cvfits, prd_trms_len_expected, method_expected, cv_method_expected = NULL, valsearch_expected = NULL, refit_prj_expected = !from_datafit, cl_search_expected = !from_datafit, cl_eval_expected = !from_datafit, nprjdraws_search_expected = if (from_datafit || method_expected == "L1") { 1L } else { nclusters_tst }, nprjdraws_eval_expected = if (from_datafit || (!refit_prj_expected && method_expected == "L1")) { 1L } else if (!refit_prj_expected) { nclusters_tst } else { nclusters_pred_tst }, seed_expected = seed_tst, nloo_expected = if (with_cv && !identical(cv_method_expected, "kfold")) { refmod_expected$nobs } else { NULL }, K_expected = NULL, penalty_expected = NULL, search_terms_expected = NULL, search_trms_empty_size = FALSE, search_control_expected = NULL, extra_tol = 1.1, info_str = "" ) { # Preparations: if (with_cv) { if (is.null(cv_method_expected)) { cv_method_expected <- "LOO" } if (is.null(valsearch_expected)) { valsearch_expected <- TRUE } } if (method_expected == "L1") { cl_search_expected <- !from_datafit } if (with_cv && identical(cv_method_expected, "LOO")) { if (is.null(nloo_expected) || nloo_expected > nobsv) { nloo_expected <- nobsv } } if (search_trms_empty_size) { # This is the "empty_size" setting, so we have to subtract the skipped model # size (see issue #307): prd_trms_len_expected <- prd_trms_len_expected - 1L } # Test the general structure of the object: expect_s3_class(vs, "vsel") expect_type(vs, "list") expect_named(vs, vsel_nms, info = info_str) # refmodel expect_identical(vs$refmodel, refmod_expected, info = info_str) # nobs_train expect_identical(vs$nobs_train, vs$refmodel$nobs, info = info_str) # search_path expect_type(vs$search_path, "list") expect_named(vs$search_path, c("predictor_ranking", "outdmins", "p_sel"), info = info_str) expect_identical(vs$search_path$predictor_ranking, vs$predictor_ranking, info = info_str) expect_type(vs$search_path$outdmins, "list") expect_length(vs$search_path$outdmins, prd_trms_len_expected + 1) from_vsel_L1_search <- method_expected == "L1" if (exists(".Random.seed", envir = .GlobalEnv)) { rng_state_old <- get(".Random.seed", envir = .GlobalEnv) on.exit(assign(".Random.seed", rng_state_old, envir = .GlobalEnv)) } set.seed(seed_expected) if (cl_search_expected) { clust_ref <- get_refdist(vs$refmodel, nclusters = nprjdraws_search_expected) } else { clust_ref <- get_refdist(vs$refmodel, ndraws = nprjdraws_search_expected) } if (!from_vsel_L1_search) { y_nm <- as.character(vs$refmodel$formula)[2] prd_trms_vsel_L1_search_crr <- NULL } else { y_nm <- "" prd_trms_vsel_L1_search_crr <- vs$predictor_ranking } y_nms <- y_nm # For checking for the augmented-data projection case, we use the "unsafer" # `vs$refmodel$family$for_augdat` here instead of an extra argument such as # `augdat_expected` because such an argument already exists in # extfam_tester(): if (!vs$refmodel$family$for_augdat) { y_nms <- paste0(".", y_nms) } if (!vs$refmodel$family$for_augdat && nprjdraws_search_expected > 1) { y_nms <- paste0(y_nms, ".", seq_len(nprjdraws_search_expected)) } sub_data_crr <- vs$refmodel$fetch_data() if (vs$refmodel$family$for_augdat) { # Create the augmented dataset: y_unqs <- vs$refmodel$family$cats sub_data_crr <- do.call(rbind, lapply(y_unqs, function(y_unq) { sub_data_crr_j <- sub_data_crr sub_data_crr_j[[y_nms]] <- y_unq return(sub_data_crr_j) })) sub_data_crr[[y_nms]] <- factor(sub_data_crr[[y_nms]], levels = y_unqs) } else { for (i in seq_len(nprjdraws_search_expected)) { sub_data_crr[[y_nms[i]]] <- clust_ref$mu[, i] } } if (vs$refmodel$family$for_augdat) { wobs_expected_crr <- unclass(clust_ref$mu) } else { wobs_expected_crr <- vs$refmodel$wobs } prd_trms_for_sub <- c("1", vs$predictor_ranking) for (i in seq_along(vs$search_path$outdmins)) { sub_trms_crr <- head(prd_trms_for_sub, i) if (length(sub_trms_crr) > 1) { sub_trms_crr <- setdiff(sub_trms_crr, "1") } sub_formul_crr <- lapply(y_nms, function(y_nm_i) { fml_tmp <- as.formula(paste( y_nm_i, "~", paste(sub_trms_crr, collapse = " + ") )) if (all(grepl("\\+", sub_trms_crr))) { # Avoid duplicated terms in the "empty_size" `search_terms` setting: fml_tmp <- update(fml_tmp, . ~ .) } return(fml_tmp) }) if (vs$refmodel$family$for_augdat) { augdat_cats_crr <- vs$refmodel$family$cats } else { augdat_cats_crr <- NULL } outdmin_tester( vs$search_path$outdmins[[i]], nprjdraws_expected = nprjdraws_search_expected, sub_formul = sub_formul_crr, sub_data = sub_data_crr, sub_fam = vs$refmodel$family$family, wobs_expected = wobs_expected_crr, prd_trms_vsel_L1_search = prd_trms_vsel_L1_search_crr, augdat_cats = augdat_cats_crr, info_str = paste(info_str, i, sep = "__") ) } if (!vs$refmodel$family$for_augdat) { ncats <- 1L } else { ncats <- length(vs$refmodel$family$cats) } nobsv_aug <- nobsv * ncats refdist_tester(vs$search_path$p_sel, nobsv_expected = nobsv_aug, nprjdraws_expected = nprjdraws_search_expected, nrefdraws_expected = ncol(vs$refmodel$mu), clust_expected = cl_search_expected, aug_expected = vs$refmodel$family$for_augdat, fam_expected = vs$refmodel$family$family, info_str = info_str) # predictor_ranking expect_type(vs$predictor_ranking, "character") expect_length(vs$predictor_ranking, prd_trms_len_expected) rk_fulldata <- vs$predictor_ranking for (rk_fulldata_plus in grep("\\+", rk_fulldata, value = TRUE)) { rk_fulldata <- setdiff(rk_fulldata, rk_fulldata_plus) rk_fulldata <- c(rk_fulldata, labels(terms(as.formula(paste(". ~", rk_fulldata_plus))))) } expect_true(all(rk_fulldata %in% trms_universe_split), info = info_str) # predictor_ranking_cv if (with_cv && isTRUE(vs$validate_search)) { expect_true(is.matrix(vs$predictor_ranking_cv), info = info_str) expect_type(vs$predictor_ranking_cv, "character") if (identical(cv_method_expected, "kfold")) { n_folds <- K_tst } else { n_folds <- nobsv } expect_identical(dim(vs$predictor_ranking_cv), c(n_folds, prd_trms_len_expected), info = info_str) # We need the addition of `NA_character_` because of subsampled PSIS-LOO CV: rk_cv <- unique(as.vector(vs$predictor_ranking_cv)) for (rk_cv_plus in grep("\\+", rk_cv, value = TRUE)) { rk_cv <- setdiff(rk_cv, rk_cv_plus) rk_cv <- c(rk_cv, labels(terms(as.formula(paste(". ~", rk_cv_plus))))) } expect_true( all(rk_cv %in% c(trms_universe_split, sub("^s\\((.*)\\)$", "\\1", grep("^s\\(.*\\)$", trms_universe_split, value = TRUE)), NA_character_)), info = info_str ) } else { expect_null(vs$predictor_ranking_cv, info = info_str) } # ce if (with_cv && (valsearch_expected || cv_method_expected == "kfold")) { expect_identical(vs$ce, rep(NA_real_, prd_trms_len_expected + 1)) } else { expect_type(vs$ce, "double") expect_length(vs$ce, prd_trms_len_expected + 1) # Expected to be non-increasing for increasing model size: expect_true(all(ifelse(sign(head(vs$ce, -1)) == 1, tail(vs$ce, -1) <= extra_tol * head(vs$ce, -1), tail(vs$ce, -1) <= 1 / extra_tol * head(vs$ce, -1))), info = info_str) } # type_test type_test_expected <- cv_method_expected if (length(type_test_expected) == 0) { if (is.null(ywtest_expected)) { type_test_expected <- "train" } else { type_test_expected <- "test_hold-out" } } expect_identical(vs$type_test, type_test_expected, info = info_str) # y_wobs_test if (is.null(ywtest_expected)) { expect_true(is.data.frame(vs$y_wobs_test)) expect_named(vs$y_wobs_test, nms_y_wobs_test(), info = info_str) expect_identical(vs$y_wobs_test$wobs, vs$refmodel$wobs, info = info_str) if (vs$refmodel$family$for_latent && with_cv) { if (identical(cv_method_expected, "kfold")) { expect_true(all(is.na(vs$y_wobs_test$y)), info = info_str) } else if (inherits(vs$refmodel$fit, "stanreg") && formula_contains_additive_terms(vs$refmodel$formula) && formula_contains_group_terms(vs$refmodel$formula)) { # Due to rstanarm issue stan-dev/rstanarm#604, we need to be consistent # with loo_varsel() in case of an rstanarm GAMM reference model: mu_offs_oscale_tst <- vs$refmodel$family$latent_ilink( t(vs$refmodel$mu_offs), cl_ref = seq_along(vs$refmodel$wdraws_ref), wdraws_ref = vs$refmodel$wdraws_ref ) ll_forPSIS <- vs$refmodel$family$latent_ll_oscale( mu_offs_oscale_tst, dis = vs$refmodel$dis, y_oscale = vs$refmodel$y_oscale, wobs = vs$refmodel$wobs, cl_ref = seq_along(vs$refmodel$wdraws_ref), wdraws_ref = vs$refmodel$wdraws_ref ) psisloo_tst <- suppressWarnings( loo::psis(-ll_forPSIS, cores = 1, r_eff = NA) ) y_lat_loo <- loo::E_loo( t(vs$refmodel$ref_predfun( vs$refmodel$fit, excl_offs = FALSE, mlvl_allrandom = getOption("projpred.mlvl_proj_ref_new", FALSE) )), psis_object = psisloo_tst, log_ratios = -ll_forPSIS ) expect_equal(vs$y_wobs_test$y, y_lat_loo$value, info = info_str) } else { ll_forPSIS <- rstantools::log_lik(vs$refmodel$fit) lwdraws_ref <- weights(suppressWarnings( loo::psis(-ll_forPSIS, cores = 1, r_eff = NA) )) y_lat_loo <- colSums( t(vs$refmodel$ref_predfun( vs$refmodel$fit, excl_offs = FALSE, mlvl_allrandom = getOption("projpred.mlvl_proj_ref_new", FALSE) )) * exp(lwdraws_ref) ) expect_equal(vs$y_wobs_test$y, y_lat_loo, info = info_str) } } else { expect_identical(vs$y_wobs_test$y, vs$refmodel$y, info = info_str) } expect_identical(vs$y_wobs_test$y_oscale, vs$refmodel$y_oscale, info = info_str) } else { expect_identical(vs$y_wobs_test, ywtest_expected, info = info_str) } # nobs_test expect_identical(vs$nobs_test, nrow(vs$y_wobs_test), info = info_str) # summaries smmrs_tester(vs$summaries, from_datafit = from_datafit, with_cv = with_cv, valsearch_expected = valsearch_expected, cv_method_expected = cv_method_expected, nloo_expected = nloo_expected, ncats = ncats, ywtest_expected = ywtest_expected, ref_fam = vs$refmodel$family, prd_trms_len_expected = prd_trms_len_expected, info_str = info_str) # summaries_fast if (isTRUE(vs$nloo < vs$refmodel$nobs)) { smmrs_tester(vs$summaries_fast, from_datafit = from_datafit, with_cv = with_cv, valsearch_expected = valsearch_expected, cv_method_expected = cv_method_expected, nloo_expected = nobsv, ncats = ncats, ywtest_expected = ywtest_expected, ref_fam = vs$refmodel$family, prd_trms_len_expected = prd_trms_len_expected, info_str = info_str) expect_identical(vs$summaries_fast$ref, vs$summaries$ref, info = info_str) } else { expect_null(vs$summaries_fast, info = info_str) } # nterms_all expect_identical(vs$nterms_all, count_terms_in_formula(vs$refmodel$formula) - 1L, info = info_str) # nterms_max nterms_max_expected <- prd_trms_len_expected if (search_trms_empty_size) { nterms_max_expected <- nterms_max_expected + 1 } expect_equal(vs$nterms_max, nterms_max_expected, info = info_str) # method expect_identical(vs$method, method_expected, info = info_str) # cv_method expect_identical(vs$cv_method, cv_method_expected, info = info_str) # nloo expect_identical(vs$nloo, nloo_expected, info = info_str) # K if (!is.null(K_expected)) { expect_identical(vs$K, K_expected, info = info_str) } else if (identical(cv_method_expected, "kfold")) { expect_identical(vs$K, K_tst, info = info_str) } else if (with_cv) { expect_identical(vs$K, if (from_datafit) 10 else 5, info = info_str) } else { expect_null(vs$K, info = info_str) } # validate_search expect_identical(vs$validate_search, valsearch_expected, info = info_str) # cvfits expect_identical(vs$cvfits, cvfits_expected, info = info_str) # args_search sce <- search_control_expected[!sapply(search_control_expected, is.null)] if (!length(sce)) { sce <- if (method_expected == "forward") list() else NULL } expect_equal( vs$args_search, list( method = method_expected, ndraws = NULL, nclusters = if (from_datafit && method_expected == "forward") { 20 } else if (cl_search_expected || from_datafit) { nprjdraws_search_expected } else { NULL }, nterms_max = vs$nterms_max, search_control = sce, penalty = penalty_expected, search_terms = if (is.null(search_terms_expected)) { NULL } else { union("1", search_terms_expected) } ), info = info_str ) # clust_used_search expect_equal(vs$clust_used_search, cl_search_expected, info = info_str) # clust_used_eval expect_equal(vs$clust_used_eval, cl_eval_expected, info = info_str) # nprjdraws_search expect_equal(vs$nprjdraws_search, nprjdraws_search_expected, info = info_str) # nprjdraws_eval expect_equal(vs$nprjdraws_eval, nprjdraws_eval_expected, info = info_str) # refit_prj expect_equal(vs$refit_prj, refit_prj_expected, info = info_str) # projpred_version expect_true(is.package_version(vs$projpred_version), info = info_str) return(invisible(TRUE)) } # A helper function for testing the structure of element `summaries` or # `summaries_fast` of a `vsel` object. # # @param smmrs Element `summaries` or `summaries_fast` of a `vsel` object. # @param from_datafit A single logical value indicating whether an object of # class `datafit` was used for creating the `vsel` object (`TRUE`) or not # (`FALSE`). # @param with_cv A single logical value indicating whether the `vsel` object was # created by cv_varsel() (`TRUE`) or not (`FALSE`). # @param valsearch_expected The expected element `validate_search` of the `vsel` # object. # @param cv_method_expected Either `character()` for the no-CV case or a single # character string giving the CV method (see argument `cv_method` of # cv_varsel()). # @param nloo_expected Only relevant if `with_cv` is `TRUE`. The value which was # used for argument `nloo` of cv_varsel(). # @param ncats The expected number of categories (for augmented-data and # possibly latent projection). # @param ywtest_expected If `vs` was created with a non-`NULL` argument `d_test` # (which is only possible for varsel()), then this needs to be the expected # `vs$y_wobs_test` object. Otherwise, this needs to be `NULL`. # @param ref_fam Element `family` of element `refmodel` of the `vsel` object. # @param prd_trms_len_expected A single numeric value giving the expected length # of the predictor ranking (not counting the intercept, even for the # intercept-only model). # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). smmrs_tester <- function(smmrs, from_datafit, with_cv, valsearch_expected, cv_method_expected, nloo_expected, ncats, ywtest_expected, ref_fam, prd_trms_len_expected, info_str) { expect_type(smmrs, "list") expect_named(smmrs, c("sub", "ref"), info = info_str) expect_type(smmrs$sub, "list") expect_length(smmrs$sub, prd_trms_len_expected + 1) nobsv_summ <- nobsv nobsv_aug <- nobsv * ncats nobsv_summ_aug <- nobsv_aug if (!is.null(ywtest_expected)) { nobsv_summ <- nrow(ywtest_expected) nobsv_summ_aug <- nobsv_summ * ncats } if (identical(cv_method_expected, "LOO")) { # Re-order: vsel_smmrs_sub_nms[1:2] <- vsel_smmrs_sub_nms[2:1] vsel_smmrs_ref_nms[1:2] <- vsel_smmrs_ref_nms[2:1] } if (ref_fam$for_latent) { vsel_smmrs_sub_nms <- c(vsel_smmrs_sub_nms, "oscale") vsel_smmrs_ref_nms <- c(vsel_smmrs_ref_nms, "oscale") } smmrs_sub_j_tester <- function(smmrs_sub_j, tests_oscale = FALSE) { if (tests_oscale) { vsel_smmrs_sub_nms <- setdiff(vsel_smmrs_sub_nms, "oscale") if (!is.null(ref_fam$cats)) { ncats <- length(ref_fam$cats) } else { ncats <- 1L } nobsv_summ_aug <- nobsv_summ * ncats } expect_type(smmrs_sub_j, "list") expect_named(smmrs_sub_j, vsel_smmrs_sub_nms, info = info_str) expect_type(smmrs_sub_j$mu, "double") expect_length(smmrs_sub_j$mu, nobsv_summ_aug) if (ref_fam$for_augdat || (ref_fam$for_latent && tests_oscale && !is.null(ref_fam$cats))) { expect_s3_class(smmrs_sub_j$mu, "augvec") } if (with_cv && valsearch_expected && identical(cv_method_expected, "LOO")) { expect_identical(sum(!is.na(smmrs_sub_j$mu)), nloo_expected * ncats, info = info_str) } else { expect_true(all(!is.na(smmrs_sub_j$mu)), info = info_str) } expect_type(smmrs_sub_j$lppd, "double") expect_length(smmrs_sub_j$lppd, nobsv_summ) if (with_cv) { if (ref_fam$for_latent && !tests_oscale && identical(cv_method_expected, "kfold")) { expect_true(all(is.na(smmrs_sub_j$lppd)), info = info_str) } else if (valsearch_expected && identical(cv_method_expected, "LOO")) { expect_identical(sum(!is.na(smmrs_sub_j$lppd)), nloo_expected, info = info_str) } } else { expect_true(all(!is.na(smmrs_sub_j$lppd)), info = info_str) } return(invisible(TRUE)) } for (j in seq_along(smmrs$sub)) { smmrs_sub_j_tester(smmrs$sub[[j]]) if (ref_fam$for_latent) { smmrs_sub_j_tester(smmrs$sub[[j]]$oscale, tests_oscale = TRUE) } } smmrs_ref_tester <- function(smmrs_ref, tests_oscale = FALSE) { if (tests_oscale) { vsel_smmrs_ref_nms <- setdiff(vsel_smmrs_ref_nms, "oscale") if (!is.null(ref_fam$cats)) { ncats <- length(ref_fam$cats) } else { ncats <- 1L } nobsv_summ_aug <- nobsv_summ * ncats } expect_type(smmrs_ref, "list") expect_named(smmrs_ref, vsel_smmrs_ref_nms, info = info_str) if (!from_datafit) { expect_type(smmrs_ref$mu, "double") } expect_length(smmrs_ref$mu, nobsv_summ_aug) if (ref_fam$for_augdat || (ref_fam$for_latent && tests_oscale && !is.null(ref_fam$cats))) { expect_s3_class(smmrs_ref$mu, "augvec") } if (!from_datafit) { expect_true(all(!is.na(smmrs_ref$mu)), info = info_str) } else { expect_true(all(is.na(smmrs_ref$mu)), info = info_str) } if (!from_datafit) { expect_type(smmrs_ref$lppd, "double") } expect_length(smmrs_ref$lppd, nobsv_summ) if (!from_datafit && !(ref_fam$for_latent && !tests_oscale && identical(cv_method_expected, "kfold"))) { expect_true(all(!is.na(smmrs_ref$lppd)), info = info_str) } else { expect_true(all(is.na(smmrs_ref$lppd)), info = info_str) } return(invisible(TRUE)) } smmrs_ref_tester(smmrs$ref) if (ref_fam$for_latent) { smmrs_ref_tester(smmrs$ref$oscale, tests_oscale = TRUE) } return(invisible(TRUE)) } # A helper function for testing the structure of an object as returned by # summary.vsel(). # # @param smmry An object as returned by summary.vsel(). # @param vsel_expected The `vsel` object which was used in the summary.vsel() # call. # @param nterms_max_expected A single numeric value as supplied to # summary.vsel()'s argument `nterms_max`. # @param resp_oscale_expected A single logical value indicating whether # element `resp_oscale` is expected to be `TRUE` or `FALSE`. # @param search_trms_empty_size A single logical value indicating whether # `search_terms` was constructed in a way that causes a model size to be # without candidate models. # @param cumul_expected A single logical value indicating whether argument # `cumulate` of summary.vsel() was set to `TRUE` or `FALSE`. # @param info_str A single character string giving information to be printed in # case of failure. # @param ... Arguments passed to smmry_sub_tester() / smmry_sub_tester(), apart # from smmry_sub_tester() / smmry_ref_tester()'s arguments `smmry_sub` / # `smmry_ref`, `nterms_max_expected`, and `info_str`. # # @return `TRUE` (invisible). smmry_tester <- function(smmry, vsel_expected, nterms_max_expected = NULL, resp_oscale_expected = TRUE, search_trms_empty_size = FALSE, cumul_expected = FALSE, info_str, ...) { expect_s3_class(smmry, "vselsummary") expect_type(smmry, "list") expect_named( smmry, c("formula", "family", "nobs_train", "type_test", "nobs_test", "method", "cv_method", "nloo", "K", "validate_search", "clust_used_search", "clust_used_eval", "nprjdraws_search", "nprjdraws_eval", "refit_prj", "search_included", "nterms", "perf_sub", "perf_ref", "resp_oscale", "deltas", "cumulate"), info = info_str ) expect_null(smmry$fit, info = info_str) expect_identical(smmry$formula, vsel_expected$refmodel$formula, info = info_str) expect_identical(smmry$family, vsel_expected$refmodel$family, info = info_str) for (nm in c( "nobs_train", "type_test", "nobs_test", "method", "cv_method", "nloo", "K", "validate_search", "clust_used_search", "clust_used_eval", "nprjdraws_search", "nprjdraws_eval", "refit_prj" )) { expect_identical(smmry[[nm]], vsel_expected[[nm]], info = paste(info_str, nm, sep = "__")) } expect_true( smmry$search_included %in% c( "search included (i.e., fold-wise searches)", "search not included (i.e., a full-data search only)" ), info = info_str ) expect_identical( smmry$search_included == "search included (i.e., fold-wise searches)", isTRUE(vsel_expected$validate_search), info = info_str ) if (is.null(nterms_max_expected)) { nterms_ch <- vsel_expected$nterms_max } else { nterms_ch <- nterms_max_expected } if (search_trms_empty_size) { # This is the "empty_size" setting, so we have to subtract the skipped model # size (see issue #307): nterms_ch <- nterms_ch - 1 } expect_equal(smmry$nterms, nterms_ch, info = info_str) if (isTRUE(vsel_expected$validate_search)) { pr_rk_diag_expected <- head(diag(cv_proportions(vsel_expected, cumulate = cumul_expected)), nterms_ch) } else { pr_rk_diag_expected <- rep(NA, nterms_ch) } smmry_sub_tester(smmry$perf_sub, summaries_ref = vsel_expected$summaries$ref, nterms_max_expected = nterms_max_expected, latent_expected = vsel_expected$refmodel$family$for_latent, resp_oscale_expected = resp_oscale_expected, pr_rk_diag_expected = pr_rk_diag_expected, info_str = info_str, ...) smmry_ref_tester(smmry$perf_ref, latent_expected = vsel_expected$refmodel$family$for_latent, resp_oscale_expected = resp_oscale_expected, info_str = info_str, ...) expect_identical(smmry$resp_oscale, resp_oscale_expected, info = info_str) expect_identical(smmry$deltas, FALSE, info = info_str) expect_identical(smmry$cumulate, cumul_expected, info = info_str) return(invisible(TRUE)) } # A helper function for testing the structure of a `data.frame` as returned by # summary.vsel() in its output element `perf_sub`. # # @param smmry_sub A `data.frame` as returned by summary.vsel() in its output # element `perf_sub`. # @param summaries_ref The reference model's summaries, as stored in # `$summaries$ref`. # @param stats_expected A character vector of expected `stats` (see the # corresponding argument of summary.vsel()). Use `NULL` for the default. # @param type_expected A character vector of expected `type`s (see the # corresponding argument of summary.vsel()). Use `NULL` for the default. # @param nterms_max_expected A single numeric value as supplied to # summary.vsel()'s argument `nterms_max`. # @param cv_method_expected Either `character()` for the no-CV case or a single # character string giving the CV method (see argument `cv_method` of # cv_varsel()). # @param prd_trms_expected A character vector giving the expected predictor # ranking (not counting the intercept, even for the intercept-only model). # @param from_datafit A single logical value indicating whether an object of # class `datafit` was used for creating the `vsel` object (from which # `smmry_sub` was created) (`TRUE`) or not (`FALSE`). # @param latent_expected A single logical value indicating whether the reference # model is expected to be for latent projection (`TRUE`) or not (`FALSE`). # @param resp_oscale_expected A single logical value indicating whether argument # `resp_oscale` of summary.vsel() was set to `TRUE` or `FALSE`. # @param pr_rk_diag_expected A numeric vector giving the expected values for # column `cv_proportions_diag` of `smmry_sub` (except for the first one). # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). smmry_sub_tester <- function( smmry_sub, summaries_ref, stats_expected = NULL, type_expected = NULL, nterms_max_expected = NULL, cv_method_expected = character(), prd_trms_expected, from_datafit = FALSE, latent_expected = FALSE, resp_oscale_expected = TRUE, pr_rk_diag_expected = rep( NA, nterms_max_expected %||% length(prd_trms_expected) ), info_str ) { if (is.null(stats_expected)) { stats_expected <- "elpd" } if (is.null(type_expected)) { type_expected <- c("mean", "se", "diff", "diff.se") } if (is.null(nterms_max_expected)) { nterms_max_expected <- length(prd_trms_expected) } else { prd_trms_expected <- head(prd_trms_expected, nterms_max_expected) } expect_s3_class(smmry_sub, "data.frame") # Rows: expect_identical(nrow(smmry_sub), nterms_max_expected + 1L, info = info_str) # Columns: smmry_nms <- c("size", "ranking_fulldata", "cv_proportions_diag") stats_mean_name <- stats_expected smmry_nms <- c(smmry_nms, sapply(seq_along(stats_expected), function(stat_idx) { c(stats_mean_name[stat_idx], paste(stats_expected[stat_idx], setdiff(type_expected, "mean"), sep = ".")) })) expect_named(smmry_sub, smmry_nms, info = info_str) # Columns in detail: expect_identical(smmry_sub$size, seq_len(nrow(smmry_sub)) - 1, info = info_str) expect_identical(smmry_sub$ranking_fulldata, c("(Intercept)", prd_trms_expected), info = info_str) expect_identical(smmry_sub$cv_proportions_diag, c(NA, pr_rk_diag_expected), info = info_str) is_lat_kfold <- latent_expected && !resp_oscale_expected && identical(cv_method_expected, "kfold") if (is_lat_kfold) { for (stat_idx in seq_along(stats_expected)) { expect_true(all(is.na(smmry_sub[, stats_mean_name[stat_idx]])), info = info_str) } } if ("diff" %in% type_expected) { diff_nm <- paste(stats_expected, "diff", sep = ".") for (stat_idx in seq_along(stats_expected)) { if (!from_datafit && !is_lat_kfold) { if (stats_expected[stat_idx] != "gmpd") { # A very basic test: expect_equal( diff(smmry_sub[, stats_mean_name[stat_idx]]), diff(smmry_sub[, diff_nm[stat_idx]]), info = info_str ) } else { # A very basic test: expect_equal( exp(diff(log(smmry_sub[, stats_mean_name[stat_idx]]))), exp(diff(log(smmry_sub[, diff_nm[stat_idx]]))), info = info_str ) } if ("oscale" %in% names(summaries_ref) && resp_oscale_expected) { summaries_ref <- summaries_ref[["oscale"]] } if (stats_expected[stat_idx] == "elpd") { stat_ref <- sum(summaries_ref$lppd) } else if (stats_expected[stat_idx] %in% c("mlpd", "gmpd")) { stat_ref <- mean(summaries_ref$lppd) } else { # TODO: Implement `stat_ref` for the remaining `stats`. stat_ref <- NULL } if (!is.null(stat_ref)) { if (stats_expected[stat_idx] != "gmpd") { expect_equal( smmry_sub[, stats_mean_name[stat_idx]] - stat_ref, smmry_sub[, diff_nm[stat_idx]], tolerance = 1e-12, info = info_str ) } else { expect_equal( exp(log(smmry_sub[, stats_mean_name[stat_idx]]) - stat_ref), smmry_sub[, diff_nm[stat_idx]], tolerance = 1e-12, info = info_str ) } } } else { expect_true(all(is.na(smmry_sub[, diff_nm[stat_idx]])), info = info_str) } } } if ("lower" %in% type_expected && !is_lat_kfold) { lower_nm <- paste(stats_expected, "lower", sep = ".") for (stat_idx in seq_along(stats_expected)) { if (!stats_expected[stat_idx] %in% c("auc")) { # AUC is excluded here because of PR #347 (originally, RMSE was excluded # as well, but PR #496 switched to the delta method for RMSE). expect_true(all(smmry_sub[, stats_mean_name[stat_idx]] >= smmry_sub[, lower_nm[stat_idx]]), info = info_str) } } } if ("upper" %in% type_expected && !is_lat_kfold) { upper_nm <- paste(stats_expected, "upper", sep = ".") for (stat_idx in seq_along(stats_expected)) { if (!stats_expected[stat_idx] %in% c("auc")) { # AUC is excluded here because of PR #347 (originally, RMSE was excluded # as well, but PR #496 switched to the delta method for RMSE). expect_true(all(smmry_sub[, stats_mean_name[stat_idx]] <= smmry_sub[, upper_nm[stat_idx]]), info = info_str) } } } return(invisible(TRUE)) } # A helper function for testing the structure of a `data.frame` as returned by # summary.vsel() in its output element `perf_ref`. # # @param smmry_ref A vector as returned by summary.vsel() in its output element # `perf_ref`. # @param stats_expected A character vector of expected `stats` (see the # corresponding argument of summary.vsel()). Use `NULL` for the default. # @param type_expected A character vector of expected `type`s (see the # corresponding argument of summary.vsel()). Use `NULL` for the default. # @param cv_method_expected Either `character()` for the no-CV case or a single # character string giving the CV method (see argument `cv_method` of # cv_varsel()). # @param from_datafit A single logical value indicating whether an object of # class `datafit` was used for creating the `vsel` object (from which # `smmry_sub` was created) (`TRUE`) or not (`FALSE`). # @param latent_expected A single logical value indicating whether the reference # model is expected to be for latent projection (`TRUE`) or not (`FALSE`). # @param resp_oscale_expected A single logical value indicating whether argument # `resp_oscale` of summary.vsel() was set to `TRUE` or `FALSE`. # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). smmry_ref_tester <- function( smmry_ref, stats_expected = NULL, type_expected = NULL, cv_method_expected = character(), from_datafit = FALSE, latent_expected = FALSE, resp_oscale_expected = TRUE, info_str, ... ) { if (is.null(stats_expected)) { stats_expected <- "elpd" } if (is.null(type_expected)) { type_expected <- c("mean", "se", "diff", "diff.se") } type_expected <- setdiff(type_expected, c("diff", "diff.se")) is_lat_kfold <- latent_expected && !resp_oscale_expected && identical(cv_method_expected, "kfold") expect_true(is.vector(smmry_ref, "numeric"), info = info_str) smmry_nms <- character() stats_mean_name <- stats_expected smmry_nms <- c(smmry_nms, sapply(seq_along(stats_expected), function(stat_idx) { c(stats_mean_name[stat_idx], paste(stats_expected[stat_idx], setdiff(type_expected, "mean"), sep = ".")) })) expect_named(smmry_ref, smmry_nms, info = info_str) # Elements in detail: if (is_lat_kfold) { for (stat_idx in seq_along(stats_expected)) { expect_true(all(is.na(smmry_ref[stats_mean_name[stat_idx]])), info = info_str) } } if ("lower" %in% type_expected && !is_lat_kfold && !from_datafit) { lower_nm <- paste(stats_expected, "lower", sep = ".") for (stat_idx in seq_along(stats_expected)) { if (!stats_expected[stat_idx] %in% c("auc")) { # AUC is excluded here because of PR #347 (originally, RMSE was excluded # as well, but PR #496 switched to the delta method for RMSE). expect_true(all(smmry_ref[stats_mean_name[stat_idx]] >= smmry_ref[lower_nm[stat_idx]]), info = info_str) } } } if ("upper" %in% type_expected && !is_lat_kfold && !from_datafit) { upper_nm <- paste(stats_expected, "upper", sep = ".") for (stat_idx in seq_along(stats_expected)) { if (!stats_expected[stat_idx] %in% c("auc")) { # AUC is excluded here because of PR #347 (originally, RMSE was excluded # as well, but PR #496 switched to the delta method for RMSE). expect_true(all(smmry_ref[stats_mean_name[stat_idx]] <= smmry_ref[upper_nm[stat_idx]]), info = info_str) } } } return(invisible(TRUE)) } # A helper function for testing the structure of the return value of # performances(). # # @param perf_vsel The return value of performances(). # @param smmry_expected The `vselsummary` object which was used in the # performances() call. # @param info_str A single character string giving information to be printed in # case of failure and also for naming vdiffr::expect_doppelganger() output. # # @return `TRUE` (invisible). performances_tester <- function( perf_vsel, smmry_expected, info_str ) { expect_type(perf_vsel, "list") expect_named(perf_vsel, c("submodels", "reference_model"), info = info_str) # submodels smmry_expected_sub <- smmry_expected[["perf_sub"]] smmry_expected_sub <- smmry_expected_sub[ , -grep("ranking_fulldata|cv_proportions_diag", names(smmry_expected_sub)) ] expect_identical(perf_vsel[["submodels"]], smmry_expected_sub, info = info_str) # reference_model expect_identical(perf_vsel[["reference_model"]], smmry_expected[["perf_ref"]], info = info_str) return(invisible(TRUE)) } # A helper function for testing the structure of the return value of # plot.vsel(). # # @param plot_vsel The return value of plot.vsel(). # @param nterms_max_expected The value that was passed to argument `nterms_max` # of plot.vsel(). # @param rk_max_expected The value that was passed to argument # `ranking_nterms_max` of plot.vsel(). # @param rk_expected The full-data predictor ranking that is expected to have # been used in `plot_vsel`. # @param abbv_expected The value that was passed to argument # `ranking_abbreviate` of plot.vsel(). # @param abbv_args_expected The `list` that was passed to argument # `ranking_abbreviate_args` of plot.vsel(). # @param info_str A single character string giving information to be printed in # case of failure and also for naming vdiffr::expect_doppelganger() output. # # @return `TRUE` (invisible). plot_vsel_tester <- function( plot_vsel, nterms_max_expected = NULL, rk_max_expected = NULL, rk_expected, abbv_expected = NULL, abbv_args_expected = list(), info_str ) { expect_s3_class(plot_vsel, c("gg", "ggplot")) if (isTRUE(abbv_expected) && (is.null(rk_max_expected) || !is.na(rk_max_expected))) { if (!is.null(rk_max_expected)) { rk_expected <- head(rk_expected, rk_max_expected) } else if (!is.null(nterms_max_expected)) { rk_expected <- head(rk_expected, nterms_max_expected) } attr_abbv_expected <- do.call( abbreviate, c(list(names.arg = c("(Intercept)", rk_expected)), abbv_args_expected) ) } else { attr_abbv_expected <- NULL } expect_identical(attr(plot_vsel, "projpred_ranking_abbreviated"), attr_abbv_expected, info = info_str) if (run_snaps) { vdiffr::expect_doppelganger(info_str, plot_vsel) } return(invisible(TRUE)) } # A helper function for testing the structure of a `ranking` object as returned # by ranking(). # # @param rk A `ranking` object as returned by ranking(). # @param fulldata_expected The expected `fulldata` element of `rk`. # @param foldwise_expected The expected `foldwise` element of `rk`. # @param nterms_max_expected A single numeric value as supplied to # ranking.vsel()'s argument `nterms_max`. # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). ranking_tester <- function(rk, fulldata_expected, foldwise_expected, nterms_max_expected, info_str) { expect_s3_class(rk, "ranking", exact = TRUE) expect_type(rk, "list") expect_named(rk, c("fulldata", "foldwise"), info = info_str) # Since we test elements `predictor_ranking` and `predictor_ranking_cv` # already thoroughly in vsel_tester(), we can simply plug these into # `fulldata_expected` and `foldwise_expected` and then test via identical() # (after taking `nterms_max` into account): if (!is.null(nterms_max_expected)) { fulldata_expected <- head(fulldata_expected, nterms_max_expected) if (!is.null(foldwise_expected)) { foldwise_expected <- foldwise_expected[, seq_len(nterms_max_expected), drop = FALSE] } } expect_identical(rk[["fulldata"]], fulldata_expected, info = info_str) expect_identical(rk[["foldwise"]], foldwise_expected, info = info_str) return(invisible(TRUE)) } # A helper function for testing the structure of a `cv_proportions` object as # returned by cv_proportions(). # # @param pr A `cv_proportions` object as returned by cv_proportions(). # @param cumulate_expected A single logical value indicating whether # cv_proportions() was run with `cumulate = TRUE` (`TRUE`) or not (`FALSE`). # @param nterms_max_expected A single numeric value as supplied to the # underlying ranking.vsel() call's argument `nterms_max`, but must not be # `NULL`. # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). cv_proportions_tester <- function(pr, cumulate_expected = FALSE, nterms_max_expected, cnms_expected, info_str) { classes_expected <- "cv_proportions" if (cumulate_expected) { classes_expected <- c("cv_proportions_cumul", classes_expected) } expect_s3_class(pr, classes_expected, exact = TRUE) expect_equal(dim(pr), c(nterms_max_expected, nterms_max_expected), info = info_str) expect_true(is.numeric(pr), info = info_str) rnms_expected <- as.character(seq_len(nterms_max_expected)) if (cumulate_expected) { rnms_expected <- paste0("<=", rnms_expected) } if (length(cnms_expected) > nterms_max_expected) { cnms_expected <- head(cnms_expected, nterms_max_expected) } expect_identical(dimnames(pr), list("size" = rnms_expected, "predictor" = cnms_expected), info = info_str) return(invisible(TRUE)) }