test_that("selectboost_quantile returns a well-formed object", { sim <- simulate_quantile_data(n = 80, p = 12, active = 1:3, seed = 11) fit <- selectboost_quantile( sim$x, sim$y, tau = 0.5, B = 6, step_num = 0.5, seed = 99, verbose = FALSE ) expect_s3_class(fit, "selectboost_quantile") expect_true(is.matrix(fit$frequencies)) expect_equal(ncol(fit$frequencies), ncol(sim$x)) expect_equal(nrow(fit$frequencies), length(fit$c0_seq)) expect_true(all(fit$frequencies >= 0)) expect_true(all(fit$frequencies <= 1)) expect_true("(Intercept)" %in% names(fit$baseline)) }) test_that("multi-tau fits return one result per tau", { sim <- simulate_quantile_data(n = 70, p = 10, active = 1:3, seed = 14) fit <- selectboost_quantile( sim$x, sim$y, tau = c(0.25, 0.5, 0.75), B = 4, step_num = 0.5, seed = 14, verbose = FALSE ) expect_s3_class(fit, "selectboost_quantile") expect_true(is.list(fit$frequencies)) expect_equal(length(fit$frequencies), 3) expect_identical(names(fit$frequencies), c("tau = 0.25", "tau = 0.5", "tau = 0.75")) expect_true(all(vapply(fit$frequencies, is.matrix, logical(1)))) }) test_that("group functions return valid neighborhoods", { abs_corr <- matrix( c( 1.0, 0.8, 0.1, 0.8, 1.0, 0.2, 0.1, 0.2, 1.0 ), nrow = 3, byrow = TRUE ) neighbors <- group_neighbors(abs_corr, c0 = 0.5) components <- group_components(abs_corr, c0 = 0.5) expect_equal(neighbors[[1]], c(1L, 2L)) expect_equal(neighbors[[2]], c(1L, 2L)) expect_equal(neighbors[[3]], 3L) expect_equal(components[[1]], c(1L, 2L)) expect_equal(components[[2]], c(1L, 2L)) expect_equal(components[[3]], 3L) }) test_that("active variables tend to rank near the top", { sim <- simulate_quantile_data(n = 120, p = 20, active = 1:4, seed = 5) fit <- selectboost_quantile( sim$x, sim$y, tau = 0.5, B = 8, step_num = 0.5, seed = 5, verbose = FALSE ) ranked <- names(sort(colMeans(fit$frequencies), decreasing = TRUE))[1:6] expect_true(length(intersect(ranked, sim$active)) >= 2) }) test_that("summary preserves variable names in stable support", { sim <- simulate_quantile_data(n = 100, p = 16, active = 1:4, seed = 8) fit <- selectboost_quantile( sim$x, sim$y, tau = 0.5, B = 8, step_num = 0.5, seed = 8, verbose = FALSE ) smry <- summary(fit, threshold = 0.3) expect_true(is.character(smry$stable_support)) expect_true(length(smry$stable_support) >= 1) expect_true(all(smry$stable_support %in% colnames(fit$frequencies))) }) test_that("hybrid summary downweights weak-effect frequent variables", { freq <- matrix( c( 0.95, 0.92, 0.93, 0.94, 0.91, 0.92 ), nrow = 2, byrow = TRUE, dimnames = list(c("c0 = 1", "c0 = 0"), c("x1", "x2", "x3")) ) baseline <- c("(Intercept)" = 0, x1 = 2, x2 = 0.05, x3 = 1) sm_freq <- SelectBoost.quantile:::selectboost_summary_single( freq = freq, c0_seq = c(1, 0), threshold = 0.6, enforce_monotone = TRUE, tau = 0.5, baseline = baseline, selection_metric = "frequency" ) sm_hybrid <- SelectBoost.quantile:::selectboost_summary_single( freq = freq, c0_seq = c(1, 0), threshold = 0.6, enforce_monotone = TRUE, tau = 0.5, baseline = baseline, selection_metric = "hybrid" ) expect_true("x2" %in% sm_freq$stable_support) expect_false("x2" %in% sm_hybrid$stable_support) expect_gt(sm_hybrid$selection_score[["x1"]], sm_hybrid$selection_score[["x2"]]) }) test_that("support helper uses hybrid selection logic by default", { freq <- matrix( c( 0.95, 0.92, 0.93, 0.94, 0.91, 0.92 ), nrow = 2, byrow = TRUE, dimnames = list(c("c0 = 1", "c0 = 0"), c("x1", "x2", "x3")) ) object <- structure( list( frequencies = freq, baseline = c("(Intercept)" = 0, x1 = 2, x2 = 0.05, x3 = 1), c0_seq = c(1, 0), tau = 0.5, predictors = c("x1", "x2", "x3") ), class = "selectboost_quantile" ) support_hybrid <- support_selectboost_quantile(object, threshold = 0.6) support_frequency <- support_selectboost_quantile( object, threshold = 0.6, selection_metric = "frequency" ) expect_equal(support_hybrid, c("x1", "x3")) expect_true("x2" %in% support_frequency) }) test_that("directional fits are cached across repeated neighborhoods", { sim <- simulate_quantile_data(n = 60, p = 8, active = 1:3, seed = 4) prepared <- SelectBoost.quantile:::prepare_design(sim$x, sim$y, standardize = TRUE) stages <- SelectBoost.quantile:::prepare_selectboost_stages(prepared$x_selectboost) groups <- SelectBoost.quantile:::build_groups( abs_corr = stages$abs_corr, c0 = 0.5, group_fun = group_components ) cache <- SelectBoost.quantile:::make_directional_cache() first <- SelectBoost.quantile:::fit_directional_neighborhoods( projected = stages$projected, corr_sign = stages$corr_sign, groups = groups, cache = cache ) size_first <- SelectBoost.quantile:::directional_cache_size(first$cache) second <- SelectBoost.quantile:::fit_directional_neighborhoods( projected = stages$projected, corr_sign = stages$corr_sign, groups = groups, cache = first$cache ) size_second <- SelectBoost.quantile:::directional_cache_size(second$cache) expect_gt(size_first, 0) expect_equal(size_second, size_first) }) test_that("formula interface and lambda tuning work together", { sim <- simulate_quantile_data(n = 70, p = 10, active = 1:3, seed = 21) dat <- data.frame(y = sim$y, sim$x) fit <- selectboost_quantile( y ~ ., data = dat, tau = 0.5, B = 4, step_num = 0.5, tune_lambda = "bic", nlambda = 5, seed = 7, verbose = FALSE ) expect_s3_class(fit, "selectboost_quantile") expect_s3_class(fit$lambda_tuning, "tuned_lambda_quantile") expect_true(length(fit$lambda) == ncol(sim$x) + 1) expect_true(all(sim$active %in% names(fit$baseline))) }) test_that("predict and support helpers work for formula fits with factors", { set.seed(123) dat <- data.frame( y = rnorm(80), grp = factor(sample(c("a", "b", "c"), 80, replace = TRUE)), z1 = rnorm(80), z2 = rnorm(80) ) dat$y <- 0.5 * (dat$grp == "b") - 0.7 * (dat$grp == "c") + dat$z1 + rnorm(80) fit <- selectboost_quantile( y ~ grp + z1 + z2, data = dat, tau = c(0.25, 0.5), B = 3, step_num = 1, tune_lambda = "bic", nlambda = 4, seed = 123, verbose = FALSE ) newdat <- data.frame( grp = factor(c("a", "c"), levels = levels(dat$grp)), z1 = c(0, 1), z2 = c(1, -1) ) pred <- predict(fit, newdata = newdat) supp <- support_selectboost_quantile(fit, tau = 0.25, threshold = 0.5) cf <- coef(fit, tau = 0.5, threshold = 0.5) expect_true(is.matrix(pred)) expect_equal(dim(pred), c(2, 2)) expect_true(is.character(supp)) expect_true(is.numeric(cf)) expect_true("(Intercept)" %in% names(cf)) }) test_that("alternative grouping rules work in the main workflow", { sim <- simulate_quantile_data(n = 70, p = 10, active = 1:3, seed = 12) fit <- selectboost_quantile( sim$x, sim$y, tau = 0.5, B = 4, step_num = 0.5, group = group_components, seed = 12, verbose = FALSE ) expect_s3_class(fit, "selectboost_quantile") expect_identical(fit$group, "group_components") expect_true(fit$directional_cache_size >= 0) }) test_that("screening retains a reduced design but expands outputs to the full predictor set", { sim <- simulate_quantile_data(n = 50, p = 80, active = 1:4, seed = 31) fit <- selectboost_quantile( sim$x, sim$y, tau = 0.5, B = 2, step_num = 1, screen = "quantile_rank", screen_size = 15, seed = 31, verbose = FALSE ) expect_equal(fit$screen, "quantile_rank") expect_equal(length(fit$screened_predictors), 15) expect_equal(length(fit$screened_out), ncol(sim$x) - 15) expect_equal(ncol(fit$frequencies), ncol(sim$x)) expect_true(all(fit$baseline[fit$screened_out] == 0)) }) test_that("lambda tuning supports stronger rules and inflation", { sim <- simulate_quantile_data(n = 60, p = 8, active = 1:2, seed = 32) tuned <- tune_lambda_quantile( sim$x, sim$y, tau = 0.5, method = "cv", rule = "one_se", lambda_inflation = 1.5, nlambda = 4, folds = 3, repeats = 2, seed = 32, verbose = FALSE ) smry <- summary(tuned) expect_equal(tuned$rule, "one_se") expect_equal(tuned$lambda_inflation, 1.5) expect_equal(tuned$factor, tuned$selected_factor * tuned$lambda_inflation) expect_true(all(c("se", "rule", "lambda_inflation") %in% colnames(smry))) expect_equal(sum(smry$selected), 1) }) test_that("stability selection aggregates over complementary pairs", { sim <- simulate_quantile_data(n = 60, p = 12, active = 1:3, seed = 33) fit <- selectboost_quantile( sim$x, sim$y, tau = 0.5, B = 2, step_num = 1, subsamples = 3, sample_fraction = 0.5, complementary_pairs = TRUE, seed = 33, verbose = FALSE ) expect_equal(fit$subsamples, 3) expect_true(fit$complementary_pairs) expect_equal(fit$sample_fraction, 0.5) expect_true(all(fit$frequencies >= 0)) expect_true(all(fit$frequencies <= 1)) }) test_that("neighborhood caps retain the anchor and strongest correlations", { abs_corr <- matrix( c( 1.0, 0.9, 0.8, 0.2, 0.9, 1.0, 0.7, 0.1, 0.8, 0.7, 1.0, 0.6, 0.2, 0.1, 0.6, 1.0 ), nrow = 4, byrow = TRUE ) capped <- SelectBoost.quantile:::build_groups( abs_corr = abs_corr, c0 = 0.1, group_fun = group_neighbors, max_group_size = 2 ) expect_equal(capped[[1]], c(1L, 2L)) expect_equal(capped[[3]], c(1L, 3L)) expect_true(all(vapply(capped, length, integer(1)) <= 2)) }) test_that("tune_lambda_quantile returns a penalty profile", { sim <- simulate_quantile_data(n = 60, p = 8, active = 1:2, seed = 9) tuned <- tune_lambda_quantile( sim$x, sim$y, tau = 0.5, method = "cv", nlambda = 4, folds = 3, seed = 9, verbose = FALSE ) expect_s3_class(tuned, "tuned_lambda_quantile") expect_true(is.numeric(tuned$lambda)) expect_equal(length(tuned$lambda), ncol(sim$x) + 1) expect_true(is.numeric(tuned$score)) }) test_that("multi-tau tuning returns inspectable summaries", { sim <- simulate_quantile_data(n = 60, p = 8, active = 1:2, seed = 19) tuned <- tune_lambda_quantile( sim$x, sim$y, tau = c(0.25, 0.5), method = "cv", nlambda = 4, folds = 3, repeats = 2, seed = 19, verbose = FALSE ) smry <- summary(tuned) expect_s3_class(tuned, "tuned_lambda_quantile_multi") expect_true(is.data.frame(smry)) expect_true(all(c("tau", "factor", "score", "selected") %in% colnames(smry))) }) test_that("benchmark scenario presets expand over regimes and tau", { scenarios <- default_quantile_benchmark_scenarios( tau = c(0.25, 0.5), regimes = c("moderate_corr", "heavy_tail") ) expect_true(is.list(scenarios)) expect_equal(length(scenarios), 4) expect_true(all(vapply(scenarios, function(x) length(x$tau) == 1L, logical(1)))) expect_true(all(c( "moderate_corr_tau_0_25", "moderate_corr_tau_0_5", "heavy_tail_tau_0_25", "heavy_tail_tau_0_5" ) %in% names(scenarios))) }) test_that("benchmark_quantile_selection returns validation metrics", { scenarios <- list( smoke = list( n = 50, p = 12, active = 1:3, beta = c(2, 1.5, -1.25), tau = 0.5, rho = 0.7, correlation = "toeplitz", error = "gaussian" ) ) bench <- benchmark_quantile_selection( scenarios = scenarios, methods = c("lasso", "lasso_tuned", "selectboost"), replications = 2, threshold = 0.6, selectboost_args = list(B = 2, step_num = 1, tune_lambda = "bic", nlambda = 3), tuned_args = list(method = "bic", nlambda = 3), seed = 123, verbose = FALSE ) smry <- summary(bench) expect_s3_class(bench, "benchmark_quantile_selection") expect_true(is.data.frame(bench$results)) expect_equal(sort(unique(bench$results$method)), c("lasso", "lasso_tuned", "selectboost")) expect_true(all(c("tp", "fp", "fn", "tpr", "fdr", "runtime_sec", "success", "error_message") %in% names(bench$results))) expect_true(is.data.frame(smry)) expect_true(all(c( "scenario", "tau", "method", "mean_tpr", "mean_fdr", "failure_rate", "support_stability" ) %in% colnames(smry))) }) test_that("benchmark_quantile_selection records method failures instead of aborting", { scenarios <- list( smoke = list( n = 40, p = 10, active = 1:2, beta = c(1.5, -1), tau = 0.5, rho = 0.5, correlation = "toeplitz", error = "gaussian" ) ) bench <- suppressWarnings( benchmark_quantile_selection( scenarios = scenarios, methods = "lasso", replications = 1, lasso_args = list(lambda = "bad"), seed = 1, verbose = FALSE ) ) expect_false(bench$results$success[[1]]) expect_true(is.na(bench$results$tpr[[1]])) expect_true(nzchar(bench$results$error_message[[1]])) expect_equal(summary(bench)$failure_rate[[1]], 1) })