test_that("importance() accepts all ci_method values", { set.seed(123) task = sim_dgp_independent(n = 100) pfi = PFI$new( task = task, learner = lrn("regr.rpart"), measure = msr("regr.mse"), resampling = rsmp("subsampling", repeats = 5), n_repeats = 2 ) pfi$compute() # Test that all variance methods work imp_none = pfi$importance(ci_method = "none") imp_raw = pfi$importance(ci_method = "raw") expect_warning( imp_nb <- pfi$importance(ci_method = "nadeau_bengio") ) imp_quantile = pfi$importance(ci_method = "quantile") expect_importance_dt(imp_none, features = pfi$features) expect_importance_dt(imp_raw, features = pfi$features) expect_importance_dt(imp_nb, features = pfi$features) expect_importance_dt(imp_quantile, features = pfi$features) }) test_that("ci_method='none' produces no variance columns", { set.seed(123) task = sim_dgp_independent(n = 100) pfi = PFI$new( task = task, learner = lrn("regr.rpart"), measure = msr("regr.mse"), resampling = rsmp("subsampling", repeats = 5), n_repeats = 2 ) pfi$compute() imp_none = pfi$importance(ci_method = "none") # Check that only feature and importance columns exist expect_equal(names(imp_none), c("feature", "importance")) }) test_that("raw CIs are narrower than nadeau_bengio corrected CIs", { set.seed(123) task = sim_dgp_independent(n = 200) pfi = PFI$new( task = task, learner = lrn("regr.rpart"), measure = msr("regr.mse"), resampling = rsmp("subsampling", repeats = 11, ratio = 0.8), n_repeats = 3 ) pfi$compute() imp_raw = pfi$importance(ci_method = "raw") imp_nb = pfi$importance(ci_method = "nadeau_bengio") # Calculate CI widths width_raw = imp_raw$conf_upper - imp_raw$conf_lower width_nb = imp_nb$conf_upper - imp_nb$conf_lower # Raw CIs should be narrower than corrected ones on average # Compare the mean widths instead of individual features # The nadeau_bengio correction factor should make CIs wider on average expect_true(mean(width_nb) > mean(width_raw)) }) test_that("nadeau_bengio correction requires appropriate resampling", { set.seed(123) task = sim_dgp_independent(n = 100) # Cross-validation is not supported for nadeau_bengio pfi = PFI$new( task = task, learner = lrn("regr.rpart"), measure = msr("regr.mse"), resampling = rsmp("cv", folds = 3), n_repeats = 2 ) pfi$compute() # Should error for unsupported resampling expect_warning( pfi$importance(ci_method = "nadeau_bengio"), regexp = "recommended for resampling types" ) # But raw variance should still work imp_raw = pfi$importance(ci_method = "raw") expect_importance_dt(imp_raw, features = pfi$features) }) test_that("confidence level parameter works correctly", { set.seed(123) task = sim_dgp_independent(n = 100) pfi = PFI$new( task = task, learner = lrn("regr.rpart"), measure = msr("regr.mse"), resampling = rsmp("subsampling", repeats = 5), n_repeats = 2 ) pfi$compute() # Test different confidence levels imp_90 = pfi$importance(ci_method = "raw", conf_level = 0.90) imp_95 = pfi$importance(ci_method = "raw", conf_level = 0.95) imp_99 = pfi$importance(ci_method = "raw", conf_level = 0.99) # Calculate CI widths width_90 = imp_90$conf_upper - imp_90$conf_lower width_95 = imp_95$conf_upper - imp_95$conf_lower width_99 = imp_99$conf_upper - imp_99$conf_lower # Higher confidence level should produce wider CIs (on average) expect_true(mean(width_90) < mean(width_95)) expect_true(mean(width_95) < mean(width_99)) }) test_that("variance estimation works with bootstrap resampling", { set.seed(123) task = sim_dgp_independent(n = 100) pfi = PFI$new( task = task, learner = lrn("regr.rpart"), measure = msr("regr.mse"), resampling = rsmp("bootstrap", repeats = 11), n_repeats = 2 ) pfi$compute() # Both raw and nadeau_bengio should work with bootstrap imp_raw = pfi$importance(ci_method = "raw") imp_nb = pfi$importance(ci_method = "nadeau_bengio") expect_importance_dt(imp_raw, features = pfi$features) expect_importance_dt(imp_nb, features = pfi$features) # Verify variance columns exist expect_true(all(c("se", "conf_lower", "conf_upper") %in% names(imp_raw))) expect_true(all(c("se", "conf_lower", "conf_upper") %in% names(imp_nb))) }) test_that("quantile variance method works", { set.seed(123) task = sim_dgp_independent(n = 200) pfi = PFI$new( task = task, learner = lrn("regr.rpart"), measure = msr("regr.mse"), resampling = rsmp("subsampling", repeats = 5), n_repeats = 2 ) pfi$compute() imp_quantile = pfi$importance(ci_method = "quantile") # Check structure expect_importance_dt(imp_quantile, features = pfi$features) # Verify CI columns exist (no se for quantile method) expected_cols = c("feature", "importance", "conf_lower", "conf_upper") expect_true(all(expected_cols %in% names(imp_quantile))) expect_false("se" %in% names(imp_quantile)) # All CIs should be valid intervals expect_true(all(imp_quantile$conf_lower <= imp_quantile$conf_upper)) # Point estimates should be between lower and upper bounds (or close) # Due to using mean vs quantiles, this is not guaranteed but usually holds expect_true(all( imp_quantile$importance >= imp_quantile$conf_lower | abs(imp_quantile$importance - imp_quantile$conf_lower) < 0.01 )) expect_true(all( imp_quantile$importance <= imp_quantile$conf_upper | abs(imp_quantile$importance - imp_quantile$conf_upper) < 0.01 )) }) test_that("quantile CIs differ from parametric methods", { set.seed(123) task = sim_dgp_independent(n = 200) pfi = PFI$new( task = task, learner = lrn("regr.rpart"), measure = msr("regr.mse"), resampling = rsmp("subsampling", repeats = 15), n_repeats = 3 ) pfi$compute() imp_raw = pfi$importance(ci_method = "raw") imp_quantile = pfi$importance(ci_method = "quantile") # Point estimates should be the same (both use mean) expect_equal(imp_raw$importance, imp_quantile$importance) # CIs should generally differ between methods # (quantile is non-parametric, raw assumes normality) expect_false(all(imp_raw$conf_lower == imp_quantile$conf_lower)) expect_false(all(imp_raw$conf_upper == imp_quantile$conf_upper)) })