# test that extrinsic selection works # load required functions and packages library("testthat") library("SuperLearner") library("ranger") library("xgboost") library("glmnet") library("kernlab") library("caret") library("polspline") # generate the data -- note that this is a simple setting, for speed set.seed(4747) p <- 2 n <- 500 x <- replicate(p, stats::rnorm(n, 0, 1)) x_df <- as.data.frame(x) x_names <- names(x_df) # apply the function to the x's y <- 1 + 0.5 * x[, 1] + 0.75 * x[, 2] + stats::rnorm(n, 0, 1) y_bin <- as.numeric(0.5 * x[, 1] + 0.75 * x[, 2] + stats::rnorm(n, 0, 1) > 0) # fit a Super Learner ensemble set.seed(1234) learners <- c("SL.xgboost", "SL.ranger.imp", "SL.glm", "SL.glmnet", "SL.ksvm", "SL.polymars", "SL.mean") V <- 2 fit <- SuperLearner::SuperLearner(Y = y, X = x_df, SL.library = learners, cvControl = list(V = V)) coef_nms <- names(fit$coef) fit_bin <- SuperLearner::SuperLearner(Y = y_bin, X = x_df, family = "binomial", SL.library = learners, cvControl = list(V = V)) coef_nms_bin <- names(fit_bin$coef) # extract algorithm-specific importance test_that("algorithm-specific importance extraction works", { mean_importance <- extract_importance_mean( fit = fit$fitLibrary$SL.mean_All$object, coef = fit$coef[grepl("mean", coef_nms)], feature_names = x_names ) expect_equal(mean_importance$rank, c(1.5, 1.5)) glm_importance <- extract_importance_glm( fit = fit$fitLibrary$SL.glm_All$object, coef = fit$coef[grepl("glm_All", coef_nms)], feature_names = x_names ) expect_equal(glm_importance$feature, c("V2", "V1")) glmnet_importance <- extract_importance_glmnet( fit = fit$fitLibrary$SL.glmnet_All$object, coef = fit$coef[grepl("glmnet", coef_nms)], feature_names = x_names ) expect_equal(glmnet_importance$feature, c("V2", "V1")) ranger_importance <- extract_importance_ranger( fit = fit$fitLibrary$SL.ranger.imp_All$object, coef = fit$coef[grepl("ranger", coef_nms)], feature_names = x_names ) expect_equal(ranger_importance$feature, c("V2", "V1")) xgboost_importance <- extract_importance_xgboost( fit = fit$fitLibrary$SL.xgboost_All$object, coef = fit$coef[grepl("xgboost", coef_nms)], feature_names = x_names ) expect_equal(xgboost_importance$feature, c("V2", "V1")) xgboost_importance_bin <- extract_importance_xgboost( fit = fit_bin$fitLibrary$SL.xgboost_All$object, coef = fit$coef[grepl("xgboost", coef_nms)], feature_names = x_names ) expect_equal(xgboost_importance_bin$feature, c("V2", "V1")) svm_importance <- extract_importance_svm( fit = fit$fitLibrary$SL.ksvm_All$object, coef = fit$coef[grepl("svm", coef_nms)], feature_names = x_names, x = x_df, y = y ) expect_equal(svm_importance$feature, c("V2", "V1")) polymars_importance <- extract_importance_polymars( fit = fit$fitLibrary$SL.polymars_All$object, coef = fit$coef[grepl("polymars", coef_nms)], feature_names = x_names ) expect_equal(polymars_importance$feature, c("V1", "V2")) # note different name, issue raised at SL polyclass_importance <- extract_importance_polymars( fit = fit_bin$fitLibrary$SL.polymars_All$fit, coef = fit$coef[grepl("polymars", coef_nms)], feature_names = x_names ) expect_equal(polyclass_importance$feature, c("V2", "V1")) }) # extract importance for the whole Super Learner ensemble test_that("SL importance extraction works", { sl_importance_all <- extract_importance_SL( fit = fit, feature_names = x_names, import_type = "all", x = x_df, y = y ) expect_equal(sl_importance_all$feature, c("V2", "V1")) sl_importance_best <- extract_importance_SL( fit = fit, feature_names = x_names, import_type = "best" ) expect_equal(sl_importance_best$feature, c("V2", "V1")) }) # do extrinsic variable selection test_that("Extrinsic variable selection works", { extrinsic_selected <- extrinsic_selection( fit = fit, feature_names = x_names, threshold = 1.5, import_type = "all" ) expect_equal(extrinsic_selected$selected, c(TRUE, FALSE)) })