library(funcml)

# Shapley values plus the baseline should roughly reconstruct the model's
# prediction for a single observation (loose tolerance: sampling-based SHAP).
test_that("shap additivity approximately holds", {
  set.seed(1)
  toy <- data.frame(y = rnorm(30), x = rnorm(30), z = rnorm(30))
  fitted_glm <- fit(y ~ x + z, data = toy, model = "glm")
  shap_out <- interpret(fitted_glm, toy, method = "shap", nsim = 20, nsamples = 20)
  pred_first <- predict(fitted_glm, toy[1, , drop = FALSE])
  base_val <- shap_out$result$baseline[1]
  reconstructed <- sum(shap_out$result$shap) + base_val
  expect_true(abs(reconstructed - pred_first[1]) < 1)
})

# All three interpretability methods should honor a `features` subset on a
# binary-classification fit, and local/shap outputs should carry labelled
# "feature = value" strings.
test_that("classification interpretability supports subsetted features", {
  set.seed(11)
  clf_data <- data.frame(
    x1 = rnorm(80),
    x2 = rnorm(80),
    x3 = rnorm(80)
  )
  linpred <- 1.2 * clf_data$x1 - 0.8 * clf_data$x2 + 0.4 * clf_data$x3
  pr <- stats::plogis(linpred)
  labels <- ifelse(runif(80) < pr, "yes", "no")
  clf_data$y <- factor(labels, levels = c("no", "yes"))
  clf_fit <- fit(y ~ x1 + x2 + x3, data = clf_data, model = "glm")
  vip_res <- interpret(
    clf_fit, clf_data,
    method = "vip", features = c("x1", "x2"),
    metric = "logloss", nsim = 3, seed = 1
  )
  local_res <- interpret(
    clf_fit, clf_data,
    method = "local_model", features = c("x1", "x2"),
    newdata = clf_data[1, , drop = FALSE],
    nsamples = 40, class_level = "yes", k = 2
  )
  shap_res <- interpret(
    clf_fit, clf_data,
    method = "shap", features = c("x1", "x2"),
    newdata = clf_data[1, , drop = FALSE],
    nsim = 12, nsamples = 50, class_level = "yes", seed = 2
  )
  expect_true(all(c("x1", "x2") %in% vip_res$result$scores$feature))
  expect_gt(nrow(local_res$result$results), 0)
  expect_true(all(grepl(" = ", local_res$result$results$feature.value, fixed = TRUE)))
  expect_true(all(c("observed_value", "encoded_value", "beta", "effect") %in%
    names(local_res$result$results)))
  expect_equal(sort(shap_res$result$feature), c("x1", "x2"))
  expect_true(all(grepl(" = ", shap_res$result$feature_label, fixed = TRUE)))
})

# With the same seed, importance_type = "auto" and "model" should both route
# through the internal permutation engine and produce identical scores.
test_that("vip always uses internal permutation importance", {
  set.seed(21)
  reg_data <- data.frame(
    x1 = rnorm(90),
    x2 = rnorm(90),
    x3 = rnorm(90)
  )
  reg_data$y <- 1.4 * reg_data$x1 - 0.7 * reg_data$x2 +
    0.2 * reg_data$x3 + rnorm(90, sd = 0.2)
  tree_fit <- fit(y ~ x1 + x2 + x3, data = reg_data, model = "rpart")
  auto_vip <- interpret(
    tree_fit, reg_data,
    method = "vip", importance_type = "auto", nsim = 6, seed = 5
  )
  model_vip <- interpret(
    tree_fit, reg_data,
    method = "vip", importance_type = "model", nsim = 6, seed = 5
  )
  expect_equal(auto_vip$result$scores, model_vip$result$scores)
  expect_equal(auto_vip$diagnostics$engine, "permute")
  expect_equal(model_vip$diagnostics$engine, "permute")
})

# ICE curves for a numeric predictor should come back as a long table with
# the expected id/feature/value/yhat columns.
test_that("ICE handles vectorized feature grids for numeric predictors", {
  mtcars_fit <- fit(mpg ~ wt + hp, data = mtcars, model = "glm")
  ice_res <- interpret(
    mtcars_fit, mtcars,
    method = "ice", features = "wt", nsamples = 10
  )
  expect_s3_class(ice_res, "funcml_ice")
  expect_gt(nrow(ice_res$result$curves), 0)
  expect_true(all(c("id", "feature", "value", "yhat") %in%
    names(ice_res$result$curves)))
})

# baseline + sum(shap) must equal the reported prediction to tight tolerance
# for a single observation.
test_that("shap paths sum back to the prediction", {
  set.seed(31)
  reg_data <- data.frame(y = rnorm(50), x = rnorm(50), z = rnorm(50))
  reg_data$y <- 1.8 * reg_data$x - 0.9 * reg_data$z + rnorm(50, sd = 0.2)
  glm_fit <- fit(y ~ x + z, data = reg_data, model = "glm")
  shap_res <- interpret(
    glm_fit, reg_data,
    method = "shap", newdata = reg_data[1, , drop = FALSE],
    nsim = 40, nsamples = 30, seed = 1
  )
  path_total <- shap_res$result$baseline[1] + sum(shap_res$result$shap)
  expect_equal(path_total, shap_res$result$prediction[1], tolerance = 1e-8)
})

# A multi-row newdata should yield one set of shap rows per observation,
# tagged with an observation index and labelled feature values.
test_that("shap supports multiple observations for summary-style output", {
  set.seed(41)
  reg_data <- data.frame(y = rnorm(30), x = rnorm(30), z = rnorm(30))
  reg_data$y <- 1.5 * reg_data$x - 0.6 * reg_data$z + rnorm(30, sd = 0.2)
  glm_fit <- fit(y ~ x + z, data = reg_data, model = "glm")
  shap_res <- interpret(
    glm_fit, reg_data,
    method = "shap", newdata = reg_data[1:5, , drop = FALSE],
    nsim = 12, nsamples = 20, seed = 1
  )
  expect_equal(sort(unique(shap_res$result$observation)), 1:5)
  expect_true(all(c("observation", "feature", "shap", "feature_value",
    "feature_label") %in% names(shap_res$result)))
})

# "breakdown" was removed from the method list; requesting it should raise
# the usual match.arg-style error (message mentions "arg").
test_that("breakdown is no longer an available interpretability method", {
  mtcars_fit <- fit(mpg ~ wt + hp, data = mtcars, model = "glm")
  expect_error(
    interpret(mtcars_fit, mtcars, method = "breakdown"),
    "arg"
  )
})

# Calibration output should expose the curve, raw probabilities/truth, the
# positive level, and finite ECE/MCE summaries, with one curve row per bin.
test_that("calibration interpretability returns curve and calibration errors", {
  set.seed(51)
  clf_data <- data.frame(
    x1 = rnorm(120),
    x2 = rnorm(120)
  )
  linpred <- 1.1 * clf_data$x1 - 0.7 * clf_data$x2
  clf_data$y <- factor(ifelse(runif(120) < stats::plogis(linpred), "yes", "no"),
    levels = c("no", "yes"))
  clf_fit <- fit(y ~ x1 + x2, data = clf_data, model = "glm")
  cal_res <- interpret(clf_fit, clf_data,
    method = "calibration", bins = 6, strategy = "quantile")
  expect_s3_class(cal_res, "funcml_calibration")
  expect_true(all(c("curve", "prob", "truth", "positive", "ece", "mce") %in%
    names(cal_res$result)))
  expect_equal(nrow(cal_res$result$curve), 6)
  expect_true(is.finite(cal_res$result$ece))
  expect_true(is.finite(cal_res$result$mce))
})