# ═══════════════════════════════════════════════════════════ # Core sarb() tests — every edge case covered # ═══════════════════════════════════════════════════════════ # ── Basic fitting ── test_that("sarb fits with x/y interface", { set.seed(42) X <- matrix(rnorm(500), 100, 5) colnames(X) <- paste0("V", 1:5) y <- X[,1] * 2 + X[,2] * -1 + rnorm(100, sd = 0.5) model <- sarb(x = X, y = y, n_trees = 50, verbose = 0, seed = 42) expect_s3_class(model, "sarb") expect_equal(model$n_trees_fit, 50) expect_equal(model$n_features, 5) expect_equal(model$n_train, 100) expect_equal(length(model$lambdas), 50) expect_equal(length(model$train_loss), 50) expect_true(all(is.finite(model$train_loss))) expect_true(model$train_loss[50] < model$train_loss[1]) }) test_that("sarb fits with formula interface", { df <- data.frame(x1 = rnorm(50), x2 = rnorm(50)) df$y <- df$x1 * 2 + rnorm(50) model <- sarb(y ~ ., data = df, n_trees = 30, verbose = 0, seed = 1) expect_s3_class(model, "sarb") expect_equal(model$n_features, 2) }) test_that("sarb handles unnamed matrix", { X <- matrix(rnorm(200), 100, 2) # No colnames y <- X[,1] + rnorm(100) model <- sarb(x = X, y = y, n_trees = 20, verbose = 0) expect_equal(model$feature_names, c("V1", "V2")) }) # ── Predictions ── test_that("predict returns correct dimensions", { X <- matrix(rnorm(200), 100, 2) colnames(X) <- c("V1", "V2") y <- X[,1] + rnorm(100, sd = 0.1) model <- sarb(x = X, y = y, n_trees = 30, verbose = 0, seed = 42) X_new <- matrix(rnorm(20), 10, 2) preds <- predict(model, X_new) expect_length(preds, 10) expect_true(all(is.finite(preds))) }) test_that("predict works with data.frame", { df <- data.frame(x1 = rnorm(50), x2 = rnorm(50)) df$y <- df$x1 + rnorm(50) model <- sarb(y ~ ., data = df, n_trees = 20, verbose = 0) preds <- predict(model, df[1:5, ]) expect_length(preds, 5) }) test_that("partial ensemble prediction differs from full", { X <- matrix(rnorm(200), 100, 2) colnames(X) <- c("V1", "V2") y <- X[,1] + rnorm(100, sd = 0.1) model <- sarb(x = X, y = y, n_trees = 50, verbose = 0, seed = 42) X_new <- matrix(rnorm(20), 10, 2) p10 <- predict(model, X_new, n_trees = 10) p50 <- predict(model, X_new, n_trees = 50) expect_length(p10, 10) expect_false(all(p10 == p50)) }) test_that("contributions have correct dimensions", { X <- matrix(rnorm(200), 100, 2) colnames(X) <- c("V1", "V2") y <- X[,1] + rnorm(100) model <- sarb(x = X, y = y, n_trees = 20, verbose = 0, seed = 1) contrib <- predict(model, X[1:5, ], type = "contributions") expect_equal(nrow(contrib), 5) expect_equal(ncol(contrib), 21) # 20 trees + 1 intercept expect_equal(colnames(contrib)[1], "intercept") }) # ── Feature importance ── test_that("importance returns correct structure", { X <- matrix(rnorm(500), 100, 5) colnames(X) <- paste0("V", 1:5) y <- X[,1] * 3 + rnorm(100) model <- sarb(x = X, y = y, n_trees = 50, verbose = 0, seed = 42) imp <- importance(model) expect_s3_class(imp, "data.frame") expect_equal(nrow(imp), 5) expect_true("feature" %in% names(imp)) expect_true("importance" %in% names(imp)) expect_true(all(imp$importance >= 0)) # V1 should be most important expect_equal(imp$feature[1], "V1") }) test_that("anchor importance works", { X <- matrix(rnorm(500), 100, 5) colnames(X) <- paste0("V", 1:5) y <- X[,1] * 3 + rnorm(100) model <- sarb(x = X, y = y, n_trees = 50, verbose = 0, seed = 42) imp <- importance(model, type = "anchor") expect_s3_class(imp, "data.frame") expect_equal(nrow(imp), 5) expect_true(all(imp$importance >= 0 & imp$importance <= 1)) }) # ── Loss functions ── test_that("all loss functions fit without error", { X <- matrix(rnorm(200), 100, 2) colnames(X) <- c("V1", "V2") y <- X[,1] + rnorm(100) for (l in c("squared_error", "absolute_error", "huber", "quantile")) { model <- sarb(x = X, y = y, n_trees = 20, loss = l, verbose = 0, seed = 42) expect_s3_class(model, "sarb") preds <- predict(model, X[1:5, ]) expect_true(all(is.finite(preds))) } }) test_that("poisson loss works with positive response", { X <- matrix(rnorm(200), 100, 2) colnames(X) <- c("V1", "V2") y <- exp(X[,1]) + 1 # Positive values model <- sarb(x = X, y = y, n_trees = 20, loss = "poisson", verbose = 0, seed = 42) expect_s3_class(model, "sarb") }) # ── Reproducibility ── test_that("same seed gives identical results", { X <- matrix(rnorm(200), 100, 2) colnames(X) <- c("V1", "V2") y <- X[,1] + rnorm(100) m1 <- sarb(x = X, y = y, n_trees = 30, verbose = 0, seed = 123) m2 <- sarb(x = X, y = y, n_trees = 30, verbose = 0, seed = 123) expect_equal(m1$lambdas, m2$lambdas) expect_equal(m1$train_loss, m2$train_loss) }) # ── Edge cases ── test_that("warmup_frac = 0 (all exploration)", { X <- matrix(rnorm(200), 100, 2) colnames(X) <- c("V1", "V2") y <- X[,1] + rnorm(100) model <- sarb(x = X, y = y, n_trees = 30, warmup_frac = 0, verbose = 0, seed = 42) expect_equal(model$n_warmup, 0) expect_true(all(model$phases == 2L)) }) test_that("warmup_frac = 1 (no exploration)", { X <- matrix(rnorm(200), 100, 2) colnames(X) <- c("V1", "V2") y <- X[,1] + rnorm(100) model <- sarb(x = X, y = y, n_trees = 30, warmup_frac = 1.0, verbose = 0, seed = 42) expect_equal(model$n_warmup, 30) expect_true(all(model$phases == 1L)) expect_equal(model$n_rejected, 0L) }) test_that("single feature works", { X <- matrix(rnorm(100), 100, 1) colnames(X) <- "V1" y <- X[,1] * 2 + rnorm(100) model <- sarb(x = X, y = y, n_trees = 20, n_anchors = 0, verbose = 0, seed = 42) expect_s3_class(model, "sarb") expect_equal(model$n_features, 1) }) test_that("small dataset (n=20) works", { X <- matrix(rnorm(40), 20, 2) colnames(X) <- c("V1", "V2") y <- X[,1] + rnorm(20) model <- sarb(x = X, y = y, n_trees = 10, verbose = 0, seed = 42) expect_s3_class(model, "sarb") }) # ── Error messages ── test_that("errors for missing input", { expect_error(sarb(), "No data provided") }) test_that("error for formula without data", { expect_error(sarb(y ~ .), "data.*required") }) test_that("error for x without y", { expect_error(sarb(x = matrix(1:4, 2, 2)), "y.*required") }) test_that("error for mismatched dimensions", { expect_error( sarb(x = matrix(1:6, 3, 2), y = 1:4), "must match" ) }) test_that("error for non-numeric response", { expect_error( sarb(x = matrix(1:4, 2, 2), y = c("a", "b")), "numeric" ) }) test_that("error for NA in response", { expect_error( sarb(x = matrix(1:4, 2, 2), y = c(1, NA)), "non-finite" ) }) test_that("error for invalid loss", { X <- matrix(rnorm(40), 20, 2) expect_error( sarb(x = X, y = rnorm(20), loss = "invalid", verbose = 0), "Unknown loss" ) }) test_that("error for bad parameters", { X <- matrix(rnorm(40), 20, 2) y <- rnorm(20) expect_error(sarb(x = X, y = y, warmup_frac = 2, verbose = 0), "warmup_frac") expect_error(sarb(x = X, y = y, learning_rate = 0, verbose = 0), "learning_rate") expect_error(sarb(x = X, y = y, colsample = 0, verbose = 0), "colsample") }) test_that("error for predict with wrong columns", { X <- matrix(rnorm(200), 100, 2) colnames(X) <- c("V1", "V2") y <- X[,1] + rnorm(100) model <- sarb(x = X, y = y, n_trees = 10, verbose = 0) expect_error(predict(model, matrix(1:3, 1, 3)), "columns") }) test_that("error for predict with missing data.frame columns", { df <- data.frame(V1 = rnorm(50), V2 = rnorm(50)) df$y <- df$V1 + rnorm(50) model <- sarb(y ~ ., data = df, n_trees = 10, verbose = 0) expect_error( predict(model, data.frame(V1 = 1)), # Missing V2 "missing columns" ) }) test_that("error for predict with invalid type", { X <- matrix(rnorm(200), 100, 2) model <- sarb(x = X, y = rnorm(100), n_trees = 10, verbose = 0) expect_error(predict(model, X, type = "invalid"), "type") })