# Test file for the fit_TwinBKP function # # The tests intentionally use fixed theta_g in most cases to keep runtime small. # Hyperparameter optimization is covered by one small smoke test. test_that("fit_TwinBKP runs correctly with a documentation-style example", { set.seed(1) n <- 30 X <- matrix(runif(n * 2), ncol = 2) m <- sample(10:30, n, replace = TRUE) p <- plogis(2 * (X[, 1] - 0.5)) y <- rbinom(n, size = m, prob = p) expect_no_error({ model <- fit_TwinBKP(X, y, m, g = 10, twins = 2, theta_g = 0.25) }) expect_s3_class(model, "TwinBKP") expect_true(is.list(model)) expect_true(all(model$global_indices >= 1L)) expect_true(all(model$global_indices <= nrow(X))) expect_equal(length(model$global_indices), length(unique(model$global_indices))) expect_true(is.finite(model$theta_l)) expect_true(model$theta_l > 0) expect_equal(model$theta_g, 0.25) }) test_that("fit_TwinBKP handles input validation similarly to fit_BKP", { expect_error(fit_TwinBKP(), "Arguments 'X', 'y', and 'm' must be provided.") n <- 12 d <- 2 X_test <- matrix(runif(n * d), nrow = n) y_test <- rbinom(n, size = 20, prob = 0.5) m_test <- rep(20, n) expect_error( fit_TwinBKP(X = X_test, y = y_test), "Arguments 'X', 'y', and 'm' must be provided." ) expect_error( fit_TwinBKP(X = as.character(X_test), y = y_test, m = m_test), "'X' must be a numeric matrix or data frame." ) expect_error( fit_TwinBKP(X = X_test, y = as.character(y_test), m = m_test), "'y' must be numeric." ) expect_error( fit_TwinBKP(X = X_test, y = y_test, m = as.character(m_test)), "'m' must be numeric." ) expect_error( fit_TwinBKP(X = X_test, y = y_test[1:(n - 1)], m = m_test), "'y' must have the same number of rows as 'X'." ) expect_error( fit_TwinBKP(X = X_test, y = y_test, m = m_test[1:(n - 1)]), "'m' must have the same number of rows as 'X'." ) expect_error( fit_TwinBKP(X = X_test, y = rep(-1, n), m = m_test), "'y' must be nonnegative." ) expect_error( fit_TwinBKP(X = X_test, y = y_test, m = rep(0, n)), "'m' must be strictly positive." ) expect_error( fit_TwinBKP(X = X_test, y = m_test + 1, m = m_test), "Each element of 'y' must be less than or equal to corresponding element of 'm'." ) X_na <- X_test X_na[1, 1] <- NA expect_error( fit_TwinBKP(X = X_na, y = y_test, m = m_test), "Missing values are not allowed in 'X', 'y', or 'm'." ) X_inf <- X_test X_inf[1, 1] <- Inf expect_error( fit_TwinBKP(X = X_inf, y = y_test, m = m_test), "'X', 'y', and 'm' must contain only finite values." ) expect_error( fit_TwinBKP(X_test, y_test, m_test, store_kernel = c(TRUE, FALSE)), "'store_kernel' must be a single logical value." ) }) test_that("fit_TwinBKP validates Xbounds and prior settings", { set.seed(2) n <- 12 d <- 2 X <- matrix(runif(n * d), nrow = n) y <- rbinom(n, size = 20, prob = 0.5) m <- rep(20, n) expect_error( fit_TwinBKP(X, y, m, Xbounds = 1), "'Xbounds' must be a numeric matrix." ) expect_error( fit_TwinBKP(X, y, m, Xbounds = matrix(1, nrow = d, ncol = 3)), paste0("'Xbounds' must be a matrix with dimensions d x 2, where d = ", d, ".") ) expect_error( fit_TwinBKP(X, y, m, Xbounds = matrix(c(0, 1, 0.5, 0.4), ncol = 2, byrow = TRUE)), "Each row of 'Xbounds' must satisfy lower < upper" ) expect_error( fit_TwinBKP(X, y, m, r0 = 0), "'r0' must be a positive scalar" ) expect_error( fit_TwinBKP(X, y, m, prior = "fixed", p0 = 1), "For fixed prior in BKP, 'p0' must be a scalar in \\(0, 1\\)." ) X_out <- matrix(runif(n * d, min = -0.2, max = 1.2), nrow = n) expect_warning( expect_s3_class( fit_TwinBKP(X_out, y, m, theta_g = 0.25, g = 6, twins = 1), "TwinBKP" ), "Input X does not appear to be normalized to \\[0,1\\]" ) }) test_that("fit_TwinBKP validates hyperparameter and twinning controls", { set.seed(3) n <- 14 d <- 3 X <- matrix(runif(n * d), nrow = n) y <- rbinom(n, size = 15, prob = 0.5) m <- rep(15, n) expect_error( fit_TwinBKP(X, y, m, theta_g = "bad"), "'theta_g' must be numeric." ) expect_error( fit_TwinBKP(X, y, m, theta_g = c(0.2, 0.3), isotropic = TRUE), "When isotropic=TRUE, 'theta_g' must be a scalar." ) expect_error( fit_TwinBKP(X, y, m, theta_g = c(0.2, 0.3), isotropic = FALSE), "length 3" ) expect_error( fit_TwinBKP(X, y, m, theta_g = -0.2), "'theta_g' must be strictly positive." ) expect_error( fit_TwinBKP(X, y, m, theta_l = 0), "'theta_l' must be a positive scalar." ) expect_error( fit_TwinBKP(X, y, m, isotropic = c(TRUE, FALSE)), "'isotropic' must be a single logical value." ) expect_error( fit_TwinBKP(X, y, m, n_multi_start = 0), "'n_multi_start' must be a positive integer." ) expect_error( fit_TwinBKP(X, y, m, n_threads = 0), "'n_threads' must be a positive integer." ) expect_error( fit_TwinBKP(X, y, m, theta_g = 0.3, g = 6, twins = 0), "'twins' must be a positive integer." ) expect_error( fit_TwinBKP(X, y, m, theta_g = 0.3, g = 6, twins = NA), "'twins' must be a positive integer." ) expect_error( fit_TwinBKP(X, y, m, g = 1), "'g' must be an integer between 2 and n - 1." ) expect_error( fit_TwinBKP(X, y, m, l = -1), "'l' must be a nonnegative integer." ) expect_error( fit_TwinBKP(X, y, m, l = n), "'l' cannot exceed the number of non-global training points." ) }) test_that("fit_TwinBKP returns an object with expected structure and content", { set.seed(4) n <- 24 d <- 2 X <- matrix(runif(n * d), nrow = n) y <- rbinom(n, size = 12, prob = 0.45) m <- rep(12, n) model <- fit_TwinBKP( X = X, y = y, m = m, theta_g = 0.3, theta_l = 0.4, g = 8, twins = 2 ) expect_s3_class(model, "TwinBKP") expect_true(is.list(model)) expected_names <- c( "theta_opt", "theta_g", "theta_l", "kernel", "global_kernel", "local_kernel", "isotropic", "loss", "loss_min", "X", "Xnorm", "Xbounds", "y", "m", "prior", "r0", "p0", "alpha0", "beta0", "alpha_n", "beta_n", "global_indices", "control", "diagnostics" ) expect_true(all(expected_names %in% names(model))) expect_equal(model$theta_opt, model$theta_g) expect_equal(model$theta_g, 0.3) expect_equal(model$theta_l, 0.4) expect_equal(model$kernel, model$global_kernel) expect_equal(model$global_kernel, "gaussian") expect_equal(model$local_kernel, "wendland") expect_equal(model$loss, "brier") expect_equal(model$prior, "noninformative") expect_equal(model$control$twins, 2L) expect_true(is.integer(model$control$u1)) expect_equal(length(model$control$u1), 2L) expect_true(is.numeric(model$control$r) || is.integer(model$control$r)) expect_true(model$control$r >= 2L) expect_equal(model$control$leaf_size, 8L) expect_equal(model$X, X) expect_equal(dim(model$Xnorm), dim(X)) expect_null(model$diagnostics$K) expect_null(model$diagnostics$K_global) expect_null(model$diagnostics$K_local) expect_false(model$control$store_kernel) expect_true(all(is.finite(model$alpha_n))) expect_true(all(is.finite(model$beta_n))) local_indices <- twin_local_indices_rcpp( Xtrain_norm = model$Xnorm, Xquery_norm = model$Xnorm, g_indices = model$global_indices, l = model$control$l, leaf_size = model$control$leaf_size ) expect_equal(nrow(local_indices), nrow(model$X)) expect_equal(ncol(local_indices), model$control$l) expect_false(any(local_indices %in% model$global_indices)) }) test_that("fit_TwinBKP returns compact nested control and diagnostics", { set.seed(123) n <- 20 X <- matrix(runif(n * 2), ncol = 2) m <- sample(8:20, n, replace = TRUE) y <- rbinom(n, size = m, prob = 0.4) model <- fit_TwinBKP( X, y, m, theta_g = 0.3, theta_l = 0.4, g = 6, twins = 2 ) expect_s3_class(model, "TwinBKP") expect_true(is.list(model$control)) expect_true(is.list(model$diagnostics)) expect_true(all(c( "g_target", "g", "l", "r", "twins", "u1", "leaf_size", "store_kernel" ) %in% names(model$control))) expect_true(all(c( "twin_info", "K", "K_global", "K_local" ) %in% names(model$diagnostics))) expect_null(model[["local_indices"]]) expect_null(model[["twin_data"]]) expect_null(model[["complexity"]]) expect_true(is.list(model$diagnostics$twin_info)) expect_equal(model$global_indices, model$diagnostics$twin_info$g_indices) }) test_that("fit_TwinBKP stores dense diagnostic kernels only on request", { set.seed(44) n <- 24 X <- matrix(runif(n * 2), ncol = 2) m <- sample(8:15, n, replace = TRUE) y <- rbinom(n, size = m, prob = 0.45) model <- fit_TwinBKP( X = X, y = y, m = m, theta_g = 0.3, theta_l = 0.4, g = 8, twins = 2, store_kernel = TRUE ) expect_true(is.matrix(model$diagnostics$K)) expect_true(is.matrix(model$diagnostics$K_global)) expect_true(is.matrix(model$diagnostics$K_local)) expect_equal(model$diagnostics$K, model$diagnostics$K_global + model$diagnostics$K_local) expect_equal(model$alpha_n, model$alpha0 + as.vector(model$diagnostics$K %*% model$y)) expect_equal(model$beta_n, model$beta0 + as.vector(model$diagnostics$K %*% (model$m - model$y))) expect_equal(dim(model$diagnostics$K), c(nrow(X), nrow(X))) expect_equal(dim(model$diagnostics$K_global), c(nrow(X), nrow(X))) expect_equal(dim(model$diagnostics$K_local), c(nrow(X), nrow(X))) }) test_that("fit_TwinBKP computes theta_l as the empirical covering radius", { set.seed(5) n <- 30 X <- matrix(runif(n * 2), ncol = 2) y <- rbinom(n, size = 10, prob = 0.5) m <- rep(10, n) model <- fit_TwinBKP( X = X, y = y, m = m, theta_g = 0.25, g = 10, twins = 2 ) G <- model$global_indices theta_manual <- max(vapply(seq_len(nrow(model$Xnorm)), function(i) { dif <- sweep(model$Xnorm[G, , drop = FALSE], 2, model$Xnorm[i, ], "-") min(sqrt(rowSums(dif^2))) }, numeric(1))) expect_equal(model$theta_l, theta_manual, tolerance = 1e-10) }) test_that("fit_TwinBKP posterior matches the combined global-local kernel update", { set.seed(6) n <- 22 X <- matrix(runif(n * 2), ncol = 2) m <- sample(8:15, n, replace = TRUE) y <- rbinom(n, size = m, prob = 0.4) model <- fit_TwinBKP( X = X, y = y, m = m, theta_g = 0.25, theta_l = 0.35, g = 8, twins = 2, store_kernel = TRUE ) prior_par <- get_prior( prior = model$prior, model = "BKP", r0 = model$r0, p0 = model$p0, y = model$y, m = model$m, K = model$diagnostics$K ) expect_equal(model$alpha0, prior_par$alpha0) expect_equal(model$beta0, prior_par$beta0) expect_equal(model$alpha_n, prior_par$alpha0 + as.vector(model$diagnostics$K %*% model$y)) expect_equal(model$beta_n, prior_par$beta0 + as.vector(model$diagnostics$K %*% (model$m - model$y))) }) test_that("fit_TwinBKP supports fixed prior, adaptive prior, and anisotropic theta_g", { set.seed(7) n <- 24 X <- matrix(runif(n * 2), ncol = 2) m <- sample(8:20, n, replace = TRUE) y <- rbinom(n, size = m, prob = 0.45) fixed_model <- fit_TwinBKP( X, y, m, prior = "fixed", r0 = 0.5, p0 = 0.5, theta_g = 0.3, theta_l = 0.4, g = 8, twins = 2 ) adaptive_model <- fit_TwinBKP( X, y, m, prior = "adaptive", r0 = mean(m), theta_g = c(0.3, 0.4), theta_l = 0.4, isotropic = FALSE, g = 8, twins = 2 ) expect_s3_class(fixed_model, "TwinBKP") expect_s3_class(adaptive_model, "TwinBKP") expect_equal(length(adaptive_model$theta_g), 2) expect_equal(adaptive_model$isotropic, FALSE) }) test_that("fit_TwinBKP rejects removed ESS arguments", { X <- matrix(runif(12), ncol = 2) m <- rep(10, 6) y <- rbinom(6, size = m, prob = 0.4) expect_error( fit_TwinBKP(X, y, m, theta_g = 0.3, ess = "shepard", g = 2, twins = 1), "unused argument" ) }) test_that("fit_TwinBKP is reproducible with a fixed random seed", { set.seed(9) n <- 26 X <- matrix(runif(n * 2), ncol = 2) m <- sample(10:20, n, replace = TRUE) y <- rbinom(n, size = m, prob = 0.4) set.seed(123) model1 <- fit_TwinBKP( X, y, m, theta_g = 0.25, g = 8, twins = 3 ) set.seed(123) model2 <- fit_TwinBKP( X, y, m, theta_g = 0.25, g = 8, twins = 3 ) expect_equal(model1$control$u1, model2$control$u1) expect_equal(model1$global_indices, model2$global_indices) expect_equal(model1$theta_l, model2$theta_l) expect_equal(model1$diagnostics$K, model2$diagnostics$K) }) test_that("fit_TwinBKP can optimize theta_g on a small global subset", { set.seed(10) n <- 18 X <- matrix(runif(n * 2), ncol = 2) m <- sample(8:15, n, replace = TRUE) y <- rbinom(n, size = m, prob = 0.5) model <- fit_TwinBKP( X, y, m, g = 6, twins = 1, n_multi_start = 1, n_threads = 1 ) expect_s3_class(model, "TwinBKP") expect_true(all(is.finite(model$theta_g))) expect_true(all(model$theta_g > 0)) expect_true(is.finite(model$loss_min)) }) test_that("fit_TwinBKP passes n_threads to twinning backend", { set.seed(123) X <- matrix(seq(0, 1, length.out = 30), ncol = 1) m <- rep(10, 30) y <- rbinom(30, size = m, prob = 0.4 + 0.2 * X[, 1]) fit <- fit_TwinBKP( X, y, m, theta_g = 0.3, g = 8, l = 3, twins = 2, n_threads = 1 ) expect_s3_class(fit, "TwinBKP") expect_equal(fit$control$n_threads, 1L) if (!is.null(fit$diagnostics$twin_info$n_threads)) { expect_equal(as.integer(fit$diagnostics$twin_info$n_threads), 1L) } })