#### Comprehensive Edge Case Tests for bmco #### library(testthat) library(bmco) # --------------------------------------------------------------------------- # Helper: checks whether all warnings originating from expr are given. # use this when a call generates multiple warnings and you wish to verify presence of all of them. # --------------------------------------------------------------------------- expect_all_warnings <- function(expr, patterns) { warns <- testthat::capture_warnings(expr) expect_true( all(colSums(sapply(patterns, grepl, warns)) > 0), info = paste("Not all expected warnings found.\nFound:", paste(warns, collapse = "\n")) ) } # ============================================================================= # PART 1: DATA STRUCTURE EDGE CASES # ============================================================================= test_that("error when data is NULL", { expect_error( bmvb(data = NULL, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2")), "data frame" ) }) test_that("error when data is not a data frame", { expect_error( bmvb(data = list(a = 1, b = 2), grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2")), "data frame" ) }) test_that("error when data has zero rows", { empty_data <- data.frame(grp = character(0), y1 = integer(0), y2 = integer(0)) expect_error( bmvb(data = empty_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2")), "Group.*not found in grouping variable" ) }) # ============================================================================= # PART 2: GROUP VARIABLE EDGE CASES # ============================================================================= test_that("error when group variable doesn't exist", { test_data <- data.frame( treatment = rep(c("A", "B"), each = 20), y1 = rbinom(40, 1, 0.5), y2 = rbinom(40, 1, 0.5) ) expect_error( bmvb(data = test_data, grp = "nonexistent", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2")), "not found in data" ) }) test_that("error when grp_a value not found", { test_data <- data.frame( grp = rep(c("A", "B"), each = 20), y1 = rbinom(40, 1, 0.5), y2 = rbinom(40, 1, 0.5) ) expect_error( bmvb(data = test_data, grp = "grp", grp_a = "X", grp_b = "B", y_vars = c("y1", "y2")), "not found in grouping variable" ) }) test_that("error when grp_b value not found", { test_data <- data.frame( grp = rep(c("A", "B"), each = 20), y1 = rbinom(40, 1, 0.5), y2 = rbinom(40, 1, 0.5) ) expect_error( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "Y", y_vars = c("y1", "y2")), "not found in grouping variable" ) }) test_that("error when grp has only one unique value", { test_data <- data.frame( grp = rep("A", 40), y1 = rbinom(40, 1, 0.5), y2 = rbinom(40, 1, 0.5) ) expect_error( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2")), "not found in grouping variable" ) }) test_that("error when one group is empty after NA removal", { test_data <- data.frame( grp = rep(c("A", "B"), each = 20), y1 = c(rep(NA, 20), rbinom(20, 1, 0.5)), # All A are NA y2 = rbinom(40, 1, 0.5) ) expect_error( suppressWarnings(bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2"))), "No observations in group" ) }) # ============================================================================= # PART 3: OUTCOME VARIABLE EDGE CASES # ============================================================================= test_that("error when outcome variable doesn't exist", { test_data <- data.frame( grp = rep(c("A", "B"), each = 20), y1 = rbinom(40, 1, 0.5), y2 = rbinom(40, 1, 0.5) ) expect_error( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y_missing")), "not found in data" ) }) test_that("error for more than 2 outcomes", { test_data <- data.frame( grp = rep(c("A", "B"), each = 20), y1 = rbinom(40, 1, 0.5), y2 = rbinom(40, 1, 0.5), y3 = rbinom(40, 1, 0.5) ) expect_error( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2", "y3")), "only supported for 2 outcomes" ) }) test_that("error for non-binary outcome values", { test_data <- data.frame( grp = rep(c("A", "B"), each = 20), y1 = sample(0:2, 40, replace = TRUE), # 0, 1, 2 y2 = rbinom(40, 1, 0.5) ) expect_error( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2")), "must be binary" ) }) test_that("warning for outcome with no variation (all zeros) — three warnings expected", { test_data <- data.frame( grp = rep(c("A", "B"), each = 20), y1 = rep(0, 40), y2 = rbinom(40, 1, 0.5) ) # check_input geeft: globale warning (sectie 2) + per-groep warning (sectie 3) expect_all_warnings( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2"), n_it = 100), c( "no variation \\(all 0\\)", "no variation in group A \\(all 0\\)", "no variation in group B \\(all 0\\)" ) ) }) test_that("warning for outcome with no variation (all ones) — drie warnings verwacht", { test_data <- data.frame( grp = rep(c("A", "B"), each = 20), y1 = rep(1, 40), y2 = rbinom(40, 1, 0.5) ) expect_all_warnings( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2"), n_it = 100), c( "no variation \\(all 1\\)", "no variation in group A \\(all 1\\)", "no variation in group B \\(all 1\\)" ) ) }) # ============================================================================= # PART 4: COVARIATE EDGE CASES (bglm) # ============================================================================= test_that("error when covariate doesn't exist", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = rnorm(60), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "nonexistent", y_vars = c("y1", "y2"), x_method = "Value", x_def = 0, n_burn = 10, n_it = 20), "not found in data" ) }) test_that("warning when covariate has no variation", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = rep(50, 60), # No variation y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_all_warnings( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = 50, n_burn = 10, n_it = 20), c("no variation", "sample size", "converged") ) }) test_that("warning when covariate is highly collinear with group", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), # x nearly perfectly predicts group x = rep(c(1, 2), each = 30) + rnorm(60, 0, 0.01), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_all_warnings( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "x", y_vars = c("y1", "y2"), x_method = "Empirical", x_def = c(-Inf, Inf), n_burn = 10, n_it = 20), c("High correlation", "sample size", "converged") ) }) test_that("error for discrete covariate with more than 2 levels", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), severity = factor(sample(c("mild", "moderate", "severe"), 60, replace = TRUE)), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "severity", y_vars = c("y1", "y2"), x_method = "Value", x_def = 0, n_burn = 10, n_it = 20), "only binary covariates.*2 categories.*supported" ) }) test_that("error for discrete covariate with only 1 level", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), constant = factor(rep("same", 60)), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "constant", y_vars = c("y1", "y2"), x_method = "Value", x_def = 0, n_burn = 10, n_it = 20), "only 1 category.*At least 2 categories" ) }) test_that("error when x_method = 'Value' but x_def is a vector", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = rnorm(60, 50, 10), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = c(40, 60), n_burn = 10, n_it = 20), "x_method" ) }) test_that("error when x_method = 'Empirical' but x_def is a scalar", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = rnorm(60, 50, 10), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "age", y_vars = c("y1", "y2"), x_method = "Empirical", x_def = 50, n_burn = 10, n_it = 20), "x_method" ) }) test_that("error when x_def range is inverted", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = rnorm(60, 50, 10), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "age", y_vars = c("y1", "y2"), x_method = "Empirical", x_def = c(60, 40), n_burn = 10, n_it = 20), "increasing order" ) }) test_that("warning when x_def excludes most data", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = rnorm(60, 50, 10), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "age", y_vars = c("y1", "y2"), x_method = "Empirical", x_def = c(-100, min(test_data$age) - 1), n_burn = 10, n_it = 20), "below minimum observed value" ) }) # ============================================================================= # PART 5: CLUSTER EDGE CASES (bglmm) # ============================================================================= test_that("error when id_var is NULL for bglmm", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = rnorm(60), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglmm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", id_var = NULL, x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = 0, n_burn = 10, n_it = 20), "id_var must be specified" ) }) test_that("error when id_var doesn't exist", { test_data <- data.frame( cluster = factor(rep(1:6, each = 10)), grp = rep(c("A", "B"), times = 30), age = rnorm(60), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglmm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", id_var = "nonexistent", x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = 0, n_burn = 10, n_it = 20), "not found in data" ) }) test_that("warning for very few clusters (J < 5)", { test_data <- data.frame( cluster = factor(rep(1:4, each = 15)), grp = rep(c("A", "B"), times = 30), age = rnorm(60), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) warns <- testthat::capture_warnings( bglmm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", id_var = "cluster", x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = 0, n_burn = 20, n_it = 1000, return_diagnostics = FALSE) ) expect_true(any(grepl("Very few clusters", warns))) }) test_that("warning for small cluster size", { test_data <- data.frame( cluster = factor(c(rep(1, 2), rep(2:10, each = 10))), grp = rep(c("A", "B"), length.out = 92), age = rnorm(92), y1 = rbinom(92, 1, 0.5), y2 = rbinom(92, 1, 0.5) ) warns <- testthat::capture_warnings( bglmm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", id_var = "cluster", x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = 0, n_burn = 100, n_it = 1000, return_diagnostics = FALSE) ) expect_true(any(grepl("very few observations", warns))) }) test_that("warning for cluster with only one group", { test_data <- data.frame( cluster = factor(rep(1:6, each = 10)), grp = c(rep("A", 10), rep(c("A", "B"), times = 25)), # Cluster 1 all A age = rnorm(60), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) warns <- testthat::capture_warnings( bglmm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", id_var = "cluster", x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = 0, n_burn = 100, n_it = 1000, return_diagnostics = FALSE) ) expect_true(any(grepl("only one group", warns))) }) # ============================================================================= # PART 6: DECISION RULE EDGE CASES # ============================================================================= test_that("error when rule = 'Comp' but w is NULL", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_message( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2"), rule = "Comp", w = NULL, n_it = 100), "equal weights" ) }) test_that("error when w has wrong length", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2"), rule = "Comp", w = c(0.5), n_it = 100), "not equal to number of outcomes" ) }) test_that("error when w contains negative values", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2"), rule = "Comp", w = c(-0.5, 0.5), n_it = 100), "non-negative" ) }) # ============================================================================= # PART 7: MCMC SETTINGS EDGE CASES # ============================================================================= test_that("error when n_it = 0", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2"), n_it = 0), "iterations" ) }) test_that("warning when n_burn = 0", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = rnorm(60), y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = 0, n_burn = 0, n_it = 100, n_thin = 1) , "burnin" ) }) # NOT RUN (computation time) #test_that("message when n_it is very large without thinning", { # test_data <- data.frame( # grp = rep(c("A", "B"), each = 30), # age = rnorm(60), # y1 = rbinom(60, 1, 0.5), # y2 = rbinom(60, 1, 0.5) # ) # expect_message( # bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", # x_var = "age", y_vars = c("y1", "y2"), # x_method = "Value", x_def = 0, n_burn = 10, n_it = 1+1e6, n_thin = 1), # "thinning" # ) #}) # ============================================================================= # PART 8: PERFECT SEPARATION CASES # ============================================================================= test_that("model runs with perfect group separation in outcomes", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), y1 = c(rep(0, 30), rep(1, 30)), # Perfect separation y2 = rbinom(60, 1, 0.5) ) # Should run but may have poor diagnostics expect_s3_class( suppressWarnings(bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2"), n_it = 100)), "bmvb" ) }) # ============================================================================= # PART 9: NUMERICAL STABILITY TESTS # ============================================================================= test_that("error when covariate has extreme values", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = c(rnorm(30, 50000, 100), rnorm(30, 50000, 100)), # ~50k (extreme) y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = 50000, n_burn = 10, n_it = 20), "extreme values.*numerical instability" ) }) test_that("error when covariate range is very wide", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = c(runif(30, 0, 20000), runif(30, 0, 20000)), # Range = 20k (too wide) y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) expect_error( bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "age", y_vars = c("y1", "y2"), x_method = "Empirical", x_def = c(-Inf, Inf), n_burn = 10, n_it = 20), "very wide range.*numerical instability" ) }) test_that("model works with moderate values and range", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age = rnorm(60, 50, 10), # Moderate: mean=50, range ~20-80 y1 = rbinom(60, 1, 0.5), y2 = rbinom(60, 1, 0.5) ) # Should run without error expect_s3_class( suppressWarnings(bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = 50, n_burn = 50, n_it = 100)), "bglm" ) }) test_that("standardized covariates work well", { test_data <- data.frame( grp = rep(c("A", "B"), each = 30), age_raw = rnorm(60, 50, 10) ) # Standardize (mean=0, sd=1) test_data$age <- scale(test_data$age_raw)[, 1] test_data$y1 <- rbinom(60, 1, 0.5) test_data$y2 <- rbinom(60, 1, 0.5) expect_s3_class( suppressWarnings(bglm(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", x_var = "age", y_vars = c("y1", "y2"), x_method = "Value", x_def = 0, n_burn = 50, n_it = 100)), "bglm" ) }) # ============================================================================= # PART 10: EXTREME IMBALANCE # ============================================================================= test_that("model handles extreme group imbalance", { test_data <- data.frame( grp = c(rep("A", 10), rep("B", 100)), y1 = rbinom(110, 1, 0.5), y2 = rbinom(110, 1, 0.5) ) expect_s3_class( suppressWarnings(bmvb(data = test_data, grp = "grp", grp_a = "A", grp_b = "B", y_vars = c("y1", "y2"), n_it = 100)), "bmvb" ) })