# tests/testthat/test-bivarhr.R library(testthat) library(bivarhr) create_test_data <- function(n = 30) { set.seed(42) data.table::data.table( n = seq_len(n), window_start = seq(1900, by = 10, length.out = n), window_end = seq(1910, by = 10, length.out = n), I = sample(0:3, n, replace = TRUE, prob = c(0.4, 0.3, 0.2, 0.1)), C = sample(0:4, n, replace = TRUE, prob = c(0.3, 0.3, 0.2, 0.1, 0.1)), EconCycle = sample(0:2, n, replace = TRUE), PopDensity = runif(n, 10, 500), Epidemics = sample(0:1, n, replace = TRUE, prob = c(0.8, 0.2)), Climate = sample(0:1, n, replace = TRUE, prob = c(0.7, 0.3)), War = sample(0:2, n, replace = TRUE, prob = c(0.6, 0.3, 0.1)) ) } test_that("disc_terciles returns ordered factor with 3 levels", { x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9) result <- disc_terciles(x) expect_true(is.factor(result)) expect_true(is.ordered(result)) expect_equal(nlevels(result), 3) expect_equal(levels(result), c("low", "medium", "high")) expect_equal(length(result), length(x)) }) test_that("disc_terciles handles vectors with NA", { x <- c(1, 2, NA, 4, 5, NA, 7, 8, 9) result <- disc_terciles(x) expect_equal(length(result), length(x)) expect_equal(sum(is.na(result)), sum(is.na(x))) }) test_that("disc_terciles handles all-NA vector", { x <- rep(NA_real_, 10) result <- disc_terciles(x) expect_true(is.factor(result)) expect_true(all(is.na(result))) }) test_that("disc_terciles is deterministic", { x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) result1 <- disc_terciles(x) result2 <- disc_terciles(x) result3 <- disc_terciles(x) expect_identical(result1, result2) expect_identical(result2, result3) }) test_that("disc_terciles handles short vectors", { x1 <- 5 result1 <- disc_terciles(x1) expect_equal(length(result1), 1) expect_true(is.factor(result1)) x2 <- c(1, 10) result2 <- disc_terciles(x2) expect_equal(length(result2), 2) }) test_that("disc_terciles handles ties", { x <- c(1, 1, 1, 2, 2, 2, 3, 3, 3) result <- disc_terciles(x) expect_true(is.factor(result)) expect_equal(length(result), 9) expect_true(all(c("low", "medium", "high") %in% levels(result))) }) test_that("disc_terciles handles extreme values", { x <- c(-1e10, 0, 1e10) result <- disc_terciles(x) expect_true(is.factor(result)) expect_equal(length(result), 3) }) test_that("standardize_continuous with zscore produces mean~0 and sd~1", { set.seed(123) DT <- data.table::data.table( x = rnorm(100, mean = 50, sd = 10), y = runif(100, 0, 100) ) result <- standardize_continuous(DT, cols = c("x", "y"), method = "zscore") expect_true(is.list(result)) expect_true("DT" %in% names(result)) expect_true("scalers" %in% names(result)) expect_equal(mean(result$DT$x), 0, tolerance = 1e-10) expect_equal(mean(result$DT$y), 0, tolerance = 1e-10) expect_equal(sd(result$DT$x), 1, tolerance = 1e-10) expect_equal(sd(result$DT$y), 1, tolerance = 1e-10) }) test_that("standardize_continuous with robust uses median and MAD", { DT <- data.table::data.table( x = c(1, 2, 3, 4, 5, 100) ) result <- standardize_continuous(DT, cols = "x", method = "robust") expect_true(is.list(result)) expect_equal(result$scalers$x$method, "robust") expect_true("center" %in% names(result$scalers$x)) expect_true("scale" %in% names(result$scalers$x)) }) test_that("standardize_continuous does not modify binary 0/1 columns", { DT <- data.table::data.table( binary_col = c(0, 1, 0, 1, 1, 0), continuous_col = c(10, 20, 30, 40, 50, 60) ) result <- standardize_continuous(DT, cols = c("binary_col", "continuous_col"), method = "zscore") expect_equal(result$DT$binary_col, DT$binary_col) expect_equal(mean(result$DT$continuous_col), 0, tolerance = 1e-10) }) test_that("standardize_continuous handles nonexistent columns gracefully", { DT <- data.table::data.table(x = 1:10) result <- standardize_continuous(DT, cols = c("x", "nonexistent_column"), method = "zscore") expect_true(is.list(result)) expect_equal(ncol(result$DT), 1) }) test_that("standardize_continuous handles sd = 0 (constant column)", { DT <- data.table::data.table( constant = rep(5, 10), variable = 1:10 ) result <- standardize_continuous(DT, cols = c("constant", "variable"), method = "zscore") expect_true(all(is.finite(result$DT$constant))) expect_true(all(is.finite(result$DT$variable))) }) test_that("standardize_continuous preserves scalers for reproducibility", { set.seed(456) DT <- data.table::data.table(x = rnorm(50)) result <- standardize_continuous(DT, cols = "x", method = "zscore") expect_true("x" %in% names(result$scalers)) expect_true("center" %in% names(result$scalers$x)) expect_true("scale" %in% names(result$scalers$x)) expect_equal(result$scalers$x$method, "zscore") }) test_that("make_lags creates correct lag matrix", { x <- 1:10 result <- make_lags(x, k = 2) expect_true(is.matrix(result)) expect_equal(nrow(result), length(x)) expect_equal(ncol(result), 2) expect_true(is.na(result[1, 1])) expect_true(is.na(result[1, 2])) expect_true(is.na(result[2, 2])) expect_equal(as.numeric(result[3, 1]), 2) expect_equal(as.numeric(result[3, 2]), 1) }) test_that("make_lags with k = 0 returns empty matrix", { x <- 1:10 result <- make_lags(x, k = 0) expect_true(is.matrix(result)) expect_equal(ncol(result), 0) expect_equal(nrow(result), length(x)) }) test_that("make_lags with negative k returns empty matrix", { x <- 1:10 result <- make_lags(x, k = -1) expect_true(is.matrix(result)) expect_equal(ncol(result), 0) }) test_that("make_lags preserves column names", { x <- 1:10 result <- make_lags(x, k = 3) expect_true(!is.null(colnames(result))) expect_equal(ncol(result), 3) }) test_that("build_design constructs design matrices correctly", { DT <- create_test_data(30) DT[, `:=`( window_years = window_end - window_start, exposure50 = pmax((window_end - window_start) / 50, 1e-6), zI = as.integer(I > 0), zC = as.integer(C > 0), t_norm = (seq_len(.N) - 0.5) / .N, t_poly2 = ((seq_len(.N) - 0.5) / .N)^2, mid_year = (window_start + window_end) / 2 )] DT[, log_exposure50 := log(exposure50)] DT[, Regime := factor(data.table::fifelse(mid_year < 1950, "Pre", "Post"))] DT[, `:=`(trans_PS = 0L, trans_SF = 0L, trans_FC = 0L)] result <- build_design(DT, k = 2, include_C_to_I = TRUE, include_I_to_C = TRUE, include_trend = TRUE, controls = character(0)) expect_true(is.list(result)) expect_true("idx" %in% names(result)) expect_true("y_I" %in% names(result)) expect_true("y_C" %in% names(result)) expect_true("X_pi_I" %in% names(result)) expect_true("X_mu_I" %in% names(result)) expect_true("X_pi_C" %in% names(result)) expect_true("X_mu_C" %in% names(result)) expect_equal(length(result$idx), nrow(DT) - 2) expect_equal(length(result$y_I), length(result$idx)) expect_equal(nrow(result$X_pi_I), length(result$idx)) }) test_that("build_design with k = 0 includes no lags", { DT <- create_test_data(20) DT[, `:=`( window_years = window_end - window_start, exposure50 = pmax((window_end - window_start) / 50, 1e-6), zI = as.integer(I > 0), zC = as.integer(C > 0), t_norm = (seq_len(.N) - 0.5) / .N, t_poly2 = ((seq_len(.N) - 0.5) / .N)^2, mid_year = (window_start + window_end) / 2 )] DT[, log_exposure50 := log(exposure50)] DT[, Regime := factor(data.table::fifelse(mid_year < 1950, "Pre", "Post"))] DT[, `:=`(trans_PS = 0L, trans_SF = 0L, trans_FC = 0L)] result <- build_design(DT, k = 0, include_C_to_I = TRUE, include_I_to_C = TRUE) expect_equal(length(result$idx), nrow(DT)) expect_equal(result$nlags_pi_I, 0) expect_equal(result$nlags_mu_I, 0) }) test_that("build_design includes controls when specified", { DT <- create_test_data(25) DT[, `:=`( window_years = window_end - window_start, exposure50 = pmax((window_end - window_start) / 50, 1e-6), zI = as.integer(I > 0), zC = as.integer(C > 0), t_norm = (seq_len(.N) - 0.5) / .N, t_poly2 = ((seq_len(.N) - 0.5) / .N)^2, mid_year = (window_start + window_end) / 2 )] DT[, log_exposure50 := log(exposure50)] DT[, Regime := factor(data.table::fifelse(mid_year < 1950, "Pre", "Post"))] DT[, `:=`(trans_PS = 0L, trans_SF = 0L, trans_FC = 0L)] result_no_ctrl <- build_design(DT, k = 1, controls = character(0)) result_with_ctrl <- build_design(DT, k = 1, controls = c("PopDensity", "War")) expect_gt(ncol(result_with_ctrl$X_mu_I), ncol(result_no_ctrl$X_mu_I)) }) test_that("prewhiten_count_glm returns valid residuals", { skip_if_not_installed("MASS") DT <- create_test_data(50) DT[, `:=`( t_norm = (seq_len(.N) - 0.5) / .N, log_exposure50 = log(pmax((window_end - window_start) / 50, 1e-6)), mid_year = (window_start + window_end) / 2 )] DT[, Regime := factor(data.table::fifelse(mid_year < 1950, "Pre", "Post"))] result <- prewhiten_count_glm(DT, "I") expect_true(is.numeric(result)) expect_equal(length(result), nrow(DT)) expect_true(all(is.finite(result))) }) test_that("prewhiten_rate_glm returns valid residuals", { DT <- create_test_data(50) DT[, `:=`( t_norm = (seq_len(.N) - 0.5) / .N, exposure50 = pmax((window_end - window_start) / 50, 1e-6), mid_year = (window_start + window_end) / 2 )] DT[, Regime := factor(data.table::fifelse(mid_year < 1950, "Pre", "Post"))] DT[, I := I / exposure50] result <- prewhiten_rate_glm(DT, "I") expect_true(is.numeric(result)) expect_equal(length(result), nrow(DT)) }) test_that("prewhiten_bin_glm returns valid residuals for binary variable", { DT <- create_test_data(50) DT[, `:=`( t_norm = (seq_len(.N) - 0.5) / .N, mid_year = (window_start + window_end) / 2 )] DT[, Regime := factor(data.table::fifelse(mid_year < 1950, "Pre", "Post"))] DT[, I := as.integer(I > 0)] result <- prewhiten_bin_glm(DT, "I") expect_true(is.numeric(result)) expect_equal(length(result), nrow(DT)) }) test_that("prewhiten_bin_glm fails with non-binary variable", { DT <- create_test_data(30) DT[, `:=`( t_norm = (seq_len(.N) - 0.5) / .N, mid_year = (window_start + window_end) / 2 )] DT[, Regime := factor("Single")] expect_error(prewhiten_bin_glm(DT, "I"), "binaria") }) test_that("add_qsig adds q_value and sig columns", { df <- data.frame( model = c("A", "B", "C", "D"), p_value = c(0.001, 0.02, 0.08, 0.5) ) result <- add_qsig(df) expect_true("q_value" %in% names(result)) expect_true("sig" %in% names(result)) expect_equal(nrow(result), nrow(df)) expect_true(all(result$q_value >= result$p_value)) }) test_that("add_qsig handles empty data.frame", { df <- data.frame(model = character(0), p_value = numeric(0)) result <- add_qsig(df) expect_equal(nrow(result), 0) }) test_that("add_qsig handles NULL input", { result <- add_qsig(NULL) expect_null(result) }) test_that("add_qsig assigns significance stars correctly", { df <- data.frame( model = c("A", "B", "C", "D"), p_value = c(0.0001, 0.005, 0.03, 0.2) ) result <- add_qsig(df) expect_true(is.character(result$sig) || is.factor(result$sig)) }) test_that("fit_one works with CmdStan available", { skip_on_cran() skip_if_not_installed("cmdstanr") cmdstan_ok <- tryCatch({ v <- cmdstanr::cmdstan_version() !is.null(v) }, error = function(e) FALSE) skip_if_not(cmdstan_ok, "CmdStan not installed") expect_true(TRUE) }) test_that("run_transfer_entropy works with RTransferEntropy", { skip_on_cran() skip_if_not_installed("RTransferEntropy") DT <- create_test_data(50) DT[, `:=`( t_norm = (seq_len(.N) - 0.5) / .N, exposure50 = pmax((window_end - window_start) / 50, 1e-6), log_exposure50 = log(pmax((window_end - window_start) / 50, 1e-6)), mid_year = (window_start + window_end) / 2 )] DT[, Regime := factor(data.table::fifelse(mid_year < 1950, "Pre", "Post"))] temp_dir <- tempdir() result <- tryCatch({ assign("dir_csv", temp_dir, envir = .GlobalEnv) run_transfer_entropy(DT, lags = 1:2, shuffles = 10, seed = 123) }, error = function(e) NULL, finally = { if (exists("dir_csv", envir = .GlobalEnv)) rm("dir_csv", envir = .GlobalEnv) }) skip_if(is.null(result), "run_transfer_entropy failed") expect_true(is.data.frame(result)) expect_true("lag" %in% names(result)) }) test_that("run_hmm works with depmixS4", { skip_on_cran() skip_if_not_installed("depmixS4") DT <- create_test_data(40) temp_dir <- tempdir() result <- tryCatch({ assign("dir_csv", temp_dir, envir = .GlobalEnv) assign("dir_out", temp_dir, envir = .GlobalEnv) run_hmm(DT, nstates = 2) }, error = function(e) NULL, finally = { if (exists("dir_csv", envir = .GlobalEnv)) rm("dir_csv", envir = .GlobalEnv) if (exists("dir_out", envir = .GlobalEnv)) rm("dir_out", envir = .GlobalEnv) }) skip_if(is.null(result), "run_hmm failed or did not converge") expect_true(is.list(result)) }) test_that("summarise_hurdle_top3_posthoc handles NULL input", { result <- summarise_hurdle_top3_posthoc(NULL, tempdir()) expect_true(is.data.frame(result)) expect_true("model" %in% names(result)) expect_equal(result$model[1], "Hurdle-NB") }) test_that("summarise_te_top3_posthoc handles empty input", { result <- summarise_te_top3_posthoc(NULL, tempdir()) expect_true(is.data.frame(result)) expect_true("model" %in% names(result)) }) test_that("summarise_placebo_top3_posthoc orders by diff descending", { placebo_tab <- data.frame( perm = 1:5, elpd_orig = rep(-100, 5), elpd_perm = c(-110, -105, -120, -102, -115), diff = c(10, 5, 20, 2, 15) ) result <- summarise_placebo_top3_posthoc(placebo_tab, tempdir()) expect_equal(nrow(result), 3) expect_equal(result$diff, c(20, 15, 10)) }) test_that("summarise_tvarstar_posthoc handles NULL", { result <- summarise_tvarstar_posthoc(NULL) expect_true(is.data.frame(result)) expect_equal(nrow(result), 3) }) test_that("summarise_varx_posthoc handles NULL", { result <- summarise_varx_posthoc(NULL) expect_true(is.data.frame(result)) expect_equal(result$model[1], "VARX") expect_true(is.na(result$AIC)) }) test_that("package loads without errors", { expect_true(requireNamespace("bivarhr", quietly = TRUE)) }) test_that("test data is created correctly", { DT <- create_test_data(50) expect_true(data.table::is.data.table(DT)) expect_equal(nrow(DT), 50) expect_true(all(c("I", "C", "PopDensity") %in% names(DT))) }) test_that("exported functions exist", { expected_exports <- c( "disc_terciles", "standardize_continuous", "make_lags", "build_design" ) for (fn in expected_exports) { expect_true( exists(fn, envir = asNamespace("bivarhr")), info = paste("Function not found:", fn) ) } }) test_that("documentation examples are valid", { x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9) result <- disc_terciles(x) expect_true(is.factor(result)) })