test_that("RPIV_test returns correct structure for default arguments", { set.seed(1) n <- 20 Z <- rnorm(n) C <- rnorm(n) X <- Z + rnorm(n) Y <- X - C + rnorm(n) result <- RPIV_test(Y, X, C, Z) expect_type(result, "list") expect_named(result, c("p_value", "test_statistic", "var_fraction", "T_null", "variance_estimator")) expect_true(is.numeric(result$p_value)) expect_true(is.numeric(result$test_statistic)) expect_true(is.numeric(result$T_null)) expect_true(is.numeric(result$var_fraction)) expect_equal(result$variance_estimator, "heteroskedastic") }) test_that("RPIV_test works with C = NULL", { set.seed(1) n <- 20 Z <- rnorm(n) X <- Z + rnorm(n) Y <- X + rnorm(n) expect_no_error(RPIV_test(Y = Y, X = X, Z = Z)) }) test_that("RPIV_test handles multiple variance estimators", { set.seed(1) n <- 20 Z <- rnorm(n) X <- Z + rnorm(n) Y <- X + rnorm(n) result <- RPIV_test(Y, X, Z = Z, variance_estimator = c("homoskedastic", "heteroskedastic")) expect_type(result, "list") expect_named(result, c("homoskedastic", "heteroskedastic")) expect_true(all(c("p_value", "test_statistic", "var_fraction", "T_null", "variance_estimator") %in% names(result$homoskedastic))) }) test_that("RPIV_test yields different test_statistic and T_null if gamma is large", { set.seed(1) n <- 20 Z <- rnorm(n) X <- Z + rnorm(n) Y <- X + rnorm(n) result <- RPIV_test(Y, X, Z = Z, gamma = 1) expect_false(result$test_statistic == result$T_null) }) test_that("RPIV_test yields same test_statistic and T_null if gamma is small", { set.seed(1) n <- 20 Z <- rnorm(n) X <- Z + rnorm(n) Y <- X + rnorm(n) result <- RPIV_test(Y, X, Z = Z, gamma = 0.00001) expect_equal(result$test_statistic, result$T_null) }) test_that("RPIV_test works with clustering", { set.seed(1) n <- 20 cluster_ids <- rep(1:10, each = 2) Z <- rnorm(n) C <- rnorm(n) X <- Z + rnorm(n) Y <- X - C + rnorm(n) result <- RPIV_test(Y, X, C, Z, clustering = cluster_ids, variance_estimator = "cluster") expect_type(result, "list") expect_equal(result$variance_estimator, "cluster") cluster_ids <- as.factor(cluster_ids) result <- RPIV_test(Y, X, C, Z, clustering = cluster_ids, variance_estimator = "cluster") expect_type(result, "list") expect_equal(result$variance_estimator, "cluster") cluster_ids <- as.character(cluster_ids) result <- RPIV_test(Y, X, C, Z, clustering = cluster_ids, variance_estimator = "cluster") expect_type(result, "list") expect_equal(result$variance_estimator, "cluster") }) test_that("RPIV_test works with various dimensional input and throws error in underidentified case", { set.seed(1) n <- 20 Z1 <- rnorm(n) Z2 <- cbind(Z1, rnorm(n)) Z3 <- cbind(Z2, rnorm(n)) C <- cbind(rnorm(n), rnorm(n)) X1 <- rnorm(n) X2 <- cbind(X1, rnorm(n)) X3 <- cbind(X2, rnorm(n)) Y <- rnorm(n) expect_error(RPIV_test(Y, X2, C, Z1)) expect_error(RPIV_test(Y, X3, C, Z2)) expect_no_error(RPIV_test(Y, X1, C, Z2)) expect_no_error(RPIV_test(Y, X2, C, Z2)) expect_no_error(RPIV_test(Y, X2, C, Z3)) expect_no_error(RPIV_test(Y, X3, C, Z3)) }) test_that("RPIV_test throws errors on bad input", { set.seed(1) n <- 20 Z <- rnorm(n) X <- rnorm(n) Y <- rnorm(n) expect_error(RPIV_test(Y[1:19], X = X, Z = Z)) expect_error(RPIV_test(Y, X, Z = matrix(Z, ncol = 1), frac_A = -0.1), "frac_A must be NULL or a numeric scalar in") expect_error(RPIV_test(Y, X, Z = Z, regr_par = list(num_trees = 5)), "regr_par contains invalid entries") expect_error(RPIV_test(Y, X, Z = Z, variance_estimator = "homoscedastic"), "variance_estimator must be one or more of") }) test_that("RPIV_test works with fit_intercept = FALSE", { set.seed(1) n <- 20 Z <- rnorm(n) C <- rnorm(n) X <- rnorm(n) Y <- rnorm(n) result <- RPIV_test(Y, X, C, Z, fit_intercept = FALSE) expect_type(result, "list") expect_equal(result$variance_estimator, "heteroskedastic") expect_no_error(RPIV_test(Y, X, C = NULL, Z, fit_intercept = FALSE)) }) test_that("RPIV_test works with dataframes and vectors with attributes", { set.seed(1) n <- 20 Z <- data.frame(cbind(rnorm(n), rnorm(n))) C <- data.frame(cbind(rnorm(n), rnorm(n))) X <- data.frame(cbind(rnorm(n), rnorm(n))) Y <- rnorm(n) attr(Y, "label") <- "testlabel" result <- RPIV_test(Y, X, C, Z) expect_type(result, "list") expect_equal(result$variance_estimator, "heteroskedastic") }) test_that("RPIV_test raises error if Z is not convertible to vector", { set.seed(1) n <- 20 Z <- list(a = c("a", "aa"), b = rnorm(n)) C <- rnorm(n) X <- rnorm(n) Y <- rnorm(n) expect_error(RPIV_test(Y, X, C, Z)) }) test_that("weak_RPIV_test returns a function with correct output structure", { set.seed(1) n <- 20 Z <- rnorm(n) C <- rnorm(n) X <- Z + rnorm(n) Y <- X - C + rnorm(n) test_fun <- weak_RPIV_test(Y, X, C, Z) expect_type(test_fun, "closure") result <- test_fun(beta = 1, type = "tune_and_fit") expect_type(result, "double") expect_named(result, "heteroskedastic") expect_true(is.numeric(result["heteroskedastic"])) }) test_that("weak_RPIV_test works with C = NULL and/or fit_intercept = FALSE", { set.seed(1) n <- 20 Z <- rnorm(n) C <- rnorm(n) X <- Z + rnorm(n) Y <- X + rnorm(n) test_fun1 <- weak_RPIV_test(Y = Y, X = X, Z = Z) test_fun2 <- weak_RPIV_test(Y = Y, X = X, C = C, Z = Z, fit_intercept = FALSE) test_fun3 <- weak_RPIV_test(Y = Y, X = X, Z = Z, fit_intercept = FALSE) expect_no_error(test_fun1(beta = 0, type = "tune_and_fit")) expect_no_error(test_fun2(beta = 0, type = "tune_and_fit")) expect_no_error(test_fun3(beta = 0, type = "tune_and_fit")) }) test_that("weak_RPIV_test handles multiple variance estimators", { set.seed(1) n <- 20 Z <- rnorm(n) X <- Z + rnorm(n) Y <- X + rnorm(n) test_fun <- weak_RPIV_test( Y, X, Z = Z, variance_estimator = c("homoskedastic", "heteroskedastic") ) result <- test_fun(beta = 0, type = "tune_and_fit") expect_type(result, "double") expect_named(result, c("homoskedastic", "heteroskedastic")) }) test_that("weak_RPIV_test works with clustering", { set.seed(1) n <- 20 cluster_ids <- rep(1:10, each = 2) Z <- rnorm(n) C <- rnorm(n) X <- Z + rnorm(n) Y <- X - C + rnorm(n) test_fun <- weak_RPIV_test( Y, X, C, Z, clustering = cluster_ids, variance_estimator = "cluster" ) result <- test_fun(beta = 1, type = "tune_and_fit") expect_named(result, "cluster") }) test_that("weak_RPIV_test handles various dimensions and TSLS requirements", { set.seed(1) n <- 20 Z1 <- rnorm(n) Z2 <- cbind(Z1, rnorm(n)) Z3 <- cbind(Z2, rnorm(n)) C <- cbind(rnorm(n), rnorm(n)) X1 <- rnorm(n) X2 <- cbind(X1, rnorm(n)) X3 <- cbind(X2, rnorm(n)) Y <- rnorm(n) expect_error(weak_RPIV_test(Y, X2, C, Z1, fit_at_tsls = TRUE)) expect_error(weak_RPIV_test(Y, X3, C, Z2, fit_at_tsls = TRUE)) fun1 <- weak_RPIV_test(Y, X2, C, Z1, fit_at_tsls = FALSE) fun2 <- weak_RPIV_test(Y, X3, C, Z2, fit_at_tsls = FALSE) fun3 <- weak_RPIV_test(Y, X2, C, Z2) fun4 <- weak_RPIV_test(Y, X3, C, Z3) expect_error(fun1(1, "tune_and_fit")) expect_error(fun1(c(1,2), type = "fit")) expect_no_error(fun1(c(1,2), "tune_and_fit")) expect_no_error(fun2(c(1,2,3), "tune_and_fit")) expect_no_error(fun3(c(1,2), "fit")) expect_no_error(fun4(c(1,2,3), "recalculate")) }) test_that("weak_RPIV_test throws errors on bad input", { set.seed(1) n <- 20 Z <- rnorm(n) X <- rnorm(n) Y <- rnorm(n) expect_error(weak_RPIV_test(Y[1:19], X = X, Z = Z)) expect_error( weak_RPIV_test(Y, X, Z = Z, frac_A = -0.1), "frac_A must be NULL or a numeric scalar" ) expect_error( weak_RPIV_test(Y, X, Z = Z, regr_par = list(num_trees = 5)), "regr_par contains invalid entries" ) expect_error( weak_RPIV_test(Y, X, Z = Z, variance_estimator = "homoscedastic"), "variance_estimator must be one or more of" ) }) test_that("weak_RPIV_test works with dataframes and vectors with attributes", { set.seed(1) n <- 20 Z <- data.frame(cbind(rnorm(n), rnorm(n))) C <- data.frame(cbind(rnorm(n), rnorm(n))) X <- data.frame(cbind(rnorm(n), rnorm(n))) Y <- rnorm(n) attr(Y, "label") <- "testlabel" test_fun <- weak_RPIV_test(Y, X, C, Z) result <- test_fun(beta = c(0, 0), type = "tune_and_fit") expect_type(result, "double") }) test_that("weak_RPIV_test raises error if Z is not convertible to matrix", { set.seed(1) n <- 20 Z <- list(a = c("a", "aa"), b = rnorm(n)) C <- rnorm(n) X <- rnorm(n) Y <- rnorm(n) expect_error(weak_RPIV_test(Y, X, C, Z)) }) test_that("weak_RPIV_test rejects invalid type", { set.seed(1) n <- 20 Z <- rnorm(n) X <- Z + rnorm(n) Y <- X + rnorm(n) test_fun <- weak_RPIV_test(Y, X, Z = Z) expect_error( test_fun(beta = 0, type = "invalid_type"), "type needs to be one of" ) }) test_that("weak_RPIV_test rejects invalid type", { set.seed(1) n <- 20 Z <- rnorm(n) X <- Z + rnorm(n) Y <- X + rnorm(n) test_fun <- weak_RPIV_test(Y, X, Z = Z) expect_error( test_fun(beta = 0, type = "invalid_type"), "type needs to be one of" ) }) test_that("test_fun returned weak_RPIV_test is deterministic when type = 'recalculate'", { set.seed(1) n <- 20 Z <- rnorm(n) X <- Z + rnorm(n) Y <- rnorm(n) test_fun <- weak_RPIV_test(Y, X, Z = Z) T1 <- test_fun(beta = 0, type = "recalculate") T2 <- test_fun(beta = 0, type = "recalculate") expect_true(abs(T1 - T2) < 10^(-10)) }) test_that("weak_RPIV_test detects misspecification in C if use_C_for_prediction = TRUE (default), but not if use_C_for_prediction = FALSE", { set.seed(1) n <- 100 Z <- rnorm(n) C <- rnorm(n) X <- Z + rnorm(n) Y <- sign(C) test_fun1 <- weak_RPIV_test(Y, X, C, Z, use_C_for_prediction = FALSE) T1 <- test_fun1(beta = 0, type = "recalculate") expect_false(T1 > 4) test_fun2 <- weak_RPIV_test(Y, X, C, Z) T2 <- test_fun2(beta = 0, type = "recalculate") expect_true(T2 > 4) })