# Numerical validation tests for iv.sensemakr # These tests verify computed quantities against hand-computed values. # --- Helper: set up Card data consistently across tests --- card_setup <- function() { data("card", package = "iv.sensemakr") y <- card$lwage d <- card$educ z <- card$nearc4 x <- model.matrix( ~ exper + expersq + black + south + smsa + reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 + smsa66, data = card ) list(y = y, d = d, z = z, x = x) } # ========================================================================== # (a) IV estimation correctness # ========================================================================== test_that("IV point estimate equals RF/FS coefficient ratio from direct lm()", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) # Reconstruct the FS and RF regressions manually df <- card.fit$data fs <- lm(d ~ -1 + ., data = df[, c("d", "z", setdiff(names(df), c("y", "d")))]) rf <- lm(y ~ -1 + ., data = df[, c("y", "z", setdiff(names(df), c("y", "d")))]) # IV = RF_z / FS_z iv.manual <- unname(coef(rf)["z"] / coef(fs)["z"]) expect_equal(coef(card.fit, parm = "iv")[["iv"]], iv.manual) }) test_that("FS and RF coefficients match direct lm() output", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) # Extract FS estimates from the stored lm model fs.lm <- card.fit$models$fs rf.lm <- card.fit$models$rf fs.summ <- coef(summary(fs.lm))["z", ] rf.summ <- coef(summary(rf.lm))["z", ] # FS: coefficient, SE, t-value, p-value expect_equal(card.fit$estimates$fs$estimate, unname(fs.summ["Estimate"])) expect_equal(card.fit$estimates$fs$se, unname(fs.summ["Std. Error"])) expect_equal(card.fit$estimates$fs$t.value, unname(fs.summ["t value"])) expect_equal(card.fit$estimates$fs$p.value, unname(fs.summ["Pr(>|t|)"])) # RF: coefficient, SE, t-value, p-value expect_equal(card.fit$estimates$rf$estimate, unname(rf.summ["Estimate"])) expect_equal(card.fit$estimates$rf$se, unname(rf.summ["Std. Error"])) expect_equal(card.fit$estimates$rf$t.value, unname(rf.summ["t value"])) expect_equal(card.fit$estimates$rf$p.value, unname(rf.summ["Pr(>|t|)"])) }) test_that("AR t-value matches lm(y - h0*d ~ z + x) under null", { dat <- card_setup() # Fit with h0 = 0 (default) card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) ar.lm <- card.fit$models$ar ar.summ <- coef(summary(ar.lm))["z", ] expect_equal(card.fit$estimates$iv$t.value, unname(ar.summ["t value"])) expect_equal(card.fit$estimates$iv$p.value, unname(ar.summ["Pr(>|t|)"])) }) test_that("AR CI endpoints satisfy test-inversion property", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) ci <- confint(card.fit, parm = "iv") alpha <- card.fit$pars$alpha dof <- card.fit$estimates$iv$dof # Critical F value ts <- sqrt(qf(p = log(alpha), df1 = 1, df2 = dof, log.p = TRUE, lower.tail = FALSE)) # At each CI endpoint tau0, the AR test stat should equal the critical value for (tau0 in ci) { if (is.finite(tau0)) { # Refit AR regression at this endpoint df <- card.fit$data df$ar_y <- df$y - tau0 * df$d covnames <- setdiff(names(df), c("y", "d", "ar_y")) fmla <- as.formula(paste0("ar_y ~ -1 + ", paste(paste0("`", covnames, "`"), collapse = " + "))) ar.lm <- lm(fmla, data = df) t.val <- abs(coef(summary(ar.lm))["z", "t value"]) expect_equal(t.val, ts, tolerance = 1e-6) } } }) test_that("confint for FS and RF match standard lm confint", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) # FS ci.fs.pkg <- confint(card.fit, parm = "fs") ci.fs.lm <- confint(card.fit$models$fs, parm = "z") expect_equal(unname(ci.fs.pkg), unname(ci.fs.lm)) # RF ci.rf.pkg <- confint(card.fit, parm = "rf") ci.rf.lm <- confint(card.fit$models$rf, parm = "z") expect_equal(unname(ci.rf.pkg), unname(ci.rf.lm)) }) # ========================================================================== # (b) Robustness values # ========================================================================== test_that("RV for IV matches manual rv_from_summary_stats computation", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) # Extract summary stats manually fs.lm <- card.fit$models$fs rf.lm <- card.fit$models$rf fs.coef <- unname(coef(fs.lm)["z"]) rf.coef <- unname(coef(rf.lm)["z"]) fs.se <- unname(coef(summary(fs.lm))["z", "Std. Error"]) rf.se <- unname(coef(summary(rf.lm))["z", "Std. Error"]) rho.val <- cor(resid(fs.lm), resid(rf.lm)) dof <- fs.lm$df.residual # Manual RV computation (q=1) q <- 1 tau.r <- rf.coef / fs.coef tau0 <- (1 - q) * tau.r # = 0 when q=1 phi.r <- rf.coef - tau0 * fs.coef se.phi.r <- sqrt(rf.se^2 + tau0^2 * fs.se^2 - 2 * tau0 * rho.val * fs.se * rf.se) t.phi.r <- abs(phi.r / se.phi.r) t.fs.r <- abs(fs.coef / fs.se) rv.phi <- sensemakr::rv(t_statistic = t.phi.r, dof = dof, q = 1, alpha = 0.05) rv.fs <- sensemakr::rv(t_statistic = t.fs.r, dof = dof, q = 1, alpha = 0.05) # RV_IV = min(rv.phi, rv.fs) when min=TRUE rv.iv.manual <- min(rv.phi, rv.fs) rv.iv.pkg <- as.numeric(rv(card.fit, parm = "iv", q = 1, alpha = 0.05)) expect_equal(rv.iv.pkg, rv.iv.manual) }) test_that("XRV for IV matches manual computation", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) fs.lm <- card.fit$models$fs rf.lm <- card.fit$models$rf fs.coef <- unname(coef(fs.lm)["z"]) rf.coef <- unname(coef(rf.lm)["z"]) fs.se <- unname(coef(summary(fs.lm))["z", "Std. Error"]) rf.se <- unname(coef(summary(rf.lm))["z", "Std. Error"]) rho.val <- cor(resid(fs.lm), resid(rf.lm)) dof <- fs.lm$df.residual tau.r <- rf.coef / fs.coef tau0 <- 0 # q=1 phi.r <- rf.coef - tau0 * fs.coef se.phi.r <- sqrt(rf.se^2 + tau0^2 * fs.se^2 - 2 * tau0 * rho.val * fs.se * rf.se) t.phi.r <- abs(phi.r / se.phi.r) t.fs.r <- abs(fs.coef / fs.se) xrv.phi <- sensemakr::xrv(t_statistic = t.phi.r, dof = dof, q = 1, alpha = 0.05) xrv.fs <- sensemakr::xrv(t_statistic = t.fs.r, dof = dof, q = 1, alpha = 0.05) xrv.iv.manual <- min(xrv.phi, xrv.fs) xrv.iv.pkg <- as.numeric(xrv(card.fit, parm = "iv", q = 1, alpha = 0.05)) expect_equal(xrv.iv.pkg, xrv.iv.manual) }) test_that("RV with q=0.5 matches manual computation", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) fs.lm <- card.fit$models$fs rf.lm <- card.fit$models$rf fs.coef <- unname(coef(fs.lm)["z"]) rf.coef <- unname(coef(rf.lm)["z"]) fs.se <- unname(coef(summary(fs.lm))["z", "Std. Error"]) rf.se <- unname(coef(summary(rf.lm))["z", "Std. Error"]) rho.val <- cor(resid(fs.lm), resid(rf.lm)) dof <- fs.lm$df.residual q <- 0.5 tau.r <- rf.coef / fs.coef tau0 <- (1 - q) * tau.r phi.r <- rf.coef - tau0 * fs.coef se.phi.r <- sqrt(rf.se^2 + tau0^2 * fs.se^2 - 2 * tau0 * rho.val * fs.se * rf.se) t.phi.r <- abs(phi.r / se.phi.r) t.fs.r <- abs(fs.coef / fs.se) rv.phi <- sensemakr::rv(t_statistic = t.phi.r, dof = dof, q = 1, alpha = 0.05) rv.fs <- sensemakr::rv(t_statistic = t.fs.r, dof = dof, q = 1, alpha = 0.05) rv.iv.manual <- min(rv.phi, rv.fs) rv.iv.pkg <- as.numeric(rv(card.fit, parm = "iv", q = q, alpha = 0.05)) expect_equal(rv.iv.pkg, rv.iv.manual) }) test_that("RV with min=FALSE returns rv.phi (not min)", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) fs.lm <- card.fit$models$fs rf.lm <- card.fit$models$rf fs.coef <- unname(coef(fs.lm)["z"]) rf.coef <- unname(coef(rf.lm)["z"]) fs.se <- unname(coef(summary(fs.lm))["z", "Std. Error"]) rf.se <- unname(coef(summary(rf.lm))["z", "Std. Error"]) rho.val <- cor(resid(fs.lm), resid(rf.lm)) dof <- fs.lm$df.residual q <- 1 tau0 <- 0 phi.r <- rf.coef - tau0 * fs.coef se.phi.r <- sqrt(rf.se^2 + tau0^2 * fs.se^2 - 2 * tau0 * rho.val * fs.se * rf.se) t.phi.r <- abs(phi.r / se.phi.r) rv.phi <- sensemakr::rv(t_statistic = t.phi.r, dof = dof, q = 1, alpha = 0.05) rv.iv.pkg <- as.numeric(rv(card.fit, parm = "iv", q = 1, alpha = 0.05, min = FALSE)) expect_equal(rv.iv.pkg, as.numeric(rv.phi)) }) test_that("RV for FS and RF dispatch to sensemakr correctly", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) # FS rv.fs.pkg <- as.numeric(rv(card.fit, parm = "fs")) rv.fs.direct <- as.numeric(sensemakr::rv(card.fit$models$fs, covariates = "z")) expect_equal(rv.fs.pkg, rv.fs.direct) # RF rv.rf.pkg <- as.numeric(rv(card.fit, parm = "rf")) rv.rf.direct <- as.numeric(sensemakr::rv(card.fit$models$rf, covariates = "z")) expect_equal(rv.rf.pkg, rv.rf.direct) }) # ========================================================================== # (c) Sensitivity analysis structure and bounds # ========================================================================== test_that("sensemakr() output has correct structure", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) card.sens <- sensemakr(card.fit, benchmark_covariates = c("black", "smsa")) expect_s3_class(card.sens, "iv.sensemakr") expect_true(all(c("pars", "unadjusted", "sensitivity_stats", "bounds") %in% names(card.sens))) expect_s3_class(card.sens$unadjusted, "iv_fit") expect_true(all(c("iv", "fs", "rf") %in% names(card.sens$sensitivity_stats))) expect_true(all(c("iv", "fs", "rf") %in% names(card.sens$bounds))) }) test_that("sensitivity_stats have expected columns", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) card.sens <- sensemakr(card.fit, benchmark_covariates = c("black", "smsa")) expected_cols <- c("estimate", "lwr", "upr", "t.value", "xrv_qa", "rv_qa", "q", "min", "alpha", "dof") for (parm in c("iv", "fs", "rf")) { ss <- card.sens$sensitivity_stats[[parm]] expect_true(all(expected_cols %in% names(ss)), info = paste("Missing columns for", parm)) } }) test_that("bounds partial R2 values are in [0, 1]", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) card.sens <- sensemakr(card.fit, benchmark_covariates = c("black", "smsa")) iv_bounds <- card.sens$bounds$iv expect_true(all(iv_bounds$r2zw.x >= 0 & iv_bounds$r2zw.x <= 1)) expect_true(all(iv_bounds$r2y0w.zx >= 0 & iv_bounds$r2y0w.zx <= 1)) }) test_that("manual bounds (r2zw.x) produce valid adjusted CI", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) card.sens <- sensemakr(card.fit, r2zw.x = 0.02) # Manual bounds should be in the bounds table expect_true(nrow(card.sens$bounds$iv) >= 1) manual_row <- card.sens$bounds$iv[card.sens$bounds$iv$bound_label == "Manual Bound", ] expect_equal(nrow(manual_row), 1) expect_equal(manual_row$r2zw.x, 0.02) # Adjusted CI should be wider than unadjusted (lower bound smaller, upper bound larger) unadj_ci <- confint(card.fit, parm = "iv") expect_true(manual_row$lwr <= unadj_ci[1] || manual_row$upr >= unadj_ci[2]) }) # ========================================================================== # (d) AR CI edge cases # ========================================================================== test_that("irrelevant instrument produces unbounded CI", { set.seed(123) n <- 100 z <- rnorm(n) d <- rnorm(n) # instrument completely irrelevant y <- d + rnorm(n) fit <- iv_fit(y, d, z) ci <- confint(fit, parm = "iv") # Should be unbounded (-Inf, Inf) expect_true(any(is.infinite(ci))) }) test_that("iv_fit works without covariates (x=NULL)", { set.seed(42) n <- 500 z <- rbinom(n, 1, 0.5) d <- 0.5 * z + rnorm(n) y <- 2 * d + rnorm(n) fit <- iv_fit(y, d, z) expect_s3_class(fit, "iv_fit") expect_true(is.numeric(coef(fit, parm = "iv"))) ci <- confint(fit, parm = "iv") expect_true(length(ci) >= 2) }) test_that("confint with different levels works", { dat <- card_setup() card.fit <- iv_fit(dat$y, dat$d, dat$z, dat$x) ci_95 <- confint(card.fit, parm = "iv") ci_99 <- confint(card.fit, parm = "iv", level = 0.99) # 99% CI should be wider than 95% expect_true(ci_99[1] <= ci_95[1]) expect_true(ci_99[2] >= ci_95[2]) })