context("Estimator - difference_in_means") test_that("DIM", { dat <- data.frame(Y = rnorm(100), Z = sample(1:3, 100, replace = TRUE), X = rnorm(100)) difference_in_means(Y ~ Z, condition1 = 1, condition2 = 2, data = dat) difference_in_means(Y ~ Z, condition1 = 2, condition2 = 1, data = dat) difference_in_means(Y ~ Z, condition1 = 3, condition2 = 1, data = dat) dimo <- difference_in_means(Y ~ Z, condition1 = 3, condition2 = 2, data = dat) expect_equal( dimo$design, "Standard" ) }) test_that("DIM arguments parsed correctly", { dat <- data.frame(Y = rnorm(100), Z = rbinom(100, 1, .5), X = rnorm(100)) expect_equivalent( as.matrix(tidy(difference_in_means( Y ~ Z, data = dat, ci = FALSE ))[, c("p.value", "conf.low", "conf.high")]), matrix(NA, nrow = 1, ncol = 3) ) expect_error( difference_in_means(Y ~ Z + X, data = dat), "must have only one variable on the right-hand side" ) dat$bl <- rep(1:10, each = 10) dat$bad_cl <- rep(1:10, times = 10) expect_error( difference_in_means(Y ~ Z, blocks = bl, clusters = bad_cl, data = dat), "All `clusters` must be contained within `blocks`" ) dat$bad_bl <- c(1, rep(2:10, length.out = 99)) expect_error( difference_in_means(Y ~ Z, blocks = bad_bl, data = dat), "All `blocks` must have multiple units" ) dat$bad_mp <- rep(1:50, each = 2) dat$bad_mp[dat$bad_mp == 50] <- 49 dat$Z <- 0:1 expect_warning( difference_in_means(Y ~ Z, blocks = bad_mp, data = dat), "Some `blocks` have two units/`clusters` while other blocks have more units/`clusters`" ) expect_error( difference_in_means(Y ~ Z + X, data = dat), "must have only one variable on the right-hand side" ) # not matched pair but has some blocks with only 1 treated bl <- rep(1:2, each = 4) z <- c(1, 0, 0, 0, 1, 1, 0, 0) y <- rnorm(8) expect_error( difference_in_means(y ~ z, blocks = bl), "If design is not pair\\-matched\\, every block must" ) }) test_that("DIM Blocked", { dat <- data.frame( Y = rnorm(100), Z = rbinom(100, 1, .5), block = sample(c("A", "B", "C"), 100, replace = TRUE) ) difference_in_means(Y ~ Z, blocks = block, data = dat) dim_normal <- difference_in_means(Y ~ Z, condition1 = 0, condition2 = 1, blocks = block, data = dat) dim_reverse <- difference_in_means(Y ~ Z, condition1 = 1, condition2 = 0, blocks = block, data = dat) expect_equal( tidy(dim_normal)[c("estimate", "std.error")], tidy(dim_reverse)[c("estimate", "std.error")] * c(-1, 1) ) difference_in_means(Y ~ Z, alpha = .05, blocks = block, data = dat) difference_in_means(Y ~ Z, alpha = .10, blocks = block, data = dat) expect_equal( dim_normal$design, "Blocked" ) # Blocks and conditions works dat <- data.frame( y = 1:12, z = rep(0:2, each = 4), bl = rep(1:2, times = 6) ) dim_01 <- difference_in_means(y ~ z, blocks = bl, data = dat, condition1 = "0", condition2 = "1") dim_01_num <- difference_in_means(y ~ z, blocks = bl, data = dat, condition1 = 0, condition2 = 1) expect_equal( condchr(rmcall(dim_01)), condchr(rmcall(dim_01_num)) ) expect_equivalent(dim_01$coefficients, 4) expect_equivalent( tidy(dim_01), tidy( difference_in_means(y ~ z, blocks = bl, data = dat, subset = z < 2) ) ) dim_02 <- difference_in_means(y ~ z, blocks = bl, data = dat, condition1 = 0, condition2 = 2) expect_equivalent(dim_02$coefficients, 8) expect_equivalent( tidy(dim_02), tidy( difference_in_means(y ~ z, blocks = bl, data = dat, subset = z != 1) ) ) }) test_that("DIM same as t.test", { # test df correction dat <- data.frame(Y = rnorm(100), Z = rbinom(100, 1, .5), X = rnorm(100)) expect_equal( unlist( difference_in_means(Y ~ Z, data = dat)[c("p.value", "conf.low", "conf.high", "df")], F, F ), unlist( with(dat, t.test(Y[Z == 1], Y[Z == 0]))[c("p.value", "conf.int", "parameter")], F, F ) ) }) test_that("DIM Weighted", { n <- 100 dat <- data.frame(y = rnorm(n), z = 0:1, w = 1, bl = rep(1:10, each = 10)) dimw <- difference_in_means(y ~ z, weights = w, data = dat) difference_in_means(y ~ z, data = dat) dimbw <- difference_in_means(y ~ z, weights = w, blocks = bl, data = dat) difference_in_means(y ~ z, blocks = bl, data = dat) expect_equal( dimw$design, "Standard (weighted)" ) expect_equal( dimbw$design, "Blocked (weighted)" ) # Weights and conditions works dat <- data.frame( y = 1:12, z = rep(0:2, each = 4), w = rep(1:6, each = 2) ) dim_01 <- difference_in_means(y ~ z, weights = w, data = dat, condition1 = "0", condition2 = "1") expect_equivalent( tidy(dim_01), tidy( difference_in_means(y ~ z, weights = w, data = dat, subset = z < 2) ) ) dim_02 <- difference_in_means(y ~ z, weights = w, data = dat, condition1 = 0, condition2 = 2) expect_equivalent( tidy(dim_02), tidy( difference_in_means(y ~ z, weights = w, data = dat, subset = z != 1) ) ) }) test_that("DIM Clustered", { dat <- data.frame( weights = runif(100), weights2 = 1, J = rep(1:4, each = 25) ) dat$Y <- rnorm(100, mean = rep(rnorm(4, sd = sqrt(0.1)), each = 25), sd = sqrt(0.9)) dat$Z <- as.numeric(dat$J %in% 1:2) difference_in_means(Y ~ Z, alpha = .05, data = dat) dim_05 <- difference_in_means(Y ~ Z, alpha = .05, clusters = J, data = dat) dim_10 <- difference_in_means(Y ~ Z, alpha = .10, clusters = J, data = dat) expect_true(dim_05$conf.low < dim_10$conf.low) expect_equal( dim_10$design, "Clustered" ) # Clusters and conditions works dat <- data.frame( y = 1:12, z = rep(0:2, each = 4), cl = rep(1:6, each = 2) ) dim_01 <- difference_in_means(y ~ z, clusters = cl, data = dat, condition1 = "0", condition2 = "1") dim_01_num <- difference_in_means(y ~ z, clusters = cl, data = dat, condition1 = 0, condition2 = 1) expect_equal( condchr(rmcall(dim_01)), condchr(rmcall(dim_01_num)) ) expect_equivalent( tidy(dim_01), tidy( difference_in_means(y ~ z, clusters = cl, data = dat, subset = z < 2) ) ) expect_equivalent(dim_01$coefficients, 4) dim_02 <- difference_in_means(y ~ z, clusters = cl, data = dat, condition1 = 0, condition2 = 2) expect_equivalent(dim_02$coefficients, 8) expect_equivalent( tidy(dim_02), tidy( difference_in_means(y ~ z, clusters = cl, data = dat, subset = z != 1) ) ) }) test_that("DIM Pair Matched", { dat <- data.frame( Y = rnorm(100), Z = rbinom(100, 1, .5), weights = runif(100), weights2 = 1, block = rep(1:50, each = 2) ) expect_error( difference_in_means(Y ~ Z, alpha = .05, blocks = block, data = dat), "both treatment" ) dat$Z <- rep(0:1, 50) dim_mp <- difference_in_means(Y ~ Z, alpha = .05, blocks = block, data = dat) expect_equal( dim_mp$design, "Matched-pair" ) }) test_that("DIM Matched Pair Cluster Randomization", { dat <- data.frame( Y = rnorm(100), block = rep(1:25, each = 4), cluster = as.character(rep(1:50, each = 2)), Z = rep(0:1, times = 50) ) expect_error( difference_in_means( Y ~ Z, alpha = .05, blocks = block, clusters = cluster, data = dat ), "same treatment condition" ) dat$Z <- c(rep(rep(0:1, each = 4), 12), rep(0, 4)) expect_error( difference_in_means( Y ~ Z, alpha = .05, blocks = block, clusters = cluster, data = dat ), "both treatment conditions" ) dat$Z <- rep(rep(0:1, each = 2), 25) dim_mpc <- difference_in_means( Y ~ Z, alpha = .05, blocks = block, clusters = cluster, data = dat ) expect_equal( dim_mpc$design, "Matched-pair clustered" ) }) test_that("DIM Matched Pair Cluster Randomization = Matched Pair when cluster size is 1", { dat <- data.frame( Y = rnorm(100), block = rep(1:25, each = 4), cluster = 1:100, Z = rep(c(0, 0, 1, 1), times = 25) ) expect_equal( tidy(difference_in_means( Y ~ Z, alpha = .05, blocks = block, clusters = cluster, data = dat )), tidy(difference_in_means( Y ~ Z, alpha = .05, blocks = block, data = dat )) ) }) test_that("DIM works with missingness", { dat <- data.frame( Y = rnorm(100), block = rep(1:2, each = 50), cluster = 1:100, Z = rep(c(0, 0, 1, 1), times = 25) ) ## Missingness on treatment dat$Z[23] <- NA expect_error( estimatr_dim_out <- difference_in_means( Y ~ Z, alpha = .05, blocks = block, data = dat ), NA ) expect_equal( estimatr_dim_out, difference_in_means( Y ~ Z, alpha = .05, blocks = block, data = dat[-23, ] ) ) ## Missingness on block dat$block[35] <- NA expect_warning( estimatr_missblock_dim <- difference_in_means( Y ~ Z, alpha = .05, blocks = block, data = dat ), "missingness in the block" ) expect_equal( estimatr_missblock_dim, difference_in_means( Y ~ Z, alpha = .05, blocks = block, data = dat[-c(23, 35), ] ) ) ## Missingness on cluster dat$cluster[1] <- NA expect_warning( estimatr_missclust_dim <- difference_in_means( Y ~ Z, alpha = .05, clusters = cluster, data = dat ), "missingness in the cluster" ) expect_equal( estimatr_missclust_dim, difference_in_means( Y ~ Z, alpha = .05, clusters = cluster, data = dat[-c(1, 23), ] ) ) }) test_that("DIM works with character args", { dat <- data.frame( Y = rnorm(100), block = rep(1:25, each = 4), cluster = 1:100, Z = rep(c(0, 0, 1, 1), times = 25) ) dim_unquote <- difference_in_means( Y ~ Z, alpha = .05, blocks = block, clusters = cluster, data = dat ) dim_quote <- difference_in_means( Y ~ Z, alpha = .05, blocks = block, clusters = cluster, data = dat ) expect_equal( dim_unquote, dim_quote ) expect_equivalent( difference_in_means( Y ~ Z, alpha = .05, weights = cluster, data = dat ), difference_in_means( Y ~ Z, alpha = .05, weights = cluster, data = dat ) ) }) test_that("DIM unbiased", { dat <- data.frame( i = 1:10, Y0 = c( 2.1, 3.5, -131.2, -1.3, -4, 0.1, 8.1, -1.3, 1.1, 9.1 ), Y1 = c( 2.6, 3.0, -132, -0.7, -3.3, 0.5, 24.3, -1, 1.6, 0.3 ) ) # True SATE = 0.91 trueSATE <- mean(dat$Y1) - mean(dat$Y0) ## Complete Randomization # True se(SATE_hat) true_seSATE <- sqrt((var(dat$Y0) + var(dat$Y1) + 2 * cov(dat$Y0, dat$Y1)) / (10 - 1)) declaration <- randomizr::declare_ra(N = nrow(dat)) treatment_perms <- randomizr::obtain_permutation_matrix(declaration) ests <- apply( treatment_perms, 2, function(x) { dat$Z <- x dat$Y <- ifelse(dat$Z, dat$Y1, dat$Y0) dim <- difference_in_means(Y ~ Z, data = dat) coef(dim) } ) expect_equivalent( trueSATE, mean(ests) ) ## cluster randomized design, 5 blocks of 2 dat$cluster <- rep(1:5, each = 2) declaration <- randomizr::declare_ra( N = nrow(dat), clusters = dat$cluster ) treatment_perms <- randomizr::obtain_permutation_matrix(declaration) ests <- apply( treatment_perms, 2, function(x) { dat$Z <- x dat$Y <- ifelse(dat$Z, dat$Y1, dat$Y0) dim <- difference_in_means( Y ~ Z, clusters = cluster, data = dat ) coef(dim) } ) expect_equivalent( trueSATE, mean(ests) ) ## Matched pair design, 5 blocks of 2 dat$blocks <- rep(1:5, each = 2) declaration <- randomizr::declare_ra( N = nrow(dat), blocks = dat$blocks, block_m = rep(1, 5) ) treatment_perms <- randomizr::obtain_permutation_matrix(declaration) ests <- apply( treatment_perms, 2, function(x) { dat$Z <- x dat$Y <- ifelse(dat$Z, dat$Y1, dat$Y0) dim <- difference_in_means( Y ~ Z, blocks = blocks, data = dat ) coef(dim) } ) expect_equivalent( trueSATE, mean(ests) ) ## block randomized design, 2 blocks of 5 dat$blocks <- rep(1:2, each = 5) declaration <- randomizr::declare_ra( N = nrow(dat), blocks = dat$blocks, block_m = c(3, 3) ) treatment_perms <- randomizr::obtain_permutation_matrix(declaration) ests <- apply( treatment_perms, 2, function(x) { dat$Z <- x dat$Y <- ifelse(dat$Z, dat$Y1, dat$Y0) dim <- difference_in_means( Y ~ Z, blocks = blocks, data = dat ) coef(dim) } ) expect_equivalent( trueSATE, mean(ests) ) ## cluster matched pair, different sized blocks dat$blocks <- rep(1:3, times = c(4, 4, 2)) dat$clusters <- c(1, 1, 2, 2, 3, 3, 4, 4, 5, 6) declaration <- randomizr::declare_ra( N = nrow(dat), blocks = dat$blocks, clusters = dat$clusters ) treatment_perms <- randomizr::obtain_permutation_matrix(declaration) ests <- apply( treatment_perms, 2, function(x) { dat$Z <- x dat$Y <- ifelse(dat$Z, dat$Y1, dat$Y0) dim <- difference_in_means( Y ~ Z, blocks = blocks, clusters = clusters, data = dat ) coef(dim) } ) expect_equivalent( trueSATE, mean(ests) ) }) test_that("DIM matches lm_robust under certain conditions", { n <- 400 dat <- data.frame(Y = rnorm(n)) ## DIM and lm_robust agree without clustering except for DoF because DIM uses Satterthwaite approx dat$z <- c(0, 1) lm_o <- lm_robust(Y ~ z, data = dat) dim_o <- difference_in_means(Y ~ z, data = dat) expect_equivalent( tidy(lm_o)[2, 1:3], tidy(dim_o)[, 1:3] ) ## DIM and lm_robust agree with clustering becuase DIM just uses lm_robust w/ CR2! dat$cl_diff_size <- sample(100, size = 400, replace = TRUE) dat$z_clustered <- as.numeric(dat$cl_diff_size <= 50) lm_cl_o <- lm_robust(Y ~ z_clustered, clusters = cl_diff_size, data = dat) dim_cl_o <- difference_in_means(Y ~ z_clustered, clusters = cl_diff_size, data = dat) expect_equivalent( tidy(lm_cl_o)[2, ], tidy(dim_cl_o) ) ## Blocked design is equivalent to lm_lin dat$bl <- rep(1:25, each = 16) dat$z_blocked <- rep(c(0, 1), each = 2) lm_bl_o <- lm_lin(Y ~ z_blocked, ~ factor(bl), data = dat) dim_bl_o <- difference_in_means(Y ~ z_blocked, blocks = bl, data = dat) # Not identical since row name of lm_bl_o is 2 due to the intercept expect_equivalent( tidy(lm_bl_o)[2, ], tidy(dim_bl_o) ) ## Block-clustered is equivalent to lm_lin ## (and indeed uses lm_robust machinery for the ests and ses, the DoF are equivalent with equal clusters ## by design dat$cl <- rep(1:200, each = 2) lm_blcl_o <- lm_lin(Y ~ z_blocked, ~ factor(bl), data = dat, clusters = cl) dim_blcl_o <- difference_in_means(Y ~ z_blocked, blocks = bl, clusters = cl, data = dat) # Not identical since row name of lm_bl_o is 2 due to the intercept expect_equivalent( tidy(lm_blcl_o)[2, ], tidy(dim_blcl_o) ) # With weights now, identical to lm_robust, HC2 by force! except for matched pairs which fails dat$w <- runif(nrow(dat)) # simple W expect_equivalent( tidy(lm_robust(Y ~ z, data = dat, weights = w))[2, ], tidy(difference_in_means(Y ~ z, data = dat, weights = w)) ) # blocked W expect_equivalent( tidy(lm_lin(Y ~ z_blocked, ~ factor(bl), weights = w, data = dat))[2, ], tidy(difference_in_means(Y ~ z_blocked, data = dat, blocks = bl, weights = w)) ) # blocked-clustered W (goes to CR2) # DF different in clustered case dim_bl_cl_w <- difference_in_means( Y ~ z_blocked, data = dat, clusters = cl, blocks = bl, weights = w ) expect_equivalent( tidy(lm_lin(Y ~ z_blocked, ~ factor(bl), clusters = cl, weights = w, data = dat))[2, 1:3], tidy(dim_bl_cl_w)[, 1:3] ) expect_equal( dim_bl_cl_w$design, "Block-clustered (weighted)" ) # Clustered W dim_cl_w <- difference_in_means( Y ~ z_clustered, data = dat, clusters = cl_diff_size, weights = w ) expect_equivalent( tidy(lm_robust(Y ~ z_clustered, clusters = cl_diff_size, weights = w, data = dat))[2, 1:3], tidy(dim_cl_w)[, 1:3] ) expect_equal( dim_cl_w$design, "Clustered (weighted)" ) # errors with matched pairs dat$mps <- rep(1:(n / 2), each = 2) dat$z_mps <- rep(0:1, times = (n / 2)) expect_error( difference_in_means(Y ~ z_mps, data = dat, weights = w, blocks = mps), "Cannot use `weights` with matched pairs design at the moment" ) }) test_that("se_type = none works", { # simple dim <- difference_in_means(mpg ~ am, data = mtcars) dim_no <- difference_in_means(mpg ~ am, data = mtcars, se_type = "none") expect_equal( dim$coefficients, dim_no$coefficients ) expect_equivalent( tidy(dim_no)[c("std.error", "p.value", "conf.low", "conf.high")], rep(NA_real_, 4) ) # block dimb <- difference_in_means(mpg ~ am, blocks = cyl, data = mtcars) dimb_no <- difference_in_means(mpg ~ am, blocks = cyl, data = mtcars, se_type = "none") expect_equal( dimb$coefficients, dimb_no$coefficients ) expect_equivalent( tidy(dimb_no)[c("std.error", "p.value", "conf.low", "conf.high")], rep(NA_real_, 4) ) # cluster mtcars$z <- as.numeric(mtcars$cyl < 5) dimc <- difference_in_means(mpg ~ z, clusters = cyl, data = mtcars) dimc_no <- difference_in_means(mpg ~ z, clusters = cyl, data = mtcars, se_type = "none") expect_equal( dimc$coefficients, dimc_no$coefficients ) expect_equivalent( tidy(dimc_no)[c("std.error", "p.value", "conf.low", "conf.high")], rep(NA_real_, 4) ) # weight dimw <- difference_in_means(mpg ~ am, weights = wt, data = mtcars) dimw_no <- difference_in_means(mpg ~ am, weights = wt, data = mtcars, se_type = "none") expect_equal( dimw$coefficients, dimw_no$coefficients ) expect_equivalent( tidy(dimw_no)[c("std.error", "p.value", "conf.low", "conf.high")], rep(NA_real_, 4) ) # cluster, weight dimcw <- difference_in_means(mpg ~ z, weights = wt, clusters = cyl, data = mtcars) dimcw_no <- difference_in_means(mpg ~ z, weights = wt, clusters = cyl, data = mtcars, se_type = "none") expect_equal( dimcw$coefficients, dimcw_no$coefficients ) expect_equivalent( tidy(dimcw_no)[c("std.error", "p.value", "conf.low", "conf.high")], rep(NA_real_, 4) ) # matched-pair mtcars$mp <- rep(1:(nrow(mtcars) / 2), each = 2) mtcars$z <- rep(c(0, 1), times = nrow(mtcars) / 2) dimmp <- difference_in_means(mpg ~ z, blocks = mp, data = mtcars) dimmp_no <- difference_in_means(mpg ~ z, blocks = mp, data = mtcars, se_type = "none") expect_equal( dimmp$coefficients, dimmp_no$coefficients ) expect_equivalent( tidy(dimmp_no)[c("std.error", "p.value", "conf.low", "conf.high")], rep(NA_real_, 4) ) })