context("Estimator - lm_lin") test_that("Test LM Lin", { dat <- data.frame( Y = rnorm(100), Z = rbinom(100, 1, .5), X1 = rnorm(100), X2 = rbinom(100, 1, .5), cluster = sample(1:10, size = 100, replace = T) ) lm_lin_out <- lm_lin(Y ~ Z, covariates = ~ X1 + X2, data = dat) expect_error( lm_lin_out <- lm_lin(Y ~ Z, data = dat, covariates = ~ X1 + X2), NA ) expect_error( lm_lin( Y ~ Z + X1, covariates = ~ X2, data = dat ), "must only have the treatment variable on the right-hand side of the formula" ) dat2 <- dat dat2$Z <- rnorm(100) # For now we allow huge multi-valued treatments, but output is wonky # expect_error( # lm_lin(Y ~ Z, # covariates = ~ X1, # data = dat2), # 'binary' # ) expect_error( lm_lin( Y ~ Z, covariates = Y ~ X1, data = dat ), "right-sided formula" ) # Now allows multi-valued treatments expect_error( lm_lin( Y ~ factor(cluster), ~ X1, data = dat ), NA ) expect_error( lm_lin(Y ~ treat, dat$X1, data = dat), "must be specified as a formula" ) expect_error( lm_lin(Y ~ treat, ~ 1, data = dat), "variable on the right-hand side" ) # works with one covar expect_error( lm_lin( Y ~ Z, covariates = ~ X1, data = dat ), NA ) dat$X1_c <- dat$X1 - mean(dat$X1) dat$X2_c <- dat$X2 - mean(dat$X2) lm_rob_out <- lm_robust(Y ~ Z + Z * X1_c + Z * X2_c, data = dat) expect_equal( tidy(lm_lin_out), tidy(lm_rob_out) ) expect_equivalent( coef(lm_lin_out), coef(lm(Y ~ Z + Z * X1_c + Z * X2_c, data = dat)) ) ## Works with multi-valued treatments dat$Z_mult <- rep_len(1:3, length.out = 100) test_mult <- function(treatment_type, dat) { dat$Z_mult <- switch(treatment_type, int = rep_len(1:3, length.out = 100), num = rep_len(c(1.5, 2.5, 5), length.out = 100), char = rep_len(letters[1:4], length.out = 100), bin = rbinom(100, 1, 0.5) ) mult_out <- lm_lin( Y ~ Z_mult, covariates = ~ X1 + X2, data = dat ) fact_mult_out <- lm_lin( Y ~ factor(Z_mult), covariates = ~ X1 + X2, data = dat ) noint_mult_out <- lm_lin( Y ~ Z_mult + 0, covariates = ~ X1 + X2, data = dat ) noint_fact_mult_out <- lm_lin( Y ~ factor(Z_mult) + 0, covariates = ~ X1 + X2, data = dat ) expect_equal( tidy(mult_out)[, -1], tidy(fact_mult_out)[, -1] ) rob_fact_mult_out <- lm_robust(Y ~ factor(Z_mult) * X1_c + factor(Z_mult) * X2_c, data = dat) expect_equal( tidy(fact_mult_out), tidy(rob_fact_mult_out) ) # Also works with no intercept! expect_equal( tidy(noint_mult_out)[, -1], tidy(noint_fact_mult_out)[, -1] ) } test_mult("int", dat) test_mult("num", dat) test_mult("char", dat) # test_mult("bin", dat) this gives weird answers because it isn't really valid when not a factor! ## Works with missing data in treatment dat$Z[23] <- NA dat$X1_c <- dat$X1 - mean(dat$X1[-23]) dat$X2_c <- dat$X2 - mean(dat$X2[-23]) expect_equal( tidy(lm_lin( Y ~ Z, covariates = ~ X1 + X2, data = dat )), tidy(lm_robust( Y ~ Z + Z * X1_c + Z * X2_c, data = dat )) ) ## Test cluster passes through expect_equal( tidy(lm_lin( Y ~ Z, covariates = ~ X1 + X2, data = dat, clusters = cluster )), tidy(lm_robust( Y ~ Z + Z * X1_c + Z * X2_c, data = dat, clusters = cluster )) ) ## Test that it works with subset keep <- setdiff(which(dat$Y > 0), 23) dat$X1_c <- dat$X1 - mean(dat$X1[keep]) dat$X2_c <- dat$X2 - mean(dat$X2[keep]) expect_equal( tidy(lm_lin( Y ~ Z, covariates = ~ X1 + X2, data = dat, clusters = cluster, subset = Y > 0 )), tidy(lm_robust( Y ~ Z + Z * X1_c + Z * X2_c, data = dat, clusters = cluster, subset = Y > 0 )) ) # Works with factors dat <- data.frame( Y = rnorm(100), treat = as.factor(rbinom(100, 1, .5)), X1 = rnorm(100), X2 = as.factor(rbinom(100, 1, .5)), cluster = sample(1:10, size = 100, replace = T) ) dat$X1_c <- dat$X1 - mean(dat$X1) dat$X21_c <- as.numeric(dat$X2 == 1) - mean(dat$X2 == 1) expect_equivalent( tidy(lm_lin( Y ~ treat, covariates = ~ X1 + X2, data = dat, clusters = cluster )), tidy(lm_robust( Y ~ treat + treat * X1_c + treat * X21_c, data = dat, clusters = cluster )) ) ## Works with a factor with spaces in the name (often the case for clusters) dat$X2 <- as.factor(sample( c("This is a level", "Get on my level"), size = 100, replace = T )) ## for lm_robust dat$X2_c <- as.numeric(dat$X2 == "This is a level") - mean(dat$X2 == "This is a level") ## Names will differ expect_equivalent( tidy( lm_lin( Y ~ treat, covariates = ~ X1 + X2, data = dat, clusters = cluster ) )[, -1], tidy( lm_robust( Y ~ treat + treat * X1_c + treat * X2_c, data = dat, clusters = cluster ) )[, -1] ) ## Works with missingness on cluster dat$cluster[40] <- NA dat$X1_c <- dat$X1 - mean(dat$X1[-40]) dat$X2_c <- as.numeric(dat$X2 == "This is a level") - mean(dat$X2[-40] == "This is a level") expect_warning( lin_out <- lm_lin( Y ~ treat, covariates = ~ X1 + X2, data = dat, clusters = cluster ), "missingness in the cluster" ) expect_warning( rob_out <- lm_robust( Y ~ treat + treat * X1_c + treat * X2_c, data = dat, clusters = cluster ), "missingness in the cluster" ) # drop coefficient name because names will differ expect_equivalent( tidy(lin_out)[, -1], tidy(rob_out)[, -1] ) # rank deficient cases dat$treat2 <- dat$treat dat$X1_2 <- dat$X1 lm_lin(Y ~ treat, ~ treat2 + X1, data = dat) # somewhat odd behavior expect_equivalent( is.na(coef(lm_lin(Y ~ treat, ~ X1_2 + X1, data = dat))), c(FALSE, FALSE, FALSE, TRUE, FALSE, TRUE) ) # Does lm_lin parse covariate names correctly? # Previously it dropped the factor(), now properly builds factor lmlo <- lm_lin(Y ~ treat, ~factor(cluster) + X1, data = dat) expect_equal( nrow(tidy(lmlo)), 22 ) # works with a binary with no intercept (bit odd, though!) dat$z <- rbinom(nrow(dat), 1, 0.5) lmlo <- lm_lin(Y ~ z + 0, ~X1, data = dat) expect_equal( lmlo$term, c("z", "X1_c", "z:X1_c") ) # behaves correctly with "odd" covariates (see issue #283) lmlo <- lm_lin(Y ~ z, ~ is.na(X1), data = dat) expect_equal( lmlo$term, c("(Intercept)", "z", "(is.na(X1)TRUE)_c", "z:(is.na(X1)TRUE)_c") ) }) test_that("lm_lin same as sampling perspective", { # Unweighted matches sampling view lmo <- lm_lin(mpg ~ am, ~ hp, data = mtcars) m_hp <- mean(mtcars$hp) areg <- lm(mpg ~ hp, data = mtcars, subset = am == 1) breg <- lm(mpg ~ hp, data = mtcars, subset = am == 0) ate <- with(mtcars[mtcars$am == 1, ], mean(mpg) + (m_hp - mean(hp)) * coef(areg)[2]) - with(mtcars[mtcars$am == 0, ], mean(mpg) + (m_hp - mean(hp)) * coef(breg)[2]) expect_equivalent( ate, coef(lmo)["am"] ) }) test_that("weighted lm_lin same as with one covar sampling view", { # Weighted matches (one covar) lmwo <- lm_lin(mpg ~ am, ~ hp, weights = wt, data = mtcars) hp_wmean <- weighted.mean(mtcars$hp, mtcars$wt) wareg <- lm(mpg ~ hp, data = mtcars, subset = am == 1, weights = wt) wbreg <- lm(mpg ~ hp, data = mtcars, subset = am == 0, weights = wt) wate <- with(mtcars[mtcars$am == 1, ], weighted.mean(mpg, wt) + (hp_wmean - weighted.mean(hp, wt)) * coef(wareg)[2]) - with(mtcars[mtcars$am == 0, ], weighted.mean(mpg, wt) + (hp_wmean - weighted.mean(hp, wt)) * coef(wbreg)[2]) expect_equivalent( wate, coef(lmwo)["am"] ) }) test_that("weighted lm_lin same as with two covar sampling view", { # Weighted matches (two covars) lmw2o <- lm_lin(mpg ~ am, ~ hp + cyl, weights = wt, data = mtcars) hpcyl_wmean <- apply(mtcars[, c("hp", "cyl")], 2, weighted.mean, mtcars$wt) w2areg <- lm(mpg ~ hp + cyl, data = mtcars, subset = am == 1, weights = wt) w2breg <- lm(mpg ~ hp + cyl, data = mtcars, subset = am == 0, weights = wt) w2ate <- with(mtcars[mtcars$am == 1, ], weighted.mean(mpg, wt) + (hpcyl_wmean - apply(cbind(hp, cyl), 2, weighted.mean, wt)) %*% coef(w2areg)[2:3]) - with(mtcars[mtcars$am == 0, ], weighted.mean(mpg, wt) + (hpcyl_wmean - apply(cbind(hp, cyl), 2, weighted.mean, wt)) %*% coef(w2breg)[2:3]) expect_equivalent( w2ate, coef(lmw2o)["am"] ) }) test_that("lm_lin properly renames trickily named variables", { # lm_lin should add parentheses around variables that have colons in their name # or that have parentheses in the name that are not in the first position lo <- lm_lin(mpg ~ am, ~ wt*cyl + log(wt), mtcars) expect_equal( lo$term, c("(Intercept)", "am", "wt_c", "cyl_c", "(log(wt))_c", "(wt:cyl)_c", "am:wt_c", "am:cyl_c", "am:(log(wt))_c", "am:(wt:cyl)_c") ) }) test_that("lm_lin works with multiple outcomes", { lmpg <- lm_lin(mpg ~ am, ~ cyl, mtcars) lwt <- lm_lin(wt ~ am, ~ cyl, mtcars) lboth <- lm_lin(cbind(mpg, wt) ~ am, ~ cyl, mtcars) expect_equivalent( tidy(lmpg), tidy(lboth)[1:4, ] ) expect_equivalent( tidy(lwt), tidy(lboth)[5:8, ] ) expect_equivalent( lboth$fstatistic[1:2], c(lmpg$fstatistic[1], lwt$fstatistic[1]) ) })