context("Verification - lm and iv match Stata") test_that("lm_robust matches stata", { # write.csv(mtcars, # file = 'tests/testthat/mtcars.csv', # row.names = F) stata_ests <- read.table( "stata-ests.txt", col.names = c("model", "se1", "se2", "df", "fstat"), stringsAsFactors = FALSE ) mtcars$w <- mtcars$drat / 5 estimatr_mat <- matrix(NA, 10, 4) lm_c <- lm_robust(mpg ~ hp, data = mtcars, se_type = "classical") estimatr_mat[1, ] <- c(lm_c$std.error ^ 2, lm_c$df[2], lm_c$fstatistic[1]) lm_hc1 <- lm_robust(mpg ~ hp, data = mtcars, se_type = "HC1") estimatr_mat[2, ] <- c(lm_hc1$std.error ^ 2, lm_hc1$df[2], lm_hc1$fstatistic[1]) lm_hc2 <- lm_robust(mpg ~ hp, data = mtcars, se_type = "HC2") estimatr_mat[3, ] <- c(lm_hc2$std.error ^ 2, lm_hc2$df[2], lm_hc2$fstatistic[1]) lm_hc3 <- lm_robust(mpg ~ hp, data = mtcars, se_type = "HC3") estimatr_mat[4, ] <- c(lm_hc3$std.error ^ 2, lm_hc3$df[2], lm_hc3$fstatistic[1]) lm_stata <- lm_robust(mpg ~ hp, clusters = cyl, data = mtcars, se_type = "stata") estimatr_mat[5, ] <- c(lm_stata$std.error ^ 2, lm_stata$df[2], lm_stata$fstatistic[1]) lm_c_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "classical") estimatr_mat[6, ] <- c(lm_c_w$std.error ^ 2, lm_c_w$df[2], lm_c_w$fstatistic[1]) lm_hc1_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "HC1") estimatr_mat[7, ] <- c(lm_hc1_w$std.error ^ 2, lm_hc1_w$df[2], lm_hc1_w$fstatistic[1]) lm_hc2_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "HC2") estimatr_mat[8, ] <- c(lm_hc2_w$std.error ^ 2, lm_hc2_w$df[2], lm_hc2_w$fstatistic[1]) lm_hc3_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "HC3") estimatr_mat[9, ] <- c(lm_hc3_w$std.error ^ 2, lm_hc3_w$df[2], lm_hc3_w$fstatistic[1]) lm_stata_w <- lm_robust(mpg ~ hp, clusters = cyl, weights = w, data = mtcars, se_type = "stata") estimatr_mat[10, ] <- c(lm_stata_w$std.error ^ 2, lm_stata_w$df[2], lm_stata_w$fstatistic[1]) # All look numerically identical except for HC2 and HC3 with weights which # have non-negligible difference. This is due to differences in how the hat # matrix is built that are still unresolved # Therefore rows 8 and 9 will have larger differences expect_true( max(abs(estimatr_mat[c(1:7, 10), 1:4] - apply(stata_ests[c(1:7, 10), c(3, 2, 4, 5)], 2, as.numeric))) < 1e-5 ) }) test_that("iv_robust matches stata", { skip_if_not_installed("AER") # write.csv(mtcars, # file = 'tests/testthat/mtcars.csv', # row.names = F) stata_ests <- read.table( "stata-iv-ests.txt", col.names = c("model", "v1", "v2", "v3", "fstat", "r2", "r2_a", "rmse"), stringsAsFactors = FALSE ) mtcars$w <- mtcars$drat / 5 estimatr_mat <- matrix(NA, 6, 7) iv_c <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, se_type = "classical") estimatr_mat[1, ] <- c(iv_c$std.error ^ 2, iv_c$fstatistic[1], iv_c$r.squared, iv_c$adj.r.squared, sqrt(iv_c$res_var)) iv_hc1 <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, se_type = "HC1") estimatr_mat[2, ] <- c(iv_hc1$std.error ^ 2, iv_hc1$fstatistic[1], iv_hc1$r.squared, iv_hc1$adj.r.squared, sqrt(iv_hc1$res_var)) iv_stata <- iv_robust(mpg ~ hp + am | wt + gear, clusters = cyl, data = mtcars, se_type = "stata") estimatr_mat[3, ] <- c(iv_stata$std.error ^ 2, iv_stata$fstatistic[1], iv_stata$r.squared, iv_stata$adj.r.squared, sqrt(iv_stata$res_var)) iv_c_w <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, weights = w, se_type = "classical") estimatr_mat[4, ] <- c(iv_c_w$std.error ^ 2, iv_c_w$fstatistic[1], iv_c_w$r.squared, iv_c_w$adj.r.squared, sqrt(iv_c_w$res_var)) iv_hc1_w <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, weights = w, se_type = "HC1") estimatr_mat[5, ] <- c(iv_hc1_w$std.error ^ 2, iv_hc1_w$fstatistic[1], iv_hc1_w$r.squared, iv_hc1_w$adj.r.squared, sqrt(iv_hc1_w$res_var)) iv_stata_w <- iv_robust(mpg ~ hp + am | wt + gear, clusters = cyl, weights = w, data = mtcars, se_type = "stata") estimatr_mat[6, ] <- c(iv_stata_w$std.error ^ 2, iv_stata_w$fstatistic[1], iv_stata_w$r.squared, iv_stata_w$adj.r.squared, sqrt(iv_stata_w$res_var)) expect_true( max(abs(estimatr_mat[, 1] - as.numeric(stata_ests[, 4]))) < 2e-05 ) expect_true( max(abs(estimatr_mat[, 4] - as.numeric(stata_ests[, 5]))) < 3e-05 ) # Note, RMSE is different for stata with weights than ivreg or iv_robust expect_true( max(abs(estimatr_mat[, 5:6] - stata_ests[, 6:7])) < 4e-08 ) ivrego_w <- AER::ivreg(mpg ~ hp + am | wt + gear, data = mtcars, weights = w) expect_equal( ivrego_w$sigma, sqrt(iv_c_w$res_var) ) expect_equal( ivrego_w$sigma, sqrt(iv_hc1_w$res_var) ) expect_equal( ivrego_w$sigma, sqrt(iv_stata_w$res_var) ) })