context("lm objects")
set.seed(20190513)

m <- 8
cluster <- factor(rep(LETTERS[1:m], 3 + rpois(m, 5)))
n <- length(cluster)
const <- rep("whuzzup.", n)
X <- matrix(rnorm(3 * n), n, 3)
nu <- rnorm(m)[cluster]
e <- rnorm(n)
w <- rgamma(n, shape = 3, scale = 3)
y <- X %*% c(.4, .3, -.3) + nu + e

dat <- data.frame(y, X, cluster, const, w, row = 1:n)

lm_fit <- lm(y ~ X1 + X2 + X3, data = dat)
WLS_fit <- lm(y ~ X1 + X2 + X3, data = dat, weights = w)
CR_types <- paste0("CR",0:4)

# obj <- WLS_fit
# y <- dat$y
# type <- "CR2"
# vcov <- vcovCR(obj, cluster = cluster, type = type)
# target = NULL
# inverse_var = FALSE

test_that("bread works", {
  
  expect_true(check_bread(lm_fit, cluster = dat$cluster, y = dat$y))
  lm_vcov <- bread(lm_fit) * summary(lm_fit)$sigma^2 / v_scale(lm_fit)
  expect_equal(vcov(lm_fit), lm_vcov)
  
  expect_true(check_bread(WLS_fit, cluster = dat$cluster, y = dat$y))
  wls_vcov <- bread(WLS_fit) * summary(WLS_fit)$sigma^2 / v_scale(WLS_fit)
  expect_equal(vcov(WLS_fit), wls_vcov)
})

test_that("vcovCR options don't matter for CR0", {
  expect_error(vcovCR(lm_fit, type = "CR0"))
  CR0 <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR0")
  expect_output(print(CR0))
  attr(CR0, "target") <- NULL
  attr(CR0, "inverse_var") <- NULL
  CR0_A <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w)
  attr(CR0_A, "target") <- NULL
  attr(CR0_A, "inverse_var") <- NULL
  expect_identical(CR0_A, CR0)
  CR0_B <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w, inverse_var = FALSE)
  attr(CR0_B, "target") <- NULL
  attr(CR0_B, "inverse_var") <- NULL
  expect_identical(CR0_A, CR0)
  CR0_C <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w, inverse_var = TRUE)
  attr(CR0_C, "target") <- NULL
  attr(CR0_C, "inverse_var") <- NULL
  expect_identical(CR0_C, CR0)
  
  wCR0 <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR0")
  attr(wCR0, "target") <- NULL
  attr(wCR0, "inverse_var") <- NULL
  wCR0_A <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w)
  attr(wCR0_A, "target") <- NULL
  attr(wCR0_A, "inverse_var") <- NULL
  expect_identical(wCR0_A, wCR0)
  wCR0_B <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w, inverse_var = FALSE)
  attr(wCR0_B, "target") <- NULL
  attr(wCR0_B, "inverse_var") <- NULL
  expect_identical(wCR0_B, wCR0)
  wCR0_C <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w, inverse_var = TRUE)
  attr(wCR0_C, "target") <- NULL
  attr(wCR0_C, "inverse_var") <- NULL
  expect_identical(wCR0_C, wCR0)
})

test_that("vcovCR options work for CR2", {
  CR2_iv <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR2")
  expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", inverse_var = TRUE), CR2_iv)
  expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", target = rep(1, n), inverse_var = TRUE), CR2_iv)
  
  attr(CR2_iv, "inverse_var") <- FALSE
  CR2_not <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", inverse_var = FALSE)
  expect_equal(CR2_not, CR2_iv)
  expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", target = rep(1, n)), CR2_not)
  expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", target = rep(1, n), inverse_var = FALSE), CR2_not)
  expect_false(identical(vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", target = 1 / dat$w), CR2_not))
  
  wCR2_id <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2")
  expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", inverse_var = FALSE), wCR2_id)
  expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", target = rep(1, n)), wCR2_id)
  expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", target = rep(1, n), inverse_var = FALSE), wCR2_id)
  
  wCR2_iv <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", inverse_var = TRUE)
  wCR2_target <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", target = 1 / dat$w, inverse_var = TRUE)
  expect_false(identical(wCR2_target, wCR2_id))
  expect_equal(matrix(wCR2_target, dim(wCR2_target)), matrix(wCR2_iv, dim(wCR2_iv)))
  expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", target = 1 / dat$w, inverse_var = TRUE), wCR2_target)
})

test_that("vcovCR options work for CR4", {
  CR4_iv <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR4")
  expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", inverse_var = TRUE), CR4_iv)
  expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", target = rep(1, n), inverse_var = TRUE), CR4_iv)
  
  attr(CR4_iv, "inverse_var") <- FALSE
  CR4_not <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", inverse_var = FALSE)
  expect_equal(CR4_not, CR4_iv)
  expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", target = rep(1, n)), CR4_not)
  expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", target = rep(1, n), inverse_var = FALSE), CR4_not)
  expect_false(identical(vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", target = 1 / dat$w), CR4_not))
  
  wCR4_id <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4")
  expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", inverse_var = FALSE), wCR4_id)
  expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", target = rep(1, n)), wCR4_id)
  expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", target = rep(1, n), inverse_var = FALSE), wCR4_id)
  
  wCR4_iv <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", inverse_var = TRUE)
  wCR4_target <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", target = 1 / dat$w, inverse_var = TRUE)
  expect_false(identical(wCR4_target, wCR4_id))
  expect_equal(matrix(wCR4_target, dim(wCR4_target)), matrix(wCR4_iv, dim(wCR4_iv)))
  expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", target = 1 / dat$w, inverse_var = TRUE), wCR4_target)
  
})


test_that("CR2 and CR4 are target-unbiased", {
  expect_true(check_CR(lm_fit, vcov = "CR2", cluster = dat$cluster))
  expect_true(check_CR(WLS_fit, vcov = "CR2", cluster = dat$cluster))
  expect_true(check_CR(lm_fit, vcov = "CR4", cluster = dat$cluster))
  expect_true(check_CR(WLS_fit, vcov = "CR4", cluster = dat$cluster))
})

test_that("vcovCR is equivalent to vcovHC when clusters are all of size 1", {
  library(sandwich, quietly=TRUE)
  CR0 <- vcovCR(lm_fit, cluster = dat$row, type = "CR0")
  expect_equal(vcovHC(lm_fit, type = "HC0"), as.matrix(CR0))
  CR1 <- vcovCR(lm_fit, cluster = dat$row, type = "CR1S")
  expect_equal(vcovHC(lm_fit, type = "HC1"), as.matrix(CR1))
  CR2 <- vcovCR(lm_fit, cluster = dat$row, type = "CR2")
  expect_equal(vcovHC(lm_fit, type = "HC2"), as.matrix(CR2))
  CR3 <- vcovCR(lm_fit, cluster = dat$row, type = "CR3")
  expect_equal(vcovHC(lm_fit, type = "HC3"), as.matrix(CR3))
})

test_that("CR2 is equivalent to Welch t-test for DiD design", {
  m0 <- 4
  m1 <- 9
  m <- m0 + m1
  cluster <- factor(rep(LETTERS[1:m], each = 2))
  n <- length(cluster)
  time <- rep(c(1,2), m)
  trt_clusters <- c(rep(0,m0), rep(1,m1))
  trt <- (time - 1) * rep(trt_clusters, each = 2)
  nu <- rnorm(m)[cluster]
  e <- rnorm(n)
  y <- 0.4 * trt + nu + e
  
  dat <- data.frame(y, time, trt, cluster)
  lm_DID <- lm(y ~ cluster + factor(time) + trt, data = dat)
  t_Satt <- coef_test(lm_DID, vcov = "CR2", cluster = dat$cluster)["trt",]
  
  y_diff <- apply(matrix(y, nrow = 2), 2, diff)
  t_Welch <- t.test(y_diff ~ trt_clusters)
  
  expect_equal(with(t_Welch, estimate[[2]] - estimate[[1]]), t_Satt$beta)
  expect_equal(as.numeric(-t_Welch$statistic), with(t_Satt, beta / SE))
  expect_is(all.equal(as.numeric(t_Welch$parameter), t_Satt$df), "character")
  
  df <- m^2 * (m0 - 1) * (m1 - 1) / (m0^2 * (m0 - 1) + m1^2 * (m1 - 1))
  expect_equal(t_Satt$df, df)
})

test_that("Order doesn't matter.",{
  
  check_sort_order(WLS_fit, dat, "cluster")
  
})

test_that("clubSandwich works with dropped observations", {
  dat_miss <- dat
  miss_indicator <- sample.int(n, size = round(n / 10))
  dat_miss$X1[miss_indicator] <- NA
  dat_miss$cluster[miss_indicator] <- NA
  
  lm_dropped <- lm(y ~ X1 + X2 + X3, data = dat_miss)
  dat_complete <- subset(dat_miss, !is.na(X1))
  lm_complete <- lm(y ~ X1 + X2 + X3, data = dat_complete)
  
  CR_drop <- lapply(CR_types, function(x) vcovCR(lm_dropped, cluster = dat_miss$cluster, type = x))
  CR_complete <- lapply(CR_types, function(x) vcovCR(lm_complete, cluster = dat_complete$cluster, type = x))
  expect_equal(CR_drop, CR_complete)
  
  test_drop <- lapply(CR_types, function(x) coef_test(lm_dropped, vcov = x, cluster = dat_miss$cluster, test = "All", p_values = FALSE))
  test_complete <- lapply(CR_types, function(x) coef_test(lm_complete, vcov = x, cluster = dat_complete$cluster, test = "All", p_values = FALSE))
  expect_equal(test_drop, test_complete)
})


test_that("clubSandwich requires no missing values on the clustering variable", {
  dat_miss <- dat
  miss_indicator <- sample.int(n, size = round(n / 10))
  dat_miss$cluster[miss_indicator] <- NA
  
  lm_dropped <- lm(y ~ X1 + X2 + X3, data = dat_miss)
  
  expect_error(vcovCR(lm_dropped, cluster = dat_miss$cluster, type = "CR0"), 
               "Clustering variable cannot have missing values.")
  expect_error(coef_test(lm_dropped, vcov = "CR0", cluster = dat_miss$cluster, test = "All"),
               "Clustering variable cannot have missing values.")
})


test_that("clubSandwich works with aliased predictors", {
  
  data(npk, package = "datasets")
  npk_alias <- lm(yield ~ block + N*P*K, npk)
  npk_drop <- lm(yield ~ block + N + P + K + N:P + N:K + P:K, npk)
  
  CR_alias <- lapply(CR_types[-4], function(x) vcovCR(npk_alias, cluster = npk$block, type = x))
  CR_drop <- lapply(CR_types[-4], function(x) vcovCR(npk_drop, cluster = npk$block, type = x))
  expect_equal(CR_alias, CR_drop)
  
  test_drop <- lapply(CR_types[-4], function(x) coef_test(npk_alias, vcov = x, cluster = npk$block, test = c("z","naive-t","Satterthwaite"), coefs = 7:12, p_values = FALSE))
  test_complete <- lapply(CR_types[-4], function(x) coef_test(npk_drop, vcov = x, cluster = npk$block, test = c("z","naive-t","Satterthwaite"), coefs = 7:12, p_values = FALSE))
  expect_equal(test_drop, test_complete)
})


test_that("weight scale doesn't matter", {
  
  lm_fit_w <- lm(y ~ X1 + X2 + X3, data = dat, weights = rep(4, nrow(dat)))
  
  unweighted_fit <- lapply(CR_types, function(x) vcovCR(lm_fit, cluster = cluster, type = x))
  weighted_fit <- lapply(CR_types, function(x) vcovCR(lm_fit_w, cluster = cluster, type = x))
  
  expect_equal(lapply(unweighted_fit, as.matrix), 
               lapply(weighted_fit, as.matrix), 
               tol = 5 * 10^-7)  
  
  target <- 1 + rpois(nrow(dat), lambda = 8)
  unweighted_fit <- lapply(CR_types, function(x) vcovCR(lm_fit, cluster = cluster, type = x, target = target))
  weighted_fit <- lapply(CR_types, function(x) vcovCR(lm_fit_w, cluster = cluster, type = x, target = target * 15))

  expect_equal(lapply(unweighted_fit, as.matrix), 
               lapply(weighted_fit, as.matrix), 
               tol = 5 * 10^-7)  
  
})

test_that("clubSandwich works with weights of zero.", {
  
  data("LifeCycleSavings", package = "datasets")
  
  n_life <- nrow(LifeCycleSavings)
  LifeCycleSavings$cl <- substr(rownames(LifeCycleSavings), 1, 1)
  table(LifeCycleSavings$cl)
  LifeCycleSavings$wt <- rpois(n_life, lambda = 0.8)
  table(LifeCycleSavings$wt)
  
  lm_full <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = LifeCycleSavings, weights = wt)
  LCS_sub <- subset(LifeCycleSavings, wt > 0)
  lm_sub <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = LCS_sub, weights = wt)
  
  CR_full <- lapply(CR_types, function(x) vcovCR(lm_full, cluster = LifeCycleSavings$cl, type = x))
  CR_sub <- lapply(CR_types, function(x) vcovCR(lm_sub, cluster = LCS_sub$cl, type = x))
  expect_equal(CR_full, CR_sub, check.attributes = FALSE)
  
  test_full <- lapply(CR_types, function(x) coef_test(lm_full, vcov = x, cluster = LifeCycleSavings$cl, test = c("z","naive-t","Satterthwaite"), p_values = TRUE))
  test_sub <- lapply(CR_types, function(x) coef_test(lm_sub, vcov = x, cluster = LCS_sub$cl, test = c("z","naive-t","Satterthwaite"), p_values = TRUE))
  expect_equal(test_full, test_sub, check.attributes = FALSE)
  
  dat_miss <- LifeCycleSavings
  miss_indicator <- sample.int(n_life, size = round(n_life / 5))
  dat_miss$pop15[miss_indicator] <- NA
  dat_miss$cl[miss_indicator] <- NA
  with(dat_miss, table(wt, is.na(pop15)))
  
  lm_dropped <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = dat_miss, weights = wt)
  dat_complete <- subset(dat_miss, !is.na(pop15))
  lm_complete <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = dat_complete, weights = wt)
  
  CR_drop <- lapply(CR_types, function(x) vcovCR(lm_dropped, cluster = dat_miss$cl, type = x))
  CR_complete <- lapply(CR_types, function(x) vcovCR(lm_complete, cluster = dat_complete$cl, type = x))
  expect_equal(CR_drop, CR_complete)
  
  test_drop <- lapply(CR_types, function(x) coef_test(lm_dropped, vcov = x, cluster = dat_miss$cl, test = "All", p_values = FALSE))
  test_complete <- lapply(CR_types, function(x) coef_test(lm_complete, vcov = x, cluster = dat_complete$cl, test = "All", p_values = FALSE))
  expect_equal(test_drop, test_complete)
  
})

test_that("vcovCR errors when there is only one cluster.", {
  
  single_cluster_error_msg <- "Cluster-robust variance estimation will not work when the data only includes a single cluster."
  
  expect_error(
    vcovCR(lm_fit, cluster = dat$const, type = "CR0"), 
    single_cluster_error_msg
  )
  
  expect_error(
    conf_int(WLS_fit, vcov = "CR1", cluster = dat$const), single_cluster_error_msg
  )
  
  expect_error(
    coef_test(lm_fit, vcov = "CR2", cluster = dat$const), single_cluster_error_msg
  )
  
  expect_error(
    Wald_test(WLS_fit, constraints = constrain_zero(2:4), 
              vcov = "CR3", cluster = dat$const),
    single_cluster_error_msg
  )
  
})


test_that("vcovCR works with intercept-only model and user-specified weights.", {
  
  lm_int <- lm(y ~ 1, data = dat)
  HC_un <- coef_test(lm_int, vcov="CR2", cluster=dat$row, test = "All")
  
  # Unweighted, HC-robust
  N <- nobs(lm_int)
  yi <- dat$y
  wi <- rep(1, N)
  W <- sum(wi)
  wi <- wi / W
  vi <- rep(1, N)
  V <- sum(vi)
  ei <- residuals_CS(lm_int)
  M <- sum(wi^2 * vi)
  ai <- 1 / sqrt(1 - 2 * wi + M / vi)
  V_hand <- sum(wi^2 * ai^2 * ei^2)
  pi_theta_pj <- diag(vi) - tcrossprod(rep(1,N), wi * vi) - tcrossprod(wi * vi, rep(1, N)) + M
  df <- M^2 / sum(tcrossprod(ai^2 * wi^2) * (pi_theta_pj^2))
  
  expect_true(check_bread(lm_int, cluster = dat$row, y = dat$y))
  expect_true(check_CR(lm_int, vcov = "CR2", cluster = dat$row))
  expect_equal(sqrt(V_hand), HC_un$SE)
  expect_equal(df, HC_un$df_Satt)
  expect_equal(Inf, HC_un$df_z)
  expect_equal(N - 1, HC_un$df_t)
  

  # Unweighted, cluster-robust
  CR_un <- coef_test(lm_int, vcov="CR2", cluster=dat$cluster, test = "All")

  J <- nlevels(dat$cluster)
  w_j <- tapply(wi, dat$cluster, sum)
  e_j <- tapply(wi * ei, dat$cluster, sum) / w_j
  v_j <- tapply(wi^2 * vi, dat$cluster, sum) / w_j^2
  a_j <- 1 / sqrt(1 - 2 * w_j + M / v_j)
  V_hand <- sum(w_j^2 * a_j^2 * e_j^2)
  pi_theta_pj <- diag(v_j) - tcrossprod(rep(1,J), w_j * v_j) - tcrossprod(w_j * v_j, rep(1, J)) + M
  df <- M^2 / sum(tcrossprod(a_j^2 * w_j^2) * (pi_theta_pj^2))
  
  expect_true(check_bread(lm_int, cluster = dat$cluster, y = dat$y))
  expect_true(check_CR(lm_int, vcov = "CR2", cluster = dat$cluster))
  expect_equal(sqrt(V_hand), CR_un$SE)
  expect_equal(df, CR_un$df_Satt)
  expect_equal(Inf, CR_un$df_z)
  expect_equal(J - 1, CR_un$df_t)

  
  # Weighted, HC-robust  
  WLS_int <- lm(y ~ 1, data = dat, weights = w)
  HC_wt <- coef_test(WLS_int, vcov="CR2", cluster=dat$row, test = "All")
  
  N <- nobs(WLS_int)
  yi <- dat$y
  wi <- WLS_int$weights
  W <- sum(wi)
  wi <- wi / W
  vi <- rep(1, N)
  V <- sum(vi)
  ei <- residuals_CS(WLS_int)
  M <- sum(wi^2 * vi)
  ai <- 1 / sqrt(1 - 2 * wi + M / vi)
  V_hand <- sum(wi^2 * ai^2 * ei^2)
  pi_theta_pj <- diag(vi) - tcrossprod(rep(1,N), wi * vi) - tcrossprod(wi * vi, rep(1, N)) + M
  df <- M^2 / sum(tcrossprod(ai^2 * wi^2) * (pi_theta_pj^2))
  
  expect_true(check_bread(WLS_int, cluster = dat$row, y = dat$y))
  expect_true(check_CR(WLS_int, vcov = "CR2", cluster = dat$row))
  expect_equal(sqrt(V_hand), HC_wt$SE)
  expect_equal(df, HC_wt$df_Satt)
  expect_equal(Inf, HC_wt$df_z)
  expect_equal(N - 1, HC_wt$df_t)
  
  # Weighted, cluster-robust
  CR_wt <- coef_test(WLS_int, vcov="CR2", cluster=dat$cluster, test = "All")
  
  J <- nlevels(dat$cluster)
  w_j <- tapply(wi, dat$cluster, sum)
  e_j <- tapply(wi * ei, dat$cluster, sum) / w_j
  v_j <- tapply(wi^2 * vi, dat$cluster, sum) / w_j^2
  a_j <- 1 / sqrt(1 - 2 * w_j + M / v_j)
  V_hand <- sum(w_j^2 * a_j^2 * e_j^2)
  pi_theta_pj <- diag(v_j) - tcrossprod(rep(1,J), w_j * v_j) - tcrossprod(w_j * v_j, rep(1, J)) + M
  df <- M^2 / sum(tcrossprod(a_j^2 * w_j^2) * (pi_theta_pj^2))
  
  expect_true(check_bread(WLS_int, cluster = dat$cluster, y = dat$y))
  expect_true(check_CR(WLS_int, vcov = "CR2", cluster = dat$cluster))
  expect_equal(sqrt(V_hand), CR_wt$SE, tolerance = 10^-3)
  expect_equal(df, CR_wt$df_Satt, tolerance = 10^-3)
  expect_equal(Inf, CR_wt$df_z)
  expect_equal(J - 1, CR_wt$df_t)
  
})