context("geeglm objects") set.seed(202201030) skip_if_not_installed("geepack") library(geepack) J <- 20 tp <- 5 # Simulating a dataset idvar <- rep(1:J, each=tp) idvar2 <- factor(sample(LETTERS)[idvar]) timeorder <- rep(1:tp, J) tvar <- timeorder + rnorm(length(timeorder)) x1 <- rnorm(length(timeorder)) x2 <- rnorm(J)[idvar] uuu <- rep(rnorm(J), each=tp) yvar <- 1 + 0.4 * x1 + 0.2 * x2 + 2 * tvar + uuu + rnorm(length(tvar)) simdat <- data.frame(idvar, timeorder, x1, x2, tvar, yvar, idvar2) simdatPerm <- simdat[sample(nrow(simdat)),] simdatPerm <- simdatPerm[order(simdatPerm$idvar),] wav <- simdatPerm$timeorder # AR1 + wave geeglm_AR1_wav <- geeglm(yvar ~ tvar + x1, id = idvar, data = simdatPerm, corstr = "ar1", waves = timeorder) geeglm_AR1_wav # AR1 geeglm_AR1 <- geeglm(yvar ~ tvar, id = idvar, data = simdat, family = "gaussian", corstr = "ar1") # Independence geeglm_ind <- geeglm(yvar ~ tvar + x1 + x2, id = idvar, data = simdat, corstr = "independence") # Exchangeable geeglm_exch <- geeglm(yvar ~ tvar + x2, id = idvar, data = simdat, corstr = "exchangeable") geeglm_exch2 <- geeglm(yvar ~ tvar + x2, id = idvar2, data = simdat, corstr = "exchangeable") # Unstructured geeglm_unstr <- geeglm(yvar ~ tvar, id = idvar, data = simdat, corstr = "unstructured") geeglm_unstr2 <- geeglm(yvar ~ tvar, id = idvar2, data = simdat, corstr = "unstructured") # User-defined zcor <- genZcor(clusz = table(simdat$idvar), waves = simdat$timeorder, corstrv = 4) zcor_user <- zcor geeglm_user <- geeglm(yvar ~ tvar + x1, id = idvar, waves = timeorder, data = simdat, zcor = zcor_user, corstr = "userdefined") # User-defined, Toeplitz zcor_toep <- matrix(NA, nrow(zcor), 4) zcor_toep[,1] <- apply(zcor[,c(1, 5, 8,10)], 1, sum) zcor_toep[,2] <- apply(zcor[,c(2, 6, 9)], 1, sum) zcor_toep[,3] <- apply(zcor[,c(3, 7)], 1, sum) zcor_toep[,4] <- zcor[,4] geeglm_toep <- geeglm(yvar ~ tvar + x1, id = idvar, waves = timeorder, data = simdat, zcor = zcor_toep, corstr = "userdefined") # Fixed correlation cor_fix <- matrix(c(1 , 0.5 , 0.25, 0.125, 0.125, 0.5 , 1 , 0.25, 0.125, 0.125, 0.25 , 0.25 , 1 , 0.5 , 0.125, 0.125, 0.125, 0.5 , 1 , 0.125, 0.125, 0.125, 0.125, 0.125, 1 ), nrow=5, ncol=5) zcor_fix <- fixed2Zcor(cor_fix, id=simdat$idvar, waves=simdat$timeorder) geeglm_fix <- geeglm(yvar ~ tvar + x1 + x2, id = idvar, waves = timeorder, data = simdat, zcor = zcor_fix, corstr = "fixed") test_that("bread works", { expect_true(check_bread(geeglm_AR1_wav, cluster = simdatPerm$idvar, y = simdatPerm$yvar, tol = 1e-5)) expect_true(check_bread(geeglm_AR1, cluster = simdat$idvar, y = simdat$yvar, tol = 1e-5)) expect_true(check_bread(geeglm_ind, cluster = simdat$idvar, y = simdat$yvar, tol = 1e-5)) expect_true(check_bread(geeglm_exch, cluster = simdat$idvar, y = simdat$yvar, tol = 1e-5)) expect_true(check_bread(geeglm_exch2, cluster = simdat$idvar2, y = simdat$yvar, tol = 1e-5)) expect_true(check_bread(geeglm_unstr, cluster = simdat$idvar, y = simdat$yvar, tol = 1e-5)) expect_true(check_bread(geeglm_unstr2, cluster = simdat$idvar2, y = simdat$yvar, tol = 1e-5)) expect_true(check_bread(geeglm_user, cluster = simdat$idvar, y = simdat$yvar, tol = 1e-5)) expect_true(check_bread(geeglm_toep, cluster = simdat$idvar, y = simdat$yvar, tol = 1e-5)) expect_true(check_bread(geeglm_fix, cluster = simdat$idvar, y = simdat$yvar, tol = 1e-5)) }) test_that("vcovCR options work for CR2", { CR2_AR1_wav <- vcovCR(geeglm_AR1_wav, type = "CR2") expect_equal(vcovCR(geeglm_AR1_wav, cluster = simdatPerm$idvar, type = "CR2"), CR2_AR1_wav) expect_equal(vcovCR(geeglm_AR1_wav, cluster = simdatPerm$idvar, type = "CR2", inverse_var = TRUE), CR2_AR1_wav) expect_false(identical(vcovCR(geeglm_AR1_wav, type = "CR2", cluster = simdatPerm$idvar, inverse_var = FALSE), CR2_AR1_wav)) target <- targetVariance(geeglm_AR1_wav, cluster = simdatPerm$idvar) expect_equal(vcovCR(geeglm_AR1_wav, type = "CR2", target = target, inverse_var = TRUE), CR2_AR1_wav, ignore_attr = TRUE) attr(CR2_AR1_wav, "inverse_var") <- FALSE expect_equal(vcovCR(geeglm_AR1_wav, type = "CR2", target = target, inverse_var = FALSE), CR2_AR1_wav, ignore_attr = TRUE) }) test_that("vcovCR options work for CR4", { CR4_AR1_wav <- vcovCR(geeglm_AR1_wav, type = "CR4") expect_equal(vcovCR(geeglm_AR1_wav, cluster = simdatPerm$idvar, type = "CR4"), CR4_AR1_wav) expect_equal(vcovCR(geeglm_AR1_wav, cluster = simdatPerm$idvar, type = "CR4", inverse_var = TRUE), CR4_AR1_wav) expect_false(identical(vcovCR(geeglm_AR1_wav, cluster = simdatPerm$idvar, type = "CR4", inverse_var = FALSE), CR4_AR1_wav)) target <- targetVariance(geeglm_AR1_wav, cluster = simdatPerm$idvar) expect_equal(vcovCR(geeglm_AR1_wav, cluster = simdatPerm$idvar, type = "CR4", target = target, inverse_var = TRUE), CR4_AR1_wav, ignore_attr = TRUE) attr(CR4_AR1_wav, "inverse_var") <- FALSE expect_equal(vcovCR(geeglm_AR1_wav, cluster = simdatPerm$idvar, type = "CR4", target = target, inverse_var = FALSE), CR4_AR1_wav, ignore_attr = TRUE) }) test_that("CR2 and CR4 are target-unbiased", { expect_true(check_CR(geeglm_AR1_wav, cluster = simdatPerm$idvar, vcov = "CR2")) expect_true(check_CR(geeglm_AR1, cluster = simdat$idvar, vcov = "CR2")) expect_true(check_CR(geeglm_ind, cluster = simdat$idvar, vcov = "CR2")) expect_true(check_CR(geeglm_exch, cluster = simdat$idvar, vcov = "CR2")) expect_true(check_CR(geeglm_exch2, cluster = simdat$idvar2, vcov = "CR2")) expect_true(check_CR(geeglm_unstr, cluster = simdat$idvar, vcov = "CR2")) expect_true(check_CR(geeglm_unstr2, cluster = simdat$idvar2, vcov = "CR2")) expect_true(check_CR(geeglm_user, cluster = simdat$idvar, vcov = "CR2")) expect_true(check_CR(geeglm_toep, cluster = simdat$idvar, vcov = "CR2")) expect_true(check_CR(geeglm_fix, cluster = simdat$idvar, vcov = "CR2")) expect_true(check_CR(geeglm_AR1_wav, cluster = simdatPerm$idvar, vcov = "CR4")) expect_true(check_CR(geeglm_AR1, cluster = simdat$idvar, vcov = "CR4")) expect_true(check_CR(geeglm_ind, cluster = simdat$idvar, vcov = "CR4")) expect_true(check_CR(geeglm_exch, cluster = simdat$idvar, vcov = "CR4")) expect_true(check_CR(geeglm_exch2, cluster = simdat$idvar, vcov = "CR4")) expect_true(check_CR(geeglm_unstr, cluster = simdat$idvar, vcov = "CR4")) expect_true(check_CR(geeglm_unstr2, cluster = simdat$idvar, vcov = "CR4")) expect_true(check_CR(geeglm_user, cluster = simdat$idvar, vcov = "CR4")) expect_true(check_CR(geeglm_toep, cluster = simdat$idvar, vcov = "CR4")) expect_true(check_CR(geeglm_fix, cluster = simdat$idvar, vcov = "CR4")) }) CR_types <- paste0("CR",0:4) test_that("Order doesn't matter.", { skip_on_cran() check_sort_order(geeglm_ind, dat = simdat, CR_types = CR_types) check_sort_order(geeglm_ind, dat = simdat, arrange = "idvar2", CR_types = CR_types) check_sort_order(geeglm_exch, dat = simdat, arrange = "idvar", CR_types = CR_types) check_sort_order(geeglm_exch2, dat = simdat, arrange = "idvar", CR_types = CR_types) check_sort_order(geeglm_exch2, dat = simdat, arrange = "idvar2", CR_types = CR_types) expect_error(check_sort_order(geeglm_unstr, dat = simdat, arrange = "idvar", CR_types = CR_types)) }) test_that("clubSandwich works with dropped observations", { dat_miss <- simdat dat_miss$yvar[sample.int(nrow(simdat), size = round(nrow(simdat) / 10))] <- NA dat_complete <- subset(dat_miss, !is.na(yvar)) mod_dropped <- geeglm(yvar ~ tvar, id = idvar, data = dat_miss, corstr = "independence") mod_complete <- geeglm(yvar ~ tvar, id = idvar, data = dat_complete, corstr = "independence") CR_drop <- lapply(CR_types, function(x) vcovCR(mod_dropped, type = x)) CR_complete <- lapply(CR_types, function(x) vcovCR(mod_complete, type = x)) expect_equal(CR_drop, CR_complete) test_drop <- lapply(CR_types, function(x) coef_test(mod_dropped, cluster = dat_miss$idvar, vcov = x, test = "All", p_values = FALSE)) test_complete <- lapply(CR_types, function(x) coef_test(mod_complete, cluster = dat_complete$idvar, vcov = x, test = "All", p_values = FALSE)) compare_ttests(test_drop, test_complete) }) test_that("vcovCR works for clustering variables higher than id variable.", { # create higher level pair_id <- rep(1:nlevels(as.factor(simdat$idvar)), each = 3, length.out = nlevels(as.factor(simdat$idvar)))[as.factor(simdat$idvar)] # factor cluster pair_id <- factor(pair_id) # cluster at higher level V_AR1 <- vcovCR(geeglm_AR1, type = "CR2", cluster = pair_id) V_AR1_wav <- vcovCR(geeglm_AR1_wav, type = "CR2", cluster = pair_id) V_ind <- vcovCR(geeglm_ind, type = "CR2", cluster = pair_id) V_exch <- vcovCR(geeglm_exch, type = "CR2", cluster = pair_id) V_exch2 <- vcovCR(geeglm_exch2, type = "CR2", cluster = pair_id) V_unstr <- vcovCR(geeglm_unstr, type = "CR2", cluster = pair_id) V_unstr2 <- vcovCR(geeglm_unstr2, type = "CR2", cluster = pair_id) V_user <- vcovCR(geeglm_user, type = "CR2", cluster = pair_id) V_toep <- vcovCR(geeglm_toep, type = "CR2", cluster = pair_id) V_fix <- vcovCR(geeglm_fix, type = "CR2", cluster = pair_id) expect_is(V_AR1, "vcovCR") expect_is(V_AR1_wav, "vcovCR") expect_is(V_ind, "vcovCR") expect_is(V_exch, "vcovCR") expect_is(V_exch2, "vcovCR") expect_is(V_unstr, "vcovCR") expect_is(V_unstr2, "vcovCR") expect_is(V_user, "vcovCR") expect_is(V_toep, "vcovCR") expect_is(V_fix, "vcovCR") # check that result does not depend on sort-order scramble_id <- factor(simdat$idvar, levels = sample(1:J)) dat_higher <- cbind(simdat, pair_id = factor(pair_id), scramble_id) dat_higher <- dat_higher[order(dat_higher$scramble_id),] dat_scramble <- dat_higher[sample(nrow(dat_higher)),] dat_scramble <- dat_scramble[order(dat_scramble$scramble_id),] V_AR1_scramble <- vcovCR(update(geeglm_AR1, data = dat_higher), type = "CR2", cluster = dat_higher$pair_id) expect_equal(V_AR1, V_AR1_scramble, tol = 10^-6, check.attributes = FALSE) V_AR1_wav_scramble <- vcovCR(update(geeglm_AR1_wav, data = dat_higher), type = "CR2", cluster = dat_higher$pair_id) expect_equal(V_AR1_wav, V_AR1_wav_scramble, tol = 10^-6, check.attributes = FALSE) V_ind_scramble <- vcovCR(update(geeglm_ind, data = dat_scramble), type = "CR2", cluster = dat_scramble$pair_id) expect_equal(V_ind, V_ind_scramble, tol = 10^-6, check.attributes = FALSE) V_exch2_scramble <- vcovCR(update(geeglm_exch2, data = dat_scramble), type = "CR2", cluster = dat_scramble$pair_id) expect_equal(V_exch2, V_exch2_scramble, tol = 10^-6, check.attributes = FALSE) V_unstr_scramble <- vcovCR(update(geeglm_unstr, data = dat_higher), type = "CR2", cluster = dat_higher$pair_id) expect_equal(V_unstr, V_unstr_scramble, tol = 10^-6, check.attributes = FALSE) }) check_geeglm <- function(obj, type = c("CR0","CR3")) { if (obj$std.err == "san.se") { const <- 1 cr_club <- as.matrix(vcovCR(obj, type = "CR0")) } else if (obj$std.err == "jack") { J <- length(unique(obj$id)) p <- nrow(obj$geese$infls) const <- (J - p) / J cr_club <- as.matrix(vcovCR(obj, type = "CR3")) } cr_pack <- stats::vcov(obj) expect_equal(cr_pack, const * cr_club) } geeglm_user_jack <- update(geeglm_user, std.err = "jack") geeglm_toep_jack <- update(geeglm_toep, std.err = "jack") geeglm_fix_jack <- update(geeglm_fix, std.err = "jack") test_that("vcovCR agrees with geeglm for CR0 and CR3.", { check_geeglm(geeglm_AR1_wav) check_geeglm(geeglm_AR1) check_geeglm(geeglm_ind) check_geeglm(geeglm_exch) check_geeglm(geeglm_exch2) check_geeglm(geeglm_unstr) check_geeglm(geeglm_unstr2) check_geeglm(geeglm_user) check_geeglm(geeglm_toep) check_geeglm(geeglm_fix) check_geeglm(update(geeglm_AR1_wav, std.err = "jack")) check_geeglm(update(geeglm_AR1, std.err = "jack")) check_geeglm(update(geeglm_ind, std.err = "jack")) check_geeglm(update(geeglm_exch, std.err = "jack")) check_geeglm(update(geeglm_exch2, std.err = "jack")) check_geeglm(update(geeglm_unstr, std.err = "jack")) check_geeglm(update(geeglm_unstr2, std.err = "jack")) check_geeglm(geeglm_user_jack) check_geeglm(geeglm_toep_jack) check_geeglm(geeglm_fix_jack) }) test_that("vcovCR agrees with geeglm for examples from geepack documentation.", { # Poisson with unstructured correlation data(seizure, package = "geepack") seiz.l <- reshape(seizure, varying=list(c("base","y1", "y2", "y3", "y4")), v.names="y", times=0:4, direction="long") seiz.l <- seiz.l[order(seiz.l$id, seiz.l$time),] seiz.l$t <- ifelse(seiz.l$time == 0, 8, 2) seiz.l$x <- ifelse(seiz.l$time == 0, 0, 1) m_pois_un <- geeglm(y ~ offset(log(t)) + x + trt + x:trt, id = id, data=seiz.l, corstr="exch", family=poisson, std.err = "jack") m_pois_un3 <- update(m_pois_un) check_geeglm(m_pois_un) check_geeglm(m_pois_un3) # Poisson with no correlation m_pois_id <- geeglm(y ~ offset(log(t)) + x + trt + x:trt, id = id, data = seiz.l, family = poisson, corstr = "independence", std.err = "jack") check_geeglm(m_pois_id) # Binomial with exchangeable correlation data(ohio, package = "geepack") m_bin_ex <- geeglm(resp ~ age + smoke + age:smoke, id=id, data=ohio, family=binomial, corstr="exch", scale.fix=TRUE, std.err = "jack") check_geeglm(m_bin_ex) # Binomial with AR(1) correlation m_bin_ar <- geeglm(resp ~ age + smoke + age:smoke, id=id, data=ohio, family=binomial, corstr="ar1", scale.fix=TRUE, std.err = "jack") check_geeglm(m_bin_ar) # Binomial with unstructured correlation data(respiratory, package = "geepack") respiratory$center <- factor(respiratory$center) respiratory$id_center <- factor(paste(respiratory$center, respiratory$id, sep = "-")) m_bin_un <- geeglm(outcome ~ center + treat + age + baseline, data = respiratory, id = id_center, family = binomial(), corstr = "unstructured", std.err = "jack") check_geeglm(m_bin_un) })