R Under development (unstable) (2023-08-08 r84908 ucrt) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > cat("# testing use of 'id' to get appropriate standard errors \n") # testing use of 'id' to get appropriate standard errors > library("qgcomp") > #install.packages('sandwich', repos="https://cran.rstudio.com/", dependencies = c("Depends")) > 2#library("sandwich") [1] 2 > set.seed(2123) > N = 250 > t = 4 > dat <- data.frame(row.names = 1:(N*t)) > dat <- within(dat, { + id = do.call("c", lapply(1:N, function(x) rep(x, t))) + u = do.call("c", lapply(1:N, function(x) rep(runif(1), t))) + x1 = rnorm(N, u) + x2 = rnorm(N, u) + y = rnorm(N) + u + x1 - x2*x1 + }) > > > # from sandwich package 2.5.1 (to avoid having to install zoo dependency) > bread.glm <- function (x, ...) { + if (!is.null(x$na.action)) + class(x$na.action) <- "omit" + sx <- summary(x) + wres <- as.vector(residuals(x, "working")) * weights(x, "working") + dispersion <- if (substr(x$family$family, 1, 17) %in% c("poisson", + "binomial", "Negative Binomial")) + 1 + else sum(wres^2)/sum(weights(x, "working")) + return(sx$cov.unscaled * as.vector(sum(sx$df[1:2])) * dispersion) + } > > bread <- function (x, ...) + { + UseMethod("bread") + } > > > sandwich <- function (x, bread. = bread, meat. = meat, ...) { + if (is.list(x) && !is.null(x$na.action)) + class(x$na.action) <- "omit" + if (is.function(bread.)) + bread. <- bread.(x) + if (is.function(meat.)) + meat. <- meat.(x, ...) + n <- NROW(estfun(x)) + return(1/n * (bread. %*% meat. %*% bread.)) + } > > meatCL <- function (x, cluster = NULL, type = NULL, cadjust = TRUE, multi0 = FALSE, + ...) + { + if (is.list(x) && !is.null(x$na.action)) + class(x$na.action) <- "omit" + ef <- estfun(x, ...) + k <- NCOL(ef) + n <- NROW(ef) + rval <- matrix(0, nrow = k, ncol = k, dimnames = list(colnames(ef), + colnames(ef))) + if (is.null(cluster)) + cluster <- attr(x, "cluster") + if (is.null(cluster)) + cluster <- seq_len(n) + if (inherits(cluster, "formula")) { + cluster_tmp <- expand.model.frame(x, cluster, na.expand = FALSE) + cluster <- model.frame(cluster, cluster_tmp, na.action = na.pass) + } + else { + cluster <- as.data.frame(cluster) + } + if ((n != NROW(cluster)) && !is.null(x$na.action) && (class(x$na.action) %in% + c("exclude", "omit"))) { + cluster <- cluster[-x$na.action, , drop = FALSE] + } + if (NROW(cluster) != n) + stop("number of observations in 'cluster' and 'estfun()' do not match") + p <- NCOL(cluster) + if (p > 1L) { + cl <- lapply(seq_len(p), function(i) combn(seq_len(p), i, simplify = FALSE)) + cl <- unlist(cl, recursive = FALSE) + sign <- sapply(cl, function(i) (-1L)^(length(i) + 1L)) + paste_ <- function(...) paste(..., sep = "_") + for (i in (p + 1L):length(cl)) { + cluster <- cbind(cluster, Reduce(paste_, unclass(cluster[, + cl[[i]]]))) + } + if (multi0) + cluster[[length(cl)]] <- seq_len(n) + } + else { + cl <- list(1) + sign <- 1 + } + g <- sapply(1L:length(cl), function(i) { + if (is.factor(cluster[[i]])) { + length(levels(cluster[[i]])) + } + else { + length(unique(cluster[[i]])) + } + }) + if (is.null(type)) { + type <- if (class(x)[1L] == "lm") + "HC1" + else "HC0" + } + type <- match.arg(type, c("HC", "HC0", "HC1", "HC2", "HC3")) + if (type == "HC") + type <- "HC0" + if (type %in% c("HC2", "HC3")) { + if (any(g == n)) + h <- hatvalues(x) + if (!all(g == n)) { + if (!(class(x)[1L] %in% c("lm", "glm"))) + warning("clustered HC2/HC3 are only applicable to (generalized) linear regression models") + X <- model.matrix(x) + if (any(alias <- is.na(coef(x)))) + X <- X[, !alias, drop = FALSE] + attr(X, "assign") <- NULL + w <- weights(x, "working") + XX1 <- if (is.null(w)) + chol2inv(qr.R(qr(X))) + else chol2inv(qr.R(qr(X * sqrt(w)))) + res <- rowMeans(ef/X, na.rm = TRUE) + res[apply(abs(ef) < .Machine$double.eps, 1L, all)] <- 0 + matpower <- function(X, p) { + if ((ncol(X) == 1L) && (nrow(X) == 1L)) + return(X^p) + Xeig <- eigen(X, symmetric = TRUE) + if (any(Xeig$values < 0)) + stop("matrix is not positive semidefinite") + sqomega <- diag(Xeig$values^p) + return(Xeig$vectors %*% sqomega %*% t(Xeig$vectors)) + } + } + } + for (i in 1L:length(cl)) { + efi <- ef + adj <- if (multi0 & (i == length(cl))) { + if (type == "HC1") + (n - k)/(n - 1L) + else 1 + } + else { + if (cadjust) + g[i]/(g[i] - 1L) + else 1 + } + if (type %in% c("HC2", "HC3")) { + if (g[i] == n) { + efi <- if (type == "HC2") { + efi/sqrt(1 - h) + } + else { + efi/(1 - hatvalues(x)) + } + } + else { + for (j in unique(cluster[[i]])) { + ij <- which(cluster[[i]] == j) + Hij <- if (is.null(w)) { + X[ij, , drop = FALSE] %*% XX1 %*% t(X[ij, + , drop = FALSE]) + } + else { + X[ij, , drop = FALSE] %*% XX1 %*% t(X[ij, + , drop = FALSE]) %*% diag(w[ij], nrow = length(ij), + ncol = length(ij)) + } + Hij <- if (type == "HC2") { + matpower(diag(length(ij)) - Hij, -0.5) + } + else { + solve(diag(length(ij)) - Hij) + } + efi[ij, ] <- drop(Hij %*% res[ij]) * X[ij, + , drop = FALSE] + } + } + efi <- sqrt((g[i] - 1L)/g[i]) * efi + } + efi <- if (g[i] < n) + apply(efi, 2L, rowsum, cluster[[i]]) + else efi + rval <- rval + sign[i] * adj * crossprod(efi)/n + } + if (type == "HC1") + rval <- (n - 1L)/(n - k) * rval + return(rval) + } > > estfun.glm <- function (x, ...) { + xmat <- model.matrix(x) + xmat <- naresid(x$na.action, xmat) + if (any(alias <- is.na(coef(x)))) + xmat <- xmat[, !alias, drop = FALSE] + wres <- as.vector(residuals(x, "working")) * weights(x, "working") + dispersion <- if (substr(x$family$family, 1, 17) %in% c("poisson", + "binomial", "Negative Binomial")) + 1 + else sum(wres^2, na.rm = TRUE)/sum(weights(x, "working"), + na.rm = TRUE) + rval <- wres * xmat/dispersion + attr(rval, "assign") <- NULL + attr(rval, "contrasts") <- NULL + #res <- residuals(x, type = "pearson") + #if (is.ts(res)) + # rval <- ts(rval, start = start(res), frequency = frequency(res)) + #if (is.zoo(res)) + # rval <- zoo(rval, index(res), attr(res, "frequency")) + return(rval) + } > > estfun <- function (x, ...) + { + UseMethod("estfun") + } > > vcovCL <- function (x, cluster = NULL, type = NULL, sandwich = TRUE, fix = FALSE, + ...) + { + rval <- meatCL(x, cluster = cluster, type = type, ...) + if (sandwich) + rval <- sandwich(x, meat. = rval) + if (fix && any((eig <- eigen(rval, symmetric = TRUE))$values < + 0)) { + eig$values <- pmax(eig$values, 0) + rval[] <- crossprod(sqrt(eig$values) * t(eig$vectors)) + } + return(rval) + } > ################################################################################ > # > # > # > ################################################################################ > # pre quantize > expnms = "x1" > datl = quantize(dat, expnms = expnms) > > > #' \dontrun{ > #' > #' # delta method/bootstrap variance ignoring clustering > #' noclust = qgcomp.noboot(y~ x1, data=datl$dat, id="id", family=gaussian(), q = NULL) > #' noclust.b = qgcomp.boot(y~ x1, data=datl$dat, family=gaussian(), q = NULL, MCsize=1000) > #' > #' # bootstrap variance with sampling by cluster > #' clust.b = qgcomp.boot(y~ x1, data=datl$dat, id="id", family=gaussian(), q = NULL, MCsize=5000, B = 500) > #' #clust.g = summary(geeglm(y~x1, data=datl$dat, id=id, corstr = "independence")) > #' fitglm = glm(y~x1, data=datl$dat) > #' # cluster robust variance > #' sw.cov = vcovCL(fitglm, cluster=~id, type = "HC0")[2,2] > #' > #' stopifnot(all.equal(clust.b$var.psi, sw.cov, tolerance = 0.005)) > #' > #' # change in variance should be the same in gee and bootstrap > #' stopifnot( > #' (clust.b$var.psi > noclust.b$var.psi) & (sw.cov > noclust.b$var.psi) | > #' (clust.b$var.psi < noclust.b$var.psi) & (sw.cov < noclust.b$var.psi) > #' ) > #' stopifnot( > #' (clust.b$var.psi > noclust$var.psi) & (sw.cov > noclust$var.psi) | > #' (clust.b$var.psi < noclust$var.psi) & (sw.cov < noclust$var.psi) > #' ) > #' } > cat("done") done> > proc.time() user system elapsed 2.75 0.35 3.12