R Under development (unstable) (2024-02-25 r85988 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(lfe) Loading required package: Matrix > # From http://diffuseprior.wordpress.com/2012/06/15/standard-robust-and-clustered-standard-errors-computed-in-r/ > set.seed(123) > options(lfe.threads=2,digits=5,warn=1) > ols <- function(form, data, robust=FALSE, cluster=NULL,digits=getOption('digits')){ + r1 <- lm(form, data) + if(length(cluster)!=0){ + data <- na.omit(data[,c(colnames(r1$model),cluster)]) + r1 <- lm(form, data) + } + X <- model.matrix(r1) + n <- dim(X)[1] + k <- dim(X)[2] + if(robust==FALSE & length(cluster)==0){ + se <- sqrt(diag(solve(crossprod(X)) * as.numeric(crossprod(resid(r1))/(n-k)))) + res <- cbind(coef(r1),se) + } + if(robust==TRUE){ + u <- matrix(resid(r1)) + meat1 <- t(X) %*% diag(diag(crossprod(t(u)))) %*% X + dfc <- n/(n-k) + se <- sqrt(dfc*diag(solve(crossprod(X)) %*% meat1 %*% solve(crossprod(X)))) + res <- cbind(coef(r1),se) + } + if(length(cluster)!=0){ + clus <- cbind(X,data[,cluster],resid(r1)) + colnames(clus)[(dim(clus)[2]-1):dim(clus)[2]] <- c(cluster,"resid") + m <- dim(table(clus[,cluster])) + dfc <- (m/(m-1))*((n-1)/(n-k)) + uclust <- apply(resid(r1)*X,2, function(x) tapply(x, clus[,cluster], sum)) + se <- sqrt(diag(solve(crossprod(X)) %*% (t(uclust) %*% uclust) %*% solve(crossprod(X)))*dfc) + res <- cbind(coef(r1),se) + } + res <- cbind(res,res[,1]/res[,2],(1-pnorm(abs(res[,1]/res[,2])))*2) + res1 <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep=""),res)),nrow=dim(res)[1]) + rownames(res1) <- rownames(res) + colnames(res1) <- c("Estimate","Std. Error","t value","Pr(>|t|)") + return(res1) + } > > > > x <- rnorm(1000) > f1 <- sample(8,length(x), repl=T) > clu <- factor(sample(10,length(x), replace=T)) > cluerr <- rnorm(nlevels(clu))[clu] > clu2 <- factor(sample(10,length(x), replace=T)) > cluerr2 <- rnorm(nlevels(clu2))[clu2] > err <- abs(x)*rnorm(length(x)) + cluerr + cluerr2 > y <- x +rnorm(nlevels(clu),sd=0.3)[clu] + log(f1) + err > dat <- data.frame(y, x, f1=factor(f1), cluster=clu,cluster2=clu2) > summary(felm(y ~x |f1, dat)) Call: felm(formula = y ~ x | f1, data = dat) Residuals: Min 1Q Median 3Q Max -7.468 -0.873 0.022 1.036 5.203 Coefficients: Estimate Std. Error t value Pr(>|t|) x 0.9058 0.0487 18.6 <2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 1.52 on 991 degrees of freedom Multiple R-squared(full model): 0.357 Adjusted R-squared: 0.352 Multiple R-squared(proj model): 0.259 Adjusted R-squared: 0.253 F-statistic(full model):68.8 on 8 and 991 DF, p-value: <2e-16 F-statistic(proj model): 346 on 1 and 991 DF, p-value: <2e-16 > # CGM clustering, i.e. one factor means standard one-way clustering > summary(felm(y ~x + f1, dat, clustervar='clu')) Warning in felm(y ~ x + f1, dat, clustervar = "clu") : Argument(s) clustervar are deprecated and will be removed, use multipart formula instead Call: felm(formula = y ~ x + f1, data = dat, clustervar = "clu") Residuals: Min 1Q Median 3Q Max -7.468 -0.873 0.022 1.036 5.203 Coefficients: Estimate Cluster s.e. t value Pr(>|t|) (Intercept) -0.494 0.363 -1.36 0.1743 x 0.906 0.064 14.15 < 2e-16 *** f12 0.680 0.202 3.36 0.0008 *** f13 1.081 0.273 3.97 7.8e-05 *** f14 1.392 0.181 7.69 3.4e-14 *** f15 1.501 0.238 6.30 4.5e-10 *** f16 1.919 0.308 6.24 6.6e-10 *** f17 1.855 0.244 7.59 7.3e-14 *** f18 2.175 0.203 10.69 < 2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 1.52 on 991 degrees of freedom Multiple R-squared(full model): 0.357 Adjusted R-squared: 0.352 Multiple R-squared(proj model): 0.357 Adjusted R-squared: 0.352 F-statistic(full model, *iid*):68.8 on 8 and 991 DF, p-value: <2e-16 F-statistic(proj model): 2.01e+03 on 8 and 9 DF, p-value: 1.04e-13 > # this will make my experimental clustered errors for f1, typically better for few groups > # summary(felm(y ~x + f1|0|0|cluster+cluster2, dat)) > summary(felm(y ~x + f1|0|0|cluster+cluster2, dat, psdef=FALSE)) Call: felm(formula = y ~ x + f1 | 0 | 0 | cluster + cluster2, data = dat, psdef = FALSE) Residuals: Min 1Q Median 3Q Max -7.468 -0.873 0.022 1.036 5.203 Coefficients: Estimate Cluster s.e. t value Pr(>|t|) (Intercept) -0.4939 0.3894 -1.27 0.20502 x 0.9058 0.0622 14.56 < 2e-16 *** f12 0.6796 0.1825 3.72 0.00021 *** f13 1.0809 0.2300 4.70 3.0e-06 *** f14 1.3923 0.1769 7.87 9.4e-15 *** f15 1.5012 0.2291 6.55 9.0e-11 *** f16 1.9191 0.2780 6.90 9.0e-12 *** f17 1.8555 0.2133 8.70 < 2e-16 *** f18 2.1749 0.2310 9.42 < 2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 1.52 on 991 degrees of freedom Multiple R-squared(full model): 0.357 Adjusted R-squared: 0.352 Multiple R-squared(proj model): 0.357 Adjusted R-squared: 0.352 F-statistic(full model, *iid*):68.8 on 8 and 991 DF, p-value: <2e-16 F-statistic(proj model): -39.3 on 8 and 9 DF, p-value: 1 > # this will sample them for f1, also test having cluster in the third component > summary(estg <- felm(y ~x | f1|0|cluster, dat)) Call: felm(formula = y ~ x | f1 | 0 | cluster, data = dat) Residuals: Min 1Q Median 3Q Max -7.468 -0.873 0.022 1.036 5.203 Coefficients: Estimate Cluster s.e. t value Pr(>|t|) x 0.906 0.064 14.2 <2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 1.52 on 991 degrees of freedom Multiple R-squared(full model): 0.357 Adjusted R-squared: 0.352 Multiple R-squared(proj model): 0.259 Adjusted R-squared: 0.253 F-statistic(full model, *iid*):68.8 on 8 and 991 DF, p-value: <2e-16 F-statistic(proj model): 200 on 1 and 9 DF, p-value: 1.87e-07 > # Comparable estimable function > ef <- function(gamma, addnames) { + ref1 <- gamma[[1]] + res <- c(gamma[[1]],gamma[2:8]-gamma[[1]]) + if(addnames) { + names(res) <- c('icpt',paste('f1',2:8,sep='.')) + } + res + } > getfe(estg,ef=ef,se=TRUE,bN=200) effect clusterse se icpt -0.49390 0.34199 0.34199 f1.2 0.67959 0.19774 0.19774 f1.3 1.08092 0.25140 0.25140 f1.4 1.39229 0.16588 0.16588 f1.5 1.50124 0.21863 0.21863 f1.6 1.91912 0.28410 0.28410 f1.7 1.85550 0.22160 0.22160 f1.8 2.17492 0.18960 0.18960 > > #summary(estr <- felm(y ~x + G(f1) + G(f2), dat), robust=TRUE) > #ols(y ~x + f1 + f2, dat, robust=TRUE) > #getfe(estr,ef=ef,se=T,bN=2000, robust=TRUE) > #ols(y ~x + f1 + f2, dat, cluster="cluster") > > proc.time() user system elapsed 1.32 0.10 1.42