R Under development (unstable) (2024-02-21 r85967 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(lfe)
Loading required package: Matrix
> # From http://diffuseprior.wordpress.com/2012/06/15/standard-robust-and-clustered-standard-errors-computed-in-r/
> set.seed(123)
> options(lfe.threads=2,digits=5,warn=1)
> ols <- function(form, data, robust=FALSE, cluster=NULL,digits=getOption('digits')){
+     r1 <- lm(form, data)
+       if(length(cluster)!=0){
+             data <- na.omit(data[,c(colnames(r1$model),cluster)])
+                 r1 <- lm(form, data)
+           }
+       X <- model.matrix(r1)
+       n <- dim(X)[1]
+       k <- dim(X)[2]
+       if(robust==FALSE & length(cluster)==0){
+             se <- sqrt(diag(solve(crossprod(X)) * as.numeric(crossprod(resid(r1))/(n-k))))
+                 res <- cbind(coef(r1),se)
+           }
+       if(robust==TRUE){
+             u <- matrix(resid(r1))
+                 meat1 <- t(X) %*% diag(diag(crossprod(t(u)))) %*% X
+                 dfc <- n/(n-k)
+                 se <- sqrt(dfc*diag(solve(crossprod(X)) %*% meat1 %*% solve(crossprod(X))))
+                 res <- cbind(coef(r1),se)
+           }
+       if(length(cluster)!=0){
+             clus <- cbind(X,data[,cluster],resid(r1))
+                 colnames(clus)[(dim(clus)[2]-1):dim(clus)[2]] <- c(cluster,"resid")
+                 m <- dim(table(clus[,cluster]))
+                 dfc <- (m/(m-1))*((n-1)/(n-k))
+                 uclust  <- apply(resid(r1)*X,2, function(x) tapply(x, clus[,cluster], sum))
+                 se <- sqrt(diag(solve(crossprod(X)) %*% (t(uclust) %*% uclust) %*% solve(crossprod(X)))*dfc)
+                 res <- cbind(coef(r1),se)
+           }
+       res <- cbind(res,res[,1]/res[,2],(1-pnorm(abs(res[,1]/res[,2])))*2)
+       res1 <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep=""),res)),nrow=dim(res)[1])
+       rownames(res1) <- rownames(res)
+       colnames(res1) <- c("Estimate","Std. Error","t value","Pr(>|t|)")
+       return(res1)
+   }
> 
> 
> 
> x <- rnorm(1000) 
> f1 <- sample(8,length(x), repl=T)
> clu <- factor(sample(10,length(x), replace=T))
> cluerr <- rnorm(nlevels(clu))[clu]
> clu2 <- factor(sample(10,length(x), replace=T))
> cluerr2 <- rnorm(nlevels(clu2))[clu2]
> err <- abs(x)*rnorm(length(x)) + cluerr + cluerr2
> y <- x +rnorm(nlevels(clu),sd=0.3)[clu] +  log(f1) + err
> dat <- data.frame(y, x, f1=factor(f1), cluster=clu,cluster2=clu2)
> summary(felm(y ~x |f1, dat))

Call:
   felm(formula = y ~ x | f1, data = dat) 

Residuals:
   Min     1Q Median     3Q    Max 
-7.468 -0.873  0.022  1.036  5.203 

Coefficients:
  Estimate Std. Error t value Pr(>|t|)    
x   0.9058     0.0487    18.6   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.52 on 991 degrees of freedom
Multiple R-squared(full model): 0.357   Adjusted R-squared: 0.352 
Multiple R-squared(proj model): 0.259   Adjusted R-squared: 0.253 
F-statistic(full model):68.8 on 8 and 991 DF, p-value: <2e-16 
F-statistic(proj model):  346 on 1 and 991 DF, p-value: <2e-16 


> # CGM clustering, i.e. one factor means standard one-way clustering
> summary(felm(y ~x + f1, dat, clustervar='clu'))
Warning in felm(y ~ x + f1, dat, clustervar = "clu") :
  Argument(s) clustervar are deprecated and will be removed, use multipart formula instead

Call:
   felm(formula = y ~ x + f1, data = dat, clustervar = "clu") 

Residuals:
   Min     1Q Median     3Q    Max 
-7.468 -0.873  0.022  1.036  5.203 

Coefficients:
            Estimate Cluster s.e. t value Pr(>|t|)    
(Intercept)   -0.494        0.363   -1.36   0.1743    
x              0.906        0.064   14.15  < 2e-16 ***
f12            0.680        0.202    3.36   0.0008 ***
f13            1.081        0.273    3.97  7.8e-05 ***
f14            1.392        0.181    7.69  3.4e-14 ***
f15            1.501        0.238    6.30  4.5e-10 ***
f16            1.919        0.308    6.24  6.6e-10 ***
f17            1.855        0.244    7.59  7.3e-14 ***
f18            2.175        0.203   10.69  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.52 on 991 degrees of freedom
Multiple R-squared(full model): 0.357   Adjusted R-squared: 0.352 
Multiple R-squared(proj model): 0.357   Adjusted R-squared: 0.352 
F-statistic(full model, *iid*):68.8 on 8 and 991 DF, p-value: <2e-16 
F-statistic(proj model): 2.01e+03 on 8 and 9 DF, p-value: 1.04e-13 


> # this will make my experimental clustered errors for f1, typically better for few groups
> # summary(felm(y ~x + f1|0|0|cluster+cluster2, dat))
> summary(felm(y ~x + f1|0|0|cluster+cluster2, dat, psdef=FALSE))

Call:
   felm(formula = y ~ x + f1 | 0 | 0 | cluster + cluster2, data = dat,      psdef = FALSE) 

Residuals:
   Min     1Q Median     3Q    Max 
-7.468 -0.873  0.022  1.036  5.203 

Coefficients:
            Estimate Cluster s.e. t value Pr(>|t|)    
(Intercept)  -0.4939       0.3894   -1.27  0.20502    
x             0.9058       0.0622   14.56  < 2e-16 ***
f12           0.6796       0.1825    3.72  0.00021 ***
f13           1.0809       0.2300    4.70  3.0e-06 ***
f14           1.3923       0.1769    7.87  9.4e-15 ***
f15           1.5012       0.2291    6.55  9.0e-11 ***
f16           1.9191       0.2780    6.90  9.0e-12 ***
f17           1.8555       0.2133    8.70  < 2e-16 ***
f18           2.1749       0.2310    9.42  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.52 on 991 degrees of freedom
Multiple R-squared(full model): 0.357   Adjusted R-squared: 0.352 
Multiple R-squared(proj model): 0.357   Adjusted R-squared: 0.352 
F-statistic(full model, *iid*):68.8 on 8 and 991 DF, p-value: <2e-16 
F-statistic(proj model): -39.3 on 8 and 9 DF, p-value: 1 


> # this will sample them for f1, also test having cluster in the third component
> summary(estg <- felm(y ~x | f1|0|cluster, dat))

Call:
   felm(formula = y ~ x | f1 | 0 | cluster, data = dat) 

Residuals:
   Min     1Q Median     3Q    Max 
-7.468 -0.873  0.022  1.036  5.203 

Coefficients:
  Estimate Cluster s.e. t value Pr(>|t|)    
x    0.906        0.064    14.2   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.52 on 991 degrees of freedom
Multiple R-squared(full model): 0.357   Adjusted R-squared: 0.352 
Multiple R-squared(proj model): 0.259   Adjusted R-squared: 0.253 
F-statistic(full model, *iid*):68.8 on 8 and 991 DF, p-value: <2e-16 
F-statistic(proj model):  200 on 1 and 9 DF, p-value: 1.87e-07 


> # Comparable estimable function
> ef <- function(gamma, addnames) {
+   ref1 <- gamma[[1]]
+   res <- c(gamma[[1]],gamma[2:8]-gamma[[1]])
+   if(addnames) {
+     names(res) <- c('icpt',paste('f1',2:8,sep='.'))
+   }
+   res
+ }
> getfe(estg,ef=ef,se=TRUE,bN=200)
       effect clusterse      se
icpt -0.49390   0.34199 0.34199
f1.2  0.67959   0.19774 0.19774
f1.3  1.08092   0.25140 0.25140
f1.4  1.39229   0.16588 0.16588
f1.5  1.50124   0.21863 0.21863
f1.6  1.91912   0.28410 0.28410
f1.7  1.85550   0.22160 0.22160
f1.8  2.17492   0.18960 0.18960
> 
> #summary(estr <- felm(y ~x + G(f1) + G(f2), dat), robust=TRUE)
> #ols(y ~x + f1 + f2, dat, robust=TRUE)
> #getfe(estr,ef=ef,se=T,bN=2000, robust=TRUE)
> #ols(y ~x + f1 + f2, dat, cluster="cluster")
> 
> proc.time()
   user  system elapsed 
   0.98    0.17    1.14