R Under development (unstable) (2024-05-30 r86651 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ###These are tests of the basic function of xBalance under some > ###restricted sitations and using functions that hew closely to the > ###expressions in Hansen and Bowers 2008. For example, with only > ###binary treatment. The idea here to show that the math in that > ###article is equivalent to the output from xBalance. > > require("RItools") Loading required package: RItools Loading required package: ggplot2 > > data(nuclearplants) > > s2.fn<-function(x){sum((x-mean(x))^2)/(length(x)-1)} ##same as sd(x) > h.fn<-function(n,m){(m*(n-m))/n} > > var1<-function(x,m){ ##var(d) + h<-h.fn(n=length(x),m=m) + (1/h)*s2.fn(x) + } > > var2<-function(x,m){ ##var(Z'x) (i.e. var of the sum statistic) + h<-h.fn(n=length(x),m=m) + (h)*s2.fn(x) + } > > #####First just looking at the unstratified calculations > xb1a<-xBalance(pr~ date+ t1 + t2 + cap + ne + ct + bw + cum.n, + strata=list(nostrat=NULL), + data=nuclearplants, + report=c("adj.means","adj.mean.diffs","adj.mean.diffs.null.sd","std.diffs","z.scores","p.values")) > > ##print(xb1a,digits=4) > > testxb1a<-t(sapply(nuclearplants[,dimnames(xb1a$results)$vars],function(thevar){ + myssn<-with(nuclearplants,sum((pr-mean(pr))*thevar)) + myadjdiff<-myssn/h.fn(m=sum(nuclearplants$pr),n=length(nuclearplants$pr)) + mynullvar1<-var1(x=thevar,m=sum(nuclearplants$pr)) + myz<-myadjdiff/sqrt(mynullvar1) + return(c(adj.diff=myadjdiff,adj.diff.null.sd=sqrt(mynullvar1),z=myz)) + })) > > ##print(testxb1a,digits=4) > > all.equal(xb1a$results[,c("adj.diff","adj.diff.null.sd","z"),"nostrat"],testxb1a,check.attributes = FALSE) [1] TRUE > > ###Now with strata. > xb2<-xBalance(pr~ date+ t1 + t2 + cap + ne + ct + bw + cum.n, + strata=list(pt=~pt), + data=nuclearplants, + report=c("all")) > > > test2.fn<-function(zz,mm,ss){ + ##Some notes on xBalanceEngine + ##dv = h/(n-1) + ##unsplit(tapply(zz,ss,function(z){h.fn(m=sum(z),n=length(z))/(length(z)-1)}),ss) ##h/(n-1) + ##tmat*tmat = squared mean deviations + ##sapply(split(data.frame(mm),ss),function(x){sapply(x,function(var){(var-mean(var))^2})}) + ##so dv*tmat*tmat=(h/(n-1))*(x_i-\bar(x))^2=h*s^2 + + myssn<-apply(mm,2,function(x){sum((zz-unsplit(tapply(zz,ss,mean),ss))*x)}) + + hs<-tapply(zz,ss,function(z){h.fn(m=sum(z),n=length(z))}) + mywtsum<-sum(hs) + + myadjdiff<-myssn/mywtsum + + s2s<-sapply(data.frame(mm),function(x){sapply(split(x,ss),function(var){var(var)})}) + + myssvar<-apply(s2s,2,function(s2){sum(hs*s2)}) + mynullsd2<-apply(s2s,2,function(s2){sqrt((1/(sum(hs)^2))*sum(hs*s2))}) ##from StatSci eq 6 + mynullsd1<-sqrt(myssvar*(1/mywtsum)^2) ##with (1/h) + + stopifnot(all.equal(mynullsd1,mynullsd2,check.attributes=FALSE)) + + ##If numbers of treated and controls are the same for all blocks: + if( length(unique(ss))>1 & all(diff(hs)==0) ){ + mynullsd3<-apply(s2s,2,function(s2){sqrt((1/(length(hs)))^2*sum((1/hs)*s2))}) ##(1/B^2)*\sum_{b=1}^B (1/h) s2 + stopifnot(all.equal(mynullsd3,mynullsd2)) + + ##For fun, the version ignoring the stratification. + m<-sum(zz) + n<-length(zz) + h<-(m*(n-m))/n + mynullsd.nostrat<-sqrt((1/h)*apply(mm,2,var)) + + } + ##For pairs + if((length(unique(ss))>1 & all(table(ss)==2)) & all(diff(hs)==0)){ + mys2s.pairs<-sapply(data.frame(mm),function(x){sapply(split(x,ss),function(var){sum((var-mean(var))^2)})}) + mynullsd4<-apply(s2s,2,function(s2){sqrt((2/(length(s2)^2))*sum(s2))}) ##(1/B^2)*\sum_{b=1}^B (1/h) s2 = (2/B^2)\sum_{b=1}^B \sum_{i=1}^2 s2 + stopifnot(all.equal(mynullsd4,mynullsd2)) + } + myz2<-myadjdiff/mynullsd2 + myz3<-myadjdiff/mynullsd1 + myz1<-myssn/sqrt(myssvar) + + stopifnot(all.equal(myz1,myz2,check.attributes=FALSE)) + stopifnot(all.equal(myz2,myz3,check.attributes=FALSE)) + + return(cbind(adj.diff=myadjdiff,adj.diff.null.sd=mynullsd2,z=myz2)) + } > > mymm<-model.matrix(pr~ date+ t1 + t2 + cap + ne + ct + bw + cum.n-1,data=nuclearplants) > > test2.fn(zz=nuclearplants$pr,mm=mymm,ss=nuclearplants$pt) adj.diff adj.diff.null.sd z date 0.09790698 0.3349829 0.2922746 t1 0.95348837 1.2041819 0.7918142 t2 9.37209302 4.0582505 2.3093925 cap 66.56976744 72.4498365 0.9188394 ne -0.02325581 0.1609195 -0.1445183 ct -0.11046512 0.1895349 -0.5828221 bw -0.04651163 0.1506341 -0.3087722 cum.n -0.81976744 2.4129633 -0.3397347 > > all.equal(test2.fn(zz=nuclearplants$pr,mm=mymm,ss=nuclearplants$pt), + xb2$results[,c("adj.diff","adj.diff.null.sd","z"),"pt"],check.attributes=FALSE) [1] TRUE > > proc.time() user system elapsed 1.54 0.28 1.81