R Under development (unstable) (2024-03-19 r86153 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> ##
> ## Calibration examples
> ##
> 
> 
> ## Example of calibration to first-stage clusters
> library(survey)
Loading required package: grid
Loading required package: Matrix
Loading required package: survival

Attaching package: 'survey'

The following object is masked from 'package:graphics':

    dotchart

> data(api)
> 
> clusters<-table(apiclus2$dnum)
> clusters<-clusters[clusters>1 & names(clusters)!="639"]
> apiclus2a<-subset(apiclus2, dnum %in% as.numeric(names(clusters)))
> 
> dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2a)
> 
> popclusters<-subset(apipop, dnum %in% as.numeric(names(clusters)))
> 
> pop<-lapply(as.numeric(names(clusters)), function(cluster) {
+   colSums(model.matrix(~api99, model.frame(~api99, subset(popclusters, dnum %in% cluster))))})
> 
> names(pop)<-names(clusters)
> 
> dclus2g<-calibrate(dclus2, ~api99, pop,stage=1)
> 
> svymean(~api99, dclus2)
        mean     SE
api99 642.14 31.434
> svymean(~api99, dclus2g)
        mean    SE
api99 654.49 29.82
> 
> round(svyby(~api99, ~dnum, design=dclus2, svymean),4)
    dnum    api99      se
83    83 694.3333  0.0000
132  132 505.0000  0.0000
152  152 574.0000  0.0000
173  173 894.7500  0.0000
198  198 533.7500  0.0000
200  200 589.8000  6.8335
228  228 477.0000  0.0000
295  295 646.4000  0.0000
302  302 903.5000  0.0000
403  403 852.4000  0.0000
452  452 533.0000  0.0000
480  480 614.2000  0.0000
523  523 580.5000  0.0000
534  534 564.6000  0.0000
549  549 896.2000  0.0000
552  552 730.0000  0.0000
570  570 518.4000  7.5478
575  575 800.8000  4.2513
596  596 785.6000  2.4155
620  620 591.6000 10.5869
638  638 560.2000  4.0954
674  674 760.0000  0.0000
679  679 610.2500  0.0000
687  687 718.6667  0.0000
701  701 651.5000  0.0000
711  711 690.5000  0.0000
731  731 702.0000  2.1744
768  768 562.5000  0.0000
781  781 854.4000  0.7456
> 
> round(svyby(~api99, ~dnum, design=dclus2g, svymean),4)
    dnum    api99 se
83    83 694.3333  0
132  132 505.0000  0
152  152 574.0000  0
173  173 894.7500  0
198  198 533.7500  0
200  200 567.5455  0
228  228 477.0000  0
295  295 646.4000  0
302  302 903.5000  0
403  403 852.4000  0
452  452 533.0000  0
480  480 614.2000  0
523  523 580.5000  0
534  534 564.6000  0
549  549 896.2000  0
552  552 730.0000  0
570  570 548.9444  0
575  575 824.5357  0
596  596 787.5714  0
620  620 609.3750  0
638  638 585.6429  0
674  674 760.0000  0
679  679 610.2500  0
687  687 718.6667  0
701  701 651.5000  0
711  711 690.5000  0
731  731 700.6667  0
768  768 562.5000  0
781  781 851.0000  0
> 
> ## Averaging to first stage
> 
> dclus1<- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
> pop<-colSums(cbind(1,apipop$enroll),na.rm=TRUE)
> 
> dclus1g<-calibrate(dclus1, ~enroll, pop, aggregate=1)
> 
> svytotal(~enroll,dclus1g)
         total SE
enroll 3811472  0
> svytotal(~api.stu,dclus1g)
          total    SE
api.stu 3242857 38967
> 
> #variation within clusters should be zero
> all.equal(0, max(ave(weights(dclus1g),dclus1g$cluster,FUN=var),na.rm=TRUE))
[1] TRUE
> 
> ##bounded weights
>  dclus1g<-calibrate(dclus1, ~enroll, pop)
>  range(weights(dclus1g)/weights(dclus1))
[1] 0.7906782 1.7891164
>  dclus1gb<-calibrate(dclus1, ~enroll, pop, bounds=c(.6,1.5))
>  range(weights(dclus1gb)/weights(dclus1))
[1] 0.7198751 1.5000000
> 
> ## Ratio estimators
> dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
> svytotal(~api.stu,dstrat)
          total    SE
api.stu 3086009 99477
> common<-svyratio(~api.stu, ~enroll, dstrat, separate=FALSE)
> total.enroll<-sum(apipop$enroll,na.rm=TRUE)
> predict(common, total=total.enroll)
$total
         enroll
api.stu 3190038

$se
          enroll
api.stu 29565.98

> dstratg<-calibrate(dstrat,~enroll-1, total.enroll, variance=1)
> svytotal(~api.stu, dstratg)
          total    SE
api.stu 3190038 29566
> 
> ## postStratify vs calibrate in stratified sample (Ben French)
> set.seed(17)
> dat<-data.frame(y=rep(0:1,each=100),x=rnorm(200)+2*rep(0:1,each=100),
+                 z=rbinom(200,1,.2), fpc=rep(c(100,10000),each=100))
> dat$w<-ifelse(dat$y,dat$z,1-dat$z)
> popw<-data.frame(w=c("0","1"), Freq=c(2000,8000))
>  des<-svydesign(id=~1,fpc=~fpc, data=dat,strata=~y)
> postStratify(des,~w,popw)->dps
> dcal<-calibrate(des,~factor(w), pop=c(10000,8000))
> 
> all.equal(SE(svymean(~x,dcal)),SE(svymean(~x,dps)))
[1] TRUE
> 
> ## missing data in calibrated design
> dps$variables$z[1]<-NA
> summary(svyglm(y~z+x,design=dps,family=quasibinomial))

Call:
svyglm(formula = y ~ z + x, design = dps, family = quasibinomial)

Survey design:
postStratify(des, ~w, popw)

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -0.1203     0.3380  -0.356    0.722    
z             6.2118     0.6451   9.630   <2e-16 ***
x             2.2602     0.2514   8.992   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasibinomial family taken to be 1.919987)

Number of Fisher Scoring iterations: 9

> 
> ## Ratio estimator using the heteroskedasticity parameter (Daniel Oehm)
> # should match the ratio estmate above
> dstratgh <- calibrate(dstrat,~enroll-1, total.enroll, variance=apistrat$enroll)
> svytotal(~api.stu, dstratgh)
          total    SE
api.stu 3190038 29566
> 
> ## individual boundary constraints as multiplicative values (Daniel Oehm)
> bnds <- list(
+   lower = c(1, 1, rep(-Inf, nrow(apistrat)-2)), 
+   upper = c(1, 1, rep(Inf, nrow(apistrat)-2))) # the first two weights will remain unchanged the others are free to move
> lapply(bnds, head)
$lower
[1]    1    1 -Inf -Inf -Inf -Inf

$upper
[1]   1   1 Inf Inf Inf Inf

> dstratg1<-calibrate(dstrat, ~enroll-1, total.enroll, bounds = bnds, variance=apistrat$enroll)
> svytotal(~api.stu, dstratg1)
          total    SE
api.stu 3190133 29561
> head(weights(dstrat))
    1     2     3     4     5     6 
44.21 44.21 44.21 44.21 44.21 44.21 
> head(weights(dstratg1))
       1        2        3        4        5        6 
44.21000 44.21000 45.72055 45.72055 45.72055 45.72055 
> all.equal(weights(dstrat)[1:2], weights(dstratg1)[1:2])
[1] TRUE
> 
> ## individual boundary constraints as constant values (Daniel Oehm)
> bnds <- list(
+   lower = c(44.21, 44.21, rep(-Inf, nrow(apistrat)-2)), 
+   upper = c(44.21, 44.21, rep(Inf, nrow(apistrat)-2))) # the first two weights will remain unchanged
> lapply(bnds, head)
$lower
[1] 44.21 44.21  -Inf  -Inf  -Inf  -Inf

$upper
[1] 44.21 44.21   Inf   Inf   Inf   Inf

> dstratg2<-calibrate(dstrat, ~enroll-1, total.enroll, bounds = bnds, bounds.const = TRUE, variance=apistrat$enroll)
> svytotal(~api.stu, dstratg2)
          total    SE
api.stu 3190133 29561
> head(weights(dstrat))
    1     2     3     4     5     6 
44.21 44.21 44.21 44.21 44.21 44.21 
> head(weights(dstratg2))
       1        2        3        4        5        6 
44.21000 44.21000 45.72055 45.72055 45.72055 45.72055 
> all.equal(round(weights(dstrat)[1:2], 8), round(weights(dstratg2)[1:2]), 8) # minor rounding error but all good
[1] TRUE
> 
> # sparse matrix support (Daniel Oehm)
> dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
> dclus1g<-calibrate(dclus1, ~stype, pop.totals)
> svymean(~api00, dclus1g)
        mean     SE
api00 642.31 23.921
> svytotal(~enroll, dclus1g)
         total     SE
enroll 3680893 406293
> 
> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
> dclus1g<-calibrate(dclus1, ~stype, pop.totals, sparse = TRUE)
> svymean(~api00, dclus1g)
        mean     SE
api00 642.31 23.921
> svytotal(~enroll, dclus1g)
         total     SE
enroll 3680893 406293
> 
> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
> dclus1g<-calibrate(dclus1, ~stype, pop.totals, sparse = TRUE, calfun = "raking")
> svymean(~api00, dclus1g)
        mean     SE
api00 642.31 23.921
> svytotal(~enroll, dclus1g)
         total     SE
enroll 3680893 406293
> 
> proc.time()
   user  system elapsed 
   2.70    0.31    2.98