suppressWarnings({ suppressPackageStartupMessages({ library(survey) library(dplyr) library(svrep) library(testthat) }) }) set.seed(2014) # Load example datasets ---- data('api', package = 'survey') ## One-stage stratified sample (SRSWOR) dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc) ## One-stage cluster sample (SRSWOR) dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc) ## Two-stage cluster sample (SRSWOR) dclus2 <- svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2) ## Two-stage cluster sample (SRSWR) dclus2wr <- svydesign(id=~dnum+snum, weights = ~pw, data=apiclus2) ## Library two-stage survey (first stage PPSWOR, second stage SRSWOR) library_multistage_survey <- svydesign( data = svrep::library_multistage_sample, fpc = ~ PSU_SAMPLING_PROB + SSU_SAMPLING_PROB, ids = ~ PSU_ID + SSU_ID, pps = "brewer" ) ## Library two-stage survey with nonresponse multistage_survey_with_poisson_nonresponse <- svydesign( data = svrep::library_multistage_sample |> mutate( RESP_PROB = runif(n = nrow(svrep::library_multistage_sample), min = 0.6, max = 1), RESP_STATUS = ifelse( runif(n = nrow(svrep::library_multistage_sample)) < RESP_PROB, 1, 0) ), fpc = ~ PSU_SAMPLING_PROB + SSU_SAMPLING_PROB + RESP_PROB, ids = ~ PSU_ID + SSU_ID + RESP_STATUS, pps = "brewer" ) ## One-stage PPSWOR data('election', package = 'survey') pps_design_ht <- svydesign( data = election_pps, id = ~1, fpc = ~p, pps = ppsmat(election_jointprob), variance = "HT" ) # Convert to RWYB bootstrap design ---- set.seed(1999) dstrat_boot <- as_bootstrap_design( design = dstrat, type = "Rao-Wu-Yue-Beaumont", replicates = 100 ) set.seed(1999) dstrat_reps <- make_rwyb_bootstrap_weights( num_replicates = 100, samp_unit_ids = dstrat$cluster, strata_ids = dstrat$strata, samp_unit_sel_probs = dstrat$allprob, samp_method_by_stage = c("SRSWOR"), output = 'factors' ) |> `dimnames<-`(NULL) set.seed(1999) dclus1_boot <- as_bootstrap_design( design = dclus1, type = "Rao-Wu-Yue-Beaumont", replicates = 100 ) set.seed(1999) dclus1_reps <- make_rwyb_bootstrap_weights( num_replicates = 100, samp_unit_ids = dclus1$cluster, strata_ids = dclus1$strata, samp_unit_sel_probs = dclus1$allprob, samp_method_by_stage = c("SRSWOR"), output = 'factors' ) |> `dimnames<-`(NULL) set.seed(1999) dclus2_boot <- as_bootstrap_design( design = dclus2, type = "Rao-Wu-Yue-Beaumont", replicates = 100 ) set.seed(1999) dclus2_reps <- make_rwyb_bootstrap_weights( num_replicates = 100, samp_unit_ids = dclus2$cluster, strata_ids = dclus2$strata, samp_unit_sel_probs = dclus2$allprob, samp_method_by_stage = c("SRSWOR", "SRSWOR"), output = 'factors' ) |> `dimnames<-`(NULL) set.seed(1999) dclus2wr_boot <- as_bootstrap_design( design = dclus2wr, type = "Rao-Wu-Yue-Beaumont", replicates = 100 ) set.seed(1999) dclus2wr_reps <- make_rwyb_bootstrap_weights( num_replicates = 100, samp_unit_ids = dclus2$cluster, strata_ids = dclus2$strata, samp_unit_sel_probs = matrix(0, nrow = nrow(dclus2wr$fpc$sampsize), ncol = ncol(dclus2wr$fpc$sampsize)), samp_method_by_stage = c("SRSWR", "SRSWR"), output = 'factors' ) |> `dimnames<-`(NULL) set.seed(1999) library_multistage_boot <- as_bootstrap_design( design = library_multistage_survey, type = "Rao-Wu-Yue-Beaumont", replicates = 100, samp_method_by_stage = c("PPSWOR", "SRSWOR") ) set.seed(1999) library_multistage_reps <- make_rwyb_bootstrap_weights( num_replicates = 100, samp_unit_ids = library_multistage_survey$cluster, strata_ids = library_multistage_survey$strata, samp_unit_sel_probs = library_multistage_survey$allprob, samp_method_by_stage = c("PPSWOR", "SRSWOR"), output = 'factors' ) |> `dimnames<-`(NULL) set.seed(1999) ms_w_poisson_nr_boot <- as_bootstrap_design( design = multistage_survey_with_poisson_nonresponse, type = "Rao-Wu-Yue-Beaumont", replicates = 100, samp_method_by_stage = c("PPSWOR", "SRSWOR", "Poisson") ) set.seed(1999) ms_w_poisson_nr_reps <- make_rwyb_bootstrap_weights( num_replicates = 100, samp_unit_ids = multistage_survey_with_poisson_nonresponse$cluster, strata_ids = multistage_survey_with_poisson_nonresponse$strata, samp_unit_sel_probs = multistage_survey_with_poisson_nonresponse$allprob, samp_method_by_stage = c("PPSWOR", "SRSWOR", "Poisson"), output = 'factors' ) |> `dimnames<-`(NULL) set.seed(1999) election_pps_boot <- as_bootstrap_design( design = pps_design_ht, type = "Rao-Wu-Yue-Beaumont", replicates = 100 ) set.seed(1999) election_pps_reps <- make_rwyb_bootstrap_weights( num_replicates = 100, samp_unit_ids = pps_design_ht$cluster, strata_ids = pps_design_ht$strata, samp_unit_sel_probs = pps_design_ht$allprob, samp_method_by_stage = c("PPSWOR"), output = 'factors' ) |> `dimnames<-`(NULL) dclus1_boot |> summarize_rep_weights(type = "overall") dclus2_boot |> summarize_rep_weights(type = "overall") library_multistage_boot |> summarize_rep_weights(type = "overall") election_pps_boot |> summarize_rep_weights(type = "overall") # Same results from conversion and creating from scratch ---- test_that( "Same results from conversion versus creating from scratch: dstrat", { expect_equal( object = dstrat_boot$repweights, expected = dstrat_reps ) }) test_that( "Same results from conversion versus creating from scratch: dclus1", { expect_equal( object = dclus1_boot$repweights, expected = dclus1_reps ) }) test_that( "Same results from conversion versus creating from scratch: dclus2", { expect_equal( object = dclus2_boot$repweights, expected = dclus2_reps ) }) test_that( "Same results from conversion versus creating from scratch: dclus2wr", { expect_equal( object = dclus2wr_boot$repweights, expected = dclus2wr_reps ) }) test_that( "Same results from conversion versus creating from scratch: library_multistage", { expect_equal( object = library_multistage_boot$repweights, expected = library_multistage_reps ) }) test_that( "Same results from conversion versus creating from scratch: multistage_survey_with_poisson_nonresponse", { expect_equal( object = ms_w_poisson_nr_boot$repweights, expected = ms_w_poisson_nr_reps ) }) test_that( "Same results from conversion versus creating from scratch: election_pps", { expect_equal( object = election_pps_boot$repweights, expected = election_pps_reps ) }) # Outputs either factors or weights ---- test_that( "Able to correctly output either adjustment factors or weights", { expect_equal( object = withr::with_seed(2014, { as_bootstrap_design(dclus1, type = "Rao-Wu-Yue-Beaumont") |> weights(type = "analysis") }), expected = withr::with_seed(2014, { as_bootstrap_design(dclus1, type = "Rao-Wu-Yue-Beaumont") |> weights(type = "replication") * weights(dclus1) }) ) expect_equal( object = withr::with_seed(2014, { as_bootstrap_design(dclus1, type = "Rao-Wu-Yue-Beaumont", replicates = 100) |> weights(type = "analysis") }), expected = withr::with_seed(2014, { make_rwyb_bootstrap_weights( num_replicates = 100, samp_unit_ids = dclus1$cluster, strata_ids = dclus1$strata, samp_unit_sel_probs = dclus1$allprob, samp_method_by_stage = c("SRSWOR"), output = 'weights' ) |> `dimnames<-`(NULL) }) ) }) # Expected warnings and errors ---- data('mu284', package = 'survey') test_that( desc = "Expected error for noncertainty singleton strata", { expect_error( object = svydesign( data = mu284 |> mutate(Stratum = case_when( id1 == "19" ~ "1", TRUE ~ "2" )), ids = ~ id1 + id2, strata = ~ Stratum, fpc = ~ n1 + n2 ) |> as_bootstrap_design(type = "Rao-Wu-Yue-Beaumont"), regexp = "Cannot form bootstrap adjustment factors for a stratum at stage 1" ) }) # Check for correctness ---- ##_ SRSWOR ---- test_that( "For SRSWOR, sum of weights in replicates always matches the population size", { set.seed(2014) expect_equal( object = svrep::library_census |> group_by(STABR) |> mutate(N_PSUS = n()) |> arrange(sample(x = n())) |> slice_head(n = 2) |> ungroup() %>% svydesign(data = ., ids = ~ 1, strata = ~ STABR, fpc = ~ N_PSUS) |> as_bootstrap_design(replicates = 100) |> summarize_rep_weights(type = "overall") |> select(avg_wgt_sum, sd_wgt_sums) |> as.numeric(), expected = c(9245, 0) ) }) ##_ PPSWOR ---- test_that( "For PPSWOR+SRSWOR two-stage design, RWYB bootstrap has correct weights sums, and estimate close to Brewer's approximation", { set.seed(2014) # Declare a multistage design # where first-stage probabilities are PPSWOR sampling # and second-stage probabilities are based on SRSWOR multistage_design <- svydesign( data = library_multistage_sample, ids = ~ PSU_ID + SSU_ID, probs = ~ PSU_SAMPLING_PROB + SSU_SAMPLING_PROB, pps = "brewer" ) # Convert to a bootstrap replicate design boot_design <- as_bootstrap_design( design = multistage_design, type = "Rao-Wu-Yue-Beaumont", samp_method_by_stage = c("PPSWOR", "SRSWOR"), replicates = 500 ) # Compare variance estimates brewer_estimate <- svytotal(x = ~ TOTCIR, na.rm = TRUE, design = multistage_design) |> vcov() |> as.numeric() boot_estimate <- svytotal(x = ~ TOTCIR, na.rm = TRUE, design = boot_design) |> vcov() |> as.numeric() expect_lt( object = abs(boot_estimate - brewer_estimate)/brewer_estimate, expected = 0.1 ) }) ##_ Poisson ---- test_that( "For single-stage Poisson design, RWYB bootstrap close to Horvitz-Thompson estimate", { set.seed(2014) ### Represent Poisson design as PPS design with Horvitz-Thompson estimator poisson_design <- multistage_survey_with_poisson_nonresponse$variables |> filter(RESP_STATUS == 1) %>% svydesign(data = ., probs = ~ RESP_PROB, ids = ~ 1, pps = ppsmat( (.$RESP_PROB %*% t(.$RESP_PROB)) |> `diag<-`(.$RESP_PROB) ), variance = "HT") ### Convert to RWYB bootstrap design poisson_boot <- poisson_design |> as_bootstrap_design(samp_method_by_stage = 'Poisson', replicates = 500) ### Calculate Horvitz-Thompson estimate poisson_quad_matrix <- make_quad_form_matrix( variance_estimator = "Horvitz-Thompson", joint_probs = (poisson_design$prob %*% t(poisson_design$prob)) |> `diag<-`(poisson_design$prob) ) wtd_y <- poisson_design$variables$TOTCIR / poisson_design$prob wtd_y[is.na(wtd_y)] <- 0 expected_value <- as.numeric(t(wtd_y) %*% poisson_quad_matrix %*% wtd_y) ### Compare bootstrap estimate to Horvitz-Thompson estimate rwyb_boot_est <- svytotal(x = ~ TOTCIR, design = poisson_boot, na.rm = TRUE) |> vcov() |> as.numeric() expect_lt( object = abs(rwyb_boot_est - expected_value)/expected_value, expected = 0.10 ) })