context("Sampling and probability functions") test_that("use of randomizr and filter works", { design <- declare_model( classrooms = add_level(10), individuals = add_level(20, female = rbinom(N, 1, 0.5)) ) + NULL dat <- draw_data(design) smp1 <- declare_sampling(S = complete_rs(N = N, n = 10), filter = S == 1) smp2 <- declare_sampling(S = complete_rs(N = N, n = 10)) smp3 <- declare_sampling(S = complete_rs(N = N, n = 10), filter = S == 0) smp4 <- declare_sampling(S = sample(x = c(0, 1, NA), N, replace = TRUE), filter = S == 0) expect_equal(nrow(smp1(dat)), 10) expect_equal(nrow(smp2(dat)), 10) expect_equal(nrow(smp3(dat)), 190) expect_true(sum(is.na(smp4(dat)$S)) == 0) }) test_that("legacy warnings", { expect_error(declare_sampling(n = 50), "S = draw_rs\\(N = N, n = 50\\)") expect_error(declare_sampling(n = 50, sampling_variable = "D"), "D = draw_rs\\(N = N, n = 50\\)") expect_silent(declare_sampling(S = complete_rs(N = N, n = 20))) }) context("Sampling and probability functions") test_that("randomizr works through declare_sampling", { df <- data.frame(ID = 1:10, strata = rep(c("A", "B"), 5, 5)) f_1 <- declare_sampling(legacy = TRUE) expect_equal(dim(f_1(df)), c(5, 3)) f_1 <- declare_sampling(n = 4, legacy = TRUE) expect_equal(dim(f_1(df)), c(4, 3)) f_1 <- declare_sampling(strata = strata, legacy = TRUE) expect_length(xtabs(~strata, f_1(df)), 2) # what about inside a function? new_fun <- function(n) { f_1 <- declare_sampling(n = n, legacy = TRUE) f_1(df) } expect_equal(dim(new_fun(3)), c(3, 3)) }) expect_sampling_step <- function(step, df, n, clusters = NULL, strata = NULL) { df <- step(df) if (!is.na(n)) { expect_equal(nrow(df), n) } expect_true(is.numeric(df$S_inclusion_prob)) if (is.character(clusters)) { } if (is.character(strata)) { } } test_that("test sampling and probability functions", { population <- declare_model( villages = add_level( N = 100, elevation = rnorm(N), high_elevation = as.numeric(elevation > 0) ), individuals = add_level( N = 10, noise = rnorm(N), ideo_3 = sample(c("Liberal", "Moderate", "Conservative"), size = N, prob = c(.2, .3, .5), replace = TRUE ) ) ) # Draw Data population <- population() # "complete" sampling expect_sampling_step(declare_sampling(legacy = TRUE), population, n = 500) expect_sampling_step(declare_sampling(legacy = TRUE, n = 60), population, n = 60) # stratified sampling expect_sampling_step(declare_sampling(legacy = TRUE, strata = ideo_3), population, n = NA) expect_sampling_step(declare_sampling(legacy = TRUE, strata = ideo_3, strata_prob = c(.3, .6, .1)), population, n = NA) expect_sampling_step(declare_sampling(legacy = TRUE, strata = ideo_3, strata_n = c(10, 10, 10)), population, n = 30) # Clustered sampling expect_sampling_step(declare_sampling(legacy = TRUE, clusters = villages), population, n = 500) # Stratified and Clustered assignments expect_sampling_step(declare_sampling(legacy = TRUE, clusters = villages, strata = high_elevation), population, n = NA) }) test_that("declare_sampling expected failures via validation fn", { expect_true(is.function(declare_sampling(legacy = TRUE))) expect_error(declare_sampling(strata = "character", legacy = TRUE), "strata") expect_error(declare_sampling(clusters = "character", legacy = TRUE), "clusters") expect_error(declare_sampling(sampling_variable = NULL, legacy = TRUE), "sampling_variable") }) # two by two: keep/drop standard name/non standard name test_that("keep/drop options work with diff sampling names", { desgn <- declare_model(N = 10) + NULL dat1 <- draw_data(desgn + declare_sampling(legacy = TRUE, n = 5)) dat2 <- draw_data(desgn + declare_sampling(legacy = TRUE, n = 5, drop_nonsampled = TRUE)) dat3 <- draw_data(desgn + declare_sampling(legacy = TRUE, n = 5, drop_nonsampled = FALSE)) dat4 <- draw_data(desgn + declare_sampling(legacy = TRUE, n = 5, sampling_variable = "smpld")) dat5 <- draw_data(desgn + declare_sampling(legacy = TRUE, n = 5, sampling_variable = "smpld", drop_nonsampled = TRUE)) dat6 <- draw_data(desgn + declare_sampling(legacy = TRUE, n = 5, sampling_variable = "smpld", drop_nonsampled = FALSE)) # length, which variables expect_equal(nrow(dat1), 5) expect_equal(nrow(dat2), 5) expect_equal(nrow(dat3), 10) expect_false("S" %in% names(dat1)) expect_false("S" %in% names(dat2)) expect_true("S" %in% names(dat3)) expect_equal(nrow(dat4), 5) expect_equal(nrow(dat5), 5) expect_equal(nrow(dat6), 10) expect_false("smpld" %in% names(dat4)) expect_false("smpld" %in% names(dat5)) expect_true("smpld" %in% names(dat6)) })