context("test-optimum_allocation") library(dplyr) library(optimall) data <- data.frame( "strata" = c( rep("a", times = 15), rep("b", times = 15), rep("c", times = 12) ), "y" = c(rnorm(30, sd = 1), rnorm(12, sd = 2)), "key" = rbinom(42, 1, 0.2) ) test_that("Neyman Allocation works", { nsd_vec <- c( length(data[data$strata == "a", "y"]) * sd(data[data$strata == "a", "y"]), length(data[data$strata == "b", "y"]) * sd(data[data$strata == "b", "y"]), length(data[data$strata == "c", "y"]) * sd(data[data$strata == "c", "y"]) ) expect_equal( optimum_allocation( data = data, strata = "strata", y = "y", method = "Neyman" )$n_sd, round(nsd_vec, digits = 2) ) expect_equal( optimum_allocation( data = data, strata = "strata", y = "y", method = "Neyman" )$stratum_fraction, round(nsd_vec / sum(nsd_vec), digits = 2) ) }) test_that("WrightI and WrightII work", { expect_equal( optimum_allocation( data = data, strata = "strata", y = "y", nsample = 10, method = "WrightI" )$stratum_size, optimum_allocation( data = data, strata = "strata", y = "y", nsample = 10, method = "WrightII" )$stratum_size) expect_equal( optimum_allocation( data = data, strata = "strata", y = "y", nsample = 10, method = "WrightII" )$stratum_size, optimum_allocation( data = data, strata = "strata", y = "y", nsample = 10, method = "Neyman" )$stratum_size ) # Should agree in this simple case expect_equal( sum(optimum_allocation( data = data, strata = "strata", y = "y", nsample = 15, method = "WrightII" )$stratum_size), 15 ) }) test_that("Output agrees whether input is matrix, df, or tibble", { data_mat <- as.matrix(data.frame( "strata" = c( rep(1, times = 15), rep(2, times = 15), rep(3, times = 12) ), "y" = data$y, "key" = data$key )) data_tib <- dplyr::as_tibble(data) expect_equal( optimum_allocation( data = data, strata = "strata", y = "y", nsample = 10 )$stratum_size, optimum_allocation( data = data_mat, strata = "strata", y = "y", nsample = 10 )$stratum_size) expect_equal( optimum_allocation( data = data_mat, strata = "strata", y = "y", nsample = 10 )$stratum_size, optimum_allocation( data = data_tib, strata = "strata", y = "y", nsample = 10 )$stratum_size ) }) test_that("optimum_allocation prints error message when 'y' is not numeric", { data2 <- dplyr::mutate(data, y = as.factor(y)) expect_error( optimum_allocation( data = data2, y = "y", strata = "strata", nsample = 10 ), "'y' must be numeric." ) }) test_that("'nsample' argument of optimum_allocation can't be less than or equal to zero, but it can be larger than the population of the dataset if the method is Neyman", { expect_error( optimum_allocation( data = data, y = "y", strata = "strata", method = "WrightII", nsample = 0 ), "'nsample' is too small for this method" ) expect_error( optimum_allocation( data = data, y = "y", strata = "strata", method = "WrightII", nsample = 50 ), "'nsample' is larger than population size" ) }) test_that("multiple strings in the 'strata' argument lead to the creation of new strata based on their interaction", { data$strata2 <- rbinom(42, 1, 0.5) names(data)[names(data) == "strata"] <- "strata3" expect_equal( as.character(optimum_allocation( data = data, strata = c( "strata3", "strata2" ), y = "y", nsample = 30 )$strata), c("a.0", "b.0", "c.0", "a.1", "b.1", "c.1") ) }) test_that("Error if not enough non-NA observations in a stratum", { data3 <- data %>% dplyr::mutate(y = ifelse(strata == "a", NA, y)) expect_error( optimum_allocation( data = data3, strata = "strata", y = "y", method = "Neyman", allow.na = TRUE ), "Function requires at least two observations per stratum" ) }) ## Tests for simple version with N_h and sd_h short_data <- data.frame( strata = c("a", "b", "c"), size = c(15, 15, 12), sd = c( sd(data[data$strata == "a", "y"]), sd(data[data$strata == "b", "y"]), sd(data[data$strata == "c", "y"]) ) ) nsd_vec <- short_data$size * short_data$sd test_that("Neyman Allocation works", { expect_equal( optimum_allocation( data = short_data, strata = "strata", sd_h = "sd", N_h = "size", method = "Neyman" )$n_sd, round(nsd_vec, digits = 2) ) expect_equal( optimum_allocation( data = short_data, strata = "strata", sd_h = "sd", N_h = "size", method = "Neyman" )$stratum_fraction, round(nsd_vec / sum(nsd_vec), digits = 2) ) }) test_that("WrightI and WrightII work", { expect_equal( optimum_allocation( data = short_data, strata = "strata", N_h = "size", sd_h = "sd", nsample = 10, method = "WrightI" )$stratum_size, optimum_allocation( data = short_data, strata = "strata", N_h = "size", sd_h = "sd", nsample = 10, method = "WrightII" )$stratum_size) expect_equal( optimum_allocation( data = short_data, strata = "strata", N_h = "size", sd_h = "sd", nsample = 10, method = "WrightII" )$stratum_size, optimum_allocation( data = data, strata = "strata", y = "y", nsample = 10, method = "Neyman" )$stratum_size ) # Should agree in this simple case expect_equal( sum(optimum_allocation( data = data, strata = "strata", y = "y", nsample = 15, method = "WrightII" )$stratum_size), 15 ) }) test_that("Errors work for sd_h and N_h version", { short_data3 <- short_data short_data3$y <- c(34, 20, 30) expect_error( optimum_allocation(short_data3, strata = "strata", y = "y", sd_h = "sd", N_h = "size" ), "One and only one of" ) expect_error( optimum_allocation(short_data3, strata = "strata", y = "y", N_h = "size" ), "If 'sd_h' is NULL, 'N_h' should also be NULL" ) new_row <- c("b", 15, 0.953, 25) short_data3 <- rbind(short_data3, new_row) short_data3$size <- as.numeric(short_data3$size) short_data3$sd <- as.numeric(short_data3$sd) expect_error( optimum_allocation(short_data3, strata = "strata", sd_h = "sd", N_h = "size" ), "data must only contain one row per stratum" ) }) test_that("Output agrees whether input is matrix, df, or tibble", { data_mat <- as.matrix(data.frame( "strata" = c(1, 2, 3), "size" = c(15, 15, 12), "sd" = c( sd(data[data$strata == "a", "y"]), sd(data[data$strata == "b", "y"]), sd(data[data$strata == "c", "y"]) ) )) data_tib <- dplyr::as_tibble(short_data) expect_equal( optimum_allocation( data = short_data, strata = "strata", N_h = "size", sd_h = "sd", nsample = 10 )$stratum_size, optimum_allocation( data = data_mat, strata = "strata", N_h = "size", sd_h = "sd", nsample = 10 )$stratum_size) expect_equal(optimum_allocation( data = data_mat, strata = "strata", N_h = "size", sd_h = "sd", nsample = 10 )$stratum_size, optimum_allocation( data = data_tib, strata = "strata", N_h = "size", sd_h = "sd", nsample = 10 )$stratum_size ) }) test_that("'nsample' argument of optimum_allocation can't be less than or equal to zero, but it can be larger than the population of the dataset if the method is Neyman", { expect_error( optimum_allocation( data = short_data, N_h = "size", sd_h = "sd", strata = "strata", method = "WrightII", nsample = 0 ), "'nsample' is too small for this method" ) expect_error( optimum_allocation( data = short_data, N_h = "size", sd_h = "sd", strata = "strata", method = "WrightII", nsample = 50 ), "'nsample' is larger than population size" ) }) test_that("multiple strings in the 'strata' argument lead to the creation of new strata based on their interaction", { short_data4 <- rbind(short_data, short_data) short_data4$strata2 <- c(0, 1, 0, 1, 0, 1) expect_equal( as.character(optimum_allocation( data = short_data4, strata = c("strata", "strata2"), N_h = "size", sd_h = "sd", nsample = 30 )$strata), c("a.0", "b.0", "c.0", "a.1", "b.1", "c.1") ) })