# Tests for survey analysis functions library(testthat) library(janitor) library(labelled) library(dplyr) x <- data.frame( # 4th person didn't respond at all unrelated = 1:5, # not part of the question q1_1 = c("a", "a", NA, NA, NA), q1_2 = c("b", "b", NA, NA, NA), q1_3 = c(NA, NA, "c", NA, NA), q1_4 = c(NA, NA, NA, NA, NA), # no one selected this choice q1_other = c("horses", NA, NA, NA, "only other for this person"), stringsAsFactors = FALSE ) # First, test without "other" column treated_x <- x %>% check_all_recode(q1_1:q1_4) tabulated_x <- x %>% check_all_recode(q1_1:q1_4) %>% check_all_count(q1_1:q1_4) test_that("treatment performs as expected, including setting labels", { approx <- data.frame( unrelated = 1:5, q1_1 = c(1, 1, 0, NA, NA), q1_2 = c(1, 1, 0, NA, NA), q1_3 = c(0, 0, 1, NA, NA), q1_4 = c(rep(0, 3), NA, NA), q1_other = c("horses", NA, NA, NA, "only other for this person"), stringsAsFactors = FALSE ) var_label(approx) <- list(unrelated = NULL, q1_1 = "a", q1_2 = "b", q1_3 = "c", q1_4 = NA_character_) # need to add variable labels to match the result of treatment expect_equal(treated_x, approx) }) test_that("tabulation performs as expected", { expect_equal( tabulated_x, data.frame( response = c("a", "b", "c", "q1_4"), n = c(2, 2, 1, 0), percent = c(2 / 3, 2 / 3, 1 / 3, 0), stringsAsFactors = FALSE ) %>% as_tabyl(., 1) ) }) ## Now test with "other" column treated_x_other <- suppressWarnings( check_all_recode( x, contains("q1") ) ) expect_warning(x %>% check_all_recode(contains("q1")), "has multiple values") tabulated_x_other <- treated_x_other %>% check_all_count(contains("q1")) test_that("treatment performs as expected, including setting labels", { approx_other <- data.frame( unrelated = 1:5, q1_1 = c(1, 1, 0, NA, 0), q1_2 = c(1, 1, 0, NA, 0), q1_3 = c(0, 0, 1, NA, 0), q1_4 = c(rep(0, 3), NA, 0), q1_other = c(1, 0, 0, NA, 1), stringsAsFactors = FALSE ) var_label(approx_other) <- list(unrelated = NULL, q1_1 = "a", q1_2 = "b", q1_3 = "c", q1_4 = NA_character_, q1_other = "Other") expect_equal(treated_x_other, approx_other) }) test_that("tabulation performs as expected", { expect_equal(tabulated_x_other, data.frame( response = c("a", "b", "c", "q1_4", "Other"), n = c(2, 2, 1, 0, 2), stringsAsFactors = FALSE ) %>% mutate(percent = n / 4) %>% as_tabyl(., 1)) }) test_that("select helpers work", { expect_equal( suppressWarnings(x %>% check_all_recode(q1_1:q1_other) %>% check_all_count(q1_1:q1_other)), suppressWarnings(x %>% check_all_recode(contains("q1")) %>% check_all_count(contains("q1"))) ) }) test_that("bad inputs are caught", { expect_error( check_all_recode(x, contains("not_there")), "no columns selected; check your variable name specification" ) }) vec <- c("Strongly agree", "Agree", "Somewhat agree", "Somewhat disagree", "Strongly disagree", "Frogs", NA) vec_fac <- factor(vec, levels = vec) test_that("recode default parameters are correct", { expect_equal(recode_to_binary(vec), factor(c("Selected", "Selected", rep("Not selected", 4), NA), levels = c("Selected", "Not selected", NA))) }) test_that("recode produces intended result", { expect_equal(recode_to_binary(vec, label_matched = "Top-2", label_unmatched = "Not in Top-2"), factor(c("Top-2", "Top-2", rep("Not in Top-2", 4), NA), levels = c("Top-2", "Not in Top-2", NA))) expect_equal(recode_to_binary(vec, "frogs", label_matched = "Top-2", label_unmatched = "Not in Top-2"), factor(c(rep("Not in Top-2", 5), "Top-2", NA), levels = c("Top-2", "Not in Top-2", NA))) expect_equal(recode_to_binary(vec, c("unrelated term", "frogs"), label_matched = "Top-2", label_unmatched = "Not in Top-2"), factor(c(rep("Not in Top-2", 5), "Top-2", NA), levels = c("Top-2", "Not in Top-2", NA))) }) test_that("same result on factor and character", { expect_equal(recode_to_binary(vec), recode_to_binary(vec_fac)) }) test_that("recode produces correct warning and result when nothing is found to recode", { expect_equal( suppressWarnings( recode_to_binary(vec, "not in the vector", label_matched = "Top-2", label_unmatched = "Not in Top-2") ), factor(c(rep("Not in Top-2", 6), NA), levels = c("Top-2", "Not in Top-2", NA)) ) expect_warning(recode_to_binary(vec, c("totally", "not in the vector")), "no instances of \"totally\", \"not in the vector\" found in x") }) # TODO, if/when we use the label attribute to capture survey question text: check that column attributes are retained? test_that("label attributes are skipped when set_labels = FALSE", { no_labels <- suppressWarnings(x %>% check_all_recode(contains("q1"), set_labels = FALSE)) expect_equal( var_label(no_labels), list(unrelated = NULL, q1_1 = NULL, q1_2 = NULL, q1_3 = NULL, q1_4 = NULL, q1_other = NULL) ) expect_equal( no_labels %>% check_all_count(contains("q1")), data.frame( response = c("q1_1", "q1_2", "q1_3", "q1_4", "q1_other"), n = c(2, 2, 1, 0, 2), percent = c(2 / 4, 2 / 4, 1 / 4, 0, 2 / 4), stringsAsFactors = FALSE ) %>% as_tabyl(1) ) }) test_that("bad inputs error or warn appropriately", { expect_error(check_all_count(mtcars, cyl:carb), "input vectors should only have values of 0, 1, and NA; run check_all_recode() before calling this function", fixed = TRUE ) mtcars %>% check_all_recode(cyl:disp) |> expect_warning("column 1 has multiple values besides NA; not sure which is the question text. Guessing this an \"Other (please specify)\" column.", fixed = TRUE) |> expect_warning("column 2 has multiple values besides NA; not sure which is the question text. Guessing this an \"Other (please specify)\" column.", fixed = TRUE) })