outbreaks <- c("MeAsles", "CHolera", "AjS", "meningitis", 'Cholera_intersectional', 'Measles_intersectional', 'Meningitis_intersectional', 'AJS_intersectional', 'Diphtheria_intersectional') surveys <- c("MOrtality", "VaCcination_ShoRt", 'Vaccination_LonG', "NutritIon", 'ebs') # Functions for checking age columns get_ages <- function(x) x[grepl("age_(year|month|day)s?$", names(x), perl = TRUE)] check_exclusive_ages <- function(x, n = 300) { # Check that the age columns are all cromulent. # There should be a total of `n` ages, but no more, indicating that they # are all exclusive # Alex: changed to less than or equal because have NAs now all(vapply(x, function(i) sum(i > -1, na.rm = TRUE) <= n, logical(1))) } check_age_integers <- function(x) { all(vapply(x, is.integer, logical(1))) } test_that("errors are thrown if the wrong dicts are used", { expect_error(gen_data("Dada"), "'dictionary' must be one of: 'Cholera', 'Measles', 'Meningitis', 'AJS', 'Cholera_intersectional', 'Measles_intersectional', 'Meningitis_intersectional', 'AJS_intersectional', 'Diphtheria_intersectional', 'Mortality', 'Nutrition', 'Vaccination_long', 'Vaccination_short', 'ebs'", fixed = TRUE ) }) test_that("msf_dict works", { for (disease in c(outbreaks)) { nested <- msf_dict(disease) long <- msf_dict(disease, compact = FALSE) neither <- msf_dict(disease, compact = FALSE, long = FALSE) expect_is(nested, "tbl_df", label = disease) expect_is(long, "tbl_df", label = disease) # neither compact nor long is a list of dictionaries and options expect_is(neither, "list", label = disease) expect_named(neither, c("dictionary", "options"), label = disease) # None of the option names have bracketed arguments in front of them expect_false(all(grepl("^\\[", long$option_name))) expect_false(all(grepl("^\\[", neither$options$option_name))) # rows in the dictionary should equal rows in the nested dictionary expect_equal(nrow(neither$dictionary), nrow(nested), label = disease) # the number of columns in the long data should be equal to the sum of the # columns minus 1 in the separated data sets expect_equal(ncol(long), sum(vapply(neither, ncol, integer(1))) - 1L, label = disease) # the number of columns in the dictionary should be one less than the number # of columns in the nested data. expect_equal(ncol(neither$dictionary) + 1L, ncol(nested), label = disease, info = disease) } }) test_that("msf_dict works for surveys", { for (type in surveys) { nested <- msf_dict(type, compact = TRUE) long <- msf_dict(type, compact = FALSE) # a tibble is produced expect_is(nested, "tbl_df", label = type) expect_is(long, "tbl_df", label = type) # the correct number of variables produced in both formats expect_gt(nrow(long), nrow(nested), label = type) expect_gt(ncol(long), ncol(nested), label = type) } }) test_that("outbreak data can be generated", { for (disease in c(outbreaks)) { varnames = ifelse(grepl("_intersectional", disease), "name", "data_element_shortname") dictionary <- msf_dict(disease) data <- gen_data(disease, varnames = varnames, numcases = 300) # a tibble is produced expect_is(data, "tbl_df", label = disease) # data produced has same number vars as dictionary expect_equal(nrow(dictionary), ncol(data), label = disease) # ages are all appropriate (functions defined at top) expect_true(check_exclusive_ages(get_ages(data), 300)) expect_true(check_age_integers(get_ages(data))) } }) test_that("survey data can be generated", { for (disease in surveys) { dictionary <- msf_dict(disease) data <- gen_data(disease, varnames = "name", numcases = 300) # check that produces a tibble expect_is(data, "tbl_df", label = disease) # ages are all appropriate (functions defined at top) expect_true(check_exclusive_ages(get_ages(data), 300)) expect_true(check_age_integers(get_ages(data))) if (disease != "ebs") { # define which var is eligible and interviewed for each dictionary eligible <- ifelse(tolower(disease) == "mortality", "member_number", "number_children") # check appropriate numbers (existing) for eligible # (interviewed doesnt exist anymore) # we now have NAs due to adding in non-response # (i.e. those who dont consent dont get filled in) expect_true(sum(data[[eligible]], na.rm = TRUE) > 0) } # pull together how many variables there should be # drop type "note" and count those with value_type "select_one" or NA base_count <- sum(dictionary$value_type %in% c("select_one", NA) & dictionary$type != "note" ) # add the select_multiples multiple_vars <- which(dictionary$value_type == "select_multiple") multiple_count <- 0 for (i in multiple_vars) { # original variable also added (therefor add one) nums <- nrow(dictionary$options[i][[1]]) + 1 multiple_count <- multiple_count + nums } # add the extra IDs generated id_counts <- ifelse(disease != "ebs", 3, 0) total_counts <- base_count + multiple_count + id_counts # generating the correct number of variables expect_equal(total_counts, ncol(data), label = disease) } })