outbreaks <- c("MeAsles", "CHolera", "AjS", "meningitis", 'Cholera_intersectional',
               'Measles_intersectional', 'Meningitis_intersectional', 'AJS_intersectional',
               'Diphtheria_intersectional')
surveys   <- c("MOrtality", "VaCcination_ShoRt", 'Vaccination_LonG', "NutritIon", 'ebs')

# Functions for checking age columns
get_ages <- function(x) x[grepl("age_(year|month|day)s?$", names(x), perl = TRUE)]
check_exclusive_ages <- function(x, n = 300) {
  # Check that the age columns are all cromulent.
  # There should be a total of `n` ages, but no more, indicating that they
  # are all exclusive
  # Alex: changed to less than or equal because have NAs now
  all(vapply(x, function(i) sum(i > -1, na.rm = TRUE) <= n, logical(1)))
}
check_age_integers <- function(x) {
  all(vapply(x, is.integer, logical(1)))
}

test_that("errors are thrown if the wrong dicts are used", {

  expect_error(gen_data("Dada"),
               "'dictionary' must be one of: 'Cholera', 'Measles', 'Meningitis', 'AJS', 'Cholera_intersectional', 'Measles_intersectional', 'Meningitis_intersectional', 'AJS_intersectional', 'Diphtheria_intersectional', 'Mortality', 'Nutrition', 'Vaccination_long', 'Vaccination_short', 'ebs'",
    fixed = TRUE
  )

})

test_that("msf_dict works", {

  for (disease in c(outbreaks)) {
    nested  <- msf_dict(disease)
    long    <- msf_dict(disease, compact = FALSE)
    neither <- msf_dict(disease, compact = FALSE, long = FALSE)

    expect_is(nested, "tbl_df", label = disease)
    expect_is(long, "tbl_df", label = disease)

    # neither compact nor long is a list of dictionaries and options
    expect_is(neither, "list", label = disease)
    expect_named(neither, c("dictionary", "options"), label = disease)

    # None of the option names have bracketed arguments in front of them
    expect_false(all(grepl("^\\[", long$option_name)))
    expect_false(all(grepl("^\\[", neither$options$option_name)))

    # rows in the dictionary should equal rows in the nested dictionary
    expect_equal(nrow(neither$dictionary), nrow(nested), label = disease)

    # the number of columns in the long data should be equal to the sum of the
    # columns minus 1 in the separated data sets
    expect_equal(ncol(long), sum(vapply(neither, ncol, integer(1))) - 1L, label = disease)

    # the number of columns in the dictionary should be one less than the number
    # of columns in the nested data.
    expect_equal(ncol(neither$dictionary) + 1L, ncol(nested), label = disease, info = disease)
  }

})

test_that("msf_dict works for surveys", {

  for (type in surveys) {
    nested <- msf_dict(type, compact = TRUE)
    long   <- msf_dict(type, compact = FALSE)

    # a tibble is produced
    expect_is(nested, "tbl_df", label = type)
    expect_is(long, "tbl_df", label = type)

    # the correct number of variables produced in both formats
    expect_gt(nrow(long), nrow(nested), label = type)
    expect_gt(ncol(long), ncol(nested), label = type)

  }

})

test_that("outbreak data can be generated", {

  for (disease in c(outbreaks)) {

    varnames = ifelse(grepl("_intersectional", disease), "name", "data_element_shortname")

    dictionary <- msf_dict(disease)
    data       <- gen_data(disease, varnames = varnames, numcases = 300)

    # a tibble is produced
    expect_is(data, "tbl_df", label = disease)

    # data produced has same number vars as dictionary
    expect_equal(nrow(dictionary), ncol(data), label = disease)

    # ages are all appropriate (functions defined at top)
    expect_true(check_exclusive_ages(get_ages(data), 300))
    expect_true(check_age_integers(get_ages(data)))
  }
})

test_that("survey data can be generated", {

  for (disease in surveys) {
    dictionary <- msf_dict(disease)
    data       <- gen_data(disease, varnames = "name", numcases = 300)

    # check that produces a tibble
    expect_is(data, "tbl_df", label = disease)

    # ages are all appropriate (functions defined at top)
    expect_true(check_exclusive_ages(get_ages(data), 300))
    expect_true(check_age_integers(get_ages(data)))


    if (disease != "ebs") {
      # define which var is eligible and interviewed for each dictionary
      eligible <- ifelse(tolower(disease) == "mortality",
                         "member_number",
                         "number_children")

      # check appropriate numbers (existing) for eligible
      # (interviewed doesnt exist anymore)
      # we now have NAs due to adding in non-response
      # (i.e. those who dont consent dont get filled in)
      expect_true(sum(data[[eligible]], na.rm = TRUE) > 0)

    }


    # pull together how many variables there should be

    # drop type "note" and count those with value_type "select_one" or NA
    base_count <- sum(dictionary$value_type %in% c("select_one", NA) &
                        dictionary$type != "note"
                        )

    # add the select_multiples
    multiple_vars <- which(dictionary$value_type == "select_multiple")

    multiple_count <- 0
    for (i in multiple_vars) {
      # original variable also added (therefor add one)
      nums <- nrow(dictionary$options[i][[1]]) + 1
      multiple_count <- multiple_count + nums
    }

    # add the extra IDs generated
    id_counts <- ifelse(disease != "ebs", 3, 0)

    total_counts <- base_count + multiple_count + id_counts

    # generating the correct number of variables
    expect_equal(total_counts, ncol(data), label = disease)
  }
})