# Expect that preprocessing reproduces a stored snapshot.
#
# Builds a test workflow with `setup_test_workflow()` (linking disabled),
# reads the snapshot CSV named `file` from `snapshots/data_processing/`
# (resolved through `testthat::test_path()`), and expects the workflow's
# preprocessed data to equal it.
#
# Arguments:
#   metadata       List forwarded to `setup_test_workflow()`.
#   is_aggregated  Forwarded to `setup_test_workflow()`.
#   time_freq      Forwarded to `setup_test_workflow()`.
#   file           File name of the snapshot CSV inside
#                  `snapshots/data_processing/`.
expect_equal_saved_prep <- function(
  metadata,
  is_aggregated = TRUE,
  time_freq = NULL,
  file = NULL
) {
  wf <- setup_test_workflow(
    metadata = metadata,
    is_aggregated = is_aggregated,
    time_freq = time_freq,
    link_geo = NULL,
    link = FALSE
  )

  snapshot_path <- testthat::test_path(
    paste0("snapshots/data_processing/", file)
  )
  expected <- read_saved_csv(snapshot_path)

  # Loose comparison: numeric tolerance of 0.01, attributes ignored.
  expect_equal(
    wf$preprocessed_data(),
    expected,
    tolerance = 0.01,
    ignore_attr = TRUE
  )
}

test_that("prepprocess is consistent", {
  skip_on_cran()

  set.seed(123)

  # Metadata for each data format, defined once and reused by the
  # individual-level and aggregated cases below.
  covid_meta <- list(
    is_timevar = TRUE,
    special_case = "covid",
    family = "binomial"
  )
  timevar_binomial_meta <- list(
    is_timevar = TRUE,
    special_case = NULL,
    family = "binomial"
  )
  timevar_normal_meta <- list(
    is_timevar = TRUE,
    special_case = NULL,
    family = "normal"
  )
  poll_meta <- list(
    is_timevar = FALSE,
    special_case = "poll",
    family = "binomial"
  )
  crosssec_binomial_meta <- list(
    is_timevar = FALSE,
    special_case = NULL,
    family = "binomial"
  )
  crosssec_normal_meta <- list(
    is_timevar = FALSE,
    special_case = NULL,
    family = "normal"
  )

  # COVID data: individual-level, then aggregated
  expect_equal_saved_prep(
    metadata = covid_meta,
    is_aggregated = FALSE,
    time_freq = "week",
    file = "covid_binomial_indiv.csv"
  )
  expect_equal_saved_prep(
    metadata = covid_meta,
    is_aggregated = TRUE,
    time_freq = NULL,
    file = "covid_binomial_agg.csv"
  )

  # General time-varying data with binary outcome:
  # individual-level, then aggregated
  expect_equal_saved_prep(
    metadata = timevar_binomial_meta,
    is_aggregated = FALSE,
    time_freq = "week",
    file = "timevar_binomial_indiv.csv"
  )
  expect_equal_saved_prep(
    metadata = timevar_binomial_meta,
    is_aggregated = TRUE,
    time_freq = NULL,
    file = "timevar_binomial_agg.csv"
  )

  # General time-varying data with continuous outcome (individual-level)
  expect_equal_saved_prep(
    metadata = timevar_normal_meta,
    is_aggregated = FALSE,
    time_freq = "week",
    file = "timevar_normal_indiv.csv"
  )

  # Polling data: individual-level, then aggregated
  expect_equal_saved_prep(
    metadata = poll_meta,
    is_aggregated = FALSE,
    time_freq = NULL,
    file = "poll_binomial_indiv.csv"
  )
  expect_equal_saved_prep(
    metadata = poll_meta,
    is_aggregated = TRUE,
    time_freq = NULL,
    file = "poll_binomial_agg.csv"
  )

  # General cross-sectional data with binary outcome:
  # individual-level, then aggregated
  expect_equal_saved_prep(
    metadata = crosssec_binomial_meta,
    is_aggregated = FALSE,
    time_freq = NULL,
    file = "crosssec_binomial_indiv.csv"
  )
  expect_equal_saved_prep(
    metadata = crosssec_binomial_meta,
    is_aggregated = TRUE,
    time_freq = NULL,
    file = "crosssec_binomial_agg.csv"
  )

  # General cross-sectional data with continuous outcome (individual-level)
  expect_equal_saved_prep(
    metadata = crosssec_normal_meta,
    is_aggregated = FALSE,
    time_freq = NULL,
    file = "crosssec_normal_indiv.csv"
  )
})

test_that("link_acs works with all linking geographies", {
  skip_on_cran()

  # Every case uses the same aggregated cross-sectional binomial setup with
  # linking enabled; only the linking geography differs.
  build_linked_workflow <- function(link_geo) {
    setup_test_workflow(
      metadata = list(
        is_timevar = FALSE,
        special_case = NULL,
        family = "binomial"
      ),
      is_aggregated = TRUE,
      time_freq = NULL,
      link_geo = link_geo,
      link = TRUE
    )
  }

  # No linking geography
  expect_no_error(build_linked_workflow(NULL))

  # Linking through zip
  expect_no_error(build_linked_workflow("zip"))

  # Linking through county
  expect_no_error(build_linked_workflow("county"))

  # Linking through state
  expect_no_error(build_linked_workflow("state"))
})


test_that("load_pstrat works", {
  skip_on_cran()

  pstrat_data <- example_pstrat_data()

  # Loading should go through, after which demo_bars("sex") must not error.
  expect_pstrat_loads <- function(wf) {
    # Build the workflow up front so its setup runs outside capture.output()
    force(wf)
    capture.output(
      wf$load_pstrat(pstrat_data),
      type = "message"
    )
    expect_no_error(wf$demo_bars("sex"))
  }

  # Loading should be refused with the special-case error message.
  expect_pstrat_rejected <- function(wf) {
    # Build the workflow up front so setup errors are not seen by expect_error()
    force(wf)
    expect_error(
      wf$load_pstrat(pstrat_data),
      "Custom poststratification data is not supported for special cases"
    )
  }

  # For general time-varying data
  # with binary outcome
  expect_pstrat_loads(
    setup_test_workflow(
      metadata = list(
        is_timevar = TRUE,
        special_case = NULL,
        family = "binomial"
      ),
      is_aggregated = TRUE,
      time_freq = NULL,
      link = FALSE
    )
  )

  # For general time-varying data
  # with continuous outcome
  expect_pstrat_loads(
    setup_test_workflow(
      metadata = list(
        is_timevar = TRUE,
        special_case = NULL,
        family = "normal"
      ),
      is_aggregated = FALSE,
      time_freq = "week",
      link = FALSE
    )
  )

  # For general cross-sectional data
  # with binary outcome
  expect_pstrat_loads(
    setup_test_workflow(
      metadata = list(
        is_timevar = FALSE,
        special_case = NULL,
        family = "binomial"
      ),
      is_aggregated = TRUE,
      time_freq = NULL,
      link = FALSE
    )
  )

  # For general cross-sectional data
  # with continuous outcome
  expect_pstrat_loads(
    setup_test_workflow(
      metadata = list(
        is_timevar = FALSE,
        special_case = NULL,
        family = "normal"
      ),
      is_aggregated = FALSE,
      time_freq = NULL,
      link = FALSE
    )
  )

  # For COVID data
  expect_pstrat_rejected(
    setup_test_workflow(
      metadata = list(
        is_timevar = TRUE,
        special_case = "covid",
        family = "binomial"
      ),
      is_aggregated = TRUE,
      time_freq = NULL,
      link = FALSE
    )
  )

  # For polling data (is_aggregated and time_freq left at their defaults)
  expect_pstrat_rejected(
    setup_test_workflow(
      metadata = list(
        is_timevar = FALSE,
        special_case = "poll",
        family = "binomial"
      ),
      link = FALSE
    )
  )
})

test_that(".impute is consistent", {
  skip_on_cran()

  set.seed(123)

  n <- 20
  demo_cols <- c("sex", "race", "age")

  raw_sample <- example_sample_data(
    is_timevar = FALSE,
    is_aggregated = FALSE,
    special_case = NULL,
    family = "binomial"
  )

  # Set the demographic columns to NA in the first `n` rows
  sample_with_gaps <- mutate(
    raw_sample,
    across(
      all_of(demo_cols),
      ~ replace(., row_number() <= n, NA)
    )
  )

  wf <- mrp_workflow()

  # Run preprocessing while capturing the message stream
  capture.output(
    wf$preprocess(
      sample_with_gaps,
      is_timevar = FALSE,
      is_aggregated = FALSE,
      special_case = NULL,
      family = "binomial"
    ),
    type = "message"
  )

  expected <- read_saved_csv(
    testthat::test_path("snapshots/data_processing/impute.csv")
  )

  expect_equal(wf$preprocessed_data(), expected)
})