# Shared fixtures for the handle_outliers tests ----

# Load the example dataset into the current environment.
data("fake_epi_df_togo")

# Complete-case copy that keeps every column. It must be built BEFORE the
# column selection below overwrites `fake_epi_df_togo`.
fake_epi_df_togo2 <- tidyr::drop_na(fake_epi_df_togo)

# Restrict the original object to columns whose names match
# "malaria" or "cholera".
fake_epi_df_togo <- dplyr::select(
  fake_epi_df_togo,
  tidyselect::matches("malaria|cholera")
)
# Test 1: Z-Score detection reports the expected outlier count
testthat::test_that("Z-Score method identifies outliers correctly", {
  zscore_res <- epiCleanr::handle_outliers(
    fake_epi_df_togo,
    vars = "malaria_cases",
    method = "zscore",
    report_mode = TRUE
  )

  # The report's `outliers` column is compared against a "flagged/total"
  # string; at least one row must read 5 of 900.
  outlier_counts <- zscore_res$report$outliers
  testthat::expect_true(any(outlier_counts == "5/900"))
})
# Test 2: Modified Z-Score detection reports the expected outlier count
testthat::test_that("Modified Z-Score method identifies outliers correctly", {
  mod_zscore_res <- epiCleanr::handle_outliers(
    fake_epi_df_togo,
    vars = "malaria_cases",
    method = "mod_zscore",
    report_mode = TRUE
  )

  # At least one report row must read 0 of 900 for this method.
  outlier_counts <- mod_zscore_res$report$outliers
  testthat::expect_true(any(outlier_counts == "0/900"))
})
# Test 3: IQR detection reports the expected outlier count
testthat::test_that("IQR method identifies outliers correctly", {
  iqr_res <- epiCleanr::handle_outliers(
    fake_epi_df_togo,
    vars = "malaria_cases",
    method = "iqr_method",
    report_mode = TRUE
  )

  # At least one report row must read 9 of 900 for this method.
  outlier_counts <- iqr_res$report$outliers
  testthat::expect_true(any(outlier_counts == "9/900"))
})
# Test 4: Check if outliers are removed correctly
testthat::test_that("Outliers are removed correctly", {
  # Maximum of malaria_cases over complete rows BEFORE any treatment.
  expected_max <- fake_epi_df_togo |>
    tidyr::drop_na() |>
    dplyr::summarise(max(malaria_cases)) |>
    dplyr::pull()

  res <- epiCleanr::handle_outliers(
    fake_epi_df_togo,
    method = "zscore",
    treat_method = "remove"
  )

  # Maximum over complete rows AFTER treatment; drop_na() discards any rows
  # that contain NA in the treated result.
  actual_max <- res |>
    tidyr::drop_na() |>
    dplyr::summarise(max(malaria_cases)) |>
    dplyr::pull()

  # Removing outliers must strictly lower the observed maximum.
  testthat::expect_gt(expected_max, actual_max)
})
# Test 5: report_mode = TRUE yields a list holding a report and a ggplot
testthat::test_that("Report mode returns correct types", {
  report_out <- epiCleanr::handle_outliers(
    fake_epi_df_togo,
    report_mode = TRUE
  )

  # Both the container and its `report` element must have base type "list"
  # (a data frame also has typeof() == "list").
  testthat::expect_type(report_out, "list")
  testthat::expect_type(report_out$report, "list")

  # The `plot` element must be a ggplot object.
  testthat::expect_s3_class(report_out$plot, "ggplot")
})
# Test 5b: A non-numeric variable passed in `vars` must not surface in the
# report
testthat::test_that("Function handles non-numeric columns", {
  mixed_res <- epiCleanr::handle_outliers(
    fake_epi_df_togo2,
    vars = c("district", "malaria_tests"),
    report_mode = TRUE
  )

  # NOTE(review): this inspects the report's column NAMES. Other tests read
  # `report$outliers` and `report$test`, which suggests variables may be
  # listed as row values rather than columns; if so this check cannot fail.
  # Confirm against the report structure.
  report_cols <- colnames(mixed_res$report)
  testthat::expect_false("district" %in% report_cols)
})
# Test 6: With treat_method = "remove", flagged values are turned into NA
testthat::test_that(
  "Outliers are replaced with NA when treat_method is 'remove'", {
    cleaned <- epiCleanr::handle_outliers(
      fake_epi_df_togo2,
      method = "zscore",
      treat_method = "remove"
    )

    # The input went through drop_na() in the setup, so any NA seen here was
    # introduced by the outlier treatment.
    for (col in c("malaria_tests", "malaria_cases")) {
      testthat::expect_true(any(is.na(cleaned[[col]])))
    }
  })
# Test 7: With treat_method = "mean", the overall mean of malaria_tests drops
testthat::test_that(
  "Outliers are replaced with mean when treat_method is 'mean'", {
    treated <- epiCleanr::handle_outliers(
      fake_epi_df_togo2,
      method = "zscore",
      treat_method = "mean"
    )

    # Mean of malaria_tests in the untreated fixture.
    mean_before <- fake_epi_df_togo2 |>
      dplyr::summarise(avg = mean(malaria_tests)) |>
      dplyr::pull(avg)

    # Mean of malaria_tests after outlier treatment.
    mean_after <- treated |>
      dplyr::summarise(avg = mean(malaria_tests)) |>
      dplyr::pull(avg)

    # The test passes only if the treatment lowered the mean.
    testthat::expect_lt(as.numeric(mean_after), as.numeric(mean_before))
  })
# Test 8: Check if outliers are replaced with median when treat_method ="median"
testthat::test_that(
  "Outliers are replaced with median when treat_method is 'median'", {
    res <- epiCleanr::handle_outliers(
      fake_epi_df_togo2,
      method = "zscore",
      treat_method = "median"
    )

    # Median of malaria_tests after treatment.
    actual_median_tests <- res |>
      dplyr::summarise(median(malaria_tests)) |>
      dplyr::pull()

    # Median of malaria_tests in the untreated fixture.
    expected_median_tests <- fake_epi_df_togo2 |>
      dplyr::summarise(median(malaria_tests)) |>
      dplyr::pull()

    # The treatment is expected to leave the median unchanged.
    testthat::expect_equal(
      as.numeric(actual_median_tests),
      as.numeric(expected_median_tests)
    )
  })
# Test 9: Check if outliers are replaced with quantile when
# treat_method="quantile"
testthat::test_that(
  "Outliers are replaced with quantile when treat_method is 'quantile'", {
    res <- epiCleanr::handle_outliers(
      fake_epi_df_togo2,
      method = "zscore",
      treat_method = "quantile"
    )

    # Median of malaria_tests after treatment.
    actual_median_tests <- res |>
      dplyr::summarise(median(malaria_tests)) |>
      dplyr::pull()

    # Median of malaria_tests in the untreated fixture.
    expected_median_tests <- fake_epi_df_togo2 |>
      dplyr::summarise(median(malaria_tests)) |>
      dplyr::pull()

    # The treatment is expected to leave the median unchanged.
    testthat::expect_equal(
      as.numeric(actual_median_tests),
      as.numeric(expected_median_tests)
    )
  })
# Test 10: Check if outliers are replaced with grouped_mean when
# treat_method="grouped_mean"
testthat::test_that(
  "Outliers are replaced with grouped mean when treat_method is 'grouped_mean'", {
    res <- epiCleanr::handle_outliers(
      fake_epi_df_togo2,
      method = "zscore",
      grouping_vars = c("district", "year", "month"),
      treat_method = "grouped_mean"
    )

    # Median of malaria_tests after treatment.
    actual_median_tests <- res |>
      dplyr::summarise(median(malaria_tests)) |>
      dplyr::pull()

    # Median of malaria_tests in the untreated fixture.
    expected_median_tests <- fake_epi_df_togo2 |>
      dplyr::summarise(median(malaria_tests)) |>
      dplyr::pull()

    # The treatment is expected to leave the median unchanged.
    testthat::expect_equal(
      as.numeric(actual_median_tests),
      as.numeric(expected_median_tests)
    )
  })
# Test 11: Check for error when grouped_mean is declared but no groups are given
testthat::test_that(
  "Grouped_mean is declared but no groups are given", {
    # No `grouping_vars` is supplied, so the call is expected to fail.
    # NOTE(review): no message pattern is asserted, so any error satisfies
    # this expectation; consider pinning the expected message.
    testthat::expect_error(
      epiCleanr::handle_outliers(
        fake_epi_df_togo,
        method = "zscore",
        treat_method = "grouped_mean"
      )
    )
  })
# Test 12: An unsupported treat_method must raise a descriptive error
testthat::test_that(
  "Check for error when invalid treat method is given", {
    # The error message must match the pattern naming the rejected value.
    testthat::expect_error(
      object = epiCleanr::handle_outliers(
        fake_epi_df_togo,
        method = "zscore",
        treat_method = "multiple_imputation"
      ),
      regexp = "Unknown treat_method: multiple_imputation"
    )
  })
# Test 13: Check 'grouped_mean' treatment in handle_outliers
testthat::test_that(
  "Check 'grouped_mean' treatment in handle_outliers function", {
    # Treat malaria_cases outliers using per-group means.
    result <- epiCleanr::handle_outliers(
      fake_epi_df_togo2,
      method = "zscore",
      vars = "malaria_cases",
      grouping_vars = c("district", "year", "month"),
      treat_method = "grouped_mean"
    )

    # Per-group means of the untreated data, one value per
    # district/year/month combination. reframe() returns an ungrouped data
    # frame, so no explicit ungroup() is needed.
    group_means <- fake_epi_df_togo2 |>
      dplyr::group_by(district, year, month) |>
      dplyr::reframe(
        dplyr::across(malaria_cases, \(x) mean(x, na.rm = TRUE))
      ) |>
      dplyr::pull(malaria_cases)

    treated_mean <- as.numeric(mean(result$malaria_cases))
    untreated_group_mean <- as.numeric(mean(group_means))

    # The mean of the treated column must be strictly below the mean of the
    # untreated group means.
    testthat::expect_lt(treated_mean, untreated_group_mean)
  })
# Test 14: Check behavior when method is NULL or has length greater than 1
testthat::test_that(
  "Check behavior when method is NULL or has length greater than 1", {
    # Case 1: method = NULL. The plot title and report are expected to name
    # the Modified Z-Score method.
    result1 <- epiCleanr::handle_outliers(
      fake_epi_df_togo,
      method = NULL,
      report_mode = TRUE
    )
    title <- paste0(
      "Outlier Plot (Method: Modified Z-Score )",
      ": Outliers in orange",
      ", Non-outliers in blue"
    )
    testthat::expect_equal(result1$plot$labels$title, title)
    # NOTE(review): row 7 is a hard-coded position in the report and depends
    # on the report's row ordering; confirm it is stable.
    testthat::expect_equal(result1$report$test[7], "Modified Z-Score")

    # Case 2: method has length > 1. The plot title is expected to name the
    # first entry (Z-Score).
    # NOTE(review): other tests spell the method "mod_zscore"; confirm that
    # "modified_zscore" is intentional here.
    result2 <- epiCleanr::handle_outliers(
      fake_epi_df_togo,
      method = c("zscore", "modified_zscore"),
      report_mode = TRUE
    )
    title2 <- paste0(
      "Outlier Plot (Method: Z-Score )",
      ": Outliers in orange",
      ", Non-outliers in blue"
    )
    testthat::expect_equal(result2$plot$labels$title, title2)
  })