# Unit tests for wildfire.R if (!"package:climatehealth" %in% search()) { pkgload::load_all(".", export_all = TRUE, helpers = FALSE, quiet = TRUE) } # Create temp_dir to be used by all WF tests temp_dir <- tempdir() temp_dir <- file.path(temp_dir, "wildfire_tests") if (!file.exists(temp_dir)) dir.create(temp_dir) # Test read_and_format_data WF_TEST_HEALTH <- data.frame( date = c("2020-01-01", "2020-01-02", "2020-01-03"), tmean = c(5.2, 6.1, 4.8), deaths = c(10, 12, 9), region = c("North", "South", "East"), relative_humidity = c(70, 65, 80), wind_speed = c(3.2, 2.8, 4.0), population = c(1000,4000,900), stringsAsFactors = FALSE, regnames = c("North", "South", "East") ) WF_TEST_CLIMATE <- data.frame( date = lubridate::dmy(c("01-01-2020", "02-01-2020", "03/01/2020")), region = c("North", "South", "East"), mean_PM = c(0.0005, 0.0001, 0.00015) ) test_that( "read_and_format_data returns as expected with all columns provided.", { res <- read_and_format_data( health_path = WF_TEST_HEALTH, date_col = "date", mean_temperature_col = "tmean", health_outcome_col = "deaths", region_col = "regnames", rh_col = "relative_humidity", wind_speed_col = "wind_speed", population_col = "population" ) exp_cols = c( "date", "tmean", "health_outcome", "region", "rh", "wind_speed", "year", "month", "day", "dow", "pop" ) expect_true(all(exp_cols %in% colnames(res))) expect_equal(res$day, c(1, 2, 3)) expect_equal(res$month, c(1, 1, 1)) } ) test_that( "read_and_format_data returns as expected with no optional columns.", { res <- read_and_format_data( health_path = WF_TEST_HEALTH, date_col = "date", mean_temperature_col = "tmean", health_outcome_col = "deaths", population_col = "population" ) exp_cols = c( "date", "tmean", "health_outcome", "region", "rh", "wind_speed", "year", "month", "day", "dow", "pop" ) expect_true(all(exp_cols %in% colnames(res))) expect_equal(res$region, rep("no_region", 3)) expect_equal(res$rh, rep(NA, 3)) expect_equal(res$wind_speed, rep(NA, 3)) } ) test_that( "read_and_format_data identifies and converts dates in dmy format.", { WF_DMY_DATA = dplyr::mutate( WF_TEST_HEALTH, date = c("01-01-2020", "02-01-2020", "03/01/2020") ) res <- read_and_format_data( health_path = WF_DMY_DATA, date_col = "date", mean_temperature_col = "tmean", health_outcome_col = "deaths", population_col = "population" ) expect_equal(res$dow, c("Wed", "Thu", "Fri")) } ) test_that( "read_and_format_data works without population_col when no population data are supplied.", { no_pop_data <- dplyr::select(WF_TEST_HEALTH, -population) res <- read_and_format_data( health_path = no_pop_data, date_col = "date", mean_temperature_col = "tmean", health_outcome_col = "deaths" ) expect_false("pop" %in% colnames(res)) expect_true(all(c("date", "tmean", "health_outcome", "region") %in% colnames(res))) } ) # Tests for extract_means_for_geography test_that( "extract_means_for_geography raises when the terra package is not installed.", { with_mocked_bindings( requireNamespace = function(pkg, quietly=T) FALSE, .package = "base", code = expect_error( extract_means_for_geography("", ""), "'terra' is not installed. Run climatehealth::" ) ) } ) test_that("extract_means_for_geography returns expected structure for a tiny NetCDF + shapefile", { # Helper: create a minimal NetCDF with dims lon, lat, time and one variable "pm25" create_test_ncdf <- function(path) { lon <- c(0.25, 0.75) lat <- c(0.25, 0.75) time <- c(0, 1) # "days since 2020-01-01" dim_lon <- ncdf4::ncdim_def("lon", "degrees_east", lon) dim_lat <- ncdf4::ncdim_def("lat", "degrees_north", lat) dim_time <- ncdf4::ncdim_def("time", "days since 2020-01-01", time, unlim = FALSE) var_pm <- ncdf4::ncvar_def( name = "pm25", units = "ug/m3", dim = list(dim_lon, dim_lat, dim_time), missval = -9999, prec = "float" ) nc <- ncdf4::nc_create(path, vars = list(var_pm)) # Array is lon x lat x time, 2 x 2 x 2 # Day 1 values: 1,2,3,4 ; Day 2 values: 5,6,7,8 arr <- array(c(1, 2, 3, 4, 5, 6, 7, 8), dim = c(2, 2, 2)) ncdf4::ncvar_put(nc, var_pm, arr) ncdf4::nc_close(nc) } # Helper: create a shapefile with two polygons splitting the domain in half # Region "west" covers x in [0,0.5], Region "east" covers x in [0.5,1] create_test_shapefile <- function(dsn_dir) { poly_west <- sf::st_polygon(list(rbind( c(0.0, 0.0), c(0.5, 0.0), c(0.5, 1.0), c(0.0, 1.0), c(0.0, 0.0) ))) poly_east <- sf::st_polygon(list(rbind( c(0.5, 0.0), c(1.0, 0.0), c(1.0, 1.0), c(0.5, 1.0), c(0.5, 0.0) ))) shp <- sf::st_sf( region = c("west", "east"), geometry = sf::st_sfc(poly_west, poly_east), crs = 4326 ) # Write ESRI Shapefile shp_path <- file.path(dsn_dir, "regions.shp") sf::st_write(shp, shp_path, quiet = TRUE, delete_layer = TRUE) shp_path } nc_path <- file.path(temp_dir, "tiny_pm25.nc") create_test_ncdf(nc_path) shp_path <- create_test_shapefile(temp_dir) out <- extract_means_for_geography(ncdf_path = nc_path, shp_path = shp_path) # Basic structure checks expect_s3_class(out, "data.frame") expect_true(all(c("region", "date", "mean_PM") %in% names(out))) # Expect two regions * two time steps = 4 rows expect_equal(nrow(out), 4L) # date should be Date and non-missing expect_s3_class(out$date, "Date") expect_false(any(is.na(out$date))) # mean_PM should be numeric and non-missing expect_true(is.numeric(out$mean_PM)) expect_false(any(is.na(out$mean_PM))) }) test_that("extract_means_for_geography errors when region_col is missing", { create_test_ncdf <- function(path) { lon <- c(0.25, 0.75) lat <- c(0.25, 0.75) time <- c(0, 1) dim_lon <- ncdf4::ncdim_def("lon", "degrees_east", lon) dim_lat <- ncdf4::ncdim_def("lat", "degrees_north", lat) dim_time <- ncdf4::ncdim_def("time", "days since 2020-01-01", time) var_pm <- ncdf4::ncvar_def("pm25", "ug/m3", list(dim_lon, dim_lat, dim_time), -9999, "float") nc <- ncdf4::nc_create(path, vars = list(var_pm)) arr <- array(c(1, 2, 3, 4, 5, 6, 7, 8), dim = c(2, 2, 2)) ncdf4::ncvar_put(nc, var_pm, arr) ncdf4::nc_close(nc) } create_bad_shapefile <- function(dsn_dir) { poly <- sf::st_polygon(list(rbind( c(0.0, 0.0), c(1.0, 0.0), c(1.0, 1.0), c(0.0, 1.0), c(0.0, 0.0) ))) shp <- sf::st_sf( not_region = "x", geometry = sf::st_sfc(poly), crs = 4326 ) shp_path <- file.path(dsn_dir, "bad_regions.shp") sf::st_write(shp, shp_path, quiet = TRUE, delete_layer = TRUE) shp_path } nc_path <- file.path(temp_dir, "tiny_pm25_missing_region.nc") create_test_ncdf(nc_path) shp_path <- create_bad_shapefile(temp_dir) expect_error( extract_means_for_geography(ncdf_path = nc_path, shp_path = shp_path, region_col = "region"), "Column 'region' not found in shapefile" ) }) test_that("extract_means_for_geography uses custom output_value_col", { create_test_ncdf <- function(path) { lon <- c(0.25, 0.75) lat <- c(0.25, 0.75) time <- c(0, 1) dim_lon <- ncdf4::ncdim_def("lon", "degrees_east", lon) dim_lat <- ncdf4::ncdim_def("lat", "degrees_north", lat) dim_time <- ncdf4::ncdim_def("time", "days since 2020-01-01", time) var_pm <- ncdf4::ncvar_def("pm25", "ug/m3", list(dim_lon, dim_lat, dim_time), -9999, "float") nc <- ncdf4::nc_create(path, vars = list(var_pm)) arr <- array(c(1, 2, 3, 4, 5, 6, 7, 8), dim = c(2, 2, 2)) ncdf4::ncvar_put(nc, var_pm, arr) ncdf4::nc_close(nc) } create_test_shapefile <- function(dsn_dir) { poly <- sf::st_polygon(list(rbind( c(0.0, 0.0), c(1.0, 0.0), c(1.0, 1.0), c(0.0, 1.0), c(0.0, 0.0) ))) # Use a different region column name to test region_col mapping too shp <- sf::st_sf( my_region = "all", geometry = sf::st_sfc(poly), crs = 4326 ) shp_path <- file.path(dsn_dir, "one_region.shp") sf::st_write(shp, shp_path, quiet = TRUE, delete_layer = TRUE) shp_path } tmp <- withr::local_tempdir() nc_path <- file.path(tmp, "tiny_pm25_custom_col.nc") create_test_ncdf(nc_path) shp_path <- create_test_shapefile(tmp) out <- extract_means_for_geography( ncdf_path = nc_path, shp_path = shp_path, region_col = "my_region", output_value_col = "avg_pm" ) expect_true(all(c("region", "date", "avg_pm") %in% names(out))) expect_false("mean_PM" %in% names(out)) }) test_that("extract_means_for_geography handles shapefile CRS different from raster CRS (transform)", { create_test_ncdf <- function(path) { lon <- c(0.25, 0.75) lat <- c(0.25, 0.75) time <- c(0, 1) dim_lon <- ncdf4::ncdim_def("lon", "degrees_east", lon) dim_lat <- ncdf4::ncdim_def("lat", "degrees_north", lat) dim_time <- ncdf4::ncdim_def("time", "days since 2020-01-01", time) var_pm <- ncdf4::ncvar_def("pm25", "ug/m3", list(dim_lon, dim_lat, dim_time), -9999, "float") nc <- ncdf4::nc_create(path, vars = list(var_pm)) arr <- array(c(1, 2, 3, 4, 5, 6, 7, 8), dim = c(2, 2, 2)) ncdf4::ncvar_put(nc, var_pm, arr) ncdf4::nc_close(nc) } create_test_shapefile_wrong_crs <- function(dsn_dir) { poly <- sf::st_polygon(list(rbind( c(0.0, 0.0), c(1.0, 0.0), c(1.0, 1.0), c(0.0, 1.0), c(0.0, 0.0) ))) shp_ll <- sf::st_sf( region = "all", geometry = sf::st_sfc(poly), crs = 4326 ) # Transform to Web Mercator before writing shp_3857 <- sf::st_transform(shp_ll, 3857) shp_path <- file.path(dsn_dir, "region_3857.shp") sf::st_write(shp_3857, shp_path, quiet = TRUE, delete_layer = TRUE) shp_path } nc_path <- file.path(temp_dir, "tiny_pm25_crs.nc") create_test_ncdf(nc_path) shp_path <- create_test_shapefile_wrong_crs(temp_dir) out <- extract_means_for_geography(ncdf_path = nc_path, shp_path = shp_path) expect_equal(unique(out$region), "all") expect_equal(nrow(out), 2L) expect_false(any(is.na(out$mean_PM))) }) # Tests for join_health_and_climate_data test_that( "join_health_and_climate_data behaves as expected.", { health <- read_and_format_data( health_path = WF_TEST_HEALTH, date_col = "date", mean_temperature_col = "tmean", health_outcome_col = "deaths", region_col = "region", rh_col = "relative_humidity", wind_speed_col = "wind_speed", population_col = "population" ) joined <- join_health_and_climate_data( climate_data = WF_TEST_CLIMATE, health_data = health, region_col = "region", date_col = "date", exposure_col = "mean_PM" ) exp_cols <- c( "date", "tmean", "health_outcome", "region", "rh", "wind_speed", "year", "month", "day", "dow", "mean_PM", "pop" ) expect_true(all(exp_cols %in% colnames(joined))) expect_equal(joined$mean_PM, c(500000, 100000, 150000)) } ) # Tests for load_wildfire_data test_that("load_wildfire_data returns formatted health data and renames PM column when join_wildfire_data = FALSE", { # Arrange: create a deterministic health_df that read_and_format_data would return health_df_stub <- data.frame( date = as.Date(c("2020-01-01", "2020-01-02", "2020-01-03")), tmean = c(10, 11, 12), health_outcome = c(1, 2, 3), region = c("A", "A", "A"), rh = c(50, 55, 60), wind_speed = c(3, 4, 5), pop = c(100, 700, 500), pm_raw = c(1.1, 2.2, 3.3), year = c(2020, 2020, 2020), month = c(1, 1, 1), day = c(1, 2, 3), dow = c("Wed", "Thu", "Fri") ) called_extract <- FALSE local_mocked_bindings( read_and_format_data = function(...) health_df_stub, extract_means_for_geography = function(...) { called_extract <<- TRUE stop("extract_means_for_geography should not be called when join_wildfire_data = FALSE") } ) # Act res <- load_wildfire_data( health_path = "ignored_because_mocked", ncdf_path = NULL, shp_path = NULL, join_wildfire_data = FALSE, date_col = "date", region_col = "region", shape_region_col = "region", mean_temperature_col = "tmean", population_col = "population", health_outcome_col = "deaths", rh_col = "relative_humidity", wind_speed_col = "wind_speed", pm_2_5_col = "pm_raw" ) # Assert: extractor not called expect_false(called_extract) # Assert: mean_PM exists and equals pm_raw expect_true("mean_PM" %in% names(res)) expect_equal(res$mean_PM, health_df_stub$pm_raw) # Assert: pm_raw no longer present (rename not copy) expect_false("pm_raw" %in% names(res)) # Assert: key columns still exist exp_columns <- c( "date", "tmean", "health_outcome", "region", "rh", "wind_speed", "mean_PM", "year", "month", "day", "dow", "pop" ) expect_true(all(exp_columns %in% names(res))) # Assert: date type preserved expect_s3_class(res$date, "Date") }) test_that("load_wildfire_data calls extractor + joiner with correct arguments when join_wildfire_data = TRUE", { # Arrange stubs health_df_stub <- data.frame( date = as.Date(c("2020-01-01", "2020-01-02")), tmean = c(10, 11), health_outcome = c(1, 2), region = c("A", "A"), pop = c(1000, 2500) ) wildfire_df_stub <- data.frame( region = c("A", "A"), date = as.Date(c("2020-01-01", "2020-01-02")), mean_PM = c(5, 6) ) joined_df_stub <- data.frame( date = as.Date(c("2020-01-01", "2020-01-02")), region = c("A", "A"), tmean = c(10, 11), health_outcome = c(1, 2), mean_PM = c(5, 6) ) # Capture calls captured <- list( ncdf_path = NULL, shp_path = NULL, shape_region_col = NULL, join_region_col = NULL, join_date_col = NULL ) local_mocked_bindings( read_and_format_data = function(...) health_df_stub, extract_means_for_geography = function(ncdf_path, shp_path, region_col, ...) { captured$ncdf_path <<- ncdf_path captured$shp_path <<- shp_path captured$shape_region_col <<- region_col wildfire_df_stub }, join_health_and_climate_data = function(climate_data, health_data, ...) { # Additional sanity checks inside the mock expect_equal(climate_data, wildfire_df_stub) expect_equal(health_data, health_df_stub) joined_df_stub } ) # Act res <- load_wildfire_data( health_path = "ignored_because_mocked", ncdf_path = "path/to/file.nc", shp_path = "path/to/shape.shp", join_wildfire_data = TRUE, date_col = "date", region_col = "region", shape_region_col = "shape_name", population_col = "population", mean_temperature_col = "tmean", health_outcome_col = "deaths", rh_col = NULL, wind_speed_col = NULL ) # Assert: output is whatever the joiner returns expect_equal(res, joined_df_stub) # Assert: correct wiring of parameters expect_equal(captured$ncdf_path, "path/to/file.nc") expect_equal(captured$shp_path, "path/to/shape.shp") expect_equal(captured$shape_region_col, "shape_name") }) test_that("load_wildfire_data defaults to joining when join_wildfire_data is not supplied", { health_df_stub <- data.frame( date = as.Date("2020-01-01"), tmean = 10, health_outcome = 1, region = "A", pop = 1000 ) wildfire_df_stub <- data.frame( region = "A", date = as.Date("2020-01-01"), mean_PM = 5 ) joined_df_stub <- data.frame( date = as.Date("2020-01-01"), region = "A", tmean = 10, health_outcome = 1, mean_PM = 5 ) called_extract <- FALSE called_join <- FALSE local_mocked_bindings( read_and_format_data = function(...) health_df_stub, extract_means_for_geography = function(...) { called_extract <<- TRUE; wildfire_df_stub }, join_health_and_climate_data = function(...) { called_join <<- TRUE; joined_df_stub } ) res <- load_wildfire_data( health_path = "ignored_because_mocked", ncdf_path = "dummy.nc", shp_path = "dummy.shp", # join_wildfire_data omitted on purpose date_col = "date", region_col = "region", shape_region_col = "region_in_shape", mean_temperature_col = "tmean", population_col = "population", health_outcome_col = "deaths" ) expect_true(called_extract) expect_true(called_join) expect_equal(res, joined_df_stub) }) # Tests for get_lags_and_means test_that( "get_lags_and_means returns the expected dataframe.", { LAG_MEAN_DF <- data.frame( tmean = c(5, 10, 15, 10, 3), tmin = c(3, 8, 13, 8, 2) ) res <- get_lags_and_means(LAG_MEAN_DF, "tmean", 2) expect_equal(length(colnames(res)), 8) exp_cols <- c( "tmean_l0", "tmean_l1", "tmean_l2", "tmean_l0_mean", "tmean_l1_mean", "tmean_l2_mean" ) expect_true(all(exp_cols %in% colnames(res))) expect_equal(res$tmean_l1_mean, c(NA, 7.5, 12.5, 12.5, 6.5)) } ) # Tests for create_lagged_variables test_that( "create_lagged_variables works as expected when provided the correct data.", { LAGGED_VARS_DF <- data.frame( region = c("A1", "A1", "A2", "A2", "A2", "A2"), tmean = c(5, 10, 2, 3, 5, 7), mean_PM = c(3, 3, 10, 7, 12, 20) ) res <- create_lagged_variables( data = LAGGED_VARS_DF, wildfire_lag = 2, temperature_lag = 1 ) added_cols <- c( "mean_PM_l0", "mean_PM_l1", "mean_PM_l2", "mean_PM_l0_mean", "mean_PM_l1_mean", "mean_PM_l2_mean", "tmean_l0", "tmean_l1", "tmean_l0_mean", "tmean_l1_mean" ) expect_true(all(added_cols %in% colnames(res))) } ) # Tests for create_temperature_splines test_that( "create_temperature_splines raises an error when degrees_freedom < 1.", { expect_error( create_temperature_splines(NULL, 0, 0), "Degrees of freedom must be >= 1." ) } ) test_that( "create_temperature_splines raises an error when nlag < 0.", { expect_error( create_temperature_splines(NULL, -1, 2), "nlag must be >= 0." ) } ) test_that( "create_temperature_splines raises an error when tmean isn't included.", { NO_TMEAN = data.frame(test = c(1, 2)) expect_error( create_temperature_splines(NO_TMEAN, 1, 6), "tmean not found in dataset to create splines." ) } ) test_that( "create_temperature_splines raises an error when the correct lag column isn't included.", { NO_TMEAN_LAG = data.frame(tmean = c(1, 2)) expect_error( create_temperature_splines(NO_TMEAN_LAG, 1, 6), "tmean_l1_mean not found in dataset." ) } ) test_that( "create_temperature_splines calculates splines as expected.", { SPLINES_DF <- data.frame( region = c("A1", "A1", "A1", "A2", "A2", "A2"), tmean = c(10, 14, 13, 5, 4, 9), tmean_l1_mean = c(NA, 12, 13.5, NA, 4.5, 6.5) ) res <- create_temperature_splines( data = SPLINES_DF, nlag = 1, degrees_freedom = 2 ) expect_true("ns.tmean" %in% colnames(res)) exp_splines_1 <- c(NA, 0, 0.3441, NA, 0, 0.3441) exp_splines_2 <- c(NA, 0, 0.7706, NA, 0, 0.7706) expect_equal(exp_splines_1, round(res$ns.tmean[, 1], 4)) expect_equal(exp_splines_2, round(res$ns.tmean[, 2], 4)) } ) # Tests for time_stratify TS_DATA <- data.frame( month = rep(c(01, 02), 2), year = c(2001, 2001, 2001, 2001), dow = rep(c("Mon", "Tue"), 2), region = c("Wales", "Wales", "North East", "North East"), health_outcome = c(57, 59, 278, 301) ) test_that( "time_stratify raises an error if required columns are missing.", { TS_FAIL <- TS_DATA[, !(colnames(TS_DATA) %in% c("dow"))] expect_error( time_stratify(TS_FAIL), "Data must include columns:" ) } ) test_that( "time_stratify returns expected values", { res <- time_stratify(TS_DATA) exp_stratum <- c( "North_East:2001:1:Mon", "North_East:2001:2:Tue", "Wales:2001:1:Mon", "Wales:2001:2:Tue" ) expect_equal(typeof(res$stratum), "integer") expect_equal(as.character(res$stratum), exp_stratum) } ) test_that( "descriptive_stats raises an error when the output directory does not exist.", { expect_error( descriptive_stats( data = NULL, variables = c(), bin_width = 5, output_dir = "does/not/exist" ), "Output directory does not exist" ) } ) test_that( "descriptive_stats saves out descriptive stats files correctly", { DS_DATA <- data.frame( health_outcome = c(150, 30, 122, 75, 94), tmean = c(7, 9, 14, 13, 13), rainfall = c(0.21, 0.22, 0.17, 0.18, 0.26) ) descriptive_stats( data = DS_DATA, variables = c("tmean", "rainfall"), output_dir = temp_dir ) expect_true(file.exists(file.path(temp_dir, "health_outcome_hist.png"))) expect_true(file.exists(file.path(temp_dir, "variable_summaries.csv"))) } ) # Tests for check_wildfire_vif test_that( "check_wildfire_vif raises an error when save_csv==T and no output path is provided.", { expect_error( check_wildfire_vif(NULL, c(), TRUE, NULL, FALSE), "No output path provided when save_csv==T." ) } ) test_that( "check_wildfire_vif raises an error when predictors is of the wrong type.", { expect_error( check_wildfire_vif(NULL, 5643574, FALSE, NULL, FALSE), "Please provide predictor variable names as a character vector" ) } ) test_that( "check_wildfire_vif raises an error when predictors is not of length 2.", { expect_error( check_wildfire_vif(NULL, c("one"), FALSE, NULL, FALSE), "Please provide at least two predictor variables" ) } ) set.seed(42) WIF_VIF_DATA <- data.frame( region = rep(c("RegionA", "RegionB"), each = 30), health_outcome = rpois(60, lambda = 50), smoke_exposure = rnorm(60, mean = 10, sd = 3), temperature = rnorm(60, mean = 20, sd = 2) ) WIF_VIF_DATA$pm25 <- WIF_VIF_DATA$smoke_exposure * 2 + rnorm(60, 0, 10) test_that( "check_wildfire calculates and saves VIF as expected", { if (!file.exists(file.path(temp_dir, "model_validation"))) dir.create(file.path(temp_dir, "model_validation")) out <- capture.output(check_wildfire_vif( WIF_VIF_DATA, c("smoke_exposure", "pm25", "temperature"), TRUE, temp_dir, TRUE )) # Printed message: accept presence anywhere on the line ([1] prefix from print is common) expect_true(any(grepl("Variance inflation factor:", out))) # Extract numeric tokens to ensure something VIF-like was printed nums <- suppressWarnings(as.numeric( unlist(regmatches(out, gregexpr("\\d+\\.?\\d*", out))) )) # At least as many numeric tokens as predictors per region, minimum 6 tokens expect_true(sum(!is.na(nums)) >= 6) # File created output_fpath <- file.path(temp_dir, "model_validation", "vif_results.csv") expect_true(file.exists(output_fpath)) # Read file and validate header and notes lines <- readLines(output_fpath, warn = FALSE) expect_true(length(lines) >= 2) # Allow quoted or unquoted header fields; write.csv quotes by default expect_true(grepl('^"?region"?,\\s*"?formula"?,\\s*"?var"?,\\s*"?VIF"?$', lines[1])) # There should be at least one non-empty data row before the appended notes # We don't assert exact content because current function overwrites per var in loop expect_true(any(nzchar(lines[2:length(lines)]))) # Notes must be present and in order near the end of the file note1 <- "Note: VIF model validation results." note2 <- "Please refer to wildfire methods documentation (section 3.7) for interpretation guidelines." idx1 <- which(trimws(lines) == note1) idx2 <- which(trimws(lines) == note2) expect_true(length(idx1) == 1) expect_true(length(idx2) == 1) expect_true(idx1 < idx2) # the final note should be close to the end; allow some buffer for trailing newline behavior expect_true(idx2 > (length(lines) - 10)) } ) test_that( "check_wildfire_vif raises a warning when VIF > 2.", { VIF_WARN_DATA <- data.frame( region = rep(c("RegionA", "RegionB"), each = 20), health_outcome = rnorm(40, mean = 50, sd = 10), smoke_exposure = rnorm(40, mean = 10, sd = 3), temperature = rnorm(40, mean = 20, sd = 2) ) VIF_WARN_DATA$pm25 <- ( VIF_WARN_DATA$smoke_exposure * 3 + rnorm(nrow(VIF_WARN_DATA)) ) expect_warning( check_wildfire_vif( VIF_WARN_DATA, c("smoke_exposure", "pm25", "temperature"), FALSE, NULL, FALSE ) ) } ) # Tests for get_wildfire_lag_columns test_that( "get_wildfire_lag_columns acts as expected", { WF_L_DATA <- data.frame( "mean_PM_l0_mean" = c(1), "mean_PM_l1_mean" = c(2), "mean_PM_l2_mean" = c(3) ) lag_cols <- get_wildfire_lag_columns(WF_L_DATA)$col_names exp_lag_cols <- c( "mean_PM", "mean_PM_l0_mean", "mean_PM_l1_mean", "mean_PM_l2_mean" ) expect_equal(lag_cols, exp_lag_cols) } ) # Tests for casecrossover_quasipoisson generate_wildfire_test_data <- function( n = 1000, n_strata = 5, n_lags = 2, spline_df = 6, start_date = as.Date("2020-01-01") ) { set.seed(42) RNGkind("Mersenne-Twister", "Inversion") dates <- seq.Date(from = start_date, by = "day", length.out = n) region <- rep(c("Region_A", "Region_B"), length.out = n) region_pop <- c(Region_A = 500000, Region_B = 1200000) data <- data.frame( date = dates, year = as.integer(format(dates, "%Y")), month = as.integer(format(dates, "%m")), day = as.integer(format(dates, "%d")), dow = factor( weekdays(dates), levels = c( "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday" ) ), region = region, pop = unname(region_pop[region]), health_outcome = rpois(n, lambda = 5), mean_PM = runif(n, 5, 30), tmean = rnorm(n, mean = 15, sd = 5), rh = runif(n, 40, 90), wind_speed = runif(n, 0, 10), ind = sample(c(0, 1), n, replace = TRUE), stratum = factor(rep(1:n_strata, each = n / n_strata)) ) data <- create_lagged_variables(data, 3, 1) data <- create_temperature_splines(data, 1, 6) return(data) } test_that( "casecrossover_quassipoission calculates model values and saves residuals.", { test_data <- generate_wildfire_test_data( n = 5000, n_strata = 5, n_lags = 2, spline_df = 6 ) res <- casecrossover_quasipoisson( data = test_data, scale_factor_wildfire_pm = 10, save_fig = TRUE, output_folder_path = temp_dir, print_model_summaries = FALSE ) exp_results <- data.frame( lag = c(0, 1, 2, 3), relative_risk = c(0.9863, 0.9952, 1.0087, 1.0143), ci_lower = c(0.9632, 0.9622, 0.9682, 0.9673), ci_upper = c(1.0101, 1.0292, 1.0509, 1.0636) ) expect_equal(round(res, 4), exp_results, tolerance = 1e-3) expect_true(file.exists( file.path( temp_dir, "model_validation", "wildfires_residuals_vs_fit_plot.pdf" ) )) } ) test_that( "casecrossover_quassipoission calculates model values and prints residuals.", { test_data <- generate_wildfire_test_data( n = 5000, n_strata = 5, n_lags = 2, spline_df = 6 ) out <- capture.output( casecrossover_quasipoisson( data = test_data, scale_factor_wildfire_pm = 10, save_fig = FALSE, output_folder_path = NULL, print_model_summaries = TRUE ) ) # validate outputs are being printed reg_out <- unlist(regmatches( out, gregexpr("Ratio of residual deviance to degrees of freedom: ", out) )) expect_equal(length(reg_out), 65) } ) # Tests for calculate_qaic test_that( "calculate_qaic raises an error when save_csv=T and no output path is provided", { expect_error( calculate_qaic(NULL, TRUE, NULL, FALSE), "No output path provided when save_csv==T." ) } ) test_that( "calculate_qaic saves as expected, returns the correct results, and appends note to csv.", { test_data <- generate_wildfire_test_data( n = 5000, n_strata = 5, n_lags = 2, spline_df = 6 ) qaic_res <- calculate_qaic( data = test_data, save_csv = TRUE, output_folder_path = temp_dir, print_results = FALSE ) # validate qaic results expect_equal(unique(qaic_res$region), c("Region_A", "Region_B")) # NOTE: high tolerance due to platform drift (mac vs windows) expect_equal( round(qaic_res$qaic_at_lag_0, 3), c(1377.051, 1309.869), tolerance = 10 ) expect_equal( round(qaic_res$qaic_at_lag_2, 3), c(1376.393, 1307.147), tolerance = 10 ) expect_equal(round( qaic_res$pearson_dispersion_at_lag_1, 3 ), c(0.961, 1.032), tolerance = 0.005) expect_equal(round( qaic_res$pearson_dispersion_at_lag_3, 3 ), c(0.961, 1.031), tolerance = 0.005) # validate results were saved output_fpath <- file.path(temp_dir, "model_validation", "qaic_results.csv") expect_true(file.exists(output_fpath)) # Validate header and appended notes using readLines lines <- readLines(output_fpath, warn = FALSE) expect_true(length(lines) >= 3) # Header line: allow optional quotes around column names (write.csv conventions) # We know at least "region" should be present as first column expect_true(grepl('^"?region"?[,]', lines[1])) # Verify notes are present, appear in order, and are near the end note1 <- "Note: QAIC model validation results." note2 <- "Please refer to wildfire methods documentation (section 3.7) for interpretation guidelines." idx1 <- which(trimws(lines) == note1) idx2 <- which(trimws(lines) == note2) expect_true(length(idx1) == 1) expect_true(length(idx2) == 1) expect_true(idx1 < idx2) # final note should be close to the end; allow buffer for possible trailing newline expect_true(idx2 > (length(lines) - 10)) # Ensure there was at least one data row before the notes expect_true(any(nzchar(lines[2:(min(idx1, length(lines)) - 1)]))) } ) test_that( "calculate_qaic prints as expected when prompted.", { test_data <- generate_wildfire_test_data( n = 5000, n_strata = 5, n_lags = 2, spline_df = 6 ) out <- capture.output(calculate_qaic( data = test_data, print_results = TRUE )) qaic <- unlist(regmatches( out, gregexpr("QAIC for .* = ([0-9]+\\.[0-9]+)", out) )) pearson <- unlist(regmatches( out, gregexpr("Pearson dispersion statistic: ", out) )) expect_equal(length(qaic), 10) expect_equal(length(pearson), 10) } ) # Tests for plot_RR RR_EX_DATA <- data.frame( region_name = c(rep("Region_A", 3),rep("Region_B", 3)), lag = c(0, 1, 2, 0, 1, 2), relative_risk = c(1.0102, 1.0116, 1.0102,1.0125, 1.0138, 1.0120), ci_lower = c(0.9854, 0.9759, 0.9660, 0.9870, 0.9780, 0.9690), ci_upper = c(1.0356, 1.0487, 1.0565, 1.0380, 1.0505, 1.0550) ) test_that( "plot_RR raises an error is save_fig=T and output path is NULL", { expect_error( plot_RR(NULL, NULL, NULL, TRUE, NULL), "No output path provided when save_fig==T." ) } ) test_that( "plot_RR returns a plot object when no save is prompted.", { rr_plot <- plot_RR( rr_data = RR_EX_DATA, wildfire_lag = 2, by_region = TRUE, save_fig = FALSE, output_folder_path = NULL ) expect_true(inherits(rr_plot, "patchwork")) expect_true(inherits(rr_plot, "ggplot")) expect_true(inherits(rr_plot, "gg")) } ) test_that( "plot_RR saves a figure to disk when prompted.", { plot_RR( rr_data = RR_EX_DATA, wildfire_lag = 2, by_region = FALSE, save_fig = TRUE, output_folder_path = temp_dir ) expect_true(file.exists(file.path(temp_dir, "RR_lag_estimates.pdf"))) } ) # Tests for plot_RR_core test_that( "plot_RR_core raises an error is save_fig=T and output path is NULL", { expect_error( plot_RR_core(NULL, TRUE, 3, NULL), "No output path provided when save_fig==T." ) } ) test_that( "plot_RR_core returns a plot object.", { rr_plot <- plot_RR_core( rr_data <- subset(RR_EX_DATA, RR_EX_DATA$region_name=="Region_A"), save_fig = FALSE, wildfire_lag = 2, output_folder_path = NULL, region_name = "Region A", ylims = NULL ) expect_true(inherits(rr_plot, "ggplot")) expect_true(inherits(rr_plot, "gg")) } ) test_that( "plot_RR_core saves plots to disk when prompted.", { rr_plot <- plot_RR_core( rr_data <- subset(RR_EX_DATA, RR_EX_DATA$region_name=="Region_A"), save_fig = TRUE, wildfire_lag = 2, output_folder_path = temp_dir, region_name = "Region A", ylims = NULL ) output_fpath <- file.path(temp_dir, "RR_lag_estimates_region_a.pdf") expect_true(file.exists(output_fpath)) } ) # Tests for save_wildfire_results test_that( "save_wildfire_results raises an error is output_folder_path is NULL.", { expect_error( save_wildfire_results( NULL, NULL, NULL, NULL ), "Output directory required." ) } ) test_that( "save_wildfire_results raises an error is output_folder_path does not exist.", { expect_error( save_wildfire_results( NULL, NULL, NULL, "does/not/exist" ), "Output directory does not exist" ) } ) test_that( "save_wildfire_results saves results to disk when prompted", { rr_res <- data.frame(col1 = c("a", "b"), cold2 = c(34, 42)) an_ar_res <- data.frame(col1 = c("c", "d"), cold2 = c(88, 102)) an_ar_annual_res <- data.frame(col1 = c("e", "f"), cold2 = c(500, 1000)) save_wildfire_results( rr_results = rr_res, an_ar_results = an_ar_res, annual_af_an_results = an_ar_annual_res, output_folder_path = temp_dir ) rr_fpath <- file.path(temp_dir, "RR_lag_estimates.csv") an_ar_fpath <- file.path(temp_dir, "wildfire_health_monthly_estimates.csv") an_ar_ann_fpath <- file.path(temp_dir, "wildfire_health_yearly_estimates.csv") files <- c(rr_fpath, an_ar_fpath, an_ar_ann_fpath) for (f in files) { expect_true(file.exists(f)) } expect_equal( read.csv(rr_fpath), rr_res ) expect_equal( read.csv(an_ar_fpath), an_ar_res ) expect_equal( read.csv(an_ar_ann_fpath), an_ar_annual_res ) } ) # Tests for calculate_wildfire_rr_by_region test_that( "calculate_wildfire_rr_by_region raises an error is output_folder_path is NULL.", { expect_error( calculate_wildfire_rr_by_region( NULL, NULL, NULL, TRUE ), "No output path provided when save_fig==T." ) } ) test_that( "calculate_wildfire_rr_by_region rauses an error when region isn't present", { data <- data.frame(test = c(1, 2, 3)) expect_error( calculate_wildfire_rr_by_region( data = data, scale_factor_wildfire_pm = 10, calc_relative_risk_by_region = TRUE, save_fig = FALSE, output_folder_path = NULL, print_model_summaries = FALSE ), "data must contain 'region' column for region level RR data." ) } ) # NOTE: not testing save_fig functionality since this is done in casecrossover_quasipoisson test_that( "calculate_wildfire_rr_by_region returns resuults as expected", { test_data <- generate_wildfire_test_data( n = 5000, n_strata = 5, n_lags = 2, spline_df = 6 ) rr_by_reg <- calculate_wildfire_rr_by_region( data = test_data, scale_factor_wildfire_pm = 10, calc_relative_risk_by_region = TRUE, save_fig = FALSE, output_folder_path = NULL, print_model_summaries = FALSE ) expect_equal( unique(rr_by_reg$region_name), c("Region_A", "Region_B", "All Regions") ) lag2_df <- subset(rr_by_reg, rr_by_reg$lag==2) num_cols <- sapply(lag2_df, is.numeric) lag2_df[num_cols] <- lapply(lag2_df[num_cols], round, 6) rownames(lag2_df) <- NULL exp_lag2_df <- data.frame( lag = c(2, 2, 2), relative_risk = c(1.029838, 0.991563, 1.008722), ci_lower = c(0.970565, 0.936339, 0.968225), ci_upper = c(1.092731, 1.050044, 1.050913), region_name = c("Region_A", "Region_B", "All Regions"), stringsAsFactors = FALSE ) expect_equal(lag2_df, exp_lag2_df, tolerance = 0.005) } ) get_daily_af_an_testdata <- function() { test_data <- generate_wildfire_test_data( n = 5000, n_strata = 5, n_lags = 2, spline_df = 6 ) rr_data <- calculate_wildfire_rr_by_region( data = test_data, scale_factor_wildfire_pm = 10, calc_relative_risk_by_region = TRUE, save_fig = FALSE, output_folder_path = NULL, print_model_summaries = FALSE ) data_list <- list(test_data = test_data, rr_data = rr_data) return(data_list) } # Tests for calculate_daily_af_an test_that( "calculate_daily_AF_AN works as expected", { data <- get_daily_af_an_testdata() af_an_res <- calculate_daily_AF_AN( data = data$test_data, rr_data = data$rr_data ) exp_shape <- c(5000, 36) exp_columns <- c( "health_outcome", "mean_PM", "tmean", "rh", "wind_speed", "ind", "stratum", "region", "mean_PM_l0", "mean_PM_l1", "mean_PM_l2", "mean_PM_l3", "mean_PM_l0_mean", "mean_PM_l1_mean", "mean_PM_l2_mean", "mean_PM_l3_mean", "tmean_l0", "tmean_l1", "tmean_l0_mean", "tmean_l1_mean", "ns.tmean", "rescaled_RR", "attributable_fraction", "attributable_number", "rescaled_CI_upper", "attributable_fraction_upper", "attributable_number_upper", "rescaled_CI_lower", "attributable_fraction_lower", "attributable_number_lower", "date", "year", "month", "day", "dow", "pop" ) exp_an <- round(c( 0.01261, 0.01416, 0.01166, 0.00256, 0.01251, 0.01059 ), 5) expect_equal(exp_shape, dim(af_an_res)) expect_true(all(exp_columns %in% colnames(af_an_res))) expect_equal(exp_an, round(head(af_an_res)$attributable_number, 5), tolerance = 0.1) } ) # Tests for summarise_AF_AN test_that( "summarise_AF_AN summarises attributable numbers/fractions to monthly aggregates.", { data <- get_daily_af_an_testdata() af_an_res <- calculate_daily_AF_AN( data = data$test_data, rr_data = data$rr_data ) res <- summarise_AF_AN(data = af_an_res, monthly = TRUE) # validate outputs expect_true(all(unique(res$month) == 1:12)) expect_true(all(unique(res$year) == 2020:2033)) expect_equal( c(0.13, 0.10, 0.16, 0.14), round(head(res, 4)$total_attributable_number, 2) ) expect_equal(dim(res), c(330, 17)) expect_equal(unique(res$region), c("Region_A", "Region_B")) } ) test_that( "summarise_AF_AN summarises attributable numbers/fractions to daily aggregates.", { data <- get_daily_af_an_testdata() af_an_res <- calculate_daily_AF_AN( data = data$test_data, rr_data = data$rr_data ) res <- summarise_AF_AN(data = af_an_res, monthly = FALSE) # validate outputs expect_false("month" %in% colnames(res)) expect_true(all(unique(res$year) == 2020:2033)) expect_equal( rep(1.5, 4), round(head(res, 4)$total_attributable_number, 1), tolerance = 0.5 ) expect_equal(dim(res), c(28, 16)) expect_equal(unique(res$region), c("Region_A", "Region_B")) } ) # Tests for plot_aggregated_AF and plot_aggregated_AF_core (covers both) AGGREGATED_AF_DATA <- data.frame( region = c(rep("Region_A", 5), rep("Region_B", 5)), year = rep(2020:2024, 2), population = c(rep(500000, 5), rep(1200000, 5)), total_attributable_number = c(20.233,19.155,19.944,19.020,19.538, 9.029,8.776,8.123,8.874,8.728), total_variance_AN = c(6.345,5.703,6.658,5.838,6.264, 6.016,5.481,4.947,5.385,6.010), average_attributable_fraction = c(0.023,0.022,0.021,0.022,0.022, 0.010,0.010,0.010,0.010,0.010), variance_AF_mean = rep(0.001, 10), se_total_AN = c(2.519,2.388,2.580,2.416,2.503, 2.453,2.341,2.224,2.321,2.451), lower_ci_attributable_number = c(15.296,14.474,14.887,14.284,14.632, 4.222,4.187,3.763,4.326,3.924), upper_ci_attributable_number = c(25.170,23.835,25.001,23.755,24.443, 13.836,13.364,12.482,13.423,13.533), se_AF_mean = c(0.035,0.034,0.033,0.034,0.034, 0.033,0.033,0.032,0.033,0.032), lower_ci_attributable_fraction = c(-0.046,-0.045,-0.044,-0.045,-0.045, -0.054,-0.055,-0.053,-0.055,-0.053), upper_ci_attributable_fraction = c(0.092,0.088,0.086,0.088,0.088, 0.074,0.074,0.072,0.075,0.073), deaths_per_100k = c(4.047,3.831,3.989,3.804,3.908, 0.752,0.731,0.677,0.740,0.727), lower_ci_deaths_per_100k = c(3.059,2.895,2.977,2.857,2.926, 0.352,0.349,0.314,0.360,0.327), upper_ci_deaths_per_100k = c(5.034,4.767,5.000,4.751,4.889, 1.153,1.114,1.040,1.119,1.128) ) test_that( "plot_aggregated_AF raises an error if expected columns are not present.", { expect_error( plot_aggregated_AF( data = data.frame(), by_region = FALSE, output_dir = "."), "'data' must contain the following columns: " ) } ) test_that( "plot_aggregated_AF raises an error if output_dir does not exist.", { expect_error( plot_aggregated_AF( data = AGGREGATED_AF_DATA, by_region = FALSE, output_dir = "does/not/exist" ), "'output_dir' does not exist" ) } ) test_that( "plot_aggregated_AF raises an error if output_dir is NULL.", { expect_error( plot_aggregated_AF( data = AGGREGATED_AF_DATA, by_region = FALSE, output_dir = NULL ), "'output_dir' is NULL." ) } ) test_that( "plot_aggregated_AF creates and saves plot when by_region=FALSE.", { plot_aggregated_AF( data = AGGREGATED_AF_DATA, by_region = FALSE, output_dir = temp_dir ) expect_true( file.exists(file.path(temp_dir, "aggregated_AF.pdf")) ) } ) test_that( "plot_aggregated_AF errors when by_region = TRUE but 'region' column is missing", { df_no_region <- subset( AGGREGATED_AF_DATA, select = -region ) expect_error( plot_aggregated_AF( data = df_no_region, by_region = TRUE, output_dir = temp_dir ), "'data' must contain the following columns:" ) } ) test_that( "plot_aggregated_AF creates and saves plot when by_region=TRUE.", { plot_aggregated_AF( data = AGGREGATED_AF_DATA, by_region = TRUE, output_dir = temp_dir ) expect_true( file.exists(file.path(temp_dir, "aggregated_AF_by_region.pdf")) ) } ) # Tests for join_ar_and_pm_monthly test_that( "join_ar_and_pm_monthly raises an error when required columns are missing from an_ar_data", { expect_error( join_ar_and_pm_monthly(data.frame(), data.frame()), "'an_ar_data' requires the columns: " ) } ) test_that( "join_ar_and_pm_monthly raises an error when required columns are missing from an_ar_data", { expect_error( join_ar_and_pm_monthly( pm_data = data.frame(), an_ar_data = data.frame( year = c(2021, 2022), month = c(1, 1), region = c("regA", "regA") ) ), "'pm_data' requires the columns: " ) } ) test_that( "join_ar_and_pm_monthly joins data as expected", { ar_data <- data.frame( year = c(rep(2021, 3), rep(2022, 3)), month = rep(c(1:3), 2), region = c(rep("A1", 3), rep("A2", 3)), AN_total = c(23, 34, 22, 78, 87, 39) ) pm_data <- data.frame( year = c(rep(2021, 3), rep(2022, 3)), month = rep(c(1:3), 2), region = c(rep("A1", 3), rep("A2", 3)), mean_PM = c(0.12, 0.12, 0.11, 0.24, 0.23, 0.27) ) res <- join_ar_and_pm_monthly( pm_data = pm_data, an_ar_data = ar_data ) exp_columns <- c("year", "month", "region", "AN_total", "monthly_avg_pm25") expect_true(all(exp_columns %in% colnames(res))) expect_equal(dim(res), c(6, 5)) } ) # Tests for plot_ar_pm_monthly test_that( "plot_ar_pm_monthly raises an error if save_outputs==T and output_dir==NULL.", { expect_error( plot_ar_pm_monthly(data.frame(), TRUE, NULL), "'output_dir' must be provded to save outputs." ) } ) test_that( "plot_ar_pm_monthly raises an error if output_dir does not exist.", { expect_error( plot_ar_pm_monthly(data.frame(), TRUE, "does/no/exist"), "'output_dir' must exist on disk to save outputs." ) } ) test_that( "plot_ar_pm_monthly plots data and saves/returns outputs.", { MONTHLY_AR_PM <- data.frame( year = c(rep(2021, 3), rep(2022, 3)), month = rep(1:3, 2), region = c(rep("A1", 3), rep("A2", 3)), AN_total = c(23, 34, 22, 78, 87, 39), deaths_per_100k = round(runif(6, min = 10, max = 100), 1), monthly_avg_pm25 = round(runif(6, min = 5, max = 40), 1) ) # clean any pre-existing outputs pdf_path <- file.path(temp_dir, "Monthly_deaths_pm_trends.pdf") csv_path <- file.path(temp_dir, "Monthly_deaths_pm_trends.csv") if (file.exists(pdf_path)) file.remove(pdf_path) if (file.exists(csv_path)) file.remove(csv_path) res <- plot_ar_pm_monthly( data = MONTHLY_AR_PM, save_outputs = TRUE, output_dir = temp_dir ) # validate return expect_true(inherits(res, "data.frame")) expect_equal(dim(res), c(9, 4)) expect_equal(names(res), c("region", "month_name", "mean_deaths_per_100k", "mean_pm") ) # month_name should be an ordered factor with month.abb levels expect_true(is.factor(res$month_name)) expect_identical(levels(res$month_name), month.abb) # validate output saved expect_true(file.exists(pdf_path)) expect_true(file.info(pdf_path)$size > 0) expect_true(file.exists(csv_path)) expect_true(file.info(csv_path)$size > 0) } ) # Tests for generate_rr_pm_overall test_that( "generate_rr_pm_overall works as expected.", { df <- data.frame( lag = c(0, 1, 2), relative_risk = c(1.10, 1.25, 1.05), ci_lower = c(1.02, 1.15, 0.98), ci_upper = c(1.18, 1.35, 1.12) ) # Dummy data just to provide mean_PM for default pm_vals logic pm_data <- data.frame(mean_PM = rep(4, 5)) res <- generate_rr_pm_overall( data = pm_data, relative_risk_overall = df, scale_factor_wildfire_pm = 10, wildfire_lag = 0, pm_vals = NULL ) expect_true(inherits(res, "data.frame")) expect_equal(res$pm_levels, c(0, 1, 2, 3, 4)) } ) # Tests for generate_rr_by_region test_that( "generate_rr_pm_by_region works as expected.", { df <- data.frame( lag = rep(c(0, 1, 2), 2), region_name = c("A", "A", "A", "B", "B", "B"), relative_risk = c(1.10, 1.25, 1.05, 1.01, 1.00, 1.003), ci_lower = rep(c(1.02, 1.15, 0.98), 2), ci_upper = rep(c(1.18, 1.35, 1.12), 2) ) pm_data <- data.frame(mean_PM = rep(4, 5)) res <- generate_rr_pm_by_region( data = pm_data, relative_risk_overall = df, scale_factor_wildfire_pm = 10, wildfire_lag = 2, pm_vals = NULL ) expect_true(inherits(res, "data.frame")) exp_columns <- c( "region_name", "pm_levels", "relative_risk", "ci_lower", "ci_upper" ) expect_true(all(exp_columns %in% colnames(res))) expect_equal(c(rep("A", 5), rep("B", 5)), res$region_name) expect_equal(rep(seq(0, 4, 1), 2), res$pm_levels) } ) # Tests for plot_rr_by_pm and plot_rr_by_pm_core test_that( "plot_rr_by_pm raises an error when save_fig=T and output_dir=NULL.", { expect_error( plot_rr_by_pm( data = data.frame(test = c(1, 2)), save_fig = TRUE, output_dir = NULL ), "'output_dir' must be provided to save outputs." ) } ) test_that( "plot_rr_by_pm raises an error when the output directory does not exist", { expect_error( plot_rr_by_pm(data.frame(test = c(1, 2)), TRUE, "does/not/exist"), "'output_dir' must exist on disk to save outputs." ) } ) RR_PM_DF <- data.frame( region_name = rep("A", 4), pm_levels = c(0, 5, 10, 15), relative_risk = c(1, 1.0247, 1.0500, 1.0759), ci_lower = c(0.98, 1.027, 1.053, 1.078), ci_upper = c(1, 1.023, 1.045, 1.07) ) test_that( "plot_rr_by_pm raises an error if expected columns are missing.", { bad_df <- RR_PM_DF[, !(names(RR_PM_DF) %in% c("region_name"))] expect_error( plot_rr_by_pm(bad_df, FALSE, NULL), "'data' must contain these columns: " ) } ) test_that( "plot_rr_by_pm returns and saves plots as expected", { plot <- plot_rr_by_pm( data = RR_PM_DF, save_fig = TRUE, output_dir = temp_dir ) # validate return expect_true(inherits(plot, "patchwork")) expect_true(inherits(plot, "ggplot")) expect_true(inherits(plot, "gg")) # validate plot saved expect_true(file.exists(file.path(temp_dir, "ER_curve.pdf"))) } ) # Tests for plot_ar_by_region AR_AN_TEST_DF <- data.frame( region = c("A", "A", "A"), deaths_per_100k = c(100, 105, 110), lower_ci_deaths_per_100k = c(95, 100, 105), upper_ci_deaths_per_100k = c(105, 110, 115), total_attributable_number = c(200, 205, 210) ) test_that( "plot_ar_by_region raises an error if output_dir is null", { expect_error( plot_ar_by_region(data = AR_AN_TEST_DF, NULL), "'output_dir' required." ) } ) test_that( "plot_ar_by_region raises an error if output_dir does not exist", { expect_error( plot_ar_by_region(data = AR_AN_TEST_DF, "does/not/exist"), "'output_dir' does not exist." ) } ) test_that( "plot_ar_by_region raises an error if expected columns aren't present.", { bad_df <- AR_AN_TEST_DF[, !(names(AR_AN_TEST_DF) %in% c("region"))] expect_error( plot_ar_by_region(data = bad_df), "'data' must contain the following columns: " ) } ) test_that( "plot_ar_by_region returns plots and saves them as expected", { plot <- plot_ar_by_region(data = AR_AN_TEST_DF, output_dir = temp_dir) # validate return expect_true(inherits(plot, "ggplot")) expect_true(inherits(plot, "gg")) # validate plot saved expect_true(file.exists(file.path(temp_dir, "AR_by_region.pdf"))) } ) # Tets for plot_an_by_region test_that( "plot_an_by_region raises an error if output_dir is null", { expect_error( plot_an_by_region(data = AR_AN_TEST_DF, NULL), "'output_dir' required." ) } ) test_that( "plot_an_by_region raises an error if output_dir does not exist", { expect_error( plot_an_by_region(data = AR_AN_TEST_DF, "does/not/exist"), "'output_dir' does not exist." ) } ) test_that( "plot_an_by_region raises an error if expected columns aren't present.", { bad_df <- AR_AN_TEST_DF[, !(names(AR_AN_TEST_DF) %in% c("total_attributable_number"))] expect_error( plot_an_by_region(data = bad_df), "'data' must contain the following columns: " ) } ) test_that( "plot_an_by_region returns plots and saves them as expected", { plot <- plot_an_by_region(data = AR_AN_TEST_DF, output_dir = temp_dir) # validate return expect_true(inherits(plot, "ggplot")) expect_true(inherits(plot, "gg")) # validate plot saved expect_true(file.exists(file.path(temp_dir, "AN_by_region.pdf"))) } ) # tests/testthat/test-wildfire_do_analysis.R # Integration tests for wildfire_do_analysis() # - Dynamically generates a synthetic daily dataset for 2 regions # - Exercises the end-to-end pipeline without external joins (NetCDF/shapefile) # - Adds a second scenario that enables region-level outputs (AF/AN), still without saving files test_that("wildfire_do_analysis: end-to-end run (dataset-level RR only, no file outputs)", { # These are integration smoke tests. skip_on_cran() # Dynamically generate a synthetic dataset set.seed(123) n_days <- 180 dates <- seq.Date(as.Date("2019-01-01"), by = "day", length.out = n_days) regions <- c("North", "South") df <- base::expand.grid(date = dates, region = regions, KEEP.OUT.ATTRS = FALSE) # Temperature with seasonality + noise df$temp_mean <- 12 + 8 * sin(2 * pi * as.numeric(df$date) / 365) + stats::rnorm(nrow(df), sd = 2) # Wildfire PM2.5 (ug/m3) positive values pm_base_by_region <- ifelse(df$region == "North", 6, 10) df$pm25 <- pmax(0.1, pm_base_by_region + stats::rnorm(nrow(df), sd = 3)) # Construct a Poisson outcome with modest dependence on PM and temp eta <- -1 + 0.015 * df$pm25 + 0.01 * (df$temp_mean - mean(df$temp_mean)) mu <- pmax(0.1, exp(eta)) * 10 df$deaths <- stats::rpois(nrow(df), lambda = mu) # Write to temporary CSV (function reads from disk) tmp_dir <- tempdir() csv_path <- file.path(tmp_dir, "synthetic_health.csv") utils::write.csv(df, csv_path, row.names = FALSE) # Run analysis end-to-end without external joins res <- wildfire_do_analysis( health_path = csv_path, join_wildfire_data = FALSE, ncdf_path = NULL, shp_path = NULL, date_col = "date", region_col = "region", shape_region_col = "region", # NEW: required by updated signature mean_temperature_col = "temp_mean", health_outcome_col = "deaths", pm_2_5_col = "pm25", rh_col = NULL, wind_speed_col = NULL, wildfire_lag = 2, temperature_lag = 1, spline_temperature_lag = 0, spline_temperature_degrees_freedom = 3, predictors_vif = NULL, calc_relative_risk_by_region = FALSE, scale_factor_wildfire_pm = 10, save_fig = FALSE, save_csv = FALSE, output_folder_path = NULL, print_vif = FALSE, print_model_summaries = FALSE ) # Assertions on returned structure and content expect_type(res, "list") expect_named(res, c("RR_results", "AF_AN_results", "AR_PM_monthly"), ignore.order = TRUE) # RR results should be a data.frame with at least one row per lag (0..wildfire_lag) expect_s3_class(res$RR_results, "data.frame") expect_true(all( c("lag", "relative_risk", "ci_lower", "ci_upper", "region_name") %in% names(res$RR_results) )) expect_equal(sort(unique(res$RR_results$lag)), 0:2) expect_true(all(is.finite(res$RR_results$relative_risk))) # In this config you expect "All Regions" output only expect_true(all(res$RR_results$region_name == "All Regions")) # No AF/AN and no AR_PM_monthly in this configuration expect_true(is.null(res$AF_AN_results) || nrow(res$AF_AN_results) == 0) expect_true(is.null(res$AR_PM_monthly) || nrow(res$AR_PM_monthly) == 0) }) test_that("wildfire_do_analysis: end-to-end run with region-level outputs (AF/AN), no file outputs", { skip_on_cran() set.seed(456) n_days <- 150 dates <- seq.Date(as.Date("2020-04-01"), by = "day", length.out = n_days) regions <- c("North", "South") df <- base::expand.grid(date = dates, region = regions, KEEP.OUT.ATTRS = FALSE) df$temp_mean <- 11 + 7 * sin(2 * pi * as.numeric(df$date) / 365) + stats::rnorm(nrow(df), sd = 2) pm_base_by_region <- ifelse(df$region == "North", 5, 9) df$pm25 <- pmax(0.1, pm_base_by_region + stats::rnorm(nrow(df), sd = 2.5)) df$pops <- ifelse(df$region == "North", 1050000, 700000) eta <- -0.8 + 0.02 * df$pm25 + 0.008 * (df$temp_mean - mean(df$temp_mean)) mu <- pmax(0.1, exp(eta)) * 12 df$deaths <- stats::rpois(nrow(df), lambda = mu) tmp_dir2 <- tempdir() csv_path2 <- file.path(tmp_dir2, "synthetic_health_by_region.csv") utils::write.csv(df, csv_path2, row.names = FALSE) res2 <- wildfire_do_analysis( health_path = csv_path2, join_wildfire_data = FALSE, ncdf_path = NULL, shp_path = NULL, date_col = "date", region_col = "region", shape_region_col = "region", # NEW: required by updated signature mean_temperature_col = "temp_mean", health_outcome_col = "deaths", pm_2_5_col = "pm25", population_col = "pops", rh_col = NULL, wind_speed_col = NULL, wildfire_lag = 1, temperature_lag = 1, spline_temperature_lag = 0, spline_temperature_degrees_freedom = 3, predictors_vif = NULL, calc_relative_risk_by_region = TRUE, scale_factor_wildfire_pm = 10, save_fig = FALSE, save_csv = FALSE, output_folder_path = NULL, print_vif = FALSE, print_model_summaries = FALSE ) # RR results should include All Regions and the individual regions expect_s3_class(res2$RR_results, "data.frame") expect_true(all( c("lag", "relative_risk", "ci_lower", "ci_upper", "region_name") %in% names(res2$RR_results) )) expect_true(all(c("All Regions", "North", "South") %in% unique(res2$RR_results$region_name))) # AF/AN monthly results and AR~PM monthly join should be present expect_true(is.data.frame(res2$AF_AN_results) && nrow(res2$AF_AN_results) > 0) expect_true(is.data.frame(res2$AR_PM_monthly) && nrow(res2$AR_PM_monthly) > 0) # Basic column checks on AF/AN expect_true(all( c("region", "year", "month", "total_attributable_number", "average_attributable_fraction", "deaths_per_100k") %in% names(res2$AF_AN_results) )) }) test_that("wildfire_do_analysis: save_fig creates model_validation directory", { # Unit style: mock heavy steps to focus on folder creation behaviour tmp_out <- file.path(tempdir(), paste0("wf_out_", as.integer(stats::runif(1, 1, 1e9)))) dir.create(tmp_out, recursive = TRUE, showWarnings = FALSE) # Minimal data returned by load_wildfire_data, must have month/year/region/mean_PM for pm_data slice minimal_data <- data.frame( month = 1, year = 2020, region = "A", mean_PM = 5, stringsAsFactors = FALSE ) local_mocked_bindings( load_wildfire_data = function(...) minimal_data, create_lagged_variables = function(data, ...) data, create_temperature_splines = function(data, ...) data, time_stratify = function(data, ...) data, calculate_qaic = function(...) data.frame(ok = TRUE), calculate_wildfire_rr_by_region = function(...) data.frame( lag = 0, relative_risk = 1.01, ci_lower = 0.99, ci_upper = 1.03, region_name = "All Regions" ), plot_RR = function(...) invisible(NULL), generate_rr_pm_by_region = function(...) data.frame( region_name = "All Regions", lag = 0, pm = 0, rr = 1 ), plot_rr_by_pm = function(...) invisible(NULL) ) res <- wildfire_do_analysis( health_path = "ignored.csv", join_wildfire_data = FALSE, date_col = "date", region_col = "region", shape_region_col = "region", mean_temperature_col = "temp", health_outcome_col = "deaths", pm_2_5_col = "pm25", save_fig = TRUE, save_csv = FALSE, output_folder_path = tmp_out ) expect_true(dir.exists(file.path(tmp_out, "model_validation"))) expect_type(res, "list") }) test_that("wildfire_do_analysis: create_run_subdir writes outputs into a timestamped run folder", { tmp_out <- tempfile("wf_runs_") dir.create(tmp_out, recursive = TRUE, showWarnings = FALSE) minimal_data <- data.frame( month = 1, year = 2020, region = "A", mean_PM = 5, stringsAsFactors = FALSE ) captured_output_dir <- NULL local_mocked_bindings( load_wildfire_data = function(...) minimal_data, create_lagged_variables = function(data, ...) data, create_temperature_splines = function(data, ...) data, time_stratify = function(data, ...) data, calculate_qaic = function(...) data.frame(ok = TRUE), calculate_wildfire_rr_by_region = function(...) data.frame( lag = 0, relative_risk = 1.01, ci_lower = 0.99, ci_upper = 1.03, region_name = "All Regions" ), plot_RR = function(...) invisible(NULL), generate_rr_pm_by_region = function(...) data.frame(region_name = "All Regions", lag = 0, pm = 0, rr = 1), plot_rr_by_pm = function(...) invisible(NULL), save_wildfire_results = function(rr_results, an_ar_results, annual_af_an_results, output_folder_path, ...) { captured_output_dir <<- output_folder_path invisible(NULL) } ) wildfire_do_analysis( health_path = "ignored.csv", join_wildfire_data = FALSE, date_col = "date", region_col = "region", shape_region_col = "region", mean_temperature_col = "temp", health_outcome_col = "deaths", pm_2_5_col = "pm25", save_fig = TRUE, save_csv = TRUE, output_folder_path = tmp_out, create_run_subdir = TRUE ) created_dirs <- list.dirs(tmp_out, full.names = TRUE, recursive = FALSE) ts_dirs <- created_dirs[ grepl("wildfires_analysis_\\d{8}_\\d{6}$", basename(created_dirs)) ] expect_equal(length(ts_dirs), 1) expect_true(captured_output_dir %in% ts_dirs) expect_true(dir.exists(file.path(captured_output_dir, "model_validation"))) }) test_that("wildfire_do_analysis: predictors_vif triggers check_wildfire_vif", { minimal_data <- data.frame( month = 1, year = 2020, region = "A", mean_PM = 5, stringsAsFactors = FALSE ) vif_called <- FALSE vif_predictors <- NULL local_mocked_bindings( load_wildfire_data = function(...) minimal_data, create_lagged_variables = function(data, ...) data, create_temperature_splines = function(data, ...) data, time_stratify = function(data, ...) data, calculate_qaic = function(...) data.frame(ok = TRUE), check_wildfire_vif = function(data, predictors, ...) { vif_called <<- TRUE vif_predictors <<- predictors data.frame(ok = TRUE) }, calculate_wildfire_rr_by_region = function(...) data.frame( lag = 0, relative_risk = 1.01, ci_lower = 0.99, ci_upper = 1.03, region_name = "All Regions" ), plot_RR = function(...) invisible(NULL), generate_rr_pm_by_region = function(...) data.frame(region_name = "All Regions", lag = 0, pm = 0, rr = 1), plot_rr_by_pm = function(...) invisible(NULL) ) wildfire_do_analysis( health_path = "ignored.csv", join_wildfire_data = FALSE, date_col = "date", region_col = "region", shape_region_col = "region", mean_temperature_col = "temp", health_outcome_col = "deaths", pm_2_5_col = "pm25", predictors_vif = c("mean_PM_lag0", "tmean_lag0"), save_fig = FALSE, save_csv = FALSE ) expect_true(vif_called) expect_equal(vif_predictors, c("mean_PM_lag0", "tmean_lag0")) }) test_that("wildfire_do_analysis: save_csv triggers save_wildfire_results", { minimal_data <- data.frame( month = 1, year = 2020, region = "A", mean_PM = 5, stringsAsFactors = FALSE ) saved_called <- FALSE local_mocked_bindings( load_wildfire_data = function(...) minimal_data, create_lagged_variables = function(data, ...) data, create_temperature_splines = function(data, ...) data, time_stratify = function(data, ...) data, calculate_qaic = function(...) data.frame(ok = TRUE), calculate_wildfire_rr_by_region = function(...) data.frame( lag = 0, relative_risk = 1.01, ci_lower = 0.99, ci_upper = 1.03, region_name = "All Regions" ), plot_RR = function(...) invisible(NULL), generate_rr_pm_by_region = function(...) data.frame(region_name = "All Regions", lag = 0, pm = 0, rr = 1), plot_rr_by_pm = function(...) invisible(NULL), save_wildfire_results = function(rr_results, an_ar_results, annual_af_an_results, output_folder_path, ...) { saved_called <<- TRUE # Some basic sanity checks on what gets passed expect_s3_class(rr_results, "data.frame") expect_true(is.null(an_ar_results)) expect_true(is.null(annual_af_an_results)) expect_true(is.character(output_folder_path) || is.null(output_folder_path)) invisible(NULL) } ) wildfire_do_analysis( health_path = "ignored.csv", join_wildfire_data = FALSE, date_col = "date", region_col = "region", shape_region_col = "region", mean_temperature_col = "temp", health_outcome_col = "deaths", pm_2_5_col = "pm25", save_fig = FALSE, save_csv = TRUE, output_folder_path = tempdir() ) expect_true(saved_called) }) test_that("wildfire_do_analysis errors clearly when AF/AN outputs are requested without population data", { minimal_data <- data.frame( month = 1, year = 2020, region = "A", mean_PM = 5, stringsAsFactors = FALSE ) local_mocked_bindings( load_wildfire_data = function(...) minimal_data ) expect_error( wildfire_do_analysis( health_path = "ignored.csv", join_wildfire_data = FALSE, date_col = "date", region_col = "region", shape_region_col = "region", mean_temperature_col = "temp", health_outcome_col = "deaths", pm_2_5_col = "pm25", calc_relative_risk_by_region = TRUE ), "Population data are required when calc_relative_risk_by_region = TRUE" ) }) test_that("wildfire_do_analysis errors when create_run_subdir is requested without output_folder_path", { minimal_data <- data.frame( month = 1, year = 2020, region = "A", mean_PM = 5, stringsAsFactors = FALSE ) local_mocked_bindings( load_wildfire_data = function(...) minimal_data ) expect_error( wildfire_do_analysis( health_path = "ignored.csv", join_wildfire_data = FALSE, date_col = "date", region_col = "region", shape_region_col = "region", mean_temperature_col = "temp", health_outcome_col = "deaths", pm_2_5_col = "pm25", create_run_subdir = TRUE ), "`output_folder_path` is required when `create_run_subdir = TRUE`" ) })