cleanup_tmb_artifacts <- function() { # Find and remove symbols.rds files in R check directories temp_files <- list.files( path = tempdir(), pattern = "symbols.rds$", full.names = TRUE, recursive = TRUE ) # Filter to only include files in .Rcheck directories rcheck_files <- temp_files[grep("\\.Rcheck", temp_files)] # Remove files if (length(rcheck_files) > 0) { message( "Removing ", length(rcheck_files), " TMB artifacts in .Rcheck directories" ) for (file in rcheck_files) { tryCatch( { unlink(file, force = TRUE) message("Removed: ", file) }, error = function(e) { warning("Could not remove file: ", file, "\n", e$message) } ) } } # Additional cleanup for local symbols.rds if (file.exists("symbols.rds")) { unlink("symbols.rds", force = TRUE) } } testthat::test_that("process_dhs_data correctly processes DHS survey data", { # Setup temporary test environment # Create temp directory with normalized path tf <- file.path(tempdir(), "test_env") dir.create(tf, recursive = TRUE, showWarnings = FALSE) tmp_rds_dir <- file.path(tf, "rds") tmp_shp_dir <- file.path(tf, "shp") tmp_output <- file.path(tf, "output.rds") dir.create(tmp_rds_dir, recursive = TRUE, showWarnings = FALSE) dir.create(tmp_shp_dir, recursive = TRUE, showWarnings = FALSE) on.exit( { unlink(tf, recursive = TRUE) cleanup_tmb_artifacts() }, add = TRUE ) # Create fake DHS data create_fake_dhs_data <- function(country_code) { set.seed(123) # For reproducibility n <- 100 # Create labelled vectors hv007 <- haven::labelled( sample(c(2015, 2016), n, replace = TRUE), labels = c("2015" = 2015, "2016" = 2016) ) hv001 <- haven::labelled( sample(1:20, n, replace = TRUE), labels = setNames(1:20, paste("Cluster", 1:20)) ) hv105 <- haven::labelled( sample(c(1:97, 98), n, replace = TRUE), labels = c(setNames(1:97, paste("Age", 1:97)), "Don't know" = 98) ) # Combine into data frame data.frame( hv007 = hv007, hv001 = hv001, hv105 = hv105 ) } # Create fake shapefile data # Create fake shapefile data with explicit CRS create_fake_shapefile <- function(country_code) { set.seed(123) n_clusters <- 20 # Create spatial data frame with explicit CRS sf_data <- sf::st_as_sf( dplyr::tibble( DHSCLUST = 1:n_clusters, URBAN_RURA = sample(c("R", "U"), n_clusters, replace = TRUE), LATNUM = runif(n_clusters, -10, 10), LONGNUM = runif(n_clusters, -10, 10) ), coords = c("LONGNUM", "LATNUM"), crs = 4326 # WGS84 ) |> dplyr::mutate( LATNUM = runif(n_clusters, -10, 10), LONGNUM = runif(n_clusters, -10, 10) ) } # Save test data for two countries countries <- c("KE", "TZ") for (country in countries) { saveRDS( create_fake_dhs_data(country), file = file.path(tmp_rds_dir, paste0(country, "HR71FL.rds")) ) saveRDS( create_fake_shapefile(country), file = file.path(tmp_shp_dir, paste0(country, "HR7SHP.rds")) ) } # Run the function process_dhs_data( rds_dir = tmp_rds_dir, shp_dir = tmp_shp_dir, output_path = tmp_output ) # Read results results <- readRDS(tmp_output) # Test expectations testthat::expect_true(file.exists(tmp_output)) testthat::expect_named( results, c("outlier_free_data", "age_param_data") ) # Test outlier_free_data structure testthat::expect_true( is.data.frame(results$outlier_free_data) ) testthat::expect_true(all( c( "country", "country_code_iso3", "country_code_dhs", "year_of_survey", "dhs_clust_num", "ageyrs", "survey_code", "urban", "lat", "long", "country_year" ) %in% names(results$outlier_free_data) )) # Test age_param_data structure testthat::expect_true(is.data.frame(results$age_param_data)) testthat::expect_true(all( c( "country", "country_code_iso3", "country_code_dhs", "year_of_survey", "survey_code", "urban", "lat", "lon", "b1", "country_year" ) %in% names(results$age_param_data) )) # Test data quality testthat::expect_true(all(results$outlier_free_data$ageyrs != 98)) testthat::expect_true(all(!is.na(results$outlier_free_data$country_code_dhs))) testthat::expect_true(all(!is.na(results$age_param_data$country_code_dhs))) # Test for expected number of countries testthat::expect_equal( length(unique(results$outlier_free_data$country_code_dhs)), length(countries) ) }) testthat::test_that("Test the full pipeline works", { # Skip on CRAN and if TMB not available testthat::skip_on_cran() testthat::skip_if( !requireNamespace("TMB", quietly = TRUE), "TMB not available" ) country_codeiso <- "GMB" suppressWarnings({ # Create temp directory with normalized path tf <- file.path(tempdir(), "test_env") dir.create(tf, recursive = TRUE, showWarnings = FALSE) # Initialize with normalized path cpp_path <- file.path(tf, "02_scripts", "model") dir.create(cpp_path, recursive = TRUE, showWarnings = FALSE) cpp_path <- normalizePath(cpp_path, winslash = "/", mustWork = FALSE) init( r_script_name = "full_pipeline.R", cpp_script_name = "model.cpp", path = tf, open_r_script = FALSE ) # Simulate and save processed survey dataset for Gambia AgePopDenom::simulate_dummy_dhs_pr( country = "Gambia", country_code_iso3 = country_codeiso, country_code_dhs = "GM", year_of_survey = 2024, output_path = here::here( tf, "01_data", "1a_survey_data", "processed", "dhs_pr_records_combined.rds" ) |> normalizePath(winslash = "/", mustWork = FALSE) ) # Download shapefiles download_shapefile( country_codes = country_codeiso, dest_file = file.path( tf, "01_data", "1c_shapefiles", "district_shape.gpkg" ) |> normalizePath(winslash = "/", mustWork = FALSE) ) # Download population rasters from worldpop download_pop_rasters( country_codes = country_codeiso, dest_dir = file.path(tf, "01_data", "1b_rasters", "pop_raster") |> normalizePath(winslash = "/", mustWork = FALSE) ) # Extract urban extent raster extract_afurextent( dest_dir = file.path(tf, "01_data", "1b_rasters", "urban_extent") |> normalizePath(winslash = "/", mustWork = FALSE) ) # Modelling -------------------------------------------------------------- run_full_workflow( country_code = country_codeiso, survey_data_path = file.path( tf, "01_data", "1a_survey_data", "processed" ) |> normalizePath(winslash = "/", mustWork = FALSE), survey_data_suffix = "dhs_pr_records_combined.rds", shape_path = file.path( tf, "01_data", "1c_shapefiles" ) |> normalizePath(winslash = "/", mustWork = FALSE), shape_suffix = "district_shape.gpkg", pop_raster_path = file.path( tf, "01_data", "1b_rasters", "pop_raster" ) |> normalizePath(winslash = "/", mustWork = FALSE), pop_raster_suffix = "_ppp_2020_constrained.tif", ur_raster_path = file.path( tf, "01_data", "1b_rasters", "urban_extent" ) |> normalizePath(winslash = "/", mustWork = FALSE), ur_raster_suffix = "afurextent.asc", pred_save_file = FALSE, raster_width = 2500, raster_height = 2000, raster_resolution = 300, save_raster = TRUE, pyramid_line_color = "#67000d", pyramid_fill_high = "#fee0d2", pyramid_fill_low = "#a50f15", pyramid_caption = paste0( "Note: Total population includes ", "ages 99+, pyramid shows ages 0-99" ), generate_pop_raster = TRUE, output_paths = list( model = file.path(tf, "03_outputs", "3a_model_outputs"), plot = file.path(tf, "03_outputs", "3b_visualizations"), raster = file.path(tf, "03_outputs", "3c_raster_outputs"), table = file.path(tf, "03_outputs", "3c_table_outputs"), compiled = file.path(tf, "03_outputs", "3d_compiled_results"), excel = file.path( tf, "03_outputs", "3d_compiled_results", "age_pop_denom_compiled.xlsx" ), log = file.path( tf, "03_outputs", "3a_model_outputs", "modelling_log.rds" ) ) |> lapply(\(x) normalizePath(x, winslash = "/", mustWork = FALSE)), model_params = list( cell_size = 5000, n_sim = 100, ignore_cache = FALSE, age_range = c(0, 1), age_interval = 1, return_prop = TRUE, scale_outcome = "log_scale", shape_outcome = "log_shape", covariates = "urban", cpp_script = file.path(tf, "02_scripts", "model") |> normalizePath(winslash = "/", mustWork = FALSE), control_params = list(trace = 2), manual_params = NULL, verbose = TRUE ), return_results = FALSE, n_cores = 1 ) # Model outputs test ----------------------------------------------------- # Test existence of model output files testthat::expect_true( file.exists(file.path( tf, "03_outputs", "3a_model_outputs", "gmb_age_param_spatial.rds" )) ) testthat::expect_true( file.exists(file.path( tf, "03_outputs", "3a_model_outputs", "gmb_predictor_data.rds" )) ) # Test model output contents model_output <- readRDS(file.path( tf, "03_outputs", "3a_model_outputs", "gmb_age_param_spatial.rds" )) testthat::expect_type(model_output, "list") testthat::expect_true(all( c( "par", "objective", "convergence", "iterations", "evaluations", "message", "scale_formula", "shape_formula", "variogram" ) %in% names(model_output) )) # Test for variogram components testthat::expect_true("variogram" %in% names(model_output)) testthat::expect_type(model_output$variogram, "list") predictor_data <- readRDS(file.path( tf, "03_outputs", "3a_model_outputs", "gmb_predictor_data.rds" )) testthat::expect_s3_class(predictor_data, "data.frame") testthat::expect_true(all( c( "urban", "web_x", "web_y", "pop", "urban", "country", "region", "district", "country_code" ) %in% names(predictor_data) )) # Visualization outputs test testthat::expect_true( file.exists(file.path( tf, "03_outputs", "3b_visualizations", "gmb_gamma_prediction_rasters.png" )) ) testthat::expect_true( file.exists(file.path( tf, "03_outputs", "3b_visualizations", "gmb_age_pyramid_count.png" )) ) testthat::expect_true( file.exists(file.path( tf, "03_outputs", "3b_visualizations", "gmb_age_pyramid_prop.png" )) ) testthat::expect_true( file.exists(file.path( tf, "03_outputs", "3b_visualizations", "gmb_variogram.png" )) ) # Test image properties for (img_file in c( "gmb_gamma_prediction_rasters.png", "gmb_age_pyramid_count.png", "gmb_age_pyramid_prop.png" )) { img_path <- file.path(tf, "03_outputs", "3b_visualizations", img_file) img_info <- file.info(img_path) testthat::expect_gt(img_info$size, 0) } # Table outputs test table_path <- file.path( tf, "03_outputs", "3c_table_outputs", "gmb_age_tables_pop_0_1plus_yrs_by_1yrs.rds" ) testthat::expect_true(file.exists(table_path)) # Test table contents age_tables <- readRDS(table_path) testthat::expect_type(age_tables, "list") testthat::expect_true(all( c("prop_df", "pop_df") %in% names(age_tables) )) testthat::expect_true(all( c( "country", "region", "district", "popsize" ) %in% c( names(age_tables$prop_df), names(age_tables$pop_df) ) )) # Raster outputs test rast_path <- file.path( tf, "03_outputs", "3b_visualizations", "gmb_age_pop_grid_0_10_yrs_by_1yrs.tif" ) testthat::expect_true(file.exists(table_path)) # Check if output is raster rast <- terra::rast(rast_path) testthat::expect_equal(class(rast)[1], "SpatRaster") # Compiled results test excel_path <- file.path( tf, "03_outputs", "3d_compiled_results", "age_pop_denom_compiled.xlsx" ) testthat::expect_true(file.exists(excel_path)) testthat::expect_gt(file.info(excel_path)$size, 0) params_path <- file.path( tf, "03_outputs", "3d_compiled_results", "afro_model_params.csv" ) testthat::expect_true(file.exists(params_path)) # Test CSV contents params_data <- read.csv(params_path) testthat::expect_true(all( c( "country", "beta1", "beta2", "gamma", "log_sigma2", "log_phi", "log_tau1", "log_likelihood", "convergence", "iterations", "eval_function", "eval_gradient", "message" ) %in% names(params_data) )) testthat::expect_equal(params_data$country[1], "GMB") # Test log file log_path <- file.path( tf, "03_outputs", "3a_model_outputs", "modelling_log.rds" ) testthat::expect_true(file.exists(log_path)) log_data <- readRDS(log_path) testthat::expect_type(log_data, "list") }) }) testthat::test_that("simulate_dummy_dhs_pr() creates expected DHS dataset", { # setup: temporary output path temp_path <- tempfile(fileext = ".rds") # run simulation AgePopDenom::simulate_dummy_dhs_pr( total_population = 100, output_path = temp_path, seed = 42 ) # check file was created testthat::expect_true(file.exists(temp_path)) # read the output result <- readRDS(temp_path) # check structure testthat::expect_type(result, "list") testthat::expect_named(result, "age_param_data") testthat::expect_s3_class(result$age_param_data, "tbl_df") # check column names expected_cols <- c( "country", "country_code_iso3", "country_code_dhs", "year_of_survey", "id_coords", "lon", "lat", "web_x", "web_y", "log_scale", "log_shape", "urban", "b1", "c", "b2", "nsampled" ) testthat::expect_true(all(expected_cols %in% names(result$age_param_data))) # cleanup unlink(temp_path) }) testthat::test_that("autofitVariogram() returns valid variogram model", { testthat::skip_if_not_installed("sp") # prepare data data("meuse", package = "sp") meuse_sf <- sf::st_as_sf(meuse, coords = c("x", "y"), crs = 28992) # run function result <- AgePopDenom:::autofitVariogram(zinc ~ 1, meuse_sf) # basic checks testthat::expect_s3_class(result, "autofitVariogram") testthat::expect_type(result, "list") testthat::expect_named( result, c("exp_var", "var_model", "sserr"), ignore.order = TRUE ) # check contents testthat::expect_s3_class(result$exp_var, "data.frame") testthat::expect_true(nrow(result$exp_var) > 1) testthat::expect_s3_class(result$var_model, "variogramModel") testthat::expect_true(is.numeric(result$sserr)) }) testthat::test_that("autofitVariogram() prints expected output in verbose mode", { testthat::skip_if_not_installed("sp") testthat::skip_if_not_installed("sf") testthat::skip_if_not_installed("gstat") data("meuse", package = "sp") meuse_sf <- sf::st_as_sf(meuse, coords = c("x", "y"), crs = 28992) testthat::expect_output( AgePopDenom:::autofitVariogram(zinc ~ 1, meuse_sf, verbose = TRUE), regexp = "Selected:" ) testthat::expect_output( AgePopDenom:::autofitVariogram(zinc ~ 1, meuse_sf, verbose = TRUE), regexp = "Tested models, best first:" ) }) cleanup_tmb_artifacts()