# skip("Requires network access - skipped to prevent downloading RSS.xml")
test_that("spod_available_data falls back to XML on S3 error (v1)", {
test_dir <- setup_test_data_dir()
withr::defer(unlink(test_dir, recursive = TRUE))
withr::local_envvar(c("SPANISH_OD_DATA_DIR" = test_dir))
# Get a valid URL and size from the real package data
real_data <- readRDS(system.file(
"extdata",
"available_data_v1.rds",
package = "spanishoddata"
))
target_entry <- real_data[1, ]
# Mock S3 failure
local_mocked_bindings(
spod_available_data_s3 = function(...) stop("S3 bucket unavailable"),
read_data_links_xml = function(...) {
tibble::tibble(
target_url = target_entry$target_url,
pub_ts = as.POSIXct("2020-02-14", tz = "UTC"),
file_size_bytes = 99999, # This should be ignored in favor of RDS
etag = target_entry$etag
)
}
)
# Should warn about S3 failure or just message?
msgs <- capture_messages(
capture.output(
res <- spod_available_data(ver = 1, quiet = FALSE),
file = NULL
)
)
expect_match(msgs, "S3 fetch failed", all = FALSE)
expect_true(nrow(res) == 1)
# Check that we got the size from RDS, not the 99999 from XML
expected_mb <- round(target_entry$true_remote_file_size_bytes / 1024^2, 2)
expect_equal(res$remote_file_size_mb, expected_mb)
})
test_that("spod_available_data v1 handles file size imputation", {
test_dir <- setup_test_data_dir()
withr::defer(unlink(test_dir, recursive = TRUE))
withr::local_envvar(c("SPANISH_OD_DATA_DIR" = test_dir))
# Mock S3 failure + XML return with missing file size
local_mocked_bindings(
spod_available_data_s3 = function(...) stop("S3 fail"),
read_data_links_xml = function(...) {
tibble::tibble(
target_url = "https://opendata-movilidad.mitma.es/maestra1-mitma-distritos/ficheros-diarios/2020-02/20200215_maestra1_mitma_distrito.txt.gz",
pub_ts = as.POSIXct("2020-02-15", tz = "UTC"),
file_size_bytes = NA_real_,
etag = "def"
)
}
)
# Note: The code calls readRDS(system.file(...)) to get known sizes.
# We use a URL that is likely not in the bundled RDS, so left_join results in NA size.
# This triggers the imputation logic.
msgs <- capture_messages(
capture.output(
res <- spod_available_data(ver = 1, quiet = FALSE),
file = NULL
)
)
expect_match(msgs, "S3 fetch failed", all = FALSE)
# Since we have only 1 file and it has NA size, and imputation takes mean of category.
# But if all are NA, mean is NA.
# Code says: files_table$remote_file_size_mb[is.na(...)] <- mean(...)
# If mean is NaN (0/0), it assigns NaN.
# Let's see if it crashes.
expect_true(nrow(res) == 1)
})
test_that("check_local_files = TRUE works", {
test_dir <- setup_test_data_dir()
withr::defer(unlink(test_dir, recursive = TRUE))
withr::local_envvar(c("SPANISH_OD_DATA_DIR" = test_dir))
# Mock to prevent real RSS.xml download
local_mocked_bindings(
spod_get_latest_v2_file_list = function(...) NULL,
spod_get_latest_v1_file_list = function(...) NULL
)
res <- spod_available_data(ver = 2, check_local_files = TRUE, quiet = TRUE)
expect_true("downloaded" %in% names(res))
expect_true("local_file_size" %in% names(res))
})
test_that("spod_available_data rejects invalid version number", {
test_dir <- withr::local_tempdir()
expect_error(
spod_available_data(ver = 3, data_dir = test_dir),
"Invalid version number.*Must be 1.*or 2"
)
expect_error(
spod_available_data(ver = 0, data_dir = test_dir),
"Invalid version number.*Must be 1.*or 2"
)
})
test_that("spod_get_latest_v1_file_list handles download failure", {
test_dir <- withr::local_tempdir()
# Mock download.file to fail
testthat::local_mocked_bindings(
download.file = function(...) stop("Network error"),
.package = "utils"
)
expect_error(
spod_get_latest_v1_file_list(data_dir = test_dir, quiet = TRUE),
"Network error"
)
})
test_that("spod_get_latest_v2_file_list handles download failure", {
test_dir <- withr::local_tempdir()
# Mock download.file to fail
testthat::local_mocked_bindings(
download.file = function(...) stop("Network error"),
.package = "utils"
)
expect_error(
spod_get_latest_v2_file_list(data_dir = test_dir, quiet = TRUE),
"Network error"
)
})
test_that("spod_available_data_v2 handles file size imputation with categories", {
test_dir <- setup_test_data_dir()
withr::defer(unlink(test_dir, recursive = TRUE))
withr::local_envvar(c("SPANISH_OD_DATA_DIR" = test_dir))
# Mock S3 failure and XML with missing sizes
testthat::local_mocked_bindings(
spod_available_data_s3 = function(...) stop("S3 unavailable"),
read_data_links_memoised = function(...) {
tibble::tibble(
target_url = c(
"https://movilidad-opendata.mitma.es/estudios_basicos/por-distritos/viajes/ficheros-diarios/2022-01/20220101_personas_distritos.csv.gz",
"https://movilidad-opendata.mitma.es/estudios_basicos/por-distritos/viajes/ficheros-diarios/2022-01/20220102_personas_distritos.csv.gz"
),
pub_ts = as.POSIXct(c("2022-01-01", "2022-01-02"), tz = "UTC"),
file_size_bytes = c(1000000, NA_real_),
etag = c("abc", "def")
)
}
)
msgs <- capture_messages(
capture.output(
res <- spod_available_data_v2(
data_dir = test_dir,
quiet = FALSE,
use_s3 = TRUE
),
file = NULL
)
)
expect_match(msgs, "S3 fetch failed", all = FALSE)
# Check imputation happened
expect_true(nrow(res) == 2)
expect_true("size_imputed" %in% names(res))
})
test_that("spod_available_data with use_s3=FALSE uses XML directly", {
test_dir <- setup_test_data_dir()
withr::defer(unlink(test_dir, recursive = TRUE))
withr::local_envvar(c("SPANISH_OD_DATA_DIR" = test_dir))
# Mock XML reading
mock_xml_called <- FALSE
testthat::local_mocked_bindings(
read_data_links_memoised = function(...) {
mock_xml_called <<- TRUE
tibble::tibble(
target_url = "https://test.com/file.csv.gz",
pub_ts = as.POSIXct("2022-01-01", tz = "UTC"),
file_size_bytes = 1000000,
etag = "abc"
)
}
)
res <- spod_available_data_v2(
data_dir = test_dir,
use_s3 = FALSE,
quiet = TRUE
)
expect_true(mock_xml_called)
expect_true(nrow(res) > 0)
})
test_that("read_data_links_xml with force=TRUE re-downloads", {
test_dir <- withr::local_tempdir()
metadata_folder <- file.path(test_dir, spod_subfolder_metadata_cache())
dir.create(metadata_folder, recursive = TRUE)
# Create an old XML file
old_xml <- file.path(
metadata_folder,
paste0("data_links_v1_", Sys.Date() - 1, ".xml")
)
writeLines(
"testMon, 14 Feb 2020 00:00:00 +0000",
old_xml
)
# Mock the download function
download_called <- FALSE
mock_download <- function(...) {
download_called <<- TRUE
new_xml <- file.path(
metadata_folder,
paste0("data_links_v1_", Sys.Date(), ".xml")
)
writeLines(
"newMon, 14 Feb 2020 00:00:00 +0000",
new_xml
)
return(new_xml)
}
res <- read_data_links_xml(
metadata_folder = metadata_folder,
data_dir = test_dir,
force = TRUE,
quiet = TRUE,
latest_file_function = mock_download,
ver = 1
)
expect_true(download_called)
expect_true(nrow(res) > 0)
})
test_that("read_data_links_xml uses cached file when not stale", {
test_dir <- withr::local_tempdir()
metadata_folder <- file.path(test_dir, spod_subfolder_metadata_cache())
dir.create(metadata_folder, recursive = TRUE)
# Create a fresh XML file (today's date)
fresh_xml <- file.path(
metadata_folder,
paste0("data_links_v1_", Sys.Date(), ".xml")
)
writeLines(
"cachedMon, 14 Feb 2020 00:00:00 +0000",
fresh_xml
)
# Mock the download function - should NOT be called
download_called <- FALSE
mock_download <- function(...) {
download_called <<- TRUE
stop("Should not download!")
}
res <- read_data_links_xml(
metadata_folder = metadata_folder,
data_dir = test_dir,
force = FALSE,
quiet = TRUE,
latest_file_function = mock_download,
ver = 1
)
expect_false(download_called)
expect_true(nrow(res) > 0)
expect_equal(res$target_url[1], "cached")
})
test_that("spod_available_data v2 falls back to XML on S3 error", {
test_dir <- setup_test_data_dir()
withr::defer(unlink(test_dir, recursive = TRUE))
withr::local_envvar(c("SPANISH_OD_DATA_DIR" = test_dir))
# Mock S3 to fail and prevent real XML download
testthat::local_mocked_bindings(
spod_available_data_s3 = function(...) stop("S3 error"),
spod_get_latest_v2_file_list = function(...) {
xml_file <- file.path(test_dir, "metadata/data_links_v2_2024-01-01.xml")
dir.create(dirname(xml_file), recursive = TRUE, showWarnings = FALSE)
writeLines(
"- https://test.com/file.csv.gzMon, 01 Jan 2024 00:00:00 +0000
",
xml_file
)
return(xml_file)
}
)
# Should fall back to XML and show message
msgs <- capture_messages(
capture.output(
res <- spod_available_data(ver = 2, use_s3 = TRUE, quiet = FALSE),
file = NULL
)
)
expect_match(msgs, "S3 fetch failed", all = FALSE)
expect_s3_class(res, "data.frame")
})