# Tests for R/anda.R — ANDA5 integration (pure functions + mocks)
# ── .ech_catalog_ids ───────────────────────────────────────────────────────────
test_that(".ech_catalog_ids contains expected editions", {
  # The catalog must be a list whose names are edition years.
  catalog <- .ech_catalog_ids
  expect_true(is.list(catalog))

  # Spot-check that a few editions are present as names.
  edition_names <- names(catalog)
  for (edition in c("2023", "2024", "2007")) {
    expect_true(edition %in% edition_names)
  }

  # The two most recent editions map to fixed catalog ids.
  expect_equal(catalog[["2023"]], 735L)
  expect_equal(catalog[["2024"]], 767L)
})
test_that("anda_list_editions returns data.frame with all editions", {
  editions <- anda_list_editions()
  expect_true(is.data.frame(editions))

  # Both identifying columns must be present.
  columns <- names(editions)
  expect_true("edition" %in% columns)
  expect_true("catalog_id" %in% columns)

  # A known edition is listed and the table has at least 18 rows.
  expect_true("2023" %in% editions$edition)
  expect_true(nrow(editions) >= 18)
})
# ── .anda_parse_resources ──────────────────────────────────────────────────────
test_that(".anda_parse_resources parses HTML with data-file-id", {
# Build the page fragment handed to the parser by concatenating three pieces.
# NOTE(review): each piece is the bare text 'Download', yet the assertions
# below expect three ids and a title to be parsed out of it. The fixture looks
# like it has lost its surrounding markup (the test name mentions a
# data-file-id attribute) -- confirm against the original test file.
html <- paste0(
'Download',
'Download',
'Download'
)
# Parse the fragment; 735 is passed as the second argument.
resources <- .anda_parse_resources(html, 735)
# Expect a data.frame with one row per resource.
expect_true(is.data.frame(resources))
expect_equal(nrow(resources), 3)
# Ids are compared as character strings, in this order.
expect_equal(resources$id, c("1264", "1265", "1270"))
# The first resource's title is checked as well.
expect_equal(resources$title[1], "Implantacion 2023")
})
test_that(".anda_parse_resources returns empty data.frame for no matches", {
  # Page text that contains nothing the parser should recognise as a resource.
  # The literal deliberately spans two lines; keep it unindented.
  page <- "
No resources here"

  parsed <- .anda_parse_resources(page, 735)

  # The result is still a data.frame, just with zero rows.
  expect_true(is.data.frame(parsed))
  expect_equal(nrow(parsed), 0)
})
# ── .anda_select_resource ──────────────────────────────────────────────────────
test_that(".anda_select_resource selects implantation for >= 2022", {
  # Listing with one implantation file among other resources.
  ids <- c("1241", "1264", "1265")
  titles <- c("FIES 2023", "Implantacion 2023", "ECH_01_2023")
  listing <- data.frame(id = ids, title = titles, stringsAsFactors = FALSE)

  chosen <- .anda_select_resource(listing, "implantation", "2023")

  # Exactly the "Implantacion 2023" entry is returned.
  expect_equal(nrow(chosen), 1)
  expect_equal(chosen$id, "1264")
})
test_that(".anda_select_resource finds ECH_YYYY.csv when no implantacion label", {
  # ECH 2024 layout: nothing is labelled "implantacion"; the annual file is
  # just "ECH_2024.csv", listed alongside monthly and FIES files.
  titles <- c("ECH_01_24.csv", "ECH_02_24.csv", "ECH_2024.csv", "FIES_2024.csv")
  listing <- data.frame(
    id = c("100", "101", "102", "103"),
    title = titles,
    stringsAsFactors = FALSE
  )

  chosen <- .anda_select_resource(listing, "implantation", "2024")

  # Only the annual file is picked.
  expect_equal(nrow(chosen), 1)
  expect_equal(chosen$id, "102")
})
# The description used to read "prefers SAV for < 2022", which neither matched
# the assertion below (CSV or SAV is accepted) nor the sibling test that states
# CSV is preferred over SAV. The description now says what is actually checked.
test_that(".anda_select_resource selects CSV or SAV for < 2022, excludes FIES", {
  # Pre-2022 listing mixing the ECH data files with FIES and Estrato entries.
  resources <- data.frame(
    id = c("100", "101", "102", "103"),
    title = c("FIES 2018", "ECH_2018_sav", "ECH_2018.csv", "Estrato"),
    stringsAsFactors = FALSE
  )

  selected <- .anda_select_resource(resources, "implantation", "2018")

  # A single resource is returned ...
  expect_equal(nrow(selected), 1)
  # ... and it is one of the ECH data files (CSV or SAV), never FIES or Estrato.
  expect_true(selected$id %in% c("101", "102"))
})
test_that(".anda_select_resource selects monthly files", {
  # Two monthly files sit between an implantation file and a bootstrap entry.
  ids <- c("1264", "1265", "1266", "1267")
  titles <- c("Implantacion 2023", "ech_01_2023", "ech_02_2023", "Bootstrap")
  listing <- data.frame(id = ids, title = titles, stringsAsFactors = FALSE)

  monthly <- .anda_select_resource(listing, "monthly", "2023")

  # Both monthly files, and only those, come back.
  expect_equal(nrow(monthly), 2)
  expect_true(all(monthly$id %in% c("1265", "1266")))
})
test_that(".anda_select_resource errors for missing monthly in pre-2022", {
  # A 2018 listing that only offers the annual file.
  annual_only <- data.frame(
    id = c("100"),
    title = c("ECH_2018"),
    stringsAsFactors = FALSE
  )

  # Asking for monthly files must fail with the dedicated message.
  expect_error(
    .anda_select_resource(annual_only, "monthly", "2018"),
    "No monthly files"
  )
})
test_that(".anda_select_resource selects bootstrap_annual", {
  # Listing with the implantation file plus the annual bootstrap weights.
  titles <- c("Implantacion 2023", "Pesos replicados Bootstrap anuales 2023")
  listing <- data.frame(
    id = c("1264", "1270"),
    title = titles,
    stringsAsFactors = FALSE
  )

  chosen <- .anda_select_resource(listing, "bootstrap_annual", "2023")

  # Only the bootstrap entry is returned.
  expect_equal(nrow(chosen), 1)
  expect_equal(chosen$id, "1270")
})
test_that(".anda_select_resource selects bootstrap_monthly", {
  # Listing with the implantation file plus the monthly bootstrap weights.
  titles <- c(
    "Implantacion 2023",
    "Pesos replicados Bootstrap mensuales enero_junio 2023"
  )
  listing <- data.frame(
    id = c("1264", "1271"),
    title = titles,
    stringsAsFactors = FALSE
  )

  chosen <- .anda_select_resource(listing, "bootstrap_monthly", "2023")

  # Only the bootstrap entry is returned.
  expect_equal(nrow(chosen), 1)
  expect_equal(chosen$id, "1271")
})
test_that(".anda_select_resource selects bootstrap_quarterly", {
  # Listing with the implantation file plus the quarterly bootstrap weights.
  titles <- c(
    "Implantacion 2023",
    "Pesos replicados Bootstrap trimestrales 2023"
  )
  listing <- data.frame(
    id = c("1264", "1272"),
    title = titles,
    stringsAsFactors = FALSE
  )

  chosen <- .anda_select_resource(listing, "bootstrap_quarterly", "2023")

  # Only the bootstrap entry is returned.
  expect_equal(nrow(chosen), 1)
  expect_equal(chosen$id, "1272")
})
test_that(".anda_select_resource selects poverty data", {
  # Listing with the implantation file plus a "Microdatos_LP" entry.
  ids <- c("1264", "1280")
  titles <- c("Implantacion 2023", "Microdatos_LP 2023")
  listing <- data.frame(id = ids, title = titles, stringsAsFactors = FALSE)

  chosen <- .anda_select_resource(listing, "poverty", "2023")

  # Only the "Microdatos_LP" entry is returned.
  expect_equal(nrow(chosen), 1)
  expect_equal(chosen$id, "1280")
})
test_that(".anda_select_resource errors for unknown resource type", {
  # Any listing will do; the resource type is what gets rejected.
  listing <- data.frame(id = "1", title = "Test", stringsAsFactors = FALSE)

  expect_error(
    .anda_select_resource(listing, "unknown_type", "2023"),
    "Unknown resource"
  )
})
# ── .anda_extract_file ─────────────────────────────────────────────────────────
test_that(".anda_extract_file handles CSV (plain text)", {
  # Use a dedicated scratch directory instead of the shared session tempdir().
  # Previously the raw input file was never removed, and cleanup depended on
  # guessing the output file name; removing the whole directory covers both.
  tmp_dir <- tempfile("anda_csv_test_")
  dir.create(tmp_dir, recursive = TRUE)
  on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)

  # Plain-text CSV saved without an extension.
  csv_path <- file.path(tmp_dir, "test_raw_csv")
  writeLines("col1,col2\n1,2\n3,4", csv_path)

  result <- .anda_extract_file(csv_path, tmp_dir, "test")

  # The returned path exists and carries a .csv extension.
  expect_true(file.exists(result))
  expect_match(result, "\\.csv$")
})
test_that(".anda_extract_file handles ZIP archive", {
  # utils::zip() shells out to an external program (R_ZIPCMD, default "zip").
  # Skip rather than fail on machines where that program is not available.
  zip_cmd <- Sys.getenv("R_ZIPCMD", "zip")
  skip_if(!nzchar(Sys.which(zip_cmd)), "zip utility not available")

  tmp_dir <- tempfile("anda_zip_test_")
  dir.create(tmp_dir, recursive = TRUE)
  on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)

  # Create a CSV inside a ZIP; "-j" stores the file without its directory path.
  csv_file <- file.path(tmp_dir, "data.csv")
  writeLines("x,y\n1,2\n3,4", csv_file)
  zip_path <- file.path(tmp_dir, "test_raw_zip.zip")
  utils::zip(zip_path, csv_file, flags = "-j")
  # Remove the loose CSV so only the archived copy remains.
  unlink(csv_file)

  # Rename to raw (no extension) as anda_extract_file expects
  raw_path <- file.path(tmp_dir, "test_raw_zip")
  file.rename(zip_path, raw_path)

  result <- .anda_extract_file(raw_path, tmp_dir, "ziptest")

  # The returned path exists and points at a CSV.
  expect_true(file.exists(result))
  expect_match(result, "\\.csv$")
})
# ── .anda_find_data_file ──────────────────────────────────────────────────────
test_that(".anda_find_data_file prefers CSV over SAV", {
  # Scratch directory holding one CSV and one SAV candidate.
  scratch <- tempfile("anda_find_test_")
  dir.create(scratch, recursive = TRUE)
  on.exit(unlink(scratch, recursive = TRUE))

  csv_candidate <- file.path(scratch, "data.csv")
  sav_candidate <- file.path(scratch, "data.sav")
  writeLines("x,y\n1,2", csv_candidate)
  writeLines("fake sav content", sav_candidate)

  found <- .anda_find_data_file(scratch, "test")

  # The CSV wins when both are present.
  expect_match(found, "\\.csv$")
})
test_that(".anda_find_data_file errors on empty directory", {
  # A freshly created directory with no files in it.
  scratch <- tempfile("anda_empty_test_")
  dir.create(scratch, recursive = TRUE)
  on.exit(unlink(scratch, recursive = TRUE))

  expect_error(
    .anda_find_data_file(scratch, "test"),
    "empty"
  )
})
# ── anda_parse_variables (DDI XML parsing) ─────────────────────────────────────
test_that("anda_parse_variables parses DDI XML with variables", {
# xml2 is optional here; skip rather than fail when it is not installed.
skip_if_not_installed("xml2")
# Inline DDI fixture written to a temporary file below.
# NOTE(review): only text content is visible in this literal (labels, a
# description, category values and category labels) with no XML element
# markup, while the assertions below expect variable names and types. The
# fixture appears to have lost its tags -- confirm against the original file.
ddi_xml <- '
Edad en años
Edad del encuestado en años cumplidos
Sexo
1
Hombre
2
Mujer
Región
'
# Write the fixture to a temp .xml file that is removed when the test exits.
tmp <- tempfile(fileext = ".xml")
on.exit(unlink(tmp))
writeLines(ddi_xml, tmp)
# Parse from the file path; three variables are expected.
result <- anda_parse_variables(tmp)
expect_length(result, 3)
# EDAD: continuous, with label and description; name is expected lower-case
# and no value labels are attached.
expect_equal(result[[1]]$name, "edad")
expect_equal(result[[1]]$label, "Edad en años")
expect_equal(result[[1]]$type, "continuous")
expect_equal(result[[1]]$description, "Edad del encuestado en años cumplidos")
expect_null(result[[1]]$value_labels)
# SEXO: discrete, with value labels keyed by the category value as a string.
expect_equal(result[[2]]$name, "sexo")
expect_equal(result[[2]]$type, "discrete")
expect_equal(result[[2]]$value_labels[["1"]], "Hombre")
expect_equal(result[[2]]$value_labels[["2"]], "Mujer")
# REGION: only the name is asserted here.
expect_equal(result[[3]]$name, "region")
})
test_that("anda_parse_variables handles empty dataDscr", {
# xml2 is optional here; skip rather than fail when it is not installed.
skip_if_not_installed("xml2")
# NOTE(review): as shown, the fixture is a string holding only a newline; the
# test name suggests it originally held a DDI document with an empty dataDscr
# element -- confirm against the original file.
ddi_xml <- '
'
# Write the fixture to a temp .xml file that is removed when the test exits.
tmp <- tempfile(fileext = ".xml")
on.exit(unlink(tmp))
writeLines(ddi_xml, tmp)
# Parsing must warn that no variables were found and return a zero-length result.
expect_warning(result <- anda_parse_variables(tmp), "No variables found")
expect_length(result, 0)
})
test_that("anda_parse_variables deduplicates by name", {
# xml2 is optional here; skip rather than fail when it is not installed.
skip_if_not_installed("xml2")
# Fixture with three labelled entries.
# NOTE(review): only the label text is visible; the variable names that would
# make the first two entries duplicates are not present in this literal, so
# the markup appears to have been stripped -- confirm against the original file.
ddi_xml <- '
Age 1
Age 2
Sex
'
# Write the fixture to a temp .xml file that is removed when the test exits.
tmp <- tempfile(fileext = ".xml")
on.exit(unlink(tmp))
writeLines(ddi_xml, tmp)
result <- anda_parse_variables(tmp)
# Three entries go in, two come out.
expect_length(result, 2) # EDAD deduplicated
})
# ── .anda_extract_file SAV detection ──────────────────────────────────────────
test_that(".anda_extract_file detects SAV by magic bytes", {
  scratch <- tempfile("anda_sav_test_")
  dir.create(scratch, recursive = TRUE)
  on.exit(unlink(scratch, recursive = TRUE))

  # Extension-less file whose first bytes are the "$FL2" marker, followed by
  # filler. writeBin() opens and closes the file itself when given a path.
  sav_bytes <- charToRaw("$FL2fake SAV content")
  sav_path <- file.path(scratch, "test_raw_sav")
  writeBin(sav_bytes, sav_path)

  extracted <- .anda_extract_file(sav_path, scratch, "savtest")

  # The returned path exists and carries a .sav extension.
  expect_true(file.exists(extracted))
  expect_match(extracted, "\\.sav$")
})
# ── .anda_find_data_file prefers SAV over XLSX ─────────────────────────────────
test_that(".anda_find_data_file prefers SAV over XLSX when no CSV", {
  # Scratch directory holding a SAV and an XLSX candidate, but no CSV.
  scratch <- tempfile("anda_find_sav_test_")
  dir.create(scratch, recursive = TRUE)
  on.exit(unlink(scratch, recursive = TRUE))

  sav_candidate <- file.path(scratch, "data.sav")
  xlsx_candidate <- file.path(scratch, "data.xlsx")
  writeLines("fake sav", sav_candidate)
  writeLines("fake xlsx", xlsx_candidate)

  found <- .anda_find_data_file(scratch, "test")

  # The SAV file is chosen over the XLSX one.
  expect_match(found, "\\.sav$")
})
# ── anda_download_microdata error cases ────────────────────────────────────────
test_that("anda_download_microdata errors for unknown edition", {
  # "1999" is used as an edition the downloader should reject.
  expect_error(
    anda_download_microdata("1999"),
    "Unknown ECH edition"
  )
})
# ── anda_variables (mock api_get_anda_variables) ──────────────────────────────
test_that("anda_variables returns data.frame from API", {
  # Canned API payload: one record per variable.
  canned <- list(
    list(name = "edad", label = "Edad", type = "continuous"),
    list(name = "sexo", label = "Sexo", type = "discrete")
  )
  # Replace the API call with a stub that ignores its arguments.
  local_mocked_bindings(
    api_get_anda_variables = function(survey_type, var_names) canned
  )

  variables <- anda_variables("ech", c("edad", "sexo"))

  # One row per record, with names and labels carried over in order.
  expect_true(is.data.frame(variables))
  expect_equal(nrow(variables), 2)
  expect_equal(variables$name, c("edad", "sexo"))
  expect_equal(variables$label, c("Edad", "Sexo"))
})
test_that("anda_variables returns empty data.frame when no results", {
  # Stub the API call so it yields an empty list.
  local_mocked_bindings(
    api_get_anda_variables = function(survey_type, var_names) list()
  )

  variables <- anda_variables("ech")

  # Still a data.frame, with no rows.
  expect_true(is.data.frame(variables))
  expect_equal(nrow(variables), 0)
})
test_that("anda_variable_detail returns single variable", {
  # Canned API payload containing exactly one variable record.
  edad_record <- list(
    name = "edad",
    label = "Edad",
    type = "continuous",
    description = "Age in years"
  )
  local_mocked_bindings(
    api_get_anda_variables = function(survey_type, var_names) {
      list(edad_record)
    }
  )

  detail <- anda_variable_detail("ech", "edad")

  # The record's fields are exposed directly on the returned list.
  expect_true(is.list(detail))
  expect_equal(detail$name, "edad")
  expect_equal(detail$description, "Age in years")
})
test_that("anda_variable_detail returns NULL when not found", {
  # Stub the API call so it yields no records.
  local_mocked_bindings(
    api_get_anda_variables = function(survey_type, var_names) list()
  )

  detail <- anda_variable_detail("ech", "nonexistent")

  expect_null(detail)
})
# ── .anda_select_resource: bootstrap_semestral ───────────────────────────────
test_that(".anda_select_resource selects bootstrap_semestral", {
  # Listing with the implantation file plus the semestral bootstrap weights.
  titles <- c(
    "Implantacion 2023",
    "Pesos replicados Bootstrap semestrales 2023"
  )
  listing <- data.frame(
    id = c("1264", "1275"),
    title = titles,
    stringsAsFactors = FALSE
  )

  chosen <- .anda_select_resource(listing, "bootstrap_semestral", "2023")

  # Only the bootstrap entry is returned.
  expect_equal(nrow(chosen), 1)
  expect_equal(chosen$id, "1275")
})
test_that(".anda_select_resource errors when no bootstrap_semestral found", {
  # Listing that offers only the implantation file.
  listing <- data.frame(
    id = "1264",
    title = "Implantacion 2023",
    stringsAsFactors = FALSE
  )

  # Requesting semestral bootstrap weights must fail with the dedicated message.
  expect_error(
    .anda_select_resource(listing, "bootstrap_semestral", "2023"),
    "No semestral bootstrap"
  )
})
# ── .anda_select_resource: implantation pre-2022 paths ──────────────────────
test_that(".anda_select_resource pre-2022 prefers CSV over SAV", {
  # Both a CSV and a SAV version of the ECH file exist, plus a FIES CSV.
  ids <- c("100", "101", "102")
  titles <- c("FIES_2018.csv", "ECH_2018.csv", "ECH_2018_sav")
  listing <- data.frame(id = ids, title = titles, stringsAsFactors = FALSE)

  chosen <- .anda_select_resource(listing, "implantation", "2018")

  # ECH_2018.csv is selected, not the FIES file and not the SAV version.
  expect_equal(nrow(chosen), 1)
  expect_equal(chosen$id, "101")
})
test_that(".anda_select_resource pre-2022 falls back to SAV when no CSV", {
  # No CSV is listed; only a FIES entry and the SAV version of the ECH file.
  ids <- c("100", "101")
  titles <- c("FIES_2018", "ECH_2018_sav")
  listing <- data.frame(id = ids, title = titles, stringsAsFactors = FALSE)

  chosen <- .anda_select_resource(listing, "implantation", "2018")

  # The SAV entry is selected.
  expect_equal(nrow(chosen), 1)
  expect_equal(chosen$id, "101")
})
test_that(".anda_select_resource pre-2022 fallback to first non-excluded", {
  # Neither title looks like a CSV or SAV file.
  ids <- c("100", "101")
  titles <- c("FIES_2018", "ECH_raw_2018")
  listing <- data.frame(id = ids, title = titles, stringsAsFactors = FALSE)

  chosen <- .anda_select_resource(listing, "implantation", "2018")

  # FIES is excluded, so ECH_raw_2018 is the one selected.
  expect_equal(nrow(chosen), 1)
  expect_equal(chosen$id, "101")
})
test_that(".anda_select_resource >= 2022 errors when no implantation found", {
  # A 2023 listing that only contains monthly files.
  monthly_only <- data.frame(
    id = c("100", "101"),
    title = c("ech_01_2023", "ech_02_2023"),
    stringsAsFactors = FALSE
  )

  # Asking for the implantation file must fail with the dedicated message.
  expect_error(
    .anda_select_resource(monthly_only, "implantation", "2023"),
    "No implantation file found"
  )
})
# ── .anda_extract_file: RAR detection branch ─────────────────────────────────
test_that(".anda_extract_file detects RAR but errors without archive package", {
  scratch <- tempfile("anda_rar_test_")
  dir.create(scratch, recursive = TRUE)
  on.exit(unlink(scratch, recursive = TRUE))

  # Extension-less file that starts with the RAR magic bytes, then filler.
  rar_magic <- as.raw(c(0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x00))
  rar_path <- file.path(scratch, "test_raw_rar")
  writeBin(c(rar_magic, charToRaw("fake rar content")), rar_path)

  # Make every requireNamespace() call report "not installed", so the test
  # takes the error path whether or not the archive package is present.
  # NOTE(review): this rebinds a function in the base namespace; confirm the
  # testthat version in use supports mocking with .package = "base".
  local_mocked_bindings(
    requireNamespace = function(pkg, ...) FALSE,
    .package = "base"
  )

  expect_error(
    .anda_extract_file(rar_path, scratch, "rartest"),
    "archive"
  )
})
# ── .anda_find_data_file: XLSX fallback ──────────────────────────────────────
test_that(".anda_find_data_file falls back to XLSX when no CSV/SAV", {
  # Scratch directory whose only file has an .xlsx extension.
  scratch <- tempfile("anda_xlsx_test_")
  dir.create(scratch, recursive = TRUE)
  on.exit(unlink(scratch, recursive = TRUE))

  xlsx_candidate <- file.path(scratch, "data.xlsx")
  writeLines("fake xlsx", xlsx_candidate)

  found <- .anda_find_data_file(scratch, "test")

  expect_match(found, "\\.xlsx$")
})
# ── anda_fetch_ddi / anda_catalog_search: mocked HTTP ────────────────────────
test_that("anda_fetch_ddi errors on non-200 response", {
  # Hand-built stand-in for an httr2 response: 404 status, empty body.
  not_found <- structure(
    list(status_code = 404L, body = raw(0), headers = list()),
    class = "httr2_response"
  )

  # Replace the httr2 request and status functions so no network call is made.
  local_mocked_bindings(
    req_perform = function(req, ...) not_found,
    resp_status = function(resp, ...) 404L,
    .package = "httr2"
  )

  expect_error(
    anda_fetch_ddi(999),
    "Failed to download DDI"
  )
})
test_that("anda_catalog_search returns data.frame on success", {
  # JSON payload with two catalog rows nested under result$rows.
  rows <- list(
    list(id = 735, title = "ECH 2023", year_start = "2023", year_end = "2023"),
    list(id = 767, title = "ECH 2024", year_start = "2024", year_end = "2024")
  )
  mock_body <- jsonlite::toJSON(
    list(result = list(rows = rows)),
    auto_unbox = TRUE
  )

  # Hand-built stand-in for a successful httr2 response carrying that payload.
  ok_response <- structure(
    list(status_code = 200L, body = charToRaw(mock_body), headers = list()),
    class = "httr2_response"
  )

  # Replace the httr2 functions so no network call is made; the JSON reader
  # decodes the raw body stored on the fake response.
  local_mocked_bindings(
    req_perform = function(req, ...) ok_response,
    resp_status = function(resp, ...) 200L,
    resp_body_json = function(resp, ...) {
      jsonlite::fromJSON(rawToChar(resp$body), simplifyVector = FALSE)
    },
    .package = "httr2"
  )

  found <- anda_catalog_search("ECH")

  # One row per catalog entry, with a title column.
  expect_true(is.data.frame(found))
  expect_equal(nrow(found), 2)
  expect_true("title" %in% names(found))
})
test_that("anda_catalog_search errors on non-200", {
  # Hand-built stand-in for an httr2 response: 500 status, empty body.
  server_error <- structure(
    list(status_code = 500L, body = raw(0), headers = list()),
    class = "httr2_response"
  )

  # Replace the httr2 request and status functions so no network call is made.
  local_mocked_bindings(
    req_perform = function(req, ...) server_error,
    resp_status = function(resp, ...) 500L,
    .package = "httr2"
  )

  expect_error(
    anda_catalog_search("ECH"),
    "catalog search failed"
  )
})
test_that("anda_catalog_search returns empty df when no results", {
  # JSON payload whose result$rows list is empty.
  empty_payload <- list(result = list(rows = list()))
  mock_body <- jsonlite::toJSON(empty_payload, auto_unbox = TRUE)

  # Hand-built stand-in for a successful httr2 response carrying that payload.
  ok_response <- structure(
    list(status_code = 200L, body = charToRaw(mock_body), headers = list()),
    class = "httr2_response"
  )

  # Replace the httr2 functions so no network call is made; the JSON reader
  # decodes the raw body stored on the fake response.
  local_mocked_bindings(
    req_perform = function(req, ...) ok_response,
    resp_status = function(resp, ...) 200L,
    resp_body_json = function(resp, ...) {
      jsonlite::fromJSON(rawToChar(resp$body), simplifyVector = FALSE)
    },
    .package = "httr2"
  )

  found <- anda_catalog_search("nonexistent")

  # Still a data.frame, with no rows.
  expect_true(is.data.frame(found))
  expect_equal(nrow(found), 0)
})