# Tests for .compute_derived_series() and .DERIVED_SERIES_SPEC # These tests verify the derived series computation engine (aggregates, average # income, residuals, rates, deflated) and the structural integrity of the # declarative specification that drives it. library(data.table) # ============================================================================= # PHASE 1: AGGREGATE TESTS # ============================================================================= test_that("aggregate: popocup + popdesocup = popnaforca", { dt <- data.table( anomesexato = 202301:202310, m_popocup = c(100, 105, 110, 108, 112, 115, 118, 120, 122, 125), m_popdesocup = c(10, 12, 11, 13, 9, 8, 10, 11, 12, 10) ) result <- PNADCperiods:::.compute_derived_series(copy(dt)) expect_true("m_popnaforca" %in% names(result)) expect_equal(result$m_popnaforca, dt$m_popocup + dt$m_popdesocup) }) test_that("aggregate dependency chain: empregpriv built first, then empregado uses it", { dt <- data.table( anomesexato = 202301:202310, # Components for empregpriv m_empregprivcomcart = seq(40, 49), m_empregprivsemcart = seq(10, 19), # Components for domestico m_domesticocomcart = seq(5, 14), m_domesticosemcart = seq(2, 11), # Components for empregpubl m_empregpublcomcart = seq(8, 17), m_empregpublsemcart = seq(3, 12), m_estatutmilitar = seq(1, 10) ) result <- PNADCperiods:::.compute_derived_series(copy(dt)) # empregpriv should be computed first expect_true("m_empregpriv" %in% names(result)) expected_empregpriv <- dt$m_empregprivcomcart + dt$m_empregprivsemcart expect_equal(result$m_empregpriv, expected_empregpriv) # domestico should also be computed expect_true("m_domestico" %in% names(result)) expected_domestico <- dt$m_domesticocomcart + dt$m_domesticosemcart expect_equal(result$m_domestico, expected_domestico) # empregpubl depends on 3 components expect_true("m_empregpubl" %in% names(result)) expected_empregpubl <- dt$m_empregpublcomcart + dt$m_empregpublsemcart + dt$m_estatutmilitar expect_equal(result$m_empregpubl, expected_empregpubl) # empregado = empregpriv + domestico + empregpubl (all computed above) expect_true("m_empregado" %in% names(result)) expected_empregado <- expected_empregpriv + expected_domestico + expected_empregpubl expect_equal(result$m_empregado, expected_empregado) }) # ============================================================================= # PHASE 2: AVERAGE INCOME TESTS # ============================================================================= test_that("average income: rendhabnominaltodos = massahabnominaltodos / comrendtodos * 1000", { dt <- data.table( anomesexato = 202301:202310, m_massahabnominaltodos = c(250e6, 260e6, 255e6, 265e6, 270e6, 275e6, 280e6, 285e6, 290e6, 295e6), m_comrendtodos = c(90e3, 91e3, 92e3, 93e3, 94e3, 95e3, 96e3, 97e3, 98e3, 99e3) ) result <- PNADCperiods:::.compute_derived_series(copy(dt)) expect_true("m_rendhabnominaltodos" %in% names(result)) expected <- round(dt$m_massahabnominaltodos / dt$m_comrendtodos * 1000, 0) expect_equal(result$m_rendhabnominaltodos, expected) }) # ============================================================================= # PHASE 3: RESIDUAL TESTS # ============================================================================= test_that("residual: popforadaforca = pop14mais - popocup - popdesocup", { dt <- data.table( anomesexato = 202301:202310, m_pop14mais = c(170, 172, 174, 176, 178, 180, 182, 184, 186, 188), m_popocup = c(100, 102, 104, 106, 108, 110, 112, 114, 116, 118), m_popdesocup = c(12, 13, 11, 14, 10, 9, 11, 12, 13, 10) ) result <- PNADCperiods:::.compute_derived_series(copy(dt)) expect_true("m_popforadaforca" %in% names(result)) expected <- dt$m_pop14mais - dt$m_popocup - dt$m_popdesocup expect_equal(result$m_popforadaforca, expected) }) # ============================================================================= # PHASE 4: RATE TESTS # ============================================================================= test_that("rate: taxadesocup = popdesocup / popnaforca * 100", { dt <- data.table( anomesexato = 202301:202310, m_popocup = c(100, 105, 110, 108, 112, 115, 118, 120, 122, 125), m_popdesocup = c(10, 12, 11, 13, 9, 8, 10, 11, 12, 10) ) # Phase 1 computes m_popnaforca = popocup + popdesocup # Phase 4 computes taxadesocup = popdesocup / popnaforca * 100 result <- PNADCperiods:::.compute_derived_series(copy(dt)) expect_true("m_taxadesocup" %in% names(result)) popnaforca <- dt$m_popocup + dt$m_popdesocup expected <- round(dt$m_popdesocup / popnaforca * 100, 1) expect_equal(result$m_taxadesocup, expected) }) test_that("rate with compound denominator: percdesalento uses (popnaforca + desalentado)", { dt <- data.table( anomesexato = 202301:202310, m_popocup = c(100, 105, 110, 108, 112, 115, 118, 120, 122, 125), m_popdesocup = c(10, 12, 11, 13, 9, 8, 10, 11, 12, 10), m_desalentado = c(5, 6, 4, 7, 3, 4, 5, 6, 7, 4), m_forcapotencial = c(8, 9, 7, 10, 6, 7, 8, 9, 10, 7) ) result <- PNADCperiods:::.compute_derived_series(copy(dt)) expect_true("m_percdesalento" %in% names(result)) # percdesalento spec: numerator = "desalentado", denominator = c("popnaforca", "desalentado") # popnaforca is computed in Phase 1 as popocup + popdesocup popnaforca <- dt$m_popocup + dt$m_popdesocup # Compound denominator: rowSums of m_popnaforca + m_desalentado denom <- popnaforca + dt$m_desalentado expected <- round(dt$m_desalentado / denom * 100, 1) expect_equal(result$m_percdesalento, expected) }) # ============================================================================= # PHASE 5: DEFLATED SERIES TESTS # ============================================================================= test_that("deflated series: verify IPCA deflation formula for hab and efet", { # 10 months with rising IPCA index ipca_values <- seq(5000, 5450, by = 50) dt <- data.table( anomesexato = 202301:202310, m_massahabnominaltodos = c(250e6, 260e6, 255e6, 265e6, 270e6, 275e6, 280e6, 285e6, 290e6, 295e6), m_massaefetnominaltodos = c(240e6, 250e6, 245e6, 255e6, 260e6, 265e6, 270e6, 275e6, 280e6, 285e6), m_comrendtodos = c(90e3, 91e3, 92e3, 93e3, 94e3, 95e3, 96e3, 97e3, 98e3, 99e3), ipca100dez1993 = ipca_values ) result <- PNADCperiods:::.compute_derived_series(copy(dt)) latest_ipca <- ipca_values[10] # max anomesexato # hab series use current IPCA (use_lagged_ipca = FALSE) # deflator_hab = latest_ipca / ipca100dez1993 expect_true("m_massahabtodosipcabr" %in% names(result)) deflator_hab <- latest_ipca / ipca_values expected_hab <- round(dt$m_massahabnominaltodos * deflator_hab, 0) expect_equal(result$m_massahabtodosipcabr, expected_hab) # efet series use lagged IPCA (use_lagged_ipca = TRUE) # deflator_efet = latest_ipca / shift(ipca100dez1993, 1, type="lag") expect_true("m_massaefettodosipcabr" %in% names(result)) ipca_lagged <- c(NA, ipca_values[1:9]) deflator_efet <- latest_ipca / ipca_lagged expected_efet <- round(dt$m_massaefetnominaltodos * deflator_efet, 0) # Row 1 is NA because lagged IPCA is NA expect_true(is.na(result$m_massaefettodosipcabr[1])) expect_equal(result$m_massaefettodosipcabr[2:10], expected_efet[2:10]) }) # ============================================================================= # EDGE CASE TESTS # ============================================================================= test_that("missing component column: derived series gracefully skipped", { # Only provide popocup, not popdesocup -- popnaforca cannot be computed dt <- data.table( anomesexato = 202301:202310, m_popocup = seq(100, 109) ) # Should not error result <- expect_silent(PNADCperiods:::.compute_derived_series(copy(dt))) # popnaforca should NOT be created (missing component) expect_false("m_popnaforca" %in% names(result)) # taxadesocup also should NOT be created (missing numerator and denominator) expect_false("m_taxadesocup" %in% names(result)) }) test_that("division by zero: denominator=0 does not crash", { dt <- data.table( anomesexato = 202301:202310, m_popocup = c(100, 105, 110, 108, 0, 115, 118, 120, 122, 125), m_popdesocup = c(10, 12, 11, 13, 0, 8, 10, 11, 12, 10) ) # Row 5: popnaforca = 0, so taxadesocup = 0/0 # Should not error -- NaN or Inf is acceptable, just no crash result <- expect_silent(PNADCperiods:::.compute_derived_series(copy(dt))) expect_true("m_taxadesocup" %in% names(result)) # The function completes without error expect_s3_class(result, "data.table") }) # ============================================================================= # .DERIVED_SERIES_SPEC STRUCTURAL VALIDATION # ============================================================================= test_that("spec: all aggregate specs have 'name' and non-empty 'components'", { spec <- PNADCperiods:::.DERIVED_SERIES_SPEC for (s in spec$aggregates) { expect_true(!is.null(s$name), info = paste("Aggregate missing 'name'")) expect_true(is.character(s$name) && nchar(s$name) > 0, info = paste("Aggregate has empty name")) expect_true(!is.null(s$components), info = paste("Aggregate", s$name, "missing 'components'")) expect_true(length(s$components) >= 2, info = paste("Aggregate", s$name, "has fewer than 2 components")) } }) test_that("spec: all rate specs have 'name', 'numerator', 'denominator'", { spec <- PNADCperiods:::.DERIVED_SERIES_SPEC for (s in spec$rates) { expect_true(!is.null(s$name), info = "Rate missing 'name'") expect_true(is.character(s$name) && nchar(s$name) > 0, info = "Rate has empty name") expect_true(!is.null(s$numerator), info = paste("Rate", s$name, "missing 'numerator'")) expect_true(!is.null(s$denominator), info = paste("Rate", s$name, "missing 'denominator'")) expect_true(length(unlist(s$numerator)) >= 1, info = paste("Rate", s$name, "has empty numerator")) expect_true(length(unlist(s$denominator)) >= 1, info = paste("Rate", s$name, "has empty denominator")) } }) test_that("spec: all residual specs have 'name', 'parent', 'subtract'", { spec <- PNADCperiods:::.DERIVED_SERIES_SPEC for (s in spec$residuals) { expect_true(!is.null(s$name), info = "Residual missing 'name'") expect_true(is.character(s$name) && nchar(s$name) > 0, info = "Residual has empty name") expect_true(!is.null(s$parent), info = paste("Residual", s$name, "missing 'parent'")) expect_true(is.character(s$parent) && nchar(s$parent) > 0, info = paste("Residual", s$name, "has empty parent")) expect_true(!is.null(s$subtract), info = paste("Residual", s$name, "missing 'subtract'")) expect_true(length(s$subtract) >= 1, info = paste("Residual", s$name, "has empty subtract list")) } }) test_that("spec: no duplicate names across all spec sections", { spec <- PNADCperiods:::.DERIVED_SERIES_SPEC all_names <- c( sapply(spec$aggregates, `[[`, "name"), sapply(spec$average_income, `[[`, "name"), sapply(spec$residuals, `[[`, "name"), sapply(spec$rates, `[[`, "name"), sapply(spec$deflated, `[[`, "name") ) duplicated_names <- all_names[duplicated(all_names)] expect_equal(length(duplicated_names), 0, info = paste("Duplicate derived series names:", paste(duplicated_names, collapse = ", "))) }) test_that("spec: aggregate dependencies in correct order (empregpriv before empregado)", { spec <- PNADCperiods:::.DERIVED_SERIES_SPEC agg_names <- sapply(spec$aggregates, `[[`, "name") # empregpriv must appear before empregado (empregado depends on empregpriv) idx_empregpriv <- which(agg_names == "empregpriv") idx_empregado <- which(agg_names == "empregado") expect_true(length(idx_empregpriv) == 1, info = "empregpriv not found in aggregates") expect_true(length(idx_empregado) == 1, info = "empregado not found in aggregates") expect_true(idx_empregpriv < idx_empregado, info = "empregpriv must be computed before empregado") # popnaforca must appear before forcaampliada (forcaampliada depends on popnaforca) idx_popnaforca <- which(agg_names == "popnaforca") idx_forcaampliada <- which(agg_names == "forcaampliada") expect_true(length(idx_popnaforca) == 1, info = "popnaforca not found in aggregates") expect_true(length(idx_forcaampliada) == 1, info = "forcaampliada not found in aggregates") expect_true(idx_popnaforca < idx_forcaampliada, info = "popnaforca must be computed before forcaampliada") # domestico must appear before empregado idx_domestico <- which(agg_names == "domestico") expect_true(idx_domestico < idx_empregado, info = "domestico must be computed before empregado") # empregpubl must appear before empregado idx_empregpubl <- which(agg_names == "empregpubl") expect_true(idx_empregpubl < idx_empregado, info = "empregpubl must be computed before empregado") }) test_that("spec: all deflated specs have 'source' and 'use_lagged_ipca'", { spec <- PNADCperiods:::.DERIVED_SERIES_SPEC for (s in spec$deflated) { expect_true(!is.null(s$name), info = "Deflated missing 'name'") expect_true(!is.null(s$source), info = paste("Deflated", s$name, "missing 'source'")) expect_true(is.character(s$source) && nchar(s$source) > 0, info = paste("Deflated", s$name, "has empty source")) expect_true(!is.null(s$use_lagged_ipca), info = paste("Deflated", s$name, "missing 'use_lagged_ipca'")) expect_true(is.logical(s$use_lagged_ipca), info = paste("Deflated", s$name, "'use_lagged_ipca' is not logical")) } }) test_that("spec: phase ordering ensures aggregates computed before rates that use them", { spec <- PNADCperiods:::.DERIVED_SERIES_SPEC # Collect all names produced by aggregates (Phase 1) agg_names <- sapply(spec$aggregates, `[[`, "name") # Rates that reference aggregate outputs in their numerator or denominator # e.g., taxadesocup uses popnaforca (aggregate) as denominator for (s in spec$rates) { # Check denominator references denom_components <- unlist(s$denominator) for (d in denom_components) { if (d %in% agg_names) { # This is fine -- the aggregate is computed in Phase 1, rate in Phase 4 expect_true(d %in% agg_names, info = paste("Rate", s$name, "references aggregate", d, "which should be computed before rates")) } } # Check numerator references num_components <- unlist(s$numerator) for (n in num_components) { if (n %in% agg_names) { expect_true(n %in% agg_names, info = paste("Rate", s$name, "references aggregate", n, "which should be computed before rates")) } } } # Specifically verify that taxadesocup denominator (popnaforca) is an aggregate taxadesocup <- Filter(function(s) s$name == "taxadesocup", spec$rates) expect_true(length(taxadesocup) == 1) denom <- unlist(taxadesocup[[1]]$denominator) expect_true("popnaforca" %in% agg_names, info = "popnaforca must be in aggregates since taxadesocup depends on it") expect_true(all(denom %in% c(agg_names, "popnaforca")), info = "taxadesocup denominator must reference known aggregates or primaries") }) test_that("spec: all average_income specs have 'multiplier' and 'decimals'", { spec <- PNADCperiods:::.DERIVED_SERIES_SPEC for (s in spec$average_income) { expect_true(!is.null(s$name), info = "Average income spec missing 'name'") expect_true(!is.null(s$numerator), info = paste("Average income", s$name, "missing 'numerator'")) expect_true(!is.null(s$denominator), info = paste("Average income", s$name, "missing 'denominator'")) expect_true(!is.null(s$multiplier), info = paste("Average income", s$name, "missing 'multiplier'")) expect_true(is.numeric(s$multiplier) && s$multiplier > 0, info = paste("Average income", s$name, "'multiplier' must be positive numeric")) expect_true(!is.null(s$decimals), info = paste("Average income", s$name, "missing 'decimals'")) expect_true(is.numeric(s$decimals) && s$decimals >= 0, info = paste("Average income", s$name, "'decimals' must be non-negative numeric")) } })