# Tests for validation utility functions test_that("required_vars_ref_month returns expected columns", { vars <- required_vars_ref_month() expect_type(vars, "character") expect_true("Ano" %in% vars) expect_true("Trimestre" %in% vars) expect_true("UPA" %in% vars) expect_true("V2008" %in% vars) # Birth day expect_true("V20081" %in% vars) # Birth month expect_true("V20082" %in% vars) # Birth year expect_true("V2009" %in% vars) # Age }) test_that("join_key_vars returns expected columns", { vars <- join_key_vars() expect_type(vars, "character") expect_true("Ano" %in% vars) expect_true("Trimestre" %in% vars) expect_true("UPA" %in% vars) expect_true("V1008" %in% vars) expect_true("V1014" %in% vars) # Note: V2003 (person number) is NOT in join_key_vars because # crosswalk is at household level (V1008), not person level expect_false("V2003" %in% vars) }) test_that("required_vars_weights returns expected columns", { vars <- required_vars_weights() expect_type(vars, "character") expect_true("V1028" %in% vars) expect_true("UF" %in% vars) expect_true("posest" %in% vars) expect_true("posest_sxi" %in% vars) }) test_that("validate_pnadc detects missing columns", { # Missing required columns bad_data <- data.frame(Ano = 2023, Trimestre = 1) expect_error(validate_pnadc(bad_data), "missing_ref_month") }) test_that("validate_pnadc accepts valid minimal data", { valid_data <- data.frame( Ano = 2023, Trimestre = 1, UPA = 1, V1008 = 1, V1014 = 1, V2008 = 15, V20081 = 6, V20082 = 1990, V2009 = 33 ) # Should not error expect_silent(validate_pnadc(valid_data)) }) # Parameterized test for invalid field values test_cases <- list( list(field = "Ano", value = 2000, error_pattern = "invalid_years", desc = "year (< 2012)"), list(field = "Trimestre", value = 5, error_pattern = "invalid_quarters", desc = "quarter (> 4)"), list(field = "V2008", value = 35, error_pattern = "invalid_birth_days", desc = "birth day (> 31)"), list(field = "V20081", value = 15, error_pattern = "invalid_birth_months", desc = "birth month (> 12)") ) for (tc in test_cases) { test_that(paste("validate_pnadc detects invalid", tc$desc), { # Create valid base data bad_data <- data.frame( Ano = 2023, Trimestre = 1, UPA = 1, V1008 = 1, V1014 = 1, V2008 = 15, V20081 = 6, V20082 = 1990, V2009 = 33 ) # Override with invalid value bad_data[[tc$field]] <- tc$value expect_error(validate_pnadc(bad_data), tc$error_pattern) }) } test_that("validate_pnadc returns report when stop_on_error = FALSE", { bad_data <- data.frame(Ano = 2023, Trimestre = 1) result <- validate_pnadc(bad_data, stop_on_error = FALSE) expect_type(result, "list") expect_false(result$valid) expect_true("missing_ref_month" %in% names(result$issues)) }) test_that("validate_pnadc reports valid data correctly", { valid_data <- data.frame( Ano = 2023, Trimestre = 1, UPA = 1, V1008 = 1, V1014 = 1, V2008 = 15, V20081 = 6, V20082 = 1990, V2009 = 33 ) result <- validate_pnadc(valid_data, stop_on_error = FALSE) expect_type(result, "list") expect_true(result$valid) expect_equal(result$n_rows, 1) }) test_that("validate_pnadc checks weight columns when requested", { # Valid ref month data but missing weight columns data_no_weights <- data.frame( Ano = 2023, Trimestre = 1, UPA = 1, V1008 = 1, V1014 = 1, V2008 = 15, V20081 = 6, V20082 = 1990, V2009 = 33 ) # Should pass without check_weights expect_silent(validate_pnadc(data_no_weights, check_weights = FALSE)) # Should fail with check_weights expect_error(validate_pnadc(data_no_weights, check_weights = TRUE), "missing_weights") }) test_that("validate_monthly_totals accepts valid data", { valid_totals <- data.frame( ref_month_yyyymm = c(202301, 202302, 202303), m_populacao = c(200000, 200100, 200200) ) # Should not error expect_silent(validate_monthly_totals(valid_totals)) }) test_that("validate_monthly_totals accepts anomesexato column", { valid_totals <- data.frame( anomesexato = c(202301, 202302, 202303), m_populacao = c(200000, 200100, 200200) ) # Should not error expect_silent(validate_monthly_totals(valid_totals)) }) test_that("validate_monthly_totals detects missing date column", { bad_totals <- data.frame( m_populacao = c(200000, 200100, 200200) ) expect_error(validate_monthly_totals(bad_totals), "missing_date") }) test_that("validate_monthly_totals detects missing population column", { bad_totals <- data.frame( ref_month_yyyymm = c(202301, 202302, 202303) ) expect_error(validate_monthly_totals(bad_totals), "missing_population") }) test_that("ensure_data_table converts data.frame", { df <- data.frame(x = 1:3, y = letters[1:3]) result <- ensure_data_table(df) expect_s3_class(result, "data.table") expect_equal(nrow(result), 3) }) test_that("ensure_data_table returns data.table unchanged when copy = FALSE", { dt <- data.table::data.table(x = 1:3) result <- ensure_data_table(dt, copy = FALSE) # Should be the same object (not a copy) expect_identical(data.table::address(result), data.table::address(dt)) }) test_that("ensure_data_table returns copy when copy = TRUE", { dt <- data.table::data.table(x = 1:3) result <- ensure_data_table(dt, copy = TRUE) # Should be a different object expect_false(identical(data.table::address(result), data.table::address(dt))) expect_equal(result$x, dt$x) }) # ============================================================================= # VALIDATE_PERIOD_INVARIANTS TESTS # ============================================================================= test_that("validate_period_invariants accepts valid crosswalk", { # 1. Setup: Create valid crosswalk with proper nesting # Weeks 1-2 → Fortnight 1, Weeks 3-4 → Fortnight 2, etc. # Fortnights 1-2 → Month 1, Fortnights 3-4 → Month 2, Fortnights 5-6 → Month 3 crosswalk <- data.table::data.table( Ano = 2023L, Trimestre = 1L, UPA = 1:10, V1008 = 1L, V2003 = 1L, ref_month_in_quarter = c(1L, 1L, 2L, 2L, 3L, 3L, NA, NA, NA, NA), ref_fortnight_in_quarter = c(1L, 1L, 3L, 3L, 5L, 5L, NA, NA, NA, NA), # Match months ref_week_in_quarter = c(1L, 2L, 5L, 6L, 9L, 10L, NA, NA, NA, NA), # Match fortnights determined_month = c(rep(TRUE, 6), rep(FALSE, 4)), determined_fortnight = c(rep(TRUE, 6), rep(FALSE, 4)), determined_week = c(rep(TRUE, 6), rep(FALSE, 4)) ) # 2. Verify: Should not error expect_no_error( PNADCperiods:::validate_period_invariants(crosswalk, context = "test") ) }) test_that("validate_period_invariants detects invalid month ranges", { # 1. Setup: Create crosswalk with invalid month (4, should be 1-3) crosswalk <- data.table::data.table( ref_month_in_quarter = c(1L, 2L, 3L, 4L), # 4 is invalid! ref_fortnight_in_quarter = c(1L, 3L, 5L, NA), ref_week_in_quarter = c(1L, 3L, 5L, NA), determined_month = c(TRUE, TRUE, TRUE, TRUE), determined_fortnight = c(TRUE, TRUE, TRUE, FALSE), determined_week = c(TRUE, TRUE, TRUE, FALSE) ) # 2. Verify: Should error on invalid month value expect_error( PNADCperiods:::validate_period_invariants(crosswalk), "ref_month_in_quarter must be 1-3", label = "Invalid month value (4) should be detected" ) }) test_that("validate_period_invariants detects invalid fortnight ranges", { # 1. Setup: Create crosswalk with invalid fortnight (7, should be 1-6) crosswalk <- data.table::data.table( ref_month_in_quarter = c(1L, 2L, 3L), ref_fortnight_in_quarter = c(1L, 3L, 7L), # 7 is invalid! ref_week_in_quarter = c(1L, 3L, 5L), determined_month = c(TRUE, TRUE, TRUE), determined_fortnight = c(TRUE, TRUE, TRUE), determined_week = c(TRUE, TRUE, TRUE) ) # 2. Verify: Should error on invalid fortnight value expect_error( PNADCperiods:::validate_period_invariants(crosswalk), "ref_fortnight_in_quarter must be 1-6", label = "Invalid fortnight value (7) should be detected" ) }) test_that("validate_period_invariants detects invalid week ranges", { # 1. Setup: Create crosswalk with invalid week (13, should be 1-12) crosswalk <- data.table::data.table( ref_month_in_quarter = c(1L, 2L, 3L), ref_fortnight_in_quarter = c(1L, 3L, 5L), ref_week_in_quarter = c(1L, 5L, 13L), # 13 is invalid! determined_month = c(TRUE, TRUE, TRUE), determined_fortnight = c(TRUE, TRUE, TRUE), determined_week = c(TRUE, TRUE, TRUE) ) # 2. Verify: Should error on invalid week value expect_error( PNADCperiods:::validate_period_invariants(crosswalk), "ref_week_in_quarter must be 1-12", label = "Invalid week value (13) should be detected" ) }) test_that("validate_period_invariants detects week without fortnight (nesting violation)", { # 1. Setup: Week determined but fortnight is NA (violates nesting) crosswalk <- data.table::data.table( ref_month_in_quarter = c(1L, 2L), ref_fortnight_in_quarter = c(1L, NA), # Fortnight NA ref_week_in_quarter = c(1L, 3L), # But week determined! determined_month = c(TRUE, TRUE), determined_fortnight = c(TRUE, FALSE), # Fortnight not determined determined_week = c(TRUE, TRUE) # But week is determined - VIOLATION ) # 2. Verify: Should error on nesting violation expect_error( PNADCperiods:::validate_period_invariants(crosswalk), "Nesting violation.*week determined but fortnight is NA", label = "Week without fortnight violates nesting" ) }) test_that("validate_period_invariants detects fortnight without month (nesting violation)", { # 1. Setup: Fortnight determined but month is NA crosswalk <- data.table::data.table( ref_month_in_quarter = c(1L, NA), # Month NA ref_fortnight_in_quarter = c(1L, 3L), # But fortnight determined! ref_week_in_quarter = c(1L, NA), determined_month = c(TRUE, FALSE), # Month not determined determined_fortnight = c(TRUE, TRUE), # But fortnight is - VIOLATION determined_week = c(TRUE, FALSE) ) # 2. Verify: Should error on nesting violation expect_error( PNADCperiods:::validate_period_invariants(crosswalk), "Nesting violation.*fortnight determined but month is NA", label = "Fortnight without month violates nesting" ) }) test_that("validate_period_invariants detects week-fortnight mismatch", { # 1. Setup: Week and fortnight inconsistent # Week 1-2 should be in fortnight 1, but marked as fortnight 3 crosswalk <- data.table::data.table( ref_month_in_quarter = c(1L), ref_fortnight_in_quarter = c(3L), # Fortnight 3 ref_week_in_quarter = c(1L), # Week 1 - should be in fortnight 1! determined_month = c(TRUE), determined_fortnight = c(TRUE), determined_week = c(TRUE) ) # 2. Verify: Should error on inconsistency expect_error( PNADCperiods:::validate_period_invariants(crosswalk), "Week-fortnight inconsistency", label = "Week-fortnight mismatch should be detected" ) }) test_that("validate_period_invariants detects fortnight-month mismatch", { # 1. Setup: Fortnight and month inconsistent # Fortnights 1-2 should be in month 1, but marked as month 3 crosswalk <- data.table::data.table( ref_month_in_quarter = c(3L), # Month 3 ref_fortnight_in_quarter = c(1L), # Fortnight 1 - should be in month 1! ref_week_in_quarter = c(1L), determined_month = c(TRUE), determined_fortnight = c(TRUE), determined_week = c(TRUE) ) # 2. Verify: Should error on inconsistency expect_error( PNADCperiods:::validate_period_invariants(crosswalk), "Fortnight-month inconsistency", label = "Fortnight-month mismatch should be detected" ) }) test_that("validate_period_invariants strict=FALSE allows warnings", { # 1. Setup: Create crosswalk with minor issue crosswalk <- data.table::data.table( ref_month_in_quarter = c(1L, 2L, 4L), # 4 is invalid ref_fortnight_in_quarter = c(1L, 3L, NA), ref_week_in_quarter = c(1L, 3L, NA), determined_month = c(TRUE, TRUE, TRUE), determined_fortnight = c(TRUE, TRUE, FALSE), determined_week = c(TRUE, TRUE, FALSE) ) # 2. Verify: strict=FALSE should warn, not error expect_warning( PNADCperiods:::validate_period_invariants(crosswalk, strict = FALSE), "ref_month_in_quarter must be 1-3", label = "strict=FALSE should warn instead of error" ) }) test_that("validate_period_invariants provides context in error messages", { # 1. Setup: Invalid crosswalk crosswalk <- data.table::data.table( ref_month_in_quarter = c(4L), # Invalid ref_fortnight_in_quarter = c(NA), ref_week_in_quarter = c(NA), determined_month = c(TRUE), determined_fortnight = c(FALSE), determined_week = c(FALSE) ) # 2. Verify: Context appears in error message expect_error( PNADCperiods:::validate_period_invariants(crosswalk, context = "my_test"), "my_test.*ref_month_in_quarter", label = "Context should appear in error message" ) }) # ============================================================================= # VALIDATE_PNADC ADDITIONAL TESTS # ============================================================================= test_that("validate_pnadc with stop_on_error=FALSE returns issues for invalid years", { # 1. Setup: Data with invalid year bad_data <- data.frame( Ano = 2000, # Before 2012 (PNADC started 2012) Trimestre = 1, UPA = 1, V1008 = 1, V1014 = 1, V2008 = 15, V20081 = 6, V20082 = 1990, V2009 = 33 ) # 2. Execute: validate with stop_on_error=FALSE result <- validate_pnadc(bad_data, stop_on_error = FALSE) # 3. Verify: Should return list with issues expect_type(result, "list") expect_false(result$valid) expect_true("invalid_years" %in% names(result$issues)) }) test_that("validate_pnadc with stop_on_error=FALSE returns issues for invalid quarters", { # 1. Setup: Data with invalid quarter bad_data <- data.frame( Ano = 2023, Trimestre = 5, # Invalid (should be 1-4) UPA = 1, V1008 = 1, V1014 = 1, V2008 = 15, V20081 = 6, V20082 = 1990, V2009 = 33 ) # 2. Execute: validate with stop_on_error=FALSE result <- validate_pnadc(bad_data, stop_on_error = FALSE) # 3. Verify: Should return list with issues expect_false(result$valid) expect_true("invalid_quarters" %in% names(result$issues)) }) test_that("validate_pnadc detects unusual ages as warnings only", { # 1. Setup: Data with unusual age data_unusual_age <- data.frame( Ano = 2023, Trimestre = 1, UPA = 1, V1008 = 1, V1014 = 1, V2008 = 15, V20081 = 6, V20082 = 1990, V2009 = 150 # Unusual age (>130) ) # 2. Execute: Should produce warning but still validate expect_warning( result <- validate_pnadc(data_unusual_age, stop_on_error = TRUE), "Unusual ages|warning_ages" ) # 3. With stop_on_error=FALSE, should still return valid=TRUE (only warning) result <- suppressWarnings(validate_pnadc(data_unusual_age, stop_on_error = FALSE)) expect_true(result$valid, label = "Unusual ages are warnings only, not validation failures") }) test_that("validate_pnadc detects negative ages", { # 1. Setup: Data with negative age data_negative_age <- data.frame( Ano = 2023, Trimestre = 1, UPA = 1, V1008 = 1, V1014 = 1, V2008 = 15, V20081 = 6, V20082 = 1990, V2009 = -5 # Negative age ) # 2. Execute: Should produce warning expect_warning( validate_pnadc(data_negative_age, stop_on_error = TRUE), "Unusual ages|warning_ages" ) }) # ============================================================================= # SUBSET_AND_COPY TESTS # ============================================================================= test_that("subset_and_copy errors on missing required columns", { dt <- data.table::data.table(a = 1:3, b = 4:6) expect_error( PNADCperiods:::subset_and_copy(dt, required_cols = c("a", "c")), "Missing required columns.*c" ) }) test_that("subset_and_copy selects required columns from data.table", { dt <- data.table::data.table(a = 1:3, b = 4:6, c = 7:9) result <- PNADCperiods:::subset_and_copy(dt, required_cols = c("a", "b")) expect_s3_class(result, "data.table") expect_equal(names(result), c("a", "b")) expect_equal(nrow(result), 3) # Should be a copy (not same address) expect_false(identical(data.table::address(result), data.table::address(dt))) }) test_that("subset_and_copy converts data.frame to data.table", { df <- data.frame(a = 1:3, b = 4:6, c = 7:9) result <- PNADCperiods:::subset_and_copy(df, required_cols = c("a", "b")) expect_s3_class(result, "data.table") expect_equal(names(result), c("a", "b")) expect_equal(nrow(result), 3) }) test_that("subset_and_copy includes available optional columns", { dt <- data.table::data.table(a = 1:3, b = 4:6, c = 7:9) # Optional cols: c is available, d is not result <- PNADCperiods:::subset_and_copy( dt, required_cols = c("a"), optional_cols = c("c", "d") ) expect_equal(sort(names(result)), c("a", "c")) expect_false("d" %in% names(result)) }) test_that("subset_and_copy with NULL optional_cols returns only required", { dt <- data.table::data.table(a = 1:3, b = 4:6) result <- PNADCperiods:::subset_and_copy(dt, required_cols = c("a")) expect_equal(names(result), "a") }) # ============================================================================= # VALIDATE_MONTHLY_TOTALS ADDITIONAL TESTS # ============================================================================= test_that("validate_monthly_totals stop_on_error=FALSE returns FALSE for invalid data", { bad_totals <- data.frame(m_populacao = c(200000, 200100)) # Function returns invisible(logical): FALSE when issues found result <- validate_monthly_totals(bad_totals, stop_on_error = FALSE) expect_type(result, "logical") expect_false(result) }) test_that("validate_monthly_totals stop_on_error=FALSE returns TRUE for valid data", { valid_totals <- data.frame( ref_month_yyyymm = c(202301, 202302, 202303), m_populacao = c(200000, 200100, 200200) ) # Function returns invisible(logical): TRUE when valid result <- validate_monthly_totals(valid_totals, stop_on_error = FALSE) expect_type(result, "logical") expect_true(result) }) # ============================================================================= # VALIDATE_PERIOD_INVARIANTS ADDITIONAL TESTS # ============================================================================= test_that("validate_period_invariants handles crosswalk missing period columns", { # Crosswalk without any period columns crosswalk <- data.table::data.table( Ano = 2023L, Trimestre = 1L, UPA = 1:5 ) # Should not error when period columns are absent (gracefully skips checks) expect_no_error( PNADCperiods:::validate_period_invariants(crosswalk, context = "test") ) }) test_that("validate_period_invariants strict=FALSE returns structured report", { # Create invalid crosswalk crosswalk <- data.table::data.table( ref_month_in_quarter = c(1L, 4L), # 4 is invalid ref_fortnight_in_quarter = c(1L, NA), ref_week_in_quarter = c(1L, NA), determined_month = c(TRUE, TRUE), determined_fortnight = c(TRUE, FALSE), determined_week = c(TRUE, FALSE) ) result <- suppressWarnings( PNADCperiods:::validate_period_invariants(crosswalk, strict = FALSE) ) expect_type(result, "list") expect_false(result$valid) expect_true(length(result$violations) > 0) }) test_that("validate_pnadc handles empty data gracefully", { # 1. Setup: Empty data.frame with correct columns empty_data <- data.frame( Ano = integer(0), Trimestre = integer(0), UPA = integer(0), V1008 = integer(0), V1014 = integer(0), V2008 = integer(0), V20081 = integer(0), V20082 = integer(0), V2009 = integer(0) ) # 2. Verify: Should error on min.rows constraint # checkmate produces error about "at least 1 rows" expect_error( validate_pnadc(empty_data), "at least 1 rows|min.rows", label = "Empty data should fail min.rows check" ) })