# Tests for nesting enforcement in period identification # # These tests verify the fundamental guarantee of the nested algorithm: # - Fortnights can ONLY be determined for observations with determined months # - Weeks can ONLY be determined for observations with determined fortnights # # This nesting is enforced BY CONSTRUCTION in the algorithm, not by post-hoc cleanup. # ============================================================================= # HELPER FUNCTIONS # ============================================================================= # Note: Using shared test data generators from helper-test-data.R # - create_realistic_pnadc() for test data generation # ============================================================================= # NESTING ENFORCEMENT TESTS # ============================================================================= test_that("fortnight determination requires month determination", { # This is the KEY test: no observation should have determined_fortnight = TRUE # while determined_month = FALSE test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) result <- pnadc_identify_periods(test_data, verbose = FALSE) # Count violations n_violations <- sum(result$determined_fortnight & !result$determined_month, na.rm = TRUE) expect_equal( n_violations, 0L, info = paste( "Found", n_violations, "observations with determined fortnight but undetermined month.", "This violates the nesting requirement." ) ) }) test_that("week determination requires fortnight determination", { # No observation should have determined_week = TRUE while determined_fortnight = FALSE test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) result <- pnadc_identify_periods(test_data, verbose = FALSE) # Count violations n_violations <- sum(result$determined_week & !result$determined_fortnight, na.rm = TRUE) expect_equal( n_violations, 0L, info = paste( "Found", n_violations, "observations with determined week but undetermined fortnight.", "This violates the nesting requirement." ) ) }) test_that("week determination implies month determination (transitive)", { # By transitivity: determined_week => determined_fortnight => determined_month test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) result <- pnadc_identify_periods(test_data, verbose = FALSE) # Count violations n_violations <- sum(result$determined_week & !result$determined_month, na.rm = TRUE) expect_equal( n_violations, 0L, info = paste( "Found", n_violations, "observations with determined week but undetermined month.", "This violates the transitive nesting requirement." ) ) }) # ============================================================================= # NESTING CONSISTENCY TESTS # ============================================================================= test_that("fortnight value is consistent with month value when both determined", { # When fortnight is determined, it should fall within the determined month # Month 1 = fortnights 1-2, Month 2 = fortnights 3-4, Month 3 = fortnights 5-6 test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) result <- pnadc_identify_periods(test_data, verbose = FALSE) both_determined <- result[determined_month & determined_fortnight] if (nrow(both_determined) > 0) { # Calculate expected fortnight range for determined month both_determined[, `:=`( expected_fortnight_min = (ref_month_in_quarter - 1L) * 2L + 1L, expected_fortnight_max = ref_month_in_quarter * 2L )] # Check that actual fortnight falls within range inconsistent <- both_determined[ ref_fortnight_in_quarter < expected_fortnight_min | ref_fortnight_in_quarter > expected_fortnight_max ] expect_equal( nrow(inconsistent), 0L, info = paste( "Found", nrow(inconsistent), "observations where fortnight is outside determined month range." ) ) } }) test_that("week value is consistent with fortnight value when both determined", { # When week is determined, it should fall within the determined fortnight test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) result <- pnadc_identify_periods(test_data, verbose = FALSE) both_determined <- result[determined_fortnight & determined_week] if (nrow(both_determined) > 0) { # Week should correspond to dates within the fortnight # Fortnight 1 = weeks 1-2, Fortnight 2 = weeks 3-4 of each month # This is a weaker check - just verify week exists for fortnight-determined obs expect_true(all(!is.na(both_determined$ref_week_in_quarter))) } }) # ============================================================================= # NESTING BY CONSTRUCTION TESTS # ============================================================================= test_that("determination rates follow nesting hierarchy", { # By construction: month_rate >= fortnight_rate >= week_rate test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) result <- pnadc_identify_periods(test_data, verbose = FALSE) # Compute determination rates from result n_total <- nrow(result) month_rate <- sum(result$determined_month) / n_total fortnight_rate <- sum(result$determined_fortnight) / n_total week_rate <- sum(result$determined_week) / n_total expect_true( month_rate >= fortnight_rate, info = paste( "Month rate", round(month_rate, 4), "should be >= fortnight rate", round(fortnight_rate, 4) ) ) expect_true( fortnight_rate >= week_rate, info = paste( "Fortnight rate", round(fortnight_rate, 4), "should be >= week rate", round(week_rate, 4) ) ) }) test_that("all determined fortnights have valid month reference", { # Every observation with determined fortnight should have valid month columns test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) result <- pnadc_identify_periods(test_data, verbose = FALSE) fortnight_det <- result[determined_fortnight == TRUE] if (nrow(fortnight_det) > 0) { # All should have valid month values expect_true(all(!is.na(fortnight_det$ref_month_in_quarter))) expect_true(all(!is.na(fortnight_det$ref_month_in_year))) expect_true(all(!is.na(fortnight_det$ref_month_yyyymm))) } }) test_that("all determined weeks have valid fortnight reference", { # Every observation with determined week should have valid fortnight columns test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) result <- pnadc_identify_periods(test_data, verbose = FALSE) week_det <- result[determined_week == TRUE] if (nrow(week_det) > 0) { # All should have valid fortnight values expect_true(all(!is.na(week_det$ref_fortnight_in_quarter))) expect_true(all(!is.na(week_det$ref_fortnight_in_month))) expect_true(all(!is.na(week_det$ref_fortnight_yyyyff))) } }) # ============================================================================= # EDGE CASE TESTS # ============================================================================= test_that("single quarter data respects nesting", { # Even with single quarter (lower determination rate), nesting should hold test_data <- create_realistic_pnadc(n_quarters = 1, n_upas = 5) result <- pnadc_identify_periods(test_data, verbose = FALSE) # Fortnight requires month n_violations_fm <- sum(result$determined_fortnight & !result$determined_month, na.rm = TRUE) expect_equal(n_violations_fm, 0L) # Week requires fortnight n_violations_wf <- sum(result$determined_week & !result$determined_fortnight, na.rm = TRUE) expect_equal(n_violations_wf, 0L) }) test_that("nesting holds across all quarters when stacked", { # Test with more quarters to stress the algorithm test_data <- create_realistic_pnadc(n_quarters = 8, n_upas = 5) result <- pnadc_identify_periods(test_data, verbose = FALSE) # Check nesting per quarter for (q in unique(result$Trimestre)) { quarter_data <- result[Trimestre == q] n_violations_fm <- sum(quarter_data$determined_fortnight & !quarter_data$determined_month, na.rm = TRUE) n_violations_wf <- sum(quarter_data$determined_week & !quarter_data$determined_fortnight, na.rm = TRUE) expect_equal( n_violations_fm, 0L, info = paste("Quarter", q, "has fortnight without month violations") ) expect_equal( n_violations_wf, 0L, info = paste("Quarter", q, "has week without fortnight violations") ) } }) # ============================================================================= # EXPERIMENTAL STRATEGIES NESTING TESTS # ============================================================================= test_that("experimental fortnight requires month (strict or experimental)", { # When fortnight is assigned experimentally, month must already exist test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE) # Use upa_aggregation strategy which doesn't require original data result <- pnadc_experimental_periods(crosswalk, strategy = "upa_aggregation", verbose = FALSE) # Check that fortnight requires month (experimental strategies update main columns) has_fortnight <- !is.na(result$ref_fortnight_in_month) has_month <- !is.na(result$ref_month_in_quarter) n_violations <- sum(has_fortnight & !has_month, na.rm = TRUE) expect_equal( n_violations, 0L, info = paste( "Found", n_violations, "observations with fortnight but no month.", "This violates nesting." ) ) }) test_that("experimental week requires fortnight (strict or experimental)", { # When week is assigned experimentally, fortnight must already exist test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE) # Use upa_aggregation strategy which doesn't require original data result <- pnadc_experimental_periods(crosswalk, strategy = "upa_aggregation", verbose = FALSE) # Check that week requires fortnight (experimental strategies update main columns) has_week <- !is.na(result$ref_week_in_month) has_fortnight <- !is.na(result$ref_fortnight_in_month) n_violations <- sum(has_week & !has_fortnight, na.rm = TRUE) expect_equal( n_violations, 0L, info = paste( "Found", n_violations, "observations with experimental week but no fortnight (strict or exp).", "This violates experimental nesting." ) ) }) test_that("experimental fortnight is consistent with month bounds", { # When fortnight is assigned, it should be within valid range (1 or 2 within month) test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE) # Use upa_aggregation strategy which doesn't require original data result <- pnadc_experimental_periods(crosswalk, strategy = "upa_aggregation", verbose = FALSE) # Check fortnight values are in valid range has_fortnight <- result[!is.na(ref_fortnight_in_month)] if (nrow(has_fortnight) > 0) { # ref_fortnight_in_month should be 1 or 2 invalid <- has_fortnight[ref_fortnight_in_month < 1 | ref_fortnight_in_month > 2] expect_equal( nrow(invalid), 0L, info = paste( "Found", nrow(invalid), "observations where fortnight is outside valid range (1-2)." ) ) } }) test_that("experimental strategies extend but don't contradict strict determination", { # Experimental strategies may fill in previously NA values but should not # change values that were already determined by strict algorithm test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE) # Save original strict values (only where determined) original_month <- crosswalk$ref_month_in_quarter original_fortnight <- crosswalk$ref_fortnight_in_quarter original_week <- crosswalk$ref_week_in_quarter # Use upa_aggregation strategy which doesn't require original data result <- pnadc_experimental_periods(crosswalk, strategy = "upa_aggregation", verbose = FALSE) # Where strict algorithm determined values, they should be preserved strict_month_mask <- !is.na(original_month) strict_fortnight_mask <- !is.na(original_fortnight) strict_week_mask <- !is.na(original_week) # Strict values should be unchanged where they existed expect_equal(result$ref_month_in_quarter[strict_month_mask], original_month[strict_month_mask]) expect_equal(result$ref_fortnight_in_quarter[strict_fortnight_mask], original_fortnight[strict_fortnight_mask]) expect_equal(result$ref_week_in_quarter[strict_week_mask], original_week[strict_week_mask]) }) test_that("experimental determination rates follow nesting hierarchy", { # After experimental strategies, determination rates should follow: month >= fortnight >= week test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10) crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE) # Use upa_aggregation strategy which doesn't require original data result <- pnadc_experimental_periods(crosswalk, strategy = "upa_aggregation", verbose = FALSE) # Calculate determination rates from main columns (experimental strategies update these directly) month_rate <- mean(!is.na(result$ref_month_in_quarter)) fortnight_rate <- mean(!is.na(result$ref_fortnight_in_quarter)) week_rate <- mean(!is.na(result$ref_week_in_quarter)) expect_true( month_rate >= fortnight_rate, info = paste( "Month rate", round(month_rate, 4), "should be >= fortnight rate", round(fortnight_rate, 4) ) ) expect_true( fortnight_rate >= week_rate, info = paste( "Fortnight rate", round(fortnight_rate, 4), "should be >= week rate", round(week_rate, 4) ) ) })