# # d53.rds is generated by code below with data_1000000 from # https://github.com/statisticsnorway/sdc-census-2021-hypercubes/blob/main/data/data_1000000.RData # and f1, f2, f3, f4 as below # set.seed(53) # d53 <- GaussSuppressionFromData(data = data_1000000[sample.int(1e+06, size = 10000), ], # formula = SSBtools::combine_formulas(list(table_1 = f1, # table_2 = f2, # table_3 = f3, # table_4 = f4)), # output = "inner") test_that("SuppressLinkedTables", { skip("since more advanced tests below") f1 <- ~sex * (age_l + age_m + age_h) * (lms_l + lms_h) f2 <- ~sex * (age_l + age_m + age_h) * (hst_l + hst_m + hst_h) f3 <- ~sex * (age_l + age_m + age_h) * (fst_l + fst_m + fst_h) f4 <- ~sex * (lms_l + lms_h) * (hst_l + hst_m + hst_h) d53 <- readRDS(testthat::test_path("testdata", "d53.rds")) linkedGauss <- "consistent" recordAware <- TRUE #asum = NULL sum1 <- list(local_FALSE = c(1282693L, 2221804L, 4755062L, 20531L), local_TRUE = c(1282693L, 2221804L, 4755062L, 20531L), consistent_FALSE = c(1282693L, 2219402L, 4755062L, 20531L), consistent_TRUE = c(1353576L, 2305412L, 5071277L, 20531L), `back-tracking_FALSE` = c(1282693L, 2219402L, 4755062L, 20531L), `back-tracking_TRUE` = c(1445094L, 2768849L, 5378354L, 20531L)) sum2 <- list(local_FALSE = 20753475L, local_TRUE = 22023863L, consistent_FALSE = 20747225L, consistent_TRUE = 22078558L, `back-tracking_FALSE` = 20747225L, `back-tracking_TRUE` = 24137217L, global_FALSE = 21619689L, global_TRUE = 21619689L) sum1[["local-bdiag_FALSE"]] <- sum1[["local_FALSE"]] sum1[["local-bdiag_TRUE"]] <- sum1[["local_TRUE"]] sum2[["local-bdiag_FALSE"]] <- sum2[["local_FALSE"]] sum2[["local-bdiag_TRUE"]] <- sum2[["local_TRUE"]] for(linkedGauss in c("local", "consistent", "back-tracking", "local-bdiag")) for(recordAware in c(FALSE, TRUE)) { cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n") a <- SuppressLinkedTables(data = d53, freqVar = "freq", fun = SuppressSmallCounts, withinArg = list(list(formula = f1), list(formula = f2), list(formula = f3), list(formula = f4)), recordAware = recordAware, preAggregate = TRUE, maxN = 3, protectZeros = FALSE, extend0 = FALSE, printXdim = TRUE, singletonMethod = "none", linkedGauss = linkedGauss) expect_identical(sapply(a, function(x) sum(seq_len(nrow(x)) * as.integer(x$suppressed))), sum1[[paste(linkedGauss, recordAware, sep = "_")]]) } for(linkedGauss in c("local", "consistent", "back-tracking", "global", "local-bdiag")) for(recordAware in c(FALSE, TRUE)) { cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n") a <- tables_by_formulas(data = d53, freqVar = "freq", table_fun = SuppressSmallCounts, table_formulas = list(table_1 = f1, table_2 = f2, table_3 = f3, table_4 = f4), recordAware = recordAware, maxN = 3, protectZeros = FALSE, extend0 = FALSE, printXdim = TRUE, singletonMethod = "none", linkedGauss = linkedGauss) expect_identical(sum(seq_len(nrow(a)) * as.integer(a$suppressed)), sum2[[paste(linkedGauss, recordAware, sep = "_")]]) } }) test_that("SuppressLinkedTables with forced", { f1 <- ~sex * (age_l + age_m + age_h) * (lms_l + lms_h) f2 <- ~sex * (age_l + age_m + age_h) * (hst_l + hst_m + hst_h) f3 <- ~sex * (age_l + age_m + age_h) * (fst_l + fst_m + fst_h) f4 <- ~sex * (lms_l + lms_h) * (hst_l + hst_m + hst_h) d53 <- readRDS(testthat::test_path("testdata", "d53.rds")) printXdim <- FALSE printInc <- FALSE # In order for the candidates order to be the same # But what should be the same will not be exactly the same anyway. # This is because the common candidates order is not # input to SuppressLinkedTables() set.seed(123) d53$w = d53$freq + runif(nrow(d53))/nrow(d53) # forced increases complexity and results in unsafe in output # May give GaussSuppression-warning: # "some cell grouping ignored due to forced cells" # Thus, it can still be inconsistent suppression and warning: # "Inconsistent suppression across common cells within the algorithm" forced <- function(freq, crossTable, ...) freq>500 & crossTable["sex"] == "Total" sum1 <- list(local_FALSE = c(1268238, 2177150, 4681831, 19221), local_TRUE = c(1268238, 2177150, 4681831, 19221), consistent_FALSE = c(1265297, 2174748, 4681439, 22860), consistent_TRUE = c(1342807, 2222294, 4977701, 22860), `back-tracking_FALSE` = c(1273760, 2174748, 4681439, 22477), `back-tracking_TRUE` = c(1427836, 2710850, 5364422, 22477)) sum2 <- list(local_FALSE = c(1269555, 2177278, 4681933, 19665), local_TRUE = c(1365557, 2363893, 4950102, 19677), consistent_FALSE = c(1265297, 2174748, 4681413, 22864), consistent_TRUE = c(1343774, 2222294, 5000442, 22876), `back-tracking_FALSE` = c(1273760, 2174748, 4681413, 22481), `back-tracking_TRUE` = c(1427836, 2712055, 5364396, 22493)) sum1[["local-bdiag_FALSE"]] <- sum1[["local_FALSE"]] sum1[["local-bdiag_TRUE"]] <- sum1[["local_TRUE"]] sum2[["local-bdiag_FALSE"]] <- sum2[["local_FALSE"]] sum2[["local-bdiag_TRUE"]] <- sum2[["local_TRUE"]] # Copy of PxWebApiData:::WithWarningsAsMessages WithWarningsAsMessages <- function(expr, classes = "warning") { withCallingHandlers( expr, warning = function(w) { if (inherits(w, classes)) { message("Warning converted to message: ", w$message) tryInvokeRestart("muffleWarning") } } ) } # Choose not to use this now # WithWarningsAsMessages <- suppressWarnings # Choose not to suppress messages now WithWarningsAsMessages_ <- WithWarningsAsMessages WithWarningsAsMessages <- function(...) suppressMessages(WithWarningsAsMessages_(...)) As4list <- function(a){ list(a[a$table_1, ], a[a$table_2, ], a[a$table_3, ], a[a$table_4, ]) } for(linkedGauss in c("local", "consistent", "back-tracking", "local-bdiag")) for(recordAware in TRUE) { #for(recordAware in c(FALSE, TRUE)) { if (printInc) cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n") a <- WithWarningsAsMessages(SuppressLinkedTables(data = d53, freqVar = "freq", fun = SuppressSmallCounts, withinArg = list(list(formula = f1), list(formula = f2), list(formula = f3), list(formula = f4)), recordAware = recordAware, preAggregate = TRUE, maxN = 3, protectZeros = FALSE, extend0 = FALSE, printXdim = printXdim, printInc = printInc, singletonMethod = "none", forced = forced, linkedGauss = linkedGauss, numVar = "w", candidates = CandidatesNum)) expect_identical(sapply(a, function(x) sum(seq_len(nrow(x)) * as.integer(x$suppressed) + 2 * as.integer(x$unsafe))), sum1[[paste(linkedGauss, recordAware, sep = "_")]]) a <- As4list(WithWarningsAsMessages(tables_by_formulas(data = d53, freqVar = "freq", table_fun = SuppressSmallCounts, table_formulas = list(table_1 = f1, table_2 = f2, table_3 = f3, table_4 = f4), recordAware = recordAware, preAggregate = TRUE, maxN = 3, protectZeros = FALSE, extend0 = FALSE, printXdim = printXdim, printInc = printInc, singletonMethod = "none", forced = forced, linkedGauss = linkedGauss, numVar = "w", candidates = CandidatesNum))) expect_identical(sapply(a, function(x) sum(seq_len(nrow(x)) * as.integer(x$suppressed) + 2 * as.integer(x$unsafe))), sum2[[paste(linkedGauss, recordAware, sep = "_")]]) } }) test_that("SuppressLinkedTables with freq-singleton", { f1 <- ~sex * (age_l + age_m + age_h) * (lms_l + lms_h) f2 <- ~sex * (age_l + age_m + age_h) * (hst_l + hst_m + hst_h) f3 <- ~sex * (age_l + age_m + age_h) * (fst_l + fst_m + fst_h) f4 <- ~sex * (lms_l + lms_h) * (hst_l + hst_m + hst_h) d53 <- readRDS(testthat::test_path("testdata", "d53.rds")) printXdim <- FALSE printInc <- FALSE sum1 <- list(local_FALSE = c(1367202L, 2440464L, 5143007L, 20531L), local_TRUE = c(1367202L, 2440464L, 5143007L, 20531L), consistent_FALSE = c(1365362L, 2430673L, 5138722L, 21524L), consistent_TRUE = c(1431584L, 2431583L, 5229043L, 21524L), `back-tracking_FALSE` = c(1367329L, 2430673L, 5138722L, 21524L), `back-tracking_TRUE` = c(1525658L, 2962522L, 5706756L, 21524L), `local-bdiag_FALSE` = c(1367202L, 2440464L, 5143007L, 20531L), `local-bdiag_TRUE` = c(1367202L, 2440464L, 5143007L, 20531L)) sum2 <- list(local_FALSE = 23753179L, local_TRUE = 24928316L, consistent_FALSE = 23680330L, consistent_TRUE = 24242046L, `back-tracking_FALSE` = 23743652L, `back-tracking_TRUE` = 27124781L, global_FALSE = 26316146L, global_TRUE = 26316146L, `local-bdiag_FALSE` = 23753179L, `local-bdiag_TRUE` = 24928316L) sum1[["local-bdiag_FALSE"]] <- sum1[["local_FALSE"]] sum1[["local-bdiag_TRUE"]] <- sum1[["local_TRUE"]] sum2[["local-bdiag_FALSE"]] <- sum2[["local_FALSE"]] sum2[["local-bdiag_TRUE"]] <- sum2[["local_TRUE"]] for(linkedGauss in "consistent") #for(linkedGauss in c("local", "consistent", "back-tracking", "local-bdiag")) for(recordAware in TRUE) { #for(recordAware in c(FALSE, TRUE)) { if (printInc) cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n") a <- SuppressLinkedTables(data = d53, freqVar = "freq", fun = SuppressSmallCounts, withinArg = list(list(formula = f1), list(formula = f2), list(formula = f3), list(formula = f4)), recordAware = recordAware, preAggregate = TRUE, maxN = 3, protectZeros = FALSE, extend0 = FALSE, printXdim = printXdim, printInc = printInc, linkedGauss = linkedGauss) expect_identical(sapply(a, function(x) sum(seq_len(nrow(x)) * as.integer(x$suppressed))), sum1[[paste(linkedGauss, recordAware, sep = "_")]]) } for(linkedGauss in "consistent") # for(linkedGauss in c("local", "consistent", "back-tracking", "global", "local-bdiag")) for(recordAware in FALSE) { #for(recordAware in c(FALSE, TRUE)) { if (printInc) cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n") a <- tables_by_formulas(data = d53, freqVar = "freq", table_fun = SuppressSmallCounts, table_formulas = list(table_1 = f1, table_2 = f2, table_3 = f3, table_4 = f4), recordAware = recordAware, maxN = 3, protectZeros = FALSE, extend0 = FALSE, printXdim = printXdim, printInc = printInc, linkedGauss = linkedGauss) expect_identical(sum(seq_len(nrow(a)) * as.integer(a$suppressed)), sum2[[paste(linkedGauss, recordAware, sep = "_")]]) } }) test_that("SuppressLinkedTables with num-singleton", { f1 <- ~sex * (age_l + age_m) * (lms_l) f2 <- ~sex * (age_l + age_m) * (hst_l + hst_m) f3 <- ~sex * (age_l + age_m) * (fst_l + fst_m) f4 <- ~sex * (lms_l + lms_h) * (hst_l + hst_m) d53 <- readRDS(testthat::test_path("testdata", "d53.rds")) z <- SSBtools::MakeMicro(d53, "freq") set.seed(123) z$char <- sample(paste0("char", seq_len(nrow(z)/2)), nrow(z), replace = TRUE) z$value <- rnorm(nrow(z))^2 printXdim <- FALSE printInc <- FALSE sum1 <- list(local_FALSE = c(13105L, 72123L, 108122L, 7233L), local_TRUE = c(13105L, 72123L, 108122L, 7233L), consistent_FALSE = c(13105L, 72123L, 108122L, 7233L), consistent_TRUE = c(13857L, 75961L, 117968L, 7233L), `back-tracking_FALSE` = c(13105L, 72123L, 108122L, 7233L), `back-tracking_TRUE` = c(23649L, 87129L, 141280L, 7233L), `local-bdiag_FALSE` = c(13105L, 72123L, 108122L, 7233L), `local-bdiag_TRUE` = c(13105L, 72123L, 108122L, 7233L)) sum2 <- list(local_FALSE = 634206L, local_TRUE = 656029L, consistent_FALSE = 634206L, consistent_TRUE = 643913L, `back-tracking_FALSE` = 682083L, `back-tracking_TRUE` = 740275L, global_FALSE = 651516L, global_TRUE = 651516L, `local-bdiag_FALSE` = 634206L, `local-bdiag_TRUE` = 656029L) sum1[["local-bdiag_FALSE"]] <- sum1[["local_FALSE"]] sum1[["local-bdiag_TRUE"]] <- sum1[["local_TRUE"]] sum2[["local-bdiag_FALSE"]] <- sum2[["local_FALSE"]] sum2[["local-bdiag_TRUE"]] <- sum2[["local_TRUE"]] for(linkedGauss in "consistent") # for(linkedGauss in c("local", "consistent", "back-tracking", "local-bdiag")) for(recordAware in FALSE) { #for(recordAware in c(FALSE, TRUE)) { if (printInc) cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n") a <- SuppressLinkedTables(data = z, fun = SuppressDominantCells, dominanceVar = "value", contributorVar = "char", withinArg = list(list(formula = f1), list(formula = f2), list(formula = f3), list(formula = f4)), recordAware = recordAware, pPercent = 10, printXdim = printXdim, printInc = printInc, linkedGauss = linkedGauss) expect_identical(sapply(a, function(x) sum(seq_len(nrow(x)) * as.integer(x$suppressed))), sum1[[paste(linkedGauss, recordAware, sep = "_")]]) } for(linkedGauss in "consistent") # for(linkedGauss in c("local", "consistent", "back-tracking", "global", "local-bdiag")) for(recordAware in TRUE) { #for(recordAware in c(FALSE, TRUE)) { if (printInc) cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n") a <- tables_by_formulas(data = z, table_fun = SuppressDominantCells, dominanceVar = "value", contributorVar = "char", table_formulas = list(table_1 = f1, table_2 = f2, table_3 = f3, table_4 = f4), recordAware = recordAware, pPercent = 50, printXdim = printXdim, printInc = printInc, linkedGauss = linkedGauss) expect_identical(sum(seq_len(nrow(a)) * as.integer(a$suppressed)), sum2[[paste(linkedGauss, recordAware, sep = "_")]]) } })