# tab() computes cell values/bases from base_array + row/col arrays, then stores cells in a cell_store. # Layout functions then allocate those stored cells to the visible grid (layout$grid). # The tests in this file check core invariants across these stages, plus focused tests of internal helpers # that build bases, synchronize base matrices, and construct per-cell specifications. test_that("tab() computation produces cells consistent with statistic processor and base calculator", { # Checks that a cell pulled from layout$grid is consistent with recomputing that cell directly from # res$arrays using the statistic's processor and base_calculator. dat <- get_big_test_dat(n = 80, random_seed = 123) survey <- create_survey_data(dat) get_cell <- getFromNamespace("get_cell", "surveydatar") validate_cell_store <- getFromNamespace("validate_cell_store", "surveydatar") res <- tab( survey, rows = labelledordinal, cols = labelledcategorical, statistic = "column_pct" ) expect_true(validate_cell_store(res$cell_store)) i <- 1 j <- 1 cell_id <- res$layout$grid[i, j] cell <- get_cell(res$cell_store, cell_id) expect_type(cell$value, "double") expect_type(cell$base, "double") expected_value <- res$statistic$processor( base_array = res$arrays$base_array, row_m = res$arrays$row_arrays[[i]], row_u = res$arrays$row_u_arrays[[i]], col_m = res$arrays$col_arrays[[j]], col_u = res$arrays$col_u_arrays[[j]], values = res$arrays$values_array ) expected_base <- res$statistic$base_calculator( base_array = res$arrays$base_array, row_m = res$arrays$row_arrays[[i]], row_u = res$arrays$row_u_arrays[[i]], col_m = res$arrays$col_arrays[[j]], col_u = res$arrays$col_u_arrays[[j]], values_array = res$arrays$values_array ) expect_equal(cell$value, expected_value) expect_equal(cell$base, expected_base) }) test_that("as.data.frame base row can be NA when bases differ by cell, but view_base_matrix retains full bases", { dat <- get_routing_test_dat(n = 120, random_seed = 123) survey <- create_survey_data(dat) # Checks how the per-cell bases computed during tab() are represented when materializing output. # For values-based statistics, bases can differ by cell (valid values), so as.data.frame() may show NA # in the Base row, while view_base_matrix() still exposes the underlying per-cell base matrix. res <- tab( survey, rows = screen1, cols = total(), statistic = "mean", values = "screen2", show_base = TRUE ) df <- as.data.frame(res) base_label <- res$statistic$base_label expect_true(base_label %in% df$row_label) base_row <- df[df$row_label == base_label, , drop = FALSE] # Confirm the Base row contains NA (cannot be displayed consistently in a single row). expect_true(any(is.na(base_row[1, -1]))) bases <- view_base_matrix(res) expect_true(nrow(bases) > 0) expect_true(ncol(bases) > 1) # Confirm the underlying per-cell base matrix contains real numeric bases. expect_true(any(!is.na(as.matrix(bases[, -1, drop = FALSE])))) }) test_that("calculate_base_array applies filter and weights with expected NA/error behavior", { # Checks the internal base_array construction used by tab(): weights and whole-table filters are # converted into a numeric base_array that defines the effective population for all computations. calculate_base_array <- getFromNamespace("calculate_base_array", "surveydatar") dat <- data.frame( x = 1:5, w = c(1, 2, NA, 4, 5) ) # Internal building block: filter/weight -> numeric base_array used by statistics. # Filter only: returns 0/1 indicator with NA treated as FALSE. thr <- 3 base_f <- calculate_base_array(dat, filter_expr = quote(x > thr), weight_var = NULL, parent_env = environment()) expect_equal(base_f, c(0, 0, 0, 1, 1)) # Weight only: returns weights, with NA weights treated as 0. base_w <- calculate_base_array(dat, filter_expr = NULL, weight_var = "w", parent_env = environment()) expect_equal(base_w, c(1, 2, 0, 4, 5)) # Filter + weight: elementwise product of filter indicator and weights. base_fw <- calculate_base_array(dat, filter_expr = quote(x > thr), weight_var = "w", parent_env = environment()) expect_equal(base_fw, c(0, 0, 0, 4, 5)) # Error contract: filter must yield a logical vector of length nrow(data). expect_error( calculate_base_array(dat, filter_expr = quote(c(TRUE, FALSE)), weight_var = NULL, parent_env = environment()), "logical vector of length nrow\\(data\\)" ) # Error contract: weight must exist, be numeric, and be non-negative. expect_error( calculate_base_array(dat, filter_expr = NULL, weight_var = "missing", parent_env = environment()), "not found" ) expect_error( calculate_base_array(transform(dat, w = as.character(w)), filter_expr = NULL, weight_var = "w", parent_env = environment()), "must be numeric" ) expect_error( calculate_base_array(transform(dat, w = c(1, -1, 1, 1, 1)), filter_expr = NULL, weight_var = "w", parent_env = environment()), "non-negative" ) }) test_that("sync_base_matrix computes base matrix matching statistic base_calculator", { # Checks that base matrices built from row/col arrays are consistent with applying the statistic's # base_calculator to each (row, col) pair. sync_base_matrix <- getFromNamespace("sync_base_matrix", "surveydatar") ensure_builtins_registered() stat <- getFromNamespace("get_statistic", "surveydatar")("column_pct") # Should equal applying stat$base_calculator at every (row, col) intersection. base_array <- c(1, 2, 3, 4, 5) row_arrays <- list( c(1, 0, 1, 0, 1), c(0, 1, 0, 1, 0) ) col_arrays <- list( c(1, 1, 0, 0, 0), c(0, 0, 1, 1, 0), c(0, 0, 0, 0, 1) ) bm <- sync_base_matrix( row_arrays = row_arrays, col_arrays = col_arrays, row_u_arrays = list(rep(1, length(base_array)), rep(1, length(base_array))), col_u_arrays = list(rep(1, length(base_array)), rep(1, length(base_array)), rep(1, length(base_array))), base_array = base_array, values_array = NULL, statistic = stat ) expect_equal(dim(bm), c(length(row_arrays), length(col_arrays))) expected <- matrix(NA_real_, nrow = 2, ncol = 3) for (i in seq_along(row_arrays)) { for (j in seq_along(col_arrays)) { expected[i, j] <- stat$base_calculator( base_array = base_array, row_m = row_arrays[[i]], row_u = rep(1, length(base_array)), col_m = col_arrays[[j]], col_u = rep(1, length(base_array)), values_array = NULL ) } } expect_equal(bm, expected) }) test_that(".is_orthogonal detects overlapping memberships by row/col", { # Checks the internal orthogonality helper used to validate that a membership matrix does not assign # a respondent to multiple categories within a row/column. is_orthogonal <- getFromNamespace(".is_orthogonal", "surveydatar") # Orthogonality check: # - by='row': each row has <=1 non-zero per respondent # - by='col': each col has <=1 non-zero per respondent mat_ok <- matrix( c(1, 0, 0, 1, 0, 0), nrow = 3, byrow = TRUE ) expect_true(is_orthogonal(mat_ok, by = "row")) expect_true(is_orthogonal(mat_ok, by = "col")) mat_bad_row <- matrix( c(1, 1, 0, 0), nrow = 2, byrow = TRUE ) expect_false(is_orthogonal(mat_bad_row, by = "row")) mat_bad_col <- matrix( c(1, 0, 1, 0), nrow = 2, byrow = TRUE ) expect_false(is_orthogonal(mat_bad_col, by = "col")) }) test_that("build_cell_specification returns expected structure and normalized DSL fields", { # Checks the internal cell specification builder used during tab(): it should produce a stable # per-cell specification structure containing expressions plus normalized DSL metadata. build_cell_specification <- getFromNamespace("build_cell_specification", "surveydatar") dat <- get_big_test_dat(n = 20, random_seed = 123) dpdict <- create_dict_with_metadata(dat) # build_cell_specification() defines the semantic identity stored on each cell. # Minimal specs: enough structure for normalize_dsl() to run. row_spec <- list(expr = quote(labelledordinal == 1), dsl = quote(labelledordinal == 1)) col_spec <- list(expr = quote(labelledcategorical == 1), dsl = quote(labelledcategorical == 1)) # Arrays with meta (as created by parsing/expansion); need meta$dsl + variables for normalization. row_array <- rep(0, nrow(dat)) col_array <- rep(0, nrow(dat)) attr(row_array, "meta") <- list(dsl = quote(labelledordinal == 1), variables = list("labelledordinal"), tags = NULL, label = "Row") attr(col_array, "meta") <- list(dsl = quote(labelledcategorical == 1), variables = list("labelledcategorical"), tags = NULL, label = "Col") base_filter_expr <- quote(TRUE) base_filter_spec <- NULL spec <- build_cell_specification( row_spec = row_spec, col_spec = col_spec, row_array = row_array, col_array = col_array, base_filter_spec = base_filter_spec, base_filter_expr = base_filter_expr, data = dat, dpdict = dpdict, statistic_id = "count", values_var = "randomnumeric" ) expect_true(is.list(spec)) expect_true(all(c("base_expr", "row_expr", "col_expr", "dsl", "meta", "statistic_id", "values_var") %in% names(spec))) expect_true(is.list(spec$dsl)) expect_true(all(c("row", "col", "base") %in% names(spec$dsl))) expect_true(isTRUE(is.symbol(spec$values_var))) expect_equal(as.character(spec$values_var), "randomnumeric") expect_equal(spec$statistic_id, "count") })