# Tests for i_screener()
# No model fitting — all tests run fast; no skip_on_cran() needed.
# ── Shared helper ─────────────────────────────────────────────────────────────
# Five subjects covering each quality failure mode:
# "A" — passes all criteria on x and y
# "B" — x has only 2 valid obs (fails min_n when min_n >= 3)
# "C" — x is nearly constant (fails min_sd; also fails max_mode_pct)
# "D" — x has 4/5 identical values (fails max_mode_pct; passes min_sd)
# "E" — y is nearly constant (fails min_sd on y; x is fine)
#
# Known metric values (used in correctness tests):
# A x: n=5, SD≈1.581, mode_pct=0.20
# B x: n=2, SD≈1.414, mode_pct=0.50
# C x: n=5, SD≈0.447, mode_pct=0.80 (mean=2.2: four 2s, one 3)
# D x: n=5, SD≈0.894, mode_pct=0.80 (four 5s, one 3)
# E y: n=5, SD≈0.447, mode_pct=0.80 (four 3s, one 4)
# Build the five-subject screening fixture described in the header comment:
# A passes everything; B, C, D each fail one x criterion; E fails min_sd on y.
make_screen_df <- function() {
  ids <- rep(c("A", "B", "C", "D", "E"), each = 5)
  x_vals <- c(
    1, 2, 4, 3, 5,    # A: varied
    1, 3, NA, NA, NA, # B: only 2 valid
    2, 2, 2, 2, 3,    # C: nearly constant, SD ≈ 0.447
    5, 5, 5, 5, 3,    # D: mode_pct = 0.80, SD ≈ 0.894
    1, 2, 4, 3, 5     # E: varied (same as A)
  )
  y_vals <- c(
    5, 3, 1, 4, 2,    # A: varied
    2, 4, 1, 5, 3,    # B: all 5 valid
    2, 4, 1, 5, 3,    # C: varied
    2, 4, 1, 5, 3,    # D: varied
    3, 3, 3, 3, 4     # E: nearly constant, SD ≈ 0.447
  )
  data.frame(id = ids, x = x_vals, y = y_vals, stringsAsFactors = FALSE)
}
# ══════════════════════════════════════════════════════════════════════════════
# Validation
# ══════════════════════════════════════════════════════════════════════════════
# ── Validation: each invalid argument should fail fast with an error message
# that names the offending argument or variable so the user can self-correct ──
test_that("misspelled col triggers error naming the bad variable", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "not_a_col", id_var = "id"),
    regexp = "not_a_col"
  )
})
test_that("misspelled id_var triggers error naming the bad variable", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "not_an_id"),
    regexp = "not_an_id"
  )
})
test_that("error message includes 'Cannot find required variables'", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "oops", id_var = "id"),
    regexp = "Cannot find required variables"
  )
})
test_that("empty cols vector triggers error mentioning 'cols'", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = character(0), id_var = "id"),
    regexp = "cols"
  )
})
test_that("character column in cols triggers error naming the column", {
  df <- make_screen_df()
  df$x <- as.character(df$x)
  expect_error(
    i_screener(df, cols = "x", id_var = "id"),
    regexp = "must be numeric"
  )
})
test_that("non-numeric cols error reports the actual class", {
  df <- make_screen_df()
  df$x <- factor(df$x)
  # The error should mention the offending class ("factor"), not just
  # say "not numeric".
  expect_error(
    i_screener(df, cols = "x", id_var = "id"),
    regexp = "factor"
  )
})
# max_mode_pct is expected to lie in (0, 1]: 0 and values above 1 are rejected
# (the 1.0 boundary itself is accepted — see the edge-case section below).
test_that("max_mode_pct = 0 triggers error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", max_mode_pct = 0),
    regexp = "max_mode_pct"
  )
})
test_that("max_mode_pct > 1 triggers error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", max_mode_pct = 1.1),
    regexp = "max_mode_pct"
  )
})
test_that("min_sd <= 0 triggers error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", min_sd = 0),
    regexp = "min_sd"
  )
})
# Non-finite thresholds (NA, Inf) must produce friendly errors naming the
# argument, not cryptic failures deep inside the summary computations.
test_that("min_n_subject = NA_real_ triggers user-friendly error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", min_n_subject = NA_real_),
    regexp = "min_n"
  )
})
test_that("min_n_subject = Inf triggers user-friendly error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", min_n_subject = Inf),
    regexp = "min_n"
  )
})
test_that("min_sd = NA_real_ triggers user-friendly error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", min_n_subject = 3, min_sd = NA_real_),
    regexp = "min_sd"
  )
})
test_that("id_var as a vector triggers error", {
  df <- make_screen_df()
  # id_var must be a single string, not a character vector.
  expect_error(
    i_screener(df, cols = "x", id_var = c("id", "id")),
    regexp = "id_var"
  )
})
test_that("min_sd = Inf triggers user-friendly error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", min_sd = Inf),
    regexp = "min_sd"
  )
})
test_that("invalid mode triggers error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", mode = "bad_mode"),
    regexp = "mode"
  )
})
test_that("invalid filter_type triggers error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", filter_type = "bad_type"),
    regexp = "filter_type"
  )
})
# ══════════════════════════════════════════════════════════════════════════════
# Output structure
# ══════════════════════════════════════════════════════════════════════════════
test_that("mode=filter joint returns a data.frame", {
  df <- make_screen_df()
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3)
  expect_s3_class(result, "data.frame")
})
test_that("mode=filter joint preserves all original columns", {
  df <- make_screen_df()
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3)
  # Joint filter mode removes rows, never columns.
  expect_true(all(c("id", "x", "y") %in% names(result)))
  expect_equal(ncol(result), ncol(df))
})
test_that("mode=filter joint removes rows of failing subjects", {
  df <- make_screen_df()
  # B has n_valid = 2 for x; with min_n_subject = 3 it fails; others pass
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3)
  expect_false("B" %in% result$id)
  expect_true(all(c("A", "C", "D", "E") %in% result$id))
  expect_equal(nrow(result), 20L) # 4 subjects x 5 rows
})
test_that("mode=filter per_column does not remove rows", {
  df <- make_screen_df()
  # per_column blanks failing cells instead of dropping rows, so the
  # returned data frame keeps its original dimensions.
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3,
                       filter_type = "per_column")
  expect_equal(nrow(result), nrow(df))
  expect_equal(ncol(result), ncol(df))
})
test_that("mode=filter per_column sets failing col values to NA, other cols unchanged", {
  df <- make_screen_df()
  # B fails min_n on x; its y values should remain intact
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3,
                       filter_type = "per_column")
  x_B <- result$x[result$id == "B"]
  y_B <- result$y[result$id == "B"]
  expect_true(all(is.na(x_B)))
  expect_false(any(is.na(y_B)))
})
test_that("mode=flag joint adds exactly one pass_overall column", {
  df <- make_screen_df()
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3, mode = "flag")
  # Flag mode annotates rather than filters: every row is kept and exactly
  # one logical flag column is appended.
  expect_true("pass_overall" %in% names(result))
  expect_equal(ncol(result), ncol(df) + 1L)
  expect_equal(nrow(result), nrow(df))
  expect_type(result$pass_overall, "logical")
})
# NOTE(review): this description previously contained an embedded newline
# ("adds one\n_pass column"), which garbled testthat's reporter output.
test_that("mode=flag per_column adds one _pass column per variable", {
  df <- make_screen_df()
  result <- i_screener(df, cols = c("x", "y"), id_var = "id", min_n_subject = 3,
                       mode = "flag", filter_type = "per_column")
  # One <col>_pass flag per screened variable; no joint pass_overall column.
  expect_true("x_pass" %in% names(result))
  expect_true("y_pass" %in% names(result))
  expect_false("pass_overall" %in% names(result))
  expect_equal(ncol(result), ncol(df) + 2L)
})
test_that("mode=report returns one row per subject", {
  df <- make_screen_df()
  result <- i_screener(df, cols = c("x", "y"), id_var = "id", min_n_subject = 3,
                       mode = "report")
  expect_equal(nrow(result), 5L) # 5 subjects
})
test_that("mode=report contains expected columns", {
  df <- make_screen_df()
  result <- i_screener(df, cols = c("x", "y"), id_var = "id", min_n_subject = 3,
                       mode = "report")
  # One metric column per screened variable (n_valid, sd, mode_pct), plus
  # per-variable pass flags and the overall verdict.
  expected_cols <- c("id",
                     "x_n_valid", "y_n_valid",
                     "x_sd", "y_sd",
                     "x_mode_pct", "y_mode_pct",
                     "x_pass", "y_pass",
                     "pass_overall")
  expect_true(all(expected_cols %in% names(result)))
})
test_that("output has no residual grouping", {
  df <- make_screen_df()
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3)
  # The implementation groups by subject internally; it must ungroup
  # before returning.
  expect_false(dplyr::is_grouped_df(result))
})
test_that("pre-existing grouping on input is handled without error", {
  df <- dplyr::group_by(make_screen_df(), id)
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3)
  expect_s3_class(result, "data.frame")
})
test_that("row order of passing subjects is preserved", {
  df <- make_screen_df()
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3)
  # All rows from B are removed; remaining rows keep original order
  df_expected <- df[df$id != "B", ]
  expect_equal(result$id, df_expected$id)
  expect_equal(result$x, df_expected$x, ignore_attr = TRUE)
})
# ══════════════════════════════════════════════════════════════════════════════
# Statistical correctness
# ══════════════════════════════════════════════════════════════════════════════
# ── Thresholds act on the known fixture metrics documented atop this file ─────
test_that("min_n removes subjects below the threshold (joint)", {
  df <- make_screen_df()
  # B: x has n_valid = 2; min_n_subject = 3 → B fails
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3)
  expect_false("B" %in% result$id)
})
test_that("min_n keeps subjects at or above the threshold", {
  df <- make_screen_df()
  # Boundary: B sits exactly at min_n_subject = 2 and must be kept (>=, not >).
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 2)
  expect_true(all(c("A", "B", "C", "D", "E") %in% result$id))
})
test_that("min_sd removes low-variance subjects (joint)", {
  df <- make_screen_df()
  # C: x SD ≈ 0.447 < 0.5 → C fails; D: SD ≈ 0.894 → passes
  result <- i_screener(df, cols = "x", id_var = "id",
                       min_n_subject = 2, min_sd = 0.5)
  expect_false("C" %in% result$id)
  expect_true("D" %in% result$id)
})
test_that("min_sd removes subject with low SD on second col (joint)", {
  df <- make_screen_df()
  # E: y SD ≈ 0.447 < 0.5 → E fails in joint mode with cols = c("x", "y")
  result <- i_screener(df, cols = c("x", "y"), id_var = "id",
                       min_n_subject = 2, min_sd = 0.5)
  expect_false("E" %in% result$id)
})
test_that("min_sd per_column sets NA only in failing column", {
  df <- make_screen_df()
  # C: x fails min_sd; y is fine → x values NA, y unchanged
  result <- i_screener(df, cols = c("x", "y"), id_var = "id",
                       min_n_subject = 2, min_sd = 0.5,
                       filter_type = "per_column")
  x_C <- result$x[result$id == "C"]
  y_C <- result$y[result$id == "C"]
  expect_true(all(is.na(x_C)))
  expect_false(any(is.na(y_C)))
})
test_that("max_mode_pct removes stuck responders (joint)", {
  df <- make_screen_df()
  # C: mode_pct = 0.80 > 0.75 → fails; D: mode_pct = 0.80 > 0.75 → fails
  result <- i_screener(df, cols = "x", id_var = "id",
                       min_n_subject = 2, max_mode_pct = 0.75)
  expect_false("C" %in% result$id)
  expect_false("D" %in% result$id)
  expect_true("A" %in% result$id)
})
test_that("max_mode_pct at exactly the threshold value passes", {
  df <- make_screen_df()
  # C and D both have mode_pct = 0.80; max_mode_pct = 0.80 → they pass.
  # The comparison is expected to be strict: fail only when mode_pct > threshold.
  result <- i_screener(df, cols = "x", id_var = "id",
                       min_n_subject = 2, max_mode_pct = 0.80)
  expect_true("C" %in% result$id)
  expect_true("D" %in% result$id)
})
test_that("max_mode_pct per_column sets NA only in failing column", {
  df <- make_screen_df()
  # D: x fails max_mode_pct; D's y is fine
  result <- i_screener(df, cols = c("x", "y"), id_var = "id",
                       min_n_subject = 2, max_mode_pct = 0.75,
                       filter_type = "per_column")
  x_D <- result$x[result$id == "D"]
  y_D <- result$y[result$id == "D"]
  expect_true(all(is.na(x_D)))
  expect_false(any(is.na(y_D)))
})
test_that("joint mode removes subject failing on second col only", {
  df <- make_screen_df()
  # E: x passes min_sd, y fails min_sd → joint removes E; per_column keeps E rows
  result_joint <- i_screener(df, cols = c("x", "y"), id_var = "id",
                             min_n_subject = 2, min_sd = 0.5, filter_type = "joint")
  result_pc <- i_screener(df, cols = c("x", "y"), id_var = "id",
                          min_n_subject = 2, min_sd = 0.5, filter_type = "per_column")
  expect_false("E" %in% result_joint$id)
  expect_true("E" %in% result_pc$id)
})
test_that("flag joint: pass_overall is TRUE only for fully passing subjects", {
  df <- make_screen_df()
  result <- i_screener(df, cols = c("x", "y"), id_var = "id",
                       min_n_subject = 2, min_sd = 0.5, mode = "flag")
  # A passes both cols; C fails min_sd on x; E fails min_sd on y.
  # (B is not asserted here: its x has n = 2, which satisfies min_n_subject = 2,
  #  so B actually passes under these settings.)
  expect_true( all(result$pass_overall[result$id == "A"]))
  expect_false(all(result$pass_overall[result$id == "C"]))
  expect_false(all(result$pass_overall[result$id == "E"]))
})
test_that("flag per_column: x_pass and y_pass reflect independent evaluation", {
  df <- make_screen_df()
  result <- i_screener(df, cols = c("x", "y"), id_var = "id",
                       min_n_subject = 2, min_sd = 0.5,
                       mode = "flag", filter_type = "per_column")
  # E: x passes (SD ≈ 1.58 > 0.5), y fails (SD ≈ 0.447 < 0.5)
  # unique() collapses each subject's repeated flag to a scalar — and will
  # also make the expectation fail if the flag is not constant within subject.
  x_pass_E <- unique(result$x_pass[result$id == "E"])
  y_pass_E <- unique(result$y_pass[result$id == "E"])
  expect_true(x_pass_E)
  expect_false(y_pass_E)
})
test_that("report returns correct n_valid values", {
  df <- make_screen_df()
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 2, mode = "report")
  expect_equal(result$x_n_valid[result$id == "A"], 5L)
  expect_equal(result$x_n_valid[result$id == "B"], 2L)
})
test_that("report returns correct sd values", {
  df <- make_screen_df()
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 2, mode = "report")
  # Compare against sd() of the raw per-subject values rather than hard-coded
  # decimals, so the expectation is exact.
  expect_equal(result$x_sd[result$id == "A"],
               sd(c(1, 2, 4, 3, 5)), tolerance = 1e-10)
  expect_equal(result$x_sd[result$id == "C"],
               sd(c(2, 2, 2, 2, 3)), tolerance = 1e-10)
})
test_that("report returns correct mode_pct values", {
  df <- make_screen_df()
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 2, mode = "report")
  # A: all unique → 1/5 = 0.2; C: four 2s → 4/5 = 0.8; D: four 5s → 0.8
  expect_equal(result$x_mode_pct[result$id == "A"], 0.2, tolerance = 1e-10)
  expect_equal(result$x_mode_pct[result$id == "C"], 0.8, tolerance = 1e-10)
  expect_equal(result$x_mode_pct[result$id == "D"], 0.8, tolerance = 1e-10)
})
test_that("report pass_overall is FALSE when any col fails", {
  df <- make_screen_df()
  result <- i_screener(df, cols = c("x", "y"), id_var = "id",
                       min_n_subject = 2, min_sd = 0.5, mode = "report")
  # E passes x but fails y → pass_overall must be FALSE
  expect_false(result$pass_overall[result$id == "E"])
})
test_that("no criteria beyond min_n returns all subjects with n_valid >= min_n", {
  df <- make_screen_df()
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 3)
  # Only B (n=2) is removed; C, D, E all have n=5
  # (assumes the default min_sd / max_mode_pct are permissive enough to pass
  #  C and D — TODO confirm against i_screener()'s documented defaults)
  expect_true(all(c("A", "C", "D", "E") %in% result$id))
  expect_false("B" %in% result$id)
})
# ══════════════════════════════════════════════════════════════════════════════
# Verbose
# ══════════════════════════════════════════════════════════════════════════════
test_that("verbose = TRUE emits messages", {
  dat <- make_screen_df()
  # suppressMessages() swallows any additional messages beyond the one that
  # expect_message() captures, keeping the test output clean.
  suppressMessages(
    expect_message(
      i_screener(dat, cols = "x", id_var = "id", min_n_subject = 3, verbose = TRUE)
    )
  )
})
test_that("verbose = FALSE emits no messages", {
  dat <- make_screen_df()
  expect_no_message(
    i_screener(dat, cols = "x", id_var = "id", min_n_subject = 3, verbose = FALSE)
  )
})
test_that("verbose message mentions 'i_screener' and subject counts", {
  dat <- make_screen_df()
  captured <- character(0)
  # Collect every message emitted during the call, muffling each one so
  # nothing leaks into the test output.
  collect_msg <- function(m) {
    captured <<- c(captured, conditionMessage(m))
    invokeRestart("muffleMessage")
  }
  withCallingHandlers(
    i_screener(dat, cols = "x", id_var = "id", min_n_subject = 3, verbose = TRUE),
    message = collect_msg
  )
  all_text <- paste(captured, collapse = " ")
  expect_match(all_text, "i_screener", ignore.case = TRUE)
  expect_match(all_text, "min_n", ignore.case = TRUE)
  expect_match(all_text, "[0-9]") # at least one count in the output
})
# ══════════════════════════════════════════════════════════════════════════════
# Column name collision guard
# ══════════════════════════════════════════════════════════════════════════════
test_that("flag joint errors if df already has pass_overall column", {
  dat <- make_screen_df()
  dat$pass_overall <- TRUE
  # The screener must refuse to silently overwrite a pre-existing flag column.
  expect_error(
    i_screener(dat, cols = "x", id_var = "id", min_n_subject = 3, mode = "flag"),
    regexp = "pass_overall"
  )
})
test_that("flag per_column errors if df already has a _pass column", {
  dat <- make_screen_df()
  dat$x_pass <- TRUE
  expect_error(
    i_screener(dat,
               cols = "x", id_var = "id", min_n_subject = 3,
               mode = "flag", filter_type = "per_column"),
    regexp = "x_pass"
  )
})
test_that("filter per_column errors if df already has a _pass column", {
  dat <- make_screen_df()
  dat$x_pass <- TRUE
  # Same guard applies in filter mode, which also derives <col>_pass names.
  expect_error(
    i_screener(dat,
               cols = "x", id_var = "id", min_n_subject = 3,
               mode = "filter", filter_type = "per_column"),
    regexp = "x_pass"
  )
})
# ══════════════════════════════════════════════════════════════════════════════
# Additional correctness edge cases
# ══════════════════════════════════════════════════════════════════════════════
test_that("mode=filter returns all rows when all subjects pass", {
  df <- make_screen_df()
  result <- i_screener(df, cols = "x", id_var = "id", min_n_subject = 1)
  expect_equal(nrow(result), nrow(df))
  expect_equal(result$id, df$id)
})
test_that("max_mode_pct = 1.0 does not remove subjects with mode_pct < 1.0", {
  df <- make_screen_df()
  # C has mode_pct = 0.80 ≤ 1.0 → passes; everyone with n ≥ 2 should pass
  result <- i_screener(df, cols = "x", id_var = "id",
                       min_n_subject = 2, max_mode_pct = 1.0)
  expect_true("C" %in% result$id)
  expect_true("D" %in% result$id)
})
test_that("subject with n_valid = 1 fails min_sd due to NA sd", {
  # sd() of a single observation is NA; the screener must treat that as a
  # min_sd failure rather than propagating NA into the pass flag.
  df <- data.frame(
    id = c(rep("A", 5), rep("B", 5)),
    x = c(1, 2, 3, 4, 5,     # A: n=5, SD > 0
          5, NA, NA, NA, NA), # B: n=1, SD = NA → fails min_sd
    stringsAsFactors = FALSE
  )
  result <- i_screener(df, cols = "x", id_var = "id",
                       min_n_subject = 1, min_sd = 0.5, mode = "report")
  expect_false(result$x_pass[result$id == "B"])
  expect_true(result$x_pass[result$id == "A"])
})
test_that("per_column filter handles different subjects failing different cols independently", {
  # Cross-design: each subject fails exactly one (different) column, so any
  # accidental coupling between columns would blank the wrong cells.
  df <- data.frame(
    id = rep(c("A", "B"), each = 5),
    x = c(1, 2, 3, 4, 5,  # A: SD > 0.5 → passes
          3, 3, 3, 3, 3), # B: constant → SD = 0 → fails min_sd on x
    y = c(4, 4, 4, 4, 4,  # A: constant → SD = 0 → fails min_sd on y
          1, 2, 3, 4, 5), # B: varied → passes
    stringsAsFactors = FALSE
  )
  result <- i_screener(df, cols = c("x", "y"), id_var = "id",
                       min_n_subject = 2, min_sd = 0.5,
                       filter_type = "per_column")
  # A: y is NA; x is intact
  expect_true(all(is.na(result$y[result$id == "A"])))
  expect_false(any(is.na(result$x[result$id == "A"])))
  # B: x is NA; y is intact
  expect_true(all(is.na(result$x[result$id == "B"])))
  expect_false(any(is.na(result$y[result$id == "B"])))
})
test_that("report column order groups by metric type across all cols", {
  df <- make_screen_df()
  result <- i_screener(df, cols = c("x", "y"), id_var = "id",
                       min_n_subject = 2, mode = "report")
  # Exact ordering contract: id, then n_valid for every col, then sd, then
  # mode_pct, then per-col pass flags, then the overall verdict.
  expected_order <- c("id",
                      "x_n_valid", "y_n_valid",
                      "x_sd", "y_sd",
                      "x_mode_pct", "y_mode_pct",
                      "x_pass", "y_pass",
                      "pass_overall")
  expect_equal(names(result), expected_order)
})
# ══════════════════════════════════════════════════════════════════════════════
# Inf / NaN in data columns
# ══════════════════════════════════════════════════════════════════════════════
test_that("Inf in a screened column triggers an informative error", {
  df <- make_screen_df()
  df$x[1] <- Inf
  # Non-finite data values are expected to be rejected outright rather than
  # flowing into the sd/mode computations.
  expect_error(
    i_screener(df, cols = "x", id_var = "id", min_n_subject = 2),
    regexp = "Inf"
  )
})
test_that("-Inf in a screened column triggers an informative error", {
  df <- make_screen_df()
  df$x[1] <- -Inf
  expect_error(
    i_screener(df, cols = "x", id_var = "id", min_n_subject = 2),
    regexp = "Inf"
  )
})
test_that("NaN in a screened column is treated as NA (reduces n_valid)", {
  # Unlike Inf, NaN counts as missing: it lowers n_valid instead of erroring.
  df <- data.frame(
    id = rep(c("A", "B"), each = 5),
    x = c(1, 2, 3, 4, 5,
          NaN, 2, 3, 4, 5),
    stringsAsFactors = FALSE
  )
  result <- i_screener(df, cols = "x", id_var = "id",
                       min_n_subject = 2, mode = "report")
  expect_equal(result$x_n_valid[result$id == "B"], 4L)
})
# NaN/NA thresholds must be caught by validation, mirroring the NA_real_
# checks in the validation section above.
test_that("max_mode_pct = NaN triggers an informative error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", max_mode_pct = NaN),
    regexp = "max_mode_pct"
  )
})
test_that("max_mode_pct = NA triggers an informative error", {
  df <- make_screen_df()
  expect_error(
    i_screener(df, cols = "x", id_var = "id", max_mode_pct = NA),
    regexp = "max_mode_pct"
  )
})