# Tests for GLM/ART stratification via `within` parameter # These tests verify that the `within` parameter correctly produces # separate results per stratum, not a diluted main effect. # ============================================================================= # Test Fixtures # ============================================================================= #' Generate factorial test data with interaction pattern #' #' Creates data where some peptides have an effect only at 24h (not at 0h), #' simulating a time-dependent treatment response (interaction pattern). #' #' @param n_peptides Total number of peptides #' @param n_interaction Number of peptides with interaction pattern (effect at 24h only) #' @param n_reps Number of biological replicates per condition #' @param effect_size Fold change for effect at 24h #' @param seed Random seed #' @return A tibble suitable for read_pepdiff() make_interaction_data <- function(n_peptides = 30, n_interaction = 10, n_reps = 4, effect_size = 4.0, seed = 42) { set.seed(seed) # Create peptide and gene IDs peptide_ids <- paste0("PEP_", sprintf("%03d", seq_len(n_peptides))) gene_ids <- paste0("GENE_", sprintf("%03d", ceiling(seq_len(n_peptides) / 2))) # Interaction peptides are last n_interaction interaction_peptides <- tail(peptide_ids, n_interaction) # Design: treatment (ctrl, trt) x timepoint (0h, 24h) factor_grid <- expand.grid( treatment = c("ctrl", "trt"), timepoint = c("0h", "24h"), stringsAsFactors = FALSE ) base_mean <- 1e6 cv <- 0.25 data_list <- list() for (i in seq_len(nrow(factor_grid))) { condition <- factor_grid[i, ] for (rep_idx in seq_len(n_reps)) { for (pep_idx in seq_len(n_peptides)) { peptide <- peptide_ids[pep_idx] gene <- gene_ids[pep_idx] # Determine fold change fc_multiplier <- 1.0 if (peptide %in% interaction_peptides) { # Interaction pattern: effect ONLY at 24h, not at 0h if (condition$treatment == "trt" && condition$timepoint == "24h") { fc_multiplier <- effect_size } } # Generate Gamma-distributed value shape <- 1 / (cv^2) adjusted_mean <- base_mean * fc_multiplier rate <- shape / adjusted_mean value <- stats::rgamma(1, shape = shape, rate = rate) row <- data.frame( peptide = peptide, gene_id = gene, bio_rep = rep_idx, value = value, treatment = condition$treatment, timepoint = condition$timepoint, stringsAsFactors = FALSE ) data_list[[length(data_list) + 1]] <- row } } } tibble::as_tibble(do.call(rbind, data_list)) } # ============================================================================= # Test 1: GLM with within produces results for each stratum # ============================================================================= test_that("GLM with within produces results for each stratum", { # Setup: 2x2 factorial data raw_data <- make_factorial_test_data(seed = 111) # Write and read to create pepdiff_data temp_file <- write_test_csv(raw_data) dat <- read_pepdiff( file = temp_file, id = "peptide", gene = "gene_id", value = "value", factors = c("treatment", "timepoint"), replicate = "bio_rep" ) unlink(temp_file) # Run stratified analysis results <- compare(dat, compare = "treatment", ref = "ctrl", within = "timepoint", method = "glm") # Should have 2 rows per peptide (one per timepoint) expect_equal(nrow(results$results), 2 * length(dat$peptides)) # Should have timepoint column expect_true("timepoint" %in% names(results$results)) # Should have both timepoint levels expect_setequal(unique(results$results$timepoint), c("0h", "24h")) }) # ============================================================================= # Test 2: Stratified results detect interaction correctly # ============================================================================= test_that("GLM stratification detects interaction pattern", { # Setup: data with interaction pattern (Group C: effect at 24h only) raw_data <- make_interaction_data( n_peptides = 30, n_interaction = 10, # PEP_021 through PEP_030 have interaction pattern n_reps = 4, effect_size = 4.0, seed = 222 ) # Write and read to create pepdiff_data temp_file <- write_test_csv(raw_data) dat <- read_pepdiff( file = temp_file, id = "peptide", gene = "gene_id", value = "value", factors = c("treatment", "timepoint"), replicate = "bio_rep" ) unlink(temp_file) # Run stratified analysis results <- compare(dat, compare = "treatment", ref = "ctrl", within = "timepoint", method = "glm") # Interaction peptides (effect at 24h only) interaction_peps <- paste0("PEP_", sprintf("%03d", 21:30)) interaction_results <- results$results |> dplyr::filter(peptide %in% interaction_peps) |> dplyr::group_by(timepoint) |> dplyr::summarise(median_fc = median(fold_change), .groups = "drop") # At 0h: FC should be ~1 (no effect) fc_0h <- interaction_results$median_fc[interaction_results$timepoint == "0h"] expect_true(fc_0h > 0.7 && fc_0h < 1.5, label = sprintf("FC at 0h should be ~1, got %.2f", fc_0h)) # At 24h: FC should be ~4 (the true effect) fc_24h <- interaction_results$median_fc[interaction_results$timepoint == "24h"] expect_true(fc_24h > 3 && fc_24h < 5, label = sprintf("FC at 24h should be ~4, got %.2f", fc_24h)) }) # ============================================================================= # Test 3: FDR correction is applied within each stratum # ============================================================================= test_that("FDR correction is applied within each stratum", { # Setup raw_data <- make_factorial_test_data(seed = 333) temp_file <- write_test_csv(raw_data) dat <- read_pepdiff( file = temp_file, id = "peptide", gene = "gene_id", value = "value", factors = c("treatment", "timepoint"), replicate = "bio_rep" ) unlink(temp_file) results <- compare(dat, compare = "treatment", ref = "ctrl", within = "timepoint", method = "glm") # Check FDR is computed separately per stratum for (tp in c("0h", "24h")) { stratum_results <- results$results |> dplyr::filter(timepoint == tp) expected_fdr <- p.adjust(stratum_results$p_value, method = "BH") expect_equal(stratum_results$fdr, expected_fdr, label = paste("FDR within", tp)) } }) # ============================================================================= # Test 4: ART method with stratification # ============================================================================= test_that("ART with within produces results for each stratum", { skip_if_not_installed("ARTool") # Setup raw_data <- make_factorial_test_data(seed = 444) temp_file <- write_test_csv(raw_data) dat <- read_pepdiff( file = temp_file, id = "peptide", gene = "gene_id", value = "value", factors = c("treatment", "timepoint"), replicate = "bio_rep" ) unlink(temp_file) # Run stratified ART analysis results <- compare(dat, compare = "treatment", ref = "ctrl", within = "timepoint", method = "art") # Should have 2 rows per peptide (one per timepoint) expect_equal(nrow(results$results), 2 * length(dat$peptides)) # Should have timepoint column expect_true("timepoint" %in% names(results$results)) # Should have both timepoint levels expect_setequal(unique(results$results$timepoint), c("0h", "24h")) }) # ============================================================================= # Test 5: Pairwise method with stratification still works # ============================================================================= test_that("Pairwise method with within still works", { raw_data <- make_factorial_test_data(seed = 555) temp_file <- write_test_csv(raw_data) dat <- read_pepdiff( file = temp_file, id = "peptide", gene = "gene_id", value = "value", factors = c("treatment", "timepoint"), replicate = "bio_rep" ) unlink(temp_file) results <- compare(dat, compare = "treatment", ref = "ctrl", within = "timepoint", method = "pairwise", test = "wilcoxon") # Should have timepoint column expect_true("timepoint" %in% names(results$results)) # Should have 2 rows per peptide expect_equal(nrow(results$results), 2 * length(dat$peptides)) # Should have both timepoint levels expect_setequal(unique(results$results$timepoint), c("0h", "24h")) }) # ============================================================================= # Test 6: Pairwise bayes_t with stratification works (bf_threshold passed) # ============================================================================= test_that("Pairwise bayes_t with within works", { raw_data <- make_factorial_test_data(seed = 666) temp_file <- write_test_csv(raw_data) dat <- read_pepdiff( file = temp_file, id = "peptide", gene = "gene_id", value = "value", factors = c("treatment", "timepoint"), replicate = "bio_rep" ) unlink(temp_file) # Run with bayes_t - bf_threshold should be passed through results <- compare(dat, compare = "treatment", ref = "ctrl", within = "timepoint", method = "pairwise", test = "bayes_t", bf_threshold = 10) # Should have timepoint column expect_true("timepoint" %in% names(results$results)) # Should have bf column (bayes_t specific) expect_true("bf" %in% names(results$results)) # Should have both timepoint levels expect_setequal(unique(results$results$timepoint), c("0h", "24h")) }) # ============================================================================= # Test 7: Multiple within factors # ============================================================================= test_that("Multiple within factors work", { # Generate 3-factor data: treatment x timepoint x tissue set.seed(777) factor_grid <- expand.grid( treatment = c("ctrl", "trt"), timepoint = c("0h", "24h"), tissue = c("liver", "kidney"), stringsAsFactors = FALSE ) n_peptides <- 5 n_reps <- 2 base_mean <- 1e6 cv <- 0.3 peptide_ids <- paste0("PEP_", sprintf("%03d", seq_len(n_peptides))) gene_ids <- paste0("GENE_", sprintf("%03d", ceiling(seq_len(n_peptides) / 2))) data_list <- list() for (i in seq_len(nrow(factor_grid))) { condition <- factor_grid[i, ] for (rep_idx in seq_len(n_reps)) { for (pep_idx in seq_len(n_peptides)) { shape <- 1 / (cv^2) rate <- shape / base_mean value <- stats::rgamma(1, shape = shape, rate = rate) row <- data.frame( peptide = peptide_ids[pep_idx], gene_id = gene_ids[pep_idx], bio_rep = rep_idx, value = value, treatment = condition$treatment, timepoint = condition$timepoint, tissue = condition$tissue, stringsAsFactors = FALSE ) data_list[[length(data_list) + 1]] <- row } } } raw_data <- tibble::as_tibble(do.call(rbind, data_list)) temp_file <- write_test_csv(raw_data) dat <- read_pepdiff( file = temp_file, id = "peptide", gene = "gene_id", value = "value", factors = c("treatment", "timepoint", "tissue"), replicate = "bio_rep" ) unlink(temp_file) # Stratify by both timepoint AND tissue results <- compare(dat, compare = "treatment", ref = "ctrl", within = c("timepoint", "tissue"), method = "glm") # Should have 4 rows per peptide (2 timepoints x 2 tissues) expect_equal(nrow(results$results), 4 * length(dat$peptides)) # Should have both within columns expect_true("timepoint" %in% names(results$results)) expect_true("tissue" %in% names(results$results)) # Should have all 4 combinations combos <- unique(results$results[, c("timepoint", "tissue")]) expect_equal(nrow(combos), 4) })