# Tests for masked data generation functions # ============================================================================== # Tests for md_series_lifetime_right_censoring # ============================================================================== test_that("md_series_lifetime_right_censoring adds system lifetime column", { df <- data.frame( t1 = c(1, 2, 3), t2 = c(2, 1, 4), t3 = c(3, 3, 2) ) result <- md_series_lifetime_right_censoring(df, tau = Inf) expect_true("t" %in% colnames(result)) # System lifetime should be minimum of component lifetimes expect_equal(result$t, c(1, 1, 2)) }) test_that("md_series_lifetime_right_censoring adds censoring indicator", { df <- data.frame( t1 = c(1, 2, 3), t2 = c(2, 1, 4), t3 = c(3, 3, 2) ) result <- md_series_lifetime_right_censoring(df, tau = Inf) expect_true("delta" %in% colnames(result)) # All TRUE when tau = Inf (no censoring) expect_true(all(result$delta)) }) test_that("md_series_lifetime_right_censoring applies right censoring", { df <- data.frame( t1 = c(1, 5, 3), t2 = c(2, 6, 4), t3 = c(3, 7, 2) ) tau <- 4 result <- md_series_lifetime_right_censoring(df, tau = tau) # First observation: min = 1 < 4, not censored expect_equal(result$t[1], 1) expect_true(result$delta[1]) # Second observation: min = 5 > 4, censored at 4 expect_equal(result$t[2], 4) expect_false(result$delta[2]) # Third observation: min = 2 < 4, not censored expect_equal(result$t[3], 2) expect_true(result$delta[3]) }) test_that("md_series_lifetime_right_censoring accepts custom column names", { df <- data.frame( comp1 = c(1, 2, 3), comp2 = c(2, 1, 4) ) result <- md_series_lifetime_right_censoring( df, tau = Inf, comp = "comp", lifetime = "time", right_censoring_indicator = "cens" ) expect_true("time" %in% colnames(result)) expect_true("cens" %in% colnames(result)) expect_equal(result$time, c(1, 1, 3)) }) test_that("md_series_lifetime_right_censoring errors when comp not found", { df <- data.frame( x1 = c(1, 2, 3), x2 = c(2, 1, 4) ) expect_error(md_series_lifetime_right_censoring(df, comp = "t"), "comp not in colnames") }) # ============================================================================== # Tests for md_bernoulli_cand_c1_c2_c3 # ============================================================================== test_that("md_bernoulli_cand_c1_c2_c3 adds probability columns", { df <- data.frame( t1 = c(1, 2, 3), t2 = c(2, 1, 4), t3 = c(3, 3, 2), delta = c(TRUE, TRUE, TRUE) ) result <- md_bernoulli_cand_c1_c2_c3(df, p = 0.3) expect_true("q1" %in% colnames(result)) expect_true("q2" %in% colnames(result)) expect_true("q3" %in% colnames(result)) }) test_that("md_bernoulli_cand_c1_c2_c3 sets failed component probability to 1", { df <- data.frame( t1 = c(1, 5, 3), # min at row 1, 3 t2 = c(2, 1, 4), # min at row 2 t3 = c(3, 3, 2), delta = c(TRUE, TRUE, TRUE) ) result <- md_bernoulli_cand_c1_c2_c3(df, p = 0.3) # Row 1: t1 is minimum, so q1 = 1 expect_equal(result$q1[1], 1) # Row 2: t2 is minimum, so q2 = 1 expect_equal(result$q2[2], 1) # Row 3: t3 is minimum, so q3 = 1 expect_equal(result$q3[3], 1) }) test_that("md_bernoulli_cand_c1_c2_c3 sets non-failed component probability to p", { df <- data.frame( t1 = c(1, 5, 3), t2 = c(2, 1, 4), t3 = c(3, 3, 2), delta = c(TRUE, TRUE, TRUE) ) p <- 0.3 result <- md_bernoulli_cand_c1_c2_c3(df, p = p) # Row 1: t1 is minimum, so q2 and q3 should be p expect_equal(result$q2[1], p) expect_equal(result$q3[1], p) # Row 2: t2 is minimum, so q1 and q3 should be p expect_equal(result$q1[2], p) expect_equal(result$q3[2], p) }) test_that("md_bernoulli_cand_c1_c2_c3 sets all probabilities to 0 for censored observations", { df <- data.frame( t1 = c(1, 5, 3), t2 = c(2, 1, 4), t3 = c(3, 3, 2), delta = c(TRUE, FALSE, TRUE) # Row 2 is censored ) result <- md_bernoulli_cand_c1_c2_c3(df, p = 0.3) # Row 2 is censored, all probabilities should be 0 expect_equal(result$q1[2], 0) expect_equal(result$q2[2], 0) expect_equal(result$q3[2], 0) }) test_that("md_bernoulli_cand_c1_c2_c3 handles empty data frame", { df <- data.frame( t1 = numeric(0), t2 = numeric(0), delta = logical(0) ) result <- md_bernoulli_cand_c1_c2_c3(df, p = 0.3) expect_equal(nrow(result), 0) }) test_that("md_bernoulli_cand_c1_c2_c3 accepts vector of probabilities", { df <- data.frame( t1 = c(1, 2), t2 = c(2, 1), delta = c(TRUE, TRUE) ) p <- c(0.2, 0.4) # Different p for each row result <- md_bernoulli_cand_c1_c2_c3(df, p = p) # Row 1: t1 is minimum, q2 = 0.2 expect_equal(result$q2[1], 0.2) # Row 2: t2 is minimum, q1 = 0.4 expect_equal(result$q1[2], 0.4) }) test_that("md_bernoulli_cand_c1_c2_c3 errors when comp not found", { df <- data.frame( x1 = c(1, 2, 3), x2 = c(2, 1, 4), delta = c(TRUE, TRUE, TRUE) ) expect_error(md_bernoulli_cand_c1_c2_c3(df, p = 0.3), "No component lifetime") }) test_that("md_bernoulli_cand_c1_c2_c3 errors when right_censoring_indicator not found", { df <- data.frame( t1 = c(1, 2, 3), t2 = c(2, 1, 4) ) expect_error(md_bernoulli_cand_c1_c2_c3(df, p = 0.3), "right_censoring_indicator not in colnames") }) # ============================================================================== # Tests for md_cand_sampler # ============================================================================== test_that("md_cand_sampler creates candidate set columns", { df <- data.frame( q1 = c(1, 0.5, 0.3), q2 = c(0.3, 1, 0.3), q3 = c(0.3, 0.3, 1) ) set.seed(42) result <- md_cand_sampler(df) expect_true("x1" %in% colnames(result)) expect_true("x2" %in% colnames(result)) expect_true("x3" %in% colnames(result)) }) test_that("md_cand_sampler produces boolean candidate sets", { df <- data.frame( q1 = c(1, 0.5, 0.3), q2 = c(0.3, 1, 0.3), q3 = c(0.3, 0.3, 1) ) set.seed(42) result <- md_cand_sampler(df) expect_type(result$x1, "logical") expect_type(result$x2, "logical") expect_type(result$x3, "logical") }) test_that("md_cand_sampler includes component with probability 1", { # Set probabilities so some are always 1 df <- data.frame( q1 = c(1, 0, 0), q2 = c(0, 1, 0), q3 = c(0, 0, 1) ) result <- md_cand_sampler(df) expect_true(result$x1[1]) expect_true(result$x2[2]) expect_true(result$x3[3]) expect_false(result$x2[1]) expect_false(result$x3[1]) }) test_that("md_cand_sampler handles empty data frame", { df <- data.frame( q1 = numeric(0), q2 = numeric(0) ) result <- md_cand_sampler(df) expect_equal(nrow(result), 0) }) test_that("md_cand_sampler errors when df is not data frame", { expect_error(md_cand_sampler(list(q1 = c(1, 0.5))), "must be a data frame") }) test_that("md_cand_sampler accepts custom column names", { df <- data.frame( prob1 = c(1, 0.5), prob2 = c(0.5, 1) ) set.seed(42) result <- md_cand_sampler(df, prob = "prob", candset = "cand") expect_true("cand1" %in% colnames(result)) expect_true("cand2" %in% colnames(result)) }) test_that("md_cand_sampler samples match probabilities approximately", { skip_on_cran() # This test is probabilistic # Set up with known probability n <- 1000 p <- 0.7 df <- data.frame( q1 = rep(p, n), q2 = rep(p, n) ) set.seed(123) result <- md_cand_sampler(df) # Sample proportion should be close to p prop1 <- mean(result$x1) prop2 <- mean(result$x2) expect_equal(prop1, p, tolerance = 0.05) expect_equal(prop2, p, tolerance = 0.05) }) # ============================================================================== # Integration tests: Complete masked data workflow # ============================================================================== test_that("complete masked data generation workflow works", { # Step 1: Generate component lifetimes set.seed(42) n <- 100 rates <- c(0.5, 0.3, 0.2) df <- data.frame( t1 = rexp(n, rates[1]), t2 = rexp(n, rates[2]), t3 = rexp(n, rates[3]) ) # Step 2: Apply right censoring tau <- 3 df <- md_series_lifetime_right_censoring(df, tau = tau) expect_true("t" %in% colnames(df)) expect_true("delta" %in% colnames(df)) expect_true(all(df$t <= tau)) # Step 3: Generate candidate set probabilities df <- md_bernoulli_cand_c1_c2_c3(df, p = 0.3) expect_true("q1" %in% colnames(df)) expect_true("q2" %in% colnames(df)) expect_true("q3" %in% colnames(df)) # Step 4: Sample candidate sets df <- md_cand_sampler(df) expect_true("x1" %in% colnames(df)) expect_true("x2" %in% colnames(df)) expect_true("x3" %in% colnames(df)) # Verify C1: Failed component is in candidate set for non-censored observations for (i in seq_len(nrow(df))) { if (df$delta[i]) { # For non-censored: candidate set should be non-empty # and should include the failed component (but we can only verify non-empty # since we don't know which failed without the component lifetimes being visible) expect_true(df$x1[i] || df$x2[i] || df$x3[i], info = paste("Row", i, "should have non-empty candidate set")) } else { # For censored: candidate set should be empty expect_false(df$x1[i] || df$x2[i] || df$x3[i], info = paste("Censored row", i, "should have empty candidate set")) } } }) # ============================================================================== # Tests for md_bernoulli_cand_c1_c2_c3 edge cases # ============================================================================== test_that("md_bernoulli_cand_c1_c2_c3 with p=0 gives singleton candidate sets", { df <- data.frame( t1 = c(1, 5, 3), t2 = c(2, 1, 4), t3 = c(3, 3, 2), delta = c(TRUE, TRUE, TRUE) ) result <- md_bernoulli_cand_c1_c2_c3(df, p = 0) # Non-failed components should have probability 0 # Row 1: t1 is min, so q2 = q3 = 0 expect_equal(result$q2[1], 0) expect_equal(result$q3[1], 0) # But failed component is still 1 expect_equal(result$q1[1], 1) }) test_that("md_bernoulli_cand_c1_c2_c3 with p=1 gives all-component candidate sets", { df <- data.frame( t1 = c(1, 5, 3), t2 = c(2, 1, 4), t3 = c(3, 3, 2), delta = c(TRUE, TRUE, TRUE) ) result <- md_bernoulli_cand_c1_c2_c3(df, p = 1) # All non-censored obs should have all probabilities = 1 expect_true(all(result$q1 == 1)) expect_true(all(result$q2 == 1)) expect_true(all(result$q3 == 1)) }) test_that("md_bernoulli_cand_c1_c2_c3 with vector p uses per-obs probabilities", { df <- data.frame( t1 = c(1, 5), t2 = c(2, 1), delta = c(TRUE, TRUE) ) p <- c(0.0, 1.0) result <- md_bernoulli_cand_c1_c2_c3(df, p = p) # Row 1: p=0, only failed component (t1 min -> q1=1, q2=0) expect_equal(result$q1[1], 1) expect_equal(result$q2[1], 0) # Row 2: p=1, both components (t2 min -> q2=1, q1=1) expect_equal(result$q1[2], 1) expect_equal(result$q2[2], 1) }) test_that("masked data can be used with likelihood model", { # Generate masked data set.seed(42) n <- 100 rates <- c(0.5, 0.3, 0.2) df <- data.frame( t1 = rexp(n, rates[1]), t2 = rexp(n, rates[2]), t3 = rexp(n, rates[3]) ) df <- md_series_lifetime_right_censoring(df, tau = Inf) df <- md_bernoulli_cand_c1_c2_c3(df, p = 0.3) df <- md_cand_sampler(df) # Convert pipeline's delta (logical) to model's omega (character) df$omega <- ifelse(df$delta, "exact", "right") # Fit likelihood model model <- exp_series_md_c1_c2_c3() ll_fn <- loglik(model) # Should be able to evaluate likelihood ll <- ll_fn(df, par = rates) expect_true(is.finite(ll)) }) # ============================================================================== # Coverage gap: md_cand_sampler with no matching probability columns # ============================================================================== test_that("md_cand_sampler errors when no probability columns match prefix", { df <- data.frame( a1 = c(0.5, 0.3), a2 = c(0.3, 0.5) ) # Default prefix is "q" but data has "a" columns expect_error(md_cand_sampler(df), "No component probabilities found") })