library(testthat) library(visPedigree) library(data.table) # =========================================================================== # Test Suite for pedcontrib() # Purpose: Capture exact numerical outputs of the current (pure-R) implementation # so that any future Rcpp optimisation must produce identical results. # =========================================================================== # ---- Fixture 1: 10-individual "theory" pedigree (瓶颈模型) ---- # Hand-derived expected values from the manuscript make_theory_ped <- function() { ped_dt <- data.table( Ind = c("F1", "F2", "F3", "F4", "S1", "D1", "R1", "R2", "R3", "R4"), Sire = c(NA, NA, NA, NA, "F1", "F3", "S1", "S1", "S1", "S1"), Dam = c(NA, NA, NA, NA, "F2", "F4", "D1", "D1", "D1", "F4") ) suppressMessages(tidyped(ped_dt)) } # ---- Fixture 2: small_ped (visPedigree built-in dataset) ---- make_small_ped <- function() { suppressMessages(tidyped(small_ped)) } # ---- Fixture 3: two founders with one offspring, candidates are founders ---- make_founders_as_cand_ped <- function() { ped_dt <- data.table( Ind = c("A", "B", "C"), Sire = c(NA, NA, "A"), Dam = c(NA, NA, "B") ) suppressMessages(tidyped(ped_dt)) } # ---- Fixture 4: single offspring from two founders ---- make_minimal_ped <- function() { ped_dt <- data.table( Ind = c("P1", "P2", "O1"), Sire = c(NA, NA, "P1"), Dam = c(NA, NA, "P2") ) suppressMessages(tidyped(ped_dt)) } # ---- Fixture 5: single candidate (edge case) ---- make_single_cand_ped <- function() { ped_dt <- data.table( Ind = c("A", "B", "C", "D"), Sire = c(NA, NA, "A", "A"), Dam = c(NA, NA, "B", "B") ) suppressMessages(tidyped(ped_dt)) } # ---- Fixture 6: deeper chain (4 generations) ---- make_deep_chain_ped <- function() { ped_dt <- data.table( Ind = c("G1M", "G1F", "G2", "G3M", "G3F", "G4"), Sire = c(NA, NA, "G1M", "G2", NA, "G3M"), Dam = c(NA, NA, "G1F", NA, NA, "G3F") ) suppressMessages(tidyped(ped_dt)) } # ========================================================================== # Group 1: Founder contributions (p_i) and f_e # ========================================================================== test_that("Theory pedigree: founder contributions match hand-derived values", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "founder", top = 100)) # Extract founder table and sort by name for deterministic comparison fd <- res$founders setkey(fd, Ind) expect_equal(fd["F1", Contrib], 0.25, tolerance = 1e-10) expect_equal(fd["F2", Contrib], 0.25, tolerance = 1e-10) expect_equal(fd["F3", Contrib], 0.1875, tolerance = 1e-10) expect_equal(fd["F4", Contrib], 0.3125, tolerance = 1e-10) }) test_that("Theory pedigree: f_e matches hand-derived value (3.878788)", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both", top = 100)) expected_fe <- 1 / (0.25^2 + 0.25^2 + 0.1875^2 + 0.3125^2) expect_equal(res$summary$f_e, expected_fe, tolerance = 1e-6) }) test_that("Theory pedigree: founder contributions sum to 1.0", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "founder", top = 100)) expect_equal(sum(res$founders$Contrib), 1.0, tolerance = 1e-10) }) # ========================================================================== # Group 2: Ancestor marginal contributions (q_i) and f_a # ========================================================================== test_that("Theory pedigree: ancestor marginal contributions match hand-derived values", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "ancestor", top = 100)) anc <- res$ancestors # By rank order: S1(0.5), D1(0.375), F4(0.125) expect_equal(anc$Contrib[1], 0.500, tolerance = 1e-10) expect_equal(anc$Contrib[2], 0.375, tolerance = 1e-10) expect_equal(anc$Contrib[3], 0.125, tolerance = 1e-10) expect_equal(anc$Ind[1], "S1") expect_equal(anc$Ind[2], "D1") expect_equal(anc$Ind[3], "F4") }) test_that("Theory pedigree: f_a matches hand-derived value (2.461538)", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both", top = 100)) expected_fa <- 1 / (0.5^2 + 0.375^2 + 0.125^2) expect_equal(res$summary$f_a, expected_fa, tolerance = 1e-6) }) test_that("Theory pedigree: ancestor contributions sum to 1.0", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "ancestor", top = 100)) expect_equal(sum(res$ancestors$Contrib), 1.0, tolerance = 1e-10) }) # ========================================================================== # Group 3: small_ped dataset (complex real-world pedigree) # ========================================================================== test_that("small_ped: f_e and f_a match known values", { tp <- make_small_ped() cand <- c("Z1", "Z2", "Y", "X") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both", top = 100)) # These values are from the current pure-R implementation runs expect_equal(res$summary$f_e, 6.585209, tolerance = 1e-4) expect_equal(res$summary$f_a, 2.666667, tolerance = 1e-4) }) test_that("small_ped: founder contributions sum to 1.0", { tp <- make_small_ped() cand <- c("Z1", "Z2", "Y", "X") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "founder", top = 100)) expect_equal(sum(res$founders$Contrib), 1.0, tolerance = 1e-10) }) test_that("small_ped: ancestor contributions sum to 1.0", { tp <- make_small_ped() cand <- c("Z1", "Z2", "Y", "X") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "ancestor", top = 100)) expect_equal(sum(res$ancestors$Contrib), 1.0, tolerance = 1e-10) }) test_that("small_ped: top ancestor is N with q = 0.50", { tp <- make_small_ped() cand <- c("Z1", "Z2", "Y", "X") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "ancestor", top = 100)) expect_equal(res$ancestors$Ind[1], "X") expect_equal(res$ancestors$Contrib[1], 0.50, tolerance = 1e-10) }) test_that("small_ped: ancestor ranking order is correct", { tp <- make_small_ped() cand <- c("Z1", "Z2", "Y", "X") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "ancestor", top = 100)) # Expected order: N(0.50), V(0.25), R(0.125), K(0.125), W(0.125), J1(0.0625), O(0.03125) expect_equal(res$ancestors$Ind[1], "X") expect_equal(res$ancestors$Ind[2], "N") expect_equal(res$ancestors$Contrib[2], 0.25, tolerance = 1e-10) }) # ========================================================================== # Group 4: f_e and f_a invariance under `top` parameter # ========================================================================== test_that("f_e and f_a are invariant to the top parameter", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res_full <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both", top = 100)) res_top1 <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both", top = 1)) expect_equal(res_full$summary$f_e, res_top1$summary$f_e) expect_equal(res_full$summary$f_a, res_top1$summary$f_a) }) # ========================================================================== # Group 5: Edge cases # ========================================================================== test_that("Single candidate: founder and ancestor contributions work", { tp <- make_single_cand_ped() res <- suppressMessages(pedcontrib(tp, reference = "C", mode = "both", top = 100)) # Single individual C has parents A and B # Founder contributions: A = 0.5, B = 0.5 fd <- res$founders setkey(fd, Ind) expect_equal(fd["A", Contrib], 0.5, tolerance = 1e-10) expect_equal(fd["B", Contrib], 0.5, tolerance = 1e-10) expect_equal(sum(fd$Contrib), 1.0, tolerance = 1e-10) # Ancestor contributions should also sum to 1.0 expect_equal(sum(res$ancestors$Contrib), 1.0, tolerance = 1e-10) }) test_that("Minimal pedigree (1 offspring from 2 founders): correct values", { tp <- make_minimal_ped() res <- suppressMessages(pedcontrib(tp, reference = "O1", mode = "both", top = 100)) # f_e: 1/(0.5^2 + 0.5^2) = 2.0 expect_equal(res$summary$f_e, 2.0, tolerance = 1e-10) # Founders: P1=0.5, P2=0.5 fd <- res$founders setkey(fd, Ind) expect_equal(fd["P1", Contrib], 0.5, tolerance = 1e-10) expect_equal(fd["P2", Contrib], 0.5, tolerance = 1e-10) }) test_that("Deep chain: contributions propagate correctly through 4 generations", { tp <- make_deep_chain_ped() res <- suppressMessages(pedcontrib(tp, reference = "G4", mode = "both", top = 100)) # G4's parents are G3M and G3F # G3F is a founder -> contributes 0.5 to G4 # G3M's parent is G2 (sire side only) # G2's parents are G1M and G1F # So G1M -> G2 -> G3M -> G4: contribution = 0.5^3 = 0.125 # G1F -> G2 -> G3M -> G4: contribution = 0.5^3 = 0.125 # G3M has no dam, so half of G3M is "missing" => goes to phantom founder # G2 -> G3M: 0.5 (sire contribution to G3M), then G3M -> G4: 0.5 # So from G2: 0.25 to G4 fd <- res$founders setkey(fd, Ind) expect_equal(fd["G3F", Contrib], 0.5, tolerance = 1e-10) # G3M has no dam => 25% of G4's genes go to a phantom founder # Real founders account for only 0.75 of total variation expect_equal(sum(fd$Contrib), 1.0, tolerance = 1e-10) # Ancestor contributions also reflect the same incomplete pedigree expect_true(sum(res$ancestors$Contrib) <= 1.0 + 1e-10) }) test_that("Founders as candidates: they are their own founders, no ancestors above", { tp <- make_founders_as_cand_ped() # Use the two founders A and B as candidates res <- suppressMessages(pedcontrib(tp, reference = c("A", "B"), mode = "both", top = 100)) # Founder contributions: A = 0.5, B = 0.5 expect_equal(nrow(res$founders), 2) expect_equal(sum(res$founders$Contrib), 1.0, tolerance = 1e-10) # f_e = 1 / (0.5^2 + 0.5^2) = 2.0 expect_equal(res$summary$f_e, 2.0, tolerance = 1e-10) # No ancestors should be found (founders have no parents) expect_equal(nrow(res$ancestors), 2) }) # ========================================================================== # Group 6: mode parameter behaviour # ========================================================================== test_that("mode='founder' does not produce ancestors", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "founder")) expect_true(!is.null(res$founders)) expect_true(is.null(res$ancestors)) }) test_that("mode='ancestor' does not produce founders", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "ancestor")) expect_true(is.null(res$founders)) expect_true(!is.null(res$ancestors)) }) test_that("mode='both' produces both founders and ancestors", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both")) expect_true(!is.null(res$founders)) expect_true(!is.null(res$ancestors)) }) # ========================================================================== # Group 7: Return structure and class # ========================================================================== test_that("pedcontrib returns correct structure", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both")) expect_s3_class(res, "pedcontrib") expect_true(is.list(res)) expect_true(all(c("founders", "ancestors", "summary") %in% names(res))) # founders table columns expect_true(all(c("Ind", "Contrib", "CumContrib", "Rank") %in% names(res$founders))) # ancestors table columns expect_true(all(c("Ind", "Contrib", "CumContrib", "Rank") %in% names(res$ancestors))) # summary fields expect_true(all(c("n_ref", "n_founder", "f_e", "n_ancestor", "f_a") %in% names(res$summary))) }) test_that("CumContrib is monotonically non-decreasing", { tp <- make_small_ped() cand <- c("Z1", "Z2", "Y", "X") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both", top = 100)) expect_true(all(diff(res$founders$CumContrib) >= -1e-15)) expect_true(all(diff(res$ancestors$CumContrib) >= -1e-15)) }) # ========================================================================== # Group 8: Error handling # ========================================================================== test_that("pedcontrib rejects non-tidyped input", { expect_error(pedcontrib(data.frame(a = 1)), "tidyped") }) test_that("pedcontrib errors on empty candidate set", { tp <- make_theory_ped() expect_error( suppressMessages(suppressWarnings(pedcontrib(tp, reference = "NONEXISTENT", mode = "both"))), "No valid reference" ) }) # ========================================================================== # Group 9: Relationship between f_e and f_a # ========================================================================== test_that("f_a <= f_e (bottleneck effect)", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both", top = 100)) expect_true(res$summary$f_a <= res$summary$f_e) }) test_that("small_ped: f_a <= f_e", { tp <- make_small_ped() cand <- c("Z1", "Z2", "Y", "X") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both", top = 100)) expect_true(res$summary$f_a <= res$summary$f_e) }) # ========================================================================== # Group 10: Shannon entropy effective numbers f_e(H) and f_a(H) # ========================================================================== test_that("Theory pedigree: f_e_H matches hand-derived value", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "founder", top = 100)) p <- c(0.25, 0.25, 0.1875, 0.3125) expected_fe_H <- exp(-sum(p * log(p))) expect_equal(res$summary$f_e_H, expected_fe_H, tolerance = 1e-6) }) test_that("Theory pedigree: f_e_H >= f_e (Hill monotonicity)", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "founder", top = 100)) expect_true(res$summary$f_e_H >= res$summary$f_e) }) test_that("Theory pedigree: f_a_H >= f_a", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both", top = 100)) expect_true(res$summary$f_a_H >= res$summary$f_a) }) test_that("Uniform contributions: f_e_H == f_e == Nf", { # Two founders each contributing exactly 0.5 tp <- make_founders_as_cand_ped() res <- suppressMessages(pedcontrib(tp, reference = "C", mode = "founder", top = 100)) # Both parents contribute equally -> uniform expect_equal(res$summary$f_e_H, res$summary$f_e, tolerance = 1e-6) expect_equal(res$summary$f_e_H, res$summary$n_founder, tolerance = 1e-6) }) test_that("small_ped: f_e_H >= f_e", { tp <- make_small_ped() cand <- c("Z1", "Z2", "Y", "X") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "both", top = 100)) expect_true(res$summary$f_e_H >= res$summary$f_e) expect_true(res$summary$f_a_H >= res$summary$f_a) }) test_that("f_e_H is NA when no founders found", { tp <- make_theory_ped() cand <- c("R1", "R2", "R3", "R4") res <- suppressMessages(pedcontrib(tp, reference = cand, mode = "ancestor", top = 100)) # When mode = "ancestor", founders is NULL, f_e_H should not exist expect_null(res$summary$f_e_H) # But f_a_H should exist expect_true(!is.null(res$summary$f_a_H) && !is.na(res$summary$f_a_H)) })