library(testthat) library(visPedigree) library(data.table) # Create a minimal testing dataset test_ped_dt <- data.table::data.table( Ind = c("A", "B", "C", "D", "E", "F"), Sire = c(NA, NA, "A", "A", "C", "C"), Dam = c(NA, NA, "B", "B", "D", "D"), Sex = c("male", "female", "male", "female", "male", "female"), Gen = c(1, 1, 2, 2, 3, 3), Group = c("G1", "G1", "G2", "G2", "G3", "G3") ) # C and D are full siblings (F _ offspring = 0.25). # E and F are offspring of full siblings. test_ped <- suppressMessages(tidyped(test_ped_dt, reference = c("E", "F"))) test_that("pedrel calculates relation correctly and handles boundaries", { # N < 2 case for G1 (after subsetting by Gen, if one was dropped, or just test N=1) dt_n1 <- data.table::data.table(Ind = c("A", "B", "C"), Sire = c(NA, NA, "A"), Dam = c(NA, NA, "B"), Sex = c("male", "female", "male"), Gen = c(1, 1, 2)) ped_n1 <- suppressMessages(tidyped(dt_n1)) res_n1 <- suppressWarnings(pedrel(ped_n1, by = "Gen")) expect_equal(res_n1[Gen == 2, NUsed], 1) expect_true(is.na(res_n1[Gen == 2, MeanRel])) # For test_ped, group by Gen rel <- pedrel(test_ped, by = "Gen") # Gen 1: A, B (Unrelated) -> 0 expect_equal(rel[Gen == 1, MeanRel], 0) # Gen 2: C, D (full sibs). a_ij = 0.5. Mean of off-diagonals = 0.5 expect_equal(rel[Gen == 2, MeanRel], 0.5) # Test sample size behavior (less than 2 individuals after filtering) rel_sample <- suppressWarnings(pedrel(test_ped, by = "Gen", reference = c("A", "C"))) expect_true(all(is.na(rel_sample$MeanRel))) # Test with a different by parameter (e.g. BirthYear) test_ped$YearGroup <- c(1, 1, 2, 2, 3, 3) rel_year <- pedrel(test_ped, by = "YearGroup") expect_true("YearGroup" %in% names(rel_year)) expect_false("Group" %in% names(rel_year)) expect_equal(rel_year[YearGroup == 2, MeanRel], 0.5) # C, D in YearGroup 2 # Test reference parameter filtering with another by variable # If reference is only C, D, E, then YearGroup 3 should only have E left -> NUsed=1, NA MeanRel rel_year_ref <- suppressWarnings(pedrel(test_ped, by = "YearGroup", reference = c("C", "D", "E"))) expect_equal(rel_year_ref[YearGroup == 2, NUsed], 2) expect_equal(rel_year_ref[YearGroup == 2, MeanRel], 0.5) expect_equal(rel_year_ref[YearGroup == 3, NUsed], 1) expect_true(is.na(rel_year_ref[YearGroup == 3, MeanRel])) coan <- pedrel(test_ped, by = "Gen", scale = "coancestry") expect_true("MeanCoan" %in% names(coan)) expect_false("MeanRel" %in% names(coan)) expect_equal(coan[Gen == 1, MeanCoan], 0.25) expect_equal(coan[Gen == 2, MeanCoan], 0.375) expect_equal(coan[Gen == 3, MeanCoan], 0.5) }) test_that("pedrel captures deep inbreeding and correct ancestor tracing", { # Deep inbreeding: Full-sib mating (Gen 3), then their offspring full-sib mating (Gen 4) dt_deep <- data.table::data.table( Ind = c("A", "B", "C", "D", "E", "F", "G", "H"), Sire = c(NA, NA, "A", "A", "C", "C", "E", "E"), Dam = c(NA, NA, "B", "B", "D", "D", "F", "F"), Gen = c(1, 1, 2, 2, 3, 3, 4, 4) ) tp_deep <- tidyped(dt_deep) rel_deep <- pedrel(tp_deep, by = "Gen") # Gen 1: Unrelated founders expect_equal(rel_deep[Gen == 1, MeanRel], 0.0) # Gen 2: C, D are full sibs (0.5) expect_equal(rel_deep[Gen == 2, MeanRel], 0.5) # Gen 3: E, F's parents are C, D (full sibs). a_EF = 0.5 + 0.5 * f_CD = 0.5 + 0.5 * 0.5 = 0.75 # Error check: if ancestor tracing failed, this would be 0.5 expect_equal(rel_deep[Gen == 3, MeanRel], 0.75) # Gen 4: G, H's parents are E, F. a_GH = 0.5 + 0.5 * f_EF = 0.5 + 0.5 * 1.0 (since F_E=0.25+0.25*0.5=0.375? No, a_EF=0.75) # Actually a_GH = 0.5 + 0.5 * a_EF = 0.5 + 0.5 * 0.75 = 0.875? # Wait, manual calibration: # A_mat[G, H] = 0.5 * (A[E, G] + A[F, G]) = 0.5 * (0.5*(A[E,E]+A[E,F]) + 0.5*(A[F,E]+A[F,F])) # A[E,E] = 1 + f_CD = 1 + 0.25 = 1.25. A[E,F] = 0.75. # A_mat[G,H] = 0.5 * (0.5*(1.25+0.75) + 0.5*(0.75+1.25)) = 0.5 * (1.0 + 1.0) = 1.0 expect_equal(rel_deep[Gen == 4, MeanRel], 1.0) coan_deep <- pedrel(tp_deep, by = "Gen", scale = "coancestry") expect_equal(coan_deep[Gen == 1, MeanCoan], 0.25) expect_equal(coan_deep[Gen == 2, MeanCoan], 0.375) expect_equal(coan_deep[Gen == 3, MeanCoan], 0.5) expect_equal(coan_deep[Gen == 4, MeanCoan], 0.59375) }) test_that("pedgenint computes Average from all parent-offspring pairs", { test_ped$BirthYear <- c(2000, 2001, 2005, 2006, 2010, 2012) # SS: C-A (5), E-C (5) -> mean=5 # SD: D-A (6), F-C (7) -> mean=6.5 # DS: C-B (4), E-D (4) -> mean=4 # DD: D-B (5), F-D (6) -> mean=5.5 # Average from ALL 8 pairs: (5+6+5+7+4+5+4+6)/8 = 5.25 # Note: numeric years auto-converted to Date (YYYY-07-01); leap years cause # tiny deviations from exact integers, so we use tolerance = 0.01. suppressMessages( genint_res <- pedgenint(test_ped, timevar = "BirthYear", unit = "year") ) avg_res <- genint_res[Pathway == "Average"] expect_equal(avg_res$Mean, 5.25, tolerance = 0.01) expect_equal(avg_res$N, 8L) expect_true(!is.na(avg_res$SD)) # Sex-specific pathways should still work expect_equal(genint_res[Pathway == "SS", Mean], 5, tolerance = 0.01) expect_equal(genint_res[Pathway == "SD", Mean], 6.5, tolerance = 0.01) expect_equal(genint_res[Pathway == "DS", Mean], 4, tolerance = 0.01) expect_equal(genint_res[Pathway == "DD", Mean], 5.5, tolerance = 0.01) # SO/DO: sex-independent pathways # SO (Sire→Offspring): A→C(5), A→D(6), C→E(5), C→F(7) -> N=4, Mean=5.75 so_res <- genint_res[Pathway == "SO"] expect_equal(so_res$N, 4L) expect_equal(so_res$Mean, 5.75, tolerance = 0.01) # DO (Dam→Offspring): B→C(4), B→D(5), D→E(4), D→F(6) -> N=4, Mean=4.75 do_res <- genint_res[Pathway == "DO"] expect_equal(do_res$N, 4L) expect_equal(do_res$Mean, 4.75, tolerance = 0.01) # All 7 pathways present expect_equal(sort(genint_res$Pathway), c("Average", "DD", "DO", "DS", "SD", "SO", "SS")) }) test_that("pedgenint works with Date and character date inputs", { # Date input — exact control, no auto-conversion message test_ped$BirthDate <- as.Date(c( "2000-03-15", "2001-06-20", # A, B "2005-03-15", "2006-06-20", # C, D "2010-03-15", "2012-06-20" # E, F )) genint_date <- suppressMessages( pedgenint(test_ped, timevar = "BirthDate", unit = "day") ) expect_true(all(genint_date$Mean > 0)) expect_true("Average" %in% genint_date$Pathway) # Character date input test_ped$BirthStr <- as.character(test_ped$BirthDate) genint_str <- suppressMessages( pedgenint(test_ped, timevar = "BirthStr", unit = "month") ) expect_true(all(genint_str$Mean > 0)) # unit = "hour" genint_hr <- suppressMessages( pedgenint(test_ped, timevar = "BirthDate", unit = "hour") ) expect_true(all(genint_hr$Mean > 0)) }) test_that("pedcontrib f_e and f_a compute on full sets despite top cutoff", { cont_all <- suppressMessages(pedcontrib(test_ped, mode = "both", top = 100)) cont_top1 <- suppressMessages(pedcontrib(test_ped, mode = "both", top = 1)) # f_e and f_a should be identical because they're based on full arrays before truncation expect_equal(cont_all$summary$f_e, cont_top1$summary$f_e) expect_equal(cont_all$summary$f_a, cont_top1$summary$f_a) # Ensure reported reflects top expect_equal(cont_top1$summary$n_founder_show, 1) expect_equal(cont_all$summary$n_founder_show, length(cont_all$founders$Ind)) expect_true(cont_top1$summary$n_founder > 1) }) test_that("pedpartial and pedancestry run without addnum=TRUE initially", { # Create tidyped without IndNum (addnum = FALSE) ped_nonum <- suppressMessages(tidyped(test_ped_dt, addnum = FALSE)) expect_false("IndNum" %in% names(ped_nonum)) # Should not crash and automatically handle missing nums part_res <- pedpartial(ped_nonum, ancestors = c("A", "B"), top = 2) expect_true(all(c("A", "B") %in% names(part_res))) expect_equal(nrow(part_res), nrow(ped_nonum)) # pedancestry with labels ped_nonum$Label <- ifelse(ped_nonum$Ind %in% c("A", "B"), "Base", NA) anc_res <- pedancestry(ped_nonum, foundervar = "Label") expect_true("Base" %in% names(anc_res)) # All non-founders descend completely from A and B, so Base should be 1.0 for all expect_true(all(anc_res$Base == 1.0)) }) # --- Additional tests for pedne, pedecg, pedsubpop, pedfclass --- test_that("pedecg computes equivalent complete generations", { ecg_res <- pedecg(test_ped) expect_true(all(c("ECG", "FullGen", "MaxGen") %in% names(ecg_res))) expect_equal(nrow(ecg_res), nrow(test_ped)) # Founders A, B have ECG = 0 expect_equal(ecg_res[Ind == "A", ECG], 0) expect_equal(ecg_res[Ind == "B", ECG], 0) # C has 2 known parents -> ECG = 1 expect_equal(ecg_res[Ind == "C", ECG], 1) # E has parents C, D who each have ECG=1 -> ECG = 1 + (1+1)/2 = 2 expect_equal(ecg_res[Ind == "E", ECG], 2) }) test_that("pedne computes effective population size by cohort", { test_ped$BirthYear <- c(2000, 2000, 2005, 2005, 2010, 2010) res <- suppressMessages(pedne(test_ped, by = "BirthYear")) expect_true(all(c("Cohort", "N", "DeltaC", "Ne") %in% names(res))) # Only cohort 2010 has ECG > 1 (founders/gen1 filtered out) expect_equal(nrow(res), 3) expect_equal(res$Cohort, c(2000, 2005, 2010)) expect_true(any(res$DeltaC >= 0, na.rm=TRUE)) expect_true(any(res$Ne > 0 & is.finite(res$Ne))) }) test_that("pedsubpop splits pedigree by grouping variable", { res_gen <- pedsubpop(test_ped, by = "Gen") expect_true(is.data.table(res_gen)) expect_equal(nrow(res_gen), length(unique(test_ped$Gen))) expect_true(all(c("Group", "N") %in% names(res_gen))) # Gen 1 (Group=1) has 2 individuals expect_equal(res_gen[Group == 1, N], 2) expect_equal(res_gen[Group == 2, N], 2) }) test_that("pedsubpop summarizes disconnected groups and isolated individuals", { ped_multi_dt <- data.table::data.table( Ind = c("A", "B", "C", "D", "E", "F", "ISO"), Sire = c(NA, NA, "A", NA, NA, "D", NA), Dam = c(NA, NA, "B", NA, NA, "E", NA), Sex = c("male", "female", "male", "male", "female", "male", NA_character_) ) ped_multi <- suppressMessages(tidyped(ped_multi_dt)) res_null <- pedsubpop(ped_multi) expect_true(is.data.table(res_null)) expect_equal(nrow(res_null), 3) expect_true(all(c("GP1", "GP2", "Isolated") %in% res_null$Group)) expect_equal(res_null[Group == "GP1", N], 3) expect_equal(res_null[Group == "GP1", N_Founder], 2) expect_equal(res_null[Group == "GP2", N], 3) expect_equal(res_null[Group == "GP2", N_Founder], 2) expect_equal(res_null[Group == "Isolated", N], 1) expect_equal(res_null[Group == "Isolated", N_Sire], 0) expect_equal(res_null[Group == "Isolated", N_Dam], 0) expect_equal(res_null[Group == "Isolated", N_Founder], 1) }) test_that("pedfclass classifies inbreeding levels", { res <- pedfclass(test_ped) expect_true(is.data.table(res)) expect_true("FClass" %in% names(res)) expect_s3_class(res$FClass, "ordered") # A, B, C, D have f=0 (4 individuals); E, F have f=0.25 (2 individuals) expect_equal(sum(res$Count), nrow(test_ped)) expect_equal(res[FClass == "F = 0", Count], 4) expect_equal(res[FClass == "0.125 < F <= 0.25", Count], 2) }) test_that("pedfclass handles threshold boundaries correctly", { threshold_ped <- data.table( Ind = c("A", "B", "C", "D", "E"), Sire = NA_character_, Dam = NA_character_, Sex = c("male", "female", "male", "female", "male"), f = c(0, 0.0625, 0.125, 0.25, 0.250001) ) threshold_ped <- new_tidyped(threshold_ped) res <- pedfclass(threshold_ped) expect_equal(res[FClass == "F = 0", Count], 1L) expect_equal(res[FClass == "0 < F <= 0.0625", Count], 1L) expect_equal(res[FClass == "0.0625 < F <= 0.125", Count], 1L) expect_equal(res[FClass == "0.125 < F <= 0.25", Count], 1L) expect_equal(res[FClass == "F > 0.25", Count], 1L) }) test_that("pedfclass supports custom breaks", { threshold_ped <- data.table( Ind = c("A", "B", "C", "D", "E", "F", "G"), Sire = NA_character_, Dam = NA_character_, Sex = c("male", "female", "male", "female", "male", "female", "male"), f = c(0, 0.03125, 0.0625, 0.125, 0.25, 0.5, 0.6) ) threshold_ped <- new_tidyped(threshold_ped) res <- pedfclass(threshold_ped, breaks = c(0.03125, 0.0625, 0.125, 0.25, 0.5)) # labels auto-generated: 5 bounded + 1 tail = 6 positives, plus "F = 0" = 7 levels expect_equal( as.character(res$FClass), c("F = 0", "0 < F <= 0.03125", "0.03125 < F <= 0.0625", "0.0625 < F <= 0.125", "0.125 < F <= 0.25", "0.25 < F <= 0.5", "F > 0.5") ) expect_equal(res[FClass == "0 < F <= 0.03125", Count], 1L) expect_equal(res[FClass == "0.03125 < F <= 0.0625", Count], 1L) expect_equal(res[FClass == "0.0625 < F <= 0.125", Count], 1L) expect_equal(res[FClass == "0.125 < F <= 0.25", Count], 1L) expect_equal(res[FClass == "0.25 < F <= 0.5", Count], 1L) expect_equal(res[FClass == "F > 0.5", Count], 1L) }) test_that("pedfclass respects custom labels aligned to breaks", { threshold_ped <- data.table( Ind = c("A", "B", "C", "D", "E"), Sire = NA_character_, Dam = NA_character_, Sex = c("male", "female", "male", "female", "male"), f = c(0, 0.0625, 0.125, 0.25, 0.4) ) threshold_ped <- new_tidyped(threshold_ped) res <- pedfclass( threshold_ped, breaks = c(0.0625, 0.125, 0.25), labels = c("Low", "Moderate", "High") ) # tail auto-generated as "F > 0.25" expect_equal(as.character(res$FClass), c("F = 0", "Low", "Moderate", "High", "F > 0.25")) expect_equal(res[FClass == "Low", Count], 1L) expect_equal(res[FClass == "High", Count], 1L) expect_equal(res[FClass == "F > 0.25", Count], 1L) # Wrong length should error expect_error( pedfclass(threshold_ped, breaks = c(0.0625, 0.125, 0.25), labels = c("A", "B")), "length equal to length\\(breaks\\)" ) }) test_that("pedstats returns correct structure without timevar", { # Use a pedigree without any time column to test no-timevar path ped_no_time <- data.table::data.table( ID = c("A", "B", "C", "D"), Sire = c(NA, NA, "A", "A"), Dam = c(NA, NA, "B", "B"), Sex = c("male", "female", "male", "female") ) tp <- suppressMessages(tidyped(ped_no_time)) ps <- suppressMessages(pedstats(tp)) # class expect_s3_class(ps, "pedstats") # $summary columns and types expect_true(is.data.table(ps$summary)) expect_named(ps$summary, c("N", "NSire", "NDam", "NFounder", "MaxGen")) expect_equal(ps$summary$N, nrow(tp)) expect_true(ps$summary$NFounder > 0) # $ecg columns expect_true(is.data.table(ps$ecg)) expect_named(ps$ecg, c("Ind", "ECG", "FullGen", "MaxGen")) expect_equal(nrow(ps$ecg), nrow(tp)) # no timevar -> gen_intervals is NULL expect_null(ps$gen_intervals) }) test_that("pedstats returns gen_intervals with timevar", { tp2 <- suppressMessages(tidyped(big_family_size_ped)) ps2 <- pedstats(tp2, timevar = "Year") # $gen_intervals columns expect_true(is.data.table(ps2$gen_intervals)) expect_true(all(c("Pathway", "N", "Mean", "SD") %in% names(ps2$gen_intervals))) expect_true("Average" %in% ps2$gen_intervals$Pathway) # ecg = FALSE suppresses ecg ps_no_ecg <- pedstats(tp2, timevar = "Year", ecg = FALSE) expect_null(ps_no_ecg$ecg) # genint = FALSE suppresses gen_intervals ps_no_gi <- pedstats(tp2, timevar = "Year", genint = FALSE) expect_null(ps_no_gi$gen_intervals) }) test_that("pedancestry proportions sum to 1 for multi-line admixture", { ped_df <- data.table::data.table( Ind = c("A","B","C","D", "E","F","G"), Sire = c(NA, NA, NA, NA, "A","C","E"), Dam = c(NA, NA, NA, NA, "B","D","F"), Sex = c("male","female","male","female", "male","female","male") ) ped_df$Line <- c("Line1", "Line1", "Line2", "Line2", NA, NA, NA) tp <- suppressMessages(tidyped(ped_df)) res <- pedancestry(tp, foundervar = "Line") # Calculate sum of all label columns for each individual res[, Sum := rowSums(.SD, na.rm = TRUE), .SDcols = c("Line1", "Line2")] # Check specific admixture for the final hybrid (G) expect_equal(res[Ind == "G", Line1], 0.5) expect_equal(res[Ind == "G", Line2], 0.5) # All proportions must sum strictly to 1.0 (with floating point tolerance) expect_true(all(abs(res$Sum - 1.0) < 1e-12)) }) test_that("pedrel results match between compact and non-compact modes", { # Use a medium subset to explicitly test folding performance equivalence # 500 rows is large enough to contain multi-generational sib-groups for compression ped_sub <- head(visPedigree::big_family_size_ped, 500) tp <- suppressMessages(tidyped(ped_sub)) rel_comp <- pedrel(tp, compact = TRUE) rel_full <- pedrel(tp, compact = FALSE) expect_equal(rel_comp, rel_full) coan_comp <- pedrel(tp, compact = TRUE, scale = "coancestry") coan_full <- pedrel(tp, compact = FALSE, scale = "coancestry") expect_equal(coan_comp, coan_full) })