skip_if(!check_dependencies(python = TRUE)) init_env(quiet = TRUE) afr <- population("AFR", time = 100000, N = 30) ooa <- population("OOA", parent = afr, time = 60000, N = 5, remove = 23000) ehg <- population("EHG", parent = ooa, time = 28000, N = 10, remove = 6000) eur <- population("EUR", parent = ehg, time = 25000, N = 20) ana <- population("ANA", time = 28000, N = 30, parent = ooa, remove = 4000) yam <- population("YAM", time = 7000, N = 5, parent = ehg, remove = 2500) model <- compile_model(populations = list(afr, ooa, ehg, eur, ana, yam), generation_time = 30) test_that("number of samples must be a non-zero integer larger than one", { msg <- "Sample counts must be non-negative, non-zero integer numbers" expect_error(schedule_sampling(model, times = 45000, list(afr, 10.3)), msg) expect_error(schedule_sampling(model, times = 45000, list(afr, -10)), msg) expect_error(schedule_sampling(model, times = 45000, list(afr, 10), list(ooa, -42, "Ust_Ishim")), msg) expect_error(schedule_sampling(model, times = 45000, list(afr, 10), list(ooa, -42, "Ust_Ishim")), msg) }) test_that("named samples are enforced to be unique", { expect_error(schedule_sampling(model, times = c(45000, 30000), list(afr, 10), list(ooa, 1, "Ust_Ishim")), "Named samples must be unique. The following sample names are duplicated:\nUst_Ishim") }) check_ordering <- function(samples, direction) { if (direction == "backward") op <- `<=` else op <- `>=` # dplyr::filter(samples, grepl("_\\d+$", name)) %>% samples %>% dplyr::group_by(pop) %>% dplyr::mutate(id = paste0(pop, "-", 1:dplyr::n())) %>% split(., .$pop) %>% lapply(function(df) data.frame(name = df$name, i = as.integer(gsub(".*-(\\d+)", "\\1", df$id)), age = df$time)) %>% Filter(function(df) any(!is.na(df$i)), .) %>% lapply(function(df) {df$ordered <- all(diff(df$i) >= 0) && all(op(diff(df$age), 0)); df}) %>% do.call(rbind, .) } test_that("temporal ordering of samples remains consistent with the schedule (msprime)", { schedule <- schedule_sampling(model, times = 45000, list(afr, 10), list(ooa, 1, "Ust_Ishim")) samples <- ts_samples(msprime(model, sequence_length = 100000, recombination_rate = 0, samples = schedule)) expect_true(all(check_ordering(samples, "backward")$ordered)) schedule <- rbind( schedule_sampling(model, times = c(45000, 30000), list(afr, 10), list(ooa, 1)), schedule_sampling(model, times = 40000, list(ooa, 1, "Ust_Ishim")) ) samples <- ts_samples(msprime(model, sequence_length = 100000, recombination_rate = 0, samples = schedule)) expect_true(all(check_ordering(samples, "backward")$ordered)) schedule <- dplyr::bind_rows( schedule_sampling(model, times = 10000, list(afr, 10)), schedule_sampling(model, times = 30000, list(afr, 30)), schedule_sampling(model, times = 40000, list(afr, 40)), schedule_sampling(model, times = 5000, list(afr, 5)), schedule_sampling(model, times = 5000, list(eur, 5)), schedule_sampling(model, times = 1000, list(eur, 1)), schedule_sampling(model, times = 3000, list(eur, 3)), schedule_sampling(model, times = 2000, list(eur, 2)) ) samples <- ts_samples(msprime(model, sequence_length = 100000, recombination_rate = 0, samples = schedule)) expect_true(all(check_ordering(samples, "backward")$ordered)) # the above tests were testing consistency of times -- here let's check 1-to-1 correspondence schedule <- dplyr::bind_rows( schedule_sampling(model, times = 10000, list(afr, 1)), schedule_sampling(model, times = 30000, list(afr, 1)), schedule_sampling(model, times = 40000, list(afr, 1)), schedule_sampling(model, times = 5000, list(afr, 1)), schedule_sampling(model, times = 35000, list(ooa, 1)), schedule_sampling(model, times = 41000, list(ooa, 1)), schedule_sampling(model, times = 40000, list(ooa, 1)), schedule_sampling(model, times = 5000, list(eur, 1)), schedule_sampling(model, times = 1000, list(eur, 1)), schedule_sampling(model, times = 3000, list(eur, 1)), schedule_sampling(model, times = 2000, list(eur, 1)), schedule_sampling(model, times = 10000, list(ehg, 1)), schedule_sampling(model, times = 18000, list(ehg, 1)), schedule_sampling(model, times = 17000, list(ehg, 1)), schedule_sampling(model, times = 9000, list(ehg, 1)), ) samples <- ts_samples(msprime(model, sequence_length = 100000, recombination_rate = 0, samples = schedule)) schedule_ids <- dplyr::arrange(schedule, time) %>% dplyr::group_by(pop) %>% dplyr::mutate(id = paste0(pop, "-", time, "-", 1:dplyr::n())) %>% .$id samples_ids <- dplyr::arrange(samples, time) %>% dplyr::group_by(pop) %>% dplyr::mutate(id = paste0(pop, "-", time, "-", 1:dplyr::n())) %>% .$id expect_equal(schedule_ids, samples_ids) }) test_that("temporal ordering of samples remains consistent with the schedule (SLiM)", { schedule <- schedule_sampling(model, times = 45000, list(afr, 10), list(ooa, 1, "Ust_Ishim")) ts <- slim(model, sequence_length = 100000, recombination_rate = 0, samples = schedule) samples <- ts_samples(ts) expect_true(all(check_ordering(samples, "backward")$ordered)) schedule <- rbind( schedule_sampling(model, times = c(45000, 30000), list(afr, 10), list(ooa, 1)), schedule_sampling(model, times = 40000, list(ooa, 1, "Ust_Ishim")) ) samples <- ts_samples(slim(model, sequence_length = 100000, recombination_rate = 0, samples = schedule)) expect_true(all(check_ordering(samples, "backward")$ordered)) schedule <- dplyr::bind_rows( schedule_sampling(model, times = 10000, list(afr, 10)), schedule_sampling(model, times = 30000, list(afr, 30)), schedule_sampling(model, times = 40000, list(afr, 40)), schedule_sampling(model, times = 5000, list(afr, 5)), schedule_sampling(model, times = 5000, list(eur, 5)), schedule_sampling(model, times = 1000, list(eur, 1)), schedule_sampling(model, times = 3000, list(eur, 3)), schedule_sampling(model, times = 2000, list(eur, 2)) ) samples <- ts_samples(slim(model, sequence_length = 100000, recombination_rate = 0, samples = schedule)) expect_true(all(check_ordering(samples, "backward")$ordered)) # the above tests were testing consistency of times -- here let's check 1-to-1 correspondence schedule <- dplyr::bind_rows( schedule_sampling(model, times = 10000, list(afr, 1)), schedule_sampling(model, times = 30000, list(afr, 1)), schedule_sampling(model, times = 40000, list(afr, 1)), schedule_sampling(model, times = 5000, list(afr, 1)), schedule_sampling(model, times = 35000, list(ooa, 1)), schedule_sampling(model, times = 41000, list(ooa, 1)), schedule_sampling(model, times = 40000, list(ooa, 1)), schedule_sampling(model, times = 5000, list(eur, 1)), schedule_sampling(model, times = 1000, list(eur, 1)), schedule_sampling(model, times = 3000, list(eur, 1)), schedule_sampling(model, times = 2000, list(eur, 1)), schedule_sampling(model, times = 10000, list(ehg, 1)), schedule_sampling(model, times = 18000, list(ehg, 1)), schedule_sampling(model, times = 17000, list(ehg, 1)), schedule_sampling(model, times = 9000, list(ehg, 1)), ) samples <- ts_samples(slim(model, sequence_length = 100000, recombination_rate = 0, samples = schedule)) schedule_ids <- dplyr::arrange(schedule, time) %>% dplyr::group_by(pop) %>% dplyr::mutate(id = paste0(pop, "-", time, "-", 1:dplyr::n())) %>% .$id samples_ids <- dplyr::arrange(samples, time) %>% dplyr::group_by(pop) %>% dplyr::mutate(id = paste0(pop, "-", time, "-", 1:dplyr::n())) %>% .$id expect_equal(schedule_ids, samples_ids) })