context("find_peaks lod_int, and bayes_int") # read data iron <- read_cross2(system.file("extdata", "iron.zip", package="qtl2")) # calculate genotype probabilities map <- insert_pseudomarkers(iron$gmap, step=1) probs <- calc_genoprob(iron, map, error_prob=0.002) # grab phenotypes and covariates; ensure that covariates have names attribute pheno <- iron$pheno covar <- match(iron$covar$sex, c("f", "m")) # make numeric names(covar) <- rownames(iron$covar) Xcovar <- get_x_covar(iron) # perform genome scan out <- scan1(probs, pheno, addcovar=covar, Xcovar=Xcovar) test_that("find_peaks works", { expected_2_1 <- structure(list(lodindex = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), lodcolumn = c("liver", "liver", "liver", "liver", "liver", "liver", "liver", "liver", "liver", "liver", "liver", "liver", "spleen", "spleen", "spleen"), chr = structure(c(1L, 2L, 4L, 7L, 7L, 8L, 11L, 13L, 15L, 16L, 17L, 19L, 8L, 9L, 10L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X"), class = "factor"), pos = c(90.3, 56.8, 10.9, 25.1, 49.1, 41, 26, 32.5, 49.2, 28.6, 4.3, 38.3, 13.6, 56.6, 37), lod = c(2.53157055941134, 4.85599006729575, 2.74279943572619, 3.77915164598982, 4.42062344751016, 3.49656665028876, 2.61495248604883, 3.40360749696547, 4.03706766736211, 6.35264382891418, 2.71848298281312, 3.0383533142527, 5.35976176735248, 12.5986057120873, 2.23257885452713)), .Names = c("lodindex", "lodcolumn", "chr", "pos", "lod"), row.names = c(NA, 15L), class = "data.frame") expect_equal(find_peaks(out, map, 2, 1), expected_2_1) # test sort expect_equal(find_peaks(out, map, 2, 1, sort_by="pos"), expected_2_1[c(1,2,3,4,5,13,6,14,15,7,8,9,10,11,12),]) expect_equal(find_peaks(out, map, 2, 1, sort_by="lod"), expected_2_1[c(14,10,13,2,5,9,4,6,8,12,3,11,7,1,15),]) expected_4_Inf <- expected_2_1[c(2, 5, 9, 10, 13, 14),] rownames(expected_4_Inf) <- NULL expect_equal(find_peaks(out, map, 4, Inf), expected_4_Inf) expected_3_1 <- expected_2_1[c(2,4,5,6,8,9,10,12,13,14),] rownames(expected_3_1) <- NULL expect_equal(find_peaks(out, map, 3, 1), expected_3_1) # separate X threshold expect_equal(find_peaks(out, map, 2, 1, thresholdX=1.5), rbind(expected_2_1, data.frame(lodindex=2, lodcolumn="spleen", chr="X", pos=29.5, lod=1.65321447653698, stringsAsFactors=FALSE))) # column-specific thresholds result <- find_peaks(out, map, c(4,2), 1, thresholdX=c(0.3, 1.5), peakdropX=c(0.1, 1)) expected <- rbind(expected_2_1[c(2,5,9,10),], data.frame(lodindex=rep(1,2), lodcolumn=rep("liver",2), chr=rep("X",2), pos=c(29.5, 57.9), lod=c(0.832206643447098, 0.35793505359049), stringsAsFactors=FALSE), expected_2_1[13:15,], data.frame(lodindex=2, lodcolumn="spleen", chr="X", pos=29.5, lod=1.65321447653698, stringsAsFactors=FALSE)) rownames(expected) <- NULL expect_equal(result, expected) # lod support intervals expect_equal(find_peaks(out, map, 4, 2, 1.5), cbind(expected_4_Inf, ci_lo=c(48.1, 1.1, 16.4, 6.6, 0.0, 43.7), ci_hi=c(73.2, 53.6, 49.2, 40.4, 32.7, 61.2))) expect_equal(find_peaks(out, map, 4, 2, 2), cbind(expected_4_Inf, ci_lo=c(48.1, 1.1, 16.4, 6.6, 0.0, 43.7), ci_hi=c(73.2, 53.6, 49.2, 40.4, 32.7, 61.2))) expected_3_1_lodint <- cbind(expected_3_1, ci_lo=c(48.1, 1.1, 37.2, 17.3, 17.5, 16.4, 6.6, 3.3, 0.0, 53.6), ci_hi=c(73.2, 28.4, 53.6, 69.9, 40.4, 49.2, 40.4, 38.3, 17.3, 61.2)) expect_equal(find_peaks(out, map, 3, 1, 0.9), expected_3_1_lodint) skip_on_cran() # sorting expect_equal(find_peaks(out, map, 3, 1, 0.9, sort_by="pos"), expected_3_1_lodint[c(1,2,3,9,4,10,5,6,7,8),]) expect_equal(find_peaks(out, map, 3, 1, 0.9, sort_by="lod"), expected_3_1_lodint[c(10,7,9,1,3,6,2,4,5,8),]) # expand2markers=FALSE expect_equal(find_peaks(out, map, 3, 1, 0.9, expand2markers=FALSE), cbind(expected_3_1, ci_lo=c(53.3, 11.1, 38.1, 24.0, 20.5, 38.4, 21.6, 28.3, 8.0, 53.6), ci_hi=c(66.3, 28.1, 53.6, 53.0, 40.4, 49.2, 32.6, 38.3, 17.0, 59.6))) # Bayes intervals expect_equal(find_peaks(out, map, 4, 2, prob=0.95), cbind(expected_4_Inf, ci_lo=c(48.1, 13.1, 16.4, 6.6, 0.0, 53.6), ci_hi=c(73.2, 53.6, 49.2, 40.4, 32.7, 61.2))) expect_equal(find_peaks(out, map, 4, 2, prob=0.99), cbind(expected_4_Inf, ci_lo=c(48.1, 1.1, 16.4, 6.6, 0.0, 43.7), ci_hi=c(73.2, 53.6, 49.2, 40.4, 32.7, 61.2))) expected_3_1_bayesint <- cbind(expected_3_1, ci_lo=c(48.1, 13.1, 37.2, 32.7, 17.5, 16.4, 6.6, 3.3, 0.0, 53.6), ci_hi=c(73.2, 28.4, 53.6, 69.9, 40.4, 49.2, 40.4, 38.3, 17.3, 61.2)) expect_equal(find_peaks(out, map, 3, 1, prob=0.8), expected_3_1_bayesint) # sorting expect_equal(find_peaks(out, map, 3, 1, prob=0.8, sort_by="pos"), expected_3_1_bayesint[c(1,2,3,9,4,10,5,6,7,8),]) expect_equal(find_peaks(out, map, 3, 1, prob=0.8, sort_by="lod"), expected_3_1_bayesint[c(10,7,9,1,3,6,2,4,5,8),]) # expand2markers=FALSE expect_equal(find_peaks(out, map, 3, 1, 0.8, expand2markers=FALSE), cbind(expected_3_1, ci_lo=c(53.3, 12.1, 39.1, 26.0, 21.5, 39.4, 25.1, 28.3, 9.0, 53.6), ci_hi=c(65.3, 28.1, 53.6, 52.0, 40.4, 49.2, 32.6, 38.3, 17.0, 59.6))) # test that it works if output or map are subsetted expect_equal( find_peaks(out, map["2"]), find_peaks(subset(out, map, chr="2"), map) ) # test that it works if output has rows shuffled out_shuffled <- out[sample(1:nrow(out)),,drop=FALSE] class(out_shuffled) <- c("scan1", "matrix") expect_equal( find_peaks(out_shuffled, map), find_peaks(out, map)) # test that find_peaks works if there are no peaks above threshold blank_output <- structure(list(lodindex = numeric(0), lodcolumn = character(0), chr = structure(integer(0), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X"), class = "factor"), pos = numeric(0), lod = numeric(0)), .Names = c("lodindex", "lodcolumn", "chr", "pos", "lod"), row.names = integer(0), class = "data.frame") expect_equal( find_peaks(out, map, threshold=9999), blank_output) # test that find_peaks works if there are no peaks above threshold # like above, but also requesting LOD or Bayes intervals blank_output <- structure(list(lodindex = numeric(0), lodcolumn = character(0), chr = structure(integer(0), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X"), class = "factor"), pos = numeric(0), lod = numeric(0), ci_lo = numeric(0), ci_hi = numeric(0)), .Names = c("lodindex", "lodcolumn", "chr", "pos", "lod", "ci_lo", "ci_hi"), row.names = integer(0), class = "data.frame") expect_equal( find_peaks(out, map, threshold=9999, drop=2), blank_output) expect_equal( find_peaks(out, map, threshold=9999, prob=0.95), blank_output) }) test_that("lod_int works", { expected <- cbind(ci_lo=1.1, pos=49.1, ci_hi=53.6) rownames(expected) <- 1 expect_equal(lod_int(out, map, 7, 1), expected) expected[1,c(1,3)] <- c(12.1, 53.6) expect_equal(lod_int(out, map, 7, 1, expand2markers=FALSE), expected) expected[1,c(1,3)] <- c(37.2, 53.6) expect_equal(lod_int(out, map, 7, 1, drop=0.5), expected) expected2 <- rbind("1"=c(1.1,25.1,28.4), "2"=c(37.2,49.1,53.6)) colnames(expected2) <- colnames(expected) expect_equal(lod_int(out, map, 7, 1, 0, 1.5, 1), expected2) expected3 <- cbind(ci_lo=43.7, pos=56.6, ci_hi=61.2) rownames(expected3) <- 1 expect_equal(lod_int(out, map, 9, 2), expected3) expected4 <- cbind(ci_lo=0.0, pos=13.6, ci_hi=32.7) rownames(expected4) <- 1 expect_equal(lod_int(out, map, 8, 2), expected4) expect_equal( lod_int(out, map, chr="2"), lod_int(subset(out, map, chr="2"), map, chr="2")) expect_equal( lod_int(out, map, chr="2"), lod_int(out, map["2"], chr="2")) }) test_that("bayes_int works", { expected <- cbind(ci_lo=13.1, pos=49.1, ci_hi=53.6) rownames(expected) <- 1 expect_equal(bayes_int(out, map, 7, 1), expected) expected[1,c(1,3)] <- c(15.1, 53.6) expect_equal(bayes_int(out, map, 7, 1, expand2markers=FALSE), expected) expected[1,c(1,3)] <- c(37.2, 53.6) expect_equal(bayes_int(out, map, 7, 1, prob=0.8), expected) expected2 <- rbind("1"=c(1.1,25.1,28.4), "2"=c(37.2,49.1,53.6)) colnames(expected2) <- colnames(expected) expect_equal(bayes_int(out, map, 7, 1, 0, 1.5, 0.95), expected2) expected3 <- cbind(ci_lo=53.6, pos=56.6, ci_hi=61.2) rownames(expected3) <- 1 expect_equal(bayes_int(out, map, 9, 2), expected3) expected4 <- cbind(ci_lo=0.0, pos=13.6, ci_hi=32.7) rownames(expected4) <- 1 expect_equal(bayes_int(out, map, 8, 2), expected4) expect_equal( bayes_int(out, map, chr="2"), bayes_int(subset(out, map, chr="2"), map, chr="2")) expect_equal( bayes_int(out, map, chr="2"), bayes_int(out, map["2"], chr="2")) }) test_that("lod_int and bayes_int give same results as R/qtl", { skip_on_cran() out_rqtl <- data.frame(chr=map2chr(map), pos=map2pos(map), unclass(out)) rownames(out_rqtl) <- map2markernames(map) class(out_rqtl) <- c("scanone", "data.frame") # 1st lod col: chr 2, 7, 15, 16 # 2nd lod col: chr 8, 9 for(lod in 1:2) { for(chr in list(c(2,7,15,16), c(8,9))[[lod]]) { for(drop in c(1, 1.5, 2)) { # expand to markers li_rqtl <- qtl::lodint(out_rqtl, lod=lod, chr=chr, expandtomarkers=TRUE, drop=drop) li_qtl2 <- lod_int(out, map, lod=lod, chr=chr, drop=drop) expect_equivalent(li_rqtl[,2], as.numeric(li_qtl2)) # don't expand to markers li_rqtl <- qtl::lodint(out_rqtl, lod=lod, chr=chr, drop=drop) li_qtl2 <- lod_int(out, map, lod=lod, chr=chr, expand2markers=FALSE, drop=drop) expect_equivalent(li_rqtl[,2], as.numeric(li_qtl2)) } for(prob in c(0.90, 0.95, 0.99)) { # expand to markers bi_rqtl <- qtl::bayesint(out_rqtl, lod=lod, chr=chr, expandtomarkers=TRUE, prob=prob) bi_qtl2 <- bayes_int(out, map, lod=lod, chr=chr, prob=prob) expect_equivalent(bi_rqtl[,2], as.numeric(bi_qtl2)) # don't expand to markers bi_rqtl <- qtl::bayesint(out_rqtl, lod=lod, chr=chr, prob=prob) bi_qtl2 <- bayes_int(out, map, lod=lod, chr=chr, expand2markers=FALSE, prob=prob) expect_equivalent(bi_rqtl[,2], as.numeric(bi_qtl2)) } } } }) test_that("find_peaks works with snpinfo table", { skip_if(isnt_karl(), "this test only run locally") # load example data and calculate genotype probabilities file <- paste0("https://raw.githubusercontent.com/rqtl/", "qtl2data/main/DOex/DOex.zip") DOex <- read_cross2(file) probs <- calc_genoprob(DOex[1:20,"2"], error_prob=0.002) snpdb_file <- system.file("extdata", "cc_variants_small.sqlite", package="qtl2") queryf <- create_variant_query_func(snpdb_file) out <- scan1snps(probs, DOex$pmap, DOex$pheno, query_func=queryf, chr=2, start=97.2, end=97.3) # test max_scan1() expect_equal(max(out$lod, out$snpinfo), data.frame(chr="2", pos=out$snpinfo[6,"pos"], OF_immobile_pct=out$lod[6], row.names=out$snpinfo[6,"snp"], stringsAsFactors=FALSE)) # find_peaks (no peaks above threshold) expect_equal(find_peaks(out$lod, out$snpinfo), data.frame(lodindex=numeric(0), lodcolumn=character(0), chr=factor(character(0), "2"), pos=numeric(0), lod=numeric(0), stringsAsFactors=FALSE)) expect_equal(find_peaks(out$lod, out$snpinfo, drop=1.5), data.frame(lodindex=numeric(0), lodcolumn=character(0), chr=factor(character(0), "2"), pos=numeric(0), lod=numeric(0), ci_lo=numeric(0), ci_hi=numeric(0), stringsAsFactors=FALSE)) expect_equal(find_peaks(out$lod, out$snpinfo, prob=0.95), data.frame(lodindex=numeric(0), lodcolumn=character(0), chr=factor(character(0), "2"), pos=numeric(0), lod=numeric(0), ci_lo=numeric(0), ci_hi=numeric(0), stringsAsFactors=FALSE)) # find peaks (one peak above threshold) expect_equal(find_peaks(out$lod, out$snpinfo, threshold=0.5), data.frame(lodindex=1, lodcolumn="OF_immobile_pct", chr=factor("2", "2"), pos=out$snpinfo[6,"pos"], lod=out$lod[6], row.names=1L, stringsAsFactors=FALSE)) expect_equal(find_peaks(out$lod, out$snpinfo, threshold=0.5, drop=1.5), data.frame(lodindex=1, lodcolumn="OF_immobile_pct", chr=factor("2", "2"), pos=out$snpinfo[6,"pos"], lod=out$lod[6], ci_lo=min(out$snpinfo$pos), ci_hi=max(out$snpinfo$pos), row.names=1L, stringsAsFactors=FALSE)) expect_equal(find_peaks(out$lod, out$snpinfo, threshold=0.5, prob=0.95), data.frame(lodindex=1, lodcolumn="OF_immobile_pct", chr=factor("2", "2"), pos=out$snpinfo[6,"pos"], lod=out$lod[6], ci_lo=min(out$snpinfo$pos), ci_hi=max(out$snpinfo$pos), row.names=1L, stringsAsFactors=FALSE)) })