# https://github.com/arq5x/bedtools2/blob/master/test/closest/test-closest.sh test_that("1bp closer, check for off-by-one errors", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 10 , 20 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 9 , 10 , "chr1" , 19 , 20 , "chr1" , 20 , 21 ) res <- bed_closest(x, y) expect_equal(nrow(res), 3) expect_true(all(c(-1, 0, 1) == res$.dist)) expect_true(all(c(0, 1, 0) == res$.overlap)) }) test_that("reciprocal test of 1bp closer, check for off-by-one errors", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 10 , 20 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 9 , 10 , "chr1" , 19 , 20 , "chr1" , 20 , 21 ) res <- bed_closest(y, x) expect_equal(nrow(res), 3) expect_true(all(c(1, 0, -1) == res$.dist)) expect_true(all(c(0, 1, 0) == res$.overlap)) }) test_that("0bp apart closer, check for off-by-one errors", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 10 , 20 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 9 , 10 , "chr1" , 19 , 21 , "chr1" , 20 , 21 ) res <- bed_closest(x, y) expect_equal(nrow(res), 3) expect_true(all(c(-1, 0, 1) == res$.dist)) expect_true(all(c(0, 1, 0) == res$.overlap)) }) test_that("reciprocal of 0bp apart closer, check for off-by-one errors", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 10 , 20 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 9 , 10 , "chr1" , 19 , 21 , "chr1" , 20 , 21 ) res <- bed_closest(y, x) res2 <- bed_closest(x, y) expect_equal(nrow(res), 3) expect_equal(nrow(res), 3) expect_true(all(c(1, 0, -1) == res$.dist)) expect_true(all(c(0, 1, 0) == res$.overlap)) }) test_that("check that first left interval at index 0 is not lost", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 10 , 20 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 9 , 10 ) res <- bed_closest(x, y) expect_equal(nrow(res), 1) }) test_that("check that first right interval at index 0 is not lost", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 10 , 20 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 20 , 21 ) res <- bed_closest(x, y) expect_equal(nrow(res), 1) }) test_that("check that strand closest works (strand = TRUE)", { x <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 100 , 200 , "a" , 10 , "+" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 90 , 120 , "b" , 1 , "-" ) res <- bed_closest(group_by(x, strand), group_by(y, strand)) # report NA expect_equal(nrow(res), 1) expect_equal(nrow(na.omit(res)), 0) }) test_that("check that same strand is reported (strand = TRUE", { x <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 80 , 100 , "q1" , 1 , "+" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 5 , 15 , "d1.1" , 1 , "+" , "chr1" , 20 , 60 , "d1.2" , 2 , "-" , "chr1" , 200 , 220 , "d1.3" , 3 , "-" ) pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.distance , "chr1" , 80 , 100 , "q1" , 1 , "+" , 5 , 15 , "d1.1" , 1 , "+" , 0 , -66 ) res <- bed_closest(group_by(x, strand), group_by(y, strand)) expect_true(all(pred == res)) }) test_that("check that different strand is reported (strand_opp = TRUE", { x <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 80 , 100 , "q1" , 1 , "+" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 5 , 15 , "d1.1" , 1 , "+" , "chr1" , 20 , 60 , "d1.2" , 2 , "-" , "chr1" , 200 , 220 , "d1.3" , 3 , "-" ) pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 80 , 100 , "q1" , 1 , "+" , 20 , 60 , "d1.2" , 2 , "+" , 0 , -21 ) res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand)) expect_true(all(pred == res)) }) test_that("check that reciprocal strand closest works (strand_opp = TRUE) ", { x <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 100 , 200 , "a" , 10 , "+" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 80 , 90 , "b" , 1 , "-" ) res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand)) expect_equal(nrow(res), 1) }) test_that("overlapping intervals are removed (overlap = F)", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 10 , 20 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 9 , 10 , "chr1" , 19 , 21 , "chr1" , 20 , 21 ) res <- bed_closest(x, y, overlap = FALSE) expect_true(res[2, "start.y"] != 19) }) test_that("duplicate intervals are not reported", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 100 , 200 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 100 , 200 , "chr1" , 150 , 200 , "chr1" , 550 , 580 , "chr2" , 7000 , 8500 ) res <- bed_closest(x, y) expect_false(any(duplicated(res))) }) test_that("all overlapping features are reported", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 100 , 200 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 100 , 200 , "chr1" , 150 , 200 , "chr1" , 50 , 100 , "chr1" , 200 , 300 ) exp <- tibble::tribble( ~chrom , ~start.x , ~start.y , "chr1" , 100 , 200 ) res <- bed_closest(x, y) expect_true(nrow(res) == 4) }) test_that("test reporting of first overlapping feature and overlap = F excludes overlapping intervals", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 100 , 101 , "chr1" , 200 , 201 , "chr1" , 300 , 301 , "chr1" , 100000 , 100010 , "chr1" , 100020 , 100040 , "chr2" , 1 , 10 , "chr2" , 20 , 30 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 100 , 101 , "chr1" , 150 , 201 , "chr1" , 175 , 375 ) pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~start.y , ~end.y , ~.dist , "chr1" , 100 , 101 , 150 , 201 , 50 , "chr1" , 200 , 201 , 100 , 101 , -100 , "chr1" , 300 , 301 , 150 , 201 , -100 , "chr1" , 100000 , 100010 , 175 , 375 , -99626 , "chr1" , 100020 , 100040 , 175 , 375 , -99646 , "chr2" , 1 , 10 , NA , NA , NA , "chr2" , 20 , 30 , NA , NA , NA ) res <- bed_closest(x, y, overlap = F) expect_equal(res, pred) }) ### test distance reporting conditions ### ### tbls to test d_q1 <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 80 , 100 , "d_q1.1" , 5 , "+" ) d_q2 <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 80 , 100 , "d_q2.1" , 5 , "-" ) d_d1F <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 40 , 60 , "d1F.1" , 10 , "+" ) d_d1R <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 40 , 60 , "d1R.1" , 10 , "-" ) d_d2F <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 140 , 160 , "d2F.1" , 10 , "+" ) d_d2R <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 140 , 160 , "d2R.1" , 10 , "-" ) test_that("default distance reporting works for forward hit on left, forward query", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 80 , 100 , "d_q1.1" , 5 , "+" , 40 , 60 , "d1F.1" , 10 , "+" , 0 , -21 ) res <- bed_closest(d_q1, d_d1F) expect_true(all(pred == res)) }) test_that("default distance reporting works for reverse hit on left, forward query", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 80 , 100 , "d_q1.1" , 5 , "+" , 40 , 60 , "d1R.1" , 10 , "-" , 0 , -21 ) res <- bed_closest(d_q1, d_d1R) expect_true(all(pred == res)) }) test_that("default distance reporting works for forward hit on left, reverse query", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 80 , 100 , "d_q2.1" , 5 , "-" , 40 , 60 , "d1F.1" , 10 , "+" , 0 , -21 ) res <- bed_closest(d_q2, d_d1F) expect_true(all(pred == res)) }) test_that("default distance reporting works for reverse hit on left, reverse query", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 80 , 100 , "d_q2.1" , 5 , "-" , 40 , 60 , "d1R.1" , 10 , "-" , 0 , -21 ) res <- bed_closest(d_q2, d_d1R) expect_true(all(pred == res)) }) test_that("default distance reporting works for forward hit on right, forward query", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 80 , 100 , "d_q1.1" , 5 , "+" , 140 , 160 , "d2F.1" , 10 , "+" , 0 , 41 ) res <- bed_closest(d_q1, d_d2F) expect_true(all(pred == res)) }) test_that("default distance reporting works for reverse hit on right, forward query", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 80 , 100 , "d_q1.1" , 5 , "+" , 140 , 160 , "d2R.1" , 10 , "-" , 0 , 41 ) res <- bed_closest(d_q1, d_d2R) expect_true(all(pred == res)) }) test_that("default distance reporting works for forward hit on right, reverse query", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 80 , 100 , "d_q2.1" , 5 , "-" , 140 , 160 , "d2F.1" , 10 , "+" , 0 , 41 ) res <- bed_closest(d_q2, d_d2F) expect_true(all(pred == res)) }) test_that("default distance reporting works for reverse hit on right, reverse query", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 80 , 100 , "d_q2.1" , 5 , "-" , 140 , 160 , "d2R.1" , 10 , "-" , 0 , 41 ) res <- bed_closest(d_q2, d_d2R) expect_true(all(pred == res)) }) ### additional tbls for tests ### a2 <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 10 , 20 , "a1" , 1 , "-" ) b2 <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~score , ~strand , "chr1" , 8 , 9 , "b1" , 1 , "+" , "chr1" , 21 , 22 , "b2" , 1 , "-" ) test_that("Make sure non-overlapping ties are reported ", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 10 , 20 , "a1" , 1 , "-" , 21 , 22 , "b2" , 1 , "-" , 0 , 2 , "chr1" , 10 , 20 , "a1" , 1 , "-" , 8 , 9 , "b1" , 1 , "+" , 0 , -2 ) res <- bed_closest(a2, b2) expect_equal(pred, res) }) test_that("Make sure non-overlapping ties are reported with strand = T ", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 10 , 20 , "a1" , 1 , "-" , 21 , 22 , "b2" , 1 , "-" , 0 , 2 ) res <- bed_closest(group_by(a2, strand), group_by(b2, strand)) expect_true(all(pred == res)) }) test_that("Make sure non-overlapping ties are reported with strand_opp = T ", { pred <- tibble::tribble( ~chrom , ~start.x , ~end.x , ~name.x , ~score.x , ~strand.x , ~start.y , ~end.y , ~name.y , ~score.y , ~strand.y , ~.overlap , ~.dist , "chr1" , 10 , 20 , "a1" , 1 , "-" , 8 , 9 , "b1" , 1 , "-" , 0 , -2 ) res <- bed_closest(group_by(a2, strand), group_by(flip_strands(b2), strand)) expect_true(all(pred == res)) }) test_that("Make sure that closest intervals are captured when intervals span multiple interval tree nodes issue #105", { # when the y tbl has >= 64 intervals two nodes of the interval tree will be generated snps <- read_bed(valr_example("hg19.snps147.chr22.bed.gz"), n_max = 10) genes_one_node <- read_bed( valr_example("genes.hg19.chr22.bed.gz"), n_max = 63 ) genes_two_nodes <- read_bed( valr_example("genes.hg19.chr22.bed.gz"), n_max = 64 ) res_expt_one_node <- bed_closest(snps, genes_one_node) res_expt_two_nodes <- bed_closest(snps, genes_two_nodes) # adding one extra interval should not result in doubling the reported intervals expect_false(nrow(res_expt_two_nodes) >= 2 * nrow(res_expt_one_node)) }) test_that("test that a max of two duplicated x ivls are returned, assuming non-overlapping, and non-duplicate y ivls #105", { snps <- read_bed(valr_example("hg19.snps147.chr22.bed.gz"), n_max = 10) genes <- read_bed(valr_example("genes.hg19.chr22.bed.gz"), n_max = 64) # make sure there are no repeated y ivls (otherwise more than 2 x ivls should be reported) genes <- group_by(genes, chrom, start, end) genes <- mutate(genes, ivl_count = n()) genes <- filter(genes, ivl_count == 1) genes <- select(genes, -ivl_count) genes <- group_by(genes, chrom) res <- bed_closest(snps, genes, overlap = FALSE) res <- group_by(res, chrom, start.x, end.x) res <- summarize(res, n = n(), .groups = "keep") # there should not be more than 2 possible closest ivls. expect_true(all(res$n <= 2)) genome <- tibble::tribble( ~chrom , ~size , "chr1" , 10000000 , "chr2" , 50000000 , "chr3" , 60000000 , "chrX" , 5000000 ) x <- bed_random(genome, n = 1e5, seed = 1) y <- bed_random(genome, n = 1e5, seed = 2) x$idx <- seq_len(nrow(x)) y$idx <- seq_len(nrow(y)) res <- bed_closest(x, y, overlap = FALSE) res <- group_by(res, idx.x) res_grps <- summarize(res, n = n(), .groups = "keep") # if more than 1 x ivl reported, then it is due to duplicated y ivls in input multi_grps <- res_grps[res_grps$n > 1, ] if (nrow(multi_grps) > 0) { grps <- res[res$idx.x %in% multi_grps$idx.x, ] grps <- group_by(grps, idx.x) res <- summarize( grps, n_res = n(), same_abs_dist = length(unique(abs(.dist))) == 1, idx_y_distinct = length(unique(idx.y)) == n_res ) expect_true(all(res$same_abs_dist & res$idx_y_distinct)) } }) test_that("ensure that subtraction is done with respect to input tbls issue#108", { x <- tibble::tribble( ~chrom , ~start , ~end , ~group , "chr1" , 100 , 200 , "A" , "chr1" , 200 , 400 , "A" , "chr1" , 300 , 500 , "A" , "chr1" , 125 , 175 , "C" , "chr1" , 150 , 200 , "B" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~group , "chr1" , 100 , 200 , "A" , "chr1" , 200 , 400 , "B" , "chr1" , 300 , 500 , "A" , "chr1" , 125 , 175 , "C" , "chr2" , 150 , 200 , "A" ) x_grouped <- arrange(x, chrom, start) |> group_by(group, chrom) y_grouped <- arrange(y, chrom, start) |> group_by(group, chrom) res <- bed_closest(x_grouped, y_grouped) expect_true(all(res$group.x == res$group.y)) }) # from https://github.com/arq5x/bedtools2/blob/master/test/closest/test-closest.sh test_that("test closest forcing -s yet no matching strands on chrom", { x <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 100 , 200 , "a" , 10 , "+" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 90 , 120 , "b" , 1 , "-" ) res <- bed_closest(group_by(x, strand), group_by(y, strand)) expect_true(nrow(res) == 1) expect_true(nrow(na.omit(res)) == 0) }) test_that("test closest forcing -S with only an opp strands on chrom", { x <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 100 , 200 , "a" , 10 , "+" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 90 , 120 , "b" , 1 , "-" ) res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand)) expect_true(nrow(res) == 1) }) test_that("Make sure non-overlapping ties are reported", { x <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 10 , 20 , "a1" , 1 , "-" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 8 , 9 , "b1" , 1 , "+" , "chr1" , 21 , 22 , "b2" , 1 , "-" ) res <- bed_closest(x, y) expect_true(nrow(res) == 2) }) test_that("Make sure non-overlapping ties are reported, with strand option", { x <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 10 , 20 , "a1" , 1 , "-" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 8 , 9 , "b1" , 1 , "+" , "chr1" , 21 , 22 , "b2" , 1 , "-" ) res <- bed_closest(group_by(x, strand), group_by(y, strand)) expect_true(nrow(res) == 1) }) test_that("Make sure non-overlapping ties are reported, with strand-oppo option", { x <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 10 , 20 , "a1" , 1 , "-" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 8 , 9 , "b1" , 1 , "+" , "chr1" , 21 , 22 , "b2" , 1 , "-" ) res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand)) expect_true(nrow(res) == 1) }) test_that("check ties, single db", { x <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 10 , 20 , "a1" , 1 , "-" ) y <- tibble::tribble( ~chrom , ~start , ~end , ~group , ~score , ~strand , "chr1" , 8 , 9 , "b1" , 1 , "+" , "chr1" , 21 , 22 , "b2" , 1 , "-" ) res <- bed_closest(x, y) expect_true(nrow(res) == 2) }) test_that("check reporting of adjacent intervals issue #348", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 10 , 20 ) y <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 8 , 9 , "chr1" , 9 , 10 , "chr1" , 20 , 21 , "chr1" , 21 , 22 ) res <- bed_closest(x, y) expect_true(nrow(res) == 2) })