# https://github.com/arq5x/bedtools2/blob/master/test/closest/test-closest.sh test_that("1bp closer, check for off-by-one errors", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 10, 20 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 9, 10, "chr1", 19, 20, "chr1", 20, 21 ) res <- bed_closest(x, y) expect_equal(nrow(res), 3) expect_true(all(c(-1, 0, 1) == res$.dist)) expect_true(all(c(0, 1, 0) == res$.overlap)) }) test_that("reciprocal test of 1bp closer, check for off-by-one errors", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 10, 20 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 9, 10, "chr1", 19, 20, "chr1", 20, 21 ) res <- bed_closest(y, x) expect_equal(nrow(res), 3) expect_true(all(c(1, 0, -1) == res$.dist)) expect_true(all(c(0, 1, 0) == res$.overlap)) }) test_that("0bp apart closer, check for off-by-one errors", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 10, 20 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 9, 10, "chr1", 19, 21, "chr1", 20, 21 ) res <- bed_closest(x, y) expect_equal(nrow(res), 3) expect_true(all(c(-1, 0, 1) == res$.dist)) expect_true(all(c(0, 1, 0) == res$.overlap)) }) test_that("reciprocal of 0bp apart closer, check for off-by-one errors", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 10, 20 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 9, 10, "chr1", 19, 21, "chr1", 20, 21 ) res <- bed_closest(y, x) res2 <- bed_closest(x, y) expect_equal(nrow(res), 3) expect_equal(nrow(res), 3) expect_true(all(c(1, 0, -1) == res$.dist)) expect_true(all(c(0, 1, 0) == res$.overlap)) }) test_that("check that first left interval at index 0 is not lost", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 10, 20 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 9, 10 ) res <- bed_closest(x, y) expect_equal(nrow(res), 1) }) test_that("check that first right interval at index 0 is not lost", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 10, 20 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 20, 21 ) res <- bed_closest(x, y) expect_equal(nrow(res), 1) }) test_that("check that strand closest works (strand = TRUE)", { x <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 100, 200, "a", 10, "+" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 90, 120, "b", 1, "-" ) res <- bed_closest(group_by(x, strand), group_by(y, strand)) # report NA expect_equal(nrow(res), 1) expect_equal(nrow(na.omit(res)), 0) }) test_that("check that same strand is reported (strand = TRUE", { x <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 80, 100, "q1", 1, "+" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 5, 15, "d1.1", 1, "+", "chr1", 20, 60, "d1.2", 2, "-", "chr1", 200, 220, "d1.3", 3, "-" ) pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.distance, "chr1", 80, 100, "q1", 1, "+", 5, 15, "d1.1", 1, "+", 0, -66 ) res <- bed_closest(group_by(x, strand), group_by(y, strand)) expect_true(all(pred == res)) }) test_that("check that different strand is reported (strand_opp = TRUE", { x <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 80, 100, "q1", 1, "+" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 5, 15, "d1.1", 1, "+", "chr1", 20, 60, "d1.2", 2, "-", "chr1", 200, 220, "d1.3", 3, "-" ) pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 80, 100, "q1", 1, "+", 20, 60, "d1.2", 2, "+", 0, -21 ) res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand)) expect_true(all(pred == res)) }) test_that("check that reciprocal strand closest works (strand_opp = TRUE) ", { x <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 100, 200, "a", 10, "+" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 80, 90, "b", 1, "-" ) res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand)) expect_equal(nrow(res), 1) }) test_that("overlapping intervals are removed (overlap = F)", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 10, 20 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 9, 10, "chr1", 19, 21, "chr1", 20, 21 ) res <- bed_closest(x, y, overlap = FALSE) expect_true(res[2, "start.y"] != 19) }) test_that("duplicate intervals are not reported", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 100, 200 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 100, 200, "chr1", 150, 200, "chr1", 550, 580, "chr2", 7000, 8500 ) res <- bed_closest(x, y) expect_false(any(duplicated(res))) }) test_that("all overlapping features are reported", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 100, 200 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 100, 200, "chr1", 150, 200, "chr1", 50, 100, "chr1", 200, 300 ) exp <- tibble::tribble( ~chrom, ~start.x, ~start.y, "chr1", 100, 200 ) res <- bed_closest(x, y) expect_true(nrow(res) == 4) }) test_that("test reporting of first overlapping feature and overlap = F excludes overlapping intervals", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 100, 101, "chr1", 200, 201, "chr1", 300, 301, "chr1", 100000, 100010, "chr1", 100020, 100040, "chr2", 1, 10, "chr2", 20, 30 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 100, 101, "chr1", 150, 201, "chr1", 175, 375 ) pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~start.y, ~end.y, ~.dist, "chr1", 100, 101, 150, 201, 50, "chr1", 200, 201, 100, 101, -100, "chr1", 300, 301, 150, 201, -100, "chr1", 100000, 100010, 175, 375, -99626, "chr1", 100020, 100040, 175, 375, -99646, "chr2", 1, 10, NA, NA, NA, "chr2", 20, 30, NA, NA, NA ) res <- bed_closest(x, y, overlap = F) expect_equal(res, pred) }) ### test distance reporting conditions ### ### tbls to test d_q1 <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 80, 100, "d_q1.1", 5, "+" ) d_q2 <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 80, 100, "d_q2.1", 5, "-" ) d_d1F <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 40, 60, "d1F.1", 10, "+" ) d_d1R <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 40, 60, "d1R.1", 10, "-" ) d_d2F <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 140, 160, "d2F.1", 10, "+" ) d_d2R <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 140, 160, "d2R.1", 10, "-" ) test_that("default distance reporting works for forward hit on left, forward query", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 80, 100, "d_q1.1", 5, "+", 40, 60, "d1F.1", 10, "+", 0, -21 ) res <- bed_closest(d_q1, d_d1F) expect_true(all(pred == res)) }) test_that("default distance reporting works for reverse hit on left, forward query", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 80, 100, "d_q1.1", 5, "+", 40, 60, "d1R.1", 10, "-", 0, -21 ) res <- bed_closest(d_q1, d_d1R) expect_true(all(pred == res)) }) test_that("default distance reporting works for forward hit on left, reverse query", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 80, 100, "d_q2.1", 5, "-", 40, 60, "d1F.1", 10, "+", 0, -21 ) res <- bed_closest(d_q2, d_d1F) expect_true(all(pred == res)) }) test_that("default distance reporting works for reverse hit on left, reverse query", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 80, 100, "d_q2.1", 5, "-", 40, 60, "d1R.1", 10, "-", 0, -21 ) res <- bed_closest(d_q2, d_d1R) expect_true(all(pred == res)) }) test_that("default distance reporting works for forward hit on right, forward query", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 80, 100, "d_q1.1", 5, "+", 140, 160, "d2F.1", 10, "+", 0, 41 ) res <- bed_closest(d_q1, d_d2F) expect_true(all(pred == res)) }) test_that("default distance reporting works for reverse hit on right, forward query", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 80, 100, "d_q1.1", 5, "+", 140, 160, "d2R.1", 10, "-", 0, 41 ) res <- bed_closest(d_q1, d_d2R) expect_true(all(pred == res)) }) test_that("default distance reporting works for forward hit on right, reverse query", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 80, 100, "d_q2.1", 5, "-", 140, 160, "d2F.1", 10, "+", 0, 41 ) res <- bed_closest(d_q2, d_d2F) expect_true(all(pred == res)) }) test_that("default distance reporting works for reverse hit on right, reverse query", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 80, 100, "d_q2.1", 5, "-", 140, 160, "d2R.1", 10, "-", 0, 41 ) res <- bed_closest(d_q2, d_d2R) expect_true(all(pred == res)) }) ### additional tbls for tests ### a2 <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 10, 20, "a1", 1, "-" ) b2 <- tibble::tribble( ~chrom, ~start, ~end, ~name, ~score, ~strand, "chr1", 8, 9, "b1", 1, "+", "chr1", 21, 22, "b2", 1, "-" ) test_that("Make sure non-overlapping ties are reported ", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 10, 20, "a1", 1, "-", 21, 22, "b2", 1, "-", 0, 2, "chr1", 10, 20, "a1", 1, "-", 8, 9, "b1", 1, "+", 0, -2 ) res <- bed_closest(a2, b2) expect_equal(pred, res) }) test_that("Make sure non-overlapping ties are reported with strand = T ", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 10, 20, "a1", 1, "-", 21, 22, "b2", 1, "-", 0, 2 ) res <- bed_closest(group_by(a2, strand), group_by(b2, strand)) expect_true(all(pred == res)) }) test_that("Make sure non-overlapping ties are reported with strand_opp = T ", { pred <- tibble::tribble( ~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist, "chr1", 10, 20, "a1", 1, "-", 8, 9, "b1", 1, "-", 0, -2 ) res <- bed_closest(group_by(a2, strand), group_by(flip_strands(b2), strand)) expect_true(all(pred == res)) }) test_that("Make sure that closest intervals are captured when intervals span multiple interval tree nodes issue #105", { # when the y tbl has >= 64 intervals two nodes of the interval tree will be generated snps <- read_bed(valr_example("hg19.snps147.chr22.bed.gz"), n_max = 10) genes_one_node <- read_bed(valr_example("genes.hg19.chr22.bed.gz"), n_max = 63) genes_two_nodes <- read_bed(valr_example("genes.hg19.chr22.bed.gz"), n_max = 64) res_expt_one_node <- bed_closest(snps, genes_one_node) res_expt_two_nodes <- bed_closest(snps, genes_two_nodes) # adding one extra interval should not result in doubling the reported intervals expect_false(nrow(res_expt_two_nodes) >= 2 * nrow(res_expt_one_node)) }) test_that("test that a max of two duplicated x ivls are returned, assuming non-overlapping, and non-duplicate y ivls #105", { snps <- read_bed(valr_example("hg19.snps147.chr22.bed.gz"), n_max = 10) genes <- read_bed(valr_example("genes.hg19.chr22.bed.gz"), n_max = 64) # make sure there are no repeated y ivls (otherwise more than 2 x ivls should be reported) genes <- group_by(genes, chrom, start, end) genes <- mutate(genes, ivl_count = n()) genes <- filter(genes, ivl_count == 1) genes <- select(genes, -ivl_count) genes <- group_by(genes, chrom) res <- bed_closest(snps, genes, overlap = FALSE) res <- group_by(res, chrom, start.x, end.x) res <- summarize(res, n = n(), .groups = "keep") # there should not be more than 2 possible closest ivls. expect_true(all(res$n <= 2)) genome <- tibble::tribble( ~chrom, ~size, "chr1", 10000000, "chr2", 50000000, "chr3", 60000000, "chrX", 5000000 ) x <- bed_random(genome, n = 1e5, seed = 1) y <- bed_random(genome, n = 1e5, seed = 2) x$idx <- seq_len(nrow(x)) y$idx <- seq_len(nrow(y)) res <- bed_closest(x, y, overlap = FALSE) res <- group_by(res, idx.x) res_grps <- summarize(res, n = n(), .groups = "keep") # if more than 1 x ivl reported, then it is due to duplicated y ivls in input multi_grps <- res_grps[res_grps$n > 1, ] if (nrow(multi_grps) > 0) { grps <- res[res$idx.x %in% multi_grps$idx.x, ] grps <- group_by(grps, idx.x) res <- summarize(grps, n_res = n(), same_abs_dist = length(unique(abs(.dist))) == 1, idx_y_distinct = length(unique(idx.y)) == n_res ) expect_true(all(res$same_abs_dist & res$idx_y_distinct)) } }) test_that("ensure that subtraction is done with respect to input tbls issue#108", { x <- tibble::tribble( ~chrom, ~start, ~end, ~group, "chr1", 100, 200, "A", "chr1", 200, 400, "A", "chr1", 300, 500, "A", "chr1", 125, 175, "C", "chr1", 150, 200, "B" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~group, "chr1", 100, 200, "A", "chr1", 200, 400, "B", "chr1", 300, 500, "A", "chr1", 125, 175, "C", "chr2", 150, 200, "A" ) x_grouped <- arrange(x, chrom, start) |> group_by(group, chrom) y_grouped <- arrange(y, chrom, start) |> group_by(group, chrom) res <- bed_closest(x_grouped, y_grouped) expect_true(all(res$group.x == res$group.y)) }) # from https://github.com/arq5x/bedtools2/blob/master/test/closest/test-closest.sh test_that("test closest forcing -s yet no matching strands on chrom", { x <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 100, 200, "a", 10, "+" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 90, 120, "b", 1, "-" ) res <- bed_closest(group_by(x, strand), group_by(y, strand)) expect_true(nrow(res) == 1) expect_true(nrow(na.omit(res)) == 0) }) test_that("test closest forcing -S with only an opp strands on chrom", { x <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 100, 200, "a", 10, "+" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 90, 120, "b", 1, "-" ) res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand)) expect_true(nrow(res) == 1) }) test_that("Make sure non-overlapping ties are reported", { x <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 10, 20, "a1", 1, "-" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 8, 9, "b1", 1, "+", "chr1", 21, 22, "b2", 1, "-" ) res <- bed_closest(x, y) expect_true(nrow(res) == 2) }) test_that("Make sure non-overlapping ties are reported, with strand option", { x <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 10, 20, "a1", 1, "-" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 8, 9, "b1", 1, "+", "chr1", 21, 22, "b2", 1, "-" ) res <- bed_closest(group_by(x, strand), group_by(y, strand)) expect_true(nrow(res) == 1) }) test_that("Make sure non-overlapping ties are reported, with strand-oppo option", { x <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 10, 20, "a1", 1, "-" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 8, 9, "b1", 1, "+", "chr1", 21, 22, "b2", 1, "-" ) res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand)) expect_true(nrow(res) == 1) }) test_that("check ties, single db", { x <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 10, 20, "a1", 1, "-" ) y <- tibble::tribble( ~chrom, ~start, ~end, ~group, ~score, ~strand, "chr1", 8, 9, "b1", 1, "+", "chr1", 21, 22, "b2", 1, "-" ) res <- bed_closest(x, y) expect_true(nrow(res) == 2) }) test_that("check reporting of adjacent intervals issue #348", { x <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 10, 20 ) y <- tibble::tribble( ~chrom, ~start, ~end, "chr1", 8, 9, "chr1", 9, 10, "chr1", 20, 21, "chr1", 21, 22 ) res <- bed_closest(x, y) expect_true(nrow(res) == 2) })