# https://github.com/arq5x/bedtools2/blob/master/test/cluster/test-cluster.sh x <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~id , ~strand , "chr1" , 72017 , 884436 , "a" , 1 , "+" , "chr1" , 72017 , 844113 , "b" , 2 , "+" , "chr1" , 939517 , 1011278 , "c" , 3 , "+" , "chr1" , 1142976 , 1203168 , "d" , 4 , "+" , "chr1" , 1153667 , 1298845 , "e" , 5 , "-" , "chr1" , 1153667 , 1219633 , "f" , 6 , "+" , "chr1" , 1155173 , 1200334 , "g" , 7 , "-" , "chr1" , 1229798 , 1500664 , "h" , 8 , "-" , "chr1" , 1297735 , 1357056 , "i" , 9 , "+" , "chr1" , 1844181 , 1931789 , "j" , 10 , "-" ) test_that("basic cluster works", { res <- bed_cluster(x) # test number of groups in output expect_equal(length(unique(res$.id)), 4) expect_equal(res$.id, c(1, 1, 2, 3, 3, 3, 3, 3, 3, 4)) }) test_that("stranded cluster works", { res <- bed_cluster(group_by(x, strand)) # test number of groups in output expect_equal(length(unique(res$.id)), 6) expect_equal(res$.id, c(1, 1, 2, 3, 3, 4, 4, 4, 5, 6)) }) x <- tibble::tribble( ~chrom , ~start , ~end , ~name , ~id , ~strand , "chr1" , 72017 , 884436 , "a" , 1 , "+" , "chr1" , 72017 , 844113 , "b" , 2 , "+" , "chr1" , 939517 , 1011278 , "c" , 3 , "+" , "chr2" , 940000 , 990000 , "d" , 4 , "-" ) test_that("cluster ids are not repeated per group issue #171", { res <- bed_cluster(x) # test that groups have unique ids chr1_ids <- filter(res, chrom == "chr1") |> select(.id) |> unique() |> unlist() chr2_ids <- filter(res, chrom == "chr2") |> select(.id) |> unique() |> unlist() shared_ids <- intersect(chr1_ids, chr2_ids) expect_equal(length(shared_ids), 0) }) test_that("guard against max_dist argument preventing clustering first interval in contig issue #388", { x <- tibble::tribble( ~chrom , ~start , ~end , "a" , 1 , 10 , "a" , 20 , 50 , "b" , 20 , 50 , "c" , 100 , 100 ) res <- bed_cluster(x, max_dist = 0) expect_equal(res$.id, 1L:4L) res <- bed_cluster(x, max_dist = 100) expect_equal(res$.id, c(1, 1, 2, 3)) res <- bed_cluster(x, max_dist = 10) expect_equal(res$.id, c(1, 1, 2, 3)) res <- bed_cluster(x, max_dist = 9) expect_equal(res$.id, 1L:4L) }) test_that("check for off by one errors, related to issue #401 @kcamnairb ", { x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 1 , 10 , "chr1" , 5 , 20 , "chr1" , 30 , 40 ) res <- bed_cluster(x, max_dist = 10) expect_equal(res$.id, c(1L, 1L, 1L)) x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 1 , 3 , "chr1" , 2 , 4 , "chr1" , 5 , 10 , "chr1" , 12 , 14 ) res <- bed_cluster(x, max_dist = 0) expect_equal(res$.id, c(1L, 1L, 2L, 3L)) res <- bed_cluster(x, max_dist = 1) expect_equal(res$.id, c(1L, 1L, 1L, 2L)) }) test_that("check for additional errors, related to issue #401 @kcamnairb ", { x <- tibble::tribble( ~chrom , ~start , ~end , "scaffold_66" , 27262 , 70396 , "scaffold_66" , 66594 , 67647 , "scaffold_66" , 82218 , 85280 , "scaffold_66" , 85878 , 87553 , "scaffold_66" , 87831 , 89885 , "scaffold_66" , 90498 , 91996 ) res <- bed_cluster(x, max_dist = 20000) expect_true(all(res$.id == 1)) x <- tibble::tribble( ~chrom , ~start , ~end , "chr1" , 1 , 10 , "chr1" , 1 , 11 , "chr1" , 1 , 9 , "chr1" , 1 , 9 , "chr1" , 3 , 4 , "chr1" , 3 , 12 , "chr1" , 10 , 14 , "chr1" , 100 , 200 , "chr2" , 1 , 10 , "chr2" , 1 , 11 , "chr2" , 1 , 9 , "chr2" , 1 , 9 , "chr2" , 3 , 4 , "chr2" , 3 , 12 , "chr2" , 10 , 14 , "chr2" , 100 , 200 ) res <- bed_cluster(x, max_dist = 0) expect_true(max(res$.id) == 4) res <- bed_cluster(x, max_dist = 100) expect_equal(res$.id, c(rep(1, 8), rep(2, 8))) res <- bed_cluster(x, max_dist = -3) expect_true(max(res$.id) == 6) })