# Shared fixtures: `x` holds two chr1 intervals; `y` holds one chr1 interval
# (15-20) that falls inside the first interval of `x`.
x <- tibble::tibble(
  chrom = c("chr1", "chr1"),
  start = c(10, 30),
  end = c(20, 40)
)

y <- tibble::tibble(
  chrom = "chr1",
  start = 15,
  end = 20
)

test_that("jaccard coeff is calculated correctly", {
  stats <- bed_jaccard(x, y)

  expect_equal(stats$jaccard, 0.25)
})

test_that("jaccard coeff is calc'd for large data sets", {
  # Local random interval sets; the fixed seeds keep the expected value stable.
  genome_file <- valr_example("hg19.chrom.sizes.gz")
  chrom_sizes <- read_genome(genome_file)
  rand_x <- bed_random(chrom_sizes, n = 1e5, seed = 10000)
  rand_y <- bed_random(chrom_sizes, n = 1e5, seed = 20000)

  stats <- bed_jaccard(rand_x, rand_y)
  rounded_jaccard <- round(stats$jaccard, 3)

  expect_equal(rounded_jaccard, 0.016)
})

test_that("jaccard with grouped inputs are calculated", {
  genome_file <- valr_example("hg19.chrom.sizes.gz")
  chrom_sizes <- read_genome(genome_file)
  rand_x <- bed_random(chrom_sizes, n = 1e5, seed = 10000)
  rand_y <- bed_random(chrom_sizes, n = 1e5, seed = 20000)

  # Group both inputs by chromosome before computing the statistic.
  x_by_chrom <- group_by(rand_x, chrom)
  y_by_chrom <- group_by(rand_y, chrom)
  per_chrom <- bed_jaccard(x_by_chrom, y_by_chrom)

  # 24 rows expected -- presumably one per chromosome in the genome file
  # (TODO confirm against hg19.chrom.sizes.gz).
  expect_equal(nrow(per_chrom), 24)
  expect_true("chrom" %in% names(per_chrom))
})

# from https://github.com/arq5x/bedtools2/blob/master/test/jaccard/test-jaccard.sh
test_that("Test symmetry", {
  # Swapping the argument order must not change the coefficient.
  forward <- bed_jaccard(x, y)
  reverse <- bed_jaccard(y, x)

  expect_equal(forward$jaccard, reverse$jaccard)
})

test_that("Test jaccard with mixed strand files", {
  # Both inputs carry a strand column mixing "+", "-" and "." entries and are
  # passed to bed_jaccard() ungrouped.
  intervals_a <- tibble::tribble(
    ~chrom, ~start, ~end, ~name, ~score, ~strand,
    "chr1", 10L, 50L, "a1f", 2L, "+",
    "chr1", 20L, 60L, "b1r", 4L, "-",
    "chr1", 25L, 70L, "c1q", 8L, ".",
    "chr1", 30L, 75L, "d1q", 16L, ".",
    "chr1", 40L, 80L, "e1f", 32L, "+",
    "chr1", 45L, 90L, "f1r", 64L, "-",
    "chr2", 10L, 50L, "a2q", 2L, ".",
    "chr2", 20L, 40L, "b2f", 4L, "+",
    "chr2", 25L, 50L, "c2r", 8L, "-",
    "chr2", 30L, 60L, "d2f", 16L, "+",
    "chr2", 35L, 65L, "e2q", 32L, ".",
    "chr2", 39L, 80L, "f2r", 64L, "-"
  )

  intervals_b <- tibble::tribble(
    ~chrom, ~start, ~end, ~name, ~score, ~strand,
    "chr1", 10L, 50L, "2a1r", 2L, "-",
    "chr1", 40L, 70L, "2b1q", 4L, ".",
    "chr1", 60L, 100L, "2c1f", 8L, "+",
    "chr2", 15L, 40L, "2d2f", 16L, "+",
    "chr2", 30L, 100L, "2e2r", 32L, "-"
  )

  stats <- bed_jaccard(intervals_a, intervals_b)

  # The coefficient is compared after rounding both sides to 5 decimals.
  expected_jaccard <- round(0.8055556, 5)
  observed_jaccard <- round(stats$jaccard, 5)

  expect_equal(stats$len_i, 145)
  expect_equal(stats$len_u, 325)
  expect_equal(observed_jaccard, expected_jaccard)
  expect_equal(stats$n, 2)
})