################################################################################

context("READ_BGEN")

skip_if_not_installed("dplyr")
skip_if_not_installed("dbplyr")
skip_if_not_installed("RSQLite")

################################################################################

# need to write bgen/bgi files because can't have binary files..
library(magrittr)
bgen_file <- tempfile(fileext = ".bgen")
test_path("testdata/bgen_example.rds") %>%
  readRDS() %>% writeBin(bgen_file, useBytes = TRUE)
test_path("testdata/bgi_example.rds") %>%
  readRDS() %>% writeBin(paste0(bgen_file, ".bgi"), useBytes = TRUE)

variants <- readRDS(test_path("testdata/bgen_variants.rds"))
dosages  <- readRDS(test_path("testdata/bgen_dosages.rds"))
snp_info <- readRDS(test_path("testdata/bgen_varinfo.rds"))
IDs <- with(variants, paste(1, physical.pos, allele1, allele2, sep = "_"))
# variants 18 & 19 have identical IDs
excl <- c(18, 19)

ncores <- function() sample(1:2, 1)

################################################################################

test_that("snp_readBGI() can return the full variant information", {
  full_info <- snp_readBGI(paste0(bgen_file, ".bgi"))
  expect_equal(full_info[c(1:2, 5:6)], variants[c(1, 4:6)], check.attributes = FALSE)
})

################################################################################

test_that("raises some errors", {
  expect_error(snp_attach(snp_readBGEN(bgen_file, tempfile(), IDs, ncores = ncores())),
               "'list_snp_id' is not of class 'list'.", fixed = TRUE)
  expect_error(
    snp_attach(snp_readBGEN(bgen_file, tempfile(), list(c(IDs, "LOL")),
                            ncores = ncores())),
               "Wrong format of some variants.", fixed = TRUE)
})

################################################################################

test_that("format_snp_id() works as expected", {
  expect_error(format_snp_id(c("1_88169_C_T", "01_88169_C_T", "1:88169_C_T")),
               "Wrong format of some variants.", fixed = TRUE)
  expect_identical(format_snp_id(c("1_88169_C_T", "01_88169_C_T")),
                   c("01_88169_C_T", "01_88169_C_T"))
})

################################################################################

test_that("same as package {rbgen}", {
  replicate(20, {
    test <- snp_attach(snp_readBGEN(bgen_file, tempfile(), list(IDs), ncores = ncores()))
    G <- test$genotypes
    expect_identical(test$map[-excl, 1:6], variants[-excl, ])
    expect_identical(G[, -excl][501], NA_real_)
    expect_equal(G[, -excl], round(dosages[, -excl], 2))
  })
})

################################################################################

test_that("same variant infos as with QCTOOL", {

  test <- snp_attach(snp_readBGEN(bgen_file, tempfile(), list(IDs), ncores = ncores()))

  expect_identical(
    dplyr::mutate(test$map[-19, 1:6], chromosome = as.integer(chromosome)),
    dplyr::as_tibble(dplyr::transmute(
      snp_info[-19, ], chromosome, marker.ID = alternate_ids, rsid,
      physical.pos = position, allele1 = alleleA, allele2 = alleleB))
  )

  expect_equal(test$map$freq, colMeans(test$genotypes[], na.rm = TRUE) / 2,
               tolerance = 2e-4)
  expect_equal(test$map$freq[-19], snp_info$alleleB_frequency[-19], tolerance = 1e-6)
  expect_equal(test$map$info[-19], snp_info$impute_info[-19], tolerance = 1e-6)
})

################################################################################

setdiff2 <- function(x, y) x[!x %in% y]

test_that("works with a subset of SNPs", {
  ind_snp <- setdiff2(sample(length(IDs), 50, replace = TRUE), excl)
  test2 <- snp_attach(snp_readBGEN(bgen_file, tempfile(), list(IDs[ind_snp]),
                                   ncores = ncores()))
  G2 <- test2$genotypes
  expect_equal(dim(G2), c(500, length(ind_snp)))
  expect_identical(test2$map[1:6], variants[ind_snp, ])
  expect_equal(G2[], round(dosages[, ind_snp], 2))
})

test_that("works with a subset of individuals", {
  ind_snp <- setdiff2(sample(length(IDs), 100, replace = TRUE), excl)
  expect_error(
    snp_readBGEN(bgen_file, tempfile(), list(IDs[ind_snp]), c(1, 501), ncores = ncores()),
    "all(ind_row >= 1 & ind_row <= N) is not TRUE", fixed = TRUE)
  ind_row <- sample(500, 100, replace = TRUE)
  test3 <- snp_attach(
    snp_readBGEN(bgen_file, tempfile(), list(IDs[ind_snp]), ind_row, ncores = ncores()))
  G3 <- test3$genotypes
  expect_equal(dim(G3), c(length(ind_row), length(ind_snp)))
  expect_identical(test3$map[1:6], variants[ind_snp, ])
  expect_equal(G3[], round(dosages[ind_row, ind_snp], 2))
})

################################################################################

test_that("works with multiple files", {
  ind_snp <- setdiff2(sample(length(IDs), 50, replace = TRUE), excl)
  ind_row <- sample(500, 100, replace = TRUE)
  list_IDs <- split(IDs[ind_snp], sort(rep_len(1:3, length(ind_snp))))
  test4 <- snp_attach(
    snp_readBGEN(rep(bgen_file, 3), tempfile(), list_IDs, ind_row, ncores = ncores()))
  G4 <- test4$genotypes
  expect_equal(dim(G4), c(length(ind_row), length(ind_snp)))
  expect_identical(test4$map[1:6], variants[ind_snp, ])
  expect_equal(G4[], round(dosages[ind_row, ind_snp], 2))
})

################################################################################

test_that("read as random hard calls", {
  skip_on_covr()
  G <- snp_attach(snp_readBGEN(bgen_file, tempfile(), list(IDs)))$genotypes[]
  all_G <- replicate(50, simplify = FALSE, {
    test <- snp_readBGEN(bgen_file, tempfile(), list(IDs), read_as = "random")
    snp_attach(test)$genotypes[]
  })
  expect_false(any(sapply(all_G[-1], identical, all_G[[1]])))
  G_mean <- Reduce('+', all_G) / length(all_G)
  # G[1:5, 1:5] - G_mean[1:5, 1:5]
  # plot(G_mean, G, pch = 20, col = scales::alpha("black", 0.1))
  expect_identical(G_mean[is.na(G)], rep(NA_real_, 2))
  expect_equal(G_mean[!is.na(G)], G[!is.na(G)], tolerance = 0.2)
  lm_coef <- stats::lm(G_mean[!is.na(G)] ~ G[!is.na(G)])$coef
  expect_equal(unname(lm_coef), c(0, 1), tolerance = 1e-3)
})

################################################################################

test_that("work with duplicated variants or individuals", {
  G <- snp_attach(snp_readBGEN(
    bgen_file, tempfile(), list(rep("1_1001_A_G", 3))))$genotypes
  expect_equal(ncol(G), 3)
  expect_identical(G[, 1], G[, 2])
  expect_identical(G[, 1], G[, 3])
  G2 <- snp_attach(snp_readBGEN(
    bgen_file, tempfile(), ind_row = rep(1, 3), list(IDs)))$genotypes
  expect_equal(dim(G2), c(3, length(IDs)))
  expect_identical(G2[1, ], G2[2, ])
  expect_identical(G2[1, ], G2[3, ])
})

################################################################################