# Tests for dereplicate(), run against the FASTQ fixtures shipped in the
# package's "extdata/truncated" directory.
truncated <- system.file("extdata", "truncated", package = "tidyGenR")
fs <- list.files(truncated,
  pattern = "fastq.gz",
  full.names = TRUE
)

# Default parameters ----
test_that("expected list is produced with default parameters", {
  ln <- 5
  dr <- dereplicate(fs[seq_len(ln)])
  # result is a plain list
  expect_identical(class(dr), "list")
  # one element per input file
  expect_length(dr, ln)
  # all elements in the list are data.frames
  expect_true(all(vapply(
    dr,
    function(x) inherits(x, "data.frame"),
    logical(1)
  )))
  # list names == extracted groups
  expect_equal(
    names(dr),
    c("chrna9_F", "chrna9_R", "nfkbia_F", "nfkbia_R", "rogdi_F")
  )
})

# 'min_sam_fr' filter ----
test_that("'min_sam_fr' filter works", {
  ln <- 5
  threshold <- 10
  dr_0 <- dereplicate(fs[seq_len(ln)], min_sam_fr = 0, min_loc_fr = 0)
  dr_filtered <- dereplicate(fs[seq_len(ln)],
    min_sam_fr = threshold,
    min_loc_fr = 0
  )
  # with the filter on, every value in the third column is above the threshold
  expect_true(all(vapply(
    dr_filtered,
    function(x) all(x[, 3, drop = TRUE] > threshold),
    logical(1)
  )))
  # with the filter off, every table has at least one third-column value
  # below the threshold
  expect_true(all(vapply(
    dr_0,
    function(x) any(x[, 3, drop = TRUE] < threshold),
    logical(1)
  )))
})

# 'min_loc_fr' filter ----
test_that("'min_loc_fr' filter works", {
  lc <- c("chrna9", "nfkbia")
  fs_sel <- fs[grep(paste(lc, collapse = "|"), fs)]
  threshold <- 10
  dr_filtered <- dereplicate(fs_sel,
    min_sam_fr = 0,
    min_loc_fr = threshold
  )
  # row sums over every column except the first two are above the threshold
  rw_sums <- vapply(
    dr_filtered,
    function(x) all(rowSums(x[, -c(1, 2), drop = FALSE]) > threshold),
    logical(1)
  )
  expect_true(all(rw_sums))
})

# 'by' argument selects the grouping field ----
test_that("'by' for selecting another field works", {
  lc <- c("chrna9", "nfkbia")
  fs_sel <- fs[grep(paste(lc, collapse = "|"), fs)]
  # by sample: the capture group is the first underscore-separated field
  dr_sample <- dereplicate(fs_sel,
    by = "([a-zA-Z0-9]*)_[a-zA-Z0-9]*_[F|R]"
  )
  expect_equal(names(dr_sample), c("BOR1061", "BOR1063", "BOR1069"))
  # by locus: the capture group is the second underscore-separated field
  dr_locus <- dereplicate(fs_sel,
    by = "[a-zA-Z0-9]*_([a-zA-Z0-9]*)_[F|R]"
  )
  expect_equal(names(dr_locus), lc)
})

# 'out_xlsx' writes an Excel file ----
test_that("'out_xlsx' writes an Excel file", {
  out_file <- file.path(tempdir(), "test.xlsx")
  # start from a clean slate; unlink() is silent when the file is absent
  unlink(out_file)
  dereplicate(fs[seq_len(5)], out_xlsx = out_file)
  expect_true(file.exists(out_file))
  # do not leave the workbook behind for later tests
  unlink(out_file)
})