#### Setup #### # Installed package context parent_path <- ".." # Local script context # library(testthat) # library(airr) # parent_path <- "tests" # Rearrangement test files good_rearrangement_file <- file.path(parent_path, "data-tests", "good_rearrangement.tsv") bad_rearrangement_file <- file.path(parent_path, "data-tests", "bad_rearrangement.tsv") # Repertoire test files good_repertoire_file <- file.path(parent_path, "data-tests", "good_repertoire.yaml") bad_repertoire_file <- file.path(parent_path, "data-tests", "bad_repertoire.yaml") warn_repertoire_file <- file.path(parent_path, "data-tests", "warn_repertoire.json") # Germline test files good_germline_set_file <- file.path(parent_path, "data-tests", "good_germline_set.json") bad_germline_set_file <- file.path(parent_path, "data-tests", "bad_germline_set.json") good_genotype_set_file <- file.path(parent_path, "data-tests", "good_genotype_set.json") bad_genotype_set_file <- file.path(parent_path, "data-tests", "bad_genotype_set.json") # Combined test files good_combined_yaml <- file.path(parent_path, "data-tests", "good_combined_airr.yaml") good_combined_json <- file.path(parent_path, "data-tests", "good_combined_airr.json") good_combined_names <- c("Repertoire", "GermlineSet", "GenotypeSet") # Expected warnings for bad_rearrangement_file expected_w <- c( "Warning: File is missing AIRR mandatory field(s): sequence", "Warning: sequence_id(s) are not unique: IVKNQEJ01AJ44V, IVKNQEJ01AJ44V", "Warning: sequence_id is empty for row(s): 7", "Warning: rev_comp is not logical for row(s): 4", "Warning: productive is not logical for row(s): 1" ) #### Rearrangement #### context("Rearrangement I/O - good data") test_that("read_rearrangement loads a data.frame", { tbl_1 <- read_rearrangement(good_rearrangement_file) expect_true(is.data.frame(tbl_1)) }) test_that("read_tabular loads a data.frame", { expect_error(read_tabular(good_rearrangement_file)) tbl_1 <- read_tabular(good_rearrangement_file, schema=RearrangementSchema) expect_true(is.data.frame(tbl_1)) }) test_that("read_tabular applies base", { tbl_0 <- read_tabular(good_rearrangement_file, base="0", schema=RearrangementSchema) tbl_1 <- read_tabular(good_rearrangement_file, base="1", schema=RearrangementSchema) expect_true(is.data.frame(tbl_0)) expect_true(validate_tabular(tbl_0, schema = RearrangementSchema)) start_positions <- grep("_start$", names(tbl_0), perl=TRUE) expect_equivalent(tbl_0[, start_positions] - 1, tbl_1[, start_positions]) }) test_that("read_tabular reads zipped tsv", { # Create test data gzip tmp_file <- tempfile(fileext = ".tsv.gz") x <- readr::read_tsv(good_rearrangement_file) readr::write_tsv(x, file=tmp_file) tbl_1 <- read_tabular(tmp_file, schema=RearrangementSchema) expect_true(is.data.frame(tbl_1)) # bzip2 tmp_file <- tempfile(fileext=".tsv.bz2") readr::write_tsv(x, file=tmp_file) tbl_1 <- read_tabular(tmp_file, schema=RearrangementSchema) expect_true(is.data.frame(tbl_1)) }) test_that("Columns are of expected type", { # Create test data tmp_file <- tempfile(fileext = ".tsv") tbl_0 <- read_tabular(good_rearrangement_file, schema=RearrangementSchema) # Create non-schema columns extra_cols <- data.frame( extra.int = c(1:nrow(tbl_0)), extra.numeric = rnorm(nrow(tbl_0)), extra.double = rnorm(nrow(tbl_0)), extra.character = stringi::stri_rand_strings(nrow(tbl_0), 6) ) extra_cols_na <- data.frame( extra.int = NA, extra.numeric = NA, extra.double = NA, extra.character = NA ) # Get the number of replications to create a data.frame with NAs to make # guessing of column type fail (all NAs) num_rep <- ceiling(1000/nrow(tbl_0)) + 1 tbl_1 <- do.call( "rbind", replicate(num_rep, cbind(tbl_0, extra_cols_na), simplify = FALSE)) # Make all sequence_id unique tbl_1$sequence_id <- c(1:nrow(tbl_1)) tbl_0 <- cbind(tbl_0, extra_cols) tbl_0 <- rbind(tbl_1, tbl_0) write_tabular(tbl_0, tmp_file, schema=RearrangementSchema) # Read with col_character() as default expect_is(tbl_0 <- read_tabular(tmp_file, schema=RearrangementSchema), "data.frame") expect_is(tbl_0$extra.int, "character") expect_is(tbl_0$extra.numeric, "character") expect_is(tbl_0$extra.character, "character") expect_is(read_tabular( good_rearrangement_file, schema=RearrangementSchema, aux_types = c(extra.int = "integer", extra.double = "double", extra.numeric = "numeric", extra.character = "character")), "data.frame") expect_is(tbl_0 <- read_tabular( tmp_file, "1", schema = RearrangementSchema, aux_types = c(extra.int = "integer", extra.double = "double", extra.numeric = "numeric", extra.character = "character")), "data.frame") expect_is(tbl_0$extra.int, "integer") expect_is(tbl_0$extra.double, "numeric") expect_is(tbl_0$extra.numeric, "numeric") expect_is(tbl_0$extra.character, "character") }) test_that("write_tabular writes a file with logicals encoded T/F", { tbl <- read_tabular(good_rearrangement_file, schema=RearrangementSchema) out_file <- file.path(tempdir(), "test_out.tsv") write_tabular(tbl, out_file, schema = RearrangementSchema) expect_true(file.exists(out_file)) reload_tbl <- read.delim(out_file, colClasses="character") expect_true(all(reload_tbl[['rev_comp']] == "T")) expect_equal(reload_tbl[['productive']], c("T","T","F","T","T","F","F","F","T")) }) context("Rearrangement I/O - bad data") test_that("read_tabular with bad data", { # Expect valid==FALSE bad_data <- suppressWarnings(read_tabular(bad_rearrangement_file, base="1", schema=RearrangementSchema)) expect_false(suppressWarnings(validate_tabular(bad_data, schema=RearrangementSchema))) # Check error messages w <- capture_warnings(validate_tabular(bad_data, schema=RearrangementSchema)) expect_equal(w, expected_w) }) test_that("write_tabular writes a bad file, with warnings, with logicals T/T", { bad_data <- suppressWarnings(read_tabular(bad_rearrangement_file, base="1", schema=RearrangementSchema)) out_file <- file.path(tempdir(), "test_out.tsv") expect_warning(write_tabular(bad_data, out_file, schema = RearrangementSchema)) expect_true(file.exists(out_file)) reload_tbl <- read.delim(out_file, colClasses="character") expect_equal(reload_tbl[['rev_comp']], c("T","T","T","","T","T","T","T","T","T","T")) expect_equal(reload_tbl[['productive']], c("","T","F","T","T","F","F","F","T","T","T")) }) #### Repertoire #### context("Repertoire I/O - good data") test_that("read_airr loads a Repertoire", { rep_1 <- read_airr(good_repertoire_file) expect_true(is.list(rep_1)) }) test_that("read_airr with format=yaml loads a Repertoire", { rep_1 <- read_airr(good_repertoire_file, format="yaml") expect_true(is.list(rep_1)) }) test_that("write_airr writes a yaml Repertoire", { geno <- read_airr(good_repertoire_file, validate=T) tmp_file <- tempfile(fileext=".yaml") write_airr(geno, tmp_file, validate=T, model=F) geno_tmp <- read_airr(tmp_file, validate=T, model=F) expect_identical(geno, geno_tmp) }) test_that("write_airr writes a json Repertoire", { geno <- read_airr(good_repertoire_file, validate=T) tmp_file <- tempfile(fileext=".json") write_airr(geno, tmp_file, validate=T, model=F) geno_tmp <- read_airr(tmp_file, validate=T, model=F) expect_identical(geno, geno_tmp) }) context("Repertoire I/O - bad data") test_that("validate_airr with bad data returns an error", { bad_data <- expect_warning(read_airr(bad_repertoire_file)) expect_false(suppressWarnings(validate_airr(bad_data))) }) #### GermlineSet #### context("GermlineSet I/O - good data") test_that("read_airr loads a GermlineSet", { rep_1 <- read_airr(good_germline_set_file, validate=T) expect_true(is.list(rep_1)) }) context("GermlineSet I/O - bad data") test_that("validate_airr with bad data returns an error", { bad_data <- read_airr(bad_germline_set_file, validate=F) expect_false(expect_warning(validate_airr(bad_data))) }) #### GenotypeSet #### context("GenotypeSet I/O - good data") test_that("read_airr loads a GenotypeSet", { rep_1 <- read_airr(good_genotype_set_file, validate=T) expect_true(is.list(rep_1)) }) context("GenotypeSet I/O - bad data") test_that("validate_airr with bad data returns an error", { bad_data <- read_airr(bad_genotype_set_file, validate=F) expect_false(expect_warning(validate_airr(bad_data))) }) #### Combined Data #### context("Combined I/O") test_that("read_airr loads a combined yaml file", { repr <- read_airr(good_combined_yaml, validate=T) expect_true(is.list(repr)) expect_equal(names(repr), good_combined_names) }) test_that("read_airr loads a combined json file", { repr <- read_airr(good_combined_json, validate=T) expect_true(is.list(repr)) expect_equal(names(repr), good_combined_names) }) test_that("read_airr json and yaml load identical objects", { repr_yaml <- read_airr(good_combined_yaml, validate=T) repr_json <- read_airr(good_combined_json, validate=T) expect_identical(repr_yaml, repr_json) }) test_that("write_airr writes a combined yaml file", { repr <- read_airr(good_combined_yaml, validate=T) tmp_file <- tempfile(fileext=".yaml") write_airr(repr, tmp_file, validate=T, model=F) repr_tmp <- read_airr(tmp_file, validate=T, model=F) expect_identical(repr, repr_tmp) }) test_that("write_airr writes a combined json file", { repr <- read_airr(good_combined_json, validate=T) tmp_file <- tempfile(fileext=".json") write_airr(repr, tmp_file, validate=T, model=F) repr_tmp <- read_airr(tmp_file, validate=T, model=F) expect_identical(repr, repr_tmp) }) test_that("write_airr json and yaml writes identical objects", { repr <- read_airr(good_combined_yaml, validate=T) tmp_yaml <- tempfile(fileext=".yaml") write_airr(repr, tmp_yaml, validate=T) repr_yaml <- read_airr(tmp_yaml, validate=T) tmp_json <- tempfile(fileext=".json") write_airr(repr, tmp_json, validate=T) repr_json <- read_airr(tmp_json, validate=T) expect_identical(repr_yaml, repr_json) }) #### DataFile #### context("DataFile") test_that("read_airr works with DataFile", { rep1 <- read_airr(good_combined_yaml, validate=T, model=F) rep2 <- read_airr(good_combined_yaml, validate=T, model=T) expect_identical(rep1, rep2) }) test_that("validate_airr works with DataFile", { rep <- read_airr(good_combined_yaml, validate=F, model=F) rep[["Nonsense"]] <- list(list(id="id", data="data")) expect_true(expect_warning(validate_airr(rep, model=T), "Skipping validation of non-DataFile object: Nonsense")) expect_false(expect_warning(validate_airr(rep, model=F), "Unrecognized schema: Nonsense")) }) test_that("write_airr yaml works with DataFile", { # Test data repr <- read_airr(good_combined_yaml, validate=F, model=F) repr_extra <- repr repr_extra[["Nonsense"]] <- list(list(id="id", data="data")) # Write without DataFile constraint tmp_file <- tempfile(fileext=".yaml") expect_warning(write_airr(repr_extra, tmp_file, validate=T, model=F), "Unrecognized schema: Nonsense") repr_tmp <- expect_warning(read_airr(tmp_file, validate=T, model=T), "Skipping validation of non-DataFile object: Nonsense") expect_true("Nonsense" %in% names(repr_tmp)) expect_identical(repr_extra, repr_tmp) # Write with DataFile constraint expect_warning(write_airr(repr_extra, tmp_file, validate=T, model=T), "Skipping validation of non-DataFile object: Nonsense") repr_tmp <- read_airr(tmp_file, validate=T, model=T) expect_false("Nonsense" %in% names(repr_tmp)) expect_identical(repr, repr_tmp) }) test_that("write_airr json works with DataFile", { # Test data repr <- read_airr(good_combined_json, validate=F, model=F) repr_extra <- repr repr_extra[["Nonsense"]] <- list(list(id="id", data="data")) # Write without DataFile constraint tmp_file <- tempfile(fileext=".json") expect_warning(write_airr(repr_extra, tmp_file, validate=T, model=F), "Unrecognized schema: Nonsense") repr_tmp <- expect_warning(read_airr(tmp_file, validate=T, model=T), "Skipping validation of non-DataFile object: Nonsense") expect_true("Nonsense" %in% names(repr_tmp)) expect_identical(repr_extra, repr_tmp) # Write with DataFile constraint expect_warning(write_airr(repr_extra, tmp_file, validate=T, model=T), "Skipping validation of non-DataFile object: Nonsense") repr_tmp <- read_airr(tmp_file, validate=T, model=T) expect_false("Nonsense" %in% names(repr_tmp)) expect_identical(repr, repr_tmp) })