create_test_output_dir <- function(name = "test_immundata_") { tempfile(name) } cleanup_output_dir <- function(dir) { if (dir.exists(dir)) { unlink(dir, recursive = TRUE) } } snapshot_test_root <- function() { normalizePath(file.path(tempdir(), "imd_snap_tests"), mustWork = FALSE) } create_snapshot_test_layout <- function() { root <- snapshot_test_root() if (dir.exists(root)) { unlink(root, recursive = TRUE) } dir.create(root, recursive = TRUE, showWarnings = FALSE) project_a <- file.path(root, "projectA") project_b <- file.path(root, "projectB") dir.create(project_a, recursive = TRUE, showWarnings = FALSE) dir.create(project_b, recursive = TRUE, showWarnings = FALSE) list( root = root, projectA = project_a, projectB = project_b ) } cleanup_snapshot_test_root <- function() { keep_snap_tests <- identical(Sys.getenv("IMD_KEEP_SNAP_TESTS"), "1") if (keep_snap_tests) { return(invisible(NULL)) } root <- snapshot_test_root() if (dir.exists(root)) { unlink(root, recursive = TRUE) } } test_ig_data <- function() { system.file("extdata/ig", "multiple_ig_loci.tsv.gz", package = "immundata") } format_integrity_df_dump <- function(df) { dump <- utils::capture.output(print(df, row.names = FALSE)) if (length(dump) == 0) { return("") } paste(dump, collapse = "\n") } expect_agg_repertoires_integrity <- function(idata, context = NULL) { checkmate::assert_r6(idata, "ImmunData") if (is.null(context)) { context <- "agg_repertoires integrity" } ann <- idata$annotations |> collect() reps <- idata$repertoires |> collect() ann_required <- c("imd_receptor_id", "imd_repertoire_id", "imd_count", "imd_proportion", "n_repertoires") reps_required <- c("imd_repertoire_id", "n_barcodes", "n_receptors") ann_missing <- setdiff(ann_required, names(ann)) reps_missing <- setdiff(reps_required, names(reps)) ann_na_cols <- intersect(ann_required, names(ann)) reps_na_cols <- intersect(reps_required, names(reps)) ann_na_counts <- if (length(ann_na_cols) > 0) { vapply(ann[ann_na_cols], function(x) sum(is.na(x)), integer(1)) } else { integer() } reps_na_counts <- if (length(reps_na_cols) > 0) { vapply(reps[reps_na_cols], function(x) sum(is.na(x)), integer(1)) } else { integer() } unmatched_repertoire_ids <- NULL unmatched_repertoire_ids_display <- "" if ("imd_repertoire_id" %in% names(ann) && "imd_repertoire_id" %in% names(reps)) { unmatched_repertoire_ids <- ann |> dplyr::distinct(imd_repertoire_id) |> dplyr::anti_join( reps |> dplyr::distinct(imd_repertoire_id), by = "imd_repertoire_id" ) unmatched_repertoire_ids_display <- if (nrow(unmatched_repertoire_ids) > 0) { paste(unmatched_repertoire_ids$imd_repertoire_id, collapse = ", ") } else { "" } } diag_lines <- c( paste0("context: ", context), paste0("annotation shape: ", nrow(ann), "x", ncol(ann)), paste0("repertoire shape: ", nrow(reps), "x", ncol(reps)), if (length(ann_missing) > 0) { paste0("missing annotation columns: ", paste(ann_missing, collapse = ", ")) } else { "missing annotation columns: " }, if (length(reps_missing) > 0) { paste0("missing repertoire columns: ", paste(reps_missing, collapse = ", ")) } else { "missing repertoire columns: " }, if (length(ann_na_counts) > 0) { paste0("annotation NA counts: ", paste(names(ann_na_counts), ann_na_counts, sep = "=", collapse = ", ")) } else { "annotation NA counts: " }, if (length(reps_na_counts) > 0) { paste0("repertoire NA counts: ", paste(names(reps_na_counts), reps_na_counts, sep = "=", collapse = ", ")) } else { "repertoire NA counts: " }, paste0("unmatched repertoire ids: ", unmatched_repertoire_ids_display) ) diag <- paste(diag_lines, collapse = "\n") has_ann_na_mismatch <- length(ann_na_counts) > 0 && any(ann_na_counts != 0) has_reps_na_mismatch <- length(reps_na_counts) > 0 && any(reps_na_counts != 0) has_unmatched_repertoire_ids <- !is.null(unmatched_repertoire_ids) && nrow(unmatched_repertoire_ids) > 0 has_mismatch <- nrow(reps) <= 0 || length(ann_missing) > 0 || length(reps_missing) > 0 || has_ann_na_mismatch || has_reps_na_mismatch || has_unmatched_repertoire_ids if (has_mismatch) { diag <- paste0( diag, "\n\nannotations dump:\n", format_integrity_df_dump(ann), "\n\nrepertoires dump:\n", format_integrity_df_dump(reps) ) } testthat::expect_true(nrow(reps) > 0, info = diag) testthat::expect_equal(length(ann_missing), 0, info = diag) testthat::expect_equal(length(reps_missing), 0, info = diag) if (length(ann_na_counts) > 0) { testthat::expect_true(all(ann_na_counts == 0), info = diag) } if (length(reps_na_counts) > 0) { testthat::expect_true(all(reps_na_counts == 0), info = diag) } if (!is.null(unmatched_repertoire_ids)) { testthat::expect_equal( nrow(unmatched_repertoire_ids), 0, info = diag ) } invisible(list( annotations = ann, repertoires = reps )) }