# ========================================================================= # Shared Mock VCF Text File Generator # ========================================================================= write_mock_vcf <- function(include_ad = TRUE, missing_gt = FALSE) { # Cleanly determine the conditional lines first ad_header <- if (include_ad) "##FORMAT=" else "" format_field <- if (include_ad) "GT:AD" else "GT" vcf_lines <- c( "##fileformat=VCFv4.2", "##FORMAT=", ad_header, paste0("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tInd1\tInd2\tInd3"), # Chromosome 1 - Variant 1 (Clean) paste0("Chr1\t1001\trs1\tA\tG\t100\tPASS\t.\t", format_field, "\t0/1", if (include_ad) ":10,10" else "", "\t0/0", if (include_ad) ":20,0" else "", "\t1/1", if (include_ad) ":0,20" else ""), # Chromosome 1 - Variant 2 (Injected Missing Data conditional) paste0("Chr1\t2002\trs2\tC\tT\t100\tPASS\t.\t", format_field, "\t", if (missing_gt) "./.:0,0" else if (include_ad) "0/1:12,8" else "0/1", "\t0/1", if (include_ad) ":10,10" else "", "\t0/0", if (include_ad) ":20,0" else ""), # Chromosome 2 - Variant 3 (Single marker on a chromosome to check for NaN bug) paste0("Chr2\t5005\trs3\tT\tG\t100\tPASS\t.\t", format_field, "\t1/1", if (include_ad) ":0,20" else "", "\t0/1", if (include_ad) ":11,9" else "", "\t0/0", if (include_ad) ":20,0" else "") ) # Clean out any empty strings remaining from the 'else' branches vcf_lines <- vcf_lines[vcf_lines != ""] tmp_file <- tempfile(fileext = ".vcf") writeLines(vcf_lines, tmp_file) return(tmp_file) } # ========================================================================= # 1. Structural Sanity Parsing (Standard GT Ingestion) # ========================================================================= test_that("ReadVCF correctly extracts and builds genetic proxy footprints from GT matrices", { vcf_path <- write_mock_vcf(include_ad = FALSE) on.exit(unlink(vcf_path)) # Guaranteed file destruction cleanup res <- ReadVCF(File = vcf_path, AlleleDepthField = NULL, MaxMarkerMissing = 1.0, MaxIndMissing = 1.0, Verbose = FALSE) # Structural Outputs Assertions expect_type(res, "list") expect_named(res, c("Geno", "MarkerInfo", "GeneticMap")) # Dimension mapping validation (3 individuals, 3 variants) expect_length(res$Geno, 3) expect_equal(nrow(res$MarkerInfo), 3) expect_equal(dim(res$GeneticMap), c(3, 3)) # String synthesis verification block expect_equal(res$MarkerInfo$MARKER[1], "Chr1_1001") expect_equal(colnames(res$GeneticMap), c("Chromosome", "Marker", "Distance")) }) # ========================================================================= # 2. Field String Extraction (Optional Depth Allocations) # ========================================================================= test_that("ReadVCF switches dynamically to Allele Depth Field configurations", { vcf_path <- write_mock_vcf(include_ad = TRUE) on.exit(unlink(vcf_path)) # Inject mock formats handler for .FormatVariant hook tracing expect_error( ReadVCF(File = vcf_path, AlleleDepthField = "AD", AlleleDepthType = "alleles", Verbose = FALSE), NA ) # Verify that missing type mappings trigger checks expect_error( ReadVCF(File = vcf_path, AlleleDepthField = "AD", AlleleDepthType = "invalid_type", Verbose = FALSE), regexp = "chosen among|informed along with" ) }) # ========================================================================= # 3. Missing Value Filtering Logic Branches # ========================================================================= test_that("Missing value filters appropriately drop elements and update metadata", { # Build a file where variant 2 has explicit missing markers vcf_path <- write_mock_vcf(include_ad = TRUE, missing_gt = TRUE) on.exit(unlink(vcf_path)) # Run clean pass (MaxMarkerMissing = 1.0 means no drops) res_clean <- ReadVCF(File = vcf_path, MaxMarkerMissing = 1.0, MaxIndMissing = 1.0, Verbose = FALSE) expect_length(res_clean$Geno, 3) # Run strict missing pass (Variant 2 has 1/3 = 33% missingness, should drop at 20% limit) res_filtered <- ReadVCF(File = vcf_path, MaxMarkerMissing = 0.20, MaxIndMissing = 1.0, Verbose = FALSE) # Geno list and MarkerInfo arrays must shrink down simultaneously expect_length(res_filtered$Geno, 2) expect_equal(nrow(res_filtered$MarkerInfo), 2) expect_false("Chr1_2002" %in% names(res_filtered$Geno)) }) # ========================================================================= # 4. Physical Map Proxy Boundary Protections (Preventing NaN) # ========================================================================= test_that("Genetic map distance calculations wrap cleanly across boundaries without generating NaN", { vcf_path <- write_mock_vcf(include_ad = FALSE) on.exit(unlink(vcf_path)) res <- ReadVCF(File = vcf_path, MaxMarkerMissing = 1.0, MaxIndMissing = 1.0, Verbose = FALSE) # Verify bounded distance assignments (Chr1 Min marker should always resolve to 0) expect_equal(res$GeneticMap$Distance[res$GeneticMap$Marker == "Chr1_1001"], 0) expect_equal(res$GeneticMap$Distance[res$GeneticMap$Marker == "Chr1_2002"], 100) # Check single marker chromosome handling (The safety bug check) chr2_dist <- res$GeneticMap$Distance[res$GeneticMap$Marker == "Chr2_5005"] expect_false(is.nan(chr2_dist)) expect_equal(chr2_dist, 0) }) # ========================================================================= # 5. Missing File Safeguards & Terminal Mutes # ========================================================================= test_that("ReadVCF protects file systems and maintains mute constraints cleanly", { expect_error(ReadVCF(File = "non_existent_file.vcf", Verbose = FALSE), regexp = "No file was found") vcf_path <- write_mock_vcf(include_ad = FALSE) on.exit(unlink(vcf_path)) expect_silent(ReadVCF(File = vcf_path, Verbose = FALSE)) })