context("MADC to VCF")

test_that("test madc conversion",{
  #Input variables
  madc_file <- system.file("example_MADC_FixedAlleleID.csv", package="BIGr")
  bot_file <- system.file("example_SNPs_DArTag-probe-design_f180bp.botloci", package="BIGr")

  #Calculations
  temp <- tempfile(fileext = ".vcf")

  #Convert the dart files to vcf
  suppressWarnings(
    madc2vcf_targets(madc_file = madc_file, output.file = temp, get_REF_ALT = FALSE)
  )

  #Test validity of VCF
  vcf <- read.vcfR(temp, verbose = FALSE)

  DP_df <- extract.gt(vcf, element = "DP")
  DP_df_mean <- mean(as.numeric(DP_df))

  #Checks
  expect_s4_class(vcf, "vcfR")
  expect_true(nrow(vcf) == 20)
  expect_true(ncol(vcf@gt) == 11)
  expect_true(all(dim(DP_df) == c(20,10)))
  expect_true(round(DP_df_mean, 3) == 227.22)

  rm(vcf)
  rm(temp)

  # Test with REF_ALT
  temp <- tempfile(fileext = ".vcf")

  suppressWarnings(
    madc2vcf_targets(madc_file = madc_file, output.file = temp, get_REF_ALT = TRUE, botloci_file = bot_file)
  )

  #Test validity of VCF
  vcf <- read.vcfR(temp, verbose = FALSE)

  #Checks
  expect_s4_class(vcf, "vcfR")
  expect_true(nrow(vcf) == 20)
  expect_true(ncol(vcf@gt) == 11)
  expect_true(all(dim(DP_df) == c(20,10)))
  expect_true(round(DP_df_mean, 3) == 227.22)
  expect_true(all(vcf@fix[,4] != "."))
  expect_true(all(vcf@fix[,5] != "."))
  expect_true(all(vcf@fix[,4] != vcf@fix[,5]))

  # UPDATED: These expected values might change with the fix
  # You'll need to verify these are the correct expected values after the fix
  expect_true(all(vcf@fix[1:5,4] == c("A", "G", "G", "G", "C")))
  expect_true(all(vcf@fix[1:5,5] == c("T", "A", "A", "A", "A")))

  rm(vcf)
  rm(temp)
})

# NEW: Add specific test for bottom strand markers
test_that("bottom strand markers have correct REF/ALT", {
  madc_file <- system.file("example_MADC_FixedAlleleID.csv", package="BIGr")
  bot_file <- system.file("example_SNPs_DArTag-probe-design_f180bp.botloci", package="BIGr")

  temp_targets <- tempfile(fileext = ".vcf")
  temp_all <- tempfile(fileext = ".vcf")

  # Get results from both functions
  suppressWarnings(
    madc2vcf_targets(madc_file = madc_file, output.file = temp_targets,
                    get_REF_ALT = TRUE, botloci_file = bot_file)
  )

  suppressWarnings(
    madc2vcf_all(madc = madc_file, botloci_file = bot_file,
                hap_seq_file = NULL, out_vcf = temp_all, verbose = FALSE)
  )

  vcf_targets <- read.vcfR(temp_targets, verbose = FALSE)
  vcf_all <- read.vcfR(temp_all, verbose = FALSE)

  # Find common markers between both outputs
  common_markers <- intersect(vcf_targets@fix[,"ID"], vcf_all@fix[,"ID"])

  if(length(common_markers) > 0) {
    # Compare REF/ALT for common markers
    targets_subset <- vcf_targets@fix[vcf_targets@fix[,"ID"] %in% common_markers,]
    all_subset <- vcf_all@fix[vcf_all@fix[,"ID"] %in% common_markers,]

    # Sort both by ID for comparison
    targets_subset <- targets_subset[order(targets_subset[,"ID"]),]
    all_subset <- all_subset[order(all_subset[,"ID"]),]

    # Check that REF/ALT match between the two functions
    expect_equal(targets_subset[,"REF"], all_subset[,"REF"])
    expect_equal(targets_subset[,"ALT"], all_subset[,"ALT"])
  }

  # Validate that all REF/ALT are valid nucleotides
  expect_true(all(vcf_targets@fix[,"REF"] %in% c("A", "T", "G", "C", ".")))
  expect_true(all(vcf_targets@fix[,"ALT"] %in% c("A", "T", "G", "C", ".")))

  # Validate that REF != ALT where both are not "."
  valid_snps <- vcf_targets@fix[,"REF"] != "." & vcf_targets@fix[,"ALT"] != "."
  if(any(valid_snps)) {
    expect_true(all(vcf_targets@fix[valid_snps,"REF"] != vcf_targets@fix[valid_snps,"ALT"]))
  }

  rm(vcf_targets, vcf_all, temp_targets, temp_all)
})