# =============================================================================
# test-grs.R — Unit tests for grs_ series (no network, no RAP)
# =============================================================================


# ===========================================================================
# Shared helpers
# ===========================================================================

# Minimal valid weights data.table
.fake_weights_dt <- function() {
  data.table::data.table(
    snp           = paste0("rs", 1:5),
    effect_allele = c("A", "T", "C", "G", "A"),
    beta          = c(0.10, -0.20, 0.30, 0.05, -0.15)
  )
}

# Write a weights CSV to a temp path; returns the path
.write_weights_file <- function(path, dt = NULL) {
  if (is.null(dt)) dt <- .fake_weights_dt()
  data.table::fwrite(dt, path)
  path
}

# Simulated GRS cohort for standardize / validate
.fake_grs_dt <- function(n = 300, seed = 42) {
  set.seed(seed)
  data.table::data.table(
    IID              = seq_len(n),
    GRS_a_z    = rnorm(n),
    GRS_b_z = rnorm(n),
    outcome          = rbinom(n, 1, 0.20),
    followup_years   = round(runif(n, 1, 15), 2)
  )
}


# ===========================================================================
# grs_check()
# ===========================================================================

test_that("grs_check() aborts when file not found", {
  expect_error(grs_check("/no/such/file.txt"), "not found")
})

test_that("grs_check() aborts when required columns are missing", {
  withr::local_tempfile(fileext = ".csv") |> (function(path) {
    data.table::fwrite(data.table::data.table(x = 1:3, y = 1:3), path)
    expect_error(grs_check(path), "Required column")
  })()
})

test_that("grs_check() aborts when NA values are present", {
  withr::local_tempfile(fileext = ".csv") |> (function(path) {
    dt <- .fake_weights_dt()
    dt[2L, beta := NA_real_]
    .write_weights_file(path, dt)
    expect_error(grs_check(path), "NA")
  })()
})

test_that("grs_check() aborts on duplicate SNP IDs", {
  withr::local_tempfile(fileext = ".csv") |> (function(path) {
    dt <- .fake_weights_dt()
    dt[2L, snp := dt$snp[1L]]
    .write_weights_file(path, dt)
    expect_error(grs_check(path), "duplicate")
  })()
})

test_that("grs_check() aborts when beta is non-numeric", {
  withr::local_tempfile(fileext = ".csv") |> (function(path) {
    dt <- data.table::data.table(
      snp           = paste0("rs", 1:3),
      effect_allele = c("A", "T", "C"),
      beta          = c("high", "low", "medium")
    )
    .write_weights_file(path, dt)
    expect_error(grs_check(path), "[Bb]eta|numeric")
  })()
})

test_that("grs_check() warns on non-rs SNP IDs but does not abort", {
  withr::local_tempfile(fileext = ".csv") |> (function(path) {
    dt <- .fake_weights_dt()
    dt[1L, snp := "chr1:12345:A:G"]
    dest <- tempfile(fileext = ".txt")
    on.exit(unlink(dest), add = TRUE)
    .write_weights_file(path, dt)
    expect_warning(grs_check(path, dest = dest), "rs\\[0-9\\]\\+|rs.*format")
  })()
})

test_that("grs_check() warns on non-ATCG alleles but does not abort", {
  withr::local_tempfile(fileext = ".csv") |> (function(path) {
    dt <- .fake_weights_dt()
    dt[1L, effect_allele := "INDEL"]
    dest <- tempfile(fileext = ".txt")
    on.exit(unlink(dest), add = TRUE)
    .write_weights_file(path, dt)
    expect_warning(grs_check(path, dest = dest), "A/T/C/G|allele")
  })()
})

test_that("grs_check() returns a 3-column data.table and writes output file", {
  withr::local_tempfile(fileext = ".csv") |> (function(path) {
    .write_weights_file(path)
    dest <- tempfile(fileext = ".txt")
    on.exit(unlink(dest), add = TRUE)

    result <- grs_check(path, dest = dest)

    expect_s3_class(result, "data.table")
    expect_equal(ncol(result), 3L)
    expect_named(result, c("snp", "effect_allele", "beta"))
    expect_equal(nrow(result), 5L)
    expect_true(file.exists(dest))
  })()
})


# ===========================================================================
# grs_standardize() / grs_zscore()
# ===========================================================================

test_that("grs_standardize() aborts on non-data.frame input", {
  expect_error(grs_standardize("not a df"), "data.frame")
})

test_that("grs_standardize() aborts when manually specified column not found", {
  dt <- .fake_grs_dt()
  expect_error(grs_standardize(dt, grs_cols = "no_such_col"), "not found")
})

test_that("grs_standardize() aborts when no GRS columns auto-detected", {
  dt <- data.table::data.table(IID = 1:5, age = 50:54)
  expect_error(grs_standardize(dt), "No columns")
})

test_that("grs_standardize() aborts on zero-variance column", {
  dt <- data.table::data.table(IID = 1:5, GRS_flat = rep(1.0, 5))
  expect_error(grs_standardize(dt), "zero variance")
})

test_that("grs_standardize() produces mean ≈ 0 and SD ≈ 1 for _z columns", {
  dt  <- .fake_grs_dt()
  out <- grs_standardize(dt, grs_cols = c("GRS_a_z", "GRS_b_z"))

  for (col in c("GRS_a_z_z", "GRS_b_z_z")) {
    expect_true(col %in% names(out))
    expect_lt(abs(mean(out[[col]], na.rm = TRUE)), 1e-10)
    expect_lt(abs(sd(out[[col]], na.rm = TRUE) - 1), 1e-10)
  }
})

test_that("grs_standardize() inserts _z column immediately after source column", {
  dt  <- .fake_grs_dt()
  out <- grs_standardize(dt, grs_cols = "GRS_a_z")
  idx_src <- which(names(out) == "GRS_a_z")
  idx_z   <- which(names(out) == "GRS_a_z_z")
  expect_equal(idx_z, idx_src + 1L)
})

test_that("grs_standardize() preserves original columns unchanged", {
  dt  <- .fake_grs_dt()
  out <- grs_standardize(dt, grs_cols = "GRS_a_z")
  expect_equal(out$GRS_a_z, dt$GRS_a_z)
})

test_that("grs_standardize() works on plain data.frame input", {
  df  <- as.data.frame(.fake_grs_dt())
  out <- grs_standardize(df, grs_cols = "GRS_a_z")
  expect_s3_class(out, "data.table")
  expect_true("GRS_a_z_z" %in% names(out))
})

test_that("grs_zscore() produces identical result to grs_standardize()", {
  dt  <- .fake_grs_dt()
  r1  <- grs_standardize(dt, grs_cols = "GRS_a_z")
  r2  <- grs_zscore(dt,      grs_cols = "GRS_a_z")
  expect_equal(r1, r2)
})


# ===========================================================================
# grs_validate()
# ===========================================================================

test_that("grs_validate() aborts on non-data.frame input", {
  expect_error(grs_validate("x", outcome_col = "outcome"), "data.frame")
})

test_that("grs_validate() aborts when outcome_col is missing", {
  dt <- .fake_grs_dt()
  expect_error(grs_validate(dt, outcome_col = "no_such_col"), "not found")
})

test_that("grs_validate() aborts when grs_cols not found in data", {
  dt <- .fake_grs_dt()
  expect_error(
    grs_validate(dt, grs_cols = "no_such_grs", outcome_col = "outcome"),
    "not found"
  )
})

test_that("grs_validate() aborts when no GRS columns auto-detected", {
  dt <- data.table::data.table(IID = 1:10, outcome = rbinom(10, 1, 0.2))
  expect_error(grs_validate(dt, outcome_col = "outcome"), "No GRS")
})

test_that("grs_validate() aborts when grs_cols are non-numeric", {
  dt <- data.table::data.table(
    IID     = 1:10,
    GRS_x   = as.character(rnorm(10)),
    outcome = rbinom(10, 1, 0.3)
  )
  expect_error(
    grs_validate(dt, grs_cols = "GRS_x", outcome_col = "outcome"),
    "numeric"
  )
})

test_that("grs_validate() returns a list with 4 named elements (logistic)", {
  skip_if_not_installed("pROC")
  dt  <- .fake_grs_dt()
  res <- suppressWarnings(
    grs_validate(dt,
                 grs_cols    = c("GRS_a_z", "GRS_b_z"),
                 outcome_col = "outcome")
  )
  expect_type(res, "list")
  expect_named(res, c("per_sd", "high_vs_low", "trend", "discrimination"))
})

test_that("grs_validate() per_sd has one row per GRS (logistic, unadjusted)", {
  skip_if_not_installed("pROC")
  dt  <- .fake_grs_dt()
  res <- suppressWarnings(
    grs_validate(dt,
                 grs_cols    = c("GRS_a_z", "GRS_b_z"),
                 outcome_col = "outcome")
  )
  expect_s3_class(res$per_sd, "data.table")
  # At minimum one unadjusted row per GRS
  expect_gte(nrow(res$per_sd), 2L)
})

test_that("grs_validate() discrimination returns AUC column (logistic)", {
  skip_if_not_installed("pROC")
  dt  <- .fake_grs_dt()
  res <- suppressWarnings(
    grs_validate(dt,
                 grs_cols    = "GRS_a_z",
                 outcome_col = "outcome")
  )
  expect_true("AUC" %in% names(res$discrimination))
  expect_gte(res$discrimination$AUC, 0)
  expect_lte(res$discrimination$AUC, 1)
})

test_that("grs_validate() returns C-index column when time_col supplied (Cox)", {
  dt  <- .fake_grs_dt()
  res <- suppressWarnings(
    grs_validate(dt,
                 grs_cols    = "GRS_a_z",
                 outcome_col = "outcome",
                 time_col    = "followup_years")
  )
  expect_true("C_index" %in% names(res$discrimination))
  expect_gte(res$discrimination$C_index, 0)
  expect_lte(res$discrimination$C_index, 1)
})

test_that("grs_validate() does not modify the user's original data", {
  skip_if_not_installed("pROC")
  dt   <- .fake_grs_dt()
  orig <- data.table::copy(dt)
  suppressWarnings(
    grs_validate(dt,
                 grs_cols    = "GRS_a_z",
                 outcome_col = "outcome")
  )
  expect_equal(dt, orig)
})


# ===========================================================================
# grs_bgen2pgen() — argument validation (no RAP submission)
# ===========================================================================

test_that("grs_bgen2pgen() aborts when chr contains out-of-range values", {
  expect_error(grs_bgen2pgen(chr = 0),  "between 1 and 22")
  expect_error(grs_bgen2pgen(chr = 23), "between 1 and 22")
})

test_that("grs_bgen2pgen() aborts when chr contains NA", {
  expect_error(grs_bgen2pgen(chr = NA_integer_), "between 1 and 22")
})

test_that("grs_bgen2pgen() aborts when maf is out of range", {
  expect_error(grs_bgen2pgen(chr = 22, maf = 0),    "maf")
  expect_error(grs_bgen2pgen(chr = 22, maf = 0.5),  "maf")
  expect_error(grs_bgen2pgen(chr = 22, maf = -0.1), "maf")
})

test_that("grs_bgen2pgen() aborts when maf is not a scalar", {
  expect_error(grs_bgen2pgen(chr = 22, maf = c(0.01, 0.05)), "maf")
})

test_that("grs_bgen2pgen() aborts on invalid instance value", {
  expect_error(grs_bgen2pgen(chr = 22, instance = "xlarge"), "arg")
})

test_that("grs_bgen2pgen() aborts on invalid priority value", {
  expect_error(grs_bgen2pgen(chr = 22, priority = "medium"), "arg")
})

test_that("grs_bgen2pgen() warns when standard instance used for chr 1-16", {
  # Mock project check so the test never reaches dx run, regardless of auth state
  local_mocked_bindings(
    .dx_get_project_id = function() NA_character_,
    .package = "ukbflow"
  )
  expect_warning(
    tryCatch(
      grs_bgen2pgen(chr = 1, instance = "standard"),
      error = function(e) NULL
    ),
    "storage|mem2_ssd1|large"
  )
})


# ===========================================================================
# grs_score() — argument validation (no RAP submission)
# ===========================================================================

test_that("grs_score() aborts when file is not a character vector", {
  expect_error(grs_score(file = 123L), "character")
})

test_that("grs_score() aborts when file entries are unnamed", {
  expect_error(grs_score(file = c("weights.txt")), "named")
})

test_that("grs_score() aborts when file has partially missing names", {
  expect_error(grs_score(file = c(grs_a = "a.txt", "b.txt")), "named")
})

test_that("grs_score() aborts on duplicate names in file", {
  expect_error(
    grs_score(file = c(grs_a = "a.txt", grs_a = "b.txt")),
    "[Dd]uplicate"
  )
})

test_that("grs_score() aborts when local weight file does not exist", {
  expect_error(
    grs_score(file = c(grs_a = "/no/such/weights.txt")),
    "not found"
  )
})

test_that("grs_score() aborts when maf is out of range", {
  withr::local_tempfile(fileext = ".txt") |> (function(f) {
    file.create(f)
    expect_error(grs_score(file = c(grs_a = f), maf = 0),   "maf")
    expect_error(grs_score(file = c(grs_a = f), maf = 0.5), "maf")
  })()
})

test_that("grs_score() aborts on invalid instance value", {
  withr::local_tempfile(fileext = ".txt") |> (function(f) {
    file.create(f)
    expect_error(grs_score(file = c(grs_a = f), instance = "mega"), "arg")
  })()
})

test_that("grs_score() aborts on invalid priority value", {
  withr::local_tempfile(fileext = ".txt") |> (function(f) {
    file.create(f)
    expect_error(grs_score(file = c(grs_a = f), priority = "urgent"), "arg")
  })()
})