# =============================================================================
# test-decode.R — Unit tests for decode_ series (no network, no real files)
# =============================================================================

# ===========================================================================
# Shared helpers
# ===========================================================================

.fake_fields_decode <- function() {
  data.frame(
    field_name = c(
      "participant.eid",
      "participant.p31",
      "participant.p53_i0", "participant.p53_i1",
      "participant.p22189",
      "participant.p20002_i0_a0",
      "participant.p22009_a1"
    ),
    title = c(
      "Participant ID",
      "Sex",
      "Date of attending assessment centre | Instance 0",
      "Date of attending assessment centre | Instance 1",
      "Townsend deprivation index at recruitment",
      "Non-cancer illness code, self-reported | Instance 0 | Array 0",
      "Genetic principal components | Array 1"
    ),
    stringsAsFactors = FALSE
  )
}

.fake_field_meta <- function() {
  data.frame(
    field_id    = c(31L, 21000L, 20116L, 21022L, 22189L, 53L),
    value_type  = c(21L, 21L,    21L,    31L,    31L,    51L),
    encoding_id = c(9L,  1001L,  90L,    0L,     0L,     0L),
    stringsAsFactors = FALSE
  )
}

.fake_esimpint <- function() {
  data.frame(
    encoding_id = c(9L,  9L,    90L,     90L,        90L),
    value       = c(0L,  1L,    0L,      1L,          2L),
    meaning     = c("Female", "Male", "Never", "Previous", "Current"),
    stringsAsFactors = FALSE
  )
}

.set_decode_cache <- function() {
  # Reason: fields cache is a per-dataset named list; dataset cache avoids
  # dx ls network call so decode_names() -> extract_ls() is a pure memory path
  .ukbflow_cache$dataset    <- "fake_dataset"
  .ukbflow_cache$fields     <- list("fake_dataset" = .fake_fields_decode())
  .ukbflow_cache$field_meta <- .fake_field_meta()
  .ukbflow_cache$esimpint   <- .fake_esimpint()
}

.clear_decode_cache <- function() {
  .ukbflow_cache$dataset    <- NULL
  .ukbflow_cache$fields     <- NULL
  .ukbflow_cache$field_meta <- NULL
  .ukbflow_cache$esimpint   <- NULL
}


# ===========================================================================
# .title_to_snake() — pure function
# ===========================================================================

test_that(".title_to_snake() handles simple title", {
  expect_equal(ukbflow:::.title_to_snake("Sex"), "sex")
})

test_that(".title_to_snake() converts spaces and special chars to underscores", {
  expect_equal(
    ukbflow:::.title_to_snake("Townsend deprivation index at recruitment"),
    "townsend_deprivation_index_at_recruitment"
  )
})

test_that(".title_to_snake() appends instance suffix", {
  result <- ukbflow:::.title_to_snake(
    "Date of attending assessment centre | Instance 0"
  )
  expect_true(endsWith(result, "_i0"))
  expect_true(startsWith(result, "date_of_attending"))
})

test_that(".title_to_snake() appends array suffix", {
  result <- ukbflow:::.title_to_snake("Genetic principal components | Array 1")
  expect_true(endsWith(result, "_a1"))
  expect_true(startsWith(result, "genetic_principal"))
})

test_that(".title_to_snake() handles both instance and array suffixes", {
  result <- ukbflow:::.title_to_snake(
    "Non-cancer illness code, self-reported | Instance 0 | Array 1"
  )
  expect_true(grepl("_i0_a1$", result))
})

test_that(".title_to_snake() strips leading and trailing underscores from base", {
  # Titles starting with special chars should not produce leading underscores
  result <- ukbflow:::.title_to_snake("Age at recruitment")
  expect_false(startsWith(result, "_"))
  expect_false(endsWith(result, "_"))
})


# ===========================================================================
# .extract_field_id() — pure function
# ===========================================================================

test_that(".extract_field_id() extracts field_id from participant.pXXXX", {
  expect_equal(ukbflow:::.extract_field_id("participant.p31"), 31L)
})

test_that(".extract_field_id() extracts field_id from pXXXX", {
  expect_equal(ukbflow:::.extract_field_id("p31"), 31L)
})

test_that(".extract_field_id() extracts field_id from pXXXX_iX_aX", {
  expect_equal(ukbflow:::.extract_field_id("p20002_i0_a1"), 20002L)
})

test_that(".extract_field_id() returns NA for eid column", {
  expect_true(is.na(ukbflow:::.extract_field_id("eid")))
})

test_that(".extract_field_id() returns NA for non-UKB column name", {
  expect_true(is.na(ukbflow:::.extract_field_id("my_column")))
})


# ===========================================================================
# .build_name_map() — pure function
# ===========================================================================

test_that(".build_name_map() maps participant.eid to eid", {
  result <- ukbflow:::.build_name_map("participant.eid", .fake_fields_decode())
  expect_equal(result, "eid")
})

test_that(".build_name_map() maps participant.p31 to sex", {
  result <- ukbflow:::.build_name_map("participant.p31", .fake_fields_decode())
  expect_equal(result, "sex")
})

test_that(".build_name_map() maps p31 (no prefix) to sex", {
  result <- ukbflow:::.build_name_map("p31", .fake_fields_decode())
  expect_equal(result, "sex")
})

test_that(".build_name_map() appends _i0 suffix for instance fields", {
  result <- ukbflow:::.build_name_map("participant.p53_i0", .fake_fields_decode())
  expect_true(endsWith(result, "_i0"))
})

test_that(".build_name_map() appends _a1 suffix for array fields", {
  result <- ukbflow:::.build_name_map("participant.p22009_a1", .fake_fields_decode())
  expect_true(endsWith(result, "_a1"))
})

test_that(".build_name_map() appends _i0_a0 for instance+array fields", {
  result <- ukbflow:::.build_name_map("participant.p20002_i0_a0", .fake_fields_decode())
  expect_true(grepl("_i0_a0$", result))
})

test_that(".build_name_map() falls back to stripped name for unknown column", {
  result <- ukbflow:::.build_name_map("participant.p99999", .fake_fields_decode())
  expect_equal(result, "p99999")
})


# ===========================================================================
# decode_names() — user-facing function
# ===========================================================================

test_that("decode_names() stops on non-data.frame input", {
  expect_error(decode_names("not a df"), "data.frame")
})

test_that("decode_names() stops on invalid max_nchar", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(participant.p31 = 1L)
  expect_error(decode_names(df, max_nchar = 0), "positive integer")
})

test_that("decode_names() renames participant.eid to eid", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(`participant.eid` = 1L, check.names = FALSE)
  result <- suppressMessages(decode_names(df))
  expect_true("eid" %in% names(result))
})

test_that("decode_names() renames p31 to sex", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(`participant.eid` = 1L, `participant.p31` = 1L,
                   check.names = FALSE)
  result <- suppressMessages(decode_names(df))
  expect_true("sex" %in% names(result))
  expect_false("participant.p31" %in% names(result))
})

test_that("decode_names() works with extract_batch format (no prefix)", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(eid = 1L, p31 = 1L)
  result <- suppressMessages(decode_names(df))
  expect_true("eid"  %in% names(result))
  expect_true("sex"  %in% names(result))
})

test_that("decode_names() preserves data.table class", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  dt <- data.table::data.table(`participant.eid` = 1L, `participant.p31` = 1L)
  result <- suppressMessages(decode_names(dt))
  expect_true(data.table::is.data.table(result))
})

test_that("decode_names() warns when a renamed column exceeds max_nchar", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  # p53_i0 → "date_of_attending_assessment_centre_i0" (>30 chars)
  df <- data.frame(`participant.p53_i0` = "2008-01-01", check.names = FALSE)
  expect_message(
    decode_names(df, max_nchar = 30),
    "longer than 30"
  )
})

test_that("decode_names() does not warn within max_nchar limit", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(`participant.p31` = 1L, check.names = FALSE)
  # "sex" is 3 chars — no warning expected
  expect_no_warning(suppressMessages(decode_names(df, max_nchar = 60)))
})

test_that("decode_names() produces correct output when dataset and fields are cached", {
  # Reason: with both $dataset and $fields populated, decode_names() → extract_ls()
  # is a pure memory path — verify the end result is correct, not the mechanism
  .set_decode_cache()
  on.exit(.clear_decode_cache())

  df <- data.frame(`participant.eid` = 1L, `participant.p31` = 1L,
                   check.names = FALSE)
  result <- suppressMessages(decode_names(df))
  expect_true("eid" %in% names(result))
  expect_true("sex" %in% names(result))
})

test_that("decode_names() handles duplicate snake_case names with make.unique()", {
  # Two columns that would produce the same snake_case name
  fields_dup <- data.frame(
    field_name = c("participant.p99_i0", "participant.p99_i0_dup"),
    title      = c("Some field | Instance 0", "Some field | Instance 0"),
    stringsAsFactors = FALSE
  )
  .ukbflow_cache$dataset <- "fake_dataset"
  .ukbflow_cache$fields  <- list("fake_dataset" = fields_dup)
  on.exit(.clear_decode_cache())

  df <- data.frame(p99_i0 = 1L, p99_i0_dup = 2L)
  result <- suppressMessages(decode_names(df))
  expect_equal(length(unique(names(result))), 2L)
})


# ===========================================================================
# decode_values() — user-facing function
# ===========================================================================

test_that("decode_values() stops on non-data.frame input", {
  expect_error(decode_values("not a df"), "data.frame")
})

test_that("decode_values() stops on non-string metadata_dir", {
  df <- data.frame(p31 = 1L)
  expect_error(decode_values(df, metadata_dir = 123))
})

test_that("decode_values() stops on NA metadata_dir", {
  df <- data.frame(p31 = 1L)
  expect_error(decode_values(df, metadata_dir = NA_character_))
})

test_that("decode_values() stops on length > 1 metadata_dir", {
  df <- data.frame(p31 = 1L)
  expect_error(decode_values(df, metadata_dir = c("a/", "b/")))
})

test_that("decode_values() warns when no UKB field columns found", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(sex = c("Male", "Female"))
  expect_message(decode_values(df), "no UKB field ID columns detected",
                 ignore.case = TRUE)
})

test_that("decode_values() decodes p31: 0 → Female, 1 → Male", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(p31 = c(0L, 1L, 1L, 0L))
  result <- suppressMessages(decode_values(df))
  expect_equal(result$p31, c("Female", "Male", "Male", "Female"))
})

test_that("decode_values() decodes p20116 (smoking): 0/1/2 → labels", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(p20116_i0 = c(0L, 1L, 2L))
  result <- suppressMessages(decode_values(df))
  expect_equal(result$p20116_i0, c("Never", "Previous", "Current"))
})

test_that("decode_values() leaves continuous column unchanged", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(p31 = 1L, p22189 = -3.94)
  result <- suppressMessages(decode_values(df))
  expect_equal(result$p22189, -3.94)
  expect_true(is.numeric(result$p22189))
})

test_that("decode_values() leaves already-decoded character column unchanged", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(p31 = c("Female", "Male"), stringsAsFactors = FALSE)
  result <- suppressMessages(decode_values(df))
  # Should remain unchanged — already character
  expect_equal(result$p31, c("Female", "Male"))
})

test_that("decode_values() returns NA for codes absent from encoding table", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(p31 = c(0L, 1L, 99L))   # 99 is not in encoding 9
  result <- suppressMessages(decode_values(df))
  expect_true(is.na(result$p31[3]))
})

test_that("decode_values() preserves data.table class", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  dt <- data.table::data.table(p31 = c(0L, 1L))
  result <- suppressMessages(decode_values(dt))
  expect_true(data.table::is.data.table(result))
})

test_that("decode_values() leaves non-categorical field (p53 date) unchanged", {
  .set_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(p31 = 1L, p53_i0 = 13956L)
  result <- suppressMessages(decode_values(df))
  expect_equal(result$p53_i0, 13956L)
})

test_that("decode_values() errors with clear message when field.tsv missing", {
  .clear_decode_cache()
  on.exit(.clear_decode_cache())
  df <- data.frame(p31 = 1L)
  expect_error(
    suppressMessages(decode_values(df, metadata_dir = tempdir())),
    "field.tsv"
  )
})

test_that("decode_values() errors with clear message when esimpint.tsv missing", {
  .clear_decode_cache()
  on.exit(.clear_decode_cache())

  # Provide field.tsv in a temp dir but not esimpint.tsv
  tmp <- tempdir()
  field_path <- file.path(tmp, "field.tsv")
  data.table::fwrite(.fake_field_meta(), field_path, sep = "\t")
  on.exit(unlink(field_path), add = TRUE)

  df <- data.frame(p31 = 1L)
  expect_error(
    suppressMessages(decode_values(df, metadata_dir = tmp)),
    "esimpint.tsv"
  )
})