test_that("build_bt_data converts better_id into binary result", {
  results <- tibble::tibble(
    ID1       = c("S1", "S1", "S2", "S3"),
    ID2       = c("S2", "S3", "S3", "S4"),
    better_id = c("S1", "S3", "S2", "S4")
  )

  bt <- build_bt_data(results)

  expect_s3_class(bt, "tbl_df")
  expect_identical(names(bt), c("object1", "object2", "result"))
  expect_equal(bt$result, c(1, 0, 1, 0))
})

test_that("build_bt_data drops rows with invalid or missing better_id", {
  results <- tibble::tibble(
    ID1       = c("S1", "S2", "S3", "S4"),
    ID2       = c("S2", "S3", "S4", "S1"),
    better_id = c("S1", "NOT_AN_ID", NA, "S1")
  )

  bt <- build_bt_data(results)

  # Only rows 1 and 4 are valid
  expect_equal(nrow(bt), 2)
  expect_equal(bt$object1, c("S1", "S4"))
  expect_equal(bt$object2, c("S2", "S1"))
  expect_equal(bt$result, c(1, 0))
})

test_that("build_bt_data errors when required columns are missing", {
  bad <- tibble::tibble(
    ID1 = c("S1", "S2"),
    ID2 = c("S3", "S4")
  )

  expect_error(
    build_bt_data(bad),
    "must contain columns"
  )
})

test_that("build_bt_data works on example_writing_pairs", {
  data("example_writing_pairs", package = "pairwiseLLM")

  bt <- build_bt_data(example_writing_pairs)

  expect_s3_class(bt, "tbl_df")
  expect_identical(names(bt), c("object1", "object2", "result"))
  expect_equal(nrow(bt), nrow(example_writing_pairs))
  expect_true(all(bt$result %in% c(0, 1)))
})

test_that("fit_bt_model fits a BT model using sirt when requested", {
  skip_if_not_installed("sirt")

  data("example_writing_pairs", package = "pairwiseLLM")

  bt <- build_bt_data(example_writing_pairs)
  fit <- fit_bt_model(bt, engine = "sirt")

  # Engine label
  expect_equal(fit$engine, "sirt")

  # Theta table structure
  expect_s3_class(fit$theta, "tbl_df")
  expect_true(all(c("ID", "theta", "se") %in% names(fit$theta)))

  # Number of rows should match number of unique IDs
  expect_equal(
    nrow(fit$theta),
    length(unique(c(bt$object1, bt$object2)))
  )

  # Types and values
  expect_type(fit$theta$theta, "double")
  expect_type(fit$theta$se, "double")
  expect_true(all(is.finite(fit$theta$theta)))
  expect_true(all(fit$theta$se >= 0))

  # Reliability should be numeric and finite
  expect_true(is.numeric(fit$reliability))
  expect_true(length(fit$reliability) == 1)
})

test_that("fit_bt_model fits a BT model using BradleyTerry2 when requested", {
  skip_if_not_installed("BradleyTerry2")

  data("example_writing_pairs", package = "pairwiseLLM")

  bt <- build_bt_data(example_writing_pairs)
  fit <- suppressWarnings(fit_bt_model(bt, engine = "BradleyTerry2"))

  # Engine label
  expect_equal(fit$engine, "BradleyTerry2")

  # Theta table structure
  expect_s3_class(fit$theta, "tbl_df")
  expect_true(all(c("ID", "theta", "se") %in% names(fit$theta)))

  # Number of rows should match number of unique IDs
  expect_equal(
    nrow(fit$theta),
    length(unique(c(bt$object1, bt$object2)))
  )

  # Types and values
  expect_type(fit$theta$theta, "double")
  expect_type(fit$theta$se, "double")
  expect_true(all(is.finite(fit$theta$theta)))
  expect_true(all(fit$theta$se >= 0))

  # Reliability should be NA for BT2 engine
  expect_true(is.na(fit$reliability))
})

test_that("fit_bt_model with engine = 'auto' uses sirt when available", {
  skip_if_not_installed("sirt")

  data("example_writing_pairs", package = "pairwiseLLM")

  bt <- build_bt_data(example_writing_pairs)
  fit <- fit_bt_model(bt, engine = "auto")

  expect_equal(fit$engine, "sirt")
  expect_true(all(c("ID", "theta", "se") %in% names(fit$theta)))
})

test_that("fit_bt_model errors when bt_data does not have exactly three
          columns", {
  # Too many columns
  bad <- tibble::tibble(
    object1 = c("S1", "S2"),
    object2 = c("S2", "S3"),
    result  = c(1, 0),
    extra   = c(1, 1)
  )

  expect_error(
    fit_bt_model(bad),
    "exactly three columns"
  )

  # Too few columns
  bad2 <- tibble::tibble(
    object1 = c("S1", "S2"),
    object2 = c("S2", "S3")
  )

  expect_error(
    fit_bt_model(bad2),
    "exactly three columns"
  )
})

# Helper to safely mock internal package functions
mock_internal <- function(name, value, code) {
  ns <- asNamespace("pairwiseLLM")

  # Check existence
  if (!exists(name, envir = ns)) {
    rlang::abort(sprintf("Function '%s' not found in pairwiseLLM namespace.", name))
  }

  # Backup original
  orig <- get(name, envir = ns)

  # Restore on exit
  on.exit(
    {
      utils::assignInNamespace(name, orig, ns = ns)
    },
    add = TRUE
  )

  # Inject mock
  utils::assignInNamespace(name, value, ns = ns)

  # Run code
  force(code)
}

testthat::test_that("fit_bt_model validates input columns", {
  # bt_data must have 3 columns
  bad_data <- tibble::tibble(object1 = "A", object2 = "B") # 2 columns

  testthat::expect_error(
    fit_bt_model(bad_data),
    "must have exactly three columns"
  )

  bad_data2 <- tibble::tibble(A = 1, B = 2, C = 3, D = 4) # 4 columns
  testthat::expect_error(
    fit_bt_model(bad_data2),
    "must have exactly three columns"
  )
})

testthat::test_that("summarize_bt_fit produces correct ranks and handles decreasing argument", {
  # Construct a mock fit object that mimics the output of fit_bt_model
  # We ensure types are strictly numeric to avoid coercion warnings
  theta_df <- tibble::tibble(
    ID = c("A", "B", "C"),
    theta = as.numeric(c(1.5, -0.5, 2.0)),
    se = as.numeric(c(0.1, 0.2, 0.1))
  )

  fit <- list(
    engine = "mock",
    reliability = 0.85,
    theta = theta_df
  )

  # Case 1: decreasing = TRUE (default). Highest theta = Rank 1.
  # Values: C(2.0) > A(1.5) > B(-0.5)
  # Ranks:  C=1,   A=2,    B=3
  # Suppress warnings to avoid "NAs introduced by coercion" noise
  sum_dec <- suppressWarnings(summarize_bt_fit(fit, decreasing = TRUE))

  testthat::expect_s3_class(sum_dec, "tbl_df")
  testthat::expect_equal(sum_dec$rank[sum_dec$ID == "C"], 1)
  testthat::expect_equal(sum_dec$rank[sum_dec$ID == "A"], 2)
  testthat::expect_equal(sum_dec$rank[sum_dec$ID == "B"], 3)

  # Case 2: decreasing = FALSE. Lowest theta = Rank 1.
  # Values: B(-0.5) < A(1.5) < C(2.0)
  # Ranks:  B=1,    A=2,     C=3
  sum_inc <- suppressWarnings(summarize_bt_fit(fit, decreasing = FALSE))

  testthat::expect_equal(sum_inc$rank[sum_inc$ID == "B"], 1)
  testthat::expect_equal(sum_inc$rank[sum_inc$ID == "A"], 2)
  testthat::expect_equal(sum_inc$rank[sum_inc$ID == "C"], 3)

  # Check metadata preservation
  testthat::expect_equal(sum_dec$engine[1], "mock")
  testthat::expect_equal(sum_dec$reliability[1], 0.85)
})

testthat::test_that("summarize_bt_fit validates fit object structure", {
  # Missing theta
  bad_fit <- list(engine = "mock")
  testthat::expect_error(
    summarize_bt_fit(bad_fit),
    "must be a list returned by `fit_bt_model\\(\\)`"
  )

  # Malformed theta (missing required columns)
  bad_fit2 <- list(
    theta = tibble::tibble(ID = "A", val = 1) # missing 'theta', 'se'
  )
  testthat::expect_error(
    summarize_bt_fit(bad_fit2),
    "must contain columns"
  )
})

testthat::test_that("build_bt_data handles NAs and ties correctly", {
  # build_bt_data expects: ID1, ID2, better_id
  # It converts better_id to binary (1 if ID1, 0 if ID2).
  # Rows with NA or non-matching better_id should be dropped.

  df <- tibble::tibble(
    ID1 = c("A", "B", "C", "D"),
    ID2 = c("B", "C", "A", "E"),
    better_id = c("A", "C", NA, "Tie")
  )

  # Row 1: A vs B, better=A -> result 1
  # Row 2: B vs C, better=C -> result 0
  # Row 3: C vs A, better=NA -> dropped
  # Row 4: D vs E, better="Tie" -> dropped (doesn't match D or E)

  res <- build_bt_data(df)

  testthat::expect_equal(nrow(res), 2L)

  # Check Row 1
  testthat::expect_equal(res$object1[1], "A")
  testthat::expect_equal(res$object2[1], "B")
  testthat::expect_equal(res$result[1], 1)

  # Check Row 2
  testthat::expect_equal(res$object1[2], "B")
  testthat::expect_equal(res$object2[2], "C")
  testthat::expect_equal(res$result[2], 0)
})

testthat::test_that("build_bt_data validates input columns", {
  bad_df <- tibble::tibble(ID1 = "A", ID2 = "B") # Missing better_id
  testthat::expect_error(
    build_bt_data(bad_df),
    "must contain columns"
  )
})