test_that("utility functions work correctly", {
  # Seed the RNG like the other tests in this file so the random inputs
  # below are reproducible from run to run.
  set.seed(42)

  # %||%: the right-hand side is expected when the left-hand side is NULL or
  # zero-length; otherwise the left-hand side is expected back.
  expect_equal(NULL %||% 5, 5)
  expect_equal(3 %||% 5, 3)
  expect_equal(integer(0) %||% 5, 5)

  # robust_scale: numeric for a random sample, NA for a single observation,
  # and strictly positive for data containing an outlier.
  expect_true(is.numeric(robust_scale(rnorm(50))))
  expect_true(is.na(robust_scale(1)))
  # expect_gt() reports the actual value on failure, whereas
  # expect_true(x > 0) would only report that the condition was FALSE.
  expect_gt(robust_scale(c(1, 2, 3, 100)), 0)

  # safe_sd: expected to be strictly positive, even for constant input.
  expect_gt(safe_sd(c(5, 5, 5)), 0)
  expect_gt(safe_sd(rnorm(20)), 0)
})

test_that("stochasticity metrics work correctly", {
  set.seed(42)
  x <- rnorm(30, 10, 2)
  y <- rnorm(30, 5, 2)

  # CV: numeric and strictly positive for a sample; NA for one observation.
  cv_x <- cv(x)
  expect_true(is.numeric(cv_x))
  # expect_gt() prints the offending value when the comparison fails.
  expect_gt(cv_x, 0)
  expect_true(is.na(cv(1)))

  # Fano factor: numeric for the shifted sample; NA for all-negative input.
  expect_true(is.numeric(fano_factor(x + 10)))
  expect_true(is.na(fano_factor(c(-1, -2))))

  # SSMD: x is simulated with the larger mean, so a positive value is
  # expected.
  ssmd_val <- ssmd(x, y)
  expect_true(is.numeric(ssmd_val))
  expect_gt(ssmd_val, 0)
})

test_that("distance metrics work correctly", {
  set.seed(42)
  x <- rnorm(30, 0, 1)
  y <- rnorm(30, 2, 1)

  # Energy distance: numeric and non-negative.
  ed <- energy_distance(x, y)
  expect_true(is.numeric(ed))
  # expect_gte() reports the actual value on failure, unlike
  # expect_true(ed >= 0).
  expect_gte(ed, 0)

  # Wasserstein: numeric and non-negative.
  wd <- wasserstein_1d(x, y)
  expect_true(is.numeric(wd))
  expect_gte(wd, 0)

  # Log-Euclidean: inputs are exponentiated so they are strictly positive.
  led <- log_euclidean_distance(exp(x), exp(y))
  expect_true(is.numeric(led))
})

test_that("scoring rules work correctly", {
  set.seed(42)
  forecast <- rnorm(50, 5, 1)

  # CRPS: numeric and non-negative for an empirical forecast sample.
  crps <- crps_empirical(forecast, 5.5)
  expect_true(is.numeric(crps))
  # expect_gte() shows the actual score if the bound is violated, whereas
  # expect_true(crps >= 0) would only report FALSE.
  expect_gte(crps, 0)

  # Dawid-Sebastiani: only the return type is checked here.
  ds <- dawid_sebastiani(5.5, 5, 1)
  expect_true(is.numeric(ds))

  # Interval score: only the return type is checked here.
  is_val <- interval_score(5.5, 3, 7, alpha = 0.1)
  expect_true(is.numeric(is_val))
})

test_that("separation assessment works correctly", {
  set.seed(42)
  # Simulated control replicates. The draw order is significant: it fixes
  # which seeded values each vector receives.
  mp_pos <- rnorm(8, 100, 10)
  mp_neg <- rnorm(8, 20, 5)
  ttt_pos <- rnorm(8, 8, 1)
  ttt_neg <- rnorm(8, 72, 5)

  separation <- assess_separation(mp_pos, mp_neg, ttt_pos, ttt_neg)
  known_profiles <- c("standard", "sensitive", "matrix_robust")

  # The result is a list exposing the combined effect size and a profile
  # recommendation drawn from the known profile names.
  expect_true(is.list(separation))
  for (field in c("d_combined", "recommended_profile")) {
    expect_true(field %in% names(separation))
  }
  expect_true(separation$recommended_profile %in% known_profiles)
})

test_that("compute_instability_flags works correctly", {
  set.seed(42)
  expected_fields <- c("unstable", "reasons", "metrics")

  # Scenario 1: treatment simulated close to the positive control.
  # NOTE(review): this case is meant to be stable, but only the type of
  # `unstable` is asserted below, not its value.
  separated <- compute_instability_flags(
    trt_mp = rnorm(8, 95, 10),
    trt_ttt = rnorm(8, 10, 2),
    pc_mp = rnorm(8, 100, 10),
    nc_mp = rnorm(8, 20, 5),
    pc_ttt = rnorm(8, 8, 1),
    nc_ttt = rnorm(8, 72, 5),
    crossing_threshold = 40,
    strictness = "moderate"
  )
  expect_true(is.list(separated))
  for (field in expected_fields) {
    expect_true(field %in% names(separated))
  }
  expect_true(is.logical(separated$unstable))

  # Scenario 2: treatment sits between the controls, with TTT values
  # alternating between fast and slow. Only the return type is checked.
  ambiguous <- compute_instability_flags(
    trt_mp = rnorm(8, 50, 30),
    trt_ttt = c(10, 50, 15, 60, 12, 55, 20, 65),
    pc_mp = rnorm(8, 100, 10),
    nc_mp = rnorm(8, 20, 5),
    pc_ttt = rnorm(8, 8, 1),
    nc_ttt = rnorm(8, 72, 5),
    crossing_threshold = 40,
    strictness = "strict"
  )
  expect_true(is.list(ambiguous))
})

test_that("kwela_analyze works with basic data", {
  set.seed(42)
  # Three groups of eight wells each; TTT is drawn before MP so the seeded
  # values land in the same columns every run.
  n_rep <- 8
  ttt <- c(rnorm(n_rep, 8, 1), rnorm(n_rep, 72, 5), rnorm(n_rep, 12, 3))
  mp <- c(rnorm(n_rep, 100, 10), rnorm(n_rep, 20, 5), rnorm(n_rep, 85, 15))
  plate <- data.frame(
    Treatment = rep(
      c("Positive Control", "Negative Control", "Sample_A"),
      each = n_rep
    ),
    TTT = ttt,
    MP = mp
  )

  analysed <- kwela_analyze(plate, verbose = FALSE)

  # Structure: a data frame carrying the per-well output columns.
  expect_true(is.data.frame(analysed))
  output_cols <- c("Type", "classification", "well_score", "matrix_instability")
  for (col in output_cols) {
    expect_true(col %in% names(analysed))
  }

  # Attributes: fixed metadata values, plus summaries that must be present.
  expect_equal(attr(analysed, "version"), "1.0.0")
  expect_equal(attr(analysed, "mode"), "diagnostic")
  for (attr_name in c("trt_summary", "separation", "instability_summary")) {
    expect_false(is.null(attr(analysed, attr_name)))
  }
})

test_that("dual-mode operation works", {
  set.seed(42)
  df <- data.frame(
    Treatment = c(rep("Positive Control", 8), rep("Negative Control", 8),
                  rep("Sample_A", 8)),
    TTT = c(rnorm(8, 8, 1), rnorm(8, 72, 5), rnorm(8, 12, 3)),
    MP = c(rnorm(8, 100, 10), rnorm(8, 20, 5), rnorm(8, 85, 15))
  )

  res_diag <- kwela_analyze(df, mode = "diagnostic", verbose = FALSE)
  res_res <- kwela_analyze(df, mode = "research", verbose = FALSE)

  # The requested mode is recorded on the result.
  expect_equal(attr(res_diag, "mode"), "diagnostic")
  expect_equal(attr(res_res, "mode"), "research")

  # Diagnostic mode: no stochastic rescue.
  # Guard against a vacuous pass: sum() of a missing column (NULL) or of an
  # empty selection is 0, so first confirm that the column exists and that
  # there are sample wells to inspect.
  expect_true("stoch_rescue" %in% names(res_diag))
  is_sample <- res_diag$Type == "Sample"
  expect_true(any(is_sample))
  expect_equal(sum(res_diag$stoch_rescue[is_sample]), 0)
})

test_that("instability strictness levels work", {
  set.seed(42)
  # Three groups of eight wells each; TTT is drawn before MP so the seeded
  # values land in the same columns every run.
  n_rep <- 8
  ttt <- c(rnorm(n_rep, 8, 1), rnorm(n_rep, 72, 5), rnorm(n_rep, 12, 3))
  mp <- c(rnorm(n_rep, 100, 10), rnorm(n_rep, 20, 5), rnorm(n_rep, 85, 15))
  plate <- data.frame(
    Treatment = rep(
      c("Positive Control", "Negative Control", "Sample_A"),
      each = n_rep
    ),
    TTT = ttt,
    MP = mp
  )

  # Each requested strictness level is echoed back as an attribute.
  strictness_levels <- c("strict", "moderate", "lenient")
  for (level in strictness_levels) {
    analysed <- kwela_analyze(
      plate,
      instability_strictness = level,
      verbose = FALSE
    )
    expect_equal(attr(analysed, "instability_strictness"), level)
  }

  # Switching the check off is recorded as FALSE on the result.
  unchecked <- kwela_analyze(plate, instability_check = FALSE, verbose = FALSE)
  expect_false(attr(unchecked, "instability_check"))
})

test_that("kwela_summarize extracts treatment summary", {
  set.seed(42)
  # Three groups of eight wells each; TTT is drawn before MP so the seeded
  # values land in the same columns every run.
  n_rep <- 8
  ttt <- c(rnorm(n_rep, 8, 1), rnorm(n_rep, 72, 5), rnorm(n_rep, 12, 3))
  mp <- c(rnorm(n_rep, 100, 10), rnorm(n_rep, 20, 5), rnorm(n_rep, 85, 15))
  plate <- data.frame(
    Treatment = rep(
      c("Positive Control", "Negative Control", "Sample_A"),
      each = n_rep
    ),
    TTT = ttt,
    MP = mp
  )

  analysed <- kwela_analyze(plate, verbose = FALSE)
  trt_table <- kwela_summarize(analysed)

  # The summary is a data frame with the treatment-level columns.
  expect_true(is.data.frame(trt_table))
  for (col in c("Treatment", "classification", "positive_rate")) {
    expect_true(col %in% names(trt_table))
  }
})

test_that("kwela_diagnostics returns diagnostic info with instability", {
  set.seed(42)
  # Three groups of eight wells each; TTT is drawn before MP so the seeded
  # values land in the same columns every run.
  n_rep <- 8
  ttt <- c(rnorm(n_rep, 8, 1), rnorm(n_rep, 72, 5), rnorm(n_rep, 12, 3))
  mp <- c(rnorm(n_rep, 100, 10), rnorm(n_rep, 20, 5), rnorm(n_rep, 85, 15))
  plate <- data.frame(
    Treatment = rep(
      c("Positive Control", "Negative Control", "Sample_A"),
      each = n_rep
    ),
    TTT = ttt,
    MP = mp
  )

  analysed <- kwela_analyze(plate, verbose = FALSE)
  diagnostics <- kwela_diagnostics(analysed)

  # Top-level shape and metadata.
  expect_true(is.list(diagnostics))
  expect_equal(diagnostics$version, "1.0.0")
  expect_equal(diagnostics$mode, "diagnostic")

  # The instability section exists and carries its expected entries.
  expect_true("instability" %in% names(diagnostics))
  for (entry in c("check_enabled", "n_inconclusive_matrix")) {
    expect_true(entry %in% names(diagnostics$instability))
  }
})

test_that("profile selection works correctly", {
  set.seed(42)
  # Three groups of eight wells each; TTT is drawn before MP so the seeded
  # values land in the same columns every run.
  n_rep <- 8
  ttt <- c(rnorm(n_rep, 8, 1), rnorm(n_rep, 72, 5), rnorm(n_rep, 12, 3))
  mp <- c(rnorm(n_rep, 100, 10), rnorm(n_rep, 20, 5), rnorm(n_rep, 85, 15))
  plate <- data.frame(
    Treatment = rep(
      c("Positive Control", "Negative Control", "Sample_A"),
      each = n_rep
    ),
    TTT = ttt,
    MP = mp
  )

  # An explicitly requested profile is echoed back unchanged.
  for (requested in c("standard", "sensitive", "matrix_robust")) {
    analysed <- kwela_analyze(plate, profile = requested, verbose = FALSE)
    expect_equal(attr(analysed, "profile"), requested)
  }

  # "auto" must resolve to one of the concrete profiles.
  auto_run <- kwela_analyze(plate, profile = "auto", verbose = FALSE)
  resolved <- attr(auto_run, "profile")
  expect_true(resolved %in% c("standard", "sensitive", "matrix_robust"))
})

test_that("consensus rules work correctly", {
  set.seed(42)
  # Three groups of eight wells each; TTT is drawn before MP so the seeded
  # values land in the same columns every run.
  n_rep <- 8
  ttt <- c(rnorm(n_rep, 8, 1), rnorm(n_rep, 72, 5), rnorm(n_rep, 12, 3))
  mp <- c(rnorm(n_rep, 100, 10), rnorm(n_rep, 20, 5), rnorm(n_rep, 85, 15))
  plate <- data.frame(
    Treatment = rep(
      c("Positive Control", "Negative Control", "Sample_A"),
      each = n_rep
    ),
    TTT = ttt,
    MP = mp
  )

  # Every supported consensus rule is accepted and recorded on the result.
  consensus_rules <- c("strict", "majority", "flexible", "threshold")
  for (rule in consensus_rules) {
    analysed <- kwela_analyze(plate, consensus = rule, verbose = FALSE)
    expect_equal(attr(analysed, "consensus"), rule)
  }
})

test_that("bootstrap summary works", {
  set.seed(42)
  # Three groups of eight wells each; TTT is drawn before MP so the seeded
  # values land in the same columns every run.
  n_rep <- 8
  ttt <- c(rnorm(n_rep, 8, 1), rnorm(n_rep, 72, 5), rnorm(n_rep, 12, 3))
  mp <- c(rnorm(n_rep, 100, 10), rnorm(n_rep, 20, 5), rnorm(n_rep, 85, 15))
  plate <- data.frame(
    Treatment = rep(
      c("Positive Control", "Negative Control", "Sample_A"),
      each = n_rep
    ),
    TTT = ttt,
    MP = mp
  )

  analysed <- kwela_analyze(plate, verbose = FALSE)

  # Re-seed immediately before resampling so the bootstrap is reproducible.
  set.seed(123)
  boot_table <- kwela_bootstrap_summary(analysed, B = 100)

  # The summary is a data frame with a point estimate and interval bounds.
  expect_true(is.data.frame(boot_table))
  for (col in c("mean_score", "score_lo", "score_hi")) {
    expect_true(col %in% names(boot_table))
  }
})