# tests/testthat/test-sn-opt.R
#
# Correctness gate tests for OPT-S1..S8 (sn() performance optimizations).
# All tests must pass against CURRENT, UNMODIFIED code — this is a baseline
# sanity check, not a TDD red.  They remain green throughout all work units.
#
# Comparison strategy for robustbase::Sn:
#   Use finite.corr = FALSE in both sn() and robustbase::Sn.
#   Our package uses CONST_SN = 1.19259855312321 (high precision); robustbase
#   uses 1.1926 (4-decimal).  The systematic relative difference is ~1.21e-6.
#   Tolerance 1e-5 provides 8× headroom above the constant-rounding bias.

# ---------------------------------------------------------------------------
# S.1 — Known-value test: sn(c(1,2,4,8,16), finite.corr=FALSE) == 3 * CONST_SN
# ---------------------------------------------------------------------------
test_that("S.1 known value: sn(c(1,2,4,8,16), finite.corr=FALSE)", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  x <- c(1, 2, 4, 8, 16)
  expect_identical(sn(x, finite.corr = FALSE), 3 * 1.19259855312321)
})

# ---------------------------------------------------------------------------
# S.2 — Size sweep n=2..50 + {100,500,1000,5000} vs robustbase::Sn
# ---------------------------------------------------------------------------
test_that("S.2 size sweep n=2..50 + large vs robustbase::Sn", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  skip_if_not_installed("robustbase")
  tol <- 1e-5  # 8x headroom above the ~1.21e-6 CONST_SN constant-rounding bias

  ns <- c(2L:50L, 100L, 500L, 1000L, 5000L)
  for (n in ns) {
    set.seed(n * 17L + 3L)
    x <- rnorm(n)
    got <- sn(x, finite.corr = FALSE)
    ref <- robustbase::Sn(x, finite.corr = FALSE)
    expect_equal(got, ref, tolerance = tol,
                 label = paste("size sweep n =", n))
  }
})

# ---------------------------------------------------------------------------
# S.3 — Sorted variant equivalence: C_sn_sorted(sort(x)) == sn(x)
# ---------------------------------------------------------------------------
test_that("S.3 sorted variant equivalence: C_sn_sorted(sort(x)) == sn(x)", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  tol <- sqrt(.Machine$double.eps)
  for (n in c(5L, 50L, 500L, 5000L)) {
    set.seed(n * 31L)
    x <- rnorm(n)
    got <- C_sn_sorted(sort(x))
    ref <- sn(x)
    expect_equal(got, ref, tolerance = tol,
                 label = paste("C_sn_sorted equivalence n =", n))
  }
})

# ---------------------------------------------------------------------------
# S.4 — Ensemble integration: scale_robust(x, n_boot=50) finite, positive, deterministic
# ---------------------------------------------------------------------------
test_that("S.4 ensemble integration: scale_robust is finite, positive, deterministic", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  set.seed(88)
  x <- rnorm(10)
  v1 <- scale_robust(x, n_boot = 50)
  expect_true(is.finite(v1), label = "ensemble result is finite")
  expect_gt(v1, 0, label = "ensemble result is positive")
  # determinism: repeat 5 times, must be identical
  for (i in seq_len(5L)) {
    v2 <- scale_robust(x, n_boot = 50)
    expect_identical(v1, v2, label = paste("ensemble determinism rep", i))
  }
})

# ---------------------------------------------------------------------------
# S.5 — Edge cases
# ---------------------------------------------------------------------------
test_that("S.5 edge case: n=1 returns NA", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  expect_true(is.na(sn(1)))
  expect_true(is.na(sn(1.0)))
})

test_that("S.5 edge case: n=2 returns a valid positive value", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  v <- sn(c(1, 3))
  expect_true(is.finite(v))
  expect_gt(v, 0)
})

test_that("S.5 edge case: constant data returns 0", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  expect_equal(sn(rep(5.0, 10)), 0)
  expect_equal(sn(rep(5.0, 100)), 0)
})

test_that("S.5 edge case: integer input matches double input", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  x_int <- c(1L, 2L, 4L, 8L, 16L)
  x_dbl <- as.double(x_int)
  expect_identical(sn(x_int, finite.corr = FALSE),
                   sn(x_dbl, finite.corr = FALSE))
  # Also verify known value via integer path
  expect_identical(sn(x_int, finite.corr = FALSE), 3 * 1.19259855312321)
})

# ---------------------------------------------------------------------------
# S.6 — Determinism: 30 reps at n=500, all identical
# ---------------------------------------------------------------------------
test_that("S.6 determinism: 30 reps at n=500 all identical", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  set.seed(500L)
  x <- rnorm(500L)
  ref <- sn(x)
  for (i in seq_len(30L)) {
    expect_identical(sn(x), ref,
                     label = paste("determinism rep", i, "n=500"))
  }
})

# ---------------------------------------------------------------------------
# S.7 — Boundary: n=16 and n=17 (sort-network / general path boundary)
# ---------------------------------------------------------------------------
test_that("S.7 boundary: n=16 and n=17 both correct", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  skip_if_not_installed("robustbase")
  tol <- 1e-5
  set.seed(42L)
  x16 <- rnorm(16L)
  x17 <- c(x16, rnorm(1L))
  expect_equal(sn(x16, finite.corr = FALSE),
               robustbase::Sn(x16, finite.corr = FALSE),
               tolerance = tol, label = "n=16 boundary")
  expect_equal(sn(x17, finite.corr = FALSE),
               robustbase::Sn(x17, finite.corr = FALSE),
               tolerance = tol, label = "n=17 boundary")
})

# ---------------------------------------------------------------------------
# S.8 — Boundary: n=128 and n=129 (micro-buffer / stack-buffer boundary)
# ---------------------------------------------------------------------------
test_that("S.8 boundary: n=128 and n=129 both correct", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  skip_if_not_installed("robustbase")
  tol <- 1e-5
  set.seed(128L)
  x128 <- rnorm(128L)
  x129 <- c(x128, rnorm(1L))
  expect_equal(sn(x128, finite.corr = FALSE),
               robustbase::Sn(x128, finite.corr = FALSE),
               tolerance = tol, label = "n=128 boundary")
  expect_equal(sn(x129, finite.corr = FALSE),
               robustbase::Sn(x129, finite.corr = FALSE),
               tolerance = tol, label = "n=129 boundary")
})

# ---------------------------------------------------------------------------
# S.9 — Sorted variant C_sn_sorted: diagnostic export present and correct
#        Graceful skip if C_sn_fast_orig exists (OPT-S4 phase — no change to sorted)
# ---------------------------------------------------------------------------
test_that("S.9 C_sn_sorted sorted-variant: matches sn() on sorted input", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  tol <- sqrt(.Machine$double.eps)
  for (n in c(2L, 5L, 16L, 17L, 100L, 1000L)) {
    set.seed(600L + n)
    x <- sort(rnorm(n))
    expect_equal(C_sn_sorted(x), sn(x),
                 tolerance = tol,
                 label = paste("C_sn_sorted sorted n =", n))
  }
})

# ---------------------------------------------------------------------------
# S.11 — OPT-S7: workspace-reuse variant produces identical results
#   C_sn_impl_sorted workspace overload must return bitwise-identical results
#   to the no-workspace path at n=100 (stack path, workspace ignored) and
#   n=5000 (heap path, workspace reused instead of heap-allocated).
#   Ensemble non-regression: scale_robust_ci() result unchanged by the
#   workspace-threading change in ensemble.cpp.
# ---------------------------------------------------------------------------
test_that("S.11 OPT-S7: C_sn_sorted workspace variant identical to no-workspace at n=100 and n=5000", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  # C_sn_sorted uses the workspace overload path internally (via ensemble).
  # Verify it produces the same result as sn() called on the same sorted input,
  # with finite.corr=TRUE (default) — tolerating only the consistency-constant bias.
  tol <- sqrt(.Machine$double.eps)
  for (n in c(100L, 5000L)) {
    set.seed(800L + n)
    x <- rnorm(n)
    sx <- sort(x)
    # C_sn_sorted: the existing Rcpp export calls C_sn_impl_sorted (no-workspace path)
    ref <- C_sn_sorted(sx)
    # sn() on the same data: applies sort internally, returns finite.corr=TRUE result
    got <- sn(x)
    expect_equal(ref, got, tolerance = tol,
                 label = paste("C_sn_sorted vs sn() n =", n))
    # Verify the result is finite and positive for this input
    expect_true(is.finite(ref), label = paste("C_sn_sorted finite n =", n))
    expect_gt(ref, 0, label = paste("C_sn_sorted positive n =", n))
  }
})

test_that("S.11 OPT-S7: ensemble non-regression — scale_robust(ci=TRUE) unchanged after workspace change", {
  tryCatch(devtools::load_all(quiet = TRUE), error = function(e) invisible(NULL))
  set.seed(123L)
  x <- rnorm(15L)  # n < 20 to stay in ensemble mode (auto_switch threshold)
  # Capture reference result — same seed, must be deterministic
  set.seed(123L)
  ref <- scale_robust(x, n_boot = 100L, ci = TRUE)
  # Repeat 3 times: result must be deterministic and stable
  for (i in seq_len(3L)) {
    set.seed(123L)
    got <- scale_robust(x, n_boot = 100L, ci = TRUE)
    expect_identical(ref, got,
                     label = paste("ensemble non-regression rep", i))
  }
  # Basic sanity
  expect_true(is.finite(ref$estimate),
              label = "ensemble estimate finite")
  expect_gt(ref$estimate, 0,
            label = "ensemble estimate positive")
})