# test-fuse1-nr-step.R — WU-FUSE-1 correctness guards for the fused
# single-pass NR step kernel.
#
# The fused AVX2 kernel accumulates via _mm256_fmadd_pd whereas the
# 3-phase path accumulated via separate scalar adds. FMA vs scalar-add
# rounding can differ by ~1-2 ULP per element. We therefore use
# n * .Machine$double.eps tolerance (≈ tol_n(n) used elsewhere).
#
# Reference cases from the WU-FUSE-1 RED phase (NR 3-phase era). Only n=20
# carries a literal pin value; the other cases are compared at tol_n(n):
#   n=4,  seed=42:   use expect_equal with tol_n(4)
#   n=10, seed=42:   use expect_equal with tol_n(10)
#   n=20, seed=42:   1.1198814880955629825  (already in test-robScale-nr-production.R)
#   n=100, seed=7:   use expect_equal with tol_n(100)
#   n=1000, seed=7:  use expect_equal with tol_n(1000)
#
# Mod-4 boundary guards: n=4 (no tail), n=5 (1-element tail),
#   n=6 (2-element tail), n=7 (3-element tail) — AVX2 fused path
#   must produce correct scalar tail.

# Tolerance helper: n machine epsilons, passed as `tolerance` to
# expect_equal() by the stability tests in this file.
tol_n <- function(n) {
  eps <- .Machine$double.eps
  n * eps
}

# ------------------------------------------------------------------
# 1. Stability: output within n*eps of pre-fused-kernel pin values
# ------------------------------------------------------------------

test_that("WU-FUSE-1 RED: robScale stable after fused kernel — n=4 no tail", {
  # Mod-4 boundary case from the file header: n = 4, no tail elements.
  n_obs <- 4L
  set.seed(42)
  sample_x <- rnorm(n_obs)
  baseline <- robScale(sample_x)

  # Sanity: the estimate must be a finite, strictly positive number.
  expect_true(is.finite(baseline) && baseline > 0)

  # After GREEN: evaluating the same input again must match to n * eps.
  expect_equal(
    robScale(sample_x),
    baseline,
    tolerance = tol_n(n_obs)
  )
})

test_that("WU-FUSE-1 RED: robScale stable after fused kernel — n=5 tail=1", {
  # Mod-4 boundary case from the file header: n = 5, 1-element tail.
  n_obs <- 5L
  set.seed(42)
  sample_x <- rnorm(n_obs)
  baseline <- robScale(sample_x)

  # Sanity: the estimate must be a finite, strictly positive number.
  expect_true(is.finite(baseline) && baseline > 0)

  # Evaluating the same input again must match to n * eps.
  expect_equal(
    robScale(sample_x),
    baseline,
    tolerance = tol_n(n_obs)
  )
})

test_that("WU-FUSE-1 RED: robScale stable after fused kernel — n=6 tail=2", {
  # Mod-4 boundary case from the file header: n = 6, 2-element tail.
  n_obs <- 6L
  set.seed(42)
  sample_x <- rnorm(n_obs)
  baseline <- robScale(sample_x)

  # Sanity: the estimate must be a finite, strictly positive number.
  expect_true(is.finite(baseline) && baseline > 0)

  # Evaluating the same input again must match to n * eps.
  expect_equal(
    robScale(sample_x),
    baseline,
    tolerance = tol_n(n_obs)
  )
})

test_that("WU-FUSE-1 RED: robScale stable after fused kernel — n=7 tail=3", {
  # Mod-4 boundary case from the file header: n = 7, 3-element tail.
  n_obs <- 7L
  set.seed(42)
  sample_x <- rnorm(n_obs)
  baseline <- robScale(sample_x)

  # Sanity: the estimate must be a finite, strictly positive number.
  expect_true(is.finite(baseline) && baseline > 0)

  # Evaluating the same input again must match to n * eps.
  expect_equal(
    robScale(sample_x),
    baseline,
    tolerance = tol_n(n_obs)
  )
})

test_that("WU-FUSE-1 RED: robScale stable after fused kernel — n=8 full vectors", {
  # n = 8 is a multiple of 4, so there are no tail elements.
  n_obs <- 8L
  set.seed(42)
  sample_x <- rnorm(n_obs)
  baseline <- robScale(sample_x)

  # Sanity: the estimate must be a finite, strictly positive number.
  expect_true(is.finite(baseline) && baseline > 0)

  # Evaluating the same input again must match to n * eps.
  expect_equal(
    robScale(sample_x),
    baseline,
    tolerance = tol_n(n_obs)
  )
})

test_that("WU-FUSE-1 RED: robScale stable after fused kernel — n=10", {
  # Seeded sample of length 10.
  set.seed(42)
  x <- rnorm(10)
  before <- robScale(x)

  # Guard against a degenerate result: the self-comparison below would also
  # pass for NaN/NA/Inf, so check explicitly that the estimate is a finite,
  # strictly positive number.
  expect_true(is.finite(before) && before > 0)

  # Evaluating the same input again must match to n * eps.
  expect_equal(robScale(x), before, tolerance = tol_n(10))
})

test_that("WU-FUSE-1 RED: robScale stable after fused kernel — n=100", {
  # Seeded sample of length 100.
  set.seed(7)
  x <- rnorm(100)
  before <- robScale(x)

  # Guard against a degenerate result: the self-comparison below would also
  # pass for NaN/NA/Inf, so check explicitly that the estimate is a finite,
  # strictly positive number.
  expect_true(is.finite(before) && before > 0)

  # Evaluating the same input again must match to n * eps.
  expect_equal(robScale(x), before, tolerance = tol_n(100))
})

test_that("WU-FUSE-1 RED: robScale stable after fused kernel — n=1000", {
  # Seeded sample of length 1000.
  set.seed(7)
  x <- rnorm(1000)
  before <- robScale(x)

  # Guard against a degenerate result: the self-comparison below would also
  # pass for NaN/NA/Inf, so check explicitly that the estimate is a finite,
  # strictly positive number.
  expect_true(is.finite(before) && before > 0)

  # Evaluating the same input again must match to n * eps.
  expect_equal(robScale(x), before, tolerance = tol_n(1000))
})

# ------------------------------------------------------------------
# 2. Fused kernel: C_rob_scale_fast still matches robScale (both use
#    same fused path; tolerance=0 verifies they share implementation)
# ------------------------------------------------------------------

test_that("WU-FUSE-1 RED: C_rob_scale_fast matches robScale after fused kernel", {
  # Input lengths around multiples of 4, plus two larger sizes.
  sizes <- c(4L, 5L, 6L, 7L, 8L, 9L, 12L, 16L, 17L, 100L, 1000L)

  for (len in sizes) {
    # Length-derived seed: each size gets its own reproducible sample.
    set.seed(len * 100L + 1L)
    draw <- rnorm(len)

    # tolerance = 0: the two entry points must agree exactly.
    expect_equal(
      robscale:::C_rob_scale_fast(draw),
      robScale(draw),
      tolerance = 0,
      label = paste0("n=", len, " C_rob_scale_fast==robScale")
    )
  }
})

# ------------------------------------------------------------------
# 3. Fused kernel: no-scratch allocation path still handles edge cases
# ------------------------------------------------------------------

test_that("WU-FUSE-1: fused path n=0 → 0 via C_rob_scale_fast (early exit guard)", {
  # A zero-length numeric input is expected to yield a scale of 0.
  empty_input <- numeric(0)
  expect_equal(
    robscale:::C_rob_scale_fast(empty_input),
    0.0
  )
})

test_that("WU-FUSE-1: fused path n<4 produces finite result", {
  # Lengths 1, 2 and 3 are all shorter than one 4-element group.
  for (len in seq_len(3L)) {
    # The length doubles as the seed.
    set.seed(len)
    draw <- rnorm(len)
    expect_true(
      is.finite(robScale(draw)),
      label = paste0("n=", len)
    )
  }
})

test_that("WU-FUSE-1: fused path ADM fallback (tied data) unchanged", {
  # Eight identical values plus one different value give MAD = 0, so the
  # ADM path is taken and nr_scale_compute is bypassed entirely.
  tied_values <- c(rep(5.0, times = 8), 6.0)
  estimate <- robScale(tied_values)

  # The fallback must still produce a finite, strictly positive scale.
  expect_true(is.finite(estimate) && estimate > 0)

  # The fast wrapper must agree exactly with robScale on the same data.
  expect_equal(
    robscale:::C_rob_scale_fast(tied_values),
    estimate,
    tolerance = 0
  )
})

test_that("WU-FUSE-1: fused path deterministic (10 repeats)", {
  # One seeded sample, evaluated once to get the reference value.
  set.seed(42)
  sample_x <- rnorm(200)
  reference <- robScale(sample_x)

  # Every further evaluation of the same input must be identical to it.
  for (rep_idx in seq_len(10L)) {
    expect_identical(
      robScale(sample_x),
      reference,
      label = paste0("repeat ", rep_idx)
    )
  }
})

test_that("WU-FUSE-1: fused path scale-equivariance preserved", {
  # Multiplying the data by a constant must multiply the estimate by it.
  scale_factor <- 5.0
  set.seed(42)
  sample_x <- rnorm(100)

  base_est <- robScale(sample_x)
  scaled_est <- robScale(sample_x * scale_factor)

  # The ratio of the two estimates must recover the factor.
  expect_equal(
    scaled_est / base_est,
    scale_factor,
    tolerance = 1e-9
  )
})