# Muffle only the known, expected warning about replicate analysis weights;
# all other warnings propagate unchanged.
suppress_bootstrap_assumption_warning <- function(expr) {
  withCallingHandlers(expr, warning = function(w) {
    if (grepl("injects replicate analysis weights", conditionMessage(w), fixed = TRUE)) {
      invokeRestart("muffleWarning")
    }
  })
}

test_that("svyrep.design is rejected with clear error", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  data(api, package = "survey")
  dstrat <- survey::svydesign(
    id = ~1, strata = ~stype, weights = ~pw,
    data = apistrat, fpc = ~fpc
  )
  drep <- svrep::as_bootstrap_design(dstrat, replicates = 10)

  estimator <- function(data, ...) {
    est <- survey::svymean(~api00, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }

  expect_error(
    bootstrap_variance(
      drep, estimator,
      point_estimate = mean(apistrat$api00),
      bootstrap_reps = 10
    ),
    "replicate design|svyrep"
  )
})

test_that("replicate count mismatch warns but proceeds", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  data(api, package = "survey")
  dstrat <- survey::svydesign(
    id = ~1, strata = ~stype, weights = ~pw,
    data = apistrat, fpc = ~fpc
  )

  estimator <- function(data, ...) {
    est <- survey::svymean(~api00, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }

  future::plan(future::sequential)
  result <- tryCatch(
    suppressWarnings(
      bootstrap_variance(
        dstrat, estimator,
        point_estimate = mean(apistrat$api00),
        bootstrap_reps = 200
      )
    ),
    error = function(e) list(error = TRUE, message = e$message)
  )

  expect_false(isTRUE(result$error))
  expect_true(is.finite(result$variance))
  expect_true(result$variance > 0)
  expect_true(length(result$replicates) >= 2)
})

test_that("survey bootstrap errors when replicate design has too few replicates", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  df <- data.frame(y = 1:4, w = 1)
  design <- survey::svydesign(ids = ~1, data = df, weights = ~w)

  estimator <- function(data, ...) {
    est <- survey::svymean(~y, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }

  testthat::local_mocked_bindings(
    as_bootstrap_design = function(design, replicates, ...) {
      repw <- matrix(stats::weights(design), ncol = 1)
      survey::svrepdesign(
        variables = design$variables,
        repweights = repw,
        weights = stats::weights(design),
        type = "bootstrap",
        scale = 1,
        rscales = 1,
        combined.weights = FALSE,
        mse = FALSE
      )
    },
    .package = "svrep"
  )

  expect_error(
    bootstrap_variance(
      design, estimator,
      point_estimate = mean(df$y),
      bootstrap_reps = 10
    ),
    "at least 2 replicates",
    fixed = TRUE
  )
})
test_that("survey NA policy 'strict' shows detailed error", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  data(api, package = "survey")
  dstrat <- survey::svydesign(
    id = ~1, strata = ~stype, weights = ~pw,
    data = apistrat, fpc = ~fpc
  )

  # Estimator that fails for specific replicates
  fail_at <- c(2, 5, 8)
  counter <- 0
  estimator <- function(data, ...) {
    counter <<- counter + 1
    if (counter %in% fail_at) {
      return(list(y_hat = NA_real_, converged = FALSE))
    }
    est <- survey::svymean(~api00, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }
  counter <- 0

  future::plan(future::sequential)
  err <- tryCatch(
    suppress_bootstrap_assumption_warning(
      bootstrap_variance(
        dstrat, estimator,
        point_estimate = mean(apistrat$api00),
        bootstrap_reps = 10,
        survey_na_policy = "strict"
      )
    ),
    error = function(e) e$message
  )

  # Should show the specific failing indices
  expect_match(err, "2.*5.*8")
  expect_match(err, "strict")
  expect_match(err, "Troubleshooting")
})

test_that("survey NA policy 'omit' handles failures correctly", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  data(api, package = "survey")
  dstrat <- survey::svydesign(
    id = ~1, strata = ~stype, weights = ~pw,
    data = apistrat, fpc = ~fpc
  )

  # Controlled failure pattern
  fail_at <- c(3, 7, 12)
  counter <- 0
  estimator <- function(data, ...) {
    counter <<- counter + 1
    if (counter %in% fail_at) {
      return(list(y_hat = NA_real_, converged = FALSE))
    }
    est <- survey::svymean(~api00, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }
  counter <- 0

  future::plan(future::sequential)
  expect_warning(
    res <- suppress_bootstrap_assumption_warning(
      bootstrap_variance(
        dstrat, estimator,
        point_estimate = mean(apistrat$api00),
        bootstrap_reps = 15,
        survey_na_policy = "omit"
      )
    ),
    "3/15.*failed.*omitted"
  )

  # Verify mathematical properties
  expect_equal(length(res$replicates), 12) # 15 - 3 failed
  expect_true(all(is.finite(res$replicates)))
  expect_true(is.finite(res$variance))
  expect_true(res$variance > 0)
  expect_true(res$se > 0)
})

test_that("survey NA policy 'omit' requires at least 2 successes", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  data(api, package = "survey")
  dstrat <- survey::svydesign(
    id = ~1, strata = ~stype, weights = ~pw,
    data = apistrat, fpc = ~fpc
  )

  # Estimator that always fails
  estimator <- function(data, ...) {
    list(y_hat = NA_real_, converged = FALSE)
  }

  future::plan(future::sequential)
  expect_error(
    suppress_bootstrap_assumption_warning(
      bootstrap_variance(
        dstrat, estimator,
        point_estimate = mean(apistrat$api00),
        bootstrap_reps = 10,
        survey_na_policy = "omit"
      )
    ),
    "Too few successful"
  )
})
test_that("survey NA policy 'omit' shows failure pattern", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  data(api, package = "survey")
  dstrat <- survey::svydesign(
    id = ~1, strata = ~stype, weights = ~pw,
    data = apistrat, fpc = ~fpc
  )

  # Many failures, to test the 'First 10 failed + X more' message
  fail_at <- 1:15
  counter <- 0
  estimator <- function(data, ...) {
    counter <<- counter + 1
    if (counter %in% fail_at) {
      return(list(y_hat = NA_real_, converged = FALSE))
    }
    est <- survey::svymean(~api00, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }
  counter <- 0

  future::plan(future::sequential)
  warn_msg <- NULL
  res <- tryCatch(
    withCallingHandlers(
      suppress_bootstrap_assumption_warning(
        bootstrap_variance(
          dstrat, estimator,
          point_estimate = mean(apistrat$api00),
          bootstrap_reps = 20,
          survey_na_policy = "omit"
        )
      ),
      warning = function(w) {
        warn_msg <<- conditionMessage(w)
        invokeRestart("muffleWarning")
      }
    ),
    error = function(e) list(error = TRUE)
  )

  # Should show 'First 10 failed + X more'
  expect_match(warn_msg, "15/20")
  expect_match(warn_msg, "First 10")
  expect_match(warn_msg, "5 more")

  # Should still produce a valid result
  expect_false(isTRUE(res$error))
  expect_equal(length(res$replicates), 5) # 20 - 15 failed
})

test_that("mathematical correctness: variance has correct properties", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  # Test 1: Basic mathematical properties with a survey design
  data(api, package = "survey")
  dstrat <- survey::svydesign(
    id = ~1, strata = ~stype, weights = ~pw,
    data = apistrat, fpc = ~fpc
  )

  estimator <- function(data, ...) {
    est <- survey::svymean(~api00, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }

  future::plan(future::sequential)
  set.seed(424242)
  res <- suppress_bootstrap_assumption_warning(
    bootstrap_variance(
      dstrat, estimator,
      point_estimate = mean(apistrat$api00),
      bootstrap_reps = 30
    )
  )

  expect_true(res$variance >= 0)
  expect_equal(res$se, sqrt(res$variance))
  expect_true(all(is.finite(res$replicates)))
  expect_equal(length(res$replicates), 30)

  # Test 2: Compare to the analytical ground truth for IID normal data.
  # For IID normal data the variance of the sample mean is known exactly:
  # Var(x_bar) = var / n = 4 / 1000 = 0.004.
  set.seed(12345)
  n <- 1000
  true_sd <- 2
  x <- rnorm(n, mean = 5, sd = true_sd) # sd = 2, so var = 4
  true_variance_of_mean <- true_sd^2 / n

  data_iid <- data.frame(y = x)
  estimator_mean <- function(data, ...) {
    list(y_hat = mean(data$y), converged = TRUE)
  }

  set.seed(424242)
  res_iid <- bootstrap_variance(
    data_iid, estimator_mean,
    point_estimate = mean(x),
    bootstrap_reps = 500
  )

  # With 500 bootstrap reps the estimate should be close to the analytical value
  expect_equal(res_iid$variance, true_variance_of_mean, tolerance = 0.3)
  expect_true(res_iid$variance > 0)
  expect_equal(res_iid$se, sqrt(res_iid$variance))
})
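
# Illustrative sketch (an addition, not part of the package's test contract):
# the analytical ground truth used in Test 2 above can be reproduced with a
# hand-rolled nonparametric bootstrap, independent of bootstrap_variance().
# For IID data, resampling n observations with replacement and averaging
# approximates Var(x_bar) = sigma^2 / n.
test_that("sketch: hand-rolled bootstrap of the mean recovers sigma^2 / n", {
  set.seed(12345)
  n <- 1000
  true_sd <- 2
  x <- rnorm(n, mean = 5, sd = true_sd)

  # 500 bootstrap replicates of the sample mean
  reps <- vapply(
    seq_len(500),
    function(i) mean(sample(x, size = n, replace = TRUE)),
    numeric(1)
  )

  # Bootstrap variance of the mean vs the analytical value 4 / 1000 = 0.004
  expect_equal(stats::var(reps), true_sd^2 / n, tolerance = 0.3)
})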
test_that("boundary cases: minimum replicates and edge conditions", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  data(api, package = "survey")
  dstrat <- survey::svydesign(
    id = ~1, strata = ~stype, weights = ~pw,
    data = apistrat, fpc = ~fpc
  )

  estimator <- function(data, ...) {
    est <- survey::svymean(~api00, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }

  future::plan(future::sequential)

  # Test 1: bootstrap_reps = 2 (minimum for a variance calculation)
  res_min <- suppress_bootstrap_assumption_warning(
    bootstrap_variance(
      dstrat, estimator,
      point_estimate = mean(apistrat$api00),
      bootstrap_reps = 2
    )
  )
  expect_equal(length(res_min$replicates), 2)
  expect_true(is.finite(res_min$variance))
  expect_true(res_min$variance >= 0)

  # Test 2: bootstrap_reps = 1 should still work, but the variance may be
  # problematic. svrVar should handle this, but it's an edge case.
  res_one <- tryCatch(
    suppress_bootstrap_assumption_warning(
      bootstrap_variance(
        dstrat, estimator,
        point_estimate = mean(apistrat$api00),
        bootstrap_reps = 1
      )
    ),
    error = function(e) list(error = TRUE, message = e$message)
  )
  # With 1 replicate, the variance calculation may fail or return zero.
  # We just verify it doesn't crash catastrophically.
  expect_true(is.list(res_one))

  # Test 3: IID data with small n
  small_data <- data.frame(y = c(1, 2, 3))
  estimator_simple <- function(data, ...) {
    list(y_hat = mean(data$y), converged = TRUE)
  }
  res_small <- bootstrap_variance(
    small_data, estimator_simple,
    point_estimate = mean(small_data$y),
    bootstrap_reps = 10
  )
  expect_equal(length(res_small$replicates), 10)
  expect_true(is.finite(res_small$variance))
  expect_true(res_small$variance >= 0)

  # Test 4: Single-observation edge case
  single_data <- data.frame(y = 5)
  res_single <- bootstrap_variance(
    single_data, estimator_simple,
    point_estimate = 5,
    bootstrap_reps = 10
  )
  # With a single observation, all bootstrap samples are identical
  expect_equal(length(res_single$replicates), 10)
  expect_true(all(res_single$replicates == 5))
  # Variance should be zero (or very close) since all replicates are identical
  expect_true(res_single$variance < 1e-10)
})

test_that("IID resample_guard integrates with failure handling", {
  set.seed(123)
  df <- data.frame(y = c(1, 2, 3, 4, 5))
  estimator_mean <- function(data, ...) {
    list(y_hat = mean(data$y), converged = TRUE)
  }

  # Guard that rejects all resamples; every replicate will hit max attempts
  # and return NA
  always_reject_guard <- function(indices, data) {
    FALSE
  }

  expect_error(
    bootstrap_variance(
      df,
      estimator_func = estimator_mean,
      point_estimate = mean(df$y),
      bootstrap_reps = 10,
      resample_guard = always_reject_guard
    ),
    "All bootstrap replicates failed",
    fixed = TRUE
  )
})
test_that("omit policy correctly subsets rscales for mathematical correctness", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  data(api, package = "survey")
  dstrat <- survey::svydesign(
    id = ~1, strata = ~stype, weights = ~pw,
    data = apistrat, fpc = ~fpc
  )

  # This test verifies the critical mathematical requirement: when omitting
  # failed replicates, BOTH replicate_estimates AND rep_rscales must be
  # subsetted to the same indices for the svrVar formula to be correct:
  #   V = scale * sum(rscales[j] * (theta[j] - theta_0)^2)

  # Create a controlled failure pattern
  fail_at <- c(2, 5, 8, 11, 14) # 5 failures out of 20
  counter <- 0
  estimator <- function(data, ...) {
    counter <<- counter + 1
    if (counter %in% fail_at) {
      return(list(y_hat = NA_real_, converged = FALSE))
    }
    est <- survey::svymean(~api00, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }
  counter <- 0

  future::plan(future::sequential)
  set.seed(999)
  expect_warning(
    res <- suppress_bootstrap_assumption_warning(
      bootstrap_variance(
        dstrat, estimator,
        point_estimate = mean(apistrat$api00),
        bootstrap_reps = 20,
        survey_na_policy = "omit"
      )
    ),
    "5/20.*failed.*omitted"
  )

  # Number of replicates matches the expected successes
  expect_equal(length(res$replicates), 15) # 20 - 5 failed

  # All returned replicates are finite (if rscales weren't subsetted
  # correctly, NA values might slip through)
  expect_true(all(is.finite(res$replicates)))

  # Variance is finite and positive. If the rscales length didn't match the
  # replicates length, svrVar would fail, produce NA/Inf, or use the wrong
  # scaling factors.
  expect_true(is.finite(res$variance))
  expect_true(res$variance > 0)
  expect_equal(res$se, sqrt(res$variance))

  # Test with a different failure pattern to ensure robustness
  set.seed(777)
  fail_randomly <- sample(1:30, 12) # 12 random failures out of 30
  counter <- 0
  estimator_random <- function(data, ...) {
    counter <<- counter + 1
    if (counter %in% fail_randomly) {
      return(list(y_hat = NA_real_, converged = FALSE))
    }
    est <- survey::svymean(~api00, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }
  counter <- 0

  set.seed(888)
  expect_warning(
    res_random <- suppress_bootstrap_assumption_warning(
      bootstrap_variance(
        dstrat, estimator_random,
        point_estimate = mean(apistrat$api00),
        bootstrap_reps = 30,
        survey_na_policy = "omit"
      )
    ),
    "12/30.*failed"
  )

  expect_equal(length(res_random$replicates), 18) # 30 - 12 failed
  expect_true(all(is.finite(res_random$replicates)))
  expect_true(is.finite(res_random$variance))
  expect_true(res_random$variance > 0)
  expect_equal(res_random$se, sqrt(res_random$variance))
})
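
# Illustrative sketch (an addition; the formula is the one quoted in the test
# above, V = scale * sum(rscales[j] * (theta[j] - theta_0)^2)): computing it
# by hand shows why estimates and rscales must be dropped together --
# otherwise the elementwise product pairs each estimate with the wrong
# scaling factor, or the vectors end up with mismatched lengths.
test_that("sketch: replicate-variance formula needs matched subsetting", {
  theta_0 <- 10
  theta <- c(9.5, 10.2, NA, 10.8, NA, 9.9) # replicates 3 and 5 "failed"
  rscales <- rep(0.5, length(theta))       # per-replicate scaling factors
  scale_factor <- 1

  ok <- is.finite(theta)
  v_matched <- scale_factor * sum(rscales[ok] * (theta[ok] - theta_0)^2)
  expect_true(is.finite(v_matched))
  expect_true(v_matched > 0)

  # Dropping estimates without dropping rscales leaves mismatched lengths
  # (4 vs 6), so the formula can no longer be evaluated correctly.
  expect_true(length(theta[ok]) != length(rscales))
})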
test_that("survey NA policy default is 'strict'", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  data(api, package = "survey")
  dstrat <- survey::svydesign(
    id = ~1, strata = ~stype, weights = ~pw,
    data = apistrat, fpc = ~fpc
  )

  # Estimator that fails once
  fail_at <- c(5)
  counter <- 0
  estimator <- function(data, ...) {
    counter <<- counter + 1
    if (counter %in% fail_at) {
      return(list(y_hat = NA_real_, converged = FALSE))
    }
    est <- survey::svymean(~api00, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }
  counter <- 0

  future::plan(future::sequential)

  # Without specifying survey_na_policy, should default to strict and error
  err <- tryCatch(
    suppress_bootstrap_assumption_warning(
      bootstrap_variance(
        dstrat, estimator,
        point_estimate = mean(apistrat$api00),
        bootstrap_reps = 10
      )
    ),
    error = function(e) e$message
  )

  expect_match(err, "strict")
  expect_match(err, "1/10.*failed")
})

test_that("survey bootstrap warns for multistage/FPC designs", {
  skip_if_not_installed("survey")
  skip_if_not_installed("svrep")

  data(mu284, package = "survey")
  multistage_design <- survey::svydesign(
    data = mu284,
    ids = ~ id1 + id2,
    fpc = ~ n1 + n2
  )

  estimator <- function(data, ...) {
    est <- survey::svymean(~y1, data)
    list(y_hat = as.numeric(est), converged = TRUE)
  }

  future::plan(future::sequential)
  expect_warning(
    bootstrap_variance(
      multistage_design, estimator,
      point_estimate = mean(mu284$y1),
      bootstrap_reps = 10
    ),
    "injects replicate analysis weights",
    fixed = TRUE
  )
})

test_that("nmar.bootstrap_apply option controls future.apply usage", {
  skip_if_not_installed("future")
  skip_if_not_installed("future.apply")

  df <- data.frame(y = c(1, 2, 3, 4))
  estimator_mean <- function(data, ...) {
    list(y_hat = mean(data$y), converged = TRUE)
  }

  future::plan(future::sequential)
  old_opt <- getOption("nmar.bootstrap_apply", NULL)
  on.exit(options(nmar.bootstrap_apply = old_opt), add = TRUE)

  called <- new.env(parent = emptyenv())
  called$future_lapply <- FALSE
  testthat::local_mocked_bindings(
    future_lapply = function(X, FUN, ...) {
      called$future_lapply <- TRUE
      lapply(X, FUN)
    },
    .package = "future.apply"
  )

  options(nmar.bootstrap_apply = "base")
  res_base <- bootstrap_variance(
    df,
    estimator_func = estimator_mean,
    point_estimate = mean(df$y),
    bootstrap_reps = 5
  )
  expect_false(isTRUE(called$future_lapply))
  expect_true(is.finite(res_base$variance))

  called$future_lapply <- FALSE
  options(nmar.bootstrap_apply = "future")
  res_future <- bootstrap_variance(
    df,
    estimator_func = estimator_mean,
    point_estimate = mean(df$y),
    bootstrap_reps = 5
  )
  expect_true(isTRUE(called$future_lapply))
  expect_true(is.finite(res_future$variance))
})

test_that("nmar.bootstrap_apply='auto' uses base unless parallel is configured", {
  skip_if_not_installed("future")
  skip_if_not_installed("future.apply")

  df <- data.frame(y = c(1, 2, 3, 4))
  estimator_mean <- function(data, ...) {
    list(y_hat = mean(data$y), converged = TRUE)
  }

  future::plan(future::sequential)
  old_opt <- getOption("nmar.bootstrap_apply", NULL)
  on.exit(options(nmar.bootstrap_apply = old_opt), add = TRUE)

  testthat::local_mocked_bindings(
    future_lapply = function(...) {
      stop("future_lapply should not be called in auto+sequential")
    },
    .package = "future.apply"
  )

  options(nmar.bootstrap_apply = "auto")
  res_auto <- bootstrap_variance(
    df,
    estimator_func = estimator_mean,
    point_estimate = mean(df$y),
    bootstrap_reps = 5
  )
  expect_true(is.finite(res_auto$variance))

  called <- new.env(parent = emptyenv())
  called$future_lapply <- FALSE
  testthat::local_mocked_bindings(
    future_lapply = function(X, FUN, ...) {
      called$future_lapply <- TRUE
      lapply(X, FUN)
    },
    .package = "future.apply"
  )
  testthat::local_mocked_bindings(
    nmar_future_workers = function() 2L,
    .package = "NMAR"
  )

  res_auto_parallel <- bootstrap_variance(
    df,
    estimator_func = estimator_mean,
    point_estimate = mean(df$y),
    bootstrap_reps = 5
  )
  expect_true(isTRUE(called$future_lapply))
  expect_true(is.finite(res_auto_parallel$variance))
})

test_that("nmar.bootstrap_apply validates option value", {
  df <- data.frame(y = c(1, 2, 3, 4))
  estimator_mean <- function(data, ...) {
    list(y_hat = mean(data$y), converged = TRUE)
  }

  old_opt <- getOption("nmar.bootstrap_apply", NULL)
  on.exit(options(nmar.bootstrap_apply = old_opt), add = TRUE)

  options(nmar.bootstrap_apply = "nope")
  expect_error(
    bootstrap_variance(
      df,
      estimator_func = estimator_mean,
      point_estimate = mean(df$y),
      bootstrap_reps = 5
    ),
    "nmar.bootstrap_apply",
    fixed = TRUE
  )
})
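
# Usage sketch (an addition; the option values and behavior are inferred from
# the tests above, not from package documentation): "base" forces the serial
# code path, "future" forces future.apply::future_lapply, and "auto" uses
# future.apply only when a parallel plan with workers is configured, e.g.
#
#   options(nmar.bootstrap_apply = "future")
#   future::plan(future::multisession, workers = 2)
#   res <- bootstrap_variance(df, estimator_func = estimator_mean,
#                             point_estimate = mean(df$y), bootstrap_reps = 500)
#   future::plan(future::sequential) # restore the default plan afterwards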