## ============================================================
## NRMSampling -- testthat test suite (edition 3)
## ============================================================

library(testthat)
library(NRMSampling)

# ---- Shared test data -------------------------------------------------------
data(sample_nrm)
set.seed(42)

# ── Probability Sampling ─────────────────────────────────────────────────────

test_that("srs_sample: returns correct n rows", {
  s <- srs_sample(sample_nrm, n = 20)
  expect_equal(nrow(s), 20L)
})

test_that("srs_sample: appends .sample_id column", {
  s <- srs_sample(sample_nrm, n = 10)
  expect_true(".sample_id" %in% names(s))
})

test_that("srs_sample: no replacement -- unique rows", {
  s <- srs_sample(sample_nrm, n = 50)
  expect_equal(length(unique(s$.sample_id)), 50L)
})

test_that("srs_sample: with replacement works", {
  s <- srs_sample(sample_nrm, n = 120, replace = TRUE)
  expect_equal(nrow(s), 120L)
})

test_that("srs_sample: error when n > N without replacement", {
  expect_error(srs_sample(sample_nrm, n = 200),
               "exceeds population size")
})

test_that("srs_sample: error for non-data-frame input", {
  expect_error(srs_sample(1:100, n = 10), "data frame")
})

test_that("stratified_sample: correct rows per stratum (equal)", {
  s <- stratified_sample(sample_nrm, "strata", n_per_stratum = 4)
  expect_true(all(table(s$strata) == 4))
})

test_that("stratified_sample: named vector allocation", {
  ns <- c(forest = 6, agriculture = 3, grassland = 4)
  s  <- stratified_sample(sample_nrm, "strata", n_per_stratum = ns)
  expect_equal(as.integer(table(s$strata)["forest"]), 6L)
})

test_that("stratified_sample: error for unknown strata_var", {
  expect_error(stratified_sample(sample_nrm, "nonexistent", 5),
               "not found")
})

test_that("systematic_sample: returns roughly N/k rows", {
  s <- systematic_sample(sample_nrm, k = 10)
  expect_true(abs(nrow(s) - 10) <= 1)
})

test_that("systematic_sample: all indices are multiples of k apart", {
  s   <- systematic_sample(sample_nrm, k = 5)
  ids <- s$.sample_id
  diffs <- diff(ids)
  expect_true(all(diffs == 5))
})

test_that("cluster_sample: returns only selected clusters", {
  s <- cluster_sample(sample_nrm, "cluster", n_clusters = 3)
  expect_equal(length(unique(s$cluster)), 3L)
})

test_that("cluster_sample: error when n_clusters exceeds clusters", {
  expect_error(cluster_sample(sample_nrm, "cluster", n_clusters = 50),
               "exceeds")
})

test_that("pps_sample: correct sample size", {
  s <- pps_sample(sample_nrm, "size", n = 15)
  expect_equal(nrow(s), 15L)
})

test_that("pps_sample: appends .inclusion_prob", {
  s <- pps_sample(sample_nrm, "size", n = 10)
  expect_true(".inclusion_prob" %in% names(s))
  expect_true(all(s$.inclusion_prob > 0))
})

test_that("pps_sample: error for non-positive size values", {
  bad <- sample_nrm
  bad$size[1] <- -1
  expect_error(pps_sample(bad, "size", n = 5), "strictly positive")
})

# ── Non-Probability Sampling ──────────────────────────────────────────────────

test_that("convenience_sample: returns first n rows", {
  s <- convenience_sample(sample_nrm, n = 15)
  expect_equal(nrow(s), 15L)
  expect_equal(s$plot_id, sample_nrm$plot_id[1:15])
})

test_that("convenience_sample: error when n > nrow", {
  expect_error(convenience_sample(sample_nrm, n = 500), "exceeds")
})

test_that("purposive_sample: correct filtering", {
  s <- purposive_sample(sample_nrm, "biomass > 40")
  expect_true(all(s$biomass > 40))
})

test_that("purposive_sample: zero rows for impossible condition", {
  s <- purposive_sample(sample_nrm, "biomass > 1000")
  expect_equal(nrow(s), 0L)
})

test_that("purposive_sample: error for invalid condition", {
  expect_error(purposive_sample(sample_nrm, "nonexistent_col > 5"),
               "Invalid condition")
})

test_that("quota_sample: respects quota size", {
  s <- quota_sample(sample_nrm, "strata", quota = 4)
  expect_true(all(table(s$strata) <= 4))
})

# ── Estimation ────────────────────────────────────────────────────────────────

test_that("estimate_mean: matches base mean", {
  s <- srs_sample(sample_nrm, n = 30)
  expect_equal(estimate_mean(s$biomass), mean(s$biomass, na.rm = TRUE))
})

test_that("estimate_total: equals N * mean", {
  s <- srs_sample(sample_nrm, n = 30)
  tot <- estimate_total(s$biomass, N = 100)
  expect_equal(tot, mean(s$biomass) * 100, tolerance = 1e-6)
})

test_that("estimate_variance: positive for real data", {
  expect_gt(estimate_variance(sample_nrm$biomass), 0)
})

test_that("estimate_se: positive, with fpc <= without fpc", {
  s   <- srs_sample(sample_nrm, n = 30)
  se_fpc  <- estimate_se(s$biomass, N = 100)
  se_nofpc <- estimate_se(s$biomass)
  expect_gt(se_fpc, 0)
  expect_lte(se_fpc, se_nofpc)
})

test_that("estimate_ci: lower < mean < upper", {
  s  <- srs_sample(sample_nrm, n = 30)
  ci <- estimate_ci(s$biomass, N = 100)
  expect_lt(ci["lower"], ci["mean"])
  expect_gt(ci["upper"], ci["mean"])
})

test_that("estimate_ci: error for invalid conf_level", {
  s <- srs_sample(sample_nrm, n = 20)
  expect_error(estimate_ci(s$biomass, conf_level = 1.5), "conf_level")
})

test_that("ratio_estimator: returns numeric scalar", {
  s <- srs_sample(sample_nrm, n = 30)
  r <- ratio_estimator(s$biomass, s$size, sum(sample_nrm$size))
  expect_type(r, "double")
  expect_length(r, 1L)
})

test_that("ratio_estimator: error when x sums to zero", {
  s <- srs_sample(sample_nrm, n = 30)
  expect_error(ratio_estimator(s$biomass, rep(0, nrow(s)), 1000),
               "Sum of 'x'")
})

test_that("regression_estimator: returns numeric scalar", {
  s <- srs_sample(sample_nrm, n = 30)
  r <- regression_estimator(s$biomass, s$size, mean(sample_nrm$size))
  expect_type(r, "double")
  expect_length(r, 1L)
})

test_that("ht_estimator: positive result for positive y/pi", {
  pps <- pps_sample(sample_nrm, "size", n = 20)
  ht  <- ht_estimator(pps$biomass, pps$.inclusion_prob)
  expect_gt(ht, 0)
})

test_that("ht_estimator: error for pi out of range", {
  expect_error(ht_estimator(1:5, c(0.2, 0.3, 0, 0.4, 0.5)), "pi.*0.*1")
})

test_that("ht_variance: non-negative result", {
  pps <- pps_sample(sample_nrm, "size", n = 20)
  v   <- ht_variance(pps$biomass, pps$.inclusion_prob)
  expect_gte(v, 0)
})

test_that("stratified_estimator: returns numeric scalar", {
  st  <- stratified_sample(sample_nrm, "strata", 5)
  N_h <- table(sample_nrm$strata)
  est <- stratified_estimator(st$biomass, st$strata, N_h)
  expect_type(est, "double")
  expect_length(est, 1L)
})

# ── NRM Utilities ─────────────────────────────────────────────────────────────

test_that("biomass_estimate: list with correct elements", {
  s   <- srs_sample(sample_nrm, n = 30)
  res <- biomass_estimate(s, "biomass", area = 1000)
  expect_named(res, c("mean_biomass", "total_biomass", "se", "n"))
  expect_gt(res$total_biomass, 0)
})

test_that("biomass_estimate: total = mean * area", {
  s   <- srs_sample(sample_nrm, n = 30)
  res <- biomass_estimate(s, "biomass", area = 500)
  expect_equal(res$total_biomass, res$mean_biomass * 500, tolerance = 1e-3)
})

test_that("soil_loss_estimate: list with positive values", {
  s   <- srs_sample(sample_nrm, n = 25)
  res <- soil_loss_estimate(s, "soil_loss", area = 500)
  expect_gt(res$total_loss, 0)
})

test_that("carbon_stock_estimate: carbon <= biomass", {
  s   <- srs_sample(sample_nrm, n = 30)
  res <- carbon_stock_estimate(s, "biomass", area = 1000)
  expect_lt(res$total_carbon, res$total_biomass)
})

test_that("carbon_stock_estimate: custom fraction", {
  s <- srs_sample(sample_nrm, n = 30)
  r1 <- carbon_stock_estimate(s, "biomass", area = 1000, carbon_fraction = 0.5)
  r2 <- carbon_stock_estimate(s, "biomass", area = 1000, carbon_fraction = 0.4)
  expect_gt(r1$total_carbon, r2$total_carbon)
})

test_that("plot_summary: data.frame with correct columns", {
  s   <- srs_sample(sample_nrm, n = 30)
  res <- plot_summary(s, vars = c("biomass", "soil_loss"))
  expect_s3_class(res, "data.frame")
  expect_equal(nrow(res), 2L)
  expect_true(all(c("variable", "n", "mean", "sd", "min", "max") %in% names(res)))
})

test_that("plot_summary: error for missing column", {
  expect_error(plot_summary(sample_nrm, vars = "nonexistent"), "not found")
})

test_that("sampling_efficiency: RE positive and named", {
  y1 <- srs_sample(sample_nrm, 20)$biomass
  y2 <- srs_sample(sample_nrm, 40)$biomass
  re <- sampling_efficiency(y1, y2, N = 100)
  expect_named(re, c("var_design1", "var_design2", "relative_efficiency"))
  expect_gt(re["relative_efficiency"], 0)
})

# ── Unit conversion & edge cases ──────────────────────────────────────────────

test_that("estimate_mean: handles NA values", {
  y <- c(1, 2, NA, 4, 5)
  expect_equal(estimate_mean(y), mean(c(1, 2, 4, 5)))
})

test_that("estimate_total: error for non-positive N", {
  expect_error(estimate_total(1:10, N = -5), "positive")
})

test_that("purposive_sample: multi-condition works", {
  s <- purposive_sample(sample_nrm,
                        "biomass > 20 & strata == 'forest'")
  expect_true(all(s$biomass > 20))
  expect_true(all(s$strata == "forest"))
})

test_that("regression_estimator: error for length mismatch", {
  expect_error(regression_estimator(1:10, 1:5, X_mean = 50),
               "same length")
})

test_that("srs_sample: rownames are reset", {
  s <- srs_sample(sample_nrm, n = 20)
  expect_equal(rownames(s), as.character(seq_len(nrow(s))))
})