## ============================================================ ## NRMSampling -- testthat test suite (edition 3) ## ============================================================ library(testthat) library(NRMSampling) # ---- Shared test data ------------------------------------------------------- data(sample_nrm) set.seed(42) # ── Probability Sampling ───────────────────────────────────────────────────── test_that("srs_sample: returns correct n rows", { s <- srs_sample(sample_nrm, n = 20) expect_equal(nrow(s), 20L) }) test_that("srs_sample: appends .sample_id column", { s <- srs_sample(sample_nrm, n = 10) expect_true(".sample_id" %in% names(s)) }) test_that("srs_sample: no replacement -- unique rows", { s <- srs_sample(sample_nrm, n = 50) expect_equal(length(unique(s$.sample_id)), 50L) }) test_that("srs_sample: with replacement works", { s <- srs_sample(sample_nrm, n = 120, replace = TRUE) expect_equal(nrow(s), 120L) }) test_that("srs_sample: error when n > N without replacement", { expect_error(srs_sample(sample_nrm, n = 200), "exceeds population size") }) test_that("srs_sample: error for non-data-frame input", { expect_error(srs_sample(1:100, n = 10), "data frame") }) test_that("stratified_sample: correct rows per stratum (equal)", { s <- stratified_sample(sample_nrm, "strata", n_per_stratum = 4) expect_true(all(table(s$strata) == 4)) }) test_that("stratified_sample: named vector allocation", { ns <- c(forest = 6, agriculture = 3, grassland = 4) s <- stratified_sample(sample_nrm, "strata", n_per_stratum = ns) expect_equal(as.integer(table(s$strata)["forest"]), 6L) }) test_that("stratified_sample: error for unknown strata_var", { expect_error(stratified_sample(sample_nrm, "nonexistent", 5), "not found") }) test_that("systematic_sample: returns roughly N/k rows", { s <- systematic_sample(sample_nrm, k = 10) expect_true(abs(nrow(s) - 10) <= 1) }) test_that("systematic_sample: all indices are multiples of k apart", { s <- systematic_sample(sample_nrm, k = 5) ids <- s$.sample_id diffs <- diff(ids) expect_true(all(diffs == 5)) }) test_that("cluster_sample: returns only selected clusters", { s <- cluster_sample(sample_nrm, "cluster", n_clusters = 3) expect_equal(length(unique(s$cluster)), 3L) }) test_that("cluster_sample: error when n_clusters exceeds clusters", { expect_error(cluster_sample(sample_nrm, "cluster", n_clusters = 50), "exceeds") }) test_that("pps_sample: correct sample size", { s <- pps_sample(sample_nrm, "size", n = 15) expect_equal(nrow(s), 15L) }) test_that("pps_sample: appends .inclusion_prob", { s <- pps_sample(sample_nrm, "size", n = 10) expect_true(".inclusion_prob" %in% names(s)) expect_true(all(s$.inclusion_prob > 0)) }) test_that("pps_sample: error for non-positive size values", { bad <- sample_nrm bad$size[1] <- -1 expect_error(pps_sample(bad, "size", n = 5), "strictly positive") }) # ── Non-Probability Sampling ────────────────────────────────────────────────── test_that("convenience_sample: returns first n rows", { s <- convenience_sample(sample_nrm, n = 15) expect_equal(nrow(s), 15L) expect_equal(s$plot_id, sample_nrm$plot_id[1:15]) }) test_that("convenience_sample: error when n > nrow", { expect_error(convenience_sample(sample_nrm, n = 500), "exceeds") }) test_that("purposive_sample: correct filtering", { s <- purposive_sample(sample_nrm, "biomass > 40") expect_true(all(s$biomass > 40)) }) test_that("purposive_sample: zero rows for impossible condition", { s <- purposive_sample(sample_nrm, "biomass > 1000") expect_equal(nrow(s), 0L) }) test_that("purposive_sample: error for invalid condition", { expect_error(purposive_sample(sample_nrm, "nonexistent_col > 5"), "Invalid condition") }) test_that("quota_sample: respects quota size", { s <- quota_sample(sample_nrm, "strata", quota = 4) expect_true(all(table(s$strata) <= 4)) }) # ── Estimation ──────────────────────────────────────────────────────────────── test_that("estimate_mean: matches base mean", { s <- srs_sample(sample_nrm, n = 30) expect_equal(estimate_mean(s$biomass), mean(s$biomass, na.rm = TRUE)) }) test_that("estimate_total: equals N * mean", { s <- srs_sample(sample_nrm, n = 30) tot <- estimate_total(s$biomass, N = 100) expect_equal(tot, mean(s$biomass) * 100, tolerance = 1e-6) }) test_that("estimate_variance: positive for real data", { expect_gt(estimate_variance(sample_nrm$biomass), 0) }) test_that("estimate_se: positive, with fpc <= without fpc", { s <- srs_sample(sample_nrm, n = 30) se_fpc <- estimate_se(s$biomass, N = 100) se_nofpc <- estimate_se(s$biomass) expect_gt(se_fpc, 0) expect_lte(se_fpc, se_nofpc) }) test_that("estimate_ci: lower < mean < upper", { s <- srs_sample(sample_nrm, n = 30) ci <- estimate_ci(s$biomass, N = 100) expect_lt(ci["lower"], ci["mean"]) expect_gt(ci["upper"], ci["mean"]) }) test_that("estimate_ci: error for invalid conf_level", { s <- srs_sample(sample_nrm, n = 20) expect_error(estimate_ci(s$biomass, conf_level = 1.5), "conf_level") }) test_that("ratio_estimator: returns numeric scalar", { s <- srs_sample(sample_nrm, n = 30) r <- ratio_estimator(s$biomass, s$size, sum(sample_nrm$size)) expect_type(r, "double") expect_length(r, 1L) }) test_that("ratio_estimator: error when x sums to zero", { s <- srs_sample(sample_nrm, n = 30) expect_error(ratio_estimator(s$biomass, rep(0, nrow(s)), 1000), "Sum of 'x'") }) test_that("regression_estimator: returns numeric scalar", { s <- srs_sample(sample_nrm, n = 30) r <- regression_estimator(s$biomass, s$size, mean(sample_nrm$size)) expect_type(r, "double") expect_length(r, 1L) }) test_that("ht_estimator: positive result for positive y/pi", { pps <- pps_sample(sample_nrm, "size", n = 20) ht <- ht_estimator(pps$biomass, pps$.inclusion_prob) expect_gt(ht, 0) }) test_that("ht_estimator: error for pi out of range", { expect_error(ht_estimator(1:5, c(0.2, 0.3, 0, 0.4, 0.5)), "pi.*0.*1") }) test_that("ht_variance: non-negative result", { pps <- pps_sample(sample_nrm, "size", n = 20) v <- ht_variance(pps$biomass, pps$.inclusion_prob) expect_gte(v, 0) }) test_that("stratified_estimator: returns numeric scalar", { st <- stratified_sample(sample_nrm, "strata", 5) N_h <- table(sample_nrm$strata) est <- stratified_estimator(st$biomass, st$strata, N_h) expect_type(est, "double") expect_length(est, 1L) }) # ── NRM Utilities ───────────────────────────────────────────────────────────── test_that("biomass_estimate: list with correct elements", { s <- srs_sample(sample_nrm, n = 30) res <- biomass_estimate(s, "biomass", area = 1000) expect_named(res, c("mean_biomass", "total_biomass", "se", "n")) expect_gt(res$total_biomass, 0) }) test_that("biomass_estimate: total = mean * area", { s <- srs_sample(sample_nrm, n = 30) res <- biomass_estimate(s, "biomass", area = 500) expect_equal(res$total_biomass, res$mean_biomass * 500, tolerance = 1e-3) }) test_that("soil_loss_estimate: list with positive values", { s <- srs_sample(sample_nrm, n = 25) res <- soil_loss_estimate(s, "soil_loss", area = 500) expect_gt(res$total_loss, 0) }) test_that("carbon_stock_estimate: carbon <= biomass", { s <- srs_sample(sample_nrm, n = 30) res <- carbon_stock_estimate(s, "biomass", area = 1000) expect_lt(res$total_carbon, res$total_biomass) }) test_that("carbon_stock_estimate: custom fraction", { s <- srs_sample(sample_nrm, n = 30) r1 <- carbon_stock_estimate(s, "biomass", area = 1000, carbon_fraction = 0.5) r2 <- carbon_stock_estimate(s, "biomass", area = 1000, carbon_fraction = 0.4) expect_gt(r1$total_carbon, r2$total_carbon) }) test_that("plot_summary: data.frame with correct columns", { s <- srs_sample(sample_nrm, n = 30) res <- plot_summary(s, vars = c("biomass", "soil_loss")) expect_s3_class(res, "data.frame") expect_equal(nrow(res), 2L) expect_true(all(c("variable", "n", "mean", "sd", "min", "max") %in% names(res))) }) test_that("plot_summary: error for missing column", { expect_error(plot_summary(sample_nrm, vars = "nonexistent"), "not found") }) test_that("sampling_efficiency: RE positive and named", { y1 <- srs_sample(sample_nrm, 20)$biomass y2 <- srs_sample(sample_nrm, 40)$biomass re <- sampling_efficiency(y1, y2, N = 100) expect_named(re, c("var_design1", "var_design2", "relative_efficiency")) expect_gt(re["relative_efficiency"], 0) }) # ── Unit conversion & edge cases ────────────────────────────────────────────── test_that("estimate_mean: handles NA values", { y <- c(1, 2, NA, 4, 5) expect_equal(estimate_mean(y), mean(c(1, 2, 4, 5))) }) test_that("estimate_total: error for non-positive N", { expect_error(estimate_total(1:10, N = -5), "positive") }) test_that("purposive_sample: multi-condition works", { s <- purposive_sample(sample_nrm, "biomass > 20 & strata == 'forest'") expect_true(all(s$biomass > 20)) expect_true(all(s$strata == "forest")) }) test_that("regression_estimator: error for length mismatch", { expect_error(regression_estimator(1:10, 1:5, X_mean = 50), "same length") }) test_that("srs_sample: rownames are reset", { s <- srs_sample(sample_nrm, n = 20) expect_equal(rownames(s), as.character(seq_len(nrow(s)))) })