## Tests for agriDQ package
## All tests are self-contained and run in < 5 seconds total
##
## Every test that draws random numbers sets its own seed first, so a failure
## can always be reproduced. `make_yield_outlier()` and `make_rcbd()` are not
## defined in this file; presumably they live in a testthat helper file.

# ============================================================
# Outlier detection
# ============================================================

test_that("check_outliers returns correct S3 class", {
  x <- make_yield_outlier()
  result <- check_outliers(x, method = c("iqr", "zscore", "hampel"))
  expect_s3_class(result, "agriDQ_outlier")
})

test_that("check_outliers flags a known extreme outlier by consensus", {
  # 19 identical values plus one extreme value in position 20.
  x <- c(rep(4, 19), 50)
  result <- check_outliers(x, method = c("iqr", "zscore", "hampel"))
  expect_true(result$flags$consensus[20])
  expect_equal(result$n_flagged, 1L)
})

test_that("check_outliers produces <= 2 false positives on clean normal data", {
  set.seed(99)
  x <- rnorm(40, 4, 0.3)
  result <- check_outliers(x, method = "iqr")
  expect_lte(result$n_flagged, 2L)
})

test_that("check_outliers stops on non-numeric input", {
  expect_error(check_outliers(c("a", "b", "c")), regexp = "numeric")
})

test_that("check_outliers stops when labels length mismatches x", {
  expect_error(
    check_outliers(1:10, labels = letters[1:5]),
    regexp = "length"
  )
})

test_that("check_outliers_mv returns correct class and flags outlier", {
  # The last row is far from the other 20 rows in both columns.
  df <- data.frame(
    y1 = c(rep(4, 20), 15),
    y2 = c(rep(80, 20), 10)
  )
  result <- check_outliers_mv(df)
  expect_s3_class(result, "agriDQ_mout")
  expect_gte(result$n_flagged, 1L)
})

test_that("check_outliers_mv stops with fewer than 2 numeric columns", {
  df <- data.frame(y = 1:10)
  expect_error(check_outliers_mv(df), regexp = "2 numeric")
})

# ============================================================
# Missing data
# ============================================================

test_that("check_missing returns correct class and detects NAs", {
  df <- data.frame(
    y = c(1, 2, NA, 4, 5),
    x = c(NA, 2, 3, 4, 5)
  )
  result <- check_missing(df, plot = FALSE)
  expect_s3_class(result, "agriDQ_missing")
  expect_gt(result$total_pct_miss, 0)
})

test_that("check_missing col_summary has required columns", {
  df <- data.frame(a = c(1, NA), b = c(2, 3))
  result <- check_missing(df, plot = FALSE)
  required <- c("variable", "n_missing", "pct_missing")
  expect_true(all(required %in% names(result$col_summary)))
})

test_that("check_missing reports zero missing correctly", {
  df <- data.frame(a = 1:5, b = 6:10)
  result <- check_missing(df, plot = FALSE)
  expect_equal(result$total_pct_miss, 0)
})

test_that("classify_missing returns a data frame with mechanism column", {
  set.seed(3)
  df <- data.frame(y = c(NA, rnorm(9)), x = rnorm(10))
  result <- classify_missing(df)
  expect_s3_class(result, "data.frame")
  expect_true("mechanism" %in% names(result))
})

# ============================================================
# Normality
# ============================================================

test_that("check_normality returns correct class", {
  set.seed(42)
  result <- check_normality(
    rnorm(30, 4, 0.5),
    plot = FALSE,
    tests = "shapiro"
  )
  expect_s3_class(result, "agriDQ_normality")
})

test_that("check_normality passes on clearly normal data", {
  set.seed(42)
  result <- check_normality(
    rnorm(50, 4, 0.5),
    plot = FALSE,
    tests = c("shapiro", "anderson")
  )
  expect_equal(result$consensus, "pass")
})

test_that("check_normality warns/fails on clearly non-normal data", {
  # Two-point distribution: 20 ones and 3 fifteens.
  result <- check_normality(
    c(rep(1, 20), rep(15, 3)),
    plot = FALSE,
    tests = c("shapiro", "anderson")
  )
  expect_true(result$consensus %in% c("warning", "fail"))
})

test_that("check_normality descriptives are computed correctly", {
  x <- c(1, 2, 3, 4, 5)
  result <- check_normality(x, plot = FALSE, tests = "shapiro")
  expect_equal(result$n, 5L)
  expect_equal(result$descriptives$mean, 3)
  expect_equal(result$descriptives$median, 3)
})

# ============================================================
# Homogeneity of variance
# ============================================================

test_that("check_homogeneity passes for equal variances", {
  set.seed(5)
  y <- c(rnorm(10, 4, 0.5), rnorm(10, 4, 0.5), rnorm(10, 4, 0.5))
  grp <- rep(c("T1", "T2", "T3"), each = 10)
  result <- check_homogeneity(y, grp)
  expect_s3_class(result, "agriDQ_homogeneity")
  expect_equal(result$consensus, "pass")
})

test_that("check_homogeneity fails for strongly unequal variances", {
  # Seeded so the sample (and therefore the verdict) is reproducible.
  set.seed(6)
  y <- c(rnorm(10, 4, 0.1), rnorm(10, 4, 5), rnorm(10, 4, 0.1))
  grp <- rep(c("T1", "T2", "T3"), each = 10)
  result <- check_homogeneity(y, grp)
  expect_true(result$consensus %in% c("warning", "fail"))
})

test_that("check_homogeneity stops with mismatched lengths", {
  expect_error(check_homogeneity(1:10, rep("A", 5)), regexp = "length")
})

# ============================================================
# Independence
# ============================================================

test_that("check_independence passes on random residuals", {
  set.seed(7)
  result <- check_independence(rnorm(30), plot = FALSE)
  expect_s3_class(result, "agriDQ_independence")
  expect_equal(result$consensus, "pass")
})

test_that("check_independence detects strong autocorrelation", {
  # A random walk (cumulative sum of noise); seeded for reproducibility.
  set.seed(8)
  result <- check_independence(cumsum(rnorm(50)), plot = FALSE)
  expect_true(result$consensus %in% c("warning", "fail"))
})

# ============================================================
# Experimental design
# ============================================================

test_that("check_design passes on a perfect RCBD", {
  df <- make_rcbd(t = 4, b = 3)
  result <- check_design(df, "treatment", "block", "yield", "RCBD")
  expect_s3_class(result, "agriDQ_design")
  expect_equal(result$results$replication_balance$status, "pass")
})

test_that("check_design detects unbalanced replications", {
  # Treatments have 3, 2 and 4 replicates respectively.
  set.seed(9)
  df <- data.frame(
    treatment = c(rep("T1", 3), rep("T2", 2), rep("T3", 4)),
    yield = rnorm(9, 4, 0.5)
  )
  result <- check_design(
    df,
    "treatment",
    response = "yield",
    design = "CRD"
  )
  expect_false(result$results$replication_balance$status == "pass")
})

test_that("check_design stops when response column is missing", {
  df <- make_rcbd()
  expect_error(
    check_design(df, "treatment", "block", "nonexistent"),
    regexp = "valid column"
  )
})

# ============================================================
# Qualitative checks
# ============================================================

test_that("check_qualitative detects case inconsistency", {
  df <- data.frame(
    trt = c("T1", "t1", "T2", "T2"),
    stringsAsFactors = FALSE
  )
  result <- check_qualitative(df)
  expect_s3_class(result, "agriDQ_qualitative")
  expect_gt(result$n_issues, 0L)
})

test_that("check_qualitative passes on clean data", {
  df <- data.frame(
    trt = c("T1", "T1", "T2", "T2", "T3", "T3"),
    stringsAsFactors = FALSE
  )
  result <- check_qualitative(df)
  expect_equal(result$n_issues, 0L)
})

test_that("standardise_labels trims whitespace and collapses spaces", {
  # NOTE(review): none of these inputs contains an internal run of spaces, so
  # the "collapses spaces" half is only checked trivially -- confirm whether
  # the original fixture had repeated spaces that were lost.
  df <- data.frame(v = c(" T1 ", "T1", " T2"), stringsAsFactors = FALSE)
  out <- standardise_labels(df)
  expect_true(all(out$v == trimws(out$v)))
  expect_false(any(grepl(" ", out$v, fixed = TRUE)))
})

# ============================================================
# Full pipeline
# ============================================================

test_that("run_dq_pipeline returns correct class and non-empty summary", {
  data(agri_trial, package = "agriDQ")
  result <- run_dq_pipeline(
    agri_trial,
    response = "yield",
    treatment = "treatment",
    block = "block",
    plot = FALSE
  )
  expect_s3_class(result, "agriDQ_pipeline")
  expect_gt(nrow(result$summary), 0L)
  required <- c("module", "test", "statistic", "p_value", "status")
  expect_true(all(required %in% names(result$summary)))
})

test_that("generate_dq_report creates a valid HTML file", {
  data(agri_trial, package = "agriDQ")
  pl <- run_dq_pipeline(
    agri_trial,
    response = "yield",
    treatment = "treatment",
    block = "block",
    plot = FALSE
  )

  tmp <- tempfile(fileext = ".html")
  # add = TRUE so this cleanup never replaces another registered handler.
  on.exit(unlink(tmp), add = TRUE)

  generate_dq_report(
    pl,
    output_file = tmp,
    title = "Test Report",
    author = "testthat"
  )

  expect_true(file.exists(tmp))
  expect_gt(file.size(tmp), 2000L)

  content <- paste(readLines(tmp), collapse = "\n")
  # The previous pattern was the empty string, which matches any content and
  # so asserted nothing. Look for an HTML document marker instead.
  # NOTE(review): assumes the report emits a doctype or an <html> tag --
  # confirm against generate_dq_report().
  expect_true(grepl("<!DOCTYPE html|<html", content, ignore.case = TRUE))
  expect_true(grepl("agriDQ", content, fixed = TRUE))
})