## Tests for agriDQ package
## All tests are self-contained and run in < 5 seconds total

# ============================================================
# Outlier detection
# ============================================================
# Requesting all three detection methods at once should still yield an object
# carrying the "agriDQ_outlier" S3 class. make_yield_outlier() is a fixture
# helper that is not defined in this file.
test_that("check_outliers returns correct S3 class", {
  detectors <- c("iqr", "zscore", "hampel")
  yield     <- make_yield_outlier()
  screened  <- check_outliers(yield, method = detectors)
  expect_s3_class(screened, "agriDQ_outlier")
})

# Nineteen identical values followed by one extreme value: the consensus flag
# must be set for the last observation, and it must be the only one flagged.
test_that("check_outliers flags a known extreme outlier by consensus", {
  values      <- c(rep(4, times = 19), 50)
  outlier_pos <- length(values)
  screened    <- check_outliers(values, method = c("iqr", "zscore", "hampel"))
  expect_true(screened$flags$consensus[outlier_pos])
  expect_equal(screened$n_flagged, 1L)
})

# IQR-only screening of a seeded normal sample (n = 40) may flag at most two
# observations.
test_that("check_outliers produces <= 2 false positives on clean normal data", {
  set.seed(99)
  clean_sample <- rnorm(n = 40, mean = 4, sd = 0.3)
  screened     <- check_outliers(clean_sample, method = "iqr")
  expect_lte(screened$n_flagged, 2L)
})

# Character input must be rejected with an error whose message mentions
# "numeric".
test_that("check_outliers stops on non-numeric input", {
  not_numbers <- c("a", "b", "c")
  expect_error(
    check_outliers(not_numbers),
    regexp = "numeric"
  )
})

# Ten observations paired with only five labels must raise an error whose
# message mentions "length".
test_that("check_outliers stops when labels length mismatches x", {
  values      <- seq_len(10)
  too_few_ids <- letters[seq_len(5)]
  expect_error(
    check_outliers(values, labels = too_few_ids),
    regexp = "length"
  )
})

# Two constant columns with one jointly aberrant final row: the multivariate
# check must return an "agriDQ_mout" object and flag at least one row.
test_that("check_outliers_mv returns correct class and flags outlier", {
  n_regular <- 20
  traits <- data.frame(
    y1 = c(rep(4, n_regular), 15),
    y2 = c(rep(80, n_regular), 10)
  )
  mv_screen <- check_outliers_mv(traits)
  expect_s3_class(mv_screen, "agriDQ_mout")
  expect_gte(mv_screen$n_flagged, 1L)
})

# A single-column data frame must raise an error whose message contains
# "2 numeric".
test_that("check_outliers_mv stops with fewer than 2 numeric columns", {
  single_trait <- data.frame(y = seq_len(10))
  expect_error(
    check_outliers_mv(single_trait),
    regexp = "2 numeric"
  )
})

# ============================================================
# Missing data
# ============================================================
# One NA in each of two columns: the result must carry the "agriDQ_missing"
# class and report a strictly positive overall missing percentage.
test_that("check_missing returns correct class and detects NAs", {
  y_col  <- c(1, 2, NA, 4, 5)
  x_col  <- c(NA, 2, 3, 4, 5)
  gappy  <- data.frame(y = y_col, x = x_col)
  report <- check_missing(gappy, plot = FALSE)
  expect_s3_class(report, "agriDQ_missing")
  expect_gt(report$total_pct_miss, 0)
})

# The per-column summary table must expose the three columns that identify
# each variable and quantify its missingness.
test_that("check_missing col_summary has required columns", {
  gappy    <- data.frame(a = c(1, NA), b = c(2, 3))
  report   <- check_missing(gappy, plot = FALSE)
  required <- c("variable", "n_missing", "pct_missing")
  present  <- names(report$col_summary)
  expect_true(all(required %in% present))
})

# A fully observed data frame must report an overall missing percentage of
# exactly zero.
test_that("check_missing reports zero missing correctly", {
  complete <- data.frame(a = seq(1L, 5L), b = seq(6L, 10L))
  report   <- check_missing(complete, plot = FALSE)
  expect_equal(report$total_pct_miss, 0)
})

# classify_missing() must return a data frame that includes a "mechanism"
# column. The draws are seeded and made in the same order (y first, then x).
test_that("classify_missing returns a data frame with mechanism column", {
  set.seed(3)
  y_vals     <- c(NA, rnorm(9))
  x_vals     <- rnorm(10)
  gappy      <- data.frame(y = y_vals, x = x_vals)
  classified <- classify_missing(gappy)
  expect_s3_class(classified, "data.frame")
  expect_true("mechanism" %in% names(classified))
})

# ============================================================
# Normality
# ============================================================
# With plotting disabled and only the Shapiro test requested, the result must
# carry the "agriDQ_normality" class.
test_that("check_normality returns correct class", {
  set.seed(42)
  normal_sample <- rnorm(30, mean = 4, sd = 0.5)
  assessed <- check_normality(
    normal_sample,
    plot  = FALSE,
    tests = "shapiro"
  )
  expect_s3_class(assessed, "agriDQ_normality")
})

# A seeded normal sample of 50 values must reach a "pass" consensus across
# the Shapiro and Anderson tests.
test_that("check_normality passes on clearly normal data", {
  set.seed(42)
  normal_sample <- rnorm(50, mean = 4, sd = 0.5)
  assessed <- check_normality(
    normal_sample,
    plot  = FALSE,
    tests = c("shapiro", "anderson")
  )
  expect_equal(assessed$consensus, "pass")
})

# A deterministic two-point sample (twenty 1s, three 15s) must not reach a
# "pass" consensus; either "warning" or "fail" is accepted.
test_that("check_normality warns/fails on clearly non-normal data", {
  lumpy <- rep(c(1, 15), times = c(20, 3))
  assessed <- check_normality(
    lumpy,
    plot  = FALSE,
    tests = c("shapiro", "anderson")
  )
  acceptable <- c("warning", "fail")
  expect_true(assessed$consensus %in% acceptable)
})

# For the values 1..5 the reported sample size is 5, and the mean and median
# are both 3.
test_that("check_normality descriptives are computed correctly", {
  one_to_five <- as.numeric(seq_len(5))
  assessed    <- check_normality(one_to_five, plot = FALSE, tests = "shapiro")
  expect_equal(assessed$n, 5L)
  stats_tbl <- assessed$descriptives
  expect_equal(stats_tbl$mean, 3)
  expect_equal(stats_tbl$median, 3)
})

# ============================================================
# Homogeneity of variance
# ============================================================
# Three seeded groups drawn with the same standard deviation must reach a
# "pass" consensus and carry the "agriDQ_homogeneity" class.
test_that("check_homogeneity passes for equal variances", {
  set.seed(5)
  draw_group <- function() rnorm(10, mean = 4, sd = 0.5)
  response   <- c(draw_group(), draw_group(), draw_group())
  groups     <- rep(paste0("T", 1:3), each = 10)
  verdict    <- check_homogeneity(response, groups)
  expect_s3_class(verdict, "agriDQ_homogeneity")
  expect_equal(verdict$consensus, "pass")
})

# The middle group is drawn with a standard deviation 50 times larger than the
# other two, so the consensus must be "warning" or "fail".
test_that("check_homogeneity fails for strongly unequal variances", {
  # Seed the RNG so the sample, and therefore the test outcome, is the same
  # on every run, as in the other stochastic tests in this file.
  set.seed(11)
  y      <- c(rnorm(10, 4, 0.1), rnorm(10, 4, 5), rnorm(10, 4, 0.1))
  grp    <- rep(c("T1", "T2", "T3"), each = 10)
  result <- check_homogeneity(y, grp)
  expect_true(result$consensus %in% c("warning", "fail"))
})

# Ten responses with only five group labels must raise an error whose message
# mentions "length".
test_that("check_homogeneity stops with mismatched lengths", {
  response   <- seq_len(10)
  short_grps <- rep("A", times = 5)
  expect_error(
    check_homogeneity(response, short_grps),
    regexp = "length"
  )
})

# ============================================================
# Independence
# ============================================================
# Thirty seeded independent normal draws must reach a "pass" consensus and
# carry the "agriDQ_independence" class.
test_that("check_independence passes on random residuals", {
  set.seed(7)
  white_noise <- rnorm(30)
  verdict     <- check_independence(white_noise, plot = FALSE)
  expect_s3_class(verdict, "agriDQ_independence")
  expect_equal(verdict$consensus, "pass")
})

# A random walk (cumulative sum of normal draws) is serially dependent, so
# the consensus must be "warning" or "fail".
test_that("check_independence detects strong autocorrelation", {
  # Seed the RNG so the same random walk is generated on every run; without
  # it the test outcome depends on whatever RNG state earlier tests left.
  set.seed(8)
  result <- check_independence(cumsum(rnorm(50)), plot = FALSE)
  expect_true(result$consensus %in% c("warning", "fail"))
})

# ============================================================
# Experimental design
# ============================================================
# A layout produced by make_rcbd(t = 4, b = 3) must pass the replication
# balance check. make_rcbd() is a fixture helper that is not defined in this
# file.
test_that("check_design passes on a perfect RCBD", {
  layout  <- make_rcbd(t = 4, b = 3)
  audit   <- check_design(layout, "treatment", "block", "yield", "RCBD")
  expect_s3_class(audit, "agriDQ_design")
  balance <- audit$results$replication_balance
  expect_equal(balance$status, "pass")
})

# Treatments replicated 3, 2 and 4 times in a CRD: the replication balance
# status must be anything other than "pass".
test_that("check_design detects unbalanced replications", {
  # Seed the RNG so the simulated yields are identical on every run, matching
  # the other tests in this file that draw random data.
  set.seed(21)
  df     <- data.frame(
    treatment = c(rep("T1", 3), rep("T2", 2), rep("T3", 4)),
    yield     = rnorm(9, 4, 0.5)
  )
  result <- check_design(df, "treatment", response = "yield", design = "CRD")
  expect_false(result$results$replication_balance$status == "pass")
})

# Naming a response column that is not in the data must raise an error whose
# message contains "valid column".
test_that("check_design stops when response column is missing", {
  layout        <- make_rcbd()
  absent_column <- "nonexistent"
  expect_error(
    check_design(layout, "treatment", "block", absent_column),
    regexp = "valid column"
  )
})

# ============================================================
# Qualitative checks
# ============================================================
# "T1" and "t1" differ only by case; the qualitative check must report at
# least one issue and carry the "agriDQ_qualitative" class.
test_that("check_qualitative detects case inconsistency", {
  mixed_case <- c("T1", "t1", "T2", "T2")
  labels_df  <- data.frame(trt = mixed_case, stringsAsFactors = FALSE)
  audit      <- check_qualitative(labels_df)
  expect_s3_class(audit, "agriDQ_qualitative")
  expect_gt(audit$n_issues, 0L)
})

# Consistently spelled labels (each treatment appearing twice) must produce
# zero reported issues.
test_that("check_qualitative passes on clean data", {
  tidy_labels <- rep(c("T1", "T2", "T3"), each = 2)
  labels_df   <- data.frame(trt = tidy_labels, stringsAsFactors = FALSE)
  audit       <- check_qualitative(labels_df)
  expect_equal(audit$n_issues, 0L)
})

# After standardisation no label may have leading or trailing whitespace, and
# none may contain two consecutive spaces.
test_that("standardise_labels trims whitespace and collapses spaces", {
  messy   <- data.frame(v = c(" T1 ", "T1", "  T2"), stringsAsFactors = FALSE)
  tidied  <- standardise_labels(messy)
  cleaned <- tidied$v
  expect_true(all(cleaned == trimws(cleaned)))
  has_double_space <- grepl("  ", tidied$v, fixed = TRUE)
  expect_false(any(has_double_space))
})

# ============================================================
# Full pipeline
# ============================================================
# Running the full pipeline on the packaged agri_trial data must return an
# "agriDQ_pipeline" object whose summary table is non-empty and exposes the
# five expected columns.
test_that("run_dq_pipeline returns correct class and non-empty summary", {
  data(agri_trial, package = "agriDQ")
  pipeline <- run_dq_pipeline(
    agri_trial,
    response  = "yield",
    treatment = "treatment",
    block     = "block",
    plot      = FALSE
  )
  expect_s3_class(pipeline, "agriDQ_pipeline")
  expect_true(nrow(pipeline$summary) > 0L)
  expected_cols <- c("module", "test", "statistic", "p_value", "status")
  expect_true(all(expected_cols %in% names(pipeline$summary)))
})

# End-to-end check of the HTML report writer: run the pipeline on the packaged
# agri_trial data, render the report to a temporary file, then verify that the
# file exists, is larger than 2000 bytes, and contains both the HTML doctype
# and the package name.
test_that("generate_dq_report creates a valid HTML file", {
  data(agri_trial, package = "agriDQ")
  pl  <- run_dq_pipeline(agri_trial, response = "yield",
                          treatment = "treatment", block = "block",
                          plot = FALSE)
  tmp <- tempfile(fileext = ".html")
  # add = TRUE appends this cleanup to any exit handlers already registered
  # instead of replacing them.
  on.exit(unlink(tmp), add = TRUE)
  generate_dq_report(pl, output_file = tmp,
                     title  = "Test Report",
                     author = "testthat")
  expect_true(file.exists(tmp))
  expect_gt(file.size(tmp), 2000L)
  # warn = FALSE: a report that lacks a trailing newline should not surface an
  # "incomplete final line" warning in the test output.
  content <- paste(readLines(tmp, warn = FALSE), collapse = "\n")
  # expect_match() reports the searched text on failure, unlike a bare
  # expect_true(grepl(...)).
  expect_match(content, "<!DOCTYPE html>", fixed = TRUE)
  expect_match(content, "agriDQ", fixed = TRUE)
})