# tests/testthat/test-data_prep.R

# clean_agri_data() ----

test_that("clean_agri_data removes NA rows", {
  # Four plots, one of which has a missing yield.
  plots <- data.frame(
    yield = c(1, 2, NA, 4),
    block = rep(c("A", "B"), each = 2)
  )

  cleaned <- clean_agri_data(plots, yield_col = "yield")

  # Three rows are expected back, with the drop count in "n_removed".
  expect_equal(nrow(cleaned), 3L)
  expect_equal(attr(cleaned, "n_removed"), 1L)
})

test_that("clean_agri_data returns agriData class", {
  cleaned <- clean_agri_data(data.frame(yield = c(2.1, 3.4, 2.8)))

  expect_s3_class(cleaned, "agriData")
})

test_that("clean_agri_data flags IQR outliers", {
  # Four tightly clustered yields followed by one extreme value.
  yields <- c(3, 3.1, 3.2, 3.1, 100)

  flagged <- clean_agri_data(data.frame(yield = yields), flag_outliers = TRUE)

  expect_true(".outlier" %in% names(flagged))
  expect_true(flagged$.outlier[5])
  expect_false(flagged$.outlier[1])
})

test_that("clean_agri_data errors on missing column", {
  # The frame has no "yield" column at all.
  no_yield <- data.frame(x = 1:5)

  expect_error(
    clean_agri_data(no_yield, yield_col = "yield"),
    "not found"
  )
})

# yield_normalize() ----

test_that("yield_normalize zscore produces mean≈0, sd≈1", {
  raw <- data.frame(yield = seq(10, 50, by = 10))

  scaled <- yield_normalize(raw, yield_col = "yield", method = "zscore")

  expect_true("yield_norm" %in% names(scaled))

  # Mean and sd are compared against 0 and 1 with a 1e-10 tolerance.
  z <- scaled$yield_norm
  expect_lt(abs(mean(z)), 1e-10)
  expect_lt(abs(sd(z) - 1), 1e-10)
})

test_that("yield_normalize minmax bounds output to [0,1]", {
  raw <- data.frame(yield = c(5, 10, 15, 20))

  scaled <- yield_normalize(raw, method = "minmax")

  normalized <- scaled$yield_norm
  expect_equal(min(normalized), 0)
  expect_equal(max(normalized), 1)
})

test_that("yield_normalize respects group_by", {
  # Two sites on very different scales.
  raw <- data.frame(
    yield = c(1, 2, 10, 20),
    site = rep(c("A", "B"), each = 2)
  )

  scaled <- yield_normalize(raw, group_by = "site", method = "minmax")

  # Each site's normalized values are expected to span [0, 1] on their own.
  for (site_id in c("A", "B")) {
    site_norm <- scaled$yield_norm[scaled$site == site_id]
    expect_equal(min(site_norm), 0)
    expect_equal(max(site_norm), 1)
  }
})

# outlier_flag() ----

test_that("outlier_flag IQR method appends correct column", {
  values <- data.frame(x = c(1, rep(2, 4), 100))

  flagged <- outlier_flag(values, col = "x", method = "iqr")

  expect_true("x_outlier" %in% names(flagged))
  expect_true(flagged$x_outlier[6])
})
test_that("outlier_flag zscore method works", {
  # Twenty identical baseline values plus one extreme value in row 21.
  df <- data.frame(x = c(rep(5, 20), 100))
  out <- outlier_flag(df, col = "x", method = "zscore")

  expect_true(out$x_outlier[21])
})

test_that("outlier_flag modz method works", {
  # The baseline needs spread: a constant baseline such as rep(5, 20) has a
  # median absolute deviation of exactly 0, and the standard modified z-score
  # 0.6745 * (x - median) / MAD then evaluates to NaN or Inf. The test would
  # pass or fail depending on how the divide-by-zero is handled rather than
  # on outlier detection.
  # NOTE(review): assumes outlier_flag(method = "modz") uses the usual
  # median/MAD formulation -- confirm against the implementation.
  df <- data.frame(x = c(seq(4, 6, length.out = 20), 200))
  out <- outlier_flag(df, col = "x", method = "modz")

  # Row 21 is the extreme value; row 11 is the sample median (score 0).
  expect_true(out$x_outlier[21])
  expect_false(out$x_outlier[11])
})

test_that("print.agriData outputs without error", {
  df <- clean_agri_data(data.frame(yield = 1:10))

  # The printed output must match the pattern "agriData".
  expect_output(print(df), "agriData")
})

test_that("summary.agriData outputs without error", {
  df <- clean_agri_data(data.frame(yield = 1:10))

  # The summary output must match the pattern "agriData summary".
  expect_output(summary(df), "agriData summary")
})