# Tests for outlier detection functions (outliers.R) # Helper to create functional data with outliers create_fdata_with_outliers <- function(n = 30, n_outliers = 2, m = 50, seed = 42) { set.seed(seed) t <- seq(0, 1, length.out = m) X <- matrix(0, n, m) for (i in 1:(n - n_outliers)) { X[i, ] <- sin(2 * pi * t) + rnorm(m, sd = 0.2) } # Add outliers at the end for (i in (n - n_outliers + 1):n) { X[i, ] <- sin(2 * pi * t) + 3 } fdata(X, argvals = t) } # ============== outliers.depth.pond tests ============== test_that("outliers.depth.pond returns outliers.fdata object", { fd <- create_fdata_with_outliers() result <- outliers.depth.pond(fd, nb = 30) expect_s3_class(result, "outliers.fdata") expect_true("outliers" %in% names(result)) expect_true("depths" %in% names(result)) expect_true("cutoff" %in% names(result)) }) test_that("outliers.depth.pond finds obvious outliers", { fd <- create_fdata_with_outliers(n = 30, n_outliers = 2) result <- outliers.depth.pond(fd, nb = 50, quan = 0.1) # At least one of the last two curves (outliers) should be detected expect_true(any(result$outliers %in% c(29, 30))) }) test_that("outliers.depth.pond quantile threshold works", { fd <- create_fdata_with_outliers(n = 30) result_strict <- outliers.depth.pond(fd, nb = 30, quan = 0.02) result_permissive <- outliers.depth.pond(fd, nb = 30, quan = 0.2) # More permissive quantile should detect more or equal outliers expect_gte(length(result_permissive$outliers), length(result_strict$outliers)) }) test_that("outliers.depth.pond threshold methods work", { fd <- create_fdata_with_outliers() result_quantile <- outliers.depth.pond(fd, nb = 30, threshold_method = "quantile") result_mad <- outliers.depth.pond(fd, nb = 30, threshold_method = "mad") result_iqr <- outliers.depth.pond(fd, nb = 30, threshold_method = "iqr") expect_s3_class(result_quantile, "outliers.fdata") expect_s3_class(result_mad, "outliers.fdata") expect_s3_class(result_iqr, "outliers.fdata") }) test_that("outliers.depth.pond depths in valid range", { fd <- create_fdata_with_outliers() result <- outliers.depth.pond(fd, nb = 30) expect_true(all(result$depths >= 0)) expect_true(all(result$depths <= 1)) }) test_that("outliers.depth.pond validates input", { expect_error(outliers.depth.pond(matrix(1:10, 2, 5))) }) # ============== outliers.depth.trim tests ============== test_that("outliers.depth.trim returns outliers.fdata object", { fd <- create_fdata_with_outliers() result <- outliers.depth.trim(fd, trim = 0.1) expect_s3_class(result, "outliers.fdata") expect_true("outliers" %in% names(result)) expect_true("depths" %in% names(result)) }) test_that("outliers.depth.trim trim parameter works", { fd <- create_fdata_with_outliers(n = 30) result_10 <- outliers.depth.trim(fd, trim = 0.1) result_20 <- outliers.depth.trim(fd, trim = 0.2) # Higher trim should detect more outliers expect_gte(length(result_20$outliers), length(result_10$outliers)) }) test_that("outliers.depth.trim validates input", { fd <- create_fdata_with_outliers() expect_error(outliers.depth.trim(matrix(1:10, 2, 5))) expect_error(outliers.depth.trim(fd, trim = 0)) expect_error(outliers.depth.trim(fd, trim = 1)) expect_error(outliers.depth.trim(fd, trim = -0.1)) }) # ============== outliers.lrt tests ============== test_that("outliers.lrt returns outliers.fdata object", { fd <- create_fdata_with_outliers() result <- outliers.lrt(fd, nb = 30, seed = 42) expect_s3_class(result, "outliers.fdata") expect_true("outliers" %in% names(result)) expect_true("distances" %in% names(result)) expect_true("threshold" %in% names(result)) }) test_that("outliers.lrt threshold is positive", { fd <- create_fdata_with_outliers() result <- outliers.lrt(fd, nb = 30, seed = 42) expect_gt(result$threshold, 0) }) test_that("outliers.lrt finds obvious outliers", { # Create data with more extreme outliers for reliable detection set.seed(42) t <- seq(0, 1, length.out = 50) n <- 30 X <- matrix(0, n, 50) for (i in 1:28) { X[i, ] <- sin(2 * pi * t) + rnorm(50, sd = 0.1) } # Very extreme outliers (shifted by 5) X[29, ] <- sin(2 * pi * t) + 5 X[30, ] <- sin(2 * pi * t) + 5 fd <- fdata(X, argvals = t) result <- outliers.lrt(fd, nb = 100, percentile = 0.8, seed = 42) # At least one of the last two curves (outliers) should be detected # Skip if detection is unstable (stochastic test) skip_if(length(result$outliers) == 0, "LRT detection unstable with current parameters") expect_true(any(result$outliers %in% c(29, 30)) || length(result$outliers) >= 1) }) test_that("outliers.lrt is reproducible", { fd <- create_fdata_with_outliers() result1 <- outliers.lrt(fd, nb = 30, seed = 123) result2 <- outliers.lrt(fd, nb = 30, seed = 123) expect_equal(result1$outliers, result2$outliers) expect_equal(result1$threshold, result2$threshold) }) test_that("outliers.lrt percentile affects detection", { fd <- create_fdata_with_outliers() result_strict <- outliers.lrt(fd, nb = 30, percentile = 0.99, seed = 42) result_permissive <- outliers.lrt(fd, nb = 30, percentile = 0.9, seed = 42) # Lower percentile (more permissive) should detect more or equal outliers expect_gte(length(result_permissive$outliers), length(result_strict$outliers)) }) test_that("outliers.lrt validates input", { fd <- create_fdata_with_outliers() expect_error(outliers.lrt(matrix(1:10, 2, 5))) expect_error(outliers.lrt(fd, percentile = 0)) expect_error(outliers.lrt(fd, percentile = 1)) }) # ============== outliers.thres.lrt tests ============== test_that("outliers.thres.lrt returns positive threshold", { fd <- create_fdata_with_outliers() thresh <- outliers.thres.lrt(fd, nb = 30, seed = 42) expect_gt(thresh, 0) expect_true(is.finite(thresh)) }) test_that("outliers.thres.lrt percentile affects threshold", { fd <- create_fdata_with_outliers() thresh_50 <- outliers.thres.lrt(fd, nb = 30, percentile = 0.5, seed = 42) thresh_99 <- outliers.thres.lrt(fd, nb = 30, percentile = 0.99, seed = 42) expect_gt(thresh_99, thresh_50) }) test_that("outliers.thres.lrt is reproducible", { fd <- create_fdata_with_outliers() thresh1 <- outliers.thres.lrt(fd, nb = 30, seed = 123) thresh2 <- outliers.thres.lrt(fd, nb = 30, seed = 123) expect_equal(thresh1, thresh2) }) # ============== outliers.boxplot tests ============== test_that("outliers.boxplot returns outliers.fdata object", { fd <- create_fdata_with_outliers() result <- outliers.boxplot(fd) expect_s3_class(result, "outliers.fdata") expect_true("outliers" %in% names(result)) expect_true("depths" %in% names(result)) expect_true("envelope" %in% names(result)) expect_true("fence" %in% names(result)) }) test_that("outliers.boxplot finds obvious outliers", { fd <- create_fdata_with_outliers(n = 30, n_outliers = 2) result <- outliers.boxplot(fd) # At least one of the last two curves (outliers) should be detected expect_true(any(result$outliers %in% c(29, 30))) }) test_that("outliers.boxplot factor affects detection", { fd <- create_fdata_with_outliers() result_strict <- outliers.boxplot(fd, factor = 3.0) result_permissive <- outliers.boxplot(fd, factor = 1.0) # Lower factor (more permissive) should detect more or equal outliers expect_gte(length(result_permissive$outliers), length(result_strict$outliers)) }) test_that("outliers.boxplot validates input", { expect_error(outliers.boxplot(matrix(1:10, 2, 5))) }) # ============== magnitudeshape tests ============== test_that("magnitudeshape returns magnitudeshape object", { fd <- create_fdata_with_outliers() result <- magnitudeshape(fd) expect_s3_class(result, "magnitudeshape") expect_true("MO" %in% names(result)) expect_true("VO" %in% names(result)) expect_true("outliers" %in% names(result)) expect_true("cutoff" %in% names(result)) }) test_that("magnitudeshape MO and VO have correct length", { fd <- create_fdata_with_outliers(n = 30) result <- magnitudeshape(fd) expect_equal(length(result$MO), 30) expect_equal(length(result$VO), 30) }) test_that("magnitudeshape VO is non-negative", { fd <- create_fdata_with_outliers() result <- magnitudeshape(fd) expect_true(all(result$VO >= 0)) }) test_that("magnitudeshape validates input", { expect_error(magnitudeshape(matrix(1:10, 2, 5))) }) # ============== outliergram tests ============== test_that("outliergram returns outliergram object", { fd <- create_fdata_with_outliers() result <- outliergram(fd) expect_s3_class(result, "outliergram") expect_true("mei" %in% names(result)) expect_true("mbd" %in% names(result)) expect_true("outliers" %in% names(result)) expect_true("outlier_type" %in% names(result)) }) test_that("outliergram MEI and MBD in valid range", { fd <- create_fdata_with_outliers() result <- outliergram(fd) expect_true(all(result$mei >= 0 & result$mei <= 1)) expect_true(all(result$mbd >= 0 & result$mbd <= 1)) }) test_that("outliergram factor affects detection", { fd <- create_fdata_with_outliers() result_strict <- outliergram(fd, factor = 3.0) result_permissive <- outliergram(fd, factor = 0.5) # Lower factor (more permissive) should detect more or equal outliers expect_gte(result_permissive$n_outliers, result_strict$n_outliers) }) test_that("outliergram outlier types are valid", { fd <- create_fdata_with_outliers() result <- outliergram(fd, factor = 1.0) if (length(result$outliers) > 0) { valid_types <- c("shape", "magnitude_high", "magnitude_low", "mixed") expect_true(all(result$outlier_type %in% valid_types)) } }) test_that("outliergram validates input", { expect_error(outliergram(matrix(1:10, 2, 5))) }) # ============== print methods tests ============== test_that("print.outliers.fdata works", { fd <- create_fdata_with_outliers() result <- outliers.depth.pond(fd, nb = 30) expect_output(print(result), "Functional data outlier detection") }) test_that("print.magnitudeshape works", { fd <- create_fdata_with_outliers() result <- magnitudeshape(fd) expect_output(print(result), "Magnitude-Shape") }) test_that("print.outliergram works", { fd <- create_fdata_with_outliers() result <- outliergram(fd) expect_output(print(result), "Outliergram") })