# Use a small, single-year slice throughout for speed dat <- selectByDate(mydata, year = 2003) # --- Output structure -------------------------------------------------------- test_that("WhittakerSmooth returns a data frame with the same number of rows", { result <- WhittakerSmooth(dat, pollutant = "o3", lambda = 100) expect_s3_class(result, "data.frame") expect_equal(nrow(result), nrow(dat)) }) test_that("WhittakerSmooth adds a smooth_ column by default", { result <- WhittakerSmooth(dat, pollutant = "o3", lambda = 100) expect_true("smooth_o3" %in% names(result)) }) test_that("WhittakerSmooth respects new.name for single pollutant", { result <- WhittakerSmooth( dat, pollutant = "o3", lambda = 100, new.name = "o3_smooth" ) expect_true("o3_smooth" %in% names(result)) expect_false("smooth_o3" %in% names(result)) }) test_that("WhittakerSmooth handles multiple pollutants and returns a column for each", { result <- WhittakerSmooth(dat, pollutant = c("o3", "no2"), lambda = 100) expect_true(all(c("smooth_o3", "smooth_no2") %in% names(result))) }) test_that("WhittakerSmooth with p adds _baseline and _increment columns", { result <- WhittakerSmooth(dat, pollutant = "o3", lambda = 100, p = 0.05) expect_true(all(c("o3_baseline", "o3_increment") %in% names(result))) expect_false("smooth_o3" %in% names(result)) }) test_that("WhittakerSmooth with p and multiple pollutants adds columns for each", { result <- WhittakerSmooth( dat, pollutant = c("o3", "no2"), lambda = 100, p = 0.05 ) expect_true(all( c("o3_baseline", "o3_increment", "no2_baseline", "no2_increment") %in% names(result) )) }) # --- Smoothed values are numerically sensible -------------------------------- test_that("smooth_o3 is numeric with no NaN values", { result <- WhittakerSmooth(dat, pollutant = "o3", lambda = 100) expect_true(is.numeric(result$smooth_o3)) expect_false(any(is.nan(result$smooth_o3))) }) test_that("smoothed values stay within a reasonable range of the input", { result <- WhittakerSmooth(dat, pollutant = "o3", lambda = 100) obs_range <- range(dat$o3, na.rm = TRUE) # Allow modest overshoot from the smoother but not orders of magnitude expect_true(min(result$smooth_o3, na.rm = TRUE) >= obs_range[1] - 5) expect_true(max(result$smooth_o3, na.rm = TRUE) <= obs_range[2] + 5) }) test_that("higher lambda produces a smoother (lower variance) series", { smooth_low <- WhittakerSmooth(dat, pollutant = "o3", lambda = 1)$smooth_o3 smooth_high <- WhittakerSmooth(dat, pollutant = "o3", lambda = 1e6)$smooth_o3 expect_lt(var(smooth_high, na.rm = TRUE), var(smooth_low, na.rm = TRUE)) }) test_that("baseline is always <= observed values (ALS property)", { result <- WhittakerSmooth(dat, pollutant = "o3", lambda = 1000, p = 0.01) obs <- dat$o3 base <- result$o3_baseline # For small p the baseline should hug the bottom: allow only trivial excess non_na <- !is.na(obs) & !is.na(base) excess <- mean(base[non_na] > obs[non_na] + 1) # fraction exceeding obs by >1 expect_lt(excess, 0.05) }) test_that("increment equals observed minus baseline (within floating-point tolerance)", { result <- WhittakerSmooth(dat, pollutant = "o3", lambda = 100, p = 0.05) non_na <- !is.na(dat$o3) & !is.na(result$o3_baseline) expect_equal( result$o3_increment[non_na], (dat$o3 - result$o3_baseline)[non_na], tolerance = 1e-6 ) }) # --- Input validation -------------------------------------------------------- test_that("WhittakerSmooth errors on non-numeric pollutant column", { bad <- dat bad$o3 <- as.character(bad$o3) expect_error( WhittakerSmooth(bad, pollutant = "o3", lambda = 100), regexp = "not numeric" ) }) test_that("WhittakerSmooth errors when p is outside [0, 1]", { expect_error( WhittakerSmooth(dat, pollutant = "o3", lambda = 100, p = 1.5), regexp = "between 0 and 1" ) expect_error( WhittakerSmooth(dat, pollutant = "o3", lambda = 100, p = -0.1), regexp = "between 0 and 1" ) }) test_that("WhittakerSmooth errors when p length mismatches pollutant length", { expect_error( WhittakerSmooth( dat, pollutant = c("o3", "no2"), lambda = 100, p = c(0.01, 0.05, 0.1) ), regexp = "length 1 or the same length" ) }) test_that("WhittakerSmooth warns when new.name length mismatches pollutant length", { expect_warning( WhittakerSmooth( dat, pollutant = c("o3", "no2"), lambda = 100, new.name = "just_one" ), regexp = "does not match" ) }) test_that("WhittakerSmooth warns when new.name is supplied alongside p", { expect_warning( WhittakerSmooth( dat, pollutant = "o3", lambda = 100, p = 0.05, new.name = "ignored" ), regexp = "ignored" ) }) # --- Missing data handling --------------------------------------------------- test_that("WhittakerSmooth handles data with missing values without erroring", { dat_gaps <- dat dat_gaps$o3[sample(nrow(dat_gaps), 200)] <- NA expect_no_error(WhittakerSmooth(dat_gaps, pollutant = "o3", lambda = 100)) }) test_that("WhittakerSmooth with date.pad = TRUE still returns original row count", { # Introduce a gap then pad back dat_gap <- dat[!lubridate::month(dat$date) %in% 6, ] result <- WhittakerSmooth( dat_gap, pollutant = "o3", lambda = 100, date.pad = TRUE ) expect_equal(nrow(result), nrow(dat_gap)) })