# timeAverage tests
# Use a small slice for speed; one full year gives clean monthly/annual rows
dat <- selectByDate(mydata, year = 2003)
dat1 <- selectByDate(mydata, year = 2003, month = 1)

# Shared results — most tests reuse these
ta_day <- timeAverage(dat, avg.time = "day", progress = FALSE)
ta_month <- timeAverage(dat, avg.time = "month", progress = FALSE)
ta_year <- timeAverage(dat, avg.time = "year", progress = FALSE)

# --- Output structure --------------------------------------------------------

test_that("timeAverage returns a data frame (tibble) with a date column", {
  expect_s3_class(ta_day, "data.frame")
  expect_true("date" %in% names(ta_day))
  expect_s3_class(ta_day$date, "POSIXct")
})

test_that("timeAverage preserves numeric pollutant columns", {
  expect_true(all(c("no2", "o3", "nox", "ws", "wd") %in% names(ta_day)))
})

# --- Row counts --------------------------------------------------------------

test_that("avg.time = 'day' returns one row per day", {
  expect_equal(nrow(ta_day), 365L)
})

test_that("avg.time = 'month' returns one row per month", {
  expect_equal(nrow(ta_month), 12L)
})

test_that("avg.time = 'year' returns one row", {
  expect_equal(nrow(ta_year), 1L)
})

test_that("avg.time = '2 month' returns 6 rows for a full year", {
  result <- timeAverage(dat, avg.time = "2 month", progress = FALSE)
  expect_equal(nrow(result), 6L)
})

# --- Aggregation correctness -------------------------------------------------

test_that("daily mean is between hourly min and max", {
  safe_stat <- function(fn, x) {
    if (all(is.na(x))) {
      NA_real_
    } else {
      fn(x, na.rm = TRUE)
    }
  }
  daily_min <- tapply(
    dat$no2,
    lubridate::date(dat$date),
    \(x) safe_stat(min, x)
  )
  daily_max <- tapply(
    dat$no2,
    lubridate::date(dat$date),
    \(x) safe_stat(max, x)
  )
  non_na <- !is.na(ta_day$no2)

  expect_true(all(ta_day$no2[non_na] >= daily_min[non_na] - 1e-9))
  expect_true(all(ta_day$no2[non_na] <= daily_max[non_na] + 1e-9))
})

test_that("annual mean matches a direct calculation", {
  expected <- mean(dat$no2, na.rm = TRUE)
  expect_equal(ta_year$no2, expected, tolerance = 1e-6)
})

# --- statistic argument ------------------------------------------------------

test_that("statistic = 'max' returns values >= statistic = 'mean'", {
  ta_max <- timeAverage(
    dat,
    avg.time = "month",
    statistic = "max",
    progress = FALSE
  )
  non_na <- !is.na(ta_month$no2) & !is.na(ta_max$no2)
  expect_true(all(ta_max$no2[non_na] >= ta_month$no2[non_na]))
})

test_that("statistic = 'min' returns values <= statistic = 'mean'", {
  ta_min <- timeAverage(
    dat,
    avg.time = "month",
    statistic = "min",
    progress = FALSE
  )
  non_na <- !is.na(ta_month$no2) & !is.na(ta_min$no2)
  expect_true(all(ta_min$no2[non_na] <= ta_month$no2[non_na]))
})

test_that("statistic = 'sum' exceeds statistic = 'mean' for positive values", {
  ta_sum <- timeAverage(
    dat,
    avg.time = "month",
    statistic = "sum",
    progress = FALSE
  )
  non_na <- !is.na(ta_month$no2) & !is.na(ta_sum$no2)
  expect_true(all(ta_sum$no2[non_na] >= ta_month$no2[non_na]))
})

test_that("statistic = 'frequency' returns integer counts <= hours in period", {
  ta_freq <- timeAverage(
    dat1,
    avg.time = "day",
    statistic = "frequency",
    progress = FALSE
  )
  expect_true(all(ta_freq$no2 <= 24, na.rm = TRUE))
  expect_true(all(ta_freq$no2 >= 0, na.rm = TRUE))
})

test_that("statistic = 'percentile' with percentile = 95 is >= mean", {
  ta_p95 <- timeAverage(
    dat,
    avg.time = "month",
    statistic = "percentile",
    percentile = 95,
    progress = FALSE
  )
  non_na <- !is.na(ta_month$no2) & !is.na(ta_p95$no2)
  expect_true(all(ta_p95$no2[non_na] >= ta_month$no2[non_na]))
})

# --- Wind direction ----------------------------------------------------------

test_that("wind direction stays in [0, 360]", {
  non_na <- !is.na(ta_day$wd)
  expect_true(all(ta_day$wd[non_na] >= 0))
  expect_true(all(ta_day$wd[non_na] <= 360))
})

test_that("vector-averaged wd handles the 350/10 degree case correctly", {
  # Exact 350 + 10 average should be 0 (or 360), not 180
  two_hours <- data.frame(
    date = as.POSIXct(c("2023-01-01 00:00", "2023-01-01 01:00"), tz = "GMT"),
    wd = c(350, 10),
    ws = c(2, 2)
  )
  result <- timeAverage(two_hours, avg.time = "day", progress = FALSE)
  # The result should be close to 0 or 360, not near 180
  avg_wd <- result$wd
  expect_true(avg_wd <= 10 || avg_wd >= 350)
})

# --- data.thresh -------------------------------------------------------------

test_that("data.thresh = 100 produces more NAs than data.thresh = 0", {
  dat_gaps <- dat1
  dat_gaps$no2[sample(nrow(dat_gaps), 300)] <- NA

  res_low <- timeAverage(
    dat_gaps,
    avg.time = "day",
    data.thresh = 0,
    progress = FALSE
  )
  res_high <- timeAverage(
    dat_gaps,
    avg.time = "day",
    data.thresh = 100,
    progress = FALSE
  )

  expect_gte(sum(is.na(res_high$no2)), sum(is.na(res_low$no2)))
})

# --- Expansion (avg.time < data interval) ------------------------------------

test_that("expanding hourly to 15-min produces 4x as many rows", {
  result <- timeAverage(
    dat1,
    avg.time = "15 min",
    fill = TRUE,
    progress = FALSE
  )
  expect_equal(nrow(result), (nrow(dat1) * 4L) - 3)
})

# --- avg.time = 'season' -----------------------------------------------------

test_that("avg.time = 'season' returns 5 rows for a full year", {
  result <- timeAverage(dat, avg.time = "season", progress = FALSE)
  expect_equal(nrow(result), 5L)
})

# --- Input validation --------------------------------------------------------

test_that("invalid statistic raises an error", {
  expect_error(
    timeAverage(
      dat,
      avg.time = "day",
      statistic = "geometric_mean",
      progress = FALSE
    ),
    regexp = "statistic"
  )
})

test_that("data.thresh outside [0, 100] raises an error", {
  expect_error(
    timeAverage(dat, avg.time = "day", data.thresh = 101, progress = FALSE),
    regexp = "capture"
  )
})

test_that("percentile outside [0, 100] raises an error", {
  expect_error(
    timeAverage(
      dat,
      avg.time = "day",
      statistic = "percentile",
      percentile = 110,
      progress = FALSE
    ),
    regexp = "Percentile"
  )
})

test_that("invalid avg.time unit raises an error", {
  expect_error(
    timeAverage(dat, avg.time = "fortnight", progress = FALSE),
    regexp = "not recognised"
  )
})