# Tests for i_detrender()
# No skip_on_cran() needed — only lm() fitting, which is fast.

# ── Shared helper ──────────────────────────────────────────────────────────────
# Five subjects covering the main filter cases:
#   "valid"      — 25 obs, normal variance, detrends cleanly
#   "too_few"    — 5 obs, below default min_n_subject = 20
#   "no_var"     — 25 obs, constant x (pre-detrend var = 0)
#   "pure_trend" — 25 obs, perfect linear x (post-detrend var = 0)
#   "all_na"     — 25 obs, all-NA x

make_det_df <- function() {
  set.seed(1)
  rbind(
    data.frame(id = "valid",      time = seq_len(25), x = rnorm(25),               stringsAsFactors = FALSE),
    data.frame(id = "too_few",    time = seq_len(5),  x = rnorm(5),                stringsAsFactors = FALSE),
    data.frame(id = "no_var",     time = seq_len(25), x = rep(3.0, 25),            stringsAsFactors = FALSE),
    data.frame(id = "pure_trend", time = seq_len(25), x = as.numeric(seq_len(25)), stringsAsFactors = FALSE),
    data.frame(id = "all_na",     time = seq_len(25), x = rep(NA_real_, 25),       stringsAsFactors = FALSE)
  )
}


# ══════════════════════════════════════════════════════════════════════════════
# Validation
# ══════════════════════════════════════════════════════════════════════════════

test_that("misspelled col triggers error naming the bad variable", {
  df <- make_det_df()
  expect_error(
    i_detrender(df, cols = "not_a_col", id_var = "id", timevar = "time"),
    regexp = "not_a_col"
  )
})

test_that("misspelled id_var triggers error naming the bad variable", {
  df <- make_det_df()
  expect_error(
    i_detrender(df, cols = "x", id_var = "not_an_id", timevar = "time"),
    regexp = "not_an_id"
  )
})

test_that("misspelled timevar triggers error naming the bad variable", {
  df <- make_det_df()
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "not_a_time"),
    regexp = "not_a_time"
  )
})

test_that("error message includes 'Cannot find required variables'", {
  df <- make_det_df()
  expect_error(
    i_detrender(df, cols = "oops", id_var = "id", timevar = "time"),
    regexp = "Cannot find required variables"
  )
})

test_that("empty cols vector triggers error", {
  df <- make_det_df()
  expect_error(
    i_detrender(df, cols = character(0), id_var = "id", timevar = "time"),
    regexp = "cols"
  )
})

test_that("character column in cols triggers error naming the column", {
  df <- make_det_df()
  df$x <- as.character(df$x)
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time"),
    regexp = "must be numeric"
  )
})

test_that("non-numeric cols error reports the actual class", {
  df <- make_det_df()
  df$x <- factor(df$x)
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time"),
    regexp = "factor"
  )
})

test_that("id_var as a vector triggers error", {
  df <- make_det_df()
  expect_error(
    i_detrender(df, cols = "x", id_var = c("id", "id"), timevar = "time"),
    regexp = "id_var"
  )
})

test_that("timevar as a vector triggers error", {
  df <- make_det_df()
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = c("time", "time")),
    regexp = "timevar"
  )
})

test_that("min_n_subject = 0 triggers upfront error", {
  df <- make_det_df()
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time", min_n_subject = 0),
    regexp = "min_n_subject"
  )
})

test_that("minvar = -1 triggers upfront error", {
  df <- make_det_df()
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time", minvar = -1),
    regexp = "minvar"
  )
})

test_that("non-numeric timevar triggers error naming the class", {
  df <- make_det_df()
  df$time <- as.character(df$time)
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time"),
    regexp = "must be numeric"
  )
})

test_that("Date timevar triggers error suggesting numeric conversion", {
  df <- make_det_df()
  df$time <- as.Date("2024-01-01") + df$time
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time"),
    regexp = "must be numeric"
  )
})

test_that("single NA in timevar triggers error reporting the count", {
  df         <- make_det_df()
  df$time[1] <- NA
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time"),
    regexp = "1 row"
  )
})

test_that("NA in timevar error names the offending variable", {
  df         <- make_det_df()
  df$time[1] <- NA
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time"),
    regexp = "'time'"
  )
})


# ══════════════════════════════════════════════════════════════════════════════
# Output structure
# ══════════════════════════════════════════════════════════════════════════════

test_that("returns a data.frame", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_s3_class(result, "data.frame")
})

test_that("append = TRUE preserves all original columns", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time", append = TRUE)
  expect_true(all(c("id", "time", "x") %in% names(result)))
})

test_that("append = TRUE adds exactly one _dt column for one input col", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time", append = TRUE)
  expect_true("x_dt" %in% names(result))
  expect_equal(ncol(result), ncol(df) + 1L)
})

test_that("append = TRUE preserves row count", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time", append = TRUE)
  expect_equal(nrow(result), nrow(df))
})

test_that("append = FALSE returns only id_var, timevar, and _dt columns", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time", append = FALSE)
  expect_equal(sort(names(result)), sort(c("id", "time", "x_dt")))
})

test_that("append = FALSE preserves row count", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time", append = FALSE)
  expect_equal(nrow(result), nrow(df))
})

test_that("multiple cols produce a _dt column for each", {
  set.seed(2)
  df     <- make_det_df()
  df$x2  <- c(rnorm(25), rnorm(5), rep(3.0, 25), as.numeric(seq_len(25)), rep(NA_real_, 25))
  result <- i_detrender(df, cols = c("x", "x2"), id_var = "id", timevar = "time")
  expect_true("x_dt"  %in% names(result))
  expect_true("x2_dt" %in% names(result))
  expect_equal(ncol(result), ncol(df) + 2L)
})

test_that("row order of input is preserved in output", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_equal(result$id, df$id)
})

test_that("pre-existing grouping is removed without error", {
  df     <- dplyr::group_by(make_det_df(), id)
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_s3_class(result, "data.frame")
  expect_equal(nrow(result), nrow(make_det_df()))
})

test_that("output df has no residual grouping", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_false(dplyr::is_grouped_df(result))
})


# ══════════════════════════════════════════════════════════════════════════════
# Per-subject filtering
# ══════════════════════════════════════════════════════════════════════════════

test_that("all-NA column within a subject produces all NA in _dt", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_true(all(is.na(result$x_dt[result$id == "all_na"])))
})

test_that("subject below min_n_subject produces all NA in _dt", {
  df     <- make_det_df()   # "too_few" has 5 obs, default min_n_subject = 20
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_true(all(is.na(result$x_dt[result$id == "too_few"])))
})

test_that("subject with pre-detrend variance < minvar produces all NA in _dt", {
  df     <- make_det_df()   # "no_var" has constant x, var = 0 < 0.01
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_true(all(is.na(result$x_dt[result$id == "no_var"])))
})

test_that("subject with post-detrend variance < minvar produces all NA in _dt", {
  df     <- make_det_df()   # "pure_trend": seq(1,25) ~ seq(1,25) → residuals = 0
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_true(all(is.na(result$x_dt[result$id == "pure_trend"])))
})

test_that("valid subject passes all filters and produces non-NA _dt values", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_false(any(is.na(result$x_dt[result$id == "valid"])))
})

test_that("filtering is per-column: one col can be NA while another is valid for the same subject", {
  set.seed(3)
  # Subject with good x1 (normal variance) but constant x2 (zero variance)
  df <- data.frame(
    id   = rep("A", 25),
    time = seq_len(25),
    x1   = rnorm(25),
    x2   = rep(5.0, 25),
    stringsAsFactors = FALSE
  )
  result <- i_detrender(df, cols = c("x1", "x2"), id_var = "id", timevar = "time")
  expect_false(any(is.na(result$x1_dt)))   # x1 detrends cleanly
  expect_true(all(is.na(result$x2_dt)))    # x2 is constant → NA
})

test_that("minvar non-default threshold is respected for pre-detrend variance", {
  set.seed(10)
  df <- rbind(
    data.frame(id = "low_var", time = seq_len(25), x = rnorm(25, sd = 0.08), stringsAsFactors = FALSE),
    data.frame(id = "ok",      time = seq_len(25), x = rnorm(25),            stringsAsFactors = FALSE)
  )
  # sd ≈ 0.08 → var ≈ 0.0064 < default minvar 0.01 → should fail
  result_strict <- i_detrender(df, cols = "x", id_var = "id", timevar = "time", minvar = 0.01)
  expect_true(all(is.na(result_strict$x_dt[result_strict$id == "low_var"])))

  # With minvar = 0.001: same subject passes
  result_loose <- i_detrender(df, cols = "x", id_var = "id", timevar = "time", minvar = 0.001)
  expect_false(any(is.na(result_loose$x_dt[result_loose$id == "low_var"])))
})

test_that("append = FALSE with multiple cols returns id_var, timevar, and all _dt columns", {
  set.seed(7)
  df    <- make_det_df()
  df$x2 <- c(rnorm(25), rnorm(5), rep(3.0, 25), as.numeric(seq_len(25)), rep(NA_real_, 25))
  result <- i_detrender(df, cols = c("x", "x2"), id_var = "id", timevar = "time", append = FALSE)
  expect_equal(sort(names(result)), sort(c("id", "time", "x_dt", "x2_dt")))
})

test_that("min_n_subject threshold is respected with a non-default value", {
  set.seed(4)
  df <- rbind(
    data.frame(id = "borderline", time = seq_len(10), x = rnorm(10), stringsAsFactors = FALSE),
    data.frame(id = "ok",         time = seq_len(25), x = rnorm(25), stringsAsFactors = FALSE)
  )
  # With min_n_subject = 15: "borderline" (10 obs) should fail
  result_strict <- i_detrender(df, cols = "x", id_var = "id", timevar = "time", min_n_subject = 15)
  expect_true(all(is.na(result_strict$x_dt[result_strict$id == "borderline"])))

  # With min_n_subject = 5: "borderline" (10 obs) should pass
  result_loose <- i_detrender(df, cols = "x", id_var = "id", timevar = "time", min_n_subject = 5)
  expect_false(any(is.na(result_loose$x_dt[result_loose$id == "borderline"])))
})


# ══════════════════════════════════════════════════════════════════════════════
# Statistical correctness
# ══════════════════════════════════════════════════════════════════════════════

test_that("residuals of valid subject have mean close to 0", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  x_dt_valid <- result$x_dt[result$id == "valid"]
  expect_equal(mean(x_dt_valid, na.rm = TRUE), 0, tolerance = 1e-10)
})

test_that("residuals match manual lm() calculation for valid subject", {
  df     <- make_det_df()
  valid  <- df[df$id == "valid", ]
  manual <- stats::residuals(stats::lm(x ~ time, data = valid, na.action = stats::na.exclude))

  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_equal(
    as.numeric(result$x_dt[result$id == "valid"]),
    as.numeric(manual),
    tolerance = 1e-10
  )
})

test_that("detrending one subject does not alter another subject's residuals", {
  set.seed(5)
  df <- rbind(
    data.frame(id = "A", time = seq_len(25), x = rnorm(25), stringsAsFactors = FALSE),
    data.frame(id = "B", time = seq_len(25), x = rnorm(25), stringsAsFactors = FALSE)
  )
  result   <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  manual_B <- stats::residuals(
    stats::lm(x ~ time, data = df[df$id == "B", ], na.action = stats::na.exclude)
  )
  expect_equal(
    as.numeric(result$x_dt[result$id == "B"]),
    as.numeric(manual_B),
    tolerance = 1e-10
  )
})

test_that("NA values within an otherwise-varying series are preserved as NA in _dt", {
  set.seed(6)
  df <- data.frame(
    id   = rep("A", 25),
    time = seq_len(25),
    x    = c(NA, rnorm(24)),   # first obs is NA, rest are normal
    stringsAsFactors = FALSE
  )
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_true(is.na(result$x_dt[1]))          # NA position preserved
  expect_false(any(is.na(result$x_dt[-1])))   # other positions filled
})

test_that("scattered interior NAs are preserved at correct positions in _dt", {
  set.seed(99)
  x_raw        <- rnorm(25)
  x_raw[c(5, 12, 20)] <- NA
  df <- data.frame(id = "A", time = seq_len(25), x = x_raw,
                   stringsAsFactors = FALSE)
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")
  expect_true(all(is.na(result$x_dt[c(5, 12, 20)])))
  expect_false(any(is.na(result$x_dt[-c(5, 12, 20)])))
})

test_that("all-NA subject produces all-NA output regardless of min_n_subject threshold", {
  df <- data.frame(id = "allna", time = seq_len(25),
                   x  = rep(NA_real_, 25), stringsAsFactors = FALSE)
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time",
                        min_n_subject = 1)
  expect_true(all(is.na(result$x_dt)))
})

test_that("append = FALSE preserves rows for filtered-out subjects as NA", {
  df     <- make_det_df()
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time",
                        append = FALSE)
  expect_equal(nrow(result), nrow(df))
  expect_true(all(is.na(result$x_dt[result$id == "no_var"])))
})


# ══════════════════════════════════════════════════════════════════════════════
# Verbose
# ══════════════════════════════════════════════════════════════════════════════

test_that("verbose = TRUE emits messages", {
  df   <- make_det_df()
  msgs <- capture_messages(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time", verbose = TRUE)
  )
  expect_gt(length(msgs), 0)
})

test_that("verbose = FALSE emits no messages", {
  df <- make_det_df()
  expect_no_message(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time", verbose = FALSE)
  )
})

test_that("verbose message mentions detrend", {
  df   <- make_det_df()
  msgs <- capture_messages(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time", verbose = TRUE)
  )
  expect_true(any(grepl("(?i)detrend", msgs, perl = TRUE)))
})


# ══════════════════════════════════════════════════════════════════════════════
# Inf / NaN in input
# ══════════════════════════════════════════════════════════════════════════════
# Inf is rejected upfront with an informative error.
# NaN (which is.na treats as NA) is handled normally by na.rm / na.exclude.

test_that("Inf in a column triggers an informative error", {
  set.seed(10)
  df <- rbind(
    data.frame(id = "good",  time = seq_len(25), x = rnorm(25), stringsAsFactors = FALSE),
    data.frame(id = "inf_x", time = seq_len(25), x = c(rnorm(24), Inf), stringsAsFactors = FALSE)
  )
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time"),
    regexp = "Inf"
  )
})

test_that("-Inf in a column triggers an informative error", {
  df <- data.frame(id = "A", time = seq_len(25), x = c(-Inf, rnorm(24)),
                   stringsAsFactors = FALSE)
  expect_error(
    i_detrender(df, cols = "x", id_var = "id", timevar = "time"),
    regexp = "Inf"
  )
})

test_that("NaN in a column is treated like NA (missing value)", {
  set.seed(12)
  df <- data.frame(
    id   = rep("A", 25),
    time = seq_len(25),
    x    = c(NaN, rnorm(24)),
    stringsAsFactors = FALSE
  )
  result <- i_detrender(df, cols = "x", id_var = "id", timevar = "time")

  # NaN position should be NA in the output (na.exclude preserves position)
  expect_true(is.na(result$x_dt[1]))
  # Other positions should be valid residuals
  expect_false(any(is.na(result$x_dt[2:25])))
})