# Tests for pmstandardize()
# No model fitting — all tests run fast; no skip_on_cran() needed.

# ── Shared helper ─────────────────────────────────────────────────────────────
# Three subjects covering the main edge cases:
#   "A" — normal variance (x and y)
#   "B" — constant x (zero variance), normal y
#   "C" — all-NA x, normal y

make_pms_df <- function() {
  data.frame(
    id = rep(c("A", "B", "C"), each = 5),
    x  = c( 1,  2,  3,  4,  5,      # A: mean = 3, sd = sqrt(2.5)
            10, 10, 10, 10, 10,      # B: constant
            NA, NA, NA, NA, NA),     # C: all NA
    y  = c( 5,  4,  3,  2,  1,      # A: normal
             1,  2,  3,  4,  5,      # B: normal
             2,  4,  6,  8, 10),     # C: normal
    stringsAsFactors = FALSE
  )
}


# ══════════════════════════════════════════════════════════════════════════════
# Validation
# ══════════════════════════════════════════════════════════════════════════════

test_that("misspelled col triggers error naming the bad variable", {
  df <- make_pms_df()
  expect_error(
    pmstandardize(df, cols = "not_a_col", id_var = "id"),
    regexp = "not_a_col"
  )
})

test_that("misspelled id_var triggers error naming the bad variable", {
  df <- make_pms_df()
  expect_error(
    pmstandardize(df, cols = "x", id_var = "not_an_id"),
    regexp = "not_an_id"
  )
})

test_that("error message includes 'Cannot find required variables'", {
  df <- make_pms_df()
  expect_error(
    pmstandardize(df, cols = "oops", id_var = "id"),
    regexp = "Cannot find required variables"
  )
})

test_that("id_var as a vector triggers error", {
  df <- make_pms_df()
  expect_error(
    pmstandardize(df, cols = "x", id_var = c("id", "id")),
    regexp = "id_var"
  )
})

test_that("empty cols vector triggers error", {
  df <- make_pms_df()
  expect_error(
    pmstandardize(df, cols = character(0), id_var = "id"),
    regexp = "cols"
  )
})

test_that("character column in cols triggers error naming the column", {
  df <- make_pms_df()
  df$x <- as.character(df$x)
  expect_error(
    pmstandardize(df, cols = "x", id_var = "id"),
    regexp = "must be numeric"
  )
})

test_that("non-numeric cols error reports the actual class", {
  df <- make_pms_df()
  df$x <- factor(df$x)
  expect_error(
    pmstandardize(df, cols = "x", id_var = "id"),
    regexp = "factor"
  )
})

test_that("mixed numeric and non-numeric cols triggers error only for non-numeric", {
  df <- make_pms_df()
  df$x <- as.character(df$x)
  expect_error(
    pmstandardize(df, cols = c("x", "y"), id_var = "id"),
    regexp = "\\bx\\b"
  )
})


# ══════════════════════════════════════════════════════════════════════════════
# Output structure
# ══════════════════════════════════════════════════════════════════════════════

test_that("returns a data.frame", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id")
  expect_s3_class(result, "data.frame")
})

test_that("append = TRUE preserves all original columns", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id", append = TRUE)
  expect_true(all(c("id", "x", "y") %in% names(result)))
})

test_that("append = TRUE adds exactly one _psd column for one input col", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id", append = TRUE)
  expect_true("x_psd" %in% names(result))
  # Total columns = original 3 + 1 new
  expect_equal(ncol(result), ncol(df) + 1L)
})

test_that("append = TRUE preserves row count", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id", append = TRUE)
  expect_equal(nrow(result), nrow(df))
})

test_that("append = FALSE returns only id_var and _psd columns", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id", append = FALSE)
  expect_equal(sort(names(result)), sort(c("id", "x_psd")))
})

test_that("append = FALSE preserves row count", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id", append = FALSE)
  expect_equal(nrow(result), nrow(df))
})

test_that("multiple cols produce a _psd column for each", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = c("x", "y"), id_var = "id")
  expect_true("x_psd" %in% names(result))
  expect_true("y_psd" %in% names(result))
  expect_equal(ncol(result), ncol(df) + 2L)
})

test_that("row order of input is preserved in output", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id")
  expect_equal(result$id, df$id)
})

test_that("pre-existing grouping on input df is handled without error", {
  df     <- dplyr::group_by(make_pms_df(), id)
  result <- pmstandardize(df, cols = "x", id_var = "id")
  expect_s3_class(result, "data.frame")
  expect_equal(nrow(result), 15L)
})

test_that("output df has no residual grouping", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id")
  expect_false(dplyr::is_grouped_df(result))
})


# ══════════════════════════════════════════════════════════════════════════════
# Statistical correctness
# ══════════════════════════════════════════════════════════════════════════════

test_that("within-person mean of z-scores is 0 for normal-variance subjects", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id")

  # Subject A has normal variance — within-person mean must be 0
  x_psd_A <- result$x_psd[result$id == "A"]
  expect_equal(mean(x_psd_A, na.rm = TRUE), 0, tolerance = 1e-10)
})

test_that("within-person SD of z-scores is 1 for normal-variance subjects", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id")

  x_psd_A <- result$x_psd[result$id == "A"]
  expect_equal(sd(x_psd_A, na.rm = TRUE), 1, tolerance = 1e-10)
})

test_that("z-score values match manual calculation", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id")

  x_A        <- df$x[df$id == "A"]
  expected_z <- (x_A - mean(x_A)) / sd(x_A)
  x_psd_A    <- result$x_psd[result$id == "A"]

  expect_equal(x_psd_A, expected_z, tolerance = 1e-10)
})

test_that("constant within-person series returns 0 at observed positions, NA elsewhere", {
  df <- data.frame(
    id = rep(c("A", "B"), each = 5),
    x  = c(1, 2, 3, 4, 5,       # A: normal
           10, 10, NA, 10, 10),  # B: constant with one NA
    stringsAsFactors = FALSE
  )
  result  <- pmstandardize(df, cols = "x", id_var = "id")
  x_psd_B <- result$x_psd[result$id == "B"]

  expect_true(all(x_psd_B[c(1, 2, 4, 5)] == 0))   # observed constant values → 0
  expect_true(is.na(x_psd_B[3]))                    # NA position preserved
})

test_that("all-NA within-person series returns NA", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = "x", id_var = "id")

  x_psd_C <- result$x_psd[result$id == "C"]
  expect_true(all(is.na(x_psd_C)))
})

test_that("person with a single non-NA value returns 0 at non-NA position, NA elsewhere", {
  df <- data.frame(
    id = rep(c("A", "B"), each = 5),
    x  = c(1, 2, 3, 4, 5,      # A: normal
           NA, NA, 7, NA, NA),  # B: exactly one non-NA
    stringsAsFactors = FALSE
  )
  result  <- pmstandardize(df, cols = "x", id_var = "id")
  x_psd_B <- result$x_psd[result$id == "B"]

  expect_equal(x_psd_B[3], 0)                          # the one observed value → 0
  expect_true(all(is.na(x_psd_B[c(1, 2, 4, 5)])))     # NA positions preserved
})

test_that("NA values within an otherwise-varying series are preserved as NA in output", {
  df <- data.frame(
    id = rep("A", 6),
    x  = c(1, NA, 3, 4, 5, 6),
    stringsAsFactors = FALSE
  )
  result <- pmstandardize(df, cols = "x", id_var = "id")

  expect_true(is.na(result$x_psd[2]))           # NA position preserved
  expect_false(any(is.na(result$x_psd[-2])))    # other positions filled
})

test_that("NA values do not contaminate within-person mean and SD calculation", {
  df <- data.frame(
    id = rep("A", 6),
    x  = c(1, NA, 3, 4, 5, 6),
    stringsAsFactors = FALSE
  )
  result <- pmstandardize(df, cols = "x", id_var = "id")

  non_na_x   <- df$x[!is.na(df$x)]
  expected_z <- (non_na_x - mean(non_na_x)) / sd(non_na_x)

  expect_equal(result$x_psd[!is.na(result$x_psd)], expected_z, tolerance = 1e-10)
})

test_that("each subject is standardized independently (between-person means differ)", {
  # Two subjects with very different scales — output should both have mean 0
  df <- data.frame(
    id = rep(c("A", "B"), each = 5),
    x  = c(1, 2, 3, 4, 5,          # A: scale ~1
           100, 200, 300, 400, 500), # B: scale ~100
    stringsAsFactors = FALSE
  )
  result <- pmstandardize(df, cols = "x", id_var = "id")

  mean_A <- mean(result$x_psd[result$id == "A"])
  mean_B <- mean(result$x_psd[result$id == "B"])

  expect_equal(mean_A, 0, tolerance = 1e-10)
  expect_equal(mean_B, 0, tolerance = 1e-10)

  sd_A <- sd(result$x_psd[result$id == "A"])
  sd_B <- sd(result$x_psd[result$id == "B"])

  expect_equal(sd_A, 1, tolerance = 1e-10)
  expect_equal(sd_B, 1, tolerance = 1e-10)
})

test_that("standardizing y does not alter x values and vice versa", {
  df     <- make_pms_df()
  result <- pmstandardize(df, cols = c("x", "y"), id_var = "id")

  expect_equal(result$x, df$x)
  expect_equal(result$y, df$y)
})

# ── Inf / NaN in input ──────────────────────────────────────────────────────
# Inf is rejected upfront with an informative error — it is corrupted data
# that the user should fix, not a case to silently handle.

test_that("Inf in input triggers an informative error", {
  df <- data.frame(
    id = rep(c("A", "B"), each = 5),
    x  = c(1, 2, 3, 4, 5,
           1, 2, Inf, 4, 5),
    stringsAsFactors = FALSE
  )
  expect_error(
    pmstandardize(df, cols = "x", id_var = "id"),
    regexp = "Inf"
  )
})

test_that("-Inf in input triggers an informative error", {
  df <- data.frame(
    id = rep("A", 5),
    x  = c(-Inf, 2, 3, 4, 5),
    stringsAsFactors = FALSE
  )
  expect_error(
    pmstandardize(df, cols = "x", id_var = "id"),
    regexp = "Inf"
  )
})

test_that("Inf in one column is caught even when other columns are clean", {
  df <- data.frame(
    id = rep("A", 5),
    x  = c(1, 2, 3, 4, 5),
    y  = c(1, Inf, 3, 4, 5),
    stringsAsFactors = FALSE
  )
  expect_error(
    pmstandardize(df, cols = c("x", "y"), id_var = "id"),
    regexp = "y"
  )
})

test_that("NaN in input is treated like NA", {
  df <- data.frame(
    id = rep("A", 5),
    x  = c(NaN, 2, 3, 4, 5),
    stringsAsFactors = FALSE
  )
  result <- pmstandardize(df, cols = "x", id_var = "id")

  # NaN position: is.na(NaN) is TRUE, so handled by na.rm = TRUE
  expect_true(is.na(result$x_psd[1]))
  # Remaining values should be valid z-scores
  non_na <- result$x_psd[2:5]
  expect_false(any(is.na(non_na)))
  expect_equal(mean(non_na), 0, tolerance = 1e-10)
})


# ══════════════════════════════════════════════════════════════════════════════
# Verbose
# ══════════════════════════════════════════════════════════════════════════════

test_that("verbose = TRUE emits messages", {
  df <- make_pms_df()
  suppressMessages(
    expect_message(
      pmstandardize(df, cols = "x", id_var = "id", verbose = TRUE)
    )
  )
})

test_that("verbose = FALSE emits no messages", {
  df <- make_pms_df()
  expect_no_message(
    pmstandardize(df, cols = "x", id_var = "id", verbose = FALSE)
  )
})

test_that("verbose message mentions z-score or standardiz", {
  df <- make_pms_df()
  suppressMessages(
    expect_message(
      pmstandardize(df, cols = "x", id_var = "id", verbose = TRUE),
      regexp = "(?i)z.score|standardiz"
    )
  )
})