test_that("standardization only (no ids) produces mean 0, sd 1 for numeric vars", {
  # Fixture: two numeric columns with non-trivial location/scale plus one
  # character column that the result is expected to retain.
  set.seed(1)
  n_obs <- 50L
  df <- data.frame(
    a = rnorm(n_obs, mean = 10, sd = 2),
    b = rnorm(n_obs, mean = -3, sd = 5),
    # `letters` has only 26 elements, so `letters[1:50]` would silently pad
    # the column with 24 NAs; recycle to get a fully populated character
    # column instead.
    c = rep_len(letters, n_obs)  # non-numeric
  )

  X <- data_preparation(df, standardize = TRUE)

  expect_true(is.data.frame(X))
  # All input columns, including the non-numeric one, must be present.
  expect_setequal(names(X), c("a", "b", "c"))

  # Each numeric column is expected to come back with mean 0 and sd 1.
  expect_equal(mean(X$a, na.rm = TRUE), 0, tolerance = 1e-12)
  expect_equal(sd(X$a, na.rm = TRUE),   1, tolerance = 1e-12)

  expect_equal(mean(X$b, na.rm = TRUE), 0, tolerance = 1e-12)
  expect_equal(sd(X$b, na.rm = TRUE),   1, tolerance = 1e-12)
})

test_that("no ids + standardize = FALSE errors", {
  # No id/time supplied and standardization switched off: the call must fail
  # with a message pointing the user at `standardize = TRUE`.
  input <- data.frame(a = 1:5)
  hint <- "set `standardize = TRUE`"

  expect_error(data_preparation(input, standardize = FALSE), regexp = hint)
})

test_that("errors when no numeric variables exist", {
  # Both columns are character, so there is no numeric column in the input.
  chars_only <- data.frame(x = letters[1:5], y = LETTERS[1:5])

  expect_error(
    data_preparation(chars_only, standardize = TRUE),
    regexp = "No numeric variables"
  )
})

test_that("with ids present: ids are dropped and only non-id numeric vars transformed", {
  # Fixture: 5 units x 4 periods, two numeric columns and one character column.
  set.seed(2)
  n_units <- 5L
  n_periods <- 4L
  n_rows <- n_units * n_periods
  panel <- data.frame(
    id = rep(seq_len(n_units), each = n_periods),
    time = rep(seq_len(n_periods), times = n_units),
    y = rnorm(n_rows),
    z = rnorm(n_rows),
    w = letters[seq_len(n_rows)]
  )

  out <- data_preparation(
    panel,
    id = "id", time = "time",
    fixed_effects = FALSE, standardize = TRUE
  )

  # Identifier columns must be gone from the result ...
  for (key in c("id", "time")) {
    expect_false(key %in% names(out), info = key)
  }
  # ... while every other column is still there.
  expect_length(setdiff(c("y", "z", "w"), names(out)), 0)

  # y, z standardized
  for (col in c("y", "z")) {
    expect_equal(
      mean(out[[col]], na.rm = TRUE), 0,
      tolerance = 1e-12, info = col
    )
    expect_equal(
      sd(out[[col]], na.rm = TRUE), 1,
      tolerance = 1e-12, info = col
    )
  }

  # The character column must come back exactly as it went in.
  expect_identical(out$w, panel$w)
})

test_that("section FE demeaning yields zero group means by id (for transformed vars)", {
  # Fixture: 4 units x 5 periods with non-zero column means.
  set.seed(3)
  panel <- data.frame(
    id = rep(1:4, each = 5),
    time = rep(1:5, times = 4),
    x = rnorm(20, mean = 2),
    y = rnorm(20, mean = -1)
  )

  demeaned <- data_preparation(
    panel,
    id = "id", time = "time",
    fixed_effects = TRUE, effect = "section",
    standardize = FALSE
  )

  # After section FE, the average of each column within every id should be
  # zero up to floating-point noise.
  for (col in c("x", "y")) {
    unit_means <- tapply(demeaned[[col]], panel$id, mean, na.rm = TRUE)
    expect_true(all(abs(unit_means) < 1e-12), info = col)
  }
})

test_that("time FE demeaning yields zero group means by time (for transformed vars)", {
  # Fixture: 5 units x 4 periods.
  set.seed(4)
  panel <- data.frame(
    id = rep(1:5, each = 4),
    time = rep(1:4, times = 5),
    x = rnorm(20),
    y = rnorm(20)
  )

  demeaned <- data_preparation(
    panel,
    id = "id", time = "time",
    fixed_effects = TRUE, effect = "time",
    standardize = FALSE
  )

  # After time FE, the average of each column within every period should be
  # zero up to floating-point noise.
  is_zero <- function(v) all(abs(v) < 1e-12)
  period_means_x <- tapply(demeaned$x, panel$time, mean, na.rm = TRUE)
  period_means_y <- tapply(demeaned$y, panel$time, mean, na.rm = TRUE)

  expect_true(is_zero(period_means_x))
  expect_true(is_zero(period_means_y))
})

test_that("two-way FE demeaning yields zero means by id and by time (balanced panel)", {
  # Fixture: balanced panel, 6 units x 4 periods, columns far from zero mean.
  set.seed(5)
  panel <- data.frame(
    id = rep(1:6, each = 4),
    time = rep(1:4, times = 6),
    x = rnorm(24, mean = 10),
    y = rnorm(24, mean = -2)
  )

  demeaned <- data_preparation(
    panel,
    id = "id", time = "time",
    fixed_effects = TRUE, effect = "twoway",
    standardize = FALSE
  )

  # For each transformed column, the averages within every id and within
  # every period should both be zero up to floating-point noise.
  for (col in c("x", "y")) {
    for (key in c("id", "time")) {
      cell_means <- tapply(demeaned[[col]], panel[[key]], mean, na.rm = TRUE)
      expect_true(
        all(abs(cell_means) < 1e-12),
        info = paste(col, "by", key)
      )
    }
  }
})

test_that("FE + standardization gives (approx) mean 0 and sd 1", {
  # Fixture: 8 units x 3 periods with distinct location and scale per column.
  set.seed(6)
  panel <- data.frame(
    id = rep(1:8, each = 3),
    time = rep(1:3, times = 8),
    x = rnorm(24, mean = 7, sd = 4),
    y = rnorm(24, mean = -5, sd = 2)
  )

  out <- data_preparation(
    panel,
    id = "id", time = "time",
    fixed_effects = TRUE, effect = "twoway",
    standardize = TRUE
  )

  # Local helper: a column counts as standardized when its overall mean is 0
  # and its overall sd is 1 (within 1e-12).
  expect_standardized <- function(values) {
    expect_equal(mean(values, na.rm = TRUE), 0, tolerance = 1e-12)
    expect_equal(sd(values, na.rm = TRUE), 1, tolerance = 1e-12)
  }

  expect_standardized(out$x)
  expect_standardized(out$y)
})

test_that("standardization sets constant / zero-variance vars to NA_real_", {
  # Fixture: 3 units x 4 periods; `x` never varies, `y` varies and has one NA.
  panel <- data.frame(
    id = rep(1:3, each = 4),
    time = rep(1:4, times = 3),
    x = rep(5, 12),
    y = c(1:11, NA_real_)
  )

  out <- data_preparation(
    panel,
    id = "id", time = "time",
    fixed_effects = FALSE, standardize = TRUE
  )

  # The constant column must come back entirely missing.
  constant_is_missing <- is.na(out$x)
  expect_true(all(constant_is_missing))

  # The varying column must still carry values, centred on zero.
  expect_false(all(is.na(out$y)))
  expect_equal(mean(out$y, na.rm = TRUE), 0, tolerance = 1e-12)
})

test_that("errors if ids provided but not found in data", {
  panel <- data.frame(id = 1:5, time = 1:5, x = rnorm(5))

  # Column names are lower-case, so the upper-case `id` must not resolve.
  expect_error(
    data_preparation(panel, id = "ID", time = "time", standardize = TRUE)
  )
  # Same check for a mis-cased `time` column name.
  expect_error(
    data_preparation(panel, id = "id", time = "TIME", standardize = TRUE)
  )
})

test_that("errors if only id or only time is provided and standardize = FALSE", {
  panel <- data.frame(id = 1:5, time = 1:5, x = rnorm(5))
  # Both partial-identifier calls are expected to fail with the same hint.
  hint <- "set `standardize = TRUE`"

  # Only `id` given.
  expect_error(
    data_preparation(panel, id = "id", standardize = FALSE),
    regexp = hint
  )

  # Only `time` given.
  expect_error(
    data_preparation(panel, time = "time", standardize = FALSE),
    regexp = hint
  )
})

test_that("with ids: errors when no numeric vars besides id/time", {
  # The only numeric columns are the identifiers themselves; `group` is
  # character.
  ids_and_labels <- data.frame(
    id = 1:5,
    time = 1:5,
    group = letters[1:5]
  )
  expected_message <- "No numeric variables to transform"

  expect_error(
    data_preparation(
      ids_and_labels,
      id = "id", time = "time", standardize = TRUE
    ),
    regexp = expected_message
  )
})