# ── ml_cv() ───────────────────────────────────────────────────────────────────

test_that("ml_cv creates correct number of folds", {
  s <- ml_split(iris, "Species", seed = 42L)
  c <- ml_cv(s, "Species", folds = 5L, seed = 42L)
  expect_s3_class(c, "ml_cv_result")
  expect_length(c$folds, 5L)
})

test_that("ml_cv fold indices cover all dev rows", {
  s <- ml_split(iris, "Species", seed = 42L)
  c <- ml_cv(s, "Species", folds = 5L, seed = 42L)
  n_dev <- nrow(s$dev)
  all_valid <- sort(unlist(lapply(c$folds, `[[`, "valid")))
  expect_equal(all_valid, seq_len(n_dev))
})

test_that("ml_cv folds are non-overlapping", {
  s <- ml_split(iris, "Species", seed = 42L)
  c <- ml_cv(s, "Species", folds = 3L, seed = 42L)
  valids <- lapply(c$folds, `[[`, "valid")
  for (i in seq_along(valids)) {
    for (j in seq_along(valids)) {
      if (i != j) expect_length(intersect(valids[[i]], valids[[j]]), 0L)
    }
  }
})

test_that("ml_cv result feeds into ml_fit", {
  s <- ml_split(iris, "Species", seed = 42L)
  c <- ml_cv(s, "Species", folds = 3L, seed = 42L)
  model <- ml_fit(c, "Species", seed = 42L)
  expect_s3_class(model, "ml_model")
  expect_false(is.null(model$scores_))
})

test_that("ml_cv seed reproducibility", {
  s <- ml_split(iris, "Species", seed = 42L)
  c1 <- ml_cv(s, "Species", folds = 5L, seed = 99L)
  c2 <- ml_cv(s, "Species", folds = 5L, seed = 99L)
  expect_identical(c1$folds[[1]]$valid, c2$folds[[1]]$valid)
})

test_that("ml_cv rejects non-split input", {
  expect_error(ml_cv(iris, "Species"), "ml_split_result")
})

test_that("ml_cv rejects folds < 2", {
  s <- ml_split(iris, "Species", seed = 42L)
  expect_error(ml_cv(s, "Species", folds = 1L, seed = 42L), "folds")
})

test_that("ml_cv rejects missing target", {
  s <- ml_split(iris, "Species", seed = 42L)
  expect_error(ml_cv(s, "nonexistent", folds = 3L, seed = 42L), "nonexistent")
})

test_that("ml_cv print method works", {
  s <- ml_split(iris, "Species", seed = 42L)
  c <- ml_cv(s, "Species", folds = 5L, seed = 42L)
  out <- capture.output(print(c))
  expect_true(any(grepl("CV", out)))
  expect_true(any(grepl("5 folds", out)))
})

# ── ml_cv_temporal() ─────────────────────────────────────────────────────────

test_that("ml_cv_temporal creates expanding-window folds", {
  df <- data.frame(date = 1:100, x = rnorm(100), y = sample(0:1, 100, TRUE))
  s <- ml_split_temporal(df, "y", time = "date")
  c <- ml_cv_temporal(s, "y", folds = 3L)
  expect_s3_class(c, "ml_cv_result")
  expect_length(c$folds, 3L)
  # Expanding: fold 2 train should be larger than fold 1 train
  expect_true(length(c$folds[[2]]$train) > length(c$folds[[1]]$train))
})

# ── ml_cv_group() ────────────────────────────────────────────────────────────

test_that("ml_cv_group no group leakage across folds", {
  df <- data.frame(
    pid = rep(1:20, each = 5),
    x = rnorm(100),
    y = sample(0:1, 100, TRUE)
  )
  s <- ml_split(df, "y", seed = 42L)
  c <- ml_cv_group(s, "y", groups = "pid", folds = 3L, seed = 42L)
  expect_s3_class(c, "ml_cv_result")
  # Check no group appears in both train and valid of any fold
  dev_data <- s$dev
  for (fold in c$folds) {
    train_groups <- unique(dev_data$pid[fold$train])
    valid_groups <- unique(dev_data$pid[fold$valid])
    expect_length(intersect(train_groups, valid_groups), 0L)
  }
})

test_that("ml_cv_group rejects missing groups column", {
  s <- ml_split(iris, "Species", seed = 42L)
  expect_error(ml_cv_group(s, "Species", groups = "nope", seed = 42L), "nope")
})

# ── ml_cv_temporal: embargo + window ─────────────────────────────────────────

test_that("ml_cv_temporal embargo shifts valid start by embargo rows", {
  withr::local_seed(42L)
  n  <- 200L
  df <- data.frame(
    date   = seq.Date(as.Date("2020-01-01"), by = "day", length.out = n),
    f1     = stats::rnorm(n),
    target = sample(c(0L, 1L), n, replace = TRUE)
  )
  s  <- ml_split_temporal(df, "target", time = "date")
  c0 <- ml_cv_temporal(s, "target", folds = 3L)
  ce <- ml_cv_temporal(s, "target", folds = 3L, embargo = 5L)
  # With embargo, valid starts later → fewer valid rows in first folds
  expect_gte(length(c0$folds[[1L]]$valid), length(ce$folds[[1L]]$valid))
})

test_that("ml_cv_temporal rejects negative embargo", {
  withr::local_seed(42L)
  n  <- 200L
  df <- data.frame(
    date   = seq.Date(as.Date("2020-01-01"), by = "day", length.out = n),
    f1     = stats::rnorm(n),
    target = sample(c(0L, 1L), n, replace = TRUE)
  )
  s <- ml_split_temporal(df, "target", time = "date")
  expect_error(
    ml_cv_temporal(s, "target", folds = 3L, embargo = -1L),
    regexp = "embargo.*>= 0|embargo.*negative"
  )
})

test_that("ml_cv_temporal sliding window requires window_size", {
  withr::local_seed(42L)
  n  <- 200L
  df <- data.frame(
    date   = seq.Date(as.Date("2020-01-01"), by = "day", length.out = n),
    f1     = stats::rnorm(n),
    target = sample(c(0L, 1L), n, replace = TRUE)
  )
  s <- ml_split_temporal(df, "target", time = "date")
  expect_error(
    ml_cv_temporal(s, "target", folds = 3L, window = "sliding"),
    regexp = "window_size"
  )
})

test_that("ml_cv_temporal sliding window creates folds", {
  withr::local_seed(42L)
  n  <- 200L
  df <- data.frame(
    date   = seq.Date(as.Date("2020-01-01"), by = "day", length.out = n),
    f1     = stats::rnorm(n),
    target = sample(c(0L, 1L), n, replace = TRUE)
  )
  s <- ml_split_temporal(df, "target", time = "date")
  c <- ml_cv_temporal(s, "target", folds = 3L, window = "sliding",
                       window_size = 50L)
  expect_s3_class(c, "ml_cv_result")
  expect_gte(length(c$folds), 1L)
})