test_that("three-way split produces correct sizes", {
  s <- ml_split(iris, "Species", seed = 42L)
  expect_equal(nrow(s$train) + nrow(s$valid) + nrow(s$test), nrow(iris))
  # Within tolerance of 60/20/20
  expect_true(abs(nrow(s$train) / nrow(iris) - 0.6) < 0.05)
})

test_that("stratified split preserves class proportions", {
  s <- ml_split(iris, "Species", seed = 42L)
  train_freq <- table(s$train$Species) / nrow(s$train)
  orig_freq  <- table(iris$Species)  / nrow(iris)
  for (cls in names(orig_freq)) {
    expect_true(abs(train_freq[[cls]] - orig_freq[[cls]]) < 0.1)
  }
})

test_that("$dev = rbind($train, $valid) row count matches", {
  s <- ml_split(iris, "Species", seed = 42L)
  expect_equal(nrow(s$dev), nrow(s$train) + nrow(s$valid))
})

test_that("seed reproducibility: same seed produces identical split", {
  s1 <- ml_split(iris, "Species", seed = 99L)
  s2 <- ml_split(iris, "Species", seed = 99L)
  expect_identical(rownames(s1$train), rownames(s2$train))
})

test_that("different seeds produce different splits", {
  s1 <- ml_split(iris, "Species", seed = 1L)
  s2 <- ml_split(iris, "Species", seed = 2L)
  expect_false(identical(rownames(s1$train), rownames(s2$train)))
})

test_that("CV mode returns ml_split_result with folds and test", {
  cv <- ml_split(iris, "Species", seed = 42L, folds = 5L)
  expect_s3_class(cv, "ml_split_result")
  expect_equal(length(cv$folds), 5L)
  # Test partition held out (not on CV, on the split)
  expect_true(nrow(cv$test) > 0L)
  expect_true(nrow(cv$test) < nrow(iris))
})

test_that("NA target rows dropped with warning", {
  df <- iris
  df$Species[1:5] <- NA
  expect_warning(
    s <- ml_split(df, "Species", seed = 42L),
    regexp = "Dropped"
  )
  expect_equal(nrow(s$train) + nrow(s$valid) + nrow(s$test), nrow(iris) - 5L)
})

test_that("tibble input works", {
  if (!requireNamespace("tibble", quietly = TRUE)) skip("tibble not installed")
  tbl <- tibble::as_tibble(iris)
  s   <- ml_split(tbl, "Species", seed = 42L)
  expect_s3_class(s, "ml_split_result")
  expect_true(is.data.frame(s$train))
})

test_that("error on non-existent target column", {
  expect_error(
    ml_split(iris, "nonexistent", seed = 42L),
    class = "data_error"
  )
})

test_that("error on empty data.frame", {
  expect_error(
    ml_split(data.frame(), seed = 42L),
    class = "data_error"
  )
})

test_that("error on ratio not summing to 1.0", {
  expect_error(
    ml_split(iris, "Species", seed = 42L, ratio = c(0.5, 0.3, 0.3)),
    class = "config_error"
  )
})

test_that("CV split_result has test but train is dev data", {
  cv <- ml_split(iris, "Species", seed = 42L, folds = 3L)
  # $test is available (held out for assess)
  expect_true(nrow(cv$test) > 0L)
  # $train is dev data (used internally for folds)
  expect_true(nrow(cv$train) > 0L)
  # $folds present
  expect_equal(length(cv$folds), 3L)
})

test_that("classification heuristic: numeric target with few unique values", {
  df <- data.frame(x = stats::rnorm(100L), target = sample(1:3, 100L, replace = TRUE))
  expect_warning(
    s <- ml_split(df, "target", seed = 42L),
    regexp = "classification"
  )
})

test_that("task= override works", {
  df <- data.frame(x = stats::rnorm(100L), y = stats::rnorm(100L))
  s  <- ml_split(df, "y", seed = 42L, task = "regression")
  expect_s3_class(s, "ml_split_result")
})

test_that("stratify = FALSE produces valid split", {
  s <- ml_split(iris, "Species", seed = 42L, stratify = FALSE)
  expect_equal(nrow(s$train) + nrow(s$valid) + nrow(s$test), nrow(iris))
})

test_that("folds > n_rows raises data_error", {
  small_df <- data.frame(x = 1:5, y = c("a", "b", "a", "b", "a"))
  expect_error(
    ml_split(small_df, "y", seed = 42L, folds = 10L),
    class = "data_error"
  )
})

test_that("ml$split() module style works identically", {
  s1 <- ml_split(iris, "Species", seed = 42L)
  s2 <- ml$split(iris, "Species", seed = 42L)
  expect_identical(rownames(s1$train), rownames(s2$train))
})

test_that("1-row data raises data_error", {
  expect_error(
    ml_split(iris[1L, , drop = FALSE], "Species", seed = 42L),
    class = "data_error"
  )
})

# ── time= parameter ─────────────────────────────────────────────────────────

test_that("time= produces chronological split, drops time column", {
  n <- 200L
  df <- data.frame(
    t = seq_len(n),
    x = stats::rnorm(n),
    y = stats::rnorm(n)
  )
  s <- ml_split(df, "y", seed = 42L, time = "t")
  # Time column should be dropped

  expect_false("t" %in% names(s$train))
  expect_false("t" %in% names(s$valid))
  expect_false("t" %in% names(s$test))
  # All rows accounted for
  expect_equal(nrow(s$train) + nrow(s$valid) + nrow(s$test), n)
})

test_that("time= split is deterministic (seed ignored)", {
  n <- 200L
  df <- data.frame(t = seq_len(n), x = stats::rnorm(n), y = stats::rnorm(n))
  s1 <- ml_split(df, "y", seed = 1L, time = "t")
  s2 <- ml_split(df, "y", seed = 999L, time = "t")
  expect_identical(s1$train$x, s2$train$x)
})

test_that("time= shuffled input gets sorted", {
  n <- 100L
  df <- data.frame(t = sample(n), x = seq_len(n), y = stats::rnorm(n))
  s  <- ml_split(df, "y", seed = 42L, time = "t")
  # Train should have lowest-t rows (sorted by t, then dropped)
  # x values in train should correspond to the lowest t values
  expect_equal(nrow(s$train) + nrow(s$valid) + nrow(s$test), n)
})

test_that("time= with folds produces temporal CV with test holdout", {
  n <- 200L
  df <- data.frame(t = seq_len(n), x = stats::rnorm(n), y = stats::rnorm(n))
  cv <- ml_split(df, "y", seed = 42L, time = "t", folds = 3L)
  expect_s3_class(cv, "ml_split_result")
  expect_equal(length(cv$folds), 3L)
  # Test held out from end of time series
  expect_true(nrow(cv$test) > 0L)
  # Each fold's valid indices should come after train indices (expanding window)
  for (f in cv$folds) {
    expect_true(max(f$train) < min(f$valid))
  }
})

test_that("time= missing column raises data_error", {
  expect_error(
    ml_split(iris, "Species", seed = 42L, time = "nonexistent"),
    class = "data_error"
  )
})

# ── groups= parameter ───────────────────────────────────────────────────────

test_that("groups= ensures no group leakage", {
  n <- 200L
  grp <- rep(paste0("g", seq_len(20L)), each = 10L)
  df  <- data.frame(group = grp, x = stats::rnorm(n), y = stats::rnorm(n))
  s   <- ml_split(df, "y", seed = 42L, groups = "group")
  # No group appears in more than one partition
  train_grps <- unique(s$train$group)
  valid_grps <- unique(s$valid$group)
  test_grps  <- unique(s$test$group)
  expect_length(intersect(train_grps, valid_grps), 0L)
  expect_length(intersect(train_grps, test_grps), 0L)
  expect_length(intersect(valid_grps, test_grps), 0L)
  # All rows accounted for
  expect_equal(nrow(s$train) + nrow(s$valid) + nrow(s$test), n)
})

test_that("groups= with folds produces GroupKFold with test holdout", {
  n <- 200L
  grp <- rep(paste0("g", seq_len(20L)), each = 10L)
  df  <- data.frame(group = grp, x = stats::rnorm(n), y = stats::rnorm(n))
  cv  <- ml_split(df, "y", seed = 42L, groups = "group", folds = 4L)
  expect_s3_class(cv, "ml_split_result")
  expect_equal(length(cv$folds), 4L)
  # Test groups held out
  expect_true(nrow(cv$test) > 0L)
  # No group leakage across train/valid in each fold
  dev_data <- cv$train  # dev data stored as train when folds present
  for (f in cv$folds) {
    train_grps <- unique(dev_data$group[f$train])
    valid_grps <- unique(dev_data$group[f$valid])
    expect_length(intersect(train_grps, valid_grps), 0L)
  }
})

test_that("groups= seed reproducibility", {
  n <- 200L
  grp <- rep(paste0("g", seq_len(20L)), each = 10L)
  df  <- data.frame(group = grp, x = stats::rnorm(n), y = stats::rnorm(n))
  s1  <- ml_split(df, "y", seed = 42L, groups = "group")
  s2  <- ml_split(df, "y", seed = 42L, groups = "group")
  expect_identical(s1$train$group, s2$train$group)
})

test_that("groups= with <3 unique groups raises data_error", {
  df <- data.frame(group = rep(c("a", "b"), each = 50L),
                   x = stats::rnorm(100L), y = stats::rnorm(100L))
  expect_error(
    ml_split(df, "y", seed = 42L, groups = "group"),
    class = "data_error"
  )
})

test_that("groups= missing column raises data_error", {
  expect_error(
    ml_split(iris, "Species", seed = 42L, groups = "nonexistent"),
    class = "data_error"
  )
})

test_that("time= and groups= combined raises config_error", {
  df <- data.frame(t = 1:100, g = rep(1:10, each = 10L),
                   x = stats::rnorm(100L), y = stats::rnorm(100L))
  expect_error(
    ml_split(df, "y", seed = 42L, time = "t", groups = "g"),
    class = "config_error"
  )
})

# ── Partition tag tests ──

test_that("split tags partitions with _ml_partition attr", {
  s <- ml_split(iris, "Species", seed = 42L)
  expect_equal(attr(s$train, "_ml_partition"), "train")
  expect_equal(attr(s$valid, "_ml_partition"), "valid")
  expect_equal(attr(s$test,  "_ml_partition"), "test")
})

test_that("dev property tags as dev", {
  s <- ml_split(iris, "Species", seed = 42L)
  expect_equal(attr(s$dev, "_ml_partition"), "dev")
})