# Tests for ml_split_temporal() and ml_split_group() domain specializations # ── Temporal holdout ───────────────────────────────────────────────────────── test_that("ml_split_temporal produces ml_split_result", { df <- data.frame(date = 1:100, x = rnorm(100), y = sample(0:1, 100, TRUE)) s <- ml_split_temporal(df, "y", time = "date") expect_s3_class(s, "ml_split_result") }) test_that("ml_split_temporal drops time column", { df <- data.frame(date = 1:100, x = rnorm(100), y = sample(0:1, 100, TRUE)) s <- ml_split_temporal(df, "y", time = "date") expect_false("date" %in% names(s$train)) expect_false("date" %in% names(s$test)) }) test_that("ml_split_temporal tags partitions", { df <- data.frame(date = 1:100, x = rnorm(100), y = sample(0:1, 100, TRUE)) s <- ml_split_temporal(df, "y", time = "date") expect_equal(attr(s$train, "_ml_partition"), "train") expect_equal(attr(s$valid, "_ml_partition"), "valid") expect_equal(attr(s$test, "_ml_partition"), "test") }) test_that("ml_split_temporal is deterministic", { df <- data.frame(date = 1:100, x = rnorm(100), y = sample(0:1, 100, TRUE)) s1 <- ml_split_temporal(df, "y", time = "date") s2 <- ml_split_temporal(df, "y", time = "date") expect_equal(s1$train, s2$train) }) test_that("ml_split_temporal equivalent to ml_split(time=)", { df <- data.frame(date = 1:100, x = rnorm(100), y = sample(0:1, 100, TRUE)) s1 <- ml_split_temporal(df, "y", time = "date") s2 <- ml_split(df, "y", time = "date") expect_equal(nrow(s1$train), nrow(s2$train)) expect_equal(nrow(s1$test), nrow(s2$test)) }) test_that("ml_split_temporal with folds produces ml_cv_result", { df <- data.frame(date = 1:200, x = rnorm(200), y = sample(0:1, 200, TRUE)) cv <- ml_split_temporal(df, "y", time = "date", folds = 5) expect_s3_class(cv, "ml_cv_result") expect_equal(cv$k, 5L) }) test_that("ml_split_temporal errors on missing time column", { df <- data.frame(x = rnorm(100), y = sample(0:1, 100, TRUE)) expect_error(ml_split_temporal(df, "y", time = "nonexistent"), "not found") }) test_that("ml_split_temporal dev property works", { df <- data.frame(date = 1:100, x = rnorm(100), y = sample(0:1, 100, TRUE)) s <- ml_split_temporal(df, "y", time = "date") expect_equal(nrow(s$dev), nrow(s$train) + nrow(s$valid)) }) # ── Group holdout ──────────────────────────────────────────────────────────── test_that("ml_split_group produces ml_split_result", { df <- data.frame(pid = rep(1:10, each = 5), x = rnorm(50), y = sample(0:1, 50, TRUE)) s <- ml_split_group(df, "y", groups = "pid", seed = 42) expect_s3_class(s, "ml_split_result") }) test_that("ml_split_group has no group overlap", { df <- data.frame(pid = rep(1:20, each = 5), x = rnorm(100), y = sample(0:1, 100, TRUE)) s <- ml_split_group(df, "y", groups = "pid", seed = 42) train_g <- unique(s$train$pid) valid_g <- unique(s$valid$pid) test_g <- unique(s$test$pid) expect_length(intersect(train_g, valid_g), 0) expect_length(intersect(train_g, test_g), 0) expect_length(intersect(valid_g, test_g), 0) }) test_that("ml_split_group tags partitions", { df <- data.frame(pid = rep(1:10, each = 5), x = rnorm(50), y = sample(0:1, 50, TRUE)) s <- ml_split_group(df, "y", groups = "pid", seed = 42) expect_equal(attr(s$train, "_ml_partition"), "train") expect_equal(attr(s$valid, "_ml_partition"), "valid") expect_equal(attr(s$test, "_ml_partition"), "test") }) test_that("ml_split_group reproducible with same seed", { df <- data.frame(pid = rep(1:10, each = 5), x = rnorm(50), y = sample(0:1, 50, TRUE)) s1 <- ml_split_group(df, "y", groups = "pid", seed = 42) s2 <- ml_split_group(df, "y", groups = "pid", seed = 42) expect_equal(sort(unique(s1$train$pid)), sort(unique(s2$train$pid))) }) test_that("ml_split_group equivalent to ml_split(groups=)", { df <- data.frame(pid = rep(1:10, each = 5), x = rnorm(50), y = sample(0:1, 50, TRUE)) s1 <- ml_split_group(df, "y", groups = "pid", seed = 42) s2 <- ml_split(df, "y", groups = "pid", seed = 42) expect_equal(sort(unique(s1$train$pid)), sort(unique(s2$train$pid))) }) test_that("ml_split_group with folds produces ml_cv_result", { df <- data.frame(pid = rep(1:20, each = 5), x = rnorm(100), y = sample(0:1, 100, TRUE)) cv <- ml_split_group(df, "y", groups = "pid", folds = 4, seed = 42) expect_s3_class(cv, "ml_cv_result") expect_equal(cv$k, 4L) }) test_that("ml_split_group errors on missing groups column", { df <- data.frame(x = rnorm(50), y = sample(0:1, 50, TRUE)) expect_error(ml_split_group(df, "y", groups = "nonexistent"), "not found") }) # ── No-target usage ────────────────────────────────────────────────────────── test_that("ml_split_temporal works without target", { df <- data.frame(date = 1:100, x = rnorm(100)) s <- ml_split_temporal(df, time = "date") expect_s3_class(s, "ml_split_result") expect_equal(nrow(s$train) + nrow(s$valid) + nrow(s$test), 100L) }) test_that("ml_split_group works without target", { df <- data.frame(pid = rep(1:10, each = 5), x = rnorm(50)) s <- ml_split_group(df, groups = "pid", seed = 42) expect_s3_class(s, "ml_split_result") })