# Unit tests for the internal preprocessing helpers: task detection, input
# coercion, feature preparation / transformation, target encoding / decoding,
# NA imputation, and duplicate-column validation.
#
# The helpers under test (.detect_task, .coerce_data, .prepare, .transform_fit,
# .encode_target, .decode, .check_duplicate_cols) are defined elsewhere in the
# package; comments below describe only what each test asserts.

# Task detection ----

test_that("task detection: factor target → classification", {
  target <- factor(c("a", "b", "a"))
  expect_equal(.detect_task(target), "classification")
})

test_that("task detection: character target → classification", {
  target <- c("yes", "no", "yes")
  expect_equal(.detect_task(target), "classification")
})

test_that("task detection: many unique numerics → regression", {
  # 200 continuous draws: effectively all values are distinct.
  target <- stats::rnorm(200)
  expect_equal(.detect_task(target), "regression")
})

test_that("task detection: few unique numerics → classification", {
  # 300 values but only three distinct levels.
  target <- rep(1:3, 100)
  expect_equal(.detect_task(target), "classification")
})

test_that("task detection: override 'regression' works", {
  # Same low-cardinality target as above, but the explicit `task` wins.
  target <- rep(1:3, 100)
  expect_equal(.detect_task(target, task = "regression"), "regression")
})

test_that("task detection: invalid task raises config_error", {
  expect_error(
    .detect_task(c("a", "b"), task = "magic"),
    class = "config_error"
  )
})

# Input coercion ----

test_that("coerce tibble to data.frame", {
  skip_if_not_installed("tibble")
  tbl <- tibble::as_tibble(iris)
  df <- .coerce_data(tbl)
  # Result must still be a data.frame, but no longer carry the tibble class.
  expect_s3_class(df, "data.frame")
  expect_false(inherits(df, "tbl_df"))
})

# Feature preparation ----

test_that("prepare: ordinal encoding for tree algorithm", {
  X <- data.frame(color = c("red", "blue", "red"), stringsAsFactors = FALSE)
  y <- c(1L, 0L, 1L)
  norm <- .prepare(X, y, algorithm = "xgboost", task = "classification")
  # At least one category map is recorded and one-hot encoding is off.
  expect_gt(length(norm$category_maps), 0L)
  expect_false(norm$use_onehot)
})

test_that("prepare: one-hot encoding for linear algorithm", {
  X <- data.frame(color = c("red", "blue", "red"), stringsAsFactors = FALSE)
  y <- c(1L, 0L, 1L)
  norm <- .prepare(X, y, algorithm = "logistic", task = "classification")
  expect_true(norm$use_onehot)
  expect_true("color" %in% norm$onehot_cols)
})

test_that("transform_fit: output is numeric data.frame", {
  X <- data.frame(
    color = c("red", "blue", "red"),
    x = c(1, 2, 3),
    stringsAsFactors = FALSE
  )
  y <- c(1L, 0L, 1L)
  norm <- .prepare(X, y, algorithm = "logistic", task = "classification")
  res <- .transform_fit(X, norm)
  # Every element/column of the transformed features must be numeric.
  expect_true(all(vapply(res$X, is.numeric, logical(1L))))
})

# Target encoding / decoding ----

test_that("encode_target: string labels → 0-based integers", {
  y <- c("yes", "no", "yes")
  norm <- .prepare(
    data.frame(x = 1:3), y,
    algorithm = "xgboost", task = "classification"
  )
  enc <- .encode_target(y, norm)
  # Guard against a vacuous pass: all(logical(0)) is TRUE for empty output.
  expect_length(enc, length(y))
  expect_true(all(enc %in% 0:1))
})

test_that("decode: 0-based integers → original labels", {
  y <- c("yes", "no", "yes")
  norm <- .prepare(
    data.frame(x = 1:3), y,
    algorithm = "xgboost", task = "classification"
  )
  enc <- .encode_target(y, norm)
  dec <- .decode(enc, norm)
  # Round trip: encode followed by decode reproduces the input labels.
  expect_equal(as.character(dec), y)
})

# Missing values ----

test_that("non-tree algorithm: NA imputed with median", {
  X <- data.frame(x = c(1, 2, NA, 4, 5))
  y <- c(1L, 0L, 1L, 0L, 1L)
  norm <- .prepare(X, y, algorithm = "logistic", task = "classification")
  # A warning about NA imputation is expected here; it is silenced because
  # this test only inspects the imputed output.
  res <- suppressWarnings(.transform_fit(X, norm))
  # NA should be gone after imputation.
  expect_false(any(is.na(res$X)))
})

# Input validation ----

test_that("check_duplicate_cols: raises data_error on duplicates", {
  df <- data.frame(a = 1:3, b = 1:3)
  # data.frame() would de-duplicate names, so force the duplicate afterwards.
  names(df) <- c("a", "a")
  expect_error(.check_duplicate_cols(df), class = "data_error")
})