library(testthat)
library(dplyr)

# Small two-class dataset shared by the tests below: iris without the
# "setosa" rows, with the now-unused factor level dropped.
set.seed(123)
data(iris)
iris_binary <- iris[iris$Species != "setosa", ]
iris_binary$Species <- factor(iris_binary$Species)

# Return the first entry of `model$performance`, unwrapping one extra list
# level when that entry is a plain list rather than a data frame.
# Shared by every test that inspects the `.metric` / `.estimate` columns.
first_performance <- function(model) {
  perf <- model$performance[[1]]
  if (is.list(perf) && !is.data.frame(perf)) {
    perf <- perf[[1]]
  }
  perf
}

# ========================
# Input Validation Tests
# ========================

test_that("fastml errors when label column doesn't exist", {
  expect_error(
    fastml(
      data = iris_binary,
      label = "nonexistent_column",
      algorithms = "logistic_reg"
    ),
    "Label variable must exist|not found|not present|does not exist"
  )
})

test_that("fastml errors with empty data", {
  expect_error(
    fastml(
      data = iris_binary[0, ],
      label = "Species",
      algorithms = "logistic_reg"
    )
  )
})

test_that("fastml errors with invalid algorithm name", {
  expect_error(
    fastml(
      data = iris_binary,
      label = "Species",
      algorithms = "nonexistent_algorithm"
    )
  )
})

test_that("fastml errors with invalid resampling method", {
  expect_error(
    fastml(
      data = iris_binary,
      label = "Species",
      algorithms = "logistic_reg",
      resampling_method = "invalid_method"
    )
  )
})

# ========================
# Task Detection Tests
# ========================

test_that("fastml detects classification task from factor label", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  expect_equal(model$task, "classification")
})

test_that("fastml detects regression task from numeric label", {
  skip_on_cran()

  data(mtcars)
  model <- fastml(
    data = mtcars,
    label = "mpg",
    algorithms = "linear_reg",
    resampling_method = "none",
    use_default_tuning = FALSE
  )

  expect_equal(model$task, "regression")
})

test_that("fastml uses explicitly specified task", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    task = "classification",
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  expect_equal(model$task, "classification")
})

# ========================
# Data Splitting Tests
# ========================

test_that("fastml respects test_size parameter", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "none",
    use_default_tuning = FALSE,
    test_size = 0.3
  ))

  # Check that raw_train_data and raw_test_data exist.
  # NOTE(review): this only checks presence, not the 70/30 proportion;
  # tighten once the exact split semantics of fastml() are confirmed.
  expect_false(is.null(model$raw_train_data))
  expect_false(is.null(model$raw_test_data))
})

test_that("fastml uses provided train_data and test_data", {
  skip_on_cran()

  set.seed(123)
  idx <- sample(nrow(iris_binary), floor(0.7 * nrow(iris_binary)))
  train_data <- iris_binary[idx, ]
  test_data <- iris_binary[-idx, ]

  model <- suppressWarnings(fastml(
    train_data = train_data,
    test_data = test_data,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  expect_equal(nrow(model$raw_train_data), nrow(train_data))
  expect_equal(nrow(model$raw_test_data), nrow(test_data))
})

# ========================
# Resampling Tests
# ========================

test_that("fastml works with cv resampling", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "cv",
    folds = 3,
    use_default_tuning = FALSE
  ))

  expect_s3_class(model, "fastml")
})

test_that("fastml works with none resampling", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  expect_s3_class(model, "fastml")
})

# ========================
# Multiple Algorithms Tests
# ========================

test_that("fastml trains multiple algorithms", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = c("logistic_reg", "rand_forest"),
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  expect_gte(length(model$models), 2)
})

test_that("fastml returns best model among multiple", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = c("logistic_reg", "rand_forest"),
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  expect_false(is.null(model$best_model))
  expect_false(is.null(model$best_model_name))
})

# ========================
# Metric Tests
# ========================

test_that("fastml uses specified metric for classification", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    metric = "accuracy",
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  expect_equal(model$metric, "accuracy")
})

test_that("fastml uses specified metric for regression", {
  skip_on_cran()

  data(mtcars)
  model <- fastml(
    data = mtcars,
    label = "mpg",
    algorithms = "linear_reg",
    metric = "rmse",
    resampling_method = "none",
    use_default_tuning = FALSE
  )

  expect_equal(model$metric, "rmse")
})

# ========================
# Output Structure Tests
# ========================

test_that("fastml returns complete object structure", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  expected_fields <- c(
    "models", "performance", "predictions", "best_model",
    "label", "task", "preprocessor"
  )

  # Comparing the set difference against an empty vector makes a failure
  # report exactly which fields are missing.
  expect_equal(setdiff(expected_fields, names(model)), character(0))
})

test_that("fastml performance contains expected metrics for classification", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  perf <- first_performance(model)

  expect_true("accuracy" %in% perf$.metric || "roc_auc" %in% perf$.metric)
})

test_that("fastml performance contains expected metrics for regression", {
  skip_on_cran()

  data(mtcars)
  model <- fastml(
    data = mtcars,
    label = "mpg",
    algorithms = "linear_reg",
    resampling_method = "none",
    use_default_tuning = FALSE
  )

  perf <- first_performance(model)

  expect_true("rmse" %in% perf$.metric || "rsq" %in% perf$.metric)
})

# ========================
# Seed Reproducibility Tests
# ========================

test_that("fastml results are reproducible with same seed", {
  skip_on_cran()

  model1 <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "none",
    use_default_tuning = FALSE,
    seed = 42
  ))
  model2 <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "none",
    use_default_tuning = FALSE,
    seed = 42
  ))

  # Performance should be identical with same seed
  perf1 <- first_performance(model1)
  perf2 <- first_performance(model2)

  expect_equal(perf1$.estimate, perf2$.estimate)
})

# ========================
# Special Data Types Tests
# ========================

test_that("fastml handles character columns by converting to factor", {
  skip_on_cran()

  df <- iris_binary
  # Seed before sampling so the generated column does not depend on how
  # much of the RNG stream earlier tests consumed.
  set.seed(123)
  df$char_col <- sample(c("A", "B"), nrow(df), replace = TRUE)

  model <- suppressWarnings(fastml(
    data = df,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  expect_s3_class(model, "fastml")
})

test_that("fastml handles data with NAs via imputation", {
  skip_on_cran()

  df <- iris_binary
  df$Sepal.Length[1:5] <- NA

  model <- suppressWarnings(fastml(
    data = df,
    label = "Species",
    algorithms = "logistic_reg",
    resampling_method = "none",
    use_default_tuning = FALSE,
    impute_method = "medianImpute"
  ))

  expect_s3_class(model, "fastml")
})

# ========================
# Event Class Tests
# ========================

test_that("fastml uses specified event_class", {
  skip_on_cran()

  model <- suppressWarnings(fastml(
    data = iris_binary,
    label = "Species",
    algorithms = "logistic_reg",
    event_class = "first",
    resampling_method = "none",
    use_default_tuning = FALSE
  ))

  expect_equal(model$event_class, "first")
})

# ========================
# Verbose Tests
# ========================

test_that("fastml verbose=TRUE produces output", {
  skip_on_cran()

  # The assignment inside the braces keeps the result invisible, so
  # capture.output() does not auto-print the fitted object.
  output <- capture.output(type = "message", {
    model <- suppressWarnings(fastml(
      data = iris_binary,
      label = "Species",
      algorithms = "logistic_reg",
      resampling_method = "none",
      use_default_tuning = FALSE,
      verbose = TRUE
    ))
  })

  expect_gt(length(output), 0)
})

test_that("fastml verbose=FALSE suppresses output", {
  skip_on_cran()

  # As above, the assignment prevents the model from being printed into
  # the captured stdout.
  output <- capture.output({
    model <- suppressMessages(suppressWarnings(fastml(
      data = iris_binary,
      label = "Species",
      algorithms = "logistic_reg",
      resampling_method = "none",
      use_default_tuning = FALSE,
      verbose = FALSE
    )))
  })

  # Verbose=FALSE should produce minimal/no output: nothing but empty
  # lines may be captured. On failure the offending lines are shown.
  expect_equal(output[nzchar(output)], character(0))
})