## AutoEnsML — comprehensive unit tests
## Author: Sadikul Islam
## Note: feedback_scores is an exported FUNCTION (not a dataset).
##       Call feedback_scores() to get the data frame.

## ── Helper: small fast subset ─────────────────────────────────────────────────
## Returns the first 60 rows of the data frame produced by feedback_scores().
.fs <- function() {
  feedback_scores()[seq_len(60L), ]
}

## ── 1. auto_ensml: regression ─────────────────────────────────────────────────
test_that("auto_ensml regression returns valid AutoEnsML object", {
  skip_on_cran()

  dat <- .fs()
  fit <- auto_ensml(
    overall_score ~ instructor_rating + content_clarity + pace_score,
    data       = dat,
    task       = "regression",
    n_folds    = 3L,
    max_models = 3L,
    methods    = c("simple_avg", "weighted_avg", "greedy"),
    parallel   = FALSE,
    verbose    = FALSE
  )

  # Class, task label and number of base models.
  expect_s3_class(fit, "AutoEnsML")
  expect_equal(fit$task, "regression")
  expect_gte(fit$n_models, 2L)

  # Weights: numeric, sum to 1, and non-negative up to a tiny tolerance.
  wts <- fit$weights
  expect_true(is.numeric(wts))
  expect_equal(sum(wts), 1, tolerance = 1e-5)
  expect_true(all(wts >= -1e-10))

  # Leaderboard columns, and the best method must be one of its rows.
  board <- fit$leaderboard
  expect_s3_class(board, "data.frame")
  expect_true(all(c("method", "RMSE", "composite") %in% names(board)))
  expect_true(is.character(fit$best_method))
  expect_true(fit$best_method %in% board$method)
})

## ── 2. auto_ensml: classification ─────────────────────────────────────────────
test_that("auto_ensml classification returns valid AutoEnsML object", {
  skip_on_cran()

  dat <- .fs()
  fit <- auto_ensml(
    sentiment_binary ~ instructor_rating + content_clarity + pace_score,
    data     = dat,
    task     = "classification",
    n_folds  = 3L,
    methods  = c("simple_avg", "weighted_avg"),
    parallel = FALSE,
    verbose  = FALSE
  )

  expect_s3_class(fit, "AutoEnsML")
  expect_equal(fit$task, "classification")

  # Classification leaderboards must carry these metric columns.
  metric_cols <- c("AUC", "F1", "MCC")
  expect_true(all(metric_cols %in% names(fit$leaderboard)))
})

## ── 3.
## predict: regression ───────────────────────────────────────────────────────
test_that("predict.AutoEnsML returns correct-length numeric vector (regression)", {
  skip_on_cran()

  dat <- .fs()
  fit <- auto_ensml(
    overall_score ~ instructor_rating + content_clarity,
    data     = dat,
    task     = "regression",
    n_folds  = 3L,
    methods  = c("simple_avg", "weighted_avg"),
    parallel = FALSE,
    verbose  = FALSE
  )

  # Predict on the first five rows of the training subset.
  preds <- predict(fit, newdata = dat[seq_len(5L), ])

  expect_length(preds, 5L)
  expect_true(is.numeric(preds))
  expect_false(anyNA(preds))
})

## ── 4. predict: classification types ─────────────────────────────────────────
test_that("predict.AutoEnsML type = class returns 0/1 integers", {
  skip_on_cran()

  dat <- .fs()
  fit <- auto_ensml(
    sentiment_binary ~ instructor_rating + content_clarity,
    data     = dat,
    task     = "classification",
    n_folds  = 3L,
    methods  = "simple_avg",
    parallel = FALSE,
    verbose  = FALSE
  )

  labels <- predict(fit, newdata = dat[seq_len(5L), ], type = "class")
  probs  <- predict(fit, newdata = dat[seq_len(5L), ], type = "prob")

  # Class predictions are 0/1; probability predictions lie in [0, 1].
  expect_true(all(labels %in% c(0L, 1L)))
  expect_true(all(probs >= 0 & probs <= 1))
})

## ── 5. S3: print and summary ──────────────────────────────────────────────────
test_that("print and summary produce expected output", {
  skip_on_cran()

  dat <- .fs()
  fit <- auto_ensml(
    overall_score ~ instructor_rating + content_clarity,
    data     = dat,
    task     = "regression",
    n_folds  = 3L,
    methods  = "simple_avg",
    parallel = FALSE,
    verbose  = FALSE
  )

  # print() output must match "AutoEnsML".
  expect_output(print(fit), "AutoEnsML")

  # summary() output is checked against three separate patterns.
  expect_output(summary(fit), "AutoEnsML Summary")
  expect_output(summary(fit), "Sadikul Islam")
  expect_output(summary(fit), "Optimal weights")
})

## ── 6.
## coef ──────────────────────────────────────────────────────────────────────
test_that("coef.AutoEnsML returns named weights summing to 1", {
  skip_on_cran()

  dat <- .fs()
  fit <- auto_ensml(
    overall_score ~ instructor_rating + content_clarity,
    data     = dat,
    task     = "regression",
    n_folds  = 3L,
    methods  = "simple_avg",
    parallel = FALSE,
    verbose  = FALSE
  )

  wts <- coef(fit)

  expect_true(is.numeric(wts))
  expect_false(is.null(names(wts)))
  expect_equal(sum(wts), 1, tolerance = 1e-5)
  expect_true(all(wts >= -1e-10))
})

## ── 7. optimize_weights ───────────────────────────────────────────────────────
test_that("optimize_weights returns non-negative weights summing to 1", {
  set.seed(7L)

  # 100 x 3 matrix of random predictions, one named column per learner.
  learners <- c("glmnet", "ranger", "xgboost")
  oof_mat <- matrix(
    rnorm(300L),
    100L,
    3L,
    dimnames = list(NULL, learners)
  )
  target <- rnorm(100L)

  wts <- optimize_weights(oof_mat, target, metric = "rmse", task = "regression")

  expect_length(wts, 3L)
  expect_named(wts, learners)
  expect_equal(sum(wts), 1, tolerance = 1e-5)
  expect_true(all(wts >= -1e-10))
})

## ── 8. compute_diversity ──────────────────────────────────────────────────────
test_that("compute_diversity returns integer in [3, ncol(oof)]", {
  set.seed(8L)

  # 100 x 5 matrix of random predictions and a random target.
  oof_mat <- matrix(rnorm(500L), 100L, 5L)
  target <- rnorm(100L)

  n_keep <- compute_diversity(
    oof_mat, target, "rmse", "regression",
    verbose = FALSE
  )

  expect_true(is.numeric(n_keep))
  expect_gte(n_keep, 3L)
  expect_lte(n_keep, 5L)
})

## ── 9. compare_ensembles ──────────────────────────────────────────────────────
test_that("compare_ensembles returns ranked data frame with all stat columns", {
  set.seed(9L)

  n_obs <- 80L
  target <- rnorm(n_obs)
  # Two candidate prediction vectors, drawn after the target.
  candidates <- list(a = rnorm(n_obs), b = rnorm(n_obs))

  ranked <- compare_ensembles(candidates, target, task = "regression")

  expect_s3_class(ranked, "data.frame")
  expect_equal(nrow(ranked), 2L)

  stat_cols <- c("method", "RMSE", "MAE", "R2", "AIC", "BIC", "composite")
  expect_true(all(stat_cols %in% names(ranked)))

  # The first row's composite score must not exceed the second row's.
  expect_lte(ranked$composite[1L], ranked$composite[2L])
})

## ── 10.
## Input validation ──────────────────────────────────────────────────────────
test_that("auto_ensml rejects invalid inputs with informative errors", {
  dat <- .fs()

  # A string in place of the formula.
  expect_error(auto_ensml("not formula", dat), "formula")

  # A string in place of the data frame.
  expect_error(auto_ensml(overall_score ~ ., "not df"), "data.frame")

  # Only five rows of data; the error message must match "20".
  expect_error(auto_ensml(overall_score ~ ., dat[seq_len(5L), ]), "20")

  # A single fold.
  expect_error(auto_ensml(overall_score ~ ., dat, n_folds = 1L), "n_folds")
})

## ── 11. AUC helper ────────────────────────────────────────────────────────────
test_that(".ae_auc returns value in [0, 1]", {
  set.seed(10L)

  truth <- sample(0:1, 60L, replace = TRUE)
  scores <- runif(60L)

  # .ae_auc is reached with `:::`, i.e. it is not taken from the exports.
  auc_val <- AutoEnsML:::.ae_auc(truth, scores)

  expect_gte(auc_val, 0)
  expect_lte(auc_val, 1)
})

## ── 12. RNG state not permanently altered ─────────────────────────────────────
test_that("auto_ensml restores RNG state after execution", {
  skip_on_cran()

  dat <- .fs()

  # Reference draw: the first uniform after seeding with 99.
  set.seed(99L)
  before <- runif(1L)

  # Re-seed identically, run auto_ensml, then draw again. The two draws agree
  # only if the call leaves the RNG stream where it found it.
  set.seed(99L)
  auto_ensml(
    overall_score ~ instructor_rating,
    data     = dat,
    task     = "regression",
    n_folds  = 3L,
    methods  = "simple_avg",
    parallel = FALSE,
    verbose  = FALSE
  )
  after <- runif(1L)

  expect_equal(before, after, tolerance = 1e-10)
})

## ── 13. feedback_scores function ──────────────────────────────────────────────
test_that("feedback_scores() returns correct structure", {
  scores_df <- feedback_scores() # call as function

  # Dimensions: 200 rows by 9 columns.
  expect_s3_class(scores_df, "data.frame")
  expect_equal(nrow(scores_df), 200L)
  expect_equal(ncol(scores_df), 9L)

  # Required columns.
  needed <- c("overall_score", "sentiment_binary", "session_type")
  expect_true(all(needed %in% names(scores_df)))

  # Value ranges / allowed levels.
  expect_true(all(scores_df$sentiment_binary %in% c(0L, 1L)))
  expect_true(all(scores_df$overall_score >= 1 & scores_df$overall_score <= 5))
  expect_true(
    all(scores_df$session_type %in% c("workshop", "lecture", "seminar"))
  )
})