# Tests for Rust backend (engine="ml") and parity with R backends.
# All tests skip_if_not(.rust_available()) — CI without Rust is fine.

# ── Availability ─────────────────────────────────────────────────────────────

# The availability probe must yield a single logical value.
test_that(".rust_available() returns logical", {
  result <- ml:::.rust_available()
  expect_type(result, "logical")
  expect_length(result, 1L)
})

# ── Linear (Ridge) ──────────────────────────────────────────────────────────

# Fit on the mtcars training split and predict one double per validation row.
test_that("Rust linear: fit + predict regression", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "linear", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  expect_equal(model$algorithm, "linear")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_type(preds, "double")
})

# Both engines are fit on the same split and seed; their predictions must
# agree within a tolerance of 1e-6.
test_that("Rust linear vs R linear parity", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model_rust <- ml_fit(
    s$train, "mpg",
    algorithm = "linear", engine = "ml", seed = 42L
  )
  model_r <- ml_fit(
    s$train, "mpg",
    algorithm = "linear", engine = "r", seed = 42L
  )
  preds_rust <- predict(model_rust, newdata = s$valid)
  preds_r <- predict(model_r, newdata = s$valid)
  expect_equal(preds_rust, preds_r, tolerance = 1e-6)
})

# ── Logistic ────────────────────────────────────────────────────────────────

test_that("Rust logistic: fit + predict classification", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "logistic", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  expect_equal(model$algorithm, "logistic")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
})

# Exact equality is not required here: the two engines only need to agree on
# at least 80% of the validation labels.
test_that("Rust logistic vs R logistic parity (binary)", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  df <- binary_df(100L)
  s <- ml_split(df, "churn", seed = 42L)
  model_rust <- ml_fit(
    s$train, "churn",
    algorithm = "logistic", engine = "ml", seed = 42L
  )
  model_r <- ml_fit(
    s$train, "churn",
    algorithm = "logistic", engine = "r", seed = 42L
  )
  preds_rust <- predict(model_rust, newdata = s$valid)
  preds_r <- predict(model_r, newdata = s$valid)
  # Parity: same predictions (may differ slightly due to optimizer)
  agreement <- mean(preds_rust == preds_r)
  expect_true(agreement >= 0.8, info = paste0("Agreement: ", agreement))
})

# ── Decision Tree ───────────────────────────────────────────────────────────

test_that("Rust decision tree: classification fit + predict", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "decision_tree", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
})

test_that("Rust decision tree: regression fit + predict", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "decision_tree", engine = "ml", seed = 42L
  )
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_type(preds, "double")
})

# ml_explain() must return a non-empty "ml_explanation" object.
test_that("Rust decision tree: explain returns importances", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "decision_tree", engine = "ml", seed = 42L
  )
  imp <- ml_explain(model)
  expect_s3_class(imp, "ml_explanation")
  expect_true(nrow(imp) > 0L)
})

# ── Random Forest ───────────────────────────────────────────────────────────

test_that("Rust random forest: classification fit + predict", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "random_forest", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
})

test_that("Rust random forest: regression fit + predict", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "random_forest", engine = "ml", seed = 42L
  )
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_type(preds, "double")
})

test_that("Rust random forest: explain returns importances", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "random_forest", engine = "ml", seed = 42L
  )
  imp <- ml_explain(model)
  expect_s3_class(imp, "ml_explanation")
  expect_true(nrow(imp) > 0L)
})

# ── KNN ─────────────────────────────────────────────────────────────────────

test_that("Rust knn: classification fit + predict", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "knn", engine = "ml", seed = 42L, k = 5L
  )
  expect_s3_class(model, "ml_model")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
})

test_that("Rust knn: regression fit + predict", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "knn", engine = "ml", seed = 42L, k = 5L
  )
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_type(preds, "double")
})

# ── Engine parameter ────────────────────────────────────────────────────────

# Inverse gate: this test only runs when the Rust backend is absent, and
# expects an error of class "config_error" when engine = "ml" is requested.
test_that("engine='ml' errors when Rust not available", {
  skip_if(ml:::.rust_available(), "Test requires Rust NOT available")
  s <- iris_split()
  expect_error(
    ml_fit(
      s$train, "Species",
      algorithm = "logistic", engine = "ml", seed = 42L
    ),
    class = "config_error"
  )
})

test_that("engine='r' forces CRAN backend even when Rust is available", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "logistic", engine = "r", seed = 42L
  )
  # R logistic engine stores $models list (from .lr_fit)
  expect_true(is.list(model$engine))
  expect_false(ml:::.is_rust_engine(model$engine))
})

test_that("engine='auto' selects Rust when available", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "logistic", engine = "auto", seed = 42L
  )
  expect_true(ml:::.is_rust_engine(model$engine))
})

# ── predict_proba ───────────────────────────────────────────────────────────

test_that("Rust logistic predict_proba returns valid probabilities", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "logistic", engine = "ml", seed = 42L
  )
  proba <- ml_predict_proba(model, s$valid)
  expect_true(is.data.frame(proba))
  expect_equal(nrow(proba), nrow(s$valid))
  # Only non-negativity is asserted; row sums are not checked by this test.
  expect_true(all(as.matrix(proba) >= 0))
})

# Shape-only check: a data frame with one row per validation observation.
test_that("Rust random forest predict_proba returns valid probabilities", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "random_forest", engine = "ml", seed = 42L
  )
  proba <- ml_predict_proba(model, s$valid)
  expect_true(is.data.frame(proba))
  expect_equal(nrow(proba), nrow(s$valid))
})

# ── Serialization (saveRDS / readRDS) ───────────────────────────────────────

# Predictions from the reloaded model must be identical to those made before
# the model was written to disk.
test_that("Rust model survives saveRDS/readRDS round-trip", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "random_forest", engine = "ml", seed = 42L
  )
  preds_before <- predict(model, newdata = s$valid)

  tmp <- tempfile(fileext = ".rds")
  # add = TRUE so this cleanup never replaces an already-registered handler.
  on.exit(unlink(tmp), add = TRUE)
  saveRDS(model, tmp)
  loaded <- readRDS(tmp)

  preds_after <- predict(loaded, newdata = s$valid)
  expect_identical(preds_before, preds_after)
})

# ── criterion= smoke tests (Phases 1-2) ─────────────────────────────────────
test_that("criterion='entropy' routes to Rust (decision_tree)", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "decision_tree", criterion = "entropy",
    engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
})

# Fits one tree per criterion and checks that each yields one prediction per
# validation row. The two sets of predictions are not compared to each other.
test_that("criterion='entropy' runs without error", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  m_e <- ml_fit(
    s$train, "Species",
    algorithm = "decision_tree", criterion = "entropy",
    engine = "ml", seed = 42L
  )
  m_g <- ml_fit(
    s$train, "Species",
    algorithm = "decision_tree", criterion = "gini",
    engine = "ml", seed = 42L
  )
  preds_e <- predict(m_e, newdata = s$valid)
  preds_g <- predict(m_g, newdata = s$valid)
  # Both must produce valid predictions (same length)
  expect_length(preds_e, nrow(s$valid))
  expect_length(preds_g, nrow(s$valid))
  # Weak type check on the entropy predictions only.
  expect_true(
    is.character(preds_e) || is.integer(preds_e) || is.numeric(preds_e)
  )
})

# ── algorithm="extra_trees" smoke tests (Phase 2) ───────────────────────────

test_that("extra_trees classification routes to Rust", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "extra_trees", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  expect_equal(model$algorithm, "extra_trees")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
})

test_that("extra_trees regression routes to Rust", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "extra_trees", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_type(preds, "double")
})

test_that("extra_trees predict_proba returns valid probabilities", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "extra_trees", engine = "ml", seed = 42L
  )
  proba <- ml_predict_proba(model, s$valid)
  expect_true(is.data.frame(proba))
  expect_equal(nrow(proba), nrow(s$valid))
  expect_true(all(as.matrix(proba) >= 0))
})

# ── multi_class="softmax" smoke tests (Phase 3) ─────────────────────────────

# NOTE(review): the description says "in R" but the fit uses engine = "ml"
# and is gated on the Rust backend — confirm the intended wording.
test_that("multi_class='softmax' logistic works in R", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "logistic", multi_class = "softmax",
    engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  proba <- ml_predict_proba(model, s$valid)
  # Each row of class probabilities must sum to 1 within 1e-9.
  row_sums <- rowSums(as.matrix(proba))
  expect_true(all(abs(row_sums - 1.0) < 1e-9))
})

# ── monotone_cst smoke tests (Phase 4) ──────────────────────────────────────

# Trains on y = x + noise with monotone_cst = 1L, then predicts on an
# increasing grid of x; successive predictions must never decrease
# (beyond a 1e-9 slack).
test_that("monotone_cst integer vector wires through to decision_tree reg", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  withr::local_seed(42L)
  n <- 100L
  x <- sort(runif(n))
  df <- data.frame(x = x, y = x + rnorm(n, sd = 0.1))
  s <- ml_split(df, "y", seed = 42L)
  model <- ml_fit(
    s$train, "y",
    algorithm = "decision_tree", monotone_cst = 1L,
    engine = "ml", seed = 42L
  )
  test_x <- data.frame(x = seq(0, 1, length.out = 50L))
  preds <- predict(model, newdata = test_x)
  diffs <- diff(preds)
  expect_true(
    all(diffs >= -1e-9),
    info = paste("monotone violations:", sum(diffs < -1e-9))
  )
})

# ── algorithm="gradient_boosting" smoke tests (GBT parity) ──────────────────

test_that("gradient_boosting classification routes to Rust", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "gradient_boosting", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  expect_equal(model$algorithm, "gradient_boosting")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  # Every predicted label must be one of the training factor levels.
  expect_true(all(preds %in% levels(s$train$Species)))
})

test_that("gradient_boosting regression routes to Rust", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "gradient_boosting", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_type(preds, "double")
})

test_that("gradient_boosting predict_proba returns valid probabilities", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "gradient_boosting", engine = "ml", seed = 42L
  )
  proba <- ml_predict_proba(model, s$valid)
  expect_true(is.data.frame(proba))
  expect_equal(nrow(proba), nrow(s$valid))
  expect_equal(ncol(proba), 3L) # 3 species
  row_sums <- rowSums(as.matrix(proba))
  expect_true(all(abs(row_sums - 1.0) < 1e-5))
  expect_true(all(as.matrix(proba) >= 0))
})

# One importance row per model feature, with importances summing to 1
# within 1e-5.
test_that("gradient_boosting feature importances sum to 1", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "gradient_boosting", engine = "ml", seed = 42L
  )
  # Named `expl` rather than `exp` to avoid masking base::exp().
  expl <- ml_explain(model)
  expect_s3_class(expl, "ml_explanation")
  expect_equal(nrow(expl), length(model$features))
  expect_true(abs(sum(expl$importance) - 1.0) < 1e-5)
})

test_that("gradient_boosting n_estimators and learning_rate params accepted", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "gradient_boosting",
    n_estimators = 20L, learning_rate = 0.05,
    engine = "ml", seed = 42L
  )
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
})

test_that("gradient_boosting serializes and deserializes via saveRDS", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "gradient_boosting", engine = "ml", seed = 42L
  )
  preds_before <- predict(model, newdata = s$valid)
  tmp <- tempfile(fileext = ".rds")
  # Remove the temp file when the test finishes, like the other
  # serialization tests in this file.
  on.exit(unlink(tmp), add = TRUE)
  saveRDS(model, tmp)
  model2 <- readRDS(tmp)
  preds_after <- predict(model2, newdata = s$valid)
  expect_equal(preds_before, preds_after)
})

# ── Naive Bayes ─────────────────────────────────────────────────────────────

test_that("naive_bayes classification routes to Rust", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "naive_bayes", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  expect_equal(model$algorithm, "naive_bayes")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_true(ml:::.is_rust_engine(model$engine))
})

test_that("naive_bayes predict_proba returns valid probabilities", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "naive_bayes", engine = "ml", seed = 42L
  )
  proba <- ml_predict_proba(model, s$valid)
  expect_true(is.data.frame(proba))
  expect_equal(nrow(proba), nrow(s$valid))
  expect_equal(ncol(proba), 3L)
  expect_true(all(as.matrix(proba) >= 0))
  row_sums <- rowSums(as.matrix(proba))
  expect_true(all(abs(row_sums - 1.0) < 1e-5))
})

test_that("naive_bayes serializes and deserializes via saveRDS", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "naive_bayes", engine = "ml", seed = 42L
  )
  preds_before <- predict(model, newdata = s$valid)
  tmp <- tempfile(fileext = ".rds")
  on.exit(unlink(tmp), add = TRUE)
  saveRDS(model, tmp)
  model2 <- readRDS(tmp)
  preds_after <- predict(model2, newdata = s$valid)
  expect_identical(preds_before, preds_after)
})

# ── Elastic Net ─────────────────────────────────────────────────────────────

test_that("elastic_net regression routes to Rust", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "elastic_net", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  expect_equal(model$algorithm, "elastic_net")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_type(preds, "double")
  expect_true(ml:::.is_rust_engine(model$engine))
})

test_that("elastic_net alpha param accepted", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "elastic_net", alpha = 0.5, l1_ratio = 0.5,
    engine = "ml", seed = 42L
  )
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
})

# Numeric predictions are compared with a 1e-9 tolerance rather than
# identical().
test_that("elastic_net serializes and deserializes via saveRDS", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "elastic_net", engine = "ml", seed = 42L
  )
  preds_before <- predict(model, newdata = s$valid)
  tmp <- tempfile(fileext = ".rds")
  on.exit(unlink(tmp), add = TRUE)
  saveRDS(model, tmp)
  model2 <- readRDS(tmp)
  preds_after <- predict(model2, newdata = s$valid)
  expect_equal(preds_before, preds_after, tolerance = 1e-9)
})

# ── AdaBoost ────────────────────────────────────────────────────────────────

test_that("adaboost classification routes to Rust", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "adaboost", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  expect_equal(model$algorithm, "adaboost")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_true(ml:::.is_rust_engine(model$engine))
})

test_that("adaboost predict_proba returns valid probabilities", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "adaboost", engine = "ml", seed = 42L
  )
  proba <- ml_predict_proba(model, s$valid)
  expect_true(is.data.frame(proba))
  expect_equal(nrow(proba), nrow(s$valid))
  expect_equal(ncol(proba), 3L)
  expect_true(all(as.matrix(proba) >= 0))
})

test_that("adaboost feature importances sum to 1", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "adaboost", engine = "ml", seed = 42L
  )
  # Named `expl` rather than `exp` to avoid masking base::exp().
  expl <- ml_explain(model)
  expect_s3_class(expl, "ml_explanation")
  expect_equal(nrow(expl), length(model$features))
  expect_true(abs(sum(expl$importance) - 1.0) < 1e-5)
})

test_that("adaboost serializes and deserializes via saveRDS", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "adaboost", engine = "ml", seed = 42L
  )
  preds_before <- predict(model, newdata = s$valid)
  tmp <- tempfile(fileext = ".rds")
  on.exit(unlink(tmp), add = TRUE)
  saveRDS(model, tmp)
  model2 <- readRDS(tmp)
  preds_after <- predict(model2, newdata = s$valid)
  expect_identical(preds_before, preds_after)
})

# ── SVM ─────────────────────────────────────────────────────────────────────

test_that("svm classification routes to Rust", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "svm", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  expect_equal(model$algorithm, "svm")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_true(ml:::.is_rust_engine(model$engine))
})

test_that("svm regression routes to Rust", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- mtcars_split()
  model <- ml_fit(
    s$train, "mpg",
    algorithm = "svm", engine = "ml", seed = 42L
  )
  expect_s3_class(model, "ml_model")
  expect_equal(model$algorithm, "svm")
  preds <- predict(model, newdata = s$valid)
  expect_length(preds, nrow(s$valid))
  expect_type(preds, "double")
})

test_that("svm predict_proba returns valid probabilities", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "svm", engine = "ml", seed = 42L
  )
  proba <- ml_predict_proba(model, s$valid)
  expect_true(is.data.frame(proba))
  expect_equal(nrow(proba), nrow(s$valid))
  expect_equal(ncol(proba), 3L)
  expect_true(all(as.matrix(proba) >= 0))
})

test_that("svm serializes and deserializes via saveRDS", {
  skip_if_not(ml:::.rust_available(), "Rust backend not available")
  s <- iris_split()
  model <- ml_fit(
    s$train, "Species",
    algorithm = "svm", engine = "ml", seed = 42L
  )
  preds_before <- predict(model, newdata = s$valid)
  tmp <- tempfile(fileext = ".rds")
  on.exit(unlink(tmp), add = TRUE)
  saveRDS(model, tmp)
  model2 <- readRDS(tmp)
  preds_after <- predict(model2, newdata = s$valid)
  expect_identical(preds_before, preds_after)
})