# Tests for predict.C5.0 function

# --- Basic Predictions ---

test_that("predict returns class predictions from tree model", {
  set.seed(5001)
  sim <- make_two_class_data(100, seed = 5001)
  train_rows <- 1:80
  test_rows <- 81:100

  # Fit on the first 80 rows (first column dropped from the predictors),
  # then predict the remaining 20 rows.
  fit <- C5.0(sim[train_rows, -1], sim$y[train_rows])
  classes <- predict(fit, sim[test_rows, -1])

  expect_s3_class(classes, "factor")
  expect_length(classes, 20)
  expect_equal(levels(classes), c("A", "B"))
})

test_that("predict returns probability predictions from tree model", {
  set.seed(5002)
  sim <- make_two_class_data(100, seed = 5002)
  train_rows <- 1:80
  fit <- C5.0(sim[train_rows, -1], sim$y[train_rows])

  probs <- predict(fit, sim[81:100, -1], type = "prob")

  # Expect a matrix with one row per new sample and one named column per class
  expect_true(is.matrix(probs))
  expect_equal(nrow(probs), 20)
  expect_equal(ncol(probs), 2)
  expect_equal(colnames(probs), c("A", "B"))

  # Probabilities should sum to 1
  deviation <- abs(rowSums(probs) - 1)
  expect_true(all(deviation < 1e-6))
})

test_that("predict works from rules model", {
  set.seed(5003)
  sim <- make_two_class_data(100, seed = 5003)
  fit <- C5.0(sim[1:80, -1], sim$y[1:80], rules = TRUE)
  holdout <- sim[81:100, -1]

  # Class predictions
  classes <- predict(fit, holdout)
  expect_s3_class(classes, "factor")
  expect_length(classes, 20)

  # Probability predictions
  probs <- predict(fit, holdout, type = "prob")
  expect_true(is.matrix(probs))
})

test_that("predict works from boosted model", {
  set.seed(5004)
  sim <- make_two_class_data(150, seed = 5004)
  fit <- C5.0(sim[1:100, -1], sim$y[1:100], trials = 5)

  classes <- predict(fit, sim[101:150, -1])

  expect_s3_class(classes, "factor")
  expect_length(classes, 50)
})

test_that("predict works with specific trials value", {
  set.seed(5005)
  sim <- make_two_class_data(150, seed = 5005)
  fit <- C5.0(sim[1:100, -1], sim$y[1:100], trials = 10)
  holdout <- sim[101:150, -1]

  # Use trials <= actual to avoid a warning: only request a specific trial
  # count when more than one trial was actually kept. If the model
  # early-stopped to a single trial, fall back to the default.
  kept_several <- fit$trials["Actual"] > 1
  classes <- if (kept_several) {
    predict(fit, holdout, trials = 1)
  } else {
    predict(fit, holdout)
  }

  expect_s3_class(classes, "factor")
  expect_length(classes, 50)
})
test_that("predict works from formula-fitted model", {
  set.seed(5006)
  sim <- make_two_class_data(100, seed = 5006)
  fit <- C5.0(y ~ ., data = sim[1:80, ])

  # The full data frame (outcome column included) is passed as new data
  classes <- predict(fit, sim[81:100, ])

  expect_s3_class(classes, "factor")
  expect_length(classes, 20)
})

test_that("predict handles NA values via na.action", {
  set.seed(5007)
  sim <- make_two_class_data(100, seed = 5007)
  fit <- C5.0(y ~ ., data = sim[1:80, ])

  # Add some NAs to test data
  holdout <- sim[81:100, ]
  holdout$x1[1] <- NA

  classes <- predict(fit, holdout, na.action = na.pass)

  # All 20 rows should still get a prediction
  expect_s3_class(classes, "factor")
  expect_length(classes, 20)
})

test_that("predict works with multiclass outcome", {
  set.seed(5008)
  sim <- make_multi_class_data(150, n_classes = 4, seed = 5008)
  fit <- C5.0(sim[1:100, -1], sim$y[1:100])
  holdout <- sim[101:150, -1]
  class_labels <- c("A", "B", "C", "D")

  classes <- predict(fit, holdout)
  expect_s3_class(classes, "factor")
  expect_equal(levels(classes), class_labels)

  probs <- predict(fit, holdout, type = "prob")
  expect_equal(ncol(probs), 4)
  expect_equal(colnames(probs), class_labels)
})

test_that("predict works with case weights model", {
  set.seed(5009)
  sim <- make_two_class_data(100, seed = 5009)
  # One random weight in [0.5, 2] per training row
  case_wts <- runif(80, 0.5, 2)
  fit <- C5.0(sim[1:80, -1], sim$y[1:80], weights = case_wts)

  classes <- predict(fit, sim[81:100, -1])

  expect_s3_class(classes, "factor")
  expect_length(classes, 20)
})

# --- Error Conditions ---
# NOTE: object names and the expressions passed to expect_snapshot() are kept
# exactly as written because the snapshot records the code text.

test_that("predict errors on invalid type", {
  set.seed(6001)
  dat <- make_two_class_data(100, seed = 6001)
  mod <- C5.0(dat[, -1], dat$y)

  expect_snapshot(
    predict(mod, dat[, -1], type = "invalid"),
    error = TRUE
  )
})

test_that("predict errors on prob with costs", {
  set.seed(6002)
  dat <- make_two_class_data(100, seed = 6002)
  costs <- make_cost_matrix(c("A", "B"))
  mod <- C5.0(dat[, -1], dat$y, costs = costs)

  # Probability output is requested from a model fitted with a cost matrix
  expect_snapshot(
    predict(mod, dat[, -1], type = "prob"),
    error = TRUE
  )
})

test_that("predict errors on NULL newdata", {
  set.seed(6003)
  dat <- make_two_class_data(100, seed = 6003)
  mod <- C5.0(dat[, -1], dat$y)

  expect_snapshot(
    predict(mod, newdata = NULL),
    error = TRUE
  )
})

test_that("predict errors on missing column names", {
  set.seed(6004)
  dat <- make_two_class_data(100, seed = 6004)
  mod <- C5.0(dat[, -1], dat$y)

  # Matrix version of the predictors with its column names stripped
  newdata <- as.matrix(dat[, -1])
  colnames(newdata) <- NULL

  expect_snapshot(
    predict(mod, newdata),
    error = TRUE
  )
})

test_that("predict errors on multiple trials values", {
  set.seed(6005)
  dat <- make_two_class_data(100, seed = 6005)
  mod <- C5.0(dat[, -1], dat$y, trials = 5)

  # `trials` is given as a length-3 vector
  expect_snapshot(
    predict(mod, dat[, -1], trials = c(1, 2, 3)),
    error = TRUE
  )
})

test_that("predict errors on non-positive trials", {
  set.seed(6006)
  dat <- make_two_class_data(100, seed = 6006)
  mod <- C5.0(dat[, -1], dat$y, trials = 5)

  expect_snapshot(
    predict(mod, dat[, -1], trials = 0),
    error = TRUE
  )
})

# --- Warning Conditions ---

test_that("predict warns when trials exceeds actual", {
  set.seed(7001)
  dat <- make_two_class_data(150, seed = 7001)
  mod <- C5.0(dat[, -1], dat$y, trials = 5)

  # Request far more trials (100) than the 5 the model was fitted with
  expect_snapshot_warning(
    predict(mod, dat[, -1], trials = 100)
  )
})

# --- Additional Prediction Coverage Tests ---

test_that("predict handles data with missing values", {
  set.seed(7101)
  n <- 100
  sim <- make_two_class_data(n, seed = 7101)
  fit <- C5.0(sim[1:80, -1], sim$y[1:80])

  # Test data with NAs
  holdout <- sim[81:100, -1]
  holdout$x1[1:3] <- NA
  holdout$x2[4:5] <- NA

  classes <- predict(fit, holdout)

  expect_s3_class(classes, "factor")
  expect_length(classes, 20)
})

test_that("predict works with rules model and probabilities", {
  set.seed(7102)
  sim <- make_two_class_data(150, seed = 7102)
  fit <- C5.0(sim[1:100, -1], sim$y[1:100], rules = TRUE)

  probs <- predict(fit, sim[101:150, -1], type = "prob")

  expect_true(is.matrix(probs))
  expect_equal(nrow(probs), 50)

  # Probabilities should sum to 1
  deviation <- abs(rowSums(probs) - 1)
  expect_true(all(deviation < 1e-6))
})

test_that("predict works with boosted rules model", {
  set.seed(7103)
  sim <- make_two_class_data(200, seed = 7103)
  fit <- C5.0(
    sim[1:150, -1],
    sim$y[1:150],
    trials = 5,
    rules = TRUE
  )

  classes <- predict(fit, sim[151:200, -1])

  expect_s3_class(classes, "factor")
  expect_length(classes, 50)
})

test_that("predict preserves row names in probability output", {
  set.seed(7104)
  sim <- make_two_class_data(100, seed = 7104)
  fit <- C5.0(sim[1:80, -1], sim$y[1:80])

  # Give the new data explicit row names so they can be checked on the output
  case_ids <- paste0("case_", 81:100)
  holdout <- sim[81:100, -1]
  rownames(holdout) <- case_ids

  probs <- predict(fit, holdout, type = "prob")

  expect_equal(rownames(probs), case_ids)
})

test_that("predict works with ordered factor predictors", {
  set.seed(7105)
  n <- 150
  size_levels <- c("small", "medium", "large")

  # Two-class outcome, one numeric predictor and one ordered-factor predictor
  sim <- data.frame(
    y = factor(sample(c("low", "high"), n, replace = TRUE)),
    x1 = rnorm(n),
    x2 = ordered(
      sample(size_levels, n, replace = TRUE),
      levels = size_levels
    )
  )

  fit <- C5.0(sim[1:100, -1], sim$y[1:100])
  classes <- predict(fit, sim[101:150, -1])

  expect_s3_class(classes, "factor")
})

test_that("predict works with model trained using sampling", {
  set.seed(7106)
  n <- 200
  sim <- make_two_class_data(n, seed = 7106)

  fit <- C5.0(
    sim[1:150, -1],
    sim$y[1:150],
    control = C5.0Control(sample = 0.7, seed = 42)
  )
  classes <- predict(fit, sim[151:200, -1])

  expect_s3_class(classes, "factor")
  expect_length(classes, 50)
})

test_that("predict works with model trained using winnowing", {
  set.seed(7107)
  n <- 200
  sim <- make_two_class_data(n, seed = 7107)

  fit <- C5.0(
    sim[1:150, -1],
    sim$y[1:150],
    control = C5.0Control(winnow = TRUE)
  )
  classes <- predict(fit, sim[151:200, -1])

  expect_s3_class(classes, "factor")
})

test_that("predict works with model trained using fuzzyThreshold", {
  set.seed(7108)
  n <- 200
  sim <- make_two_class_data(n, seed = 7108)

  fit <- C5.0(
    sim[1:150, -1],
    sim$y[1:150],
    control = C5.0Control(fuzzyThreshold = TRUE)
  )
  holdout <- sim[151:200, -1]

  # Both prediction types are computed before any expectation runs
  classes <- predict(fit, holdout)
  probs <- predict(fit, holdout, type = "prob")

  expect_s3_class(classes, "factor")
  expect_true(is.matrix(probs))
})