# Tests for the decision-tree helpers: decision_tree_analysis(),
# decision_tree_predict(), and decision_tree_plot() (defined elsewhere
# in the package).
library(testthat)
library(rpart)
library(ggplot2)

# Create a sample dataset (iris is fine for this purpose).
data(iris)

# Split the data into 80% training and 20% testing.
set.seed(123)  # Set seed for reproducibility
train_indices <- sample(seq_len(nrow(iris)), size = 0.8 * nrow(iris))
train_data <- iris[train_indices, ]
test_data <- iris[-train_indices, ]

test_that("decision_tree_analysis and prediction work correctly", {
  # 1. The decision tree model should fit correctly and be returned
  # directly as an rpart object (not wrapped in a list).
  formula <- Species ~ Sepal.Length + Sepal.Width
  model_result <- decision_tree_analysis(train_data, formula)
  expect_s3_class(model_result, "rpart")

  # The model should have learned from the data, i.e. the tree frame has
  # at least one node. NOTE: nrow(), not length() — frame is a data.frame,
  # so length() counts columns and is always positive, which would make
  # this assertion vacuous.
  expect_true(nrow(model_result$frame) > 0)

  # 2. Prediction on the fitted model should work correctly.
  predictions <- decision_tree_predict(model_result, test_data)

  # One prediction per test row.
  expect_length(predictions, nrow(test_data))

  # Predictions should be a factor whose values are valid class labels
  # from the response variable.
  expect_true(is.factor(predictions))
  expect_true(all(predictions %in% levels(test_data$Species)))
})

test_that("decision_tree_plot works correctly", {
  # 3. Plotting should execute without errors for exactly 2 predictors.
  formula_2_predictors <- Species ~ Sepal.Length + Sepal.Width
  model_result_2 <- decision_tree_analysis(train_data, formula_2_predictors)
  expect_silent(
    decision_tree_plot(model_result_2, train_data, formula_2_predictors)
  )

  # 4. Plotting should also execute without errors for more than
  # 2 predictors.
  formula_multiple_predictors <-
    Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
  model_result_multi <-
    decision_tree_analysis(train_data, formula_multiple_predictors)
  expect_silent(
    decision_tree_plot(model_result_multi, train_data,
                       formula_multiple_predictors)
  )
})

test_that("handling missing values in decision tree model", {
  # 5. The model should still fit when the data contains missing values
  # (rpart can handle NAs internally, e.g. via surrogate splits).
  iris_with_na <- train_data
  iris_with_na$Sepal.Length[1] <- NA  # Add an NA value to a column
  formula_with_na <- Species ~ Sepal.Length + Sepal.Width
  model_result_na <- decision_tree_analysis(iris_with_na, formula_with_na)
  expect_s3_class(model_result_na, "rpart")

  # Prediction should also work on data containing NAs, returning one
  # prediction per row.
  predictions_with_na <- decision_tree_predict(model_result_na, iris_with_na)
  expect_length(predictions_with_na, nrow(iris_with_na))
})

test_that("decision_tree_predict handles new unseen data", {
  # 6. Prediction should work on data the model was not fitted on.
  formula_for_train <- Species ~ Sepal.Length + Sepal.Width
  model_result_train <- decision_tree_analysis(train_data, formula_for_train)

  # Use a subset of the held-out data as "new" input for prediction.
  new_data <- test_data[, c("Sepal.Length", "Sepal.Width", "Species")]
  predictions_new_data <- decision_tree_predict(model_result_train, new_data)

  # One prediction per new row, and every prediction is a valid class label.
  expect_length(predictions_new_data, nrow(new_data))
  expect_true(all(predictions_new_data %in% levels(test_data$Species)))
})