# Tests for the decision-tree helpers: decision_tree_analysis(),
# decision_tree_predict(), and decision_tree_plot() (defined elsewhere
# in the package).
library(testthat)
library(rpart)
library(ggplot2)

# Create a sample dataset (iris is fine for this purpose).
data(iris)

# Split the data into 80% training and 20% testing.
set.seed(123)  # Set seed for reproducibility
train_indices <- sample(seq_len(nrow(iris)), size = 0.8 * nrow(iris))
train_data <- iris[train_indices, ]
test_data <- iris[-train_indices, ]

test_that("decision_tree_analysis and prediction work correctly", {
  # 1. The decision tree model should fit correctly and be returned
  # directly as an rpart object (not wrapped in a list).
  formula <- Species ~ Sepal.Length + Sepal.Width
  model_result <- decision_tree_analysis(train_data, formula)
  expect_s3_class(model_result, "rpart")

  # The model should have learned from the data, i.e. the tree frame has
  # at least one node. NOTE: nrow(), not length() — frame is a data.frame,
  # so length() counts columns and is always positive, which would make
  # this assertion vacuous.
  expect_true(nrow(model_result$frame) > 0)

  # 2. Prediction on the fitted model should work correctly.
  predictions <- decision_tree_predict(model_result, test_data)

  # One prediction per test row.
  expect_length(predictions, nrow(test_data))

  # Predictions should be a factor whose values are valid class labels
  # from the response variable.
  expect_true(is.factor(predictions))
  expect_true(all(predictions %in% levels(test_data$Species)))
})

test_that("decision_tree_plot works correctly", {
  # 3. Plotting should execute without errors for exactly 2 predictors.
  formula_2_predictors <- Species ~ Sepal.Length + Sepal.Width
  model_result_2 <- decision_tree_analysis(train_data, formula_2_predictors)
  expect_silent(
    decision_tree_plot(model_result_2, train_data, formula_2_predictors)
  )

  # 4. Plotting should also execute without errors for more than
  # 2 predictors.
  formula_multiple_predictors <-
    Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
  model_result_multi <-
    decision_tree_analysis(train_data, formula_multiple_predictors)
  expect_silent(
    decision_tree_plot(model_result_multi, train_data,
                       formula_multiple_predictors)
  )
})

test_that("handling missing values in decision tree model", {
  # 5. The model should still fit when the data contains missing values
  # (rpart can handle NAs internally, e.g. via surrogate splits).
  iris_with_na <- train_data
  iris_with_na$Sepal.Length[1] <- NA  # Add an NA value to a column
  formula_with_na <- Species ~ Sepal.Length + Sepal.Width
  model_result_na <- decision_tree_analysis(iris_with_na, formula_with_na)
  expect_s3_class(model_result_na, "rpart")

  # Prediction should also work on data containing NAs, returning one
  # prediction per row.
  predictions_with_na <- decision_tree_predict(model_result_na, iris_with_na)
  expect_length(predictions_with_na, nrow(iris_with_na))
})

test_that("decision_tree_predict handles new unseen data", {
  # 6. Prediction should work on data the model was not fitted on.
  formula_for_train <- Species ~ Sepal.Length + Sepal.Width
  model_result_train <- decision_tree_analysis(train_data, formula_for_train)

  # Use a subset of the held-out data as "new" input for prediction.
  new_data <- test_data[, c("Sepal.Length", "Sepal.Width", "Species")]
  predictions_new_data <- decision_tree_predict(model_result_train, new_data)

  # One prediction per new row, and every prediction is a valid class label.
  expect_length(predictions_new_data, nrow(new_data))
  expect_true(all(predictions_new_data %in% levels(test_data$Species)))
})