library(testthat)


# ---- Shared fixture: reproducible 80/20 split of iris ----
# `train_data` is the input used by the data_input() tests below; `test_data`
# is the held-out remainder of the rows.
data(iris)
set.seed(123)  # fixed seed so the sampled rows are the same on every run

n_rows <- nrow(iris)
# seq_len() is safe even for a zero-row data frame (1:0 would yield c(1, 0)),
# and floor() makes the truncation of the 80% sample size explicit instead of
# relying on sample() silently truncating a fractional `size`.
train_indices <- sample(seq_len(n_rows), size = floor(0.8 * n_rows))
train_data <- iris[train_indices, ]
test_data <- iris[-train_indices, ]

# Echo the first rows of each partition so the split is visible in the log.
print("Train Data:")
print(head(train_data))
print("Test Data:")
print(head(test_data))
# ---- Tests for data_input() ----
# data_input() is defined outside this file. Every test feeds it `train_data`,
# the iris training split created at the top of this file. Each behaviour has
# its own test_that() block so a failure in one does not hide the others.

# Numeric measurement columns of iris that the scaling tests inspect.
iris_numeric_cols <- c(
  "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"
)

test_that("data_input rejects missing or invalid arguments", {
  # Messages are compared literally (fixed = TRUE) so that '.' in the expected
  # text is not treated as a regex wildcard.
  expect_error(data_input(), "Please provide a data frame.", fixed = TRUE)
  expect_error(
    data_input("not a data frame"),
    "'data' must be a data frame.",
    fixed = TRUE
  )
  expect_error(
    data_input(train_data, n = -1),
    "'n' must be a positive integer.",
    fixed = TRUE
  )
})

test_that("data_input standardizes numeric columns to mean zero", {
  result <- data_input(
    train_data,
    scale_method = "standardize",
    preview = FALSE
  )
  for (col in iris_numeric_cols) {
    expect_equal(
      mean(result$full_data[[col]]),
      0,
      tolerance = 0.1,
      label = paste0("mean of standardized ", col)
    )
  }
})

test_that("data_input normalizes numeric columns into [0, 1]", {
  result <- data_input(
    train_data,
    scale_method = "normalize",
    preview = FALSE
  )
  for (col in iris_numeric_cols) {
    values <- result$full_data[[col]]
    expect_true(
      all(values >= 0 & values <= 1),
      label = paste0("normalized ", col, " lies within [0, 1]")
    )
  }
})

test_that("data_input returns a tibble when as_tibble = TRUE", {
  result <- data_input(train_data, as_tibble = TRUE, preview = FALSE)
  expect_s3_class(result$full_data, "tbl_df")
})

test_that("data_input leaves no NA in a column that contained one", {
  # iris itself has no missing values, so inject a single NA to exercise the
  # NA handling.
  iris_with_na <- train_data
  iris_with_na$Sepal.Length[1] <- NA
  result <- data_input(iris_with_na, preview = FALSE)
  expect_false(anyNA(result$full_data$Sepal.Length))
})

test_that("data_input drops extreme values when remove_outliers = TRUE", {
  # Real iris sepal lengths are all below 10, so 100 is an unmistakable
  # outlier; the threshold of 10 only needs to separate it from genuine data.
  iris_with_outliers <- train_data
  iris_with_outliers$Sepal.Length[1] <- 100
  result <- data_input(
    iris_with_outliers,
    remove_outliers = TRUE,
    preview = FALSE
  )
  expect_true(all(result$full_data$Sepal.Length <= 10))
})

test_that("data_input prints every species with summary and preview on", {
  # Capture the printed output once and look for each species name, rather
  # than re-running data_input() for every word. capture_output() does not
  # auto-print the return value, matching what expect_output() inspects.
  printed <- capture_output(
    data_input(train_data, show_summary = TRUE, preview = TRUE)
  )
  for (species in c("setosa", "versicolor", "virginica")) {
    expect_match(printed, species, fixed = TRUE)
  }
})

test_that("data_input warns when plotting is requested without ggplot2", {
  # The warning can only be triggered on a machine where ggplot2 is absent;
  # report an explicit skip elsewhere instead of silently asserting nothing.
  skip_if(
    requireNamespace("ggplot2", quietly = TRUE),
    "ggplot2 is installed; the missing-package warning cannot be triggered"
  )
  expect_warning(
    data_input(train_data, plot_data = TRUE),
    "Package 'ggplot2' required for plotting.",
    fixed = TRUE
  )
})