# Tests for data_input(), exercised against an 80/20 split of the built-in
# iris dataset. data_input() is not defined in this file; it must already be
# available (e.g. loaded by the package test harness) when these tests run.
library(testthat)

# Fixtures ----

data(iris)
set.seed(123) # fixed seed so the train/test split is reproducible
# seq_len() stays correct for a zero-row data frame, unlike 1:nrow().
train_indices <- sample(seq_len(nrow(iris)), size = 0.8 * nrow(iris))
train_data <- iris[train_indices, ]
test_data <- iris[-train_indices, ]

print("Train Data:")
print(head(train_data))
print("Test Data:")
print(head(test_data))

# Numeric measurement columns of iris checked by the scaling tests.
numeric_cols <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")

# Argument validation ----

test_that("data_input rejects invalid arguments", {
  # No data supplied at all.
  expect_error(data_input(), "Please provide a data frame.")
  # Data supplied, but not a data frame.
  expect_error(
    data_input("not a data frame"),
    "'data' must be a data frame."
  )
  # 'n' must be a positive integer.
  expect_error(
    data_input(train_data, n = -1),
    "'n' must be a positive integer."
  )
})

# Scaling ----

test_that("data_input standardizes numeric columns to mean zero", {
  result <- data_input(
    train_data,
    scale_method = "standardize",
    preview = FALSE
  )
  for (col in numeric_cols) {
    # `info` names the column so a failure points at the offending variable.
    expect_equal(
      mean(result$full_data[[col]]),
      0,
      tolerance = 0.1,
      info = col
    )
  }
})

test_that("data_input normalizes numeric columns into [0, 1]", {
  result <- data_input(
    train_data,
    scale_method = "normalize",
    preview = FALSE
  )
  for (col in numeric_cols) {
    values <- result$full_data[[col]]
    expect_true(all(values >= 0 & values <= 1), info = col)
  }
})

# Output type ----

test_that("data_input returns a tibble when as_tibble = TRUE", {
  result <- data_input(train_data, as_tibble = TRUE, preview = FALSE)
  expect_s3_class(result$full_data, "tbl_df")
})

# Cleaning ----

test_that("data_input removes rows containing NA values", {
  # iris has no missing values, so inject one to exercise the NA handling.
  iris_with_na <- train_data
  iris_with_na$Sepal.Length[1] <- NA
  result <- data_input(iris_with_na, preview = FALSE)
  expect_false(anyNA(result$full_data$Sepal.Length))
})

test_that("data_input removes outliers when remove_outliers = TRUE", {
  iris_with_outliers <- train_data
  iris_with_outliers$Sepal.Length[1] <- 100 # inject an obvious outlier
  result <- data_input(
    iris_with_outliers,
    remove_outliers = TRUE,
    preview = FALSE
  )
  # The injected value (100) must be gone; genuine iris sepal lengths are
  # all below 10. Adjust this bound if the outlier threshold changes.
  expect_true(all(result$full_data$Sepal.Length <= 10))
})

# Printed output ----

test_that("data_input prints species levels in summary and preview", {
  # Rather than matching the whole summary, check that each species name
  # appears somewhere in the printed output.
  for (species in c("setosa", "versicolor", "virginica")) {
    expect_output(
      data_input(train_data, show_summary = TRUE, preview = TRUE),
      species
    )
  }
})

# Optional plotting dependency ----

test_that("data_input warns when ggplot2 is unavailable", {
  # The warning can only be triggered when ggplot2 is missing; report an
  # explicit skip otherwise instead of silently asserting nothing.
  skip_if(
    requireNamespace("ggplot2", quietly = TRUE),
    "ggplot2 is installed; missing-package warning cannot be triggered"
  )
  expect_warning(
    data_input(train_data, plot_data = TRUE),
    "Package 'ggplot2' required for plotting."
  )
})