library(testthat)
library(dplyr)

# Prevent Rplots.pdf from being created by graphics calls
grDevices::pdf(file = NULL)
withr::defer(grDevices::dev.off())

# small dataset for tests
set.seed(123)
data(iris)

test_that("fastexplore errors with non-data.frame input", {
  expect_error(
    fastexplore("not a data frame"),
    "'data' must be a data.frame"
  )
})

test_that("fastexplore errors with empty data", {
  expect_error(
    fastexplore(iris[0, ]),
    "'data' has no rows to explore"
  )
})

test_that("fastexplore errors with missing label column", {
  expect_error(
    fastexplore(iris, label = "nonexistent"),
    "Label column 'nonexistent' is not present in data"
  )
})

test_that("fastexplore returns expected structure", {
  result <- fastexplore(iris, label = "Species", pairwise_matrix = FALSE)

  expect_true(is.list(result))
  expect_true("data_overview" %in% names(result))
  expect_true("summary_stats" %in% names(result))
  expect_true("missing_data" %in% names(result))
  expect_true("correlation_matrix" %in% names(result))
  expect_true("plots" %in% names(result))
})

test_that("fastexplore data_overview contains expected elements", {
  result <- fastexplore(iris, pairwise_matrix = FALSE)

  expect_true("dimensions" %in% names(result$data_overview))
  expect_true("types" %in% names(result$data_overview))
  expect_true("unique_values" %in% names(result$data_overview))
  expect_true("head" %in% names(result$data_overview))

  expect_equal(result$data_overview$dimensions$Rows, 150)
  expect_equal(result$data_overview$dimensions$Columns, 5)
})

test_that("fastexplore computes summary statistics", {
  result <- fastexplore(iris, pairwise_matrix = FALSE)

  expect_true(!is.null(result$summary_stats))
  expect_true("Column" %in% names(result$summary_stats))
  expect_true("Mean" %in% names(result$summary_stats))
  expect_true("Median" %in% names(result$summary_stats))
  expect_true("SD" %in% names(result$summary_stats))
})

test_that("fastexplore detects missing data", {
  iris_with_na <- iris
  iris_with_na$Sepal.Length[1:10] <- NA

  result <- fastexplore(iris_with_na, pairwise_matrix = FALSE, use_upset_missing = FALSE)

  missing_row <- result$missing_data[result$missing_data$Column == "Sepal.Length", ]
  expect_equal(missing_row$Missing[[1]], 10)
})

test_that("fastexplore computes correlation matrix", {
  result <- fastexplore(iris, pairwise_matrix = FALSE)

  expect_true(!is.null(result$correlation_matrix))
  expect_true(is.matrix(result$correlation_matrix))
  expect_equal(nrow(result$correlation_matrix), 4)  # 4 numeric columns
})

test_that("fastexplore detects high correlations", {
  result <- fastexplore(iris, corr_threshold = 0.8, pairwise_matrix = FALSE)

  expect_true(!is.null(result$high_corr_pairs))
  # Sepal.Length and Petal.Length should be highly correlated
  expect_true(nrow(result$high_corr_pairs) > 0)
})

test_that("fastexplore generates frequency tables for factors", {
  result <- fastexplore(iris, pairwise_matrix = FALSE)

  expect_true(!is.null(result$freq_tables))
  expect_true("Species" %in% names(result$freq_tables))
  expect_equal(nrow(result$freq_tables$Species), 3)  # 3 species
})

test_that("fastexplore detects class imbalance when label is provided", {
  result <- fastexplore(iris, label = "Species", pairwise_matrix = FALSE)

  expect_true(!is.null(result$class_imbalance))
  expect_equal(nrow(result$class_imbalance), 3)
  expect_equal(sum(result$class_imbalance$count), 150)
})

test_that("fastexplore detects duplicated rows", {
  # Create a dataset without pre-existing duplicates
  df_base <- data.frame(
    a = 1:100,
    b = rnorm(100),
    c = letters[rep(1:10, 10)]
  )
  df_with_dups <- rbind(df_base, df_base[1:5, ])

  result <- fastexplore(df_with_dups, pairwise_matrix = FALSE)

  expect_equal(result$duplicated_rows, 5)
  expect_true(!is.null(result$duplicated_examples))
})

test_that("fastexplore performs IQR outlier detection", {
  result <- fastexplore(iris, outlier_method = "iqr", pairwise_matrix = FALSE)

  expect_true(!is.null(result$outlier_summary))
  expect_true(is.numeric(result$outlier_summary))
})

test_that("fastexplore performs zscore outlier detection", {
  result <- fastexplore(iris, outlier_method = "zscore", pairwise_matrix = FALSE)

  expect_true(!is.null(result$outlier_summary))
  expect_true(is.numeric(result$outlier_summary))
})

test_that("fastexplore runs normality tests", {
  result <- fastexplore(iris, run_distribution_checks = TRUE, pairwise_matrix = FALSE)

  expect_true(!is.null(result$normality_tests))
  expect_true("Column" %in% names(result$normality_tests))
  expect_true("P_Value" %in% names(result$normality_tests))
})

test_that("fastexplore generates histogram plots", {
  result <- fastexplore(iris, visualize = "histogram", pairwise_matrix = FALSE)

  expect_true(!is.null(result$plots$histograms))
  expect_true(length(result$plots$histograms) == 4)  # 4 numeric columns
})

test_that("fastexplore generates boxplot plots", {
  result <- fastexplore(iris, visualize = "boxplot", pairwise_matrix = FALSE)

  expect_true(!is.null(result$plots$boxplots))
  expect_true(length(result$plots$boxplots) == 4)
})

test_that("fastexplore generates bar plots for factors", {
  result <- fastexplore(iris, visualize = "barplot", pairwise_matrix = FALSE)

  expect_true(!is.null(result$plots$barplots))
  expect_true("Species" %in% names(result$plots$barplots))
})

test_that("fastexplore generates correlation heatmap", {
  result <- fastexplore(iris, visualize = "heatmap", pairwise_matrix = FALSE)

  expect_true(!is.null(result$plots$correlation_heatmap))
  expect_true(inherits(result$plots$correlation_heatmap, c("gg", "ggplot")))
})

test_that("fastexplore generates scatterplots", {
  result <- fastexplore(iris, visualize = "scatterplot", pairwise_matrix = FALSE)

  expect_true(!is.null(result$plots$scatterplots))
})

test_that("fastexplore respects sample_size parameter", {
  result <- fastexplore(iris, sample_size = 50, pairwise_matrix = FALSE)

  # The result should still work

  expect_true(!is.null(result$data_overview))
})

test_that("fastexplore auto_convert_numeric converts numeric-like factors", {
  df <- data.frame(
    a = factor(c("1", "2", "3", "4", "5", "6", "7", "8")),
    b = c(1, 2, 3, 4, 5, 6, 7, 8)
  )

  result <- fastexplore(df, auto_convert_numeric = TRUE, pairwise_matrix = FALSE)

  # Column 'a' should be converted and appear in summary_stats
  expect_true("a" %in% result$summary_stats$Column)
})

test_that("fastexplore auto_convert_dates converts date-like strings", {
  df <- data.frame(
    date_col = c("2023-01-01", "2023-01-02", "2023-01-03"),
    value = c(1, 2, 3)
  )

  result <- fastexplore(df, auto_convert_dates = TRUE, pairwise_matrix = FALSE)

  # The function should run without error
  expect_true(!is.null(result))
})

test_that("fastexplore feature_engineering creates date features", {
  df <- data.frame(
    date_col = as.Date(c("2021-01-15", "2022-06-20", "2023-12-25")),
    value = c(1, 2, 3)
  )

  result <- fastexplore(df, feature_engineering = TRUE, pairwise_matrix = FALSE, run_distribution_checks = FALSE)

  # date features should be created
  expect_true("date_col_day" %in% result$data_overview$types$Column)
  expect_true("date_col_month" %in% result$data_overview$types$Column)
  expect_true("date_col_year" %in% result$data_overview$types$Column)
})

test_that("fastexplore detects zero variance columns", {
  df <- data.frame(
    constant = rep(5, 100),
    variable = rnorm(100)
  )

  # Suppress expected warning about zero standard deviation in correlation
  result <- suppressWarnings(fastexplore(df, pairwise_matrix = FALSE, run_distribution_checks = FALSE))

  expect_true("constant" %in% result$zero_variance_cols)
})

test_that("fastexplore detects potential ID columns", {
  df <- data.frame(
    id = 1:100,
    value = rnorm(100)
  )

  result <- fastexplore(df, pairwise_matrix = FALSE)

  expect_true("id" %in% result$potential_id_cols)
})

test_that("fastexplore generates grouped plots when label is factor", {
  result <- fastexplore(
    iris,
    label = "Species",
    visualize = c("histogram", "boxplot"),
    grouped_plots = TRUE,
    pairwise_matrix = FALSE
  )

  # Plots should exist
  expect_true(!is.null(result$plots$histograms))
  expect_true(!is.null(result$plots$boxplots))
})