library(testthat)
library(quickOutlier)

# ==============================================================================
# 1. TEST BÁSICOS (Univariantes)
# ==============================================================================
test_that("detect_outliers identifies correct rows", {
  df <- data.frame(val = c(1, 2, 1, 2, 100))

  res_iqr <- detect_outliers(df, "val", method = "iqr")
  expect_equal(nrow(res_iqr), 1)
  expect_equal(res_iqr$val, 100)

  res_z <- detect_outliers(df, "val", method = "zscore", threshold = 1)
  expect_equal(nrow(res_z), 1)
  expect_equal(res_z$val, 100)

  expect_error(detect_outliers(df, "columna_fantasma"))
})

test_that("treat_outliers modifies values correctly", {
  # CORRECCIÓN: Usamos datos con varianza (1, 2, 3...) para que el IQR no sea 0
  df <- data.frame(val = c(1, 2, 3, 4, 100))
  clean_df <- treat_outliers(df, "val", method = "iqr")

  # El valor 100 debe haber sido reducido (Winsorizado)
  expect_lt(clean_df$val[5], 100)
  # Pero debe seguir siendo alto (mayor o igual al máximo normal, que es 4)
  expect_gte(clean_df$val[5], 4)
})

test_that("scan_data summarizes dataset", {
  df <- data.frame(a = c(1, 100), b = c(1, 2), char = c("x", "y"))
  res <- scan_data(df, method = "iqr")

  expect_true("Column" %in% names(res))
  expect_equal(nrow(res), 2)
})

# ==============================================================================
# 2. TEST GRÁFICOS
# ==============================================================================
test_that("plot_outliers returns a ggplot object", {
  df <- data.frame(val = rnorm(20))
  p <- plot_outliers(df, "val")
  expect_s3_class(p, "ggplot")
})

# ==============================================================================
# 3. TEST AVANZADOS
# ==============================================================================
test_that("detect_multivariate uses Mahalanobis correctly", {
  df <- data.frame(x = 1:20, y = 1:20)
  outlier <- data.frame(x = 1, y = 100)
  df_final <- rbind(df, outlier)

  res <- detect_multivariate(df_final, c("x", "y"), confidence_level = 0.95)
  expect_gte(nrow(res), 1)
  expect_equal(res$y[nrow(res)], 100)
})

test_that("detect_density (LOF) works", {
  df <- data.frame(
    x = c(0.1, 0.2, 0.1, 0.2, 10),
    y = c(0.1, 0.1, 0.2, 0.2, 10)
  )
  res <- detect_density(df, k = 4, threshold = 1)
  expect_equal(nrow(res), 1)
  expect_equal(res$x, 10)
})

# ==============================================================================
# 4. TEST EXPERTOS
# ==============================================================================
test_that("detect_iforest returns scores and flags", {
  skip_if_not_installed("isotree")

  df <- data.frame(x = rnorm(100), y = rnorm(100))
  df[1, ] <- c(100, 100)

  # CORRECCIÓN: Usamos suppressWarnings para evitar el aviso de OpenMP en Mac
  res <- suppressWarnings(detect_iforest(df, ntrees = 10, contamination = 0.05))

  expect_true("If_Score" %in% names(res))
  expect_true(res$Is_Outlier[1])
})

test_that("detect_ts_outliers identifies seasonality breaks", {
  x <- seq(1, 10, length.out = 50)
  y <- sin(x)
  y[25] <- 10

  res <- detect_ts_outliers(y, frequency = 10)

  expect_true(res$Is_Outlier[25])
})

# ==============================================================================
# 5. TEST NUEVAS FUNCIONES (Categóricos e Influencia)
# ==============================================================================

test_that("detect_categorical_outliers finds rare items", {
  # Caso: "typo" aparece 1 vez de 11 total (aprox 9%)
  # Si ponemos min_freq = 0.1 (10%), debería detectarlo.
  vec <- c(rep("Normal", 10), "Typo")

  res <- detect_categorical_outliers(vec, min_freq = 0.10)

  expect_true("Is_Outlier" %in% names(res))
  # El "Typo" debe ser outlier
  expect_true(res[res$Category == "Typo", "Is_Outlier"])
  # "Normal" no debe ser outlier
  expect_false(res[res$Category == "Normal", "Is_Outlier"])
})

test_that("diagnose_influence detects high leverage points", {
  # Dataset simple lineal
  df <- data.frame(x = 1:10, y = 1:10)

  # Añadimos un punto que rompe la línea (x=20, y=0)
  # Este punto tiene mucha "palanca" (leverage)
  outlier <- data.frame(x = 20, y = 0)
  df <- rbind(df, outlier)

  res <- diagnose_influence(df, target = "y", predictor = "x")

  # El último punto (índice 11) debe ser influyente
  expect_true(res$Is_Influential[11])
  expect_gt(res$Cooks_Dist[11], 4/11) # Debe superar el umbral 4/n
})