# Unit tests (testthat) for test_chisq, calc_pc_loglin, run_hclust,
# ttest_nps, and split_tt.

# test_chisq ----

test_that("test_chisq returns correct structure", {
  out <- test_chisq(mtcars, "cyl", "vs")
  expect_true(tibble::is_tibble(out))
  expect_true(all(c("col_x", "col_y", "p") %in% names(out)))
  expect_equal(out$col_x, "cyl")
  expect_equal(out$col_y, "vs")
})

test_that("test_chisq handles NA filtering", {
  dat <- mtcars
  dat$cyl[1:3] <- NA
  out <- test_chisq(dat, "cyl", "vs", na_x = NA)
  expect_true(tibble::is_tibble(out))
  # With the NA rows filtered, a p-value should still be computed.
  expect_true(!is.na(out$p))
})

test_that("test_chisq returns significant result for related variables", {
  # cyl and vs are strongly associated in mtcars, so the p-value
  # should fall well below the conventional 0.05 threshold.
  out <- test_chisq(mtcars, "cyl", "vs")
  expect_true(out$p < 0.05)
})

# calc_pc_loglin ----

test_that("calc_pc_loglin calculates percentage impact", {
  # Fit a log-linear model on synthetic data with a known slope.
  dat <- data.frame(
    y = exp(1 + 0.5 * 1:10 + rnorm(10, 0, 0.1)),
    x = 1:10
  )
  fit <- lm(log(y) ~ x, data = dat)
  out <- calc_pc_loglin(fit)
  expect_true(tibble::is_tibble(out))
  expect_true(all(c("var", "coef", "pc_impact") %in% names(out)))
  # One row per model term: intercept plus x.
  expect_equal(nrow(out), 2)
})

test_that("calc_pc_loglin percentage impact formula is correct", {
  fit <- lm(log(y) ~ x, data = data.frame(y = 1:10, x = 1:10))
  out <- calc_pc_loglin(fit)
  # In a log-linear model the percentage impact is exp(coef) - 1.
  expect_equal(
    out$pc_impact,
    exp(out$coef) - 1,
    tolerance = 0.0001
  )
})

# run_hclust ----

test_that("run_hclust returns hclust object", {
  expect_s3_class(run_hclust(iris[, 1:4]), "hclust")
})

test_that("run_hclust respects method parameter", {
  dat <- iris[1:20, 1:4]
  hc_complete <- run_hclust(dat, method = "complete")
  hc_single <- run_hclust(dat, method = "single")
  expect_s3_class(hc_complete, "hclust")
  expect_s3_class(hc_single, "hclust")
  # Different linkage methods should yield different merge heights.
  expect_false(identical(hc_complete$height, hc_single$height))
})

test_that("run_hclust respects dmeth parameter", {
  dat <- iris[1:20, 1:4]
  expect_s3_class(run_hclust(dat, dmeth = "euclidean"), "hclust")
  expect_s3_class(run_hclust(dat, dmeth = "manhattan"), "hclust")
})

# ttest_nps ----

test_that("ttest_nps returns margin of error", {
  set.seed(123)
  scores <- sample(c(-100, 0, 100), 100, replace = TRUE)
  # Suppress the function's informational messages.
  moe <- suppressMessages(ttest_nps(scores))
  expect_type(moe, "double")
  # A margin of error must be strictly positive.
  expect_true(moe > 0)
})

test_that("ttest_nps respects confidence level", {
  set.seed(123)
  scores <- sample(c(-100, 0, 100), 100, replace = TRUE)
  moe_95 <- suppressMessages(ttest_nps(scores, conf_level = 0.95))
  moe_99 <- suppressMessages(ttest_nps(scores, conf_level = 0.99))
  # A higher confidence level implies a wider interval.
  expect_true(moe_99 > moe_95)
})

# split_tt ----

test_that("split_tt splits data correctly", {
  dat <- data.frame(x = 1:100, y = 101:200)
  parts <- split_tt(dat, 0.7)
  expect_type(parts, "list")
  expect_true(all(c("train", "test") %in% names(parts)))
  # A 70/30 split of 100 rows.
  expect_equal(nrow(parts$train), 70)
  expect_equal(nrow(parts$test), 30)
  # Train and test partitions must not share any rows.
  expect_equal(
    length(intersect(rownames(parts$train), rownames(parts$test))),
    0
  )
})

test_that("split_tt handles different proportions", {
  set.seed(123)
  dat <- data.frame(x = 1:100)
  parts_80 <- split_tt(dat, 0.8)
  parts_50 <- split_tt(dat, 0.5)
  # split_tt now preserves data frame structure with drop = FALSE,
  # so nrow() works even on this single-column input.
  expect_equal(nrow(parts_80$train), 80)
  expect_equal(nrow(parts_50$train), 50)
})

test_that("split_tt preserves all columns", {
  dat <- data.frame(a = 1:10, b = 11:20, c = letters[1:10])
  parts <- split_tt(dat, 0.6)
  expect_equal(names(parts$train), names(dat))
  expect_equal(names(parts$test), names(dat))
})