# Tests for C5imp variable importance function

# Fixture: simulate a two-class data set intended to give C5.0 something
# real to split on, so the fitted trees are not trivial.
#
# n:    number of rows to simulate.
# seed: passed to set.seed() so repeated calls return the same data.
#
# Returns a data.frame with a factor outcome `y` ("A"/"B") and three
# predictors: numeric `x1` and `x2`, which drive the class probability, and a
# factor `x3` that is sampled independently of the outcome.
make_separable_data <- function(n = 200, seed = 8000) {
  set.seed(seed)

  # The order of the random draws is part of the fixture: reordering them
  # would change the simulated data for a given seed.
  signal_main <- rnorm(n)
  signal_minor <- rnorm(n)
  noise_levels <- sample(letters[1:3], n, replace = TRUE)

  # P(y == "A") rises with x1 and, at half the weight, with x2.
  prob_a <- plogis(signal_main + 0.5 * signal_minor)
  is_a <- runif(n) < prob_a
  labels <- c("B", "A")[is_a + 1L]

  data.frame(
    y = factor(labels),
    x1 = signal_main,
    x2 = signal_minor,
    x3 = factor(noise_levels)
  )
}

# --- Basic Usage ---

# With no `metric` argument, C5imp() should report the "usage" importance as a
# one-column data frame whose row names are predictors of the model.
test_that("C5imp returns usage metric by default", {
  dat <- make_separable_data(200, seed = 8001)
  mod <- C5.0(dat[, -1], dat$y)

  # Skip if tree is trivial (no splits)
  skip_if(mod$size == 1, "Tree has no splits")

  imp <- C5imp(mod)
  expect_s3_class(imp, "data.frame")
  expect_named(imp, "Overall")
  expect_true(all(rownames(imp) %in% c("x1", "x2", "x3")))

  # Pin the default itself: omitting `metric` must give exactly the same
  # result as asking for the usage metric explicitly. Without this check the
  # test would still pass if the default were silently changed.
  expect_identical(imp, C5imp(mod, metric = "usage"))
})

# Requesting the "splits" metric should give the same one-column layout.
test_that("C5imp returns splits metric", {
  sim <- make_separable_data(200, seed = 8002)
  fit <- C5.0(x = sim[-1], y = sim$y)

  # Nothing to check when the fitted tree is trivial (no splits).
  has_splits <- fit$size != 1
  skip_if(!has_splits, "Tree has no splits")

  split_importance <- C5imp(fit, metric = "splits")
  expect_s3_class(split_importance, "data.frame")
  expect_named(split_importance, "Overall")
})

# For the "splits" metric, `pct = FALSE` should return the split tallies
# themselves and `pct = TRUE` the same tallies rescaled to percentages.
test_that("C5imp with pct = FALSE returns raw counts for splits", {
  dat <- make_separable_data(200, seed = 8003)
  mod <- C5.0(dat[, -1], dat$y)

  skip_if(mod$size == 1, "Tree has no splits")

  imp_pct <- C5imp(mod, metric = "splits", pct = TRUE)
  imp_raw <- C5imp(mod, metric = "splits", pct = FALSE)

  expect_s3_class(imp_raw, "data.frame")
  expect_named(imp_raw, "Overall")

  raw <- imp_raw$Overall
  total <- sum(raw)

  # Guard on the raw total rather than on the percentages: with zero splits
  # the percentages would be 0 / 0 = NaN, and `NaN > 0` is not a usable
  # condition.
  skip_if(total == 0, "Model has no recorded splits")

  # Raw importance is a tally of splits: whole, non-negative numbers.
  expect_true(all(raw >= 0))
  expect_equal(raw, round(raw))

  # Percentages sum to 100 ...
  expect_equal(sum(imp_pct$Overall), 100, tolerance = 0.01)

  # ... and are the raw tallies rescaled. Rows are matched by predictor name
  # so the comparison does not depend on how ties are ordered.
  expect_equal(
    imp_pct[rownames(imp_raw), "Overall"],
    raw / total * 100
  )
})

# A plain (single, non-rule) C5.0 fit should yield a non-empty importance table.
test_that("C5imp works with tree model", {
  sim <- make_separable_data(200, seed = 8004)
  predictors <- sim[-1]
  tree_fit <- C5.0(predictors, sim$y)

  # Nothing to check when the fitted tree is trivial (no splits).
  has_splits <- tree_fit$size != 1
  skip_if(!has_splits, "Tree has no splits")

  importance <- C5imp(tree_fit)
  expect_s3_class(importance, "data.frame")
  expect_gt(nrow(importance), 0)
})

# A model fitted with `rules = TRUE` should still be accepted by C5imp().
test_that("C5imp works with rules model", {
  sim <- make_separable_data(200, seed = 8005)
  rule_fit <- C5.0(x = sim[-1], y = sim$y, rules = TRUE)

  # Only the return type is checked: a rule-based fit is not guaranteed to
  # produce any rules, so the contents are left unconstrained.
  importance <- C5imp(rule_fit)
  expect_s3_class(importance, "data.frame")
})

# A model fitted with several boosting trials should still be accepted by
# C5imp(); only the return type is checked.
test_that("C5imp works with boosted model", {
  sim <- make_separable_data(300, seed = 8006)
  boosted_fit <- C5.0(x = sim[-1], y = sim$y, trials = 10)

  importance <- C5imp(boosted_fit)
  expect_s3_class(importance, "data.frame")
})

# Rows of the importance table should run from most to least important.
test_that("C5imp results are sorted by importance (descending)", {
  sim <- make_separable_data(200, seed = 8007)
  fit <- C5.0(x = sim[-1], y = sim$y)

  # Nothing to check when the fitted tree is trivial (no splits).
  has_splits <- fit$size != 1
  skip_if(!has_splits, "Tree has no splits")

  scores <- C5imp(fit)$Overall
  # Non-increasing top to bottom is the same as non-decreasing bottom to top;
  # ties are allowed.
  expect_false(is.unsorted(rev(scores)))
})

# Every predictor supplied to the model should get a row in the importance
# table, with no extra rows.
test_that("C5imp includes all predictors", {
  sim <- make_separable_data(200, seed = 8008)
  fit <- C5.0(x = sim[-1], y = sim$y)

  # Nothing to check when the fitted tree is trivial (no splits).
  has_splits <- fit$size != 1
  skip_if(!has_splits, "Tree has no splits")

  expected_rows <- sort(c("x1", "x2", "x3"))
  reported_rows <- sort(rownames(C5imp(fit)))
  expect_equal(reported_rows, expected_rows)
})

# --- Error Conditions ---

# An unrecognised `metric` value should raise an error; the message is pinned
# by a snapshot.
test_that("C5imp errors on invalid metric", {
  sim <- make_separable_data(200, seed = 9001)
  # The model must stay bound to `mod`: expect_snapshot() records the text of
  # the expression below, so renaming it would invalidate the stored snapshot.
  mod <- C5.0(x = sim[-1], y = sim$y)

  expect_snapshot(C5imp(mod, metric = "invalid"), error = TRUE)
})