# Tests verifying that control parameters actually affect model output
# Each test compares default vs modified parameter to ensure different results

# Helper to create data that produces non-trivial trees
make_predictive_data <- function(n = 300, seed = 100) {
  set.seed(seed)
  x1 <- rnorm(n)
  x2 <- rnorm(n)
  x3 <- rnorm(n)
  x4 <- factor(sample(letters[1:4], n, replace = TRUE))
  # Make y predictable from x1 and x2
  prob <- plogis(1.5 * x1 - x2 + 0.5 * x3)
  y <- factor(ifelse(runif(n) < prob, "A", "B"))
  data.frame(y = y, x1 = x1, x2 = x2, x3 = x3, x4 = x4)
}

# --- Case Weights ---

test_that("case weights affect model output", {
  dat <- make_predictive_data(300, seed = 101)

  # Default (no weights)
  set.seed(1001)
  mod_default <- C5.0(dat[, -1], dat$y)

  # With weights emphasizing class A
  wts <- ifelse(dat$y == "A", 3, 1)
  set.seed(1001)
  mod_weighted <- C5.0(dat[, -1], dat$y, weights = wts)

  # Trees should differ
  expect_false(identical(mod_default$tree, mod_weighted$tree))

  # Predictions should differ for at least some cases
  pred_default <- predict(mod_default, dat[, -1])
  pred_weighted <- predict(mod_weighted, dat[, -1])
  expect_false(identical(pred_default, pred_weighted))
})

# --- Winnowing ---

test_that("winnow parameter affects model output", {
  # Create data with some irrelevant predictors
  set.seed(102)
  n <- 300
  dat <- make_predictive_data(n, seed = 102)
  # Add noise predictors
  dat$noise1 <- rnorm(n)
  dat$noise2 <- rnorm(n)
  dat$noise3 <- factor(sample(letters[1:5], n, replace = TRUE))

  set.seed(1002)
  mod_default <- C5.0(dat[, -1], dat$y, control = C5.0Control(winnow = FALSE))
  set.seed(1002)
  mod_winnow <- C5.0(dat[, -1], dat$y, control = C5.0Control(winnow = TRUE))

  # Trees or attribute usage should differ
  # Winnowing may eliminate some predictors
  expect_false(identical(mod_default$tree, mod_winnow$tree))
})

# --- CF (Confidence Factor) ---

test_that("CF parameter affects model output", {
  dat <- make_predictive_data(300, seed = 103)

  # Default CF = 0.25
  set.seed(1003)
  mod_default <- C5.0(dat[, -1], dat$y, control = C5.0Control(CF = 0.25))

  # Very low CF = more pruning = smaller tree
  set.seed(1003)
  mod_low_cf <- C5.0(dat[, -1], dat$y, control = C5.0Control(CF = 0.01))

  # High CF = less pruning = larger tree
  set.seed(1003)
  mod_high_cf <- C5.0(dat[, -1], dat$y, control = C5.0Control(CF = 0.75))

  # Trees should differ
  expect_false(identical(mod_default$tree, mod_low_cf$tree))
  expect_false(identical(mod_default$tree, mod_high_cf$tree))

  # Low CF should generally produce smaller or equal tree
  expect_true(mod_low_cf$size <= mod_high_cf$size)
})

# --- minCases ---

test_that("minCases parameter affects model output", {
  dat <- make_predictive_data(400, seed = 104)

  # Default minCases = 2
  set.seed(1004)
  mod_default <- C5.0(dat[, -1], dat$y, control = C5.0Control(minCases = 2))

  # High minCases = larger minimum leaf size = smaller tree
  set.seed(1004)
  mod_high_min <- C5.0(dat[, -1], dat$y, control = C5.0Control(minCases = 30))

  # Trees should differ
  expect_false(identical(mod_default$tree, mod_high_min$tree))

  # Higher minCases should produce smaller or equal tree
  expect_true(mod_high_min$size <= mod_default$size)
})

# --- Sample ---
test_that("sample parameter affects model output", {
  dat <- make_predictive_data(400, seed = 105)

  # No sampling
  set.seed(1005)
  mod_default <- C5.0(
    dat[, -1],
    dat$y,
    control = C5.0Control(sample = 0, seed = 42)
  )

  # 50% sampling
  set.seed(1005)
  mod_sampled <- C5.0(
    dat[, -1],
    dat$y,
    control = C5.0Control(sample = 0.5, seed = 42)
  )

  # Trees should differ (trained on different subsets)
  expect_false(identical(mod_default$tree, mod_sampled$tree))

  # Predictions may differ
  pred_default <- predict(mod_default, dat[, -1])
  pred_sampled <- predict(mod_sampled, dat[, -1])
  # At least the output contains sampling info
  expect_true(
    grepl("test data", mod_sampled$output) ||
      !identical(pred_default, pred_sampled)
  )
})

# --- Bands ---

test_that("bands parameter affects rules model output", {
  dat <- make_predictive_data(300, seed = 106)

  # Rules without banding
  set.seed(1006)
  mod_default <- C5.0(
    dat[, -1],
    dat$y,
    rules = TRUE,
    control = C5.0Control(bands = 0)
  )

  # Rules with banding
  set.seed(1006)
  mod_banded <- C5.0(
    dat[, -1],
    dat$y,
    rules = TRUE,
    control = C5.0Control(bands = 5)
  )

  # Rules output should differ (banding adds utility info)
  expect_false(identical(mod_default$rules, mod_banded$rules))

  # Output should mention bands for banded model
  expect_true(mod_banded$control$bands == 5)
})

# --- noGlobalPruning ---

test_that("noGlobalPruning parameter affects model output", {
  # Use larger dataset with more noise to ensure pruning has effect
  set.seed(107)
  n <- 500
  dat <- data.frame(
    y = factor(sample(c("A", "B"), n, replace = TRUE, prob = c(0.6, 0.4))),
    x1 = rnorm(n),
    x2 = rnorm(n),
    x3 = rnorm(n),
    x4 = rnorm(n),
    x5 = factor(sample(letters[1:5], n, replace = TRUE))
  )
  # Make y somewhat predictable
  prob <- plogis(0.8 * dat$x1 - 0.5 * dat$x2)
  dat$y <- factor(ifelse(runif(n) < prob, "A", "B"))

  # Default (global pruning enabled)
  set.seed(1007)
  mod_default <- C5.0(
    dat[, -1],
    dat$y,
    control = C5.0Control(noGlobalPruning = FALSE, CF = 0.5)
  )

  # No global pruning = potentially larger tree
  set.seed(1007)
  mod_no_prune <- C5.0(
    dat[, -1],
    dat$y,
    control = C5.0Control(noGlobalPruning = TRUE, CF = 0.5)
  )

  # Without pruning, tree should be same size or larger
  expect_true(mod_no_prune$size >= mod_default$size)

  # If sizes differ, trees must differ
  if (mod_no_prune$size != mod_default$size) {
    expect_false(identical(mod_default$tree, mod_no_prune$tree))
  }
})

# --- fuzzyThreshold ---

test_that("fuzzyThreshold parameter affects model output", {
  dat <- make_predictive_data(300, seed = 108)

  # Default (no fuzzy thresholds)
  set.seed(1008)
  mod_default <- C5.0(
    dat[, -1],
    dat$y,
    control = C5.0Control(fuzzyThreshold = FALSE)
  )

  # With fuzzy thresholds
  set.seed(1008)
  mod_fuzzy <- C5.0(
    dat[, -1],
    dat$y,
    control = C5.0Control(fuzzyThreshold = TRUE)
  )

  # Trees may have same structure but probability predictions should differ
  pred_default <- predict(mod_default, dat[, -1], type = "prob")
  pred_fuzzy <- predict(mod_fuzzy, dat[, -1], type = "prob")

  # Fuzzy thresholds affect probability assignments
  expect_false(identical(pred_default, pred_fuzzy))
})

# --- earlyStopping ---

test_that("earlyStopping parameter affects boosted model output", {
  dat <- make_predictive_data(300, seed = 109)

  # With early stopping (default)
  set.seed(1009)
  mod_early <- C5.0(
    dat[, -1],
    dat$y,
    trials = 50,
    control = C5.0Control(earlyStopping = TRUE)
  )

  # Without early stopping
  set.seed(1009)
  mod_no_early <- C5.0(
    dat[, -1],
    dat$y,
    trials = 50,
    control = C5.0Control(earlyStopping = FALSE)
  )

  # Without early stopping, should run all requested trials
  expect_equal(mod_no_early$trials["Actual"], c(Actual = 50L))

  # With early stopping, may use fewer trials
  # (or same if boosting is effective)
  expect_true(mod_early$trials["Actual"] <= 50)

  # If early stopping kicked in, trees should differ
  if (mod_early$trials["Actual"] < 50) {
    expect_false(identical(mod_early$tree, mod_no_early$tree))
  }
})

# --- Combined test: Multiple parameters ---

test_that("multiple control parameters combine to affect output", {
  dat <- make_predictive_data(400, seed = 110)

  # Default parameters
  set.seed(1010)
  mod_default <- C5.0(dat[, -1], dat$y)

  # Multiple parameter changes
  set.seed(1010)
  mod_custom <- C5.0(
    dat[, -1],
    dat$y,
    control = C5.0Control(
      winnow = TRUE,
      CF = 0.10,
      minCases = 10,
      fuzzyThreshold = TRUE,
      noGlobalPruning = TRUE
    )
  )

  # Should produce different trees
  expect_false(identical(mod_default$tree, mod_custom$tree))

  # Should produce different predictions
  pred_default <- predict(mod_default, dat[, -1], type = "prob")
  pred_custom <- predict(mod_custom, dat[, -1], type = "prob")
  expect_false(identical(pred_default, pred_custom))
})

# --- Reproducibility test ---

test_that("same seed produces identical models", {
  dat <- make_predictive_data(300, seed = 111)

  # Fit model twice with same seed
  set.seed(1011)
  mod_1 <- C5.0(dat[, -1], dat$y)

  set.seed(1011)
  mod_2 <- C5.0(dat[, -1], dat$y)

  # Trees should be identical

  expect_identical(mod_1$tree, mod_2$tree)
  expect_identical(mod_1$rules, mod_2$rules)
  expect_identical(mod_1$size, mod_2$size)

  # Predictions should be identical
  pred_1 <- predict(mod_1, dat[, -1])
  pred_2 <- predict(mod_2, dat[, -1])
  expect_identical(pred_1, pred_2)

  pred_prob_1 <- predict(mod_1, dat[, -1], type = "prob")
  pred_prob_2 <- predict(mod_2, dat[, -1], type = "prob")
  expect_identical(pred_prob_1, pred_prob_2)
})