## AutoEnsML — comprehensive unit tests
## Author: Sadikul Islam <sadikul.islamiasri@gmail.com>
## Note: feedback_scores is an exported FUNCTION (not a dataset).
##       Call feedback_scores() to get the data frame.

## ── Helper: small fast subset ─────────────────────────────────────────────────
# Returns the first 60 rows of feedback_scores() so model fits stay small.
.fs <- function() {
  all_rows <- feedback_scores()
  all_rows[seq_len(60L), ]
}

## ── 1. auto_ensml: regression ─────────────────────────────────────────────────
test_that("auto_ensml regression returns valid AutoEnsML object", {
  # Not run on CRAN.
  skip_on_cran()
  scores <- .fs()
  fit <- auto_ensml(
    overall_score ~ instructor_rating + content_clarity + pace_score,
    data = scores, task = "regression",
    n_folds = 3L, max_models = 3L,
    methods = c("simple_avg", "weighted_avg", "greedy"),
    parallel = FALSE, verbose = FALSE
  )

  # Class, recorded task, and a minimum of two base models.
  expect_s3_class(fit, "AutoEnsML")
  expect_equal(fit$task, "regression")
  expect_gte(fit$n_models, 2L)

  # Weights: numeric, sum to one, and non-negative up to rounding noise.
  wts <- fit$weights
  expect_true(is.numeric(wts))
  expect_equal(sum(wts), 1, tolerance = 1e-5)
  expect_true(all(wts >= -1e-10))

  # Leaderboard: a data frame carrying the regression columns checked here,
  # and the selected method must be one of its rows.
  board <- fit$leaderboard
  required_cols <- c("method", "RMSE", "composite")
  expect_s3_class(board, "data.frame")
  expect_true(all(required_cols %in% names(board)))
  expect_true(is.character(fit$best_method))
  expect_true(fit$best_method %in% board$method)
})

## ── 2. auto_ensml: classification ─────────────────────────────────────────────
test_that("auto_ensml classification returns valid AutoEnsML object", {
  # Not run on CRAN.
  skip_on_cran()
  scores <- .fs()
  fit <- auto_ensml(
    sentiment_binary ~ instructor_rating + content_clarity + pace_score,
    data = scores, task = "classification", n_folds = 3L,
    methods = c("simple_avg", "weighted_avg"),
    parallel = FALSE, verbose = FALSE
  )

  expect_s3_class(fit, "AutoEnsML")
  expect_equal(fit$task, "classification")

  # The classification leaderboard must expose these metric columns.
  metric_cols <- c("AUC", "F1", "MCC")
  expect_true(all(metric_cols %in% names(fit$leaderboard)))
})

## ── 3. predict: regression ────────────────────────────────────────────────────
test_that("predict.AutoEnsML returns correct-length numeric vector (regression)", {
  # Not run on CRAN.
  skip_on_cran()
  scores <- .fs()
  fit <- auto_ensml(
    overall_score ~ instructor_rating + content_clarity,
    data     = scores,
    task     = "regression",
    n_folds  = 3L,
    methods  = c("simple_avg", "weighted_avg"),
    parallel = FALSE,
    verbose  = FALSE
  )

  # Predict on the first five rows of the training subset: one numeric,
  # non-missing value is expected per row.
  preds <- predict(fit, newdata = scores[seq_len(5L), ])
  expect_length(preds, 5L)
  expect_true(is.numeric(preds))
  expect_false(anyNA(preds))
})

## ── 4. predict: classification types ─────────────────────────────────────────
test_that("predict.AutoEnsML type = class returns 0/1 integers", {
  # Not run on CRAN.
  skip_on_cran()
  scores <- .fs()
  fit <- auto_ensml(
    sentiment_binary ~ instructor_rating + content_clarity,
    data     = scores,
    task     = "classification",
    n_folds  = 3L,
    methods  = "simple_avg",
    parallel = FALSE,
    verbose  = FALSE
  )

  # Same five rows, requested once as hard labels and once as probabilities.
  labels <- predict(fit, newdata = scores[seq_len(5L), ], type = "class")
  probs  <- predict(fit, newdata = scores[seq_len(5L), ], type = "prob")

  # Labels are restricted to {0, 1}; probabilities must lie in [0, 1].
  expect_true(all(labels %in% c(0L, 1L)))
  expect_true(all(probs >= 0 & probs <= 1))
})

## ── 5. S3: print and summary ──────────────────────────────────────────────────
test_that("print and summary produce expected output", {
  # Not run on CRAN.
  skip_on_cran()
  df  <- .fs()
  res <- auto_ensml(
    overall_score ~ instructor_rating + content_clarity,
    data = df, task = "regression", n_folds = 3L,
    methods = "simple_avg", parallel = FALSE, verbose = FALSE
  )

  # print() must mention the class name.
  expect_output(print(res), "AutoEnsML")

  # Capture summary() once and match every expected fragment against the same
  # text, instead of re-running summary() for each pattern.
  summary_text <- capture_output(summary(res))
  expect_match(summary_text, "AutoEnsML Summary")
  expect_match(summary_text, "Sadikul Islam")
  expect_match(summary_text, "Optimal weights")
})

## ── 6. coef ───────────────────────────────────────────────────────────────────
test_that("coef.AutoEnsML returns named weights summing to 1", {
  # Not run on CRAN.
  skip_on_cran()
  scores <- .fs()
  fit <- auto_ensml(
    overall_score ~ instructor_rating + content_clarity,
    data     = scores,
    task     = "regression",
    n_folds  = 3L,
    methods  = "simple_avg",
    parallel = FALSE,
    verbose  = FALSE
  )

  # coef() must hand back a named numeric vector that sums to one and is
  # non-negative up to rounding noise.
  wts <- coef(fit)
  expect_true(is.numeric(wts))
  expect_false(is.null(names(wts)))
  expect_equal(sum(wts), 1, tolerance = 1e-5)
  expect_true(all(wts >= -1e-10))
})

## ── 7. optimize_weights ───────────────────────────────────────────────────────
test_that("optimize_weights returns non-negative weights summing to 1", {
  learners <- c("glmnet", "ranger", "xgboost")

  # Synthetic out-of-fold matrix (100 rows x 3 learners) and a random target;
  # the prediction matrix is drawn before the target.
  set.seed(7L)
  oof_preds <- matrix(
    rnorm(300L),
    nrow = 100L, ncol = 3L,
    dimnames = list(NULL, learners)
  )
  target <- rnorm(100L)

  wts <- optimize_weights(oof_preds, target, metric = "rmse", task = "regression")

  # One weight per learner, named after the matrix columns, summing to one and
  # non-negative up to rounding noise.
  expect_length(wts, 3L)
  expect_named(wts, learners)
  expect_equal(sum(wts), 1, tolerance = 1e-5)
  expect_true(all(wts >= -1e-10))
})

## ── 8. compute_diversity ──────────────────────────────────────────────────────
test_that("compute_diversity returns integer in [3, ncol(oof)]", {
  # Five columns of random out-of-fold predictions and a random target; the
  # prediction matrix is drawn before the target.
  set.seed(8L)
  oof    <- matrix(rnorm(500L), nrow = 100L, ncol = 5L)
  target <- rnorm(100L)

  n_keep <- compute_diversity(oof, target, "rmse", "regression", verbose = FALSE)

  # The result must be numeric and bounded by 3 below and by the number of
  # candidate columns (5 here) above.
  expect_true(is.numeric(n_keep))
  expect_gte(n_keep, 3L)
  expect_lte(n_keep, ncol(oof))
})

## ── 9. compare_ensembles ──────────────────────────────────────────────────────
test_that("compare_ensembles returns ranked data frame with all stat columns", {
  # Random target followed by two random candidate prediction vectors, drawn
  # in that order.
  set.seed(9L)
  n_obs  <- 80L
  target <- rnorm(n_obs)
  candidates <- list(a = rnorm(n_obs), b = rnorm(n_obs))

  ranking <- compare_ensembles(candidates, target, task = "regression")

  # One row per candidate, carrying every statistic column checked here.
  stat_cols <- c("method", "RMSE", "MAE", "R2", "AIC", "BIC", "composite")
  expect_s3_class(ranking, "data.frame")
  expect_equal(nrow(ranking), 2L)
  expect_true(all(stat_cols %in% names(ranking)))

  # The first row's composite score must not exceed the second row's.
  expect_lte(ranking$composite[1L], ranking$composite[2L])
})

## ── 10. Input validation ──────────────────────────────────────────────────────
test_that("auto_ensml rejects invalid inputs with informative errors", {
  scores <- .fs()

  # A character string in place of a formula.
  expect_error(
    auto_ensml("not formula", scores),
    "formula"
  )

  # A character string in place of the data frame.
  expect_error(
    auto_ensml(overall_score ~ ., "not df"),
    "data.frame"
  )

  # Only five rows of data; the error message is expected to mention "20".
  expect_error(
    auto_ensml(overall_score ~ ., scores[seq_len(5L), ]),
    "20"
  )

  # A single fold.
  expect_error(
    auto_ensml(overall_score ~ ., scores, n_folds = 1L),
    "n_folds"
  )
})

## ── 11. AUC helper ────────────────────────────────────────────────────────────
test_that(".ae_auc returns value in [0, 1]", {
  # Random binary labels and random scores; labels are drawn before scores.
  set.seed(10L)
  labels <- sample(0:1, 60L, replace = TRUE)
  scores <- runif(60L)

  # .ae_auc is internal, hence the triple-colon access.
  auc_value <- AutoEnsML:::.ae_auc(labels, scores)

  expect_gte(auc_value, 0)
  expect_lte(auc_value, 1)
})

## ── 12. RNG state not permanently altered ─────────────────────────────────────
test_that("auto_ensml restores RNG state after execution", {
  # Not run on CRAN.
  skip_on_cran()
  scores <- .fs()

  # Reference value: the first uniform draw after seeding with 99.
  set.seed(99L)
  draw_without_fit <- runif(1L)

  # Re-seed, run a fit, then draw again. The two draws can only agree if the
  # RNG stream is where the seed left it once auto_ensml() has returned.
  set.seed(99L)
  auto_ensml(
    overall_score ~ instructor_rating,
    data     = scores,
    task     = "regression",
    n_folds  = 3L,
    methods  = "simple_avg",
    parallel = FALSE,
    verbose  = FALSE
  )
  draw_after_fit <- runif(1L)

  expect_equal(draw_without_fit, draw_after_fit, tolerance = 1e-10)
})

## ── 13. feedback_scores function ──────────────────────────────────────────────
test_that("feedback_scores() returns correct structure", {
  # feedback_scores is a function, so it has to be called to obtain the data.
  scores <- feedback_scores()

  # Shape: a 200 x 9 data frame.
  expect_s3_class(scores, "data.frame")
  expect_equal(nrow(scores), 200L)
  expect_equal(ncol(scores), 9L)

  # Columns the other tests rely on must be present.
  key_cols <- c("overall_score", "sentiment_binary", "session_type")
  expect_true(all(key_cols %in% names(scores)))

  # Value ranges: binary sentiment, scores within [1, 5], and session types
  # drawn from the three allowed labels.
  session_levels <- c("workshop", "lecture", "seminar")
  expect_true(all(scores$sentiment_binary %in% c(0L, 1L)))
  expect_true(all(scores$overall_score >= 1 & scores$overall_score <= 5))
  expect_true(all(scores$session_type %in% session_levels))
})