library(funcml)

audit_regression_data <- function() {
  make_split <- function(seed, n) {
    set.seed(seed)
    x1 <- rnorm(n)
    x2 <- rnorm(n)
    x3 <- rnorm(n)
    y <- 2 * x1 - 1.5 * x2 + 0.7 * x3 + rnorm(n, sd = 0.35)
    data.frame(outcome = y, x1 = x1, x2 = x2, x3 = x3)
  }
  list(
    scenario = "regression",
    formula = outcome ~ x1 + x2 + x3,
    train = make_split(20260401L, 72L),
    test = make_split(20260402L, 24L),
    task = "regression"
  )
}

audit_binary_data <- function() {
  make_split <- function(seed, n) {
    set.seed(seed)
    x1 <- rnorm(n)
    x2 <- rnorm(n)
    x3 <- rnorm(n)
    eta <- 1.1 * x1 - 0.8 * x2 + 0.6 * x3
    pr <- stats::plogis(eta)
    y <- ifelse(runif(n) < pr, "zeta", "alpha")
    data.frame(
      outcome = factor(y, levels = c("zeta", "alpha")),
      x1 = x1,
      x2 = x2,
      x3 = x3
    )
  }
  list(
    scenario = "binary_classification",
    formula = outcome ~ x1 + x2 + x3,
    train = make_split(20260403L, 84L),
    test = make_split(20260404L, 28L),
    task = "classification"
  )
}

audit_multiclass_data <- function() {
  make_split <- function(seed, n) {
    set.seed(seed)
    x1 <- rnorm(n)
    x2 <- rnorm(n)
    x3 <- rnorm(n)
    s1 <- 1.0 * x1 - 0.4 * x2 + 0.2 * x3 + rnorm(n, sd = 0.15)
    s2 <- -0.3 * x1 + 1.1 * x2 - 0.2 * x3 + rnorm(n, sd = 0.15)
    s3 <- 0.2 * x1 - 0.1 * x2 + 1.0 * x3 + rnorm(n, sd = 0.15)
    cls <- c("zeta", "alpha", "mu")[max.col(cbind(s1, s2, s3), ties.method = "first")]
    data.frame(
      outcome = factor(cls, levels = c("zeta", "alpha", "mu")),
      x1 = x1,
      x2 = x2,
      x3 = x3
    )
  }
  list(
    scenario = "multiclass_classification",
    formula = outcome ~ x1 + x2 + x3,
    train = make_split(20260405L, 96L),
    test = make_split(20260406L, 30L),
    task = "classification"
  )
}

audit_spec_for_test <- function(model, scenario) {
  switch(
    model,
    rpart = list(cp = 0.001, minsplit = 5L),
    glmnet = list(alpha = 0.5),
    ranger = list(num.trees = 80L, min.node.size = 3L),
    nnet = list(size = 4L, decay = 0.01, maxit = 150L),
    randomForest = list(ntree = 80L),
    gbm = list(n.trees = 60L, interaction.depth = 2L, shrinkage = 0.05, n.minobsinnode = 5L),
    C50 = list(trials = 5L),
    kknn = list(k = 5L, distance = 2),
    earth = list(degree = 1L),
    naivebayes = list(laplace = 1),
    adaboost = list(iter = 20L, nu = 0.1, type = "discrete"),
    pls = list(ncomp = 2L),
    ctree = list(mincriterion = 0.8),
    cforest = list(ntree = 80L, mincriterion = 0),
    lightgbm = list(nrounds = 40L, num_leaves = 15L, learning_rate = 0.1),
    bart = list(ntree = 40L, ndpost = 40L, nskip = 10L, keeptrees = TRUE, verbose = FALSE),
    xgboost = list(nrounds = 40L, max_depth = 4L, eta = 0.1, subsample = 1, colsample_bytree = 1),
    stacking = list(
      learners = if (identical(scenario, "multiclass_classification")) c("rpart", "kknn") else c("glm", "rpart"),
      learner_specs = list(),
      meta_model = "glmnet"
    ),
    superlearner = list(
      learners = if (identical(scenario, "multiclass_classification")) c("rpart", "kknn") else c("glm", "rpart"),
      learner_specs = list(),
      meta_model = "glmnet",
      resampling = cv(v = 3, seed = 1)
    ),
    list()
  )
}

scenario_supported_by_design <- function(adapter, scenario) {
  if (identical(scenario, "regression")) {
    return("regression" %in% adapter$tasks)
  }
  if (!("classification" %in% adapter$tasks)) {
    return(FALSE)
  }
  if (identical(scenario, "multiclass_classification")) {
    return(isTRUE(adapter$supports$multiclass))
  }
  TRUE
}

audit_datasets <- list(
  regression = audit_regression_data(),
  binary_classification = audit_binary_data(),
  multiclass_classification = audit_multiclass_data()
)

test_that("learner registry support metadata is coherent", {
  reg <- funcml:::funcml_registry()

  expect_equal(length(reg), 25L)
  expect_true(all(vapply(reg, function(x) is.character(x$package) && nzchar(x$package), logical(1))))
  expect_true(all(vapply(reg, function(x) is.list(x$supports), logical(1))))
  expect_true(all(vapply(reg, function(x) is.function(x$fit_xy), logical(1))))
  expect_true(all(vapply(reg, function(x) is.function(x$predict_xy), logical(1))))
  expect_true(all(vapply(reg, function(x) all(x$tasks %in% c("classification", "regression")), logical(1))))
})

test_that("advertised learner raw and probability contracts hold package-wide", {
  reg <- funcml:::funcml_registry()
  scenario_map <- do.call(
    rbind,
    lapply(names(reg), function(model) {
      adapter <- reg[[model]]
      out <- character()
      if ("regression" %in% adapter$tasks) out <- c(out, "regression")
      if ("classification" %in% adapter$tasks) out <- c(out, "binary_classification", "multiclass_classification")
      data.frame(learner = model, task = out, stringsAsFactors = FALSE)
    })
  )

  failures <- character()

  for (i in seq_len(nrow(scenario_map))) {
    model <- scenario_map$learner[i]
    scenario <- scenario_map$task[i]
    adapter <- reg[[model]]
    ds <- audit_datasets[[scenario]]
    supported_mode <- scenario_supported_by_design(adapter, scenario)
    spec <- audit_spec_for_test(model, scenario)
    fit_res <- safe_fit(ds$formula, ds$train, model, spec = spec, seed = 1L)

    if (!supported_mode) {
      if (identical(fit_res$status, "pass")) {
        failures <- c(failures, sprintf("%s/%s: unsupported mode fit succeeded", model, scenario))
      } else if (!is_clear_unsupported_error(fit_res$error)) {
        failures <- c(failures, sprintf("%s/%s: unsupported mode error was unclear: %s", model, scenario, fit_res$error))
      }
      next
    }

    if (!identical(fit_res$status, "pass")) {
      failures <- c(failures, sprintf("%s/%s: fit failed: %s", model, scenario, fit_res$error))
      next
    }

    raw_res <- safe_predict_raw(fit_res$value, ds$test)
    if (!identical(raw_res$status, "pass")) {
      failures <- c(failures, sprintf("%s/%s: raw predict failed: %s", model, scenario, raw_res$error))
      next
    }

    raw_check <- check_raw_prediction(
      raw_res$value,
      task = ds$task,
      n_expected = nrow(ds$test),
      levels_expected = if (is.factor(ds$train$outcome)) levels(ds$train$outcome) else NULL
    )
    if (!identical(raw_check$status, "pass")) {
      failures <- c(failures, sprintf("%s/%s: raw contract failed: %s", model, scenario, paste(raw_check$notes, collapse = "; ")))
    }

    if (identical(ds$task, "classification")) {
      prob_res <- safe_predict_prob(fit_res$value, ds$test)
      if (isTRUE(adapter$supports$prob)) {
        if (!identical(prob_res$status, "pass")) {
          failures <- c(failures, sprintf("%s/%s: prob predict failed: %s", model, scenario, prob_res$error))
          next
        }
        prob_check <- check_prob_prediction(prob_res$value, n_expected = nrow(ds$test), levels_expected = levels(ds$train$outcome))
        if (!identical(prob_check$status, "pass")) {
          failures <- c(failures, sprintf("%s/%s: prob contract failed: %s", model, scenario, paste(prob_check$notes, collapse = "; ")))
        } else {
          prob_mat <- prob_check$value
          if (!identical(check_prob_row_sums(prob_mat)$status, "pass")) {
            failures <- c(failures, sprintf("%s/%s: probability row sums invalid", model, scenario))
          }
          if (!identical(check_prob_column_names(prob_mat, levels(ds$train$outcome))$status, "pass")) {
            failures <- c(failures, sprintf("%s/%s: probability columns misaligned", model, scenario))
          }
          if (!identical(check_prob_class_reconstruction(prob_mat, raw_res$value, levels(ds$train$outcome))$status, "pass")) {
            failures <- c(failures, sprintf("%s/%s: argmax(prob) mismatched raw classes", model, scenario))
          }
        }
      } else {
        if (identical(prob_res$status, "pass")) {
          failures <- c(failures, sprintf("%s/%s: unsupported prob path succeeded", model, scenario))
        } else if (!is_clear_unsupported_error(prob_res$error)) {
          failures <- c(failures, sprintf("%s/%s: unsupported prob error was unclear: %s", model, scenario, prob_res$error))
        }
      }
    }
  }

  expect_equal(length(failures), 0L, info = paste(failures, collapse = "\n"))
})

test_that("interpretability contract holds for supported learner paths", {
  reg <- funcml:::funcml_registry()
  failures <- character()

  for (model in names(reg)) {
    adapter <- reg[[model]]

    for (scenario in c("regression", "binary_classification", "multiclass_classification")) {
      ds <- audit_datasets[[scenario]]
      if (identical(ds$task, "regression") && !("regression" %in% adapter$tasks)) next
      if (identical(ds$task, "classification") && !("classification" %in% adapter$tasks)) next

      supported_mode <- scenario_supported_by_design(adapter, scenario)
      spec <- audit_spec_for_test(model, scenario)
      fit_res <- safe_fit(ds$formula, ds$train, model, spec = spec, seed = 1L)

      if (!supported_mode) {
        next
      }
      if (!identical(fit_res$status, "pass")) {
        failures <- c(failures, sprintf("%s/%s: fit failed before interpret checks: %s", model, scenario, fit_res$error))
        next
      }

      raw_interpret <- safe_interpret_raw(fit_res$value, ds$train)
      if (!identical(raw_interpret$status, "pass")) {
        failures <- c(failures, sprintf("%s/%s: interpret raw failed: %s", model, scenario, raw_interpret$error))
      }

      if (identical(ds$task, "classification")) {
        prob_interpret <- safe_interpret_prob(fit_res$value, ds$train)
        if (isTRUE(adapter$supports$prob)) {
          if (!identical(prob_interpret$status, "pass")) {
            failures <- c(failures, sprintf("%s/%s: interpret prob failed: %s", model, scenario, prob_interpret$error))
          }
        } else if (identical(prob_interpret$status, "pass")) {
          failures <- c(failures, sprintf("%s/%s: interpret prob succeeded for unsupported learner", model, scenario))
        }
      }
    }
  }

  expect_equal(length(failures), 0L, info = paste(failures, collapse = "\n"))
})