test_that("binarymlr_parameters() and registry expose 18 default variants", {
  p <- binarymlr_parameters(verbose = FALSE)
  expect_type(p, "list")
  expect_identical(p$seed, 123L)
  expect_true(p$min_auc_accept >= 0.5 && p$min_auc_accept < 1)
  expect_length(p$algorithms, 9L)

  reg <- get_binary_registry()
  expect_setequal(list_binary_algorithms(),
                  c("lasso_logit", "ridge_logit", "enet_logit",
                    "glm_logit", "stepwise_logit", "gbm_bin", "rf_bin",
                    "pca_logit", "gaussian_nb"))
  for (k in list_binary_algorithms()) {
    expect_true(!is.null(reg[[k]]$fit))
    expect_true(!is.null(reg[[k]]$predict))
    expect_s3_class(reg[[k]]$grid(p), "data.frame")
  }

  variants <- list_binary_model_variants(p)
  counts <- count_binary_combinations(p, min_size = 1, max_size = 2)
  expect_equal(nrow(variants), 18L)
  expect_equal(counts$n_candidates, 18L)
  expect_true(counts$n_combinations > 150L)
  expect_false(counts$allow_same_algorithm)
})

test_that("binary metrics handle simple perfect and imperfect predictions", {
  y <- c(0, 0, 1, 1)
  expect_equal(binary_auc(y, c(0.1, 0.2, 0.8, 0.9)), 1)
  expect_equal(binary_auc(y, c(0.9, 0.8, 0.2, 0.1)), 0)
  expect_true(binary_pr_auc(y, c(0.1, 0.2, 0.8, 0.9)) > 0.9)
  met <- binary_threshold_metrics(y, c(0.1, 0.2, 0.8, 0.9), threshold = 0.5)
  expect_equal(met$accuracy, 1)
  expect_equal(met$sensitivity, 1)
  expect_equal(met$specificity, 1)
})

test_that("prepare_binary_cohort_input() computes shared numeric features", {
  data(iris)
  d <- iris[iris$Species != "setosa", ]
  d$cohort <- rep(c("A", "B"), length.out = nrow(d))
  d$sample_id <- paste0("iris_", seq_len(nrow(d)))

  prep <- prepare_binary_cohort_input(
    d, cohort = "cohort", outcome = "Species", id = "sample_id",
    positive_class = "versicolor"
  )
  expect_s3_class(prep, "automlr_binary_input")
  expect_length(prep$cohorts, 2L)
  expect_setequal(prep$shared_features,
                  c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"))
  expect_true(all(c("n_positive", "n_negative", "positive_rate") %in%
                    names(prep$summary)))
})

test_that("prepare_binary_cohort_input() refuses accidental multiclass collapse", {
  d <- data.frame(
    cohort = rep(c("A", "B"), each = 3),
    outcome = rep(c("case", "control", "other"), 2),
    x = seq_len(6)
  )
  expect_error(
    prepare_binary_cohort_input(d, cohort = "cohort", outcome = "outcome",
                                positive_class = "case"),
    "exactly two"
  )
  prep <- prepare_binary_cohort_input(
    d, cohort = "cohort", outcome = "outcome",
    positive_class = "case", collapse_other = TRUE
  )
  expect_equal(sum(unlist(lapply(prep$cohorts, `[[`, "outcome")) == 1L), 2L)
})

test_that("fit_binary_ensemble() and export_binary_results() run on iris", {
  skip_if_not_installed("glmnet")
  data(iris)
  d <- iris[iris$Species != "setosa", ]
  d <- rbind(
    utils::head(d[d$Species == "versicolor", , drop = FALSE], 20),
    utils::head(d[d$Species == "virginica", , drop = FALSE], 20)
  )
  d$cohort <- rep(c("A", "B"), length.out = nrow(d))
  d$sample_id <- paste0("iris_", seq_len(nrow(d)))
  prep <- prepare_binary_cohort_input(
    d, cohort = "cohort", outcome = "Species", id = "sample_id",
    positive_class = "versicolor"
  )
  params <- binarymlr_parameters(
    algorithms = c("lasso_logit", "ridge_logit"),
    verbose = FALSE
  )
  fit <- suppressWarnings(fit_binary_ensemble(
    prep, params = params, min_models = 1, max_models = 2,
    verbose = FALSE
  ))

  expect_s3_class(fit, "automlr_binary_ensemble")
  expect_equal(sum(fit$weights), 1)
  prob <- predict(fit, fit$training$X[1:5, , drop = FALSE], type = "prob")
  cls <- predict(fit, fit$training$X[1:5, , drop = FALSE], type = "class")
  expect_length(prob, 5L)
  expect_true(all(prob >= 0 & prob <= 1))
  expect_true(all(cls %in% c(0L, 1L)))
  expect_true(is.finite(fit$combination_evaluation$summary$auc[1L]))

  export_dir <- file.path(tempdir(), "automlr-binary-export-test")
  exported <- export_binary_results(
    fit, output_dir = export_dir, formats = "png", top_n = 5,
    publication = TRUE, summary_language = "bilingual"
  )
  expect_true(file.exists(exported$report))
  expect_true(file.exists(file.path(export_dir, "tables", "single_models.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "combinations.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "predicted_probabilities.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "threshold_metrics.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "roc_curve.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "pr_curve.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "calibration_curve.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "dca_curve.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "confusion_matrix.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "feature_importance.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "preprocessing_report.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "resampling_manifest.csv")))
  expect_true(file.exists(file.path(export_dir, "tables", "model_performance_forest.csv")))
  expect_true(file.exists(file.path(export_dir, "summaries", "summary_report.md")))
  expect_true(file.exists(file.path(export_dir, "summaries", "explainability_summary.md")))
  expect_true(file.exists(file.path(export_dir, "figures", "publication",
                                   "fig1_all_model_cohort_auc_heatmap.png")))
  expect_true(file.exists(file.path(export_dir, "figures", "publication",
                                   "fig8_feature_importance_barplot.png")))
})

test_that("binary threshold strategy exports and k-fold resampling works", {
  skip_if_not_installed("glmnet")
  data(iris)
  d <- iris[iris$Species != "setosa", ]
  d <- rbind(
    utils::head(d[d$Species == "versicolor", , drop = FALSE], 12),
    utils::head(d[d$Species == "virginica", , drop = FALSE], 12)
  )
  d$cohort <- rep(c("A", "B"), length.out = nrow(d))
  prep <- prepare_binary_cohort_input(
    d, cohort = "cohort", outcome = "Species",
    positive_class = "versicolor", negative_class = "virginica"
  )
  params <- binarymlr_parameters(
    algorithms = c("lasso_logit", "ridge_logit"),
    resampling = "kfold", k_folds = 3L,
    verbose = FALSE
  )
  fit <- suppressWarnings(fit_binary_ensemble(
    prep, params = params, strategy = "threshold", min_auc = 0.5,
    verbose = FALSE
  ))
  expect_s3_class(fit, "automlr_binary_ensemble")
  expect_equal(unique(fit$evaluation$summary$resampling), "kfold")
  expect_true(nrow(fit$evaluation$resampling_manifest) > 0L)
  export_dir <- file.path(tempdir(), "automlr-binary-threshold-test")
  exported <- export_binary_results(
    fit, output_dir = export_dir, formats = "png", top_n = 5,
    publication = FALSE, summary_language = "en"
  )
  expect_true(file.exists(exported$report))
  pred <- read.csv(file.path(export_dir, "tables", "predicted_probabilities.csv"))
  expect_true(all(c("apparent_probability", "oof_probability",
                    "apparent_class", "oof_class") %in% names(pred)))
})