library(testthat)

# Grouped CV tests

test_that("grouped CV keeps groups intact", {
  skip_if_not_installed("rsample")

  set.seed(123)
  grouped_data <- tibble::tibble(
    id = 1:12,
    group = rep(letters[1:4], each = 3),
    strata = rep(c("A", "B"), each = 6),
    value = rnorm(12)
  )

  resamples <- fastml:::make_grouped_cv(
    grouped_data,
    group_cols = "group",
    v = 4,
    strata = NULL,
    repeats = 1
  )

  purrr::walk(resamples$splits, function(split) {
    analysis_groups <- unique(rsample::analysis(split)$group)
    assessment_groups <- unique(rsample::assessment(split)$group)
    expect_length(intersect(analysis_groups, assessment_groups), 0)
  })
})

test_that("grouped CV errors when groups fewer than folds", {
  skip_if_not_installed("rsample")

  small_groups <- tibble::tibble(
    group = rep(letters[1:3], each = 2),
    value = runif(6)
  )

  expect_error(
    fastml:::make_grouped_cv(
      small_groups,
      group_cols = "group",
      v = 4,
      strata = NULL,
      repeats = 1
    ),
    "cannot exceed the number of groups"
  )
})

test_that("grouped CV stratification distributes strata across folds", {
  skip_if_not_installed("rsample")

  set.seed(456)
  strat_data <- tibble::tibble(
    id = 1:12,
    group = rep(paste0("g", 1:6), each = 2),
    strata = rep(c("A", "B"), each = 6),
    value = rnorm(12)
  )

  resamples <- fastml:::make_grouped_cv(
    strat_data,
    group_cols = "group",
    v = 3,
    strata = "strata",
    repeats = 1
  )

  purrr::walk(resamples$splits, function(split) {
    assessment_strata <- unique(rsample::assessment(split)$strata)
    expect_setequal(assessment_strata, unique(strat_data$strata))
  })
})

# Blocked CV tests

test_that("blocked CV preserves order and prevents leakage", {
  skip_if_not_installed("rsample")

  blocked_data <- tibble::tibble(
    time = 1:12,
    value = rnorm(12)
  )

  blocked_info <- fastml:::make_blocked_cv(
    blocked_data,
    block_col = "time",
    block_size = 3
  )

  expect_equal(blocked_info$data$time, blocked_data$time)
  expect_true(all(diff(blocked_info$block_ids) >= 0))

  blocked_plan <- blocked_info$data
  blocked_plan$.fastml_block <- blocked_info$block_ids

  resamples <- rsample::group_vfold_cv(
    blocked_plan,
    group = .fastml_block,
    v = blocked_info$n_blocks
  )

  purrr::walk(resamples$splits, function(split) {
    analysis <- rsample::analysis(split)
    assessment <- rsample::assessment(split)
    expect_true(all(diff(analysis$time) >= 0))
    expect_true(all(diff(assessment$time) >= 0))
    expect_true(all(!assessment$time %in% analysis$time))
  })
})

# Rolling origin tests

test_that("rolling origin uses expanding analysis and fixed assessment windows", {
  skip_if_not_installed("rsample")

  rolling_data <- tibble::tibble(
    time = 1:10,
    value = rnorm(10)
  )

  resamples <- fastml:::make_rolling_origin_cv(
    rolling_data,
    block_col = "time",
    initial_window = 4,
    assess_window = 2,
    skip = 0
  )

  analysis_sizes <- purrr::map_int(resamples$splits, ~ nrow(rsample::analysis(.x)))
  assessment_sizes <- purrr::map_int(resamples$splits, ~ nrow(rsample::assessment(.x)))

  expect_equal(analysis_sizes[1], 4)
  expect_true(all(diff(analysis_sizes) >= 0))
  expect_true(all(assessment_sizes == 2))
})

# Nested CV tests

test_that("nested CV inner folds remain within outer training data", {
  skip_if_not_installed("rsample")

  set.seed(789)
  nested_data <- tibble::tibble(
    id = 1:18,
    x = runif(18),
    y = rnorm(18)
  )

  nested_resamples <- fastml:::make_nested_cv(
    nested_data,
    outer_folds = 3,
    inner_folds = 2,
    group_cols = NULL,
    strata = NULL
  )

  purrr::walk2(
    nested_resamples$splits,
    nested_resamples$inner_resamples,
    function(outer_split, inner_set) {
      outer_analysis_ids <- rsample::analysis(outer_split)$id
      outer_assessment_ids <- rsample::assessment(outer_split)$id

      purrr::walk(inner_set$splits, function(inner_split) {
        inner_analysis_ids <- rsample::analysis(inner_split)$id
        inner_assessment_ids <- rsample::assessment(inner_split)$id

        expect_true(all(inner_analysis_ids %in% outer_analysis_ids))
        expect_true(all(inner_assessment_ids %in% outer_analysis_ids))
        expect_length(intersect(inner_assessment_ids, outer_assessment_ids), 0)
      })
    }
  )
})

test_that("nested CV tuning uses inner folds and outer metrics match manual evaluation", {
  skip_if_not_installed("rsample")
  skip_if_not_installed("workflows")
  skip_if_not_installed("recipes")
  skip_if_not_installed("parsnip")
  skip_if_not_installed("yardstick")
  skip_if_not_installed("tune")
  skip_if_not_installed("dials")
  skip_if_not_installed("rpart")
  if (requireNamespace("censored", quietly = TRUE) &&
      !requireNamespace("pec", quietly = TRUE)) {
    skip("Package 'pec' is required for decision_tree when censored is installed.")
  }

  set.seed(101)
  tuning_data <- tibble::tibble(
    id = 1:24,
    x = runif(24, -1, 1),
    y = 3 * x + rnorm(24, sd = 0.1)
  )

  nested_resamples <- fastml:::make_nested_cv(
    tuning_data,
    outer_folds = 3,
    inner_folds = 3,
    group_cols = NULL,
    strata = NULL
  )

  recipe_spec <- recipes::recipe(y ~ x, data = tuning_data)
  model_spec <- parsnip::decision_tree(
    cost_complexity = tune::tune(),
    tree_depth = tune::tune()
  ) %>%
    parsnip::set_mode("regression") %>%
    parsnip::set_engine("rpart")

  workflow_spec <- workflows::workflow() %>%
    workflows::add_recipe(recipe_spec) %>%
    workflows::add_model(model_spec)

  metrics <- yardstick::metric_set(yardstick::rmse)

  nested_fit <- fastml:::fastml_run_nested_cv(
    workflow_spec = workflow_spec,
    nested_resamples = nested_resamples,
    train_data = tuning_data,
    label = "y",
    task = "regression",
    metric = "rmse",
    metrics = metrics,
    engine = "rpart",
    engine_args = list(),
    tune_params_template = dials::parameters(
      dials::cost_complexity(),
      dials::tree_depth()
    ),
    engine_tune_params = list(),
    tuning_strategy = "grid",
    tuning_iterations = 5,
    adaptive = FALSE,
    early_stopping = FALSE,
    ctrl_grid = tune::control_grid(save_pred = TRUE),
    ctrl_bayes = tune::control_bayes(save_pred = TRUE),
    ctrl_race = NULL,
    my_metrics = NULL,
    do_tuning = TRUE,
    event_class = NULL,
    class_threshold = 0.5,
    start_col = NULL,
    time_col = NULL,
    status_col = NULL,
    eval_times = NULL,
    at_risk_threshold = 0.1,
    seed = 202,
    update_params_fn = function(params, new) params,
    multiclass_auc = "macro",
    grid_levels = 3L
  )

  inner_results <- nested_fit$details$inner_results

  purrr::walk2(
    nested_resamples$splits,
    seq_along(inner_results),
    function(outer_split, idx) {
      outer_analysis_ids <- rsample::analysis(outer_split)$id
      outer_assessment_ids <- rsample::assessment(outer_split)$id

      tune_splits <- inner_results[[idx]]$splits
      purrr::walk(tune_splits, function(inner_split) {
        inner_analysis_ids <- rsample::analysis(inner_split)$id
        inner_assessment_ids <- rsample::assessment(inner_split)$id

        expect_true(all(inner_analysis_ids %in% outer_analysis_ids))
        expect_true(all(inner_assessment_ids %in% outer_analysis_ids))
        expect_length(intersect(inner_assessment_ids, outer_assessment_ids), 0)
      })
    }
  )

  outer_performance <- nested_fit$details$outer_performance
  expect_equal(sort(unique(outer_performance$outer_id)), sort(nested_resamples$id))

  purrr::walk2(
    nested_resamples$splits,
    seq_along(inner_results),
    function(outer_split, idx) {
      best_params <- tune::select_best(inner_results[[idx]], metric = "rmse")
      finalized <- tune::finalize_workflow(workflow_spec, best_params)
      outer_train <- rsample::analysis(outer_split)
      outer_assess <- rsample::assessment(outer_split)
      fitted <- workflows::fit(finalized, data = outer_train)
      preds <- predict(fitted, new_data = outer_assess)$.pred
      expected_rmse <- yardstick::rmse_vec(truth = outer_assess$y, estimate = preds)

      fold_id <- nested_resamples$id[idx]
      recorded <- outer_performance %>%
        dplyr::filter(outer_id == fold_id, .metric == "rmse")

      expect_equal(recorded$.estimate, expected_rmse)
    }
  )
})

# Stratification imbalance tests

test_that("stratified CV handles severe class imbalance (95/5)", {
  skip_if_not_installed("rsample")

  set.seed(123)
  # Create 95/5 imbalanced dataset with 100 observations
  n_majority <- 95
  n_minority <- 5
  imbalanced_data <- tibble::tibble(
    id = 1:(n_majority + n_minority),
    x = rnorm(n_majority + n_minority),
    outcome = factor(c(rep("majority", n_majority), rep("minority", n_minority)))
  )

  # Use stratified vfold_cv
  resamples <- rsample::vfold_cv(imbalanced_data, v = 5, strata = outcome)

  # Verify each fold has both classes represented
  purrr::walk(resamples$splits, function(split) {
    analysis_classes <- unique(rsample::analysis(split)$outcome)
    assessment_classes <- unique(rsample::assessment(split)$outcome)

    # Both classes should be present in analysis (training) set
    expect_length(analysis_classes, 2)
    # Assessment set should have at least minority class when possible
    # With 5 folds and 5 minority samples, each fold should have 1 minority
    expect_true("minority" %in% assessment_classes || length(assessment_classes) >= 1)
  })

  # Verify class proportions are approximately maintained (within reason)
  analysis_props <- purrr::map_dbl(resamples$splits, function(split) {
    analysis_data <- rsample::analysis(split)
    sum(analysis_data$outcome == "minority") / nrow(analysis_data)
  })

  # Proportions should be close to original 5%
  expect_true(all(analysis_props > 0.02 & analysis_props < 0.10))
})

test_that("stratified CV handles extreme imbalance (99/1)", {
  skip_if_not_installed("rsample")

  set.seed(456)
  # Create 99/1 imbalanced dataset
  n_majority <- 99
  n_minority <- 1
  extreme_data <- tibble::tibble(
    id = 1:(n_majority + n_minority),
    x = rnorm(n_majority + n_minority),
    outcome = factor(c(rep("majority", n_majority), rep("minority", n_minority)))
  )

  # Stratified CV with minority class < folds should either warn or handle gracefully
  # Different rsample versions may behave differently, so we test it doesn't error
  resamples <- tryCatch(
    rsample::vfold_cv(extreme_data, v = 5, strata = outcome),
    warning = function(w) {
      # If it warns, that's expected behavior
      suppressWarnings(rsample::vfold_cv(extreme_data, v = 5, strata = outcome))
    }
  )

  # The resamples should be created successfully

  expect_true(inherits(resamples, "rset"))
  expect_equal(nrow(resamples), 5)
})

test_that("grouped stratified CV handles imbalanced groups", {
  skip_if_not_installed("rsample")

  set.seed(789)
  # Create grouped data where some groups have only one class
  grouped_imbalanced <- tibble::tibble(
    id = 1:30,
    group = rep(paste0("g", 1:6), each = 5),
    strata = c(
      rep("A", 5),  # g1: all A
      rep("B", 5),  # g2: all B
      c("A", "A", "B", "B", "B"),  # g3: mixed
      c("A", "B", "A", "B", "A"),  # g4: mixed
      c("A", "A", "A", "B", "B"),  # g5: mixed
      rep("A", 5)   # g6: all A
    ),
    value = rnorm(30)
  )

  # When stratifying grouped CV, should handle uneven strata distribution

  # Expect a warning because strata is not constant within groups
  resamples <- expect_warning(
    fastml:::make_grouped_cv(
      grouped_imbalanced,
      group_cols = "group",
      v = 3,
      strata = "strata",
      repeats = 1
    ),
    "Proceeding without stratification"
  )

  # Verify groups remain intact
  purrr::walk(resamples$splits, function(split) {
    analysis_groups <- unique(rsample::analysis(split)$group)
    assessment_groups <- unique(rsample::assessment(split)$group)
    expect_length(intersect(analysis_groups, assessment_groups), 0)
  })
})