# CV parity tests — structural invariants against caret/rsample reference.
#
# The backtester for the backtester. Compares ml_split() holdout and CV
# against reference R packages on STRUCTURAL PROPERTIES:
#
#   1. Fold count matches
#   2. Fold sizes balanced (±1 row)
#   3. No validation overlap (partition property)
#   4. Complete coverage (every row appears in exactly one valid fold)
#   5. Stratification preserves class ratios per fold
#   6. Group non-overlap (no group in both train and valid)
#   7. Temporal ordering (train indices < valid indices)
#   8. Train+valid = all data (no rows lost or invented)
#
# We do NOT compare exact indices — implementations legitimately differ.
# We compare the properties any correct implementation must satisfy.

# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------

make_clf_200 <- function() {
  withr::local_seed(42L)
  data.frame(
    x1 = stats::rnorm(200L),
    x2 = stats::rnorm(200L),
    x3 = stats::rnorm(200L),
    x4 = stats::rnorm(200L),
    x5 = stats::rnorm(200L),
    target = sample(c(0L, 1L), 200L, replace = TRUE)
  )
}

make_reg_200 <- function() {
  withr::local_seed(42L)
  x <- stats::rnorm(200L)
  data.frame(
    x1 = x,
    x2 = stats::rnorm(200L),
    x3 = stats::rnorm(200L),
    target = 2 * x + stats::rnorm(200L) * 0.3
  )
}

make_group_200 <- function() {
  withr::local_seed(42L)
  data.frame(
    group_id = rep(paste0("g", seq_len(20L)), each = 10L),
    x1 = stats::rnorm(200L),
    x2 = stats::rnorm(200L),
    target = sample(c(0L, 1L), 200L, replace = TRUE)
  )
}

make_temporal_500 <- function() {
  withr::local_seed(42L)
  data.frame(
    date = seq_len(500L),
    x1 = stats::rnorm(500L),
    x2 = stats::rnorm(500L),
    target = stats::rnorm(500L) * 10 + 100
  )
}


# ---------------------------------------------------------------------------
# Invariant helpers
# ---------------------------------------------------------------------------

assert_fold_count <- function(folds, k) {
  expect_equal(length(folds), k, label = "fold count")
}

assert_balanced_fold_sizes <- function(folds, n_total, k) {
  expected <- n_total / k
  for (i in seq_along(folds)) {
    size <- length(folds[[i]]$valid)
    expect_true(
      abs(size - expected) <= 1.5,
      label = sprintf("Fold %d: valid size %d, expected ~%.0f", i, size, expected)
    )
  }
}

assert_no_valid_overlap <- function(folds) {
  seen <- integer(0)
  for (i in seq_along(folds)) {
    idx <- folds[[i]]$valid
    overlap <- intersect(seen, idx)
    expect_length(overlap, 0L)
    seen <- c(seen, idx)
  }
}

assert_complete_coverage <- function(folds, n_total) {
  all_valid <- integer(0)
  for (f in folds) {
    all_valid <- c(all_valid, f$valid)
  }
  expect_equal(sort(unique(all_valid)), seq_len(n_total))
}

assert_train_valid_disjoint <- function(folds) {
  for (i in seq_along(folds)) {
    overlap <- intersect(folds[[i]]$train, folds[[i]]$valid)
    expect_length(overlap, 0L)
  }
}

assert_train_valid_cover_all <- function(folds, n_total) {
  for (i in seq_along(folds)) {
    all_idx <- sort(union(folds[[i]]$train, folds[[i]]$valid))
    expect_equal(all_idx, seq_len(n_total))
  }
}

assert_group_non_overlap <- function(folds, group_col) {
  for (i in seq_along(folds)) {
    train_grps <- unique(group_col[folds[[i]]$train])
    valid_grps <- unique(group_col[folds[[i]]$valid])
    leak <- intersect(train_grps, valid_grps)
    expect_length(leak, 0L)
  }
}

assert_temporal_ordering <- function(folds) {
  for (i in seq_along(folds)) {
    expect_true(
      max(folds[[i]]$train) < min(folds[[i]]$valid),
      label = sprintf("Fold %d: train max < valid min", i)
    )
  }
}


# ---------------------------------------------------------------------------
# 1. Holdout split — structural invariants
# ---------------------------------------------------------------------------

test_that("holdout: partitions cover full dataset", {
  df <- make_clf_200()
  s  <- ml_split(df, "target", seed = 42L)
  expect_equal(nrow(s$train) + nrow(s$valid) + nrow(s$test), nrow(df))
})

test_that("holdout: partitions are disjoint", {
  df <- make_clf_200()
  s  <- ml_split(df, "target", seed = 42L)
  train_idx <- as.integer(rownames(s$train))
  valid_idx <- as.integer(rownames(s$valid))
  test_idx  <- as.integer(rownames(s$test))
  expect_length(intersect(train_idx, valid_idx), 0L)
  expect_length(intersect(train_idx, test_idx), 0L)
  expect_length(intersect(valid_idx, test_idx), 0L)
})

test_that("holdout: partition sizes match ratio ±5%", {
  df <- make_clf_200()
  n  <- nrow(df)
  s  <- ml_split(df, "target", seed = 42L)
  expect_true(abs(nrow(s$train) / n - 0.6) < 0.05)
  expect_true(abs(nrow(s$valid) / n - 0.2) < 0.05)
  expect_true(abs(nrow(s$test)  / n - 0.2) < 0.05)
})

test_that("holdout: dev = train + valid", {
  df <- make_clf_200()
  s  <- ml_split(df, "target", seed = 42L)
  expect_equal(nrow(s$dev), nrow(s$train) + nrow(s$valid))
})


# ---------------------------------------------------------------------------
# 2. K-fold CV — structural invariants
# ---------------------------------------------------------------------------

for (k in c(2L, 3L, 5L, 10L)) {
  test_that(sprintf("cv(%d-fold): fold count matches", k), {
    df <- make_clf_200()
    cv <- ml_split(df, "target", seed = 42L, folds = k)
    assert_fold_count(cv$folds, k)
  })

  test_that(sprintf("cv(%d-fold): fold sizes balanced", k), {
    df <- make_clf_200()
    cv <- ml_split(df, "target", seed = 42L, folds = k)
    n_dev <- nrow(cv$train)
    assert_balanced_fold_sizes(cv$folds, n_dev, k)
  })

  test_that(sprintf("cv(%d-fold): no validation overlap", k), {
    df <- make_clf_200()
    cv <- ml_split(df, "target", seed = 42L, folds = k)
    assert_no_valid_overlap(cv$folds)
  })

  test_that(sprintf("cv(%d-fold): complete coverage", k), {
    df <- make_clf_200()
    cv <- ml_split(df, "target", seed = 42L, folds = k)
    n_dev <- nrow(cv$train)
    assert_complete_coverage(cv$folds, n_dev)
  })

  test_that(sprintf("cv(%d-fold): train/valid disjoint", k), {
    df <- make_clf_200()
    cv <- ml_split(df, "target", seed = 42L, folds = k)
    assert_train_valid_disjoint(cv$folds)
  })

  test_that(sprintf("cv(%d-fold): train+valid = all data", k), {
    df <- make_clf_200()
    cv <- ml_split(df, "target", seed = 42L, folds = k)
    n_dev <- nrow(cv$train)
    assert_train_valid_cover_all(cv$folds, n_dev)
  })
}


# ---------------------------------------------------------------------------
# 3. CV vs caret reference (structural parity)
# ---------------------------------------------------------------------------

test_that("cv vs caret::createFolds — same fold sizes", {
  skip_if_not_installed("caret")
  df <- make_clf_200()
  k  <- 5L

  # ml (folds on dev, test held out)
  cv <- ml_split(df, "target", seed = 42L, folds = k)
  n_dev <- nrow(cv$train)
  ml_sizes <- sort(vapply(cv$folds, function(f) length(f$valid), integer(1L)))

  # caret reference (folds on full data for comparison)
  withr::local_seed(42L)
  caret_folds <- caret::createFolds(df$target, k = k, list = TRUE, returnTrain = FALSE)
  caret_sizes <- sort(vapply(caret_folds, length, integer(1L)))

  # ml folds should be balanced relative to dev size
  expected_ml <- n_dev / k
  expected <- nrow(df) / k
  for (s in ml_sizes) {
    expect_true(abs(s - expected_ml) <= 2, label = sprintf("ml fold size %d", s))
  }
  for (s in caret_sizes) {
    expect_true(abs(s - expected) <= 2, label = sprintf("caret fold size %d", s))
  }
})

test_that("cv vs caret::createFolds — both satisfy invariants", {
  skip_if_not_installed("caret")
  df <- make_clf_200()
  k  <- 5L

  # ml invariants (folds on dev, test held out)
  cv <- ml_split(df, "target", seed = 42L, folds = k)
  n_dev <- nrow(cv$train)
  assert_fold_count(cv$folds, k)
  assert_no_valid_overlap(cv$folds)
  assert_complete_coverage(cv$folds, n_dev)
  assert_train_valid_disjoint(cv$folds)

  # caret invariants (convert to our format)
  withr::local_seed(42L)
  caret_idx <- caret::createFolds(df$target, k = k, list = TRUE, returnTrain = FALSE)
  caret_folds <- lapply(seq_len(k), function(i) {
    valid <- caret_idx[[i]]
    train <- setdiff(seq_len(nrow(df)), valid)
    list(train = train, valid = valid)
  })
  assert_fold_count(caret_folds, k)
  assert_no_valid_overlap(caret_folds)
  assert_complete_coverage(caret_folds, nrow(df))
  assert_train_valid_disjoint(caret_folds)
})


# ---------------------------------------------------------------------------
# 4. Temporal CV — structural invariants
# ---------------------------------------------------------------------------

test_that("temporal cv: fold count matches", {
  df <- make_temporal_500()
  cv <- ml_split(df, "target", time = "date", folds = 5L)
  assert_fold_count(cv$folds, 5L)
})

test_that("temporal cv: chronological ordering (train < valid)", {
  df <- make_temporal_500()
  cv <- ml_split(df, "target", time = "date", folds = 5L)
  assert_temporal_ordering(cv$folds)
})

test_that("temporal cv: expanding window (train grows)", {
  df <- make_temporal_500()
  cv <- ml_split(df, "target", time = "date", folds = 5L)
  prev_size <- 0L
  for (f in cv$folds) {
    expect_true(length(f$train) >= prev_size)
    prev_size <- length(f$train)
  }
})

test_that("temporal cv: train/valid disjoint", {
  df <- make_temporal_500()
  cv <- ml_split(df, "target", time = "date", folds = 5L)
  assert_train_valid_disjoint(cv$folds)
})

test_that("temporal cv vs rsample::rolling_origin — both preserve order", {
  skip_if_not_installed("rsample")
  df <- make_temporal_500()

  # ml
  cv <- ml_split(df, "target", time = "date", folds = 3L)
  assert_temporal_ordering(cv$folds)

  # rsample reference — rolling_origin preserves temporal ordering
  ro <- rsample::rolling_origin(df, initial = 250L, assess = 50L, skip = 50L)
  for (i in seq_len(nrow(ro))) {
    split_i   <- ro$splits[[i]]
    # Use the date column to verify temporal ordering
    train_dates <- rsample::analysis(split_i)$date
    valid_dates <- rsample::assessment(split_i)$date
    expect_true(max(train_dates) < min(valid_dates))
  }
})


# ---------------------------------------------------------------------------
# 5. Group CV — structural invariants
# ---------------------------------------------------------------------------

test_that("group cv: fold count matches", {
  df  <- make_group_200()
  cv  <- ml_split(df, "target", seed = 42L, groups = "group_id", folds = 4L)
  assert_fold_count(cv$folds, 4L)
})

test_that("group cv: no group in both train and valid", {
  df  <- make_group_200()
  cv  <- ml_split(df, "target", seed = 42L, groups = "group_id", folds = 4L)
  dev <- cv$train
  assert_group_non_overlap(cv$folds, dev$group_id)
})

test_that("group cv: train/valid disjoint", {
  df  <- make_group_200()
  cv  <- ml_split(df, "target", seed = 42L, groups = "group_id", folds = 4L)
  assert_train_valid_disjoint(cv$folds)
})

test_that("group cv: complete coverage", {
  df  <- make_group_200()
  cv  <- ml_split(df, "target", seed = 42L, groups = "group_id", folds = 4L)
  n_dev <- nrow(cv$train)
  assert_complete_coverage(cv$folds, n_dev)
})

test_that("group cv: groups distributed ~equally", {
  df  <- make_group_200()
  k   <- 4L
  cv  <- ml_split(df, "target", seed = 42L, groups = "group_id", folds = k)
  dev <- cv$train
  n_grps  <- length(unique(dev$group_id))
  expected <- n_grps / k
  for (f in cv$folds) {
    n_grps_fold <- length(unique(dev$group_id[f$valid]))
    expect_true(abs(n_grps_fold - expected) <= 2)
  }
})

test_that("group holdout: no group leakage", {
  df <- make_group_200()
  s  <- ml_split(df, "target", seed = 42L, groups = "group_id")
  train_g <- unique(s$train$group_id)
  valid_g <- unique(s$valid$group_id)
  test_g  <- unique(s$test$group_id)
  expect_length(intersect(train_g, valid_g), 0L)
  expect_length(intersect(train_g, test_g), 0L)
  expect_length(intersect(valid_g, test_g), 0L)
})


# ---------------------------------------------------------------------------
# 6. Determinism
# ---------------------------------------------------------------------------

test_that("cv deterministic with same seed", {
  df  <- make_clf_200()
  cv1 <- ml_split(df, "target", seed = 42L, folds = 5L)
  cv2 <- ml_split(df, "target", seed = 42L, folds = 5L)
  for (i in seq_along(cv1$folds)) {
    expect_identical(cv1$folds[[i]]$valid, cv2$folds[[i]]$valid)
  }
})

test_that("group cv deterministic with same seed", {
  df  <- make_group_200()
  cv1 <- ml_split(df, "target", seed = 42L, groups = "group_id", folds = 4L)
  cv2 <- ml_split(df, "target", seed = 42L, groups = "group_id", folds = 4L)
  for (i in seq_along(cv1$folds)) {
    expect_identical(cv1$folds[[i]]$valid, cv2$folds[[i]]$valid)
  }
})

test_that("temporal cv deterministic (no seed)", {
  df  <- make_temporal_500()
  cv1 <- ml_split(df, "target", time = "date", folds = 3L)
  cv2 <- ml_split(df, "target", time = "date", folds = 3L)
  for (i in seq_along(cv1$folds)) {
    expect_identical(cv1$folds[[i]]$valid, cv2$folds[[i]]$valid)
  }
})


# ---------------------------------------------------------------------------
# 7. Stratification ratio parity
# ---------------------------------------------------------------------------

test_that("stratified split preserves class ratio within ±10%", {
  df <- make_clf_200()
  s  <- ml_split(df, "target", seed = 42L, stratify = TRUE)
  global_ratio <- mean(df$target)
  train_ratio  <- mean(s$train$target)
  valid_ratio  <- mean(s$valid$target)
  test_ratio   <- mean(s$test$target)
  expect_true(abs(train_ratio - global_ratio) < 0.10)
  expect_true(abs(valid_ratio - global_ratio) < 0.10)
  expect_true(abs(test_ratio  - global_ratio) < 0.10)
})

test_that("stratified cv preserves class ratio per fold within ±15%", {
  skip_if_not_installed("caret")
  df <- make_clf_200()
  k  <- 5L
  global_ratio <- mean(df$target)

  # caret stratified folds (must pass factor target for stratification)
  withr::local_seed(42L)
  caret_idx <- caret::createFolds(factor(df$target), k = k, list = TRUE, returnTrain = FALSE)
  for (i in seq_len(k)) {
    fold_ratio <- mean(df$target[caret_idx[[i]]])
    expect_true(
      abs(fold_ratio - global_ratio) < 0.15,
      label = sprintf("caret fold %d ratio %.3f vs global %.3f", i, fold_ratio, global_ratio)
    )
  }
})


# ---------------------------------------------------------------------------
# 8. Regression CV invariants
# ---------------------------------------------------------------------------

test_that("regression cv satisfies all structural invariants", {
  df <- make_reg_200()
  k  <- 5L
  cv <- ml_split(df, "target", seed = 42L, folds = k)
  n_dev <- nrow(cv$train)
  assert_fold_count(cv$folds, k)
  assert_balanced_fold_sizes(cv$folds, n_dev, k)
  assert_no_valid_overlap(cv$folds)
  assert_complete_coverage(cv$folds, n_dev)
  assert_train_valid_disjoint(cv$folds)
  assert_train_valid_cover_all(cv$folds, n_dev)
})