# Tests for ROBOS_RM_SMOTE()
# Run with: devtools::test() or testthat::test_file("tests/testthat/test-rm_smote.R")

# Helper: create a small imbalanced dataset
# Helper: build a small, shuffled, two-class imbalanced data frame.
#
# Arguments:
#   n_maj  number of "negative" rows, drawn from N(0, 1) on x1 and x2
#   n_min  number of regular "positive" rows, drawn from N(3, 1)
#   n_out  number of "positive" outlier rows, drawn from N(15, 0.1)
#   seed   value passed to set.seed() before any sampling
#
# Returns a data frame with numeric columns x1, x2 and a factor column
# `class` (levels "negative", "positive"), with rows in random order.
#
# Any of the three counts may be 0; empty groups are simply left out.
# set.seed() acts on the global RNG, so the RNG state after this helper
# returns is fixed by `seed` and the requested counts.
make_imbalanced <- function(n_maj = 100, n_min = 20, n_out = 3, seed = 42) {
  set.seed(seed)
  class_levels <- c("negative", "positive")

  # rep(label, n) rather than a length-1 factor keeps data.frame() valid for
  # n == 0, where recycling a single value against empty columns would fail.
  make_group <- function(n, centre, spread, label) {
    data.frame(
      x1    = rnorm(n, mean = centre, sd = spread),
      x2    = rnorm(n, mean = centre, sd = spread),
      class = factor(rep(label, n), levels = class_levels)
    )
  }

  # list() evaluates its arguments left to right, so the draw order is
  # majority, then minority, then outliers.
  groups <- list(
    make_group(n_maj, 0, 1, "negative"),
    make_group(n_min, 3, 1, "positive"),
    make_group(n_out, 15, 0.1, "positive")
  )

  non_empty <- Filter(function(g) nrow(g) > 0, groups)
  if (length(non_empty) == 0) {
    # Nothing requested at all: hand back a zero-row frame with the schema.
    return(groups[[1]])
  }

  df <- do.call(rbind, non_empty)
  # Shuffle so the classes are not stored in contiguous runs.
  df[sample.int(nrow(df)), ]
}

# ── Output structure ───────────────────────────────────────────────────────

test_that("ROBOS_RM_SMOTE returns a data frame", {
  # Oversample the default fixture and check only the container type of
  # whatever comes back.
  input <- make_imbalanced()
  oversampled <- ROBOS_RM_SMOTE(
    input,
    target = "positive",
    eIR = 1
  )
  expect_s3_class(oversampled, "data.frame")
})

test_that("output has the same columns as input", {
  # Column names, including their order, must match the input's.
  input <- make_imbalanced()
  expected_cols <- colnames(input)
  oversampled <- ROBOS_RM_SMOTE(
    input,
    target = "positive",
    eIR = 1
  )
  expect_equal(colnames(oversampled), expected_cols)
})

test_that("output has more rows than input", {
  # The result must be strictly larger than what went in.
  input <- make_imbalanced()
  n_input <- nrow(input)
  oversampled <- ROBOS_RM_SMOTE(
    input,
    target = "positive",
    eIR = 1
  )
  expect_gt(nrow(oversampled), n_input)
})

# ── Class balance ──────────────────────────────────────────────────────────

test_that("eIR = 1 produces a balanced dataset", {
  # Fixture without outlier rows: 100 negatives against 20 positives.
  input <- make_imbalanced(n_maj = 100, n_min = 20, n_out = 0)
  oversampled <- ROBOS_RM_SMOTE(
    input,
    target = "positive",
    eIR = 1
  )

  # Count rows carrying a given class label in the oversampled data.
  count_label <- function(label) {
    sum(oversampled$class == label)
  }

  # After balancing, both classes should have the same number of rows.
  expect_equal(count_label("negative"), count_label("positive"))
})

test_that("minority class count increases after ROBOS_RM_SMOTE", {
  # Count "positive" rows in a data frame with a `class` column.
  n_positive <- function(df) {
    sum(df$class == "positive")
  }

  input <- make_imbalanced()
  before <- n_positive(input)
  oversampled <- ROBOS_RM_SMOTE(
    input,
    target = "positive",
    eIR = 1
  )
  after <- n_positive(oversampled)

  expect_gt(after, before)
})

test_that("majority class count is unchanged", {
  # Count "negative" rows in a data frame with a `class` column.
  n_negative <- function(df) {
    sum(df$class == "negative")
  }

  input <- make_imbalanced()
  before <- n_negative(input)
  oversampled <- ROBOS_RM_SMOTE(
    input,
    target = "positive",
    eIR = 1
  )
  after <- n_negative(oversampled)

  # Oversampling must neither add nor drop majority rows.
  expect_equal(after, before)
})

# ── eIR parameter ─────────────────────────────────────────────────────────

test_that("eIR > 1 results in partial balancing", {
  input <- make_imbalanced(n_maj = 100, n_min = 20, n_out = 0)

  # Same data, two target ratios; the calls run in this order (eIR 1, then 2).
  fully_balanced <- ROBOS_RM_SMOTE(
    input,
    target = "positive",
    eIR = 1
  )
  partly_balanced <- ROBOS_RM_SMOTE(
    input,
    target = "positive",
    eIR = 2
  )

  # eIR = 2 should add fewer synthetic rows than eIR = 1.
  n_full <- nrow(fully_balanced)
  n_partial <- nrow(partly_balanced)
  expect_lt(n_partial, n_full)
})

# ── dup_size parameter ─────────────────────────────────────────────────────

test_that("dup_size controls number of synthetic samples", {
  input <- make_imbalanced(n_maj = 100, n_min = 20, n_out = 0)
  oversampled <- ROBOS_RM_SMOTE(
    input,
    target = "positive",
    dup_size = 2
  )

  # Rows added on top of the input are taken to be the synthetic ones.
  n_added <- nrow(oversampled) - nrow(input)

  # dup_size = 2 with 20 minority rows -> 2 * 20 = 40 synthetic samples.
  expect_equal(n_added, 40)
})

# ── Covariance methods ─────────────────────────────────────────────────────

test_that("all cov_method options run without error", {
  input <- make_imbalanced(n_maj = 80, n_min = 20, n_out = 0)

  # Every estimator name listed here is passed through `cov_method` in turn;
  # the only requirement is that the call completes without an error.
  estimators <- c(
    "mcd", "mve", "mest", "mmest",
    "sde", "sest", "ogk"
  )
  for (estimator in estimators) {
    expect_no_error(
      ROBOS_RM_SMOTE(
        input,
        target = "positive",
        eIR = 1,
        cov_method = estimator
      )
    )
  }
})

# ── Weight functions ───────────────────────────────────────────────────────

test_that("all weight_func options run without error", {
  input <- make_imbalanced()

  # Weight function ids 1, 2 and 3 are each tried once; the call only has
  # to complete without raising an error.
  for (weight_id in seq_len(3)) {
    expect_no_error(
      ROBOS_RM_SMOTE(
        input,
        target = "positive",
        eIR = 1,
        weight_func = weight_id
      )
    )
  }
})

# ── Outlier robustness ─────────────────────────────────────────────────────

test_that("ROBOS_RM_SMOTE with outliers produces fewer outlier-derived synthetics (weight_func=1)", {
  # Intent (per the original test author): with weight_func = 1 the outlying
  # minority points are expected to get weight 0, so they cannot be selected
  # as parents and every synthetic sample should stay in the normal minority
  # region.
  set.seed(42)
  class_levels <- c("negative", "positive")

  # 20 regular minority points around (3, 3).
  normal_min <- data.frame(
    x1 = rnorm(20, 3, 1), x2 = rnorm(20, 3, 1),
    class = factor("positive", levels = class_levels)
  )
  # Two hand-placed minority outliers far from the regular cluster.
  outlier_min <- data.frame(
    x1 = c(20, 21), x2 = c(20, 21),
    class = factor("positive", levels = class_levels)
  )
  # 80 majority points around the origin.
  majority <- data.frame(
    x1 = rnorm(80), x2 = rnorm(80),
    class = factor("negative", levels = class_levels)
  )
  dt <- rbind(majority, normal_min, outlier_min)

  result <- ROBOS_RM_SMOTE(
    dt,
    target = "positive", eIR = 1,
    weight_func = 1, cov_method = "mcd"
  )

  # Synthetic observations are taken to be the rows added after the original
  # ones. NOTE(review): this assumes ROBOS_RM_SMOTE appends synthetic rows at
  # the end of its output -- confirm against the implementation.
  synthetic <- tail(result, nrow(result) - nrow(dt))

  # Guard against a vacuous pass: all() over zero rows is TRUE, so make sure
  # at least one synthetic row was actually produced.
  expect_gt(nrow(synthetic), 0)

  # No synthetic x1 value should reach the outlier region (x1 >= 15).
  expect_true(
    all(synthetic$x1 < 15),
    info = "Synthetics should not be generated near outlier region"
  )
})

# ── Input validation ───────────────────────────────────────────────────────

test_that("missing 'class' column raises error", {
  # Two numeric predictors and no `class` column at all.
  no_class <- data.frame(
    x1 = rnorm(50),
    x2 = rnorm(50)
  )
  # The error message is expected to mention "class".
  expect_error(
    ROBOS_RM_SMOTE(no_class),
    regexp = "class"
  )
})

test_that("unknown target raises error", {
  # "unknown" is not one of the fixture's class labels.
  input <- make_imbalanced()
  expect_error(
    ROBOS_RM_SMOTE(input, target = "unknown"),
    regexp = "not found"
  )
})

test_that("non-data-frame input raises error", {
  # A plain 10 x 2 integer matrix stands in for "not a data frame".
  expect_error(
    ROBOS_RM_SMOTE(matrix(1:20, nrow = 10, ncol = 2)),
    regexp = "data frame"
  )
})

test_that("eIR >= IR raises error", {
  # 100 negatives / 20 positives gives IR = 5, so requesting eIR = 6 asks
  # for more imbalance than the data already has and should be rejected.
  input <- make_imbalanced(n_maj = 100, n_min = 20, n_out = 0)
  expect_error(
    ROBOS_RM_SMOTE(input, target = "positive", eIR = 6),
    regexp = "imbalance ratio"
  )
})

test_that("negative k raises error", {
  # k = -1 must be rejected; the message is matched against "positive".
  input <- make_imbalanced()
  expect_error(
    ROBOS_RM_SMOTE(input, target = "positive", k = -1),
    regexp = "positive"
  )
})

# ── Reproducibility ────────────────────────────────────────────────────────

test_that("same seed produces identical results", {
  input <- make_imbalanced()

  # Reseed immediately before each run so both start from the same RNG state.
  run_with_seed <- function() {
    set.seed(99)
    ROBOS_RM_SMOTE(input, target = "positive", eIR = 1)
  }

  first_run <- run_with_seed()
  second_run <- run_with_seed()
  expect_equal(first_run, second_run)
})