# ===========================================================================
# Tests for C++ functions
# ===========================================================================

# ---------------------------------------------------------------------------
# checkTemporalOrder
# ---------------------------------------------------------------------------

test_that("checkTemporalOrder detects look-ahead bias", {
  # Timestamps: 1-10 for rows
  timestamps <- as.numeric(1:10)
  train_idx <- 1:5   # timestamps 1-5

test_idx <- 6:10   # timestamps 6-10

  # No violations - test is after train
  result <- checkTemporalOrder(timestamps, train_idx, test_idx)
  expect_false(result$has_violations)
  expect_equal(result$n_violations, 0)
  expect_equal(result$max_train_time, 5)
})

test_that("checkTemporalOrder finds test data before train data", {
  timestamps <- as.numeric(1:10)
  train_idx <- 5:10  # timestamps 5-10
  test_idx <- 1:4    # timestamps 1-4 (BEFORE train!)

  result <- checkTemporalOrder(timestamps, train_idx, test_idx)
  expect_true(result$has_violations)
  expect_equal(result$n_violations, 4)
  expect_equal(sort(result$violation_indices), 1:4)
  expect_equal(result$max_train_time, 10)
})

test_that("checkTemporalOrder handles mixed temporal violations", {
  timestamps <- as.numeric(c(1, 5, 2, 8, 3, 9, 4, 10, 6, 7))
  train_idx <- c(2, 4, 6, 8)  # timestamps 5, 8, 9, 10
  test_idx <- c(1, 3, 5, 7, 9, 10)  # timestamps 1, 2, 3, 4, 6, 7

  result <- checkTemporalOrder(timestamps, train_idx, test_idx)
  expect_true(result$has_violations)
  # All test timestamps < max_train_time (10)
  expect_equal(result$n_violations, 6)
  expect_equal(result$max_train_time, 10)
})

test_that("checkTemporalOrder handles NA timestamps", {
  timestamps <- as.numeric(c(1, 2, NA, 4, 5, NA, 7, 8, 9, 10))
  train_idx <- 1:5
  test_idx <- 6:10

  result <- checkTemporalOrder(timestamps, train_idx, test_idx)
  # max_train_time should be 5 (ignoring NA at position 3)
  expect_equal(result$max_train_time, 5)
  # Violations: positions 7,8,9,10 have times 7,8,9,10 > 5, but position 6 is NA
  # So no violations since test times are after train
  expect_false(result$has_violations)
})

test_that("checkTemporalOrder with partial look-ahead", {
  timestamps <- as.numeric(1:10)
  train_idx <- 3:7   # timestamps 3-7
  test_idx <- c(1, 2, 8, 9, 10)  # 1,2 are before, 8,9,10 are after

  result <- checkTemporalOrder(timestamps, train_idx, test_idx)
  expect_true(result$has_violations)
  expect_equal(result$n_violations, 2)  # positions 1 and 2
  expect_equal(sort(result$violation_indices), c(1, 2))
  expect_equal(result$max_train_time, 7)
})


# ---------------------------------------------------------------------------
# checkGroupOverlap
# ---------------------------------------------------------------------------

test_that("checkGroupOverlap detects no overlap when groups are separate", {
  groups <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)
  train_idx <- c(1, 2, 3, 4, 5, 6)   # groups 1 and 2
  test_idx <- c(7, 8, 9, 10, 11, 12) # groups 3 and 4

  result <- checkGroupOverlap(groups, train_idx, test_idx)
  expect_false(result$has_overlap)
  expect_equal(result$n_overlapping_groups, 0)
  expect_equal(result$n_affected_rows, 0)
})

test_that("checkGroupOverlap detects group overlap", {
  groups <- c(1, 1, 1, 2, 2, 2, 1, 1, 3, 3)  # Group 1 in both train and test
  train_idx <- 1:6    # groups 1, 1, 1, 2, 2, 2
  test_idx <- 7:10    # groups 1, 1, 3, 3

  result <- checkGroupOverlap(groups, train_idx, test_idx)
  expect_true(result$has_overlap)
  expect_equal(result$n_overlapping_groups, 1)  # group 1
  expect_true(1 %in% result$overlapping_groups)
  expect_equal(result$n_affected_rows, 2)  # rows 7 and 8
  expect_equal(sort(result$affected_indices), c(7, 8))
})

test_that("checkGroupOverlap handles multiple overlapping groups", {
  groups <- c(1, 2, 3, 4, 1, 2, 5, 6)
  train_idx <- 1:4    # groups 1, 2, 3, 4
  test_idx <- 5:8     # groups 1, 2, 5, 6

  result <- checkGroupOverlap(groups, train_idx, test_idx)
  expect_true(result$has_overlap)
  expect_equal(result$n_overlapping_groups, 2)  # groups 1 and 2
  expect_true(all(c(1, 2) %in% result$overlapping_groups))
  expect_equal(result$n_affected_rows, 2)  # rows 5 and 6
})

test_that("checkGroupOverlap handles complete overlap", {
  groups <- rep(1:2, each = 5)  # All same groups
  train_idx <- 1:5   # group 1s
  test_idx <- 6:10   # group 2s... wait, let me fix

  # Actually test complete overlap
  groups <- rep(1, 10)  # All same group
  train_idx <- 1:5
  test_idx <- 6:10

  result <- checkGroupOverlap(groups, train_idx, test_idx)
  expect_true(result$has_overlap)
  expect_equal(result$n_overlapping_groups, 1)
  expect_equal(result$n_affected_rows, 5)
})


# ---------------------------------------------------------------------------
# computeCorrelation
# ---------------------------------------------------------------------------

test_that("computeCorrelation computes Pearson correlation correctly", {
  x <- c(1, 2, 3, 4, 5)
  y <- c(2, 4, 6, 8, 10)  # Perfect positive correlation

  result <- computeCorrelation(x, y)
  expect_equal(result, 1.0, tolerance = 1e-10)
})

test_that("computeCorrelation handles negative correlation", {
  x <- c(1, 2, 3, 4, 5)
  y <- c(10, 8, 6, 4, 2)  # Perfect negative correlation

  result <- computeCorrelation(x, y)
  expect_equal(result, -1.0, tolerance = 1e-10)
})

test_that("computeCorrelation handles zero correlation", {
  set.seed(42)
  x <- rnorm(1000)
  y <- rnorm(1000)

  result <- computeCorrelation(x, y)
  # Should be close to 0 for independent random variables

  expect_true(abs(result) < 0.1)
})

test_that("computeCorrelation handles NA values", {
  x <- c(1, 2, NA, 4, 5)
  y <- c(2, 4, 6, 8, 10)

  result <- computeCorrelation(x, y)
  # Should compute correlation excluding NA
  expect_true(!is.na(result))
  expect_equal(result, 1.0, tolerance = 1e-10)
})

test_that("computeCorrelation handles NA in both vectors", {
  x <- c(1, NA, 3, 4, 5)
  y <- c(2, 4, NA, 8, 10)

  result <- computeCorrelation(x, y)
  # Should exclude rows where either is NA
  expect_true(!is.na(result))
})

test_that("computeCorrelation returns NA for constant vectors", {
  x <- c(5, 5, 5, 5, 5)  # Zero variance
  y <- c(1, 2, 3, 4, 5)

  result <- computeCorrelation(x, y)
  expect_true(is.na(result))
})

test_that("computeCorrelation returns NA with insufficient data", {
  x <- c(1, NA, NA, NA, NA)
  y <- c(2, NA, NA, NA, NA)

  result <- computeCorrelation(x, y)
  expect_true(is.na(result))  # Only 1 complete case
})

test_that("computeCorrelation matches base R cor", {
  set.seed(123)
  x <- rnorm(100)
  y <- x * 2 + rnorm(100, sd = 0.5)

  cpp_result <- computeCorrelation(x, y)
  r_result <- cor(x, y)

  expect_equal(cpp_result, r_result, tolerance = 1e-10)
})

test_that("computeCorrelation matches cor with NAs", {
  set.seed(456)
  x <- rnorm(100)
  y <- x * 2 + rnorm(100, sd = 0.5)
  x[sample(100, 10)] <- NA
  y[sample(100, 10)] <- NA

  cpp_result <- computeCorrelation(x, y)
  r_result <- cor(x, y, use = "pairwise.complete.obs")

  expect_equal(cpp_result, r_result, tolerance = 1e-10)
})