set.seed(42)

# Synthetic numeric series + dates
demo_ts  <- data.frame(y = cumsum(rnorm(160, 0.05, 1)))
demo_dt  <- seq.Date(as.Date("2022-01-01"), by = "day", length.out = nrow(demo_ts))

# Synthetic categorical (binary)
cat_vals <- factor(sample(c("A", "B"), size = 160, replace = TRUE))
demo_cat <- data.frame(x = cat_vals)

# ---------- Existing smoke ----------
test_that("numeric pipeline works", {
  out <- segen(demo_ts, seq_len = 10, similarity = 0.7, n_windows = 3, n_samp = 2, dates = demo_dt, seed = 123)
  expect_true(is.list(out))
  expect_true(is.data.frame(out$history))
  expect_true(nrow(out$history) >= 1)
})

# ---------- (1) Distance methods ----------
test_that("correlation distance runs & is symmetric", {
  # small reframed matrix
  x <- matrix(rnorm(5 * 8), nrow = 5)
  D1 <- segen:::compute_distance(x, method = "correlation", use_cache = FALSE)
  D2 <- segen:::compute_distance(x, method = "correlation", use_cache = TRUE)
  expect_true(is.matrix(D1))
  expect_true(all(abs(D1 - t(D1)) < 1e-10))
  expect_equal(D1, D2, tolerance = 0) # same result with/without cache
  # Integrate through segen
  out <- segen(demo_ts, seq_len = 8, similarity = 0.5, dist_method = "correlation", n_windows = 2, n_samp = 1, seed = 7)
  expect_true(nrow(out$history) == 1)
})

test_that("dtw distance path works when dtw installed", {
  skip_if_not_installed("dtw")
  out <- segen(demo_ts, seq_len = 6, similarity = 0.6, dist_method = "dtw", n_windows = 2, n_samp = 1, seed = 9)
  expect_true(nrow(out$history) == 1)
})

# ---------- (2) Kernel weighting + Dirichlet draws ----------
test_that("reproducibility with fixed seed (kernel + Dirichlet)", {
  o1 <- segen(demo_ts, seq_len = 8, similarity = 0.6, dist_method = "euclidean", n_windows = 3, n_samp = 1, seed = 999)
  o2 <- segen(demo_ts, seq_len = 8, similarity = 0.6, dist_method = "euclidean", n_windows = 3, n_samp = 1, seed = 999)
  expect_equal(o1$history, o2$history, tolerance = 0)
})

# ---------- (3) Conformal CI symmetry ----------
test_that("conformal intervals are symmetric around median (numeric)", {
  out <- segen(demo_ts, seq_len = 12, similarity = 0.7, dist_method = "euclidean", n_windows = 3, n_samp = 1, seed = 101, dates = demo_dt)
  qp <- out$best_model$predictions[[1]]
  lower_b <- "10%"  # for ci=0.8 -> (1-ci)/2=0.1 => "10%"
  upper_b <- "90%"
  expect_true(all(abs((qp[, upper_b] - qp[, "50%"]) - (qp[, "50%"] - qp[, lower_b])) < 1e-8))
})

test_that("conformal intervals are clipped to [0,1] for binary", {
  out <- segen(demo_cat, seq_len = 8, similarity = 0.6, n_windows = 2, n_samp = 1, seed = 202)
  qp <- out$best_model$predictions[[1]]
  lower_b <- "10%"; upper_b <- "90%"
  expect_true(all(qp[, lower_b] >= 0 - 1e-12))
  expect_true(all(qp[, upper_b] <= 1 + 1e-12))
})

# ---------- (7) Parallel determinism ----------
test_that("parallel vs sequential gives identical history with same seed", {
  if (!(requireNamespace("furrr", quietly = TRUE) && requireNamespace("future", quietly = TRUE))) {
    skip("furrr/future not installed")
  }
  o_seq <- segen(demo_ts, seq_len = 8, similarity = 0.5, n_windows = 3, n_samp = 2, seed = 321, use_parallel = FALSE)
  o_par <- segen(demo_ts, seq_len = 8, similarity = 0.5, n_windows = 3, n_samp = 2, seed = 321, use_parallel = TRUE, parallel_workers = 2)
  expect_equal(o_seq$history, o_par$history, tolerance = 0.1)
})

# ---------- Distance caching determinism ----------
test_that("compute_distance caching returns identical matrices", {
  set.seed(1)
  X <- matrix(rnorm(6 * 10), nrow = 6)
  D_a <- segen:::compute_distance(X, "euclidean", use_cache = TRUE)
  D_b <- segen:::compute_distance(X, "euclidean", use_cache = TRUE) # should hit cache when digest present
  expect_equal(D_a, D_b, tolerance = 0)
  expect_true(all(diag(D_a) < 1e-12))
  expect_true(all(abs(D_a - t(D_a)) < 1e-12))
})