set.seed(42) # Synthetic numeric series + dates demo_ts <- data.frame(y = cumsum(rnorm(160, 0.05, 1))) demo_dt <- seq.Date(as.Date("2022-01-01"), by = "day", length.out = nrow(demo_ts)) # Synthetic categorical (binary) cat_vals <- factor(sample(c("A", "B"), size = 160, replace = TRUE)) demo_cat <- data.frame(x = cat_vals) # ---------- Existing smoke ---------- test_that("numeric pipeline works", { out <- segen(demo_ts, seq_len = 10, similarity = 0.7, n_windows = 3, n_samp = 2, dates = demo_dt, seed = 123) expect_true(is.list(out)) expect_true(is.data.frame(out$history)) expect_true(nrow(out$history) >= 1) }) # ---------- (1) Distance methods ---------- test_that("correlation distance runs & is symmetric", { # small reframed matrix x <- matrix(rnorm(5 * 8), nrow = 5) D1 <- segen:::compute_distance(x, method = "correlation", use_cache = FALSE) D2 <- segen:::compute_distance(x, method = "correlation", use_cache = TRUE) expect_true(is.matrix(D1)) expect_true(all(abs(D1 - t(D1)) < 1e-10)) expect_equal(D1, D2, tolerance = 0) # same result with/without cache # Integrate through segen out <- segen(demo_ts, seq_len = 8, similarity = 0.5, dist_method = "correlation", n_windows = 2, n_samp = 1, seed = 7) expect_true(nrow(out$history) == 1) }) test_that("dtw distance path works when dtw installed", { skip_if_not_installed("dtw") out <- segen(demo_ts, seq_len = 6, similarity = 0.6, dist_method = "dtw", n_windows = 2, n_samp = 1, seed = 9) expect_true(nrow(out$history) == 1) }) # ---------- (2) Kernel weighting + Dirichlet draws ---------- test_that("reproducibility with fixed seed (kernel + Dirichlet)", { o1 <- segen(demo_ts, seq_len = 8, similarity = 0.6, dist_method = "euclidean", n_windows = 3, n_samp = 1, seed = 999) o2 <- segen(demo_ts, seq_len = 8, similarity = 0.6, dist_method = "euclidean", n_windows = 3, n_samp = 1, seed = 999) expect_equal(o1$history, o2$history, tolerance = 0) }) # ---------- (3) Conformal CI symmetry ---------- test_that("conformal intervals are symmetric around median (numeric)", { out <- segen(demo_ts, seq_len = 12, similarity = 0.7, dist_method = "euclidean", n_windows = 3, n_samp = 1, seed = 101, dates = demo_dt) qp <- out$best_model$predictions[[1]] lower_b <- "10%" # for ci=0.8 -> (1-ci)/2=0.1 => "10%" upper_b <- "90%" expect_true(all(abs((qp[, upper_b] - qp[, "50%"]) - (qp[, "50%"] - qp[, lower_b])) < 1e-8)) }) test_that("conformal intervals are clipped to [0,1] for binary", { out <- segen(demo_cat, seq_len = 8, similarity = 0.6, n_windows = 2, n_samp = 1, seed = 202) qp <- out$best_model$predictions[[1]] lower_b <- "10%"; upper_b <- "90%" expect_true(all(qp[, lower_b] >= 0 - 1e-12)) expect_true(all(qp[, upper_b] <= 1 + 1e-12)) }) # ---------- (7) Parallel determinism ---------- test_that("parallel vs sequential gives identical history with same seed", { if (!(requireNamespace("furrr", quietly = TRUE) && requireNamespace("future", quietly = TRUE))) { skip("furrr/future not installed") } o_seq <- segen(demo_ts, seq_len = 8, similarity = 0.5, n_windows = 3, n_samp = 2, seed = 321, use_parallel = FALSE) o_par <- segen(demo_ts, seq_len = 8, similarity = 0.5, n_windows = 3, n_samp = 2, seed = 321, use_parallel = TRUE, parallel_workers = 2) expect_equal(o_seq$history, o_par$history, tolerance = 0.1) }) # ---------- Distance caching determinism ---------- test_that("compute_distance caching returns identical matrices", { set.seed(1) X <- matrix(rnorm(6 * 10), nrow = 6) D_a <- segen:::compute_distance(X, "euclidean", use_cache = TRUE) D_b <- segen:::compute_distance(X, "euclidean", use_cache = TRUE) # should hit cache when digest present expect_equal(D_a, D_b, tolerance = 0) expect_true(all(diag(D_a) < 1e-12)) expect_true(all(abs(D_a - t(D_a)) < 1e-12)) })