test_that("dense SIMPLS: bigPLSR.simpls.solve = 'tri' beats 'solve' (bench)", { skip_on_cran() skip_on_ci() skip_if_not_installed("bench") dat <- make_dense_data(n = 6000L, p = 512L, m = 8L, seed = 1L) # Warm-up and bench both solvers with min_time to avoid 0.000s warmup_dense_simpls(dat) res <- suppressWarnings(bench::mark( chol = withr::with_options(list(bigPLSR.simpls.solve = "chol"), bigPLSR::pls_fit(dat$X, dat$Y, ncomp = 12L, backend = "arma", algorithm = "simpls", scores = "r")), tri = withr::with_options(list(bigPLSR.simpls.solve = "tri"), bigPLSR::pls_fit(dat$X, dat$Y, ncomp = 12L, backend = "arma", algorithm = "simpls", scores = "r")), qr = withr::with_options(list(bigPLSR.simpls.solve = "qr"), bigPLSR::pls_fit(dat$X, dat$Y, ncomp = 12L, backend = "arma", algorithm = "simpls", scores = "r")), solve = withr::with_options(list(bigPLSR.simpls.solve = "solve"), bigPLSR::pls_fit(dat$X, dat$Y, ncomp = 12L, backend = "arma", algorithm = "simpls", scores = "r")), iterations = 8, min_time = .1, check = FALSE )) t_chol <- bench_pick_median(res, "chol") t_qr <- bench_pick_median(res, "qr") t_tri <- bench_pick_median(res, "tri") t_solve <- bench_pick_median(res, "solve") speedup <- t_solve / t_tri msg <- sprintf("Observed speedup = %.2fx (tri=%.3fs, solve=%.3fs)", speedup, t_tri, t_solve) ## Be conservative to avoid flakiness across BLAS/CPUs expect_gt(speedup, .95, label = msg) message("1/6") }) test_that("dense SIMPLS: 'tri' and 'solve' produce identical coefficients", { skip_on_cran() skip_on_ci() dat <- make_dense_data() withr::with_options(list(bigPLSR.simpls.solve = "tri"), { fit_tri <- bigPLSR::pls_fit(dat$X, dat$Y, ncomp = 12L, backend = "arma", algorithm = "simpls", scores = "r") }) withr::with_options(list(bigPLSR.simpls.solve = "solve"), { fit_sol <- bigPLSR::pls_fit(dat$X, dat$Y, ncomp = 12L, backend = "arma", algorithm = "simpls", scores = "r") }) expect_equal(unclass(fit_tri$coefficients), unclass(fit_sol$coefficients), tolerance = 1e-10) message("2/6") }) test_that("bigmem SIMPLS: scores='none' is faster than 'r' (no streaming)", { skip_on_cran() skip_on_ci() skip_if_not_installed("bench") dat <- make_bigmem_data() warmup_bigmem_simplansin <- tryCatch({ warmup_bigmem_simpls(dat, 8192L); TRUE }, error = function(e) FALSE) # Even if warmup fails on tiny CI boxes, still attempt benchmark guarded res <- suppressWarnings(bench::mark( none = withr::with_options(list(bigPLSR.simpls.stream_chunk = 8192L), bigPLSR::pls_fit(dat$Xbm, dat$Ybm, ncomp = 8L, backend = "bigmem", algorithm = "simpls", scores = "none")), r = withr::with_options(list(bigPLSR.simpls.stream_chunk = 8192L), bigPLSR::pls_fit(dat$Xbm, dat$Ybm, ncomp = 8L, backend = "bigmem", algorithm = "simpls", scores = "r")), iterations = 6, min_time = 1, check = FALSE )) t_none <- bench_pick_median(res, "none") t_r <- bench_pick_median(res, "r") msg <- sprintf("scores='none' (%.3fs) should be <= scores='r' (%.3fs)", t_none, t_r) expect_lte(t_none, t_r + 1e-6, label = msg) message("3/6") }) test_that("bigmem SIMPLS: chunk size affects runtime (sanity check)", { skip_on_cran() skip_on_ci() skip_if_not_installed("bench") set.seed(1639) dat <- make_bigmem_data() warmup_bigmem_simpls(dat, 8192L) set.seed(1639) res <- suppressWarnings(bench::mark( ch_4k = withr::with_options(list(bigPLSR.simpls.stream_chunk = 4096L), bigPLSR::pls_fit(dat$Xbm, dat$Ybm, ncomp = 8L, backend = "bigmem", algorithm = "simpls", scores = "none")), ch_64k = withr::with_options(list(bigPLSR.simpls.stream_chunk = 65536L), bigPLSR::pls_fit(dat$Xbm, dat$Ybm, ncomp = 8L, backend = "bigmem", algorithm = "simpls", scores = "none")), iterations = 6, min_time = 1, check = FALSE )) t4 <- bench_pick_median(res, "ch_4k") t64 <- bench_pick_median(res, "ch_64k") # We don't assert which one is faster (hardware dependent), # only that the timings are not numerically identical. expect_true(abs(t4 - t64) > 1e-4, info = sprintf("chunk 4k=%.3fs vs 64k=%.3fs", t4, t64)) message("4/6") }) test_that("dense cross: C fused path matches R and can be faster", { skip_on_cran() skip_on_ci() skip_if_not_installed("bench") set.seed(2025) n <- 3000L; p <- 256L; m <- 12L; A <- 12L X <- matrix(rnorm(n * p), n, p) Y <- matrix(rnorm(n * m), n, m) # correctness parity (always run) fit_R <- withr::with_options(list(bigPLSR.simpls.cross = "R"), bigPLSR::pls_fit(X, Y, ncomp = A, backend = "arma", algorithm = "simpls", scores = "none")) fit_C <- withr::with_options(list(bigPLSR.simpls.cross = "dense_cxx"), bigPLSR::pls_fit(X, Y, ncomp = A, backend = "arma", algorithm = "simpls", scores = "none")) expect_equal(fit_C$coefficients, fit_R$coefficients, tolerance = 1e-12, ignore_attr = TRUE) expect_equal(as.numeric(fit_C$intercept), as.numeric(fit_R$intercept), tolerance = 1e-12) message("5/6") }) test_that("dense cross: C fused path matches R and can be faster", { skip_on_cran() skip_on_ci() skip_if_not_installed("bench") set.seed(2025) n <- 3000L; p <- 256L; m <- 12L; A <- 12L X <- matrix(rnorm(n * p), n, p) Y <- matrix(rnorm(n * m), n, m) res <- suppressWarnings(bench::mark( pls = withr::with_options(list(bigPLSR.simpls.cross = "R"), pls::plsr(method = "simpls", Y~X,ncomp=A)), R = withr::with_options(list(bigPLSR.simpls.cross = "R"), bigPLSR::pls_fit(X, Y, ncomp = A, backend = "arma", algorithm = "simpls", scores = "none")), CXX = withr::with_options(list(bigPLSR.simpls.cross = "dense_cxx"), bigPLSR::pls_fit(X, Y, ncomp = A, backend = "arma", algorithm = "simpls", scores = "none")), CXX_SYRK = withr::with_options(list(bigPLSR.simpls.cross = "dense_cxx", bigPLSR.simpls.use_syrk = TRUE), bigPLSR::pls_fit(X, Y, ncomp = A, backend = "arma", algorithm = "simpls", scores = "none")), iterations = 8, min_time = 0.5, check = FALSE )) med <- function(tag) median(as.numeric(res$time[names(res$expression) == tag][[1]])) speedup <- med("R") / med("CXX") testthat::expect_gt(as.numeric(speedup), .95) message("6/6 Done") })