# Helper utilities for fastLPR test suite # Shared functions for setting up tests and validating results # ----------------------------------------------------------------------------- # Load fastlpr package or source files # ----------------------------------------------------------------------------- # Strategy: # 1. Try installed package first (proper Rcpp symbol registration) # 2. Fall back to source mode if package not installed # Note: Rcpp acceleration unavailable in source mode .fastlpr_loaded <- FALSE # Method 1: Use installed package (preferred - full Rcpp support) if (requireNamespace("fastlpr", quietly = TRUE)) { tryCatch({ library(fastlpr) .fastlpr_loaded <- TRUE }, error = function(e) NULL) } # Method 2: Fall back to sourcing R files (no Rcpp acceleration) if (!.fastlpr_loaded) { pkg_root <- getwd() while (!grepl("fastLPR_R$", basename(pkg_root)) && dirname(pkg_root) != pkg_root) { pkg_root <- dirname(pkg_root) } if (grepl("fastLPR_R$", basename(pkg_root))) { r_dir <- file.path(pkg_root, "R") if (dir.exists(r_dir)) { r_files <- list.files(r_dir, pattern = "\\.R$", full.names = TRUE) for (f in r_files) { source(f, local = FALSE) } } # Note: Rcpp DLL not loaded in source mode - symbol registration not possible # Tests will use pure R fallback (slower but still correct) } } rm(.fastlpr_loaded) # ----------------------------------------------------------------------------- # Global Tolerance Constants for Cross-Language Tests (Phase 2.2) # ----------------------------------------------------------------------------- # These tolerances are used across all cross-language (xl) tests to ensure # consistent error thresholds. See CLAUDE.md for pass criteria. #' Strict tolerance for exact numerical matching (machine precision) #' Use for: Pure R internal consistency tests TOL_STRICT <- 1e-12 #' Numerical tolerance for algorithm comparisons #' Use for: R-to-R comparisons, self-consistency tests TOL_NUMERICAL <- 1e-6 #' Cross-language tolerance for R vs MATLAB comparisons #' Use for: test-xl-*.R files, cross-validation tests #' Per CLAUDE.md: BW MaxErr < 0.02, Mean MaxErr < 0.05 TOL_CROSSLANG <- list( bw_maxerr = 0.02, # Bandwidth selection threshold mean_maxerr = 0.05, # Mean estimate max error var_maxerr = 0.05, # Variance estimate max error lcv_relerr = 0.01, # LCV relative error (1%) gcv_relerr = 0.05, # GCV relative error (5%) speed_ratio = 8.0 # Max allowed speed ratio vs MATLAB ) #' NUFFT accuracy level tolerances (expected max error for each accuracy level) ACCURACY_TOLERANCES <- list( `4` = 1e-3, `6` = 1e-5, `8` = 1e-7, `9` = 1e-9, `12` = 1e-11 ) # ----------------------------------------------------------------------------- # Skip Functions for CRAN Compatibility (Phase 2.1) # ----------------------------------------------------------------------------- #' Skip test if MATLAB reference data directory is not available #' #' This function checks if the MATLAB reference data files exist and #' skips the test if they are not available (e.g., on CRAN where #' reference .mat files are not included in the package). #' #' @param ref_subdir Subdirectory under fastLPR/tests/refs/ (default: "crosslang_e2e") #' @return NULL invisibly if refs available, otherwise skips the test #' #' @examples #' test_that("XL test", { #' skip_if_no_matlab_refs() #' # ... test code that requires MATLAB reference data #' }) skip_if_no_matlab_refs <- function(ref_subdir = "crosslang_e2e") { # Check if R.matlab package is available if (!requireNamespace("R.matlab", quietly = TRUE)) { skip("R.matlab package not available - required for cross-language tests") } # Construct reference directory path ref_dir <- tryCatch({ file.path(find_root(), "fastLPR", "tests", "refs", ref_subdir) }, error = function(e) { skip(paste("Cannot locate reference directory:", e$message)) return(NULL) }) # Check if directory exists if (is.null(ref_dir) || !dir.exists(ref_dir)) { skip(paste("MATLAB reference directory not found:", ref_subdir, "- skipping cross-language test (expected on CRAN)")) } # Check if at least one .mat file exists mat_files <- tryCatch({ list.files(ref_dir, pattern = "\\.mat$", full.names = FALSE) }, error = function(e) { character(0) }) if (length(mat_files) == 0) { skip(paste("No .mat reference files found in:", ref_subdir, "- skipping cross-language test")) } invisible(NULL) } #' Skip test if running on CRAN (NOT_CRAN != "true") #' #' Convenience wrapper that combines testthat::skip_on_cran() with #' custom messaging for cross-language tests. #' #' @return NULL invisibly if not on CRAN, otherwise skips skip_xl_on_cran <- function() { # Check NOT_CRAN environment variable not_cran <- Sys.getenv("NOT_CRAN", unset = "") if (tolower(not_cran) != "true") { skip("Skipping cross-language test on CRAN (set NOT_CRAN=true to run)") } invisible(NULL) } #' Load MATLAB reference data from .mat files #' #' @param filename Path to .mat file (relative to project root) #' @return List containing MATLAB data load_matlab_reference <- function(filename) { if (!requireNamespace("R.matlab", quietly = TRUE)) { skip("R.matlab package not available") } full_path <- file.path(find_root(), filename) if (!file.exists(full_path)) { skip(paste("Reference data not found:", filename)) } R.matlab::readMat(full_path) } #' Find project root directory #' #' @return Path to project root find_root <- function() { # Start from current working directory path <- getwd() # Look for fastLPR_R directory while (!grepl("fastLPR_R$", path) && dirname(path) != path) { path <- dirname(path) } # Navigate to jss-code root if (grepl("fastLPR_R", path)) { return(dirname(path)) } # Fallback: assume we're already in tests/testthat return(file.path(dirname(dirname(dirname(getwd()))))) } #' Compute error metrics between R and reference values #' #' @param r_val R implementation output #' @param ref_val Reference (MATLAB/Python) output #' @return List with error metrics compute_error_metrics <- function(r_val, ref_val) { r_vec <- as.vector(r_val) ref_vec <- as.vector(ref_val) abs_err <- abs(r_vec - ref_vec) max_abs_err <- max(abs_err, na.rm = TRUE) mean_abs_err <- mean(abs_err, na.rm = TRUE) ref_mean <- mean(abs(ref_vec), na.rm = TRUE) max_err_pct <- 100 * max_abs_err / (ref_mean + 1e-10) list( max_abs_err = max_abs_err, mean_abs_err = mean_abs_err, max_err_pct = max_err_pct, ref_mean = ref_mean, rmse = sqrt(mean((r_vec - ref_vec)^2, na.rm = TRUE)) ) } #' Check if values match within tolerance #' #' @param r_val R implementation output #' @param ref_val Reference output #' @param tolerance Maximum allowed percentage error (default: 1.0%) #' @return TRUE if values match within tolerance values_match <- function(r_val, ref_val, tolerance = 1.0) { metrics <- compute_error_metrics(r_val, ref_val) metrics$max_err_pct <= tolerance } #' Generate synthetic test data #' #' @param n Number of samples #' @param d Number of dimensions #' @param fun Function to generate true values (default: sin) #' @param noise_sd Standard deviation of noise (default: 0.1) #' @return List with x, y_true, y_noisy generate_test_data <- function(n = 100, d = 1, fun = NULL, noise_sd = 0.1) { set.seed(42) # For reproducibility if (d == 1) { x <- matrix(runif(n, 0, 2*pi), ncol = 1) if (is.null(fun)) fun <- sin y_true <- fun(x) } else if (d == 2) { x <- matrix(runif(n * 2, -1, 1), ncol = 2) if (is.null(fun)) { fun <- function(x) sin(pi * x[, 1]) * cos(pi * x[, 2]) } y_true <- fun(x) } else if (d == 3) { x <- matrix(runif(n * 3, -1, 1), ncol = 3) if (is.null(fun)) { fun <- function(x) sin(pi * x[, 1]) * cos(pi * x[, 2]) * sin(pi * x[, 3]) } y_true <- fun(x) } else { stop("Only d = 1, 2, 3 supported") } y_noisy <- matrix(y_true + rnorm(n, 0, noise_sd), ncol = 1) list( x = x, y_true = y_true, y = y_noisy, n = n, d = d ) } #' Validate structure of KDE result #' #' @param kde KDE result object #' @param expected_fields Expected field names #' @return TRUE if valid, otherwise throws error validate_kde_structure <- function(kde, expected_fields = c("fhat", "h", "xlist")) { expect_true(is.list(kde)) for (field in expected_fields) { expect_true(field %in% names(kde), info = paste("Missing field:", field)) } expect_true(!is.null(kde$fhat)) expect_true(!is.null(kde$h)) expect_true(length(kde$h) >= 1) TRUE } #' Validate structure of regression result #' #' @param regs Regression result object #' @param expected_fields Expected field names #' @return TRUE if valid, otherwise throws error validate_regression_structure <- function(regs, expected_fields = c("yhat", "gcv_yhat")) { expect_true(is.list(regs)) for (field in expected_fields) { expect_true(field %in% names(regs), info = paste("Missing field:", field)) } expect_true(!is.null(regs$yhat)) if ("gcv_yhat" %in% names(regs)) { expect_true(!is.null(regs$gcv_yhat$gcv_m)) expect_true(!is.null(regs$gcv_yhat$mse)) } TRUE } #' Check if density integrates to approximately 1 #' #' @param kde KDE result with fhat and xlist #' @param tolerance Tolerance for integration (default: 0.1) #' @return TRUE if integral is close to 1 check_density_integral <- function(kde, tolerance = 0.1) { d <- length(kde$xlist) if (d == 1) { grid <- kde$xlist[[1]] dx <- diff(grid)[1] integral <- sum(kde$fhat) * dx } else if (d == 2) { dx1 <- diff(kde$xlist[[1]])[1] dx2 <- diff(kde$xlist[[2]])[1] integral <- sum(kde$fhat) * dx1 * dx2 } else if (d == 3) { dx1 <- diff(kde$xlist[[1]])[1] dx2 <- diff(kde$xlist[[2]])[1] dx3 <- diff(kde$xlist[[3]])[1] integral <- sum(kde$fhat) * dx1 * dx2 * dx3 } else { stop("Only 1D, 2D, 3D supported") } abs(integral - 1.0) < tolerance }