test_that("rkhs_xy: predict parity on dense RBF example + orthonormal scores", { skip_on_cran() skip_on_ci() set.seed(42) n <- 60; p <- 6; m <- 2 X <- matrix(rnorm(n * p), n, p) # non-linear Y tied to X f1 <- sin(X[,1]) + 0.4 * X[,2]^2 f2 <- cos(X[,3]) - 0.3 * X[,4]^2 Y <- cbind(f1, f2) + matrix(rnorm(n * m, sd = 0.05), n, m) # Kernel & ridge settings (make them explicit so the test is stable) opts_old <- options( bigPLSR.rkhs_xy.kernel_x = "rbf", bigPLSR.rkhs_xy.gamma_x = 0.5, # gamma for RBF(X,X) bigPLSR.rkhs_xy.degree_x = 3L, bigPLSR.rkhs_xy.coef0_x = 1.0, bigPLSR.rkhs_xy.kernel_y = "linear", bigPLSR.rkhs_xy.gamma_y = 1.0, # unused for linear bigPLSR.rkhs_xy.degree_y = 3L, bigPLSR.rkhs_xy.coef0_y = 1.0, bigPLSR.rkhs_xy.lambda_x = 1e-6, bigPLSR.rkhs_xy.lambda_y = 1e-6, bigPLSR.store_X_max = 10000L ) on.exit(options(opts_old), add = TRUE) fit <- pls_fit(X, Y, ncomp = 3, backend = "arma", algorithm = "rkhs_xy", tol = 1e-8) # basic structure expect_s3_class(fit, "big_plsr") expect_true(all(c("dual_coef", "scores", "intercept", "ncomp") %in% names(fit))) expect_equal(nrow(fit$dual_coef), n) expect_equal(ncol(fit$dual_coef), m) expect_true(fit$ncomp >= 1) # helper: RBF kernel + double-centering wrt train rbf <- function(A, B, gamma) { a2 <- rowSums(A*A); b2 <- rowSums(B*B) D2 <- outer(a2, b2, "+") - 2 * (A %*% t(B)) exp(-gamma * D2) } H_center_cross <- function(K_cross, K_train) { # K_train must be the uncentered Gram(X,X) n <- nrow(K_train) one_n <- matrix(1/n, n, n) colm <- colMeans(K_train) # 1 x n mu <- mean(K_train) # scalar # Center cross-kernel (rows = X*, cols = Xtrain) rowm_cross <- rowMeans(K_cross) # n* x 1 K_cross - outer(rowm_cross, rep(1, n)) - matrix(rep(colm, each = nrow(K_cross)), nrow(K_cross), n, byrow = FALSE) + mu } # Manual “train” prediction using the *same* centering stats used in training # (pull from the model; fall back to options only for kernel hyperparams) kernel_x <- fit$kernel_x %||% getOption("bigPLSR.rkhs_xy.kernel_x", "rbf") gamma_x <- fit$gamma_x %||% getOption("bigPLSR.rkhs_xy.gamma_x", 1 / ncol(X)) degree_x <- fit$degree_x %||% getOption("bigPLSR.rkhs_xy.degree_x", 3L) coef0_x <- fit$coef0_x %||% getOption("bigPLSR.rkhs_xy.coef0_x", 1.0) # Build *uncentered* train Gram with exactly the same kernel hyperparams K_train <- bigPLSR:::.bigPLSR_make_kernel(X, X, kernel_x, gamma_x, degree_x, coef0_x) # Use the training centering stats saved by the trainer kstats <- fit$kstats_x %||% list(r = fit$kx_colmeans %||% fit$k_colmeans, g = fit$kx_grandmean %||% fit$k_grandmean) stopifnot(!is.null(kstats$r), !is.null(kstats$g)) # Double-center the train Gram with the model’s statistics Kc_train <- .bigPLSR_center_cross_kernel(K_train, r_train = kstats$r, g_train = kstats$g) Yhat_train_manual <- Kc_train %*% fit$dual_coef + matrix(fit$intercept, n, m, byrow = TRUE) # predict() on training X should match manual formula closely Yhat_train_pred <- predict(fit, X) expect_equal(Yhat_train_pred, Yhat_train_manual, tolerance = 1e-6) # Scores orthonormality (approx): T' T ~ I G <- crossprod(fit$scores) I <- diag(ncol(G)) expect_lt(max(abs(G - I)), 1e-5) })