# ============================================================================= # TEST-GATE-01: SVD & PCA Baseline Tests # Phase 0 safety net — must pass before any restructuring begins. # ============================================================================= options(RcppML.threads = 1) options(RcppML.verbose = FALSE) options(RcppML.gpu = FALSE) library(Matrix) # --------------------------------------------------------------------------- # Shared test data # --------------------------------------------------------------------------- set.seed(42) m <- 100; n <- 80; k_true <- 5 A_dense <- matrix(abs(rnorm(m * n)), m, n) A_sparse <- as(Matrix::rsparsematrix(m, n, density = 0.3), "dgCMatrix") A_sparse@x <- abs(A_sparse@x) # Give them dimnames for the dimnames test rownames(A_dense) <- paste0("row", 1:m) colnames(A_dense) <- paste0("col", 1:n) rownames(A_sparse) <- paste0("row", 1:m) colnames(A_sparse) <- paste0("col", 1:n) # Small iris data for fast prcomp comparison data(iris) A_iris <- as.matrix(iris[, 1:4]) # =========================================================================== # Basic factorization tests # =========================================================================== test_that("svd() returns valid rank-5 factorization (dense)", { s <- RcppML::svd(A_dense, k = k_true, method = "deflation", maxit = 200, tol = 1e-5, seed = 1, test_fraction = 0) expect_s4_class(s, "svd") expect_equal(ncol(s@u), k_true) expect_equal(length(s@d), k_true) expect_equal(ncol(s@v), k_true) expect_equal(nrow(s@u), m) expect_equal(nrow(s@v), n) # Singular values should be positive and in decreasing order expect_true(all(s@d > 0)) expect_true(all(diff(s@d) <= 0)) }) test_that("svd() returns valid rank-5 factorization (sparse)", { s <- RcppML::svd(A_sparse, k = k_true, method = "deflation", maxit = 200, tol = 1e-5, seed = 1, test_fraction = 0) expect_s4_class(s, "svd") expect_equal(ncol(s@u), k_true) expect_equal(length(s@d), k_true) expect_equal(ncol(s@v), k_true) expect_true(all(s@d > 0)) }) test_that("svd() reconstruction has bounded residual (dense)", { s <- RcppML::svd(A_dense, k = 10, method = "deflation", maxit = 200, tol = 1e-6, seed = 1, test_fraction = 0) A_hat <- reconstruct(s) # With 10 factors, reconstruction should explain a decent fraction residual_frac <- sum((A_dense - A_hat)^2) / sum(A_dense^2) expect_lt(residual_frac, 0.5) # At least 50% explained }) test_that("svd() reconstruction has bounded residual (sparse)", { s <- RcppML::svd(A_sparse, k = 10, method = "deflation", maxit = 200, tol = 1e-6, seed = 1, test_fraction = 0) A_hat <- reconstruct(s) A_sp_dense <- as.matrix(A_sparse) residual_frac <- sum((A_sp_dense - A_hat)^2) / sum(A_sp_dense^2) expect_lt(residual_frac, 0.6) }) # =========================================================================== # center = TRUE (PCA mode) should match prcomp eigenvalues # =========================================================================== test_that("svd() with center=TRUE produces meaningful PCA", { k <- 3 # RcppML centers by row means, not column means like prcomp. # Instead of comparing to prcomp, verify SVD properties. s <- RcppML::svd(A_iris, k = k, center = TRUE, method = "lanczos", seed = 1) expect_s4_class(s, "svd") expect_equal(length(s@d), k) expect_true(all(s@d > 0)) expect_true(all(diff(s@d) <= 0)) # decreasing expect_true(s@misc$centered) # Reconstruction should be close to original A_hat <- reconstruct(s) mse <- mean((A_iris - A_hat)^2) expect_lt(mse, 1.0) # reasonable reconstruction }) # =========================================================================== # Method-specific tests # =========================================================================== test_that("svd(method='deflation') returns orthogonal U,V", { s <- RcppML::svd(A_dense, k = k_true, method = "deflation", maxit = 200, tol = 1e-6, seed = 1, test_fraction = 0) # U'U should be close to identity UtU <- crossprod(s@u) expect_equal(UtU, diag(k_true), tolerance = 0.05) # V'V should be close to identity VtV <- crossprod(s@v) expect_equal(VtV, diag(k_true), tolerance = 0.05) }) test_that("svd(method='irlba') converges for sparse input", { s <- RcppML::svd(A_sparse, k = k_true, method = "irlba", seed = 1) expect_s4_class(s, "svd") expect_equal(ncol(s@u), k_true) expect_true(all(s@d > 0)) }) test_that("svd(method='krylov') agrees with deflation on singular values", { s_kry <- RcppML::svd(A_dense, k = 3, method = "krylov", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) s_def <- RcppML::svd(A_dense, k = 3, method = "deflation", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) # Singular values should be close (same subspace, different algorithms) expect_equal(s_kry@d, s_def@d, tolerance = 0.1) }) test_that("svd(method='lanczos') agrees with deflation on singular values", { s_lan <- RcppML::svd(A_sparse, k = 3, method = "lanczos", seed = 1) s_def <- RcppML::svd(A_sparse, k = 3, method = "deflation", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) expect_equal(s_lan@d, s_def@d, tolerance = 0.1) }) test_that("svd(method='randomized') has bounded residual", { s <- RcppML::svd(A_dense, k = k_true, method = "randomized", seed = 1) expect_s4_class(s, "svd") expect_equal(ncol(s@u), k_true) A_hat <- reconstruct(s) residual_frac <- sum((A_dense - A_hat)^2) / sum(A_dense^2) expect_lt(residual_frac, 0.5) }) # =========================================================================== # Convenience aliases # =========================================================================== test_that("pca() is an alias for svd() with center=TRUE", { s <- pca(A_iris, k = 3, method = "deflation", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) expect_s4_class(s, "svd") expect_true(s@misc$centered) }) test_that("svd() with L1 penalty applies sparsity to V", { set.seed(99) A_sp <- matrix(abs(rnorm(100 * 50)), 100, 50) sp <- RcppML::svd(A_sp, k = 3, L1 = 1.0, method = "deflation", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) expect_s4_class(sp, "svd") expect_equal(ncol(sp@v), 3L) }) test_that("svd() with nonneg applies non-negativity constraint (deflation)", { set.seed(77) A_nn <- matrix(abs(rnorm(80 * 40)), 80, 40) nn <- RcppML::svd(A_nn, k = 3, nonneg = TRUE, method = "deflation", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) expect_s4_class(nn, "svd") # Note: deflation applies Gram-Schmidt reorthogonalization AFTER nonneg # projection, which can reintroduce negatives. This is a known limitation. # For strict non-negativity, use method="krylov" instead. # Here we just check reasonable reconstruction quality. A_hat <- reconstruct(nn) residual_frac <- sum((A_nn - A_hat)^2) / sum(A_nn^2) expect_lt(residual_frac, 0.5) }) test_that("svd() with nonneg via krylov produces non-negative factors", { set.seed(77) A_nn <- matrix(abs(rnorm(80 * 40)), 80, 40) nn <- RcppML::svd(A_nn, k = 3, nonneg = TRUE, method = "krylov", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) expect_s4_class(nn, "svd") expect_true(all(nn@v >= -1e-8), info = "krylov nonneg: V should be non-negative") expect_true(all(nn@u >= -1e-8), info = "krylov nonneg: U should be non-negative") }) test_that("svd() with L1 via krylov applies sparsity", { set.seed(99) A_sp <- matrix(abs(rnorm(100 * 50)), 100, 50) sp <- RcppML::svd(A_sp, k = 3, L1 = 1.0, method = "krylov", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) expect_s4_class(sp, "svd") expect_equal(ncol(sp@v), 3L) # L1 should induce some zero entries in V expect_gt(mean(sp@v == 0), 0.01) }) test_that("svd() krylov+nonneg+L1 combined constraints", { set.seed(42) A_combo <- matrix(abs(rnorm(80 * 40)), 80, 40) res <- RcppML::svd(A_combo, k = 3, nonneg = TRUE, L1 = c(0, 0.5), method = "krylov", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) expect_s4_class(res, "svd") expect_true(all(res@u >= -1e-8), info = "krylov nonneg+L1: U non-negative") expect_true(all(res@v >= -1e-8), info = "krylov nonneg+L1: V non-negative") # L1 on V should induce some sparsity expect_gt(mean(res@v == 0), 0.01) }) test_that("krylov vs deflation nonneg quality is comparable", { set.seed(77) A_nn <- matrix(abs(rnorm(80 * 40)), 80, 40) defl <- RcppML::svd(A_nn, k = 3, nonneg = TRUE, method = "deflation", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) kry <- RcppML::svd(A_nn, k = 3, nonneg = TRUE, method = "krylov", seed = 1, maxit = 200, tol = 1e-6, test_fraction = 0) # Both should reconstruct to similar quality A_hat_defl <- reconstruct(defl) A_hat_kry <- reconstruct(kry) err_defl <- sum((A_nn - A_hat_defl)^2) / sum(A_nn^2) err_kry <- sum((A_nn - A_hat_kry)^2) / sum(A_nn^2) # Both should have reasonable reconstruction (<50% relative error for k=3) expect_lt(err_defl, 0.5) expect_lt(err_kry, 0.5) }) # =========================================================================== # S4 methods # =========================================================================== test_that("reconstruct(svd_result) has low MSE", { s <- RcppML::svd(A_dense, k = 10, method = "deflation", maxit = 200, tol = 1e-6, seed = 1, test_fraction = 0) A_hat <- reconstruct(s) expect_equal(dim(A_hat), dim(A_dense)) # PCA mode reconstruction adds row means back s_pca <- pca(A_iris, k = 4, method = "deflation", maxit = 200, tol = 1e-8, seed = 1, test_fraction = 0) A_iris_hat <- reconstruct(s_pca) mse <- mean((A_iris - A_iris_hat)^2) expect_lt(mse, 0.01) # 4 factors should explain iris well }) test_that("variance_explained() returns decreasing proportions", { s <- RcppML::svd(A_dense, k = 5, method = "deflation", maxit = 200, tol = 1e-6, seed = 1, test_fraction = 0) ve <- variance_explained(s) expect_length(ve, 5) expect_true(all(ve > 0)) expect_true(all(ve <= 1)) expect_true(all(diff(ve) <= 1e-10)) # non-increasing expect_lt(abs(sum(ve) - 1), 1) # sum should be <= 1 (truncated) }) test_that("dim() works on svd", { s <- RcppML::svd(A_dense, k = 3, method = "deflation", seed = 1, maxit = 50, test_fraction = 0) # dim() returns c(nrow(u), ncol(v), length(d)) = c(m, k, k) expect_equal(dim(s), c(m, 3, 3)) }) test_that("show() works on svd", { s <- RcppML::svd(A_dense, k = 3, method = "deflation", seed = 1, maxit = 50, test_fraction = 0) expect_output(show(s), "svd") }) test_that("head() works on svd", { s <- RcppML::svd(A_dense, k = 3, method = "deflation", seed = 1, maxit = 50, test_fraction = 0) expect_output(head(s), "svd") }) test_that("subsetting [i] works on svd", { s <- RcppML::svd(A_dense, k = 5, method = "deflation", seed = 1, maxit = 50, test_fraction = 0) s2 <- s[1:3] expect_equal(ncol(s2@u), 3) expect_equal(length(s2@d), 3) expect_equal(ncol(s2@v), 3) }) # =========================================================================== # Dimnames preservation # =========================================================================== test_that("svd() preserves dimnames", { s <- RcppML::svd(A_dense, k = 3, method = "deflation", seed = 1, maxit = 50, test_fraction = 0) expect_equal(rownames(s@u), rownames(A_dense)) expect_equal(rownames(s@v), colnames(A_dense)) }) test_that("svd() preserves dimnames for sparse input", { s <- RcppML::svd(A_sparse, k = 3, method = "deflation", seed = 1, maxit = 50, test_fraction = 0) expect_equal(rownames(s@u), rownames(A_sparse)) expect_equal(rownames(s@v), colnames(A_sparse)) }) # =========================================================================== # Reproducibility # =========================================================================== test_that("svd() works with default NULL seed", { s <- RcppML::svd(A_dense, k = 3, method = "deflation", maxit = 50) expect_s4_class(s, "svd") expect_equal(length(s@d), 3) }) test_that("svd() is reproducible with same seed", { s1 <- RcppML::svd(A_dense, k = 3, method = "deflation", seed = 42, maxit = 50, test_fraction = 0) s2 <- RcppML::svd(A_dense, k = 3, method = "deflation", seed = 42, maxit = 50, test_fraction = 0) expect_equal(s1@d, s2@d) expect_equal(s1@u, s2@u) expect_equal(s1@v, s2@v) }) test_that("svd() gives different results with different seeds", { s1 <- RcppML::svd(A_dense, k = 3, method = "deflation", seed = 1, maxit = 50, test_fraction = 0) s2 <- RcppML::svd(A_dense, k = 3, method = "deflation", seed = 99, maxit = 50, test_fraction = 0) # d should converge to similar values but u/v initialization differs # At minimum, raw U matrices should differ expect_false(isTRUE(all.equal(s1@u, s2@u))) }) # =========================================================================== # Input validation # =========================================================================== test_that("svd() rejects invalid inputs", { expect_error(RcppML::svd(A_dense, k = 0)) expect_error(RcppML::svd(A_dense, k = 3, method = "bogus")) expect_error(RcppML::svd(A_dense, k = 3, tol = -1)) expect_error(RcppML::svd(A_dense, k = 3, L1 = -1)) expect_error(RcppML::svd("nonexistent.spz", k = 3)) }) # =========================================================================== # k = 1 edge case # =========================================================================== test_that("svd() works with k = 1", { s <- RcppML::svd(A_dense, k = 1, method = "deflation", seed = 1, maxit = 200, test_fraction = 0) expect_equal(ncol(s@u), 1) expect_equal(length(s@d), 1) expect_equal(ncol(s@v), 1) expect_true(s@d > 0) }) # =========================================================================== # scale = TRUE (correlation PCA) # =========================================================================== # Reference: prcomp centers by columns, RcppML centers by rows. # To compare, transpose so RcppML row-centering matches prcomp column-centering. test_that("svd(scale=TRUE) returns correct metadata fields", { s <- RcppML::svd(A_dense, k = 3, center = TRUE, scale = TRUE, method = "lanczos", seed = 1) expect_s4_class(s, "svd") expect_true(s@misc$centered) expect_true(s@misc$scaled) expect_false(is.null(s@misc$row_means)) expect_false(is.null(s@misc$row_sds)) expect_equal(length(s@misc$row_means), nrow(A_dense)) expect_equal(length(s@misc$row_sds), nrow(A_dense)) # row_sds should all be positive expect_true(all(s@misc$row_sds > 0)) }) test_that("svd(scale=TRUE) auto-enables center=TRUE", { # scale=TRUE with center=FALSE should still produce centered result s <- RcppML::svd(A_dense, k = 2, center = FALSE, scale = TRUE, method = "lanczos", seed = 1) expect_true(s@misc$centered) expect_true(s@misc$scaled) }) test_that("svd(scale=TRUE) singular values match prcomp on transposed iris", { # prcomp centers/scales columns; RcppML centers/scales rows. # So run RcppML on t(iris) to get the same decomposition as prcomp(iris). A_t <- t(A_iris) # 4 x 150 k <- 3 s <- RcppML::svd(A_t, k = k, center = TRUE, scale = TRUE, method = "lanczos", seed = 1) ref <- prcomp(A_iris, center = TRUE, scale. = TRUE, rank. = k) # Compare proportion of variance: leading component should agree well; # smaller components may differ more due to algorithm differences. rcppml_pov <- s@d^2 / s@misc$frobenius_norm_sq ref_pov <- ref$sdev[1:k]^2 / sum(ref$sdev^2) # First singular value (dominant) should match closely expect_equal(rcppml_pov[1], ref_pov[1], tolerance = 0.05) # Overall: RcppML captures at least as much total variance as prcomp # (sum of POV should be in the same ballpark) expect_gt(sum(rcppml_pov), 0.5) # at least 50% of variance }) test_that("svd(scale=TRUE) works on sparse matrices", { k <- 3 s_sp <- RcppML::svd(A_sparse, k = k, center = TRUE, scale = TRUE, method = "lanczos", seed = 42) expect_s4_class(s_sp, "svd") expect_true(s_sp@misc$scaled) expect_equal(length(s_sp@misc$row_sds), nrow(A_sparse)) expect_true(all(s_sp@d > 0)) expect_true(all(diff(s_sp@d) <= 0)) # decreasing }) test_that("svd(scale=TRUE) sparse and dense agree", { # Convert sparse to dense and compare A_sp_dense <- as.matrix(A_sparse) k <- 3 s_sp <- RcppML::svd(A_sparse, k = k, center = TRUE, scale = TRUE, method = "lanczos", seed = 1) s_de <- RcppML::svd(A_sp_dense, k = k, center = TRUE, scale = TRUE, method = "lanczos", seed = 1) # Singular values should match closely expect_equal(s_sp@d, s_de@d, tolerance = 1e-4) # Row means and SDs should match expect_equal(s_sp@misc$row_means, s_de@misc$row_means, tolerance = 1e-5) expect_equal(s_sp@misc$row_sds, s_de@misc$row_sds, tolerance = 1e-5) }) test_that("svd(scale=TRUE) frobenius_norm_sq equals m*n", { # By definition, when each row is standardized: ||A_tilde||_F^2 = m * n s <- RcppML::svd(A_dense, k = 3, center = TRUE, scale = TRUE, method = "lanczos", seed = 1) expect_equal(s@misc$frobenius_norm_sq, nrow(A_dense) * ncol(A_dense), tolerance = 0.01) }) test_that("svd(scale=TRUE) is reproducible", { s1 <- RcppML::svd(A_dense, k = 3, center = TRUE, scale = TRUE, method = "lanczos", seed = 7) s2 <- RcppML::svd(A_dense, k = 3, center = TRUE, scale = TRUE, method = "lanczos", seed = 7) expect_equal(s1@d, s2@d) expect_equal(s1@u, s2@u) expect_equal(s1@v, s2@v) expect_equal(s1@misc$row_sds, s2@misc$row_sds) }) test_that("svd(scale=TRUE) works across multiple methods", { methods_to_test <- c("deflation", "irlba", "randomized") k <- 3 for (meth in methods_to_test) { s <- RcppML::svd(A_dense, k = k, center = TRUE, scale = TRUE, method = meth, seed = 1, maxit = 200, tol = 1e-5, test_fraction = 0) expect_s4_class(s, "svd") expect_true(s@misc$scaled, info = paste("scaled flag, method =", meth)) expect_equal(length(s@d), k, info = paste("k factors, method =", meth)) expect_true(all(s@d > 0), info = paste("positive d, method =", meth)) } }) test_that("variance_explained() with scale=TRUE sums to <=1", { s <- RcppML::svd(A_dense, k = 5, center = TRUE, scale = TRUE, method = "lanczos", seed = 1) ve <- variance_explained(s) expect_length(ve, 5) expect_true(all(ve > 0)) expect_true(all(ve <= 1)) expect_lte(sum(ve), 1 + 1e-6) expect_true(all(diff(ve) <= 1e-10)) # non-increasing }) # =========================================================================== # Cross-validation (CV) tests # =========================================================================== test_that("svd(test_fraction>0, deflation) produces test_loss (sparse)", { s <- RcppML::svd(A_sparse, k = 3, method = "deflation", maxit = 100, tol = 1e-5, seed = 1, test_fraction = 0.1, cv_seed = 42) expect_s4_class(s, "svd") expect_true(!is.null(s@misc$test_loss)) expect_true(length(s@misc$test_loss) >= 1) expect_true(all(is.finite(s@misc$test_loss))) expect_true(all(s@misc$test_loss > 0)) }) test_that("svd(test_fraction>0, deflation) produces test_loss (dense)", { s <- RcppML::svd(A_dense, k = 3, method = "deflation", maxit = 100, tol = 1e-5, seed = 1, test_fraction = 0.1, cv_seed = 42) expect_s4_class(s, "svd") expect_true(!is.null(s@misc$test_loss)) expect_true(length(s@misc$test_loss) >= 1) expect_true(all(is.finite(s@misc$test_loss))) }) test_that("svd(test_fraction>0, krylov) produces test_loss (sparse)", { s <- RcppML::svd(A_sparse, k = 3, method = "krylov", seed = 1, test_fraction = 0.1, cv_seed = 42) expect_s4_class(s, "svd") expect_true(!is.null(s@misc$test_loss)) expect_true(length(s@misc$test_loss) >= 1) expect_true(all(is.finite(s@misc$test_loss))) }) test_that("svd(test_fraction>0, krylov) produces test_loss (dense)", { s <- RcppML::svd(A_dense, k = 3, method = "krylov", seed = 1, test_fraction = 0.1, cv_seed = 42) expect_s4_class(s, "svd") expect_true(!is.null(s@misc$test_loss)) expect_true(length(s@misc$test_loss) >= 1) expect_true(all(is.finite(s@misc$test_loss))) }) test_that("mask='zeros' changes CV behavior on sparse data", { s_nz <- RcppML::svd(A_sparse, k = 3, method = "deflation", maxit = 100, tol = 1e-5, seed = 1, test_fraction = 0.1, cv_seed = 42, mask = "zeros") s_all <- RcppML::svd(A_sparse, k = 3, method = "deflation", maxit = 100, tol = 1e-5, seed = 1, test_fraction = 0.1, cv_seed = 42) # Both should produce valid test_loss expect_true(!is.null(s_nz@misc$test_loss)) expect_true(!is.null(s_all@misc$test_loss)) expect_true(all(is.finite(s_nz@misc$test_loss))) expect_true(all(is.finite(s_all@misc$test_loss))) # Both should produce positive singular values expect_true(all(s_nz@d > 0)) expect_true(all(s_all@d > 0)) }) test_that("auto-rank (k='auto') selects reasonable rank", { # Known-rank matrix: rank 3 set.seed(99) W <- matrix(rnorm(m * 3), m, 3) V <- matrix(rnorm(n * 3), n, 3) A_rank3 <- W %*% t(V) + matrix(rnorm(m * n, sd = 0.01), m, n) s <- RcppML::svd(A_rank3, k = "auto", method = "deflation", maxit = 100, tol = 1e-5, seed = 1, k_max = 10, patience = 2) expect_s4_class(s, "svd") # Should select a small rank (not the full k_max) expect_lte(ncol(s@u), 10) # test_loss trajectory should exist expect_true(!is.null(s@misc$test_loss)) expect_true(length(s@misc$test_loss) >= 1) }) # =========================================================================== # Robust SVD tests # =========================================================================== test_that("svd(robust=TRUE) succeeds on dense input", { s <- RcppML::svd(A_dense, k = 3, method = "deflation", maxit = 50, tol = 1e-4, seed = 1, test_fraction = 0, robust = TRUE) expect_s4_class(s, "svd") expect_equal(ncol(s@u), 3) expect_true(all(s@d > 0)) }) test_that("svd(robust=TRUE) succeeds on sparse input", { s <- RcppML::svd(A_sparse, k = 3, method = "deflation", maxit = 50, tol = 1e-4, seed = 1, test_fraction = 0, robust = TRUE) expect_s4_class(s, "svd") expect_equal(ncol(s@u), 3) expect_true(all(s@d > 0)) }) test_that("svd(robust='mae') succeeds", { s <- RcppML::svd(A_dense, k = 3, method = "deflation", maxit = 50, tol = 1e-4, seed = 1, test_fraction = 0, robust = "mae") expect_s4_class(s, "svd") expect_equal(ncol(s@u), 3) expect_true(all(s@d > 0)) }) test_that("svd(robust=numeric) uses custom Huber delta", { s <- RcppML::svd(A_dense, k = 3, method = "deflation", maxit = 50, tol = 1e-4, seed = 1, test_fraction = 0, robust = 2.0) expect_s4_class(s, "svd") expect_equal(ncol(s@u), 3) expect_true(all(s@d > 0)) }) test_that("robust SVD downweights outliers", { # Create matrix with moderate outliers that robust SVD can handle A_out <- A_dense A_out[1, 1] <- A_out[1, 1] + 50 # moderate outlier A_out[2, 3] <- A_out[2, 3] + 50 s_robust <- RcppML::svd(A_out, k = 3, method = "deflation", maxit = 100, tol = 1e-5, seed = 1, test_fraction = 0, robust = TRUE) s_normal <- RcppML::svd(A_out, k = 3, method = "deflation", maxit = 100, tol = 1e-5, seed = 1, test_fraction = 0, robust = FALSE) # Robust reconstruction should have larger residual at outlier positions # because it downweights them, while normal SVD tries to fit them hat_r <- reconstruct(s_robust) hat_n <- reconstruct(s_normal) res_r <- abs(A_out[1,1] - hat_r[1,1]) res_n <- abs(A_out[1,1] - hat_n[1,1]) # Robust should NOT fit the outlier as closely (larger residual) expect_gt(res_r, res_n * 0.5) # robust residual is at least half of normal # Both should still produce valid results expect_true(all(s_robust@d > 0)) expect_true(all(s_normal@d > 0)) }) test_that("svd(robust + CV) works together", { s <- RcppML::svd(A_sparse, k = 3, method = "deflation", maxit = 50, tol = 1e-4, seed = 1, test_fraction = 0.1, cv_seed = 42, robust = TRUE) expect_s4_class(s, "svd") expect_true(!is.null(s@misc$test_loss)) expect_true(all(is.finite(s@misc$test_loss))) }) # =========================================================================== # Ground-truth tests: RcppML SVD vs base::svd() (P0) # =========================================================================== # Matrix where base::svd() is exact. Verify: # 1. ||U*diag(d)*V' - A||/||A|| < 1e-6 (reconstruction) # 2. singular values match base::svd()$d[1:k] within 1e-4 # Shared ground-truth data: low-rank matrices with well-separated singular values # NOTE: Must be large enough (gt_m >= 100, gt_n >= 80) for Krylov/Lanczos to # converge reliably on all CPU architectures (Intel Xeon with AVX-512 needs # more Lanczos steps than AMD EPYC for the bottom singular vectors). set.seed(123) gt_m <- 100; gt_n <- 80; gt_k <- 5 # Create rank-5 dense matrix with prescribed well-separated singular values # Using QR of random matrices for orthogonal bases U_raw <- qr.Q(qr(matrix(rnorm(gt_m * gt_k), gt_m, gt_k))) V_raw <- qr.Q(qr(matrix(rnorm(gt_n * gt_k), gt_n, gt_k))) D_true <- c(50, 30, 15, 8, 4) # well-separated A_gt_dense <- U_raw %*% diag(D_true) %*% t(V_raw) # For sparse: use the same structure but as dgCMatrix (still dense content, # but tests the sparse code path) A_gt_sparse <- as(Matrix(A_gt_dense, sparse = TRUE), "dgCMatrix") # Reference from base::svd ref_dense <- base::svd(A_gt_dense) ref_sparse <- base::svd(as.matrix(A_gt_sparse)) test_that("TEST-SVD-DEFLATION-GROUNDTRUTH: deflation matches base::svd (sparse)", { s <- RcppML::svd(A_gt_sparse, k = gt_k, method = "deflation", maxit = 500, tol = 1e-10, seed = 1, test_fraction = 0) # Reconstruction: ||UdV' - A|| / ||A|| should be near-zero for rank-5 matrix A_hat <- s@u %*% diag(s@d) %*% t(s@v) rel_err <- sqrt(sum((as.matrix(A_gt_sparse) - A_hat)^2)) / sqrt(sum(as.matrix(A_gt_sparse)^2)) expect_lt(rel_err, 1e-3) # Singular values match base::svd expect_equal(s@d, ref_sparse$d[1:gt_k], tolerance = 1e-3) }) test_that("TEST-SVD-KRYLOV-GROUNDTRUTH: krylov matches base::svd (sparse)", { s <- RcppML::svd(A_gt_sparse, k = gt_k, method = "krylov", maxit = 500, tol = 1e-10, seed = 1, test_fraction = 0) A_hat <- s@u %*% diag(s@d) %*% t(s@v) rel_err <- sqrt(sum((as.matrix(A_gt_sparse) - A_hat)^2)) / sqrt(sum(as.matrix(A_gt_sparse)^2)) expect_lt(rel_err, 1e-3) expect_equal(s@d, ref_sparse$d[1:gt_k], tolerance = 1e-3) }) test_that("TEST-SVD-LANCZOS-GROUNDTRUTH: lanczos matches base::svd (sparse)", { s <- RcppML::svd(A_gt_sparse, k = gt_k, method = "lanczos", maxit = 500, tol = 1e-10, seed = 1, test_fraction = 0) A_hat <- s@u %*% diag(s@d) %*% t(s@v) rel_err <- sqrt(sum((as.matrix(A_gt_sparse) - A_hat)^2)) / sqrt(sum(as.matrix(A_gt_sparse)^2)) expect_lt(rel_err, 1e-3) expect_equal(s@d, ref_sparse$d[1:gt_k], tolerance = 1e-3) }) test_that("TEST-SVD-IRLBA-GROUNDTRUTH: irlba matches base::svd (sparse)", { s <- RcppML::svd(A_gt_sparse, k = gt_k, method = "irlba", maxit = 500, tol = 1e-10, seed = 1, test_fraction = 0) A_hat <- s@u %*% diag(s@d) %*% t(s@v) rel_err <- sqrt(sum((as.matrix(A_gt_sparse) - A_hat)^2)) / sqrt(sum(as.matrix(A_gt_sparse)^2)) expect_lt(rel_err, 1e-3) expect_equal(s@d, ref_sparse$d[1:gt_k], tolerance = 1e-3) }) test_that("TEST-SVD-RANDOMIZED-GROUNDTRUTH: randomized matches base::svd (sparse)", { s <- RcppML::svd(A_gt_sparse, k = gt_k, method = "randomized", seed = 1, test_fraction = 0) A_hat <- s@u %*% diag(s@d) %*% t(s@v) rel_err <- sqrt(sum((as.matrix(A_gt_sparse) - A_hat)^2)) / sqrt(sum(as.matrix(A_gt_sparse)^2)) expect_lt(rel_err, 1e-3) expect_equal(s@d, ref_sparse$d[1:gt_k], tolerance = 1e-3) }) # =========================================================================== # Dense input SVD tests (P1) — verify all 5 methods work on dense matrices # =========================================================================== test_that("TEST-SVD-DEFLATION-DENSE: deflation matches base::svd (dense)", { s <- RcppML::svd(A_gt_dense, k = gt_k, method = "deflation", maxit = 500, tol = 1e-10, seed = 1, test_fraction = 0) A_hat <- s@u %*% diag(s@d) %*% t(s@v) rel_err <- sqrt(sum((A_gt_dense - A_hat)^2)) / sqrt(sum(A_gt_dense^2)) expect_lt(rel_err, 1e-3) expect_equal(s@d, ref_dense$d[1:gt_k], tolerance = 1e-3) }) test_that("TEST-SVD-KRYLOV-DENSE: krylov matches base::svd (dense)", { s <- RcppML::svd(A_gt_dense, k = gt_k, method = "krylov", maxit = 500, tol = 1e-10, seed = 1, test_fraction = 0) A_hat <- s@u %*% diag(s@d) %*% t(s@v) rel_err <- sqrt(sum((A_gt_dense - A_hat)^2)) / sqrt(sum(A_gt_dense^2)) expect_lt(rel_err, 1e-3) expect_equal(s@d, ref_dense$d[1:gt_k], tolerance = 1e-3) }) test_that("TEST-SVD-LANCZOS-DENSE: lanczos matches base::svd (dense)", { s <- RcppML::svd(A_gt_dense, k = gt_k, method = "lanczos", maxit = 500, tol = 1e-10, seed = 1, test_fraction = 0) A_hat <- s@u %*% diag(s@d) %*% t(s@v) rel_err <- sqrt(sum((A_gt_dense - A_hat)^2)) / sqrt(sum(A_gt_dense^2)) expect_lt(rel_err, 1e-3) expect_equal(s@d, ref_dense$d[1:gt_k], tolerance = 1e-3) }) test_that("TEST-SVD-IRLBA-DENSE: irlba matches base::svd (dense)", { s <- RcppML::svd(A_gt_dense, k = gt_k, method = "irlba", maxit = 500, tol = 1e-10, seed = 1, test_fraction = 0) A_hat <- s@u %*% diag(s@d) %*% t(s@v) rel_err <- sqrt(sum((A_gt_dense - A_hat)^2)) / sqrt(sum(A_gt_dense^2)) expect_lt(rel_err, 1e-3) expect_equal(s@d, ref_dense$d[1:gt_k], tolerance = 1e-3) }) test_that("TEST-SVD-RANDOMIZED-DENSE: randomized matches base::svd (dense)", { s <- RcppML::svd(A_gt_dense, k = gt_k, method = "randomized", seed = 1, test_fraction = 0) A_hat <- s@u %*% diag(s@d) %*% t(s@v) rel_err <- sqrt(sum((A_gt_dense - A_hat)^2)) / sqrt(sum(A_gt_dense^2)) expect_lt(rel_err, 1e-3) expect_equal(s@d, ref_dense$d[1:gt_k], tolerance = 1e-3) })