context("test-pc_projection") library(foreach) library(RhpcBLASctl) registerDoSEQ() # tests/testthat/test-ortho_projection.R # ============================================================================= # Setup helper # ============================================================================= .setup_ortho_data <- function(n_xr = 40, n_xu = 20) { data("NIRsoil", package = "prospectr") Xu <- NIRsoil$spc[!as.logical(NIRsoil$train), ] Yu <- NIRsoil$CEC[!as.logical(NIRsoil$train)] Yr <- NIRsoil$CEC[as.logical(NIRsoil$train)] Yr_2 <- NIRsoil$Ciso[as.logical(NIRsoil$train)] Xr <- NIRsoil$spc[as.logical(NIRsoil$train), ] Xu <- Xu[!is.na(Yu), ] y_sel <- !is.na(Yr) & !is.na(Yr_2) Xr <- Xr[y_sel, ] Yu <- Yu[!is.na(Yu)] Yr_2 <- Yr_2[y_sel] Yr <- Yr[y_sel] Xu <- Xu[seq_len(n_xu), ] Yu <- Yu[seq_len(n_xu)] Xr <- Xr[seq_len(n_xr), ] Yr <- Yr[seq_len(n_xr)] Yr_2 <- Yr_2[seq_len(n_xr)] list(Xr = Xr, Xu = Xu, Yr = Yr, Yr_2 = Yr_2, Yu = Yu) } # ============================================================================= # Basic ortho_projection tests # ============================================================================= test_that("ortho_projection with ncomp_by_cumvar works (single matrix)", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() cumvar_value <- 0.999 result <- ortho_projection( Xr = d$Xr, ncomp = ncomp_by_cumvar(cumvar_value), center = TRUE, scale = FALSE, method = "pca" ) expect_s3_class(result, "ortho_projection") expect_equal(ncol(result$scores), result$ncomp) # All but last component should have cumvar < threshold test_ncomp <- result$ncomp - 1 expect_true(all( result$variance$x_var["cumulative_explained_var", seq_len(test_ncomp)] < cumvar_value )) }) test_that("ortho_projection with ncomp_by_cumvar works (two matrices)", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() cumvar_value <- 0.999 result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, ncomp = ncomp_by_cumvar(cumvar_value), center = TRUE, scale = FALSE, method = "pca" ) expect_s3_class(result, "ortho_projection") expect_equal(ncol(result$scores), result$ncomp) expect_equal(nrow(result$scores), nrow(d$Xr) + nrow(d$Xu)) test_ncomp <- result$ncomp - 1 expect_true(all( result$variance$x_var["cumulative_explained_var", seq_len(test_ncomp)] < cumvar_value )) }) test_that("ortho_projection with ncomp_by_var works", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() min_var <- 0.001 # equivalent to 1 - 0.999 result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, ncomp = ncomp_by_var(min_var), center = TRUE, scale = FALSE, method = "pca" ) expect_s3_class(result, "ortho_projection") expect_equal(ncol(result$scores), result$ncomp) # All retained components should explain >= min_var expect_true(all( result$variance$x_var["explained_var", seq_len(result$ncomp)] >= min_var )) }) test_that("ortho_projection with ncomp_fixed works", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, ncomp = ncomp_fixed(10), center = TRUE, scale = FALSE, method = "pca" ) expect_s3_class(result, "ortho_projection") expect_equal(result$ncomp, 10) expect_equal(ncol(result$scores), 10) }) test_that("ortho_projection with integer ncomp works", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, ncomp = 10, center = TRUE, scale = FALSE, method = "pca" ) expect_s3_class(result, "ortho_projection") expect_equal(result$ncomp, 10) }) # ============================================================================= # predict method tests # ============================================================================= test_that("predict.ortho_projection works", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() tol <- 1e-5 result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, ncomp = ncomp_by_cumvar(0.999), center = TRUE, scale = FALSE, method = "pca" ) # Predicting Xr should match stored scores for Xr rows preds <- predict(result, d$Xr) stored_xr <- predict(result)[seq_len(nrow(d$Xr)), ] expect_lt(sum(abs(preds - stored_xr)), tol) }) # ============================================================================= # ncomp_by_opc tests # ============================================================================= test_that("ortho_projection with ncomp_by_opc works (PCA)", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, Yr = d$Yr, ncomp = ncomp_by_opc(max_ncomp = 15), center = TRUE, scale = TRUE, method = "pca" ) expect_s3_class(result, "ortho_projection") expect_true("opc_evaluation" %in% names(result)) # ncomp should match the minimum RMSD position expect_equal(result$ncomp, as.vector(which.min(result$opc_evaluation[, "rmsd_Yr"]))) expect_equal(result$ncomp, 7) }) test_that("ortho_projection pca vs pca_nipals give equivalent results", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() tol <- 1e-5 opc_pca <- ortho_projection( Xr = d$Xr, Xu = d$Xu, Yr = d$Yr, ncomp = ncomp_by_opc(max_ncomp = 15), center = TRUE, scale = TRUE, method = "pca" ) opc_nipals <- ortho_projection( Xr = d$Xr, Xu = d$Xu, Yr = d$Yr, ncomp = ncomp_by_opc(max_ncomp = 30), center = TRUE, scale = TRUE, method = "pca_nipals" ) # Same number of components selected expect_equal(opc_pca$ncomp, opc_nipals$ncomp) # Scores should be equivalent (up to sign) cor_equiv <- sapply( seq_len(opc_pca$ncomp), function(i) abs(cor(opc_pca$scores[, i], opc_nipals$scores[, i])) ) expect_lt(sum(1 - cor_equiv), tol) }) test_that("ncomp_by_var and ncomp_by_cumvar select consistent components", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() cumvar_value <- 0.999 # Get full variance information via OPC (more components) opc_result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, Yr = d$Yr, ncomp = ncomp_by_opc(max_ncomp = 15), center = TRUE, scale = TRUE, method = "pca" ) cumvar_result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, ncomp = ncomp_by_cumvar(cumvar_value), center = TRUE, scale = FALSE, method = "pca" ) var_result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, ncomp = ncomp_by_var(1 - cumvar_value), center = TRUE, scale = FALSE, method = "pca" ) # Check consistency between selection methods # cumvar: counts components where cumvar < threshold, then adds 1 expect_equal( sum(opc_result$variance$x_var["cumulative_explained_var", ] < cumvar_value), cumvar_result$ncomp - 1 ) # var: counts components where var >= threshold expect_equal( sum(opc_result$variance$x_var["explained_var", ] >= (1 - cumvar_value)), var_result$ncomp ) }) # ============================================================================= # Multi-response Yr tests # ============================================================================= test_that("ortho_projection with multi-column Yr works", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() Yr_multi <- cbind(name_test_yr = d$Yr, Yr_2 = d$Yr_2) result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, Yr = Yr_multi, ncomp = ncomp_by_opc(max_ncomp = 30), center = TRUE, scale = FALSE, method = "pca_nipals" ) expect_s3_class(result, "ortho_projection") expect_true("opc_evaluation" %in% names(result)) expect_true("rmsd_name_test_yr" %in% colnames(result$opc_evaluation)) expect_true("rmsd_Yr_2" %in% colnames(result$opc_evaluation)) }) # ============================================================================= # PLS method tests # ============================================================================= test_that("ortho_projection with method = 'pls' works", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, Yr = d$Yr, ncomp = ncomp_by_opc(max_ncomp = 15), method = "pls", scale = TRUE ) expect_s3_class(result, "ortho_projection") expect_true("weights" %in% names(result)) expect_true("projection_mat" %in% names(result)) expect_true("Y_loadings" %in% names(result)) }) test_that("ortho_projection with method = 'mpls' works", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() result <- ortho_projection( Xr = d$Xr, Xu = d$Xu, Yr = d$Yr, ncomp = 10, method = "mpls", scale = TRUE ) expect_s3_class(result, "ortho_projection") expect_equal(result$ncomp, 10) expect_equal(result$method, "mpls") }) test_that("ortho_projection PLS requires Yr", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() expect_error( ortho_projection( Xr = d$Xr, Xu = d$Xu, ncomp = 10, method = "pls" ), "'Yr' is required" ) }) test_that("ortho_projection ncomp_by_opc requires Yr", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() expect_error( ortho_projection( Xr = d$Xr, Xu = d$Xu, ncomp = ncomp_by_opc(), method = "pca" ), "'Yr' is required" ) }) # ============================================================================= # Validation tests # ============================================================================= test_that("ortho_projection validates center and scale", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() expect_error( ortho_projection(d$Xr, center = "yes"), "TRUE or FALSE" ) expect_error( ortho_projection(d$Xr, scale = "yes"), "TRUE or FALSE" ) }) test_that("ortho_projection validates method", { skip_if_not_installed("prospectr") d <- .setup_ortho_data() expect_error( ortho_projection(d$Xr, method = "invalid"), fixed = '"pca", "pca_nipals", "pls", "mpls", "simpls"' ) }) # ============================================================================= # Large dataset tests (skipped on CRAN) # ============================================================================= test_that("ortho_projection works with larger datasets", { skip_on_cran() skip_if_not_installed("prospectr") data("NIRsoil", package = "prospectr") tol <- 1e-5 Xu <- NIRsoil$spc[!as.logical(NIRsoil$train), ] Yu <- NIRsoil$CEC[!as.logical(NIRsoil$train)] Yr <- NIRsoil$CEC[as.logical(NIRsoil$train)] Xr <- NIRsoil$spc[as.logical(NIRsoil$train), ] Xu <- Xu[!is.na(Yu), ] Xr <- Xr[!is.na(Yr), ] Yu <- Yu[!is.na(Yu)] Yr <- Yr[!is.na(Yr)] cumvar_value <- 0.999 # Single matrix one_input <- ortho_projection( Xr, ncomp = ncomp_by_cumvar(cumvar_value), center = TRUE, scale = FALSE, method = "pca" ) expect_equal(ncol(one_input$scores), one_input$ncomp) test_ncomp <- one_input$ncomp - 1 expect_true(all( one_input$variance$x_var["cumulative_explained_var", seq_len(test_ncomp)] < cumvar_value )) # Two matrices two_input <- ortho_projection( Xr, Xu, ncomp = ncomp_by_cumvar(cumvar_value), center = TRUE, scale = FALSE, method = "pca" ) expect_equal(ncol(two_input$scores), two_input$ncomp) # Predictions match preds <- sum(abs( predict(two_input)[seq_len(nrow(Xr)), ] - predict(two_input, Xr) )) expect_lt(preds, tol) # OPC selection opc_pca <- ortho_projection( Xr, Xu, Yr = Yr, ncomp = ncomp_by_opc(max_ncomp = 30), center = TRUE, scale = FALSE, method = "pca" ) opc_nipals <- ortho_projection( Xr, Xu, Yr = Yr, ncomp = ncomp_by_opc(max_ncomp = 30), center = TRUE, scale = FALSE, method = "pca_nipals" ) expect_equal(opc_pca$ncomp, as.vector(which.min(opc_pca$opc_evaluation[, "rmsd_Yr"]))) expect_equal(opc_pca$ncomp, 20) # PCA and NIPALS should select same number expect_equal(opc_pca$ncomp, opc_nipals$ncomp) # Scores should be equivalent cor_equiv <- sapply( seq_len(opc_pca$ncomp), function(i) abs(cor(opc_pca$scores[, i], opc_nipals$scores[, i])) ) expect_lt(sum(1 - cor_equiv), tol) # Variance selection consistency expect_equal( sum(opc_pca$variance$x_var["cumulative_explained_var", ] < cumvar_value), two_input$ncomp - 1 ) var_result <- ortho_projection( Xr, Xu, ncomp = ncomp_by_var(1 - cumvar_value), center = TRUE, scale = FALSE, method = "pca" ) expect_equal( sum(opc_pca$variance$x_var["explained_var", ] >= (1 - cumvar_value)), var_result$ncomp ) })