# Project: gspcr # Objective: Test pca_mix.R function # Author: Edoardo Costantini # Created: 2023-05-02 # Modified: 2023-05-23 # Notes: # Define tolerance tol <- 1e-5 # Test: PCAmix = PCA on numeric data ------------------------------------------- # Load data data(wine, package = "FactoMineR") # Define only numeric data X <- wine[, -c(1:2)] # Define training and validation datasets npcs <- 5 # Apply PCA_mix function pca_mix_out <- pca_mix( X_tr = X, X_va = X, npcs = npcs ) # Perform regular PCA X_centered <- scale(X, center = TRUE, scale = FALSE) X_scaled <- apply(X_centered, 2, function(j){ j / (sd(j) * (nrow(X_centered)-1) / nrow(X_centered) ) }) # Perform PCA on the training data svd_Xtr <- svd(X_scaled) # Project training and validation data on the PCs PC_tr <- (X_scaled %*% svd_Xtr$v)[, 1:npcs] # Compute correlations between the npcs cors <- sapply(1:npcs, function(j){ cor(PC_tr[, j], pca_mix_out$PC_tr[, j]) }) # Check they are all equal to 1 testthat::expect_true(all(abs(cors) - 1 < tol)) # Test: validation data projections as expected on numeric data ---------------- # Define a training validation index train <- 1:15 valid <- -train # Use pca_mix on training and validation pca_mix_out_tv <- pca_mix( X_tr = X[train, ], X_va = X[valid, ], npcs = npcs ) # Scale training data X_tr_scaled <- scale(X[train, ], center = TRUE, scale = TRUE) # Scale validation data based on training data X_va_scaled <- scale( x = X[valid, ], center = attributes(X_tr_scaled)$`scaled:center`, scale = attributes(X_tr_scaled)$`scaled:scale` ) # Perform PCA on the training data svd_Xtr_tv <- svd(X_tr_scaled) # Project training on the PCs PC_tr_tv <- (X_tr_scaled %*% svd_Xtr_tv$v)[, 1:npcs] # Project validation data on the PCs PC_va_tv <- (X_va_scaled %*% svd_Xtr_tv$v)[, 1:npcs] # Compute correlations between the two training PCs cors_tr <- sapply(1:npcs, function(j) { cor(PC_tr_tv[, j], pca_mix_out_tv$PC_tr[, j]) }) # Compute correlations between the two validation PCs cors_va <- sapply(1:npcs, function(j) { cor(PC_va_tv[, j], pca_mix_out_tv$PC_va[, j]) }) # Check they are all equal to 1 testthat::expect_true(all(abs(cors_tr) - 1 < tol)) testthat::expect_true(all(abs(cors_va) - 1 < tol)) # Test: npcs = 1 works with mixed data ----------------------------------------- # Define a training validation index train <- sample(1:nrow(iris))[1:100] valid <- -train # Use pca_mix on training and validation pca_mix_out <- tryCatch( pca_mix_out_tv <- pca_mix( X_tr = iris[train, ], X_va = iris[valid, ], npcs = 1 ), error = function(e) print(e) ) # Check is the result an error? testthat::expect_false("error" %in% class(pca_mix_out)) # Test: treatment of ordered factor is as nominal variables -------------------- # Use pca_mix on ordinal data pca_mix_ordinal <- tryCatch( pca_mix_out_tv <- pca_mix( X_tr = GSPCRexdata$X$ord, X_va = GSPCRexdata$X$ord, npcs = 2 ), error = function(e) print(e) ) # Use pca_mix on nominal data pca_mix_nominal <- tryCatch( pca_mix_out_tv <- pca_mix( X_tr = GSPCRexdata$X$cat, X_va = GSPCRexdata$X$cat, npcs = 2 ), error = function(e) print(e) ) # Check is the result an error? testthat::expect_equal(pca_mix_ordinal, pca_mix_nominal)