context("pre-processing") library(magrittr) library(testthat) test_that("can preprocess a matrix no center, no scale", { mat1 <- matrix(rnorm(10*15), 10, 15) pp <- pass() %>% prep() X <- pp$init(mat1) x2 <- pp$reverse_transform(X) expect_equal(mat1,x2) expect_equal(X, mat1) }) test_that("can preprocess a matrix center only", { mat1 <- matrix(rnorm(10*15), 10, 15) pp <- center() %>% prep() Xp <- pp$init(mat1) x2 <- pp$reverse_transform(Xp) expect_equal(mat1,x2) expect_true(all(mat1 != Xp)) }) test_that("can apply a centering transform", { mat1 <- matrix(rnorm(10*15), 10, 15) pp <- center() x <- prep(pp) x2 <- init_transform(x,mat1) x3 <- multivarious::transform(x, mat1) expect_equal(x2,x3) }) test_that("can apply a scaling transform", { mat1 <- matrix(rnorm(10*15), 10, 15) pp <- standardize() x <- prep(pp) x2 <- init_transform(x, mat1) x3 <- multivarious::transform(x, mat1) expect_equal(x2,x3) }) test_that("can preprocess a matrix with column scaling", { mat1 <- matrix(rnorm(10*15), 10, 15) wts <- 2:16 pp <- colscale(type="weights", weights=wts) x <- prep(pp) xinit <- init_transform(x, mat1) xrev <- multivarious::inverse_transform(x, xinit) expect_equal(mat1,xrev) }) # test_that("can reset a prepper with `fresh`", { # mat1 <- matrix(rnorm(10*15), 10, 15) # pp <- center() # x <- prep(pp, mat1) # # }) test_that("can reverse transform a matrix after standardization", { mat1 <- matrix(rnorm(10*15), 10, 15) pp <- standardize() x <- prep(pp) x1 <- init_transform(x,mat1) x2 <- multivarious::inverse_transform(x, x1) expect_equal(mat1,x2) }) test_that("can compose two pre-processors", { mat1 <- matrix(rnorm(10*15), 10, 15) x <- center() %>% colscale(type="z") %>% prep() x1 <- init_transform(x,mat1) x2 <- multivarious::inverse_transform(x, x1) expect_equal(mat1,x2) }) # ============================================================================= # Tests for New Preprocessing API # ============================================================================= test_that("new API: fit() and transform() work correctly", { mat1 <- matrix(rnorm(10*15), 10, 15) # Test centering with new API preproc <- center() fitted_preproc <- fit(preproc, mat1) # Transform the same data x_transformed <- multivarious::transform(fitted_preproc, mat1) # Inverse transform x_reconstructed <- multivarious::inverse_transform(fitted_preproc, x_transformed) expect_equal(mat1, x_reconstructed) # Check that centering actually happened expect_true(all(mat1 != x_transformed)) expect_true(max(abs(colMeans(x_transformed))) < 1e-14) }) test_that("new API: fit_transform() works correctly", { mat1 <- matrix(rnorm(10*15), 10, 15) result <- fit_transform(center(), mat1) fitted_preproc <- result$preproc x_transformed <- result$transformed # Should be equivalent to separate fit() and transform() preproc <- center() fitted_preproc2 <- fit(preproc, mat1) x_transformed2 <- multivarious::transform(fitted_preproc2, mat1) expect_equal(x_transformed, x_transformed2) }) test_that("new API: preprocess() helper works correctly", { mat1 <- matrix(rnorm(10*15), 10, 15) result <- preprocess(standardize(), mat1) fitted_preproc <- result$preproc x_transformed <- result$transformed # Verify standardization worked expect_true(max(abs(colMeans(x_transformed))) < 1e-14) expect_true(max(abs(apply(x_transformed, 2, sd) - 1)) < 1e-14) # Verify reconstruction x_reconstructed <- multivarious::inverse_transform(fitted_preproc, x_transformed) expect_equal(mat1, x_reconstructed) }) test_that("new API: error handling for unfitted preprocessor", { mat1 <- matrix(rnorm(10*15), 10, 15) # For prepper objects, they don't have methods for the new API unfitted_preproc <- center() expect_error( multivarious::inverse_transform(unfitted_preproc, mat1), "no applicable method" ) # Create a pre_processor but don't initialize it properly # This simulates an unfitted preprocessor from the new API proc <- prep(center()) attr(proc, "fitted") <- FALSE expect_error( multivarious::transform(proc, mat1), "Pre-processor not fitted" ) expect_error( multivarious::inverse_transform(proc, mat1), "Pre-processor not fitted" ) }) test_that("new API: works with different preprocessing types", { mat1 <- matrix(rnorm(10*15), 10, 15) # Test pass() preprocessor result1 <- preprocess(pass(), mat1) expect_equal(mat1, result1$transformed) # Test colscale with z-scoring result2 <- preprocess(colscale(type="z"), mat1) x_scaled <- result2$transformed expect_true(max(abs(apply(x_scaled, 2, sd) - 1)) < 1e-14) # Test chain: center then scale result3 <- preprocess(center() %>% colscale(type="z"), mat1) x_std <- result3$transformed expect_true(max(abs(colMeans(x_std))) < 1e-14) expect_true(max(abs(apply(x_std, 2, sd) - 1)) < 1e-14) }) test_that("can preprocess a matrix with a colind", { mat1 <- matrix(rnorm(10*15), 10, 15) pp <- center() %>% prep() x <- init_transform(pp,mat1) ret <- multivarious::transform(pp, mat1[,1:2], colind=1:2) expect_equal(ret, x[,1:2]) }) test_that("can concatenate two pre-processors", { mat1 <- matrix(rnorm(10*15), 10, 15) mat2 <- matrix(rnorm(10*15), 10, 15) p <- center() proclist <- lapply(1:2, function(i) { fresh(p) %>% prep() }) m1 <- init_transform(proclist[[1]], mat1) m2 <- init_transform(proclist[[2]], mat2) proc <- concat_pre_processors(proclist, list(1:15, 16:30)) a1 <- multivarious::transform(proc, cbind(mat1,mat2)) a2 <- multivarious::transform(proc, mat1, colind=1:15) a3 <- multivarious::transform(proc, mat2, colind=16:30) # Suppress expected warning about ncomp reduction due to matrix dimensions pres <- suppressWarnings(pca(cbind(mat1,mat2), ncomp=12)) proj <- multiblock_biprojector(pres$v, s=pres$s, sdev=pres$sdev, proc, block_indices=list(1:15, 16:30)) p1 <- project_block(proj, m1, 1) p2 <- project_block(proj, m2, 2) expect_true(!is.null(p1)) expect_true(!is.null(p2)) proc$transform(mat1, colind=1:15) proc$transform(mat2, colind=16:30) proj <- multiblock_projector(pres$v, proc, block_indices=list(1:15, 16:30)) p1 <- project_block(proj, m1, 1) p2 <- project_block(proj, m2, 2) expect_true(!is.null(p1)) expect_true(!is.null(p2)) }) test_that("concat_pre_processors handles complex colind across different block types", { set.seed(123) # for reproducibility mat1 <- matrix(rnorm(10*5, mean=10), 10, 5) mat2 <- matrix(rnorm(10*7, mean=20, sd=5), 10, 7) mat3 <- matrix(rnorm(10*3, mean=0, sd=1), 10, 3) p1 <- center() %>% prep() # Center only p2 <- standardize() %>% prep() # Center and scale p3 <- pass() %>% prep() # No-op # Initialize individual processors m1_init <- init_transform(p1, mat1) m2_init <- init_transform(p2, mat2) m3_init <- init_transform(p3, mat3) proclist <- list(p1, p2, p3) block_indices <- list(1:5, 6:12, 13:15) proc_concat <- concat_pre_processors(proclist, block_indices) # --- Test apply_transform --- # Select columns: 2nd and 4th from block 1, 2nd and 3rd from block 2, 2nd from block 3 colind_global <- c(2, 4, 7, 8, 14) # Create the input matrix corresponding to these global columns test_mat_apply <- cbind(mat1[, c(2, 4)], mat2[, c(2, 3)], mat3[, 2, drop=FALSE]) # Apply the concatenated transform result_apply <- multivarious::transform(proc_concat, test_mat_apply, colind = colind_global) # Manually apply transforms to corresponding original blocks and columns manual_res1 <- multivarious::transform(p1, mat1[, c(2, 4), drop = FALSE], colind = c(2, 4)) # Local indices: 2, 4 manual_res2 <- multivarious::transform(p2, mat2[, c(2, 3), drop = FALSE], colind = c(2, 3)) # Local indices: 2, 3 manual_res3 <- multivarious::transform(p3, mat3[, 2, drop = FALSE], colind = 2) # Local index: 2 # Combine manual results in the order defined by colind_global expected_apply <- cbind(manual_res1[, 1, drop=FALSE], # Corresponds to global col 2 manual_res1[, 2, drop=FALSE], # Corresponds to global col 4 manual_res2[, 1, drop=FALSE], # Corresponds to global col 7 manual_res2[, 2, drop=FALSE], # Corresponds to global col 8 manual_res3[, 1, drop=FALSE]) # Corresponds to global col 14 expect_equal(result_apply, expected_apply, tolerance = 1e-7) # --- Test inverse_transform --- # Input for inverse is the result from transform test_mat_reverse <- result_apply # Inverse using concatenated processor result_reverse <- multivarious::inverse_transform(proc_concat, test_mat_reverse, colind = colind_global) # Manually inverse transforms using the outputs from the manual forward transforms manual_rev1 <- multivarious::inverse_transform(p1, manual_res1, colind = c(2, 4)) manual_rev2 <- multivarious::inverse_transform(p2, manual_res2, colind = c(2, 3)) manual_rev3 <- multivarious::inverse_transform(p3, manual_res3, colind = 2) # Combine manual reverse results in the order defined by colind_global expected_reverse <- cbind(manual_rev1[, 1, drop=FALSE], manual_rev1[, 2, drop=FALSE], manual_rev2[, 1, drop=FALSE], manual_rev2[, 2, drop=FALSE], manual_rev3[, 1, drop=FALSE]) # The result of reverse should match the original input subset 'test_mat_apply' expect_equal(result_reverse, test_mat_apply, tolerance = 1e-7) }) # # test_that("can preprocess a block projector", { # mat1 <- matrix(rnorm(10*15), 10, 15) # mat2 <- matrix(rnorm(10*10), 10, 10) # pca1 <- pca(mat1, ncomp=4) # pca2 <- pca(mat2, ncomp=2) # # bm <- block_projector(list(pca1,pca2)) # pp <- pre_processor(bm,center=FALSE, scale=FALSE) # pdat <- pre_process(pp) # expect_equal(ncol(pdat), 6) # expect_equal(project(bm), pdat) # }) # # test_that("can preprocess a block projector with newdata", { # mat1 <- matrix(rnorm(10*15), 10, 15) # mat2 <- matrix(rnorm(10*10), 10, 10) # pca1 <- pca(mat1, ncomp=4) # pca2 <- pca(mat2, ncomp=2) # # bm <- block_projector(list(pca1,pca2)) # pp <- pre_processor(bm,center=FALSE, scale=FALSE) # # mat3 <- cbind(mat1,mat2) # pdat <- pre_process(pp,mat3) # # expect_equal(ncol(pdat), 6) # expect_equal(project(bm), pdat) # }) # # test_that("can preprocess a block projector with newdata from a sub-block", { # mat1 <- matrix(rnorm(10*15), 10, 15) # mat2 <- matrix(rnorm(10*10), 10, 10) # pca1 <- pca(mat1, ncomp=4) # pca2 <- pca(mat2, ncomp=2) # # bm <- block_projector(list(pca1,pca2)) # pp <- pre_processor(bm,center=FALSE, scale=FALSE) # # mat3 <- cbind(mat2) # pdat <- pre_process(pp,mat3, block_index=2) # # expect_equivalent(project(bm, block_index=2), unclass(pdat)) # })