context("test-pls_projection") test_that("pls_projection works", { # tolernce for results supposed to be 0s tol <- 1e-5 nirdata <- data("NIRsoil", package = "prospectr") Xu <- NIRsoil$spc[!as.logical(NIRsoil$train), ] Yu <- NIRsoil$CEC[!as.logical(NIRsoil$train)] Yr <- NIRsoil$CEC[as.logical(NIRsoil$train)] Yr_2 <- NIRsoil$Ciso[as.logical(NIRsoil$train)] Xr <- NIRsoil$spc[as.logical(NIRsoil$train), ] Xr_3 <- NIRsoil$spc[as.logical(NIRsoil$train), ] Yr_3 <- NIRsoil[as.logical(NIRsoil$train), c("Ciso", "Nt")] pls_var_2ys <- pls_projection( Xr = Xr_3, Yr = Yr_3, pc_selection = list("var", 0.01) ) expect_true(ncol(pls_var_2ys$scores) == 2) pls_cumvar_2ys <- pls_projection( Xr = Xr_3, Yr = Yr_3, pc_selection = list("cumvar", 0.99) ) expect_true(ncol(pls_cumvar_2ys$scores) == 2) Xu <- Xu[!is.na(Yu), ] y_sel <- !is.na(Yr) & !is.na(Yr_2) Xr <- Xr[y_sel, ] Yu <- Yu[!is.na(Yu)] Yr_2 <- Yr_2[y_sel] Yr <- Yr[y_sel] Xu <- Xu[1:20, ] Yu <- Yu[1:20] Xr <- Xr[1:40, ] Yr <- Yr[1:40] Yr_2 <- Yr_2[1:40] testthat::expect_type( pc_projection(Xr, Yr = Yr, pc_selection = list(method = "manual", value = 1), center = TRUE, scale = TRUE ), "list" ) testthat::expect_type( pc_projection(Xr, Yr = Yr, method = "pca.nipals", pc_selection = list(method = "manual", value = 1), center = TRUE, scale = T ), "list" ) testthat::expect_type( pls_projection(Xr, Yr = Yr, pc_selection = list(method = "manual", value = 1), center = TRUE, scale = TRUE ), "list" ) cumvar_value <- 0.74 one_input_matrix <- pls_projection(Xr, Yr = Yr, pc_selection = list(method = "cumvar", value = cumvar_value), center = TRUE, scale = FALSE ) test_ncomp <- one_input_matrix$n_components - 1 expect_true(ncol(one_input_matrix$scores) == one_input_matrix$n_components) expect_true(all(one_input_matrix$variance$x_var[3, 1:test_ncomp] > cumvar_value)) two_input_matrices <- pls_projection(Xr, Xu, Yr, pc_selection = list(method = "cumvar", value = cumvar_value), center = TRUE, scale = FALSE ) two_input_matrices_var <- pls_projection(Xr, Xu, Yr, pc_selection = list(method = "var", value = 1 - cumvar_value), center = TRUE, scale = FALSE ) two_test_ncomp <- two_input_matrices$n_components - 1 expect_true(ncol(two_input_matrices$scores) == two_input_matrices$n_components) expect_true(all(two_input_matrices$variance$x_var[3, 1:two_test_ncomp] > cumvar_value)) preds <- sum(abs(predict(two_input_matrices)[1:nrow(Xr), ] - predict(two_input_matrices, Xr))) expect_true(preds < tol) opc_method <- pls_projection(Xr, Xu, Yr = Yr, pc_selection = list(method = "opc", value = 15), center = TRUE, scale = TRUE ) expect_true(opc_method$n_components == which.min(opc_method$opc_evaluation[, 2])) expect_true(opc_method$n_components == 8) # check that the number of components for method = "cumvar" is properly # obtained, this can be done with the results of opc_method as it selects more # components than in the "cumvar" test expect_true(sum(opc_method$variance$x_var[3, ] < cumvar_value) == two_input_matrices$n_components - 1) # do the same for method = "var" expect_true(sum(opc_method$variance$x_var[2, ] > (1 - cumvar_value)) == two_input_matrices_var$n_components) expect_true(ncol(two_input_matrices$scores) == two_input_matrices$n_components) two_test_ncnomp <- two_input_matrices$n_components - 1 expect_true(all(two_input_matrices$variance$x_var[3, 1:two_test_ncnomp] > cumvar_value)) opc_method <- pls_projection(Xr, Xu, Yr = Yr, pc_selection = list(method = "opc", value = 15), center = TRUE, scale = TRUE ) ## pls2 pls2_opc_method <- pls_projection(Xr, Xu, Yr = cbind(Yr, Yr_2), pc_selection = list(method = "opc", value = 15), center = TRUE, scale = TRUE ) expect_true(pls2_opc_method$n_components == which.min(pls2_opc_method$opc_evaluation[, 4])) distm <- as.matrix(dist(scale(pls2_opc_method$scores[1:nrow(Xr), ], TRUE, TRUE))) distm2 <- f_diss(pls2_opc_method$scores[1:nrow(Xr), ], scale = TRUE, center = TRUE) nn <- apply(distm, MARGIN = 2, FUN = function(x) order(x)[2]) result_rmsd <- as.vector(round(colMeans((cbind(Yr, Yr_2) - cbind(Yr, Yr_2)[nn, ])^2)^0.5, 4)) resemble_rmsds <- as.vector(round(pls2_opc_method$opc_evaluation[pls2_opc_method$n_components, 2:3], 4)) expect_true(all(resemble_rmsds == result_rmsd)) }) test_that("pls_projection large sets works", { skip_on_cran() skip_on_travis() # tolernce for results supposed to be 0s tol <- 1e-5 nirdata <- data("NIRsoil", package = "prospectr") Xu <- NIRsoil$spc[!as.logical(NIRsoil$train), ] Yu <- NIRsoil$CEC[!as.logical(NIRsoil$train)] Yr <- NIRsoil$CEC[as.logical(NIRsoil$train)] Yr_2 <- NIRsoil$Ciso[as.logical(NIRsoil$train)] Xr <- NIRsoil$spc[as.logical(NIRsoil$train), ] Xu <- Xu[!is.na(Yu), ] y_sel <- !is.na(Yr) & !is.na(Yr_2) Xr <- Xr[y_sel, ] Yu <- Yu[!is.na(Yu)] Yr_2 <- Yr_2[y_sel] Yr <- Yr[y_sel] cumvar_value <- 0.991 one_input_matrix <- pls_projection(Xr, Yr = Yr, pc_selection = list(method = "cumvar", value = cumvar_value), center = TRUE, scale = FALSE ) test_ncomp <- one_input_matrix$n_components - 1 expect_true(ncol(one_input_matrix$scores) == one_input_matrix$n_components) expect_true(all(one_input_matrix$variance$x_var[3, 1:test_ncomp] < cumvar_value)) two_input_matrices <- pls_projection(Xr, Xu, Yr, pc_selection = list(method = "cumvar", value = cumvar_value), center = TRUE, scale = FALSE ) two_input_matrices_var <- pls_projection(Xr, Xu, Yr, pc_selection = list(method = "var", value = 1 - cumvar_value), center = TRUE, scale = FALSE ) two_test_ncomp <- two_input_matrices$n_components - 1 expect_true(ncol(two_input_matrices$scores) == two_input_matrices$n_components) expect_true(all(two_input_matrices$variance$x_var[3, 1:two_test_ncomp] < cumvar_value)) preds <- sum(abs(predict(two_input_matrices)[1:nrow(Xr), ] - predict(two_input_matrices, Xr))) expect_true(preds < tol) opc_method <- pls_projection(Xr, Xu, Yr = Yr, pc_selection = list(method = "opc", value = 20), center = TRUE, scale = FALSE ) expect_true(opc_method$n_components == which.min(opc_method$opc_evaluation[, 2])) expect_true(opc_method$n_components == 12) # check that the number of components for method = "cumvar" is properly # obtained, this can be done with the results of opc_method as it selects more # components than in the "cumvar" test expect_true(sum(opc_method$variance$x_var[3, ] < cumvar_value) == two_input_matrices$n_components - 1) # do the same for method = "var" expect_true(sum(opc_method$variance$x_var[2, ] > (1 - cumvar_value)) == two_input_matrices_var$n_components) expect_true(ncol(two_input_matrices$scores) == two_input_matrices$n_components) two_test_ncnomp <- two_input_matrices$n_components - 1 expect_true(all(two_input_matrices$variance$x_var[3, 1:two_test_ncnomp] < cumvar_value)) opc_method <- pls_projection(Xr, Xu, Yr = Yr, pc_selection = list(method = "opc", value = 20), center = TRUE, scale = FALSE ) ## pls2 pls2_opc_method <- pls_projection(Xr, Xu, Yr = cbind(Yr, Yr_2), pc_selection = list(method = "opc", value = 20), center = TRUE, scale = FALSE ) expect_true(pls2_opc_method$n_components == which.min(pls2_opc_method$opc_evaluation[, 4])) distm <- as.matrix(dist(scale(pls2_opc_method$scores[1:nrow(Xr), ], TRUE, TRUE))) nn <- apply(distm, MARGIN = 2, FUN = function(x) order(x)[2]) result_rmsd <- as.vector(round(colMeans((cbind(Yr, Yr_2) - cbind(Yr, Yr_2)[nn, ])^2)^0.5, 4)) resemble_rmsds <- as.vector(round(pls2_opc_method$opc_evaluation[pls2_opc_method$n_components, 2:3], 4)) expect_true(all(resemble_rmsds == result_rmsd)) })