context("test-pca.R -- Lrnr_pca for preprocessing with Pipelines")
library(origami)
set.seed(37912)

# data
ncomp <- 3
data(cpp_imputed)
covars <- c(
  "apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs",
  "sexn"
)
outcome <- "haz"
task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome)

# define learners
glm_fast <- Lrnr_glm$new(intercept = FALSE)
pca_sl3 <- Lrnr_pca$new(n_comp = ncomp, center = TRUE, scale. = TRUE)
pcr_pipe_sl3 <- Pipeline$new(pca_sl3, glm_fast)

# create stacks + train and predict
pcr_pipe_sl3_fit <- pcr_pipe_sl3$train(task)
out_pcr_fit <- pcr_pipe_sl3_fit$predict()

# extract prcomp object (PCA results) from Pipeline
pca_from_pipe <- pcr_pipe_sl3_fit$fit_object$learner_fits[[1]]$fit_object

# compute PCA with GLM manually

cpp_pca <- stats::prcomp(x = task$X, center = TRUE, scale. = TRUE)
cpp_pca_rotated <- as.matrix(task$X) %*% cpp_pca$rotation[, seq_len(ncomp)]
pcr_cpp <- glm(cpp_imputed$haz ~ -1 + cpp_pca_rotated)
pcr_preds <- as.numeric(predict(pcr_cpp))
attributes(cpp_pca) <- attributes(pca_from_pipe)
pcr_pipe_sl3_fit$fit_object$learner_fits[[1]]$predict()

test_that("PCA computed by Lrnr_pca matches stats::prcomp exactly.", {
  expect_equal(pca_from_pipe, cpp_pca)
})

test_that("Regression on PCs matches between Pipeline and manual invocation.", {
  expect_equal(out_pcr_fit, pcr_preds)
})

test_that("Arguments are passed to prcomp correctly by Lrnr_pca", {
  # pass some arguments to prcomp via Lrnr_pca
  pca_sl3 <- Lrnr_pca$new(
    n_comp = ncomp, retx = FALSE, center = TRUE,
    scale. = FALSE
  )
  pcr_pipe_sl3 <- Pipeline$new(pca_sl3, glm_fast)
  pcr_pipe_sl3_fit <- pcr_pipe_sl3$train(task)
  pca_from_pipe <- pcr_pipe_sl3_fit$fit_object$learner_fits[[1]]$fit_object

  # do the same thing with prcomp
  cpp_pca <- stats::prcomp(x = task$X, retx = FALSE, center = TRUE, scale. = FALSE)
  attributes(cpp_pca) <- attributes(pca_from_pipe)
  expect_equal(pca_from_pipe, cpp_pca)
})