# load required functions and packages
library("testthat")
suppressWarnings(library("SuperLearner"))

# generate the data -- note that this is a simple setting, for speed -----------
set.seed(4747)
p <- 3
n <- 1e4
x <- as.data.frame(replicate(p, stats::rnorm(n, 0, 1)))
# apply the function to the covariates
y <- 1 + 0.5 * x[, 1] + 0.75 * x[, 2] + stats::rnorm(n, 0, 1)
true_var <- 1 + .5 ^ 2 + .75 ^ 2
# note that true difference in R-squareds for variable j, under independence, is
# beta_j^2 * var(x_j) / var(y)
r2_one <- 0.5 ^ 2 * 1 / true_var
r2_two <- 0.75 ^ 2 * 1 / true_var
folds <- sample(rep(seq_len(2), length = length(y)))
folds_lst <- lapply(as.list(seq_len(2)), function(i) which(folds == i))

# set up a library for SuperLearner
learners <- c("SL.glm", "SL.mean")
V <- 2

# test VIM ---------------------------------------------------------------------
set.seed(4747)
test_that("VIM without sample splitting works", {
  est <- vim(Y = y, X = x, indx = 2, type = "r_squared", run_regression = TRUE,
               SL.library = learners, alpha = 0.05, cvControl = list(V = V),
               env = environment(), sample_splitting = FALSE)
  # check that the estimate is approximately correct
  expect_equal(est$est, r2_two, tolerance = 0.1, scale = 1)
})
set.seed(4747)
test_that("ANOVA-VIM without sample splitting works", {
  expect_message(
    est <- vim(Y = y, X = x, indx = 2, type = "anova", run_regression = TRUE,
               SL.library = learners, alpha = 0.05, cvControl = list(V = V),
               env = environment(), sample_splitting = FALSE)
  )
  # check that the estimate is approximately correct
  expect_equal(est$est, r2_two, tolerance = 0.1, scale = 1)
})

set.seed(1234)
test_that("General variable importance estimates using internally-computed fitted values work", {
  est <- vim(Y = y, X = x, indx = 2, type = "r_squared", run_regression = TRUE,
                SL.library = learners, alpha = 0.05, cvControl = list(V = V),
             env = environment(), sample_splitting_folds = folds)
  # check that the estimate is approximately correct
  expect_equal(est$est, r2_two, tolerance = 0.1, scale = 1)
  # check that the SE, CI work
  expect_length(est$ci, 2)
  expect_length(est$se, 1)
  # check that the p-value worked
  expect_length(est$p_value, 1)
  expect_true(est$test)
  # check that printing, plotting, etc. work
  expect_silent(format(est)[1])
  expect_output(print(est), "Estimate", fixed = TRUE)
  # check that influence curve worked
  expect_length(est$eif_full, length(y) / 2)
})

# fit nuisance regressions -----------------------------------------------------
# fit the data with all covariates and sample-splitting (note V = 2 in CV.SL)
set.seed(4747)
full_fit <- suppressWarnings(
  SuperLearner::CV.SuperLearner(Y = y, X = x, SL.library = learners,
                                          cvControl = list(V = 2, validRows = folds_lst),
                                          innerCvControl = list(list(V = V)))
)
full_fitted <- SuperLearner::predict.SuperLearner(full_fit, onlySL = TRUE)$pred
# fit the data with only X1
reduced_fit <- suppressWarnings(
  SuperLearner::CV.SuperLearner(Y = full_fitted,
                                             X = x[, -2, drop = FALSE],
                                             SL.library = learners,
                                             cvControl = list(V = 2, validRows = full_fit$folds),
                                             innerCvControl = list(list(V = V)))
)
reduced_fitted <- SuperLearner::predict.SuperLearner(reduced_fit, onlySL = TRUE)$pred

# test VIM with pre-computed nuisance estimates --------------------------------
test_that("General variable importance estimates using externally-computed fitted values work", {
    # provide folds
    est <- vim(Y = y, X = x, indx = 2, type = "r_squared",
               run_regression = FALSE,
               f1 = full_fitted, f2 = reduced_fitted,
               sample_splitting_folds = folds)
    # check that estimate worked
    expect_equal(est$est, r2_two, tolerance = 0.15, scale = 1)
    # check that p-value exists
    expect_length(est$p_value, 1)
    # check that CIs with other transformations work
    ci_log <- vimp_ci(est$est, est$se, scale = "log", level = 0.95)
    ci_logit <- vimp_ci(est$est, est$se, scale = "logit", level = 0.95)
    expect_length(ci_log, 2)
    expect_length(ci_logit, 2)
})
test_that("VIM estimates using externally-computed fitted values with different folds for estimation and inference work", {
  # provide folds
  est_average <- vim(Y = y, X = x, indx = 2, type = "r_squared",
                     run_regression = FALSE,
                     f1 = full_fitted, f2 = reduced_fitted,
                     sample_splitting_folds = folds,
                     final_point_estimate = "average")
  est_full <- vim(Y = y, X = x, indx = 2, type = "r_squared",
                  run_regression = FALSE,
                  f1 = full_fitted, f2 = reduced_fitted,
                  sample_splitting_folds = folds,
                  final_point_estimate = "full")
  # check that estimate worked
  expect_equal(est_average$est, r2_two, tolerance = 0.15, scale = 1)
  expect_equal(est_full$est, r2_two, tolerance = 0.15, scale = 1)
})


# measures of predictiveness ---------------------------------------------------
test_that("Measures of predictiveness work", {
  full_rsquared <- est_predictiveness(full_fitted, y,
                                      type = "r_squared")
  expect_equal(full_rsquared$point_est, 0.44, tolerance = 0.1)
  expect_length(full_rsquared$eif, length(y))
})

# error messages ---------------------------------------------------------------
test_that("Error messages work", {
    expect_error(vim(X = x))
    expect_error(vim(Y = y))
    expect_error(vim(Y = y, X = x, SL.library = NULL))
    expect_error(vim(Y = y, X = x, run_regression = FALSE))
    expect_error(vim(Y = y, f1 = mean(y)))
    expect_error(vim(Y = y, f1 = rep(mean(y), length(y)), f2 = mean(y)))
    expect_error(vim(Y = y, X = as.data.frame(x), SL.library = learners,
                     type = "nonsense_type"))
    expect_error(vim(Y = y, X = X, SL.library = learners, indx = ncol(X) + 1))
})