test_that("fit_q_functions handles varying stage-action sets", {
  d <- sim_two_stage_multi_actions(n = 1e2)
  expect_error(
    pd <- policy_data(data = d,
                      action = c("A_1", "A_2"),
                      baseline = c("B", "BB"),
                      covariates = list(L = c("L_1", "L_2"),
                                        C = c("C_1", "C_2")),
                      utility = c("U_1", "U_2", "U_3")),
    NA
  )
  p <- policy_def(
    c("yes", "no")
  )

  expect_error(
    qfit <- fit_Q_functions(pd, policy_actions = p(pd), q_models = list(q_glm(), q_glm()), full_history = FALSE),
    NA
  )
  expect_error(
    tmp <- predict(qfit, pd),
    NA
  )

  tmp2 <- predict(qfit$stage_1$q_model$model,
                  newdata = cbind(A = "yes", get_history(pd, stage = 1)$H[1,]))
  tmp2 <- tmp2 + get_history(pd, stage = 1)$U$U_bar[1]
  expect_equal(unname(tmp2), unname(unlist(tmp[1, "Q_yes"])))

  tmp2 <- predict(qfit$stage_1$q_model$model,
                  newdata = cbind(A = "no", get_history(pd, stage = 1)$H[1,]))
  tmp2 <- tmp2 + get_history(pd, stage = 1)$U$U_bar[1]
  expect_equal(unname(tmp2), unname(unlist(tmp[1, "Q_no"])))

})

test_that("q_models checks formula input", {
  d <- sim_two_stage(2e3, seed=1)
  pd <- policy_data(d,
                    action = c("A_1", "A_2"),
                    baseline = c("BB", "B"),
                    covariates = list(L = c("L_1", "L_2"),
                                      C = c("C_1", "C_2")),
                    utility = c("U_1", "U_2", "U_3"))

  p_dynamic <- policy_def(
    policy_functions = list(function(L_1) (L_1>0)*1,
                            function(C_2) (C_2>0)*1),
    reuse = FALSE
  )

  expect_error(policy_eval(policy_data = pd,
                           policy = p_dynamic,
                           q_models = q_glm(formula = Y~X)))
  expect_error(policy_eval(policy_data = pd,
                           policy = p_dynamic,
                           q_models = q_sl(formula = res~X)))
  expect_error(policy_eval(policy_data = pd,
                           policy = p_dynamic,
                           q_models = q_rf(formula = V_res~X * (.))))
  expect_error(policy_eval(policy_data = pd,
                           policy = p_dynamic,
                           q_models = q_glmnet(formula = Y~X)))
})

test_that("q_rf formats data correctly via the formula",{
  d1 <- sim_single_stage(200, seed=1)
  d1$BB <- sample(c("group 1", "group & 2", "group & 3"), size = 200, replace = TRUE)
  pd1 <- policy_data(d1,
                     action="A",
                     covariates = list("Z", "B", "L", "BB"),
                     utility="U")

  expect_error(
    pe <- policy_eval(
      policy_data = pd1,
      policy_learn = policy_learn(type = "ql", alpha = 0.05),
      g_models = g_glm(),
      g_full_history = FALSE,
      q_models = q_rf()
    ),
    NA
  )
})

test_that("q_sl formats data correctly via the formula",{
  d1 <- sim_single_stage(200, seed=1)
  d1$BB <- sample(c("group 1", "group & 2", "group & 3"), size = 200, replace = TRUE)
  pd1 <- policy_data(d1,
                     action="A",
                     covariates = list("Z", "B", "L", "BB"),
                     utility="U")

  expect_error(
    pe <- policy_eval(
      policy_data = pd1,
      policy_learn = policy_learn(type = "ql", alpha = 0.05),
      g_models = g_glm(),
      g_full_history = FALSE,
      q_models = q_sl()
    ),
    NA
  )

})

test_that("q_sl can find user-defined learners",{
  library("polle")
  d <- sim_single_stage(200, seed=1)
  d$BB <- sample(c("group 1", "group & 2", "group & 3"), size = 200, replace = TRUE)
  pd <- policy_data(d,
                    action="A",
                    covariates = list("Z", "B", "L", "BB"),
                    utility="U")
  p <- policy_def(1)

  env <- as.environment("package:SuperLearner")
  env <- new.env(parent = env)
  with(env,{
    SL.test <- function (Y, X, newX, family, obsWeights, model = TRUE, ...){
      if (is.matrix(X)) {
        X = as.data.frame(X)
      }
      fit.glm <- glm(Y ~ ., data = X, family = family, weights = obsWeights,
                     model = model)
      if (is.matrix(newX)) {
        newX = as.data.frame(newX)
      }
      pred <- predict(fit.glm, newdata = newX, type = "response")
      fit <- list(object = fit.glm)
      class(fit) <- "SL.glm" # SL.test
      out <- list(pred = pred, fit = fit)
      return(out)
    }
  })

  expect_error(
    qfun <- polle:::fit_Q_functions(pd,
                                    p(pd),
                                    q_sl(SL.library = "SL.test", env = env)),
    NA)

})

test_that("q_glm and q_sl(SL.library('SL.glm')) are (almost) equivalent",{
  library("SuperLearner")
  d1 <- sim_single_stage(200, seed=1)
  d1$A <- as.character(d1$A)
  d1$BB <- sample(c("group 1", "group_2", "G & 4"), size = nrow(d1), replace = TRUE)

  q1 <- q_glm(formula = ~.)
  q2 <- q_sl(formula = ~., SL.library = "SL.glm")

  q1 <- q1(AH = d1[,c("A", "B", "Z", "L", "BB")], V_res = d1$U)
  q2 <- q2(AH = d1[,c("A", "B", "Z", "L", "BB")], V_res = d1$U)

  # names are different
  expect_equal(
    unname(coef(q1$model)),
    unname(coef(q2$model$fitLibrary$SL.glm_All$object))
  )

  q1 <- q_glm(formula = ~.)
  q2 <- q_sl(formula = ~., SL.library = "SL.glm")
  d1$B <- as.character(d1$B)
  pd1 <- policy_data(d1, action = "A", covariates = c("Z", "L", "B", "BB"), utility = "U")
  pe1 <- policy_eval(policy_data = pd1,
              policy = policy_def(1),
              type = "or",
              q_models = q1)
  pe2 <- policy_eval(policy_data = pd1,
                     policy = policy_def(1),
                     type = "or",
                     q_models = q2)

  # names are different
  expect_equal(
    unname(coef(pe1$q_functions$stage_1$q_model$glm_model)),
    unname(coef( pe2$q_functions$stage_1$q_model$fit$fitLibrary$SL.glm_All$object))
  )
})

test_that("q_glmnet formats data correctly via the formula",{
  d1 <- sim_single_stage(200, seed=1)
  d1$BB <- sample(c("group 1", "group & 2", "group & 3"), size = 200, replace = TRUE)
  pd1 <- policy_data(d1,
                     action="A",
                     covariates = list("Z", "B", "L", "BB"),
                     utility="U")

  expect_error(
    pe <- policy_eval(
      policy_data = pd1,
      policy_learn = policy_learn(type = "ql", alpha = 0.05),
      q_models = q_glmnet(formula = ~ A*.)
    ),
    NA
  )
})


# missing data ------------------------------------------------------------


test_that("q_glm handles missing covariates", {
  d <- sim_two_stage(2e3, seed=1)
  d$C_1 <- NULL
  pd <- policy_data(d,
                    action = c("A_1", "A_2"),
                    baseline = c("BB", "B"),
                    covariates = list(L = c("L_1", "L_2"),
                                      C = c(NA, "C_2")), # C_1 is missing
                    utility = c("U_1", "U_2", "U_3"))
  p <- policy_def(1, reuse = TRUE)

  expect_error(
    policy_eval(policy_data = pd,
                policy = p),
    "NA/NaN/Inf in 'x'"
  )
  expect_error(
    policy_eval(policy_data = pd,
                policy = p,
                type = "or",
                q_models = q_glm(~L)),
    NA
  )
  expect_error(
    policy_eval(policy_data = pd,
                policy = p,
                type = "or",
                q_models = list(q_glm(~L), q_glm())),
    NA
  )
})

test_that("q_glmnet handles missing covariates", {
  d <- sim_two_stage(2e3, seed=1)
  d$C_1 <- NULL
  pd <- policy_data(d,
                    action = c("A_1", "A_2"),
                    baseline = c("BB", "B"),
                    covariates = list(L = c("L_1", "L_2"),
                                      C = c(NA, "C_2")), # C_1 is missing
                    utility = c("U_1", "U_2", "U_3"))
  p <- policy_def(1, reuse = TRUE)

  expect_error(
    suppressWarnings({
      pe <- policy_eval(policy_data = pd,
                        policy = p,
                        type = "or",
                        q_models = q_glmnet())
    })
  )


  expect_error(
    policy_eval(policy_data = pd,
                policy = p,
                type = "or",
                q_models = q_glmnet(~L + A)),
    NA
  )
  expect_error(
    policy_eval(policy_data = pd,
                policy = p,
                type = "or",
                q_models = list(q_glmnet(~L + A), q_glmnet())),
    NA
  )

  d <- sim_two_stage(2e3, seed=1)
  d$C_1[1:10] <- NA
  pd <- policy_data(d,
                    action = c("A_1", "A_2"),
                    baseline = c("BB", "B"),
                    covariates = list(L = c("L_1", "L_2"),
                                      C = c("C_1", "C_2")),
                    utility = c("U_1", "U_2", "U_3"))

  expect_error(
    expect_warning(
      policy_eval(policy_data = pd,
                  policy = p,
                  type = "or",
                  q_models = q_glmnet()),
      "The regression variables C have missing NA values."
    )
  )

})