test_that("get_policy.ptl returns a policy", {
  d <- sim_single_stage(200, seed=1)
  pd <- policy_data(d,
                    action="A",
                    covariates = list("Z", "B", "L"),
                    utility="U")

  pl <- policy_learn(type = "ptl", control = control_ptl())
  expect_error({
    p <- get_policy(pl(pd, q_models = q_glm(), g_models = g_glm()))
  },
  NA
  )

  expect_true(
    inherits(p, what = "policy")
  )

  expect_error(
    p(pd),
    NA
  )
})

test_that("policy_learn with type ptl works as intended", {
  d <- sim_two_stage(200, seed = 1)

  pd <- policy_data(d,
    action = c("A_1", "A_2"),
    baseline = c("BB", "B"),
    covariates = list(
      L = c("L_1", "L_2"),
      C = c("C_1", "C_2")
    ),
    utility = c("U_1", "U_2", "U_3")
  )

  pl <- policy_learn(
    type = "ptl",
    control = control_ptl(),
    alpha = 0,
    L = 1
  )

  expect_error(
    {
      po <- pl(
        policy_data = pd,
        g_models = g_glm(),
        q_models = q_glm()
      )
    },
    NA
  )

  expect_s3_class(
    get_policy(po)(pd),
    "data.table"
  )


  expect_error(
    {
      pl <- policy_learn(
        type = "ptl",
        control = control_ptl(
          policy_vars = c("L", "BB"),
          depth = 3,
          hybrid = TRUE
        ),
        alpha = 0,
        L = 1
      )
    },
    NA
  )

  expect_error(
    {
      po <- pl(
        policy_data = pd,
        g_models = g_glm(),
        q_models = q_glm()
      )
    },
    NA
  )

  expect_s3_class(get_policy(po)(pd), "data.table")
})


test_that("policy_learn with type ptl handles varying action sets", {
  d <- sim_two_stage_multi_actions(n = 1e2)
  pd <- policy_data(
    data = d,
    action = c("A_1", "A_2"),
    baseline = c("B", "BB"),
    covariates = list(
      L = c("L_1", "L_2"),
      C = c("C_1", "C_2")
    ),
    utility = c("U_1", "U_2", "U_3")
  )

  pl <- policy_learn(
    type = "ptl",
    control = control_ptl()
  )
  expect_error(
    po <- pl(pd, q_models = q_glm(), g_models = list(g_glm(), g_rf())),
    NA
  )
  expect_error(
    pa <- get_policy(po)(pd),
    NA
  )

  expect_error(
    pe <- policy_eval(pd,
      policy = get_policy(po),
      g_models = list(g_glm(), g_rf())
    ),
    NA
  )

  # realistic actions
  d2 <- copy(d)
  d2$A_2[d2$A_2 == "no"] <- "yes"
  pd2 <- policy_data(
    data = d2,
    action = c("A_1", "A_2"),
    baseline = c("B", "BB"),
    covariates = list(
      L = c("L_1", "L_2"),
      C = c("C_1", "C_2")
    ),
    utility = c("U_1", "U_2", "U_3")
  )

  pl2 <- policy_learn(
    type = "ptl",
    control = control_ptl(),
    alpha = 0.45,
    L = 2
  )
  expect_error(
    po2 <- pl2(pd2, q_models = q_glm(), g_models = list(g_glm(), g_rf())),
    NA
  )
  expect_error(
    pa2 <- get_policy(po2)(pd2),
    NA
  )
  expect_error(
    pe2 <- policy_eval(pd2,
      policy_learn = pl2,
      g_models = list(g_glm(), g_rf())
    ),
    NA
  )

  expect_error(
    pf2_1 <- get_policy_functions(po2, stage = 1),
    NA
  )
  H1 <- get_history(pd2, stage = 1)$H
  expect_error(
    pfa2_1 <- pf2_1(H1),
    NA
  )
  expect_equal(
    pa2[stage == 1, ]$d,
    pfa2_1
  )

  expect_error(
    pf2_2 <- get_policy_functions(po2, stage = 2),
    NA
  )
  H1 <- get_history(pd2, stage = 2)$H
  expect_error(
    pfa2_2 <- pf2_2(H1),
    NA
  )
  expect_equal(
    pa2[stage == 2, ]$d,
    pfa2_2
  )
})

test_that("policy_learn with type = 'ptl' works with thresholds.", {

  d <- sim_two_stage(200, seed=1)
  pd <- policy_data(d,
                    action = c("A_1", "A_2"),
                    baseline = c("BB", "B"),
                    covariates = list(L = c("L_1", "L_2"),
                                      C = c("C_1", "C_2")),
                    utility = c("U_1", "U_2", "U_3"))

  pl <- policy_learn(
    type = "ptl",
    control = control_ptl(),
    threshold = c(0, 0.5, 1)
  )

  expect_no_error(
    po <- pl(
      policy_data = pd,
      g_models = g_glm(),
      q_models = q_glm()
    )
  )

  expect_no_error(
    pf <- get_policy(po)
  )

  expect_equal(
    lapply(pf, function(x) attr(x, "name")) |> unlist(),
    paste0("ptl(eta=", c(0,0.5,1), ")")
  )

  expect_error(
    pf <- get_policy(po, threshold = 2),
    "Invalid threshold. Choose between 0, 0.5, 1"
  )

  expect_no_error(
    pf <- get_policy(po, threshold = 0.5)
  )

  expect_true(
    is.data.table(pf(pd)),
    all(pf(pd)$d %in% get_action_set(pd))
  )

  expect_error(
    pf <- get_policy_functions(po, stage = 1),
    "Choose threshold"
  )

  expect_no_error(
    pf <- get_policy_functions(po, stage = 1, threshold = 1)
  )
})

test_that("get_policy and get_policy_functions agree with type ptl and a non-zero threshold argument,", {
  d <- sim_two_stage(200, seed=1)
  pd <- policy_data(d,
                    action = c("A_1", "A_2"),
                    baseline = c("BB", "B"),
                    covariates = list(L = c("L_1", "L_2"),
                                      C = c("C_1", "C_2")),
                    utility = c("U_1", "U_2", "U_3"))

  pl <- policy_learn(
    type = "ptl",
    threshold = 1,
    control = control_ptl()
  )
  po <- pl(pd, q_models = q_glm(), g_models = g_glm())


  ## pred <- predict.blip_function(po$blip_functions$stage_1, new_history = his)
  ## hist(x = pred$blip, breaks = 100)

  d <- get_policy(po)(pd)[stage == 1]$d

  pf <- get_policy_functions(po, stage = 1)
  his <- get_history(pd, stage = 1)
  H <- get_H(his)
  dd <- pf(H)

  expect_equal(
    d,
    dd
  )

  d <- get_policy(po)(pd)[stage == 2]$d
  pf <- get_policy_functions(po, stage = 2)
  his <- get_history(pd, stage = 2)
  H <- get_H(his)
  dd <- pf(H)

  expect_equal(
    d,
    dd
  )
})