test_that("get_policy.ql returns a policy", { d <- sim_single_stage(200, seed = 1) pd <- policy_data(d, action = "A", covariates = list("Z", "B", "L"), utility = "U" ) pl <- policy_learn(type = "ql") expect_error( { p <- get_policy(pl(pd, q_models = q_glm())) }, NA ) expect_true( inherits(p, what = "policy") ) expect_error( p(pd), NA ) }) test_that("get_policy_functions.ql agrees with get_policy.ql", { d <- sim_two_stage(500, seed=1) pd <- policy_data(d, action = c("A_1", "A_2"), baseline = c("BB", "B"), covariates = list(L = c("L_1", "L_2"), C = c("C_1", "C_2")), utility = c("U_1", "U_2", "U_3")) # not full history: pl <- policy_learn(type = "ql") po <- pl(pd, q_models = q_glm()) expect_error( get_policy_functions(po), "stage argument is missing." ) pf_1 <- get_policy_functions(po, stage = 1) his_1 <- get_history(pd, stage = 1, full_history = FALSE) H1 <- his_1$H[, -c("id", "stage"), with = FALSE] pa_1 <- pf_1(H1) pf_2 <- get_policy_functions(po, stage = 2) his_2 <- get_history(pd, stage = 2, full_history = FALSE) H2 <- his_2$H[, -c("id", "stage"), with = FALSE] H2 <- cbind(H2, his_2$U[, -c("id", "stage", "U_bar"), with = FALSE]) pa_2 <- pf_2(H2) ## benchmark pa <- get_policy(po)(pd) expect_equal( pa[stage ==1]$d, pa_1 ) expect_equal( pa[stage ==2]$d, pa_2 ) # full history: pl <- policy_learn(type = "ql") po <- pl(pd, q_models = q_glm(), q_full_history = TRUE) pf_1 <- get_policy_functions(po, stage = 1) his_1 <- get_history(pd, stage = 1, full_history = TRUE) H1 <- his_1$H[, -c("id", "stage"), with = FALSE] H1 <- cbind(H1, his_1$U[, -c("id", "stage", "U_bar"), with = FALSE]) pa_1 <- pf_1(H1) pf_2 <- get_policy_functions(po, stage = 2) his_2 <- get_history(pd, stage = 2, full_history = TRUE) H2 <- his_2$H[, -c("id", "stage"), with = FALSE] H2 <- cbind(H2, his_2$U[, -c("id", "stage", "U_bar"), with = FALSE]) pa_2 <- pf_2(H2) ## benchmark pa <- get_policy(po)(pd) expect_equal( pa[stage ==1]$d, pa_1 ) expect_equal( pa[stage ==2]$d, pa_2 ) }) test_that("get_policy_functions.ql handles deterministic rewards", { d <- sim_two_stage(500, seed=1, deterministic_rewards = TRUE) pd <- policy_data(d, action = c("A_1", "A_2"), baseline = c("BB", "B"), covariates = list(L = c("L_1", "L_2"), C = c("C_1", "C_2")), utility = c("U_1", "U_2", "U_3"), deterministic_rewards = list( U_A0 = c("U_1_A0", "U_2_A0"), U_A1 = c("U_1_A1", "U_2_A1") )) # not full history: pl <- policy_learn(type = "ql") po <- pl(pd, q_models = q_glm()) pf_1 <- get_policy_functions(po, stage = 1) his_1 <- get_history(pd, stage = 1, full_history = FALSE) H1 <- his_1$H[, -c("id", "stage"), with = FALSE] H1 <- cbind(H1, his_1$U[, -c("id", "stage", "U_bar"), with = FALSE]) pa_1 <- pf_1(H1) pf_2 <- get_policy_functions(po, stage = 2) his_2 <- get_history(pd, stage = 2, full_history = FALSE) H2 <- his_2$H[, -c("id", "stage"), with = FALSE] H2 <- cbind(H2, his_2$U[, -c("id", "stage", "U_bar"), with = FALSE]) pa_2 <- pf_2(H2) ## benchmark pa <- get_policy(po)(pd) expect_equal( pa[stage ==1]$d, pa_1 ) expect_equal( pa[stage ==2]$d, pa_2 ) # full history: pl <- policy_learn(type = "ql") po <- pl(pd, q_models = q_glm(), q_full_history = TRUE) pf_1 <- get_policy_functions(po, stage = 1) his_1 <- get_history(pd, stage = 1, full_history = TRUE) H1 <- his_1$H[, -c("id", "stage"), with = FALSE] H1 <- cbind(H1, his_1$U[, -c("id", "stage", "U_bar"), with = FALSE]) pa_1 <- pf_1(H1) pf_2 <- get_policy_functions(po, stage = 2) his_2 <- get_history(pd, stage = 2, full_history = TRUE) H2 <- his_2$H[, -c("id", "stage"), with = FALSE] H2 <- cbind(H2, his_2$U[, -c("id", "stage", "U_bar"), with = FALSE]) pa_2 <- pf_2(H2) ## benchmark pa <- get_policy(po)(pd) expect_equal( pa[stage ==1]$d, pa_1 ) expect_equal( pa[stage ==2]$d, pa_2 ) }) test_that("get_policy_functions.ql handles partially missing deterministic rewards", { d <- sim_two_stage(500, seed=1, deterministic_rewards = TRUE) pd <- policy_data(d, action = c("A_1", "A_2"), baseline = c("BB", "B"), covariates = list(L = c("L_1", "L_2"), C = c("C_1", "C_2")), utility = c("U_1", "U_2", "U_3"), deterministic_rewards = list( U_A0 = c("U_1_A0", "U_2_A0"), U_A1 = c("U_1_A1", "U_2_A1") )) # not full history: pl <- policy_learn(type = "ql") po <- pl(pd, q_models = q_glm()) pf_1 <- get_policy_functions(po, stage = 1) his_1 <- get_history(pd, stage = 1, full_history = FALSE) H1 <- his_1$H[, -c("id", "stage"), with = FALSE] H1 <- cbind(H1, his_1$U[, -c("id", "stage", "U_bar", "U_A0"), with = FALSE]) pa_1 <- pf_1(H1) pf_2 <- get_policy_functions(po, stage = 2) his_2 <- get_history(pd, stage = 2, full_history = FALSE) H2 <- his_2$H[, -c("id", "stage"), with = FALSE] H2 <- cbind(H2, his_2$U[, -c("id", "stage", "U_bar", "U_A0"), with = FALSE]) pa_2 <- pf_2(H2) ## benchmark pa <- get_policy(po)(pd) expect_equal( pa[stage ==1]$d, pa_1 ) expect_equal( pa[stage ==2]$d, pa_2 ) # full history: pl <- policy_learn(type = "ql") po <- pl(pd, q_models = q_glm(), q_full_history = TRUE) pf_1 <- get_policy_functions(po, stage = 1) his_1 <- get_history(pd, stage = 1, full_history = TRUE) H1 <- his_1$H[, -c("id", "stage"), with = FALSE] H1 <- cbind(H1, his_1$U[, -c("id", "stage", "U_bar", "U_A0"), with = FALSE]) pa_1 <- pf_1(H1) pf_2 <- get_policy_functions(po, stage = 2) his_2 <- get_history(pd, stage = 2, full_history = TRUE) H2 <- his_2$H[, -c("id", "stage"), with = FALSE] H2 <- cbind(H2, his_2$U[, -c("id", "stage", "U_bar", "U_A0"), with = FALSE]) pa_2 <- pf_2(H2) ## benchmark pa <- get_policy(po)(pd) expect_equal( pa[stage ==1]$d, pa_1 ) expect_equal( pa[stage ==2]$d, pa_2 ) }) test_that("get_policy_functions.ql handles realistic actions", { d <- sim_two_stage(500, seed=1) pd <- policy_data(d, action = c("A_1", "A_2"), baseline = c("BB", "B"), covariates = list(L = c("L_1", "L_2"), C = c("C_1", "C_2")), utility = c("U_1", "U_2", "U_3")) # not full history: pl <- policy_learn(alpha = 0.1, type = "ql") po <- pl(pd, q_models = q_glm(), g_models = g_glm()) pf_1 <- get_policy_functions(po, stage = 1) his_1 <- get_history(pd, stage = 1, full_history = FALSE) H1 <- his_1$H[, -c("id", "stage"), with = FALSE] pa_1 <- pf_1(H1) pf_2 <- get_policy_functions(po, stage = 2) his_2 <- get_history(pd, stage = 2, full_history = FALSE) H2 <- his_2$H[, -c("id", "stage"), with = FALSE] H2 <- cbind(H2, his_2$U[, -c("id", "stage", "U_bar", "U_A0"), with = FALSE]) pa_2 <- pf_2(H2) ## benchmark pa <- get_policy(po)(pd) expect_equal( pa[stage ==1]$d, pa_1 ) expect_equal( pa[stage ==2]$d, pa_2 ) # full history: pl <- policy_learn(alpha = 0.1, type = "ql") po <- pl(pd, q_models = q_glm(), q_full_history = TRUE, g_models = g_glm(), g_full_history = FALSE) pf_1 <- get_policy_functions(po, stage = 1) his_1 <- get_history(pd, stage = 1, full_history = TRUE) H1 <- his_1$H[, -c("id", "stage"), with = FALSE] H1 <- cbind(H1, his_1$U[, -c("id", "stage", "U_bar", "U_A0"), with = FALSE]) H1$L <- H1$L_1 H1$C <- H1$C_1 pa_1 <- pf_1(H1) pf_2 <- get_policy_functions(po, stage = 2) his_2 <- get_history(pd, stage = 2, full_history = TRUE) H2 <- his_2$H[, -c("id", "stage"), with = FALSE] H2 <- cbind(H2, his_2$U[, -c("id", "stage", "U_bar", "U_A0"), with = FALSE]) H2$L <- H2$L_2 H2$C <- H2$C_2 pa_2 <- pf_2(H2) ## benchmark pa <- get_policy(po)(pd) expect_equal( pa[stage ==1]$d, pa_1 ) expect_equal( pa[stage ==2]$d, pa_2 ) }) test_that("get_policy_functions.ql handles varying stage action set", { d <- sim_two_stage_multi_actions(n=500, seed=1) pd <- policy_data(d, action = c("A_1", "A_2"), baseline = c("BB", "B"), covariates = list(L = c("L_1", "L_2"), C = c("C_1", "C_2")), utility = c("U_1", "U_2", "U_3")) # not full history: pl <- policy_learn(alpha = 0.05, type = "ql") po <- pl(pd, q_models = q_glm(), g_models = list(g_glm(), g_empir())) pf_1 <- get_policy_functions(po, stage = 1) his_1 <- get_history(pd, stage = 1, full_history = FALSE) H1 <- his_1$H[, -c("id", "stage"), with = FALSE] pa_1 <- pf_1(H1) pf_2 <- get_policy_functions(po, stage = 2) his_2 <- get_history(pd, stage = 2, full_history = FALSE) H2 <- his_2$H[, -c("id", "stage"), with = FALSE] pa_2 <- pf_2(H2) ## benchmark pa <- get_policy(po)(pd) expect_equal( pa[stage ==1]$d, pa_1 ) expect_equal( pa[stage ==2]$d, pa_2 ) }) test_that("policy_learn with type = 'ql' works as intended",{ d <- sim_two_stage(200, seed=1) pd <- policy_data(d, action = c("A_1", "A_2"), baseline = c("BB", "B"), covariates = list(L = c("L_1", "L_2"), C = c("C_1", "C_2")), utility = c("U_1", "U_2", "U_3")) ql <- policy_learn(type = "ql", alpha = 0, L = 1) expect_error( ql(policy_data = pd, q_models = q_glm()), NA ) expect_error( ql(policy_data = pd, q_models = list(q_glm(), q_glm())), NA ) ql <- policy_learn(type = "ql", alpha = 0.1, L = 1) expect_error( ql(policy_data = pd, q_models = q_glm()), "Please provide g_models." ) expect_error({ po <- ql(policy_data = pd, q_models = q_glm(), g_glm()) }, NA) expect_error({ p <- get_policy(po) }, NA) expect_s3_class(p(pd), class = "data.table") }) test_that("policy_learn with type ql handles varying action sets", { d <- sim_two_stage_multi_actions(n = 1e2) pd <- policy_data( data = d, action = c("A_1", "A_2"), baseline = c("B", "BB"), covariates = list( L = c("L_1", "L_2"), C = c("C_1", "C_2") ), utility = c("U_1", "U_2", "U_3") ) pl <- policy_learn(type = "ql") expect_error( po <- pl(pd, q_models = q_glm()), NA ) expect_error( pa <- get_policy(po)(pd), NA ) expect_error( pe <- policy_eval(pd, policy = get_policy(po), g_models = list(g_glm(), g_rf()) ), NA ) })