test_that("policy_learn with type blip works as intended", { d <- sim_two_stage(200, seed=1) pd <- policy_data(d, action = c("A_1", "A_2"), baseline = c("BB", "B"), covariates = list(L = c("L_1", "L_2"), C = c("C_1", "C_2")), utility = c("U_1", "U_2", "U_3")) # default settings: pl <- policy_learn(type = "blip", control = control_blip()) expect_true({ po <- pl( policy_data = pd, g_models = g_glm(), q_models = q_glm() ) inherits(po, "policy_object") }) # realistic policy: pl <- policy_learn(type = "blip", control = control_blip(), alpha = 0.2) expect_true({ po <- pl( policy_data = pd, g_models = g_glm(), q_models = q_glm() ) inherits(po, "policy_object") }) # predictions from the blip model: expect_true({ blip <-predict(po$blip_functions, pd) !any(is.na(blip$blip)) }) expect_true({ all(is.numeric(blip$blip)) }) # folds passed to the blip model: pl <- policy_learn(type = "blip", L = 2, control = control_blip( blip_models = q_sl() )) expect_true({ po <- pl( policy_data = pd, g_models = g_glm(), q_models = q_glm() ) t1 <- inherits(po, "policy_object") t2 <- po$blip_functions$stage_1$blip_model$model$cvControl$V == 2 t3 <- all.equal(po$folds, po$blip_functions$stage_1$blip_model$model$cvControl$validRows) all(c(t1, t2, t3)) }) }) test_that("get_policy_functions.blip returns a list of policy functions", { d <- sim_two_stage(200, seed = 1) d$A_2 <- paste(d$A_2, "test", sep = "") d$A_1 <- as.character(d$A_1) pd <- policy_data(d, action = c("A_1", "A_2"), baseline = c("BB", "B"), covariates = list( L = c("L_1", "L_2"), C = c("C_1", "C_2") ), utility = c("U_1", "U_2", "U_3") ) pl <- policy_learn( type = "blip", control = control_blip() ) po <- pl( policy_data = pd, g_models = list(g_glm(), g_glm()), q_models = q_glm() ) expect_error( pf1 <- get_policy_functions(po, stage = 1), NA ) his <- get_history(pd, stage = 1) H <- get_H(his) expect_true( all(pf1(H) %in% get_stage_action_sets(pd)[[1]]) ) # realistic action set at level alpha: pl <- policy_learn( type = "blip", control = control_blip(), alpha = 0.2 ) po <- pl( policy_data = pd, g_models = list(g_glm(), g_glm()), q_models = q_glm() ) expect_error( pf1 <- get_policy_functions(po, stage = 1), NA ) his <- get_history(pd, stage = 1) H <- get_H(his) expect_true( all(pf1(H) %in% get_stage_action_sets(pd)[[1]]) ) expect_error( pf2 <- get_policy_functions(po, stage = 2), NA ) his <- get_history(pd, stage = 2) H <- get_H(his) expect_true( all(pf2(H) %in% get_stage_action_sets(pd)[[2]]) ) expect_error( get_policy_functions(po), "stage argument is missing" ) }) test_that("get_policy.blip returns a policy", { d <- sim_two_stage(200, seed = 1) d$A_2 <- paste(d$A_2, "test", sep = "") d$A_1 <- as.character(d$A_1) pd <- policy_data(d, action = c("A_1", "A_2"), baseline = c("BB", "B"), covariates = list( L = c("L_1", "L_2"), C = c("C_1", "C_2") ), utility = c("U_1", "U_2", "U_3") ) pl <- policy_learn( type = "blip", control = control_blip() ) po <- pl( policy_data = pd, g_models = list(g_glm(), g_glm()), q_models = q_glm() ) expect_true({ p <- get_policy(po) inherits(p, "policy") }) expect_true( is.data.table(p(pd)) ) # realistic action set at level alpha: pl <- policy_learn( type = "blip", control = control_blip(), alpha = 0.2 ) po <- pl( policy_data = pd, g_models = list(g_glm(), g_glm()), q_models = q_glm() ) expect_true({ p <- get_policy(po) inherits(p, "policy") }) pa <- p(pd) expect_true(is.data.table(pa)) expect_true(all(pa$d %in% get_action_set(pd))) expect_true(all(key(pa) == c("id", "stage"))) expect_true(inherits(pa$d, "character")) # comparing get_policy_function and get_policy: pf1 <- get_policy_functions(po, stage = 1) his1 <- get_history(pd, stage = 1) H1 <- get_H(his1) expect_equal( pf1(H1), pa[stage == 1, ]$d ) pf2 <- get_policy_functions(po, stage = 2) his2 <- get_history(pd, stage = 2) H2 <- get_H(his2) expect_equal( pf2(H2), pa[stage == 2, ]$d ) }) test_that("policy_learn with type blip is persistent in the single stage case.", { d <- sim_single_stage(200, seed = 1) pd <- policy_data(d, action = "A", covariates = list("Z", "B", "L"), utility = "U" ) pl <- policy_learn( type = "blip", control = control_blip() ) po <- pl(pd, q_models = q_glm(), g_models = g_glm()) ## his <- get_history(pd) ## pred <- predict.blip_function(po$blip_functions$stage_1, new_history = his) ## hist(x = pred$blip, breaks = 100) d <- get_policy(po)(pd)$d fix <- c( 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 ) fix <- as.character(fix) expect_equal( d, fix ) }) test_that("policy_learn with type blip passes the threshold argument in the single-stage case,", { d <- sim_single_stage(200, seed = 1) pd <- policy_data(d, action = "A", covariates = list("Z", "B", "L"), utility = "U" ) pl <- policy_learn( type = "blip", threshold = c(1, 0.5), control = control_blip() ) po <- pl(pd, q_models = q_glm(), g_models = g_glm()) expect_equal( po$threshold, c(0.5, 1) ) pl <- policy_learn( type = "blip", threshold = c(3), control = control_blip() ) po2 <- pl(pd, q_models = q_glm(), g_models = g_glm()) ## the choosen treshold(s) should not influence the blip-function: expect_equal( po$blip_functions, po2$blip_functions ) }) test_that("get_policy.blip uses the threshold argument", { d <- sim_single_stage(200, seed = 1) pd <- policy_data(d, action = "A", covariates = list("Z", "B", "L"), utility = "U" ) pl <- policy_learn( type = "blip", threshold = 1, control = control_blip() ) po <- pl(pd, q_models = q_glm(), g_models = g_glm()) his <- get_history(pd) pred <- predict.blip_function(po$blip_functions$stage_1, new_history = his) ## hist(x = pred$blip, breaks = 100) d <- get_policy(po)(pd)$d dd <- c("0", "1")[(pred$blip > 1) + 1] dd0 <- c("0", "1")[(pred$blip > 0) + 1] expect_equal( d, dd ) d <- get_policy(po, threshold = 1)(pd)$d expect_equal( d, dd ) d <- get_policy(po, threshold = 0)(pd)$d expect_true( any(!(d == dd)) ) d <- get_policy(po, threshold = c(0, 1))[[2]](pd)$d expect_equal( d, dd ) d <- get_policy(po, threshold = c(0, 1))[[1]](pd)$d expect_equal( d, dd0 ) }) test_that("policy_learn with type blip uses the threshold argument in the two-stage case,", { d <- sim_two_stage(n = 1e2, seed = 1) d$U <- d$U_1 + d$U_2 + d$U_3 - 2 * d$A_2 pd <- policy_data(d, action = c("A_1", "A_2"), covariates = list( L = c("L_1", "L_2"), C = c("C_1", "C_2") ), utility = "U" ) pl <- policy_learn( type = "blip", threshold = 1, control = control_blip() ) po <- pl(pd, q_models = q_glm(), g_models = g_glm()) ## first stage: his <- get_history(pd, stage = 1) pred <- predict.blip_function(po$blip_functions$stage_1, new_history = his) ## hist(x = pred$blip, breaks = 100) dd <- c("0", "1")[(pred$blip > 1) + 1] # use the threshold in the first stage d <- get_policy(po)(pd)[stage == 1]$d expect_equal( d, dd ) ## second stage: his <- get_history(pd, stage = 2) pred <- predict.blip_function(po$blip_functions$stage_2, new_history = his) ## hist(x = pred$blip, breaks = 100) dd <- c("0", "1")[(pred$blip > 0) + 1] # usual optimal policy in the second stage d <- get_policy(po)(pd)[stage == 2]$d expect_equal( d, dd ) }) test_that("get_policy and get_policy_functions agree with type blip and a non-zero threshold argument,", { d <- sim_single_stage(200, seed = 1) pd <- policy_data(d, action = "A", covariates = list("Z", "B", "L"), utility = "U" ) pl <- policy_learn( type = "blip", threshold = 1, control = control_blip() ) po <- pl(pd, q_models = q_glm(), g_models = g_glm()) ## pred <- predict.blip_function(po$blip_functions$stage_1, new_history = his) ## hist(x = pred$blip, breaks = 100) d <- get_policy(po)(pd)$d pf <- get_policy_functions(po, stage = 1) his <- get_history(pd) H <- get_H(his) dd <- pf(H) expect_equal( d, dd ) d <- get_policy(po, threshold = 0)(pd)$d pf <- get_policy_functions(po, stage = 1, threshold = 0) his <- get_history(pd) H <- get_H(his) dd <- pf(H) expect_equal( d, dd ) expect_error( get_policy_functions(po, threshold = c(0, 1)), "Set single threshold" ) }) test_that("get_policy.blip() returns multiple policies when given multiple thresholds.", { d <- sim_single_stage(200, seed = 1) pd <- policy_data(d, action = "A", covariates = list("Z", "B", "L"), utility = "U" ) pl <- policy_learn( type = "blip", threshold = c(0, 1, 2), control = control_blip() ) po <- pl(pd, q_models = q_glm(), g_models = g_glm()) pol_list <- get_policy(po) expect_equal( lapply(pol_list, function(x) attr(x, "name")) |> unlist(), paste0("blip(eta=", c(0,1,2), ")") ) expect_true( is.list(pol_list), length(pol_list) == 3, all(unlist(lapply(pol_list, function(x) inherits(x, what = "policy")))) ) expect_no_error( pol_list[[1]](pd) ) expect_no_error( pol_list[[2]](pd) ) expect_no_error( pol_list[[3]](pd) ) his <- get_history(pd) pred <- predict.blip_function(po$blip_functions$stage_1, new_history = his) ## hist(x = pred$blip, breaks = 100) d <- pol_list[[2]](pd)$d dd <- c("0", "1")[(pred$blip > 1) + 1] expect_equal( d, dd ) }) test_that("policy_learn with type blip uses the quantile_threshold argument.", { d <- sim_single_stage(200, seed = 1) pd <- policy_data(d, action = "A", covariates = list("Z", "B", "L"), utility = "U" ) ## policy value pl <- policy_learn( type = "blip", threshold = c(1), control = control_blip(quantile_prob_threshold = c(0.25, 0.5)) ) po <- pl(pd, q_models = q_glm(), g_models = g_glm()) pe <- policy_eval( policy_data = pd, policy_learn = pl ) pl_ref <- policy_learn( type = "blip", threshold = pe$policy_object$threshold, control = control_blip() ) pe_ref <- policy_eval( policy_data = pd, policy_learn = pl_ref ) expect_equal( estimate(pe), estimate(pe_ref) ) pl <- policy_learn( type = "blip", threshold = c(1), control = control_blip(quantile_prob_threshold = c(0.25, 1.1)) ) expect_error( policy_eval(policy_data = pd, policy_learn = pl), "quantile_prob_threshold must be numeric vector with values in" ) }) test_that("policy_learn with type 'blip' has the expected output for a fixed two-stage case with a single final utility outcome.", { set.seed(1) x1 <- runif(n = 1e2, min = -1, max = 1) a1 <- c(rep(1, 50), rep(2, 50)) x2 <- runif(n = 1e2, min = -1, max = 1) a2 <- c(rep(3, 50), rep(4, 50)) x3 <- runif(n = 1e2, min = -1, max = 1) y <- a1 * x1 + a2 * x2 + runif(n = 1e2, min = -1, max = 1) p1 <- c(rep(1, 25), rep(2, 25), rep(1, 25), rep(2, 25)) p2 <- c(rep(3, 25), rep(4, 25), rep(3, 25), rep(4, 25)) d <- data.table(x1 = x1, a1 = a1, x2 = x2, a2 = a2, x3 = x3, y = y, p1 = p1, p2 = p2) pd <- policy_data( data = d, action = c("a1", "a2"), covariates = list(x = c("x1", "x2"), p = c("p1", "p2")), utility = c("y") ) gf <- fit_g_functions(pd, g_models = list(g_glm(~1), g_glm(~1))) ## ## no cross-fitting ## ## policy_learn(type = "blip")): pl <- policy_learn( type = "blip", control = control_blip(blip_models = q_glm(~x)) ) expect_no_error( po <- pl(pd, q_models = list(q_glm(~x), q_degen(var = "x")), g_functions = gf) ) ## stage 2 Z2 <- cbind( d$x2 + (d$a2 == "3") / (0.5 * (d$a2 == "3") + 0.5 * (d$a2 == "4")) * (d$y - d$x2), d$x2 + (d$a2 == "4") / (0.5 * (d$a2 == "3") + 0.5 * (d$a2 == "4")) * (d$y - d$x2) ) blip2 <- Z2[,2] - Z2[,1] blip_model2 <- glm(blip2 ~ d$x2) expect_equal( po$blip_functions$stage_2$blip_model$model |> coef() |> unname(), blip_model2 |> coef() |> unname() ) d2 <- ifelse(predict(blip_model2, newdata = data.frame(x2 = d$x2)) > 0, "4", "3") expect_equal( unname(d2), get_policy(po)(pd)[stage == 2]$d ) q1 <- predict(glm(d$x2 ~ d$x1), type = "response") ## stage 1 Z1 <- cbind( q1 + (d$a1 == "1") / (0.5 * (d$a1 == "1") + 0.5 * (d$a1 == "2")) * (d$x2 - q1) + (d$a1 == "1") / (0.5 * (d$a1 == "1") + 0.5 * (d$a1 == "2")) * (d$a2 == d2) / (0.5 * (d$a2 == "3") + 0.5 * (d$a2 == "4")) * (d$y - d$x2), q1 + (d$a1 == "2") / (0.5 * (d$a1 == "1") + 0.5 * (d$a1 == "2")) * (d$x2 - q1) + (d$a1 == "2") / (0.5 * (d$a1 == "1") + 0.5 * (d$a1 == "2")) * (d$a2 == d2) / (0.5 * (d$a2 == "3") + 0.5 * (d$a2 == "4")) * (d$y - d$x2) ) blip1 <- Z1[,2] - Z1[,1] blip_model1 <- glm(blip1 ~ d$x1) expect_equal( po$blip_functions$stage_1$blip_model$model |> coef() |> unname(), blip_model1 |> coef() |> unname() ) d1 <- ifelse(predict(blip_model1, newdata = data.frame(x1 = d$x1)) > 0, "2", "1") expect_equal( unname(d1), get_policy(po)(pd)[stage == 1]$d ) rm(list = ls()[!(ls() %in% c("pd", "d", "gf"))]) ## ## cross-fitting ## ## policy_learn(type = "blip")): pl <- policy_learn( type = "blip", control = control_blip(blip_models = q_glm(~x)), L = 2, cross_fit_g_models = FALSE ) expect_no_error( po <- pl(pd, q_models = list(q_glm(~x), q_degen(var = "x")), g_functions = gf) ) ## stage 2 Z2 <- cbind( d$x2 + (d$a2 == "3") / (0.5 * (d$a2 == "3") + 0.5 * (d$a2 == "4")) * (d$y - d$x2), d$x2 + (d$a2 == "4") / (0.5 * (d$a2 == "3") + 0.5 * (d$a2 == "4")) * (d$y - d$x2) ) blip2 <- Z2[,2] - Z2[,1] blip_model2 <- glm(blip2 ~ d$x2) expect_equal( po$blip_functions$stage_2$blip_model$model |> coef() |> unname(), blip_model2 |> coef() |> unname() ) d2 <- ifelse(predict(blip_model2, newdata = data.frame(x2 = d$x2)) > 0, "4", "3") expect_equal( unname(d2), get_policy(po)(pd)[stage == 2]$d ) q1 <- rep(as.numeric(NA), 1e2) q1[po$folds[[1]]] <- predict(glm(x2 ~ x1, data = d[-po$folds[[1]],]), type = "response", newdata = d[po$folds[[1]],]) q1[po$folds[[2]]] <- predict(glm(x2 ~ x1, data = d[-po$folds[[2]],]), type = "response", newdata = d[po$folds[[2]],]) ## stage 1 Z1 <- cbind( q1 + (d$a1 == "1") / (0.5 * (d$a1 == "1") + 0.5 * (d$a1 == "2")) * (d$x2 - q1) + (d$a1 == "1") / (0.5 * (d$a1 == "1") + 0.5 * (d$a1 == "2")) * (d$a2 == d2) / (0.5 * (d$a2 == "3") + 0.5 * (d$a2 == "4")) * (d$y - d$x2), q1 + (d$a1 == "2") / (0.5 * (d$a1 == "1") + 0.5 * (d$a1 == "2")) * (d$x2 - q1) + (d$a1 == "2") / (0.5 * (d$a1 == "1") + 0.5 * (d$a1 == "2")) * (d$a2 == d2) / (0.5 * (d$a2 == "3") + 0.5 * (d$a2 == "4")) * (d$y - d$x2) ) blip1 <- Z1[,2] - Z1[,1] blip_model1 <- glm(blip1 ~ d$x1) expect_equal( po$blip_functions$stage_1$blip_model$model |> coef() |> unname(), blip_model1 |> coef() |> unname() ) d1 <- ifelse(predict(blip_model1, newdata = data.frame(x1 = d$x1)) > 0, "2", "1") expect_equal( unname(d1), get_policy(po)(pd)[stage == 1]$d ) }) test_that("policy_learn with type 'blip' has the correct outputs for the stochastic two-stage case and a single final utility outcome.", { set.seed(1) x1 <- runif(n = 1e2, min = -1, max = 1) a1 <- c(rep(1, 50), rep(2, 50)) x2 <- runif(n = 1e2, min = -1, max = 1) a2 <- c(rep(3, 50), rep(4, 50)) y <- a1 * x1 + a2 * x2 + runif(n = 1e2, min = -1, max = 1) p1 <- c(rep(1, 25), rep(2, 25), rep(1, 25), rep(2, 25)) p2 <- c(rep(3, 25), rep(4, 25), rep(3, 25), rep(4, 25)) m <- rbinom(n = 1e2, size = 1, prob = 0.5) # stage 2 missing indicator d <- data.table(x1 = x1, a1 = a1, x2 = x2, a2 = a2, y = y, p1 = p1, p2 = p2, m = m) rm(x1, a1, x2, a2, y) pd <- policy_data( data = d, action = c("a1", "a2"), covariates = list(x = c("x1", "x2"), p = c("p1", "p2")), utility = c("y") ) gf <- fit_g_functions(pd, g_models = list(g_glm(~1), g_glm(~1))) ld <- pd$stage_data ld[stage == 2, m := m] ld[, m := nafill(m, fill = 0)] ld <- ld[m == 0, ] ld[ , stage := 1:.N, by = id] setkey(ld, id, stage) ld[ , m := NULL] pd <- policy_data(data = ld, type = "long") ## ## no cross-fitting ## ## policy_learn(type = "blip")): pl <- policy_learn( type = "blip", control = control_blip(blip_models = q_glm(~x)) ) expect_no_error( po <- pl(pd, q_models = list(q_glm(~x), q_degen(var = "x")), g_functions = gf) ) ## stage2 p <- policy_def(function(p) p, name = "test", reuse = TRUE) ref_pe <- mean(d$x1 + (d$a1 == d$p1) / 0.5 * (((d$m == 0) * d$x2 + (d$m == 1) * d$y) - d$x1) + (d$m == 0) * (d$a1 == d$p1) / 0.5 * (d$a2 == d$p2) / 0.5 * (d$y - d$x2)) ref_IC <- (d$x1 + (d$a1 == d$p1) / 0.5 * (((d$m == 0) * d$x2 + (d$m == 1) * d$y) - d$x1) + (d$m == 0) * (d$a1 == d$p1) / 0.5 * (d$a2 == d$p2) / 0.5 * (d$y - d$x2) - ref_pe) ref_pe_or <- mean(d$x1) ref_pe_ipw <- mean((d$a1 == d$p1) / 0.5 * (d$m == 0) * (d$a2 == d$p2) / 0.5 * d$y) ## ## no cross-fitting ## pe <- policy_eval( policy_data = pd, policy = p, q_models = list(q_degen(var = "x"), q_degen(var = "x")), g_functions = gf ) expect_equal( pe$name, names(coef(pe)) ) expect_equal( pe$name, c("E[U(d)]: d=test") ) expect_equal( coef(pe) |> unname(), ref_pe ) expect_equal( IC(pe), matrix(ref_IC) ) expect_equal( get_element(pe, "coef_or"), ref_pe_or ) expect_equal( get_element(pe, "coef_ipw"), ref_pe_ipw ) ## ## cross-fitting ## pe <- policy_eval( policy_data = pd, policy = p, q_models = list(q_degen(var = "x"), q_degen(var = "x")), g_functions = gf, M = 2 ) expect_equal( pe$name, names(coef(pe)) ) expect_equal( pe$name, c("E[U(d)]: d=test") ) expect_equal( coef(pe) |> unname(), ref_pe ) expect_equal( IC(pe), matrix(ref_IC) ) expect_equal( get_element(pe, "coef_or"), ref_pe_or ) expect_equal( get_element(pe, "coef_ipw"), ref_pe_ipw ) })