test_that("policy_eval with target = 'subgroup' checks inputs.", { d <- sim_single_stage(1e2, seed = 1) pd <- policy_data(d, action = "A", covariates = c("Z"), utility = "U") p <- policy_def(1) expect_error( policy_eval( policy_data = pd, policy = p, target = "test" ), "target must be either 'value' or 'subgroup'." ) d <- sim_single_stage_multi_actions(1e2, seed = 1) pd <- policy_data(d, action = "a", covariates = c("z"), utility = "u") p <- policy_def(1) expect_error( policy_eval( policy_data = pd, policy = p, g_models = g_empir(), target = "subgroup" ), "subgroup average treatment effect evaluation is not implemented for more than two actions." ) d <- sim_two_stage(1e2, seed = 1) pd <- policy_data( data = d, action = c("A_1", "A_2"), covariates = list(L = c("L_1", "L_2")), utility = "U_3" ) p <- policy_def(1, reuse = TRUE) expect_error( policy_eval( policy_data = pd, policy = p, g_models = g_empir(), target = "subgroup" ), "subgroup average treatment effect evaluation is not implemeted for multiple stages." ) }) test_that("policy_eval with target 'subgroup' agrees with targeted::cate.", { n <- 1e3 Z <- rnorm(n = n) A <- rbinom(size = 1, n = n, prob = 0.5) U <- rnorm(mean = Z + Z * A, n = n) d <- data.table(Z = Z, A = A, U = U) rm(Z, A, U) pd <- policy_data(d, action = "A", covariates = c("Z"), utility = "U") p <- policy_def(function(Z) (Z > 0) * 1) ## ## no cross-fitting: ## pe <- policy_eval( policy_data = pd, policy = p, g_models = g_glm(~1), q_models = q_glm(~ A * Z), target = "sub_effect", M = 1 ) ## implementation from the targeted package: d$d <- p(pd)$d ca <- targeted::cate( cate.model = ~ factor(d) - 1, response.model = U ~ A * Z, propensity.model = A ~ 1, data = d, second.order = FALSE, nfolds = 1, ) expect_equal( unname(coef(pe)), unname(coef(ca)[c("factor(d)1", "factor(d)0")]) ) expect_equal( IC(pe) |> unname(), IC(ca)[, c("factor(d)1", "factor(d)0"), drop=FALSE] |> unname() ) ## ## cross-fitting: pooled estimate and variance ## set.seed(1) pe <- policy_eval( policy_data = pd, policy = p, g_models = g_glm(~1), q_models = q_glm(~ A * Z), target = "sub_effect", cross_fit_type = "pooled", variance_type = "pooled", M = 2 ) set.seed(1) ca <- targeted::cate( cate.model = ~ factor(d) - 1, response.model = U ~ A * Z, propensity.model = A ~ 1, second.order = FALSE, data = d, nfolds = 2, ) expect_equal( unname(coef(pe)), unname(coef(ca)[c("factor(d)1", "factor(d)0")]) ) expect_equal( IC(pe), IC(ca)[, c("factor(d)1", "factor(d)0")] |> unname() ) }) test_that("policy_eval with target 'sub_effect' has the correct outputs: test1.", { test_output <- function(pe) { ## value_estimate expect_true( !is.null(coef(pe)) && is.numeric(coef(pe)) ) ## IC in group d=1 should be zero for Z < 0 expect_true( all(IC(pe)[d$Z <= 0, 1] == 0) ) ## ## ... and for the other subgroup: ## expect_true( ## all(IC(pe)[d$Z >0, 2] == 0) ## ) ## id expect_true( all(pe$id == 1:1e2) ) ## type expect_equal( pe$type, "dr" ) ## target expect_equal( pe$target, "subgroup" ) ## name expect_equal( pe$name, names(pe$coef) ) expect_equal( pe$name, c("E[U(1)-U(0)|d=1]: d=test", "E[U(1)-U(0)|d=0]: d=test") ) } d <- sim_single_stage(1e2, seed = 1) d$A <- c(rep(0, 50), rep(1, 50)) pd <- policy_data(d, action = "A", covariates = c("Z"), utility = "U") p <- policy_def(function(Z) (Z > 0) * 1, name = "test") ## no cross-fitting expect_no_error( pe <- policy_eval( policy_data = pd, policy = p, target = "subgroup" ) ) test_output(pe) expect_no_error( pe <- policy_eval( policy_data = pd, policy = p, target = "subgroup", name = c("group1", "group2") ) ) expect_equal( names(coef(pe)), c("group1: d=test", "group2: d=test") ) ## cross-fitting: stacked estimator set.seed(1) expect_no_error( pe <- policy_eval( policy_data = pd, policy = p, target = "subgroup", M = 2, cross_fit_type = "stacked", variance_type = "stacked" ) ) test_output(pe) ## cross-fitting: pooled estimator set.seed(1) expect_no_error( pe <- policy_eval( policy_data = pd, policy = p, target = "sub_effect", M = 2, cross_fit_type = "pooled", variance_type = "pooled" ) ) test_output(pe) ## cross-fitting: pooled estimator, complete variance estimate expect_no_error( pe <- policy_eval( policy_data = pd, policy = p, target = "subgroup", M = 2, cross_fit_type = "pooled", variance_type = "complete" ) ) test_output(pe) }) test_that("policy_eval with target 'subgroup' has the correct outputs: test2.", { z <- 1:1e2 a <- c(rep(1, 50), rep(2, 50)) y <- a * 2 p <- c(rep(1, 25), rep(2, 25), rep(1, 25), rep(2, 25)) d <- data.table(z = z, a = a, y = y, p = p) rm(a, z, y) pd <- policy_data( data = d, action = "a", covariates = c("z", "p"), utility = c("y") ) ## his <- get_history(pd, stage = 1) ## qfun <- fit_Q_function(history = his, Q = d$y, q_degen(var = "z")) ## predict.Q_function(qfun, new_history = his) p <- policy_def(function(p) p, name = "p") ref_Z <- cbind( (d$a == 1) / 0.5 * (d$y - d$z) + d$z, (d$a == 2) / 0.5 * (d$y - d$z) + d$z ) ref_blip <- ref_Z[, 2] - ref_Z[, 1] ref_sub <- mean(ref_blip[d$p == 2]) ref_sub_comp <- mean(ref_blip[d$p == 1]) ref_IC <- 2 * (d$p == 2) * (ref_blip - ref_sub) ref_IC_comp <- 2 * (d$p == 1) * (ref_blip - ref_sub_comp) ## ## no cross-fitting ## sub <- policy_eval( target = "subgroup", policy_data = pd, policy = p, q_models = polle:::q_degen(var = "z"), g_models = g_glm(~1) ) expect_equal( coef(sub) |> unname(), c(ref_sub, ref_sub_comp) ) expect_equal( IC(sub), cbind(ref_IC, ref_IC_comp) |> unname() ) expect_equal( names(sub$coef), c("E[U(2)-U(1)|d=2]: d=p", "E[U(2)-U(1)|d=1]: d=p") ) ## ## cross-fitting ## ## in each training, the empirical propensity is no longer 0.5 ## instead a g_model is fitted on the complete data: gf <- fit_g_functions(pd, g_models = g_glm(~1)) sub <- policy_eval( target = "sub_effect", policy_data = pd, policy = p, q_models = polle:::q_degen(var = "z"), g_functions = gf, M = 2, ) expect_equal( coef(sub) |> unname(), c(ref_sub, ref_sub_comp) ) expect_equal( IC(sub), cbind(ref_IC, ref_IC_comp) |> unname() ) }) test_that("policy_eval with target 'subgroup' returns NA when no subjects are in the subgroup.", { z <- 1:1e2 a <- c(rep(1, 50), rep(2, 50)) y <- a * 2 p1 <- c(rep(1, 50), rep(2, 50)) p2 <- c(rep(1, 100)) d <- data.table(z = z, a = a, y = y, p1 = p1, p2 = p2) rm(a, z, y, p1, p2) pd <- policy_data( data = d, action = "a", covariates = c("z", "p1", "p2"), utility = c("y") ) ref_Z <- cbind( (d$a == 1) / 0.5 * (d$y - d$z) + d$z, (d$a == 2) / 0.5 * (d$y - d$z) + d$z ) ref_blip <- ref_Z[, 2] - ref_Z[, 1] p <- policy_def(1) sub <- policy_eval( target = "subgroup", policy_data = pd, policy = p, q_models = polle:::q_degen(var = "z"), g_models = g_glm(~1) ) expect_equal( coef(sub) |> unname(), c(as.numeric(NA), mean(ref_blip)) ) expect_equal( unname(IC(sub)[,1,drop=FALSE]), cbind(rep(as.numeric(NA), 1e2)) ) expect_no_error( tmp <- capture.output(print(sub)) ) ## cross-fitting ## in each training, the empirical propensity is no longer 0.5 ## instead a g_model is fitted on the complete data: gf <- fit_g_functions(pd, g_models = g_glm(~1)) set.seed(1) sub <- policy_eval( target = "subgroup", policy_data = pd, policy = p, q_models = polle:::q_degen(var = "z"), g_functions = gf, M = 2 ) expect_equal( coef(sub) |> unname(), c(as.numeric(NA), mean(ref_blip)) ) expect_equal( unname(IC(sub)[,1,drop=FALSE]), cbind(rep(as.numeric(NA), 1e2)) ) expect_no_error( tmp <- capture.output(print(sub)) ) set.seed(1) sub <- policy_eval( target = "subgroup", policy_data = pd, policy = p, q_models = polle:::q_degen(var = "z"), g_functions = gf, M = 2, cross_fit_type = "stacked", variance_type = "stacked" ) expect_equal( coef(sub) |> unname(), c(as.numeric(NA), mean(ref_blip)) ) expect_equal( unname(IC(sub)[,1,drop=FALSE]), cbind(rep(as.numeric(NA), 1e2)) ) expect_no_error( tmp <- capture.output(print(sub)) ) }) test_that("policy_eval with target 'subgroup' works with policy_learning with multiple thresholds.", { z <- 1:1e2 a <- c(rep(1, 50), rep(2, 50)) y <- a * 2 p1 <- c(rep(1, 50), rep(2, 50)) p2 <- c(rep(1, 100)) d <- data.table(z = z, a = a, y = y, p1 = p1, p2 = p2) rm(a, z, y) pd <- policy_data( data = d, action = "a", covariates = c("z", "p1", "p2"), utility = c("y") ) ref_Z <- cbind( (d$a == 1) / 0.5 * (d$y - d$z) + d$z, (d$a == 2) / 0.5 * (d$y - d$z) + d$z ) ref_blip <- ref_Z[, 2] - ref_Z[, 1] ref_sub2_eta50 <- mean(ref_blip[d$p1 == 2]) ref_IC2_eta50 <- 2 * (d$p1 == 2) * (ref_blip - ref_sub2_eta50) ref_sub1_eta50 <- mean(ref_blip[d$p1 == 1]) ref_IC1_eta50 <- 2 * (d$p1 == 1) * (ref_blip - ref_sub1_eta50) ref_sub1_eta101 <- mean(ref_blip[d$p2 == 1]) ref_IC1_eta101 <- (d$p2 == 1) * (ref_blip - ref_sub1_eta101) pl <- policy_learn( type = "blip", threshold = c(50, 101), control = control_blip(blip_models = polle:::q_degen(var = "z")) ) sub <- policy_eval( target = "subgroup", policy_data = pd, policy_learn = pl, q_models = polle:::q_degen(var = "z"), g_models = g_glm(~1) ) expect_no_error( tmp <- capture.output(print(sub)) ) expect_equal( sub$name, names(sub$coef) ) expect_equal( sub$name, c("E[U(2)-U(1)|d=2]: d=blip(eta=50)", "E[U(2)-U(1)|d=1]: d=blip(eta=50)", "E[U(2)-U(1)|d=2]: d=blip(eta=101)", "E[U(2)-U(1)|d=1]: d=blip(eta=101)") ) expect_equal( coef(sub) |> unname(), c(ref_sub2_eta50, ref_sub1_eta50, as.numeric(NA), ref_sub1_eta101) ) expect_equal( IC(sub), cbind(ref_IC2_eta50, ref_IC1_eta50, rep(as.numeric(NA), 1e2), ref_IC1_eta101) |> unname() ) expect_equal( sub$subgroup_indicator, cbind(p1 == 2, p1 == 1, p2 == 2, p2 == 1) ) ## cross-fitting: ## in each training, the empirical propensity is no longer 0.5 ## instead a g_model is fitted on the complete data: gf <- fit_g_functions(pd, g_models = g_glm(~1)) set.seed(1) sub <- policy_eval( target = "subgroup", policy_data = pd, policy_learn = pl, q_models = polle:::q_degen(var = "z"), g_functions = gf, cross_fit_type = "pooled", variance_type = "pooled", M = 2) expect_no_error( tmp <- capture.output(print(sub)) ) expect_equal( sub$name, names(sub$coef) ) expect_equal( sub$name, c("E[U(2)-U(1)|d=2]: d=blip(eta=50)", "E[U(2)-U(1)|d=1]: d=blip(eta=50)", "E[U(2)-U(1)|d=2]: d=blip(eta=101)", "E[U(2)-U(1)|d=1]: d=blip(eta=101)") ) expect_equal( coef(sub) |> unname(), c(ref_sub2_eta50, ref_sub1_eta50, as.numeric(NA), ref_sub1_eta101) ) expect_equal( IC(sub), cbind(ref_IC2_eta50, ref_IC1_eta50, rep(as.numeric(NA), 1e2), ref_IC1_eta101) |> unname() ) expect_equal( sub$subgroup_indicator, cbind(p1 == 2, p1 == 1, p2 == 2, p2 == 1) ) ## stacked: set.seed(1) sub <- policy_eval( target = "subgroup", policy_data = pd, policy_learn = pl, q_models = polle:::q_degen(var = "z"), g_functions = gf, M = 2, cross_fit_type = "stacked", variance_type = "stacked" ) expect_no_error( tmp <- capture.output(print(sub)) ) expect_equal( sub$name, names(sub$coef) ) expect_equal( sub$name, c("E[U(2)-U(1)|d=2]: d=blip(eta=50)", "E[U(2)-U(1)|d=1]: d=blip(eta=50)", "E[U(2)-U(1)|d=2]: d=blip(eta=101)", "E[U(2)-U(1)|d=1]: d=blip(eta=101)") ) ref_sub2_eta50_fold1 <- mean(ref_blip[sub$folds[[1]]][d$p1[sub$folds[[1]]] == 2]) ref_sub2_eta50_fold2 <- mean(ref_blip[sub$folds[[2]]][d$p1[sub$folds[[2]]] == 2]) ref_sub1_eta50_fold1 <- mean(ref_blip[sub$folds[[1]]][d$p1[sub$folds[[1]]] == 1]) ref_sub1_eta50_fold2 <- mean(ref_blip[sub$folds[[2]]][d$p1[sub$folds[[2]]] == 1]) ref_sub2_eta101_fold1 <- mean(ref_blip[sub$folds[[1]]][d$p2[sub$folds[[1]]] == 2]) ref_sub2_eta101_fold2 <- mean(ref_blip[sub$folds[[2]]][d$p2[sub$folds[[2]]] == 2]) ref_sub1_eta101_fold1 <- mean(ref_blip[sub$folds[[1]]][d$p2[sub$folds[[1]]] == 1]) ref_sub1_eta101_fold2 <- mean(ref_blip[sub$folds[[2]]][d$p2[sub$folds[[2]]] == 1]) expect_equal( c( mean(c(ref_sub2_eta50_fold1, ref_sub2_eta50_fold2)), mean(c(ref_sub1_eta50_fold1, ref_sub1_eta50_fold2)), mean(c(ref_sub2_eta101_fold1, ref_sub2_eta101_fold2)), mean(c(ref_sub1_eta101_fold1, ref_sub1_eta101_fold2)) ), coef(sub) |> unname() ) }) test_that("policy_eval with target 'subgroup' returns NA when the subgroup count is below the minimum.", { z <- 1:1e2 a <- c(rep(1, 50), rep(2, 50)) y <- a * 2 p1 <- c(rep(1, 4), rep(2, 96)) p2 <- c(rep(2, 4), rep(1, 96)) d <- data.table(z = z, a = a, y = y, p1 = p1, p2 = p2) rm(a, z, y, p1, p2) pd <- policy_data( data = d, action = "a", covariates = c("z", "p1", "p2"), utility = c("y") ) p1 <- policy_def(function(p1) p1, name = "p1") # 4 observations in the subgroup d = 1 p2 <- policy_def(function(p2) p2, name = "p2") # 4 observations in the subgroup d = 2 sub <- policy_eval( target = "subgroup", policy_data = pd, policy = p1, q_models = polle:::q_degen(var = "z"), g_models = g_glm(~1) ) expect_equal( coef(sub) |> is.na() |> unname(), c(FALSE, FALSE) ) sub <- policy_eval( target = "subgroup", policy_data = pd, policy = p1, q_models = polle:::q_degen(var = "z"), g_models = g_glm(~1), min_subgroup_size = 5 ) expect_equal( coef(sub) |> is.na() |> unname(), c(FALSE, TRUE) ) sub <- policy_eval( target = "subgroup", policy_data = pd, policy = p2, q_models = polle:::q_degen(var = "z"), g_models = g_glm(~1), min_subgroup_size = 5 ) expect_equal( coef(sub) |> is.na() |> unname(), c(TRUE, FALSE) ) ## cross-fitting ## in each training, the empirical propensity is no longer 0.5 ## instead a g_model is fitted on the complete data: gf <- fit_g_functions(pd, g_models = g_glm(~1)) sub <- policy_eval( target = "subgroup", policy_data = pd, policy = p2, q_models = polle:::q_degen(var = "z"), g_functions = gf, min_subgroup_size = 5, M = 2 ) expect_equal( coef(sub) |> is.na() |> unname(), c(TRUE, FALSE) ) sub <- policy_eval( target = "subgroup", policy_data = pd, policy = p2, q_models = polle:::q_degen(var = "z"), g_functions = gf, min_subgroup_size = 5, M = 2, cross_fit_type = "stacked" ) expect_equal( coef(sub) |> is.na() |> unname(), c(TRUE, FALSE) ) }) test_that("policy_eval runs with policy_learn using quantile_prob_thres ", { d1 <- sim_single_stage(1e3, seed=1) pd1 <- policy_data(d1, action = "A", covariates = c("Z"), utility = "U") qs <- c(0.25, 0.5, 0.75) learn1 <- policy_learn(type = "blip", control = control_blip( blip_models = q_glm(~ .), quantile_prob_threshold = qs )) expect_error( pe1 <- policy_eval( policy_data = pd1, policy_learn = learn1, target = "subgroup", g_models = g_glm(~ 1), q_models = q_glm(~ A * (.)), M = 2 ), NA ) })