set.seed(20051920) # 20051920 is derived from 'test' n <- 250 data <- dplyr::tibble( uid = 1:n ) %>% dplyr::mutate( a = rbinom(n, 1, 0.5), ps = rep(0.5, n), x1 = rnorm(n), x2 = factor(sample(1:4, n, prob = c(1 / 5, 1 / 5, 1 / 5, 2 / 5), replace = TRUE)), x3 = factor(sample(1:3, n, prob = c(1 / 5, 1 / 5, 3 / 5), replace = TRUE)), x4 = (x1 + rnorm(n)) / 2, x5 = rnorm(n), y = a + x1 - 0.5 * a * (x1 - mean(x1)) + as.double(x2) + rnorm(n) ) userid <- rlang::expr(uid) propensity_score_variable_name <- "ps" continuous_covariates <- c("x1") discrete_covariates <- c("x2", "x3") continuous_moderators <- rlang::exprs(x1, x4, x5) discrete_moderators <- rlang::exprs(x2, x3) moderators <- c(continuous_moderators, discrete_moderators) model_covariate_names <- c(continuous_covariates, discrete_covariates) model_covariates <- rlang::syms(model_covariate_names) outcome_variable <- rlang::expr(y) treatment_variable <- rlang::expr(a) trt.cfg <- SLEnsemble_cfg$new( learner_cfgs = list( SLLearner_cfg$new( "SL.glm" ) ), family = stats::binomial() ) regression.cfg <- SLEnsemble_cfg$new( learner_cfgs = list( SLLearner_cfg$new( "SL.glm" ), SLLearner_cfg$new( "SL.gam", list( deg.gam = c(2, 3) ) ), SLLearner_cfg$new( "SL.glmnet", list( alpha = c(0.05, 0.15) ) ), SLLearner_cfg$new( "SL.glmnet.interaction", list( alpha = c(0.05, 0.15) ) ) ) ) qoi.list <- list() for (cov in continuous_moderators) { qoi.list[[rlang::as_string(cov)]] <- KernelSmooth_cfg$new(neval = 100) } for (cov in discrete_moderators) { qoi.list[[rlang::as_string(cov)]] <- Stratified_cfg$new(cov) } qoi.cfg <- QoI_cfg$new( mcate = MCATE_cfg$new(cfgs = qoi.list), diag = Diagnostics_cfg$new( outcome = c("SL_risk", "SL_coefs", "MSE"), ps = c("SL_risk", "SL_coefs", "AUC") ) ) cfg <- HTE_cfg$new( treatment = trt.cfg, outcome = regression.cfg, qoi = qoi.cfg ) E = new.env(parent = emptyenv()) test_that("add config", { E$data1 <- attach_config(data, cfg) checkmate::expect_data_frame(E$data1) expect_true("HTE_cfg" %in% names(attributes(E$data1))) }) test_that("Split data", { E$data2 <- make_splits(E$data1, {{ userid }}, .num_splits = 3) checkmate::expect_data_frame(E$data2) }) test_that("Estimate Plugin Models", { E$data3 <- produce_plugin_estimates( E$data2, {{ outcome_variable }}, {{ treatment_variable }}, !!!model_covariates ) checkmate::expect_data_frame(E$data3) }) test_that("Construct Pseudo-outcomes", { E$data4 <- construct_pseudo_outcomes(E$data3, {{ outcome_variable }}, {{ treatment_variable }}) checkmate::expect_data_frame(E$data4) }) test_that("Estimate QoIs", { expect_message( E$results <- estimate_QoI(E$data4), "No moderators specified, so pulling list from definitions in QoI." ) checkmate::expect_data_frame(E$results) }) n_rows <- ( 1 + # SATE estimate 2 + # MSE for y(0) & y(1) 1 + # AUC for pscore 2 * 7 + 1 + # one row per model in the ensemble for each PO + ps for SL risk 2 * 7 + 1 + # one row per model in the ensemble for each PO + ps for SL coefficient 3 * 100 + # 100 rows per continuous moderator for local regression for MCATE and for PCATE (4 + 3) # 2 rows per discrete moderator level for MCATE and for PCATE ) test_that("Check results data", { checkmate::check_character(E$results$estimand, any.missing = FALSE) checkmate::check_double(E$results$estimate, any.missing = FALSE) checkmate::check_double(E$results$std_error, any.missing = FALSE) checkmate::expect_tibble( E$results, all.missing = FALSE, nrows = n_rows, ncols = 6, types = c( estimand = "character", term = "character", value = "double", level = "character", estimate = "double", std_error = "double" ) ) })