set.seed(20051920) # 20051920 is derived from 'test' n <- 250 n0 <- 20 data <- dplyr::tibble( uid = 1:(n + n0) ) %>% dplyr::mutate( a = rbinom(n + n0, 1, 0.5), ps = rep(0.5, n + n0), x1 = c(rnorm(n), rep(0, n0)), x2 = factor(sample(1:4, n + n0, prob = c(1 / 5, 1 / 5, 1 / 5, 2 / 5), replace = TRUE)), y = a + x1 - 2.5 * a * (x1 - mean(x1)) + as.double(x2) + c(rnorm(n), rep(rnorm(1), n0)) ) userid <- rlang::expr(uid) propensity_score_variable_name <- "ps" continuous_covariates <- c("x1") discrete_covariates <- c("x2") continuous_moderators <- rlang::exprs(x1) discrete_moderators <- rlang::exprs(x2) moderators <- c(continuous_moderators, discrete_moderators) model_covariate_names <- c(continuous_covariates, discrete_covariates) model_covariates <- rlang::syms(model_covariate_names) outcome_variable <- rlang::expr(y) treatment_variable <- rlang::expr(a) trt.cfg <- Constant_cfg$new() regression.cfg <- SLEnsemble_cfg$new( learner_cfgs = list( SLLearner_cfg$new( "SL.glm" ), SLLearner_cfg$new( "SL.gam", list( degree = c(2, 3, 5) ) ) ) ) qoi.list <- list() for (cov in continuous_moderators) { qoi.list[[rlang::as_string(cov)]] <- KernelSmooth_cfg$new(neval = 100, eval_min_quantile = 0.05) } for (cov in discrete_moderators) { qoi.list[[rlang::as_string(cov)]] <- Stratified_cfg$new(cov) } qoi.cfg <- QoI_cfg$new( mcate = MCATE_cfg$new(cfgs = qoi.list), vimp = VIMP_cfg$new(linear_only = TRUE), diag = Diagnostics_cfg$new( outcome = c("SL_risk", "SL_coefs", "MSE", "RROC") ) ) cfg <- HTE_cfg$new( treatment = trt.cfg, outcome = regression.cfg, effect = regression.cfg, qoi = qoi.cfg ) test_that("add config", { data <<- attach_config(data, cfg) checkmate::expect_data_frame(data) expect_true("HTE_cfg" %in% names(attributes(data))) }) test_that("Split data", { data2 <<- make_splits(data, {{ userid }}, .num_splits = 4) checkmate::expect_data_frame(data2) }) test_that("Estimate Plugin Models", { data3 <<- produce_plugin_estimates( data2, {{ outcome_variable }}, {{ treatment_variable }}, !!!model_covariates ) checkmate::expect_data_frame(data3) }) test_that("Construct Pseudo-outcomes", { data4 <<- construct_pseudo_outcomes(data3, {{ outcome_variable }}, {{ treatment_variable }}) checkmate::expect_data_frame(data4) }) test_that("Estimate QoIs", { skip_on_cran() results <<- estimate_QoI(data4, !!!moderators) checkmate::expect_data_frame(results) }) test_that("VIMP is valid", { skip_on_cran() vimp <- results %>% dplyr::filter(grepl("VIMP", estimand)) vimp_z <- vimp$estimate / vimp$std_error # expect small p-value for x1 which has actual HTE expect_lt(2 * pnorm(vimp_z[1], lower.tail = FALSE), 0.01) # expect large p-value for x2 which has no HTE expect_gt(2 * pnorm(vimp_z[2], lower.tail = FALSE), 0.1) }) n_rows <- ( 1 + # SATE estimate 2 + # MSE for y(0) & y(1) 2 * 4 + # one row per model in the ensemble for each PO + ps for SL risk 2 * 4 + # one row per model in the ensemble for each PO + ps for SL coefficient 2 + # one row per moderator for variable importance 1 * 100 + # 100 rows per continuous moderator for local regression for MCATE and for PCATE (2 + 2) + # 2 rows per discrete moderator level for MCATE and for PCATE n + n0 # 1 row per observation for RROC ) test_that("Check results data", { skip_on_cran() checkmate::check_character(results$estimand, any.missing = FALSE) checkmate::check_double(results$estimate, any.missing = FALSE) checkmate::check_double(results$std_error, any.missing = FALSE) checkmate::expect_tibble( results, all.missing = FALSE, nrows = n_rows, ncols = 6, types = c( estimand = "character", term = "character", value = "double", level = "character", estimate = "double", std_error = "double" ) ) })