library(data.table) set.seed(76921) # data simulation sim_data_set <- function(n_obs = 1000) { w <- rbinom(n_obs, 1, 0.5) w[w == 0] <- -1 prob_delta <- runif(n_obs, 0.05, 1) delta <- rbinom(n = n_obs, size = 1, prob = prob_delta) a <- rnorm(n = n_obs, mean = 2 * w, sd = 0.5) data_in <- as.data.table(cbind(a, delta, w, 1 / prob_delta)) data_in <- data_in[delta == 1, ] data_in[, delta := NULL] setnames(data_in, c("a", "w", "wts")) return(data_in) } data_in <- sim_data_set(n_obs = 100) # learn relationship A|W using HAL-based density estimation procedure dens_lrn <- with( data_in, haldensify( A = a, W = w, wts = wts, n_bins = c(3, 5), lambda_seq = exp(seq(-1, -8, length = 100)), max_degree = 3, smoothness_orders = 0 ) ) # predictions to recover conditional density of A, given W = 0 or W = 1 new_a <- seq(-4, 4, by = 0.1) new_w_neg <- rep(-2, length(new_a)) new_w_pos <- rep(2, length(new_a)) new_dat <- as.data.table( list(a = new_a, w_neg = new_w_neg, w_pos = new_w_pos) ) new_dat$pred_w_neg <- predict(dens_lrn, new_A = new_dat$a, new_W = new_dat$w_neg, trim = FALSE ) new_dat$pred_w_pos <- predict(dens_lrn, new_A = new_dat$a, new_W = new_dat$w_pos, trim = FALSE ) # test that maximum value of prediction happens at appropriate mean of the # conditional density N(mu = \pm 2, sd = 0.5) # test_that("Maximum predicted probability of p(A|W = -1) matches N(-2, 0.5)", { # expect_equal( # new_dat[which.max(pred_w_neg), a], # data_in[w == -1, mean(a)] # ) # }) # test_that("Maximum predicted probability of p(A|W = +1) matches N(+2, 0.5)", { # expect_equal( # new_dat[which.max(pred_w_pos), a], # data_in[w == 1, mean(a)] # ) # })