# # Generate Data ----------------------------------------------------------- # set.seed(123) # # dgm_demo <- function(sample_size, total_T) { # alpha_0 <- -1.5 # alpha_1 <- 0.5 # alpha_2 <- 0.3 # # beta_0 <- 0.1 # beta_1 <- 0.3 # # # With the above parameter specification, the range of success probability of Y would be # # [exp(-1.5), exp(-1.5 + 0.5 + 0.3 + 0.1 + 0.3)] = [0.223, 0.741] # # df_names <- c("userid", "time", "time_var1", "time_var2", "Y", "A", "avail", "prob_Y", "prob_Y_A0", "rand_prob") # # time_var1 is time / total_T # # time_var2 is 1(time > total_T/2) # # dta <- data.frame(matrix(NA, nrow = sample_size * total_T, ncol = length(df_names))) # names(dta) <- df_names # # dta$userid <- rep(1:sample_size, each = total_T) # dta$time <- rep(1:total_T, times = sample_size) # dta$time_var1 <- dta$time / total_T # dta$time_var2 <- as.numeric(dta$time > (total_T / 2)) # # for (t in 1:total_T) { # # row index for the rows corresponding to time t for every subject # row_index <- seq(from = t, by = total_T, length = sample_size) # # dta$avail[row_index] <- rbinom(sample_size, 1, 0.8) # 0.8 probability to be available # # dta$rand_prob[row_index] <- ifelse(t %% 3 == 1, 0.3, ifelse(t %% 3 == 2, 0.5, 0.7)) # dta$A[row_index] <- rbinom(sample_size, 1, dta$rand_prob[row_index]) * dta$avail[row_index] # A can only be 1 if avail = 1 # # We keep rand_prob as-is for those observations with avail = 0. It's OK because those rand_prob won't be used in the estimation. # # dta$prob_Y_A0[row_index] <- exp(alpha_0 + alpha_1 * dta$time_var1[row_index] + alpha_2 * dta$time_var2[row_index]) # dta$prob_Y[row_index] <- dta$prob_Y_A0[row_index] * exp(dta$A[row_index] * (beta_0 + beta_1 * dta$time_var1[row_index])) # dta$Y[row_index] <- rbinom(sample_size, 1, dta$prob_Y[row_index]) # } # # return(dta) # } # # data_binary <- dgm_demo(sample_size = 100, total_T = 30) # Tests ------------------------------------------------------------------- test_that( "check beta_hat", { expect_equal( as.numeric(emee2( data = data_binary, id = "userid", outcome = "Y", treatment = "A", rand_prob = "rand_prob", moderator_formula = ~time_var1, control_formula = ~ time_var1 + time_var2, availability = "avail", numerator_prob = 0.5, start = NULL, verbose = FALSE )$fit$beta_hat), as.vector(c(0.08502509, 0.41260611)), tolerance = 1e-7 ) } ) test_that( "check alpha_hat", { expect_equal( as.numeric(emee2( data = data_binary, id = "userid", outcome = "Y", treatment = "A", rand_prob = "rand_prob", moderator_formula = ~ time_var1, control_formula = ~ time_var1 + time_var2, availability = "avail", numerator_prob = 0.5, start = NULL, verbose = FALSE )$fit$alpha_hat), as.vector(c(-1.4358563, 0.6935913, 0.2647090)), tolerance = 1e-6 ) } ) test_that( "check beta_se", { expect_equal( as.numeric(emee2( data = data_binary, id = "userid", outcome = "Y", treatment = "A", rand_prob = "rand_prob", moderator_formula = ~ time_var1, control_formula = ~ time_var1 + time_var2, availability = "avail", numerator_prob = 0.5, start = NULL, verbose = FALSE )$fit$beta_se), as.vector(c(0.1250711, 0.1810747)), tolerance = 1e-6 ) } ) test_that( "check alpha_se", { expect_equal( as.numeric(emee2( data = data_binary, id = "userid", outcome = "Y", treatment = "A", rand_prob = "rand_prob", moderator_formula = ~ time_var1, control_formula = ~ time_var1 + time_var2, availability = "avail", numerator_prob = 0.5, start = NULL, verbose = FALSE )$fit$alpha_se), as.vector(c(0.06213613, 0.17279523, 0.10357601)), tolerance = 1e-6 ) } ) test_that( "check beta_se_adjusted", { expect_equal( as.numeric(emee2( data = data_binary, id = "userid", outcome = "Y", treatment = "A", moderator_formula = ~ time_var1, control_formula = ~ time_var1 + time_var2, availability = "avail", rand_prob = "rand_prob", numerator_prob = 0.5, start = NULL, verbose = FALSE )$fit$beta_se_adjusted), as.vector(c(0.1266788, 0.1834912)), tolerance = 1e-6 ) } ) test_that( "check alpha_se_adjusted", { expect_equal( as.numeric(emee2( data = data_binary, id = "userid", outcome = "Y", treatment = "A", moderator_formula = ~time_var1, control_formula = ~ time_var1 + time_var2, availability = "avail", rand_prob = "rand_prob", numerator_prob = 0.5, start = NULL, verbose = FALSE )$fit$alpha_se_adjusted), as.vector(c(0.06290973, 0.17500825, 0.10478227)), tolerance = 1e-6 ) } ) test_that( "check varcov", { expect_equal( emee2( data = data_binary, id = "userid", outcome = "Y", treatment = "A", moderator_formula = ~time_var1, control_formula = ~ time_var1 + time_var2, availability = "avail", rand_prob = "rand_prob", numerator_prob = 0.5, start = NULL, verbose = FALSE )$fit$varcov, matrix( c( 0.0038608991, -0.007512966, 0.0020372384, -0.0005068623, 0.001975262, -0.0075129657, 0.029858190, -0.0156664102, 0.0026684541, -0.008951825, 0.0020372384, -0.015666410, 0.0107279889, -0.0008563799, 0.003400086, -0.0005068623, 0.002668454, -0.0008563799, 0.0156427857, -0.020762192, 0.0019752617, -0.008951825, 0.0034000860, -0.0207621925, 0.032788043 ), nrow = 5, ncol = 5, byrow = TRUE ), tolerance = 1e-7 ) } ) test_that( "check varcov_adjusted", { expect_equal( emee2( data = data_binary, id = "userid", outcome = "Y", treatment = "A", moderator_formula = ~time_var1, control_formula = ~ time_var1 + time_var2, availability = "avail", rand_prob = "rand_prob", numerator_prob = 0.5, start = NULL, verbose = FALSE )$fit$varcov_adjusted, matrix( c( 0.0039576339, -0.0077190622, 0.0021002006, -0.0005141107, 0.0020213942, -0.0077190622, 0.0306278873, -0.0160546827, 0.0027628732, -0.0092120177, 0.0021002006, -0.0160546827, 0.0109793233, -0.0009021267, 0.0035113704, -0.0005141107, 0.0027628732, -0.0009021267, 0.0160475226, -0.0213114237, 0.0020213942, -0.0092120177, 0.0035113704, -0.0213114237, 0.0336690206 ), nrow = 5, ncol = 5, byrow = TRUE ), tolerance = 1e-7 ) } ) # extract the confidence interval from the output and drop its column and row names conf_int <- emee2( data = data_binary, id = "userid", outcome = "Y", treatment = "A", moderator_formula = ~time_var1, control_formula = ~ time_var1 + time_var2, availability = "avail", rand_prob = "rand_prob", numerator_prob = 0.5, start = NULL, verbose = FALSE )$fit$conf_int dimnames(conf_int) <- c() test_that( "check conf_int", { expect_equal(conf_int, matrix(c(-0.16011431, 0.3301645, 0.05769972, 0.7675125), nrow = 2, ncol = 2, byrow = TRUE ), tolerance = 1e-6 ) } ) # extract the adjusted confidence interval from the output and drop its column and row names conf_int_adjusted <- emee2( data = data_binary, id = "userid", outcome = "Y", treatment = "A", rand_prob = "rand_prob", moderator_formula = ~time_var1, control_formula = ~ time_var1 + time_var2, availability = "avail", numerator_prob = 0.5, start = NULL, verbose = FALSE )$fit$conf_int_adjusted dimnames(conf_int_adjusted) <- c() test_that( "check conf_int_adjusted", { expect_equal(conf_int_adjusted, matrix(c(-0.16646416, 0.3365143, 0.04833002, 0.7768822), nrow = 2, ncol = 2, byrow = TRUE ), tolerance = 1e-6 ) } ) # test_that( # "check error when moderator_formula has Y in formula", # { # expect_error( # emee2( # outcome = Y, # treatment = A, # moderator_formula = Y ~ time_var1, # control_formula = ~ time_var1 + time_var2, # availability = avail, # rand_prob = rand_prob, # numerator_prob = 0.5, # data = data_binary, # id = userid, # start = NULL, # verbose = FALSE # ), # "It seems like you included variables to left of ~. moderator_formula should look like ~1 or ~ mod_var1 + mod_var2." # ) # } # )