library('data.table') expit <- function(x) { return(exp(x) / (1 + exp(x))) } datagen <- function(i, time_points, beta_L, beta_Z, beta_A, beta_R, beta_Y, sigma_Y, U_lb, U_ub, m){ # Preallocate space in vectors time <- 0:time_points id <- rep(as.numeric(i), time_points+1) L <- rep(NA, time_points+1) L0 <- rep(NA, time_points+1) Z <- rep(NA, time_points+1) A <- rep(NA, time_points+1) lag1_A <- rep(NA, time_points+1) R <- rep(NA, time_points+1) Y <- rep(NA, time_points+1) C_artificial <- rep(NA, time_points+1) G <- rep(NA, time_points+1) U <- rep(NA, time_points+1) # Baseline U[1:(time_points + 1)] <- runif(1, min = U_lb, max = U_ub) L[1] <- rbinom(1, 1, expit(beta_L[1] + beta_L[4] * U[1])) L0[1:(time_points + 1)] <- L[1] Z[1:(time_points + 1)] <- rbinom(1, 1, expit(beta_Z[1] + beta_Z[2] * L[1])) A[1] <- 1 R[1] <- rbinom(1, 1, expit(beta_R[1] + beta_R[2] * A[1] + beta_R[3] * Z[1] + beta_R[4] * L[1])) if (R[1] == 1){ Y[1] <- rnorm(1, beta_Y[1] + beta_Y[2] * A[1] + beta_Y[3] * Z[1] + beta_Y[4] * L[1] + beta_Y[5] * U[1] + beta_Y[6] * Z[1] * A[1] + beta_Y[7] * Z[1] * L[1] + beta_Y[8] * L[1] * A[1], sigma_Y) } lag1_A[1] <- 0 C_artificial[1] <- ifelse(A[1] == 0, 1, 0) count_not_adhered <- 0 G[1] <- 0 # Follow-up times for (j in 2:(time_points + 1)){ L[j] <- rbinom(1, 1, expit(beta_L[1] + beta_L[2] * A[j-1] + beta_L[3] * Z[1] + beta_L[4] * U[1] + beta_L[5] * (j - 1))) A[j] <- rbinom(1, 1, expit(beta_A[1] + beta_A[2] * A[j-1] + beta_A[3] * Z[1] + beta_A[4] * L[j] + beta_A[5] * (j - 1))) R[j] <- rbinom(1, 1, expit(beta_R[1] + beta_R[2] * A[j] + beta_R[3] * Z[1] + beta_R[4] * L[j])) if (R[j] == 1){ Y[j] <- rnorm(1, beta_Y[1] + beta_Y[2] * A[j] + beta_Y[3] * Z[1] + beta_Y[4] * L[j] + beta_Y[5] * U[1] + beta_Y[6] * Z[1] * A[j] - beta_Y[7] * Z[1] * L[j] + beta_Y[8] * L[j] * A[j] + beta_Y[9] * (j - 1), sigma_Y) } count_not_adhered <- (count_not_adhered + 1) * (1 - A[j-1]) G[j] <- ifelse(count_not_adhered == m & all(C_artificial[1:(j - 1)] == 0), 1, 0) if ((A[j] == 0 & G[j] == 1) | C_artificial[j-1] == 1){ C_artificial[j] <- 1 } else { C_artificial[j] <- 0 } lag1_A[j] <- A[j-1] } # Consolidate data in a single data frame temp_data <- data.table(id = id, time = time, L = L, L0 = L0, Z = Z, A = A, lag1_A = lag1_A, R = R, Y = Y, C_artificial = C_artificial, G = G) return(temp_data) } set.seed(1234) data_null_temp <- lapply(as.list(1:1000), FUN=function(ind){ datagen(i = ind, time_points = 24, beta_L = c(0, 0.5, 0.25, 0.5, 0), beta_Z = c(0.5, -1), beta_A = c(0.5, 0.5, -0.25, 0.5, 0), beta_R = c(-1, 0.5, 0.25, -0.5), beta_Y = c(0, 0, 0, 0, 2, 0, 0, 0, 0), sigma_Y = 5, U_lb = -1, U_ub = 1, m = 2) }) data_null_full <- rbindlist(data_null_temp) data_null_processed <- prep_data(data_null, grace_period_length = 2, baseline_vars = 'L', lag_vars = 'A') test_that("prep_data works", { expect_equal(data_null_processed$C_artificial, data_null_full$C_artificial, tolerance = 1e-7) expect_equal(data_null_processed$A_model_eligible, data_null_full$G, tolerance = 1e-7) expect_equal(data_null_processed$A_lag1, data_null_full$lag1_A, tolerance = 1e-7) })