# # Generate Data -----------------------------------------------------------
# set.seed(123)
#
# dgm_demo <- function(sample_size, total_T) {
#     alpha_0 <- -1.5
#     alpha_1 <- 0.5
#     alpha_2 <- 0.3
#
#     beta_0 <- 0.1
#     beta_1 <- 0.3
#
#     # With the above parameter specification, the range of success probability of Y would be
#     # [exp(-1.5), exp(-1.5 + 0.5 + 0.3 + 0.1 + 0.3)] = [0.223, 0.741]
#
#     df_names <- c("userid", "time", "time_var1", "time_var2", "Y", "A", "avail", "prob_Y", "prob_Y_A0", "rand_prob")
#     # time_var1 is time / total_T
#     # time_var2 is 1(time > total_T/2)
#
#     dta <- data.frame(matrix(NA, nrow = sample_size * total_T, ncol = length(df_names)))
#     names(dta) <- df_names
#
#     dta$userid <- rep(1:sample_size, each = total_T)
#     dta$time <- rep(1:total_T, times = sample_size)
#     dta$time_var1 <- dta$time / total_T
#     dta$time_var2 <- as.numeric(dta$time > (total_T / 2))
#
#     for (t in 1:total_T) {
#         # row index for the rows corresponding to time t for every subject
#         row_index <- seq(from = t, by = total_T, length = sample_size)
#
#         dta$avail[row_index] <- rbinom(sample_size, 1, 0.8) # 0.8 probability to be available
#
#         dta$rand_prob[row_index] <- ifelse(t %% 3 == 1, 0.3, ifelse(t %% 3 == 2, 0.5, 0.7))
#         dta$A[row_index] <- rbinom(sample_size, 1, dta$rand_prob[row_index]) * dta$avail[row_index] # A can only be 1 if avail = 1
#         # We keep rand_prob as-is for those observations with avail = 0. It's OK because those rand_prob won't be used in the estimation.
#
#         dta$prob_Y_A0[row_index] <- exp(alpha_0 + alpha_1 * dta$time_var1[row_index] + alpha_2 * dta$time_var2[row_index])
#         dta$prob_Y[row_index] <- dta$prob_Y_A0[row_index] * exp(dta$A[row_index] * (beta_0 + beta_1 * dta$time_var1[row_index]))
#         dta$Y[row_index] <- rbinom(sample_size, 1, dta$prob_Y[row_index])
#     }
#
#     return(dta)
# }
#
# data_binary <- dgm_demo(sample_size = 100, total_T = 30)


# Tests -------------------------------------------------------------------

test_that(
    "check beta_hat",
    {
        expect_equal(
            as.numeric(emee(
                data = data_binary,
                id = "userid",
                outcome = "Y",
                treatment = "A",
                rand_prob = "rand_prob",
                moderator_formula = ~time_var1,
                control_formula = ~ time_var1 + time_var2,
                availability = "avail",
                numerator_prob = 0.5,
                start = NULL,
                verbose = FALSE
            )$fit$beta_hat),
            as.vector(c(0.08114495, 0.42931332)),
            tolerance = 1e-7
        )
    }
)

test_that(
    "check alpha_hat",
    {
        expect_equal(
            as.numeric(emee(
                data = data_binary,
                id = "userid",
                outcome = "Y",
                treatment = "A",
                rand_prob = "rand_prob",
                moderator_formula = ~time_var1,
                control_formula = ~ time_var1 + time_var2,
                availability = "avail",
                numerator_prob = 0.5,
                start = NULL,
                verbose = FALSE
            )$fit$alpha_hat),
            as.vector(c(-1.4542369, 0.4565905, 0.2479836)),
            tolerance = 1e-6
        )
    }
)


test_that(
    "check beta_se",
    {
        expect_equal(
            as.numeric(emee(
                data = data_binary,
                id = "userid",
                outcome = "Y",
                treatment = "A",
                moderator_formula = ~time_var1,
                control_formula = ~ time_var1 + time_var2,
                availability = "avail",
                rand_prob = "rand_prob",
                numerator_prob = 0.5,
                start = NULL,
                verbose = FALSE
            )$fit$beta_se),
            as.vector(c(0.1301265, 0.1882975)),
            tolerance = 1e-6
        )
    }
)

test_that(
    "check alpha_se",
    {
        expect_equal(
            as.numeric(emee(
                data = data_binary,
                id = "userid",
                outcome = "Y",
                treatment = "A",
                moderator_formula = ~time_var1,
                control_formula = ~ time_var1 + time_var2,
                availability = "avail",
                rand_prob = "rand_prob",
                numerator_prob = 0.5,
                start = NULL,
                verbose = FALSE
            )$fit$alpha_se),
            as.vector(c(0.09699208, 0.24074140, 0.11167475)),
            tolerance = 1e-6
        )
    }
)

test_that(
    "check beta_se_adjusted",
    {
        expect_equal(
            as.numeric(emee(
                data = data_binary,
                id = "userid",
                outcome = "Y",
                treatment = "A",
                rand_prob = "rand_prob",
                moderator_formula = ~time_var1,
                control_formula = ~ time_var1 + time_var2,
                availability = "avail",
                numerator_prob = 0.5,
                start = NULL,
                verbose = FALSE
            )$fit$beta_se_adjusted),
            as.vector(c(0.1316436, 0.1905454)),
            tolerance = 1e-6
        )
    }
)

test_that(
    "check alpha_se_adjusted",
    {
        expect_equal(
            as.numeric(emee(
                data = data_binary,
                id = "userid",
                outcome = "Y",
                treatment = "A",
                rand_prob = "rand_prob",
                moderator_formula = ~time_var1,
                control_formula = ~ time_var1 + time_var2,
                availability = "avail",
                numerator_prob = 0.5,
                start = NULL,
                verbose = FALSE
            )$fit$alpha_se_adjusted),
            as.vector(c(0.09813404, 0.24373700, 0.11301692)),
            tolerance = 1e-6
        )
    }
)

test_that(
    "check varcov",
    {
        expect_equal(
            emee(
                data = data_binary,
                id = "userid",
                outcome = "Y",
                treatment = "A",
                moderator_formula = ~time_var1,
                control_formula = ~ time_var1 + time_var2,
                availability = "avail",
                rand_prob = "rand_prob",
                numerator_prob = 0.5,
                start = NULL,
                verbose = FALSE
            )$fit$varcov,
            matrix(
                c(
                    0.009407463, -0.01848050, 0.0036293844, -0.0088951364, 0.013687095,
                    -0.018480501, 0.05795642, -0.0213308200, 0.0142698457, -0.028903632,
                    0.003629384, -0.02133082, 0.0124712494, -0.0009844053, 0.004400843,
                    -0.008895136, 0.01426985, -0.0009844053, 0.0169329062, -0.022639280,
                    0.013687095, -0.02890363, 0.0044008428, -0.0226392795, 0.035455933
                ),
                nrow = 5, ncol = 5, byrow = TRUE
            ),
            tolerance = 1e-7
        )
    }
)

test_that(
    "check varcov_adjusted",
    {
        expect_equal(
            emee(
                data = data_binary,
                id = "userid",
                outcome = "Y",
                treatment = "A",
                moderator_formula = ~time_var1,
                control_formula = ~ time_var1 + time_var2,
                availability = "avail",
                rand_prob = "rand_prob",
                numerator_prob = 0.5,
                start = NULL,
                verbose = FALSE
            )$fit$varcov_adjusted,
            matrix(
                c(
                    0.009630289, -0.01893458, 0.0037223411, -0.0090997258, 0.01401250,
                    -0.018934578, 0.05940773, -0.0218615441, 0.0145917501, -0.02960477,
                    0.003722341, -0.02186154, 0.0127728233, -0.0009971158, 0.00450630,
                    -0.009099726, 0.01459175, -0.0009971158, 0.0173300425, -0.02317355,
                    0.014012503, -0.02960477, 0.0045063005, -0.0231735513, 0.03630756
                ),
                nrow = 5, ncol = 5, byrow = TRUE
            ),
            tolerance = 1e-7
        )
    }
)

# extract the confidence interval from the output and drop its column and row names

conf_int <- emee(
    data = data_binary,
    id = "userid",
    outcome = "Y",
    treatment = "A",
    moderator_formula = ~time_var1,
    control_formula = ~ time_var1 + time_var2,
    availability = "avail",
    rand_prob = "rand_prob",
    numerator_prob = 0.5,
    start = NULL,
    verbose = FALSE
)$fit$conf_int
dimnames(conf_int) <- c()

test_that(
    "check conf_int",
    {
        expect_equal(conf_int,
            matrix(c(-0.1739030, 0.3361929, 0.0602503, 0.7983763),
                nrow = 2, ncol = 2, byrow = TRUE
            ),
            tolerance = 1e-6
        )
    }
)

# extract the adjusted confidence interval from the output and drop its column and row names

conf_int_adjusted <- emee(
    data = data_binary,
    id = "userid",
    outcome = "Y",
    treatment = "A",
    moderator_formula = ~time_var1,
    control_formula = ~ time_var1 + time_var2,
    availability = "avail",
    rand_prob = "rand_prob",
    numerator_prob = 0.5,
    start = NULL,
    verbose = FALSE
)$fit$conf_int_adjusted
dimnames(conf_int_adjusted) <- c()

test_that(
    "check conf_int_adjusted",
    {
        expect_equal(conf_int_adjusted,
            matrix(c(-0.1802007, 0.3424906, 0.0510328, 0.8075939),
                nrow = 2, ncol = 2, byrow = TRUE
            ),
            tolerance = 1e-6
        )
    }
)


# test_that(
#     "check error when moderator_formula has Y in formula",
#     {
#         expect_error(
#             emee(
#                 data = data_binary,
#                 id = "userid",
#                 outcome = "Y",
#                 treatment = "A",
#                 moderator_formula = Y ~ time_var1,
#                 control_formula = ~ time_var1 + time_var2,
#                 availability = "avail",
#                 rand_prob = "rand_prob",
#                 numerator_prob = 0.5,
#                 start = NULL,
#                 verbose = FALSE
#             ),
#             "It seems like you included variables to left of ~. moderator_formula should look like ~1 or ~ mod_var1 + mod_var2."
#         )
#     }
# )