# G-computation library(dplyr) library(stdReg2) # test_that("different combinations of covariates in formula", { load(test_path("testdata/BC_ALD.RData")) load(test_path("testdata/AC_IPD.RData")) BC_ALD <- reshape_ald_to_long(BC_ALD) AC_IPD$trt <- factor(AC_IPD$trt, labels = c("C", "A")) # from 0, 1 ### gcomp_ml expect_error(strategy_gcomp_ml(formula = as.formula("y ~ 1")), regexp = "Treatment term 'trt' is missing in the formula") expect_error(strategy_gcomp_ml(formula = as.formula("y ~ X3 + X4"), trt_var = "trt"), regexp = "Treatment term 'trt' is missing in the formula") strat_1234 <- strategy_gcomp_ml(formula = as.formula("y ~ X3 + X4 + trt*X1 + trt*X2")) strat_31 <- strategy_gcomp_ml(formula = as.formula("y ~ X3 + trt*X1")) strat_13 <- strategy_gcomp_ml(formula = as.formula("y ~ trt*X1 + X3")) strat_1 <- strategy_gcomp_ml(formula = as.formula("y ~ trt*X1")) res <- outstandR(ipd_trial = AC_IPD, ald_trial = BC_ALD, strategy = strat_1234) expect_length(res, 2) # expect_equal(outstandR(AC_IPD, BC_ALD, strategy = strat_31)) # expect_equal(outstandR(AC_IPD, BC_ALD, strategy = strat_13)) # expect_equal(outstandR(AC_IPD, BC_ALD, strategy = strat_1)) ### gcomp_stan expect_error(strategy_gcomp_stan(formula = as.formula("y ~ 1")), regexp = "Treatment term 'trt' is missing in the formula") expect_error(strategy_gcomp_stan(formula = as.formula("y ~ X3 + X4"), trt_var = "trt"), regexp = "Treatment term 'trt' is missing in the formula") strat_1234 <- strategy_gcomp_stan(formula = as.formula("y ~ X3 + X4 + trt*X1 + trt*X2")) strat_31 <- strategy_gcomp_stan(formula = as.formula("y ~ X3 + trt*X1")) strat_13 <- strategy_gcomp_stan(formula = as.formula("y ~ trt*X1 + X3")) strat_1 <- strategy_gcomp_stan(formula = as.formula("y ~ trt*X1")) res <- outstandR(ipd_trial = AC_IPD, ald_trial = BC_ALD, strategy = strat_1234) expect_length(res, 2) # expect_equal(outstandR(AC_IPD, BC_ALD, strategy = strat_31)) # expect_equal(outstandR(AC_IPD, BC_ALD, strategy = strat_13)) # expect_equal(outstandR(AC_IPD, BC_ALD, strategy = strat_1)) }) test_that("compare with stdReg2 package for continuous outcome", { ## original library(causaldata) # load dataset nhefs_dat <- causaldata::nhefs_complete # m <- glm(wt82_71 ~ qsmk + sex + age, # data = nhefs_dat) res_stdReg2 <- stdReg2::standardize_glm( wt82_71 ~ qsmk + sex + age, data = nhefs_dat, values = list(qsmk = c(0,1)), family = "gaussian", contrasts = c("difference", "ratio"), reference = 0) ## {outstandR} nhefs_ipd <- nhefs_dat |> dplyr::select(qsmk, sex, age, wt82_71) |> dplyr::rename(trt = qsmk, y = wt82_71) |> dplyr::mutate(sex = as.numeric(sex) - 1, trt = factor(trt, labels = c("C", "A"))) lin_form <- as.formula(y ~ trt * (sex + age)) # create aggregate data nhefs.X <- nhefs_ipd |> dplyr::summarise(across(-c(trt, y), list(mean = mean, sd = sd), .names = "{fn}.{col}")) |> reshape2::melt() |> tidyr::separate(variable, into = c("statistic", "variable"), sep = "\\.") |> dplyr::mutate(trt = NA) ald_out <- nhefs_ipd |> group_by(trt) |> dplyr::summarise( y.sum = sum(y), y.mean = mean(y), y.sd = sd(y), .N = dplyr::n()) |> reshape2::melt(id.vars = "trt") |> tidyr::separate(variable, into = c("variable", "statistic"), sep = "\\.") |> dplyr::mutate(trt = ifelse(trt == "A", "B", "C")) nhefs_ald <- dplyr::bind_rows(nhefs.X, ald_out) |> tibble::as_tibble() |> dplyr::mutate(trt = factor(trt)) res_outstandr <- gcomp_ml_means( formula = lin_form, family = "gaussian", ipd = nhefs_ipd, ald = nhefs_ald, ref_trt = "C", comp_trt = "A", trt_var = "trt", N = 100000) # means stdReg2_estimates <- res_stdReg2$res$estimates expect_equal(unname(res_outstandr["0"]), stdReg2_estimates$estimates[stdReg2_estimates$qsmk == 0], tolerance = 0.1) expect_equal(unname(res_outstandr["1"]), stdReg2_estimates$estimates[stdReg2_estimates$qsmk == 1], tolerance = 0.1) # std errors ##TODO: # different output scales expect_equal( res_stdReg2$res_contrast[[2]]$est_table$Estimate[2], calculate_ate(mean_comp = unname(res_outstandr["1"]), mean_ref = unname(res_outstandr["0"]), effect = "risk_difference"), tolerance = 0.01) expect_equal( res_stdReg2$res_contrast[[3]]$est_table$Estimate[2], exp(calculate_ate(mean_comp = unname(res_outstandr["1"]), mean_ref = unname(res_outstandr["0"]), effect = "log_relative_risk")), tolerance = 0.01) }) test_that("compare with stdReg2 package for binary outcome", { # simulate data n <- 1000 data <- data.frame( age = rnorm(n, mean = 50, sd = 10), sex = rbinom(n, 1, 0.5), trt = rbinom(n, 1, 0.5)) # generate a binary outcome with logit link logit_p <- -1 + data$trt * (0.03 * data$age + 0.2 * data$sex) prob <- 1 / (1 + exp(-logit_p)) data$y <- rbinom(n, 1, prob) res_stdReg2 <- standardize_glm( y ~ trt + age + sex, family = "binomial", data = data, values = list(trt = c(0,1)), contrasts = c("difference", "ratio"), reference = 0) ## {outstandR} # create aggregate data data.X <- data |> dplyr::summarise(across(-c(trt, y), list(mean = mean, sd = sd), .names = "{fn}.{col}")) |> reshape2::melt() |> tidyr::separate(variable, into = c("statistic", "variable"), sep = "\\.") |> dplyr::mutate(trt = NA) ald_out <- data |> group_by(trt) |> dplyr::summarise( y.sum = sum(y), y.mean = mean(y), y.sd = sd(y), .N = dplyr::n()) |> reshape2::melt(id.vars = "trt") |> tidyr::separate(variable, into = c("variable", "statistic"), sep = "\\.") |> dplyr::mutate(trt = ifelse(trt == "1", "B", "C")) data_ald <- bind_rows(data.X, ald_out) |> tibble::as_tibble() |> dplyr::mutate(trt = factor(trt)) data$trt <- factor(data$trt, labels = c("C", "A")) lin_form <- as.formula(y ~ trt * (sex + age)) res_outstandr <- gcomp_ml_means( formula = lin_form, family = "binomial", ipd = data, ald = data_ald, trt_var = "trt", comp_trt = "A", ref_trt = "C", N = 10000) # means stdReg2_estimates <- res_stdReg2$res$estimates expect_equal(unname(res_outstandr["0"]), stdReg2_estimates$estimates[stdReg2_estimates$trt == 0], tolerance = 0.1) expect_equal(unname(res_outstandr["1"]), stdReg2_estimates$estimates[stdReg2_estimates$trt == 1], tolerance = 0.1) # std errors ##TODO: # different output scales expect_equal( res_stdReg2$res_contrast[[2]]$est_table$Estimate[2], calculate_ate(mean_comp = unname(res_outstandr["1"]), mean_ref = unname(res_outstandr["0"]), effect = "risk_difference"), tolerance = 0.01) expect_equal( res_stdReg2$res_contrast[[3]]$est_table$Estimate[2], exp(calculate_ate(mean_comp = unname(res_outstandr["1"]), mean_ref = unname(res_outstandr["0"]), effect = "log_relative_risk")), tolerance = 0.01) }) test_that("compare with marginaleffects package for binary outcome", { # simulate data n <- 1000 data <- data.frame( age = rnorm(n, mean = 50, sd = 10), sex = rbinom(n, 1, 0.5), trt = rbinom(n, 1, 0.5)) # generate a binary outcome with logit link logit_p <- -1 + data$trt * (0.03 * data$age + 0.2 * data$sex) prob <- 1 / (1 + exp(-logit_p)) data$y <- rbinom(n, 1, prob) data$trt <- factor(data$trt, labels = c("C", "A")) lin_form <- as.formula(y ~ trt * (sex + age)) fit <- glm(lin_form, family = binomial(link = "logit"), data = data) m_eff_pred <- marginaleffects::avg_predictions(fit, variables = "trt", by = "trt") ## {outstandR} # create aggregate data data.X <- data |> dplyr::summarise(across(-c(trt, y), list(mean = mean, sd = sd), .names = "{fn}.{col}")) |> reshape2::melt() |> tidyr::separate(variable, into = c("statistic", "variable"), sep = "\\.") |> dplyr::mutate(trt = NA) ald_out <- data |> group_by(trt) |> dplyr::summarise( y.sum = sum(y), y.mean = mean(y), y.sd = sd(y), .N = dplyr::n()) |> reshape2::melt(id.vars = "trt") |> tidyr::separate(variable, into = c("variable", "statistic"), sep = "\\.") |> dplyr::mutate(trt = ifelse(trt == "1", "B", "C")) data_ald <- bind_rows(data.X, ald_out) |> tibble::as_tibble() |> dplyr::mutate(trt = factor(trt)) data$trt <- factor(data$trt, labels = c("C", "A")) res_outstandr <- gcomp_ml_means( formula = lin_form, family = "binomial", ipd = data, ald = data_ald, trt_var = "trt", comp_trt = "A", ref_trt = "C", N = 10000) # means estimates <- m_eff_pred$estimate expect_equal(unname(res_outstandr["0"]), estimates[1], tolerance = 0.1) expect_equal(unname(res_outstandr["1"]), estimates[2], tolerance = 0.1) }) test_that("compare with marginaleffects package for continuous outcome", { ## original library(causaldata) # load dataset nhefs_dat <- causaldata::nhefs_complete fit <- glm(wt82_71 ~ qsmk * (sex + age), family = "gaussian", data = nhefs_dat) m_eff_pred <- marginaleffects::avg_predictions(fit, variables = "qsmk", by = "qsmk") ## {outstandR} nhefs_ipd <- nhefs_dat |> select(qsmk, sex, age, wt82_71) |> rename(trt = qsmk, y = wt82_71) |> mutate(sex = as.numeric(sex) - 1, trt = factor(trt, labels = c("C", "A"))) lin_form <- as.formula(y ~ trt * (sex + age)) # create aggregate data nhefs.X <- nhefs_ipd |> dplyr::summarise(across(-c(trt, y), list(mean = mean, sd = sd), .names = "{fn}.{col}")) |> reshape2::melt() |> tidyr::separate(variable, into = c("statistic", "variable"), sep = "\\.") |> dplyr::mutate(trt = NA) ald_out <- nhefs_ipd |> group_by(trt) |> dplyr::summarise( y.sum = sum(y), y.mean = mean(y), y.sd = sd(y), .N = dplyr::n()) |> reshape2::melt(id.vars = "trt") |> tidyr::separate(variable, into = c("variable", "statistic"), sep = "\\.") |> dplyr::mutate(trt = ifelse(trt == "1", "B", "C")) nhefs_ald <- bind_rows(nhefs.X, ald_out) |> tibble::as_tibble() |> dplyr::mutate(trt = factor(trt)) res_outstandr <- gcomp_ml_means( formula = lin_form, family = "gaussian", ipd = nhefs_ipd, ald = nhefs_ald, trt_var = "trt", comp_trt = "A", ref_trt = "C", N = 100000) # means expect_equal(unname(res_outstandr["0"]), m_eff_pred$estimate[1], tolerance = 0.1) expect_equal(unname(res_outstandr["1"]), m_eff_pred$estimate[2], tolerance = 0.1) # std errors ##TODO: # different output scales }) # test_that("mismatch between covariates in ald and ipd / formula", { # different order between ald and ipd # age sex; sex age # different order between ald and formula # age + sex; mean.sex mean.age # same covariates as EM and PF # age + trt * (age) # transformed covariates in formula # I(X1^2) })