context("baggr() calls with IPD version of Rubin model")
library(baggr)

skip_on_cran()

set.seed(1990)

# Generate 8 schools like IPD data
schools_ipd <- data.frame()
N <- c(rep(10, 4), rep(20, 4))
bsl <- vector(length=8)
for(i in 1:8){
  bsl[i] <- rnorm(1, 0, 5)

  x <- rnorm(N[i])
  x <- (x-mean(x))/sd(x)
  x <- x*schools$se[i]*sqrt(N[i])/1.41 + schools$tau[i]

  y <- rnorm(N[i])
  y <- (y-mean(y))/sd(y)
  y <- y*schools$se[i]*sqrt(N[i])/1.41

  schools_ipd <- rbind(schools_ipd,
                       data.frame(group = schools$group[i], outcome = bsl[i] + x, treatment = 1),
                       # This is just so that we don't trip off prepare_ma:
                       data.frame(group = schools$group[i], outcome = bsl[i] + y, treatment = 0))
}



bg_n <- expect_warning(baggr(schools_ipd, pooling = "none", iter = 150, refresh=0))
bg_p <- expect_warning(baggr(schools_ipd, pooling = "partial", iter = 150, refresh=0))
bg_f <- expect_warning(baggr(schools_ipd, pooling = "full", iter = 150, refresh=0))


test_that("Different pooling methods work for the rubin_full model", {
  expect_is(bg_n, "baggr")
  expect_is(bg_p, "baggr")
  expect_is(bg_f, "baggr")
})

test_that("Basic operations on rubin_full model", {
  expect_error(baggr(schools_ipd, rubbish = 41))
  expect_is(pooling(bg_p)[,,1], "matrix")
  expect_is(plot(bg_p), "gg")
  expect_is(effect_plot(bg_p), "gg")
  expect_is(funnel_plot(bg_p), "gg")
  expect_is(forest_plot(bg_p), "gforge_forestplot")
  bgc <- try(baggr_compare(bg_n, bg_p, bg_f))
  expect_is(bgc, "baggr_compare")

  # No pooling gives sensible results:
  expect_equal(
    as.numeric(group_effects(bg_n,s=T)[,"mean",1]),
    schools$tau, tolerance = 1)

})

test_that("extra pooling stats work", {
  # Extra pooling checks
  # Calculation of I^2 and H^2
  i2 <- pooling(bg_p, metric = "isq")
  expect_is(i2, "array")
  expect_gte(min(i2), 0)
  expect_lte(max(i2), 1)
  h2 <- pooling(bg_p, metric = "hsq")
  expect_is(h2, "array")
  expect_gte(min(h2), 1)
  # Calculation of weights makes sense
  wt <- weights(bg_p)
  expect_is(wt, "array")
  expect_equal(dim(wt), c(3,8,1))
  expect_equal(sum(wt[2,,1]), 1)
  expect_lte(sum(wt[1,,1]), sum(wt[2,,1]))
  expect_gte(sum(wt[3,,1]), sum(wt[2,,1]))
  expect_gte(sum(wt[1,,1]), 0)
  wt2 <- pooling(bg_p, metric = "weights")
  expect_identical(wt, wt2)
})

test_that("rubin_full model crashes with nonsense inputs", {
  expect_error(baggr(schools_ipd, outcome = 2), "Arguments")
  expect_error(baggr(schools_ipd, group = 2), "Arguments")
  expect_error(baggr(schools_ipd, treatment = 2), "Arguments")
  expect_error(baggr(schools_ipd, treatment = "wrong"), "no column")
  schools_ipd2 <- schools_ipd; schools_ipd2$treatment <- as.character(schools_ipd$treatment)
  expect_error(baggr(schools_ipd2), "has to be numeric")
  schools_ipd2 <- schools_ipd; schools_ipd2$outcome <- as.character(schools_ipd$outcome)
  expect_error(baggr(schools_ipd2), "has to be numeric")

})

test_that("Using old syntax (model = full) still works", {
  bg <- expect_warning(expect_message(baggr(schools_ipd,
                                            model = "full",
                                            pooling = "none", iter = 10, refresh=0)))
  expect_is(bg, "baggr")
})


comp_flpl <- expect_warning(baggr_compare(
  schools, model = "rubin", iter = 150, what = "pooling"
))

comp_flpr <- expect_warning(baggr_compare(
  schools, model = "rubin", iter = 150, what = "prior"
))

comp_flmdls <- baggr_compare(bg_f, bg_p)

test_that("baggr comparison method works for rubin_full model", {

  expect_is(comp_flpl, "baggr_compare")
  expect_is(comp_flpr, "baggr_compare")
  expect_is(comp_flmdls, "baggr_compare")

  expect_is(testthat::capture_output(print(comp_flpl)), "character")
  expect_is(testthat::capture_output(print(comp_flpr)), "character")
  expect_is(testthat::capture_output(print(comp_flmdls)), "character")

  expect_gt(length(comp_flpl), 0)
  expect_gt(length(comp_flpr), 0)
  expect_gt(length(comp_flmdls), 0)

  expect_is(plot(comp_flpl), "gg")

  expect_is(plot(comp_flpr), "ggplot")

  expect_is(plot(comp_flmdls), "gg")

  expect_is(plot(comp_flpl, grid_models = TRUE), "gtable")

  expect_is(plot(comp_flpr, grid_models = TRUE), "gtable")

  expect_is(plot(comp_flmdls, grid_models = TRUE), "gtable")
})

test_that("rubin_full cross-validation works", {
  # Missing held-out site controls in training should now fail clearly.
  expect_error(
    baggr(subset(schools_ipd, group != "School A"), iter = 20, refresh = 0,
          test_data = subset(schools_ipd, group == "School A" & treatment == 1)),
    "all test groups must be present in training data"
  )

  # Correct CV split: keep controls for held-out site in training,
  # evaluate treated rows for that site in test_data.
  bg <- expect_warning(
    baggr(subset(schools_ipd, group != "School A" | treatment == 0),
          iter = 20, refresh = 0,
          test_data = subset(schools_ipd, group == "School A" & treatment == 1))
  )
  expect_is(bg, "baggr")
  expect_gt(bg$mean_lpd, 0)
})

test_that("rubin_full CV handles numeric/character groups and K_test is correct", {
  run_cv_case <- function(df, held_out_groups) {
    train_data <- subset(df, !(group %in% held_out_groups) | treatment == 0)
    test_data <- subset(df, group %in% held_out_groups & treatment == 1)

    bg <- expect_warning(
      baggr(train_data, iter = 20, chains = 1, refresh = 0, test_data = test_data)
    )

    expect_is(bg, "baggr")
    expect_true(is.finite(bg$mean_lpd))
    expect_equal(bg$inputs$K_test, length(unique(test_data$group)))
    expect_equal(length(unique(bg$inputs$test_site)), bg$inputs$K_test)
  }

  schools_ipd_num <- schools_ipd
  schools_ipd_num$group <- as.numeric(factor(schools_ipd_num$group, levels = unique(schools_ipd_num$group)))

  run_cv_case(schools_ipd, "School A")
  run_cv_case(schools_ipd, c("School A", "School C", "School E"))
  run_cv_case(schools_ipd_num, 1)
  run_cv_case(schools_ipd_num, c(1, 3, 5))
})

test_that("full-data test sigma uses SD and not SE", {
  train <- data.frame(
    group = c(rep("Train", 8), rep(c("TestA", "TestB"), each = 2)),
    outcome = c(-0.5, -0.2, 0.1, 0.4, 1.4, 1.8, 2.2, 2.5, 0.8, 1.1, 0.3, 0.9),
    treatment = c(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0)
  )
  test <- data.frame(
    group = rep(c("TestA", "TestB"), each = 4),
    outcome = c(1, 2, 3, 4, 0.5, 1.5, 1.7, 2.3),
    treatment = 1
  )

  expected_sd <- tapply(test$outcome, test$group, sd)

  inp_rubin <- convert_inputs(train, model = "rubin_full", quantiles = seq(.05, .95, .1),
                              test_data = test, outcome = "outcome",
                              group = "group", treatment = "treatment")
  inp_mutau <- convert_inputs(train, model = "mutau_full", quantiles = seq(.05, .95, .1),
                              test_data = test, outcome = "outcome",
                              group = "group", treatment = "treatment")

  expect_equal(as.numeric(inp_rubin$test_sigma_y_k), as.numeric(expected_sd))
  expect_equal(as.numeric(inp_mutau$test_sigma_y_k), as.numeric(expected_sd))
})

test_that("rubin_full predictive log density matches analytic Normal density scale", {
  train <- data.frame(
    group = c(rep("Train1", 8), rep("Train2", 8), rep("Test", 4)),
    outcome = c(-0.5, -0.2, 0.1, 0.4, 1.4, 1.8, 2.2, 2.5,
                -0.3, 0.0, 0.2, 0.6, 1.1, 1.7, 2.0, 2.4,
                0.2, 0.6, 0.9, 1.1),
    treatment = c(rep(c(0, 0, 0, 0, 1, 1, 1, 1), 2), 0, 0, 0, 0)
  )
  test <- data.frame(
    group = rep("Test", 4),
    outcome = c(1, 2, 3, 4),
    treatment = 1
  )
  test_sd <- sd(test$outcome)
  test_se <- test_sd / sqrt(nrow(test))

  bg <- expect_warning(
    baggr(train,
          model = "rubin_full",
          pooling = "full",
          pooling_control = "remove",
          prior_hypermean = normal(0, 5),
          prior_sigma = uniform(0.1, 5),
          test_data = test,
          iter = 250, warmup = 125, chains = 2, refresh = 0, seed = 137)
  )

  mu_draw <- rstan::extract(bg$fit, "mu")[[1]][,1]
  logpd <- rstan::extract(bg$fit, "logpd[1]")[[1]]

  expected_sd_logpd <- vapply(mu_draw, function(m) {
    sum(dnorm(test$outcome, mean = m, sd = test_sd, log = TRUE))
  }, numeric(1))
  expected_se_logpd <- vapply(mu_draw, function(m) {
    sum(dnorm(test$outcome, mean = m, sd = test_se, log = TRUE))
  }, numeric(1))

  expect_equal(as.numeric(logpd), as.numeric(expected_sd_logpd), tolerance = 1e-6)
  expect_gt(mean(abs(logpd - expected_se_logpd)), 0.1)
})



# covariates ------
sa <- schools_ipd
sa$a <- rnorm(nrow(schools_ipd))
sa$b <- rnorm(nrow(schools_ipd))
sa$c <- sample(c("A", "B", "C"), nrow(schools_ipd), replace = T)

sb <- sa
sb$b <- NULL

test_that("rubin_full warns about covariates constant within every site", {
  site_cov <- data.frame(
    group = rep(1:2, each = 2),
    x = c(0, 0, 1, 1)
  )

  expect_warning(
    .warn_constant_within_site_covariates(site_cov, "x", "group"),
    "covariates x are constant within every site"
  )
})

test_that("rubin_full does not warn when covariate varies within a site", {
  individual_cov <- data.frame(
    group = rep(1:2, each = 2),
    x = c(0, 1, 1, 1)
  )

  expect_warning(
    .warn_constant_within_site_covariates(individual_cov, "x", "group"),
    NA
  )
})

test_that("rubin_full constant within-site covariate warning names only bad covariates", {
  mixed_cov <- data.frame(
    group = rep(1:2, each = 2),
    x = c(0, 0, 1, 1),
    z = c(0, 1, 1, 1)
  )

  expect_warning(
    .warn_constant_within_site_covariates(mixed_cov, c("x", "z"), "group"),
    regexp = "covariates x are constant within every site"
  )
  warning_msg <- NULL
  withCallingHandlers(
    .warn_constant_within_site_covariates(mixed_cov, c("x", "z"), "group"),
    warning = function(w) {
      warning_msg <<- conditionMessage(w)
      invokeRestart("muffleWarning")
    }
  )
  expect_match(warning_msg, "covariates x are constant within every site")
  expect_false(grepl("z", warning_msg, fixed = TRUE))
})

test_that("rubin_full constant within-site covariate warning ignores missing values", {
  missing_cov <- data.frame(
    group = rep(1:2, each = 2),
    x = c(0, NA, 1, NA)
  )

  expect_warning(
    .warn_constant_within_site_covariates(missing_cov, "x", "group"),
    "covariates x are constant within every site"
  )
})

test_that("Model with covariates works fine", {
  bg_cov <- expect_warning(
    baggr(sa, covariates = c("a", "b", "c"), iter = 150, chains = 1, refresh = 0))

  expect_is(bg_cov, "baggr")
  expect_error(baggr(sa, covariates = c("made_up_covariates")), "are not columns")
  expect_error(baggr(sa, covariates = c("a", "b", "made_up_covariates")), "are not columns")
  expect_length(bg_p$covariates, 0)
  expect_length(bg_cov$covariates, 3)
  expect_null(bg_cov$mean_lpd)

  # Fixed effects extraction
  expect_is(fixed_effects(bg_cov), "matrix")
  expect_is(fixed_effects(bg_cov, transform = exp), "matrix")
  expect_equal(dim(fixed_effects(bg_cov, summary = TRUE)), c(4,5,1))
  expect_equal(dim(fixed_effects(bg_cov, summary = FALSE))[2], 4)
})



test_that("Fixed within-study covariates are added to summary_data", {
  sa_fixed <- schools_ipd
  sa_fixed$site_cov <- as.numeric(factor(sa_fixed$group))
  bg_cov_fixed <- expect_warning(
    baggr(sa_fixed, covariates = c("site_cov"), iter = 150, chains = 1, refresh = 0)
  )

  expect_true("site_cov" %in% names(bg_cov_fixed$summary_data))
  expected <- sa_fixed$site_cov[match(bg_cov_fixed$summary_data$group, sa_fixed$group)]
  expect_equal(bg_cov_fixed$summary_data$site_cov, expected)
})


test_that("Within-study varying covariates are reported as not meta-regression", {
  sa_vary <- schools_ipd
  sa_vary$ind_cov <- rnorm(nrow(sa_vary))

  bg_cov_vary <- expect_message(
    suppressWarnings(
      baggr(sa_vary, covariates = c("ind_cov"),
            iter = 150, chains = 1, refresh = 0, warn = FALSE)
    ),
    "Covariate ind_cov varies within studies. Model fitting will work but is not a meta-regression."
  )
  expect_is(bg_cov_vary, "baggr")
})
bg_pr <- expect_warning(baggr(schools_ipd,
                              pooling = "partial",
                              pooling_control = "remove",
                              iter = 150,
                              refresh=0))
bg_pn <- expect_warning(baggr(schools_ipd,
                              pooling = "partial",
                              pooling_control = "none",
                              iter = 150,
                              refresh=0))
test_that("You can change pooling on values in control group", {
  expect_is(bg_pr, "baggr")
  expect_is(bg_pn, "baggr")
  bsl_k <- apply(rstan::extract(bg_pr$fit, "baseline_k")[[1]], 2, mean)
  expect_length(bsl_k, 8)
  expect_equal(bsl_k, rep(0, 8))
  bsl_k <- apply(rstan::extract(bg_pn$fit, "baseline_k")[[1]], 2, mean)
  expect_length(bsl_k, 8)
  expect_equal(bsl_k, bsl, tolerance = 1)
})