test_that("Binary posterior intervals are finite, shrink with n, and coverage is non-degenerate", {
  skip_if_not_installed("RBesT")
  
  set.seed(201)
  
  dose_levels <- c(0, 1, 2, 4)
  p_true      <- c(0.10, 0.20, 0.35, 0.55)
  theta_true  <- qlogis(p_true)
  
  prior_list <- setNames(
    lapply(seq_along(dose_levels), function(i) {
      RBesT::mixnorm(comp1 = c(w = 1, m = 0, s = 3), sigma = 3)
    }),
    c("Ctr", paste0("DG_", seq_along(dose_levels[-1])))
  )
  
  run_replications <- function(n_per_dose, R) {
    covered <- matrix(FALSE, nrow = R, ncol = length(dose_levels))
    widths  <- matrix(NA_real_, nrow = R, ncol = length(dose_levels))
    
    for (r in seq_len(R)) {
      dat <- data.frame(
        simulation = 1L,
        dose       = rep(dose_levels, each = n_per_dose),
        response   = unlist(lapply(p_true, function(p) rbinom(n_per_dose, 1, p)))
      )
      attr(dat, "probability_scale") <- TRUE
      
      post <- getPosterior(
        prior_list        = prior_list,
        data              = dat,
        probability_scale = TRUE
      )
      
      lo <- as.numeric(sapply(post, function(m) stats::quantile(m, probs = 0.025)))
      hi <- as.numeric(sapply(post, function(m) stats::quantile(m, probs = 0.975)))
      
      # Always-required sanity
      if (!all(is.finite(lo)) || !all(is.finite(hi)) || !all(lo <= hi)) {
        return(list(ok = FALSE))
      }
      
      widths[r, ]  <- (hi - lo)
      covered[r, ] <- (theta_true >= lo) & (theta_true <= hi)
    }
    
    cov_rate <- colMeans(covered)
    width    <- colMeans(widths)
    
    list(
      ok = TRUE,
      cov_rate = cov_rate,
      width = width,
      cov_mean = mean(cov_rate),
      width_mean = mean(width),
      cov_min = min(cov_rate),
      cov_max = max(cov_rate)
    )
  }
  
  # Keep runtime reasonable but stable
  res_small <- run_replications(n_per_dose = 40,  R = 40)
  res_big   <- run_replications(n_per_dose = 200, R = 40)
  
  expect_true(res_small$ok)
  expect_true(res_big$ok)
  
  # 1) Width must shrink with more data (core statistical plausibility)
  expect_true(res_big$width_mean < res_small$width_mean)
  
  # 2) Coverage must be non-degenerate overall (not ~0, not identically 1)
  #    These are intentionally loose to accommodate approximate posteriors.
  expect_true(res_small$cov_mean > 0.05)
  expect_true(res_small$cov_mean < 0.999)
  
  expect_true(res_big$cov_mean > 0.05)
  expect_true(res_big$cov_mean < 0.999)
  
  # 3) It should not get dramatically worse with more data in aggregate
  #    (This catches regressions where CI computation breaks asymptotically.)
  expect_true(res_big$cov_mean + 0.20 >= res_small$cov_mean)
  
  # 4) Guard against "everything always covers" for all doses at large n
  #    If max coverage is 1.0 that's fine, but not all doses should be ~1 simultaneously.
  expect_true(mean(res_big$cov_rate > 0.999) < 0.75)
})