# Tests for Bayesian GLM mixture (real Stan MCMC)
# Part 1: glmMixBayes() basic object validity on a simple synthetic mixture dataset.
#
# NOTE:
# - skipped on CRAN
# - opt-in by setting RUN_STAN_TESTS=true
#
# Supported families (glmMixBayes): gaussian, poisson, binomial, gamma
#
# Example:
#   Sys.setenv(RUN_STAN_TESTS = "true")
#   devtools::test()

local_edition(3)

# ------------------------------------------------------------------------------
# Helper: generate simple synthetic 2-component mixture GLM data
# (in the spirit of generate_synthetic_mixture_data.R)
# ------------------------------------------------------------------------------
generate_bayesglm_mixture_data <- function(
  family = "gaussian",
  seed = 123,
  n = 120,
  theta = 0.6
) {
 family <- tolower(trimws(as.character(family)[1]))
 if (!family %in% c("gaussian", "poisson", "binomial", "gamma")) {
  stop("family must be one of gaussian/poisson/binomial/gamma", call. = FALSE)
 }

 set.seed(seed)

 # Design: intercept + one continuous + one binary predictor
 x1 <- stats::runif(n, -2, 2)
 x2 <- stats::rbinom(n, 1, 0.5)
 X  <- cbind(1, x1, x2)
 colnames(X) <- c("(Intercept)", "x1", "x2")

 # Mixture membership: 1 or 2
 z <- stats::rbinom(n, 1, theta) + 1L

 # Choose component-specific parameters with decent separation
 if (family == "gaussian") {
  beta1 <- c(0.2,  1.0, -0.6)
  beta2 <- c(1.5, -0.8,  0.9)
  sigma1 <- 0.7
  sigma2 <- 1.1

  eta1 <- drop(X %*% beta1)
  eta2 <- drop(X %*% beta2)
  mu   <- ifelse(z == 1L, eta1, eta2)
  sd   <- ifelse(z == 1L, sigma1, sigma2)
  y    <- stats::rnorm(n, mean = mu, sd = sd)

  truth <- list(beta1 = beta1, beta2 = beta2,
                dispersion1 = sigma1^2, dispersion2 = sigma2^2, theta = theta)

 } else if (family == "poisson") {
  beta1 <- c(-0.2, 0.4, -0.3)
  beta2 <- c( 0.8, 0.2,  0.5)

  eta1 <- drop(X %*% beta1)
  eta2 <- drop(X %*% beta2)
  lam  <- ifelse(z == 1L, exp(eta1), exp(eta2))
  y    <- stats::rpois(n, lambda = lam)

  truth <- list(beta1 = beta1, beta2 = beta2, theta = theta)

 } else if (family == "binomial") {
  beta1 <- c(-0.5, 0.8, -0.6)
  beta2 <- c( 0.7,-0.4,  0.9)

  eta1 <- drop(X %*% beta1)
  eta2 <- drop(X %*% beta2)
  p    <- ifelse(z == 1L, stats::plogis(eta1), stats::plogis(eta2))
  y    <- stats::rbinom(n, size = 1, prob = p)

  truth <- list(beta1 = beta1, beta2 = beta2, theta = theta)

 } else if (family == "gamma") {
  # Use mean = exp(eta), with component-specific shape (phi)
  beta1 <- c(0.1, 0.3, -0.2)
  beta2 <- c(0.6,-0.2,  0.4)
  phi1  <- 3.0
  phi2  <- 2.0

  eta1  <- drop(X %*% beta1)
  eta2  <- drop(X %*% beta2)
  mu    <- ifelse(z == 1L, exp(eta1), exp(eta2))

  shape <- ifelse(z == 1L, phi1, phi2)
  rate  <- shape / mu
  y     <- stats::rgamma(n, shape = shape, rate = rate)

  truth <- list(beta1 = beta1, beta2 = beta2,
                dispersion1 = 1/phi1, dispersion2 = 1/phi2, theta = theta)
 }

 list(X = X, y = y, truth = truth)
}

# ------------------------------------------------------------------------------
# Test 1: glmMixBayes basic validity
# ------------------------------------------------------------------------------
test_that("glmMixBayes runs real MCMC and returns a valid object", {
 skip_on_cran()
 skip_if_not_installed("rstan")
 skip_if_not_installed("label.switching")

 if (!identical(Sys.getenv("RUN_STAN_TESTS"), "true")) {
  skip("Set RUN_STAN_TESTS=true to run real Stan MCMC tests.")
 }

 rstan::rstan_options(auto_write = TRUE)
 options(mc.cores = 1L)

 # Document supported family types explicitly
 supported_families <- c("gaussian", "poisson", "binomial", "gamma")
 expect_error(glmMixBayes(X = matrix(1, 3, 1), y = rnorm(3), family = "badfamily"))
 expect_true(all(supported_families %in% c("gaussian","poisson","binomial","gamma")))

 # Keep this test simple + stable: run ONE family (gaussian) on mixture-like data
 dat <- generate_bayesglm_mixture_data(family = "gaussian", seed = 123, n = 100, theta = 0.6)
 X <- dat$X
 y <- dat$y

 custom_priors <- list(
  beta1 = "normal(0,3)",
  beta2 = "normal(0,4)",
  sigma1 = "cauchy(0,1.5)",
  sigma2 = "cauchy(0,2)",
  theta = "beta(2,2)"
 )

 fit <- glmMixBayes(
  X = X,
  y = y,
  family = "gaussian",
  priors = custom_priors,
  control = list(
   iterations = 2000,
   burnin.iterations = 1000,
   seed = 123,
   cores = 1,
   adapt_delta = 0.99,
   max_treedepth = 12
  )
 )

 expect_s3_class(fit, "glmMixBayes")

 # Basic structure checks (match your existing conventions)
 expect_true(is.matrix(fit$m_samples))
 expect_equal(ncol(fit$m_samples), nrow(X))

 expect_true(is.list(fit$estimates))
 expect_true(is.matrix(fit$estimates$coefficients))
 expect_true(is.matrix(fit$estimates$m.coefficients))
 expect_equal(ncol(fit$estimates$coefficients), ncol(X))
 expect_equal(ncol(fit$estimates$m.coefficients), ncol(X))

 if (!is.null(colnames(X))) {
  expect_equal(colnames(fit$estimates$coefficients), colnames(X))
  expect_equal(colnames(fit$estimates$m.coefficients), colnames(X))
 }

 # Dispersion present for gaussian (and gamma)
 expect_true("dispersion" %in% names(fit$estimates))
 expect_true("m.dispersion" %in% names(fit$estimates))
})