# ======================================================================
# tests/testthat/test-glmmFEL-matrix-only.R
#
# Tests for the *simplified* glmmFEL development setting:
#   - Matrix-only interface: glmmFEL(y, X, Z, ...)
#   - Single variance component: G = tau2 * I_q (one tau2 scalar)
#   - Keep FE mean/full machinery + RSPL/MSPL (PQL) machinery
#   - Convert dense X/Z to sparse immediately
#
# Coverage:
#   (1) Derivative equivalence vs mvglmmrank rder*/jder* (moderate q-range)
#   (2) FE trace corrections for diag-G match finite differences (tilting)
#   (3) Smoke fits for Laplace / FE_mean / FE_full and RSPL/MSPL
#   (4) Multi-membership Z smoke (2 nonzeros per row)
#   (5) Dense->sparse coercion
#   (6) Smoke fits for binomial_logit across Laplace/FE/PL
# ======================================================================

testthat::local_edition(3)

# ---- helpers ----

make_Z_oneway <- function(id, n_id = max(as.integer(id))) {
  n <- length(id)
  Matrix::sparseMatrix(
    i = seq_len(n),
    j = as.integer(id),
    x = 1,
    dims = c(n, n_id)
  )
}

simulate_oneway <- function(family = c("binomial_probit", "binomial_logit", "poisson_log"),
                            n_id = 30L,
                            m_per_id = 6L,
                            beta = c(0.2, 0.7),
                            tau2 = 0.5,
                            seed = NULL) {
  family <- match.arg(family)
  if (!is.null(seed)) set.seed(seed)

  id <- factor(rep(seq_len(n_id), each = m_per_id))
  n  <- length(id)

  x <- stats::rnorm(n)
  X <- stats::model.matrix(~ x)

  Z <- make_Z_oneway(id, n_id)

  eta <- stats::rnorm(n_id, sd = sqrt(tau2))
  lp  <- as.vector(X %*% beta + Z %*% eta)

  y <- switch(
    family,
    binomial_probit = stats::rbinom(n, 1, stats::pnorm(lp)),
    binomial_logit  = stats::rbinom(n, 1, stats::plogis(lp)),
    poisson_log     = stats::rpois(n, exp(lp))
  )

  list(y = y, X = X, Z = Z, id = id, x = x, truth = list(beta = beta, tau2 = tau2))
}

simulate_multimember_2perrow <- function(family = c("binomial_probit", "binomial_logit", "poisson_log"),
                                         n_re = 12L,
                                         n_obs = 60L,
                                         beta = 0.0,
                                         tau2 = 0.7,
                                         seed = NULL) {
  # Multi-membership: each row has +1 for "home", -1 for "away"
  family <- match.arg(family)
  if (!is.null(seed)) set.seed(seed)

  teams <- seq_len(n_re)
  home <- sample(teams, n_obs, replace = TRUE)
  away <- sample(teams, n_obs, replace = TRUE)
  # enforce home != away
  same <- home == away
  while (any(same)) {
    away[same] <- sample(teams, sum(same), replace = TRUE)
    same <- home == away
  }

  i <- rep(seq_len(n_obs), times = 2)
  j <- c(home, away)
  x <- c(rep(1, n_obs), rep(-1, n_obs))

  Z <- Matrix::sparseMatrix(i = i, j = j, x = x, dims = c(n_obs, n_re))
  X <- matrix(1, n_obs, 1)
  colnames(X) <- "(Intercept)"

  eta <- stats::rnorm(n_re, sd = sqrt(tau2))
  lp  <- as.vector(X %*% beta + Z %*% eta)

  y <- switch(
    family,
    binomial_probit = stats::rbinom(n_obs, 1, stats::pnorm(lp)),
    binomial_logit  = stats::rbinom(n_obs, 1, stats::plogis(lp)),
    poisson_log     = stats::rpois(n_obs, exp(lp))
  )

  list(y = y, X = X, Z = Z, truth = list(beta = beta, tau2 = tau2))
}



# ---- (2) FE trace diag-G finite-difference validation ----

testthat::test_that("fe_trace_diagG matches finite-difference tilting derivatives (sign check)", {
  testthat::skip_on_cran()  # safe: this is a numeric finite-difference validation

  set.seed(123)
  n <- 120
  q <- 25
  tau2 <- 0.8

  # Build an arbitrary sparse-ish Z
  Z <- Matrix::rsparsematrix(n, q, density = 0.08)
  Z <- Matrix::drop0(Z)

  # One fixed effect (intercept) to keep it simple here
  X <- matrix(1, n, 1)
  beta <- 0.0

  # Simulate eta, then y from probit
  eta_true <- stats::rnorm(q, sd = sqrt(tau2))
  lin_true <- as.vector(X %*% beta + Z %*% eta_true)
  y <- stats::rbinom(n, 1, stats::pnorm(lin_true))

  # === Helpers needed for finite-difference tilting ===
  derivs <- glmmFEL:::fe_derivatives_binomial_probit()
  d1 <- derivs$d1; d2 <- derivs$d2; d3 <- derivs$d3; d4 <- derivs$d4

  Ginv <- Matrix::Diagonal(q, x = rep(1 / tau2, q))

  # Negative log posterior up to constants (tilted by c):
  # f_c(eta) = -sum log Phi(s * (Xb + Z eta)) + 0.5 eta'Ginv eta - c'eta
  # where s = +1 for y=1, s=-1 for y=0
  f_c <- function(eta, c) {
    s <- ifelse(y == 1, 1, -1)
    lin <- as.vector(X %*% beta + Z %*% eta)
    qv  <- s * lin
    nll <- -sum(stats::pnorm(qv, log.p = TRUE))
    prior <- 0.5 * as.numeric(crossprod(eta, as.matrix(Ginv %*% eta)))
    tilt  <- -as.numeric(crossprod(c, eta))
    nll + prior + tilt
  }

  grad_c <- function(eta, c) {
    s <- ifelse(y == 1, 1, -1)
    lin <- as.vector(X %*% beta + Z %*% eta)
    qv  <- s * lin
    # d/deta [-log Phi(qv)] = -Z' [ s * d1(qv) ]
    g_like <- -Matrix::crossprod(Z, s * d1(qv))
    g_prior <- as.matrix(Ginv %*% eta)
    g_tilt  <- -c
    as.vector(g_like + g_prior + g_tilt)
  }

  hess_c <- function(eta, c) {
    s <- ifelse(y == 1, 1, -1)
    lin <- as.vector(X %*% beta + Z %*% eta)
    qv  <- s * lin
    # Second derivative of -log Phi(qv) w.r.t. linear predictor is (-d2(qv)) >= 0
    W <- -d2(qv)
    # Hessian: Z' diag(W) Z + Ginv
    Matrix::crossprod(Z, W * Z) + Ginv
  }

  solve_mode <- function(c, eta_init = rep(0, q), maxit = 60, tol = 1e-10) {
    eta <- eta_init
    for (it in seq_len(maxit)) {
      g <- grad_c(eta, c)
      H <- hess_c(eta, c)
      step <- as.vector(solve(as.matrix(H), g))
      eta_new <- eta - step
      if (max(abs(step)) < tol) break
      eta <- eta_new
    }
    eta
  }

  # c=0 Laplace mode and covariance
  c0 <- rep(0, q)
  eta0 <- solve_mode(c0)
  H0 <- hess_c(eta0, c0)
  var_eta <- solve(as.matrix(H0))

  # Build FE ingredients at c=0
  s <- ifelse(y == 1, 1, -1)
  lin0 <- as.vector(X %*% beta + Z %*% eta0)
  q0 <- s * lin0
  temp_trc_C <- s * d3(q0)          # matches Appendix-B form via s * d3(s*lin)
  temp_trc_D <- d4(q0)              # d4 is already w.r.t q0

  fe <- glmmFEL:::fe_trace_diagG(
    Z = Z,
    var_eta = var_eta,
    temp_trc_C = temp_trc_C,
    temp_trc_D = temp_trc_D
  )
  trc_y1 <- fe$trc_y1
  trc_y2 <- fe$trc_y2

  testthat::expect_equal(length(trc_y1), q)
  testthat::expect_equal(dim(trc_y2), c(q, q))
  testthat::expect_true(all(abs(trc_y2[row(trc_y2) != col(trc_y2)]) < 1e-12))

  # --- Finite differences for one component j ---
  j <- 7L
  eps <- 1e-4
  ej <- rep(0, q); ej[j] <- 1

  # dsig/dc_j and d2sig/dc_j^2 at c=0 by finite differences:
  # Sigma(c) = Hessian at tilted mode eta_hat(c)
  Sigma_at <- function(c) {
    eta_c <- solve_mode(c, eta_init = eta0)
    hess_c(eta_c, c)
  }

  Sigma_p <- Sigma_at(+eps * ej)
  Sigma_m <- Sigma_at(-eps * ej)
  Sigma_0 <- Sigma_at(c0)

  dSig_dc_fd  <- (Sigma_p - Sigma_m) / (2 * eps)
  d2Sig_dc2_fd <- (Sigma_p - 2 * Sigma_0 + Sigma_m) / (eps^2)

  # Our convention inside FE code is D = -dSigma/dc and D2 = -d2Sigma/dc2
  dsig_fd  <- -as.matrix(dSig_dc_fd)
  d2sig_fd <- -as.matrix(d2Sig_dc2_fd)

  trc_y1_fd <- sum(diag(var_eta %*% dsig_fd))
  term1_fd  <- sum(diag(var_eta %*% d2sig_fd))
  A_fd <- var_eta %*% dsig_fd
  term2_fd  <- sum(A_fd * t(A_fd))  # trace(A^2) when var_eta is symmetric

  trc_y2_fd <- term1_fd + term2_fd

  # Compare signs + magnitude (relative tolerance)
  testthat::expect_equal(trc_y1[j], trc_y1_fd, tolerance = 5e-3)
  testthat::expect_equal(trc_y2[j, j], trc_y2_fd, tolerance = 1e-2)
})

# ---- (3) Smoke fits for core methods ----

testthat::test_that("glmmFEL runs for Laplace / FE_mean / FE_full (one-way intercept, probit)", {
  dat <- simulate_oneway(
    family = "binomial_probit",
    n_id = 25L, m_per_id = 8L,
    beta = c(-0.2, 0.6),
    tau2 = 0.6,
    seed = 101
  )

  fits <- list(
    Laplace = glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "binomial_probit",
                               approx = "Laplace", max_iter = 120, tol = 1e-5,
                               control = list(verbose = FALSE)),
    FE_mean = glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "binomial_probit",
                               approx = "FE_mean", max_iter = 160, tol = 1e-5,
                               control = list(verbose = FALSE)),
    FE_full = glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "binomial_probit",
                               approx = "FE_full", max_iter = 200, tol = 1e-5,
                               control = list(verbose = FALSE))
  )

  for (nm in names(fits)) {
    fit <- fits[[nm]]
    testthat::expect_true(inherits(fit, "glmmFELMod"))
    testthat::expect_equal(length(fit$beta), ncol(dat$X))
    testthat::expect_equal(length(fit$eta), ncol(dat$Z))
    testthat::expect_true(is.finite(fit$logLik))
    testthat::expect_true(is.finite(fit$tau2))
    testthat::expect_true(fit$tau2 >= 0)
  }
})

testthat::test_that("glmmFEL runs for RSPL and MSPL (one-way intercept, poisson)", {
  dat <- simulate_oneway(
    family = "poisson_log",
    n_id = 20L, m_per_id = 10L,
    beta = c(log(2), 0.2),
    tau2 = 0.4,
    seed = 202
  )

  fits <- list(
    RSPL = glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "poisson_log",
                            approx = "RSPL", max_iter = 80, tol = 1e-5,
                            control = list(verbose = FALSE)),
    MSPL = glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "poisson_log",
                            approx = "MSPL", max_iter = 80, tol = 1e-5,
                            control = list(verbose = FALSE))
  )

  for (nm in names(fits)) {
    fit <- fits[[nm]]
    testthat::expect_true(inherits(fit, "glmmFELMod"))
    testthat::expect_equal(length(fit$beta), ncol(dat$X))
    testthat::expect_equal(length(fit$eta), ncol(dat$Z))
    testthat::expect_true(is.finite(fit$tau2))
    testthat::expect_true(fit$tau2 >= 0)
  }
})

# ---- (4) Multi-membership smoke ----

testthat::test_that("glmmFEL runs with multi-membership Z (2 nonzeros per row)", {
  dat <- simulate_multimember_2perrow(
    family = "binomial_probit",
    n_re = 12L, n_obs = 50L,
    beta = 0.0, tau2 = 0.8,
    seed = 303
  )

  fit <- glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "binomial_probit",
                          approx = "Laplace", max_iter = 150, tol = 1e-5,
                          control = list(verbose = FALSE))

  testthat::expect_true(inherits(fit, "glmmFELMod"))
  testthat::expect_equal(length(fit$eta), ncol(dat$Z))
  testthat::expect_true(is.finite(fit$tau2))
})

# ---- (5) Dense->sparse coercion ----

testthat::test_that("dense Z is converted to sparse internally", {
  dat <- simulate_oneway(
    family = "binomial_probit",
    n_id = 10L, m_per_id = 6L,
    beta = c(0.0, 0.3),
    tau2 = 0.5,
    seed = 404
  )

  # Make a dense Z on purpose
  Z_dense <- as.matrix(dat$Z)

  fit <- glmmFEL::glmmFEL(dat$y, dat$X, Z_dense, family = "binomial_probit",
                          approx = "Laplace", max_iter = 100, tol = 1e-5,
                          control = list(verbose = FALSE))

  testthat::expect_true(inherits(fit$Z, "dgCMatrix"))
  testthat::expect_equal(dim(fit$Z), dim(dat$Z))
})

# ---- (6) binomial_logit smoke fits ----

testthat::test_that("glmmFEL runs for Laplace / FE_mean / FE_full (one-way intercept, logit)", {
  dat <- simulate_oneway(
    family = "binomial_logit",
    n_id = 25L, m_per_id = 8L,
    beta = c(-0.2, 0.6),
    tau2 = 0.6,
    seed = 505
  )

  fits <- list(
    Laplace = glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "binomial_logit",
                               approx = "Laplace", max_iter = 120, tol = 1e-5,
                               control = list(verbose = FALSE)),
    FE_mean = glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "binomial_logit",
                               approx = "FE_mean", max_iter = 160, tol = 1e-5,
                               control = list(verbose = FALSE)),
    FE_full = glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "binomial_logit",
                               approx = "FE_full", max_iter = 200, tol = 1e-5,
                               control = list(verbose = FALSE))
  )

  for (nm in names(fits)) {
    fit <- fits[[nm]]
    testthat::expect_true(inherits(fit, "glmmFELMod"))
    testthat::expect_equal(length(fit$beta), ncol(dat$X))
    testthat::expect_equal(length(fit$eta), ncol(dat$Z))
    testthat::expect_true(is.finite(fit$logLik))
    testthat::expect_true(is.finite(fit$tau2))
    testthat::expect_true(fit$tau2 >= 0)
  }
})

testthat::test_that("glmmFEL runs for RSPL and MSPL (one-way intercept, logit)", {
  dat <- simulate_oneway(
    family = "binomial_logit",
    n_id = 20L, m_per_id = 10L,
    beta = c(-0.1, 0.2),
    tau2 = 0.4,
    seed = 606
  )

  fits <- list(
    RSPL = glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "binomial_logit",
                            approx = "RSPL", max_iter = 80, tol = 1e-5,
                            control = list(verbose = FALSE)),
    MSPL = glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "binomial_logit",
                            approx = "MSPL", max_iter = 80, tol = 1e-5,
                            control = list(verbose = FALSE))
  )

  for (nm in names(fits)) {
    fit <- fits[[nm]]
    testthat::expect_true(inherits(fit, "glmmFELMod"))
    testthat::expect_equal(length(fit$beta), ncol(dat$X))
    testthat::expect_equal(length(fit$eta), ncol(dat$Z))
    testthat::expect_true(is.finite(fit$tau2))
    testthat::expect_true(fit$tau2 >= 0)
  }
})

testthat::test_that("glmmFEL runs with multi-membership Z (2 nonzeros per row, logit)", {
  dat <- simulate_multimember_2perrow(
    family = "binomial_logit",
    n_re = 12L, n_obs = 50L,
    beta = 0.0, tau2 = 0.8,
    seed = 707
  )

  fit <- glmmFEL::glmmFEL(dat$y, dat$X, dat$Z, family = "binomial_logit",
                          approx = "Laplace", max_iter = 150, tol = 1e-5,
                          control = list(verbose = FALSE))

  testthat::expect_true(inherits(fit, "glmmFELMod"))
  testthat::expect_equal(length(fit$eta), ncol(dat$Z))
  testthat::expect_true(is.finite(fit$tau2))
})


test_that("glmmFEL input checks fire", {
  dat <- simulate_oneway(seed = 1)

  testthat::expect_error(
    glmmFEL(dat$y[-1], dat$X, dat$Z),
    "Z has .* rows but expected n"
  )
  expect_error(glmmFEL(dat$y, dat$X[,1,drop=FALSE], matrix(0, nrow(dat$X), 0)), "at least one")
  expect_error(glmmFEL(c(0,2,1,0), dat$X[1:4,], dat$Z[1:4,], family="binomial_probit"), "0/1")
  expect_error(glmmFEL(c(0,-1,2,3), dat$X[1:4,], dat$Z[1:4,], family="poisson_log"), "nonnegative")
})


test_that("permuting Z columns leaves beta/tau2 unchanged", {
  dat <- simulate_oneway(family="binomial_logit", seed=2)
  perm <- sample(seq_len(ncol(dat$Z)))
  Zp <- dat$Z[, perm]

  fit1 <- glmmFEL(dat$y, dat$X, dat$Z, family="binomial_logit", approx="Laplace",
                  max_iter=80, tol=1e-5)
  fit2 <- glmmFEL(dat$y, dat$X, Zp, family="binomial_logit", approx="Laplace",
                  max_iter=80, tol=1e-5)

  expect_equal(fit1$beta, fit2$beta, tolerance=1e-6)
  expect_equal(fit1$tau2, fit2$tau2, tolerance=1e-6)
})


test_that("Laplace fit roughly recovers parameters (sanity)", {
  dat <- simulate_oneway(family="binomial_probit", n_id=40, m_per_id=8,
                         beta=c(-0.3, 0.8), tau2=0.7, seed=3)
  fit <- glmmFEL(dat$y, dat$X, dat$Z, family="binomial_probit", approx="Laplace",
                 max_iter=150, tol=1e-6)

  expect_true(sign(fit$beta[2]) == sign(dat$truth$beta[2]))
  expect_true(fit$tau2 > 0.05 && fit$tau2 < 5)   # wide bounds
})


test_that("tau2_init extremes remain stable", {
  dat <- simulate_oneway(family="poisson_log", seed=4)

  fit_small <- glmmFEL(dat$y, dat$X, dat$Z, family="poisson_log", approx="Laplace",
                       control=list(tau2_init=1e-10), max_iter=80, tol=1e-5)
  fit_big <- glmmFEL(dat$y, dat$X, dat$Z, family="poisson_log", approx="Laplace",
                     control=list(tau2_init=1e3), max_iter=80, tol=1e-5)

  expect_true(is.finite(fit_small$tau2) && fit_small$tau2 >= 0)
  expect_true(is.finite(fit_big$tau2) && fit_big$tau2 >= 0)
})


test_that("FE_full var_eta is base matrix and symmetric", {
  dat <- simulate_oneway(family="binomial_logit", seed=5)
  fit <- glmmFEL(dat$y, dat$X, dat$Z, family="binomial_logit", approx="FE_full",
                 max_iter=120, tol=1e-5)

  expect_true(is.matrix(fit$var_eta))
  expect_equal(fit$var_eta, t(fit$var_eta), tolerance=1e-10)
  expect_true(all(is.finite(diag(fit$var_eta))))
})