local_edition(3)

# Helper function to simulate GLM data with ELE linkage errors
# Adapted from Chambers (2009) simulation logic
generate_ele_glm_data <- function(n = 200, m_rate = 0.1, family = "gaussian") {
 set.seed(42)

 # 1. Design Matrix and True Parameters
 x1 <- rnorm(n)
 x2 <- rbinom(n, 1, 0.5)
 X <- cbind(Intercept = 1, x1 = x1, x2 = x2)
 true_beta <- c(1.5, 0.8, -0.5)

 # 2. Generate Response based on family
 eta <- as.vector(X %*% true_beta)

 if (family == "gaussian") {
  y <- eta + rnorm(n, sd = 0.5)
  fam_obj <- gaussian()
 } else if (family == "binomial") {
  prob <- plogis(eta)
  y <- rbinom(n, 1, prob)
  fam_obj <- binomial()
 } else if (family == "poisson") {
  mu <- exp(eta)
  y <- rpois(n, mu)
  fam_obj <- poisson()
 }

 # 3. Induce Linkage Errors (ELE model within a single block)
 # Shuffling a portion of the response vector to simulate mismatches
 mis_idx <- sample(1:n, size = round(m_rate * n))
 linked_y <- y
 if (length(mis_idx) > 1) {
  linked_y[mis_idx] <- y[sample(mis_idx)]
 }

 return(list(
  X = X,
  y = linked_y,
  true_beta = true_beta,
  family = family,
  fam_obj = fam_obj
 ))
}

test_that("glmELE engine fits all weighting methods for Gaussian", {
 dat <- generate_ele_glm_data(n = 300, m_rate = 0.15, family = "gaussian")

 # Test with "all" weight matrices
 fit <- glmELE(
  x = dat$X,
  y = dat$y,
  family = "gaussian",
  m.rate = 0.15,
  blocks = rep(1, 300),
  weight.matrix = "all"
 )

 expect_s3_class(fit, "glmELE")
 expect_equal(nrow(fit$coefficients), 3) # ratio, LL, BLUE
 expect_true(all(c("ratio", "LL", "BLUE") %in% rownames(fit$coefficients)))
 expect_true(all(diag(fit$var$BLUE) > 0)) # Check variance matrix
})

test_that("glmELE works for Binomial and Poisson families", {
 # Binomial
 dat_b <- generate_ele_glm_data(n = 200, m_rate = 0.1, family = "binomial")
 fit_b <- glmELE(dat_b$X, dat_b$y, family = "binomial", m.rate = 0.1, blocks = rep(1, 200))
 expect_equal(fit_b$family$family, "binomial")

 # Poisson
 dat_p <- generate_ele_glm_data(n = 200, m_rate = 0.1, family = "poisson")
 fit_p <- glmELE(dat_p$X, dat_p$y, family = "poisson", m.rate = 0.1, blocks = rep(1, 200))
 expect_equal(fit_p$family$family, "poisson")
})

test_that("S3 methods (print, summary, vcov, confint) behave as expected", {
 dat <- generate_ele_glm_data(n = 150)

 adj <- adjELE(linked.data = data.frame(dat$y, dat$X),
               m.rate = 0.1,
               weight.matrix = "all")

 fit <- plglm(dat.y ~ x1 + x2, data = data.frame(dat.y = dat$y, dat$X),
              family = "gaussian", adjustment = adj)

 expect_output(print(fit))
 s_fit <- summary(fit)
 expect_s3_class(s_fit, "summary.glmELE")

 expect_true("BLUE" %in% names(s_fit$coefficients))

 expect_equal(dim(vcov(fit, weight.matrix = "BLUE")), c(3, 3))

 ci <- confint(fit, level = 0.95, weight.matrix = "LL")
 expect_equal(nrow(ci), 3)
 expect_equal(colnames(ci), c("2.5 %", "97.5 %"))
})

test_that("Predict method handles new data and standard errors", {
 dat <- generate_ele_glm_data(n = 100)
 adj <- adjELE(linked.data = data.frame(dat.y = dat$y, x1 = dat$X[,2], x2 = dat$X[,3]), m.rate = 0.1)
 fit <- plglm(dat.y ~ x1 + x2, data = data.frame(dat.y = dat$y, x1 = dat$X[,2], x2 = dat$X[,3]),
              family = "gaussian", adjustment = adj)

 # In-sample prediction
 pred_in <- predict(fit, type = "response")
 expect_length(pred_in, 100)

 # New data with SE and intervals
 new_df <- data.frame(x1 = c(0, 0.5), x2 = c(1, 0))
 pred_new <- predict(fit, newdata = new_df, se.fit = TRUE, interval = "confidence")

 expect_equal(nrow(pred_new$fit), 2)
 expect_equal(colnames(pred_new$fit), c("fit", "lwr", "upr"))
 expect_length(pred_new$se.fit, 2)
 expect_true(pred_new$residual.scale > 0)
})

test_that("glmELE handles multi-block m.rates and audit.size", {
 # Simulate 2 blocks with different rates
 n <- 200
 blocks <- c(rep(1, 100), rep(2, 100))
 dat <- generate_ele_glm_data(n = n)

 # m.rate as a vector
 fit <- glmELE(
  x = dat$X, y = dat$y,
  m.rate = c(0.1, 0.05),
  blocks = blocks,
  audit.size = c(20, 20) # Variance inflation from estimated alpha
 )

 expect_equal(nrow(fit$coefficients), 3)
 expect_true(!is.null(fit$var$BLUE))
})

test_that("fitglm.adjELE correctly synchronizes subsets and row names", {
 n <- 100
 dat <- generate_ele_glm_data(n = n)
 df <- data.frame(y = dat$y, x1 = dat$X[,2], x2 = dat$X[,3])
 rownames(df) <- paste0("ID_", 1:n)

 adj <- adjELE(linked.data = df, m.rate = 0.1, weight.matrix = "all")

 # Fit on a subset
 # fitglm.adjELE must find row names ID_11 to ID_60 in the linked.data
 fit_sub <- plglm(y ~ x1 + x2, data = df, subset = 11:60, adjustment = adj)

 expect_equal(fit_sub$df.residual, 50 - 3)
 expect_equal(length(fit_sub$residuals["BLUE",]), 50)
 expect_equal(names(fit_sub$residuals["BLUE",])[1], "ID_11")
})

test_that("Error handling for invalid inputs", {
 dat <- generate_ele_glm_data(n = 20)

 # Dimension mismatch
 expect_error(glmELE(x = dat$X[1:10,], y = dat$y, m.rate = 0.1, blocks = rep(1, 20)))

 # Binomial response not 0/1
 expect_error(glmELE(x = dat$X, y = runif(20, 2, 5), family = "binomial", m.rate = 0.1, blocks = rep(1, 20)))

 # Unsupported family
 expect_error(glmELE(x = dat$X, y = dat$y, family = "inverse.gaussian", m.rate = 0.1, blocks = rep(1, 20)))
})

test_that("summary.glmELE handles missing variance and non-Gaussian families correctly", {
 dat_b <- generate_ele_glm_data(n = 100, m_rate = 0.1, family = "binomial")
 adj_b <- adjELE(linked.data = data.frame(y = dat_b$y, dat_b$X),
                 m.rate = 0.1, weight.matrix = "BLUE")
 fit_b <- plglm(y ~ x1 + x2, data = data.frame(y = dat_b$y, dat_b$X),
                family = "binomial", adjustment = adj_b)

 # 1. Test non-Gaussian z-value logic and pnorm branch
 s_fit_b <- summary(fit_b)
 expect_true("z value" %in% colnames(s_fit_b$coefficients$BLUE))
 expect_true("Pr(>|z|)" %in% colnames(s_fit_b$coefficients$BLUE))

 # 2. Test defensive fallback for missing variance matrix
 fit_b_broken <- fit_b
 fit_b_broken$var$BLUE <- NULL

 expect_warning(s_fit_broken <- summary(fit_b_broken),
                "Variance matrix not found for method: BLUE")
 # Assert SEs, z-values, and p-values became NA correctly, but estimates remain
 expect_true(all(is.na(s_fit_broken$coefficients$BLUE[, "Std. Error"])))
 expect_true(all(is.na(s_fit_broken$coefficients$BLUE[, "z value"])))
 expect_true(!any(is.na(s_fit_broken$coefficients$BLUE[, "Estimate"])))
})

test_that("print.summary.glmELE output matches exact snapshot formatting", {
 dat <- generate_ele_glm_data(n = 50, family = "gaussian")
 adj <- adjELE(linked.data = data.frame(y = dat$y, dat$X),
               m.rate = 0.1, weight.matrix = "BLUE")
 fit <- plglm(y ~ x1 + x2, data = data.frame(y = dat$y, dat$X),
              family = "gaussian", adjustment = adj)

 s_fit <- summary(fit)

 # expect_snapshot tests print methods for formatting,
 # dispersion values, stars, and call layout.
 expect_snapshot(print(s_fit))
})

test_that("vcov and confint handle defaults, numeric params, validations, and qnorm branches", {
 dat <- generate_ele_glm_data(n = 50, family = "gaussian")
 adj <- adjELE(linked.data = data.frame(y = dat$y, dat$X),
               m.rate = 0.1, weight.matrix = "all")
 fit <- plglm(y ~ x1 + x2, data = data.frame(y = dat$y, dat$X),
              family = "gaussian", adjustment = adj)

 # 1. Test weight.matrix default fallback in vcov
 v_default <- vcov(fit)
 v_explicit <- vcov(fit, weight.matrix = "ratio") # first method in adj
 expect_identical(v_default, v_explicit)

 # 2. Test weight.matrix default fallback in confint
 ci_default <- confint(fit)
 ci_explicit <- confint(fit, weight.matrix = "ratio")
 expect_identical(ci_default, ci_explicit)

 # 3. Test invalid weight.matrix stops
 expect_error(confint(fit, weight.matrix = "INVALID"),
              "Method INVALID not found in object coefficients.")

 # 4. Test numeric `parm` parsing in confint
 ci_num <- confint(fit, parm = c(2, 3))
 expect_equal(rownames(ci_num), c("x1", "x2"))
 expect_equal(nrow(ci_num), 2)

 # 5. Test confint qnorm branch (Binomial)
 dat_b <- generate_ele_glm_data(n = 50, family = "binomial")
 adj_b <- adjELE(linked.data = data.frame(y = dat_b$y, dat_b$X),
                 m.rate = 0.1, weight.matrix = "BLUE")
 fit_b <- plglm(y ~ x1 + x2, data = data.frame(y = dat_b$y, dat_b$X),
                family = "binomial", adjustment = adj_b)

 ci_bin <- confint(fit_b)
 # Ensure confidence bounds are symmetric around estimate on the link scale using qnorm
 est <- fit_b$coefficients["BLUE", 1]
 se <- sqrt(fit_b$var$BLUE[1, 1])
 crit <- stats::qnorm(0.975)
 expect_equal(ci_bin[1, "2.5 %"], est - crit * se)
})

test_that("predict.glmELE correctly handles missing components, in-sample reconstructions, and warnings", {
 dat <- generate_ele_glm_data(n = 50)
 adj <- adjELE(linked.data = data.frame(y = dat$y, dat$X),
               m.rate = 0.1, weight.matrix = "BLUE")
 fit <- plglm(y ~ x1 + x2, data = data.frame(y = dat$y, dat$X),
              family = "gaussian", adjustment = adj)

 # 1. Invalid weight.matrix
 expect_error(predict(fit, weight.matrix = "INVALID"),
              "Method INVALID not found in object coefficients.")

 # 2. Missing model component
 fit_no_mod <- fit
 fit_no_mod$model <- NULL
 expect_error(predict(fit_no_mod),
              "Ensure the model was fitted with 'model = TRUE'")

 # 3. In-sample prediction with SE/intervals forces reconstruction of X
 p_in <- predict(fit, se.fit = TRUE, interval = "confidence")
 expect_equal(nrow(p_in$fit), 50)
 expect_true(all(c("fit", "lwr", "upr") %in% colnames(p_in$fit)))
 expect_length(p_in$se.fit, 50)

 # 4. Missing variance warning in predict
 fit_no_var <- fit
 fit_no_var$var$BLUE <- NULL
 expect_warning(p_no_var <- predict(fit_no_var, se.fit = TRUE),
                "Variance matrix missing for this method. SEs set to NA.")
 expect_true(all(is.na(p_no_var$se.fit)))
})

test_that("predict.glmELE computes mathematical offsets correctly", {
 dat <- generate_ele_glm_data(n = 50)
 df <- data.frame(y = dat$y, x1 = dat$X[,2], x2 = dat$X[,3], off_val = rep(1, 50))
 adj <- adjELE(linked.data = df, m.rate = 0.1, weight.matrix = "BLUE")

 # Fit model with an offset
 fit_off <- plglm(y ~ x1 + x2 + offset(off_val), data = df,
                  family = "gaussian", adjustment = adj)

 # Mathematical validation of offset logic in new data
 nd_base <- data.frame(x1 = 0, x2 = 0, off_val = 0)
 nd_shifted <- data.frame(x1 = 0, x2 = 0, off_val = 5)

 p_base <- predict(fit_off, newdata = nd_base)
 p_shifted <- predict(fit_off, newdata = nd_shifted)

 # The shifted prediction should be EXACTLY 5 units higher
 expect_equal(unname(p_shifted), unname(p_base) + 5)
})

test_that("predict.glmELE computes napredict padding correctly", {
 dat <- generate_ele_glm_data(n = 50)
 df <- data.frame(y = dat$y, x1 = dat$X[,2], x2 = dat$X[,3])
 adj <- adjELE(linked.data = df, m.rate = 0.1, weight.matrix = "BLUE")

 fit <- plglm(y ~ x1 + x2, data = df, family = "gaussian", adjustment = adj)

 # NA action and padding validation (na.exclude adds NAs to outputs)
 nd_na <- data.frame(x1 = c(0, NA, 0), x2 = c(0, 0, 0))

 # Simple predict
 p_na_simple <- predict(fit, newdata = nd_na, na.action = na.exclude)
 expect_length(p_na_simple, 3)
 expect_true(is.na(p_na_simple[2]))
 expect_false(is.na(p_na_simple[1]))

 # Complex predict (matrix + list)
 p_na_complex <- predict(fit, newdata = nd_na, na.action = na.exclude,
                         se.fit = TRUE, interval = "confidence")

 expect_equal(nrow(p_na_complex$fit), 3)
 expect_true(is.na(p_na_complex$fit[2, "fit"]))
 expect_true(is.na(p_na_complex$fit[2, "lwr"]))
 expect_true(is.na(p_na_complex$se.fit[2]))
 expect_false(is.na(p_na_complex$se.fit[3]))
})

test_that("predict.glmELE non-Gaussian confidence intervals and response scaling are correct", {
 dat_p <- generate_ele_glm_data(n = 50, family = "poisson")
 adj_p <- adjELE(linked.data = data.frame(y = dat_p$y, dat_p$X),
                 m.rate = 0.1, weight.matrix = "BLUE")
 fit_p <- plglm(y ~ x1 + x2, data = data.frame(y = dat_p$y, dat_p$X),
                family = "poisson", adjustment = adj_p)

 # 1. Link scale interval (uses qnorm)
 p_link <- predict(fit_p, interval = "confidence", type = "link",
                   newdata = data.frame(x1 = 0, x2 = 0))

 expect_true(p_link$fit[1, "lwr"] < p_link$fit[1, "fit"])
 expect_true(p_link$fit[1, "upr"] > p_link$fit[1, "fit"])

 # 2. Response scale interval (linkinv transformation)
 p_resp <- predict(fit_p, interval = "confidence", type = "response",
                   newdata = data.frame(x1 = 0, x2 = 0))

 # For Poisson (log link), exp(link) == response
 expect_equal(p_resp$fit[1, "fit"], exp(p_link$fit[1, "fit"]))
 expect_equal(p_resp$fit[1, "lwr"], exp(p_link$fit[1, "lwr"]))
 expect_equal(p_resp$fit[1, "upr"], exp(p_link$fit[1, "upr"]))
})