# tests/testthat/test-g_regression.R

test_that("g_regression: g keyword mappings behave as implemented", {
  set.seed(123)

  m <- 60
  x1 <- rnorm(m)
  x2 <- rnorm(m, sd = 2)
  y  <- 2 + x1 + 2 * x2 + rnorm(m)

  dat <- cbind(y, x1, x2)
  r <- ncol(dat) - 1

  # helper to pull betas for comparing implied g choices via closed form
  ref_beta <- function(g_num) {
    Y <- matrix(dat[, 1], ncol = 1)
    X <- dat[, -1, drop = FALSE]
    Z <- cbind(1, X)
    beta_ols <- solve(crossprod(Z), crossprod(Z, Y))
    as.numeric(beta_ols / (1 + g_num))
  }

  # UIP: g = 1/m (also NULL and unknown)
  out_uip  <- g_regression(dat, g = "UIP")
  out_null <- g_regression(dat, g = NULL)
  out_unk  <- g_regression(dat, g = "something_else")

  g_uip <- 1 / m
  expect_equal(as.numeric(out_uip[[1]]),  ref_beta(g_uip), tolerance = 1e-10)
  expect_equal(as.numeric(out_null[[1]]), ref_beta(g_uip), tolerance = 1e-10)
  expect_equal(as.numeric(out_unk[[1]]),  ref_beta(g_uip), tolerance = 1e-10)

  # RIC: g = 1/(r^2)
  g_ric <- 1 / (r^2)
  out_ric <- g_regression(dat, g = "RIC")
  expect_equal(as.numeric(out_ric[[1]]), ref_beta(g_ric), tolerance = 1e-10)

  # Benchmark (note: your code expects exactly "Benchmark" with capital B)
  g_bench <- 1 / max(m, (r^2))
  out_bench <- g_regression(dat, g = "Benchmark")
  expect_equal(as.numeric(out_bench[[1]]), ref_beta(g_bench), tolerance = 1e-10)

  # HQ: g = 1/(log(m)^3)
  g_hq <- 1 / (log(m)^3)
  out_hq <- g_regression(dat, g = "HQ")
  expect_equal(as.numeric(out_hq[[1]]), ref_beta(g_hq), tolerance = 1e-10)

  # rootUIP: g = sqrt(1/m)
  g_root <- sqrt(1 / m)
  out_root <- g_regression(dat, g = "rootUIP")
  expect_equal(as.numeric(out_root[[1]]), ref_beta(g_root), tolerance = 1e-10)
})

test_that("g_regression matches closed-form quantities for a numeric g", {
  set.seed(321)

  m <- 120
  x1 <- rnorm(m)
  x2 <- rnorm(m, sd = 2)
  e  <- rnorm(m, sd = 1)
  y  <- 2 + 1.5 * x1 - 0.7 * x2 + e

  dat <- cbind(y, x1, x2)
  g <- 0.8

  out <- g_regression(dat, g = g)

  betas    <- out[[1]]
  post_se  <- out[[2]]
  log_like <- out[[3]]
  R2       <- out[[4]]
  df       <- out[[5]]
  Dil      <- out[[6]]

  # independent reference calculations
  Y <- matrix(dat[, 1], ncol = 1)
  X <- dat[, -1, drop = FALSE]
  Z <- cbind(1, X)

  r <- ncol(X)
  k <- ncol(Z)
  df_ref <- m - k

  expect_equal(as.numeric(df), df_ref)

  # dilution
  expect_equal(as.numeric(Dil), as.numeric(det(stats::cor(X))), tolerance = 1e-12)

  ZtZ <- crossprod(Z)
  ZtZ_inv <- solve(ZtZ)
  Zty <- crossprod(Z, Y)
  yty <- drop(crossprod(Y))

  beta_ols <- ZtZ_inv %*% Zty
  beta_ref <- beta_ols / (1 + g)
  expect_equal(as.numeric(betas), as.numeric(beta_ref), tolerance = 1e-10)

  yPzy <- yty - drop(t(Zty) %*% ZtZ_inv %*% Zty)
  ybar <- mean(Y)
  SST <- yty - m * (ybar^2)

  EX_1_ref <- (1/(g+1)) * yPzy + (g/(g+1)) * SST

  V <- ZtZ_inv / (1 + g)
  se_2_ref <- EX_1_ref / m
  co_var_ref <- V * ((m * se_2_ref) / (m - 2))
  post_se_ref <- sqrt(diag(co_var_ref))

  expect_equal(as.numeric(post_se), as.numeric(post_se_ref), tolerance = 1e-10)

  log_like_ref <- (r/2) * log(g/(g+1)) - ((m-1)/2) * log(EX_1_ref)
  expect_equal(as.numeric(log_like), as.numeric(log_like_ref), tolerance = 1e-10)

  R2_ref <- 1 - (yPzy / SST)
  expect_equal(as.numeric(R2), as.numeric(R2_ref), tolerance = 1e-10)
})

test_that("g_regression errors on invalid numeric g", {
  set.seed(1)
  dat <- cbind(rnorm(10), rnorm(10), rnorm(10))

  expect_error(g_regression(dat, g = 0), "g must be strictly positive")
  expect_error(g_regression(dat, g = -1), "g must be strictly positive")
  expect_error(g_regression(dat, g = NaN), "g must be strictly positive")
  expect_error(g_regression(dat, g = c(0.1, 0.2)), "g must be strictly positive")
})

test_that("g_regression errors on singular Z'Z", {
  set.seed(42)

  m <- 40
  x1 <- rnorm(m)
  x2 <- 2 * x1   # perfect collinearity
  y  <- rnorm(m)

  dat <- cbind(y, x1, x2)

  expect_error(g_regression(dat, g = 0.5), "Determinant of Z'Z=0")
})