set.seed(1234)

aa <- airquality
aa2 <- rbind(aa, aa)

testthat::test_that("summary_c2c is properly adjust the std error", {
  ll <- lm(Ozone ~ ., aa)
  ss <- summary(ll)

  ll2 <- lm(Ozone ~ ., aa2)
  ss2 <- summary_c2c(ll2, ll$df.residual, ll2$df.residual)

  expect_equal(unname(ss$coefficients[, 3]), ss2[, 7], tolerance = 1e-8)
})

data("occup", package = "cat2cat")
data("trans", package = "cat2cat")

occup_old <- occup[occup$year == 2008, ]
occup_new <- occup[occup$year == 2010, ]

data_simple <- list(
  old = occup_old, new = occup_new, cat_var = "code", time_var = "year"
)
mappings_simple_back <- list(trans = trans, direction = "backward")
ml <- list(
  data = occup_new,
  cat_var = "code",
  method = c("knn", "rf", "lda"),
  features = c("age", "sex", "edu", "exp", "parttime", "salary"),
  args = list(k = 10, ntree = 30),
  fail_warn = FALSE
)

testthat::test_that(
  "summary_c2c is properly adjust the std error - cat2cat case",
  {
    occup <- cat2cat(
      data = data_simple,
      mappings = mappings_simple_back,
      ml = ml
    )

    lms <- lm(
      I(log(salary)) ~ age + sex + factor(edu) + parttime + exp, occup$old,
      weights = multiplier * wei_freq_c2c
    )

    ss_c2c <- summary_c2c(lms, df_old = nrow(occup_old) - length(lms$assign))
    lms$df.residual <- nrow(occup_old) - length(lms$assign)
    ss1 <- suppressWarnings(summary(lms))

    lms2 <- lm(
      I(log(salary)) ~ age + sex + factor(edu) + parttime + exp, occup_old,
      weights = multiplier
    )
    ss2 <- summary(lms2)

    expect_true(sum((ss2$coefficients[, 2] - ss1$coefficients[, 2])**2) < 0.01)
    expect_true(sum((ss2$coefficients[, 2] - ss_c2c$std.error_c)**2) < 0.01)
  }
)

# -----------------------------------------------------------------------------
# summary_c2c with occup_panel (id_var case)
# -----------------------------------------------------------------------------

data("occup_panel", package = "cat2cat")

panel_2009Q4 <- occup_panel[occup_panel$quarter == "2009Q4", ]
panel_2010Q1 <- occup_panel[occup_panel$quarter == "2010Q1", ]

testthat::test_that(
  "summary_c2c corrects SE with occup_panel id_var data",
  {
    result <- cat2cat(
      data = list(
        old = panel_2009Q4,
        new = panel_2010Q1,
        id_var = "panel_id",
        cat_var = "code",
        time_var = "quarter"
      ),
      mappings = list(trans = trans, direction = "backward")
    )

    # Fit model on replicated data
    lm_rep <- lm(
      I(log(salary)) ~ age + sex + factor(edu),
      data = result$old,
      weights = wei_freq_c2c
    )

    # Get corrected SE
    df_orig <- nrow(panel_2009Q4) - length(coef(lm_rep))
    ss_c2c <- summary_c2c(lm_rep, df_old = df_orig)

    # Corrected SE should be larger than naive SE (replication inflates df)
    naive_se <- summary(lm_rep)$coefficients[, "Std. Error"]
    expect_true(all(ss_c2c$std.error_c >= naive_se * 0.99))

    # Corrected stats should use original df
    expect_equal(nrow(ss_c2c), length(coef(lm_rep)))
    expect_true(all(c("statistic_c", "p.value_c", "std.error_c") %in% names(ss_c2c)))
  }
)

testthat::test_that(

  "summary_c2c SE correction ratio matches replication factor",
  {
    result <- cat2cat(
      data = list(
        old = panel_2009Q4,
        new = panel_2010Q1,
        cat_var = "code",
        time_var = "quarter"
      ),
      mappings = list(trans = trans, direction = "backward")
    )

    lm_rep <- lm(
      I(log(salary)) ~ age + sex,
      data = result$old,
      weights = wei_freq_c2c
    )

    df_orig <- nrow(panel_2009Q4) - length(coef(lm_rep))
    df_rep <- nrow(result$old) - length(coef(lm_rep))
    ss_c2c <- summary_c2c(lm_rep, df_old = df_orig, df_new = df_rep)

    # SE correction factor should be sqrt(df_rep / df_orig)
    expected_factor <- sqrt(df_rep / df_orig)
    naive_se <- summary(lm_rep)$coefficients[, "Std. Error"]
    actual_factor <- unname(ss_c2c$std.error_c[1] / naive_se[1])

    expect_equal(actual_factor, expected_factor, tolerance = 0.01)
  }
)

testthat::test_that(
  "summary_c2c handles glm gaussian path",
  {
    aa_glm <- na.omit(airquality)
    aa_glm2 <- rbind(aa_glm, aa_glm)

    g1 <- glm(Ozone ~ Solar.R + Wind + Temp,
              data = aa_glm, family = gaussian())
    g2 <- glm(Ozone ~ Solar.R + Wind + Temp,
              data = aa_glm2, family = gaussian())

    ss1 <- summary(g1)$coefficients[, 3]
    ss2 <- summary_c2c(g2, g1$df.residual, g2$df.residual)

    expect_true(all(c("std.error_c", "statistic_c", "p.value_c", "reference_dist") %in% names(ss2)))
    expect_equal(unname(ss1), ss2$statistic_c, tolerance = 0.06)
    expect_true(all(ss2$reference_dist == "t"))
  }
)

testthat::test_that(
  "summary_c2c handles glm binomial path",
  {
    d1 <- mtcars
    d2 <- rbind(d1, d1)

    g1 <- glm(vs ~ mpg + wt,
              data = d1, family = binomial())
    g2 <- glm(vs ~ mpg + wt,
              data = d2, family = binomial())

    ss1 <- summary(g1)$coefficients[, "z value"]
    ss2 <- summary_c2c(g2, g1$df.residual, g2$df.residual)

    expect_true(all(c("std.error_c", "statistic_c", "p.value_c", "reference_dist") %in% names(ss2)))
    expect_equal(unname(ss1), ss2$statistic_c, tolerance = 0.06)
    expect_true(all(ss2$reference_dist == "normal"))
  }
)

testthat::test_that(
  "summary_c2c rejects invalid df inputs",
  {
    ll <- lm(Ozone ~ Solar.R + Wind + Temp, data = na.omit(airquality))

    expect_error(summary_c2c(ll, df_old = 0), "`df_old` must be a single positive finite numeric value.")
    expect_error(summary_c2c(ll, df_old = NA_real_), "`df_old` must be a single positive finite numeric value.")
    expect_error(summary_c2c(ll, df_old = ll$df.residual, df_new = -1), "`df_new` must be a single positive finite numeric value.")
  }
)

testthat::test_that(
  "summary_c2c rejects unsupported coefficient layout",
  {
    summary.badglm <- function(object, ...) {
      list(coefficients = cbind("Estimate" = c(1, 2), "Std. Error" = c(0.1, 0.2)))
    }
    assign("summary.badglm", summary.badglm, envir = .GlobalEnv)
    on.exit(rm("summary.badglm", envir = .GlobalEnv), add = TRUE)

    bad <- structure(list(df.residual = 10), class = c("badglm", "glm", "lm"))

    expect_error(
      summary_c2c(bad, df_old = 10, df_new = 20),
      "must contain either 't value' or 'z value'"
    )
  }
)

testthat::test_that(
  "summary_c2c rejects non-tabular coefficients object",
  {
    summary.badcoef <- function(object, ...) {
      list(coefficients = c(1, 2, 3))
    }
    assign("summary.badcoef", summary.badcoef, envir = .GlobalEnv)
    on.exit(rm("summary.badcoef", envir = .GlobalEnv), add = TRUE)

    bad <- structure(list(df.residual = 10), class = c("badcoef", "lm"))

    expect_error(
      summary_c2c(bad, df_old = 10, df_new = 20),
      "must be a matrix or data.frame"
    )
  }
)

testthat::test_that(
  "summary_c2c requires Std. Error column",
  {
    summary.nose <- function(object, ...) {
      list(coefficients = cbind("Estimate" = c(1, 2), "t value" = c(3, 4)))
    }
    assign("summary.nose", summary.nose, envir = .GlobalEnv)
    on.exit(rm("summary.nose", envir = .GlobalEnv), add = TRUE)

    bad <- structure(list(df.residual = 10), class = c("nose", "lm"))

    expect_error(
      summary_c2c(bad, df_old = 10, df_new = 20),
      "must contain a 'Std. Error' column"
    )
  }
)