## tests/testthat/test-recon.R
## Run with: devtools::test()  or  testthat::test_local()

# Shared five-row fixture: an integer key (`id`), an integer column (`age`)
# and a character column (`name`). Tests copy and modify it as needed.
base_df <- data.frame(
  id = seq_len(5L),
  age = c(25L, 30L, 35L, 40L, 45L),
  name = c("Alice", "Bob", "Carol", "Dave", "Eve"),
  stringsAsFactors = FALSE
)

# ── 1. Identical datasets ─────────────────────────────────────────────────────
test_that("identical datasets return IDENTICAL verdict", {
  # Reconcile the fixture against itself, keyed on `id`.
  result <- recon(base_df, base_df, id = "id")

  expect_equal(result$verdict, "DATASETS ARE IDENTICAL")
  # No cell-level differences should be counted.
  expect_equal(result$summary$total_value_diffs, 0L)
})

# ── 2. Result class ───────────────────────────────────────────────────────────
test_that("recon() returns a rDataRecon_result object", {
  # The return value must carry the "rDataRecon_result" S3 class.
  result <- recon(base_df, base_df, id = "id")
  expect_s3_class(result, "rDataRecon_result")
})

# ── 3. Numeric difference detected ───────────────────────────────────────────
test_that("numeric difference is detected", {
  # Copy the fixture and overwrite the second `age` value only.
  altered <- base_df
  altered$age <- replace(altered$age, 2L, 99L)

  result <- recon(base_df, altered, id = "id")

  expect_equal(result$verdict, "DATASETS HAVE DIFFERENCES")
  expect_equal(result$summary$total_value_diffs, 1L)
  # The single reported difference must be attributed to `age`.
  expect_equal(get_diffs(result)$variable, "age")
})

# ── 4. Within absolute criterion treated as equal ─────────────────────────────
test_that("difference within criterion is treated as equal", {
  # Shift the second `age` value by 1 (30 -> 31), below the criterion of 2.
  altered <- base_df
  altered$age[2] <- base_df$age[2] + 1L

  result <- recon(base_df, altered, id = "id", criterion = 2)
  expect_equal(result$verdict, "DATASETS ARE IDENTICAL")
})

# ── 5. Character difference ───────────────────────────────────────────────────
test_that("character difference is detected", {
  # Change only the letter case of the first name.
  altered <- base_df
  altered$name[1] <- "alice"

  result <- recon(base_df, altered, id = "id")
  expect_equal(result$summary$total_value_diffs, 1L)

  # Both sides of the differing cell should be reported verbatim.
  diffs <- get_diffs(result)
  expect_equal(diffs$base_value, "Alice")
  expect_equal(diffs$compare_value, "alice")
})

# ── 6. Relative criterion ─────────────────────────────────────────────────────
test_that("relative criterion filters small proportional differences", {
  reference <- data.frame(id = 1:2, val = c(1000, 2000))

  # 1000 -> 1001 is a 0.1% change, inside the 1% relative criterion.
  within_tol <- data.frame(id = 1:2, val = c(1001, 2000))
  res_within <- recon(
    reference, within_tol,
    id = "id", criterion = 0.01, relative = TRUE
  )
  expect_equal(res_within$verdict, "DATASETS ARE IDENTICAL")

  # 2000 -> 2200 is a 10% change, outside the 1% relative criterion.
  beyond_tol <- data.frame(id = 1:2, val = c(1000, 2200))
  res_beyond <- recon(
    reference, beyond_tol,
    id = "id", criterion = 0.01, relative = TRUE
  )
  expect_equal(res_beyond$verdict, "DATASETS HAVE DIFFERENCES")
})

# ── 7. Variable renaming (base_var / compare_var) ─────────────────────────────
test_that("base_var / compare_var renaming works", {
  # Same data under different column names; only the second row differs.
  base_tbl <- data.frame(id = 1:3, revenue = c(100, 200, 300))
  comp_tbl <- data.frame(id = 1:3, sales = c(100, 250, 300))

  result <- recon(
    base_tbl, comp_tbl,
    id = "id", base_var = "revenue", compare_var = "sales"
  )
  expect_equal(result$summary$total_value_diffs, 1L)
})

# ── 8. Variable in BASE only ──────────────────────────────────────────────────
test_that("variable in BASE only is reported", {
  # Drop `age` from the comparison side so it exists only in BASE.
  without_age <- base_df[c("id", "name")]
  result <- recon(base_df, without_age, id = "id")
  expect_true("age" %in% result$vars_base_only)
})

# ── 9. Variable in COMPARE only ──────────────────────────────────────────────
test_that("variable in COMPARE only is reported", {
  # Add a column that the BASE side does not have.
  with_extra <- base_df
  with_extra[["extra"]] <- 1:5

  result <- recon(base_df, with_extra, id = "id")
  expect_true("extra" %in% result$vars_compare_only)
})

# ── 10. Type mismatch ─────────────────────────────────────────────────────────
test_that("type mismatch is detected and excluded from value comparison", {
  # Same values, but `age` is character on the comparison side.
  retyped <- base_df
  retyped$age <- as.character(base_df$age)

  result <- recon(base_df, retyped, id = "id")
  mismatches <- result$type_mismatches

  expect_equal(nrow(mismatches), 1L)
  expect_equal(mismatches$variable, "age")
})

# ── 11. Multi-column ID ───────────────────────────────────────────────────────
test_that("multi-column ID matching works", {
  # Rows are keyed by the (site, period) pair.
  base_tbl <- data.frame(
    site = c("A", "A", "B"),
    period = c(1, 2, 1),
    score = c(10, 20, 30),
    stringsAsFactors = FALSE
  )
  # Change the score of the second row only.
  comp_tbl <- base_tbl
  comp_tbl$score[2] <- 99

  result <- recon(base_tbl, comp_tbl, id = c("site", "period"))
  expect_equal(result$summary$total_value_diffs, 1L)
})

# ── 12. Positional matching (no id) ──────────────────────────────────────────
test_that("positional matching works when id is NULL", {
  # No `id` is passed; the two frames differ in the second row only.
  base_tbl <- data.frame(x = c(1.0, 2.0, 3.0))
  comp_tbl <- data.frame(x = c(1.0, 99.0, 3.0))

  result <- recon(base_tbl, comp_tbl)
  expect_equal(result$summary$total_value_diffs, 1L)
})

# ── 12b. Integer vs numeric mix is treated as comparable (not a type mismatch) ──
test_that("integer vs numeric columns are compared, not skipped as type mismatch", {
  # `score` is integer on the BASE side and double on the COMPARE side.
  int_tbl <- data.frame(id = 1:3, score = c(10L, 20L, 30L))
  dbl_tbl <- data.frame(id = 1:3, score = c(10.0, 99.0, 30.0))

  result <- recon(int_tbl, dbl_tbl, id = "id")

  # The integer/double pairing must not be listed as a type mismatch ...
  expect_equal(nrow(result$type_mismatches), 0L)
  # ... and the one differing value (20 vs 99) must still be counted.
  expect_equal(result$summary$total_value_diffs, 1L)
})

# ── 13. Different row counts ──────────────────────────────────────────────────
test_that("extra observations in BASE are reported", {
  # Keep only the first three rows on the comparison side, so two
  # observations exist in BASE alone.
  truncated <- base_df[seq_len(3L), ]
  result <- recon(base_df, truncated, id = "id")

  expect_equal(result$summary$obs_base_only, 2L)
  expect_equal(result$verdict, "DATASETS HAVE DIFFERENCES")
})

# ── 14. out_data = FALSE ──────────────────────────────────────────────────────
test_that("out_data = FALSE suppresses value_diffs in result", {
  # Introduce a difference so there would be something to store.
  altered <- base_df
  altered$age[1] <- 99L

  result <- recon(base_df, altered, id = "id", out_data = FALSE)

  # The diff table is absent, and asking for it is an error.
  expect_null(result$value_diffs)
  expect_error(get_diffs(result))
})

# ── 15. summary_df returns one row ───────────────────────────────────────────
test_that("summary_df returns a one-row data frame", {
  result <- recon(base_df, base_df, id = "id")
  flat <- summary_df(result)

  # Expect a single-row data frame that includes a `verdict` column.
  expect_s3_class(flat, "data.frame")
  expect_equal(nrow(flat), 1L)
  expect_true("verdict" %in% names(flat))
})

# ── 16. compare_stats ─────────────────────────────────────────────────────────
test_that("compare_stats returns correct columns", {
  stats <- compare_stats(base_df, base_df)

  # All four expected columns must be present in the output.
  wanted <- c("variable", "base_mean", "compare_mean", "mean_diff")
  expect_true(all(wanted %in% names(stats)))

  # Comparing a dataset with itself gives a zero mean difference per row.
  expect_equal(stats$mean_diff, numeric(nrow(stats)))
})

# ── 17. is_structure_equal ────────────────────────────────────────────────────
test_that("is_structure_equal returns TRUE for matching structures", {
  # Same columns and types, with the column order reversed.
  reversed <- base_df[rev(names(base_df))]
  expect_true(is_structure_equal(base_df, reversed))
})

test_that("is_structure_equal returns FALSE for type mismatch", {
  # Convert the integer `age` column to double on one side.
  retyped <- base_df
  retyped$age <- as.numeric(base_df$age)
  expect_false(is_structure_equal(base_df, retyped))
})

# ── 18. print method ──────────────────────────────────────────────────────────
test_that("print.rDataRecon_result works without error", {
  result <- recon(base_df, base_df, id = "id")
  # Printing must emit output that matches the pattern below.
  expect_output(print(result), "rDataRecon result")
})

# ── 19. Input validation errors ───────────────────────────────────────────────
test_that("non-data-frame input raises error", {
  # A plain list as the first argument is rejected.
  not_a_df <- list(a = 1)
  expect_error(recon(not_a_df, base_df))

  # A matrix as the second argument is rejected.
  a_matrix <- matrix(1:4, nrow = 2, ncol = 2)
  expect_error(recon(base_df, a_matrix))
})

test_that("invalid criterion raises error", {
  # A negative criterion must be rejected.
  expect_error(
    recon(base_df, base_df, criterion = -1)
  )
  # A non-numeric criterion must be rejected.
  expect_error(
    recon(base_df, base_df, criterion = "big")
  )
})

test_that("mismatched base_var / compare_var raises error", {
  # Two BASE names are paired with a single COMPARE name.
  base_names <- c("age", "name")
  compare_names <- "age"
  expect_error(
    recon(base_df, base_df, base_var = base_names, compare_var = compare_names)
  )
})

test_that("missing ID variable raises error", {
  # The fixture has no column called "nonexistent".
  expect_error(
    recon(base_df, base_df, id = "nonexistent")
  )
})

# ── 20. noequal suppresses equal vars in output ───────────────────────────────
test_that("noequal = TRUE suppresses equal-variable output", {
  # Only `age` differs, so `name` stays equal between the two sides.
  altered <- base_df
  altered$age[1] <- 99L

  # The printed report is expected to match "suppressed".
  expect_output(
    recon(base_df, altered, id = "id", noequal = TRUE),
    "suppressed"
  )
})

# ── 21. brief mode ────────────────────────────────────────────────────────────
test_that("brief = TRUE skips Section 3 output", {
  altered <- base_df
  altered$age[1] <- 99L

  # Capture the printed report line by line.
  report <- capture.output(recon(base_df, altered, id = "id", brief = TRUE))

  has_section_3 <- any(grepl("SECTION 3", report))
  has_section_4 <- any(grepl("SECTION 4", report))

  # Section 3 must be absent while Section 4 is still printed.
  expect_false(has_section_3)
  expect_true(has_section_4)
})

# ── 22. out writes to file ────────────────────────────────────────────────────
test_that("out argument writes report to file", {
  report_path <- tempfile(fileext = ".txt")
  # Register cleanup immediately so the temp file is removed even if
  # recon() or readLines() below raises an error.
  on.exit(unlink(report_path), add = TRUE)

  # Introduce one difference so the report has content to write.
  altered <- base_df
  altered$age[1] <- 99L
  recon(base_df, altered, id = "id", out = report_path)

  # The file must exist and mention "rDataRecon" somewhere.
  expect_true(file.exists(report_path))
  report_lines <- readLines(report_path)
  expect_true(any(grepl("rDataRecon", report_lines)))
})

# ── 23. var argument restricts comparison ─────────────────────────────────────
test_that("var argument restricts which variables are compared", {
  # Change both `age` and `name`, then compare `age` only.
  altered <- base_df
  altered$age[1] <- 99L
  altered$name[2] <- "bob"

  result <- recon(base_df, altered, id = "id", var = "age")
  counts <- result$summary

  # Only one variable is compared, so the `name` change is not counted.
  expect_equal(counts$vars_compared, 1L)
  expect_equal(counts$total_value_diffs, 1L)
})