test_that("compare_tables detects identical tables", {
  # A table compared against itself: equal row counts, no one-sided columns.
  tbl <- data.frame(id = 1:3, value = c(10, 20, 30))
  cmp <- compare_tables(tbl, tbl)

  expect_s3_class(cmp, "compare_tbl")
  expect_length(cmp$only_x, 0L)
  expect_length(cmp$only_y, 0L)
  expect_equal(cmp$nrow_x, 3L)
  expect_equal(cmp$nrow_y, 3L)
})

test_that("compare_tables detects column differences", {
  # Each side carries one column the other lacks; id and value are shared.
  left <- data.frame(id = 1:3, value = 1:3, extra_x = 4:6)
  right <- data.frame(id = 1:3, value = 1:3, extra_y = 7:9)
  cmp <- compare_tables(left, right)

  expect_true(all(c("id", "value") %in% cmp$common_columns))
  expect_equal(cmp$only_x, "extra_x")
  expect_equal(cmp$only_y, "extra_y")
})

test_that("compare_tables detects row count differences", {
  # Five rows on the x side versus three on the y side.
  longer <- data.frame(id = 1:5, value = 1:5)
  shorter <- data.frame(id = 1:3, value = 1:3)
  cmp <- compare_tables(longer, shorter)

  expect_equal(cmp$nrow_y, 3L)
  expect_equal(cmp$nrow_x, 5L)
})

test_that("compare_tables detects type mismatches", {
  # `mixed` is integer on the x side and character on the y side.
  int_side <- data.frame(id = 1:3, mixed = 1:3)
  chr_side <- data.frame(
    id = 1:3,
    mixed = c("a", "b", "c"),
    stringsAsFactors = FALSE
  )
  mismatches <- compare_tables(int_side, chr_side)$type_mismatches

  expect_true(!is.null(mismatches))
  expect_equal(nrow(mismatches), 1L)
  expect_equal(mismatches$column, "mixed")
})

test_that("compare_tables auto-detects keys", {
  # No key_cols supplied; id and name agree on both sides, value does not.
  ids <- 1:3
  labels <- c("a", "b", "c")
  left <- data.frame(
    id = ids, name = labels, value = c(10, 20, 30),
    stringsAsFactors = FALSE
  )
  right <- data.frame(
    id = ids, name = labels, value = c(10.1, 20, 30.5),
    stringsAsFactors = FALSE
  )
  keys <- compare_tables(left, right)$key_summary

  expect_true(!is.null(keys))
  expect_true(keys$auto)
  expect_true(all(c("id", "name") %in% keys$keys))
})

test_that("compare_tables works with explicit key_cols", {
  # A user-supplied key is expected to be reported as non-automatic.
  left <- data.frame(id = 1:3, value = c(10, 20, 30))
  right <- data.frame(id = 1:3, value = c(10.1, 20, 30.5))
  keys <- compare_tables(left, right, key_cols = "id")$key_summary

  expect_true(!is.null(keys))
  expect_false(keys$auto)
  expect_equal(keys$keys, "id")
})

test_that("compare_tables computes numeric discrepancies with keys", {
  # All three ids are present on both sides; `value` differs for ids 1 and 3.
  left <- data.frame(id = 1:3, value = c(10, 20, 30))
  right <- data.frame(id = 1:3, value = c(10.5, 20, 30.5))
  cmp <- compare_tables(left, right, key_cols = "id")

  expect_equal(cmp$comparison_method, "keys")
  expect_equal(cmp$rows_matched, 3L)
  expect_true(!is.null(cmp$numeric_summary))
  expect_equal(cmp$numeric_summary$column, "value")
})

test_that("compare_tables computes numeric discrepancies by row index", {
  # A single numeric column and no key_cols argument.
  left <- data.frame(value = c(10, 20, 30))
  right <- data.frame(value = c(10.5, 20, 30.5))
  cmp <- compare_tables(left, right)

  expect_true(!is.null(cmp$numeric_summary))
  expect_equal(cmp$comparison_method, "row_index")
})

test_that("compare_tables errors with no common columns", {
  # The two inputs share no column name at all.
  only_a <- data.frame(a = 1:3)
  only_b <- data.frame(b = 1:3)

  expect_error(
    compare_tables(only_a, only_b),
    regexp = "No matching column names"
  )
})

test_that("compare_tables errors with invalid key_cols", {
  # The requested key column exists in neither table.
  left <- data.frame(id = 1:3)
  right <- data.frame(id = 1:3)

  expect_error(
    compare_tables(left, right, key_cols = "nonexistent"),
    regexp = "not present"
  )
})

test_that("compare_tables key overlap summary is correct", {
  # ids 2 and 3 appear on both sides; id 1 only in x, id 4 only in y.
  left <- data.frame(id = 1:3, value = c(10, 20, 30))
  right <- data.frame(id = 2:4, value = c(40, 50, 60))
  keys <- compare_tables(left, right, key_cols = "id")$key_summary

  expect_equal(keys$only_x, 1L)
  expect_equal(keys$only_y, 1L)
  expect_equal(keys$matches, 2L)
})

test_that("print.compare_tbl produces output", {
  x <- data.frame(id = 1:3, value = c(10, 20, 30))
  y <- data.frame(id = 1:3, value = c(10.1, 20, 30.5))
  cmp <- compare_tables(x, y, key_cols = "id")

  # The printed text is captured from the message stream.
  printed <- paste(
    capture.output(print(cmp), type = "message"),
    collapse = "\n"
  )
  expect_match(printed, "Table Comparison")
})

test_that("compare_tables works with tibbles", {
  # Tibble inputs need dplyr; skip when it is not installed.
  skip_if_not_installed("dplyr")
  ids <- 1:3
  left <- dplyr::tibble(id = ids, value = c(10, 20, 30))
  right <- dplyr::tibble(id = ids, value = c(10.1, 20, 30.5))
  cmp <- compare_tables(left, right, key_cols = "id")

  expect_s3_class(cmp, "compare_tbl")
  expect_true(!is.null(cmp$numeric_summary))
})

test_that("compare_tables handles no numeric columns", {
  # Both tables hold a single character column and nothing numeric.
  left <- data.frame(id = c("a", "b", "c"), stringsAsFactors = FALSE)
  right <- data.frame(id = c("a", "b", "d"), stringsAsFactors = FALSE)
  cmp <- compare_tables(left, right)

  expect_null(cmp$numeric_summary)
  expect_true(is.na(cmp$comparison_method))
})

test_that("compare_tables warns when key columns are non-unique", {
  # id = 1 appears twice on the x side.
  duplicated_keys <- data.frame(id = c(1L, 1L, 2L), value = c(10, 20, 30))
  unique_keys <- data.frame(id = 1:2, value = c(10.1, 30.5))

  expect_warning(
    compare_tables(duplicated_keys, unique_keys, key_cols = "id"),
    regexp = "not unique"
  )
})

# --- tol, top_n, discrepancies, match_summary tests ---

test_that("tol = 0 is backward compatible", {
  # Neither tol nor top_n is passed, so this pins the defaults that end up on
  # the result object.
  # NOTE(review): the description mentions `tol = 0`, but the call relies on
  # the default tolerance -- confirm the intended wording.
  x <- data.frame(id = 1:3, value = c(10.0, 20.0, 30.0))
  y <- data.frame(id = 1:3, value = c(10.5, 20.0, 30.5))
  result <- compare_tables(x, y, key_cols = "id")

  # expect_identical() instead of expect_equal(): the default numeric
  # tolerance of expect_equal() is many orders of magnitude larger than
  # .Machine$double.eps, so it cannot tell a stored tol of eps apart from 0.
  expect_identical(result$tol, .Machine$double.eps)
  expect_equal(result$top_n, Inf)
  expect_false(is.null(result$match_summary))
  expect_false(is.null(result$discrepancies))
  # ids 1 and 3 differ by 0.5, which is above the default tolerance.
  expect_equal(result$numeric_summary$n_over_tol, 2L)
})

test_that("tol filters discrepancies correctly", {
  # Absolute differences by id are 0.1, 0, 0.5 and 0.2, so with tol = 0.15
  # only ids 3 and 4 lie above the tolerance.
  ids <- 1:4
  left <- data.frame(id = ids, value = c(10, 20, 30, 40))
  right <- data.frame(id = ids, value = c(10.1, 20, 30.5, 40.2))
  cmp <- compare_tables(left, right, key_cols = "id", tol = 0.15)

  over_tol <- cmp$discrepancies
  expect_equal(nrow(over_tol), 2L)
  expect_true(all(over_tol$abs_diff > 0.15))
  expect_equal(cmp$numeric_summary$n_over_tol, 2L)
})

test_that("tol classifies matched rows correctly", {
  # With tol = 0.15: id 1 (diff 0.1) and ids 2, 4 (diff 0) count as clean;
  # only id 3 (diff 0.5) counts as a row with a discrepancy.
  ids <- 1:4
  left <- data.frame(id = ids, value = c(10, 20, 30, 40))
  right <- data.frame(id = ids, value = c(10.1, 20, 30.5, 40))
  summary_rows <- compare_tables(
    left, right,
    key_cols = "id", tol = 0.15
  )$match_summary

  expect_equal(summary_rows$matched_with_disc, 1L)
  expect_equal(summary_rows$matched_no_disc, 3L)
  expect_equal(summary_rows$pct_with_disc, 0.25)
  expect_equal(summary_rows$pct_no_disc, 0.75)
})

test_that("top_n truncates discrepancies per column", {
  # Every row differs (by 1, 2, 3, 4, 5), but top_n = 2 keeps two rows only.
  ids <- 1:5
  left <- data.frame(id = ids, value = c(10, 20, 30, 40, 50))
  right <- data.frame(id = ids, value = c(11, 22, 33, 44, 55))
  kept <- compare_tables(left, right, key_cols = "id", top_n = 2)$discrepancies

  expect_equal(nrow(kept), 2L)
  # The two largest differences come first: id 5 (5), then id 4 (4).
  expect_equal(kept$abs_diff[1:2], c(5, 4))
})

test_that("top_n = Inf returns all discrepancies", {
  # All five rows differ and an infinite cap must not drop any of them.
  ids <- 1:5
  left <- data.frame(id = ids, value = c(10, 20, 30, 40, 50))
  right <- data.frame(id = ids, value = c(11, 22, 33, 44, 55))
  cmp <- compare_tables(left, right, key_cols = "id", top_n = Inf)

  expect_equal(nrow(cmp$discrepancies), 5L)
})

test_that("discrepancies data.frame has correct structure with keys", {
  # Two-column key (id, date); `value` differs for the first and third rows.
  ids <- c("a", "b", "c")
  dates <- c("2024-01-01", "2024-01-02", "2024-01-03")
  left <- data.frame(
    id = ids, date = dates, value = c(10, 20, 30),
    stringsAsFactors = FALSE
  )
  right <- data.frame(
    id = ids, date = dates, value = c(10.5, 20, 30.5),
    stringsAsFactors = FALSE
  )
  cmp <- compare_tables(left, right, key_cols = c("id", "date"))

  # Key columns plus the per-cell fields are expected among the column names.
  wanted <- c("id", "date", "column", "value_x", "value_y", "abs_diff")
  expect_true(all(wanted %in% names(cmp$discrepancies)))
})

test_that("discrepancies use row_index in row-index mode", {
  # No id column and no key_cols argument.
  left <- data.frame(value = c(10, 20, 30))
  right <- data.frame(value = c(10.5, 20, 30.5))
  disc_names <- names(compare_tables(left, right)$discrepancies)

  expect_false("id" %in% disc_names)
  expect_true("row_index" %in% disc_names)
})

test_that("only_x_keys and only_y_keys data.frames are stored", {
  # id 1 exists only in x and id 4 only in y.
  left <- data.frame(id = 1:3, value = c(10, 20, 30))
  right <- data.frame(id = 2:4, value = c(40, 50, 60))
  cmp <- compare_tables(left, right, key_cols = "id")

  x_only <- cmp$only_x_keys
  expect_true(!is.null(x_only))
  expect_equal(nrow(x_only), 1L)
  expect_equal(x_only$id, 1L)

  y_only <- cmp$only_y_keys
  expect_true(!is.null(y_only))
  expect_equal(nrow(y_only), 1L)
  expect_equal(y_only$id, 4L)
})

test_that("top_n truncates unmatched key data.frames", {
  # ids 1-5 are unmatched in x and ids 11-15 are unmatched in y.
  left <- data.frame(id = 1:10, value = 1:10)
  right <- data.frame(id = 6:15, value = 6:15)
  cmp <- compare_tables(left, right, key_cols = "id", top_n = 3)

  # The stored key tables are capped at top_n rows ...
  expect_equal(nrow(cmp$only_x_keys), 3L)
  expect_equal(nrow(cmp$only_y_keys), 3L)
  # ... while key_summary keeps the uncapped counts.
  expect_equal(cmp$key_summary$only_x, 5L)
  expect_equal(cmp$key_summary$only_y, 5L)
})

test_that("match_summary counts are correct with key overlap", {
  # x-only ids: 1 and 4; y-only id: 5. ids 2 and 3 match, where id 2 differs
  # (20 vs 25) and id 3 agrees (30 vs 30).
  left <- data.frame(id = 1:4, value = c(10, 20, 30, 40))
  right <- data.frame(id = c(2L, 3L, 5L), value = c(25, 30, 50))
  counts <- compare_tables(left, right, key_cols = "id")$match_summary

  expect_equal(counts$only_x, 2L)
  expect_equal(counts$only_y, 1L)
  expect_equal(counts$matched_no_disc, 1L)
  expect_equal(counts$matched_with_disc, 1L)
  expect_equal(counts$pct_no_disc, 0.5)
  expect_equal(counts$pct_with_disc, 0.5)
})

test_that("match_summary in row-index mode", {
  # x has two more rows than y; the first three rows are equal.
  left <- data.frame(value = c(10, 20, 30, 40, 50))
  right <- data.frame(value = c(10, 20, 30))
  counts <- compare_tables(left, right)$match_summary

  expect_equal(counts$matched_no_disc, 3L)
  expect_equal(counts$matched_with_disc, 0L)
  expect_equal(counts$only_x, 2L)
  expect_equal(counts$only_y, 0L)
})

test_that("no discrepancies returns NULL discrepancies", {
  # Both inputs hold exactly the same ids and values.
  same <- data.frame(id = 1:3, value = c(10, 20, 30))
  same_again <- data.frame(id = 1:3, value = c(10, 20, 30))
  cmp <- compare_tables(same, same_again, key_cols = "id")

  expect_null(cmp$discrepancies)
  expect_equal(cmp$match_summary$matched_with_disc, 0L)
  expect_equal(cmp$match_summary$matched_no_disc, 3L)
})

test_that("match_summary works with no numeric columns", {
  # Character-only tables: "a" and "b" are shared, "c" and "d" are one-sided.
  left <- data.frame(id = c("a", "b", "c"), stringsAsFactors = FALSE)
  right <- data.frame(id = c("a", "b", "d"), stringsAsFactors = FALSE)
  counts <- compare_tables(left, right)$match_summary

  expect_equal(counts$matched_no_disc, 2L)
  expect_equal(counts$matched_with_disc, 0L)
  expect_equal(counts$only_x, 1L)
  expect_equal(counts$only_y, 1L)
})

test_that("print.compare_tbl shows new sections", {
  x <- data.frame(id = 1:4, value = c(10, 20, 30, 40))
  y <- data.frame(id = c(2L, 3L, 5L), value = c(25, 30, 50))
  cmp <- compare_tables(x, y, key_cols = "id", tol = 0.5)

  printed <- paste(
    capture.output(print(cmp), type = "message"),
    collapse = "\n"
  )
  # Each of these fragments must occur somewhere in the printed text.
  fragments <- c(
    "Row matching", "tol = 0.5", "Unmatched keys", "Top discrepancies", ">tol"
  )
  for (fragment in fragments) {
    expect_match(printed, fragment)
  }
})

test_that("print.compare_tbl respects show_n", {
  # All ten rows differ by exactly 1; only two are to be shown.
  base_values <- as.numeric(1:10)
  x <- data.frame(id = 1:10, value = base_values)
  y <- data.frame(id = 1:10, value = base_values + 1)
  cmp <- compare_tables(x, y, key_cols = "id")

  printed <- paste(
    capture.output(print(cmp, show_n = 2), type = "message"),
    collapse = "\n"
  )
  expect_match(printed, "and 8 more")
})

test_that("tol validation rejects invalid values", {
  left <- data.frame(id = 1:3, value = 1:3)
  right <- data.frame(id = 1:3, value = 1:3)

  # A negative number, a string and a length-2 vector must all be rejected.
  for (bad_tol in list(-1, "abc", c(0, 1))) {
    expect_error(compare_tables(left, right, tol = bad_tol), "non-negative")
  }
})

test_that("top_n validation rejects invalid values", {
  left <- data.frame(id = 1:3, value = 1:3)
  right <- data.frame(id = 1:3, value = 1:3)

  # Zero, a negative number and a string must all be rejected.
  for (bad_top_n in list(0, -1, "abc")) {
    expect_error(compare_tables(left, right, top_n = bad_top_n), "positive")
  }
})

test_that("NA-vs-value is treated as discrepancy with abs_diff = NA (keys mode)", {
  # id 2 is NA in x but 20 in y; ids 1 and 3 agree.
  with_na <- data.frame(id = 1:3, value = c(10, NA, 30))
  complete <- data.frame(id = 1:3, value = c(10, 20, 30))
  cmp <- compare_tables(with_na, complete, key_cols = "id")

  found <- cmp$discrepancies
  expect_true(!is.null(found))
  expect_equal(nrow(found), 1L)
  expect_equal(found$id, 2L)
  expect_true(is.na(found$value_x))
  expect_equal(found$value_y, 20)
  expect_true(is.na(found$abs_diff))

  # Row classification: ids 1 and 3 are clean, id 2 has a discrepancy.
  expect_equal(cmp$match_summary$matched_no_disc, 2L)
  expect_equal(cmp$match_summary$matched_with_disc, 1L)
})

test_that("NA-vs-value is treated as discrepancy with abs_diff = NA (row-index mode)", {
  # Row 2 is NA in x but 20 in y; no key column is available.
  with_na <- data.frame(value = c(10, NA, 30))
  complete <- data.frame(value = c(10, 20, 30))
  cmp <- compare_tables(with_na, complete)

  found <- cmp$discrepancies
  expect_true(!is.null(found))
  expect_equal(nrow(found), 1L)
  expect_equal(found$row_index, 2L)
  expect_true(is.na(found$abs_diff))
  expect_equal(cmp$match_summary$matched_with_disc, 1L)
})

test_that("both-NA is NOT treated as discrepancy", {
  # id 2 is NA on both sides; the other values agree.
  left <- data.frame(id = 1:3, value = c(10, NA, 30))
  right <- data.frame(id = 1:3, value = c(10, NA, 30))
  cmp <- compare_tables(left, right, key_cols = "id")

  expect_null(cmp$discrepancies)
  expect_equal(cmp$match_summary$matched_with_disc, 0L)
  expect_equal(cmp$match_summary$matched_no_disc, 3L)
})

test_that("n_over_tol includes NA-vs-value pairs", {
  # id 1 differs by 0.5 and id 2 pairs NA with 20; both are expected to be
  # counted in n_over_tol.
  ids <- 1:4
  left <- data.frame(id = ids, value = c(10, NA, 30, 40))
  right <- data.frame(id = ids, value = c(10.5, 20, 30, 40))
  numeric_stats <- compare_tables(left, right, key_cols = "id")$numeric_summary

  expect_equal(numeric_stats$n_over_tol, 2L)
})

test_that("NA discrepancies are sorted to the end", {
  # id 1 differs by 5, id 4 by 2, id 2 pairs NA with 20, and id 3 matches.
  ids <- 1:4
  left <- data.frame(id = ids, value = c(10, NA, 30, 40))
  right <- data.frame(id = ids, value = c(15, 20, 30, 42))
  diffs <- compare_tables(left, right, key_cols = "id")$discrepancies

  expect_equal(nrow(diffs), 3L)
  # The two finite differences occupy the first two rows ...
  expect_false(is.na(diffs$abs_diff[1]))
  expect_false(is.na(diffs$abs_diff[2]))
  # ... and the NA difference is last.
  expect_true(is.na(diffs$abs_diff[3]))
})

test_that("multiple numeric columns produce correct discrepancies", {
  # val_a differs for ids 1 and 3 (0.5 each); val_b differs for id 2 (0.5).
  left <- data.frame(id = 1:3, val_a = c(10, 20, 30), val_b = c(1, 2, 3))
  right <- data.frame(id = 1:3, val_a = c(10.5, 20, 30.5), val_b = c(1, 2.5, 3))
  diffs <- compare_tables(left, right, key_cols = "id")$discrepancies

  expect_equal(nrow(diffs), 3L)
  expect_true("val_a" %in% diffs$column)
  expect_true("val_b" %in% diffs$column)
  expect_equal(sum(diffs$column == "val_a"), 2L)
  expect_equal(sum(diffs$column == "val_b"), 1L)
})

test_that("top_n applies per-column with multiple numeric columns", {
  # Both numeric columns differ by 1 in all five rows.
  base_values <- as.numeric(1:5)
  left <- data.frame(id = 1:5, val_a = base_values, val_b = base_values)
  right <- data.frame(id = 1:5, val_a = base_values + 1, val_b = base_values + 1)
  diffs <- compare_tables(left, right, key_cols = "id", top_n = 2)$discrepancies

  # The cap of two applies to each column separately.
  for (col_name in c("val_a", "val_b")) {
    expect_equal(sum(diffs$column == col_name), 2L)
  }
})

test_that("tol works correctly in row-index mode", {
  # Differences by row are 0.1, 0, 0.5 and 0.2; with tol = 0.15 only rows
  # 3 and 4 are above the tolerance.
  left <- data.frame(value = c(10, 20, 30, 40))
  right <- data.frame(value = c(10.1, 20, 30.5, 40.2))
  cmp <- compare_tables(left, right, tol = 0.15)

  over_tol <- cmp$discrepancies
  expect_equal(nrow(over_tol), 2L)
  expect_true(all(over_tol$abs_diff > 0.15))
  expect_equal(cmp$numeric_summary$n_over_tol, 2L)
  expect_equal(cmp$match_summary$matched_with_disc, 2L)
  expect_equal(cmp$match_summary$matched_no_disc, 2L)
})

test_that("only_x_keys and only_y_keys in row-index mode", {
  # x has rows 4 and 5 beyond the end of y; y has nothing extra.
  left <- data.frame(value = c(10, 20, 30, 40, 50))
  right <- data.frame(value = c(10, 20, 30))
  cmp <- compare_tables(left, right)

  expect_null(cmp$only_y_keys)
  x_only <- cmp$only_x_keys
  expect_true(!is.null(x_only))
  expect_equal(nrow(x_only), 2L)
  expect_equal(x_only$row_index, 4:5)
})

test_that("print with no discrepancies omits Top discrepancies section", {
  # Identical inputs, so there is nothing to list as a discrepancy.
  x <- data.frame(id = 1:3, value = c(10, 20, 30))
  y <- data.frame(id = 1:3, value = c(10, 20, 30))
  cmp <- compare_tables(x, y, key_cols = "id")

  printed <- paste(
    capture.output(print(cmp), type = "message"),
    collapse = "\n"
  )
  expect_match(printed, "Row matching")
  expect_false(grepl("Top discrepancies", printed))
})

test_that("print section numbering is sequential even without match_summary", {
  # One shared integer column, no numeric value column and no explicit
  # key_cols.
  # NOTE(review): the description says "without match_summary", but the
  # assertions below only pin the first three section headings. Confirm
  # whether this input really yields no match_summary and whether the later
  # section numbers should be asserted as well.
  x <- data.frame(a = c(1L, 2L, 3L))
  y <- data.frame(a = c(1L, 2L, 3L))
  result <- compare_tables(x, y)

  output <- capture.output(print(result), type = "message")
  combined <- paste(output, collapse = "\n")
  expect_true(grepl("1\\. Row counts", combined))
  expect_true(grepl("2\\. Column names", combined))
  expect_true(grepl("3\\. Key columns", combined))
})

test_that("print.compare_tbl suppresses tol label at default eps", {
  # tol is left at its default, so no "tol =" label is expected in the output.
  x <- data.frame(id = 1:3, value = c(10, 20, 30))
  y <- data.frame(id = 1:3, value = c(10.5, 20, 30.5))
  cmp <- compare_tables(x, y, key_cols = "id")

  printed <- paste(
    capture.output(print(cmp), type = "message"),
    collapse = "\n"
  )
  expect_false(grepl("tol =", printed))
})

test_that("print.compare_tbl shows top_n cap in remaining message", {
  # Five keys are unmatched on each side, top_n keeps three of them and
  # show_n displays two, so the output should mention "3 stored".
  x <- data.frame(id = 1:10, value = 1:10)
  y <- data.frame(id = 6:15, value = 6:15)
  cmp <- compare_tables(x, y, key_cols = "id", top_n = 3)

  printed <- paste(
    capture.output(print(cmp, show_n = 2), type = "message"),
    collapse = "\n"
  )
  expect_match(printed, "3 stored")
})

test_that("print.compare_tbl validates show_n", {
  left <- data.frame(id = 1:3, value = 1:3)
  right <- data.frame(id = 1:3, value = 1:3)
  cmp <- compare_tables(left, right, key_cols = "id")

  # A negative number and a string are both invalid for show_n.
  for (bad_show_n in list(-1, "abc")) {
    expect_error(print(cmp, show_n = bad_show_n), "non-negative")
  }
})

test_that("zero-row tables are handled gracefully", {
  # Both inputs have the right columns but no rows.
  empty_x <- data.frame(id = integer(0), value = numeric(0))
  empty_y <- data.frame(id = integer(0), value = numeric(0))
  cmp <- compare_tables(empty_x, empty_y, key_cols = "id")

  expect_null(cmp$numeric_summary)
  expect_null(cmp$discrepancies)
  expect_equal(cmp$key_summary$matches, 0L)
  expect_equal(cmp$nrow_x, 0L)
  expect_equal(cmp$nrow_y, 0L)
})

test_that("print handles NA values in discrepancies display", {
  # id 1 differs by 5 and id 2 pairs NA with 20.
  x <- data.frame(id = 1:3, value = c(10, NA, 30))
  y <- data.frame(id = 1:3, value = c(15, 20, 30))
  cmp <- compare_tables(x, y, key_cols = "id")

  printed <- paste(
    capture.output(print(cmp), type = "message"),
    collapse = "\n"
  )
  expect_match(printed, "Top discrepancies")
  expect_match(printed, "NA")
})

# -- compare_cols / exclude_cols ----------------------------------------------

test_that("compare_cols selects only specified columns", {
  # Columns a and b both differ in row 3, but only a is requested.
  left <- data.frame(id = 1:3, a = c(1, 2, 3), b = c(4, 5, 6))
  right <- data.frame(id = 1:3, a = c(1, 2, 4), b = c(4, 5, 7))
  cmp <- compare_tables(left, right, key_cols = "id", compare_cols = "a")

  expect_equal(cmp$discrepancies$column, "a")
})

test_that("exclude_cols removes specified columns from comparison", {
  # Columns a and b both differ in row 3, but a is excluded.
  left <- data.frame(id = 1:3, a = c(1, 2, 3), b = c(4, 5, 6))
  right <- data.frame(id = 1:3, a = c(1, 2, 4), b = c(4, 5, 7))
  cmp <- compare_tables(left, right, key_cols = "id", exclude_cols = "a")

  expect_equal(cmp$discrepancies$column, "b")
})

test_that("compare_cols and exclude_cols are mutually exclusive", {
  # Passing both selectors in one call must be rejected.
  left <- data.frame(id = 1:3, a = 1:3)
  right <- data.frame(id = 1:3, a = 1:3)

  expect_error(
    compare_tables(left, right, compare_cols = "a", exclude_cols = "a"),
    regexp = "cannot both be specified"
  )
})

test_that("compare_cols errors on unknown column names", {
  # The requested comparison column exists in neither table.
  left <- data.frame(id = 1:3, a = 1:3)
  right <- data.frame(id = 1:3, a = 1:3)

  expect_error(
    compare_tables(left, right, key_cols = "id", compare_cols = "nonexistent"),
    regexp = "Not found"
  )
})

test_that("exclude_cols warns on nonexistent columns", {
  # Excluding a column that is not there yields a warning, not an error.
  left <- data.frame(id = 1:3, a = 1:3)
  right <- data.frame(id = 1:3, a = 1:3)

  expect_warning(
    compare_tables(left, right, key_cols = "id", exclude_cols = "nonexistent"),
    regexp = "not found among common non-key columns"
  )
})

test_that("compare_cols silently excludes key columns", {
  # "id" is the key column and is also listed in compare_cols.
  # NOTE(review): the description says "silently excludes", yet the
  # expectation below is an error matching "Not found" -- confirm which
  # behavior is intended and retitle the test accordingly.
  left <- data.frame(id = 1:3, a = c(1, 2, 3))
  right <- data.frame(id = 1:3, a = c(1, 2, 4))

  expect_error(
    compare_tables(left, right, key_cols = "id", compare_cols = c("id", "a")),
    regexp = "Not found"
  )
})

# -- Categorical discrepancy detection ---------------------------------------

test_that("character column discrepancies detected in keys mode", {
  # `cat` differs for ids 2 and 4 out of four matched rows.
  left <- data.frame(
    id = 1:4, cat = c("a", "b", "c", "d"),
    stringsAsFactors = FALSE
  )
  right <- data.frame(
    id = 1:4, cat = c("a", "x", "c", "z"),
    stringsAsFactors = FALSE
  )
  cmp <- compare_tables(left, right, key_cols = "id")

  cat_stats <- cmp$categorical_summary
  expect_true(!is.null(cat_stats))
  expect_equal(cat_stats$column, "cat")
  expect_equal(cat_stats$n_compared, 4L)
  expect_equal(cat_stats$n_mismatched, 2L)
  expect_equal(cat_stats$pct_mismatched, 0.5)
  expect_equal(cat_stats$n_na_mismatch, 0L)

  cat_diffs <- cmp$categorical_discrepancies
  expect_true(!is.null(cat_diffs))
  expect_equal(nrow(cat_diffs), 2L)
  wanted <- c("id", "column", "value_x", "value_y")
  expect_true(all(wanted %in% names(cat_diffs)))
})

test_that("character column discrepancies detected in row-index mode", {
  # An empty key_cols vector is passed; `cat` differs in row 2 ("b" vs "x").
  left <- data.frame(val = c(1, 2), cat = c("a", "b"), stringsAsFactors = FALSE)
  right <- data.frame(val = c(1, 3), cat = c("a", "x"), stringsAsFactors = FALSE)
  cmp <- compare_tables(left, right, key_cols = character(0))

  cat_diffs <- cmp$categorical_discrepancies
  expect_true(!is.null(cat_diffs))
  expect_equal(nrow(cat_diffs), 1L)
  expect_equal(cat_diffs$row_index, 2L)
  expect_equal(cat_diffs$value_x, "b")
  expect_equal(cat_diffs$value_y, "x")
})

test_that("factor columns compared as character values", {
  # Factor columns whose second element differs ("b" vs "x").
  left <- data.frame(id = 1:3, cat = factor(c("a", "b", "c")))
  right <- data.frame(id = 1:3, cat = factor(c("a", "x", "c")))
  cat_diffs <- compare_tables(
    left, right,
    key_cols = "id"
  )$categorical_discrepancies

  expect_true(!is.null(cat_diffs))
  expect_equal(nrow(cat_diffs), 1L)
  # The stored values are expected as plain strings.
  expect_equal(cat_diffs$value_x, "b")
  expect_equal(cat_diffs$value_y, "x")
})

test_that("logical column discrepancies detected", {
  # `flag` differs for id 2 only (FALSE vs TRUE).
  left <- data.frame(id = 1:3, flag = c(TRUE, FALSE, TRUE))
  right <- data.frame(id = 1:3, flag = c(TRUE, TRUE, TRUE))
  cmp <- compare_tables(left, right, key_cols = "id")

  expect_equal(nrow(cmp$categorical_discrepancies), 1L)
  expect_equal(cmp$categorical_summary$n_mismatched, 1L)
})

test_that("Date column discrepancies detected", {
  # `dt` differs for id 2 only (2024-02-01 vs 2024-02-15).
  dates_x <- as.Date(c("2024-01-01", "2024-02-01", "2024-03-01"))
  dates_y <- as.Date(c("2024-01-01", "2024-02-15", "2024-03-01"))
  left <- data.frame(id = 1:3, dt = dates_x)
  right <- data.frame(id = 1:3, dt = dates_y)
  cmp <- compare_tables(left, right, key_cols = "id")

  expect_equal(nrow(cmp$categorical_discrepancies), 1L)
  expect_equal(cmp$categorical_summary$n_mismatched, 1L)
})

test_that("NA-vs-value is categorical discrepancy; both-NA is not", {
  # id 2: NA vs "b"; id 3: "c" vs NA; id 4: NA on both sides; id 1 agrees.
  left <- data.frame(
    id = 1:4, cat = c("a", NA, "c", NA),
    stringsAsFactors = FALSE
  )
  right <- data.frame(
    id = 1:4, cat = c("a", "b", NA, NA),
    stringsAsFactors = FALSE
  )
  cmp <- compare_tables(left, right, key_cols = "id")

  expect_equal(nrow(cmp$categorical_discrepancies), 2L)
  expect_equal(cmp$categorical_summary$n_mismatched, 2L)
  expect_equal(cmp$categorical_summary$n_na_mismatch, 2L)
})

test_that("top_n applies per-column for categorical discrepancies", {
  # Columns a and b differ in case in all five rows.
  left <- data.frame(
    id = 1:5,
    a = letters[1:5],
    b = c("x", "y", "z", "w", "v"),
    stringsAsFactors = FALSE
  )
  right <- data.frame(
    id = 1:5,
    a = LETTERS[1:5],
    b = c("X", "Y", "Z", "W", "V"),
    stringsAsFactors = FALSE
  )
  cat_diffs <- compare_tables(
    left, right,
    key_cols = "id", top_n = 2
  )$categorical_discrepancies

  # Two rows are kept for each of the two columns, four in total.
  expect_equal(nrow(cat_diffs), 4L)
  for (col_name in c("a", "b")) {
    expect_equal(sum(cat_diffs$column == col_name), 2L)
  }
})

test_that("match_summary counts rows with either numeric or categorical discrepancies", {
  # id 2 has only a numeric difference (2 vs 2.5) and id 3 only a
  # categorical one ("c" vs "x"); ids 1 and 4 are clean.
  left <- data.frame(
    id = 1:4, val = c(1, 2, 3, 4), cat = c("a", "b", "c", "d"),
    stringsAsFactors = FALSE
  )
  right <- data.frame(
    id = 1:4, val = c(1, 2.5, 3, 4), cat = c("a", "b", "x", "d"),
    stringsAsFactors = FALSE
  )
  counts <- compare_tables(left, right, key_cols = "id")$match_summary

  expect_equal(counts$matched_no_disc, 2L)
  expect_equal(counts$matched_with_disc, 2L)
})

test_that("type-mismatched columns excluded from categorical comparison", {
  # `cat` is character in x but integer in y.
  chr_side <- data.frame(
    id = 1:3, cat = c("a", "b", "c"),
    stringsAsFactors = FALSE
  )
  int_side <- data.frame(id = 1:3, cat = 1:3)
  cmp <- compare_tables(chr_side, int_side, key_cols = "id")

  expect_true(!is.null(cmp$type_mismatches))
  expect_null(cmp$categorical_summary)
  expect_null(cmp$categorical_discrepancies)
})

# -- pct_diff -----------------------------------------------------------------

test_that("pct_diff column present in numeric discrepancies", {
  # Only id 2 differs: 20 vs 25.
  left <- data.frame(id = 1:3, value = c(10, 20, 30))
  right <- data.frame(id = 1:3, value = c(10, 25, 30))
  diffs <- compare_tables(left, right, key_cols = "id")$discrepancies

  expect_true("pct_diff" %in% names(diffs))
  # Expected ratio: 5 / 25 = 0.2.
  expect_equal(diffs$pct_diff, 0.2)
})

test_that("pct_diff handles both-zero case", {
  # id 1 is 0 on both sides (no discrepancy); id 2 is 10 vs 15.
  left <- data.frame(id = 1:2, value = c(0, 10))
  right <- data.frame(id = 1:2, value = c(0, 15))
  diffs <- compare_tables(left, right, key_cols = "id")$discrepancies

  expect_equal(nrow(diffs), 1L)
  # Expected ratio for id 2: 5 / 15.
  expect_equal(diffs$pct_diff, 5 / 15, tolerance = 1e-10)
})

test_that("pct_diff handles one-zero case", {
  # A single row with 0 in x and 5 in y.
  zero_side <- data.frame(id = 1L, value = 0)
  five_side <- data.frame(id = 1L, value = 5)
  diffs <- compare_tables(zero_side, five_side, key_cols = "id")$discrepancies

  # Expected ratio: 5 / max(0, 5) = 1.
  expect_equal(diffs$pct_diff, 1)
})

test_that("pct_diff is NA when value is NA", {
  # A single row with a missing value in x and 5 in y.
  missing_side <- data.frame(id = 1L, value = NA_real_)
  five_side <- data.frame(id = 1L, value = 5)
  diffs <- compare_tables(missing_side, five_side, key_cols = "id")$discrepancies

  expect_true(is.na(diffs$pct_diff))
})

# -- total_discrepancies ------------------------------------------------------

test_that("total_discrepancies counts numeric discrepancies only", {
  # ids 2 and 3 differ in the only value column, which is numeric.
  left <- data.frame(id = 1:3, value = c(10, 20, 30))
  right <- data.frame(id = 1:3, value = c(10, 25, 35))
  cmp <- compare_tables(left, right, key_cols = "id")

  expect_equal(cmp$total_discrepancies, 2L)
})

test_that("total_discrepancies counts both numeric and categorical", {
  # id 2 differs in both `val` (2 vs 2.5) and `cat` ("b" vs "x").
  left <- data.frame(
    id = 1:3, val = c(1, 2, 3), cat = c("a", "b", "c"),
    stringsAsFactors = FALSE
  )
  right <- data.frame(
    id = 1:3, val = c(1, 2.5, 3), cat = c("a", "x", "c"),
    stringsAsFactors = FALSE
  )
  cmp <- compare_tables(left, right, key_cols = "id")

  # One numeric cell plus one categorical cell.
  expect_equal(cmp$total_discrepancies, 2L)
})

test_that("total_discrepancies is not limited by top_n", {
  # All five rows differ by 1, while top_n stores only two of them.
  left <- data.frame(id = 1:5, val = c(1, 2, 3, 4, 5))
  right <- data.frame(id = 1:5, val = c(2, 3, 4, 5, 6))
  cmp <- compare_tables(left, right, key_cols = "id", top_n = 2)

  expect_equal(cmp$total_discrepancies, 5L)
  expect_equal(nrow(cmp$discrepancies), 2L)
})

# -- as.data.frame.compare_tbl -----------------------------------------------

test_that("as.data.frame returns correct columns for mixed discrepancies", {
  # id 2 has one numeric and one categorical discrepancy.
  left <- data.frame(
    id = 1:3, val = c(1, 2, 3), cat = c("a", "b", "c"),
    stringsAsFactors = FALSE
  )
  right <- data.frame(
    id = 1:3, val = c(1, 2.5, 3), cat = c("a", "x", "c"),
    stringsAsFactors = FALSE
  )
  flat <- as.data.frame(compare_tables(left, right, key_cols = "id"))

  expect_s3_class(flat, "data.frame")
  wanted <- c(
    "id", "column", "type", "value_x", "value_y", "abs_diff", "pct_diff"
  )
  expect_true(all(wanted %in% names(flat)))
  expect_equal(nrow(flat), 2L)
  for (kind in c("numeric", "categorical")) {
    expect_equal(sum(flat$type == kind), 1L)
  }
})

test_that("as.data.frame works with numeric-only discrepancies", {
  # Only id 2 differs, in a numeric column.
  left <- data.frame(id = 1:3, val = c(1, 2, 3))
  right <- data.frame(id = 1:3, val = c(1, 2.5, 3))
  flat <- as.data.frame(compare_tables(left, right, key_cols = "id"))

  expect_equal(flat$type, "numeric")
  expect_equal(nrow(flat), 1L)
})

test_that("as.data.frame works with categorical-only discrepancies", {
  # Only id 2 differs, in a character column.
  left <- data.frame(id = 1:3, cat = c("a", "b", "c"), stringsAsFactors = FALSE)
  right <- data.frame(id = 1:3, cat = c("a", "x", "c"), stringsAsFactors = FALSE)
  flat <- as.data.frame(compare_tables(left, right, key_cols = "id"))

  expect_equal(nrow(flat), 1L)
  expect_equal(flat$type, "categorical")
  # The numeric difference fields are expected to be NA for this row.
  expect_true(is.na(flat$abs_diff))
  expect_true(is.na(flat$pct_diff))
})

test_that("as.data.frame returns 0-row data.frame with no discrepancies", {
  # Identical inputs: the result has no rows but still has its columns.
  left <- data.frame(id = 1:3, val = c(1, 2, 3))
  right <- data.frame(id = 1:3, val = c(1, 2, 3))
  flat <- as.data.frame(compare_tables(left, right, key_cols = "id"))

  expect_equal(nrow(flat), 0L)
  expect_true(all(c("column", "type") %in% names(flat)))
})

test_that("as.data.frame converts numeric values to character", {
  # id 2 differs (2 vs 3) in a numeric column.
  left <- data.frame(id = 1:2, val = c(1, 2))
  right <- data.frame(id = 1:2, val = c(1, 3))
  flat <- as.data.frame(compare_tables(left, right, key_cols = "id"))

  # Both value columns are expected to be character vectors.
  expect_type(flat$value_x, "character")
  expect_type(flat$value_y, "character")
})

# -- Print method updates ----------------------------------------------------

test_that("print shows categorical discrepancies section", {
  # id 2 differs in the character column ("b" vs "x").
  x <- data.frame(id = 1:3, cat = c("a", "b", "c"), stringsAsFactors = FALSE)
  y <- data.frame(id = 1:3, cat = c("a", "x", "c"), stringsAsFactors = FALSE)
  cmp <- compare_tables(x, y, key_cols = "id")

  printed <- paste(
    capture.output(print(cmp), type = "message"),
    collapse = "\n"
  )
  expect_match(printed, "Categorical discrepancies")
  expect_match(printed, "Mismatched")
})

test_that("print shows pct_diff column in numeric discrepancies", {
  # id 2 differs (20 vs 25), so a numeric discrepancy gets printed.
  x <- data.frame(id = 1:3, value = c(10, 20, 30))
  y <- data.frame(id = 1:3, value = c(10, 25, 30))
  cmp <- compare_tables(x, y, key_cols = "id")

  printed <- paste(
    capture.output(print(cmp), type = "message"),
    collapse = "\n"
  )
  expect_match(printed, "pct_diff")
})

test_that("print shows total cell discrepancies line", {
  # id 2 differs in both the numeric and the character column.
  x <- data.frame(
    id = 1:3, val = c(1, 2, 3), cat = c("a", "b", "c"),
    stringsAsFactors = FALSE
  )
  y <- data.frame(
    id = 1:3, val = c(1, 2.5, 3), cat = c("a", "x", "c"),
    stringsAsFactors = FALSE
  )
  cmp <- compare_tables(x, y, key_cols = "id")

  printed <- paste(
    capture.output(print(cmp), type = "message"),
    collapse = "\n"
  )
  for (fragment in c("Total cell discrepancies", "numeric", "categorical")) {
    expect_match(printed, fragment)
  }
})

test_that("print handles show_n for categorical discrepancies", {
  # All five rows differ in case; two are shown, leaving three.
  x <- data.frame(id = 1:5, cat = letters[1:5], stringsAsFactors = FALSE)
  y <- data.frame(id = 1:5, cat = LETTERS[1:5], stringsAsFactors = FALSE)
  cmp <- compare_tables(x, y, key_cols = "id")

  printed <- paste(
    capture.output(print(cmp, show_n = 2L), type = "message"),
    collapse = "\n"
  )
  expect_match(printed, "and 3 more")
})