test_that("validate_join detects one-to-one relationships", {
  # Keys are unique on both sides; only ids 2 and 3 occur in both tables.
  left <- data.table::data.table(id = 1:3, value = c("a", "b", "c"))
  right <- data.table::data.table(id = 2:4, score = c(10, 20, 30))

  res <- validate_join(left, right, by = "id")
  tallies <- res$counts

  expect_s3_class(res, "validate_join")
  expect_equal(res$relation, "one-to-one")
  expect_equal(tallies$x_rows, 3)
  expect_equal(tallies$y_rows, 3)
  expect_equal(tallies$n_key_overlap, 2)
})

test_that("validate_join detects many-to-one relationships", {
  # Key 1 is repeated in the first table; the second table has unique keys.
  many_side <- data.table::data.table(id = c(1, 1, 2, 3), value = letters[1:4])
  one_side <- data.table::data.table(id = 1:3, score = c(10, 20, 30))

  res <- validate_join(many_side, one_side, by = "id")
  dup_flags <- res$duplicates

  expect_equal(res$relation, "many-to-one")
  expect_true(dup_flags$x_has_dups)
  expect_false(dup_flags$y_has_dups)
})

test_that("validate_join detects one-to-many relationships", {
  # The first table has unique keys; key 1 is repeated in the second table.
  one_side <- data.table::data.table(id = 1:3, value = letters[1:3])
  many_side <- data.table::data.table(id = c(1, 1, 2, 3), score = 10:13)

  res <- validate_join(one_side, many_side, by = "id")
  dup_flags <- res$duplicates

  expect_equal(res$relation, "one-to-many")
  expect_false(dup_flags$x_has_dups)
  expect_true(dup_flags$y_has_dups)
})

test_that("validate_join detects many-to-many relationships", {
  # Each table repeats one key: id 1 in the first, id 2 in the second.
  left <- data.table::data.table(id = c(1, 1, 2), value = letters[1:3])
  right <- data.table::data.table(id = c(1, 2, 2), score = 10:12)

  res <- validate_join(left, right, by = "id")
  dup_flags <- res$duplicates

  expect_equal(res$relation, "many-to-many")
  expect_true(dup_flags$x_has_dups)
  expect_true(dup_flags$y_has_dups)
})

test_that("validate_join handles no matches", {
  # Key ranges are disjoint: 1-3 versus 4-6.
  left <- data.table::data.table(id = 1:3, value = letters[1:3])
  right <- data.table::data.table(id = 4:6, score = 10:12)

  res <- validate_join(left, right, by = "id")

  expect_equal(res$counts$n_key_overlap, 0)
  expect_equal(res$relation, "no matches")
})

test_that("validate_join errors on missing key columns", {
  # The second table has no `id` column, so joining by "id" should error.
  with_key <- data.table::data.table(id = 1:3, value = letters[1:3])
  without_key <- data.table::data.table(other_id = 1:3, score = 10:12)

  expect_error(
    validate_join(with_key, without_key, by = "id"),
    "missing key"
  )
})

test_that("validate_join supports different key names", {
  # The key column is called `id_x` in one table and `id_y` in the other.
  left <- data.table::data.table(id_x = 1:3, value = letters[1:3])
  right <- data.table::data.table(id_y = 2:4, score = 10:12)

  res <- validate_join(left, right, by.x = "id_x", by.y = "id_y")

  # The per-table key names are echoed back on the result.
  expect_equal(res$by.x, "id_x")
  expect_equal(res$by.y, "id_y")
  expect_equal(res$relation, "one-to-one")
})

test_that("print.validate_join returns invisibly", {
  dt1 <- data.table::data.table(id = 1:3, value = letters[1:3])
  dt2 <- data.table::data.table(id = 2:4, score = 10:12)

  result <- validate_join(dt1, dt2, by = "id")

  # withVisible() records the returned value together with its visibility
  # flag, so the "invisibly" part of the contract is asserted rather than
  # only the identity of the returned object.
  expect_output(
    printed <- withVisible(print(result)),
    "Join Validation Summary"
  )
  expect_false(printed$visible)
  expect_identical(printed$value, result)
})

test_that("summary.validate_join returns summary_table", {
  left <- data.table::data.table(id = 1:3, value = letters[1:3])
  right <- data.table::data.table(id = 2:4, score = 10:12)

  res <- validate_join(left, right, by = "id")

  # Assign inside expect_output() so the printed header is checked and the
  # returned object is kept for the class assertion below.
  expect_output(
    summary_tbl <- summary(res),
    "Join Validation Summary"
  )
  expect_s3_class(summary_tbl, "data.table")
})

# --- stat argument tests ---

test_that("stat computes totals for x when column only in x", {
  orders <- data.table::data.table(id = 1:4, revenue = c(100, 200, 300, 400))
  products <- data.table::data.table(id = 2:5, cost = c(10, 20, 30, 40))

  res <- validate_join(orders, products, by = "id", stat.x = "revenue")
  stat_info <- res$stat
  x_stat <- stat_info$x

  # Only the x-side stat column was requested.
  expect_false(is.null(stat_info))
  expect_equal(stat_info$stat_col_x, "revenue")
  expect_null(stat_info$stat_col_y)

  # Revenue overall: 100 + 200 + 300 + 400.
  expect_equal(x_stat$total, 1000)
  # Keys 2, 3, 4 also exist in products: 200 + 300 + 400.
  expect_equal(x_stat$matched, 900)
  # Key 1 exists only in orders.
  expect_equal(x_stat$only, 100)
  expect_equal(x_stat$rate, 90)

  expect_null(stat_info$y)
})

test_that("stat works via shorthand stat= for same column in both tables", {
  x <- data.table::data.table(id = 1:3, amount = c(10, 20, 30))
  y <- data.table::data.table(id = 2:4, amount = c(100, 200, 300))

  res <- validate_join(x, y, by = "id", stat = "amount")
  x_stat <- res$stat$x
  y_stat <- res$stat$y

  # A single `stat` name is expected to produce a breakdown for each table.
  expect_false(is.null(x_stat))
  expect_false(is.null(y_stat))

  # x side: 10 + 20 + 30 overall; keys 2 and 3 match (20 + 30); key 1 does not.
  expect_equal(x_stat$total, 60)
  expect_equal(x_stat$matched, 50)
  expect_equal(x_stat$only, 10)

  # y side: 100 + 200 + 300 overall; keys 2 and 3 match (100 + 200); key 4
  # does not.
  expect_equal(y_stat$total, 600)
  expect_equal(y_stat$matched, 300)
  expect_equal(y_stat$only, 300)
})

test_that("stat.x and stat.y with different column names", {
  x <- data.table::data.table(id = 1:3, revenue = c(100, 200, 300))
  y <- data.table::data.table(id = 2:4, cost = c(10, 20, 30))

  res <- validate_join(
    x, y,
    by = "id",
    stat.x = "revenue",
    stat.y = "cost"
  )
  stat_info <- res$stat

  expect_equal(stat_info$stat_col_x, "revenue")
  expect_equal(stat_info$stat_col_y, "cost")

  # revenue: keys 2 and 3 match (200 + 300); key 1 is x-only (100).
  expect_equal(stat_info$x$matched, 500)
  expect_equal(stat_info$x$only, 100)

  # cost: keys 2 and 3 match (10 + 20); key 4 is y-only (30).
  expect_equal(stat_info$y$matched, 30)
  expect_equal(stat_info$y$only, 30)
})

test_that("stat.y only reports for y", {
  x <- data.table::data.table(id = 1:3, value = letters[1:3])
  y <- data.table::data.table(id = 2:4, revenue = c(100, 200, 300))

  res <- validate_join(x, y, by = "id", stat.y = "revenue")
  y_stat <- res$stat$y

  # Only the y-side breakdown should be present.
  expect_null(res$stat$x)
  expect_false(is.null(y_stat))

  # 100 + 200 + 300 overall; keys 2 and 3 match (100 + 200); key 4 is y-only.
  expect_equal(y_stat$total, 600)
  expect_equal(y_stat$matched, 300)
  expect_equal(y_stat$only, 300)
})

test_that("stat handles NAs in stat column", {
  # The revenue for id 2 is missing.
  x <- data.table::data.table(id = 1:4, revenue = c(100, NA, 300, 400))
  y <- data.table::data.table(id = 2:5, cost = 10:13)

  res <- validate_join(x, y, by = "id", stat.x = "revenue")
  x_stat <- res$stat$x

  # Expected sums skip the NA: 100 + 300 + 400 overall, with one NA reported.
  expect_equal(x_stat$total, 800)
  expect_equal(x_stat$n_na, 1)
  # Keys 2, 3, 4 match; the NA at key 2 contributes nothing: 300 + 400.
  expect_equal(x_stat$matched, 700)
  # Key 1 is x-only.
  expect_equal(x_stat$only, 100)
})

test_that("stat handles no overlapping keys", {
  # Keys 1-3 versus 4-6: nothing matches, so everything is "only".
  x <- data.table::data.table(id = 1:3, revenue = c(100, 200, 300))
  y <- data.table::data.table(id = 4:6, revenue = c(400, 500, 600))

  res <- validate_join(x, y, by = "id", stat = "revenue")
  x_stat <- res$stat$x
  y_stat <- res$stat$y

  expect_equal(x_stat$matched, 0)
  expect_equal(x_stat$only, 600)

  expect_equal(y_stat$matched, 0)
  expect_equal(y_stat$only, 1500)
})

test_that("stat handles many-to-many without double-counting", {
  # Both tables repeat a key, and every key is present on both sides.
  x <- data.table::data.table(id = c(1L, 1L, 2L), revenue = c(10, 20, 30))
  y <- data.table::data.table(id = c(1L, 2L, 2L), revenue = c(100, 200, 300))

  res <- validate_join(x, y, by = "id", stat = "revenue")
  x_stat <- res$stat$x
  y_stat <- res$stat$y

  # x side: each row is counted once, so matched equals the plain total of 60.
  expect_equal(x_stat$total, 60)
  expect_equal(x_stat$matched, 60)
  expect_equal(x_stat$only, 0)

  # y side: likewise, matched equals the plain total of 600.
  expect_equal(y_stat$total, 600)
  expect_equal(y_stat$matched, 600)
  expect_equal(y_stat$only, 0)
})

test_that("stat works with different key names (by.x/by.y)", {
  x <- data.table::data.table(key_x = 1:3, revenue = c(100, 200, 300))
  y <- data.table::data.table(key_y = 2:4, revenue = c(10, 20, 30))

  res <- validate_join(
    x, y,
    by.x = "key_x",
    by.y = "key_y",
    stat = "revenue"
  )
  x_stat <- res$stat$x
  y_stat <- res$stat$y

  # x side: keys 2 and 3 match (200 + 300); key 1 is x-only (100).
  expect_equal(x_stat$matched, 500)
  expect_equal(x_stat$only, 100)

  # y side: keys 2 and 3 match (10 + 20); key 4 is y-only (30).
  expect_equal(y_stat$matched, 30)
  expect_equal(y_stat$only, 30)
})

test_that("stat handles all-zero stat column", {
  # With a total of zero, the rate is expected to be NA.
  x <- data.table::data.table(id = 1:3, revenue = c(0, 0, 0))
  y <- data.table::data.table(id = 2:4, score = 10:12)

  res <- validate_join(x, y, by = "id", stat.x = "revenue")
  x_stat <- res$stat$x

  expect_equal(x_stat$total, 0)
  expect_true(is.na(x_stat$rate))
})

test_that("stat=NULL returns no stat info (backward compat)", {
  x <- data.table::data.table(id = 1:3, value = 10:12)
  y <- data.table::data.table(id = 2:4, score = 20:22)

  # No stat argument is passed, so the result should carry no stat element.
  res <- validate_join(x, y, by = "id")

  expect_null(res$stat)
})

test_that("stat errors when column not found", {
  x <- data.table::data.table(id = 1:3, value = 10:12)
  y <- data.table::data.table(id = 2:4, score = 20:22)

  # The unknown column is rejected on the x side ...
  expect_error(
    validate_join(x, y, by = "id", stat.x = "nonexistent"),
    "not found"
  )
  # ... and on the y side.
  expect_error(
    validate_join(x, y, by = "id", stat.y = "nonexistent"),
    "not found"
  )
})

test_that("stat errors when column is not numeric", {
  # `name` is a character column, so it cannot be used as a stat column.
  x <- data.table::data.table(id = 1:3, name = c("a", "b", "c"))
  y <- data.table::data.table(id = 2:4, score = 20:22)

  expect_error(
    validate_join(x, y, by = "id", stat.x = "name"),
    "not numeric"
  )
})

test_that("stat errors when stat is not a single string", {
  lhs <- data.table::data.table(id = 1:3, value = 10:12)
  rhs <- data.table::data.table(id = 2:4, score = 20:22)

  # Two column names are passed where exactly one is expected.
  expect_error(
    validate_join(lhs, rhs, by = "id", stat.x = c("value", "score")),
    "single column name"
  )
})

test_that("stat and stat.x/stat.y conflict errors", {
  lhs <- data.table::data.table(id = 1:3, value = 10:12)
  rhs <- data.table::data.table(id = 2:4, value = 20:22)

  # Passing the shorthand `stat` together with `stat.x` should be rejected.
  expect_error(
    validate_join(lhs, rhs, by = "id", stat = "value", stat.x = "value"),
    "not both"
  )
})

test_that("print shows stat section when stat is provided", {
  x <- data.table::data.table(id = 1:3, revenue = c(100, 200, 300))
  y <- data.table::data.table(id = 2:4, score = 10:12)

  result <- validate_join(x, y, by = "id", stat.x = "revenue")

  # Each fragment is checked against a fresh print of the result.
  expected_fragments <- c(
    "Stat: revenue",
    "Total revenue in x",
    "Matched revenue in x"
  )
  for (fragment in expected_fragments) {
    expect_output(print(result), fragment)
  }
})

test_that("print omits stat section when no stat provided", {
  x <- data.table::data.table(id = 1:3, revenue = c(100, 200, 300))
  y <- data.table::data.table(id = 2:4, score = 10:12)

  result <- validate_join(x, y, by = "id")

  # No printed line may contain the stat header.
  printed_lines <- capture.output(print(result))
  expect_length(grep("Stat:", printed_lines), 0L)
})

test_that("print shows both table names with different stat columns", {
  x <- data.table::data.table(id = 1:3, revenue = c(100, 200, 300))
  y <- data.table::data.table(id = 2:4, cost = c(10, 20, 30))

  result <- validate_join(
    x, y,
    by = "id",
    stat.x = "revenue",
    stat.y = "cost"
  )

  # Both stat column names and both per-table total lines must be printed.
  expected_fragments <- c(
    "revenue",
    "cost",
    "Total revenue in x",
    "Total cost in y"
  )
  for (fragment in expected_fragments) {
    expect_output(print(result), fragment)
  }
})