# ---------------------------------------------------------------------------
# Test data
# ---------------------------------------------------------------------------

orders <- data.frame(
  id     = c(1L, 2L, 3L, 4L),
  amount = c(100, 200, 300, 400),
  stringsAsFactors = FALSE
)

customers <- data.frame(
  id   = c(2L, 3L, 5L),
  name = c("Alice", "Bob", "Charlie"),
  stringsAsFactors = FALSE
)

# ---------------------------------------------------------------------------
# left_join_tap — basic with trail
# ---------------------------------------------------------------------------

test_that("left_join_tap produces correct join result", {
  trail <- audit_trail("lj_basic")
  result <- left_join_tap(orders, customers, by = "id",
                           .trail = trail, .label = "joined")

  expected <- dplyr::left_join(orders, customers, by = "id")
  expect_equal(result, expected)
})

test_that("left_join_tap records enriched snapshot in trail", {
  trail <- audit_trail("lj_enriched")
  result <- left_join_tap(orders, customers, by = "id",
                           .trail = trail, .label = "joined")

  expect_equal(length(trail$snapshots), 1L)
  expect_equal(trail$labels, "joined")

  snap <- trail$snapshots[[1]]
  expect_s3_class(snap, "audit_snap")
  expect_equal(snap$type, "join")
  expect_equal(snap$label, "joined")

  diag <- snap$diagnostics
  expect_equal(diag$join_type, "left_join")
  expect_equal(diag$relation, "one-to-one")
  expect_true(!is.null(diag$match_rate$x))
  expect_true(!is.null(diag$match_rate$y))
})

test_that("left_join_tap with stat tracks stat column", {
  # stat requires the column to exist in both tables
  x_stat <- data.frame(id = 1:4, amount = c(100, 200, 300, 400))
  y_stat <- data.frame(id = c(2L, 3L, 5L), amount = c(50, 75, 100))
  trail <- audit_trail("lj_stat")
  result <- left_join_tap(x_stat, y_stat, by = "id",
                           .trail = trail, .label = "joined",
                           .stat = "amount")

  snap <- trail$snapshots[[1]]
  expect_true(!is.null(snap$diagnostics$stat))
})

# ---------------------------------------------------------------------------
# All six join types
# ---------------------------------------------------------------------------

test_that("right_join_tap produces correct result and enriched snapshot", {
  trail <- audit_trail("rj")
  result <- right_join_tap(orders, customers, by = "id",
                            .trail = trail, .label = "rj")
  expected <- dplyr::right_join(orders, customers, by = "id")
  expect_equal(result, expected)
  expect_equal(trail$snapshots[[1]]$diagnostics$join_type, "right_join")
})

test_that("inner_join_tap produces correct result and enriched snapshot", {
  trail <- audit_trail("ij")
  result <- inner_join_tap(orders, customers, by = "id",
                            .trail = trail, .label = "ij")
  expected <- dplyr::inner_join(orders, customers, by = "id")
  expect_equal(result, expected)
  expect_equal(trail$snapshots[[1]]$diagnostics$join_type, "inner_join")
})

test_that("full_join_tap produces correct result and enriched snapshot", {
  trail <- audit_trail("fj")
  result <- full_join_tap(orders, customers, by = "id",
                           .trail = trail, .label = "fj")
  expected <- dplyr::full_join(orders, customers, by = "id")
  expect_equal(result, expected)
  expect_equal(trail$snapshots[[1]]$diagnostics$join_type, "full_join")
})

test_that("anti_join_tap produces correct result and enriched snapshot", {
  trail <- audit_trail("aj")
  result <- anti_join_tap(orders, customers, by = "id",
                           .trail = trail, .label = "aj")
  expected <- dplyr::anti_join(orders, customers, by = "id")
  expect_equal(result, expected)
  expect_equal(trail$snapshots[[1]]$diagnostics$join_type, "anti_join")
})

test_that("semi_join_tap produces correct result and enriched snapshot", {
  trail <- audit_trail("sj")
  result <- semi_join_tap(orders, customers, by = "id",
                           .trail = trail, .label = "sj")
  expected <- dplyr::semi_join(orders, customers, by = "id")
  expect_equal(result, expected)
  expect_equal(trail$snapshots[[1]]$diagnostics$join_type, "semi_join")
})

# ---------------------------------------------------------------------------
# inner_join_tap row loss
# ---------------------------------------------------------------------------

test_that("inner_join_tap captures partial overlap diagnostics", {
  trail <- audit_trail("ij_partial")
  result <- inner_join_tap(orders, customers, by = "id",
                            .trail = trail, .label = "inner")
  diag <- trail$snapshots[[1]]$diagnostics
  expect_true(diag$n_only_x > 0)
  expect_true(diag$n_only_y > 0)
})

# ---------------------------------------------------------------------------
# Passthrough of dplyr join args via ...
# ---------------------------------------------------------------------------

test_that("join_tap passes suffix and other args through", {
  trail <- audit_trail("suffix_test")
  x <- data.frame(id = 1:3, value = 10:12)
  y <- data.frame(id = 2:4, value = 20:22)
  result <- left_join_tap(x, y, by = "id", suffix = c("_left", "_right"),
                           .trail = trail, .label = "sfx")
  expect_true("value_left" %in% names(result))
  expect_true("value_right" %in% names(result))
})

# ---------------------------------------------------------------------------
# Join tap without trail
# ---------------------------------------------------------------------------

test_that("join_tap without trail performs plain dplyr join", {
  result <- left_join_tap(orders, customers, by = "id")
  expected <- dplyr::left_join(orders, customers, by = "id")
  expect_equal(result, expected)
})

test_that("join_tap without trail + .stat prints diagnostics", {
  # stat requires the column in both tables
  x_stat <- data.frame(id = 1:4, amount = c(100, 200, 300, 400))
  y_stat <- data.frame(id = c(2L, 3L, 5L), amount = c(50, 75, 100))
  output <- capture.output(
    result <- left_join_tap(x_stat, y_stat, by = "id", .stat = "amount"),
    type = "message"
  )
  expected <- dplyr::left_join(x_stat, y_stat, by = "id")
  expect_equal(result, expected)
  # validate_join output should appear
  expect_true(any(grepl("Join Validation|Relationship|Match rate", output)))
})

test_that("join_tap without trail + .label warns", {
  expect_warning(
    left_join_tap(orders, customers, by = "id", .label = "foo"),
    "ignored"
  )
})

# ---------------------------------------------------------------------------
# join_by() equality — enriched diagnostics
# ---------------------------------------------------------------------------

test_that("join_tap with join_by() equality produces enriched diagnostics", {
  trail <- audit_trail("jb_eq")
  result <- left_join_tap(orders, customers,
                           by = dplyr::join_by(id),
                           .trail = trail, .label = "jb")
  diag <- trail$snapshots[[1]]$diagnostics
  expect_equal(diag$join_type, "left_join")
  expect_true(!is.null(diag$relation))
  expect_true(!is.null(diag$match_rate$x))
})

test_that("join_tap with join_by(a == b) produces enriched diagnostics", {
  x <- data.frame(key_a = 1:3, val = 10:12)
  y <- data.frame(key_b = 2:4, score = 20:22)
  trail <- audit_trail("jb_eq2")
  result <- left_join_tap(x, y,
                           by = dplyr::join_by(key_a == key_b),
                           .trail = trail, .label = "jb2")
  diag <- trail$snapshots[[1]]$diagnostics
  expect_true(!is.null(diag$relation))
})

# ---------------------------------------------------------------------------
# join_by() non-equi — basic snapshot only
# ---------------------------------------------------------------------------

test_that("join_tap with non-equi join_by produces basic snapshot", {
  x <- data.frame(id = 1:3, start = c(1, 5, 10))
  y <- data.frame(id = 1:3, threshold = c(2, 4, 8))
  trail <- audit_trail("jb_neq")
  result <- left_join_tap(x, y,
                           by = dplyr::join_by(id, start >= threshold),
                           .trail = trail, .label = "neq")
  snap <- trail$snapshots[[1]]
  expect_equal(snap$type, "join")
  diag <- snap$diagnostics
  expect_equal(diag$join_type, "left_join")
  # No relation or match_rate for non-equi
  expect_null(diag$relation)
  expect_null(diag$match_rate)
})

# ---------------------------------------------------------------------------
# Label management
# ---------------------------------------------------------------------------

test_that("join_tap auto-generates labels", {
  trail <- audit_trail("auto_lbl")
  left_join_tap(orders, customers, by = "id", .trail = trail)
  expect_equal(trail$labels, "left_join_1")
})

test_that("join_tap rejects duplicate labels", {
  trail <- audit_trail("dup_lbl")
  left_join_tap(orders, customers, by = "id", .trail = trail, .label = "j")
  expect_error(
    left_join_tap(orders, customers, by = "id", .trail = trail, .label = "j"),
    "already exists"
  )
})

test_that("join_tap rejects invalid trail", {
  expect_error(
    left_join_tap(orders, customers, by = "id", .trail = "not_a_trail"),
    "audit_trail"
  )
})

# ---------------------------------------------------------------------------
# Change detection
# ---------------------------------------------------------------------------

test_that("join_tap computes changes from previous snapshot", {
  trail <- audit_trail("changes")
  orders |> audit_tap(trail, "raw")
  left_join_tap(orders, customers, by = "id", .trail = trail, .label = "joined")

  snap2 <- trail$snapshots[[2]]
  expect_true(!is.null(snap2$changes))
  expect_true(snap2$changes$col_delta > 0L)  # join adds columns
})

# ---------------------------------------------------------------------------
# Trail timeline with mixed types
# ---------------------------------------------------------------------------

test_that("mixed pipeline with audit_tap + join_tap + filter_tap renders correctly", {
  trail <- audit_trail("mixed")
  result <- orders |>
    audit_tap(trail, "raw") |>
    left_join_tap(customers, by = "id", .trail = trail, .label = "joined") |>
    dplyr::filter(amount > 100) |>
    audit_tap(trail, "filtered")

  expect_equal(length(trail$snapshots), 3L)
  expect_equal(trail$snapshots[[1]]$type, "tap")
  expect_equal(trail$snapshots[[2]]$type, "join")
  expect_equal(trail$snapshots[[3]]$type, "tap")

  # Print should work without error
  output <- capture.output(print(trail), type = "message")
  expect_true(any(grepl("left_join", output)))
})

# ---------------------------------------------------------------------------
# audit_diff across typed snapshots
# ---------------------------------------------------------------------------

test_that("audit_diff works across join and tap snapshots", {
  trail <- audit_trail("diff_typed")
  orders |> audit_tap(trail, "raw")
  left_join_tap(orders, customers, by = "id", .trail = trail, .label = "joined")

  diff_obj <- audit_diff(trail, "raw", "joined")
  expect_s3_class(diff_obj, "audit_diff")
  expect_true(diff_obj$col_delta > 0L)
})

# ---------------------------------------------------------------------------
# Pipeline capture in join_tap
# ---------------------------------------------------------------------------

test_that("join_tap captures pipeline", {
  trail <- audit_trail("pipeline")
  result <- orders |>
    dplyr::mutate(x = 1) |>
    left_join_tap(customers, by = "id", .trail = trail, .label = "j")

  snap <- trail$snapshots[[1]]
  expect_true(!is.null(snap$pipeline))
  expect_true(length(snap$pipeline) > 0L)
})

# ---------------------------------------------------------------------------
# Grouped tibble handling
# ---------------------------------------------------------------------------

test_that("join_tap preserves grouping on left table", {
  grouped_orders <- dplyr::group_by(orders, id)
  trail <- audit_trail("grouped")
  result <- left_join_tap(grouped_orders, customers, by = "id",
                           .trail = trail, .label = "gj")

  # Result should be a grouped tibble (dplyr preserves groups on left)
  expect_true(dplyr::is_grouped_df(result))
})

# ---------------------------------------------------------------------------
# Named by vector (different column names)
# ---------------------------------------------------------------------------

test_that("join_tap works with named by vector", {
  x <- data.frame(key_x = 1:3, val = 10:12)
  y <- data.frame(key_y = 2:4, score = 20:22)
  trail <- audit_trail("named_by")
  result <- left_join_tap(x, y, by = c("key_x" = "key_y"),
                           .trail = trail, .label = "nb")
  expected <- dplyr::left_join(x, y, by = c("key_x" = "key_y"))
  expect_equal(result, expected)

  diag <- trail$snapshots[[1]]$diagnostics
  expect_true(!is.null(diag$relation))
})

# ---------------------------------------------------------------------------
# Duplicate key detection
# ---------------------------------------------------------------------------

test_that("join_tap detects duplicate keys", {
  x <- data.frame(id = c(1L, 2L, 2L, 3L), val = 1:4)
  y <- data.frame(id = c(2L, 3L), score = c(10, 20))
  trail <- audit_trail("dups")
  result <- left_join_tap(x, y, by = "id", .trail = trail, .label = "d")

  diag <- trail$snapshots[[1]]$diagnostics
  expect_true(diag$x_has_dups)
  expect_false(diag$y_has_dups)
  expect_equal(diag$relation, "many-to-one")
})

# ---------------------------------------------------------------------------
# .normalize_by internal helper
# ---------------------------------------------------------------------------

test_that(".normalize_by handles character vectors", {
  expect_equal(tidyaudit:::.normalize_by("id"), "id")
  expect_equal(tidyaudit:::.normalize_by(c("a" = "b")), c("a" = "b"))
  expect_equal(tidyaudit:::.normalize_by(c("x", "y")), c("x", "y"))
})

test_that(".normalize_by handles NULL", {
  expect_null(tidyaudit:::.normalize_by(NULL))
})

test_that(".normalize_by handles equality join_by", {
  jb <- dplyr::join_by(id)
  result <- tidyaudit:::.normalize_by(jb)
  expect_equal(result, c("id" = "id"))

  jb2 <- dplyr::join_by(a == b)
  result2 <- tidyaudit:::.normalize_by(jb2)
  expect_equal(result2, c("a" = "b"))
})

test_that(".normalize_by returns NULL for non-equi join_by", {
  jb <- dplyr::join_by(x >= y)
  expect_null(tidyaudit:::.normalize_by(jb))
})

test_that(".normalize_by returns NULL for unknown types", {
  expect_null(tidyaudit:::.normalize_by(42))
  expect_null(tidyaudit:::.normalize_by(list(a = 1)))
})

# ---------------------------------------------------------------------------
# Multiple snapshots via pipe chain
# ---------------------------------------------------------------------------

test_that("multiple join_tap calls in a pipe chain work correctly", {
  lookup_a <- data.frame(id = 1:3, a_val = c("x", "y", "z"))
  lookup_b <- data.frame(id = 2:4, b_val = c(10, 20, 30))

  trail <- audit_trail("chain")
  result <- orders |>
    audit_tap(trail, "raw") |>
    left_join_tap(lookup_a, by = "id", .trail = trail, .label = "join_a") |>
    left_join_tap(lookup_b, by = "id", .trail = trail, .label = "join_b")

  expect_equal(length(trail$snapshots), 3L)
  expect_equal(trail$labels, c("raw", "join_a", "join_b"))

  # Verify correct ordering
  expect_equal(trail$snapshots[[1]]$index, 1L)
  expect_equal(trail$snapshots[[2]]$index, 2L)
  expect_equal(trail$snapshots[[3]]$index, 3L)

  # Changes should be computed
  expect_true(!is.null(trail$snapshots[[2]]$changes))
  expect_true(!is.null(trail$snapshots[[3]]$changes))
})

# ---------------------------------------------------------------------------
# Zero-row tables
# ---------------------------------------------------------------------------

test_that("join_tap handles zero-row tables", {
  empty <- orders[0, ]
  trail <- audit_trail("empty_join")
  result <- left_join_tap(empty, customers, by = "id",
                           .trail = trail, .label = "ej")
  expect_equal(nrow(result), 0L)
  expect_equal(trail$snapshots[[1]]$nrow, 0L)
})

# ---------------------------------------------------------------------------
# y_name capture
# ---------------------------------------------------------------------------

test_that("join_tap captures y_name in diagnostics", {
  trail <- audit_trail("yname")
  left_join_tap(orders, customers, by = "id", .trail = trail, .label = "yn")
  expect_equal(trail$snapshots[[1]]$diagnostics$y_name, "customers")
})

# ---------------------------------------------------------------------------
# .data validation
# ---------------------------------------------------------------------------

test_that("join_tap rejects non-data.frame input", {
  trail <- audit_trail("bad_data")
  expect_error(
    left_join_tap(list(a = 1), customers, by = "id", .trail = trail),
    "data.frame"
  )
  expect_error(
    left_join_tap(42, customers, by = "id", .trail = trail),
    "data.frame"
  )
})

# ---------------------------------------------------------------------------
# Label validation
# ---------------------------------------------------------------------------

test_that("join_tap rejects NA label", {
  trail <- audit_trail("na_label")
  expect_error(
    left_join_tap(orders, customers, by = "id",
                   .trail = trail, .label = NA_character_),
    "single character string"
  )
})

test_that("join_tap rejects multi-element label", {
  trail <- audit_trail("multi_label")
  expect_error(
    left_join_tap(orders, customers, by = "id",
                   .trail = trail, .label = c("a", "b")),
    "single character string"
  )
})

# ---------------------------------------------------------------------------
# Natural join (by = NULL)
# ---------------------------------------------------------------------------

test_that("join_tap with by = NULL performs natural join with basic snapshot", {
  trail <- audit_trail("natural_join")
  result <- left_join_tap(orders, customers, .trail = trail, .label = "nj")

  expected <- suppressMessages(dplyr::left_join(orders, customers, by = "id"))
  expect_equal(result, expected)

  # Natural join => by is NULL => no enriched diagnostics
  snap <- trail$snapshots[[1]]
  expect_equal(snap$type, "join")
  diag <- snap$diagnostics
  expect_equal(diag$join_type, "left_join")
  expect_null(diag$relation)
  expect_null(diag$match_rate)
})

# ---------------------------------------------------------------------------
# validate_join error handling (no-trail mode)
# ---------------------------------------------------------------------------

test_that("join_tap no-trail warns when validate_join fails", {
  # .stat referencing a column that doesn't exist in one table
  expect_warning(
    result <- left_join_tap(orders, customers, by = "id",
                             .stat = "nonexistent_col"),
    "validate_join.*failed"
  )
  # Join itself should still succeed
  expected <- dplyr::left_join(orders, customers, by = "id")
  expect_equal(result, expected)
})

# ---------------------------------------------------------------------------
# validate_join error handling (with trail)
# ---------------------------------------------------------------------------

test_that("join_tap with trail degrades gracefully when validate_join fails", {
  trail <- audit_trail("vj_fail")
  # .stat referencing a column that doesn't exist
  result <- left_join_tap(orders, customers, by = "id",
                           .trail = trail, .label = "graceful",
                           .stat = "nonexistent_col")

  # Join result should be correct
  expected <- dplyr::left_join(orders, customers, by = "id")
  expect_equal(result, expected)

  # Snapshot should have basic diagnostics (no enriched)
  snap <- trail$snapshots[[1]]
  expect_equal(snap$type, "join")
  expect_equal(snap$diagnostics$join_type, "left_join")
  expect_null(snap$diagnostics$relation)
})