test_that("diagnose_strings detects basic issues", {
  skip_if_not_installed("stringi")
  x <- c("Apple", "APPLE", "apple", "  Microsoft ", "Google", NA, "")
  result <- diagnose_strings(x)

  expect_s3_class(result, "diagnose_strings")
  expect_equal(result$n_total, 7L)
  expect_equal(result$n_na, 1L)
  expect_equal(result$n_empty, 1L)
})

test_that("diagnose_strings detects whitespace issues", {
  skip_if_not_installed("stringi")
  x <- c("  leading", "trailing  ", "  both  ", "normal", "   ")
  result <- diagnose_strings(x)

  expect_equal(result$n_leading_ws, 2L)   # "  leading", "  both  "
  expect_equal(result$n_trailing_ws, 2L)  # "trailing  ", "  both  "
  expect_equal(result$n_whitespace_only, 1L)  # "   "
})

test_that("diagnose_strings detects case variants", {
  skip_if_not_installed("stringi")
  x <- c("Apple", "APPLE", "apple", "Google", "google")
  result <- diagnose_strings(x)

  expect_equal(result$n_case_variant_groups, 2L)
  expect_true(nrow(result$case_variant_examples) > 0L)
})

test_that("diagnose_strings handles all-NA input", {
  skip_if_not_installed("stringi")
  x <- c(NA_character_, NA_character_)
  result <- diagnose_strings(x)

  expect_equal(result$n_total, 2L)
  expect_equal(result$n_na, 2L)
  expect_equal(result$n_empty, 0L)
  expect_equal(result$n_case_variant_groups, 0L)
})

test_that("diagnose_strings handles clean input", {
  skip_if_not_installed("stringi")
  x <- c("alpha", "beta", "gamma")
  result <- diagnose_strings(x)

  expect_equal(result$n_na, 0L)
  expect_equal(result$n_empty, 0L)
  expect_equal(result$n_whitespace_only, 0L)
  expect_equal(result$n_leading_ws, 0L)
  expect_equal(result$n_trailing_ws, 0L)
  expect_equal(result$n_case_variant_groups, 0L)
})

test_that("diagnose_strings captures variable name", {
  skip_if_not_installed("stringi")
  my_var <- c("a", "b")
  result <- diagnose_strings(my_var)

  expect_equal(result$name, "my_var")
})

test_that("diagnose_strings uses explicit name", {
  skip_if_not_installed("stringi")
  result <- diagnose_strings(c("a", "b"), name = "test_col")

  expect_equal(result$name, "test_col")
})

test_that("print.diagnose_strings produces output", {
  skip_if_not_installed("stringi")
  x <- c("Apple", "APPLE", NA, "")
  result <- diagnose_strings(x)

  output <- capture.output(print(result), type = "message")
  combined <- paste(output, collapse = "\n")
  expect_true(grepl("String Column Diagnosis", combined))
})

test_that("diagnose_strings detects non-ASCII", {
  skip_if_not_installed("stringi")
  x <- c("hello", "caf\u00e9", "na\u00efve")
  result <- diagnose_strings(x)

  expect_equal(result$n_non_ascii, 2L)
})

# audit_transform tests

test_that("audit_transform reports changes from trimws", {
  x <- c("  hello ", "world", "  foo  ", NA)
  result <- audit_transform(x, trimws)

  expect_s3_class(result, "audit_transform")
  expect_equal(result$n_total, 4L)
  expect_equal(result$n_na, 1L)
  expect_equal(result$n_changed, 2L)  # "  hello " and "  foo  " change
  expect_equal(result$n_unchanged, 2L) # "world" and NA unchanged
  expect_equal(length(result$cleaned), 4L)
  expect_equal(result$cleaned[1], "hello")
  expect_equal(result$cleaned[3], "foo")
  expect_true(is.na(result$cleaned[4]))
})

test_that("audit_transform captures function name", {
  x <- c("a", "b")
  result <- audit_transform(x, toupper)

  expect_equal(result$clean_fn_name, "toupper")
  expect_equal(result$cleaned, c("A", "B"))
  expect_equal(result$n_changed, 2L)
})

test_that("audit_transform handles no changes", {
  x <- c("HELLO", "WORLD")
  result <- audit_transform(x, toupper)

  expect_equal(result$n_changed, 0L)
  expect_equal(nrow(result$change_examples), 0L)
  expect_equal(result$pct_changed, 0)
})

test_that("audit_transform provides change examples", {
  x <- c("  a  ", "b", "  c  ", "d", "  e  ")
  result <- audit_transform(x, trimws)

  expect_true(nrow(result$change_examples) > 0L)
  expect_true(all(c("before", "after") %in% names(result$change_examples)))
})

test_that("audit_transform handles all-NA input", {
  x <- c(NA_character_, NA_character_)
  result <- audit_transform(x, toupper)

  expect_equal(result$n_changed, 0L)
  expect_equal(result$n_na, 2L)
  expect_equal(result$pct_changed, 0)
})

test_that("audit_transform captures custom name", {
  result <- audit_transform(c("a", "b"), toupper, name = "test_col")
  expect_equal(result$name, "test_col")
})

test_that("print.audit_transform produces output", {
  x <- c("  hello ", "world")
  result <- audit_transform(x, trimws)

  output <- capture.output(print(result), type = "message")
  combined <- paste(output, collapse = "\n")
  expect_true(grepl("String Transformation Audit", combined))
  expect_true(grepl("trimws", combined))
})

test_that("audit_transform pct_changed is correct", {
  x <- c("a", "b", "c", "d", NA)
  # toupper changes all 4 non-NA values
  result <- audit_transform(x, toupper)

  expect_equal(result$pct_changed, 100)
})

test_that("audit_transform change examples limited to 10", {
  x <- paste0("item_", 1:20)
  result <- audit_transform(x, toupper)

  expect_true(nrow(result$change_examples) <= 10L)
})

test_that("audit_transform errors when clean_fn returns wrong length", {
  expect_error(
    audit_transform(c("a", "b", "c"), function(x) x[1]),
    "same length"
  )
})

test_that("audit_transform errors when clean_fn returns longer vector", {
  expect_error(
    audit_transform(c("a", "b"), function(x) rep(x, 2)),
    "same length"
  )
})