# Tests for Issue 8: Regex pattern in clean_table()
# Tests both current and proposed regex patterns

# context("clean_table() - Regex pattern behavior")

test_that("clean_table() current regex matches intended patterns", {
  # Local copy of the pattern referenced at R/clean_table.R:69. It flags a
  # standalone NA, a standalone Inf, or a cell made up solely of zeros,
  # whitespace and the characters % ( ) . ,
  current_pattern <- "\\bNA\\b|\\bInf\\b|^[0\\s%().,]+$"

  # Every check goes through the PCRE engine (perl = TRUE).
  is_flagged <- function(cell) grepl(current_pattern, cell, perl = TRUE)

  # Placeholder cells the pattern is meant to flag.
  expect_true(is_flagged("NA"))
  expect_true(is_flagged("Inf"))
  expect_true(is_flagged("NA (NA)"))
  expect_true(is_flagged("0 (0%)"))

  # Cells holding real statistics must be left alone.
  expect_false(is_flagged("15 (30%)"))
  expect_false(is_flagged("0.5 (25%)"))
  expect_false(is_flagged("45 (40, 50)"))
})

test_that("clean_table() current regex has known false positives", {
  current_pattern <- "\\bNA\\b|\\bInf\\b|^[0\\s%().,]+$"
  is_flagged <- function(cell) grepl(current_pattern, cell, perl = TRUE)

  # FALSE POSITIVES: the current pattern matches each of these cells even
  # though it should not. The expectations below record that problem rather
  # than endorse it.
  expect_true(is_flagged("..."))  # only dots
  expect_true(is_flagged("   "))  # only spaces
  expect_true(is_flagged("()"))   # only parentheses
})

test_that("clean_table() proposed regex fixes false positives", {
  # Proposed pattern (Option B): a list of explicit alternatives that is
  # joined into a single alternation.
  alternatives <- c(
    "\\bNA\\b",
    "\\bInf\\b",
    "-Inf",
    "^0 \\(0%\\)$",
    "^0 \\(NA%\\)$",
    "^NA \\(NA\\)$",
    "^NA \\(NA, NA\\)$",
    "^0\\.0+ \\(0\\.0+%?\\)$",
    "^NA, NA$"
  )
  proposed_pattern <- paste(alternatives, collapse = "|")
  is_flagged <- function(cell) grepl(proposed_pattern, cell, perl = TRUE)

  # Placeholder cells the pattern is meant to flag.
  placeholder_cells <- c(
    "NA", "Inf", "-Inf",
    "0 (0%)", "0 (NA%)",
    "NA (NA)", "NA (NA, NA)",
    "0.00 (0.00)", "0.00 (0.00%)",
    "NA, NA"
  )
  for (cell in placeholder_cells) {
    expect_true(is_flagged(cell), info = cell)
  }

  # Punctuation/whitespace-only cells: false positives of the current
  # pattern that the proposed one must not match.
  for (cell in c("...", "   ", "()")) {
    expect_false(is_flagged(cell), info = cell)
  }

  # Cells holding real statistics must not match either.
  real_cells <- c(
    "15 (30%)", "0.5 (25%)", "45 (40, 50)",
    "2.5, 3.8", "1.23 (0.45, 2.01)", "0.001"
  )
  for (cell in real_cells) {
    expect_false(is_flagged(cell), info = cell)
  }
})

test_that("clean_table() proposed regex avoids partial matches", {
  # Same alternatives as the proposed pattern (Option B), joined with "|".
  alternatives <- c(
    "\\bNA\\b",
    "\\bInf\\b",
    "-Inf",
    "^0 \\(0%\\)$",
    "^0 \\(NA%\\)$",
    "^NA \\(NA\\)$",
    "^NA \\(NA, NA\\)$",
    "^0\\.0+ \\(0\\.0+%?\\)$",
    "^NA, NA$"
  )
  proposed_pattern <- paste0(alternatives, collapse = "|")
  is_flagged <- function(cell) grepl(proposed_pattern, cell, perl = TRUE)

  # "NA" and "Inf" embedded in a longer word must not count as a match.
  expect_false(is_flagged("BANANA"))
  expect_false(is_flagged("Information"))
})

test_that("clean_table() works with actual gtsummary table", {
  skip_if_not_installed("gtsummary")
  skip_if_not_installed("dplyr")

  # Blank out `marker` for every Drug A row so that arm has no marker data.
  trial_missing <- dplyr::mutate(
    gtsummary::trial,
    marker = dplyr::if_else(trt == "Drug A", NA_real_, marker)
  )

  summary_tbl <- gtsummary::tbl_summary(
    trial_missing,
    by = trt,
    include = c(age, marker, grade)
  )

  # clean_table() must run without error and hand back a gtsummary object.
  cleaned <- clean_table(summary_tbl)
  expect_s3_class(cleaned, "gtsummary")
})

test_that("clean_table() handles zero counts correctly", {
  skip_if_not_installed("gtsummary")
  skip_if_not_installed("dplyr")

  # Keep only rows that are not (Drug A and grade I), which leaves that
  # treatment/grade combination without any observations.
  trial_without_a1 <- dplyr::filter(
    gtsummary::trial,
    trt != "Drug A" | grade != "I"
  )

  summary_tbl <- gtsummary::tbl_summary(
    trial_without_a1,
    by = trt,
    include = grade
  )

  cleaned <- clean_table(summary_tbl)
  expect_s3_class(cleaned, "gtsummary")
})

test_that("clean_table() handles Inf values", {
  skip_if_not_installed("gtsummary")
  skip_if_not_installed("dplyr")

  # Replace the first row's marker value with Inf.
  inf_data <- gtsummary::trial |>
    dplyr::mutate(
      marker = dplyr::if_else(dplyr::row_number() == 1, Inf, marker)
    )

  # A single Inf among many finite values does not move the median or the
  # quartiles, so with tbl_summary()'s default continuous statistics no cell
  # would contain Inf and clean_table() would never see one. Requesting
  # min/max puts the Inf into a rendered stat cell.
  tbl <- inf_data |>
    gtsummary::tbl_summary(
      by = trt,
      include = marker,
      statistic = list(marker ~ "{median} ({min}, {max})")
    )

  expect_s3_class(
    clean_table(tbl),
    "gtsummary"
  )
})

test_that("clean_table() preserves actual data values", {
  skip_if_not_installed("gtsummary")

  tbl <- gtsummary::trial |>
    gtsummary::tbl_summary(by = trt, include = age) |>
    clean_table()

  # The age rows must still be present after cleaning.
  age_row <- tbl$table_body[tbl$table_body$variable == "age", ]
  expect_gt(nrow(age_row), 0)

  # There must be statistic columns to inspect; otherwise the check below
  # would fail without saying why.
  stat_cols <- grep("^stat_", names(age_row), value = TRUE)
  expect_gt(length(stat_cols), 0)

  # At least one stat column must still carry a non-NA value. vapply() pins
  # the result to a logical vector even when `stat_cols` is empty, which
  # sapply() does not guarantee.
  has_values <- vapply(
    stat_cols,
    function(col) !all(is.na(age_row[[col]])),
    logical(1)
  )
  expect_true(any(has_values))
})