test_that("validate_column_existence() works", {
  expect_snapshot(
    error = TRUE,
    validate_column_existence(
      data = data.frame(a = 1),
      id_var = "patid",
      time_var = "survtime",
      cause_var = "status"
    )
  )

  expect_snapshot(
    error = TRUE,
    validate_column_existence(
      data = data.frame(a = 1),
      id_var = "patid",
      time_var = "survtime",
      cause_var = "status",
      tstart_var = "tstart"
    )
  )

  expect_no_error(
    validate_column_existence(
      data = data.frame(patid = 1, survtime = 1, status = 1),
      id_var = "patid",
      time_var = "survtime",
      cause_var = "status"
    )
  )

  expect_no_error(
    validate_column_existence(
      data = data.frame(patid = 1, survtime = 1, status = 1, tstart = 1),
      id_var = "patid",
      time_var = "survtime",
      cause_var = "status",
      tstart_var = "tstart"
    )
  )
})


test_that("validate_data_type() works", {
  expect_snapshot(error = TRUE, validate_data_type(data = "not a data.frame"))
})


test_that("validate_cause_values() works", {
  expect_snapshot(
    error = TRUE,
    validate_cause_values(data.frame(x = -1), quote(x))
  )
})


test_that("validate_time_tstart() works", {
  expect_snapshot(
    error = TRUE,
    validate_time_tstart(
      data = data.frame(t = 1:3, tstart = 2:4),
      time_var = "t",
      tstart_var = "tstart"
    )
  )
})


test_that("standardize_data() works", {
  expect_snapshot(
    standardize_data(
      data = mtcars,
      id_var = 1,
      time_var = "mpg",
      cause_var = "cyl",
      tstart_var = "wt"
    )
  )

  expect_snapshot(
    error = TRUE,
    standardize_data(
      data = mtcars,
      id_var = "carb",
      time_var = NULL,
      cause_var = "cyl"
    )
  )

  expect_snapshot(
    error = TRUE,
    standardize_data(
      data = NULL,
      time_var = "mpg",
      id_var = "cyl",
      cause_var = "disp"
    )
  )
})

test_that("validate_data_type() works with various data types", {
  # Test with valid data types
  expect_no_error(validate_data_type(data.frame(x = 1)))
  expect_no_error(validate_data_type(tibble::tibble(x = 1)))

  # Test with invalid data types
  expect_snapshot(error = TRUE, validate_data_type(list(x = 1)))
  expect_snapshot(error = TRUE, validate_data_type(matrix(1:4, nrow = 2)))
  expect_snapshot(error = TRUE, validate_data_type(c(1, 2, 3)))
  expect_snapshot(error = TRUE, validate_data_type(NULL))
})

test_that("validate_cause_values() works with valid values", {
  # Test with valid cause values
  expect_no_error(validate_cause_values(
    data.frame(cause = c(0, 1, 2)),
    quote(cause)
  ))
  expect_no_error(validate_cause_values(
    data.frame(cause = c(1, 1, 1)),
    quote(cause)
  ))
  expect_no_error(validate_cause_values(data.frame(cause = 0), quote(cause)))
  expect_no_error(validate_cause_values(data.frame(cause = 2), quote(cause)))

  # Test with multiple invalid values
  expect_snapshot(
    error = TRUE,
    validate_cause_values(data.frame(status = c(-1, 3, 5)), quote(status))
  )

  # Test with mixed valid and invalid values
  expect_snapshot(
    error = TRUE,
    validate_cause_values(
      data.frame(outcome = c(0, 1, 3, 2, -1)),
      quote(outcome)
    )
  )
})

test_that("validate_time_tstart() works with valid values", {
  # Test with valid time > tstart
  expect_no_error(
    validate_time_tstart(
      data = data.frame(time = c(5, 10, 15), tstart = c(0, 2, 8)),
      time_var = "time",
      tstart_var = "tstart"
    )
  )

  # Test with single valid pair
  expect_no_error(
    validate_time_tstart(
      data = data.frame(end_time = 10, start_time = 5),
      time_var = "end_time",
      tstart_var = "start_time"
    )
  )

  # Test with NA values (should be ignored)
  expect_no_error(
    validate_time_tstart(
      data = data.frame(time = c(5, NA, 15), tstart = c(0, 2, NA)),
      time_var = "time",
      tstart_var = "tstart"
    )
  )

  # Test with equal times (should fail)
  expect_snapshot(
    error = TRUE,
    validate_time_tstart(
      data = data.frame(time = c(5, 5), tstart = c(5, 3)),
      time_var = "time",
      tstart_var = "tstart"
    )
  )
})

test_that("validate_by_variable() works", {
  # Test with NULL (should pass)
  expect_no_error(validate_by_variable(data.frame(x = 1), NULL))

  # Test with valid by variable
  expect_no_error(
    validate_by_variable(
      data.frame(group = c("A", "B", "A")),
      "group"
    )
  )

  # Test with non-character by variable
  expect_snapshot(
    error = TRUE,
    validate_by_variable(data.frame(x = 1), 123)
  )

  # Test with multiple values in by
  expect_snapshot(
    error = TRUE,
    validate_by_variable(data.frame(x = 1), c("group1", "group2"))
  )

  # Test with missing column
  expect_snapshot(
    error = TRUE,
    validate_by_variable(data.frame(x = 1), "missing_col")
  )

  # Test with all NA values
  expect_snapshot(
    error = TRUE,
    validate_by_variable(
      data.frame(group = c(NA, NA, NA)),
      "group"
    )
  )

  # Test with many groups (should warn)
  many_groups_data <- data.frame(
    group = paste0("group_", 1:25)
  )
  expect_snapshot(
    validate_by_variable(many_groups_data, "group")
  )

  # Test with some NA values mixed with valid values (should pass)
  expect_no_error(
    validate_by_variable(
      data.frame(treatment = c("A", "B", NA, "A")),
      "treatment"
    )
  )
})

test_that("standardize_data() works with symbol inputs", {
  # Test with symbols instead of strings
  expect_no_error(
    standardize_data(
      data = data.frame(patient = 1:3, followup = 4:6, event = 7:9),
      id_var = rlang::sym("patient"),
      time_var = rlang::sym("followup"),
      cause_var = rlang::sym("event")
    )
  )

  # Test with tstart_var as symbol
  result_with_tstart <- standardize_data(
    data = data.frame(id = 1:2, start = 0:1, stop = 5:6, status = 1:2),
    id_var = rlang::sym("id"),
    time_var = rlang::sym("stop"),
    cause_var = rlang::sym("status"),
    tstart_var = rlang::sym("start")
  )

  expect_true("tstart" %in% names(result_with_tstart))
  expect_equal(result_with_tstart$tstart, c(0, 1))

  # Test column order
  expect_equal(names(result_with_tstart), c("id", "tstart", "time", "cause"))
})

test_that("standardize_data() column ordering and values", {
  # Test that columns are properly ordered when tstart is added
  result_no_tstart <- standardize_data(
    data = data.frame(patient_id = 1:3, time_to_event = 4:6, outcome = 7:9),
    id_var = rlang::sym("patient_id"),
    time_var = rlang::sym("time_to_event"),
    cause_var = rlang::sym("outcome")
  )

  expect_equal(names(result_no_tstart), c("id", "tstart", "time", "cause"))
  expect_equal(result_no_tstart$tstart, c(0, 0, 0))
  expect_equal(result_no_tstart$id, 1:3)
  expect_equal(result_no_tstart$time, 4:6)
  expect_equal(result_no_tstart$cause, 7:9)
})

test_that("validate_last_observation() works with no problematic cases", {
  # Data where all last observations are censoring or competing risks
  data_std <- data.frame(
    id = c(1, 1, 2, 2, 3, 3),
    tstart = c(0, 0, 0, 0, 0, 0),
    time = c(5, 10, 8, 12, 6, 15),
    cause = c(1, 0, 1, 2, 1, 0) # Last obs for each ID: 0, 2, 0
  )

  expect_no_warning(
    result <- validate_last_observation(data_std)
  )

  expect_true(result)
})

test_that("validate_last_observation() warns for single problematic case", {
  # Data where one participant's last observation is an event
  data_std <- data.frame(
    id = c(1, 1, 2, 2, 3),
    tstart = c(0, 0, 0, 0, 0),
    time = c(5, 10, 8, 12, 15),
    cause = c(1, 0, 1, 1, 2) # ID 2's last obs is cause = 1
  )

  expect_warning(
    result <- validate_last_observation(data_std),
    "Found 1.*where last observation is an event of interest"
  )

  expect_true(result)
})

test_that("validate_last_observation() warns for multiple problematic cases", {
  # Data where multiple participants' last observations are events
  data_std <- data.frame(
    id = c(1, 1, 2, 3, 4, 5),
    tstart = c(0, 0, 0, 0, 0, 0),
    time = c(5, 10, 8, 15, 12, 20),
    cause = c(1, 1, 1, 1, 0, 2) # IDs 1, 2, 3 have last obs as cause = 1
  )

  expect_warning(
    result <- validate_last_observation(data_std),
    "Found 3.*where last observation is an event of interest"
  )

  expect_true(result)
})

test_that("validate_last_observation() handles many problematic cases", {
  # Data with many participants having problematic last observations
  data_std <- data.frame(
    id = 1:10,
    tstart = rep(0, 10),
    time = 1:10,
    cause = c(1, 1, 1, 1, 1, 1, 1, 0, 2, 0) # 7 participants with cause = 1
  )

  expect_warning(
    result <- validate_last_observation(data_std),
    "Found 7.*where last observation is an event of interest"
  )

  # Should mention "First 5 IDs" when more than 5
  expect_warning(
    validate_last_observation(data_std),
    "First 5 IDs"
  )

  expect_true(result)
})

test_that("validate_last_observation() works with unsorted data", {
  # Data not sorted by time
  data_std <- data.frame(
    id = c(1, 1, 1, 2, 2),
    tstart = c(0, 0, 0, 0, 0),
    time = c(10, 5, 15, 8, 3), # Unsorted times
    cause = c(0, 1, 1, 2, 1) # ID 1's actual last obs (time=15) is cause=1
  )

  expect_warning(
    result <- validate_last_observation(data_std),
    "Found 1.*where last observation is an event of interest"
  )

  expect_true(result)
})

test_that("validate_last_observation() works with single observation per ID", {
  # Test where each participant has only one observation
  data_std <- data.frame(
    id = c(1, 2, 3, 4),
    tstart = c(0, 0, 0, 0),
    time = c(5, 8, 12, 15),
    cause = c(1, 2, 0, 1) # IDs 1 and 4 have single obs with cause = 1
  )

  expect_warning(
    result <- validate_last_observation(data_std),
    "Found 2.*where last observation is an event of interest"
  )

  expect_true(result)
})

test_that("validate_last_observation() handles empty data", {
  # Test with empty data frame
  data_std <- data.frame(
    id = numeric(0),
    tstart = numeric(0),
    time = numeric(0),
    cause = numeric(0)
  )

  expect_no_warning(
    result <- validate_last_observation(data_std)
  )

  expect_true(result)
})

test_that("validate_last_observation() handles duplicate times correctly", {
  # Test with participants having multiple observations at the same time
  # (the last chronological observation should be checked)
  data_std <- data.frame(
    id = c(1, 1, 1, 2, 2),
    tstart = c(0, 0, 0, 0, 0),
    time = c(5, 10, 10, 8, 8), # Duplicate times
    cause = c(1, 1, 0, 1, 2) # For ties, should check the last row
  )

  # Since we're checking the last row for each ID after sorting,
  # ID 1's last observation is cause = 0, ID 2's is cause = 2
  expect_no_warning(
    result <- validate_last_observation(data_std)
  )

  expect_true(result)
})

test_that("validate_last_observation() warning message content", {
  # Test the specific content of warning messages
  data_std <- data.frame(
    id = c(1, 2, 3),
    tstart = c(0, 0, 0),
    time = c(5, 8, 12),
    cause = c(1, 1, 0) # IDs 1 and 2 have problematic last obs
  )

  expect_warning(
    validate_last_observation(data_std),
    "assumes"
  )
})

# Test validate_weights_variable function
test_that("validate_weights_variable works correctly", {
  # Create test data
  test_data <- data.frame(
    id = 1:5,
    time = c(1, 2, 3, 4, 5),
    cause = c(1, 0, 2, 1, 0),
    good_weights = c(1.0, 2.0, 0.5, 1.5, 1.2),
    zero_weights = c(1.0, 0.0, 0.5, 1.5, 1.2),
    negative_weights = c(1.0, -0.5, 0.5, 1.5, 1.2),
    missing_weights = c(1.0, NA, 0.5, 1.5, 1.2),
    character_weights = c("1.0", "2.0", "0.5", "1.5", "1.2"),
    extreme_weights = c(1.0, 100.0, 0.01, 1.5, 1.2)
  )

  # Test with NULL weights (should return TRUE)
  expect_true(validate_weights_variable(test_data, NULL))

  # Test with valid weights
  expect_true(validate_weights_variable(test_data, "good_weights"))

  # Test with non-character weights argument
  expect_error(
    validate_weights_variable(test_data, 123),
    class = "rlang_error"
  )
  expect_error(
    validate_weights_variable(test_data, c("weight1", "weight2")),
    class = "rlang_error"
  )

  # Test with non-existent column
  expect_error(
    validate_weights_variable(test_data, "nonexistent_weights"),
    class = "rlang_error"
  )

  # Test with missing values
  expect_error(
    validate_weights_variable(test_data, "missing_weights"),
    class = "rlang_error"
  )

  # Test with non-numeric values
  expect_error(
    validate_weights_variable(test_data, "character_weights"),
    class = "rlang_error"
  )

  # Test with negative values
  expect_error(
    validate_weights_variable(test_data, "negative_weights"),
    class = "rlang_error"
  )

  # Test with zero weights (should warn but not error)
  expect_warning(
    validate_weights_variable(test_data, "zero_weights"),
    class = "rlang_warning"
  )

  # Test with extreme weights (should warn but not error)
  expect_warning(
    validate_weights_variable(test_data, "extreme_weights"),
    class = "rlang_warning"
  )
})

test_that("validate_weights_variable error messages are informative", {
  test_data <- data.frame(
    id = 1:3,
    weights_with_na = c(1.0, NA, 1.5),
    negative_weights = c(1.0, -0.5, 1.5),
    character_weights = c("a", "b", "c")
  )

  # Test missing values error message
  expect_error(
    validate_weights_variable(test_data, "weights_with_na"),
    "Missing values found"
  )

  # Test negative values error message
  expect_error(
    validate_weights_variable(test_data, "negative_weights"),
    "Negative values found"
  )

  # Test non-numeric error message
  expect_error(
    validate_weights_variable(test_data, "character_weights"),
    "must contain numeric values"
  )

  # Test non-existent column error message
  expect_error(
    validate_weights_variable(test_data, "missing_col"),
    "not found in"
  )
})

# Test updated validate_column_existence function with weights
test_that("validate_column_existence works with weights parameter", {
  test_data <- data.frame(
    id = 1:3,
    time = c(1, 2, 3),
    cause = c(1, 0, 2),
    weights = c(1.0, 1.5, 0.8)
  )

  # Test with valid weights column
  expect_true(validate_column_existence(
    test_data,
    rlang::sym("id"),
    rlang::sym("time"),
    rlang::sym("cause"),
    weights = "weights"
  ))

  # Test with non-existent weights column
  expect_error(
    validate_column_existence(
      test_data,
      rlang::sym("id"),
      rlang::sym("time"),
      rlang::sym("cause"),
      weights = "nonexistent_weights"
    ),
    class = "rlang_error"
  )

  # Test with NULL weights (should work)
  expect_true(validate_column_existence(
    test_data,
    rlang::sym("id"),
    rlang::sym("time"),
    rlang::sym("cause"),
    weights = NULL
  ))
})

# Test updated standardize_data function with weights
test_that("standardize_data works with weights parameter", {
  test_data <- data.frame(
    participant = 1:3,
    follow_time = c(1, 2, 3),
    event_type = c(1, 0, 2),
    start_time = c(0, 0.5, 0),
    wt = c(1.0, 1.5, 0.8)
  )

  # Test with weights and tstart
  result <- standardize_data(
    test_data,
    rlang::sym("participant"),
    rlang::sym("follow_time"),
    rlang::sym("event_type"),
    rlang::sym("start_time"),
    weights = "wt"
  )

  expect_equal(names(result), c("id", "tstart", "time", "cause", "weights"))
  expect_equal(result$weights, c(1.0, 1.5, 0.8))
  expect_equal(result$tstart, c(0, 0.5, 0))

  # Test with weights but no tstart
  result2 <- standardize_data(
    test_data,
    rlang::sym("participant"),
    rlang::sym("follow_time"),
    rlang::sym("event_type"),
    weights = "wt"
  )

  expect_equal(names(result2), c("id", "tstart", "time", "cause", "weights"))
  expect_equal(result2$weights, c(1.0, 1.5, 0.8))
  expect_equal(result2$tstart, c(0, 0, 0))

  # Test without weights
  result3 <- standardize_data(
    test_data,
    rlang::sym("participant"),
    rlang::sym("follow_time"),
    rlang::sym("event_type")
  )

  expect_equal(names(result3), c("id", "tstart", "time", "cause"))
  expect_false("weights" %in% names(result3))
})