## Survey metadata tests
##
## survey_data combines raw data (`dat`) with a metadata dictionary (`dpdict`), and many operations depend
## on that pairing staying aligned. The tests in this file check dpdict/dat updates, validation error
## messages, and metadata propagation behaviours.

##### Data/dpdict update functions #####
# Checks functions that apply dpdict changes back onto data (names, labels, value labels) and preserve survey_data.
test_that("update_dat_from_dpdict works", {
  temp_dat <- get_minimal_labelled_test_dat()
  temp_dpdict <- create_dict(temp_dat)

  # Modify dpdict for testing
  temp_dpdict$variable_names[1] <- "new_var1_name"
  temp_dpdict$variable_labels[1] <- "New Var1 Label"
  temp_dpdict$value_labels[[2]] <- c("A"=1, "B"=2, "C"=3) # Add value labels to second var

  updated_dat <- update_dat_from_dpdict(temp_dat, temp_dpdict)

  # Check names
  expect_equal(names(updated_dat)[1], "new_var1_name")
  expect_equal(names(updated_dat)[2], names(temp_dat)[2]) # Unchanged name

  # Check labels
  expect_equal(attr(updated_dat[[1]], "label"), "New Var1 Label")
  expect_equal(attr(updated_dat[[2]], "label"), attr(temp_dat[[2]], "label")) # Unchanged label

  # Check value labels
  expect_null(attr(updated_dat[[1]], "labels")) # Original had no value labels
  expect_equal(attr(updated_dat[[2]], "labels"), c("A"=1, "B"=2, "C"=3))

  # Test with survey_data object
  survey_obj <- create_survey_data(get_minimal_labelled_test_dat())
  updated_survey_obj <- update_dat_from_dpdict(survey_obj, temp_dpdict)
  expect_s3_class(updated_survey_obj, "survey_data")
  expect_equal(names(updated_survey_obj$dat)[1], "new_var1_name")
  expect_equal(attr(updated_survey_obj$dat[[2]], "labels"), c("A"=1, "B"=2, "C"=3))

  # Test error handling
  mismatched_dpdict <- temp_dpdict[-1,]
  expect_error(update_dat_from_dpdict(temp_dat, mismatched_dpdict), "Some data columns don't match either variable_names or old_variable_names in dpdict")
})

##### Alias and question dictionary updates #####
# Checks alias updates and how those changes propagate through dpdict (including survey_data wrappers).
test_that("update_aliases works", {
  temp_dpdict <- create_dict_with_metadata(get_big_test_dat(n=10))
  questions_dict <- get_questions_dict(temp_dpdict)
  original_alias <- questions_dict$question_alias[1]
  original_description <- temp_dpdict$question_description[temp_dpdict$question_alias == original_alias][1]
  original_alias_with_suffix <- temp_dpdict$alias_with_suffix[temp_dpdict$question_alias == original_alias][1]

  # Modify alias
  questions_dict$question_alias[1] <- "new_alias_test_a"
  updated_dpdict <- update_aliases(temp_dpdict, questions_dict)

  # Check updates
  expect_equal(updated_dpdict$question_alias[temp_dpdict$question_group == questions_dict$question_group[1]][1], "new_alias_test_a")
  # Description should update the prefix part
  expect_true(grepl("new_alias_test", updated_dpdict$question_description[temp_dpdict$question_group == questions_dict$question_group[1]][1]))
  expect_false(grepl(sub("_.*", "", original_alias), updated_dpdict$question_description[temp_dpdict$question_group == questions_dict$question_group[1]][1]))
  # Alias with suffix should update
  expect_true(grepl("new_alias_test_a", updated_dpdict$alias_with_suffix[temp_dpdict$question_group == questions_dict$question_group[1]][1]))

  # Test with survey_data object
  survey_obj <- create_survey_data(get_big_test_dat(n=10))
  updated_survey_obj <- update_aliases(survey_obj, questions_dict)
  expect_s3_class(updated_survey_obj, "survey_data")
  expect_equal(updated_survey_obj$dpdict$question_alias[updated_survey_obj$dpdict$question_group == questions_dict$question_group[1]][1], "new_alias_test_a")

  # Error handling
  invalid_questions_dict <- questions_dict[, "question_group", drop=FALSE]
  expect_error(update_aliases(temp_dpdict, invalid_questions_dict), "Invalid questions_dict")
})


##### Validation error-message contracts #####
# Checks that dpdict/dat validation returns detailed, user-actionable errors for common mismatch scenarios.
test_that("validate_dat_dpdict_alignment provides detailed error messages", {

  # Test 1: Dimension mismatch
  test_dat <- data.frame(a = 1:3, b = 4:6, c = 7:9)
  test_dpdict <- data.frame(
    variable_names = c("a", "b"),
    variable_labels = c("Variable A", "Variable B")
  )

  expect_error(
    validate_dat_dpdict_alignment(test_dat, test_dpdict),
    "DIMENSION_MISMATCH.*Expected: dpdict should have one row per dat column \\(3 rows\\).*Actual: dpdict has 2 rows",
    class = "simpleError"
  )

  # Test 2: Missing required columns
  test_dpdict_missing_cols <- data.frame(
    var_names = c("a", "b", "c"),  # Wrong column name
    some_other_col = c("x", "y", "z")
  )

  expect_error(
    validate_dat_dpdict_alignment(test_dat, test_dpdict_missing_cols),
    "MISSING_REQUIRED_COLUMNS.*Expected: dpdict must contain columns: variable_names, variable_labels.*Missing columns: variable_names, variable_labels",
    class = "simpleError"
  )

  # Test 3: Variable names mismatch with detailed positioning
  test_dpdict_name_mismatch <- data.frame(
    variable_names = c("x", "b", "z"),  # 'a' and 'c' don't match
    variable_labels = c("Variable X", "Variable B", "Variable Z")
  )

  expect_error(
    validate_dat_dpdict_alignment(test_dat, test_dpdict_name_mismatch),
    "VARIABLE_NAMES_MISMATCH.*Variables in dat but not in dpdict\\$variable_names.*\\[1\\] 'a'.*\\[3\\] 'c'.*Variables in dpdict\\$variable_names but not in dat.*\\[1\\] 'x'.*\\[3\\] 'z'",
    class = "simpleError"
  )

  # Test 4: Variable names found in old_variable_names with helpful suggestions
  test_dpdict_with_old <- data.frame(
    variable_names = c("new_a", "new_b", "new_c"),
    variable_labels = c("Variable A", "Variable B", "Variable C"),
    old_variable_names = c("a", "b", "c")  # Original names match dat
  )

  error_msg <- expect_error(
    validate_dat_dpdict_alignment(test_dat, test_dpdict_with_old),
    "VARIABLE_NAMES_MISMATCH.*Variables found in dpdict\\$old_variable_names.*'a' -> should be 'new_a' \\(dpdict row 1\\).*Consider running update_dat_from_dpdict\\(\\) first",
    class = "simpleError"
  )

  # Test 5: Variable labels mismatch (using labeled data)
  test_dat_labeled <- test_dat
  attr(test_dat_labeled$a, "label") <- "Label A"
  attr(test_dat_labeled$b, "label") <- "Label B"
  attr(test_dat_labeled$c, "label") <- "Label C"

  test_dpdict_label_mismatch <- data.frame(
    variable_names = c("a", "b", "c"),
    variable_labels = c("Different Label A", "Label B", "Different Label C")
  )

  expect_error(
    validate_dat_dpdict_alignment(test_dat_labeled, test_dpdict_label_mismatch),
    "VARIABLE_LABELS_MISMATCH.*Labels in dat but not in dpdict\\$variable_labels.*'Label A'.*'Label C'",
    class = "simpleError"
  )
})

# Test enhanced validate_no_dpdict_duplicates() function
test_that("validate_no_dpdict_duplicates provides detailed error messages", {

  # Test 1: Duplicate variable names with positioning details
  test_dpdict_dup_names <- data.frame(
    variable_names = c("q1", "q2", "q1", "q3", "q2"),  # q1 and q2 are duplicated
    variable_labels = c("Question 1", "Question 2", "Question 1 Again", "Question 3", "Question 2 Again")
  )

  expect_error(
    validate_no_dpdict_duplicates(test_dpdict_dup_names, check_variable_labels = FALSE, check_alias_with_suffix = FALSE),
    "DUPLICATE_VARIABLE_NAMES.*Variable name: 'q1'.*Appears at dpdict rows: 1, 3.*Associated labels:.*\\[1\\]: 'Question 1'.*\\[3\\]: 'Question 1 Again'.*Variable name: 'q2'.*Appears at dpdict rows: 2, 5",
    class = "simpleError"
  )

  # Test 2: Duplicate variable labels with positioning details
  test_dpdict_dup_labels <- data.frame(
    variable_names = c("q1", "q2", "q3", "q4"),
    variable_labels = c("Same Label", "Different Label", "Same Label", "Another Label")
  )

  expect_error(
    validate_no_dpdict_duplicates(test_dpdict_dup_labels, check_variable_names = FALSE, check_alias_with_suffix = FALSE),
    "DUPLICATE_VARIABLE_LABELS.*Variable label: 'Same Label'.*Appears at dpdict rows: 1, 3.*Associated variable names:.*\\[1\\]: 'q1'.*\\[3\\]: 'q3'",
    class = "simpleError"
  )

  # Test 3: Duplicate alias_with_suffix with context
  test_dpdict_dup_alias <- data.frame(
    variable_names = c("sc1_1", "sc1_2", "sc2_1", "sc2_2"),
    variable_labels = c("Scale 1 Item 1", "Scale 1 Item 2", "Scale 2 Item 1", "Scale 2 Item 2"),
    alias_with_suffix = c("scale_1", "scale_2", "scale_1", "scale_4")  # scale_1 duplicated
  )

  expect_error(
    validate_no_dpdict_duplicates(test_dpdict_dup_alias, check_variable_names = FALSE, check_variable_labels = FALSE),
    "DUPLICATE_ALIAS_WITH_SUFFIX.*Alias with suffix: 'scale_1'.*Appears at dpdict rows: 1, 3.*Associated variables:.*\\[1\\]: 'sc1_1' \\('Scale 1 Item 1'\\).*\\[3\\]: 'sc2_1' \\('Scale 2 Item 1'\\)",
    class = "simpleError"
  )

  # Test 4: Multiple types of duplicates in one validation
  test_dpdict_multiple_dups <- data.frame(
    variable_names = c("q1", "q1", "q3"),  # duplicate names
    variable_labels = c("Label A", "Label B", "Label A"),  # duplicate labels
    alias_with_suffix = c("alias1", "alias2", "alias1")  # duplicate aliases
  )

  error_msg <- expect_error(
    validate_no_dpdict_duplicates(test_dpdict_multiple_dups),
    "DUPLICATE_VARIABLE_NAMES.*DUPLICATE_VARIABLE_LABELS.*DUPLICATE_ALIAS_WITH_SUFFIX",
    class = "simpleError"
  )

  # Test 5: Proper handling of NA values (should not be flagged as duplicates)
  test_dpdict_with_nas <- data.frame(
    variable_names = c("q1", "q2", "q3"),
    variable_labels = c("Label 1", NA, NA),  # NAs should not be considered duplicates
    alias_with_suffix = c("alias1", NA, NA)
  )

  expect_true(validate_no_dpdict_duplicates(test_dpdict_with_nas))
})

# Test enhanced validate_variable_names() function
test_that("validate_variable_names provides detailed error explanations", {

  # Test 1: Names starting with numbers
  expect_error(
    validate_variable_names(c("1q", "2var", "validname")),
    "Variable name validation failed.*\\[1\\] '1q'.*starts with number \\(must start with letter\\).*\\[2\\] '2var'.*starts with number \\(must start with letter\\)",
    class = "simpleError"
  )

  # Test 2: Names with invalid characters
  expect_error(
    validate_variable_names(c("q.1", "var-name", "q@2")),
    "contains invalid character\\(s\\): '\\.'.*contains invalid character\\(s\\): '-'.*contains invalid character\\(s\\): '@'",
    class = "simpleError"
  )

  # Test 3: Names starting with special characters
  expect_error(
    validate_variable_names(c("_var", ".name", "#question")),
    "\\[1\\] '_var'.*starts with underscore \\(must start with letter\\).*\\[2\\] '\\.name'.*starts with special character \\(must start with letter\\).*\\[3\\] '#question'.*starts with special character \\(must start with letter\\)",
    class = "simpleError"
  )

  # Test 4: Incorrect underscore patterns
  expect_error(
    validate_variable_names(c("q_a", "var_", "q__1", "sc_x2")),
    "\\[1\\] 'q_a'.*underscore not followed by number.*\\[2\\] 'var_'.*ends with underscore.*\\[3\\] 'q__1'.*contains consecutive underscores.*\\[4\\] 'sc_x2'.*underscore not followed by number",
    class = "simpleError"
  )

  # Test 5: Multiple violations in single name
  expect_error(
    validate_variable_names(c("1q.a_b")),
    "\\[1\\] '1q\\.a_b'.*starts with number.*contains invalid character\\(s\\): '\\.'.*underscore not followed by number",
    class = "simpleError"
  )

  # Test 6: Valid names should pass without error
  expect_true(validate_variable_names(c("q1", "Q1", "SC1_1", "satisfaction_1", "SC1a_1", "SC1_1oe")))

  # Test 7: Check that the examples in error message are actually valid
  valid_examples <- c("q1", "Q1", "SC1_1", "satisfaction_1", "SC1a_1", "SC1_1oe")
  expect_true(validate_variable_names(valid_examples))
})

# Test warning behavior (warn_only = TRUE)
test_that("validation functions provide same detailed messages for warnings", {

  # Test that warning messages contain the same detail as error messages
  test_dat <- data.frame(a = 1:2, b = 3:4)
  test_dpdict <- data.frame(
    variable_names = c("x", "y"),  # Names don't match
    variable_labels = c("Variable X", "Variable Y")
  )

  expect_warning(
    result <- validate_dat_dpdict_alignment(test_dat, test_dpdict, warn_only = TRUE),
    "VARIABLE_NAMES_MISMATCH.*Variables in dat but not in dpdict\\$variable_names.*\\[1\\] 'a'.*\\[2\\] 'b'"
  )
  expect_false(result)

  # Test warning for duplicates
  test_dpdict_dups <- data.frame(
    variable_names = c("q1", "q1"),
    variable_labels = c("Label 1", "Label 2")
  )

  expect_warning(
    result <- validate_no_dpdict_duplicates(test_dpdict_dups, warn_only = TRUE),
    "DUPLICATE_VARIABLE_NAMES.*Variable name: 'q1'.*Appears at dpdict rows: 1, 2"
  )
  expect_false(result)

  # Test warning for invalid variable names
  expect_warning(
    result <- validate_variable_names(c("1invalid"), warn_only = TRUE),
    "\\[1\\] '1invalid'.*starts with number \\(must start with letter\\)"
  )
  expect_false(result)
})

# Test successful validation (should return TRUE invisibly)
test_that("validation functions return TRUE for valid inputs", {

  # Valid alignment
  test_dat <- data.frame(a = 1:3, b = 4:6)
  attr(test_dat$a, "label") <- "Variable A"
  attr(test_dat$b, "label") <- "Variable B"
  test_dpdict <- data.frame(
    variable_names = c("a", "b"),
    variable_labels = c("Variable A", "Variable B")
  )

  expect_true(validate_dat_dpdict_alignment(test_dat, test_dpdict))

  # No duplicates
  test_dpdict_unique <- data.frame(
    variable_names = c("q1", "q2", "q3"),
    variable_labels = c("Question 1", "Question 2", "Question 3"),
    alias_with_suffix = c("alias1", "alias2", "alias3")
  )

  expect_true(validate_no_dpdict_duplicates(test_dpdict_unique))

  # Valid variable names
  expect_true(validate_variable_names(c("q1", "SC1_1", "satisfaction_2a")))
})

# Test edge cases
test_that("validation functions handle edge cases appropriately", {

  # Empty data
  expect_error(
    validate_dat_dpdict_alignment(data.frame(), data.frame(variable_names = character(), variable_labels = character())),
    NA  # Should not error for empty but aligned data
  )

  # Single column/row
  single_dat <- data.frame(single_var = 1:5)
  attr(single_dat$single_var, "label") <- "Single Variable"
  single_dpdict <- data.frame(
    variable_names = "single_var",
    variable_labels = "Single Variable"
  )
  expect_true(validate_dat_dpdict_alignment(single_dat, single_dpdict))

  # Check validation with missing alias_with_suffix column (should not error)
  dpdict_no_alias <- data.frame(
    variable_names = c("a", "b"),
    variable_labels = c("Label A", "Label B")
  )
  expect_true(validate_no_dpdict_duplicates(dpdict_no_alias, check_alias_with_suffix = TRUE))
})


# Test dpdict functions
test_that("create_dict works", {
  # Basic functionality
  temp_dat <- get_minimal_labelled_test_dat()
  dpdict_prefill <- create_dict(temp_dat, prefill = TRUE)
  dpdict_no_prefill <- create_dict(temp_dat, prefill = FALSE)

  expect_equal(nrow(dpdict_prefill), ncol(temp_dat))
  expect_named(dpdict_prefill, c("old_variable_names", "old_variable_labels", "old_value_labels", "variable_names", "variable_labels", "value_labels"))
  expect_equal(dpdict_prefill$variable_names, dpdict_prefill$old_variable_names)
  expect_equal(dpdict_prefill$variable_labels, dpdict_prefill$old_variable_labels)
  expect_equal(dpdict_prefill$value_labels, dpdict_prefill$old_value_labels)

  expect_true(all(is.na(dpdict_no_prefill$variable_names)))
  expect_true(all(is.na(dpdict_no_prefill$variable_labels)))
  expect_true(all(is.na(dpdict_no_prefill$value_labels)))

  # Edge cases
  empty_dat <- data.frame()
  expect_error(create_dict(empty_dat), "temp_dat must be a dataframe with more than zero columns")

  dat_no_labels <- data.frame(a = 1:3, b = letters[1:3])
  dpdict_no_labels <- create_dict(dat_no_labels)
  expect_true(all(is.na(dpdict_no_labels$old_variable_labels)))
  expect_true(all(is.na(dpdict_no_labels$old_value_labels)))
})

test_that("create_dict_with_metadata works", {

  # tests for correct definition of multiresponse questiontype:

  # test 1: is multiresponse:

  temp_dat <- data.frame("Q1_1" = haven::labelled(c(0, 0, 1, NA_real_), label = "Q1_1: Select all that apply - Statement 1", labels = c("Not selected" = 0, "Selected" = 1)),
                         "Q1_2" = haven::labelled(c(0, 0, 0, 0), label = "Q1_2: Select all that apply - Statement 2", labels = c("Not selected" = 0, "Selected" = 1)),
                         "Q1_3" = haven::labelled(c(0, 1, 0, 0), label = "Q1_3: Select all that apply - Statement 3", labels = c("Not selected" = 0, "Selected" = 1)),
                         "Q1_4" = haven::labelled(c(0, 0, NA, 0), label = "Q1_4: Select all that apply - Statement 4", labels = c("Not selected" = 0, "Selected" = 1)))

  temp_dpdict <- create_dict_with_metadata(temp_dat)

  expect_equal(temp_dpdict$questiontype, rep("multiresponse", 4))

  # test 2: some are multiresponse

  temp_dat <- data.frame(
    # Single-variable group - even with non-zero values, shouldn't be multiresponse
    "Q1_1" = haven::labelled(c(1, 0, 1, NA_real_),
                             label = "Q1_1: Single variable question",
                             labels = c("Not selected" = 0, "Selected" = 1)),

    # Multi-variable group - should be detected as multiresponse
    "Q2_1" = haven::labelled(c(0, 1, 0, NA_real_),
                             label = "Q2_1: Multi variable question - Statement 1",
                             labels = c("Not selected" = 0, "Selected" = 1)),
    "Q2_2" = haven::labelled(c(1, 0, 0, 0),
                             label = "Q2_2: Multi variable question - Statement 2",
                             labels = c("Not selected" = 0, "Selected" = 1))
  )

  # Create dpdict and run update_dict_with_metadata
  temp_dpdict <- create_dict(temp_dat, prefill = TRUE)
  updated_dpdict <- update_dict_with_metadata(NULL, temp_dat, temp_dpdict)

  # Check question groups are assigned correctly
  q1_group <- updated_dpdict$question_group[updated_dpdict$variable_names == "Q1_1"]
  q2_group_1 <- updated_dpdict$question_group[updated_dpdict$variable_names == "Q2_1"]
  q2_group_2 <- updated_dpdict$question_group[updated_dpdict$variable_names == "Q2_2"]

  # First verify the grouping is as expected
  expect_false(q1_group %in% c(q2_group_1, q2_group_2)) # Q1 should be in a different group
  expect_equal(q2_group_1, q2_group_2) # Q2 variables should be in the same group

  # Verify questiontype assignment
  expect_equal(updated_dpdict$questiontype[updated_dpdict$variable_names == "Q1_1"], "categorical")
  expect_equal(updated_dpdict$questiontype[updated_dpdict$variable_names == "Q2_1"], "multiresponse")
  expect_equal(updated_dpdict$questiontype[updated_dpdict$variable_names == "Q2_2"], "multiresponse")

  # Would benefit from more tests here, including for ill-formed dats

})

test_that("update_dict_with_metadata works", {
  temp_dat <- get_big_test_dat(n = 10)
  temp_dpdict <- create_dict(temp_dat, prefill = TRUE)

  # Check basic column creation
  updated_dpdict <- update_dict_with_metadata(NULL, temp_dat, temp_dpdict)
  expect_true("question_group" %in% names(updated_dpdict))
  expect_true("questiontype" %in% names(updated_dpdict))
  expect_true("alias_with_suffix" %in% names(updated_dpdict))
  expect_equal(nrow(updated_dpdict), nrow(temp_dpdict))

  # Test with variables_to_update
  variables_to_update <- rep(FALSE, nrow(temp_dpdict))
  variables_to_update[1:2] <- TRUE
  # expect warnings due to most variables initialising with NA
  expect_warning(updated_partial_dpdict <- update_dict_with_metadata(NULL, temp_dat, temp_dpdict, variables_to_update = variables_to_update))
  # Check if metadata is updated only for the first two variables
  expect_false(any(is.na(updated_partial_dpdict$question_group[1:2])))
  expect_true(all(is.na(updated_partial_dpdict$question_group[3:nrow(updated_partial_dpdict)])))

  # Test error handling for misaligned inputs
  misaligned_dpdict <- temp_dpdict[-1, ]
  expect_error(update_dict_with_metadata(NULL, temp_dat, misaligned_dpdict))
})

test_that("check_seps function works", {
  # Test 1: Basic dataset with consistent separators
  basic_dat <- data.frame(
    Q1_1 = c(1, 2, 3),
    Q1_2 = c(4, 5, 6),
    Q2_1 = c(7, 8, 9)
  )

  # Add labels with consistent separators
  attr(basic_dat$Q1_1, "label") <- "Q1: First question - Statement 1"
  attr(basic_dat$Q1_2, "label") <- "Q1: First question - Statement 2"
  attr(basic_dat$Q2_1, "label") <- "Q2: Second question - Statement 1"

  basic_result <- check_seps(basic_dat)

  expect_true(basic_result$consistency["var_name_sep"])
  expect_true(basic_result$consistency["prefix_sep"])
  expect_true(basic_result$consistency["statement_sep"])
  expect_equal(basic_result$separators[["var_name_sep"]], "_")
  expect_equal(basic_result$separators[["prefix_sep"]], ": ")
  expect_equal(basic_result$separators[["statement_sep"]], " - ")
  expect_length(basic_result$issues, 0)

  # Test 2: Dataset with inconsistent varname separators
  inconsistent_dat <- data.frame(
    Q1_1 = c(1, 2, 3),
    Q1.2 = c(4, 5, 6),
    Q2_1 = c(7, 8, 9)
  )

  attr(inconsistent_dat$Q1_1, "label") <- "Q1: First question - Statement 1"
  attr(inconsistent_dat$Q1.2, "label") <- "Q1: First question - Statement 2"
  attr(inconsistent_dat$Q2_1, "label") <- "Q2: Second question - Statement 1"

  inconsistent_result <- check_seps(inconsistent_dat)

  expect_false(inconsistent_result$consistency["var_name_sep"])
  expect_true(inconsistent_result$consistency["prefix_sep"])
  expect_true(inconsistent_result$consistency["statement_sep"])
  expect_length(inconsistent_result$issues, 1)

  # Test 3: Dataset with inconsistent prefix separators
  inconsistent_dat <- data.frame(
    Q1_1 = c(1, 2, 3),
    Q1_2 = c(4, 5, 6),
    Q2_1 = c(7, 8, 9)
  )

  attr(inconsistent_dat$Q1_1, "label") <- "Q1: First question - Statement 1"
  attr(inconsistent_dat$Q1_2, "label") <- "Q1. First question - Statement 2"
  attr(inconsistent_dat$Q2_1, "label") <- "Q2: Second question - Statement 1"

  inconsistent_result <- check_seps(inconsistent_dat)

  expect_true(inconsistent_result$consistency["var_name_sep"])
  expect_false(inconsistent_result$consistency["prefix_sep"])
  expect_true(inconsistent_result$consistency["statement_sep"])
  expect_length(inconsistent_result$issues, 1)

  # Test 4: Dataset with inconsistent statement separators
  inconsistent_dat <- data.frame(
    Q1_1 = c(1, 2, 3),
    Q1_2 = c(4, 5, 6),
    Q2_1 = c(7, 8, 9)
  )

  attr(inconsistent_dat$Q1_1, "label") <- "Q1: First question - Statement 1"
  attr(inconsistent_dat$Q1_2, "label") <- "Q1: First question : Statement 2"
  attr(inconsistent_dat$Q2_1, "label") <- "Q2: Second question - Statement 1"

  inconsistent_result <- check_seps(inconsistent_dat)

  expect_true(inconsistent_result$consistency["var_name_sep"])
  expect_true(inconsistent_result$consistency["prefix_sep"])
  expect_false(inconsistent_result$consistency["statement_sep"])
  expect_length(inconsistent_result$issues, 1)

  # Test 5: Non-data frame input
  expect_error(check_seps(c(1, 2, 3)), "Input must be a data frame")

  # Test 6: Dataset with no separators
  no_sep_dat <- data.frame(
    Q1 = c(1, 2, 3),
    Q2 = c(4, 5, 6)
  )

  attr(no_sep_dat$Q1, "label") <- "First question"
  attr(no_sep_dat$Q2, "label") <- "Second question"

  no_sep_result <- check_seps(no_sep_dat)

  expect_true(all(is.na(no_sep_result$separators)))
  expect_true(all(no_sep_result$consistency))
  expect_length(no_sep_result$issues, 0)
})

test_that("get_affix_df works", {
  test_string <- "Q1: Main question text - Statement 1 - Sub statement A"
  seps <- c(" - ", ": ")

  # Suffix test
  suffix_df <- get_affix_df(test_string, affix_type = "suffix", seps_priority = seps)
  expect_equal(nrow(suffix_df), 3)
  expect_named(suffix_df, c("sep_type", "location_found", "sep_count_from_end", "affix_found", "starts_with_cap"))
  expect_in(c("Statement 1 - Sub statement A", "Sub statement A", "Main question text - Statement 1 - Sub statement A"), suffix_df$affix_found)
  expect_equal(suffix_df$sep_count_from_end[suffix_df$affix_found == "Statement 1 - Sub statement A"], 2)
  expect_equal(suffix_df$sep_count_from_end[suffix_df$affix_found == "Sub statement A"], 1)
  expect_equal(suffix_df$sep_count_from_end[suffix_df$affix_found == "Main question text - Statement 1 - Sub statement A"], 1)
  expect_equal(suffix_df$sep_type[suffix_df$affix_found == "Statement 1 - Sub statement A"], " - ")
  expect_equal(suffix_df$sep_type[suffix_df$affix_found == "Sub statement A"], " - ")
  expect_equal(suffix_df$sep_type[suffix_df$affix_found == "Main question text - Statement 1 - Sub statement A"], ": ")

  # Prefix test
  prefix_df <- get_affix_df(test_string, affix_type = "prefix", seps_priority = seps)
  expect_equal(nrow(prefix_df), 3)
  expect_named(prefix_df, c("sep_type", "location_found", "sep_count_from_start", "affix_found", "starts_with_cap"))
  expect_in(c("Q1: Main question text", "Q1", "Q1: Main question text - Statement 1"), prefix_df$affix_found)
  expect_equal(prefix_df$sep_count_from_start[prefix_df$affix_found == "Q1: Main question text"], 1)
  expect_equal(prefix_df$sep_count_from_start[prefix_df$affix_found == "Q1"], 1)
  expect_equal(prefix_df$sep_count_from_start[prefix_df$affix_found == "Q1: Main question text - Statement 1"], 2)
  expect_equal(prefix_df$sep_type[prefix_df$affix_found == "Q1: Main question text"], " - ")
  expect_equal(prefix_df$sep_type[prefix_df$affix_found == "Q1"], ": ")
  expect_equal(prefix_df$sep_type[prefix_df$affix_found == "Q1: Main question text - Statement 1"], " - ")

  # Edge cases
  expect_true(is.na(get_affix_df("NoSeps", affix_type = "suffix", seps_priority = seps)))
  expect_error(get_affix_df("", affix_type = "suffix", seps_priority = seps))

  # Test filtering and sorting
  test_string_trailing_sep <- "Text with sep - "
  suffix_df_unfiltered <- get_affix_df(test_string_trailing_sep, affix_type = "suffix", seps_priority = c(" - "), filter_results = FALSE)
  expect_equal(nrow(suffix_df_unfiltered), 1)
  expect_equal(suffix_df_unfiltered$affix_found, "") # Empty suffix
  suffix_df_filtered <- get_affix_df(test_string_trailing_sep, affix_type = "suffix", seps_priority = c(" - "), filter_results = TRUE)
  expect_true(is.na(suffix_df_filtered)) # Should be removed by filter
})

test_that("get_updated_seps works", {
  # Setup inconsistent data
  inconsistent_dat <- data.frame(Q1_1 = 1, Q1.2 = 2, Q3_1 = 3)
  attr(inconsistent_dat$Q1_1, "label") <- "Q1: Question A - Statement 1"
  attr(inconsistent_dat$Q1.2, "label") <- "Q1. Question A - Statement 2" # Inconsistent prefix sep
  attr(inconsistent_dat$Q3_1, "label") <- "Q3: Question B : Statement X" # Inconsistent statement sep

  sep_analysis <- check_seps(inconsistent_dat)
  # Expected analysis: var_name inconsistent (_ vs .), prefix inconsistent (: vs .), statement inconsistent (- vs :)

  # Test updating to most common (should pick "_" for varname, ": " for prefix, " - " for statement if Q1_1 label is more common)
  # Let's assume "_" , ": ", " - " are most common based on analysis (adjust if check_seps logic differs)
  seps_to_use_common <- list(var_name_sep = "_", prefix_sep = ": ", statement_sep = " - ") # Manually set expected most common
  updated_seps_common <- get_updated_seps(inconsistent_dat, sep_analysis, seps_to_use = seps_to_use_common)

  expect_equal(updated_seps_common$new_variable_names, c("Q1_1", "Q1_2", "Q3_1")) # Q1.2 corrected
  expect_equal(updated_seps_common$new_variable_labels, c("Q1: Question A - Statement 1", "Q1: Question A - Statement 2", "Q3: Question B - Statement X")) # Prefix and statement seps corrected

  # Test updating to specific chosen seps
  seps_to_use_specific <- list(var_name_sep = ".", prefix_sep = ". ", statement_sep = " : ")
  updated_seps_specific <- get_updated_seps(inconsistent_dat, sep_analysis, seps_to_use = seps_to_use_specific)

  expect_equal(updated_seps_specific$new_variable_names, c("Q1.1", "Q1.2", "Q3.1"))
  expect_equal(updated_seps_specific$new_variable_labels, c("Q1. Question A : Statement 1", "Q1. Question A : Statement 2", "Q3. Question B : Statement X"))

  # Edge case: No separators present
  no_sep_dat <- data.frame(Q1 = 1, Q2 = 2)
  attr(no_sep_dat$Q1, "label") <- "Question 1"
  attr(no_sep_dat$Q2, "label") <- "Question 2"
  no_sep_analysis <- check_seps(no_sep_dat)
  no_sep_updated <- get_updated_seps(no_sep_dat, no_sep_analysis)
  expect_equal(no_sep_updated$new_variable_names, names(no_sep_dat)) # Should be unchanged
  expect_equal(no_sep_updated$new_variable_labels, c("Question 1", "Question 2")) # Should be unchanged
})

test_that("get_updated_seps correctly handles separator position indexing", {
  # This test specifically addresses the bug where matched_sep (position in sep_portion)
  # was incorrectly used as an index into the full string

  # Test case 1: Variable names with different lengths before separator
  # This ensures the fix correctly converts sep_portion indices to full string indices
  test_dat1 <- data.frame(
    A_1 = 1,           # Short prefix before sep
    ABC_2 = 2,         # Medium prefix before sep
    ABCDEF_3 = 3,      # Long prefix before sep
    Q1.test = 4        # Different sep to be updated
  )
  attr(test_dat1$A_1, "label") <- "A: Label 1 - Statement 1"
  attr(test_dat1$ABC_2, "label") <- "ABC: Label 2 - Statement 2"
  attr(test_dat1$ABCDEF_3, "label") <- "ABCDEF: Label 3 - Statement 3"
  attr(test_dat1$Q1.test, "label") <- "Q1. Label 4 - Statement 4"

  sep_analysis1 <- check_seps(test_dat1)
  seps_to_use1 <- list(var_name_sep = "_", prefix_sep = ": ", statement_sep = " - ")
  result1 <- get_updated_seps(test_dat1, sep_analysis1, seps_to_use = seps_to_use1)

  # All variable names should have consistent "_" separator
  expect_equal(result1$new_variable_names, c("A_1", "ABC_2", "ABCDEF_3", "Q1_test"))

  # All labels should have consistent separators
  expect_equal(result1$new_variable_labels,
               c("A: Label 1 - Statement 1",
                 "ABC: Label 2 - Statement 2",
                 "ABCDEF: Label 3 - Statement 3",
                 "Q1: Label 4 - Statement 4"))

  # Verify the separator was found at the correct position for each variable
  expect_equal(result1$var_name_seps_found, c("_", "_", "_", "."))

  # Test case 2: Mix of separators at different positions
  test_dat2 <- data.frame(
    X.A = 1,
    YY_B = 2,
    ZZZ.C = 3
  )
  attr(test_dat2$X.A, "label") <- "X. Prefix A - Stat"
  attr(test_dat2$YY_B, "label") <- "YY: Prefix B - Stat"
  attr(test_dat2$ZZZ.C, "label") <- "ZZZ. Prefix C - Stat"

  sep_analysis2 <- check_seps(test_dat2)
  seps_to_use2 <- list(var_name_sep = ".", prefix_sep = ". ", statement_sep = " - ")
  result2 <- get_updated_seps(test_dat2, sep_analysis2, seps_to_use = seps_to_use2)

  # Should correctly update separators regardless of prefix length
  expect_equal(result2$new_variable_names, c("X.A", "YY.B", "ZZZ.C"))
  expect_equal(result2$new_variable_labels,
               c("X. Prefix A - Stat",
                 "YY. Prefix B - Stat",
                 "ZZZ. Prefix C - Stat"))

  # Test case 3: Verify actual character replacement happens at correct position
  # Create a case where the bug would manifest if not fixed
  test_dat3 <- data.frame(LONG_VAR_NAME_test = 1)
  attr(test_dat3$LONG_VAR_NAME_test, "label") <- "LONG_VAR_NAME. Something"

  sep_analysis3 <- check_seps(test_dat3)
  seps_to_use3 <- list(var_name_sep = "_", prefix_sep = ": ", statement_sep = " - ")
  result3 <- get_updated_seps(test_dat3, sep_analysis3, seps_to_use = seps_to_use3)

  # The separator before "test" should be correctly identified and replaced
  expect_equal(result3$new_variable_names, "LONG_VAR_NAME_test")
  expect_equal(result3$var_name_seps_found, "_")

  # Ensure the prefix separator is also correctly updated
  expect_equal(result3$new_variable_labels, "LONG_VAR_NAME: Something")
  expect_equal(result3$prefix_seps_found, ".")
})

test_that("split_into_question_groups works", {

  n = 10
  seps_to_use <- list( variable_name_sep = "_", prefix_sep = ": ", statement_sep = " - ")

  # test 1 - split by class
  temp_dat <- data.frame("Q1_1" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_1: Select all that apply - Statement 1"),
                         "Q1_2" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_2: Select all that apply - Statement 2"),
                         "Q1_3" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_3: Select all that apply - Statement 3"),
                         "Q1_4" = haven::labelled(sample(c("0", "1", NA_character_), n, replace = TRUE), label = "Q1_4: Select all that apply - Statement 5"))

  temp_dict <- create_dict(temp_dat, prefill = TRUE)
  temp_dict <- temp_dict[, !(names(temp_dict) %in% c("old_variable_names", "old_variable_labels", "old_value_labels"))]

  config1 <- list(splitbyclass = TRUE,
                  splitbynumlabelledvalues = FALSE,
                  splitbynoncontiguous = FALSE,
                  splitbycommonlabel = FALSE,
                  findlongest = FALSE,
                  min_lcs_length = 10,
                  min_common_strings = 3,
                  consistent_consecutive_mode = FALSE,
                  consecutive_range = 10,
                  variable_compare_mode = "complete")

  test_result <- split_into_question_groups(temp_dict, temp_dat, variables_to_process = NULL, seps_to_use = seps_to_use, ignorelabelbeforeprefix = TRUE, config = config1)

  expected_test_result <- c("Q1_a", "Q1_a", "Q1_a", "Q1_b")

  expect_equal(test_result$question_group, expected_test_result)

  # test 2 - split by numlabelledvalues
  temp_dat <- data.frame("Q1_1" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_1: Select all that apply - Statement 1", labels = c("Not selected" = 0, "Selected" = 1)),
                         "Q1_2" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_2: Select all that apply - Statement 2", labels = c("Not selected" = 0, "Selected" = 1)),
                         "Q1_3" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_3: Select all that apply - Statement 3", labels = c("Not selected" = 0, "Selected" = 1)),
                         "Q1_4" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_4: Select all that apply - Statement 4"))

  temp_dict <- create_dict(temp_dat, prefill = TRUE)
  temp_dict <- temp_dict[, !(names(temp_dict) %in% c("old_variable_names", "old_variable_labels", "old_value_labels"))]

  config2 <- list(splitbyclass = FALSE,
                  splitbynumlabelledvalues = TRUE,
                  splitbynoncontiguous = FALSE,
                  splitbycommonlabel = FALSE,
                  findlongest = FALSE,
                  min_lcs_length = 10,
                  min_common_strings = 3,
                  consistent_consecutive_mode = FALSE,
                  consecutive_range = 10,
                  variable_compare_mode = "complete")

  test_result <- split_into_question_groups(temp_dict, temp_dat, variables_to_process = NULL, seps_to_use = seps_to_use, ignorelabelbeforeprefix = TRUE, config = config2)

  expected_test_result <- c("Q1_a", "Q1_a", "Q1_a", "Q1_b")

  expect_equal(test_result$question_group, expected_test_result)

  # test 3 - split by noncontiguous
  temp_dat <- data.frame("Q1_1" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_1: Select all that apply - Statement 1"),
                         "Q1_2" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_2: Select all that apply - Statement 2"),
                         "Q2_1" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q2_1: Select all that apply - Statement 1"),
                         "Q1_3" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_3: Select all that apply - Statement 3"))

  temp_dict <- create_dict(temp_dat, prefill = TRUE)
  temp_dict <- temp_dict[, !(names(temp_dict) %in% c("old_variable_names", "old_variable_labels", "old_value_labels"))]

  config3 <- list (splitbyclass = FALSE,
                   splitbynumlabelledvalues = FALSE,
                   splitbynoncontiguous = TRUE,
                   splitbycommonlabel = FALSE,
                   findlongest = FALSE,
                   min_lcs_length = 10,
                   min_common_strings = 3,
                   consistent_consecutive_mode = FALSE,
                   consecutive_range = 10,
                   variable_compare_mode = "complete")

  test_result <- split_into_question_groups(temp_dict, temp_dat, variables_to_process = NULL, seps_to_use = seps_to_use, ignorelabelbeforeprefix = TRUE, config = config3)

  expected_test_result <- c("Q1_a", "Q1_a", "Q2_a", "Q1_b")

  expect_equal(test_result$question_group, expected_test_result)

  # test 4 - split by commonlabel, no findlongest, no ignoreprefix
  temp_dat <- data.frame("Q1_1" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_1: Select all that apply - Statement 1"),
                         "Q1_2" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_2: Select all that apply - Statement 2"),
                         "Q1_3" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_3: Select all that apply - Statement 3"),
                         "Q1_4" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_4: Click one of these - Statement 4"))

  temp_dict <- create_dict(temp_dat, prefill = TRUE)
  temp_dict <- temp_dict[, !(names(temp_dict) %in% c("old_variable_names", "old_variable_labels", "old_value_labels"))]

  config4 <- list(splitbyclass = FALSE,
                  splitbynumlabelledvalues = FALSE,
                  splitbynoncontiguous = FALSE,
                  splitbycommonlabel = TRUE,
                  findlongest = FALSE,
                  min_lcs_length = 10,
                  min_common_strings = 3,
                  consistent_consecutive_mode = FALSE,
                  consecutive_range = 10,
                  variable_compare_mode = "complete")

  test_result <- split_into_question_groups(temp_dict, temp_dat, variables_to_process = NULL, seps_to_use = seps_to_use, ignorelabelbeforeprefix = FALSE, config = config4)

  expected_test_result <- c("Q1_a", "Q1_b", "Q1_c", "Q1_d")

  expect_equal(test_result$question_group, expected_test_result)

  # test 5 - split by commonlabel, no findlongest, do ignoreprefix
  temp_dat <- data.frame("Q1_1" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_1: Select all that apply - Statement 1"),
                         "Q1_2" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_2: Select all that apply - Statement 2"),
                         "Q1_3" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_3: Select all that apply - Statement 3"),
                         "Q1_4" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_4: Click one of these - Statement 4"))

  temp_dict <- create_dict(temp_dat, prefill = TRUE)
  temp_dict <- temp_dict[, !(names(temp_dict) %in% c("old_variable_names", "old_variable_labels", "old_value_labels"))]

  config5 <- list(splitbyclass = FALSE,
                  splitbynumlabelledvalues = FALSE,
                  splitbynoncontiguous = FALSE,
                  splitbycommonlabel = TRUE,
                  findlongest = FALSE,
                  min_lcs_length = 10,
                  min_common_strings = 3,
                  consistent_consecutive_mode = FALSE,
                  consecutive_range = 10,
                  variable_compare_mode = "complete")

  test_result <- split_into_question_groups(temp_dict, temp_dat, variables_to_process = NULL, seps_to_use = seps_to_use, ignorelabelbeforeprefix = TRUE, config = config5)

  expected_test_result <- c("Q1_a", "Q1_a", "Q1_a", "Q1_b")

  expect_equal(test_result$question_group, expected_test_result)

  # test 6 - split by commonlabel, do findlongest
  temp_dat <- data.frame("Q1_1" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_1: Select all that apply Statement 1"),
                         "Q1_2" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_2: Select all that apply Statement 2"),
                         "Q1_3" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_3: Select all that apply Statement 3"),
                         "Q1_4" = haven::labelled(sample(c(0, 1, NA_real_), n, replace = TRUE), label = "Q1_4: Click one of these Statement 4"))

  temp_dict <- create_dict(temp_dat, prefill = TRUE)
  temp_dict <- temp_dict[, !(names(temp_dict) %in% c("old_variable_names", "old_variable_labels", "old_value_labels"))]

  config6 <- list(splitbyclass = FALSE,
                  splitbynumlabelledvalues = FALSE,
                  splitbynoncontiguous = FALSE,
                  splitbycommonlabel = TRUE,
                  findlongest = TRUE,
                  min_lcs_length = 10,
                  min_common_strings = 3,
                  consistent_consecutive_mode = FALSE,
                  consecutive_range = 10,
                  variable_compare_mode = "complete")

  test_result <- split_into_question_groups(temp_dict, temp_dat, variables_to_process = NULL, seps_to_use = seps_to_use, ignorelabelbeforeprefix = TRUE, config = config6)

  expected_test_result <- c("Q1_a", "Q1_a", "Q1_a", "Q1_b")

  expect_equal(test_result$question_group, expected_test_result)

})

test_that("get_longest_common_substring works", {
  expect_equal(get_longest_common_substring("boat", "boar", fromstart=TRUE)[1], "boa")
  expect_equal(attr(get_longest_common_substring("boat", "boar", fromstart=TRUE), "lcs_distance"), 3)

  expect_equal(get_longest_common_substring("banana", "bandana", fromstart=TRUE)[1], "ban")
  expect_equal(get_longest_common_substring("prefix_statement1", "prefix_statement2", fromstart=TRUE)[1], "prefix_statement")

  # Test fromstart = FALSE
  expect_equal(get_longest_common_substring("banana", "cabana", fromstart=FALSE)[1], "bana")
  expect_equal(attr(get_longest_common_substring("banana", "cabana", fromstart=FALSE), "lcs_distance"), 4)
  expect_equal(get_longest_common_substring("abcde", "fghij", fromstart=FALSE)[1], "")
  expect_equal(get_longest_common_substring("testing", "test", fromstart=FALSE)[1], "test")

  # Edge cases
  expect_equal(get_longest_common_substring("", "abc", fromstart=TRUE)[1], "")
  expect_equal(get_longest_common_substring("abc", "", fromstart=FALSE)[1], "")
  expect_equal(get_longest_common_substring("abc", "abc", fromstart=TRUE)[1], "abc")
})

test_that("get_unique_suffixes works", {

  expect_equal(get_unique_suffixes(create_dict_with_metadata(get_big_test_dat())), c(NA, NA, "a single 'word' of random characters", NA, NA, NA, NA,
                                                                                     "statement 1", "statement 2", "statement 1", "statement 2",
                                                                                     "statement 1 - attribute 1", "statement 1 - attribute 2",
                                                                                     "statement 2 - attribute 1", "statement 2 - attribute 2",
                                                                                     NA, NA, "statement 1", "statement 2",
                                                                                     NA, NA, NA, NA, "statement 1", "statement 2",
                                                                                     "1", "2", "3", "4"))

})

test_that("create_questions_dict works", {
  temp_dpdict <- create_dict_with_metadata(get_big_test_dat(n=10))
  questions_dict <- create_questions_dict(NULL, temp_dpdict, editfirst = FALSE)

  expect_true(is.data.frame(questions_dict))
  expect_named(questions_dict, c("question_group", "question_lcs", "question_alias", "question_description", "question_folder"))
  expect_equal(nrow(questions_dict), length(unique(temp_dpdict$question_group)))
  expect_equal(questions_dict$question_group, unique(temp_dpdict$question_group))
  expect_equal(questions_dict$question_alias, questions_dict$question_group) # Default behaviour

  # Test error handling
  expect_error(create_questions_dict(NULL, temp_dpdict[, "variable_names", drop = FALSE]), "dpdict must include columns 'question_group'")

  # Test behaviour when question_lcs is not present initially (should calculate it)
  temp_dpdict_no_lcs <- temp_dpdict[, !(names(temp_dpdict) %in% "question_lcs")]
  questions_dict_no_lcs <- create_questions_dict(NULL, temp_dpdict_no_lcs, editfirst = FALSE)
  expect_true("question_lcs" %in% names(questions_dict_no_lcs))
  expect_false(any(is.na(questions_dict_no_lcs$question_lcs))) # Should have calculated LCS
})

test_that("get_questions_dict works", {
  temp_dpdict <- create_dict_with_metadata(get_big_test_dat(n=10))
  questions_dict <- get_questions_dict(temp_dpdict)

  expect_true(is.data.frame(questions_dict))
  expect_equal(nrow(questions_dict), length(unique(temp_dpdict$question_group)))
  expect_true(all(c("question_group", "questiontype", "question_alias", "question_description") %in% names(questions_dict)))

  # Check with survey_data object
  survey_obj <- create_survey_data(get_big_test_dat(n=10))
  questions_dict_survey <- get_questions_dict(survey_obj)
  expect_equal(questions_dict, questions_dict_survey)

  # Error handling
  expect_error(get_questions_dict(data.frame(a=1)), "dpdict must at a minimum have a question group variable")
})

test_that("split_grid_labels works", {
  temp_dpdict <- create_dict_with_metadata(get_big_test_dat(n=10))
  alias_to_split <- "labelledmultiresponsegrid_a" # From the example dataset structure
  example_stem <- "statement 1" # Stem to move
  count_before_repeat <- 2 # Number of attributes per statement

  # Find the original alias and suffix for a specific variable in the grid
  original_alias <- temp_dpdict$question_alias[temp_dpdict$question_alias == alias_to_split][1]
  original_suffix <- temp_dpdict$question_suffix[temp_dpdict$question_alias == alias_to_split][1] # e.g., "statement 1 - attribute 1"

  updated_dpdict <- split_grid_labels(temp_dpdict, alias_to_split, example_stem, count_before_repeat)

  # Check the first variable that was part of the split group
  updated_var_row <- updated_dpdict[temp_dpdict$question_alias == alias_to_split, ][1, ]

  # New alias should include the stem
  expect_equal(updated_var_row$question_alias, paste0(original_alias, " - ", example_stem))
  # New suffix should be what was after the stem
  expect_equal(updated_var_row$question_suffix, gsub(paste0(example_stem, " - "), "", original_suffix))

  # Check another variable in the same original grid
  original_suffix_next <- temp_dpdict$question_suffix[temp_dpdict$question_alias == alias_to_split][count_before_repeat + 1] # e.g., "statement 2 - attribute 1"
  next_stem <- gsub(" - .*", "", original_suffix_next) # "statement 2"
  updated_var_row_next <- updated_dpdict[temp_dpdict$question_alias == alias_to_split, ][count_before_repeat + 1, ]
  expect_equal(updated_var_row_next$question_alias, paste0(original_alias, " - ", next_stem))
  expect_equal(updated_var_row_next$question_suffix, gsub(paste0(next_stem, " - "), "", original_suffix_next))

  # Error handling
  expect_error(split_grid_labels(temp_dpdict, "nonexistent_alias", example_stem, count_before_repeat), "`alias_to_split` .* not found")
  expect_warning(split_grid_labels(temp_dpdict, alias_to_split, example_stem, 100), "`count_before_repeat` is greater than the number of variables")
})

# Test survey data object creation and validation
test_that("create_survey_data works", {
  dat <- get_minimal_labelled_test_dat()
  dpdict <- create_dict_with_metadata(dat)

  # Create with explicit dpdict
  survey_obj1 <- create_survey_data(dat, dpdict)
  expect_s3_class(survey_obj1, "survey_data")
  expect_equal(survey_obj1$dat, dat)
  expect_equal(survey_obj1$dpdict, dpdict)

  # Create with automatic dpdict generation
  survey_obj2 <- create_survey_data(dat)
  expect_s3_class(survey_obj2, "survey_data")
  expect_equal(survey_obj2$dat, dat)
  expect_true(is.data.frame(survey_obj2$dpdict))
  expect_equal(nrow(survey_obj2$dpdict), ncol(dat)) # dpdict created

  # Error handling
  expect_error(create_survey_data(as.matrix(dat)), "'dat' must be a data frame")
  expect_error(create_survey_data(dat, as.list(dpdict)), "'dpdict' must be a data frame")
})

test_that("is.survey_data works", {
  survey_obj <- create_survey_data(get_minimal_labelled_test_dat())
  expect_true(is.survey_data(survey_obj))
  expect_false(is.survey_data(survey_obj$dat))
  expect_false(is.survey_data(list(dat=1, dpdict=2)))
})

test_that("validate_survey_data works", {
  survey_obj <- create_survey_data(get_minimal_labelled_test_dat())
  expect_true(validate_survey_data(survey_obj))

  # Test invalid structures
  bad_obj1 <- list(dat=survey_obj$dat, dpdict=as.matrix(survey_obj$dpdict))
  class(bad_obj1) <- "survey_data"
  expect_error(validate_survey_data(bad_obj1), "Both 'dat' and 'dpdict' must be data frames")

  bad_obj2 <- survey_obj
  bad_obj2$dpdict <- bad_obj2$dpdict[-1, ] # Misalign dpdict
  expect_warning(expect_error(validate_survey_data(bad_obj2), "Misalignment between dat and dpdict"))

  bad_obj3 <- survey_obj
  names(bad_obj3$dat)[2] <- names(bad_obj3$dat)[1] # Duplicate names
  bad_obj3$dpdict$variable_names[2] <- bad_obj3$dpdict$variable_names[1] # Duplicate names
  expect_warning(expect_error(validate_survey_data(bad_obj3), "Duplicate variable names"))
})

# Test survey data object methods
test_that("print.survey_data works", {
  survey_obj <- create_survey_data(get_big_test_dat(n=10))
  expect_output(print(survey_obj), "Survey data object:")
  expect_invisible(print(survey_obj)) # Should return object invisibly
})

test_that("[.survey_data works", {
  survey_obj <- create_survey_data(get_big_test_dat(n=5))
  original_names <- names(survey_obj$dat)
  original_nrow <- nrow(survey_obj$dat)

  # Row subsetting
  sub_row <- survey_obj[1:3, ]
  expect_s3_class(sub_row, "survey_data")
  expect_equal(nrow(sub_row$dat), 3)
  expect_equal(ncol(sub_row$dat), ncol(survey_obj$dat))
  expect_equal(nrow(sub_row$dpdict), ncol(survey_obj$dat)) # dpdict rows shouldn't change

  # Column subsetting by index
  sub_col_idx <- survey_obj[, 1:2]
  expect_s3_class(sub_col_idx, "survey_data")
  expect_equal(nrow(sub_col_idx$dat), original_nrow)
  expect_equal(ncol(sub_col_idx$dat), 2)
  expect_equal(names(sub_col_idx$dat), original_names[1:2])
  expect_equal(nrow(sub_col_idx$dpdict), 2)
  expect_equal(sub_col_idx$dpdict$variable_names, original_names[1:2])

  # Column subsetting by name
  sub_col_name <- survey_obj[, c(original_names[1], original_names[3])]
  expect_s3_class(sub_col_name, "survey_data")
  expect_equal(ncol(sub_col_name$dat), 2)
  expect_equal(names(sub_col_name$dat), c(original_names[1], original_names[3]))
  expect_equal(nrow(sub_col_name$dpdict), 2)
  expect_equal(sub_col_name$dpdict$variable_names, c(original_names[1], original_names[3]))

  # Combined subsetting
  sub_both <- survey_obj[1, 1, drop = FALSE] # Ensure drop=FALSE keeps structure
  expect_s3_class(sub_both, "survey_data")
  expect_equal(nrow(sub_both$dat), 1)
  expect_equal(ncol(sub_both$dat), 1)
  expect_equal(nrow(sub_both$dpdict), 1)

  # Error handling
  expect_error(survey_obj[, 1000], "Column subscript out of bounds")
  expect_error(survey_obj[, "nonexistent_col"], "Unknown column names")
})

test_that("[[.survey_data works", {
  survey_obj <- create_survey_data(get_minimal_labelled_test_dat())
  # Extract by name
  expect_equal(survey_obj[["uid"]], survey_obj$dat[["uid"]])
  # Extract by index
  expect_equal(survey_obj[[2]], survey_obj$dat[[2]])

  # Error handling
  expect_error(survey_obj[[100]], "Column subscript out of bounds")
  expect_error(survey_obj[["nonexistent"]], "Unknown column name")
})

test_that("filter.survey_data works", {
  survey_obj <- create_survey_data(get_minimal_labelled_test_dat()) # Assumes 'uid' column exists
  filtered_obj <- filter(survey_obj, uid > 1)

  expect_s3_class(filtered_obj, "survey_data")
  expect_equal(nrow(filtered_obj$dat), nrow(survey_obj$dat) - 1)
  expect_equal(ncol(filtered_obj$dat), ncol(survey_obj$dat))
  expect_equal(filtered_obj$dpdict, survey_obj$dpdict) # dpdict should be unchanged
})

test_that("select.survey_data works", {
  survey_obj <- create_survey_data(get_minimal_labelled_test_dat())
  original_names <- names(survey_obj$dat)

  # Select columns
  selected_obj <- select(survey_obj, uid, csat)
  expect_s3_class(selected_obj, "survey_data")
  expect_equal(names(selected_obj$dat), c("uid", "csat"))
  expect_equal(nrow(selected_obj$dpdict), 2)
  expect_equal(selected_obj$dpdict$variable_names, c("uid", "csat"))

  # Select and rename
  renamed_obj <- select(survey_obj, user_id = uid, csat)
  expect_s3_class(renamed_obj, "survey_data")
  expect_equal(names(renamed_obj$dat), c("user_id", "csat"))
  expect_equal(nrow(renamed_obj$dpdict), 2)
  expect_equal(renamed_obj$dpdict$variable_names, c("user_id", "csat"))
  # Check that the row corresponding to 'uid' in the original dpdict now has name 'user_id'
  expect_equal(renamed_obj$dpdict$variable_labels[renamed_obj$dpdict$variable_names == "user_id"],
               survey_obj$dpdict$variable_labels[survey_obj$dpdict$variable_names == "uid"])
})

test_that("mutate.survey_data works", {
  survey_obj <- create_survey_data(get_minimal_labelled_test_dat()) # Has 'uid' column

  # Add a new column with a custom label
  custom_label <- "Multiplied ID Value"
  mutated_obj_custom_label <- mutate(survey_obj, new_col = realiselabelled_vec(uid * 10, variable_label = custom_label))

  expect_s3_class(mutated_obj_custom_label, "survey_data")
  expect_true("new_col" %in% names(mutated_obj_custom_label$dat))
  expect_equal(sjlabelled::remove_label(mutated_obj_custom_label$dat$new_col),
               sjlabelled::remove_label(survey_obj$dat$uid * 10))

  # Check if custom label was applied to the variable
  expect_equal(sjlabelled::get_label(mutated_obj_custom_label$dat$new_col), custom_label)

  # Check if custom label was recorded in dpdict
  new_row_dpdict <- mutated_obj_custom_label$dpdict[mutated_obj_custom_label$dpdict$variable_names == "new_col", ]
  expect_equal(new_row_dpdict$variable_labels, custom_label)

  # Modify an existing column with a new label
  new_uid_label <- "Modified User ID"
  mutated_obj_relabel <- mutate(survey_obj, uid = realiselabelled_vec(uid + 1, variable_label = new_uid_label))

  expect_s3_class(mutated_obj_relabel, "survey_data")
  # Corrected test - compare values after removing labels
  expect_equal(sjlabelled::remove_label(mutated_obj_relabel$dat$uid),
               sjlabelled::remove_label(survey_obj$dat$uid + 1))

  # Check if the new label was applied to the modified variable
  expect_equal(sjlabelled::get_label(mutated_obj_relabel$dat$uid), new_uid_label)

  # Check if the new label was recorded in dpdict
  modified_row_dpdict <- mutated_obj_relabel$dpdict[mutated_obj_relabel$dpdict$variable_names == "uid", ]
  expect_equal(modified_row_dpdict$variable_labels, new_uid_label)

  # Test multiple operations in one mutate call
  # Only run if get_minimal_labelled_test_dat() has at least one column of each type
  skip_if_not(all(c("uid", "csat", "nps_response") %in% names(survey_obj$dat)),
              "Test data doesn't have required columns")

  multi_obj <- mutate(survey_obj,
                      uid = realiselabelled_vec(uid + 1, variable_label = "New UID Label"),
                      new_var1 = realiselabelled_vec(csat * 2, variable_label = "Double CSAT"),
                      new_var2 = nps_response * 3)  # No custom label

  expect_true(all(c("uid", "new_var1", "new_var2") %in% names(multi_obj$dat)))
  expect_equal(sjlabelled::get_label(multi_obj$dat$uid), "New UID Label")
  expect_equal(sjlabelled::get_label(multi_obj$dat$new_var1), "Double CSAT")
  expect_equal(sjlabelled::get_label(multi_obj$dat$new_var2), "NPS response (1)")  # Default label
})

# Test: Value labels preserved with !!!-spliced variables
test_that("mutate preserves value labels from spliced variables", {

  # Create original survey data
  original_dat <- data.frame(
    id = 1:3,
    existing_var = c(1, 2, 1)
  )
  attr(original_dat$existing_var, "label") <- "Existing Variable"
  attr(original_dat$existing_var, "labels") <- c("No" = 1, "Yes" = 2)

  survey_obj <- suppressWarnings(create_survey_data(original_dat))

  # Create a named list with value labels (like a1a_wide)
  new_vars_list <- list(
    new_var1 = c(2, 1, 2),
    new_var2 = c(1, 1, 2)
  )

  # Add value labels to the new variables
  attr(new_vars_list$new_var1, "label") <- "New Variable 1"
  attr(new_vars_list$new_var1, "labels") <- c("Low" = 1, "High" = 2)

  attr(new_vars_list$new_var2, "label") <- "New Variable 2"
  attr(new_vars_list$new_var2, "labels") <- c("Bad" = 1, "Good" = 2)

  # Test the mutation with splicing
  result <- suppressWarnings(survey_obj %>%
    mutate(!!!new_vars_list))

  # Check that new variables exist
  expect_true("new_var1" %in% names(result$dat))
  expect_true("new_var2" %in% names(result$dat))

  # Check that value labels are preserved
  expect_equal(attr(result$dat$new_var1, "labels"), c("Low" = 1, "High" = 2))
  expect_equal(attr(result$dat$new_var2, "labels"), c("Bad" = 1, "Good" = 2))

  # Check that variable labels are preserved
  expect_equal(attr(result$dat$new_var1, "label"), "New Variable 1")
  expect_equal(attr(result$dat$new_var2, "label"), "New Variable 2")

  # Check that original variable labels are still intact
  expect_equal(attr(result$dat$existing_var, "labels"), c("No" = 1, "Yes" = 2))
  expect_equal(attr(result$dat$existing_var, "label"), "Existing Variable")

  # Check that dpdict is updated correctly
  expect_true("new_var1" %in% result$dpdict$variable_names)
  expect_true("new_var2" %in% result$dpdict$variable_names)
  expect_equal(result$dpdict$variable_labels[result$dpdict$variable_names == "new_var1"], "New Variable 1")
  expect_equal(result$dpdict$variable_labels[result$dpdict$variable_names == "new_var2"], "New Variable 2")
})


test_that("mutate updates dpdict when transforming existing labeled variables", {
  # Create initial survey data with labeled variables
  dat <- get_minimal_labelled_test_dat(50)
  survey_obj <- create_survey_data(dat)

  # Verify original csat has value labels in both data and dpdict
  original_csat_labels <- sjlabelled::get_labels(survey_obj$dat$csat, attr.only = TRUE, values = "as.name")
  csat_idx <- which(survey_obj$dpdict$variable_names == "csat")
  original_dpdict_labels <- survey_obj$dpdict$value_labels[[csat_idx]]

  expect_false(all(is.na(original_csat_labels)))
  expect_false(all(is.na(original_dpdict_labels)))

  # Use mutate to create a copy of csat (simple assignment that should preserve labels)
  survey_obj <- suppressWarnings(survey_obj %>%
    dplyr::mutate(csat_copy = csat))  # Simple assignment should preserve labels

  # Check that value labels are preserved in the data
  copy_labels_in_data <- sjlabelled::get_labels(survey_obj$dat$csat_copy, attr.only = TRUE, values = "as.name")
  expect_false(all(is.na(copy_labels_in_data)))
  expect_equal(copy_labels_in_data, original_csat_labels)

  # Check that value labels are captured in dpdict
  copy_idx <- which(survey_obj$dpdict$variable_names == "csat_copy")
  copy_labels_in_dpdict <- survey_obj$dpdict$value_labels[[copy_idx]]

  expect_false(all(is.na(copy_labels_in_dpdict)))
  expect_equal(copy_labels_in_dpdict, copy_labels_in_data)

  # Check that has_value_labels is TRUE
  expect_true(survey_obj$dpdict$has_value_labels[copy_idx])
})


test_that("left_join works for survey_data objects", {
  # Create base survey_data object
  base_data <- get_minimal_labelled_test_dat()
  survey1 <- create_survey_data(base_data)

  # Test 1: Join with another survey_data object
  survey2_dat <- data.frame(
    uid = c(1, 3, 5),
    new_var = c("a", "b", "c"),
    stringsAsFactors = FALSE
  )
  attr(survey2_dat$new_var, "label") <- "New Variable"
  survey2 <- suppressWarnings(create_survey_data(survey2_dat))

  result1 <- left_join(survey1, survey2, by = "uid")

  expect_s3_class(result1, "survey_data")
  expect_true("new_var" %in% names(result1$dat))
  expect_equal(nrow(result1$dat), nrow(survey1$dat))
  expect_equal(attr(result1$dat$new_var, "label"), "New Variable")
  expect_true("new_var" %in% result1$dpdict$variable_names)

  # Test 2: Join with regular data frame
  df2 <- data.frame(
    uid = 1:10,
    score = rnorm(10)
  )

  result2 <- left_join(survey1, df2, by = "uid")

  expect_s3_class(result2, "survey_data")
  expect_true("score" %in% names(result2$dat))
  expect_true("score" %in% result2$dpdict$variable_names)
  expect_equal(result2$dpdict$variable_labels[result2$dpdict$variable_names == "score"], "score")

  # Test 3: Handle column conflicts
  survey3_dat <- data.frame(
    uid = 1:5,
    csat = 5:1  # Conflicts with existing csat
  )
  attr(survey3_dat$csat, "label") <- "Customer Satisfaction (New)"
  survey3 <- suppressWarnings(create_survey_data(survey3_dat))

  expect_message(
    result3 <- left_join(survey1, survey3, by = "uid"),
    "naming conflicts"
  )

  expect_true("csat.x" %in% names(result3$dat))
  expect_true("csat.y" %in% names(result3$dat))
  expect_true(all(c("csat.x", "csat.y") %in% result3$dpdict$variable_names))
  expect_false("csat" %in% result3$dpdict$variable_names)

  # Check that the suffixed versions have appropriate labels
  expect_true(grepl("\\(x\\)", result3$dpdict$variable_labels[result3$dpdict$variable_names == "csat.x"]))
  expect_true(grepl("\\(y\\)", result3$dpdict$variable_labels[result3$dpdict$variable_names == "csat.y"]))
})