## Convenience function tests ## ## Many user-facing workflows rely on small string/label utilities. ## The tests in this file check core behaviour and edge cases (errors, NA/empty handling, and options). test_that("any_grepl works", { # Error cases expect_error(any_grepl(1:3, c("big cat", "small dog", "red fish")), "must be character vectors") expect_error(any_grepl(c("cat", "dog"), 1:3), "must be character vectors") expect_error(any_grepl(character(0), c("cat", "dog")), "must have length > 0") expect_error(any_grepl(c("cat", "dog"), character(0)), "must have length > 0") # Simple match expect_equal(any_grepl(c("cat"), c("cat dog", "hat dog")), c(TRUE, FALSE)) # Test with multiple patterns, none found expect_equal(any_grepl(c("apple", "pear"), c("cat dog", "hat dog")), c(FALSE, FALSE)) # Overlapping patterns expect_equal(any_grepl(c("dog", "do"), c("dog", "cat", "do")), c(TRUE, FALSE, TRUE)) # NA handling expect_equal(any_grepl(c("cat", NA, "dog"), c("big cat", NA, "small dog", "red fish", NA)), c(TRUE, TRUE, TRUE, FALSE, TRUE)) # Empty string handling expect_equal(any_grepl(c("cat", "", "dog"), c("big cat", "", "small dog", "red fish", "")), c(TRUE, TRUE, TRUE, FALSE, TRUE)) # Only NA in pattern_vector expect_equal(any_grepl(NA_character_, c("big cat", NA, "small dog", "red fish", NA)), c(FALSE, TRUE, FALSE, FALSE, TRUE)) # Multiple patterns, multiple found expect_equal(any_grepl(c("cat", "shoe"), c("shoe dog", "hat dog", "cat dog")), c(TRUE, FALSE, TRUE)) # Case sensitivity expect_equal(any_grepl(c("CAT"), c("cat", "Cat", "CAT"), ignore.case = FALSE), c(FALSE, FALSE, TRUE)) expect_equal(any_grepl(c("CAT"), c("cat", "Cat", "CAT"), ignore.case = TRUE), c(TRUE, TRUE, TRUE)) # Special characters expect_equal(any_grepl(c("^cat"), c("cat", "1 cat", "cattle")), c(TRUE, FALSE, TRUE)) expect_equal(any_grepl(c("^cat"), c("cat", "1 cat", "cattle"), fixed = TRUE), c(FALSE, FALSE, FALSE)) # perl expect_equal(any_grepl("foo(?=bar)", c("foobar", "foobaz", "barfoo"), perl = TRUE), c(TRUE, FALSE, FALSE)) }) test_that("any_gsub works", { # Error cases expect_error(any_gsub(1:3, "ANIMAL", c("big cat", "small dog", "red fish"), trimws_at_end = TRUE), "must be character vectors") expect_error(any_gsub(c("cat", "dog"), "ANIMAL", 1:3, trimws_at_end = TRUE), "must be character vectors") expect_error(any_gsub(character(0), "ANIMAL", c("cat", "dog"), trimws_at_end = TRUE), "must have length > 0") expect_error(any_gsub(c("cat", "dog"), "ANIMAL", character(0), trimws_at_end = TRUE), "must have length > 0") # Simple match expect_equal(any_gsub(c("cat"), "", c("cat dog", "hat dog"), trimws_at_end = TRUE), c("dog", "hat dog")) # Multiple patterns, multiple found expect_equal(any_gsub(c("cat", "shoe"), "", c("shoe dog", "hat dog", "cat dog"), trimws_at_end = TRUE), c("dog", "hat dog", "dog")) # Test with multiple patterns, none found expect_equal(any_gsub(c("apple", "pear"), "", c("cat dog", "hat dog"), trimws_at_end = TRUE), c("cat dog", "hat dog")) # Overlapping patterns expect_equal(any_gsub(c("at", "dog"), "", c("shoe dog", "hat dog", "cat dog"), trimws_at_end = TRUE), c("shoe", "h", "c")) # NA handling expect_equal( any_gsub(c("cat", NA, "dog"), "REPLACED", c("big cat", NA, "small dog", "red fish", NA), trimws_at_end = TRUE), c("big REPLACED", "REPLACED", "small REPLACED", "red fish", "REPLACED") ) # Empty string handling expect_equal( any_gsub(c("cat", "", "dog"), "REPLACED", c("big cat", "", "small dog", "red fish", ""), trimws_at_end = TRUE), c("big REPLACED", "REPLACED", "small REPLACED", "red fish", "REPLACED") ) # Only NA in pattern_vector expect_equal( any_gsub(NA_character_, "REPLACED", c("big cat", NA, "small dog", "red fish", NA), trimws_at_end = TRUE), c("big cat", "REPLACED", "small dog", "red fish", "REPLACED") ) # Only empty string in pattern_vector expect_equal( any_gsub("", "REPLACED", c("big cat", "", "small dog", "red fish", ""), trimws_at_end = TRUE), c("big cat", "REPLACED", "small dog", "red fish", "REPLACED") ) # Complex case with mix of NAs, empty strings, and regular patterns expect_equal( any_gsub(c("cat", NA, "", "fish"), "REPLACED", c("big cat", NA, "", "small dog", "red fish", NA, ""), trimws_at_end = TRUE), c("big REPLACED", "REPLACED", "REPLACED", "small dog", "red REPLACED", "REPLACED", "REPLACED") ) # Trim option false expect_equal( any_gsub(c("cat", "dog"), "ANIMAL ", c("big cat ", "small dog", "red fish "), trimws_at_end = FALSE), c("big ANIMAL ", "small ANIMAL ", "red fish ") ) # Case sensitivity expect_equal(any_gsub(c("CAT"), "replacement", c("cat", "Cat", "CAT"), trimws_at_end = TRUE, ignore.case = FALSE), c("cat", "Cat", "replacement")) expect_equal(any_gsub(c("CAT"), "replacement", c("cat", "Cat", "CAT"), trimws_at_end = TRUE, ignore.case = TRUE), c("replacement", "replacement", "replacement")) # Special characters expect_equal(any_gsub(c("^cat"), "", c("cat", "1 cat", "cattle"), trimws_at_end = TRUE), c("", "1 cat", "tle")) expect_equal(any_gsub(c("^cat"), "", c("cat", "1 cat", "cattle"), trimws_at_end = TRUE, fixed = TRUE), c("cat", "1 cat", "cattle")) # perl expect_equal(any_gsub("foo(?=bar)", "", c("foobar", "foobaz", "barfoo"), trimws_at_end = TRUE, perl = TRUE), c("bar", "foobaz", "barfoo")) }) test_that("strsplit_keep_delim works", { # Simple case expect_equal(strsplit_keep_delim("a-b-c", "-"), c("a", "-", "b", "-", "c")) # Where delimiter not found expect_equal(strsplit_keep_delim("abc", "-"), c("abc")) # Special character delimiter expect_equal(strsplit_keep_delim("a|b|c", "|"), c("a", "|", "b", "|", "c")) # Where delimiter also at start expect_equal(strsplit_keep_delim("-a-b-c", "-"), c("-", "a", "-", "b", "-", "c")) # Where delimiter at end expect_equal(strsplit_keep_delim("abc-", "-"), c("abc", "-")) # Where delimiter at start and end expect_equal(strsplit_keep_delim("-a-b-c-", "-"), c("-", "a", "-", "b", "-", "c", "-")) # Repeated delimiters expect_equal(strsplit_keep_delim("a--b-c", "-"), c("a", "-", "-", "b", "-", "c")) # Delimiter of length 0 expect_equal(strsplit_keep_delim("abc", ""), c("a", "b", "c")) # No string given expect_error(strsplit_keep_delim("", "-")) }) test_that("insert_into_str works", { # Simple single insert before expect_equal(insert_into_str("this is a test", "new ", "test"), "this is a new test") # Insert after expect_equal(insert_into_str("this is a test", " 2.0", "test", position = "after"), "this is a test 2.0") # Multiple insert all before expect_equal(insert_into_str("this is a test, a good test, not a bad test", "new ", "test"), "this is a new test, a good new test, not a bad new test") # Multiple insert specific instances before expect_equal(insert_into_str("this is a test, a good test, not a bad test", "new ", "test", instances_to_insert_at = c(1, 3)), "this is a new test, a good test, not a bad new test") # Insert at start expect_equal(insert_into_str("testing... yes this is a test", "new ", "test"), "new testing... yes this is a new test") # No string_to_find given expect_warning( result <- insert_into_str("this is a", " test", "elephant"), "'elephant' not found in 'this is a'. Inserting at end instead." ) expect_equal(result, "this is a test") # more instances_to_insert_at than instances found, strict_mode == FALSE expect_equal(insert_into_str("this is a test, a good test, not a bad test", "new ", "test", strict_mode = FALSE, instances_to_insert_at = c(1, 3, 4)), "this is a new test, a good test, not a bad new test") # more instances_to_insert_at than instances found, strict_mode == TRUE expect_error(insert_into_str("this is a test, a good test, not a bad test", "new ", "test", strict_mode = TRUE, instances_to_insert_at = c(1, 3, 4))) }) test_that("gsub_by_dict works", { # data frame expect_equal(gsub_by_dict(data.frame(find = c("an apple", "a carrot"), replace = c("a fruit", "a vegetable")), "I have an apple and a carrot.", "find", "replace"), "I have a fruit and a vegetable.") # named list expect_equal(gsub_by_dict(list("an apple" = "a fruit", "a carrot" = "a vegetable"), "I have an apple and a carrot."), "I have a fruit and a vegetable.") # neither a data frame nor a named list expect_error(gsub_by_dict(c("an apple", "a carrot"), "I have an apple and a carrot.")) # duplicate keys in data frame expect_warning( result <- gsub_by_dict(data.frame(find = c("an apple", "an apple"), replace = c("a fruit", "a vegetable")), "I have an apple and a carrot.", "find", "replace"), "Found duplicate values in reference_dict\\[\\[find_column_name\\]\\]. will only use first match" ) expect_equal(result, "I have a fruit and a carrot.") # duplicate keys in data frame expect_equal(gsub_by_dict(list("an apple" = "a fruit", "a carrot" = "a vegetable"), "I have an apple and a carrot."), "I have a fruit and a vegetable.") }) test_that("query_dict works", { # Simple single match expect_equal(query_dict(data.frame(item = c("apple", "carrot"), price = c("£0.20","£0.50")), key_label = "item", key = "apple", value_label = "price"), "£0.20") # Multiple matching key values expect_equal(query_dict(data.frame(item = c("apple", "apple", "carrot"), price = c("£0.20", "£0.30", "£0.50")), key_label = "item", key = "apple", value_label = "price"), c("£0.20", "£0.30")) }) test_that("remove_trailing_characters works", { # Simple case expect_equal(remove_trailing_characters("SC10ab", c("a", "b")), "SC10") }) test_that("number_to_word_rank works", { expect_equal(number_to_word_rank(1), "first") }) test_that("word_rank_to_number works", { # Number word expect_equal(word_rank_to_number("one"), 1) expect_equal(word_rank_to_number("fifty"), 50) # Ranking word expect_equal(word_rank_to_number("first"), 1) expect_equal(word_rank_to_number("fiftieth"), 50) }) test_that("null_to_NA works", { expect_equal(null_to_NA(list(NULL, 1, 2, 3, NULL)), list(NA, 1, 2, 3, NA)) }) test_that("check_for_common_alphabets works", { expect_equal(check_for_common_alphabets("words, слова, كلمات, 漢語, বাংলা, देवनागरी"), "Latin, Cyrillic, Arabic, Chinese, Bengali, Devanagari") }) test_that("sub_char_by_index works", { # Single character expect_equal(sub_char_by_index("testing", 3, "x"), "texting") # Removal expect_equal(sub_char_by_index("testing", 3, ""), "teting") # Range expect_equal(sub_char_by_index("testing", 3:4, "em"), "teeming") }) test_that("closest_matching_string works", { # Single match expect_equal(closest_matching_string(c("ant", "dog", "cat"), "dug"), "dog") # multiple matches expect_warning( result <- closest_matching_string(c("ant", "dog", "cat"), "ruf"), "Multiple strings with the same distance found. Returning the first instance." ) expect_equal(result, "ant") }) test_that("bind_cols preserves variable/value labels and other attributes", { v1 <- sjlabelled::set_labels(c(1, 2), labels = c("Yes" = 1, "No" = 2)) attr(v1, "label") <- "Q1 - response" attr(v1, "foo") <- "bar" v2 <- sjlabelled::set_labels(c(1, 2), labels = c("A" = 1, "B" = 2)) attr(v2, "label") <- "Q2 - response" df1 <- data.frame(a = v1) df2 <- data.frame(b = v2) res <- surveydatar:::bind_cols(df1, df2) expect_equal(sjlabelled::get_label(res$a), "Q1 - response") expect_equal(unname(sjlabelled::get_labels(res$a, values = "as.name")["1"]), "Yes") expect_equal(unname(sjlabelled::get_labels(res$a, values = "as.name")["2"]), "No") expect_equal(attr(res$a, "foo"), "bar") expect_equal(sjlabelled::get_label(res$b), "Q2 - response") expect_equal(unname(sjlabelled::get_labels(res$b, values = "as.name")["1"]), "A") expect_equal(unname(sjlabelled::get_labels(res$b, values = "as.name")["2"]), "B") }) test_that("bind_rows preserves metadata, merges value labels, and prefers first on conflicts", { df1 <- data.frame( a = sjlabelled::set_labels(c(1, 1), labels = c("Yes" = 1)) ) attr(df1$a, "label") <- "Label A" # Conflicting variable label; value labels add a new code and conflict on code 1 text df2 <- data.frame( a = sjlabelled::set_labels(c(1, 2), labels = c("Yea" = 1, "No" = 2)) ) attr(df2$a, "label") <- "Label A (changed)" expect_warning( res <- surveydatar:::bind_rows(df1, df2, .id = "src"), "bind_rows metadata conflicts detected" ) # Variable label prefers first dataset expect_equal(sjlabelled::get_label(res$a), "Label A") # Value labels merged; code 1 retains first dataset's text, code 2 added from second labs <- sjlabelled::get_labels(res$a, values = "as.name") expect_equal(unname(labs["1"]), "Yes") expect_equal(unname(labs["2"]), "No") }) test_that("bind_rows with identical metadata emits no warning and preserves labels", { df1 <- data.frame(a = sjlabelled::set_labels(c(1, 2), labels = c("Yes" = 1, "No" = 2))) attr(df1$a, "label") <- "Label A" df2 <- data.frame(a = sjlabelled::set_labels(c(2, 1), labels = c("Yes" = 1, "No" = 2))) attr(df2$a, "label") <- "Label A" expect_silent({ res <- surveydatar:::bind_rows(df1, df2) }) expect_equal(sjlabelled::get_label(res$a), "Label A") labs <- sjlabelled::get_labels(res$a, values = "as.name") expect_equal(unname(labs["1"]), "Yes") expect_equal(unname(labs["2"]), "No") }) test_that("bind_rows works with survey_data and preserves dpdict", { s1 <- create_survey_data(get_minimal_labelled_test_dat()) s2 <- create_survey_data(get_minimal_labelled_test_dat()) expect_silent({ res <- surveydatar:::bind_rows(s1, s2, .id = "src") }) expect_true(is.survey_data(res)) # dpdict should be unchanged (identical to first's dpdict) expect_equal(res$dpdict, s1$dpdict) # .id should be present on dat and reflect two sources expect_true("src" %in% names(res$dat)) expect_setequal(unique(res$dat$src), c("1", "2")) }) test_that("bind_cols works with survey_data and updates dpdict", { s <- create_survey_data(get_minimal_labelled_test_dat()) newvec <- rep(c(1, 2), length.out = nrow(s$dat)) newvec <- sjlabelled::set_labels(newvec, labels = c("X" = 1, "Y" = 2)) attr(newvec, "label") <- "New Var" res <- suppressWarnings(surveydatar:::bind_cols(s, data.frame(newvar = newvec))) expect_true(is.survey_data(res)) expect_true("newvar" %in% names(res$dat)) expect_equal(sjlabelled::get_label(res$dat$newvar), "New Var") # dpdict includes new variable with correct label expect_true("newvar" %in% res$dpdict$variable_names) idx <- which(res$dpdict$variable_names == "newvar") expect_equal(res$dpdict$variable_labels[idx], "New Var") }) test_that("labelled_map_dfc works", { unlabelled_test_dat <- get_minimal_unlabelled_test_dat() labelled_test_dat <- get_minimal_labelled_test_dat() # Fully labelled labelled_result <- labelled_map_dfc(labelled_test_dat, function(x) ifelse(is.na(x), 0, x)) expect_equal(sjlabelled::get_label(labelled_result$uid), sjlabelled::get_label(labelled_test_dat$uid)) expect_equal(sjlabelled::get_label(labelled_result$csat), sjlabelled::get_label(labelled_test_dat$csat)) expect_equal(sjlabelled::get_labels(labelled_result$csat), sjlabelled::get_labels(labelled_test_dat$csat)) # No labels unlabelled_result <- labelled_map_dfc(unlabelled_test_dat, function(x) ifelse(is.na(x), 0, x)) expect_null(attr(unlabelled_result$uid, "label")) expect_null(attr(unlabelled_result$csat, "label")) expect_null(attr(unlabelled_result$csat, "labels")) # Mix of labelled and unlabelled partially_labelled_test_dat <- unlabelled_test_dat partially_labelled_test_dat$uid <- sjlabelled::remove_label(partially_labelled_test_dat$uid) partially_labelled_result <- labelled_map_dfc(partially_labelled_test_dat, function(x) ifelse(is.na(x), 0, x)) expect_null(attr(partially_labelled_result$uid, "label")) expect_equal(sjlabelled::get_label(partially_labelled_result$csat), sjlabelled::get_label(partially_labelled_test_dat$csat)) expect_equal(sjlabelled::get_labels(partially_labelled_result$csat), sjlabelled::get_labels(partially_labelled_test_dat$csat)) }) test_that("value_from_labels works", { labelled_test_dat <- get_minimal_labelled_test_dat() # Simple match expect_equal(value_from_labels(labelled_test_dat$csat, "Neutral"), 3) # No match expect_equal(value_from_labels(labelled_test_dat$csat, "shoe"), NA) }) test_that("merge_left function works correctly", { # Create sample data frames df1 <- data.frame(id = c(1, 2, 3), x = c("a", "b", "c"), y = c(10, 20, 30)) df2 <- data.frame(id = c(1, 2, 4), x = c("A", "B", "D"), z = c(100, 200, 400)) # Test overwrite = TRUE result1 <- merge_left(df1, df2, by = "id", overwrite = TRUE) expected1 <- data.frame(id = c(1, 2, 3), x = c("A", "B", "c"), y = c(10, 20, 30), z = c(100, 200, NA)) expect_equal(result1, expected1) # Test overwrite = FALSE result2 <- merge_left(df1, df2, by = "id", overwrite = FALSE) expected2 <- data.frame(id = c(1, 2, 3), x = c("a", "b", "c"), y = c(10, 20, 30), z = c(100, 200, NA)) expect_equal(result2, expected2) }) test_that("check_key_consistency function works correctly", { # Create sample data frames consistent_df <- data.frame(id = c(1, 1, 2, 2, 3), value1 = c("A", "A", "B", "B", "C"), value2 = c(10, 10, 20, 20, 30)) consistent_result <- TRUE one_inconsistent_df <- data.frame(id = c(1, 1, 2, 2, 3), value1 = c("A", "A", "B", "C", "D"), value2 = c(10, 10, 20, 20, 30)) one_inconsistent_result <- one_inconsistent_df[c(3, 4), c(1, 2)] two_inconsistent_df <- data.frame(id = c(1, 1, 2, 2, 3), value1 = c("A", "A", "B", "C", "D"), value2 = c(10, 15, 20, 20, 30)) two_inconsistent_result <- two_inconsistent_df[c(1, 2, 3, 4), c(1, 2, 3)] # consistent result1 <- check_key_consistency(consistent_df, "id") expect_equal(result1, consistent_result) # one inconsistent result2 <- check_key_consistency(one_inconsistent_df, "id") expect_equal(result2$status, "Key is inconsistent") expect_equal(result2$problematic_data, one_inconsistent_result) # two inconsistent result3 <- check_key_consistency(two_inconsistent_df, "id") expect_equal(result3$status, "Key is inconsistent") expect_equal(result3$problematic_data, two_inconsistent_result) }) test_that("concatenate_by_group works correctly", { temp_dat <- dplyr::tibble( ID = c(1, 1, 2, 2, 3), Name = c("John", "John", "Alice", "Alice", "Bob"), City = c("New York", "Los Angeles", "Chicago", "Chicago", "Houston"), Age = c(25, 25, 30, 30, 35) ) expected_output <- dplyr::tibble( ID = c(1, 2, 3), concat_Name = c("John", "Alice", "Bob"), concat_City = c("Los Angeles, New York", "Chicago", "Houston"), concat_Age = c("25", "30", "35") ) result <- concatenate_by_group(temp_dat, "ID") expect_equal(result, expected_output) })