test_that("vector elements are converted to sentence form", {
  # One, two and three elements exercise the "no separator", "and" only and
  # "comma plus and" forms respectively.
  one_element <- .vector_to_sentence(LETTERS[1])
  two_elements <- .vector_to_sentence(LETTERS[1:2])
  three_elements <- .vector_to_sentence(LETTERS[1:3])

  expect_equal(one_element, "A")
  expect_equal(two_elements, "A and B")
  expect_equal(three_elements, "A, B and C")
})

test_that(".determine functions correctly identify cells", {
  # Input fixture ---------------------------------------------------------
  # Every column has nine cells; each column represents one kind of content
  # that the .determine_* helpers have to classify cell by cell.
  input_df <- data.frame(
    # Columns with all empty cells should remain the same
    empty = c(rep("", 8), NA),
    # Columns that are already numeric should stay numeric
    numeric1 = c(1, 5, 26.25, 123, -15677, 23.45, 67, 45, NA),
    numeric2 = c(1:7, -8001L, NA),
    # Character columns holding nothing but numbers should be converted to
    # numeric
    number_character = c(
      "1", "-1.345", "2322.456", "12.44", NA,
      "- 23.546", " - 23", " 232.3 ", "0.12 "
    ),
    # Character columns where a single cell mixes numbers and text should not
    # be converted to numeric
    mixed_text_numbers = c(
      "12", "-15.6", "-13.4 [u]", " 1800.1 [note2]", NA,
      "[c]", "13.7[u][bc] ", "78.6 [note 1][Note 4]", "12.7 [note1, note2]"
    ),
    # Character columns made up of number cells and note cells should be
    # converted to numeric
    number_notes = c(
      "1", " -3.3", "-2834.459 ", "-23,456", "[c]",
      "[-0.3cs] [g f]", "[ as][f]", "[dvb12,v.]", "7.8"
    ),
    # Money columns should be converted to numeric, including those that also
    # contain notes or plain numbers.
    # Currency, every symbol is a pound sign
    currency_single = c(
      NA, "-£12.3", "£13.556", "£0.6", "£-13",
      "£15001", "£19,000.12", " £ 12.3", "£- 12 "
    ),
    # Currency, a mixture of symbols
    currency_multiple = c(
      "£12.3", NA, "-£ 13.559", "$0.6", "£13",
      " £15,001", " €19,000.12", " £12.3", "12 "
    ),
    # Currency together with notes
    currency_notes = c(
      "£12.3", NA, "£ 13.55", "-$0.6", " [-£0.2] [cvb] ",
      " £15,001", " €19,000.12", " £12.3", "12 $"
    ),
    # Columns containing only notes should stay as character
    notes = c(
      "[a]", "[abv] [efg]", " [note 1]", "[c] ", "[abc][efg]",
      "[ab cd]", "[a][b][c]", "[a$]", "[12]"
    )
  )

  # Mask builders -----------------------------------------------------------
  # Expected results are logical vectors of length nine, described here by the
  # row positions that are TRUE (only_at) or FALSE (all_but).
  n_cells <- 9L
  none <- rep(FALSE, n_cells)
  every <- rep(TRUE, n_cells)
  only_at <- function(positions) seq_len(n_cells) %in% positions
  all_but <- function(positions) !only_at(positions)

  # Expected classifications ------------------------------------------------
  expected_notes_cells <- data.frame(
    empty = none,
    numeric1 = none,
    numeric2 = none,
    number_character = none,
    mixed_text_numbers = only_at(6),
    number_notes = only_at(5:8),
    currency_single = none,
    currency_multiple = none,
    currency_notes = only_at(5),
    notes = every
  )

  expected_numeric_cells <- data.frame(
    empty = none,
    numeric1 = all_but(9),
    numeric2 = all_but(9),
    number_character = all_but(5),
    mixed_text_numbers = only_at(1:2),
    number_notes = all_but(5:8),
    currency_single = none,
    currency_multiple = only_at(9),
    currency_notes = none,
    notes = none
  )

  expected_currency <- data.frame(
    empty = none,
    numeric1 = none,
    numeric2 = none,
    number_character = none,
    mixed_text_numbers = none,
    number_notes = none,
    currency_single = all_but(1),
    currency_multiple = all_but(c(2, 9)),
    currency_notes = all_but(c(2, 5)),
    notes = none
  )

  expected_empty <- data.frame(
    empty = every,
    numeric1 = only_at(9),
    numeric2 = only_at(9),
    number_character = only_at(5),
    mixed_text_numbers = only_at(5),
    number_notes = none,
    currency_single = only_at(1),
    currency_multiple = only_at(2),
    currency_notes = only_at(2),
    notes = none
  )

  # Cell-level checks -------------------------------------------------------
  detected_notes <- .determine_note_cells(input_df)
  expect_equal(detected_notes, expected_notes_cells)

  detected_numeric <- .determine_numeric_cells(input_df)
  expect_equal(detected_numeric, expected_numeric_cells)

  detected_currency <- .determine_currency_cells(input_df)
  expect_equal(detected_currency, expected_currency)

  detected_empty <- .determine_empty_cells(input_df)
  expect_equal(detected_empty, expected_empty)

  # Column-level check ------------------------------------------------------
  # numeric_columns lists every column that needs to be output as numeric,
  # which includes currency columns and columns mixing numbers/currency with
  # notes.
  table_datatypes <- .determine_table_datatypes(input_df)
  expected_numeric_columns <- c(
    "numeric1", "numeric2", "number_character", "number_notes",
    "currency_single", "currency_multiple", "currency_notes"
  )
  expect_equal(table_datatypes$numeric_columns, expected_numeric_columns)
})

test_that("table cleaning functions work as intended", {
  # .replace_currency_units ---------------------------------------------------
  # Only col1 is passed to the function, so col2 is expected to keep its
  # currency symbols.
  currency_col2 <- c(" €19,000.12", "£12.30", "£12 ")
  currency_input <- data.frame(
    col1 = c("£12.30", " 13$ ", " €123,123.1234"),
    col2 = currency_col2
  )
  currency_expected <- data.frame(
    col1 = c("12.30", " 13 ", " 123,123.1234"),
    col2 = currency_col2
  )

  currency_result <- .replace_currency_units(currency_input, "col1")
  expect_equal(currency_result, currency_expected)

  # .clean_numeric_data -------------------------------------------------------
  # Three of the four columns are cleaned. col1 and col2 should come back as
  # numeric; col3 (already numeric) and col4 (not requested) should be
  # unchanged.
  already_numeric <- c(123, 235, NA, 12.4)
  not_requested <- c(" 123", "12,001 ", NA, " 12.1")

  numeric_input <- data.frame(
    col1 = c("12.30", NA, " 13 ", " 123,123.1234"),
    col2 = c(" 19,000.12", "12.30", "12 ", NA),
    col3 = already_numeric,
    col4 = not_requested
  )
  numeric_expected <- data.frame(
    col1 = c(12.30, NA, 13, 123123.1234),
    col2 = c(19000.12, 12.30, 12, NA),
    col3 = already_numeric,
    col4 = not_requested
  )

  columns_to_clean <- c("col1", "col2", "col3")
  numeric_result <- .clean_numeric_data(numeric_input, columns_to_clean)
  expect_equal(numeric_result, numeric_expected)
})