# The tests in this file are primarily designed to check that the results of the # taxonomy updating give expected results. # Limited attention is given to checking all possible ways of running a function # We have established spreadsheets with benchmarks of expected ouput test_that("consistency with previous runs", { # Check that results are consistent through time taxa <- c( "Banksia integrifolia", "Acacia longifolia", "Commersonia rosea", "Thelymitra pauciflora", "Justicia procumbens", "Hibbertia stricta", "Rostellularia adscendens", "Hibbertia sericea", "Hibbertia sp.", "Athrotaxis laxiflolia", "Genoplesium insigne", "Polypogon viridis", "Acacia aneura", "Acacia paraneura", "Galactia striata" ) output <- create_taxonomic_update_lookup( taxa, resources = resources, full = TRUE, taxonomic_splits = "return_all" ) %>% dplyr::arrange(original_name, accepted_name) #readr::write_csv(output, "consistency_lookup.csv") past_result <- readr::read_csv("benchmarks/consistency_lookup.csv", show_col_types = FALSE) %>% dplyr::arrange(original_name, canonical_name) %>% dplyr::rename(accepted_name = canonical_name) %>% dplyr::distinct(aligned_name, accepted_name_usage_ID, .keep_all = TRUE) # tests the most important columns # other cols changed so we can't check other columns v <-c("original_name", "aligned_name", "accepted_name") expect_equal(past_result[,v], output[,v]) }) test_that("taxon name splits and complex taxonomic status values work as expected", { # Compare results to a table of values that have been closely scrutinised benchmarks <- readr::read_csv("benchmarks/test_splits_synonyms.csv", show_col_types = FALSE) %>% arrange(original_name, accepted_name_usage_ID, taxonomic_status) out1 <- create_taxonomic_update_lookup( benchmarks$original_name, taxonomic_splits = "most_likely_species", resources = resources, full = TRUE) %>% arrange(original_name, taxon_ID, taxonomic_status) expect_equal(benchmarks$original_name, out1$original_name) expect_equal(benchmarks$accepted_name_usage_ID, out1$taxon_ID) #todo: include test that confirms taxonomic_status in benchmarks is present (str_detect) in either out1$taxonomic_status or out1$alternative_taxonomic_status_aligned out2 <- create_taxonomic_update_lookup( benchmarks$original_name, taxonomic_splits = "return_all", resources = resources, full = TRUE) %>% arrange(original_name, taxon_ID, taxonomic_status) expect_gte(nrow(out2), 60) expect_contains(out2$original_name, benchmarks$original_name) expect_contains(out2$accepted_name, out1$accepted_name) out3 <- create_taxonomic_update_lookup( benchmarks$original_name, taxonomic_splits = "collapse_to_higher_taxon", resources = resources, full = TRUE) %>% arrange(original_name, taxon_ID, taxonomic_status) %>% mutate(number_of_collapsed_taxa = ifelse(is.na(number_of_collapsed_taxa), 1, number_of_collapsed_taxa)) rows_gt_1 <- out3 %>% filter(number_of_collapsed_taxa > 1) rows_end_sp <- out3 %>% filter(stringr::str_detect(suggested_name, "sp.")) rows_alt_names <- out3 %>% filter(stringr::str_detect(suggested_name, "collapsed names:")) expect_equal(nrow(out1), nrow(out3)) #will be less (slightly) because `return_all` excludes misapplied and excluded taxa expect_equal(nrow(out2), sum(out3$number_of_collapsed_taxa)-2) expect_equal(nrow(rows_gt_1), nrow(rows_end_sp)) expect_equal(nrow(rows_gt_1), nrow(rows_alt_names)) out4 <- create_taxonomic_update_lookup( benchmarks$original_name, resources = resources, full = TRUE) %>% arrange(original_name, taxon_ID, taxonomic_status) expect_equal(out1, out4) }) test_that("taxon name alignment matches and updates work as expected", { # Compare results to a table of values that have been closely scrutinised benchmarks <- readr::read_csv("benchmarks/test_matches_alignments_updates.csv", show_col_types = FALSE) %>% dplyr::rename( alignment_code = alignment_code_all_matches_TRUE, aligned_name = aligned_name_all_matches_TRUE, taxon_rank = taxon_rank_all_matches_TRUE, taxonomic_dataset = taxonomic_dataset_all_matches_TRUE, ) %>% dplyr::select( original_name, alignment_code, aligned_name, taxon_rank, taxonomic_dataset, updated_name, updated_name_passes ) %>% dplyr::arrange(original_name, aligned_name) output_align <- align_taxa( original_name = benchmarks$original_name, resources = resources, full = TRUE, fuzzy_abs_dist = 3, fuzzy_rel_dist = 0.2, imprecise_fuzzy_matches = TRUE, APNI_matches = TRUE, fuzzy_matches = TRUE, identifier = "test_all_matches_TRUE" ) expect_equal(benchmarks$original_name, output_align$original_name) expect_equal(benchmarks$aligned_name, output_align$aligned_name) expect_equal(benchmarks$taxon_rank, output_align$taxon_rank) expect_equal(benchmarks$taxonomic_dataset, output_align$taxonomic_dataset) expect_equal(benchmarks$alignment_code, stringr::str_extract(output_align$alignment_code, "match_[:digit:][:digit:][:alpha:]")) output_updates <- update_taxonomy( output_align, resources = resources, taxonomic_splits = "most_likely_species" ) output_updates <- output_updates %>% dplyr::left_join(by = "original_name", benchmarks %>% select(original_name, updated_name, updated_name_passes), ) %>% # Make a logical to see if the suggested name matches the updated_name in the spreadsheet # We don't expect all of these to match perfectly. # The column `updated_name_passes` has our expectation on whether the match works, # and is used below for the test dplyr::mutate( test_column = ifelse(suggested_name == updated_name, TRUE, FALSE), test_column = ifelse(is.na(suggested_name) & is.na(updated_name), TRUE, test_column), test_column = ifelse(is.na(test_column), FALSE, test_column) ) expect_equal(benchmarks$original_name, output_updates$original_name) # We expect 100% success in alignment expect_equal(benchmarks$aligned_name, output_updates$aligned_name) # for update_taxonomony, there are cases where the algorithm doesn't produce a desired result (suggested_name != updated_name) # these are known and expected failures. expect_equal(benchmarks$updated_name_passes, output_updates$test_column) })