# =============================================================================
# TEST SUITE 1: Full integration tests
# =============================================================================

test_that("Flujo completo de matching funciona end-to-end", {
  data("peru_mammals", package = "perumammals")

  # Test 1.1: Validate a sample of names drawn from the bundled dataset.
  # NOTE(review): the sample is unseeded, so a different subset of species is
  # exercised on every run.
  sample_size <- min(50, nrow(peru_mammals))
  sample_species <- sample(peru_mammals$scientific_name, sample_size)

  result <- validate_peru_mammals(sample_species, quiet = TRUE)

  expect_equal(nrow(result), sample_size)
  expect_true(all(result$matched))
  expect_true(all(result$Matched.Rank == 2L))

  # Test 1.2: The first (up to) ten matched names equal their inputs.
  # seq_len() keeps the loop empty when there is nothing to check, whereas
  # 1:0 would iterate over c(1, 0).
  for (i in seq_len(min(10, sample_size))) {
    expect_equal(sample_species[i], result$Matched.Name[i])
  }
})

test_that("Matching preserva orden de input", {
  species_ordered <- c(
    "Mus musculus",
    "Akodon torques",
    "Rattus rattus",
    "Thomasomys kalinowskii"
  )

  result <- validate_peru_mammals(species_ordered, quiet = TRUE)

  # Test 1.3: Output rows follow the input order, one row per input name.
  expect_equal(result$Orig.Name, species_ordered)
  expect_equal(nrow(result), length(species_ordered))
})

# =============================================================================
# TEST SUITE 2: Edge cases - problematic names
# =============================================================================

test_that("Manejo de caracteres especiales", {
  # Test 2.1: Name containing digits is not matched.
  result_num <- validate_peru_mammals("Species123", quiet = TRUE)
  expect_false(result_num$matched)

  # Test 2.2: Name containing a hyphen still yields a data frame.
  result_hyphen <- validate_peru_mammals("Genus-species", quiet = TRUE)
  expect_s3_class(result_hyphen, "data.frame")

  # Test 2.3: Name containing a period still yields a data frame.
  result_dot <- validate_peru_mammals("Genus sp.", quiet = TRUE)
  expect_s3_class(result_dot, "data.frame")

  # Test 2.4: Name containing parentheses still yields a data frame.
  result_paren <- validate_peru_mammals("Genus (species)", quiet = TRUE)
  expect_s3_class(result_paren, "data.frame")
})

# Disabled test, kept for reference.
# test_that("Manejo de strings extremos", {
#   # Test 2.5: Very long string
#   long_string <- paste(rep("word", 50), collapse = " ")
#   result_long <- validate_peru_mammals(long_string, quiet = TRUE)
#   expect_s3_class(result_long, "data.frame")
#   expect_false(result_long$matched)
#
#   # Test 2.6: Empty string
#   result_empty <- validate_peru_mammals("", quiet = TRUE)
#   expect_false(result_empty$matched)
#
#   # Test 2.7: Whitespace only
#   result_spaces <- validate_peru_mammals(" ", quiet = TRUE)
#   expect_false(result_spaces$matched)
#
#   # Test 2.8: Tabs and newlines
#   result_whitespace <- validate_peru_mammals("\t\n", quiet = TRUE)
#   expect_false(result_whitespace$matched)
# })

test_that("Manejo de caracteres Unicode y acentos", {
  # Test 2.9: Name with acute accents still yields a data frame.
  result_accent <- validate_peru_mammals("Génus spécies", quiet = TRUE)
  expect_s3_class(result_accent, "data.frame")

  # Test 2.10: Name with other non-ASCII characters still yields a data frame.
  result_unicode <- validate_peru_mammals("Genüs spëcies", quiet = TRUE)
  expect_s3_class(result_unicode, "data.frame")
})

# =============================================================================
# TEST SUITE 3: Edge cases - special vectors
# =============================================================================

test_that("Manejo de vectores edge case", {
  # Test 3.1: Single-element vector gives a single row.
  result_single <- validate_peru_mammals("Akodon torques", quiet = TRUE)
  expect_equal(nrow(result_single), 1)

  # Test 3.2: A vector dominated by NAs keeps one row per input element.
  many_nas <- c(NA, NA, "Akodon torques", NA, "Mus musculus", NA)
  result_nas <- validate_peru_mammals(many_nas, quiet = TRUE)
  expect_equal(nrow(result_nas), length(many_nas))

  # Disabled check, kept for reference.
  # # Test 3.3: Vector made only of NAs
  # all_nas <- c(NA, NA, NA)
  # result_all_nas <- validate_peru_mammals(all_nas, quiet = TRUE)
  # expect_equal(nrow(result_all_nas), 3)
  # expect_true(all(is.na(result_all_nas$Orig.Name) |
  #   !result_all_nas$matched))

  # Test 3.4: Empty strings keep one row per input element.
  empties <- c("", "", "Akodon torques", "")
  result_empties <- validate_peru_mammals(empties, quiet = TRUE)
  expect_equal(nrow(result_empties), 4)

  # Test 3.5: The same name repeated keeps every row, and each one matches.
  repeated <- rep("Akodon torques", 20)
  result_repeated <- validate_peru_mammals(repeated, quiet = TRUE)
  expect_equal(nrow(result_repeated), 20)
  expect_true(all(result_repeated$matched))
})

# =============================================================================
# TEST SUITE 4: Edge cases - fuzzy matching limits
# =============================================================================

test_that("Fuzzy matching con distancias extremas", {
  # Test 4.1: One character off (one 'o' missing from the genus).
  result_1char <- validate_peru_mammals("Akodn torques", quiet = TRUE)
  expect_s3_class(result_1char, "data.frame")

  # Test 4.2: Two characters off (both 'o's missing from the genus).
  result_2char <- validate_peru_mammals("Akdn torques", quiet = TRUE)
  expect_s3_class(result_2char, "data.frame")

  # Test 4.3: A completely different name must not match.
  result_different <- validate_peru_mammals("Xxxxx yyyyy", quiet = TRUE)
  expect_false(result_different$matched)

  # Test 4.4: Transposed letters in the genus.
  result_transpose <- validate_peru_mammals("Akdoon torques", quiet = TRUE)
  expect_s3_class(result_transpose, "data.frame")
})

test_that("Fuzzy matching con nombres similares", {
  data("peru_mammals", package = "perumammals")

  # Test 4.5: With similar genera, fuzzy matching should pick the closest one.
  # This depends on which genera actually exist in peru_mammals, so nothing is
  # asserted for it here.

  # Test 4.6: Species of the same genus with similar names.
  akodon_species <- subset(peru_mammals, genus == "Akodon")
  # Skip explicitly instead of silently running a test with no expectations.
  skip_if(
    nrow(akodon_species) < 2,
    "peru_mammals has fewer than two Akodon species"
  )

  # Build a typo by replacing the last character of the first `species` value
  # with "x" (presumably the specific epithet - verify against the dataset).
  sp1 <- akodon_species$species[1]
  sp1_typo <- paste0(substr(sp1, 1, nchar(sp1) - 1), "x")

  result_sp_typo <- validate_peru_mammals(
    paste("Akodon", sp1_typo),
    quiet = TRUE
  )
  expect_s3_class(result_sp_typo, "data.frame")
})
# =============================================================================
# TEST SUITE 5: Performance tests
# =============================================================================

test_that("Performance con datasets grandes", {
  # Test 5.1: Large input built by recycling four names up to 200 elements.
  large_valid <- rep(
    c(
      "Akodon torques",
      "Mus musculus",
      "Rattus rattus",
      "Thomasomys kalinowskii"
    ),
    length.out = 200
  )

  start_time <- Sys.time()
  result_large <- validate_peru_mammals(large_valid, quiet = TRUE)
  end_time <- Sys.time()

  expect_equal(nrow(result_large), 200)
  expect_true(any(result_large$matched))

  # Wall-clock time must stay reasonable (< 30 seconds).
  execution_time <- as.numeric(difftime(end_time, start_time, units = "secs"))
  expect_lt(execution_time, 30)
})

test_that("Performance con muchos fuzzy matches", {
  # Test 5.2: Input made only of typos (fuzzy-matching heavy).
  typos <- c(
    "Akdon torques", # typo in genus
    "Akodon torqes", # typo in species
    "Akdon torqes",  # typo in both
    "Ms musculus",   # typo in genus
    "Mus musculs"    # typo in species
  )

  # NOTE(review): this is the only call in the file made with quiet = FALSE,
  # so the timing includes any console output - confirm that is intended.
  start_time <- Sys.time()
  result_typos <- validate_peru_mammals(rep(typos, 10), quiet = FALSE)
  end_time <- Sys.time()

  # Must finish in reasonable time (< 20 seconds).
  execution_time <- as.numeric(difftime(end_time, start_time, units = "secs"))
  expect_lt(execution_time, 20)
})

# =============================================================================
# TEST SUITE 6: Integration with ecoregions
# =============================================================================

test_that("Integración entre especies y ecorregiones", {
  data("peru_mammals", package = "perumammals")
  data("peru_mammals_ecoregions", package = "perumammals")

  # Test 6.1: Match species, then look up their ecoregion rows.
  test_species <- head(peru_mammals$scientific_name, 5)
  match_result <- validate_peru_mammals(test_species, quiet = TRUE)

  # For every matched species the ecoregion lookup must work.
  # seq_len() avoids iterating over c(1, 0) when there are no rows.
  for (i in seq_len(nrow(match_result))) {
    if (match_result$matched[i]) {
      species_name <- match_result$Matched.Name[i]
      eco_info <- subset(
        peru_mammals_ecoregions,
        scientific_name == species_name
      )

      # The species may or may not have ecoregion rows; only the lookup
      # itself is checked.
      expect_s3_class(eco_info, "data.frame")
    }
  }
})

test_that("Workflow completo: match + lookup de metadatos", {
  data("peru_mammals", package = "perumammals")

  # Test 6.2: Typical end-user flow.
  user_input <- c("Akodon torques", "Invalid name", "Mus musculus")

  # Step 1: Matching.
  match_result <- validate_peru_mammals(user_input, quiet = TRUE)
  expect_equal(nrow(match_result), 3)

  # Step 2: Keep only matched rows; exactly one of the three inputs is
  # expected to match.
  valid_matches <- subset(match_result, matched == TRUE)
  expect_equal(nrow(valid_matches), 1)

  # Step 3: Look up additional metadata for each matched name.
  # seq_len() keeps the loop empty if nothing matched, so a failure in step 2
  # is not followed by spurious failures on indices 1 and 0.
  for (i in seq_len(nrow(valid_matches))) {
    species_name <- valid_matches$Matched.Name[i]
    species_info <- subset(peru_mammals, scientific_name == species_name)

    expect_equal(nrow(species_info), 1)
    expect_true("family" %in% names(species_info))
    expect_true("endemic" %in% names(species_info))
  }
})

# =============================================================================
# TEST SUITE 7: Consistency of results
# =============================================================================

test_that("Resultados son determinísticos", {
  # Test 7.1: Repeated runs on the same input give the same result.
  species_list <- c("Akodon torques", "Mus musculus", "Genus unknown")

  results <- lapply(seq_len(5), function(i) {
    validate_peru_mammals(species_list, quiet = TRUE)
  })

  # Every later run must agree with the first one.
  first_run <- results[[1]]
  for (later_run in results[-1]) {
    expect_equal(first_run$Matched.Name, later_run$Matched.Name)
    expect_equal(first_run$matched, later_run$matched)
    expect_equal(first_run$Matched.Rank, later_run$Matched.Rank)
  }
})

test_that("Independencia entre llamadas", {
  # Test 7.2: One call must not affect the next.
  result1 <- validate_peru_mammals("Akodon torques", quiet = TRUE)
  result2 <- validate_peru_mammals("Mus musculus", quiet = TRUE)
  result3 <- validate_peru_mammals("Akodon torques", quiet = TRUE)

  # The first and third calls (same input) must give the same result.
  expect_equal(result1$Matched.Name, result3$Matched.Name)
  expect_equal(result1$matched, result3$matched)
})

# =============================================================================
# TEST SUITE 8: Compatibility with dplyr/tidyverse
# =============================================================================

test_that("Resultados son compatibles con tidyverse", {
  # Skip cleanly, rather than error in library(), when dplyr is unavailable.
  skip_if_not_installed("dplyr")
  library(dplyr)

  # Test 8.1: The result can be piped through dplyr verbs.
  result <- validate_peru_mammals(
    c("Akodon torques", "Mus musculus"),
    quiet = TRUE
  ) %>%
    filter(matched == TRUE) %>%
    select(Matched.Name, Matched.Genus, Matched.Species)

  expect_s3_class(result, "data.frame")
  expect_equal(nrow(result), 1)

  # Test 8.2: The result can be joined with other datasets.
  data("peru_mammals", package = "perumammals")

  match_result <- validate_peru_mammals(
    c("Akodon torques", "Mus musculus"),
    quiet = TRUE
  )

  joined <- match_result %>%
    filter(matched == TRUE) %>%
    left_join(peru_mammals, by = c("Matched.Name" = "scientific_name"))

  expect_s3_class(joined, "data.frame")
  # A ".x" suffix appears only when both tables carry a `family` column.
  expect_true("family.x" %in% names(joined))
})

# =============================================================================
# TEST SUITE 9: Validation of error messages
# =============================================================================

test_that("Mensajes de error son informativos", {
  # Test 9.1: NULL input raises an error mentioning "character vector".
  expect_error(
    validate_peru_mammals(NULL),
    "character vector"
  )

  # Test 9.2: Numeric input raises an error mentioning "character vector".
  expect_error(
    validate_peru_mammals(123),
    "character vector"
  )

  # Disabled check, kept for reference.
  # Test 9.3: Error with an invalid `quiet`
  # expect_error(
  #   validate_peru_mammals("Akodon torques", quiet = TRUE),
  #   "logical"
  # )
})

# =============================================================================
# TEST SUITE 10: Regression tests
# =============================================================================

test_that("Casos conocidos mantienen comportamiento esperado", {
  # Test 10.1: Pinned behaviour for two widespread commensal species: they are
  # expected NOT to all be matched.
  common_species <- c(
    "Mus musculus",
    "Rattus rattus"
  )

  result <- validate_peru_mammals(common_species, quiet = TRUE)
  expect_false(all(result$matched))

  # Test 10.2: A bare genus name is reported as unmatched, with an NA rank.
  result_genus <- validate_peru_mammals("Akodon", quiet = TRUE)
  expect_false(result_genus$matched)
  expect_equal(result_genus$Matched.Rank, NA_real_)

  # Test 10.3: A made-up name must not match.
  result_invalid <- validate_peru_mammals("Fakeus nonexistus", quiet = TRUE)
  expect_false(result_invalid$matched)
})

# =============================================================================
# TEST SUITE 12: Cross-validation of the bundled data
# =============================================================================

test_that("Datos son internamente consistentes", {
  data("peru_mammals", package = "perumammals")
  data("peru_mammals_ecoregions", package = "perumammals")

  # Test 12.1: Every species in the ecoregions table exists in the main
  # dataset.
  eco_species <- unique(peru_mammals_ecoregions$scientific_name)
  main_species <- peru_mammals$scientific_name

  missing_species <- setdiff(eco_species, main_species)
  expect_equal(
    length(missing_species),
    0,
    info = paste("Missing species:", paste(missing_species, collapse = ", "))
  )

  # Test 12.2: Every pm_id in the ecoregions table exists in the main dataset.
  # NOTE(review): uniqueness of pm_id is not checked here.
  eco_ids <- unique(peru_mammals_ecoregions$pm_id)
  main_ids <- peru_mammals$pm_id

  missing_ids <- setdiff(eco_ids, main_ids)
  expect_equal(
    length(missing_ids),
    0,
    info = paste("Missing pm_ids:", paste(missing_ids, collapse = ", "))
  )
})