library(testthat) library(mockery) # Create mock data for abc_model testing create_mock_comat <- function() { # Create a small co-occurrence matrix for testing comat <- matrix(c( 1.0, 0.5, 0.3, 0.1, 0.0, 0.5, 1.0, 0.6, 0.2, 0.0, 0.3, 0.6, 1.0, 0.7, 0.2, 0.1, 0.2, 0.7, 1.0, 0.4, 0.0, 0.0, 0.2, 0.4, 1.0 ), nrow = 5, ncol = 5, byrow = TRUE) rownames(comat) <- c("term_A", "term_B", "term_C", "term_D", "term_E") colnames(comat) <- c("term_A", "term_B", "term_C", "term_D", "term_E") # Add entity types as an attribute entity_types <- c( "term_A" = "disease", "term_B" = "drug", "term_C" = "gene", "term_D" = "protein", "term_E" = "pathway" ) attr(comat, "entity_types") <- entity_types return(comat) } test_that("abc_model returns correct structure and data", { # Create mock co-occurrence matrix mock_comat <- create_mock_comat() # Run ABC model with mock data results <- abc_model( co_matrix = mock_comat, a_term = "term_A", min_score = 0.1, n_results = 10 ) # Test structure of results expect_true(is.data.frame(results)) expect_true(all(c("a_term", "b_term", "c_term", "a_b_score", "b_c_score", "abc_score") %in% colnames(results))) # Verify that all results contain term_A as the A term expect_true(all(results$a_term == "term_A")) # Verify that scores are within expected range [0, 1] expect_true(all(results$a_b_score >= 0 & results$a_b_score <= 1)) expect_true(all(results$b_c_score >= 0 & results$b_c_score <= 1)) expect_true(all(results$abc_score >= 0 & results$abc_score <= 1)) # Test that filtering by min_score works expect_true(all(results$a_b_score >= 0.1)) expect_true(all(results$b_c_score >= 0.1)) }) test_that("abc_model handles entity type constraints correctly", { # Create mock co-occurrence matrix mock_comat <- create_mock_comat() # Run ABC model with B term type constraint results_drug <- abc_model( co_matrix = mock_comat, a_term = "term_A", min_score = 0.1, b_term_types = "drug" ) # Check that all B terms have type "drug" if (nrow(results_drug) > 0) { expect_true(all(results_drug$b_type == "drug")) } # Run model with C term type constraint results_pathway <- abc_model( co_matrix = mock_comat, a_term = "term_A", min_score = 0.1, c_term_types = "pathway" ) # Check that all C terms have type "pathway" if (nrow(results_pathway) > 0) { expect_true(all(results_pathway$c_type == "pathway")) } }) test_that("abc_model applies scoring methods correctly", { # Create mock co-occurrence matrix mock_comat <- create_mock_comat() # Test multiplication scoring method results_mult <- abc_model( co_matrix = mock_comat, a_term = "term_A", min_score = 0.1, scoring_method = "multiplication" ) # Test that ABC score equals a_b_score * b_c_score if (nrow(results_mult) > 0) { for (i in 1:nrow(results_mult)) { expect_equal( results_mult$abc_score[i], results_mult$a_b_score[i] * results_mult$b_c_score[i] ) } } # Test average scoring method results_avg <- abc_model( co_matrix = mock_comat, a_term = "term_A", min_score = 0.1, scoring_method = "average" ) # Now we'll need to check if the results have the $abc_score_avg column # Since the original function might calculate this differently, adapt as needed if (nrow(results_avg) > 0 && "abc_score_avg" %in% colnames(results_avg)) { for (i in 1:nrow(results_avg)) { expect_equal( results_avg$abc_score_avg[i], (results_avg$a_b_score[i] + results_avg$b_c_score[i]) / 2 ) } } }) test_that("abc_model handles error cases gracefully", { # Create mock co-occurrence matrix mock_comat <- create_mock_comat() # Test with non-existent A term expect_error( abc_model(mock_comat, a_term = "nonexistent_term"), "A-term 'nonexistent_term' not found in the co-occurrence matrix" ) # Test with non-existent C term expect_error( abc_model(mock_comat, a_term = "term_A", c_term = "nonexistent_term"), "C-term 'nonexistent_term' not found in the co-occurrence matrix" ) # Test with min_score that filters out all results high_threshold_results <- abc_model( mock_comat, a_term = "term_A", min_score = 0.9 ) expect_true(is.data.frame(high_threshold_results)) expect_equal(nrow(high_threshold_results), 0) }) test_that("is_valid_biomedical_entity correctly validates entities", { # Test positive cases expect_true(is_valid_biomedical_entity("migraine", "disease")) expect_true(is_valid_biomedical_entity("ibuprofen", "drug")) expect_true(is_valid_biomedical_entity("BRCA1", "gene")) expect_true(is_valid_biomedical_entity("receptor", "protein")) # Test negative cases expect_false(is_valid_biomedical_entity("europe", "disease")) expect_false(is_valid_biomedical_entity("the", "gene")) expect_false(is_valid_biomedical_entity("optimization", "chemical")) expect_false(is_valid_biomedical_entity("analysis", "drug")) # Test case sensitivity expect_true(is_valid_biomedical_entity("Migraine", "disease")) expect_true(is_valid_biomedical_entity("IBUPROFEN", "drug")) }) test_that("calculate_score applies different scoring methods correctly", { # Test multiplication method expect_equal(calculate_score(0.5, 0.6, "multiplication"), 0.3) # Test average method expect_equal(calculate_score(0.5, 0.6, "average"), 0.55) # Test combined method (0.7 * multiplication + 0.3 * average) expected <- 0.7 * (0.5 * 0.6) + 0.3 * ((0.5 + 0.6) / 2) expect_equal(calculate_score(0.5, 0.6, "combined"), expected) # Test default method expect_equal(calculate_score(0.5, 0.6, "non_existent_method"), 0.5 * 0.6) }) test_that("diversify_b_terms creates diverse results", { # Create a simple results data frame with repeated B terms test_results <- data.frame( a_term = rep("A", 9), b_term = rep(c("B1", "B2", "B3"), each = 3), c_term = c("C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"), a_b_score = rep(0.5, 9), b_c_score = rep(0.6, 9), abc_score = c(0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1), stringsAsFactors = FALSE ) # Apply diversification diverse_results <- diversify_b_terms(test_results, max_per_group = 2) # Check that we have at most 2 results per B term expect_true(all(table(diverse_results$b_term) <= 2)) # Check that we keep the highest scoring results for each B term b1_results <- diverse_results[diverse_results$b_term == "B1", ] expect_true(all(b1_results$c_term %in% c("C1", "C2"))) }) test_that("validate_abc applies statistical validation correctly", { # Create a mock ABC results data frame test_results <- data.frame( a_term = rep("term_A", 3), b_term = c("term_B", "term_B", "term_C"), c_term = c("term_D", "term_E", "term_E"), a_b_score = c(0.5, 0.5, 0.3), b_c_score = c(0.6, 0.4, 0.6), abc_score = c(0.3, 0.2, 0.18), stringsAsFactors = FALSE ) # Create a mock co-occurrence matrix mock_comat <- create_mock_comat() # Apply validation validated_results <- validate_abc( test_results, mock_comat, alpha = 0.05, correction = "none" ) # Check that validation adds expected columns expect_true(all(c("p_value", "adjusted_p_value", "significant") %in% colnames(validated_results))) # Check that p-values are between 0 and 1 expect_true(all(validated_results$p_value >= 0 & validated_results$p_value <= 1)) expect_true(all(validated_results$adjusted_p_value >= 0 & validated_results$adjusted_p_value <= 1)) # Check that significant column is logical expect_true(is.logical(validated_results$significant)) })