library(testthat)
library(jsonlite)
library(httr)

# Live integration tests for predict_query().
#
# Every test is skipped unless SYNTHESIZE_API_KEY is set. predict_query() and
# get_valid_query() are not defined in this file; they are expected to be
# supplied by the package under test and its test helpers.

# -----------------------------
# Shared helpers
# -----------------------------

# Helper function to check if API key is available.
# Returns TRUE when SYNTHESIZE_API_KEY is set to a non-empty string.
api_key_available <- function() {
  nzchar(Sys.getenv("SYNTHESIZE_API_KEY", unset = ""))
}

# Skip the calling test when no API key is configured.
skip_if_no_api_key <- function() {
  skip_if_not(
    api_key_available(),
    "Skipping live API test because SYNTHESIZE_API_KEY is not set."
  )
}

# Check the basic shape of a predict_query() result: a list holding non-empty
# `metadata` and `expression` data frames.
expect_prediction_structure <- function(results) {
  expect_type(results, "list")
  expect_true("metadata" %in% names(results))
  expect_true("expression" %in% names(results))

  expect_s3_class(results$metadata, "data.frame")
  expect_s3_class(results$expression, "data.frame")

  expect_true(nrow(results$metadata) > 0)
  expect_true(nrow(results$expression) > 0)
  expect_true(ncol(results$expression) > 0)
}

# Check that every expression column is numeric.
expect_numeric_expression <- function(results) {
  expect_true(
    all(vapply(results$expression, is.numeric, logical(1))),
    info = "Expression data should contain numeric values"
  )
}

# Check that no expression value is negative (NAs are ignored).
expect_non_negative_counts <- function(results) {
  expect_true(
    all(results$expression >= 0, na.rm = TRUE),
    info = "Expression counts should be non-negative"
  )
}

# Full data validation used by the "live call success" tests: numeric values,
# matching row counts, non-negative counts and usable gene names.
expect_valid_expression_data <- function(results) {
  expect_numeric_expression(results)

  # Metadata and expression must describe the same samples.
  expect_equal(
    nrow(results$metadata), nrow(results$expression),
    info = "Metadata and expression should have same number of rows"
  )

  expect_non_negative_counts(results)

  # Gene names are carried as the expression column names.
  gene_names <- colnames(results$expression)
  expect_true(
    length(gene_names) > 0,
    info = "Expression data should have gene names as column names"
  )
  expect_true(
    all(nchar(gene_names) > 0),
    info = "All gene names should be non-empty strings"
  )
}

# Run `query` once, expect it to fail with "Model query failed", and return
# the error message. Returns "" when no matching error was captured, so the
# follow-up message checks fail cleanly instead of operating on a result list.
expect_query_rejected <- function(query) {
  err <- expect_error(
    predict_query(query = query, as_counts = TRUE),
    "Model query failed"
  )
  if (inherits(err, "condition")) conditionMessage(err) else ""
}

# Check that an error message names the invalid UBERON ID used by the tests
# and indicates a validation failure.
expect_uberon_validation_details <- function(error_result) {
  expect_true(
    grepl("UBERON:9999999", error_result),
    info = paste(
      "Error message should mention the invalid UBERON ID. Got:", error_result
    )
  )
  expect_true(
    grepl("bad values|invalid", error_result, ignore.case = TRUE),
    info = paste(
      "Error message should indicate validation failure. Got:", error_result
    )
  )
}

# Apply `test_fn` to each pair of matching columns in `group1` / `group2` and
# return the p-values as a numeric vector. A gene whose test raises an error
# (e.g. constant data) yields NA.
gene_wise_p_values <- function(group1, group2, test_fn) {
  vapply(
    seq_len(ncol(group1)),
    function(i) {
      tryCatch(
        test_fn(group1[[i]], group2[[i]])$p.value,
        error = function(e) NA_real_
      )
    },
    numeric(1)
  )
}

# Pearson correlation between two expression tables, flattened to vectors.
expression_correlation <- function(expression1, expression2) {
  cor(
    as.vector(as.matrix(expression1)),
    as.vector(as.matrix(expression2))
  )
}

# -----------------------------
# Live API Tests
# -----------------------------

test_that("predict_query live call success (bulk)", {
  skip_if_no_api_key()

  message("\nTesting live predict_query call for bulk modality...")

  test_query <- get_valid_query()

  results <- predict_query(
    query = test_query,
    as_counts = TRUE
  )

  expect_prediction_structure(results)
  expect_valid_expression_data(results)

  message("Live bulk API test passed with data validation.")
})

test_that("predict_query live call success (single-cell)", {
  skip_if_no_api_key()

  message("\nTesting live predict_query call for single-cell modality...")

  test_query <- get_valid_query(modality = "single-cell")

  results <- predict_query(
    query = test_query,
    as_counts = TRUE
  )

  expect_prediction_structure(results)
  expect_valid_expression_data(results)

  # Verify metadata contains expected fields for single-cell
  metadata_cols <- colnames(results$metadata)
  expect_true(
    length(metadata_cols) > 0,
    info = "Metadata should have at least one column"
  )

  message("Live single-cell API test passed with data validation.")
})

test_that("predict_query live call invalid UBERON (bulk)", {
  skip_if_no_api_key()

  message("\nTesting live predict_query with invalid UBERON ID for bulk...")

  # Create a query with an invalid UBERON ID
  invalid_query <- list(
    inputs = list(
      list(
        metadata = list(
          tissue_ontology_id = "UBERON:9999999", # Invalid ID
          age_years = "65",
          sex = "female",
          sample_type = "primary tissue"
        ),
        num_samples = 1
      )
    ),
    modality = "bulk",
    mode = "sample generation"
  )

  # The API should reject this with an error; the query is sent only once and
  # the captured message is reused for the detail checks below.
  error_result <- expect_query_rejected(invalid_query)

  message(paste(
    "API correctly rejected invalid UBERON ID with error:", error_result
  ))

  # The error message should contain the validation details
  expect_uberon_validation_details(error_result)

  message(
    "Successfully validated error message contains UBERON validation details"
  )
})

test_that("predict_query live call invalid UBERON (single-cell)", {
  skip_if_no_api_key()

  message(
    "\nTesting live predict_query (single-cell) with invalid UBERON ID..."
  )

  # Create a single-cell query with an invalid UBERON ID
  invalid_query <- list(
    inputs = list(
      list(
        metadata = list(
          cell_type_ontology_id = "CL:0000786",
          tissue_ontology_id = "UBERON:9999999", # Invalid ID
          sex = "male"
        ),
        num_samples = 1
      )
    ),
    modality = "single-cell",
    mode = "mean estimation",
    return_classifier_probs = TRUE,
    seed = 42
  )

  # The API should reject this with an error; the query is sent only once and
  # the captured message is reused for the detail checks below.
  error_result <- expect_query_rejected(invalid_query)

  message(paste(
    "API correctly rejected invalid UBERON ID (single-cell) with error:",
    error_result
  ))

  # The error message should contain the validation details
  expect_uberon_validation_details(error_result)

  message(
    "Successfully validated error message contains UBERON validation details (single-cell)"
  )
})

test_that("predict_query download URL flow works correctly", {
  skip_if_no_api_key()

  message("\nTesting download URL flow with return_download_url=TRUE...")

  test_query <- get_valid_query()

  # First get the download URL without parsing
  result_with_url <- predict_query(
    query = test_query,
    as_counts = TRUE,
    return_download_url = TRUE
  )

  expect_type(result_with_url, "list")
  expect_true("download_url" %in% names(result_with_url))
  expect_true("metadata" %in% names(result_with_url))
  expect_true("expression" %in% names(result_with_url))

  # Verify download URL is a valid URL
  download_url <- result_with_url$download_url
  expect_type(download_url, "character")
  expect_true(nchar(download_url) > 0)
  expect_true(
    grepl("^https?://", download_url),
    info = "Download URL should start with http:// or https://"
  )

  # Verify empty data frames when return_download_url=TRUE
  expect_equal(nrow(result_with_url$metadata), 0)
  expect_equal(nrow(result_with_url$expression), 0)

  # Now manually fetch the URL using get_json to verify it works
  message("Testing get_json with real download URL...")
  json_result <- rsynthbio:::get_json(download_url)

  json_fields <- names(json_result)
  has_outputs <- "outputs" %in% json_fields
  has_gene_order <- "gene_order" %in% json_fields

  expect_type(json_result, "list")
  expect_true(
    has_outputs || has_gene_order,
    info = "JSON result should contain expected fields"
  )

  # Verify we got valid data
  if (has_outputs) {
    expect_true(
      length(json_result$outputs) > 0,
      info = "Outputs should contain at least one element"
    )
  }

  if (has_gene_order) {
    expect_true(
      length(json_result$gene_order) > 0,
      info = "Gene order should contain gene names"
    )
  }

  message("Download URL flow and get_json validation passed.")
})

test_that("predict_query returns biologically valid expression data (differential expression)", {
  skip_if_no_api_key()

  message(
    "\nTesting biological validity with simple differential expression analysis..."
  )

  # Samples requested per condition; also drives the row split below.
  n_per_group <- 5

  # Create query with two distinct conditions
  de_query <- list(
    inputs = list(
      # Condition 1: One cell type
      list(
        metadata = list(
          cell_type_ontology_id = "CL:0000786", # Plasmacytoid dendritic cell
          tissue_ontology_id = "UBERON:0002371", # bone marrow
          sex = "female",
          sample_type = "primary tissue"
        ),
        num_samples = n_per_group
      ),
      # Condition 2: Different cell type
      list(
        metadata = list(
          cell_type_ontology_id = "CL:0000763", # Myeloid cell
          tissue_ontology_id = "UBERON:0002371", # bone marrow
          sex = "female",
          sample_type = "primary tissue"
        ),
        num_samples = n_per_group
      )
    ),
    modality = "bulk",
    mode = "sample generation",
    seed = 42
  )

  results <- predict_query(query = de_query, as_counts = TRUE)

  # The split below assumes rows are ordered by input condition; make sure
  # enough rows came back so the indices do not run past the data.
  expect_gte(nrow(results$expression), 2 * n_per_group)

  # Split samples by condition
  group1_idx <- seq_len(n_per_group)
  group2_idx <- n_per_group + seq_len(n_per_group)

  expr_group1 <- results$expression[group1_idx, ]
  expr_group2 <- results$expression[group2_idx, ]

  # Calculate basic statistics for each gene
  n_genes <- ncol(results$expression)

  # Calculate mean expression for each group
  mean_group1 <- colMeans(expr_group1)
  mean_group2 <- colMeans(expr_group2)

  # Calculate fold changes (using pseudocount to avoid division by zero)
  pseudocount <- 1
  fold_changes <- log2(
    (mean_group2 + pseudocount) / (mean_group1 + pseudocount)
  )

  # Perform t-tests for each gene
  p_values <- gene_wise_p_values(expr_group1, expr_group2, t.test)

  # Basic validation of differential expression results
  message("Validating differential expression statistics...")

  # 1. Check that we have valid p-values
  valid_pvals <- !is.na(p_values)
  expect_true(
    sum(valid_pvals) > n_genes * 0.9,
    info = "At least 90% of genes should have valid p-values"
  )

  # 2. P-values should be distributed between 0 and 1
  expect_true(
    all(p_values[valid_pvals] >= 0 & p_values[valid_pvals] <= 1),
    info = "All p-values should be between 0 and 1"
  )

  # 3. Not all p-values should be identical (showing variation)
  expect_true(
    length(unique(p_values[valid_pvals])) > 100,
    info = "P-values should show variation across genes"
  )

  # 4. Fold changes should be reasonable (not all zero, not all extreme)
  expect_true(
    sd(fold_changes, na.rm = TRUE) > 0,
    info = "Fold changes should show variation"
  )
  expect_true(
    abs(median(fold_changes, na.rm = TRUE)) < 10,
    info = "Median fold change should be reasonable (|log2FC| < 10)"
  )

  # 5. Check for differentially expressed genes (p < 0.05)
  de_genes <- which(p_values < 0.05)
  expect_true(
    length(de_genes) > 0,
    info = "Should detect some differentially expressed genes"
  )
  expect_true(
    length(de_genes) < n_genes * 0.5,
    info = "Not all genes should be differentially expressed"
  )

  # 6. Variance should exist within groups (biological variation)
  var_group1 <- vapply(expr_group1, var, numeric(1))
  var_group2 <- vapply(expr_group2, var, numeric(1))

  expect_true(
    median(var_group1, na.rm = TRUE) > 0,
    info = "Group 1 should show within-group variance"
  )
  expect_true(
    median(var_group2, na.rm = TRUE) > 0,
    info = "Group 2 should show within-group variance"
  )

  # 7. Expression levels should be reasonable for count data
  overall_mean <- mean(as.matrix(results$expression), na.rm = TRUE)
  expect_true(
    overall_mean > 0,
    info = "Mean expression should be positive"
  )
  expect_true(
    overall_mean < 1e6,
    info = "Mean expression should be in reasonable range"
  )

  message(sprintf(
    "DE analysis complete: %d DE genes (p<0.05) out of %d tested",
    length(de_genes), sum(valid_pvals)
  ))
  message(sprintf(
    "Median fold change: %.3f (log2)", median(fold_changes, na.rm = TRUE)
  ))
  message(sprintf(
    "Expression range: %.1f to %.1f",
    min(results$expression, na.rm = TRUE),
    max(results$expression, na.rm = TRUE)
  ))
  message("Biological validity tests passed!")
})

test_that("predict_query returns biologically valid single-cell expression data (differential expression)", {
  skip_if_no_api_key()

  message(
    "\nTesting single-cell biological validity with differential expression analysis..."
  )

  # Samples requested per condition; also drives the row split below.
  n_per_group <- 10

  # Create query with two distinct cell types
  sc_de_query <- list(
    inputs = list(
      # Condition 1: T cells
      list(
        metadata = list(
          cell_type_ontology_id = "CL:0000084", # T cell
          tissue_ontology_id = "UBERON:0002371", # bone marrow
          sex = "female"
        ),
        num_samples = n_per_group
      ),
      # Condition 2: B cells
      list(
        metadata = list(
          cell_type_ontology_id = "CL:0000236", # B cell
          tissue_ontology_id = "UBERON:0002371", # bone marrow
          sex = "female"
        ),
        num_samples = n_per_group
      )
    ),
    modality = "single-cell",
    mode = "mean estimation",
    seed = 123
  )

  results <- predict_query(query = sc_de_query, as_counts = TRUE)

  # The split below assumes rows are ordered by input condition; make sure
  # enough rows came back so the indices do not run past the data.
  expect_gte(nrow(results$expression), 2 * n_per_group)

  # Split samples by condition
  group1_idx <- seq_len(n_per_group)
  group2_idx <- n_per_group + seq_len(n_per_group)

  expr_group1 <- results$expression[group1_idx, ]
  expr_group2 <- results$expression[group2_idx, ]

  # Calculate basic statistics for each gene
  n_genes <- ncol(results$expression)
  n_cells <- nrow(results$expression)

  message(sprintf("Analyzing %d cells across %d genes...", n_cells, n_genes))

  # Single-cell specific metrics
  # 1. Calculate sparsity (proportion of zeros)
  sparsity_group1 <- mean(expr_group1 == 0)
  sparsity_group2 <- mean(expr_group2 == 0)

  expect_true(
    sparsity_group1 > 0.3,
    info = "Single-cell data should show sparsity (>30% zeros)"
  )
  expect_true(
    sparsity_group1 < 0.95,
    info = "Single-cell data should not be too sparse (<95% zeros)"
  )

  message(sprintf(
    "Sparsity: Group1 = %.1f%%, Group2 = %.1f%%",
    sparsity_group1 * 100, sparsity_group2 * 100
  ))

  # 2. Calculate mean expression for each gene
  mean_group1 <- colMeans(expr_group1)
  mean_group2 <- colMeans(expr_group2)

  # 3. Calculate fold changes (using pseudocount for sparse data)
  pseudocount <- 0.1
  fold_changes <- log2(
    (mean_group2 + pseudocount) / (mean_group1 + pseudocount)
  )

  # 4. Perform Wilcoxon rank-sum tests (better for sparse/non-normal
  # single-cell data).
  # Use exact = FALSE to avoid warnings about ties (common with sparse data)
  p_values <- gene_wise_p_values(
    expr_group1, expr_group2,
    function(x, y) wilcox.test(x, y, exact = FALSE)
  )

  # Validation of single-cell differential expression results
  message("Validating single-cell differential expression statistics...")

  # 1. Check that we have valid p-values
  # Note: Very sparse single-cell data may have many genes with all zeros
  valid_pvals <- !is.na(p_values)
  n_valid <- sum(valid_pvals)
  expect_true(
    n_valid > 100,
    info = sprintf(
      "Should have at least 100 testable genes (got %d)", n_valid
    )
  )

  # 2. P-values should be distributed between 0 and 1
  expect_true(
    all(p_values[valid_pvals] >= 0 & p_values[valid_pvals] <= 1),
    info = "All p-values should be between 0 and 1"
  )

  # 3. P-values should show variation (not all the same)
  expect_true(
    length(unique(p_values[valid_pvals])) > 100,
    info = "P-values should show variation across genes"
  )

  # 4. Fold changes should show variation
  expect_true(
    sd(fold_changes, na.rm = TRUE) > 0,
    info = "Fold changes should show variation"
  )
  expect_true(
    abs(median(fold_changes, na.rm = TRUE)) < 15,
    info = "Median fold change should be reasonable for single-cell"
  )

  # 5. Check for differentially expressed genes
  de_genes <- which(p_values < 0.05)
  expect_true(
    length(de_genes) > 0,
    info = "Should detect some differentially expressed genes"
  )
  expect_true(
    length(de_genes) < n_genes * 0.6,
    info = "Not all genes should be differentially expressed"
  )

  # 6. Check for genes with expression in at least some cells
  genes_expressed <- colSums(results$expression > 0)
  pct_expressed_genes <- mean(genes_expressed > 0) * 100
  expect_true(
    pct_expressed_genes > 5,
    info = sprintf(
      "At least 5%% of genes should be expressed in some cells (got %.1f%%)",
      pct_expressed_genes
    )
  )

  # 7. Single-cell specific: check for variance in expressed genes
  # Only calculate CV for expressed genes (not all zeros)
  expressed_genes <- mean_group1 > 0 | mean_group2 > 0
  n_expressed <- sum(expressed_genes)

  if (n_expressed > 10) {
    expressed_group1 <- expr_group1[, expressed_genes, drop = FALSE]
    var_group1 <- vapply(expressed_group1, var, numeric(1))
    # The small constant keeps the ratio finite for genes with zero mean.
    cv_group1 <- sqrt(var_group1) / (colMeans(expressed_group1) + 1e-6)

    # For expressed genes, some should show variation
    expect_true(
      sum(cv_group1 > 0.1, na.rm = TRUE) > 10,
      info = sprintf(
        "Expressed genes should show variation (testing %d genes)",
        n_expressed
      )
    )
  }

  # 8. Expression levels should be reasonable for single-cell count data
  overall_mean <- mean(as.matrix(results$expression), na.rm = TRUE)
  expect_true(
    overall_mean > 0,
    info = "Mean expression should be positive"
  )
  expect_true(
    overall_mean < 1e5,
    info = "Mean expression should be in reasonable single-cell range"
  )

  # 9. Check that cell type markers might be differential
  # For T cells vs B cells, we'd expect some strong differences
  strong_de <- sum(abs(fold_changes) > 2 & p_values < 0.01, na.rm = TRUE)
  expect_true(
    strong_de > 10,
    info = "Should detect some strongly DE genes between T and B cells"
  )

  message(sprintf(
    "DE analysis complete: %d DE genes (p<0.05) out of %d tested",
    length(de_genes), sum(valid_pvals)
  ))
  message(sprintf("Strongly DE genes (|log2FC|>2, p<0.01): %d", strong_de))
  message(sprintf(
    "Median fold change: %.3f (log2)", median(fold_changes, na.rm = TRUE)
  ))
  message(sprintf(
    "Sparsity: %.1f%% (Group1), %.1f%% (Group2)",
    sparsity_group1 * 100, sparsity_group2 * 100
  ))
  message(sprintf(
    "Expression range: %.1f to %.1f",
    min(results$expression, na.rm = TRUE),
    max(results$expression, na.rm = TRUE)
  ))
  message("Single-cell biological validity tests passed!")
})

test_that("predict_query with total_count parameter works correctly", {
  skip_if_no_api_key()

  message("\nTesting predict_query with custom total_count parameter...")

  test_query <- get_valid_query()
  test_query$total_count <- 5000000

  # Test with custom total_count
  results <- predict_query(
    query = test_query,
    as_counts = TRUE
  )

  expect_prediction_structure(results)

  # Verify expression values are non-negative
  expect_non_negative_counts(results)

  message("total_count parameter test passed.")
})

test_that("predict_query with deterministic_latents produces reproducible results", {
  skip_if_no_api_key()

  message(
    "\nTesting predict_query with deterministic_latents for reproducibility..."
  )

  test_query <- get_valid_query()
  test_query$seed <- 12345 # Set a seed for consistency
  test_query$deterministic_latents <- TRUE

  # First call with deterministic_latents = TRUE
  results1 <- predict_query(
    query = test_query,
    as_counts = TRUE
  )

  # Second call with same query and deterministic_latents = TRUE
  results2 <- predict_query(
    query = test_query,
    as_counts = TRUE
  )

  expect_type(results1, "list")
  expect_type(results2, "list")

  expect_equal(
    dim(results1$expression), dim(results2$expression),
    info = "Expression dimensions should match"
  )

  # The two runs are compared by correlation rather than exact equality, which
  # tolerates minor numeric differences between calls.
  correlation <- expression_correlation(
    results1$expression, results2$expression
  )

  expect_true(
    correlation > 0.99,
    info = sprintf(
      "With deterministic_latents, results should be highly correlated (got r=%.4f)",
      correlation
    )
  )

  message(sprintf(
    "Deterministic latents test passed (correlation: %.6f)", correlation
  ))
})

test_that("predict_query with deterministic_latents FALSE shows variation", {
  skip_if_no_api_key()

  message(
    "\nTesting predict_query with deterministic_latents=FALSE shows variation..."
  )

  test_query <- get_valid_query()
  test_query$seed <- NULL # Remove seed to allow variation
  test_query$deterministic_latents <- FALSE

  # First call with deterministic_latents = FALSE
  results1 <- predict_query(
    query = test_query,
    as_counts = TRUE
  )

  # Second call with deterministic_latents = FALSE
  results2 <- predict_query(
    query = test_query,
    as_counts = TRUE
  )

  expect_type(results1, "list")
  expect_type(results2, "list")

  expect_equal(
    dim(results1$expression), dim(results2$expression),
    info = "Expression dimensions should match"
  )

  # Calculate correlation - should be high but not perfect
  correlation <- expression_correlation(
    results1$expression, results2$expression
  )

  # Results should still be similar (same biological context) but not identical
  expect_true(
    correlation < 1.0,
    info = sprintf(
      "Without deterministic_latents, results should show some variation (got r=%.4f)",
      correlation
    )
  )
  expect_true(
    correlation > 0.8,
    info = sprintf(
      "Results should still be reasonably correlated (got r=%.4f)",
      correlation
    )
  )

  message(sprintf(
    "Stochastic latents test passed (correlation: %.4f, showing expected variation)",
    correlation
  ))
})

test_that("predict_query with total_count for single-cell", {
  skip_if_no_api_key()

  message("\nTesting predict_query with custom total_count for single-cell...")

  test_query <- get_valid_query(modality = "single-cell")
  test_query$total_count <- 5000

  # Test with custom total_count (typical single-cell library size)
  results <- predict_query(
    query = test_query,
    as_counts = TRUE
  )

  expect_prediction_structure(results)

  # Verify expression values are non-negative
  expect_non_negative_counts(results)

  message("Single-cell total_count parameter test passed.")
})

test_that("predict_query with both total_count and deterministic_latents", {
  skip_if_no_api_key()

  message(
    "\nTesting predict_query with both total_count and deterministic_latents..."
  )

  test_query <- get_valid_query()
  test_query$total_count <- 8000000
  test_query$deterministic_latents <- TRUE

  # Test with both parameters
  results <- predict_query(
    query = test_query,
    as_counts = TRUE
  )

  expect_prediction_structure(results)

  # Verify expression values are non-negative
  expect_non_negative_counts(results)

  # Verify numeric data
  expect_numeric_expression(results)

  message("Combined parameters test passed.")
})