# tests/testthat/test-integration.R
# Copyright (c) 2025 Omid Arhami omid.arhami@uga.edu

test_that("full data processing and optimization workflow executes on general data", {
  # 1. Create test data
  test_data <- data.frame(
    object = rep(paste0("V", 1:4), each = 4),
    reference = rep(paste0("S", 1:4), 4),
    score = sample(c(10, 20, 40, 80, 160, 320, 640, "<10", ">1280"), 16, replace = TRUE)
  )

  # 2. Process it into a dissimilarity matrix
  processed_matrix <- list_to_matrix(
    data = test_data, # Pass the data frame, not a file path
    object_col = "object",
    reference_col = "reference",
    value_col = "score",
    is_similarity = TRUE
  )
  expect_true(is.matrix(processed_matrix))

  # 3. Run optimization
  result <- euclidean_embedding(
    dissimilarity_matrix = processed_matrix,
    ndim = 2,
    mapping_max_iter = 50, # Keep low for testing
    k0 = 5.0,
    cooling_rate = 0.05,
    c_repulsion = 0.01,
    write_positions_to_csv = FALSE
  )
  expect_s3_class(result, "topolow")

  # 4. Run a diagnostic plot
  plots <- scatterplot_fitted_vs_true(
    dissimilarity_matrix = processed_matrix,
    p_dissimilarity_mat = result$est_distances,
    save_plot = FALSE
  )
  expect_s3_class(plots$scatter_plot, "ggplot")
})

test_that("full workflow executes correctly on antigenic data", {
  # Create test data
  test_data <- data.frame(
    antigen = rep(paste0("V", 1:3), each = 3),
    serum = rep(paste0("S", 1:3), 3),
    titer = c(40, 80, 160, 320, "<10", ">640", 40, 80, 160),
    virusYear = rep(2000:2002, each = 3),
    serumYear = rep(2000:2002, 3),
    cluster = rep(c("A", "B", "C"), 3),
    color = rep(c("red", "blue", "green"), 3)
  )

  # Add year to antigen/serum names
  test_data$antigen <- paste0(test_data$antigen, "/", test_data$virusYear)
  test_data$serum <- paste0(test_data$serum, "/", test_data$serumYear)

  # Process the data frame
  results <- process_antigenic_data(
    test_data,
    antigen_col = "antigen",
    serum_col = "serum",
    value_col = "titer",
    is_similarity = TRUE,
    metadata_cols = c("cluster", "color", "virusYear", "serumYear")
  )

  # Run optimization
  topo_result <- euclidean_embedding(
    dissimilarity_matrix = results$matrix,
    ndim = 2,
    mapping_max_iter = 100,
    k0 = 3.0,
    cooling_rate = 0.1,
    c_repulsion = 0.001,
    write_positions_to_csv = FALSE
  )

  # Prepare positions for visualization
  positions <- as.data.frame(topo_result$positions)
  positions$name <- rownames(positions)
  positions$antigen <- grepl("^V/", rownames(positions))
  positions$antiserum <- grepl("^S/", rownames(positions))

  # Extract the trailing 4-digit year (same pattern for virus and serum names)
  positions$year <- as.numeric(sub(".*/(\\d{4})$", "\\1", rownames(positions)))

  # Verify year extraction worked
  expect_true(!any(is.na(positions$year)))
  expect_true(all(positions$year %in% 2000:2002))

  plot <- plot_temporal_mapping(positions, ndim = 2)
  expect_s3_class(plot, "ggplot")
})

test_that("parameter optimization workflow works", {
  # Create test matrix with enough variation
  test_mat <- matrix(c(0, 1,   2,   3,
                       1, 0,   2.5, 3.5,
                       2, 2.5, 0,   4,
                       3, 3.5, 4,   0), 4, 4)
  rownames(test_mat) <- colnames(test_mat) <- paste0("Point", 1:4)

  # Run parameter optimization with minimal settings for testing
  results <- initial_parameter_optimization(
    dissimilarity_matrix = test_mat,
    mapping_max_iter = 50,
    relative_epsilon = 1e-3,
    convergence_counter = 3,
    scenario_name = "test_opt",
    N_min = 2, N_max = 3,
    k0_min = 0.5, k0_max = 5,
    c_repulsion_min = 0.001, c_repulsion_max = 0.01,
    cooling_rate_min = 0.001, cooling_rate_max = 0.05,
    num_samples = 2, # Reduced for testing
    folds = 2, # Reduced for testing
    max_cores = 1,
    write_files = FALSE
  )

  expect_true(is.data.frame(results))
  expect_true(all(c("log_N", "log_k0", "log_cooling_rate", "log_c_repulsion",
                    "Holdout_MAE", "NLL") %in% names(results)))
})

test_that("adaptive sampling workflow executes", {
  # Create a temporary file path for the samples
  temp_samples_path <- tempfile(fileext = ".csv")

  # Create initial samples with log-transformed names
  samples <- data.frame(
    log_N = log(c(3, 4)),
    log_k0 = log(c(1, 1.5)),
    log_cooling_rate = log(c(0.001, 0.002)),
    log_c_repulsion = log(c(0.001, 0.002)),
    NLL = c(100, 90),
    Holdout_MAE = c(2, 1.8)
  )
  write.csv(samples, temp_samples_path, row.names = FALSE)

  # Create test dissimilarity matrix
  test_mat <- as.matrix(dist(matrix(rnorm(10 * 3), ncol = 3)))

  # Run adaptive sampling using the temporary file.
  # We test the internal function as it's the core of the logic.
  result <- adaptive_MC_sampling(
    samples_file = temp_samples_path,
    dissimilarity_matrix = test_mat,
    iterations = 1, # Just one iteration for a quick test
    mapping_max_iter = 10,
    relative_epsilon = 1e-3,
    folds = 2,
    scenario_name = "test_amc",
    verbose = FALSE
  )

  expect_true(is.data.frame(result))
  expect_true(nrow(result) > nrow(samples)) # Check that samples were added
  expect_true(all(names(samples) %in% names(result)))

  unlink(temp_samples_path)
})