# tests/testthat/test-adaptive-sampling.R # Copyright (c) 2025 Omid Arhami omid.arhami@uga.edu test_that("generate_kde_samples works correctly", { # Create test samples test_samples <- data.frame( log_N = log(runif(50, 2, 10)), log_k0 = log(runif(50, 1, 5)), log_cooling_rate = log(runif(50, 0.01, 0.1)), log_c_repulsion = log(runif(50, 0.1, 1)), NLL = runif(50, 20, 100) ) # Generate new samples new_samples <- generate_kde_samples(test_samples, n = 10) expect_true(is.data.frame(new_samples)) expect_equal(nrow(new_samples), 10) expect_equal(ncol(new_samples), 4) # Should only have parameter columns par_names <- c("log_N", "log_k0", "log_cooling_rate", "log_c_repulsion") expect_true(all(par_names %in% names(new_samples))) expect_true(all(sapply(new_samples, is.numeric))) }) test_that("generate_kde_samples handles edge cases", { # Test with minimal samples minimal_samples <- data.frame( log_N = log(c(2, 3)), log_k0 = log(c(1, 2)), log_cooling_rate = log(c(0.01, 0.02)), log_c_repulsion = log(c(0.1, 0.2)), NLL = c(50, 60) ) expect_no_error(new_samples <- generate_kde_samples(minimal_samples, n = 5)) expect_equal(nrow(new_samples), 5) # Test with exploration epsilon exploratory_samples <- generate_kde_samples(minimal_samples, n = 3, epsilon = 0.5) expect_equal(nrow(exploratory_samples), 3) }) test_that("weighted_kde produces valid density estimates", { x <- rnorm(100, mean = 5, sd = 2) weights <- runif(100) weights <- weights / sum(weights) # Normalize kde_result <- weighted_kde(x, weights, n = 100) expect_true(is.list(kde_result)) expect_true(all(c("x", "y") %in% names(kde_result))) expect_length(kde_result$x, 100) expect_length(kde_result$y, 100) expect_true(all(kde_result$y >= 0)) # Check approximate normalization (integral should be close to 1) integral_approx <- sum(kde_result$y) * diff(kde_result$x)[1] expect_true(abs(integral_approx - 1) < 0.2) # Allow some tolerance }) test_that("weighted_kde handles different parameters", { x <- c(1, 2, 3, 4, 5) weights <- c(0.1, 0.2, 0.4, 0.2, 0.1) # Test with different evaluation ranges kde1 <- weighted_kde(x, weights, from = 0, to = 6, n = 50) expect_length(kde1$x, 50) expect_equal(min(kde1$x), 0) expect_equal(max(kde1$x), 6) # Test with default range kde2 <- weighted_kde(x, weights) expect_equal(min(kde2$x), min(x)) expect_equal(max(kde2$x), max(x)) }) test_that("calculate_weighted_marginals works correctly", { # Create test samples test_samples <- data.frame( log_N = log(runif(50, 2, 10)), log_k0 = log(runif(50, 1, 5)), log_cooling_rate = log(runif(50, 0.01, 0.1)), log_c_repulsion = log(runif(50, 0.1, 1)), NLL = runif(50, 20, 100) ) marginals <- calculate_weighted_marginals(test_samples) expect_true(is.list(marginals)) expect_length(marginals, 4) par_names <- c("log_N", "log_k0", "log_cooling_rate", "log_c_repulsion") expect_true(all(par_names %in% names(marginals))) # Check that each marginal has the expected structure for (param in par_names) { expect_true(all(c("x", "y") %in% names(marginals[[param]]))) expect_true(all(marginals[[param]]$y >= 0)) } }) test_that("calculate_weighted_marginals input validation", { # Test with missing columns incomplete_samples <- data.frame( log_N = log(runif(20, 2, 10)), log_k0 = log(runif(20, 1, 5)) # Missing log_cooling_rate, log_c_repulsion, NLL ) expect_error( calculate_weighted_marginals(incomplete_samples), "Missing required columns" ) # Test with non-numeric columns invalid_samples <- data.frame( log_N = log(runif(20, 2, 10)), log_k0 = log(runif(20, 1, 5)), log_cooling_rate = log(runif(20, 0.01, 0.1)), log_c_repulsion = log(runif(20, 0.1, 1)), NLL = as.character(runif(20, 20, 100)) # Character instead of numeric ) expect_error( calculate_weighted_marginals(invalid_samples), "All required parameter and NLL columns must be numeric" ) # Test with all infinite NLL values infinite_nll_samples <- data.frame( log_N = log(runif(20, 2, 10)), log_k0 = log(runif(20, 1, 5)), log_cooling_rate = log(runif(20, 0.01, 0.1)), log_c_repulsion = log(runif(20, 0.1, 1)), NLL = rep(Inf, 20) ) expect_error( calculate_weighted_marginals(infinite_nll_samples), "All NLL values are infinite" ) }) test_that("likelihood_function works with cross-validation", { # Create test dissimilarity matrix test_mat <- matrix(runif(16, 1, 5), 4, 4) diag(test_mat) <- 0 test_mat[lower.tri(test_mat)] <- t(test_mat)[lower.tri(test_mat)] # Make symmetric # Test likelihood calculation result <- likelihood_function( dissimilarity_matrix = test_mat, mapping_max_iter = 20, relative_epsilon = 1e-3, N = 2, k0 = 1.0, cooling_rate = 0.01, c_repulsion = 0.01, folds = 3, num_cores = 1 ) expect_true(is.list(result)) expect_true(all(c("Holdout_MAE", "NLL") %in% names(result))) expect_true(is.numeric(result$Holdout_MAE)) expect_true(is.numeric(result$NLL)) expect_true(result$Holdout_MAE > 0 || is.na(result$Holdout_MAE)) }) test_that("likelihood_function handles sparse data", { # Create very sparse matrix sparse_mat <- matrix(NA, 4, 4) sparse_mat[1, 2] <- sparse_mat[2, 1] <- 1 sparse_mat[2, 3] <- sparse_mat[3, 2] <- 2 diag(sparse_mat) <- 0 # Suppress expected warning about sparse data suppressWarnings({ result <- likelihood_function( dissimilarity_matrix = sparse_mat, mapping_max_iter = 10, relative_epsilon = 1e-3, N = 2, k0 = 1.0, cooling_rate = 0.01, c_repulsion = 0.01, folds = 2, num_cores = 1 ) }) # Should handle sparse data gracefully (may return NA) expect_true(is.list(result)) expect_true(all(c("Holdout_MAE", "NLL") %in% names(result))) }) test_that("get_grid creates appropriate parameter grids", { # Create test samples test_samples <- data.frame( log_N = log(runif(30, 2, 10)), log_k0 = log(runif(30, 1, 5)), log_cooling_rate = log(runif(30, 0.01, 0.1)), log_c_repulsion = log(runif(30, 0.1, 1)), NLL = runif(30, 20, 100) ) # Test grid creation for log_N grid_values <- get_grid(test_samples, "log_N", num_points = 10, start_factor = 0.8, end_factor = 1.2) expect_length(grid_values, 10) expect_true(is.numeric(grid_values)) expect_true(all(diff(grid_values) > 0)) # Should be increasing }) test_that("profile_likelihood handles various parameter ranges", { # Create test samples with known distribution set.seed(456) # Different seed for better distribution test_samples <- data.frame( log_N = log(runif(60, 2, 8)), # Even more samples log_k0 = log(runif(60, 1, 4)), # More concentrated range log_cooling_rate = log(runif(60, 0.01, 0.1)), log_c_repulsion = log(runif(60, 0.1, 1)), NLL = runif(60, 20, 100) ) # Test profile likelihood calculation - suppress expected warnings for edge cases suppressWarnings({ pl_result <- profile_likelihood("log_k0", test_samples, grid_size = 5, # Even smaller grid bandwidth_factor = 0.5, min_samples = 1) # Even larger bandwidth, minimum samples }) expect_s3_class(pl_result, "profile_likelihood") expect_equal(length(pl_result$param), 5) # Updated expectation expect_equal(length(pl_result$ll), 5) expect_equal(pl_result$param_name, "log_k0") }) test_that("profile_likelihood input validation", { test_samples <- data.frame( log_N = log(runif(20, 2, 10)), NLL = runif(20, 20, 100) ) # Test with invalid parameter name expect_error( profile_likelihood("invalid_param", test_samples), "Parameter 'invalid_param' not found in samples" ) # Test with missing NLL column samples_no_nll <- test_samples[, "log_N", drop = FALSE] expect_error( profile_likelihood("log_N", samples_no_nll), "Samples data frame must contain an 'NLL' column" ) # Test with invalid grid_size expect_error( profile_likelihood("log_N", test_samples, grid_size = 1), "grid_size must be at least 2" ) # Test with invalid bandwidth_factor expect_error( profile_likelihood("log_N", test_samples, bandwidth_factor = -0.1), "bandwidth_factor must be positive" ) })