# Tests for the si_estim() function

test_that("si_estim produces correct estimates with simulated data", {
  library(fdrtool)
  set.seed(1234)

  # Simulate data with known parameters (as in the validation document)
  N <- 1000
  true_mu <- 15
  true_sigma <- 3
  hw1 <- 0.2
  hw2 <- 0.5
  hw3 <- 0.2
  hw4 <- 0.1

  # Generate serial intervals from the four transmission routes
  CP <- round(rhalfnorm(hw1 * N, theta = sqrt(pi / 2) / (sqrt(2) * true_sigma)))
  PS <- round(rnorm(hw2 * N, mean = true_mu, sd = true_sigma))
  PT <- round(rnorm(hw3 * N, mean = 2 * true_mu, sd = sqrt(2) * true_sigma))
  PQ <- round(rnorm(hw4 * N, mean = 3 * true_mu, sd = sqrt(3) * true_sigma))
  sim_data <- c(CP, PS, PT, PQ)

  # Run si_estim with default parameters (normal distribution)
  result_normal <- si_estim(sim_data)

  # Test normal distribution estimates
  expect_type(result_normal, "list")
  expect_setequal(names(result_normal), c("mean", "sd", "wts"))

  # Check that the estimates are close to the true values
  expect_true(abs(result_normal$mean[1] - true_mu) < 1,
              info = "Mean estimate should be within 1 unit of true value")
  expect_true(abs(result_normal$sd[1] - true_sigma) < 1,
              info = "SD estimate should be within 1 unit of true value")

  # Check weights (7 mixture components expected)
  expect_length(result_normal$wts, 7)
  expect_true(abs(result_normal$wts[1] - hw1) < 0.1,
              info = "Co-primary weight should be close to true value")
  expect_true(abs((result_normal$wts[2] + result_normal$wts[3]) - hw2) < 0.1,
              info = "Primary-secondary weight should be close to true value")
  expect_true(abs((result_normal$wts[4] + result_normal$wts[5]) - hw3) < 0.1,
              info = "Primary-tertiary weight should be close to true value")
  expect_true(abs((result_normal$wts[6] + result_normal$wts[7]) - hw4) < 0.1,
              info = "Primary-quaternary weight should be close to true value")

  # Test gamma distribution
  result_gamma <- si_estim(sim_data, dist = "gamma")

  # Basic validation for gamma distribution
  expect_type(result_gamma, "list")
  expect_named(result_gamma, c("mean", "sd", "wts"))

  # The gamma fit may not match as closely because the data were generated
  # from normal distributions, but the estimates should still be reasonable
  expect_true(result_gamma$mean[1] > 5 && result_gamma$mean[1] < 25,
              info = "Gamma mean should be within a reasonable range")
  expect_true(result_gamma$sd[1] > 0,
              info = "SD should be positive")
})

test_that("si_estim handles edge cases appropriately", {
  # Minimal dataset (at least two distinct values)
  small_data <- c(1, 5, 10)
  result_small <- si_estim(small_data)
  expect_type(result_small, "list")
  expect_false(is.na(result_small$mean[1]))
  expect_false(is.na(result_small$sd[1]))

  # Initial values provided
  init_test <- si_estim(c(10, 12, 15, 20), init = c(12, 3))
  expect_type(init_test, "list")
  expect_length(init_test$mean, 1)
  expect_length(init_test$sd, 1)

  # A dataset in which all values are identical has zero variance,
  # so the fit cannot proceed and an error is expected
  repeated_data <- rep(10, 5)
  expect_error(
    si_estim(repeated_data),
    regexp = "Initial standard deviation must be positive|standard deviation|zero variance|singular",
    info = "Should error when all values are identical (SD = 0)"
  )
})

test_that("si_estim handles invalid inputs appropriately", {
  # Negative values should produce a warning
  expect_warning(
    result_negative <- si_estim(c(-5, 10, 15)),
    regexp = "negative values|negative serial intervals"
  )

  # The function should still return valid results despite the warning
  expect_type(result_negative, "list")
  expect_false(is.na(result_negative$mean[1]))
  expect_false(is.na(result_negative$sd[1]))

  # NAs should throw an error
  expect_error(
    si_estim(c(NA, 10, 15)),
    regexp = "Data contains NA values|NA values"
  )

  # Non-numeric data should throw an error
  expect_error(
    si_estim(as.character(c(1, 2, 3))),
    regexp = "numeric|character|type"
  )

  # Invalid distribution should throw an error
  expect_error(
    si_estim(c(10, 15, 20), dist = "invalid_dist"),
    regexp = "normal|gamma|invalid"
  )

  # A single value should throw an error
  expect_error(
    si_estim(10),
    regexp = "at least 2|single value|insufficient"
  )

  # Gamma distribution with negative values should error;
  # the negative-value warning is also expected
  expect_condition(
    expect_error(
      si_estim(c(-5, 10, 15), dist = "gamma"),
      regexp = "Gamma distribution cannot be used with negative values|negative values.*gamma"
    ),
    regexp = "negative values|negative serial intervals",
    class = "warning"
  )
})

test_that("si_estim produces reasonable estimates for historical data", {
  # Small subsets of data similar to those in the validation document;
  # this tests basic functionality, not exact reproduction of historical results
  measles_data <- c(13, 14, 14, 14, 15, 15, 16, 16, 16, 18)
  influenza_data <- c(1, 2, 2, 3, 3, 3, 4, 4, 5)

  # Measles data
  measles_result <- si_estim(measles_data, init = c(15, 3))
  expect_type(measles_result, "list")
  expect_true(measles_result$mean[1] > 10 && measles_result$mean[1] < 20,
              info = "Measles mean should be in a reasonable range")

  # Influenza data
  flu_result <- si_estim(influenza_data, init = c(3, 1))
  expect_type(flu_result, "list")
  expect_true(flu_result$mean[1] > 0 && flu_result$mean[1] < 10,
              info = "Influenza mean should be in a reasonable range")
})

test_that("si_estim gives consistent results across different seeds", {
  set.seed(123)
  data1 <- rnorm(100, mean = 15, sd = 3)

  # Run with different seeds on the same data
  set.seed(456)
  result1 <- si_estim(data1)
  set.seed(789)
  result2 <- si_estim(data1)

  # Results should be identical because the data are identical
  expect_equal(result1$mean, result2$mean)
  expect_equal(result1$sd, result2$sd)
})

test_that("si_estim converges with different initial values", {
  set.seed(123)
  test_data <- rnorm(50, mean = 10, sd = 2)

  # Run with different initial values
  result1 <- si_estim(test_data, init = c(5, 1))
  result2 <- si_estim(test_data, init = c(15, 3))

  # Results should be similar regardless of initial values; a tolerance is used
  # because the EM algorithm may converge to slightly different values
  expect_equal(result1$mean, result2$mean, tolerance = 1)
  expect_equal(result1$sd, result2$sd, tolerance = 1)
})