# Tests for helper distribution constructors # Tests verify analytical formulas against numerical computation and known values test_that("dfr_exponential creates valid distribution", { # With parameter exp_dist <- dfr_exponential(lambda = 0.5) expect_true(is_dfr_dist(exp_dist)) # Hazard is constant h <- hazard(exp_dist) expect_equal(h(1), 0.5) expect_equal(h(10), 0.5) expect_equal(h(c(1, 5, 10)), c(0.5, 0.5, 0.5)) # Cumulative hazard is linear H <- cum_haz(exp_dist) expect_equal(H(2), 1.0) expect_equal(H(4), 2.0) # Survival matches analytical S <- surv(exp_dist) expect_equal(S(2), exp(-1.0), tolerance = 1e-6) # Without parameter (for fitting) exp_null <- dfr_exponential() expect_true(is_dfr_dist(exp_null)) h_null <- hazard(exp_null) expect_equal(h_null(1, par = c(2)), 2) }) test_that("dfr_exponential score function is correct", { exp_dist <- dfr_exponential() s <- score(exp_dist) # Create test data df <- data.frame(t = c(1, 2, 3, 4, 5), delta = c(1, 1, 1, 0, 0)) # At the MLE, score should be approximately zero # MLE for exponential: lambda = n_events / sum(t) n_events <- sum(df$delta == 1) total_time <- sum(df$t) lambda_mle <- n_events / total_time score_at_mle <- s(df, par = c(lambda_mle)) expect_equal(score_at_mle, 0, tolerance = 1e-10) }) test_that("dfr_exponential MLE matches analytical solution", { set.seed(42) true_lambda <- 0.5 n <- 100 times <- rexp(n, rate = true_lambda) df <- data.frame(t = times, delta = rep(1, n)) # Analytical MLE analytical_mle <- n / sum(times) # Fitted MLE solver <- fit(dfr_exponential()) result <- solver(df, par = c(1.0)) expect_equal(coef(result), analytical_mle, tolerance = 1e-6) }) test_that("dfr_weibull creates valid distribution", { # With parameters weib_dist <- dfr_weibull(shape = 2, scale = 3) expect_true(is_dfr_dist(weib_dist)) # Hazard formula: h(t) = (k/sigma) * (t/sigma)^(k-1) h <- hazard(weib_dist) t <- 2 k <- 2 sigma <- 3 expected_h <- (k / sigma) * (t / sigma)^(k - 1) expect_equal(h(t), expected_h, tolerance = 1e-10) # Cumulative hazard: H(t) = (t/sigma)^k H <- cum_haz(weib_dist) expected_H <- (t / sigma)^k expect_equal(H(t), expected_H, tolerance = 1e-6) # Survival S <- surv(weib_dist) expect_equal(S(t), exp(-expected_H), tolerance = 1e-6) # Shape = 1 reduces to exponential weib_exp <- dfr_weibull(shape = 1, scale = 2) # lambda = 1/2 h_weib <- hazard(weib_exp) expect_equal(h_weib(5), 0.5, tolerance = 1e-10) }) test_that("dfr_weibull score function is correct", { weib_dist <- dfr_weibull() s <- score(weib_dist) # Generate Weibull data set.seed(123) true_k <- 2 true_sigma <- 3 n <- 50 u <- runif(n) times <- true_sigma * (-log(u))^(1 / true_k) df <- data.frame(t = times, delta = rep(1, n)) # Fit the model solver <- fit(weib_dist) result <- solver(df, par = c(1.5, 2.5)) # Score at MLE should be near zero score_at_mle <- s(df, par = coef(result)) expect_equal(score_at_mle, c(0, 0), tolerance = 1e-3) }) test_that("dfr_weibull MLE recovers true parameters", { set.seed(456) true_k <- 2.5 true_sigma <- 10 n <- 200 # Inverse CDF sampling for Weibull u <- runif(n) times <- true_sigma * (-log(u))^(1 / true_k) df <- data.frame(t = times, delta = rep(1, n)) solver <- fit(dfr_weibull()) result <- solver(df, par = c(2, 8)) # Should recover parameters reasonably well expect_equal(coef(result)[1], true_k, tolerance = 0.3) expect_equal(coef(result)[2], true_sigma, tolerance = 1.0) }) test_that("dfr_gompertz creates valid distribution", { # With parameters gomp_dist <- dfr_gompertz(a = 0.01, b = 0.1) expect_true(is_dfr_dist(gomp_dist)) # Hazard formula: h(t) = a * exp(b * t) h <- hazard(gomp_dist) t <- 5 a <- 0.01 b <- 0.1 expected_h <- a * exp(b * t) expect_equal(h(t), expected_h, tolerance = 1e-10) # Cumulative hazard: H(t) = (a/b) * (exp(b*t) - 1) H <- cum_haz(gomp_dist) expected_H <- (a / b) * (exp(b * t) - 1) expect_equal(H(t), expected_H, tolerance = 1e-6) # Survival S <- surv(gomp_dist) expect_equal(S(t), exp(-expected_H), tolerance = 1e-6) # Initial hazard equals a expect_equal(h(0), a, tolerance = 1e-10) }) test_that("dfr_gompertz hazard doubles correctly", { # Set b = log(2)/T so hazard doubles every T units T_double <- 10 b <- log(2) / T_double gomp_dist <- dfr_gompertz(a = 0.01, b = b) h <- hazard(gomp_dist) # Hazard at t=0 h0 <- h(0) # Hazard should double at t = T_double expect_equal(h(T_double), 2 * h0, tolerance = 1e-10) # And quadruple at 2*T_double expect_equal(h(2 * T_double), 4 * h0, tolerance = 1e-10) }) test_that("dfr_loglogistic creates valid distribution", { # With parameters ll_dist <- dfr_loglogistic(alpha = 10, beta = 2) expect_true(is_dfr_dist(ll_dist)) # Hazard formula: h(t) = (beta/alpha) * (t/alpha)^(beta-1) / (1 + (t/alpha)^beta) h <- hazard(ll_dist) t <- 5 alpha <- 10 beta <- 2 t_ratio <- t / alpha expected_h <- (beta / alpha) * t_ratio^(beta - 1) / (1 + t_ratio^beta) expect_equal(h(t), expected_h, tolerance = 1e-10) # Cumulative hazard: H(t) = log(1 + (t/alpha)^beta) H <- cum_haz(ll_dist) expected_H <- log(1 + t_ratio^beta) expect_equal(H(t), expected_H, tolerance = 1e-6) # Survival: S(t) = 1 / (1 + (t/alpha)^beta) S <- surv(ll_dist) expected_S <- 1 / (1 + t_ratio^beta) expect_equal(S(t), expected_S, tolerance = 1e-6) }) test_that("dfr_loglogistic median equals alpha when beta > 1", { alpha <- 100 beta <- 3 ll_dist <- dfr_loglogistic(alpha = alpha, beta = beta) Q <- inv_cdf(ll_dist) # Median should equal alpha median <- Q(0.5) expect_equal(median, alpha, tolerance = 1e-3) }) test_that("dfr_loglogistic hazard is non-monotonic for beta > 1", { ll_dist <- dfr_loglogistic(alpha = 10, beta = 3) h <- hazard(ll_dist) # Hazard should increase initially, then decrease t_early <- 2 t_mid <- 10 t_late <- 50 h_early <- h(t_early) h_mid <- h(t_mid) h_late <- h(t_late) # Early should be less than mid (increasing phase) expect_true(h_early < h_mid) # Late should be less than mid (decreasing phase) expect_true(h_late < h_mid) }) test_that("all distributions handle censored data", { # Test that fitting works with mixed exact/censored data df <- data.frame( t = c(1, 2, 3, 4, 5, 6, 7, 8), delta = c(1, 1, 1, 0, 0, 1, 1, 0) ) # Exponential solver_exp <- fit(dfr_exponential()) result_exp <- solver_exp(df, par = c(0.5)) expect_true(result_exp$converged) # Weibull solver_weib <- fit(dfr_weibull()) result_weib <- solver_weib(df, par = c(1.5, 5)) expect_true(result_weib$converged) # Gompertz solver_gomp <- fit(dfr_gompertz()) result_gomp <- solver_gomp(df, par = c(0.1, 0.1)) expect_true(result_gomp$converged) # Log-logistic solver_ll <- fit(dfr_loglogistic()) result_ll <- solver_ll(df, par = c(5, 2)) expect_true(result_ll$converged) }) test_that("distributions work with parameter override", { # All distribution methods should accept par argument exp_dist <- dfr_exponential(lambda = 1) h <- hazard(exp_dist) expect_equal(h(1), 1) # default expect_equal(h(1, par = c(2)), 2) # override weib_dist <- dfr_weibull(shape = 2, scale = 1) S <- surv(weib_dist) S_default <- S(1) S_override <- S(1, par = c(3, 2)) expect_true(S_default != S_override) gomp_dist <- dfr_gompertz(a = 0.1, b = 0.1) H <- cum_haz(gomp_dist) H_default <- H(5) H_override <- H(5, par = c(0.2, 0.2)) expect_true(H_default != H_override) ll_dist <- dfr_loglogistic(alpha = 10, beta = 2) cdf_fn <- cdf(ll_dist) F_default <- cdf_fn(10) F_override <- cdf_fn(10, par = c(20, 3)) expect_true(F_default != F_override) }) test_that("analytical cumulative hazard matches numerical integration", { # Test that our analytical cum_haz_rate matches numerical integration # Exponential exp_dist <- dfr_exponential(lambda = 0.5) H_analytical <- cum_haz(exp_dist) h <- hazard(exp_dist) H_numerical <- function(t) { integrate(h, 0, t)$value } expect_equal(H_analytical(5), H_numerical(5), tolerance = 1e-3) # Weibull weib_dist <- dfr_weibull(shape = 2, scale = 3) H_analytical <- cum_haz(weib_dist) h <- hazard(weib_dist) H_numerical <- function(t) { integrate(h, 0, t)$value } expect_equal(H_analytical(5), H_numerical(5), tolerance = 1e-3) # Gompertz gomp_dist <- dfr_gompertz(a = 0.01, b = 0.1) H_analytical <- cum_haz(gomp_dist) h <- hazard(gomp_dist) H_numerical <- function(t) { integrate(h, 0, t)$value } expect_equal(H_analytical(5), H_numerical(5), tolerance = 1e-3) # Log-logistic ll_dist <- dfr_loglogistic(alpha = 10, beta = 2) H_analytical <- cum_haz(ll_dist) h <- hazard(ll_dist) H_numerical <- function(t) { integrate(h, 0, t)$value } expect_equal(H_analytical(5), H_numerical(5), tolerance = 1e-3) }) test_that("analytical score matches numerical gradient", { skip_if_not_installed("numDeriv") # Test that analytical score_fn matches numerical gradient df <- data.frame(t = c(1, 2, 3, 4, 5), delta = c(1, 1, 0, 1, 0)) # Exponential exp_dist <- dfr_exponential() ll_exp <- loglik(exp_dist) s_exp <- score(exp_dist) par_exp <- c(0.3) grad_numerical <- numDeriv::grad(function(p) ll_exp(df, p), par_exp) grad_analytical <- s_exp(df, par_exp) expect_equal(grad_analytical, grad_numerical, tolerance = 1e-4) # Weibull weib_dist <- dfr_weibull() ll_weib <- loglik(weib_dist) s_weib <- score(weib_dist) par_weib <- c(1.5, 3) grad_numerical <- numDeriv::grad(function(p) ll_weib(df, p), par_weib) grad_analytical <- s_weib(df, par_weib) expect_equal(grad_analytical, grad_numerical, tolerance = 1e-4) # Gompertz gomp_dist <- dfr_gompertz() ll_gomp <- loglik(gomp_dist) s_gomp <- score(gomp_dist) par_gomp <- c(0.1, 0.2) grad_numerical <- numDeriv::grad(function(p) ll_gomp(df, p), par_gomp) grad_analytical <- s_gomp(df, par_gomp) expect_equal(grad_analytical, grad_numerical, tolerance = 1e-4) # Log-logistic ll_dist <- dfr_loglogistic() ll_ll <- loglik(ll_dist) s_ll <- score(ll_dist) par_ll <- c(3, 2) grad_numerical <- numDeriv::grad(function(p) ll_ll(df, p), par_ll) grad_analytical <- s_ll(df, par_ll) expect_equal(grad_analytical, grad_numerical, tolerance = 1e-4) })