# test-logL_cat.R - Tests for categorical AD log-likelihood test_that("logL_cat validates inputs correctly", { set.seed(123) y <- matrix(sample(1:3, 50, replace = TRUE), nrow = 10, ncol = 5) # Order must be non-negative expect_error(logL_cat(y, order = -1, marginal = list(), transition = list()), "non-negative") # Order must be less than n_time expect_error(logL_cat(y, order = 5, marginal = list(), transition = list()), "less than") }) test_that("logL_cat missing-data modes behave correctly", { set.seed(124) y <- simulate_cat(120, 5, order = 1, n_categories = 2) fit <- fit_cat(y, order = 1) y_miss <- y y_miss[1, 3] <- NA y_miss[2, 5] <- NA expect_error( logL_cat(y_miss, order = 1, marginal = fit$marginal, transition = fit$transition, na_action = "fail"), "Missing data" ) keep <- stats::complete.cases(y_miss) ll_complete_mode <- logL_cat( y_miss, order = 1, marginal = fit$marginal, transition = fit$transition, na_action = "complete" ) ll_complete_ref <- logL_cat( y_miss[keep, , drop = FALSE], order = 1, marginal = fit$marginal, transition = fit$transition ) expect_equal(as.numeric(ll_complete_mode), as.numeric(ll_complete_ref), tolerance = 1e-10) ll_no_missing <- logL_cat(y, order = 1, marginal = fit$marginal, transition = fit$transition) ll_no_missing_marg <- logL_cat( y, order = 1, marginal = fit$marginal, transition = fit$transition, na_action = "marginalize" ) expect_equal(as.numeric(ll_no_missing_marg), as.numeric(ll_no_missing), tolerance = 1e-10) }) test_that("logL_cat marginalize is finite for MAR-style missingness", { set.seed(125) y <- simulate_cat(160, 6, order = 1, n_categories = 2) # Monotone missingness (dropout) y_monotone <- y dropout_time <- sample(2:6, nrow(y_monotone), replace = TRUE) dropout_flag <- runif(nrow(y_monotone)) < 0.35 for (i in seq_len(nrow(y_monotone))) { if (dropout_flag[i]) { y_monotone[i, dropout_time[i]:ncol(y_monotone)] <- NA } } fit_monotone <- fit_cat(y_monotone, order = 1, na_action = "marginalize") ll_monotone <- logL_cat( y_monotone, order = 1, marginal = fit_monotone$marginal, transition = fit_monotone$transition, na_action = "marginalize" ) expect_true(is.finite(fit_monotone$log_l)) expect_true(is.finite(ll_monotone)) # Intermittent missingness y_intermittent <- y mask <- matrix(runif(length(y_intermittent)) < 0.15, nrow = nrow(y_intermittent)) y_intermittent[mask] <- NA fit_intermittent <- fit_cat(y_intermittent, order = 1, na_action = "marginalize") ll_intermittent <- logL_cat( y_intermittent, order = 1, marginal = fit_intermittent$marginal, transition = fit_intermittent$transition, na_action = "marginalize" ) expect_true(is.finite(fit_intermittent$log_l)) expect_true(is.finite(ll_intermittent)) }) test_that("logL_cat computes correct log-likelihood for order 0", { set.seed(234) n_subjects <- 50 n_time <- 4 c <- 2 # Create uniform marginals marginal <- lapply(1:n_time, function(k) c(0.5, 0.5)) names(marginal) <- 1:n_time # Simulate uniform data y <- matrix(sample(1:c, n_subjects * n_time, replace = TRUE), nrow = n_subjects, ncol = n_time) # Compute log-likelihood ll <- logL_cat(y, order = 0, marginal = marginal, n_categories = c) # Manual computation: each observation has prob 0.5 ll_manual <- n_subjects * n_time * log(0.5) expect_equal(as.numeric(ll), ll_manual) }) test_that("logL_cat computes correct log-likelihood for order 1", { set.seed(345) n_subjects <- 100 n_time <- 3 c <- 2 # Known marginal marginal <- list(t1 = c(0.6, 0.4)) # Known transitions transition <- list( t2 = matrix(c(0.8, 0.2, 0.3, 0.7), nrow = 2, byrow = TRUE), t3 = matrix(c(0.8, 0.2, 0.3, 0.7), nrow = 2, byrow = TRUE) ) # Simulate data from this model y <- simulate_cat(n_subjects, n_time, order = 1, n_categories = c, marginal = marginal, transition = transition) # Compute log-likelihood at true parameters ll_true <- logL_cat(y, order = 1, marginal = marginal, transition = transition, n_categories = c) # Compute at wrong parameters (uniform) marginal_wrong <- list(t1 = c(0.5, 0.5)) transition_wrong <- list( t2 = matrix(c(0.5, 0.5, 0.5, 0.5), nrow = 2, byrow = TRUE), t3 = matrix(c(0.5, 0.5, 0.5, 0.5), nrow = 2, byrow = TRUE) ) ll_wrong <- logL_cat(y, order = 1, marginal = marginal_wrong, transition = transition_wrong, n_categories = c) # Log-likelihood should be higher at true parameters expect_true(as.numeric(ll_true) > as.numeric(ll_wrong)) }) test_that("logL_cat matches fit_cat log-likelihood", { set.seed(456) n_subjects <- 80 n_time <- 5 # Simulate data y <- simulate_cat(n_subjects, n_time, order = 1, n_categories = 2) # Fit model fit <- fit_cat(y, order = 1) # Compute log-likelihood at fitted parameters ll_check <- logL_cat(y, order = 1, marginal = fit$marginal, transition = fit$transition, n_categories = 2) expect_equal(as.numeric(ll_check), as.numeric(fit$log_l), tolerance = 1e-10) }) test_that("logL_cat works for order 2", { set.seed(567) n_subjects <- 100 n_time <- 5 c <- 2 # Simulate order 2 data y <- simulate_cat(n_subjects, n_time, order = 2, n_categories = c) # Fit order 2 model fit <- fit_cat(y, order = 2) # Check that log-likelihood matches ll_check <- logL_cat(y, order = 2, marginal = fit$marginal, transition = fit$transition, n_categories = c) expect_equal(as.numeric(ll_check), as.numeric(fit$log_l), tolerance = 1e-10) }) test_that("logL_cat handles more than 2 categories", { set.seed(678) n_subjects <- 80 n_time <- 4 c <- 3 # Simulate with 3 categories y <- simulate_cat(n_subjects, n_time, order = 1, n_categories = c) # Fit and verify fit <- fit_cat(y, order = 1, n_categories = c) ll_check <- logL_cat(y, order = 1, marginal = fit$marginal, transition = fit$transition, n_categories = c) expect_equal(as.numeric(ll_check), as.numeric(fit$log_l), tolerance = 1e-10) }) test_that("logL_cat works with heterogeneous blocks", { set.seed(789) n1 <- 40 n2 <- 40 n_time <- 4 c <- 2 # Simulate two groups with different parameters marg1 <- list(t1 = c(0.7, 0.3)) trans1 <- list( t2 = matrix(c(0.9, 0.1, 0.2, 0.8), 2, byrow = TRUE), t3 = matrix(c(0.9, 0.1, 0.2, 0.8), 2, byrow = TRUE), t4 = matrix(c(0.9, 0.1, 0.2, 0.8), 2, byrow = TRUE) ) marg2 <- list(t1 = c(0.4, 0.6)) trans2 <- list( t2 = matrix(c(0.5, 0.5, 0.5, 0.5), 2, byrow = TRUE), t3 = matrix(c(0.5, 0.5, 0.5, 0.5), 2, byrow = TRUE), t4 = matrix(c(0.5, 0.5, 0.5, 0.5), 2, byrow = TRUE) ) y1 <- simulate_cat(n1, n_time, order = 1, n_categories = c, marginal = marg1, transition = trans1) y2 <- simulate_cat(n2, n_time, order = 1, n_categories = c, marginal = marg2, transition = trans2) y <- rbind(y1, y2) blocks <- c(rep(1, n1), rep(2, n2)) # Fit heterogeneous model fit <- fit_cat(y, order = 1, blocks = blocks, homogeneous = FALSE) # Check log-likelihood ll_check <- logL_cat(y, order = 1, marginal = fit$marginal, transition = fit$transition, blocks = blocks, homogeneous = FALSE, n_categories = c) expect_equal(as.numeric(ll_check), as.numeric(fit$log_l), tolerance = 1e-10) }) test_that("logL_cat is additive across independent subjects", { set.seed(901) n_time <- 4 c <- 2 # Create two subjects y <- matrix(sample(1:c, 2 * n_time, replace = TRUE), nrow = 2, ncol = n_time) # Fit model to get valid parameters fit <- fit_cat(y, order = 1, n_categories = c) # Full log-likelihood ll_full <- logL_cat(y, order = 1, marginal = fit$marginal, transition = fit$transition, n_categories = c) # Individual log-likelihoods ll_1 <- logL_cat(y[1, , drop = FALSE], order = 1, marginal = fit$marginal, transition = fit$transition, n_categories = c) ll_2 <- logL_cat(y[2, , drop = FALSE], order = 1, marginal = fit$marginal, transition = fit$transition, n_categories = c) expect_equal(as.numeric(ll_full), as.numeric(ll_1) + as.numeric(ll_2), tolerance = 1e-10) }) test_that("logL_cat handles n_categories override", { set.seed(112) n_subjects <- 50 n_time <- 4 # Generate data with only categories 1 and 2 observed y <- matrix(sample(1:2, n_subjects * n_time, replace = TRUE), nrow = n_subjects, ncol = n_time) # Fit with 3 categories (even though category 3 not observed) fit <- fit_cat(y, order = 1, n_categories = 3) # Verify log-likelihood works with the override ll <- logL_cat(y, order = 1, marginal = fit$marginal, transition = fit$transition, n_categories = 3) expect_true(is.finite(as.numeric(ll))) expect_equal(as.numeric(ll), as.numeric(fit$log_l), tolerance = 1e-10) }) test_that("logL_cat likelihood increases with correct model", { set.seed(223) n_subjects <- 150 n_time <- 5 c <- 2 # Strong dependence structure marginal_true <- list(t1 = c(0.5, 0.5)) transition_true <- list( t2 = matrix(c(0.95, 0.05, 0.05, 0.95), 2, byrow = TRUE), t3 = matrix(c(0.95, 0.05, 0.05, 0.95), 2, byrow = TRUE), t4 = matrix(c(0.95, 0.05, 0.05, 0.95), 2, byrow = TRUE), t5 = matrix(c(0.95, 0.05, 0.05, 0.95), 2, byrow = TRUE) ) # Simulate from true model y <- simulate_cat(n_subjects, n_time, order = 1, n_categories = c, marginal = marginal_true, transition = transition_true) # Log-likelihood at true parameters ll_true <- logL_cat(y, order = 1, marginal = marginal_true, transition = transition_true, n_categories = c) # Log-likelihood at MLE (should be >= true) fit <- fit_cat(y, order = 1, n_categories = c) ll_mle <- fit$log_l # MLE should be at least as good as true parameters expect_true(as.numeric(ll_mle) >= as.numeric(ll_true) - 1e-6) # Small tolerance for numerical issues })