# Test NMF convergence behavior to prevent regression # These tests ensure the warm-start fix is working correctly library(testthat) library(RcppML) library(Matrix) # Helper: compute MSE between original and reconstructed matrix compute_mse <- function(A, model) { W <- as.matrix(model@w) H <- as.matrix(model@h) D <- as.numeric(model@d) A_hat <- W %*% diag(D) %*% H mean((as.matrix(A) - A_hat)^2) } # ============================================================================ # CRITICAL: These tests catch the warm-start initialization bug where MSE # was INCREASING with more iterations instead of decreasing # ============================================================================ test_that("NMF converges (MSE decreases with iterations) on sparse matrices", { skip_on_cran() set.seed(42) # Create sparse test matrix A <- rsparsematrix(200, 100, density = 0.3) A@x <- abs(A@x) # Run with increasing iterations mse_1 <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 1, verbose = FALSE)) mse_5 <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 5, verbose = FALSE)) mse_20 <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 20, verbose = FALSE)) mse_50 <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 50, verbose = FALSE)) # MSE should decrease monotonically (allowing small tolerance for numerical noise) expect_lt(mse_5, mse_1 * 1.01, label = sprintf("MSE should decrease: iter5=%.6f should be < iter1=%.6f", mse_5, mse_1)) expect_lt(mse_20, mse_5 * 1.01, label = sprintf("MSE should decrease: iter20=%.6f should be < iter5=%.6f", mse_20, mse_5)) expect_lt(mse_50, mse_20 * 1.01, label = sprintf("MSE should decrease: iter50=%.6f should be < iter20=%.6f", mse_50, mse_20)) # Final MSE should be lower than initial (at least some improvement) expect_lt(mse_50, mse_1, label = "50 iterations should achieve some MSE reduction vs 1 iteration") }) test_that("NMF converges (MSE decreases with iterations) on dense matrices", { skip_on_cran() set.seed(42) # Create dense test matrix A <- abs(matrix(rnorm(200 * 100), 200, 100)) # Run with increasing iterations mse_1 <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 1, verbose = FALSE)) mse_5 <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 5, verbose = FALSE)) mse_20 <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 20, verbose = FALSE)) mse_50 <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 50, verbose = FALSE)) # MSE should decrease monotonically expect_lt(mse_5, mse_1 * 1.01) expect_lt(mse_20, mse_5 * 1.01) expect_lt(mse_50, mse_20 * 1.01) # Final MSE should be lower than initial expect_lt(mse_50, mse_1) }) test_that("NMF converges on real AML dataset", { skip_on_cran() data(aml) set.seed(123) mse_1 <- compute_mse(aml, nmf(aml, k = 5, tol = 0, maxit = 1, verbose = FALSE)) mse_10 <- compute_mse(aml, nmf(aml, k = 5, tol = 0, maxit = 10, verbose = FALSE)) mse_50 <- compute_mse(aml, nmf(aml, k = 5, tol = 0, maxit = 50, verbose = FALSE)) # MSE must decrease (allow small tolerance for stochastic variance) expect_lt(mse_10, mse_1, label = sprintf("AML: MSE should decrease: iter10=%.6f < iter1=%.6f", mse_10, mse_1)) # mse_50 should be less than or approximately equal to mse_10 (convergence may stall) expect_lt(mse_50, mse_10 * 1.05, label = sprintf("AML: MSE should decrease or stabilize: iter50=%.6f vs iter10=%.6f", mse_50, mse_10)) }) test_that("NMF converges consistently across random seeds", { skip_on_cran() set.seed(42) A <- rsparsematrix(150, 80, density = 0.4) A@x <- abs(A@x) # Test multiple seeds - all should converge n_seeds <- 5 for (seed in 1:n_seeds) { set.seed(seed * 100) mse_early <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 5, verbose = FALSE)) mse_late <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 30, verbose = FALSE)) expect_lt(mse_late, mse_early, label = sprintf("Seed %d: MSE should decrease (early=%.6f, late=%.6f)", seed, mse_early, mse_late)) } }) test_that("NMF reconstruction values stay bounded (no divergence)", { skip_on_cran() set.seed(42) A <- rsparsematrix(200, 100, density = 0.3) A@x <- abs(A@x) A_range <- range(A@x) # Run NMF model <- nmf(A, k = 5, tol = 0, maxit = 50, verbose = FALSE) # Reconstruct W <- as.matrix(model@w) H <- as.matrix(model@h) D <- as.numeric(model@d) A_hat <- W %*% diag(D) %*% H # Reconstruction should have similar scale to original (not exploding) hat_range <- range(A_hat) # Max reconstruction should not be more than 10x the max original value # (This catches the divergence bug where values were growing unboundedly) expect_lt(max(abs(A_hat)), max(abs(A)) * 10, label = sprintf("Reconstruction max=%.2f should not be >> original max=%.2f", max(abs(A_hat)), max(abs(A)))) # Mean reconstruction should be similar to original mean expect_lt(abs(mean(A_hat) - mean(A)) / mean(A), 0.5, label = "Reconstruction mean should be within 50% of original mean") }) test_that("NMF achieves reasonable MSE on known factorizable matrix", { skip_on_cran() set.seed(42) # Create a matrix with known low-rank structure k_true <- 5 W_true <- abs(matrix(rnorm(100 * k_true), 100, k_true)) H_true <- abs(matrix(rnorm(k_true * 50), k_true, 50)) A <- W_true %*% H_true # Reset seed for NMF initialization (separate from data generation) set.seed(123) # NMF should be able to reconstruct this well model <- nmf(A, k = k_true, tol = 0, maxit = 100, verbose = FALSE) mse <- compute_mse(A, model) # MSE should be reasonably low for a truly low-rank matrix # Threshold of 0.1 accounts for non-convexity and local minima expect_lt(mse, 0.1, label = sprintf("MSE=%.6f should be low for known rank-%d matrix", mse, k_true)) }) test_that("NMF with higher rank achieves lower or equal MSE", { skip_on_cran() set.seed(42) A <- rsparsematrix(150, 80, density = 0.4) A@x <- abs(A@x) # Higher rank should give same or better fit mse_k3 <- compute_mse(A, nmf(A, k = 3, tol = 0, maxit = 50, verbose = FALSE)) mse_k5 <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 50, verbose = FALSE)) mse_k10 <- compute_mse(A, nmf(A, k = 10, tol = 0, maxit = 50, verbose = FALSE)) # More factors should generally give better or equal fit # Allow small tolerance for optimization variance expect_lte(mse_k5, mse_k3 * 1.05) expect_lte(mse_k10, mse_k5 * 1.05) }) test_that("NMF handles different L1/L2 regularization without diverging", { skip_on_cran() set.seed(42) A <- rsparsematrix(100, 60, density = 0.4) A@x <- abs(A@x) # Test various regularization settings regularizations <- list( list(L1 = c(0, 0), L2 = c(0, 0)), list(L1 = c(0.01, 0.01), L2 = c(0, 0)), list(L1 = c(0, 0), L2 = c(0.01, 0.01)), list(L1 = c(0.01, 0), L2 = c(0, 0.01)) ) for (reg in regularizations) { model <- nmf(A, k = 5, tol = 0, maxit = 30, verbose = FALSE, L1 = reg$L1, L2 = reg$L2) mse <- compute_mse(A, model) # MSE should be finite and reasonable (not diverged) expect_true(is.finite(mse), label = sprintf("MSE should be finite with L1=%s, L2=%s", paste(reg$L1, collapse=","), paste(reg$L2, collapse=","))) expect_lt(mse, 10, label = sprintf("MSE=%.4f should be reasonable with regularization", mse)) } }) # ============================================================================ # Edge case tests # ============================================================================ test_that("NMF handles tall matrices (m >> n)", { skip_on_cran() set.seed(42) A <- rsparsematrix(500, 50, density = 0.3) A@x <- abs(A@x) mse_early <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 5, verbose = FALSE)) mse_late <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 30, verbose = FALSE)) expect_lt(mse_late, mse_early, label = "Tall matrix should converge") }) test_that("NMF handles wide matrices (n >> m)", { skip_on_cran() set.seed(42) A <- rsparsematrix(50, 500, density = 0.3) A@x <- abs(A@x) mse_early <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 5, verbose = FALSE)) mse_late <- compute_mse(A, nmf(A, k = 5, tol = 0, maxit = 30, verbose = FALSE)) expect_lt(mse_late, mse_early, label = "Wide matrix should converge") }) test_that("NMF handles very sparse matrices", { skip_on_cran() set.seed(42) A <- rsparsematrix(200, 100, density = 0.05) A@x <- abs(A@x) mse_early <- compute_mse(A, nmf(A, k = 3, tol = 0, maxit = 5, verbose = FALSE)) mse_late <- compute_mse(A, nmf(A, k = 3, tol = 0, maxit = 30, verbose = FALSE)) expect_lt(mse_late, mse_early, label = "Very sparse matrix should converge") }) # ============================================================================ # Factor convergence mode tests # ============================================================================ test_that("convergence='factor' mode converges on sparse", { skip_on_cran() set.seed(42) A <- rsparsematrix(50, 30, density = 0.3) A@x <- abs(A@x) result <- nmf(A, k = 3, seed = 1, maxit = 200, tol = 1e-4, convergence = "factor", verbose = FALSE) expect_s4_class(result, "nmf") # Factor convergence should terminate before maxit with reasonable tol expect_lt(result@misc$iter, 200) }) test_that("convergence='factor' mode converges on dense", { skip_on_cran() set.seed(42) A <- abs(matrix(rnorm(50 * 30), 50, 30)) result <- nmf(A, k = 3, seed = 1, maxit = 200, tol = 1e-4, convergence = "factor", verbose = FALSE) expect_s4_class(result, "nmf") expect_lt(result@misc$iter, 200) }) test_that("convergence='both' mode works", { skip_on_cran() set.seed(42) A <- rsparsematrix(50, 30, density = 0.3) A@x <- abs(A@x) result <- nmf(A, k = 3, seed = 1, maxit = 200, tol = 1e-4, convergence = "both", verbose = FALSE) expect_s4_class(result, "nmf") # Should converge, though potentially takes more iterations than single-mode expect_true(result@misc$iter <= 200) }) test_that("convergence='loss' gives same result as default", { skip_on_cran() set.seed(42) A <- rsparsematrix(50, 30, density = 0.3) A@x <- abs(A@x) r1 <- nmf(A, k = 3, seed = 1, maxit = 100, tol = 1e-4, verbose = FALSE) r2 <- nmf(A, k = 3, seed = 1, maxit = 100, tol = 1e-4, convergence = "loss", verbose = FALSE) # Same seed, same convergence mode → same result expect_equal(r1@misc$iter, r2@misc$iter) expect_equal(r1@w, r2@w, tolerance = 1e-10) }) test_that("convergence modes all produce valid factorizations", { skip_on_cran() set.seed(42) A <- rsparsematrix(40, 25, density = 0.3) A@x <- abs(A@x) for (mode in c("loss", "factor", "both")) { result <- nmf(A, k = 3, seed = 1, maxit = 100, tol = 1e-4, convergence = mode, verbose = FALSE) # All factors should be non-negative expect_true(all(result@w >= 0), info = paste("mode:", mode)) expect_true(all(result@h >= 0), info = paste("mode:", mode)) expect_true(all(result@d >= 0), info = paste("mode:", mode)) # Reconstruction should be reasonable mse <- compute_mse(A, result) expect_true(is.finite(mse), info = paste("mode:", mode)) } })