test_that("kmed initialization converges on the anisotropic same-mean regression case", { set.seed(111) gauss_dat <- rbind( simulate_gaussian(250, c(0, 0), diag(2)), simulate_gaussian(250, c(0, 0), matrix(c(20, 0, 0, 0.01), ncol = 2)) ) fit_kmed <- gmix(gauss_dat, K = 2, init = "kmed", save_taus = TRUE) fit_pam <- gmix(gauss_dat, K = 2, init = "pam") expect_s3_class(fit_kmed, "mbcfit") expect_equal(fit_kmed$info$code, 1) expect_true(all(is.finite(fit_kmed$posterior))) expect_equal(dim(fit_kmed$posterior), c(500L, 2L)) expect_equal(rowSums(fit_kmed$posterior), rep(1, 500), tolerance = 1e-8) expect_gt(best_label_accuracy(fit_kmed$cluster, fit_pam$cluster), 0.95) }) test_that("gmix recovers a well-separated three-cluster solution and predicts consistently", { set.seed(20260309) dat <- rbind( simulate_gaussian(90, c(-6, 0), matrix(c(1.0, 0.2, 0.2, 0.7), ncol = 2)), simulate_gaussian(90, c(0, 6), matrix(c(0.8, -0.1, -0.1, 1.2), ncol = 2)), simulate_gaussian(90, c(6, -2), matrix(c(1.1, 0.3, 0.3, 0.9), ncol = 2)) ) truth <- rep(1:3, each = 90) fit <- gmix(dat, K = 3, init = "kmed", erc = 20) pred <- predict(fit, dat) expect_equal(fit$info$code, 1) expect_equal(sum(fit$size), nrow(dat)) expect_gt(best_label_accuracy(truth, fit$cluster), 0.97) expect_gt(best_label_accuracy(truth, pred), 0.97) }) test_that("plot works for the previous kmed failure case", { set.seed(111) gauss_dat <- rbind( simulate_gaussian(250, c(0, 0), diag(2)), simulate_gaussian(250, c(0, 0), matrix(c(20, 0, 0, 0.01), ncol = 2)) ) fit <- gmix(gauss_dat, K = 2, init = "kmed") path <- tempfile(fileext = ".pdf") grDevices::pdf(path) on.exit({ grDevices::dev.off() unlink(path) }, add = TRUE) expect_no_error( plot(fit, data = gauss_dat, what = c("clustering", "contour", "boundary")) ) }) test_that("gmix handles imbalanced clusters with a soft custom initialization", { set.seed(20260310) cov_mid <- matrix( c( 1.0, 1.8, 0.0, 1.8, 10.0, 0.0, 0.0, 0.0, 0.02 ), ncol = 3 ) train <- rbind( simulate_gaussian(120, c(-5, 0, 0.1), diag(c(1.2, 18, 0.03))), simulate_gaussian(70, c(2, 35, -0.2), cov_mid), simulate_gaussian(10, c(8, -28, 0.35), diag(c(0.15, 2.0, 0.01))) ) truth <- rep(1:3, c(120, 70, 10)) init <- matrix(0.02, nrow(train), 3) init[cbind(seq_len(nrow(train)), truth)] <- 0.96 init[1:8, c(1, 2)] <- init[1:8, c(2, 1)] init <- init / rowSums(init) fit <- gmix(train, K = 3, erc = 25, init = init, save_taus = TRUE, iter.max = 300) score <- qscore(train, fit$params, type = "both") held_out <- rbind( simulate_gaussian(5, c(-5, 0, 0.1), diag(c(1.2, 18, 0.03))), simulate_gaussian(5, c(2, 35, -0.2), cov_mid), simulate_gaussian(5, c(8, -28, 0.35), diag(c(0.15, 2.0, 0.01))) ) held_truth <- rep(1:3, each = 5) pred <- predict(fit, held_out) expect_equal(fit$info$code, 1) expect_equal(fit$iter, 4L) expect_equal(unname(fit$size), c(120, 70, 10), tolerance = 0) expect_true(all(is.finite(fit$posterior))) expect_equal(rowSums(fit$posterior), rep(1, nrow(train)), tolerance = 1e-8) expect_true(all(is.finite(unlist(fit$params)))) expect_true(all(is.finite(score))) expect_equal(best_label_accuracy(truth, fit$cluster), 1) expect_equal(best_label_accuracy(held_truth, pred), 1) }) test_that("gmix validates and coerces custom function initializations before entering C", { set.seed(20260311) dat <- rbind( simulate_gaussian(20, c(0, 0), diag(2)), simulate_gaussian(20, c(4, 4), diag(2)) ) init_ok <- function(data, K) { z <- matrix(0L, nrow(data), K) half <- nrow(data) / 2 z[seq_len(half), 1] <- 1L z[seq.int(half + 1, nrow(data)), 2] <- 1L z } init_bad <- function(data, K) { matrix(1L, nrow(data) - 1, K) } init_zero_row <- init_ok(dat, 2) init_zero_row[1, ] <- 0 init_df <- as.data.frame(init_ok(dat, 2)) init_ok_df <- function(data, K) { as.data.frame(init_ok(data, K)) } init_inf <- init_ok(dat, 2) init_inf[1, 1] <- Inf init_neg <- init_ok(dat, 2) init_neg[1, 1] <- -1 init_zero_cluster <- init_ok(dat, 2) init_zero_cluster[, 2] <- 0 fit <- gmix(dat, K = 2, init = init_ok, erc = 20) fit_zero_row <- gmix(dat, K = 2, init = init_zero_row, erc = 20) fit_df <- gmix(dat, K = 2, init = init_df, erc = 20) fit_fun_df <- gmix(dat, K = 2, init = init_ok_df, erc = 20) expect_s3_class(fit, "mbcfit") expect_equal(fit$info$code, 1) expect_equal(sum(fit$size), nrow(dat)) expect_equal(fit_zero_row$info$code, 1) expect_equal(sum(fit_zero_row$size), nrow(dat)) expect_equal(fit_df$info$code, 1) expect_equal(sum(fit_df$size), nrow(dat)) expect_equal(fit_fun_df$info$code, 1) expect_equal(sum(fit_fun_df$size), nrow(dat)) expect_error( gmix(dat, K = 2, init = init_bad, erc = 20), "incoherent number of instances" ) expect_error( gmix(dat, K = 2, init = init_inf, erc = 20), "finite values" ) expect_error( gmix(dat, K = 2, init = init_neg, erc = 20), "non-negative weights" ) expect_error( gmix(dat, K = 2, init = init_zero_cluster, erc = 20), "positive total weight" ) expect_error( gmix(dat, K = 2, init = "foo", erc = 20), "Unsupported character value for 'init'" ) expect_error( gmix(dat, init = "kmeans", erc = 20), "'K' must be given if 'init' is 'kmeans'" ) expect_error( gmix(dat, init = "pam", erc = 20), "'K' must be given if 'init' is 'pam'" ) }) test_that("mset_gmix validates init specifications early", { expect_error( mset_gmix(init = "foo"), "character values must be one of" ) expect_error( mset_gmix(init = list("kmed", 5)), "character values must be one of" ) expect_error( mset_gmix(init = list("foo", "kmed")), "character values must be one of" ) }) test_that("mset_gmix preserves mixed init lists of supported strings, functions and weights", { dat <- iris[1:20, 1:4] init_fun <- function(data, K) { z <- matrix(0, nrow(data), K) z[1:10, 1] <- 1 z[11:20, 2] <- 1 z } init_df <- as.data.frame(init_fun(dat, 2)) mset <- mset_gmix(K = 2, init = list("kmed", init_fun, init_df)) fits <- lapply(mset, function(m) m$fn(dat)) expect_length(mset, 9L) expect_true(all(vapply(fits, inherits, logical(1), what = "mbcfit"))) expect_true(all(vapply(fits, function(x) x$info$code == 1, logical(1)))) })