#plot_means_001 test_that("plot_means runs with formula + data and returns desc_var object", { df <- data.frame(y = rnorm(100), group = rep(c("A", "B"), 50)) result <- plot_means(y ~ group, data = df, save.as = NULL) expect_true(is.list(result)) expect_true(is.data.frame(result$means)) }) #plot_means_002 test_that("plot_means works with multiple grouping variables (y ~ x1 + x2)", { df <- data.frame( y = rnorm(200), x1 = rep(c("A", "B"), 100), x2 = rep(c("X", "Y"), each = 100) ) result <- plot_means(y ~ x1 + x2, data = df, save.as = NULL) expect_true(is.list(result)) expect_true(is.data.frame(result$means)) }) #plot_means_003 test_that("plot_means order reorders groups for single grouping variable", { df <- data.frame(y = rnorm(100), group = rep(c("A", "B"), 50)) result_default <- plot_means(y ~ group, data = df, save.as = NULL) expect_true(is.list(result_default)) result_reversed <- plot_means(y ~ group, data = df, order = -1, save.as = NULL) expect_true(is.list(result_reversed)) result_custom <- plot_means(y ~ group, data = df, order = c("B", "A"), save.as = NULL) expect_true(is.list(result_custom)) }) #plot_means_004 test_that("plot_means works with three grouping variables and missing combinations", { set.seed(1) df <- data.frame( y = rnorm(96), x1 = rep(c("A", "B"), 48), x2 = rep(rep(c("X", "Y"), each = 24), 2), x3 = rep(c("M", "N"), each = 48) ) # Remove one x1 level from one (x2,x3) block to create a missing combination df <- df[!(df$x1 == "B" & df$x2 == "Y" & df$x3 == "N"), , drop = FALSE] result <- plot_means(y ~ x1 + x2 + x3, data = df, save.as = NULL) expect_true(is.list(result)) }) #plot_means_005 test_that("plot_means tests=auto works for scenario 1 (binary x1 only)", { df <- data.frame(y = rnorm(60), x1 = rep(c("A", "B"), each = 30)) expect_error(plot_means(y ~ x1, data = df, tests = "auto", save.as = NULL), NA) }) #plot_means_006 test_that("plot_means tests=auto works for scenario 2 (binary x1 and x2)", { df <- data.frame( y = rnorm(120), x1 = rep(c("A", "B"), 60), x2 = rep(rep(c("X", "Y"), each = 30), 2) ) expect_error(plot_means(y ~ x1 + x2, data = df, tests = "auto", save.as = NULL), NA) }) #plot_means_007 test_that("plot_means tests=auto works for scenario 3 (binary x1, x2 has >2 levels)", { df <- expand.grid( x1 = c("A", "B"), x2 = c("X", "Y", "Z"), rep = seq_len(30), stringsAsFactors = FALSE ) df$y <- rnorm(nrow(df)) expect_error(plot_means(y ~ x1 + x2, data = df, tests = "auto", save.as = NULL), NA) }) #plot_means_008 test_that("plot_means save.as saves png", { df <- data.frame(y = rnorm(80), x1 = rep(c("A", "B"), each = 40)) out <- tempfile(fileext = ".png") on.exit(unlink(out), add = TRUE) grDevices::png(tempfile(fileext = ".png"), width = 800, height = 500, res = 100) on.exit(grDevices::dev.off(), add = TRUE) result <- plot_means(y ~ x1, data = df, save.as = out) expect_true(is.list(result)) expect_true(file.exists(out)) expect_gt(file.info(out)$size, 0) }) #plot_means_009 test_that("plot_means means + per-cell CIs match t.test() when cluster=NULL", { set.seed(123) df <- data.frame( y = rnorm(120), x1 = rep(c("A", "B", "C"), each = 40), stringsAsFactors = FALSE ) ci_level <- 90 pm <- plot_means(y ~ x1, data = df, tests = "none", ci.level = ci_level, save.as = NULL, quiet = TRUE) expect_true(is.data.frame(pm$means)) # Means/sd/n should match base computations m <- tapply(df$y, df$x1, mean) s <- tapply(df$y, df$x1, sd) n <- tapply(df$y, df$x1, function(z) sum(is.finite(z))) idx <- match(pm$means$x1, names(m)) expect_false(anyNA(idx)) # desc_var()/plot_means may round display columns; allow small tolerance. expect_true(max(abs(unname(pm$means$mean) - as.numeric(m[idx]))) < 1e-3) expect_true(max(abs(unname(pm$means$sd) - as.numeric(s[idx]))) < 1e-3) expect_true(max(abs(unname(pm$means$n.total) - as.numeric(n[idx]))) < 1e-12) # CI should match one-sample t.test(y ~ 1) within each cell for (g in pm$means$x1) { yy <- df$y[df$x1 == g] tt <- stats::t.test(yy, conf.level = ci_level / 100) pm_row <- pm$means[pm$means$x1 == g, , drop = FALSE] expect_equal(pm_row$ciL, as.numeric(tt$conf.int[1]), tolerance = 1e-10) expect_equal(pm_row$ciH, as.numeric(tt$conf.int[2]), tolerance = 1e-10) } }) #plot_means_010 test_that("plot_means tests=auto (scenario 1) matches Welch t.test()", { set.seed(1) df <- data.frame(y = rnorm(80), x1 = rep(c("A", "B"), each = 40)) pm <- plot_means(y ~ x1, data = df, tests = "auto", ci.level = 95, save.as = NULL, quiet = TRUE) expect_true(is.data.frame(pm$means)) expect_true(is.data.frame(pm$tests)) expect_gt(nrow(pm$tests), 0) # First (and only) comparison should correspond to Welch two-sample t-test tt <- stats::t.test(y ~ x1, data = df, var.equal = FALSE) row <- pm$tests[1, , drop = FALSE] expect_true(is.finite(row$t.value)) expect_true(is.finite(row$p.value)) expect_equal(as.numeric(row$p.value), as.numeric(tt$p.value), tolerance = 1e-10) expect_equal(as.numeric(row$df), as.numeric(tt$parameter[[1]]), tolerance = 1e-10) # Our diff uses mean2 - mean1, and our t.value is aligned to that sign. expect_equal(as.numeric(row$diff), as.numeric(row$mean2 - row$mean1), tolerance = 1e-12) }) #plot_means_011 test_that("plot_means 2x2 interaction p-value matches estimatr::lm_robust(HC3)", { skip_if_not_installed("estimatr") set.seed(2) df <- data.frame( y = rnorm(400), x1 = rep(c("A", "B"), 200), x2 = rep(rep(c("X", "Y"), each = 100), 2), stringsAsFactors = FALSE ) pm <- plot_means(y ~ x1 + x2, data = df, tests = "auto", ci.level = 95, save.as = NULL, quiet = TRUE) expect_true(is.data.frame(pm$tests)) int_row <- pm$tests[grepl("^interaction\\(", as.character(pm$tests$group1)), , drop = FALSE] expect_equal(nrow(int_row), 1) fit <- estimatr::lm_robust(y ~ x1 * x2, data = df, se_type = "HC3") sm <- summary(fit) coef_tab <- sm$coefficients int_term <- rownames(coef_tab)[grepl(":", rownames(coef_tab))] expect_equal(length(int_term), 1) expect_equal(as.numeric(int_row$p.value), as.numeric(coef_tab[int_term, "Pr(>|t|)"]), tolerance = 1e-10) expect_equal(as.numeric(int_row$t.value), as.numeric(coef_tab[int_term, "t value"]), tolerance = 1e-10) expect_equal(as.numeric(int_row$df), as.numeric(sm$df.residual), tolerance = 1e-10) }) #plot_means_012 test_that("plot_means clustered CI uses regression-based intervals governed by ci.level", { skip_if_not_installed("estimatr") set.seed(3) n_cl <- 40 n_per <- 20 cl <- rep(seq_len(n_cl), each = n_per) df <- data.frame( y = rnorm(n_cl * n_per), x1 = rep(c("A", "B"), length.out = n_cl * n_per), cl = cl, stringsAsFactors = FALSE ) ci_level <- 90 pm <- plot_means(y ~ x1, data = df, cluster = df$cl, tests = "none", ci.level = ci_level, save.as = NULL, quiet = TRUE) # Reconstruct the same cell_key as plot_means_compute() uses (x1|All|All) df2 <- df df2$cell_key <- paste(as.character(df2$x1), "All", "All", sep = "|") df2$cell_factor <- factor(df2$cell_key) fit <- estimatr::lm_robust(y ~ 0 + cell_factor, data = df2, clusters = df2$cl, se_type = "CR2") beta <- stats::coef(fit) V <- stats::vcov(fit) df_fit <- fit$df if (length(df_fit) != 1) df_fit <- suppressWarnings(min(df_fit, na.rm = TRUE)) if (!is.finite(df_fit) || df_fit <= 0) df_fit <- 1 alpha <- 1 - ci_level / 100 tcrit <- stats::qt(1 - alpha / 2, df = df_fit) lev <- levels(df2$cell_factor) newdata <- data.frame(cell_factor = factor(lev, levels = lev)) X <- stats::model.matrix(~ 0 + cell_factor, data = newdata) b_idx <- match(colnames(X), names(beta)) X <- X[, !is.na(b_idx), drop = FALSE] b_idx <- b_idx[!is.na(b_idx)] beta_use <- beta[b_idx] V_use <- V[b_idx, b_idx, drop = FALSE] fit_hat <- as.numeric(X %*% beta_use) se_hat <- sqrt(pmax(0, diag(X %*% V_use %*% t(X)))) lwr <- fit_hat - tcrit * se_hat upr <- fit_hat + tcrit * se_hat ci_df <- data.frame(cell_key = lev, lwr = lwr, upr = upr, stringsAsFactors = FALSE) for (i in seq_len(nrow(pm$means))) { key <- paste(as.character(pm$means$x1[i]), "All", "All", sep = "|") j <- match(key, ci_df$cell_key) expect_false(is.na(j)) expect_equal(pm$means$ciL[i], ci_df$lwr[j], tolerance = 1e-3) expect_equal(pm$means$ciH[i], ci_df$upr[j], tolerance = 1e-3) } })