# test-grm_integration.R
# Objective 14: Full pipeline integration test for GRM
# Exercises: irt_design → irt_study → irt_simulate → summary → plot → recommended_n

# ---- Helper: build a standard GRM study for integration tests ----

# Build the study object shared by the integration tests in this file.
#
# The threshold matrix `b` has one row per item and `n_categories - 1`
# columns, filled column-wise with evenly spaced values from -2 to 2.
# Every item gets the same discrimination, a = 1.2.
#
# Arguments:
#   n_items       Number of items (rows of the threshold matrix).
#   n_categories  Number of response categories; thresholds = categories - 1.
#   sample_sizes  Forwarded unchanged to irt_study().
#   missing       Forwarded unchanged to irt_study().
#   missing_rate  Forwarded unchanged to irt_study().
#
# Returns whatever irt_study() returns for the constructed design
# (presumably a study object accepted by irt_simulate() — that is how every
# test below uses it).
make_grm_integration_study <- function(n_items = 5,
                                       n_categories = 4,
                                       sample_sizes = c(200, 500),
                                       missing = "none",
                                       missing_rate = NULL) {
  n_thresh <- n_categories - 1L

  b_mat <- matrix(
    seq(-2, 2, length.out = n_items * n_thresh),
    nrow = n_items,
    ncol = n_thresh
  )

  # Sort each row so thresholds are increasing within an item. Skipped for a
  # single threshold: apply() would return a plain vector there and t() would
  # turn it into a 1 x n_items matrix instead of n_items x 1.
  if (n_thresh > 1L) {
    b_mat <- t(apply(b_mat, 1, sort))
  }

  design <- irt_design(
    model = "GRM",
    n_items = n_items,
    item_params = list(a = rep(1.2, n_items), b = b_mat)
  )

  irt_study(
    design,
    sample_sizes = sample_sizes,
    missing = missing,
    missing_rate = missing_rate
  )
}

# ===========================================================================
# 1. Full pipeline — clean data (no missingness)
# ===========================================================================

test_that("GRM full pipeline runs without error (no missing)", {
  study <- make_grm_integration_study()

  expect_no_error(
    res <- irt_simulate(study, iterations = 3, seed = 101)
  )
  expect_s3_class(res, "irt_results")
})

test_that("GRM item_results has correct structure", {
  study <- make_grm_integration_study()
  res <- irt_simulate(study, iterations = 3, seed = 101)
  ir <- res$item_results

  expect_s3_class(ir, "data.frame")

  expected_cols <- c(
    "iteration", "sample_size", "item", "param", "true_value",
    "estimate", "se", "ci_lower", "ci_upper", "converged"
  )
  expect_true(all(expected_cols %in% names(ir)))
})

test_that("GRM item_results contains correct parameter names", {
  study <- make_grm_integration_study(n_items = 5, n_categories = 4)
  res <- irt_simulate(study, iterations = 3, seed = 101)
  params <- unique(res$item_results$param)

  expect_true("a" %in% params)
  expect_true("b1" %in% params)
  expect_true("b2" %in% params)
  expect_true("b3" %in% params)

  # 4 categories → 3 thresholds (b1, b2, b3) + a = 4 param types
  expect_length(params, 4)
})

test_that("GRM item_results has correct row count", {
  n_items <- 5
  n_categories <- 4
  n_thresh <- n_categories - 1 # 3
  n_params_per_item <- 1 + n_thresh # a + b1,b2,b3
  sample_sizes <- c(200, 500)
  iterations <- 3

  study <- make_grm_integration_study(
    n_items = n_items,
    n_categories = n_categories,
    sample_sizes = sample_sizes
  )
  res <- irt_simulate(study, iterations = iterations, seed = 101)

  # Expected rows (assuming all converge):
  # n_items * n_params_per_item * length(sample_sizes) * iterations
  expected_rows <- n_items * n_params_per_item *
    length(sample_sizes) * iterations

  converged <- res$item_results[res$item_results$converged, ]
  expect_equal(nrow(converged), expected_rows)
})

test_that("GRM theta_results has correct structure and row count", {
  study <- make_grm_integration_study()
  res <- irt_simulate(study, iterations = 3, seed = 101)
  tr <- res$theta_results

  expect_s3_class(tr, "data.frame")

  theta_cols <- c(
    "iteration", "sample_size", "theta_cor", "theta_rmse", "converged"
  )
  expect_true(all(theta_cols %in% names(tr)))

  # 2 sample sizes × 3 iterations = 6 rows
  expect_equal(nrow(tr), 6)
})

test_that("GRM true_value matches design parameters", {
  # The design is built inline (not via the helper) so the exact a / b values
  # are available here for comparison against the reported true_value column.
  n_items <- 5
  a_vals <- rep(1.2, n_items)
  b_mat <- matrix(
    seq(-2, 2, length.out = n_items * 3),
    nrow = n_items,
    ncol = 3
  )
  b_mat <- t(apply(b_mat, 1, sort))

  design <- irt_design(
    model = "GRM",
    n_items = n_items,
    item_params = list(a = a_vals, b = b_mat)
  )
  study <- irt_study(design, sample_sizes = c(300))
  res <- irt_simulate(study, iterations = 2, seed = 202)
  ir <- res$item_results

  # Check a true values
  a_rows <- ir[ir$param == "a" & ir$iteration == 1, ]
  expect_equal(a_rows$true_value, a_vals)

  # Check b1 true values
  b1_rows <- ir[ir$param == "b1" & ir$iteration == 1, ]
  expect_equal(b1_rows$true_value, b_mat[, 1])

  # Check b3 true values
  b3_rows <- ir[ir$param == "b3" & ir$iteration == 1, ]
  expect_equal(b3_rows$true_value, b_mat[, 3])
})

test_that("GRM seed reproducibility", {
  study <- make_grm_integration_study(sample_sizes = c(300))

  res1 <- irt_simulate(study, iterations = 3, seed = 42)
  res2 <- irt_simulate(study, iterations = 3, seed = 42)

  expect_identical(res1$item_results, res2$item_results)
  expect_identical(res1$theta_results, res2$theta_results)
})

# ===========================================================================
# 2. Summary method
# ===========================================================================

test_that("summary() on GRM results returns summary_irt_results", {
  study <- make_grm_integration_study()
  res <- irt_simulate(study, iterations = 5, seed = 303)
  s <- summary(res)

  expect_s3_class(s, "summary_irt_results")
  expect_true("item_summary" %in% names(s))
  expect_true("theta_summary" %in% names(s))
})

test_that("summary() item_summary has rows for all GRM params", {
  study <- make_grm_integration_study(
    n_items = 5,
    n_categories = 4,
    sample_sizes = c(200, 500)
  )
  res <- irt_simulate(study, iterations = 5, seed = 303)
  s <- summary(res)
  is_df <- s$item_summary

  # 5 items × 4 params (a, b1, b2, b3) × 2 sample sizes = 40 rows
  expect_equal(nrow(is_df), 40)
  expect_true(all(c("a", "b1", "b2", "b3") %in% unique(is_df$param)))
})

test_that("summary() with param filter works for GRM thresholds", {
  study <- make_grm_integration_study()
  res <- irt_simulate(study, iterations = 5, seed = 303)
  s <- summary(res, param = c("b1", "b3"))

  expect_true(all(s$item_summary$param %in% c("b1", "b3")))
})

test_that("summary() criterion values are finite for converged GRM sims", {
  study <- make_grm_integration_study(sample_sizes = c(500))
  res <- irt_simulate(study, iterations = 10, seed = 404)
  s <- summary(res)
  is_df <- s$item_summary

  # Only criterion columns that actually exist in the summary are checked,
  # but at least one must exist — otherwise the loop below would run zero
  # expectations and the test would pass vacuously.
  criteria <- c("bias", "empirical_se", "mse", "rmse")
  present <- intersect(criteria, names(is_df))
  expect_gt(length(present), 0)

  for (col in present) {
    vals <- is_df[[col]]

    # All criterion columns should be numeric
    expect_true(is.numeric(vals), info = paste(col, "is not numeric"))

    # Should have finite values (some may be NA if SE was NA)
    expect_true(
      any(is.finite(vals)),
      info = paste("No finite values in", col)
    )
  }
})

test_that("summary() theta_summary has correct structure for GRM", {
  study <- make_grm_integration_study(sample_sizes = c(200, 500))
  res <- irt_simulate(study, iterations = 5, seed = 303)
  s <- summary(res)
  ts_df <- s$theta_summary

  expect_s3_class(ts_df, "data.frame")
  expect_true("sample_size" %in% names(ts_df))
  expect_equal(nrow(ts_df), 2) # 2 sample sizes
})

# ===========================================================================
# 3. Plot method
# ===========================================================================

test_that("plot() on GRM results returns a ggplot", {
  study <- make_grm_integration_study()
  res <- irt_simulate(study, iterations = 5, seed = 303)
  p <- plot(res)

  expect_s3_class(p, "ggplot")
})

test_that("plot() on GRM results with threshold line works", {
  study <- make_grm_integration_study()
  res <- irt_simulate(study, iterations = 5, seed = 303)
  p <- plot(res, criterion = "rmse", threshold = 0.3)

  expect_s3_class(p, "ggplot")
})

test_that("plot() on GRM summary works", {
  study <- make_grm_integration_study()
  res <- irt_simulate(study, iterations = 5, seed = 303)
  s <- summary(res)
  p <- plot(s, criterion = "bias")

  expect_s3_class(p, "ggplot")
})

test_that("plot() on GRM with param filter works", {
  study <- make_grm_integration_study()
  res <- irt_simulate(study, iterations = 5, seed = 303)
  p <- plot(res, param = "a")

  expect_s3_class(p, "ggplot")
})

# ===========================================================================
# 4. recommended_n()
# ===========================================================================

test_that("recommended_n() works on GRM summary", {
  study <- make_grm_integration_study()
  res <- irt_simulate(study, iterations = 5, seed = 303)
  s <- summary(res)
  rec <- recommended_n(s, criterion = "rmse", threshold = 1.0)

  expect_s3_class(rec, "data.frame")
  expect_true(all(c("item", "param", "recommended_n") %in% names(rec)))
})

test_that("recommended_n() returns results for all GRM params", {
  study <- make_grm_integration_study(n_items = 5, n_categories = 4)
  res <- irt_simulate(study, iterations = 5, seed = 303)
  s <- summary(res)
  rec <- recommended_n(s, criterion = "rmse", threshold = 1.0)

  expect_true(all(c("a", "b1", "b2", "b3") %in% unique(rec$param)))
})

test_that("recommended_n() with param filter on GRM", {
  study <- make_grm_integration_study()
  res <- irt_simulate(study, iterations = 5, seed = 303)
  s <- summary(res)
  rec <- recommended_n(
    s,
    criterion = "rmse",
    threshold = 1.0,
    param = "b2"
  )

  expect_true(all(rec$param == "b2"))
})

# ===========================================================================
# 5. GRM with MCAR missingness
# ===========================================================================

test_that("GRM pipeline works with MCAR missingness", {
  study <- make_grm_integration_study(
    sample_sizes = c(300),
    missing = "mcar",
    missing_rate = 0.1
  )
  res <- irt_simulate(study, iterations = 3, seed = 505)

  expect_s3_class(res, "irt_results")
  expect_gt(nrow(res$item_results), 0)

  # Summary should also work
  s <- summary(res)
  expect_s3_class(s, "summary_irt_results")
})

# ===========================================================================
# 6. GRM with 2 categories (binary — edge case)
# ===========================================================================

test_that("GRM pipeline works with 2 categories (binary edge case)", {
  n_items <- 5

  # 2 categories → 1 threshold
  b_mat <- matrix(
    seq(-1.5, 1.5, length.out = n_items),
    nrow = n_items,
    ncol = 1
  )
  design <- irt_design(
    model = "GRM",
    n_items = n_items,
    item_params = list(a = rep(1.0, n_items), b = b_mat)
  )
  study <- irt_study(design, sample_sizes = c(300))
  res <- irt_simulate(study, iterations = 3, seed = 606)

  expect_s3_class(res, "irt_results")

  params <- unique(res$item_results$param)
  expect_true("a" %in% params)
  expect_true("b1" %in% params)
  # Only 1 threshold, so no b2 or higher
  expect_false("b2" %in% params)

  # Summary and plot should still work
  s <- summary(res)
  expect_s3_class(s, "summary_irt_results")

  p <- plot(res)
  expect_s3_class(p, "ggplot")
})

# ===========================================================================
# 7. GRM with irt_params_grm() helper
# ===========================================================================

test_that("GRM pipeline works with irt_params_grm() generated parameters", {
  params <- irt_params_grm(n_items = 6, n_categories = 5, seed = 99)
  design <- irt_design(
    model = "GRM",
    n_items = 6,
    item_params = params
  )
  study <- irt_study(design, sample_sizes = c(300))
  res <- irt_simulate(study, iterations = 3, seed = 707)

  expect_s3_class(res, "irt_results")

  # 5 categories → 4 thresholds
  params_found <- unique(res$item_results$param)
  expect_true(all(c("a", "b1", "b2", "b3", "b4") %in% params_found))
})

# ===========================================================================
# 8. Parameter recovery sanity (large N)
# ===========================================================================

test_that("GRM parameter estimates are in reasonable range at large N", {
  study <- make_grm_integration_study(
    n_items = 5,
    n_categories = 4,
    sample_sizes = c(1000)
  )
  res <- irt_simulate(study, iterations = 5, seed = 808)
  s <- summary(res)
  is_df <- s$item_summary

  # At N=1000 with 5 iterations, bias should be modest (not wildly off)
  bias_vals <- is_df$bias[is.finite(is_df$bias)]
  expect_true(
    all(abs(bias_vals) < 1.0),
    info = "Bias exceeds 1.0 — parameter recovery may be broken"
  )

  # RMSE should be bounded
  rmse_vals <- is_df$rmse[is.finite(is_df$rmse)]
  expect_true(
    all(rmse_vals < 2.0),
    info = "RMSE exceeds 2.0 — parameter recovery may be broken"
  )
})

# ===========================================================================
# 9. Print methods don't error
# ===========================================================================

test_that("print methods work for GRM pipeline objects", {
  study <- make_grm_integration_study(sample_sizes = c(300))
  res <- irt_simulate(study, iterations = 3, seed = 909)
  s <- summary(res)

  expect_output(print(res))
  expect_output(print(s))
})