# test-model_misspec.R # TDD tests for model misspecification support (Objective 26) # Tests cover cross-fitting scenarios where generation and estimation models differ. # --- Helper: Create test designs ------------------------------------------------- make_design_1pl <- function(n = 10) { irt_design( model = "1PL", n_items = n, item_params = list(b = seq(-2, 2, length.out = n)) ) } make_design_2pl <- function(n = 10) { irt_design( model = "2PL", n_items = n, item_params = list( a = rlnorm(n, 0, 0.25), b = seq(-2, 2, length.out = n) ) ) } make_design_grm <- function(n = 10, n_cat = 4) { n_thresh <- n_cat - 1L b_mat <- matrix( seq(-2, 2, length.out = n * n_thresh), nrow = n, ncol = n_thresh ) b_mat <- t(apply(b_mat, 1, sort)) irt_design( model = "GRM", n_items = n, item_params = list(a = rlnorm(n, 0, 0.25), b = b_mat) ) } # ============================================================================= # 1. irt_study() with estimation_model parameter # ============================================================================= test_that("irt_study accepts estimation_model parameter", { d <- make_design_2pl() study <- irt_study(d, sample_sizes = 300, estimation_model = "1PL") expect_s3_class(study, "irt_study") expect_equal(study$estimation_model, "1PL") }) test_that("irt_study defaults estimation_model to design$model when NULL", { d <- make_design_2pl() study <- irt_study(d, sample_sizes = 300) expect_equal(study$estimation_model, d$model) }) test_that("irt_study defaults estimation_model to design$model when not provided", { d <- make_design_1pl() study <- irt_study(d, sample_sizes = 300) expect_equal(study$estimation_model, "1PL") }) test_that("irt_study rejects unknown estimation_model", { d <- make_design_2pl() expect_error( irt_study(d, sample_sizes = 300, estimation_model = "UNKNOWN"), "model|must be one of" ) }) # --- Compatible cross-fits: same response format --------------------------------- test_that("irt_study allows (gen=1PL, est=2PL) cross-fit", { d <- make_design_1pl() study <- irt_study(d, sample_sizes = 300, estimation_model = "2PL") expect_equal(study$estimation_model, "2PL") }) test_that("irt_study allows (gen=2PL, est=1PL) cross-fit", { d <- make_design_2pl() study <- irt_study(d, sample_sizes = 300, estimation_model = "1PL") expect_equal(study$estimation_model, "1PL") }) test_that("irt_study allows (gen=1PL, est=1PL) same-model", { d <- make_design_1pl() study <- irt_study(d, sample_sizes = 300, estimation_model = "1PL") expect_equal(study$estimation_model, "1PL") }) test_that("irt_study allows (gen=2PL, est=2PL) same-model", { d <- make_design_2pl() study <- irt_study(d, sample_sizes = 300, estimation_model = "2PL") expect_equal(study$estimation_model, "2PL") }) test_that("irt_study allows (gen=GRM, est=GRM) same-model", { d <- make_design_grm() study <- irt_study(d, sample_sizes = 300, estimation_model = "GRM") expect_equal(study$estimation_model, "GRM") }) # --- Incompatible cross-fits: response format mismatch -------------------------- test_that("irt_study rejects (gen=1PL, est=GRM) incompatible cross-fit", { d <- make_design_1pl() expect_error( irt_study(d, sample_sizes = 300, estimation_model = "GRM"), "Incompatible|response format|binary|polytomous" ) }) test_that("irt_study rejects (gen=2PL, est=GRM) incompatible cross-fit", { d <- make_design_2pl() expect_error( irt_study(d, sample_sizes = 300, estimation_model = "GRM"), "Incompatible|response format|binary|polytomous" ) }) test_that("irt_study rejects (gen=GRM, est=1PL) incompatible cross-fit", { d <- make_design_grm() expect_error( irt_study(d, sample_sizes = 300, estimation_model = "1PL"), "Incompatible|response format|polytomous|binary" ) }) test_that("irt_study rejects (gen=GRM, est=2PL) incompatible cross-fit", { d <- make_design_grm() expect_error( irt_study(d, sample_sizes = 300, estimation_model = "2PL"), "Incompatible|response format|polytomous|binary" ) }) # ============================================================================= # 2. Model registry response_format metadata # ============================================================================= test_that("model_registry has response_format for 1PL", { cfg <- get_model_config("1PL") expect_equal(cfg$response_format, "binary") }) test_that("model_registry has response_format for 2PL", { cfg <- get_model_config("2PL") expect_equal(cfg$response_format, "binary") }) test_that("model_registry has response_format for GRM", { cfg <- get_model_config("GRM") expect_equal(cfg$response_format, "polytomous") }) # ============================================================================= # 3. build_true_params_for_estimation() helper # ============================================================================= test_that("build_true_params_for_estimation(gen=2PL, est=2PL) returns correct rows", { d <- make_design_2pl(n = 10) true_params <- build_true_params_for_estimation(d, "2PL") # Should have 2 rows per item (a and b) expect_equal(nrow(true_params), 20L) expect_equal(ncol(true_params), 3L) expect_true(all(c("item", "param", "true_value") %in% colnames(true_params))) # First 10 rows are "a", next 10 are "b" expect_true(all(true_params$param[1:10] == "a")) expect_true(all(true_params$param[11:20] == "b")) # True values match design expect_equal(true_params$true_value[1:10], d$item_params$a) expect_equal(true_params$true_value[11:20], d$item_params$b) }) test_that("build_true_params_for_estimation(gen=1PL, est=1PL) returns only b rows", { d <- make_design_1pl(n = 10) true_params <- build_true_params_for_estimation(d, "1PL") # Should have 1 row per item (only b) expect_equal(nrow(true_params), 10L) expect_equal(ncol(true_params), 3L) expect_true(all(true_params$param == "b")) # True values match design expect_equal(true_params$true_value, d$item_params$b) }) test_that("build_true_params_for_estimation(gen=1PL, est=2PL) adds a=1 rows", { d <- make_design_1pl(n = 10) true_params <- build_true_params_for_estimation(d, "2PL") # Should have 2 rows per item (a and b) expect_equal(nrow(true_params), 20L) # First 10 rows are "a" with value 1 (Rasch constraint) expect_true(all(true_params$param[1:10] == "a")) expect_equal(unique(true_params$true_value[1:10]), 1.0) # Next 10 rows are "b" from design expect_true(all(true_params$param[11:20] == "b")) expect_equal(true_params$true_value[11:20], d$item_params$b) }) test_that("build_true_params_for_estimation(gen=2PL, est=1PL) drops a rows", { d <- make_design_2pl(n = 10) true_params <- build_true_params_for_estimation(d, "1PL") # Should have 1 row per item (only b, drop a) expect_equal(nrow(true_params), 10L) expect_true(all(true_params$param == "b")) # True values match design b expect_equal(true_params$true_value, d$item_params$b) }) test_that("build_true_params_for_estimation(gen=GRM, est=GRM) returns correct schema", { d <- make_design_grm(n = 5, n_cat = 4) true_params <- build_true_params_for_estimation(d, "GRM") # GRM with 5 items, 3 thresholds: 5 a rows + (5 * 3) b rows = 20 rows expect_equal(nrow(true_params), 20L) # First 5 rows are "a" expect_true(all(true_params$param[1:5] == "a")) expect_equal(true_params$true_value[1:5], d$item_params$a) # Remaining 15 rows are "b1", "b2", "b3" expect_true(all(true_params$param[6:10] == "b1")) expect_true(all(true_params$param[11:15] == "b2")) expect_true(all(true_params$param[16:20] == "b3")) }) # ============================================================================= # 4. irt_simulate() with estimation_model # ============================================================================= test_that("irt_simulate smoke test: (gen=2PL, est=1PL) cross-fit", { d <- make_design_2pl(n = 10) study <- irt_study(d, sample_sizes = 300, estimation_model = "1PL") results <- irt_simulate(study, iterations = 3, seed = 42, progress = FALSE) expect_s3_class(results, "irt_results") # item_results should only have "b" rows (1PL doesn't estimate a) expect_true(all(results$item_results$param == "b")) # Should have n_items rows per iteration per sample_size n_expected_rows <- 10L * 1L * 3L # n_items * n_sample_sizes * iterations expect_equal(nrow(results$item_results), n_expected_rows) # Convergence rate should be > 0 expect_true(sum(results$item_results$converged) > 0) }) test_that("irt_simulate smoke test: (gen=1PL, est=2PL) cross-fit", { d <- make_design_1pl(n = 10) study <- irt_study(d, sample_sizes = 300, estimation_model = "2PL") results <- irt_simulate(study, iterations = 3, seed = 42, progress = FALSE) expect_s3_class(results, "irt_results") # item_results should have both "a" and "b" rows params <- unique(results$item_results$param) expect_true("a" %in% params) expect_true("b" %in% params) # Should have 2 * n_items rows per iteration per sample_size n_expected_rows <- 10L * 2L * 1L * 3L # n_items * 2 params * n_sample_sizes * iterations expect_equal(nrow(results$item_results), n_expected_rows) # Convergence rate should be > 0 expect_true(sum(results$item_results$converged) > 0) }) test_that("irt_simulate same-model (gen=1PL, est=1PL) produces expected schema", { d <- make_design_1pl(n = 10) study <- irt_study(d, sample_sizes = 300, estimation_model = "1PL") results <- irt_simulate(study, iterations = 3, seed = 42, progress = FALSE) expect_s3_class(results, "irt_results") # Should have n_items rows per iteration per sample_size n_expected_rows <- 10L * 1L * 3L expect_equal(nrow(results$item_results), n_expected_rows) # All params should be "b" expect_true(all(results$item_results$param == "b")) }) # ============================================================================= # 5. Print methods for cross-fit display # ============================================================================= test_that("print.irt_study shows 'Estimation:' line for cross-fit (2PL -> 1PL)", { d <- make_design_2pl() study <- irt_study(d, sample_sizes = 300, estimation_model = "1PL") output <- capture.output(print(study)) output_text <- paste(output, collapse = "\n") # Should show "Estimation:" line indicating cross-fit expect_true(grepl("Estimation", output_text)) }) test_that("print.irt_study omits 'Estimation:' line for same-model (2PL -> 2PL)", { d <- make_design_2pl() study <- irt_study(d, sample_sizes = 300, estimation_model = "2PL") output <- capture.output(print(study)) output_text <- paste(output, collapse = "\n") # Should NOT show "Estimation:" line (backward compat) expect_false(grepl("Estimation", output_text)) }) test_that("print.irt_study omits 'Estimation:' line when estimation_model defaults", { d <- make_design_1pl() study <- irt_study(d, sample_sizes = 300) # No estimation_model specified output <- capture.output(print(study)) output_text <- paste(output, collapse = "\n") # Should NOT show "Estimation:" line expect_false(grepl("Estimation", output_text)) }) test_that("print.irt_results shows Gen/Est models for cross-fit (2PL -> 1PL)", { d <- make_design_2pl(n = 10) study <- irt_study(d, sample_sizes = 300, estimation_model = "1PL") results <- irt_simulate(study, iterations = 2, seed = 42, progress = FALSE) output <- capture.output(print(results)) output_text <- paste(output, collapse = "\n") # Should indicate generation and estimation models are different expect_true(grepl("Gen.*model", output_text) || grepl("Est.*model", output_text)) }) test_that("print.irt_results shows single 'Model:' line for same-model (1PL -> 1PL)", { d <- make_design_1pl(n = 10) study <- irt_study(d, sample_sizes = 300, estimation_model = "1PL") results <- irt_simulate(study, iterations = 2, seed = 42, progress = FALSE) output <- capture.output(print(results)) output_text <- paste(output, collapse = "\n") # Should show single "Model:" line (backward compat, no "Gen model:" or "Est model:") model_line_count <- length(grep("Model:", output_text, value = TRUE)) # Line should appear exactly once (or as part of one-line format) expect_true(model_line_count >= 1) }) # ============================================================================= # 6. Backward compatibility: existing behavior unchanged when no misspecification # ============================================================================= test_that("irt_study output is backward compatible when estimation_model defaults", { d <- make_design_2pl(n = 10) study <- irt_study(d, sample_sizes = c(100, 250, 500)) # Should have estimation_model set to design model expect_equal(study$estimation_model, d$model) # Print output should NOT contain "Estimation:" (backward compat) output <- capture.output(print(study)) output_text <- paste(output, collapse = "\n") expect_false(grepl("Estimation:", output_text)) }) test_that("irt_simulate default behavior unchanged (gen=est)", { d <- make_design_1pl(n = 10) study <- irt_study(d, sample_sizes = 300) results <- irt_simulate(study, iterations = 2, seed = 42, progress = FALSE) # Results schema should match expected 1PL output expect_true(all(results$item_results$param == "b")) expect_equal(nrow(results$item_results), 10L * 2L) # n_items * iterations })