test_that("run_qc_pipeline returns correct class and structure", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) qc <- run_qc_pipeline(fit) expect_s3_class(qc, "mfrm_qc_pipeline") expect_true(is.list(qc)) expect_named(qc, c("verdicts", "overall", "details", "recommendations", "config"), ignore.order = TRUE) }) test_that("verdicts table has 10 rows with valid verdict values", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) qc <- run_qc_pipeline(fit) expect_equal(nrow(qc$verdicts), 10) expect_true(all(qc$verdicts$Verdict %in% c("Pass", "Warn", "Fail", "Skip"))) expected_checks <- c("Convergence", "Global Fit", "Reliability", "Separation", "Element Misfit", "Unexpected Responses", "Category Structure", "Connectivity", "Inter-rater Agreement", "Functioning/Bias Screen") expect_equal(qc$verdicts$Check, expected_checks) expect_true(all(c("Check", "Verdict", "Value", "Threshold", "Detail") %in% names(qc$verdicts))) }) test_that("overall verdict is one of Pass/Warn/Fail", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) qc <- run_qc_pipeline(fit) expect_true(qc$overall %in% c("Pass", "Warn", "Fail")) }) test_that("recommendations is a character vector", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) qc <- run_qc_pipeline(fit) expect_type(qc$recommendations, "character") }) test_that("print.mfrm_qc_pipeline produces output", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) qc <- run_qc_pipeline(fit) out <- capture.output(print(qc)) expect_true(length(out) > 0) expect_true(any(grepl("QC Pipeline", out))) expect_true(any(grepl("Overall:", out))) }) test_that("summary.mfrm_qc_pipeline produces correct output", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) qc <- run_qc_pipeline(fit) s <- summary(qc) expect_s3_class(s, "summary.mfrm_qc_pipeline") expect_true("pass_count" %in% names(s)) expect_true("warn_count" %in% names(s)) expect_true("fail_count" %in% names(s)) expect_true("skip_count" %in% names(s)) expect_equal(s$pass_count + s$warn_count + s$fail_count + s$skip_count, 10) out <- capture.output(print(s)) expect_true(length(out) > 0) expect_true(any(grepl("QC Pipeline Summary", out))) }) test_that("plot_qc_pipeline with draw=FALSE returns data", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) qc <- run_qc_pipeline(fit) vt <- plot_qc_pipeline(qc, draw = FALSE) expect_true(is.data.frame(vt) || tibble::is_tibble(vt)) expect_equal(nrow(vt), 10) }) test_that("threshold_profile strict and lenient work", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) qc_strict <- run_qc_pipeline(fit, threshold_profile = "strict") qc_lenient <- run_qc_pipeline(fit, threshold_profile = "lenient") expect_s3_class(qc_strict, "mfrm_qc_pipeline") expect_s3_class(qc_lenient, "mfrm_qc_pipeline") expect_equal(qc_strict$config$threshold_profile, "strict") expect_equal(qc_lenient$config$threshold_profile, "lenient") # strict thresholds should be tighter expect_true(qc_strict$config$thresholds$reliability_pass > qc_lenient$config$thresholds$reliability_pass) expect_true(qc_strict$config$thresholds$separation_pass > qc_lenient$config$thresholds$separation_pass) }) test_that("thresholds override works", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) qc <- run_qc_pipeline(fit, thresholds = list(reliability_pass = 0.99)) expect_equal(qc$config$thresholds$reliability_pass, 0.99) }) test_that("run_qc_pipeline works with pre-computed diagnostics", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) diag <- diagnose_mfrm(fit) qc <- run_qc_pipeline(fit, diagnostics = diag) expect_s3_class(qc, "mfrm_qc_pipeline") expect_equal(nrow(qc$verdicts), 10) }) test_that("run_qc_pipeline records screening-tier bias metadata when bias results are provided", { toy <- load_mfrmr_data("example_bias") fit <- suppressWarnings(fit_mfrm( toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 20 )) diag <- suppressWarnings(diagnose_mfrm(fit, residual_pca = "none")) bias <- suppressWarnings(estimate_bias( fit, diag, facet_a = "Rater", facet_b = "Criterion", max_iter = 2 )) qc <- run_qc_pipeline(fit, diagnostics = diag, bias_results = bias) expect_match(qc$verdicts$Detail[10], "screened interactions crossed", fixed = TRUE) expect_identical(qc$details$bias$inference_tier, "screening") expect_true(qc$details$bias$available) expect_true(is.finite(qc$details$bias$total)) }) test_that("run_qc_pipeline does not convert category-count failures into pass verdicts", { toy <- load_mfrmr_data("study1") fit <- fit_mfrm(toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 25) diag <- diagnose_mfrm(fit, residual_pca = "none") diag$obs <- structure(list(), class = "bogus_obs") qc <- run_qc_pipeline(fit, diagnostics = diag) expect_identical(as.character(qc$verdicts$Verdict[7]), "Skip") expect_match(qc$verdicts$Detail[7], "Category counts could not be computed", fixed = TRUE) expect_true(nzchar(qc$details$category_structure$error)) expect_false(identical(qc$overall, "Pass")) }) test_that("run_qc_pipeline surfaces incomplete bias collections as warn rather than skip/pass", { toy <- load_mfrmr_data("example_bias") fit <- suppressWarnings(fit_mfrm( toy, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 20 )) diag <- suppressWarnings(diagnose_mfrm(fit, residual_pca = "none")) diag$interactions <- data.frame() bias_collection <- structure( list( by_pair = list(), errors = data.frame( Interaction = "Rater x Criterion", Facets = "Rater x Criterion", Error = "forced pair failure", stringsAsFactors = FALSE ) ), class = c("mfrm_bias_collection", "mfrm_bundle", "list") ) qc <- run_qc_pipeline(fit, diagnostics = diag, bias_results = bias_collection) expect_identical(as.character(qc$verdicts$Verdict[10]), "Warn") expect_match(qc$verdicts$Detail[10], "incomplete", fixed = TRUE) expect_identical(qc$details$bias$error_count, 1L) expect_false(identical(qc$overall, "Pass")) }) test_that("run_qc_pipeline rejects non-mfrm_fit input", { expect_error(run_qc_pipeline(list()), "mfrm_fit") }) test_that("plot_qc_pipeline rejects non-qc-pipeline input", { expect_error(plot_qc_pipeline(list()), "mfrm_qc_pipeline") })