# Tests for Phase 4: Anchoring & Equating Workflow # ---------- shared fixtures (computed once) ---------- d1 <- load_mfrmr_data("study1") d2 <- load_mfrmr_data("study2") fit1 <- fit_mfrm(d1, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "JML") fit2 <- fit_mfrm(d2, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "JML") # ================================================================ # anchor_to_baseline # ================================================================ test_that("anchor_to_baseline returns correct class and structure", { res <- suppressWarnings( anchor_to_baseline(d2, fit1, person = "Person", facets = c("Rater", "Criterion"), score = "Score") ) expect_s3_class(res, "mfrm_anchored_fit") expect_true(is.list(res)) expect_named(res, c("fit", "diagnostics", "baseline_anchors", "drift"), ignore.order = TRUE) # fit is an mfrm_fit expect_s3_class(res$fit, "mfrm_fit") # baseline_anchors is a tibble with expected columns expect_true(is.data.frame(res$baseline_anchors)) expect_true(all(c("Facet", "Level", "Anchor") %in% names(res$baseline_anchors))) expect_true(nrow(res$baseline_anchors) > 0) # drift is a tibble with expected columns expect_true(is.data.frame(res$drift)) drift_cols <- c("Facet", "Level", "Baseline", "New", "Drift", "SE_Baseline", "SE_New", "SE_Diff", "Drift_SE_Ratio", "Flag") expect_true(all(drift_cols %in% names(res$drift))) }) test_that("anchor_to_baseline self-anchoring yields near-zero drift", { # Anchor fit1 data to fit1 itself -> drift should be ~0 res <- anchor_to_baseline(d1, fit1, person = "Person", facets = c("Rater", "Criterion"), score = "Score") expect_s3_class(res, "mfrm_anchored_fit") # All drifts should be very small (< 0.1 logits) if (nrow(res$drift) > 0) { expect_true(all(abs(res$drift$Drift) < 0.1), info = "Self-anchored drift should be near zero") } }) test_that("anchor_to_baseline rejects non-mfrm_fit input", { expect_error( anchor_to_baseline(data.frame(), list(x = 1), "P", "F", "S"), "mfrm_fit" ) }) test_that("fit_mfrm surfaces malformed anchor schemas instead of silently dropping them", { toy <- load_mfrmr_data("example_core") bad_anchors <- data.frame( WrongFacet = "Rater", WrongLevel = "R1", WrongValue = 0, stringsAsFactors = FALSE ) expect_warning( withCallingHandlers( fit_mfrm( toy, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "JML", maxit = 15, anchors = bad_anchors, anchor_policy = "warn" ), warning = function(w) { if (grepl("Optimizer did not fully converge", conditionMessage(w), fixed = TRUE)) { invokeRestart("muffleWarning") } } ), "anchor_schema_mismatch" ) expect_error( fit_mfrm( toy, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "JML", maxit = 15, anchors = bad_anchors, anchor_policy = "error" ), "anchor_schema_mismatch" ) }) test_that("anchor_to_baseline S3 methods produce output", { res <- suppressWarnings( anchor_to_baseline(d2, fit1, person = "Person", facets = c("Rater", "Criterion"), score = "Score") ) # summary returns expected class s <- summary(res) expect_s3_class(s, "summary.mfrm_anchored_fit") expect_true(is.numeric(s$n_anchored)) expect_true(is.numeric(s$n_common)) expect_true(is.numeric(s$n_flagged)) # print methods produce output without error expect_output(print(res), "Anchored Fit Summary") expect_output(print(s), "Anchored Fit Summary") }) # ================================================================ # detect_anchor_drift # ================================================================ test_that("detect_anchor_drift returns correct class and structure", { drift <- detect_anchor_drift(list(Wave1 = fit1, Wave2 = fit2)) expect_s3_class(drift, "mfrm_anchor_drift") expect_named(drift, c("drift_table", "summary", "common_elements", "common_by_facet", "config"), ignore.order = TRUE) # drift_table is a tibble with expected columns expect_true(is.data.frame(drift$drift_table)) dt_cols <- c("Facet", "Level", "Reference", "Wave", "Ref_Est", "Wave_Est", "LinkOffset", "Drift", "SE_Ref", "SE_Wave", "SE", "Drift_SE_Ratio", "LinkSupportAdequate", "Flag") expect_true(all(dt_cols %in% names(drift$drift_table))) # common_elements has expected columns expect_true(is.data.frame(drift$common_elements)) expect_true(all(c("Wave1", "Wave2", "N_Common") %in% names(drift$common_elements))) expect_true(is.data.frame(drift$common_by_facet)) expect_true(all(c("Reference", "Wave", "Facet", "N_Common", "N_Retained", "GuidelineMinCommon", "LinkSupportAdequate") %in% names(drift$common_by_facet))) # config preserves settings expect_equal(drift$config$reference, "Wave1") expect_equal(drift$config$method, "screened_common_element_alignment") expect_equal(drift$config$intended_use, "review_screen") expect_equal(drift$config$drift_threshold, 0.5) expect_equal(drift$config$min_common_per_facet, 5L) expect_equal(drift$config$waves, c("Wave1", "Wave2")) }) test_that("detect_anchor_drift finds common elements", { drift <- detect_anchor_drift(list(W1 = fit1, W2 = fit2)) # Should have at least some common elements expect_true(nrow(drift$common_elements) > 0) expect_true(all(drift$common_elements$N_Common >= 0)) }) test_that("detect_anchor_drift uses aligned drift and combined standard errors", { drift <- detect_anchor_drift(list(W1 = fit1, W2 = fit2)) if (nrow(drift$drift_table) > 0) { expected_se <- sqrt(drift$drift_table$SE_Ref^2 + drift$drift_table$SE_Wave^2) expect_equal(drift$drift_table$SE, expected_se, tolerance = 1e-8) expect_equal( drift$drift_table$Drift_SE_Ratio, abs(drift$drift_table$Drift) / drift$drift_table$SE, tolerance = 1e-8 ) } }) test_that("detect_anchor_drift warns when retained link support is thin", { d_small1 <- simulate_mfrm_data( n_person = 20, n_rater = 4, n_criterion = 3, raters_per_person = 2, seed = 901 ) d_small2 <- simulate_mfrm_data( n_person = 20, n_rater = 4, n_criterion = 3, raters_per_person = 2, seed = 902 ) fit_small1 <- suppressWarnings( fit_mfrm(d_small1, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 10) ) fit_small2 <- suppressWarnings( fit_mfrm(d_small2, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 10) ) expect_warning( drift <- detect_anchor_drift(list(W1 = fit_small1, W2 = fit_small2), facets = "Rater"), "Thin linking support" ) expect_true(any(!drift$common_by_facet$LinkSupportAdequate)) }) test_that("detect_anchor_drift flagging logic works", { # Use a very small threshold to trigger flags drift <- detect_anchor_drift(list(W1 = fit1, W2 = fit2), drift_threshold = 0.01, flag_se_ratio = 0.01) # With such small thresholds, most elements should be flagged if (nrow(drift$drift_table) > 0) { expect_true(is.logical(drift$drift_table$Flag)) } # Use a very large threshold to suppress flags drift_lax <- detect_anchor_drift(list(W1 = fit1, W2 = fit2), drift_threshold = 100, flag_se_ratio = 100) if (nrow(drift_lax$drift_table) > 0) { expect_equal(sum(drift_lax$drift_table$Flag), 0) } }) test_that("detect_anchor_drift rejects invalid input", { expect_error(detect_anchor_drift(list()), "length") expect_error(detect_anchor_drift(list(a = 1, b = 2)), "mfrm_fit") }) test_that("detect_anchor_drift S3 methods produce output", { drift <- detect_anchor_drift(list(W1 = fit1, W2 = fit2)) s <- summary(drift) expect_s3_class(s, "summary.mfrm_anchor_drift") expect_true(is.numeric(s$n_comparisons)) expect_true(is.numeric(s$n_flagged)) expect_output(print(drift), "Anchor Drift Screen") expect_output(print(s), "Anchor Drift Screen") }) # ================================================================ # build_equating_chain # ================================================================ test_that("build_equating_chain returns correct class and structure", { chain <- build_equating_chain(list(Form1 = fit1, Form2 = fit2)) expect_s3_class(chain, "mfrm_equating_chain") expect_named(chain, c("links", "cumulative", "element_detail", "common_by_facet", "config"), ignore.order = TRUE) # links is a tibble with expected columns expect_true(is.data.frame(chain$links)) link_cols <- c("Link", "From", "To", "N_Common", "N_Retained", "Min_Common_Per_Facet", "Min_Retained_Per_Facet", "Offset_Prelim", "Offset", "Offset_SD", "Max_Residual", "LinkSupportAdequate", "Offset_Method") expect_true(all(link_cols %in% names(chain$links))) expect_equal(nrow(chain$links), 1) # 2 fits -> 1 link # cumulative has one row per wave expect_true(is.data.frame(chain$cumulative)) expect_equal(nrow(chain$cumulative), 2) expect_true(all(c("Wave", "Cumulative_Offset") %in% names(chain$cumulative))) # First wave offset is always 0 expect_equal(chain$cumulative$Cumulative_Offset[1], 0) expect_true(is.data.frame(chain$common_by_facet)) expect_equal(chain$config$method, "screened_common_element_alignment") expect_equal(chain$config$intended_use, "screened_linking_aid") }) test_that("build_equating_chain with 3 fits produces 2 links", { # Use fit1 three times (artificial but tests chain logic) chain <- build_equating_chain(list(A = fit1, B = fit2, C = fit1)) expect_equal(nrow(chain$links), 2) expect_equal(nrow(chain$cumulative), 3) expect_equal(chain$cumulative$Wave, c("A", "B", "C")) # Cumulative offset of first wave is 0 expect_equal(chain$cumulative$Cumulative_Offset[1], 0) }) test_that("build_equating_chain uses inverse-variance weighted offsets", { chain <- build_equating_chain(list(F1 = fit1, F2 = fit2)) detail <- chain$element_detail if (nrow(detail) > 0) { w <- 1 / (detail$SE_From^2 + detail$SE_To^2) keep <- is.finite(w) & detail$Retained expected_offset <- stats::weighted.mean(detail$Diff[keep], w = w[keep]) expect_equal(chain$links$Offset[1], expected_offset, tolerance = 1e-8) } }) test_that("build_equating_chain warns when retained link support is thin", { d_small1 <- simulate_mfrm_data( n_person = 20, n_rater = 4, n_criterion = 3, raters_per_person = 2, seed = 903 ) d_small2 <- simulate_mfrm_data( n_person = 20, n_rater = 4, n_criterion = 3, raters_per_person = 2, seed = 904 ) fit_small1 <- suppressWarnings( fit_mfrm(d_small1, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 10) ) fit_small2 <- suppressWarnings( fit_mfrm(d_small2, "Person", c("Rater", "Criterion"), "Score", method = "JML", maxit = 10) ) expect_warning( chain <- build_equating_chain(list(F1 = fit_small1, F2 = fit_small2), anchor_facets = "Rater"), "Thin linking support" ) expect_true(any(!chain$links$LinkSupportAdequate)) expect_true(any(chain$common_by_facet$N_Retained < chain$config$min_common_per_facet)) }) test_that("build_equating_chain rejects invalid input", { expect_error(build_equating_chain(list()), "length") expect_error(build_equating_chain(list(a = 1, b = 2)), "mfrm_fit") }) test_that("build_equating_chain S3 methods produce output", { chain <- build_equating_chain(list(F1 = fit1, F2 = fit2)) s <- summary(chain) expect_s3_class(s, "summary.mfrm_equating_chain") expect_true(is.numeric(s$n_flagged)) expect_output(print(chain), "Screened Linking Chain") expect_output(print(s), "Screened Linking Chain") }) # ================================================================ # plot_anchor_drift # ================================================================ # Precompute drift and chain objects for all plot tests drift_obj <- detect_anchor_drift(list(W1 = fit1, W2 = fit2)) chain_obj <- build_equating_chain(list(F1 = fit1, F2 = fit2)) test_that("plot_anchor_drift drift type returns data with draw=FALSE", { result <- plot_anchor_drift(drift_obj, type = "drift", draw = FALSE) expect_s3_class(result, "mfrm_plot_data") expect_identical(result$data$plot, "drift") expect_true(is.data.frame(result$data$table)) expect_true(nrow(result$data$table) > 0) expect_true(all(c("title", "subtitle", "legend", "reference_lines") %in% names(result$data))) }) test_that("plot_anchor_drift heatmap type returns data with draw=FALSE", { result <- plot_anchor_drift(drift_obj, type = "heatmap", draw = FALSE) expect_s3_class(result, "mfrm_plot_data") expect_identical(result$data$plot, "heatmap") expect_true(is.matrix(result$data$matrix)) }) test_that("plot_anchor_drift chain type returns data with draw=FALSE", { result <- plot_anchor_drift(chain_obj, type = "chain", draw = FALSE) expect_s3_class(result, "mfrm_plot_data") expect_identical(result$data$plot, "chain") expect_true(is.data.frame(result$data$table)) expect_true(all(c("Wave", "Cumulative_Offset") %in% names(result$data$table))) }) test_that("plot_anchor_drift drift type draws without error", { pdf(NULL) # suppress graphical output on.exit(dev.off(), add = TRUE) expect_no_error(plot_anchor_drift(drift_obj, type = "drift")) }) test_that("plot_anchor_drift chain type draws without error", { pdf(NULL) on.exit(dev.off(), add = TRUE) expect_no_error(plot_anchor_drift(chain_obj, type = "chain")) }) test_that("plot_anchor_drift heatmap type draws without error", { pdf(NULL) on.exit(dev.off(), add = TRUE) expect_no_error(plot_anchor_drift(drift_obj, type = "heatmap")) }) test_that("plot_anchor_drift accepts publication preset", { pdf(NULL) on.exit(dev.off(), add = TRUE) expect_no_error(plot_anchor_drift(drift_obj, type = "drift", preset = "publication")) expect_no_error(plot_anchor_drift(chain_obj, type = "chain", preset = "publication")) }) test_that("plot_anchor_drift rejects unsupported type/class combo", { # chain object with drift type should error expect_error(plot_anchor_drift(chain_obj, type = "drift"), "Unsupported") }) test_that("plot_anchor_drift facet filter works", { result <- plot_anchor_drift(drift_obj, type = "drift", facet = "Rater", draw = FALSE) if (inherits(result, "mfrm_plot_data") && nrow(result$data$table) > 0) { expect_true(all(result$data$table$Facet == "Rater")) } })