# Tests for DIF analysis module (Phase 2) and compare_mfrm enhancements (Phase 3) # ---------- shared fixtures ---------- local_dif_fixtures <- function(env = parent.frame()) { toy <- load_mfrmr_data("study1") persons <- unique(toy$Person) half <- ceiling(length(persons) / 2) grp_map <- setNames( c(rep("A", half), rep("B", length(persons) - half)), persons ) toy$Group <- grp_map[toy$Person] fit <- fit_mfrm(toy, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "JML") diag <- diagnose_mfrm(fit, residual_pca = "none") assign("toy", toy, envir = env) assign("fit", fit, envir = env) assign("diag", diag, envir = env) } # ================================================================ # Phase 2: DIF diagnostic module # ================================================================ test_that("analyze_dff residual method returns expected structure", { local_dif_fixtures() dif <- analyze_dff(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "residual") expect_s3_class(dif, "mfrm_dff") expect_s3_class(dif, "mfrm_dif") expect_true(is.data.frame(dif$dif_table)) expect_true(is.data.frame(dif$cell_table)) expect_true(nrow(dif$dif_table) > 0) expect_true(nrow(dif$cell_table) > 0) # Required columns in dif_table required_cols <- c("Level", "Group1", "Group2", "Contrast", "SE", "t", "df", "p_value", "Classification", "ClassificationSystem", "ETS", "Method", "SEBasis", "StatisticLabel", "ProbabilityMetric", "DFBasis", "ContrastBasis", "ReportingUse", "PrimaryReportingEligible") expect_true(all(required_cols %in% names(dif$dif_table))) # Method must be "residual" expect_true(all(dif$dif_table$Method == "residual")) expect_true(all(dif$dif_table$ClassificationSystem == "screening")) expect_true(all(dif$dif_table$StatisticLabel == "Welch screening t")) expect_true(all(dif$dif_table$DFBasis == "Welch-Satterthwaite approximation")) expect_true(all(is.na(dif$dif_table$ETS))) expect_true(all(dif$dif_table$ReportingUse == "screening_only")) expect_true(all(!dif$dif_table$PrimaryReportingEligible)) expect_equal(dif$config$method, "residual") }) test_that("analyze_dff alias is backward compatible with analyze_dif", { local_dif_fixtures() dff <- analyze_dff(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "residual") dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "residual") expect_equal(dff$dif_table, dif$dif_table) expect_equal(dff$summary, dif$summary) expect_equal(dff$config$functioning_label, "DIF") }) test_that("analyze_dif refit keeps JML contrasts descriptive even when linked", { local_dif_fixtures() dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "refit") expect_s3_class(dif, "mfrm_dif") expect_true(is.data.frame(dif$dif_table)) expect_true(nrow(dif$dif_table) > 0) expect_true(all(c("ContrastComparable", "ScaleLinkStatus", "LinkingFacets", "FormalInferenceEligible", "InferenceTier", "ReportingUse", "PrimaryReportingEligible", "BaselineConverged", "SubgroupConverged1", "SubgroupConverged2", "BaselineMethod", "BaselinePrecisionTier", "BaselineSupportsFormalInference") %in% names(dif$dif_table))) expect_true(all(c("LinkingStatus", "LinkingDetail", "ETS_Eligible", "LinkComparable", "PrecisionTier", "SupportsFormalInference") %in% names(dif$group_fits[[1]]))) expect_true("Converged" %in% names(dif$group_fits[[1]])) expect_true(all(dif$dif_table$ClassificationSystem == "descriptive")) expect_true(all(dif$dif_table$StatisticLabel == "linked descriptive contrast")) expect_true(all(dif$dif_table$SEBasis == "not reported without model-based subgroup precision")) expect_true(all(is.na(dif$dif_table$ETS))) expect_true(all(dif$dif_table$ContrastComparable)) expect_true(all(!dif$dif_table$FormalInferenceEligible)) expect_true(all(!dif$dif_table$PrimaryReportingEligible)) expect_true(all(dif$dif_table$InferenceTier == "exploratory")) expect_true(all(dif$dif_table$ScaleLinkStatus == "linked")) expect_true(all(dif$dif_table$ReportingUse == "screening_only")) expect_equal(dif$config$method, "refit") }) test_that("analyze_dif refit surfaces subgroup diagnostics failures", { local_dif_fixtures() testthat::local_mocked_bindings( diagnose_mfrm = function(...) stop("forced subgroup diagnostics failure"), .package = "mfrmr" ) dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "refit") group_fit_tbl <- dif$group_fits[[1]] expect_true(all(c("DiagnosticsStatus", "DiagnosticsDetail") %in% names(group_fit_tbl))) expect_true(all(group_fit_tbl$DiagnosticsStatus == "failed")) expect_true(all(group_fit_tbl$PrecisionTier == "diagnostics_unavailable")) expect_true(all(grepl("forced subgroup diagnostics failure", group_fit_tbl$DiagnosticsDetail, fixed = TRUE))) }) test_that("analyze_dif refit uses ETS only for linked model-based MML contrasts", { dat <- mfrmr:::sample_mfrm_data(seed = 42) persons <- unique(dat$Person) half <- ceiling(length(persons) / 2) grp_map <- setNames( c(rep("A", half), rep("B", length(persons) - half)), persons ) dat$Group <- grp_map[dat$Person] fit_mml <- suppressWarnings(fit_mfrm( dat, person = "Person", facets = c("Rater", "Task", "Criterion"), score = "Score", method = "MML", quad_points = 7, maxit = 25 )) diag_mml <- diagnose_mfrm(fit_mml, residual_pca = "none") dif_mml <- analyze_dif( fit_mml, diag_mml, facet = "Criterion", group = "Group", data = dat, method = "refit" ) expect_true(any(dif_mml$dif_table$ClassificationSystem == "ETS")) ets_rows <- dif_mml$dif_table$ClassificationSystem == "ETS" expect_true(all(dif_mml$dif_table$ContrastComparable[ets_rows])) expect_true(all(dif_mml$dif_table$FormalInferenceEligible[ets_rows])) expect_true(all(dif_mml$dif_table$PrimaryReportingEligible[ets_rows])) expect_true(all(dif_mml$dif_table$InferenceTier[ets_rows] == "model_based")) expect_true(all(dif_mml$dif_table$ReportingUse[ets_rows] == "primary_reporting")) expect_true(all(stats::na.omit(dif_mml$dif_table$ETS) %in% c("A", "B", "C"))) }) test_that("analyze_dif refit demotes ETS when subgroup refits lack linking facets", { local_dif_fixtures() fit_one <- fit_mfrm(toy, person = "Person", facets = "Criterion", score = "Score", method = "JML", maxit = 20) diag_one <- diagnose_mfrm(fit_one, residual_pca = "none") dif_one <- analyze_dif(fit_one, diag_one, facet = "Criterion", group = "Group", data = toy, method = "refit") expect_true(all(dif_one$dif_table$ClassificationSystem == "descriptive")) expect_true(all(is.na(dif_one$dif_table$ETS))) expect_true(all(dif_one$dif_table$ScaleLinkStatus == "unlinked")) expect_true(all(!dif_one$dif_table$ContrastComparable)) expect_match(dif_one$summary$Classification[nrow(dif_one$summary)], "insufficient linking", ignore.case = TRUE) }) test_that("analyze_dif min_obs filter works", { local_dif_fixtures() # With a very high min_obs, all cells should be sparse dif_high <- analyze_dif(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "residual", min_obs = 99999) expect_true(all(dif_high$cell_table$sparse)) }) test_that("analyze_dif p_adjust works for all methods", { local_dif_fixtures() for (m in c("holm", "fdr", "bonferroni", "none")) { dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "residual", p_adjust = m) expect_true("p_adjusted" %in% names(dif$dif_table)) } }) test_that("residual method uses screening labels instead of ETS categories", { local_dif_fixtures() dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "residual") expect_true(all(dif$summary$Classification %in% c("Screen positive", "Screen negative", "Unclassified"))) expect_true(all(is.na(dif$dif_table$ETS))) }) test_that("residual method uses Welch-Satterthwaite degrees of freedom", { local_dif_fixtures() dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "residual") first_row <- dif$dif_table[1, , drop = FALSE] c1 <- subset(dif$cell_table, Level == first_row$Level & GroupValue == first_row$Group1) c2 <- subset(dif$cell_table, Level == first_row$Level & GroupValue == first_row$Group2) comp1 <- c1$Var_sum[1] / c1$N[1]^2 comp2 <- c2$Var_sum[1] / c2$N[1]^2 expected_df <- (comp1 + comp2)^2 / ((comp1^2) / (c1$N[1] - 1) + (comp2^2) / (c2$N[1] - 1)) expect_equal(first_row$df, expected_df, tolerance = 1e-8) }) test_that("refit method ETS classification is valid", { dat <- mfrmr:::sample_mfrm_data(seed = 99) dat$Group <- ifelse(dat$Person %in% unique(dat$Person)[1:30], "A", "B") fit_mml <- suppressWarnings(fit_mfrm( dat, person = "Person", facets = c("Rater", "Task", "Criterion"), score = "Score", method = "MML", quad_points = 7, maxit = 25 )) diag_mml <- diagnose_mfrm(fit_mml, residual_pca = "none") dif <- analyze_dif(fit_mml, diag_mml, facet = "Criterion", group = "Group", data = dat, method = "refit") expect_true(all(stats::na.omit(dif$dif_table$ETS) %in% c("A", "B", "C"))) }) test_that("dif_interaction_table returns expected structure", { local_dif_fixtures() int <- dif_interaction_table(fit, diag, facet = "Criterion", group = "Group", data = toy, min_obs = 2) expect_s3_class(int, "mfrm_dif_interaction") expect_true(is.data.frame(int$table)) expect_true(nrow(int$table) > 0) required_cols <- c("Level", "GroupValue", "N", "ObsScore", "ExpScore", "ObsExpAvg") expect_true(all(required_cols %in% names(int$table))) }) test_that("dif_interaction_table min_obs filter works", { local_dif_fixtures() int <- dif_interaction_table(fit, diag, facet = "Criterion", group = "Group", data = toy, min_obs = 99999) expect_true(all(int$table$sparse)) }) test_that("plot_dif_heatmap returns matrix when draw = FALSE", { local_dif_fixtures() dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "residual") for (m in c("obs_exp", "t", "contrast")) { mat <- plot_dif_heatmap(dif, metric = m, draw = FALSE) expect_true(is.matrix(mat)) } }) test_that("plot_dif_heatmap works with dif_interaction_table", { local_dif_fixtures() int <- dif_interaction_table(fit, diag, facet = "Criterion", group = "Group", data = toy, min_obs = 2) mat <- plot_dif_heatmap(int, metric = "obs_exp", draw = FALSE) expect_true(is.matrix(mat)) }) test_that("dif_report produces interpretable output", { local_dif_fixtures() dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "residual") rpt <- dif_report(dif) expect_s3_class(rpt, "mfrm_dif_report") expect_true(is.character(rpt$narrative)) expect_true(nchar(rpt$narrative) > 0) expect_match(rpt$narrative, "screening", ignore.case = TRUE) }) test_that("print and summary S3 methods work for DIF objects", { local_dif_fixtures() dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group", data = toy, method = "residual") expect_output(print(dif)) s <- summary(dif) expect_output(print(s)) int <- dif_interaction_table(fit, diag, facet = "Criterion", group = "Group", data = toy, min_obs = 2) expect_output(print(int)) s2 <- summary(int) expect_output(print(s2)) rpt <- dif_report(dif) expect_output(print(rpt)) s3 <- summary(rpt) expect_output(print(s3)) }) # ================================================================ # Phase 3: compare_mfrm enhancements # ================================================================ test_that("compare_mfrm reports comparable IC quantities on a common basis", { toy_small <- load_mfrmr_data("example_core") fit_rsm <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "MML", model = "RSM", quad_points = 5, maxit = 15 )) fit_pcm <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "MML", model = "PCM", step_facet = "Criterion", quad_points = 5, maxit = 15 )) fit_rsm$summary$Converged[1] <- TRUE fit_pcm$summary$Converged[1] <- TRUE comp <- compare_mfrm(RSM = fit_rsm, PCM = fit_pcm) expect_s3_class(comp, "mfrm_comparison") tbl <- comp$table expect_true(all(tbl$ICComparable)) expect_true("Delta_AIC" %in% names(tbl)) expect_true("Delta_BIC" %in% names(tbl)) expect_true("AkaikeWeight" %in% names(tbl)) expect_true("BICWeight" %in% names(tbl)) # Delta should have at least one zero (best model) expect_equal(min(tbl$Delta_AIC), 0) expect_equal(min(tbl$Delta_BIC), 0) # Weights should sum to 1 expect_equal(sum(tbl$AkaikeWeight), 1, tolerance = 1e-10) expect_equal(sum(tbl$BICWeight), 1, tolerance = 1e-10) expect_true(isTRUE(comp$comparison_basis$ic_comparable)) expect_true(isTRUE(comp$comparison_basis$same_data)) }) test_that("compare_mfrm evidence_ratios are reciprocal", { toy_small <- load_mfrmr_data("example_core") fit_rsm <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "MML", model = "RSM", quad_points = 5, maxit = 15 )) fit_pcm <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "MML", model = "PCM", step_facet = "Criterion", quad_points = 5, maxit = 15 )) fit_rsm$summary$Converged[1] <- TRUE fit_pcm$summary$Converged[1] <- TRUE comp <- compare_mfrm(RSM = fit_rsm, PCM = fit_pcm) er <- comp$evidence_ratios expect_true(is.data.frame(er)) expect_true(nrow(er) > 0) expect_true("EvidenceRatio" %in% names(er)) expect_true(all(er$EvidenceRatio > 0)) }) test_that("compare_mfrm print and summary work with new fields", { toy_small <- load_mfrmr_data("example_core") fit_rsm <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "MML", model = "RSM", quad_points = 5, maxit = 15 )) fit_pcm <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "MML", model = "PCM", step_facet = "Criterion", quad_points = 5, maxit = 15 )) fit_rsm$summary$Converged[1] <- TRUE fit_pcm$summary$Converged[1] <- TRUE comp <- compare_mfrm(RSM = fit_rsm, PCM = fit_pcm) expect_output(print(comp)) s <- summary(comp) expect_output(print(s)) }) test_that("compare_mfrm suppresses IC ranking outside the formal MML path and requires nested = TRUE for LRT", { toy_small <- load_mfrmr_data("example_core") fit_jml <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "JML", model = "RSM", maxit = 15 )) fit_mml <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "MML", model = "RSM", quad_points = 5, maxit = 15 )) fit_jml$summary$Converged[1] <- TRUE fit_mml$summary$Converged[1] <- TRUE expect_warning( comp <- compare_mfrm(JML = fit_jml, MML = fit_mml), "different estimation methods" ) expect_false(isTRUE(comp$comparison_basis$ic_comparable)) expect_true(all(is.na(comp$table$Delta_AIC))) expect_true(all(is.na(comp$table$AkaikeWeight))) expect_null(comp$lrt) fit_pcm <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "JML", model = "PCM", step_facet = "Criterion", maxit = 15 )) fit_pcm$summary$Converged[1] <- TRUE expect_warning( comp_no_lrt <- compare_mfrm(RSM = fit_jml, PCM = fit_pcm), "limited to converged MML fits" ) expect_null(comp_no_lrt$lrt) expect_warning( comp_lrt <- compare_mfrm(RSM = fit_jml, PCM = fit_pcm, nested = TRUE), "formal MML likelihood basis" ) expect_true(isTRUE(comp_lrt$comparison_basis$nested_requested)) expect_null(comp_lrt$lrt) expect_true(isTRUE(comp_lrt$comparison_basis$nesting_audit$eligible)) expect_identical(as.character(comp_lrt$comparison_basis$nesting_audit$relation), "RSM_in_PCM") fit_rsm_2 <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "JML", model = "RSM", maxit = 15 )) fit_rsm_2$summary$Converged[1] <- TRUE expect_warning( comp_same <- compare_mfrm(RSM1 = fit_jml, RSM2 = fit_rsm_2, nested = TRUE), "formal MML likelihood basis" ) expect_null(comp_same$lrt) expect_false(isTRUE(comp_same$comparison_basis$ic_comparable)) expect_false(isTRUE(comp_same$comparison_basis$nesting_audit$eligible)) expect_identical(as.character(comp_same$comparison_basis$nesting_audit$relation), "same_model") }) test_that("compare_mfrm suppresses IC ranking for JML-only comparisons", { local_dif_fixtures() fit2 <- fit_mfrm(toy, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "JML", model = "PCM", step_facet = "Criterion") fit$summary$Converged[1] <- TRUE fit2$summary$Converged[1] <- TRUE expect_warning( comp <- compare_mfrm(RSM = fit, PCM = fit2), "limited to converged MML fits" ) expect_false(isTRUE(comp$comparison_basis$ic_comparable)) expect_false(isTRUE(comp$comparison_basis$all_mml)) expect_true(all(is.na(comp$table$Delta_AIC))) expect_true(all(is.na(comp$table$AkaikeWeight))) expect_null(comp$evidence_ratios) }) test_that("compare_mfrm suppresses IC ranking when a fit is marked unconverged", { local_dif_fixtures() fit2 <- fit_mfrm(toy, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "JML", model = "PCM", step_facet = "Criterion") fit2$summary$Converged[1] <- FALSE expect_warning( comp <- compare_mfrm(RSM = fit, PCM = fit2), "did not converge" ) expect_false(isTRUE(comp$comparison_basis$ic_comparable)) expect_false(isTRUE(comp$comparison_basis$all_converged)) expect_true(all(is.na(comp$table$Delta_AIC))) expect_true(all(is.na(comp$table$AkaikeWeight))) }) test_that("compare_mfrm requires the same prepared response data for IC ranking", { toy_small <- load_mfrmr_data("example_core") fit_a <- suppressWarnings(fit_mfrm( toy_small, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "MML", model = "RSM", quad_points = 5, maxit = 15 )) fit_a$summary$Converged[1] <- TRUE toy_perm <- toy_small[sample.int(nrow(toy_small)), , drop = FALSE] fit_perm <- suppressWarnings(fit_mfrm( toy_perm, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "MML", model = "RSM", quad_points = 5, maxit = 15 )) fit_perm$summary$Converged[1] <- TRUE comp_same <- compare_mfrm(RSM1 = fit_a, RSM2 = fit_perm) expect_true(isTRUE(comp_same$comparison_basis$same_data)) expect_true(all(comp_same$table$ICComparable)) toy_shuf <- toy_small toy_shuf$Score <- sample(toy_shuf$Score) fit_shuf <- suppressWarnings(fit_mfrm( toy_shuf, person = "Person", facets = c("Rater", "Criterion"), score = "Score", method = "MML", model = "RSM", quad_points = 5, maxit = 15 )) fit_shuf$summary$Converged[1] <- TRUE expect_warning( comp_diff <- compare_mfrm(A = fit_a, B = fit_shuf), "same prepared response data" ) expect_false(isTRUE(comp_diff$comparison_basis$same_data)) expect_false(any(comp_diff$table$ICComparable)) expect_true(all(is.na(comp_diff$table$Delta_AIC))) expect_true(all(is.na(comp_diff$table$AkaikeWeight))) })