# test-data-processing.R # Tests for the data processing layer -- the most important bug hotspot. # Covers: facet ID conversion, factor vs character equivalence, label # permutation invariance, missing data, score recoding, anchors, weights, # and edge cases. # --------------------------------------------------------------------------- # 1.1 Facet ID conversion accuracy # --------------------------------------------------------------------------- test_that("character facet labels are preserved in fitted output", { d <- mfrmr:::sample_mfrm_data(seed = 42) fit <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30)) # Rater labels rater_levels <- sort(unique(d$Rater)) fitted_rater <- fit$facets$others |> dplyr::filter(Facet == "Rater") |> dplyr::pull(Level) |> sort() expect_equal(fitted_rater, rater_levels) # Task labels task_levels <- sort(unique(d$Task)) fitted_task <- fit$facets$others |> dplyr::filter(Facet == "Task") |> dplyr::pull(Level) |> sort() expect_equal(fitted_task, task_levels) # Criterion labels crit_levels <- sort(unique(d$Criterion)) fitted_crit <- fit$facets$others |> dplyr::filter(Facet == "Criterion") |> dplyr::pull(Level) |> sort() expect_equal(fitted_crit, crit_levels) # Person labels preserved expect_setequal(fit$facets$person$Person, unique(d$Person)) }) test_that("facet level ordering follows alphabetical (factor default)", { d <- mfrmr:::sample_mfrm_data(seed = 42) fit <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30)) rater_tbl <- fit$facets$others |> dplyr::filter(Facet == "Rater") expect_equal(rater_tbl$Level, sort(unique(d$Rater))) }) # --------------------------------------------------------------------------- # 1.2 Factor vs character equivalence # --------------------------------------------------------------------------- test_that("factor vs character facet columns produce identical estimates", { d <- mfrmr:::sample_mfrm_data(seed = 100) d_factor <- d |> dplyr::mutate(Rater = factor(Rater), Task = factor(Task)) fit_char <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 40)) fit_factor <- suppressWarnings(fit_mfrm(d_factor, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 40)) expect_equal(fit_char$summary$LogLik, fit_factor$summary$LogLik, tolerance = 1e-6) for (facet in c("Rater", "Task", "Criterion")) { est_c <- fit_char$facets$others |> dplyr::filter(Facet == facet) |> dplyr::arrange(Level) |> dplyr::pull(Estimate) est_f <- fit_factor$facets$others |> dplyr::filter(Facet == facet) |> dplyr::arrange(Level) |> dplyr::pull(Estimate) expect_equal(est_c, est_f, tolerance = 1e-8, label = paste("factor vs char for", facet)) } }) test_that("factor with explicit level ordering matches character", { d <- mfrmr:::sample_mfrm_data(seed = 55) d_fac <- d |> dplyr::mutate(Rater = factor(Rater, levels = c("R3", "R1", "R2"))) fit_chr <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 40)) fit_fac <- suppressWarnings(fit_mfrm(d_fac, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 40)) expect_equal(fit_chr$summary$LogLik, fit_fac$summary$LogLik, tolerance = 1e-6) }) # --------------------------------------------------------------------------- # 1.3 Integer ID vs string ID equivalence # --------------------------------------------------------------------------- test_that("integer facet IDs and string IDs produce equivalent estimates", { d <- mfrmr:::sample_mfrm_data(seed = 42) rater_map <- c(R1 = "1", R2 = "2", R3 = "3") d_int <- d |> dplyr::mutate(Rater = rater_map[Rater]) fit_str <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 40)) fit_int <- suppressWarnings(fit_mfrm(d_int, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 40)) expect_equal(fit_str$summary$LogLik, fit_int$summary$LogLik, tolerance = 1e-6) }) # --------------------------------------------------------------------------- # 1.4 Label permutation invariance (CRITICAL) # --------------------------------------------------------------------------- test_that("shuffling rater labels preserves estimates after remapping", { d <- mfrmr:::sample_mfrm_data(seed = 200) fit_orig <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 100)) shuffle <- c(R1 = "RaterC", R2 = "RaterA", R3 = "RaterB") d_shuf <- d |> dplyr::mutate(Rater = shuffle[Rater]) fit_shuf <- suppressWarnings(fit_mfrm(d_shuf, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 100)) rev_map <- c(RaterC = "R1", RaterA = "R2", RaterB = "R3") est_orig <- fit_orig$facets$others |> dplyr::filter(Facet == "Rater") |> dplyr::arrange(Level) |> dplyr::pull(Estimate) est_remapped <- fit_shuf$facets$others |> dplyr::filter(Facet == "Rater") |> dplyr::mutate(OrigLevel = rev_map[Level]) |> dplyr::arrange(OrigLevel) |> dplyr::pull(Estimate) expect_equal(unname(est_orig), unname(est_remapped), tolerance = 0.01) }) test_that("shuffling task labels preserves estimates after remapping", { d <- mfrmr:::sample_mfrm_data(seed = 200) fit_orig <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 100)) shuffle <- c(T1 = "TaskD", T2 = "TaskB", T3 = "TaskA", T4 = "TaskC") d_shuf <- d |> dplyr::mutate(Task = shuffle[Task]) fit_shuf <- suppressWarnings(fit_mfrm(d_shuf, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 100)) rev_map <- c(TaskD = "T1", TaskB = "T2", TaskA = "T3", TaskC = "T4") est_orig <- fit_orig$facets$others |> dplyr::filter(Facet == "Task") |> dplyr::arrange(Level) |> dplyr::pull(Estimate) est_remapped <- fit_shuf$facets$others |> dplyr::filter(Facet == "Task") |> dplyr::mutate(OrigLevel = rev_map[Level]) |> dplyr::arrange(OrigLevel) |> dplyr::pull(Estimate) expect_equal(unname(est_orig), unname(est_remapped), tolerance = 0.01) }) test_that("shuffling criterion labels preserves estimates after remapping", { d <- mfrmr:::sample_mfrm_data(seed = 200) fit_orig <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 100)) shuffle <- c(C1 = "CritZ", C2 = "CritX", C3 = "CritY") d_shuf <- d |> dplyr::mutate(Criterion = shuffle[Criterion]) fit_shuf <- suppressWarnings(fit_mfrm(d_shuf, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 100)) rev_map <- c(CritZ = "C1", CritX = "C2", CritY = "C3") est_orig <- fit_orig$facets$others |> dplyr::filter(Facet == "Criterion") |> dplyr::arrange(Level) |> dplyr::pull(Estimate) est_remapped <- fit_shuf$facets$others |> dplyr::filter(Facet == "Criterion") |> dplyr::mutate(OrigLevel = rev_map[Level]) |> dplyr::arrange(OrigLevel) |> dplyr::pull(Estimate) expect_equal(unname(est_orig), unname(est_remapped), tolerance = 0.01) }) # --------------------------------------------------------------------------- # 1.5 Missing data handling # --------------------------------------------------------------------------- test_that("NA in person column: rows dropped, fit succeeds", { d <- mfrmr:::sample_mfrm_data(seed = 42) d$Person[1:5] <- NA fit <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30)) expect_s3_class(fit, "mfrm_fit") expect_lt(nrow(fit$prep$data), nrow(d)) }) test_that("NA in score column: rows dropped, fit succeeds", { d <- mfrmr:::sample_mfrm_data(seed = 42) d$Score[10:20] <- NA fit <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30)) expect_s3_class(fit, "mfrm_fit") expect_lt(nrow(fit$prep$data), nrow(d)) }) test_that("5% random NA insertion: fit still succeeds, prep data rows < original", { d <- mfrmr:::sample_mfrm_data(seed = 42) n_orig <- nrow(d) set.seed(999) na_idx <- sample(seq_len(n_orig), size = ceiling(0.05 * n_orig)) d$Score[na_idx] <- NA fit <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30)) expect_s3_class(fit, "mfrm_fit") expect_lt(nrow(fit$prep$data), n_orig) }) test_that("all NA after filtering raises explicit error", { d <- mfrmr:::sample_mfrm_data(seed = 42) d$Score <- NA expect_error( suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30)), "No valid observations" ) }) # --------------------------------------------------------------------------- # 1.6 Score category handling # --------------------------------------------------------------------------- test_that("unused intermediate category recoded to contiguous (keep_original=FALSE)", { d <- mfrmr:::sample_mfrm_data(seed = 42) # Only keep scores 1, 3, 5 (drop 2 and 4) d_gap <- d |> dplyr::filter(Score %in% c(1, 3, 5)) fit <- suppressWarnings(fit_mfrm(d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30, keep_original = FALSE)) expect_s3_class(fit, "mfrm_fit") # After recoding, the data should have contiguous scores observed_scores <- sort(unique(fit$prep$data$Score)) expected_contiguous <- seq(min(observed_scores), max(observed_scores)) expect_equal(observed_scores, expected_contiguous) }) test_that("non-consecutive score recoding is surfaced before estimation", { d <- mfrmr:::sample_mfrm_data(seed = 42) d_gap <- d |> dplyr::filter(Score %in% c(1, 3, 5)) warning_messages <- character(0) prep <- withCallingHandlers( mfrmr:::prepare_mfrm_data( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", keep_original = FALSE ), warning = function(w) { warning_messages <<- c(warning_messages, conditionMessage(w)) invokeRestart("muffleWarning") } ) expect_true(any(grepl("were recoded internally to a contiguous scale", warning_messages, fixed = TRUE))) expect_equal(prep$score_map$OriginalScore, c(1, 3, 5)) expect_equal(prep$score_map$InternalScore, c(1, 2, 3)) }) test_that("keep_original=TRUE preserves original score codes", { d <- mfrmr:::sample_mfrm_data(seed = 42) # Only keep scores 1, 3, 5 d_gap <- d |> dplyr::filter(Score %in% c(1, 3, 5)) fit <- suppressWarnings(fit_mfrm(d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30, keep_original = TRUE)) expect_s3_class(fit, "mfrm_fit") observed_scores <- sort(unique(fit$prep$data$Score)) expect_true(all(observed_scores %in% c(1, 3, 5))) }) test_that("explicit rating_min/rating_max override auto-detection", { d <- mfrmr:::sample_mfrm_data(seed = 42) fit_auto <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30)) fit_explicit <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30, rating_min = 1, rating_max = 5)) # With the same data and explicit range matching the data, results should match expect_equal(fit_auto$summary$LogLik, fit_explicit$summary$LogLik, tolerance = 1e-10) }) test_that("explicit rating range preserves unused boundary categories", { d <- mfrmr:::sample_mfrm_data(seed = 42) d_gap <- d |> dplyr::filter(Score %in% 2:5) prep <- mfrmr:::prepare_mfrm_data( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5 ) fit <- suppressWarnings(fit_mfrm( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30, rating_min = 1, rating_max = 5 )) ds <- describe_mfrm_data( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5 ) ds_summary <- summary(ds) rs <- rating_scale_table(fit, drop_unused = FALSE) rs_drop <- rating_scale_table(fit, drop_unused = TRUE) fit_summary <- summary(fit) surface <- plot(fit, type = "ccc_surface", draw = FALSE, theta_points = 55) expect_equal(prep$rating_min, 1L) expect_equal(prep$rating_max, 5L) expect_equal(prep$score_map$OriginalScore, 1:5) expect_equal(prep$score_map$InternalScore, 1:5) expect_equal(prep$unused_score_categories, 1L) expect_equal(fit$config$n_cat, 5L) expect_equal(fit$prep$unused_score_categories, 1L) expect_true(1 %in% ds$score_distribution$Score) expect_equal(ds$score_distribution$RawN[ds$score_distribution$Score == 1], 0L) expect_match(paste(ds_summary$notes, collapse = " "), "zero-count") expect_equal(fit_summary$settings_overview$UnusedScoreCategories[1], "1") expect_equal(fit_summary$settings_overview$UnusedScoreCategoryType[1], "boundary") expect_match(paste(fit_summary$key_warnings, collapse = " "), "Unused boundary score") expect_true(1 %in% rs$category_table$Category) expect_equal(rs$category_table$Count[rs$category_table$Category == 1], 0) expect_false(1 %in% rs_drop$category_table$Category) expect_equal(rs_drop$summary$UnusedScoreCategories[1], "1") expect_true(any(grepl("Unused boundary score", rs_drop$caveats$Message, fixed = TRUE))) expect_true(all(c("ZeroCount", "UnusedCategoryType", "WeaklyIdentified", "CategoryCaveat") %in% names(rs$category_table))) expect_true(all(c("LowerCategory", "UpperCategory", "WeaklyIdentified", "ThresholdCaveat") %in% names(rs$threshold_table))) expect_true(isTRUE(rs$category_table$ZeroCount[rs$category_table$Category == 1])) expect_equal(rs$category_table$UnusedCategoryType[rs$category_table$Category == 1], "boundary") expect_true(any(rs$threshold_table$WeaklyIdentified, na.rm = TRUE)) expect_true(any(grepl("zero-count boundary", rs$threshold_table$ThresholdCaveat, fixed = TRUE))) expect_true(any(grepl("Unused boundary score", rs$caveats$Message, fixed = TRUE))) expect_true(rs$category_table$Percent[rs$category_table$Category == 2] > 0) expect_gt(sum(rs$category_table$ExpectedPercent, na.rm = TRUE), 99) expect_lt(sum(rs$category_table$ExpectedPercent, na.rm = TRUE), 101) expect_s3_class(surface, "mfrm_plot_data") expect_true("1" %in% surface$data$surface$Category) expect_false("6" %in% surface$data$surface$Category) expect_true(all(sort(unique(surface$data$surface$Category)) == as.character(1:5))) expect_true("category_support" %in% names(surface$data)) expect_identical( surface$data$category_support$ZeroObserved[surface$data$category_support$Category == "1"][1], TRUE ) expect_match( surface$data$interpretation_guide$Guidance[ surface$data$interpretation_guide$Topic == "Zero-frequency categories" ][1], "Retained categories with zero observed responses: 1", fixed = TRUE ) }) test_that("explicit rating range still collapses internal gaps unless original codes are requested", { d <- mfrmr:::sample_mfrm_data(seed = 42) d_gap <- d |> dplyr::filter(Score %in% c(1, 3, 5)) prep <- suppressWarnings(mfrmr:::prepare_mfrm_data( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5 )) prep_keep <- mfrmr:::prepare_mfrm_data( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5, keep_original = TRUE ) expect_equal(prep$rating_min, 1L) expect_equal(prep$rating_max, 3L) expect_equal(prep$score_map$OriginalScore, c(1L, 3L, 5L)) expect_equal(prep$score_map$InternalScore, 1:3) expect_equal(prep_keep$rating_min, 1L) expect_equal(prep_keep$rating_max, 5L) expect_equal(prep_keep$unused_score_categories, c(2L, 4L)) fit_keep <- suppressWarnings(fit_mfrm( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30, rating_min = 1, rating_max = 5, keep_original = TRUE )) fit_keep_summary <- summary(fit_keep) rs_keep <- rating_scale_table(fit_keep, drop_unused = FALSE) cs_keep <- category_structure_report(fit_keep) rs_keep_summary <- summary(rs_keep) cs_keep_summary <- summary(cs_keep) surface_keep <- plot(fit_keep, type = "ccc_surface", draw = FALSE, theta_points = 55) expect_equal(fit_keep_summary$settings_overview$UnusedScoreCategories[1], "2, 4") expect_equal(fit_keep_summary$settings_overview$UnusedScoreCategoryType[1], "internal") expect_match(paste(fit_keep_summary$key_warnings, collapse = " "), "Unused intermediate score") expect_true(all(rs_keep$category_table$WeaklyIdentified[rs_keep$category_table$Category %in% c(2, 4)])) expect_true(any(grepl("zero-count intermediate", rs_keep$threshold_table$ThresholdCaveat, fixed = TRUE))) expect_true(any(grepl("Unused intermediate score", rs_keep$caveats$Message, fixed = TRUE))) expect_true("caveats" %in% names(rs_keep_summary)) expect_true(any(grepl("Unused intermediate score", rs_keep_summary$caveats$Message, fixed = TRUE))) expect_true("caveats" %in% names(cs_keep_summary)) expect_true(any(grepl("Unused intermediate score", cs_keep_summary$caveats$Message, fixed = TRUE))) expect_true("WeaklyIdentified" %in% names(cs_keep$median_thresholds)) expect_true(any(cs_keep$median_thresholds$WeaklyIdentified, na.rm = TRUE)) expect_s3_class(surface_keep, "mfrm_plot_data") expect_true(all(as.character(1:5) %in% surface_keep$data$surface$Category)) expect_true(all(c("2", "4") %in% surface_keep$data$surface$Category)) expect_identical( surface_keep$data$category_support$ZeroObserved[surface_keep$data$category_support$Category == "2"][1], TRUE ) expect_match( surface_keep$data$interpretation_guide$Guidance[ surface_keep$data$interpretation_guide$Topic == "Zero-frequency categories" ][1], "2, 4", fixed = TRUE ) }) test_that("single intermediate gap is recoded by default and retained when requested", { d <- mfrmr:::sample_mfrm_data(seed = 42) d_gap <- d |> dplyr::filter(Score %in% c(1, 2, 4, 5)) prep <- suppressWarnings(mfrmr:::prepare_mfrm_data( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5 )) prep_keep <- mfrmr:::prepare_mfrm_data( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5, keep_original = TRUE ) expect_equal(prep$rating_min, 1L) expect_equal(prep$rating_max, 4L) expect_equal(prep$score_map$OriginalScore, c(1L, 2L, 4L, 5L)) expect_equal(prep$score_map$InternalScore, 1:4) expect_length(prep$unused_score_categories, 0L) expect_equal(prep_keep$rating_min, 1L) expect_equal(prep_keep$rating_max, 5L) expect_equal(prep_keep$unused_score_categories, 3L) ds <- suppressWarnings(describe_mfrm_data( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5 )) ds_keep <- describe_mfrm_data( d_gap, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5, keep_original = TRUE ) ds_summary <- summary(ds) ds_keep_summary <- summary(ds_keep) expect_true(any(ds_summary$caveats$Condition == "score_categories_recoded")) expect_false(any(ds_summary$caveats$Condition == "zero_count_intermediate_score_category")) expect_true(3 %in% ds_keep$score_distribution$Score) expect_equal(ds_keep$score_distribution$RawN[ds_keep$score_distribution$Score == 3], 0L) expect_true(any(ds_keep_summary$caveats$Condition == "zero_count_intermediate_score_category")) expect_true(any(ds_keep_summary$caveats$CategoryType == "internal")) expect_true(any(grepl("Unused intermediate score", ds_keep_summary$caveats$Message, fixed = TRUE))) }) test_that("score-support caveats cover upper-boundary and mixed gaps", { d <- mfrmr:::sample_mfrm_data(seed = 42) d_upper <- d |> dplyr::filter(Score %in% 1:4) d_mixed <- d |> dplyr::filter(Score %in% c(2, 4)) prep_upper <- mfrmr:::prepare_mfrm_data( d_upper, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5 ) ds_upper <- describe_mfrm_data( d_upper, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5 ) ds_upper_summary <- summary(ds_upper) expect_equal(prep_upper$unused_score_categories, 5L) expect_true(5 %in% ds_upper$score_distribution$Score) expect_equal(ds_upper$score_distribution$RawN[ds_upper$score_distribution$Score == 5], 0L) expect_true(any(ds_upper_summary$caveats$Condition == "zero_count_boundary_score_category")) expect_true(any(grepl("5", ds_upper_summary$caveats$Categories, fixed = TRUE))) prep_mixed <- mfrmr:::prepare_mfrm_data( d_mixed, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5, keep_original = TRUE ) ds_mixed <- describe_mfrm_data( d_mixed, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 5, keep_original = TRUE ) ds_mixed_summary <- summary(ds_mixed) expect_equal(prep_mixed$unused_score_categories, c(1L, 3L, 5L)) expect_true(any(ds_mixed_summary$caveats$Condition == "zero_count_boundary_score_category")) expect_true(any(ds_mixed_summary$caveats$Condition == "zero_count_intermediate_score_category")) expect_true(any(ds_mixed_summary$caveats$CategoryType == "boundary")) expect_true(any(ds_mixed_summary$caveats$CategoryType == "internal")) }) test_that("explicit rating range rejects observed categories outside support", { d <- mfrmr:::sample_mfrm_data(seed = 42) expect_error( mfrmr:::prepare_mfrm_data( d, "Person", c("Rater", "Task", "Criterion"), "Score", rating_min = 1, rating_max = 4 ), "outside the supplied rating range" ) }) # --------------------------------------------------------------------------- # 1.7 Constraint (anchor) settings # --------------------------------------------------------------------------- test_that("anchor R2=0 forces Rater R2 estimate to zero", { d <- mfrmr:::sample_mfrm_data(seed = 42) anchor_tbl <- data.frame( Facet = "Rater", Level = "R2", Anchor = 0, stringsAsFactors = FALSE ) fit <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 60, anchors = anchor_tbl)) r2_est <- fit$facets$others |> dplyr::filter(Facet == "Rater", Level == "R2") |> dplyr::pull(Estimate) expect_equal(unname(r2_est), 0, tolerance = 1e-8) }) test_that("invalid anchor (non-existent level) with warn policy triggers warning", { d <- mfrmr:::sample_mfrm_data(seed = 42) anchor_tbl <- data.frame( Facet = "Rater", Level = "R99_NONEXISTENT", Anchor = 0, stringsAsFactors = FALSE ) expect_warning( fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 60, anchors = anchor_tbl, anchor_policy = "warn"), regex = "Anchor audit" ) }) test_that("group anchors constrain group mean to target", { d <- mfrmr:::sample_mfrm_data(seed = 42) # Group R1 and R3 together with target mean = 0 group_tbl <- data.frame( Facet = c("Rater", "Rater"), Level = c("R1", "R3"), Group = c("G1", "G1"), GroupValue = c(0, 0), stringsAsFactors = FALSE ) fit <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 60, group_anchors = group_tbl)) r1_est <- fit$facets$others |> dplyr::filter(Facet == "Rater", Level == "R1") |> dplyr::pull(Estimate) r3_est <- fit$facets$others |> dplyr::filter(Facet == "Rater", Level == "R3") |> dplyr::pull(Estimate) group_mean <- mean(c(r1_est, r3_est)) expect_equal(group_mean, 0, tolerance = 1e-4) }) # --------------------------------------------------------------------------- # 1.8 Weight handling # --------------------------------------------------------------------------- test_that("zero-weight rows are excluded from estimation", { d <- mfrmr:::sample_mfrm_data(seed = 42) d$Weight <- 1 d$Weight[1:50] <- 0 fit_wt <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30, weight = "Weight")) expect_lt(nrow(fit_wt$prep$data), nrow(d)) }) test_that("explicit weight column produces different results from unit weights", { d <- mfrmr:::sample_mfrm_data(seed = 42) set.seed(777) d$Weight <- sample(c(1, 2, 3), nrow(d), replace = TRUE) fit_unit <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 40)) fit_wt <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 40, weight = "Weight")) # LogLik should differ when weights are non-uniform expect_false(isTRUE(all.equal(fit_unit$summary$LogLik, fit_wt$summary$LogLik, tolerance = 1e-4))) }) # --------------------------------------------------------------------------- # 1.9 Edge cases # --------------------------------------------------------------------------- test_that("duplicate column names in data raise error with 'duplicate'", { d <- mfrmr:::sample_mfrm_data(seed = 42) d2 <- cbind(d, Rater = d$Rater) # This data.frame now has two columns named "Rater" expect_error( suppressWarnings(fit_mfrm(d2, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 20)), regex = "[Dd]uplic" ) }) test_that("large number of facet levels (20+ persons) does not crash", { set.seed(1234) n_person <- 25 d_large <- expand.grid( Person = paste0("P", sprintf("%02d", seq_len(n_person))), Rater = paste0("R", 1:2), Task = paste0("T", 1:2), stringsAsFactors = FALSE ) d_large$Score <- sample(1:3, nrow(d_large), replace = TRUE) fit <- suppressWarnings(fit_mfrm(d_large, "Person", c("Rater", "Task"), "Score", method = "JML", maxit = 30)) expect_s3_class(fit, "mfrm_fit") expect_equal(nrow(fit$facets$person), n_person) }) test_that("non-numeric score column (character) is coerced correctly", { d <- mfrmr:::sample_mfrm_data(seed = 42) d$Score <- as.character(d$Score) fit <- suppressWarnings(fit_mfrm(d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 30)) expect_s3_class(fit, "mfrm_fit") expect_true(is.integer(fit$prep$data$Score) || is.numeric(fit$prep$data$Score)) }) test_that("non-numeric score or weight entries are surfaced before rows are dropped", { d <- mfrmr:::sample_mfrm_data(seed = 42) d$Score[1] <- "bad-score" d$Weight <- 1 d$Weight[2] <- "bad-weight" warning_messages <- character(0) prep <- withCallingHandlers( mfrmr:::prepare_mfrm_data(d, "Person", c("Rater", "Task", "Criterion"), "Score", weight_col = "Weight"), warning = function(w) { warning_messages <<- c(warning_messages, conditionMessage(w)) invokeRestart("muffleWarning") } ) expect_true(any(grepl("`Score` contained", warning_messages, fixed = TRUE))) expect_true(any(grepl("`Weight` contained", warning_messages, fixed = TRUE))) expect_equal(nrow(prep$data), nrow(d) - 2L) }) test_that("fractional score inputs are rejected before integer coercion", { d <- mfrmr:::sample_mfrm_data(seed = 42) d$Score[1] <- 1.5 d$Score[2] <- 3.25 expect_error( fit_mfrm( d, "Person", c("Rater", "Task", "Criterion"), "Score", method = "JML", maxit = 10 ), "`Score` must contain ordered integer category codes", fixed = TRUE ) }) test_that("prepare_mfrm_data returns expected structure", { d <- mfrmr:::sample_mfrm_data(seed = 42) prep <- mfrmr:::prepare_mfrm_data(d, "Person", c("Rater", "Task", "Criterion"), "Score") expect_true(is.list(prep)) expect_true("data" %in% names(prep)) expect_true("levels" %in% names(prep)) expect_true("facet_names" %in% names(prep)) expect_true("rating_min" %in% names(prep)) expect_true("rating_max" %in% names(prep)) expect_equal(prep$facet_names, c("Rater", "Task", "Criterion")) expect_true("Person" %in% names(prep$levels)) expect_true("Rater" %in% names(prep$levels)) }) test_that("duplicate person/facets/score arguments raise error", { d <- mfrmr:::sample_mfrm_data(seed = 42) expect_error( mfrmr:::prepare_mfrm_data(d, "Person", c("Person", "Task"), "Score"), regex = "[Dd]uplic" ) })