# Edge case tests for dataFormat function # Tests for unusual/abnormal input patterns # ============================================================ # Phase 1: ID column identification # ============================================================ test_that("id=NULL auto-detects numeric consecutive IDs (1:N)", { set.seed(201) data <- data.frame( id = 1:15, item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) result <- dataFormat(data) expect_equal(ncol(result$Q), 2) expect_equal(result$ID, as.character(1:15)) }) test_that("id=1 explicitly treats column 1 as ID", { set.seed(202) data <- data.frame( id = 1:15, item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) result <- dataFormat(data, id = 1) expect_equal(ncol(result$Q), 2) expect_equal(result$ID, as.character(1:15)) }) test_that("id=NULL auto-detects string IDs in first column", { set.seed(203) data <- data.frame( student = paste0("S", 1:15), item1 = sample(0:1, 15, replace = TRUE), item2 = sample(0:1, 15, replace = TRUE) ) result <- dataFormat(data) expect_equal(ncol(result$U), 2) expect_equal(result$ID, paste0("S", 1:15)) }) test_that("id=NULL generates auto IDs when first column is response data + message", { set.seed(204) # First column has duplicates and looks like response data data <- data.frame( item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE), item3 = sample(1:4, 15, replace = TRUE) ) expect_message(result <- dataFormat(data), "No ID column detected") expect_equal(ncol(result$Q), 3) expect_equal(result$ID[1], "Student1") }) test_that("explicit id for non-first column works", { set.seed(205) data <- data.frame( item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE), student = paste0("S", 1:15) ) result <- dataFormat(data, id = 3) expect_equal(ncol(result$Q), 2) expect_equal(result$ID, paste0("S", 1:15)) }) test_that("NA in ID column causes error (duplicated NAs)", { data <- data.frame( id = c(paste0("S", 1:13), NA, NA), item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) expect_error(dataFormat(data, id = 1), "Duplicated IDs found") }) test_that("id column number exceeds ncol causes error", { data <- data.frame( item1 = sample(0:1, 15, replace = TRUE), item2 = sample(0:1, 15, replace = TRUE) ) expect_error(dataFormat(data, id = 5), "ID column number exceeds") }) test_that("rownames lost by unclass: auto-generated IDs used instead", { # NOTE: as.data.frame(unclass(data)) on line 61 resets rownames to 1:n. # The rownames-as-ID path is effectively dead code in the current implementation. set.seed(206) data <- data.frame( item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) rownames(data) <- paste0("Subj", 1:15) expect_message(result <- dataFormat(data), "No ID column detected") # Rownames are lost, so auto-generated IDs are used expect_equal(result$ID[1], "Student1") }) # ============================================================ # Phase 2: Response type detection # ============================================================ test_that("CA provided without response.type → auto-detect as rated", { set.seed(211) data <- data.frame( id = paste0("S", 1:20), item1 = sample(1:4, 20, replace = TRUE), item2 = sample(1:4, 20, replace = TRUE), item3 = sample(1:4, 20, replace = TRUE) ) result <- dataFormat(data, CA = c(2, 3, 1)) expect_equal(result$response.type, "rated") expect_true(!is.null(result$U)) expect_true(!is.null(result$Q)) expect_equal(result$CA, c(2, 3, 1)) }) test_that("CA with numeric ID (1:N) → rated with correct ID detection", { data <- data.frame( id = 1:15, item1 = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3), item2 = c(2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1) ) result <- dataFormat(data, CA = c(3, 2)) expect_equal(result$response.type, "rated") expect_equal(ncol(result$U), 2) expect_equal(ncol(result$Q), 2) }) test_that("rated U matrix correctly scores against CA", { data <- data.frame( id = paste0("S", 1:10), item1 = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1), item2 = c(3, 3, 3, 1, 1, 1, 2, 2, 2, 3) ) result <- dataFormat(data, CA = c(2, 3)) # item1: CA=2 → S2,S5,S8 correct expect_equal(result$U[, 1], c(0, 1, 0, 0, 1, 0, 0, 1, 0, 0)) # item2: CA=3 → S1,S2,S3,S10 correct expect_equal(result$U[, 2], c(1, 1, 1, 0, 0, 0, 0, 0, 0, 1)) }) test_that("rated U matrix preserves missing as -1", { data <- data.frame( id = paste0("S", 1:10), item1 = c(1, NA, 3, 1, 2, 3, 1, 2, 3, 1), item2 = c(3, 3, NA, 1, 1, 1, 2, 2, 2, 3) ) result <- dataFormat(data, CA = c(2, 3)) expect_equal(result$U[2, 1], -1) # NA in item1 → -1 in U expect_equal(result$U[3, 2], -1) # NA in item2 → -1 in U # Non-missing values still scored correctly expect_equal(result$U[1, 1], 0) # 1 != CA(2) expect_equal(result$U[1, 2], 1) # 3 == CA(3) }) test_that("CA length mismatch causes error", { data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) expect_error(dataFormat(data, CA = c(1, 2, 3)), "length of CA must match") }) test_that("CA value not in data causes error", { data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:3, 15, replace = TRUE), item2 = sample(1:3, 15, replace = TRUE) ) expect_error(dataFormat(data, CA = c(5, 1)), "not a valid response category") }) test_that("2-category non-binary (1/2) without response.type causes error", { data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:2, 15, replace = TRUE), item2 = sample(1:2, 15, replace = TRUE) ) expect_error(dataFormat(data), "2-category data with non-binary values") }) test_that("2-category non-binary with explicit response.type works", { set.seed(213) data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:2, 15, replace = TRUE), item2 = sample(1:2, 15, replace = TRUE) ) result <- dataFormat(data, response.type = "ordinal") expect_equal(result$response.type, "ordinal") }) test_that(">=20 categories without response.type causes error", { set.seed(214) data <- data.frame( id = paste0("S", 1:100), item1 = sample(1:25, 100, replace = TRUE), item2 = sample(1:25, 100, replace = TRUE) ) expect_error(dataFormat(data), "Too many categories") }) test_that(">=20 categories with explicit response.type works", { set.seed(215) data <- data.frame( id = paste0("S", 1:100), item1 = sample(1:25, 100, replace = TRUE), item2 = sample(1:25, 100, replace = TRUE) ) result <- dataFormat(data, response.type = "ordinal") expect_equal(result$response.type, "ordinal") }) test_that("binary 0/1 auto-detected correctly", { set.seed(216) data <- data.frame( id = paste0("S", 1:20), item1 = sample(0:1, 20, replace = TRUE), item2 = sample(0:1, 20, replace = TRUE) ) result <- dataFormat(data) expect_equal(result$response.type, "binary") expect_true(!is.null(result$U)) }) test_that("binary data with CA still becomes rated", { set.seed(217) data <- data.frame( id = paste0("S", 1:20), item1 = sample(0:1, 20, replace = TRUE), item2 = sample(0:1, 20, replace = TRUE) ) result <- dataFormat(data, CA = c(1, 1)) expect_equal(result$response.type, "binary") # Binary check runs first, CA check only for non-binary }) test_that("explicit response.type overrides auto-detection", { set.seed(218) data <- data.frame( id = paste0("S", 1:20), item1 = sample(1:5, 20, replace = TRUE), item2 = sample(1:5, 20, replace = TRUE) ) result <- dataFormat(data, response.type = "nominal") expect_equal(result$response.type, "nominal") }) test_that("invalid response.type causes error", { data <- data.frame( id = paste0("S", 1:15), item1 = sample(0:1, 15, replace = TRUE) ) expect_error(dataFormat(data, response.type = "invalid")) }) # ============================================================ # Size anomalies # ============================================================ test_that("fewer than 10 rows causes error", { data <- data.frame( id = paste0("S", 1:5), item1 = c(0, 1, 0, 1, 0), item2 = c(1, 0, 1, 0, 1) ) expect_error(dataFormat(data), "at least 10 rows") }) test_that("exactly 10 rows works (boundary)", { set.seed(221) data <- data.frame( id = paste0("S", 1:10), item1 = sample(0:1, 10, replace = TRUE), item2 = sample(0:1, 10, replace = TRUE) ) result <- dataFormat(data) expect_s3_class(result, "exametrika") expect_equal(nrow(result$U), 10) }) test_that("0 rows causes error", { data <- data.frame( id = character(0), item1 = numeric(0), item2 = numeric(0) ) expect_error(dataFormat(data), "at least 10 rows") }) test_that("single item after ID removal works", { set.seed(222) data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:4, 15, replace = TRUE) ) result <- dataFormat(data) expect_s3_class(result, "exametrika") expect_equal(ncol(result$Q), 1) }) # ============================================================ # Variance anomalies (sd = NA or 0) # ============================================================ test_that("all items constant value → warns zero variance", { data <- data.frame( id = paste0("S", 1:15), item1 = rep(3, 15), item2 = rep(5, 15), item3 = rep(1, 15) ) expect_message(result <- dataFormat(data), "zero variance") expect_s3_class(result, "exametrika") expect_equal(ncol(result$Q), 3) # Not excluded, but warned }) test_that("all-NA column → warns all missing values", { set.seed(231) data <- data.frame( id = paste0("S", 1:15), item1 = rep(NA, 15), item2 = sample(1:4, 15, replace = TRUE), item3 = sample(1:4, 15, replace = TRUE) ) expect_message(result <- dataFormat(data), "all missing values") expect_s3_class(result, "exametrika") expect_equal(ncol(result$Q), 3) # All 3 columns remain expect_true(all(result$Q[, 1] == -1)) # item1 is all -1 }) test_that("all items all NA → warns all missing + all students missing", { data <- data.frame( id = paste0("S", 1:15), item1 = rep(NA, 15), item2 = rep(NA, 15) ) expect_message( expect_message(result <- dataFormat(data), "all missing values"), "all missing responses" ) expect_s3_class(result, "exametrika") expect_true(all(result$Q == -1)) expect_true(all(result$Z == 0)) }) test_that("column with Inf triggers sd=NA → excluded", { set.seed(232) data <- data.frame( id = paste0("S", 1:15), item1 = c(Inf, sample(1:4, 14, replace = TRUE)), # sd=NA due to Inf item2 = sample(1:4, 15, replace = TRUE), item3 = sample(1:4, 15, replace = TRUE) ) expect_message(result <- dataFormat(data, response.type = "nominal"), "no variance") expect_s3_class(result, "exametrika") expect_equal(ncol(result$Q), 2) # item1 excluded expect_false("item1" %in% result$ItemLabel) }) # ============================================================ # Missing value handling # ============================================================ test_that("na=NA is handled (treated as NULL)", { set.seed(241) data <- data.frame( id = paste0("S", 1:15), item1 = sample(c(1, 2, 3, NA), 15, replace = TRUE), item2 = sample(c(1, 2, 3, NA), 15, replace = TRUE) ) result <- dataFormat(data, na = NA) expect_s3_class(result, "exametrika") }) test_that("custom na value converted to -1 in response matrix", { data <- data.frame( id = paste0("S", 1:10), item1 = c(1, 2, 99, 1, 2, 3, 1, 2, 3, 1), item2 = c(3, 99, 2, 1, 2, 3, 1, 99, 3, 2) ) result <- dataFormat(data, na = 99) expect_equal(unname(result$Q[1, 1]), 1) expect_equal(unname(result$Q[3, 1]), -1) # 99 → -1 expect_equal(unname(result$Z[3, 1]), 0) # missing indicator }) test_that("Z matrix overrides response data for missing", { data <- data.frame( id = paste0("S", 1:10), item1 = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1), item2 = c(3, 3, 2, 1, 2, 3, 1, 2, 3, 2) ) Z <- matrix(1, nrow = 10, ncol = 2) Z[1, 1] <- 0 # Force item1 of student 1 to missing Z[5, 2] <- 0 # Force item2 of student 5 to missing result <- dataFormat(data, Z = Z) expect_equal(unname(result$Q[1, 1]), -1) expect_equal(unname(result$Q[5, 2]), -1) expect_equal(unname(result$Z[1, 1]), 0) expect_equal(unname(result$Z[5, 2]), 0) }) test_that("Z matrix with non-0/1 values causes error", { data <- data.frame( id = paste0("S", 1:10), item1 = sample(1:3, 10, replace = TRUE), item2 = sample(1:3, 10, replace = TRUE) ) Z_bad <- matrix(c(0, 1, 2, 1), nrow = 10, ncol = 2) expect_error(dataFormat(data, Z = Z_bad), "must contain only 0 or 1") }) # ============================================================ # rated without response.type (regression test for the bug fix) # ============================================================ test_that("rated auto-detection: ordinal-looking data + CA → rated", { set.seed(251) data <- data.frame( id = paste0("S", 1:20), item1 = sample(1:5, 20, replace = TRUE), item2 = sample(1:5, 20, replace = TRUE), item3 = sample(1:5, 20, replace = TRUE) ) result <- dataFormat(data, CA = c(3, 1, 5)) expect_equal(result$response.type, "rated") expect_true(!is.null(result$U)) expect_true(all(result$U[result$Q != -1] %in% c(0, 1))) }) test_that("rated auto-detection: nominal-looking data + CA → rated", { set.seed(252) data <- data.frame( id = paste0("S", 1:20), item1 = sample(c(1, 5, 10), 20, replace = TRUE), item2 = sample(c(2, 7, 15), 20, replace = TRUE) ) result <- dataFormat(data, CA = c(5, 7)) expect_equal(result$response.type, "rated") }) # ============================================================ # Weight vector anomalies # ============================================================ test_that("default weight is all 1s matching number of items", { set.seed(261) data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:3, 15, replace = TRUE), item2 = sample(1:3, 15, replace = TRUE), item3 = sample(1:3, 15, replace = TRUE) ) result <- dataFormat(data) expect_equal(result$w, c(1, 1, 1)) }) test_that("custom weight vector applied correctly", { set.seed(262) data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:3, 15, replace = TRUE), item2 = sample(1:3, 15, replace = TRUE) ) result <- dataFormat(data, w = c(2, 0.5)) expect_equal(result$w, c(2, 0.5)) }) # ============================================================ # Factor / CategoryLabel handling # ============================================================ test_that("factor response columns converted to numeric with labels preserved", { set.seed(271) data <- data.frame( id = paste0("S", 1:15), item1 = factor(sample(c("A", "B", "C"), 15, replace = TRUE)), item2 = factor(sample(c("X", "Y", "Z"), 15, replace = TRUE)) ) result <- dataFormat(data, response.type = "nominal") expect_s3_class(result, "exametrika") expect_true(!is.null(result$CategoryLabel)) expect_true(all(result$Q[result$Q != -1] > 0)) # numeric, not character }) test_that("mixed factor and numeric columns handled", { set.seed(272) data <- data.frame( id = paste0("S", 1:15), item1 = factor(sample(c("Low", "Mid", "High"), 15, replace = TRUE), levels = c("Low", "Mid", "High") ), item2 = sample(1:3, 15, replace = TRUE) ) result <- dataFormat(data, response.type = "ordinal") expect_s3_class(result, "exametrika") expect_equal(ncol(result$Q), 2) }) # ============================================================ # Matrix input (no column names, no ID) # ============================================================ test_that("matrix without colnames gets V1/V2/V3 labels from as.data.frame", { set.seed(281) mat <- matrix(sample(0:1, 15 * 3, replace = TRUE), nrow = 15, ncol = 3) expect_message(result <- dataFormat(mat), "No ID column detected") expect_s3_class(result, "exametrika") expect_equal(result$response.type, "binary") # as.data.frame(unclass()) on unnamed matrix gives "V1", "V2", ... expect_equal(result$ItemLabel, c("V1", "V2", "V3")) expect_equal(result$ID[1], "Student1") }) test_that("matrix with colnames preserves item labels", { set.seed(282) mat <- matrix(sample(1:4, 20 * 2, replace = TRUE), nrow = 20, ncol = 2) colnames(mat) <- c("Q1", "Q2") result <- dataFormat(mat) expect_s3_class(result, "exametrika") expect_equal(result$ItemLabel, c("Q1", "Q2")) }) # ============================================================ # Already formatted data (early return) # ============================================================ test_that("already formatted exametrika object returned as-is", { set.seed(291) data <- data.frame( id = paste0("S", 1:15), item1 = sample(0:1, 15, replace = TRUE), item2 = sample(0:1, 15, replace = TRUE) ) result1 <- dataFormat(data) result2 <- dataFormat(result1) expect_identical(result1, result2) }) # ============================================================ # Regression: id default change (NULL instead of 1) # ============================================================ test_that("existing code without id arg still works (backward compat)", { set.seed(301) # String ID in first column - should auto-detect data1 <- data.frame( student = paste0("S", 1:20), item1 = sample(0:1, 20, replace = TRUE), item2 = sample(0:1, 20, replace = TRUE) ) result1 <- dataFormat(data1) expect_equal(ncol(result1$U), 2) expect_equal(result1$ID, paste0("S", 1:20)) # Matrix input - should auto-detect no ID set.seed(302) mat <- matrix(sample(0:1, 15 * 4, replace = TRUE), nrow = 15, ncol = 4) result2 <- dataFormat(mat) expect_equal(ncol(result2$U), 4) # Non-first column ID - should work with explicit id set.seed(303) data3 <- data.frame( item1 = sample(1:3, 15, replace = TRUE), student = paste0("S", 1:15), item2 = sample(1:3, 15, replace = TRUE) ) result3 <- dataFormat(data3, id = 2) expect_equal(ncol(result3$Q), 2) expect_equal(result3$ID, paste0("S", 1:15)) }) # ============================================================ # Non-consecutive numeric IDs (e.g., 101, 205, 300) # ============================================================ test_that("non-consecutive numeric IDs auto-detected", { set.seed(311) data <- data.frame( id = seq(101, 115), item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) result <- dataFormat(data) expect_equal(ncol(result$Q), 2) expect_equal(result$ID, as.character(101:115)) }) # ============================================================ # categories field correctness # ============================================================ test_that("categories counts unique non-missing values per item", { data <- data.frame( id = paste0("S", 1:10), item1 = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2), # 4 categories item2 = c(1, 1, 2, 2, 3, 3, 1, 1, 2, 2) # 3 categories ) result <- dataFormat(data) expect_equal(unname(result$categories[1]), 4) expect_equal(unname(result$categories[2]), 3) }) test_that("categories ignores missing values", { data <- data.frame( id = paste0("S", 1:10), item1 = c(1, 2, NA, 3, 1, 2, NA, 3, 1, 2), # 3 non-missing categories (1, 2, 3) item2 = c(1, 2, 1, 3, 3, 2, 1, 1, 3, NA) # 3 non-missing categories (1, 2, 3) ) result <- dataFormat(data) # Note: NA→-1, so categories includes -1 as a "category" in current implementation # categories counts unique(x[x != -1]) so -1 is excluded expect_equal(unname(result$categories[1]), 3) expect_equal(unname(result$categories[2]), 3) }) # ============================================================ # Absurd / adversarial input patterns # ============================================================ # --- Wrong data types passed to data parameter --- test_that("list input coerced to data.frame (as.data.frame handles it)", { # list with 10-element vectors becomes a 10x2 data.frame result <- dataFormat(list( a = sample(0:1, 10, replace = TRUE), b = sample(0:1, 10, replace = TRUE) )) expect_s3_class(result, "exametrika") }) test_that("NULL input causes error", { expect_error(dataFormat(NULL)) }) test_that("single vector coerced to 1-column data.frame", { # A 10-element vector becomes a 10x1 data.frame result <- dataFormat(c(1, 0, 1, 0, 1, 0, 1, 0, 1, 0)) expect_s3_class(result, "exametrika") expect_equal(result$response.type, "binary") }) test_that("truly invalid input: string scalar", { expect_error(dataFormat("not a dataset")) }) # --- Weird values in response data --- test_that("negative values in response data", { set.seed(401) data <- data.frame( id = paste0("S", 1:15), item1 = sample(c(-2, -1, 0, 1, 2), 15, replace = TRUE), item2 = sample(c(-2, -1, 0, 1, 2), 15, replace = TRUE) ) # Negative values: auto-detect should fail or classify as nominal # -1 is the missing indicator internally, so this is problematic result <- dataFormat(data, response.type = "nominal") expect_s3_class(result, "exametrika") }) test_that("decimal/float values in response data", { data <- data.frame( id = paste0("S", 1:15), item1 = c(1.5, 2.7, 3.1, 1.5, 2.7, 3.1, 1.5, 2.7, 3.1, 1.5, 2.7, 3.1, 1.5, 2.7, 3.1), item2 = c(0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.1, 0.2, 0.3) ) # Non-integer values: auto ordinal detection should fail (not integers) # Should fall through to nominal result <- dataFormat(data) expect_s3_class(result, "exametrika") expect_equal(result$response.type, "nominal") }) test_that("Inf values in response data → item excluded by sd=NA", { data <- data.frame( id = paste0("S", 1:15), item1 = c(1, 2, Inf, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3), item2 = c(3, 2, 1, 3, 2, 1, 3, 2, 1, 3, 2, 1, 3, 2, 1) ) # Inf causes sd=NA → item excluded with message expect_message(result <- dataFormat(data, response.type = "nominal"), "no variance") expect_s3_class(result, "exametrika") expect_equal(ncol(result$Q), 1) # item1 excluded }) test_that("NaN values in response data treated as NA", { data <- data.frame( id = paste0("S", 1:15), item1 = c(1, 2, NaN, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3), item2 = c(3, 2, 1, 3, 2, 1, 3, 2, 1, 3, 2, 1, 3, 2, 1) ) result <- dataFormat(data) expect_s3_class(result, "exametrika") expect_equal(unname(result$Z[3, 1]), 0) # NaN → NA → missing }) test_that("very large numbers in response data", { set.seed(402) data <- data.frame( id = paste0("S", 1:15), item1 = sample(c(1000000, 2000000, 3000000), 15, replace = TRUE), item2 = sample(c(1000000, 2000000, 3000000), 15, replace = TRUE) ) result <- dataFormat(data, response.type = "nominal") expect_s3_class(result, "exametrika") }) # --- ID column weirdness --- test_that("ID column with all NAs (auto-detect) → auto-ID + warns all missing", { data <- data.frame( id = rep(NA, 15), item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) # All NA in first column: not consecutive, not string → auto-ID generated # The all-NA column also triggers "all missing values" warning expect_message(result <- dataFormat(data), "No ID column detected") expect_s3_class(result, "exametrika") }) test_that("ID column with empty strings", { data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:3, 15, replace = TRUE), item2 = sample(1:3, 15, replace = TRUE), stringsAsFactors = FALSE ) data$id[5] <- "" result <- dataFormat(data, id = 1) expect_s3_class(result, "exametrika") expect_equal(result$ID[5], "") }) test_that("ID column with whitespace strings", { data <- data.frame( id = c(paste0("S", 1:13), " ", " "), item1 = sample(1:3, 15, replace = TRUE), item2 = sample(1:3, 15, replace = TRUE), stringsAsFactors = FALSE ) # " " and " " are different strings, no duplicate result <- dataFormat(data, id = 1) expect_s3_class(result, "exametrika") }) test_that("all same ID value causes duplicate error with row numbers", { data <- data.frame( id = rep("SAME", 15), item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) expect_error(dataFormat(data, id = 1), "Duplicated IDs found") expect_error(dataFormat(data, id = 1), "rows") # Row numbers shown }) test_that("duplicate IDs error shows which rows are duplicated", { data <- data.frame( id = c( "A", "B", "C", "A", "D", "E", "B", "F", "G", "H", "I", "J", "K", "L", "M" ), item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) expect_error(dataFormat(data, id = 1), "'A' \\(rows 1,4\\)") expect_error(dataFormat(data, id = 1), "'B' \\(rows 2,7\\)") }) test_that("first column with many unique values triggers ID hint", { set.seed(420) # First column has 12 unique values out of 15 rows (80%) but has duplicates # → not consecutive, not detected as ID → auto-generate # But 80% unique is suspicious, so a hint message is shown data <- data.frame( subject = c( 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 101, 102, 103 ), # 12 unique out of 15 item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) expect_message(dataFormat(data), "No ID column detected") expect_message(dataFormat(data), "first column.*unique values") }) test_that("factor ID with few levels (< 20) auto-detected as response", { set.seed(403) data <- data.frame( group = factor(sample(c("A", "B", "C"), 15, replace = TRUE)), item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) # Factor with < 20 unique values: not detected as ID result <- dataFormat(data) expect_s3_class(result, "exametrika") expect_equal(ncol(result$Q), 3) # All 3 columns as response }) # --- CA parameter weirdness --- test_that("CA as character vector causes error or is coerced", { data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:3, 15, replace = TRUE), item2 = sample(1:3, 15, replace = TRUE) ) # CA should be numeric; character "2" should be coerced result <- dataFormat(data, CA = c("2", "1")) expect_equal(result$response.type, "rated") }) test_that("CA=NULL with response.type='rated' causes error", { data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:3, 15, replace = TRUE), item2 = sample(1:3, 15, replace = TRUE) ) expect_error( dataFormat(data, response.type = "rated", CA = NULL), "Correct Answer must be specified" ) }) test_that("CA with named vector works", { data <- data.frame( id = paste0("S", 1:15), item1 = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3), item2 = c(3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1) ) result <- dataFormat(data, CA = c(item1 = 2, item2 = 3)) expect_equal(result$response.type, "rated") expect_equal(as.numeric(result$CA), c(2, 3)) }) # --- na parameter weirdness --- test_that("na=0 treats 0 as missing (ambiguous with binary)", { data <- data.frame( id = paste0("S", 1:15), item1 = c(0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2), item2 = c(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1) ) result <- dataFormat(data, na = 0) # 0 should be treated as missing expect_equal(unname(result$Z[1, 1]), 0) # item1[1] = 0 → missing expect_equal(unname(result$Q[1, 1]), -1) }) test_that("na=-999 custom sentinel value", { data <- data.frame( id = paste0("S", 1:10), item1 = c(1, 2, -999, 3, 1, 2, -999, 3, 1, 2), item2 = c(3, -999, 1, 2, 3, -999, 1, 2, 3, 1) ) result <- dataFormat(data, na = -999) expect_equal(unname(result$Q[3, 1]), -1) expect_equal(unname(result$Q[2, 2]), -1) expect_equal(unname(result$Z[3, 1]), 0) }) # --- Data with more columns than rows --- test_that("wide data (more items than students)", { set.seed(405) n_items <- 30 n_students <- 12 mat <- matrix(sample(0:1, n_students * n_items, replace = TRUE), nrow = n_students, ncol = n_items ) colnames(mat) <- paste0("Q", 1:n_items) result <- dataFormat(mat) expect_s3_class(result, "exametrika") expect_equal(ncol(result$U), n_items) expect_equal(nrow(result$U), n_students) }) # --- Extremely sparse data --- test_that("mostly NA data (>90% missing) → NA becomes -1, still works", { data <- data.frame( id = paste0("S", 1:20), item1 = c(1, rep(NA, 19)), item2 = c(rep(NA, 19), 2), item3 = c(NA, 3, rep(NA, 18)) ) # After NA→-1, each column has {value, -1, -1, ...}: sd > 0, not excluded result <- dataFormat(data) expect_s3_class(result, "exametrika") expect_equal(ncol(result$Q), 3) }) test_that("sparse but enough variance survives", { set.seed(407) data <- data.frame( id = paste0("S", 1:15), item1 = c(1, 2, 3, rep(NA, 12)), # 3 non-NA, has variance item2 = c(rep(NA, 12), 3, 4, 5), # 3 non-NA, has variance item3 = sample(1:3, 15, replace = TRUE) # fully observed ) result <- dataFormat(data) expect_s3_class(result, "exametrika") }) # --- Duplicate column names --- test_that("duplicate column names in data.frame", { set.seed(408) data <- data.frame( id = paste0("S", 1:15), x = sample(1:3, 15, replace = TRUE), x = sample(1:3, 15, replace = TRUE), check.names = FALSE ) # R allows duplicate column names with check.names=FALSE result <- dataFormat(data) expect_s3_class(result, "exametrika") }) # --- Column names with special characters --- test_that("column names with spaces and special chars → make.names applied", { set.seed(409) data <- data.frame( id = paste0("S", 1:15), `item 1` = sample(1:4, 15, replace = TRUE), `item-2` = sample(1:4, 15, replace = TRUE), check.names = FALSE ) result <- dataFormat(data, id = 1) expect_s3_class(result, "exametrika") # as.data.frame(unclass()) applies make.names: "item 1" → "item.1" expect_true("item.1" %in% result$ItemLabel) }) # --- Mixed binary and ordinal columns --- test_that("mix of binary and ordinal items → ordinal overall", { set.seed(410) data <- data.frame( id = paste0("S", 1:20), item1 = sample(0:1, 20, replace = TRUE), # binary item2 = sample(0:1, 20, replace = TRUE), # binary item3 = sample(1:5, 20, replace = TRUE), # ordinal item4 = sample(1:5, 20, replace = TRUE) # ordinal ) result <- dataFormat(data) # Not all binary → should be ordinal or nominal expect_true(result$response.type %in% c("ordinal", "nominal")) }) # --- Boolean/logical data --- test_that("logical TRUE/FALSE data detected as binary", { set.seed(411) data <- data.frame( id = paste0("S", 1:15), item1 = sample(c(TRUE, FALSE), 15, replace = TRUE), item2 = sample(c(TRUE, FALSE), 15, replace = TRUE) ) result <- dataFormat(data) expect_equal(result$response.type, "binary") expect_true(all(result$U[result$U != -1] %in% c(0, 1))) }) # --- Factor ID column with many levels (>= 20) looks like ID --- test_that("factor ID with >= 20 levels detected as ID", { set.seed(412) data <- data.frame( student = factor(paste0("Student_", 1:25)), item1 = sample(1:4, 25, replace = TRUE), item2 = sample(1:4, 25, replace = TRUE) ) result <- dataFormat(data) expect_equal(ncol(result$Q), 2) expect_equal(length(result$ID), 25) }) # --- Factor with unused levels --- test_that("factor with unused levels handled correctly", { set.seed(413) data <- data.frame( id = paste0("S", 1:15), item1 = factor(sample(c("A", "B"), 15, replace = TRUE), levels = c("A", "B", "C", "D") ), # C and D unused item2 = factor(sample(c("X", "Y"), 15, replace = TRUE), levels = c("X", "Y", "Z") ) # Z unused ) result <- dataFormat(data, response.type = "nominal") expect_s3_class(result, "exametrika") }) # --- Tibble-like input (data.frame subclass) --- test_that("data.frame subclass still works", { set.seed(414) data <- data.frame( id = paste0("S", 1:15), item1 = sample(0:1, 15, replace = TRUE), item2 = sample(0:1, 15, replace = TRUE) ) class(data) <- c("tbl_df", "tbl", "data.frame") result <- dataFormat(data) expect_s3_class(result, "exametrika") }) # --- rated: all correct or all incorrect --- test_that("rated: all students answer correctly → U all 1s, warns zero variance", { data <- data.frame( id = paste0("S", 1:10), item1 = rep(3, 10), item2 = rep(2, 10) ) expect_message(result <- dataFormat(data, CA = c(3, 2)), "zero variance") expect_equal(result$response.type, "rated") expect_true(all(result$U == 1)) }) test_that("rated: almost no student answers correctly", { data <- data.frame( id = paste0("S", 1:10), item1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 2), # CA=2, only S10 correct item2 = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 1) # CA=1, only S10 correct ) result <- dataFormat(data, CA = c(2, 1)) expect_equal(result$response.type, "rated") expect_equal(sum(result$U[, 1] == 1), 1) # Only 1 correct for item1 expect_equal(sum(result$U[, 2] == 1), 1) # Only 1 correct for item2 }) # --- Z matrix edge cases --- test_that("Z matrix all zeros → all -1, warns all missing", { set.seed(415) data <- data.frame( id = paste0("S", 1:10), item1 = sample(1:3, 10, replace = TRUE), item2 = sample(1:3, 10, replace = TRUE) ) Z_all_zero <- matrix(0, nrow = 10, ncol = 2) expect_message(result <- dataFormat(data, Z = Z_all_zero), "all missing") expect_s3_class(result, "exametrika") expect_true(all(result$Q == -1)) expect_true(all(result$Z == 0)) }) # --- response.type explicitly set to binary but data is not --- test_that("binary response.type with non-binary data causes error", { data <- data.frame( id = paste0("S", 1:15), item1 = sample(1:4, 15, replace = TRUE), item2 = sample(1:4, 15, replace = TRUE) ) expect_error( dataFormat(data, response.type = "binary"), "can only contain the values 0, 1" ) }) # --- Multiple issues combined --- test_that("numeric ID + CA + NA all at once", { data <- data.frame( id = 1:15, item1 = c(1, NA, 3, 2, 1, 3, 2, NA, 1, 3, 2, 1, 3, 2, 1), item2 = c(NA, 2, 1, 3, 2, 1, NA, 3, 2, 1, 3, 2, 1, 3, 2), item3 = c(2, 3, 1, NA, 2, 3, 1, 2, NA, 3, 1, 2, 3, 1, 2) ) result <- dataFormat(data, CA = c(2, 3, 1)) expect_equal(result$response.type, "rated") expect_equal(ncol(result$U), 3) expect_equal(ncol(result$Q), 3) # Check missing preserved in U expect_equal(result$U[2, 1], -1) # NA → -1 expect_equal(result$U[1, 2], -1) # NA → -1 # Check correct scoring expect_equal(result$U[1, 1], 0) # 1 != CA(2) expect_equal(result$U[3, 2], 0) # 1 != CA(3) expect_equal(result$U[4, 1], 1) # 2 == CA(2) }) test_that("custom na + CA + explicit id", { data <- data.frame( item1 = c(1, 99, 3, 2, 1, 3, 2, 99, 1, 3, 2, 1, 3, 2, 1), student = paste0("S", 1:15), item2 = c(99, 2, 1, 3, 2, 1, 99, 3, 2, 1, 3, 2, 1, 3, 2) ) result <- dataFormat(data, na = 99, id = 2, CA = c(2, 3)) expect_equal(result$response.type, "rated") expect_equal(result$ID, paste0("S", 1:15)) expect_equal(unname(result$Q[2, 1]), -1) # 99 → -1 expect_equal(unname(result$U[2, 1]), -1) # missing preserved in U }) # --- Regression: sample datasets still work --- test_that("package dataset J5S10 loads and formats correctly", { data(J5S10, package = "exametrika") result <- dataFormat(J5S10) expect_s3_class(result, "exametrika") expect_equal(result$response.type, "binary") }) test_that("package dataset J35S500 loads and formats correctly", { data(J35S500, package = "exametrika") result <- dataFormat(J35S500) expect_s3_class(result, "exametrika") expect_equal(result$response.type, "ordinal") }) test_that("package dataset J20S600 loads and formats correctly", { data(J20S600, package = "exametrika") result <- dataFormat(J20S600) expect_s3_class(result, "exametrika") expect_equal(result$response.type, "nominal") })