# Test suite for check functions test_that("check_text returns tibble with correct structure", { text <- "t(28) = 2.21, p = .035, d = 0.80" result <- check_text(text) expect_true(tibble::is_tibble(result)) if (nrow(result) > 0) { expect_true("test_type" %in% names(result)) expect_true("status" %in% names(result)) expect_true("uncertainty_level" %in% names(result)) } }) test_that("check_text handles empty text", { result <- check_text("") expect_true(tibble::is_tibble(result)) expect_equal(nrow(result), 0) }) test_that("check_text detects t-test", { text <- "t(28) = 2.21, p = .035, d = 0.80" result <- check_text(text) if (nrow(result) > 0) { expect_equal(result$test_type[1], "t") } }) test_that("check_text detects F-test", { text <- "F(2, 27) = 4.56, p < .05" result <- check_text(text) if (nrow(result) > 0) { expect_equal(result$test_type[1], "F") } }) test_that("check_text detects correlation", { text <- "r(198) = .34, p < .001" result <- check_text(text) if (nrow(result) > 0) { expect_equal(result$test_type[1], "r") } }) test_that("check_text computes effect sizes for t-test", { text <- "t(28) = 2.21, d = 0.80, N = 30" result <- check_text(text) if (nrow(result) > 0) { # Should compute d_ind or dz has_effect <- !is.na(result$d_ind[1]) || !is.na(result$dz[1]) expect_true(has_effect) } }) test_that("check_text computes ANOVA effect sizes", { text <- "F(2, 27) = 4.56, η² = 0.25" result <- check_text(text) if (nrow(result) > 0) { # Should compute eta² variants has_eta2 <- !is.na(result$eta2[1]) || !is.na(result$partial_eta2[1]) expect_true(has_eta2) } }) test_that("check_text assigns status correctly", { text <- "t(28) = 2.21, d = 0.80, N = 30, n1 = 15, n2 = 15" result <- check_text(text, tol_effect = list(d = 0.02)) if (nrow(result) > 0) { expect_true(result$status[1] %in% c("PASS", "OK", "NOTE", "WARN", "ERROR", "INSUFFICIENT_DATA")) } }) test_that("check_text tracks uncertainty", { text <- "t(28) = 2.21, d = 0.80" result <- check_text(text) if (nrow(result) > 0) { expect_true("uncertainty_level" %in% names(result)) expect_true(result$uncertainty_level[1] %in% c("low", "medium", "high")) } }) test_that("check_text includes design_inferred", { text <- "t(28) = 2.21, d = 0.80" result <- check_text(text) if (nrow(result) > 0) { expect_true("design_inferred" %in% names(result)) } }) test_that("check_text includes variants_tested", { text <- "t(28) = 2.21, d = 0.80" result <- check_text(text) if (nrow(result) > 0) { expect_true("variants_tested" %in% names(result)) } }) test_that("check_files handles multiple files", { # Create temporary test files file1 <- tempfile(fileext = ".txt") file2 <- tempfile(fileext = ".txt") writeLines("t(28) = 2.21, d = 0.80", file1) writeLines("r(50) = 0.34, p < .05", file2) result <- check_files(c(file1, file2)) expect_true(tibble::is_tibble(result)) expect_true("source" %in% names(result)) # Cleanup unlink(c(file1, file2)) }) # =========================================================================== # 5-level status system tests (PASS / OK / NOTE / WARN / ERROR) # =========================================================================== test_that("OK status for p-value-only check with consistent direction", { # F-test with no effect size, but p-value consistent text <- "F(2, 27) = 4.56, p = .02" result <- check_text(text) if (nrow(result) > 0) { expect_true(result$status[1] %in% c("PASS", "OK", "NOTE", "WARN", "ERROR")) } }) test_that("OK status for p < .001 inequality when computed p also < .001", { # Large t-value ensures computed p < .001 text <- "t(28) = 5.50, p < .001" result <- check_text(text) if (nrow(result) > 0) { # No effect size reported, p < .001 consistent -> should be OK expect_true(result$status[1] %in% c("OK", "NOTE", "PASS")) } }) test_that("r-test with consistent p-value gets OK status", { # r IS the effect size, p should be consistent text <- "r(198) = .34, p < .001" result <- check_text(text) if (nrow(result) > 0) { # r-test with consistent p should be OK or PASS expect_true(result$status[1] %in% c("OK", "PASS", "NOTE")) } }) test_that("NOTE status for ambiguous design with matching effect", { # Ambiguous design (no clear paired/independent context) but effect matches text <- "t(28) = 2.21, d = 0.80" result <- check_text(text) if (nrow(result) > 0) { # Should be PASS, NOTE, or WARN depending on delta expect_true(result$status[1] %in% c("PASS", "OK", "NOTE", "WARN", "ERROR")) } }) test_that("p_symbol column is available in parsed output", { text <- "t(28) = 2.21, p < .001" parsed <- parse_text(text) expect_true("p_symbol" %in% names(parsed)) if (nrow(parsed) > 0) { expect_equal(parsed$p_symbol[1], "<") } }) test_that("p_symbol captures = sign", { text <- "t(28) = 2.21, p = .035" parsed <- parse_text(text) if (nrow(parsed) > 0) { expect_equal(parsed$p_symbol[1], "=") } }) # =========================================================================== # Welch t-test and thousands-separator bug fixes # =========================================================================== test_that("thousands-separator N is parsed correctly (not truncated)", { # Bug: "N = 1,182" was parsed as N=1 due to decimal comma conversion text <- "N = 1,182. t(403.8) = -3.15, p < .001, d = -.31" result <- check_text(text) expect_true(nrow(result) >= 1) # N should be 1182, NOT 1 expect_equal(result$N[1], 1182) # d_ind should NOT be garbage (-3.15 from N=1); with N=1182 it's ~-0.18 if (!is.na(result$d_ind_equalN[1])) { expect_true(abs(result$d_ind_equalN[1]) < 1) # Not garbage expect_true(abs(result$d_ind_equalN[1]) > 0.05) # Not degenerate } }) test_that("Welch t-test with correct N gives PASS", { # Use the actual correct N for this t-test text <- "N = 406. t(403.8) = -3.15, p < .001, d = -.31" result <- check_text(text) expect_true(nrow(result) >= 1) if (!is.na(result$d_ind_equalN[1])) { expect_true(abs(result$d_ind_equalN[1] - (-0.31)) < 0.05) } expect_true(result$status[1] %in% c("PASS", "NOTE")) }) test_that("Welch t-test infers N from df when N missing", { text <- "t(403.8) = -3.15, p < .001, d = -.31" result <- check_text(text) expect_true(nrow(result) >= 1) # Should infer N ~ 406-413 from df=403.8 and/or reported d if (!is.na(result$N[1])) { expect_true(result$N[1] >= 400 && result$N[1] <= 420) } }) test_that("Welch t-test back-computes N from reported d", { # t=-3.15, d=-0.31 -> N = 4*9.9225/0.0961 ~ 413 text <- "t(403.8) = -3.15, p = .001, d = -.31" result <- check_text(text) expect_true(nrow(result) >= 1) # Should get reasonable effect size match if (!is.na(result$d_ind_equalN[1])) { expect_true(abs(abs(result$d_ind_equalN[1]) - 0.31) < 0.05) } }) test_that("parse_text correctly extracts N with thousands separator", { text <- "N = 1,182 participants were tested. t(403.8) = -3.15, p < .001, d = -.31" result <- parse_text(text) expect_true(nrow(result) >= 1) # N should be 1182, NOT 1 expect_equal(result$N[1], 1182) }) test_that("dz_from_t returns NA for n < 2", { expect_true(is.na(effectcheck:::dz_from_t(2.5, 1))) expect_true(is.na(effectcheck:::dz_from_t(2.5, 0))) expect_true(is.na(effectcheck:::dz_from_t(2.5, -1))) # n=2 should still work expect_equal(effectcheck:::dz_from_t(2.0, 2), 2.0 / sqrt(2), tolerance = 1e-7) })