# Tests for bugs reported by Innsbruck Credibility Hackathon (2026-03-19)
#
# Each section below pins one reported bug in check_text() together with
# non-regression cases for the behaviour next to it. Every test feeds a short
# text snippet to check_text() and inspects columns of the returned table
# (test_type, p_reported, p_computed, N, phi, status, decision_error).

# ===========================================================================
# Bug 1: Section numbers parsed as p-values
# Text like "p = .548).\n\n3.3. Further analyses" should NOT extract p = 3.3
# ===========================================================================

test_that("Section numbers are not parsed as p-values (Bug 1)", {
  text <- "F(6, 1913) = 0.83, p = .548).\n\n3.3. Further analyses showed"
  result <- check_text(text)

  expect_gt(nrow(result), 0)
  expect_equal(result$test_type[1], "F")

  # p_reported should be 0.548, NOT 3.3 (the section number that follows)
  expect_false(
    is.na(result$p_reported[1]),
    info = "p_reported should not be NA"
  )
  expect_equal(
    result$p_reported[1], 0.548,
    tolerance = 0.001,
    info = paste("Expected p=0.548, got p=", result$p_reported[1])
  )
})

test_that("Section numbers don't corrupt p-values across line breaks (Bug 1 variant)", {
  text <- "t(45) = 2.10, p = .041.\n\n3.1. Discussion of results"
  result <- check_text(text)

  expect_gt(nrow(result), 0)
  expect_equal(result$p_reported[1], 0.041, tolerance = 0.001)
})

test_that("Legitimate p-value line breaks still work after Bug 1 fix", {
  # This is the case the original regex was designed for: the p-value itself
  # is wrapped onto the next line and must still be joined to "p =".
  text <- "t(28) = 2.21, p =\n0.035, d = 0.80"
  result <- check_text(text)

  expect_gt(nrow(result), 0)
  expect_equal(
    result$p_reported[1], 0.035,
    tolerance = 0.001,
    info = "p-value across line break should still be joined"
  )
})

# ===========================================================================
# Bug 2: Wrong N used for phi in chi-square
# Chi-square with inline N should use that N, not a global/context N
# ===========================================================================

test_that("Chi-square uses inline N for phi computation (Bug 2)", {
  text <- paste0(
    "Study 1 involved N = 572 participants.\n\n",
    "Study 2 Results: chi2(1, N = 204) = 4.36, p = .037, phi = .15."
  )
  result <- check_text(text)

  # Should find at least the chi-square test.
  # %in% (unlike ==) never yields NA, so rows with a missing test_type cannot
  # leak into the subset as all-NA rows and make the nrow() check pass.
  chi_rows <- result[result$test_type %in% "chisq", , drop = FALSE]
  expect_true(nrow(chi_rows) > 0, info = "Should detect chi-square test")

  # N should be 204 (from inline), NOT 572 (from Study 1)
  expect_equal(
    chi_rows$N[1], 204,
    info = paste("Expected N=204 (inline), got N=", chi_rows$N[1])
  )

  # phi = sqrt(chi2/N) = sqrt(4.36/204) = 0.1462
  # Reported phi = 0.15, so delta should be small
  expect_false(is.na(chi_rows$phi[1]), info = "phi should be computed")
  expected_phi <- sqrt(4.36 / 204)
  expect_equal(
    chi_rows$phi[1], expected_phi,
    tolerance = 0.01,
    info = paste("phi should be computed with N=204, got:", chi_rows$phi[1])
  )
})

test_that("Chi-square without inline N still falls back to context (Bug 2 non-regression)", {
  text <- "With N = 100 participants, chi2(2) = 8.73, p = .013"
  result <- check_text(text)

  chi_rows <- result[result$test_type %in% "chisq", , drop = FALSE]
  expect_gt(nrow(chi_rows), 0)
  expect_equal(
    chi_rows$N[1], 100,
    info = "Should fall back to context N when no inline N"
  )
})

test_that("Chi-square inline N with thousands separator (Bug 2 edge case)", {
  text <- "chi2(1, N = 1,204) = 5.12, p = .024"
  result <- check_text(text)

  chi_rows <- result[result$test_type %in% "chisq", , drop = FALSE]
  expect_gt(nrow(chi_rows), 0)
  expect_equal(
    chi_rows$N[1], 1204,
    info = paste("Expected N=1204, got N=", chi_rows$N[1])
  )
})

# ===========================================================================
# Bug 3: "ns" triggers WARN instead of OK
# "ns" / "n.s." should be recognized as "not significant"
# ===========================================================================

test_that("'ns' is recognized as not significant and gives OK (Bug 3)", {
  text <- "F(1, 99) = .86, ns."
  result <- check_text(text)

  expect_gt(nrow(result), 0)
  expect_equal(result$test_type[1], "F")

  # p_computed for F(1,99) = 0.86 should be ~0.356 (not significant)
  expect_false(is.na(result$p_computed[1]))
  expect_true(
    result$p_computed[1] > 0.05,
    info = paste("p_computed should be > 0.05, got:", result$p_computed[1])
  )

  # Status should be OK (both agree: not significant)
  expect_equal(
    result$status[1], "OK",
    info = paste("Expected OK for ns with p > .05, got:", result$status[1])
  )
})

test_that("'n.s.' variant is also recognized (Bug 3)", {
  text <- "t(50) = 1.20, n.s."
  result <- check_text(text)

  expect_gt(nrow(result), 0)

  # p_computed for t(50) = 1.20 is ~0.236 (not significant)
  expect_equal(
    result$status[1], "OK",
    info = paste("Expected OK for n.s., got:", result$status[1])
  )
})

test_that("'ns' with significant computed p gives WARN (Bug 3 decision error)", {
  text <- "F(1, 99) = 5.50, ns."
  result <- check_text(text)

  expect_gt(nrow(result), 0)

  # p_computed for F(1,99) = 5.50 is ~0.021 (significant)
  expect_true(
    result$p_computed[1] < 0.05,
    info = paste("p_computed should be < 0.05, got:", result$p_computed[1])
  )

  # Status should be WARN (decision error: paper says ns but p < .05)
  expect_equal(
    result$status[1], "WARN",
    info = paste(
      "Expected WARN for ns with significant p, got:",
      result$status[1]
    )
  )
  expect_true(result$decision_error[1], info = "Should flag decision error")
})

test_that("Numeric p-value takes precedence over ns (Bug 3 non-regression)", {
  # When both numeric p and ns are present, numeric should win
  text <- "F(1, 99) = .86, p = .356, ns."
  result <- check_text(text)

  expect_gt(nrow(result), 0)

  # Should use the numeric p-value
  expect_equal(
    result$p_reported[1], 0.356,
    tolerance = 0.001,
    info = "Numeric p should take precedence over ns"
  )
})