context("check if inconsistencies/errors are correctly classified")

# helper -----------------------------------------------------------------------

# Run statcheck() on `txt` with messages switched off and return the element of
# the result selected by VAR_ERROR. Any further arguments (OneTailedTests,
# OneTailedTxt, pZeroError, ...) are passed on to statcheck() unchanged.
flagged_as_error <- function(txt, ...) {
  statcheck(txt, messages = FALSE, ...)[[VAR_ERROR]]
}

# cases that must be identified as errors --------------------------------------

# one plain inconsistency for every supported type of test
test_that("simple errors are classified as such", {
  t_result <- "t(28) = 2.20, p = .03"
  f_result <- "F(2, 28) = 2.20, p = .15"
  r_result <- "r(28) = .22, p = .26"
  chi2_result <- "chi2(28) = 22.20, p = .79"
  z_result <- " z = 2.20, p = .04"
  q_result <- "Q(28) = 22.20, p = .79"

  expect_true(flagged_as_error(t_result))
  expect_true(flagged_as_error(f_result))
  expect_true(flagged_as_error(r_result))
  expect_true(flagged_as_error(chi2_result))
  expect_true(flagged_as_error(z_result))
  expect_true(flagged_as_error(q_result))
})

# a negative test statistic must not change the classification of its p-value
test_that("p-values of negative tests are correctly classified", {
  consistent_z <- " Z = -2.42, p = 0.016" # no error
  inconsistent_t <- "t(28) = -2.20, p = .03" # error

  expect_false(flagged_as_error(consistent_z))
  expect_true(flagged_as_error(inconsistent_t))
})

# p-values reported as "ns", "p > ..." or "p < ..."
test_that("inexactly reported p-values are correctly classified", {
  p_as_ns <- "t(28) = 2.20, ns"
  p_above_05 <- "t(28) = 2.20, p > .05"
  p_below_05 <- "t(28) = 2.0, p < .05"

  expect_true(flagged_as_error(p_as_ns))
  expect_true(flagged_as_error(p_above_05))
  expect_false(flagged_as_error(p_below_05))
})

# a decision error has to show up as an error as well
test_that("decision errors are also classified as errors", {
  reported_p_03 <- "t(28) = 1.20, p = .03"
  reported_p_30 <- "t(28) = 2.20, p = .30"

  expect_true(flagged_as_error(reported_p_03))
  expect_true(flagged_as_error(reported_p_30))
})

# cases that must be identified as correct -------------------------------------

# p-values that match the test statistic once rounding is taken into account
test_that("correctly rounded p-values are not considered errors", {
  whole_t_p02 <- "t(28) = 2, p = .02"
  whole_t_p14 <- "t(28) = 2, p = .14"
  one_dec_t_p03 <- "t(28) = 2.2, p = .03" # rounded lower bound p-value
  one_dec_t_p04 <- "t(28) = 2.2, p = .04"
  two_dec_t_p036 <- "t(28) = 2.20, p = .036"
  two_dec_t_p037 <- "t(28) = 2.20, p = .037"

  expect_false(flagged_as_error(whole_t_p02))
  expect_false(flagged_as_error(whole_t_p14))
  expect_false(flagged_as_error(one_dec_t_p03))
  expect_false(flagged_as_error(one_dec_t_p04))
  expect_false(flagged_as_error(two_dec_t_p036))
  expect_false(flagged_as_error(two_dec_t_p037))
})

# arguments that influence the error classification ----------------------------

# OneTailedTests: treat every extracted test as one-tailed
test_that("OneTailedTests considers everything as one-tailed", {
  reported_p_02 <- "t(28) = 2.20, p = .02"
  reported_p_04 <- "t(28) = 2.20, p = .04"
  both_in_one_text <- "this test is one-tailed: t(28) = 2.20, p = .02, but this one is not: t(28) = 2.20, p = .04"

  expect_false(flagged_as_error(reported_p_02, OneTailedTests = TRUE))
  expect_true(flagged_as_error(reported_p_04, OneTailedTests = TRUE))
  expect_equal(
    flagged_as_error(both_in_one_text, OneTailedTests = TRUE),
    c(FALSE, TRUE)
  )
})

# OneTailedTxt: detect one-tailed tests from cue words in the text
test_that("automated one-tailed test detection works", {
  without_cue <- "t(28) = 2.20, p = .018"
  one_tailed_p01 <- "t(28) = 2.20, p = .01, one-tailed"
  cue_one_tailed <- "t(28) = 2.20, p = .018, one-tailed"
  cue_one_sided <- "t(28) = 2.20, p = .018, one-sided"
  cue_directional <- "t(28) = 2.20, p = .018, directional"

  # no correction for one-tailed testing expected in these calls
  expect_true(flagged_as_error(without_cue))
  expect_true(flagged_as_error(without_cue, OneTailedTxt = TRUE))
  expect_true(flagged_as_error(one_tailed_p01, OneTailedTxt = TRUE))
  expect_true(flagged_as_error(cue_one_tailed))

  # correction for one-tailed testing expected in these calls
  expect_false(flagged_as_error(cue_one_tailed, OneTailedTxt = TRUE))
  expect_false(flagged_as_error(cue_one_sided, OneTailedTxt = TRUE))
  expect_false(flagged_as_error(cue_directional, OneTailedTxt = TRUE))

  # the computed p-values themselves must be the one-tailed ones
  one_tailed_p <- pt(2.20, 28, lower.tail = FALSE)
  corrected <- statcheck(
    c(cue_one_tailed, cue_one_sided, cue_directional),
    messages = FALSE,
    OneTailedTxt = TRUE
  )
  expect_equal(corrected[[VAR_COMPUTED_P]], rep(one_tailed_p, 3))
})

# pZeroError: decide whether p = .000 counts as an inconsistency
test_that("you can adapt whether p = .000 is counted as inconsistent or not", {
  p_equals_zero <- "t(28) = 22.20, p = .000"
  p_below_zero <- "t(28) = 22.20, p < .000" # this is always an Error

  expect_true(flagged_as_error(p_equals_zero))
  expect_false(flagged_as_error(p_equals_zero, pZeroError = FALSE))

  expect_true(flagged_as_error(p_below_zero))
  expect_true(flagged_as_error(p_below_zero, pZeroError = FALSE))
})

# classification of (in)exact test statistics and (in)exact p-values -----------

# test statistic reported exactly
test_that("cases where t = ... are correctly classified", {
  # range of correct p-values: two-tailed p at t = 2.25 and at t = 2.15
  p_lower <- 2 * pt(2.25, 28, lower.tail = FALSE)
  p_upper <- 2 * pt(2.15, 28, lower.tail = FALSE)

  # correct
  p_exact_036 <- "t(28) = 2.2, p = .036"
  p_below_08 <- "t(28) = 2.2, p < .08"
  p_above_02 <- "t(28) = 2.2, p > .02"

  # error
  p_above_upper <- paste("t(28) = 2.2, p >", p_upper)
  p_below_lower <- paste("t(28) = 2.2, p <", p_lower)
  p_exact_08 <- "t(28) = 2.2, p = .08"
  p_exact_02 <- "t(28) = 2.2, p = .02"
  p_above_08 <- "t(28) = 2.2, p > .08"
  p_below_02 <- "t(28) = 2.2, p < .02"

  expect_false(flagged_as_error(p_exact_036))
  expect_false(flagged_as_error(p_below_08))
  expect_false(flagged_as_error(p_above_02))

  expect_true(flagged_as_error(p_above_upper))
  expect_true(flagged_as_error(p_below_lower))
  expect_true(flagged_as_error(p_exact_08))
  expect_true(flagged_as_error(p_exact_02))
  expect_true(flagged_as_error(p_above_08))
  expect_true(flagged_as_error(p_below_02))
})

# test statistic reported as <
test_that("cases where t < ... are correctly classified", {
  # range of correct p-values: two-tailed p at t = 2.25 and at t = 2.15
  p_lower <- 2 * pt(2.25, 28, lower.tail = FALSE)
  p_upper <- 2 * pt(2.15, 28, lower.tail = FALSE)

  # correct
  p_above_upper <- paste("t(28) < 2.20, p >", p_upper)
  p_exact_08 <- "t(28) < 2.2, p = .08"
  p_above_08 <- "t(28) < 2.2, p > .08"
  p_below_08 <- "t(28) < 2.2, p < .08"
  p_above_02 <- "t(28) < 2.2, p > .02"

  # error
  p_exact_lower <- paste("t(28) < 2.2, p =", p_lower)
  p_below_lower <- paste("t(28) < 2.2, p <", p_lower)
  p_below_02 <- "t(28) < 2.2, p < .02"
  p_exact_02 <- "t(28) < 2.2, p = .02"

  expect_false(flagged_as_error(p_above_upper))
  expect_false(flagged_as_error(p_exact_08))
  expect_false(flagged_as_error(p_above_08))
  expect_false(flagged_as_error(p_below_08))
  expect_false(flagged_as_error(p_above_02))

  expect_true(flagged_as_error(p_exact_lower))
  expect_true(flagged_as_error(p_below_lower))
  expect_true(flagged_as_error(p_below_02))
  expect_true(flagged_as_error(p_exact_02))
})

# test statistic reported as >
test_that("cases where t > ... are correctly classified", {
  # range of correct p-values: two-tailed p at t = 2.25 and at t = 2.15
  p_lower <- 2 * pt(2.25, 28, lower.tail = FALSE)
  p_upper <- 2 * pt(2.15, 28, lower.tail = FALSE)

  # correct
  p_below_upper <- paste("t(28) > 2.20, p <", p_upper)
  p_exact_02 <- "t(28) > 2.2, p = .02"
  p_above_02 <- "t(28) > 2.2, p > .02"
  p_below_02 <- "t(28) > 2.2, p < .02"
  p_below_08 <- "t(28) > 2.2, p < .08"

  # error
  p_exact_upper <- paste("t(28) > 2.2, p =", p_upper)
  p_above_upper <- paste("t(28) > 2.2, p >", p_upper)
  p_above_08 <- "t(28) > 2.2, p > .08"
  p_exact_08 <- "t(28) > 2.2, p = .08"

  expect_false(flagged_as_error(p_below_upper))
  expect_false(flagged_as_error(p_exact_02))
  expect_false(flagged_as_error(p_above_02))
  expect_false(flagged_as_error(p_below_02))
  expect_false(flagged_as_error(p_below_08))

  expect_true(flagged_as_error(p_exact_upper))
  expect_true(flagged_as_error(p_above_upper))
  expect_true(flagged_as_error(p_above_08))
  expect_true(flagged_as_error(p_exact_08))
})