context("check if inconsistencies/errors are correctly classified")

# test if the following cases are correctly identified as errors --------------

# check classification of regular errors in all types of tests
test_that("simple errors are classified as such", {
  txt1 <- "t(28) = 2.20, p = .03"
  txt2 <- "F(2, 28) = 2.20, p = .15"
  txt3 <- "r(28) = .22, p = .26"
  txt4 <- "chi2(28) = 22.20, p = .79"
  txt5 <- " z = 2.20, p = .04"
  txt6 <- "Q(28) = 22.20, p = .79"

  expect_true(statcheck(txt1, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt2, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt3, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt4, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt5, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt6, messages = FALSE)[[VAR_ERROR]])
})

# classify p-values of negative test statistics correctly
test_that("p-values of negative tests are correctly classified", {
  txt1 <- " Z = -2.42, p = 0.016" # no error
  txt2 <- "t(28) = -2.20, p = .03" # error
  
  expect_false(statcheck(txt1, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt2, messages = FALSE)[[VAR_ERROR]])
  
})

# classify inexactly reported p-values correctly
test_that("inexactly reported p-values are correctly classified",{
  txt1 <- "t(28) = 2.20, ns"
  txt2 <- "t(28) = 2.20, p > .05"
  txt3 <- "t(28) = 2.0, p < .05"
  
  expect_true(statcheck(txt1, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt2, messages = FALSE)[[VAR_ERROR]])
  
  expect_false(statcheck(txt3, messages = FALSE)[[VAR_ERROR]])
})

# also classify decision errors as errors
test_that("decision errors are also classified as errors",{
  txt1 <- "t(28) = 1.20, p = .03"
  txt2 <- "t(28) = 2.20, p = .30"

  expect_true(statcheck(txt1, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt2, messages = FALSE)[[VAR_ERROR]])
})

# test if the following cases are correctly identified as correct -------------

# correct rounding
test_that("correctly rounded p-values are not considered errors", {
  txt1 <- "t(28) = 2, p = .02"
  txt2 <- "t(28) = 2, p = .14"
  txt3 <- "t(28) = 2.2, p = .03" # rounded lower bound p-value
  txt4 <- "t(28) = 2.2, p = .04"
  txt5 <- "t(28) = 2.20, p = .036"
  txt6 <- "t(28) = 2.20, p = .037"
  
  expect_false(statcheck(txt1, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt2, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt3, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt4, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt5, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt6, messages = FALSE)[[VAR_ERROR]])
})

# test if different arguments concerning errors work --------------------------

# OneTailedTests: assume all tests are one-tailed
test_that("OneTailedTests considers everything as one-tailed", {
  txt1 <- "t(28) = 2.20, p = .02"
  txt2 <- "t(28) = 2.20, p = .04"
  txt3 <- "this test is one-tailed: t(28) = 2.20, p = .02, but this one is not: t(28) = 2.20, p = .04"

  expect_false(statcheck(txt1, messages = FALSE,  OneTailedTests = TRUE)[[VAR_ERROR]])
  expect_true(statcheck(txt2, messages = FALSE, OneTailedTests = TRUE)[[VAR_ERROR]])
  expect_equal(statcheck(txt3, messages = FALSE, OneTailedTests = TRUE)[[VAR_ERROR]], c(FALSE, TRUE))
})

# OneTailedTxt: automated detection of one-tailed test in text
test_that("automated one-tailed test detection works", {
  txt1 <- "t(28) = 2.20, p = .018"
  txt2 <- "t(28) = 2.20, p = .01, one-tailed"
  txt3 <- "t(28) = 2.20, p = .018, one-tailed"
  txt4 <- "t(28) = 2.20, p = .018, one-sided"
  txt5 <- "t(28) = 2.20, p = .018, directional"

  # don't correct for one-tailed testing here
  expect_true(statcheck(txt1, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt1, messages = FALSE, OneTailedTxt = TRUE)[[VAR_ERROR]])
  expect_true(statcheck(txt2, messages = FALSE, OneTailedTxt = TRUE)[[VAR_ERROR]])
  expect_true(statcheck(txt3, messages = FALSE)[[VAR_ERROR]])

  # correct for one-tailed testing here
  expect_false(statcheck(txt3, messages = FALSE, OneTailedTxt = TRUE)[[VAR_ERROR]])
  expect_false(statcheck(txt4, messages = FALSE, OneTailedTxt = TRUE)[[VAR_ERROR]])
  expect_false(statcheck(txt5, messages = FALSE, OneTailedTxt = TRUE)[[VAR_ERROR]])
  
  # check that p-values were corrected in these cases
  p_1tail <- pt(2.20, 28, lower.tail = FALSE)
  expect_equal(statcheck(c(txt3, txt4, txt5), messages = FALSE, 
                         OneTailedTxt = TRUE)[[VAR_COMPUTED_P]], rep(p_1tail, 3))
})

# pZeroError: check if p = .000 is counted as an inconsistency or not
test_that("you can adapt whether p = .000 is counted as inconsistent or not", {
  txt1 <- "t(28) = 22.20, p = .000"
  txt2 <- "t(28) = 22.20, p < .000" # this is always an Error

  expect_true(statcheck(txt1, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt1, messages = FALSE, pZeroError = FALSE)[[VAR_ERROR]])

  expect_true(statcheck(txt2, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt2, messages = FALSE, pZeroError = FALSE)[[VAR_ERROR]])
})

# test classifications of (in)exact test statistcs and (in)exact p-values ----

# test statistics exactly reported 
test_that("cases where t = ... are correctly classified", {
  
  # calculate range of correct p-values
  lowp <- pt(2.25, 28, lower.tail = FALSE)*2
  upp <- pt(2.15, 28, lower.tail = FALSE)*2
  
  # correct
  txt1 <- "t(28) = 2.2, p = .036" # correct
  txt2 <- "t(28) = 2.2, p < .08"  # correct
  txt3 <- "t(28) = 2.2, p > .02"  # correct
  
  # error
  txt4 <- paste("t(28) = 2.2, p >", upp)  # error
  txt5 <- paste("t(28) = 2.2, p <", lowp) # error
  
  txt6 <- "t(28) = 2.2, p = .08"  # error
  txt7 <- "t(28) = 2.2, p = .02"  # error
  txt8 <- "t(28) = 2.2, p > .08"  # error
  txt9 <- "t(28) = 2.2, p < .02"  # error
  
  expect_false(statcheck(txt1, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt2, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt3, messages = FALSE)[[VAR_ERROR]])
  
  expect_true(statcheck(txt4, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt5, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt6, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt7, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt8, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt9, messages = FALSE)[[VAR_ERROR]])
})


# test statistic reported as <
test_that("cases where t < ... are correctly classified", {
  
  # calculate range of correct p-values
  lowp <- pt(2.25, 28, lower.tail = FALSE)*2
  upp <- pt(2.15, 28, lower.tail = FALSE)*2
  
  # correct
  txt1 <- paste("t(28) < 2.20, p >", upp)
  txt2 <- "t(28) < 2.2, p = .08"
  txt3 <- "t(28) < 2.2, p > .08"
  txt4 <- "t(28) < 2.2, p < .08"
  txt5 <- "t(28) < 2.2, p > .02"
  
  # error
  txt6 <- paste("t(28) < 2.2, p =", lowp)
  txt7 <- paste("t(28) < 2.2, p <", lowp)
  txt8 <- "t(28) < 2.2, p < .02"
  txt9 <- "t(28) < 2.2, p = .02"
  
  expect_false(statcheck(txt1, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt2, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt3, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt4, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt5, messages = FALSE)[[VAR_ERROR]])
  
  expect_true(statcheck(txt6, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt7, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt8, messages = FALSE)[[VAR_ERROR]])
  expect_true(statcheck(txt9, messages = FALSE)[[VAR_ERROR]])
  
})

# test statistic reported as >
test_that("cases where t > ... are correctly classified", {
  
  # calculate range of correct p-values
  lowp <- pt(2.25, 28, lower.tail = FALSE)*2
  upp <- pt(2.15, 28, lower.tail = FALSE)*2
  
  # correct
  txt1 <- paste("t(28) > 2.20, p <", upp)
  txt2 <- "t(28) > 2.2, p = .02"
  txt3 <- "t(28) > 2.2, p > .02"
  txt4 <- "t(28) > 2.2, p < .02"
  txt5 <- "t(28) > 2.2, p < .08"
  
  # error
  txt6 <- paste("t(28) > 2.2, p =", upp)
  txt7 <- paste("t(28) > 2.2, p >", upp)
  txt8 <- "t(28) > 2.2, p > .08"
  txt9 <- "t(28) > 2.2, p = .08"
  
  expect_false(statcheck(txt1, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt2, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt3, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt4, messages = FALSE)[[VAR_ERROR]])
  expect_false(statcheck(txt5, messages = FALSE)[[VAR_ERROR]])
  
  expect_true(statcheck(txt6, messages = FALSE)[[VAR_ERROR]]) 
  expect_true(statcheck(txt7, messages = FALSE)[[VAR_ERROR]]) 
  expect_true(statcheck(txt8, messages = FALSE)[[VAR_ERROR]]) 
  expect_true(statcheck(txt9, messages = FALSE)[[VAR_ERROR]])
  
})