library(polmineR)
use(pkg = "RcppCWB", corpus = "REUTERS")
testthat::context("hits")

test_that(
  "hits method",
  {
    y <- hits("REUTERS", query = "oil")
    expect_equal(as.data.table(y)[["count"]], count("REUTERS", query = "oil")[["count"]])
    
    y <- hits("REUTERS", query = "oil", s_attribute = "places", freq = TRUE)
    expect_equal(y@stat[places == "argentina"][["count"]], 1L)

    p <- partition("REUTERS", places = "saudi-arabia", regex = TRUE)
    y <- hits(p, query = "oil")
    expect_equal(as.data.table(y)[["count"]], count(p, query = "oil")[["count"]])
    
    y <- hits(p, query = "oil", s_attribute = "id")
    expect_equal(y@stat[id == "242"][["count"]], count(partition("REUTERS", id = "242"), query = "oil")[["count"]])
  }
)

test_that(
  "hits for nested scenario",
  {
    skip_if_not(use("GermaParl2"))

    # we want to ensure that the order is independent from the order of 
    # subsetting operations
    
    hits1 <- corpus("GERMAPARL2MINI") |>
      subset(p_type == "speech") %>% 
      subset(speaker_party %in% c("CDU", "CSU", "SPD")) |>
      split(s_attribute = "speaker_party", verbose = FALSE) %>% 
      hits(query = tm::stopwords("de")[1:10], cqp = FALSE, verbose = FALSE) %>% 
      as.data.table()
    
    hits2 <- corpus("GERMAPARL2MINI") |>
      subset(speaker_party %in% c("CDU", "CSU", "SPD")) |>
      split(s_attribute = "speaker_party") %>% 
      subset(p_type == "speech", verbose = FALSE) %>% 
      hits(query = tm::stopwords("de")[1:10], cqp = FALSE, verbose = FALSE) %>% 
      as.data.table()
    
    testthat::expect_identical(hits1, hits2)
    
    hits3 <- corpus("GERMAPARL2MINI") |>
      subset(speaker_party == "CDU") |>
      subset(p_type == "speech", verbose = FALSE) %>% 
      hits(query = tm::stopwords("de")[1:10], cqp = FALSE, verbose = FALSE) %>% 
      as.data.table()
    
    hits4 <- corpus("GERMAPARL2MINI") |>
      subset(p_type == "speech") %>% 
      subset(speaker_party == "CDU") |>
      hits(query = tm::stopwords("de")[1:10], cqp = FALSE, verbose = FALSE) %>% 
      as.data.table()
    
    testthat::expect_identical(hits3, hits4)
    
    testthat::expect_identical(
      hits2[partition == "CDU"][, c("query", "count")],
      hits3[count > 0][,c("query", "count")]
    )
    
  }
)

test_that(
  "test arg decode of hits()",
  {
    s_attrs <- c("id", "places", "language")
    
    corpus("REUTERS") %>% 
      hits(query = "oil", s_attribute = s_attrs, decode = TRUE) %>% 
      as.data.table() %>% 
      .[, s_attrs, with = FALSE] %>% 
      sapply(typeof) %>% 
      unname() %>% 
      unique() %>% 
      expect_identical("character")
    
    corpus("REUTERS") %>% 
      hits(query = "oil", s_attribute = s_attrs, decode = FALSE) %>% 
      as.data.table() %>% 
      .[, s_attrs, with = FALSE] %>% 
      sapply(typeof) %>% 
      unname() %>% 
      unique() %>% 
      expect_identical("integer")
    

    corpus("REUTERS") %>% 
      hits(query = "oil", s_attribute = s_attrs, decode = c(FALSE, TRUE, TRUE)) %>% 
      as.data.table() %>% 
      .[, s_attrs, with = FALSE] %>% 
      sapply(typeof) %>% 
      unname() %>%
      expect_identical(c("integer", "character", "character"))
  }
)