library(cleanNLP)

context("Testing tools for working with textual data")

data(un)

test_that("testing utils_tfidf", {
  cnlp_init_stringi()
  anno <- cnlp_annotate(un, verbose=FALSE)

  tf_direct <- cnlp_utils_tfidf(anno$token)
  expect_equal(dim(tf_direct), c(30, 79))
  expect_equal(anno$document$doc_id, rownames(tf_direct))
})


test_that("testing tidy_pca", {

  cnlp_init_stringi()
  anno <- cnlp_annotate(un, verbose=FALSE)

  res <- cnlp_utils_pca(cnlp_utils_tfidf(anno$token))
  expect_equal(rownames(res), anno$document$doc_id)
  expect_equal(colnames(res), c("PC1", "PC2"))

  res <- cnlp_utils_pca(cnlp_utils_tfidf(anno$token), k=4)
  expect_equal(rownames(res), anno$document$doc_id)
  expect_equal(colnames(res), c("PC1", "PC2", "PC3", "PC4"))

})