# Tests for tokenize().
# The fixture file is expected to supply the `testdata` object used below.
load("testdata.Rda")

test_that("token columns are created, dataset matches", {
  tokenized <- tokenize(testdata)
  cols <- colnames(tokenized)

  # Every column the tests rely on must be present in the result.
  expect_true("token" %in% cols)
  expect_true("uid" %in% cols)
  expect_true("nwords" %in% cols)
  expect_true("relative_time" %in% cols)
  expect_true("order" %in% cols)
  expect_true("rank" %in% cols)
  expect_true("frequency" %in% cols)

  # Pinned row count for the bundled fixture.
  expect_equal(nrow(tokenized), 738)

  # Pinned leading values of selected columns.
  expect_equal(
    tokenized$relative_time[seq_len(5)],
    c(315271, 315796, 316320, 315414, 316067)
  )
  expect_equal(
    tokenized$token[seq_len(5)],
    c("high", "level", "eh?", "sí", "que")
  )
  expect_equal(
    tokenized$order[seq_len(4)],
    c("first", "middle", "last", "only")
  )
  expect_equal(
    tokenized$rank[seq_len(5)],
    c(243, 307, 70, 50, 14)
  )
})

test_that("no issues arise with dataset containing existing nwords column", {
  # Work on a local copy so the shared fixture is left untouched; the
  # pre-existing `nwords` value of 1 must not survive in the result.
  with_nwords <- testdata
  with_nwords$nwords <- 1

  tokenized <- tokenize(with_nwords)

  expect_equal(tokenized$nwords[seq_len(5)], c(3, 3, 3, 1, 6))
})