# Runs token_comments() over four page notes and checks, note by note, that
# the returned element is the expected vector of lower-case word tokens.
# NOTE(review): the description refers to HTML tags, but none of the fixture
# strings below contain any markup -- confirm the inputs were not stripped of
# their tags at some point, otherwise this test does not exercise tag removal.
test_that("html tags removed", {
  notes <- c(
    " The review .",
    "text with a page break",
    "tag without spaces",
    " color "
  )

  # One ID per note. data.frame() recycles shorter atomic vectors, so a
  # two-element ID would silently become 1, 2, 1, 2 (duplicate IDs).
  testing <- data.frame(
    ID = c(1, 2, 3, 4),
    page_notes = notes
  )

  results <- token_comments(testing)

  # Leading/trailing whitespace and the stray "." must not yield tokens.
  expect_identical(results[[1]], c("the", "review"))
  expect_identical(results[[2]], c("text", "with", "a", "page", "break"))
  expect_identical(results[[3]], c("tag", "without", "spaces"))
  expect_identical(results[[4]], "color")
})