toks_test <- tokens(c("b A A d", "C C a b B e")) fcmt_test <- fcm(toks_test, context = "document") test_that("fcm_compress works as expected, not working for 'window' context", { fcmt <- fcm(toks_test, context = "window", window = 3) expect_error(fcm_compress(fcmt), "fcm must be created with a document context") }) test_that("fcm_tolower and fcm_compress work as expected", { fcmt_lc <- fcm_tolower(fcmt_test) expect_equivalent(rownames(fcmt_lc), c("b", "a", "d", "c", "e")) mt <- matrix(c(1, 3, 1, 2, 2, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0), nrow = 5, ncol = 5, byrow = TRUE) expect_true(all(as.vector(Matrix::triu(fcmt_lc)) == as.vector(mt))) }) test_that("fcm_toupper and fcm_compress work as expected", { fcmt_uc <- fcm_toupper(fcmt_test) expect_equivalent(rownames(fcmt_uc), c("B", "A", "D", "C", "E")) mt <- matrix(c(1, 3, 1, 2, 2, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0), nrow = 5, ncol = 5, byrow = TRUE) expect_true(all(as.vector(Matrix::triu(fcmt_uc)) == as.vector(mt))) }) txt <- c(doc1 = "a B c D e", doc2 = "a BBB c D e", doc3 = "Aaaa BBB cc") fcmt_test2 <- fcm(tokens(txt), context = "document", count = "frequency", tri = TRUE) test_that("test fcm_select, fixed", { expect_equal( featnames(fcm_select(fcmt_test2, c("a", "b", "c"), selection = "keep", valuetype = "fixed", verbose = FALSE)), c("a", "B", "c") ) expect_equal( featnames(fcm_select(fcmt_test2, c("a", "b", "c"), selection = "remove", valuetype = "fixed", verbose = FALSE)), setdiff(featnames(fcmt_test2), c("a", "B", "c")) ) expect_equal( featnames(fcm_select(fcmt_test2, c("a", "b", "c"), selection = "keep", valuetype = "fixed", case_insensitive = FALSE, verbose = FALSE)), c("a", "c") ) expect_equal( featnames(fcm_select(fcmt_test2, c("a", "b", "c"), selection = "remove", valuetype = "fixed", case_insensitive = FALSE, verbose = FALSE)), setdiff(featnames(fcmt_test2), c("a", "c")) ) # expect_equal( # featnames(fcm_select(fcmt_test2, c("aaaa", "bbb", "cc"), selection = "keep", valuetype = "fixed", min_nchar = 3, verbose = FALSE)), # c("BBB", "Aaaa") # ) # expect_equal( # featnames(fcm_select(fcmt_test2, c("aaaa", "bbb", "cc"), selection = "remove", valuetype = "fixed", min_nchar = 3, verbose = FALSE)), # setdiff(featnames(fcmt_test2), c("BBB", "Aaaa")) # ) # expect_equal( # featnames(fcm_select(fcmt_test2, c("aaaa", "bbb", "cc"), selection = "keep", valuetype = "fixed", min_nchar = 3, max_nchar = 3, verbose = FALSE)), # c("BBB") # ) # expect_equal( # featnames(fcm_select(fcmt_test2, c("aaaa", "bbb", "cc"), selection = "remove", valuetype = "fixed", min_nchar = 3, max_nchar = 3, verbose = FALSE)), # setdiff(featnames(fcmt_test2), c("BBB")) # ) }) test_that("test fcm_select, glob", { pat <- c("a*", "B*", "c") expect_equal( featnames(fcm_select(fcmt_test2, pat, selection = "keep", valuetype = "glob", verbose = FALSE)), c("a", "B", "c", "BBB", "Aaaa") ) expect_equal( featnames(fcm_select(fcmt_test2, pat, selection = "remove", valuetype = "glob", verbose = FALSE)), setdiff(featnames(fcmt_test2), c("a", "B", "c", "BBB", "Aaaa")) ) expect_equal( featnames(fcm_select(fcmt_test2, pat, selection = "keep", valuetype = "glob", case_insensitive = FALSE, verbose = FALSE)), c("a", "B", "c", "BBB") ) expect_equal( featnames(fcm_select(fcmt_test2, pat, selection = "remove", valuetype = "glob", case_insensitive = FALSE, verbose = FALSE)), setdiff(featnames(fcmt_test2), c("a", "B", "c", "BBB")) ) expect_equal( featnames(fcm_select(fcmt_test2, selection = "keep", valuetype = "glob", min_nchar = 3, verbose = FALSE)), c("BBB", "Aaaa") ) expect_equal( featnames(fcm_select(fcmt_test2, selection = "remove", valuetype = "glob", max_nchar = 2, verbose = FALSE)), setdiff(featnames(fcmt_test2), c("BBB", "Aaaa")) ) }) test_that("test fcm_select, regex", { pat <- c("[A-Z].*", "c.+") expect_equal( featnames(fcm_select(fcmt_test2, pat, selection = "keep", valuetype = "regex", verbose = FALSE)), c("a", "B", "c", "D", "e", "BBB", "Aaaa", "cc") ) expect_equal( featnames(fcm_select(fcmt_test2, pat, selection = "remove", valuetype = "regex", verbose = FALSE)), character(0) ) expect_equal( featnames(fcm_select(fcmt_test2, pat, selection = "keep", valuetype = "regex", case_insensitive = FALSE, verbose = FALSE)), c("B", "D", "BBB", "Aaaa", "cc") ) expect_equal( featnames(fcm_select(fcmt_test2, pat, selection = "remove", valuetype = "regex", case_insensitive = FALSE, verbose = FALSE)), setdiff(featnames(fcmt_test2), c("B", "D", "BBB", "Aaaa", "cc")) ) }) test_that("glob works if results in no features", { expect_true(is.fcm(fcm_select(fcmt_test2, "notthere"))) }) test_that("longer selection than longer than features that exist (related to #447)", { fcmt_test2 <- fcm(tokens(c(d1 = "a b", d2 = "a b c d e"))) feat <- c("b", "c", "d", "e", "f", "g") # bugs in C++ needs repeated tests expect_message(fcm_select(fcmt_test2, feat, verbose = TRUE), "kept 4 features") expect_equivalent( as.matrix(fcm_select(fcmt_test2, feat)), matrix(c(0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0), nrow = 4, byrow = TRUE) ) }) # test_that("test fcm_select with features from a dfm, fixed", { # txt <- c("a", "b", "c") # mx <- dfm(txt) # expect_equal( # featnames(fcm_select(fcmt_test2, mx, selection = "keep", valuetype = "fixed", verbose = FALSE)), # featnames(mx) # ) # expect_equal( # featnames(fcm_select(fcmt_test2, mx, selection = "remove", valuetype = "fixed", verbose = FALSE)), # setdiff(featnames(fcmt_test2), featnames(mx)) # ) # }) test_that("test fcm_compress retains class", { fcmt <- fcm(tokens(c("b A A d", "C C a b B e")), context = "document") colnames(fcmt) <- rownames(fcmt) <- tolower(colnames(fcmt)) fcmt2 <- fcm_compress(fcmt) expect_equivalent(class(fcmt2), "fcm") }) test_that("shortcut functions works", { fcmt_test2 <- fcm(tokens(data_corpus_inaugural[1:5])) expect_equal(fcm_select(fcmt_test2, stopwords("english"), selection = "keep"), fcm_keep(fcmt_test2, stopwords("english"))) expect_equal(fcm_select(fcmt_test2, stopwords("english"), selection = "remove"), fcm_remove(fcmt_test2, stopwords("english"))) }) test_that("as.fcm is working", { feat1 <- c("B", "A", "D", "C", "E") feat2 <- c("Z", "X", "N", "M", "K") mt1 <- matrix(c(1, 3, 1, 2, 2, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0), dimnames = list(feat1, feat1), nrow = 5, ncol = 5, byrow = TRUE) expect_true(is.fcm(as.fcm(mt1))) expect_true(is.fcm(as.fcm(as(as(mt1, "CsparseMatrix"), "triangularMatrix")))) expect_true(is.fcm(as.fcm(as(mt1, "dgCMatrix")))) expect_true(is.fcm(as.fcm(as(mt1, "TsparseMatrix")))) expect_true(is.fcm(as.fcm(Matrix::Matrix(mt1, sparse = FALSE)))) mt2 <- matrix(c(1, 3, 1, 2, 2, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0), dimnames = list(feat1, feat2), nrow = 5, ncol = 5, byrow = TRUE) expect_error(as.fcm(mt2), "matrix must have the same rownames and colnames") expect_error(as.fcm(Matrix::Matrix(mt2, sparse = FALSE)), "matrix must have the same rownames and colnames") }) test_that("Compatible with Matrix 1.5-5 changes in dimnames", { dfmat <- dfm(tokens(c("a aa a", "a aaa aa aa"))) fcmat <- fcm(dfmat) expect_equal( featnames(dfm_remove(dfmat, "a*")), character(0) ) expect_equal( featnames(fcm_remove(fcmat, "a*")), character(0) ) })