# Tests that a model fitted in two resumed legs matches a model fitted in a
# single run with the same seed and the same total number of iterations.

if (getRversion() < "3.6.0") {
  skip("Randomization algorithm has changed from R 3.6")
}

# Read Data
data(keyATM_data_bills)
bills_dfm <- keyATM_data_bills$doc_dfm
bills_keywords <- keyATM_data_bills$keywords
bills_cov <- keyATM_data_bills$cov
bills_time_index <- keyATM_data_bills$time_index
keyATM_docs <- keyATM_read(bills_dfm)

# Path handed to the `resume` option by every test in this file.
resume_file <- file.path(tempdir(), "resume.rds")

# Fit a model in consecutive legs that all receive `resume = resume_file`.
#
# fit_model     Function taking one argument, the `options` list, and
#               returning a fitted model.
# iterations    Numeric vector; one leg is run per element, with that many
#               iterations.
# extra_options Named list appended after seed / iterations / resume in the
#               options of every leg.
#
# Returns the fit produced by the last leg.
#
# The file at `resume_file` is removed before the first leg and again when
# this function exits, including on error, so a failing fit cannot leave a
# file behind for the next test (all tests share the same path).
# NOTE(review): presumably the fitting functions write their state to
# `resume_file` and continue from it on the next call -- confirm against the
# package documentation of the `resume` option.
fit_with_resume <- function(fit_model, iterations, extra_options = list()) {
  unlink(resume_file)
  on.exit(unlink(resume_file), add = TRUE)

  fit <- NULL
  for (n_iter in iterations) {
    leg_options <- c(
      list(seed = 250, iterations = n_iter, resume = resume_file),
      extra_options
    )
    fit <- fit_model(leg_options)
  }
  fit
}

# In every test below the single run uses as many iterations as the resumed
# legs combined (19 = 7 + 12, or 11 = 3 + 8 for the quanteda test).
# NOTE(review): the compared elements sit at different positions in the two
# fits (e.g. Perplexity[3] vs Perplexity[4]); presumably the resumed fit
# stores an additional entry per leg -- confirm.

#
# keyATM
#
test_that("keyATM base, resume", {
  fit_base <- function(options) {
    keyATM(
      docs = keyATM_docs,
      no_keyword_topics = 3,
      keywords = bills_keywords,
      model = "base",
      options = options
    )
  }

  full_fit <- fit_base(list(seed = 250, iterations = 19))
  resumed <- fit_with_resume(fit_base, iterations = c(7, 12))

  expect_equal(
    full_fit$model_fit$Perplexity[3],
    resumed$model_fit$Perplexity[4]
  )
  expect_equal(
    full_fit$values_iter$alpha_iter$alpha[5],
    resumed$values_iter$alpha_iter$alpha[6]
  )
})

test_that("keyATM cov, resume", {
  fit_cov <- function(options) {
    keyATM(
      docs = keyATM_docs,
      no_keyword_topics = 3,
      keywords = bills_keywords,
      model = "covariates",
      model_settings = list(
        covariates_data = bills_cov,
        standardize = "all",
        covariates_formula = ~.,
        covariates_model = "DirMulti"
      ),
      options = options
    )
  }

  full_fit <- fit_cov(list(seed = 250, iterations = 19))
  resumed <- fit_with_resume(fit_cov, iterations = c(7, 12))

  expect_equal(
    full_fit$model_fit$Perplexity[3],
    resumed$model_fit$Perplexity[4]
  )
  expect_equal(
    full_fit$values_iter$Lambda_iter[[5]][7, 1],
    resumed$values_iter$Lambda_iter[[6]][7, 1]
  )
})

test_that("keyATM dynamic, resume (without storing the transition matrix)", {
  fit_dynamic <- function(options) {
    keyATM(
      docs = keyATM_docs,
      no_keyword_topics = 3,
      keywords = bills_keywords,
      model = "dynamic",
      model_settings = list(
        time_index = bills_time_index - 100,
        num_states = 5
      ),
      options = options
    )
  }

  full_fit <- fit_dynamic(
    list(seed = 250, iterations = 19, store_transition_matrix = 0)
  )
  resumed <- fit_with_resume(
    fit_dynamic,
    iterations = c(7, 12),
    extra_options = list(store_transition_matrix = 0)
  )

  expect_equal(
    full_fit$model_fit$Perplexity[3],
    resumed$model_fit$Perplexity[4]
  )
  expect_equal(
    full_fit$values_iter$alpha_iter$alpha[175],
    resumed$values_iter$alpha_iter$alpha[210]
  )
})

test_that("keyATM dynamic, resume (with storing the transition matrix)", {
  fit_dynamic <- function(options) {
    keyATM(
      docs = keyATM_docs,
      no_keyword_topics = 3,
      keywords = bills_keywords,
      model = "dynamic",
      model_settings = list(
        time_index = bills_time_index - 100,
        num_states = 5
      ),
      options = options
    )
  }

  full_fit <- fit_dynamic(
    list(seed = 250, iterations = 19, store_transition_matrix = 1)
  )
  resumed <- fit_with_resume(
    fit_dynamic,
    iterations = c(7, 12),
    extra_options = list(store_transition_matrix = 1)
  )

  expect_equal(
    full_fit$model_fit$Perplexity[3],
    resumed$model_fit$Perplexity[4]
  )
  expect_equal(
    full_fit$values_iter$alpha_iter$alpha[175],
    resumed$values_iter$alpha_iter$alpha[210]
  )
})

#
# Weighted LDA
#
test_that("weightedLDA base, resume", {
  fit_base <- function(options) {
    weightedLDA(
      docs = keyATM_docs,
      number_of_topics = 5,
      model = "base",
      options = options
    )
  }

  full_fit <- fit_base(list(seed = 250, iterations = 19))
  resumed <- fit_with_resume(fit_base, iterations = c(7, 12))

  expect_equal(
    full_fit$model_fit$Perplexity[3],
    resumed$model_fit$Perplexity[4]
  )
  expect_equal(
    full_fit$values_iter$alpha_iter$alpha[5],
    resumed$values_iter$alpha_iter$alpha[6]
  )
})

test_that("weightedLDA cov, resume", {
  fit_cov <- function(options) {
    weightedLDA(
      docs = keyATM_docs,
      number_of_topics = 5,
      model = "covariates",
      model_settings = list(
        covariates_data = bills_cov,
        standardize = "all",
        covariates_formula = ~.,
        covariates_model = "DirMulti"
      ),
      options = options
    )
  }

  full_fit <- fit_cov(list(seed = 250, iterations = 19))
  resumed <- fit_with_resume(fit_cov, iterations = c(7, 12))

  expect_equal(
    full_fit$model_fit$Perplexity[3],
    resumed$model_fit$Perplexity[4]
  )
})

test_that("weightedLDA dynamic, resume", {
  fit_dynamic <- function(options) {
    weightedLDA(
      docs = keyATM_docs,
      number_of_topics = 5,
      model = "dynamic",
      model_settings = list(
        time_index = bills_time_index - 100,
        num_states = 5
      ),
      options = options
    )
  }

  full_fit <- fit_dynamic(list(seed = 250, iterations = 19))
  resumed <- fit_with_resume(fit_dynamic, iterations = c(7, 12))

  expect_equal(
    full_fit$model_fit$Perplexity[3],
    resumed$model_fit$Perplexity[4]
  )
})

#
# Using the quanteda data
#
test_that("keyATM cov, resume with quanteda", {
  skip_on_cran()
  skip_if_not_installed("quanteda")
  skip_if_not_installed("dplyr")

  data(data_corpus_inaugural, package = "quanteda")
  data_corpus_inaugural <- head(data_corpus_inaugural, n = 58)

  data_tokens <- tokens(
    data_corpus_inaugural,
    remove_numbers = TRUE,
    remove_punct = TRUE,
    remove_symbols = TRUE,
    remove_separators = TRUE,
    remove_url = TRUE
  ) %>%
    tokens_tolower() %>%
    tokens_remove(c(
      stopwords("english"),
      "may", "shall", "can", "must", "upon", "with", "without"
    )) %>%
    tokens_select(min_nchar = 3)

  data_dfm <- dfm(data_tokens) %>%
    dfm_trim(min_termfreq = 5, min_docfreq = 2)

  # Local to this test; masks the file-level `keyATM_docs` built from the
  # bills data.
  keyATM_docs <- keyATM_read(texts = data_dfm)

  keywords <- list(
    Government = c("laws", "law", "executive"),
    ForeignAffairs = c("foreign", "war")
  )

  vars <- docvars(data_corpus_inaugural)

  # NOTE(review): attaching dplyr here changes the search path for the rest
  # of the session; kept because other test files may rely on it -- confirm
  # and consider namespace-qualified calls instead.
  library(dplyr)

  vars_selected <- vars %>%
    as_tibble() %>%
    mutate(Period = case_when(Year <= 1899 ~ "18_19c", TRUE ~ "20_21c")) %>%
    mutate(
      Party = case_when(
        Party == "Democratic" ~ "Democratic",
        Party == "Republican" ~ "Republican",
        TRUE ~ "Other"
      )
    ) %>%
    select(Party, Period)

  vars_selected <- vars_selected %>%
    mutate(
      Party = factor(Party, levels = c("Other", "Republican", "Democratic")),
      Period = factor(Period, levels = c("18_19c", "20_21c"))
    )

  fit_cov <- function(options) {
    keyATM(
      docs = keyATM_docs, # text input
      no_keyword_topics = 5, # number of topics without keywords
      keywords = keywords, # keywords
      model = "covariates",
      model_settings = list(
        covariates_data = vars_selected,
        covariates_formula = ~ Party + Period
      ),
      options = options
    )
  }

  full_fit <- fit_cov(list(seed = 250, iterations = 11))
  resumed <- fit_with_resume(fit_cov, iterations = c(3, 8))

  expect_equal(
    full_fit$model_fit$Perplexity[3],
    resumed$model_fit$Perplexity[4]
  )
})