skip_if_not_installed("xgboost") skip_if_not_installed("text2vec") library(xgboost) library(text2vec) data(train_sentences) # Tokenize data get_matrix <- function(text) { it <- itoken(text, progressbar = FALSE) create_dtm(it, vectorizer = hash_vectorizer()) } dtm_train = get_matrix(train_sentences$text) # Create boosting model xgb_model <- xgb.train( list( max_depth = 7, eta = 0.1, objective = "binary:logistic", eval_metric = "error", nthread = 1 ), xgb.DMatrix(dtm_train, label = train_sentences$class.text == "OWNX"), nrounds = 50 ) test_that("single sentence explanation", { to_explain <- "Since our motivation is an application in bioinformatics, our notation and terminology will be drawn from that area" expect_gt(predict(xgb_model, get_matrix(to_explain)), 0.5) explainer <- lime(x = to_explain, model = xgb_model, preprocess = get_matrix) explanation <- explain(to_explain, explainer, n_labels = 1, n_features = 2) expect_length(explanation, 13) expect_equal(nrow(explanation), 2) expect_type(explanation, "list") expect_true("our" %in% explanation$feature) expect_gt(sum(explanation[explanation$feature == "our", "feature_weight"]), 0) }) test_that("multiple sentences, multiple explanations", { sentences <- head( test_sentences[test_sentences$class.text == "OWNX", "text"], 5 ) explainer <- lime(sentences, xgb_model, get_matrix) explanation <- explain(sentences, explainer, n_labels = 1, n_features = 2) expect_gte(sum(tolower(explanation$feature) == "we"), 3) expect_true("our" %in% explanation$feature) expect_equal(nrow(explanation), 5 * 2) expect_true(all( explanation[explanation$feature == "our", "feature_weight"] > 0 )) expect_gt(sum(explanation[explanation$feature == "we", "feature_weight"]), 0) }) test_that("multiple sentences, single explanation", { sentences <- head( test_sentences[test_sentences$class.text == "OWNX", "text"], 5 ) explainer <- lime(sentences, xgb_model, get_matrix) explanation <- explain( sentences, explainer, n_labels = 1, n_features = 5, single_explanation = TRUE ) expect_true(all( explanation[explanation$feature == "Section", "feature_weight"] > 0 )) expect_gt(sum(explanation[explanation$feature == "we", "feature_weight"]), 0) })