# Attach polmineR; the tests below call kwic(), corpus(), partition(), etc.
# without a namespace prefix.
library(polmineR)
# NOTE(review): use() presumably makes the demo corpora shipped with the named
# packages available (the tests rely on "REUTERS" and "GERMAPARLMINI") --
# confirm against the polmineR documentation.
use("polmineR")
use(pkg = "RcppCWB", corpus = "REUTERS")
# Label the tests of this file as the "kwic" context.
testthat::context("kwic")
# kwic() called with a corpus ID (character): the number of rows in the @stat
# slot for two queries, and a NULL return value for queries that match nothing.
test_that(
  "kwic-method for corpus",
  {
    # Plain token query.
    expect_equal(
      nrow(kwic("REUTERS", query = "oil", p_attribute = "word")@stat),
      78L
    )

    # Query written in CQP syntax (quoted regex), without an explicit `cqp`
    # argument.
    expect_equal(
      nrow(kwic("REUTERS", query = '"barrel.*"', p_attribute = "word")@stat),
      26L
    )

    # Queries without any hit are expected to yield NULL; expect_null() states
    # that intent directly and reports a clearer failure than comparing to NULL.
    expect_null(kwic("REUTERS", query = "asdfasdf", p_attribute = "word"))
    expect_null(kwic("REUTERS", query = '"asdfasdfasdfasd.*"', cqp = TRUE))
  }
)
# kwic() called on a partition: the number of rows in the @stat slot for two
# queries, and a NULL return value for queries that match nothing.
test_that(
  "kwic-method for partition",
  {
    # Partition of REUTERS restricted via the s-attribute `places`.
    P <- partition("REUTERS", places = "saudi-arabia", regex = TRUE)

    # Plain token query.
    expect_equal(
      nrow(kwic(P, query = "oil", p_attribute = "word")@stat),
      21L
    )

    # CQP query, flagged explicitly with cqp = TRUE.
    expect_equal(
      nrow(
        kwic(P, query = '"barrel.*"', cqp = TRUE, p_attribute = "word")@stat
      ),
      7L
    )

    # Queries without any hit are expected to yield NULL; expect_null() states
    # that intent directly and reports a clearer failure than comparing to NULL.
    expect_null(kwic(P, query = "asdfasdf", p_attribute = "word"))
    expect_null(
      kwic(P, query = '"asdfasdfasdfasd.*"', cqp = TRUE, p_attribute = "word")
    )
  }
)
# as.character() on a kwic object: checks the length of the result and the
# first concordance line for three different settings of `fmt`.
# NOTE(review): all three calls are expected to give the same plain string --
# confirm that the default `fmt` is not supposed to add markup around the node.
test_that(
  "as.character-method for kwic objects",
  {
    oil_kwic <- kwic(corpus("REUTERS"), query = "oil")

    # fmt = NULL
    plain <- as.character(oil_kwic, fmt = NULL)
    expect_equal(length(plain), 78L)
    expect_equal(
      plain[1],
      "its contract prices for crude oil by 1.50 dlrs a barrel"
    )

    # Default value of fmt
    with_default_fmt <- as.character(oil_kwic)
    expect_equal(
      with_default_fmt[1],
      "its contract prices for crude oil by 1.50 dlrs a barrel"
    )

    # Explicit fmt, applied to a freshly computed kwic object
    fresh_kwic <- kwic(corpus("REUTERS"), query = "oil")
    with_custom_fmt <- as.character(fresh_kwic, fmt = "%s")
    expect_equal(
      with_custom_fmt[1],
      "its contract prices for crude oil by 1.50 dlrs a barrel"
    )
  }
)
# Indexing a kwic object with `[`: after taking the first five entries, the
# match ids found in the @cpos slot must be exactly those of the @stat slot.
test_that(
  "indexing kwic objects",
  {
    oil_kwic <- kwic(corpus("REUTERS"), query = "oil")
    first_five <- oil_kwic[1:5]

    ids_in_cpos <- unique(first_five@cpos[["match_id"]])
    ids_in_stat <- first_five@stat[["match_id"]]
    expect_identical(ids_in_cpos, ids_in_stat)
  }
)
# subset() on kwic objects: the @cpos and @stat slots must stay in sync, and
# subsetting on a column added by enrich() must keep only matching rows.
test_that(
  "subsetting kwic objects",
  {
    # Subset on the right context; match ids of both slots must agree.
    oil <- corpus("REUTERS") %>%
      kwic(query = "oil") %>%
      subset(grepl("prices", right))
    expect_identical(unique(oil@cpos[["match_id"]]), oil@stat[["match_id"]])

    # Subset on an s-attribute column. The argument is spelled out in full as
    # `s_attributes` (as in the as.data.frame test of this file) instead of
    # relying on partial argument matching.
    int_spd <- corpus("GERMAPARLMINI") %>%
      kwic(query = "Integration") %>%
      enrich(s_attributes = "party") %>%
      subset(grepl("SPD", party))
    expect_identical(unique(int_spd@stat[["party"]]), "SPD")
  }
)
# as.data.frame() on a kwic object enriched with three s-attributes: the first
# value of the first column is compared against a fixed string.
test_that(
  "as.data.frame for kwic-method",
  {
    enriched <- corpus("GERMAPARLMINI") %>%
      kwic(query = "Integration") %>%
      enrich(s_attributes = c("date", "speaker", "party"))
    tbl <- as.data.frame(enriched)

    # NOTE(review): the expected value is a multi-line literal, i.e. the three
    # metadata values are separated by line breaks -- confirm that this is the
    # separator as.data.frame() actually emits. The continuation lines must
    # stay unindented, any leading whitespace would become part of the string.
    expect_equal(tbl[[1]][1], "2009-10-27
Heinz Riesenhuber
NA")
  }
)
# as.DocumentTermMatrix() on a kwic object: the column sum for "prices" must
# equal the number of rows in @cpos where the word is "prices" and the
# direction is not 0.
test_that(
  "as.DocumentTermMatrix for kwic-class-object",
  {
    oil_kwic <- kwic("REUTERS", query = "oil")
    dtm <- as.DocumentTermMatrix(oil_kwic, p_attribute = "word")

    # NOTE(review): direction == 0 presumably marks the matched node itself,
    # so only context tokens are counted here -- confirm.
    prices_in_context <- oil_kwic@cpos[word == "prices" & direction != 0L]

    expect_equal(
      slam::col_sums(dtm)[["prices"]],
      nrow(prices_in_context)
    )
  }
)
# kwic() with a positivelist that is expected to filter out every match: the
# result must be NULL.
test_that(
  "kwic: NULL object if positivelist removes all matches",
  {
    k <- corpus("GERMAPARLMINI") %>%
      kwic(query = "Integration", cqp = FALSE, positivelist = "Messer")
    # expect_null() instead of comparing is.null(k) with TRUE: same check, but
    # a failure shows the object that was returned.
    expect_null(k)
  }
)
# kwic() applied to the result of split(): the number of concordances per
# subcorpus is compared with the counts returned by count() on the same object.
test_that(
  "kwic: Apply kwic on partition_bundle",
  {
    sp <- corpus("GERMAPARLMINI") %>%
      subset(date == "2009-11-10") %>%
      split(s_attribute = "speaker")
    kwic_table <- kwic(sp, query = "Integration") %>% slot("stat")

    # The idea of the test is that the number of concordances per subcorpus
    # needs to be identical with the result of a count over the
    # partition_bundle.
    dt <- kwic_table[, .N, by = "subcorpus_name"]
    # Sort by descending count; the name is used as a tie-breaker so that
    # subcorpora with equal counts line up in both tables regardless of the
    # order in which they were returned.
    data.table::setorderv(
      dt,
      cols = c("N", "subcorpus_name"),
      order = c(-1L, 1L)
    )

    cnt <- count(
      sp,
      query = "Integration",
      s_attributes = "speaker",
      progress = FALSE
    )
    # Subcorpora without hits do not show up in the kwic table, drop them.
    cnt <- cnt[TOTAL > 0L]
    # Namespace-qualified like the call above, so the test does not depend on
    # data.table being attached.
    data.table::setorderv(
      cnt,
      cols = c("TOTAL", "partition"),
      order = c(-1L, 1L)
    )

    expect_equal(dt[["subcorpus_name"]], cnt[["partition"]])
    expect_equal(dt[["N"]], cnt[["TOTAL"]])
  }
)
# kwic() with a `boundary` s-attribute and a very wide context window: for the
# matches with id 1, 3 and 4, the first two tokens in @cpos must be the match
# itself.
# NOTE(review): presumably the left context of these matches is cut off at the
# "date" boundary, so nothing precedes the match -- confirm.
test_that(
  "check boundary arg for kwic,character()-method",
  {
    greeting <- kwic(
      "GERMAPARLMINI",
      query = '"Sehr" "geehrte"',
      cqp = TRUE,
      left = 100,
      right = 100,
      boundary = "date"
    )

    expected_start <- c("Sehr", "geehrte")
    for (match_no in c(1, 3, 4)) {
      words <- greeting@cpos[match_id == match_no][["word"]]
      expect_identical(words[1:2], expected_start)
    }
  }
)
# kwic() with `s_attributes`, followed by highlight() and tooltips(): the
# s-attribute column must still be present in the @stat slot afterwards.
# The test previously had an empty description, which made a failure
# impossible to identify in the test report.
test_that(
  "kwic: s_attributes column is kept after highlight() and tooltips()",
  {
    oil <- kwic("REUTERS", query = "oil", s_attributes = "id") %>%
      highlight(list(yellow = "prices")) %>%
      tooltips(tooltips = list(yellow = "alert"))
    expect_true("id" %in% colnames(oil@stat))
  }
)