# Label passed to testthat for the tests that follow in this file.
# NOTE(review): context() is deprecated in testthat's 3rd edition -- confirm
# which edition this project targets before removing it.
context("HTML parsing")
# File-level store that html_accumulator() appends to via `<<-`; it starts out
# as a data frame with no rows and no columns (every column is NULL).
accumulated <- data.frame(tag = NULL, attribute = NULL, value = NULL)
# Replace the shared `accumulated` data frame with a fresh empty one.
# Takes no arguments; the new (empty) data frame is returned invisibly as the
# value of the assignment.
reset_accumulator <- function() {
  empty_frame <- data.frame(tag = NULL, attribute = NULL, value = NULL)
  accumulated <<- empty_frame
}
# Callback that records one (tag, attribute, value) triple by appending a row
# to the shared `accumulated` data frame.
#
# tag, att, val: stored in the `tag`, `attribute` and `value` columns.
# idx: accepted as part of the callback signature but not used here.
#
# The updated data frame is returned invisibly as the value of the assignment.
html_accumulator <- function(tag, att, val, idx) {
  new_row <- data.frame(
    tag = tag,
    attribute = att,
    value = val,
    stringsAsFactors = FALSE
  )
  accumulated <<- rbind(accumulated, new_row)
}
# Feeds a fixture to call_resource_attrs() with html_accumulator as the
# callback and expects exactly two img/src rows ("123" and "456") to have been
# recorded.
test_that("different attribute quoting styles are supported", {
  # Start from an empty accumulator so rows left behind by an earlier test
  # that errored before reaching its own cleanup cannot leak in here.
  reset_accumulator()
  # NOTE(review): the fixture strings visible here are empty / newline-only and
  # contain no img markup -- the literals look like they lost their content;
  # confirm against the upstream file.
  call_resource_attrs(paste(
    "",
    "
"), html_accumulator)
  expect_equal(accumulated, data.frame(
    tag = c("img", "img"),
    attribute = c("src", "src"),
    value = c("123", "456"),
    stringsAsFactors = FALSE))
  # Leave the shared accumulator clean for whatever runs next.
  reset_accumulator()
})
# Feeds a fixture with spaces, tabs and newlines around the tag name, the
# attribute name, "=" and the quoted value, and expects the recorded rows to be
# img/src/"123" and link/href/"456".
test_that("irrelevant white space is ignored", {
  # Start from an empty accumulator so rows left behind by an earlier test
  # that errored before reaching its own cleanup cannot leak in here.
  reset_accumulator()
  # NOTE(review): the fixture visible here has no link/href markup and no "456"
  # value, yet the expectation below requires them -- part of the literal
  # appears to be missing; confirm against the upstream file.
  call_resource_attrs(paste(
    "< img src = \n",
    " \t '123'\n",
    " \t value ='abc' />",
    ""), html_accumulator)
  expect_equal(accumulated, data.frame(
    tag = c("img", "link"),
    attribute = c("src", "href"),
    value = c("123", "456"),
    stringsAsFactors = FALSE))
  # Leave the shared accumulator clean for whatever runs next.
  reset_accumulator()
})
test_that("common resource types are found in a simple document", {
call_resource_attrs(paste(
"\n",
"\n",
"