test_that("html_text returns raw html", {
  # html_text() is expected to return the node text as stored: the literal
  # newline survives and the <br> contributes no characters.
  # NOTE(review): fixture markup rebuilt from the "p" selector and the expected
  # value below -- confirm against the upstream test.
  html <- minimal_html("<p>x\ny<br>z</p>")
  p <- html_elements(html, "p")
  expect_equal(html_text(p), "x\nyz")
})
# html_text2 --------------------------------------------------------------
test_that("handles block containing only inline elements", {
  # A single <p> whose children are all inline should come back as one line.
  # NOTE(review): the inline tags were rebuilt from the test name and expected
  # value -- confirm the exact markup against the upstream test.
  html <- minimal_html("<p>a <b>b</b> <b><i>c</i></b></p>")
  expect_equal(html_text2(html), "a b c")

  # internal newlines are trimmed
  html <- minimal_html("<p>a\n\nb\nc</p>")
  expect_equal(html_text2(html), "a b c")
})
test_that("handles multiple paragraphs with line breaks", {
  # Two paragraphs, the second containing a <br>. The expectations below
  # require two "p" matches ("a" and "b\nc") and a blank line between them when
  # the whole document is rendered.
  # NOTE(review): markup rebuilt from the "p" selector and expected values --
  # confirm against the upstream test.
  html <- minimal_html("
    <p>a
    <p>b<br>c
  ")
  expect_equal(html_text2(html), "a\n\nb\nc")
  expect_equal(html_text2(html_elements(html, "p")), c("a", "b\nc"))
})
test_that("handles table", {
  # The expected output has three tab-separated rows, so the fixture needs a
  # header row plus two data rows of two cells each.
  # NOTE(review): table markup rebuilt from the expected value -- confirm
  # against the upstream test.
  html <- minimal_html("
    <table>
    <tr><th>a<th>b
    <tr><td>1<td>2
    <tr><td>2<td>3
    </table>
  ")
  expect_equal(html_text2(html), "a\tb\n1\t2\n2\t3")
})
test_that("handles mixed block as well as can be expected", {
  # A <div> mixing a block child (<p>) with bare inline content ending in <br>.
  # NOTE(review): markup rebuilt from the "div" selector and the expected
  # value "a\n\nb\n" -- confirm against the upstream test.
  html <- minimal_html("
    <div>
      <p>a</p>
      b<br/>
    </div>
  ")
  expect_equal(html_text2(html_element(html, "div")), "a\n\nb\n")
})
test_that("returns NA for xml_missing", {
  # html_text2() on a missing node should yield a character NA.
  missing_node <- xml2::xml_missing()
  result <- html_text2(missing_node)
  expect_equal(result, NA_character_)
})
test_that("breaks as expected", {
  # tag_margin() must return integers: 2 for <p>, 1 for <li>, 0 for <b>.
  margin_p <- tag_margin("p")
  margin_li <- tag_margin("li")
  margin_b <- tag_margin("b")

  expect_identical(margin_p, 2L)
  expect_identical(margin_li, 1L)
  expect_identical(margin_b, 0L)
})
# inline ------------------------------------------------------------------
test_that("handle single line of text", {
  # Inline children of a single <p> are flattened to one line.
  # NOTE(review): inline tags rebuilt from the "p" selector, the comment below
  # and the expected values -- confirm against the upstream test.
  html <- minimal_html("<p>a <b>b</b> <b><i>c</i></b></p>")
  expect_equal(html_text_inline(html_element(html, "p")), "a b c")

  # collapses space across nodes
  html <- minimal_html("<p>a <b>b </b> <b> c</b></p>")
  expect_equal(html_text_inline(html_element(html, "p")), "a b c")
})
test_that("converts br to \n", {
  # Each fixture places <br> at a different position inside a <p>; every <br>
  # is expected to become exactly one "\n" in the output.
  # NOTE(review): markup rebuilt from the "p" selector and expected values --
  # confirm against the upstream test.

  # leading <br>
  html <- minimal_html("<p><br>x</p>")
  expect_equal(html_text_inline(html_element(html, "p")), "\nx")

  # trailing <br>
  html <- minimal_html("<p>x<br></p>")
  expect_equal(html_text_inline(html_element(html, "p")), "x\n")

  # consecutive <br>s are not merged
  html <- minimal_html("<p><br><br></p>")
  expect_equal(html_text_inline(html_element(html, "p")), "\n\n")
})
test_that("empty block returns empty string", {
  # The selector needs an actual (empty) <p> to match; an empty document has
  # no such element.
  # NOTE(review): markup rebuilt from the "p" selector -- confirm against the
  # upstream test.
  html <- minimal_html("<p></p>")
  expect_equal(html_text_inline(html_element(html, "p")), "")
})
test_that("collapse whitespace handles single line", {
  # Surrounding tabs and newlines are stripped.
  padded <- "\n\tx\t\n"
  expect_equal(collapse_whitespace(padded), "x")

  # Interior whitespace between words comes back as a single space.
  spaced <- "x y"
  expect_equal(collapse_whitespace(spaced), "x y")
})
test_that("optionally preserve nbsp", {
  # Default call: the non-breaking space is folded into the surrounding
  # whitespace.
  with_spaces <- collapse_whitespace("x \u00a0 y")
  expect_equal(with_spaces, "x y")

  # Second argument TRUE: the non-breaking space is left in place.
  kept <- collapse_whitespace("x\u00a0y", TRUE)
  expect_equal(kept, "x\u00a0y")
})
# PaddedText --------------------------------------------------------------
test_that("margins only added within text", {
  # Margins requested before the first text and after the last text must not
  # show up in the output.
  padded <- PaddedText$new()

  padded$add_margin(1)
  padded$add_text("x")
  padded$add_margin(1)

  rendered <- padded$output()
  expect_equal(rendered, "x")
})
test_that("margins are collapsed", {
  # Order matters here: the object is stateful and `lines` is inspected after
  # each margin request.
  padded <- PaddedText$new()
  padded$add_text("x")

  # First margin request is recorded as-is.
  padded$add_margin(1)
  expect_equal(padded$lines, 1)

  # A second request of 2 leaves `lines` at 2 rather than 3.
  padded$add_margin(2)
  expect_equal(padded$lines, 2)

  padded$add_text("y")
  rendered <- padded$output()
  expect_equal(rendered, "x\n\ny")
})
test_that("empty text is ignored", {
  # Adding "" followed by a margin must not produce any leading output before
  # the first real text.
  padded <- PaddedText$new()

  padded$add_text("")
  padded$add_margin(1)
  padded$add_text("x")

  rendered <- padded$output()
  expect_equal(rendered, "x")
})