# Tests for HTML text extraction: raw text (html_text), layout-aware text
# (html_text2), the inline helper (html_text_inline), whitespace collapsing,
# tag margins, and the PaddedText accumulator.

# html_text ---------------------------------------------------------------

test_that("html_text returns raw html", {
  # The literal newline in the text is kept verbatim, while <br> contributes
  # nothing to the raw text.
  html <- minimal_html("<p>x\ny<br>z</p>")

  p <- html_elements(html, "p")
  expect_equal(html_text(p), "x\nyz")
})

# html_text2 --------------------------------------------------------------

test_that("handles block containing only inline elements", {
  html <- minimal_html("<p>a <b>b</b> <b><i>c</i></b></p>")
  expect_equal(html_text2(html), "a b c")

  # internal newlines are trimmed
  html <- minimal_html("<p>a\n\nb\nc</p>")
  expect_equal(html_text2(html), "a b c")
})

test_that("handles multiple paragraphs with line breaks", {
  # Two paragraphs are separated by a blank line; <br> becomes one newline.
  html <- minimal_html("
    <body>
    <p>a</p>
    <p>b<br>c</p>
    </body>
  ")
  expect_equal(html_text2(html), "a\n\nb\nc")
  expect_equal(html_text2(html_elements(html, "p")), c("a", "b\nc"))
})

test_that("handles table", {
  # Cells within a row are tab-separated; rows are newline-separated.
  html <- minimal_html("
    <table>
    <tr><th>a</th><th>b</th></tr>
    <tr><td>1</td><td>2</td></tr>
    <tr><td>2</td><td>3</td></tr>
    </table>
  ")
  expect_equal(html_text2(html), "a\tb\n1\t2\n2\t3")
})

test_that("handles mixed block as well as can be expected", {
  # A block child followed by bare text and a trailing <br> in the same div.
  html <- minimal_html("
    <div>
      <p>a</p>
      b<br/>
    </div>
  ")
  expect_equal(html_text2(html_element(html, "div")), "a\n\nb\n")
})

test_that("returns NA for xml_missing", {
  expect_equal(html_text2(xml2::xml_missing()), NA_character_)
})

test_that("breaks as expected", {
  expect_identical(tag_margin("p"), 2L)
  expect_identical(tag_margin("li"), 1L)
  expect_identical(tag_margin("b"), 0L)
})

# inline ------------------------------------------------------------------

test_that("handle single line of text", {
  html <- minimal_html("<p>a <b>b</b> <b><i>c</i></b></p>")
  expect_equal(html_text_inline(html_element(html, "p")), "a b c")

  # collapses space across nodes
  html <- minimal_html("<p>a <b>b </b> <b> c</b></p>")
  expect_equal(html_text_inline(html_element(html, "p")), "a b c")
})

test_that("converts br to \n", {
  # Leading, trailing, and consecutive <br> each yield exactly one newline.
  html <- minimal_html("<p><br>x</p>")
  expect_equal(html_text_inline(html_element(html, "p")), "\nx")

  html <- minimal_html("<p>x<br></p>")
  expect_equal(html_text_inline(html_element(html, "p")), "x\n")

  html <- minimal_html("<p><br><br></p>")
  expect_equal(html_text_inline(html_element(html, "p")), "\n\n")
})

test_that("empty block returns empty string", {
  html <- minimal_html("<p></p>")
  expect_equal(html_text_inline(html_element(html, "p")), "")
})

test_that("collapse whitespace handles single line", {
  expect_equal(collapse_whitespace("\n\tx\t\n"), "x")
  expect_equal(collapse_whitespace("x y"), "x y")
})

test_that("optionally preserve nbsp", {
  # By default a non-breaking space is collapsed like any other whitespace;
  # passing TRUE as the second argument keeps it.
  expect_equal(collapse_whitespace("x \u00a0 y"), "x y")
  expect_equal(collapse_whitespace("x\u00a0y", TRUE), "x\u00a0y")
})

# PaddedText --------------------------------------------------------------

test_that("margins only added within text", {
  # Margins before the first text and after the last text produce no output.
  text <- PaddedText$new()
  text$add_margin(1)
  text$add_text("x")
  text$add_margin(1)

  expect_equal(text$output(), "x")
})

test_that("margins are collapsed", {
  # Adjacent margins do not accumulate: after margins of 1 and then 2, the
  # pending line count is 2, not 3.
  text <- PaddedText$new()
  text$add_text("x")
  text$add_margin(1)
  expect_equal(text$lines, 1)

  text$add_margin(2)
  expect_equal(text$lines, 2)

  text$add_text("y")
  expect_equal(text$output(), "x\n\ny")
})

test_that("empty text is ignored", {
  # An empty string does not count as text, so the margin that follows it is
  # still treated as leading and is dropped.
  text <- PaddedText$new()
  text$add_text("")
  text$add_margin(1)
  text$add_text("x")

  expect_equal(text$output(), "x")
})