# Unit tests for the internal helper RKorAPClient:::parse_xml_annotations().
#
# The expectations below pin the following contract:
#   * the result is a list named exactly token / lemma / pos / morph;
#   * empty-ish input (NULL, NA, "") yields zero-length components;
#   * annotation layers that are missing for a token are reported as NA, so
#     all four components always have the same length as `token`.

test_that("parse_xml_annotations returns empty vectors for empty input", {
  empty_inputs <- list(NULL, NA, "")
  results <- lapply(empty_inputs, RKorAPClient:::parse_xml_annotations)

  for (res in results) {
    expect_true(is.list(res))
    expect_named(res, c("token", "lemma", "pos", "morph"))
    # `[[` is used instead of `$` to avoid partial name matching on lists.
    for (field in c("token", "lemma", "pos", "morph")) {
      expect_length(res[[field]], 0)
    }
  }
})

test_that("parse_xml_annotations extracts tokens/pos/lemma across multiple blocks", {
  # NOTE(review): this fixture contains no XML markup -- the tags appear to
  # have been stripped from this copy of the file. The expectations below
  # (POS tags, lemmas) cannot be derived from bare words, so the original
  # annotated snippet presumably needs to be restored. TODO confirm.
  xml_snippet <- ' Wir können alles außer Plan '

  parsed <- RKorAPClient:::parse_xml_annotations(xml_snippet)

  expect_equal(parsed$token, c("Wir", "können", "alles", "außer", "Plan"))
  expect_equal(parsed$pos, c("PPER", "VVFIN", "PIS", "APPR", "NN"))
  expect_equal(parsed$lemma, c("Wir", "können", "alles", "außer", "Plan"))

  # morph not present in snippet; should be NA-aligned to tokens
  expect_length(parsed$morph, length(parsed$token))
  expect_true(all(is.na(parsed$morph)))
})

test_that("parse_xml_annotations handles missing lemma/pos/morph gracefully", {
  # First token has POS only; second has lemma+POS+morph; third has lemma only
  # NOTE(review): as above, the markup carrying these annotations is missing
  # from the literal in this copy of the file -- restore it before relying on
  # this test. TODO confirm against the original fixture.
  xml_snippet <- ' Haus können gehen '

  parsed <- RKorAPClient:::parse_xml_annotations(xml_snippet)

  expect_equal(parsed$token, c("Haus", "können", "gehen"))
  expect_equal(parsed$pos, c("NN", "VVFIN", NA))
  expect_equal(parsed$lemma, c(NA, "können", "gehen"))
  expect_equal(parsed$morph, c(NA, "verbform:fin", NA))

  # Vectors must be equal length
  n <- length(parsed$token)
  expect_length(parsed$lemma, n)
  expect_length(parsed$pos, n)
  expect_length(parsed$morph, n)
})