testthat::context("Test round trip validation")
library(xml2)
test_roundtrip <- function(f, schema = NULL, check_lengths = TRUE) {
testthat::test_that(
paste(
"testing that",
basename(f),
"can roundtrip & validate"
),
{
# Skip on ARM Macs due to platform-specific validation issues
testthat::skip_on_os("mac", arch = "aarch64")
## guess root and ns for sub-modules
doc <- xml2::read_xml(f)
ns <- find_real_root_name(doc)
out <- tempfile(basename(f), fileext = ".xml")
emld <- as_emld(f)
as_xml(emld, out, ns$name, ns$prefix)
## Make sure output xml is still valid unless it's supposed to be invalid
if (!grepl("invalid", f, ignore.case = TRUE)) {
testthat::expect_true(eml_validate(out, schema = schema))
} else {
testthat::expect_false(eml_validate(out, schema = schema))
}
## Make sure we have the same number & names of elements as we started with
if (check_lengths) {
elements_at_end <- sort(names(unlist(as_emld(out), recursive = TRUE)))
elements_at_start <- sort(names(unlist(emld, recursive = TRUE)))
# Filter out schemaLocation since it can be absent at start and added at end
elements_at_start <- Filter(
function(e) {
e != "schemaLocation"
},
elements_at_start
)
elements_at_end <- Filter(
function(e) {
e != "schemaLocation"
},
elements_at_end
)
testthat::expect_equal(elements_at_start, elements_at_end)
}
}
)
}
## Enforce testing on 2.1.1 for this
options("emld_db" = "eml-2.1.1")
## Test all citation-* examples:
suite <- list.files(
system.file(
file.path("tests", eml_version()),
package = "emld"
),
pattern = "citation",
full.names = TRUE
)
lapply(suite, test_roundtrip)
suite <- list.files(
system.file(
file.path("tests", eml_version()),
package = "emld"
),
pattern = "eml-",
full.names = TRUE
)
drop <- basename(suite) %in%
c(
"eml-datasetWithNonwordCharacters.xml",
"eml-i18n.xml",
"eml-literature.xml",
"eml-literatureInPress.xml",
"eml-unitDictionary.xml",
"eml-units.xml"
)
test_suite <- suite[!drop]
lapply(test_suite, test_roundtrip)
## These four skip the length-check
partial_test <- basename(suite) %in%
c(
"eml-datasetWithNonwordCharacters.xml",
"eml-i18n.xml",
"eml-literature.xml",
"eml-literatureInPress.xml"
)
lapply(suite[partial_test], test_roundtrip, check_lengths = FALSE)
## Add testing for 2.2.0 suite separately here.
## Helper methods for debugging
#' out <- lapply(suite, purrr::safely(test_roundtrip))
#' failed <- purrr::map_lgl(purrr::map(out, "result"), is.null)
#' suite[failed]
#' msg <- unlist(purrr::map(out, "error"))
## 35 & 36 have units we check separately since our validator can't automatically find schema
testthat::test_that("unitDictionary", {
# Skip on ARM Macs due to platform-specific validation issues
testthat::skip_on_os("mac", arch = "aarch64")
f <- system.file(
file.path("tests", getOption("emld_db", "2.2.0"), "eml-unitDictionary.xml"),
package = "emld"
)
schema <- system.file("xsd/eml-2.1.1/stmml.xsd", package = "emld")
out <- tempfile(basename(f), fileext = ".xml")
emld <- as_emld(f)
elements_at_start <- names(unlist(emld, recursive = TRUE))
## Applies JSON-LD framing. Because vocab is stmml, framing drops all elements!
## So do this manually
# as_xml(emld, out, "unitList", "stmml")
context <- emld[["@context"]]
emld[["@type"]] <- NULL
emld[["@context"]] <- NULL
xml <- emld:::as_eml_document(emld, "unitList", "stmml")
xml <- emld:::context_namespaces(context, xml)
root <- xml_root(xml)
#xml_set_name(root, "stmml:unitList", ns = xml_ns(xml))
xml2::xml_set_attr(
root,
"xmlns",
gsub("/$", "", "http://www.xml-cml.org/schema/stmml-1.1")
)
write_xml(xml, out)
eml_validate(out, schema = schema)
testthat::expect_true(eml_validate(out, schema = schema))
elements_at_end <- names(unlist(as_emld(out), recursive = TRUE))
testthat::expect_equal(elements_at_start, elements_at_end)
})
##########
## Enforce testing on 2.2.0 for this
options("emld_db" = "eml-2.2.0")
## Test all citation-* examples:
suite <- list.files(
system.file(
file.path("tests", eml_version()),
package = "emld"
),
pattern = "citation",
full.names = TRUE
)
drop <- basename(suite) %in%
c(
"citation-sbclter-bibliography.284.xml",
"citation-sbclter-bibliography.289.xml",
"eml-citationWithContact.xml"
)
test_suite <- suite[!drop]
lapply(test_suite, test_roundtrip)
suite <- list.files(
system.file(
file.path("tests", eml_version()),
package = "emld"
),
pattern = "eml-",
full.names = TRUE
)
drop <- basename(suite) %in%
c(
"eml-datasetWithNonwordCharacters.xml",
"eml-i18n.xml",
"eml-literature.xml",
"eml-literatureInPress.xml",
"eml-unitDictionary.xml",
"eml-units.xml",
"eml-citationWithContact.xml"
)
test_suite <- suite[!drop]
lapply(test_suite, test_roundtrip)
## These ones skip the length-check
partial_test <- basename(suite) %in%
c(
"eml-datasetWithNonwordCharacters.xml",
"eml-i18n.xml",
"eml-literature.xml",
"eml-literatureInPress.xml",
"eml-citationWithContact.xml"
)
lapply(suite[partial_test], test_roundtrip, check_lengths = FALSE)
######## Test that new schema validation logic that doesn't use schemaLocation
######## still validates
test_that("we can still validate XML docs without schemaLocation set on them", {
# Skip on ARM Macs due to platform-specific validation issues
testthat::skip_on_os("mac", arch = "aarch64")
testthat::expect_true(
eml_validate(
system.file(
file.path("tests", "eml-2.2.0", "eml-datasetNoSchemaLocation.xml"),
package = "emld"
)
)
)
testthat::expect_false(
eml_validate(
system.file(
file.path(
"tests",
"eml-2.2.0",
"eml-datasetNoSchemaLocationInvalid.xml"
),
package = "emld"
)
)
)
})
######### Helper methods
test_that("get_root_ns works for a variety of cases", {
testthat::expect_equal(
find_real_root_name(
xml2::read_xml(
"
"
)
),
list(prefix = "d1", name = "dictionary")
)
testthat::expect_equal(
find_real_root_name(
xml2::read_xml(
"
"
)
),
list(prefix = "eml", name = "eml")
)
testthat::expect_equal(
find_real_root_name(
xml2::read_xml(
"
"
)
),
list(prefix = "cit", name = "citation")
)
})
test_that("get_root_ns works for a variety of cases", {
testthat::expect_equal(
guess_root_schema(
xml2::read_xml(
"
"
)
),
list(
module = "stmml",
version = "1.1",
namespace = "http://www.xml-cml.org/schema/stmml-1.1"
)
)
testthat::expect_equal(
guess_root_schema(
xml2::read_xml(
"
"
)
),
list(
module = "eml",
version = "2.2.0",
namespace = "https://eml.ecoinformatics.org/eml-2.2.0"
)
)
testthat::expect_equal(
guess_root_schema(
xml2::read_xml(
"
"
)
),
list(
module = "literature",
version = "2.2.0",
namespace = "https://eml.ecoinformatics.org/literature-2.2.0"
)
)
})