## HTML tags are removed and duplicates are merged
test_that("HTML tags are removed and records are merged", {
  # Two records for the same article, one per source. All bibliographic
  # fields agree; only `id`, `source` and `abstract` differ, and the first
  # record has no abstract at all.
  # NOTE(review): none of these fields contains HTML markup, so the tag
  # removal named in the description is not exercised by this input --
  # confirm whether that behaviour is covered elsewhere.
  records <- data.frame(
    id = c("A1", "B2"),
    source = c("WOS", "SCOPUS"),
    title = rep("Study X", 2),
    author = rep("Smith J", 2),
    year = rep("2020", 2),
    journal = rep("Journal A", 2),
    doi = rep("10.1234/xyz", 2),
    volume = rep("1", 2),
    issue = rep("1", 2),
    pages = rep("1-5", 2),
    abstract = c(NA, "Abstract text"),
    stringsAsFactors = FALSE
  )

  deduped <- remove_duplicate(records)

  # The pair must collapse into a single row ...
  expect_equal(nrow(deduped), 1)
  # ... and that row must carry the only non-missing abstract.
  expect_equal(deduped$abstract, "Abstract text")
})
## Primary source is preserved; provenance is tracked
test_that("primary source is preserved and provenance is tracked", {
  # Same article exported from two sources; the first row is the WOS record
  # and the second is the SCOPUS record. Every bibliographic field matches.
  records <- data.frame(
    id = c("A1", "B2"),
    source = c("WOS", "SCOPUS"),
    title = rep("Study X", 2),
    author = rep("Smith J", 2),
    year = rep("2020", 2),
    journal = rep("Journal A", 2),
    doi = rep("10.1234/xyz", 2),
    volume = rep("1", 2),
    issue = rep("1", 2),
    pages = rep("1-5", 2),
    stringsAsFactors = FALSE
  )

  deduped <- remove_duplicate(records)

  # One row survives the merge.
  expect_equal(nrow(deduped), 1)
  # `source` keeps the value of the first input record ...
  expect_equal(deduped$source, "WOS")
  # ... while `source_provenance` lists both sources in input order,
  # separated by a semicolon.
  expect_equal(deduped$source_provenance, "WOS;SCOPUS")
})
## Existing values are not overwritten during merge
test_that("existing values are not overwritten when merging", {
  # Two exports of the same article. The bibliographic fields agree, while
  # `id` and `source` hold a different, non-missing value in each record.
  df <- data.frame(
    id = c("A1", "B2"),
    source = c("WOS", "SCOPUS"),
    title = c("Study Y", "Study Y"),
    author = c("Smith J", "Smith J"),
    year = c("2022", "2022"),
    journal = c("Journal B", "Journal B"),
    doi = c("10.1234/abc", "10.1234/abc"),
    volume = c("2", "2"),
    issue = c("1", "1"),
    pages = c("10-20", "10-20"),
    stringsAsFactors = FALSE
  )

  out <- remove_duplicate(df)

  # The records have to be merged for the value checks below to be meaningful.
  expect_equal(nrow(out), 1)
  # A field on which both records agree comes through unchanged.
  expect_equal(out$doi, "10.1234/abc")
  # `source` is a column where the two inputs conflict. Checking `doi` alone
  # cannot detect an overwrite because both rows hold the same value, so
  # also require that the first record's `source` survives the merge.
  expect_equal(out$source, "WOS")
})