## Build a two-record fixture describing the same publication as exported by
## two sources (WOS first, SCOPUS second). Scalar defaults are recycled across
## both rows; named arguments passed through `...` override a default column
## or append a new one (e.g. `abstract`).
make_duplicate_pair <- function(...) {
  defaults <- list(
    id = c("A1", "B2"),
    source = c("WOS", "SCOPUS"),
    title = "Study X",
    author = "Smith J",
    year = "2020",
    journal = "Journal A",
    doi = "10.1234/xyz",
    volume = "1",
    issue = "1",
    pages = "1-5"
  )
  columns <- utils::modifyList(defaults, list(...))
  # Kept explicit so the fixture stays character-typed on R < 4.0 as well.
  do.call(data.frame, c(columns, list(stringsAsFactors = FALSE)))
}

## HTML tags are removed and duplicates are merged
test_that("HTML tags are removed and records are merged", {
  # NOTE(review): no field in this fixture contains HTML markup, so only the
  # merge and the filling of the missing abstract are exercised here.
  # Consider adding a tagged value once the expected cleaned output of
  # remove_duplicate() is confirmed.
  df <- make_duplicate_pair(abstract = c(NA, "Abstract text"))

  out <- remove_duplicate(df)

  expect_equal(nrow(out), 1)
  expect_equal(out$abstract, "Abstract text")
})

## Primary source is preserved; provenance is tracked
test_that("primary source is preserved and provenance is tracked", {
  df <- make_duplicate_pair()

  out <- remove_duplicate(df)

  expect_equal(nrow(out), 1)
  expect_equal(out$source, "WOS")
  expect_equal(out$source_provenance, "WOS;SCOPUS")
})

## Existing values are not overwritten during merge
test_that("existing values are not overwritten when merging", {
  df <- make_duplicate_pair(
    title = "Study Y",
    year = "2022",
    journal = "Journal B",
    doi = "10.1234/abc",
    volume = "2",
    pages = "10-20"
  )

  out <- remove_duplicate(df)

  # Confirm a merge actually happened before checking what it kept.
  expect_equal(nrow(out), 1)
  expect_equal(out$doi, "10.1234/abc")
  # Both records carry the same DOI, so the check above cannot detect an
  # overwrite on its own. `source` differs between the records; the first
  # record's value is expected to survive, as in the provenance test.
  expect_equal(out$source, "WOS")
})