R Under development (unstable) (2024-01-31 r85845 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > source("helpers.R") > > expect_equal_pairs <- function(x, y) { + setkey(x, .x, .y) + setkey(y, .x, .y) + expect_equal(names(x), names(y)) + for (col in names(x)) + expect_equal(x[[col]], y[[col]], attributes = FALSE) + } > > library(reclin2) Loading required package: data.table > library(parallel) > > # Prepare data > data(linkexample1) > data(linkexample2) > linkexample1$postcode[1] <- NA > linkexample1$postcode[3] <- "6789 XY" > > # What the result should look like > pairs_ref <- data.table( + .x = c(1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L), + .y = c(1L, 1L, 2L, 3L, 3L, 4L, 5L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 5L), + firstname = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, + FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE), + lastname = c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, + FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE) + ) > > # Regular pairs > pairs1 <- pair_blocking(linkexample1, linkexample2, on = "postcode") > pairs2 <- pair_blocking(linkexample1, linkexample2, on = "lastname") > pairs <- merge_pairs(pairs1, pairs2) > compare_pairs(pairs, on = c("firstname", "lastname"), inplace = TRUE) > expect_equal_pairs(pairs, pairs_ref) > > compare_pairs(pairs1, on = c("firstname", "lastname"), inplace = TRUE) > compare_pairs(pairs2, on = c("address", "lastname"), inplace = TRUE) > pairs <- merge_pairs(pairs1, pairs2) > expect_equal(sort(names(pairs)), c(".x", ".y", "address", "firstname", "lastname")) > expect_equal(is.na(pairs$address), !is.na(pairs$firstname)) > > > > # Cluster pairs > library(parallel) > cl <- makeCluster(2) > pairs1c <- cluster_pair_blocking(cl, linkexample1, linkexample2, on = "postcode", name="a") > pairs2c <- cluster_pair_blocking(cl, linkexample1, linkexample2, on = "lastname", name="b") > pairsc <- merge_pairs(pairs1c, pairs2c) > compare_pairs(pairsc, on = c("firstname", "lastname"), inplace = TRUE) > pairsc_local <- cluster_collect(pairsc) > expect_equal_pairs(pairsc_local, pairs_ref) > > compare_pairs(pairs1c, on = c("firstname", "lastname")) > compare_pairs(pairs2c, on = c("address", "lastname")) > pairsc <- merge_pairs(pairs1c, pairs2c) > pairsc_local <- cluster_collect(pairsc) > expect_equal(sort(names(pairsc_local)), c(".x", ".y", "address", "firstname", "lastname")) > expect_equal(is.na(pairsc_local$address), !is.na(pairsc_local$firstname)) > > stopCluster(cl) > > pairs1 <- pair_blocking(linkexample1, linkexample2, on = "postcode") > pairs2 <- pair_blocking(linkexample1, linkexample2, on = "lastname") > pairs2 <- pairs2[FALSE, ] > pairs <- merge_pairs(pairs1, pairs2) > compare_pairs(pairs, on = c("firstname", "lastname"), inplace = TRUE) > compare_pairs(pairs1, on = c("firstname", "lastname"), inplace = TRUE) > expect_equal_pairs(pairs1, pairs) > > > > proc.time() user system elapsed 0.59 0.14 1.37