R Under development (unstable) (2024-02-08 r85876 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(reclin2) Loading required package: data.table > source("helpers.R") > > > data("linkexample1", "linkexample2") > linkexample1$id2 <- c(1,1,3,3,5,6) > linkexample2$id2 <- c(2,3,3,6,7) > > pairs <- pair_blocking(linkexample1, linkexample2, "postcode") > pairs[, score := runif(nrow(pairs))] First data set: 6 records Second data set: 5 records Total number of pairs: 17 pairs Blocking on: 'postcode' .x .y score 1: 1 1 0.5894881 2: 1 2 0.1824231 3: 1 3 0.8353371 4: 2 1 0.6770434 5: 2 2 0.6544106 6: 2 3 0.8774025 7: 3 1 0.5120988 8: 3 2 0.6981675 9: 3 3 0.4878957 10: 4 1 0.9716523 11: 4 2 0.8297809 12: 4 3 0.8262862 13: 5 1 0.1826654 14: 5 2 0.3685295 15: 5 3 0.4131194 16: 6 4 0.6243491 17: 6 5 0.2456658 > pairs[, select := score > 0.5] First data set: 6 records Second data set: 5 records Total number of pairs: 17 pairs Blocking on: 'postcode' .x .y score select 1: 1 1 0.5894881 TRUE 2: 1 2 0.1824231 FALSE 3: 1 3 0.8353371 TRUE 4: 2 1 0.6770434 TRUE 5: 2 2 0.6544106 TRUE 6: 2 3 0.8774025 TRUE 7: 3 1 0.5120988 TRUE 8: 3 2 0.6981675 TRUE 9: 3 3 0.4878957 FALSE 10: 4 1 0.9716523 TRUE 11: 4 2 0.8297809 TRUE 12: 4 3 0.8262862 TRUE 13: 5 1 0.1826654 FALSE 14: 5 2 0.3685295 FALSE 15: 5 3 0.4131194 FALSE 16: 6 4 0.6243491 TRUE 17: 6 5 0.2456658 FALSE > pairs[, select2 := FALSE] First data set: 6 records Second data set: 5 records Total number of pairs: 17 pairs Blocking on: 'postcode' .x .y score select select2 1: 1 1 0.5894881 TRUE FALSE 2: 1 2 0.1824231 FALSE FALSE 3: 1 3 0.8353371 TRUE FALSE 4: 2 1 0.6770434 TRUE FALSE 5: 2 2 0.6544106 TRUE FALSE 6: 2 3 0.8774025 TRUE FALSE 7: 3 1 0.5120988 TRUE FALSE 8: 3 2 0.6981675 TRUE FALSE 9: 3 3 0.4878957 FALSE FALSE 10: 4 1 0.9716523 TRUE FALSE 11: 4 2 0.8297809 TRUE FALSE 12: 4 3 0.8262862 TRUE FALSE 13: 5 1 0.1826654 FALSE FALSE 14: 5 2 0.3685295 FALSE FALSE 15: 5 3 0.4131194 FALSE FALSE 16: 6 4 0.6243491 TRUE FALSE 17: 6 5 0.2456658 FALSE FALSE > > > # === TESTS FOR ID_X AND ID_Y ARGUMENTS > > test <- reclin2:::select_preprocess(pairs, score = "score", id_x = "id", id_y = "id") > expect_equal(test$.x, linkexample1$id[pairs$.x]) > expect_equal(test$.y, linkexample2$id[pairs$.y]) > > test <- reclin2:::select_preprocess(pairs, score = "score", id_x = "id2", id_y = "id2") > expect_equal(test$.x, linkexample1$id2[pairs$.x]) > expect_equal(test$.y, linkexample2$id2[pairs$.y]) > > test <- reclin2:::select_preprocess(pairs, score = "score", id_x = "id2", id_y = "id2", + preselect = "select") > expect_equal(sort(test$.x), sort(linkexample1$id2[pairs$.x[pairs$select]])) > expect_equal(sort(test$.y), sort(linkexample2$id2[pairs$.y[pairs$select]])) > > test <- reclin2:::select_preprocess(pairs, score = "score", id_x = "id2", id_y = "id2", + preselect = "select2") > expect_equal(nrow(test), 0) > expect_equal(names(test), c(".x", ".y", "score", "index")) > > expect_error( + test <- reclin2:::select_preprocess(pairs, score = "score", id_x = 1:3, id_y = 1:3, + preselect = "select") + ) > > proc.time() user system elapsed 0.54 0.10 0.62