R version 4.4.0 beta (2024-04-15 r86425 ucrt) -- "Puppy Cup"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(survival)
> # Checks of neardate(): for each (id, y1) row of a first data set, compare
> # the returned index into a second data set against hand-worked values,
> # then confirm the answers do not depend on the row order of either input.
> # NOTE(review): this looks like a saved session transcript (prompts plus
> # printed results); presumably any comment added here should be mirrored
> # in the script that produces it -- confirm.
> #
> # the second data set is not sorted by id/date, on purpose
> 
> # df1: one reference date (y1) for each of ids 1-10
> df1 <- data.frame(id= 1:10,
+                   y1= as.Date(c("1992-01-01", "1996-01-01", "1997-03-20",
+                                 "2000-01-01", "2001-01-01", "2004-01-01",
+                                 "2014-03-27", "2014-01-30", "2000-08-01",
+                                 "1997-04-29")))
> 
> # df2: 20 candidate dates (y2), one or more per id.  Rows 16-20 return to
> # ids 3, 6 and 8 after later ids have appeared, and the dates for id 9
> # (rows 12-14) are out of time order.
> df2 <- data.frame(id= c(1, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 9, 9, 10,
+                         3, 3, 6, 6, 8),
+                   y2= as.Date(c("1998-04-30", "2004-07-01", "1999-04-14",
+                        "2001-02-22", "2003-11-19", "2005-02-15", "2006-06-22",
+                        "2007-09-20", "2013-08-02", "2015-01-09", "2014-01-15",
+                        "2006-12-06", "1999-10-20", "2010-06-30", "1997-04-28",
+                        "1995-04-20", "1997-03-20", "1998-04-30", "1995-04-20",
+                        "2006-12-06")))
> 
> # Never executed (guarded by FALSE); kept as a manual aid.  It labels each
> # df2 point with its row number and overlays the df1 dates as red '+'.
> if (FALSE) { # plot for visual check
+     plot(y2 ~ id, df2, ylim=range(c(df1$y1, df2$y2)), type='n')
+     text(df2$id, df2$y2, as.numeric(1:nrow(df2)))
+     points(y1~id, df1, col=2, pch='+')
+ }
> 
> # `best` left at its default.  The expected value holds, per df1 row, the
> # df2 row number of the same-id entry whose y2 is closest to y1 without
> # being earlier.  An equal date qualifies (id 3 -> row 17), and NA marks
> # ids with no such entry (id 8 and id 10 only have earlier dates).
> i1 <- neardate(df1$id, df2$id, df1$y1, df2$y2)
> all.equal(i1, c(1, 3, 17, 5, 7, 8, 10, NA, 12, NA))
[1] TRUE
> 
> # best="prior": the expected rows are now the closest y2 that is not later
> # than y1.  An equal date still qualifies (id 3 -> row 17 again); ids 1, 2,
> # 4 and 5 have only later dates and give NA.
> i2 <- neardate(df1$id, df2$id, df1$y1, df2$y2, best="prior")
> all.equal(i2, c(NA, NA, 17, NA, NA, 18, 9, 11, 13, 15))
[1] TRUE
> 
> # Same questions against df3, a copy of df2 sorted by id and then date.
> # i3 indexes rows of df3, so indx[i3] converts back to df2 row numbers
> # before comparing with the unsorted answer.
> indx <- order(df2$id, df2$y2)
> df3 <- df2[indx,]
> i3 <- neardate(df1$id, df3$id, df1$y1, df3$y2)
> all.equal(indx[i3], i1)
[1] TRUE
> 
> i4 <- neardate(df1$id, df3$id, df1$y1, df3$y2, best="prior")
> all.equal(indx[i4], i2)
[1] TRUE
> 
> # Shuffle the rows of the first data set instead (indx is reused here as a
> # fixed permutation of 1:10); the result should be i1 in that same order.
> indx <- c(2,3,10,9, 4,5, 7,8,1,6)
> df4 <- df1[indx,]
> i5 <- neardate(df4$id, df2$id, df4$y1, df2$y2)
> all.equal(i1[indx], i5)
[1] TRUE
> 
> proc.time()
   user  system elapsed 
   1.18    0.15    1.28 