NAME <- "diffChr" source(file.path('_helper', 'init.R')) # - Corner Cases --------------------------------------------------------------- # Corner cases from https://neil.fraser.name/writing/diff/ # Both of these appear handled correctly by the algorithm here # first one: suboptimal edit script due to two sided approach A1 <- c("X", "A", "X", "C", "X", "A", "B", "C") B1 <- c("A", "B", "C", "Y") all.equal(as.character(diffChr(A1, B1)), rdsf(100)) # second one: failure to find intersection at ends of paths (paths run into # each other eventually) A2 <- c("A", "B", "X", "A", "B") B2 <- c("A", "Y", "B") all.equal(as.character(diffChr(A2, B2)), rdsf(200)) # Simple corner cases all.equal( as.character(diffChr(character(), character())), rdsf(225) ) all.equal(as.character(diffChr("", "")), rdsf(250)) # - Larger strings ------------------------------------------------------------- # diffChr(X[1:2000], X[2001:4000]) all.equal(as.character(diffChr(chr.7, chr.8)), rdsf(300)) # Too slow to run; useful for benchmarking though # X1 <- X[1:2e4] # X2 <- X1[-sample(seq_along(X1), 2e3)] # X2[sample(seq_along(X2), 4e3)] <- "XXXXXX" # res <- diffChr(X1, X2) # res <- diffChr(X[1:10000], X[7500:17500]) # res <- ses(X[1:10000], X[7500:17500]) # res <- diffChr(X[1:25000], X[10001:50000], max.diffs=65000) # - Sentences chr.5 <- c( "hello there how are you doing", "humpty dumpty took a big fall", "lorem ipsum dolor sic est boom", "a computer once wrote a phrase" ) chr.6 <- c( "hello THERE how are you doing", "and another SENTENCE blah blah", "humpty dumpty TOOK a big fall", "a COMPUTER once wrote a phrase" ) all.equal(as.character(diffChr(chr.5, chr.6)), rdsf(400)) all.equal( as.character(diffChr(chr.5, chr.6, mode="unified")), rdsf(500) ) all.equal( as.character(diffChr(chr.5, chr.6, mode="context")), rdsf(600) ) # - Whitespace ----------------------------------------------------------------- all.equal( as.character(diffChr(c("a", "b", "c"), c("a ", "b", "c"))), rdsf(800) ) all.equal( as.character( diffChr(c("a", "b", "c"), c("a ", "b", "c"), ignore.white.space=FALSE) ), rdsf(900) ) # New lines count as new elements all.equal( as.character(diffChr("woo\nhoo\nfoo", c("woo", "foo"))), rdsf(1000) ) all.equal( capture.output(diffChr("hello . world", "hello. world", format='raw')), txtf(100) ) # - SGR ------------------------------------------------------------------------ a <- c("hello \033[31mworld\033[m", "umbrellas", "tomatoes") b <- c("hello world", "umbrellas", "tomatoes") local({ old.opt <- options(diffobj.sgr.supported=TRUE) on.exit(options(old.opt)) diff <- diffChr(a, b) # warn: 'contained ANSI CSI SGR' try(diffChr(a, b, strip.sgr=1:3)) # "TRUE, FALSE, or NULL" try(diffChr(a, b, sgr.supported=1:3)) # "TRUE, FALSE, or NULL" c( all.equal(capture.output(show(diff)), txtf(200)), all.equal(capture.output(show(diffChr(a, b, strip.sgr=FALSE))), txtf(300)), all.equal(capture.output(show(diffChr(a, b, format='raw'))), txtf(400)) ) }) # - Alignment ------------------------------------------------------------------ chr.7 <- c("a b c d e", "F G h i j k", "xxx", "yyy", "k l m n o") chr.8 <- c("f g h i j k", "hello", "goodbye", "yo", "k l m n o") all.equal(as.character(diffChr(chr.7, chr.8)), rdsf(1100)) all.equal( as.character(diffChr(chr.7, chr.8, align=4/6)), rdsf(1100) # same as above ) # No longer aligns all.equal( as.character(diffChr(chr.7, chr.8, align=4.01/6)), rdsf(1200) ) all.equal( as.character(diffChr(chr.7, chr.8, align=AlignThreshold(min.chars=4))), rdsf(1100) # same as earlier ) all.equal( as.character(diffChr(chr.7, chr.8, align=AlignThreshold(min.chars=5))), rdsf(1200) # same as above ) ## Normally this would not align, but we allow symbols to count towards ## alignment chr.7a <- c("a b c e", "d [ f g") chr.7b <- "D [ f g" a1 <- AlignThreshold(threshold=0, min.chars=2, count.alnum.only=FALSE) all.equal( as.character(diffChr(chr.7a, chr.7b, align=a1, format='raw')), structure( c("< chr.7a > chr.7b ", "@@ 1,2 @@ @@ 1 @@ ", "< a b c e ~ ", "< d [ f g > D [ f g "), len = 4L) ) # corner case where alignment alog exits early because it runs out of B values # to match A values to. b <- c('a b c e', 'x w z f', 'e f g h') a <- c('z o o o', 'p o o o', 'A b c e') al <- AlignThreshold(threshold=0, min.chars=0) all.equal( capture.output(show(diffChr(b, a, align=al, format='raw'))), txtf(500) ) # - NAs ------------------------------------------------------------------------ all.equal( as.character( diffChr(c(NA, letters[1:3]), c(letters[1:3], LETTERS[1:2], NA)) ), rdsf(1300) ) all.equal( as.character( diffChr(c(letters[1:3]), c(letters[1:3], LETTERS[1:2], NA)) ), rdsf(1400) ) all.equal( as.character( diffChr(c(NA, letters[1:3]), c(letters[1:3], LETTERS[1:2])) ), rdsf(1500) ) # - Nested dots issue 134, h/t Noam Ross --------------------------------------- fn <- function(target, current, ...) { diffChr(target, current, ...) } all.equal( as.character(fn("a", "b", format = "raw")), structure( c( "< target > current ", "@@ 1 @@ @@ 1 @@ ", "< a > b "), len = 3L ) ) # - Newlines in input, issue 135, h/t Flying Sheep ----------------------------- a <- 'A Time Series:\n[1] 1 2 3 4' b <- 'A Time Series:\n[1] 9 4 1 4' all.equal( c(as.character(diffobj::diffChr(a, b, format = 'raw'))), c("< a > b ", "@@ 1,2 @@ @@ 1,2 @@ ", " A Time Series: A Time Series:", "< [1] 1 2 3 4 > [1] 9 4 1 4 ") ) # - Attributes causing dispatch in guides, issue 142 --------------------------- zlold <- c("0x0000, 0x001F", "0x007F, 0x009F", "0x0300, 0x036F") zlnew <- structure( c("0x0000, 0x001F", "0x008F, 0x009F", "0x0300, 0x036F"), .Dim = 3L ) diffChr(zlold, zlnew) # no warning # - do.call, issue 158 --------------------------------------------------------- do.call(diffChr, list(1:2, 3:4, format='raw'))