R Under development (unstable) (2025-04-19 r88162 ucrt) -- "Unsuffered Consequences" Copyright (C) 2025 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > NAME <- "diffChr" > source(file.path('_helper', 'init.R')) > > # - Corner Cases --------------------------------------------------------------- > > # Corner cases from https://neil.fraser.name/writing/diff/ > # Both of these appear handled correctly by the algorithm here > # first one: suboptimal edit script due to two sided approach > > A1 <- c("X", "A", "X", "C", "X", "A", "B", "C") > B1 <- c("A", "B", "C", "Y") > all.equal(as.character(diffChr(A1, B1)), rdsf(100)) [1] TRUE > > # second one: failure to find intersection at ends of paths (paths run into > # each other eventually) > > A2 <- c("A", "B", "X", "A", "B") > B2 <- c("A", "Y", "B") > all.equal(as.character(diffChr(A2, B2)), rdsf(200)) [1] TRUE > > # Simple corner cases > > all.equal( + as.character(diffChr(character(), character())), rdsf(225) + ) [1] TRUE > all.equal(as.character(diffChr("", "")), rdsf(250)) [1] TRUE > > # - Larger strings ------------------------------------------------------------- > > # diffChr(X[1:2000], X[2001:4000]) > > all.equal(as.character(diffChr(chr.7, chr.8)), rdsf(300)) [1] TRUE > > # Too slow to run; useful for benchmarking though > > # X1 <- X[1:2e4] > # X2 <- X1[-sample(seq_along(X1), 2e3)] > # X2[sample(seq_along(X2), 4e3)] <- "XXXXXX" > # res <- diffChr(X1, X2) > # res <- diffChr(X[1:10000], X[7500:17500]) > # res <- ses(X[1:10000], X[7500:17500]) > # res <- diffChr(X[1:25000], X[10001:50000], max.diffs=65000) > > # - Sentences > chr.5 <- c( + "hello there how are you doing", + "humpty dumpty took a big fall", + "lorem ipsum dolor sic est boom", + "a computer once wrote a phrase" + ) > chr.6 <- c( + "hello THERE how are you doing", + "and another SENTENCE blah blah", + "humpty dumpty TOOK a big fall", + "a COMPUTER once wrote a phrase" + ) > all.equal(as.character(diffChr(chr.5, chr.6)), rdsf(400)) [1] TRUE > all.equal( + as.character(diffChr(chr.5, chr.6, mode="unified")), rdsf(500) + ) [1] TRUE > all.equal( + as.character(diffChr(chr.5, chr.6, mode="context")), rdsf(600) + ) [1] TRUE > # - Whitespace ----------------------------------------------------------------- > > all.equal( + as.character(diffChr(c("a", "b", "c"), c("a ", "b", "c"))), rdsf(800) + ) [1] TRUE > all.equal( + as.character( + diffChr(c("a", "b", "c"), c("a ", "b", "c"), ignore.white.space=FALSE) + ), + rdsf(900) + ) [1] TRUE > # New lines count as new elements > all.equal( + as.character(diffChr("woo\nhoo\nfoo", c("woo", "foo"))), rdsf(1000) + ) [1] TRUE > all.equal( + capture.output(diffChr("hello . world", "hello. world", format='raw')), + txtf(100) + ) [1] TRUE > # - SGR ------------------------------------------------------------------------ > > a <- c("hello \033[31mworld\033[m", "umbrellas", "tomatoes") > b <- c("hello world", "umbrellas", "tomatoes") > > local({ + old.opt <- options(diffobj.sgr.supported=TRUE) + on.exit(options(old.opt)) + diff <- diffChr(a, b) # warn: 'contained ANSI CSI SGR' + try(diffChr(a, b, strip.sgr=1:3)) # "TRUE, FALSE, or NULL" + try(diffChr(a, b, sgr.supported=1:3)) # "TRUE, FALSE, or NULL" + + c( + all.equal(capture.output(show(diff)), txtf(200)), + all.equal(capture.output(show(diffChr(a, b, strip.sgr=FALSE))), txtf(300)), + all.equal(capture.output(show(diffChr(a, b, format='raw'))), txtf(400)) + ) + }) Error in diffChr(target = a, current = b, strip.sgr = 1:3) : Argument `strip.sgr` must be TRUE, FALSE, or NULL In addition: Warning message: In diffChr(target = a, current = b) : `target` or `current` contained ANSI CSI SGR when rendered; these were stripped. Use `strip.sgr=FALSE` to preserve them in the diffs. Error in diffChr(target = a, current = b, sgr.supported = 1:3) : Argument `sgr.supported` must be TRUE, FALSE, or NULL [1] TRUE TRUE TRUE > # - Alignment ------------------------------------------------------------------ > > chr.7 <- c("a b c d e", "F G h i j k", "xxx", "yyy", "k l m n o") > chr.8 <- c("f g h i j k", "hello", "goodbye", "yo", "k l m n o") > > all.equal(as.character(diffChr(chr.7, chr.8)), rdsf(1100)) [1] TRUE > all.equal( + as.character(diffChr(chr.7, chr.8, align=4/6)), rdsf(1100) # same as above + ) [1] TRUE > # No longer aligns > all.equal( + as.character(diffChr(chr.7, chr.8, align=4.01/6)), rdsf(1200) + ) [1] TRUE > all.equal( + as.character(diffChr(chr.7, chr.8, align=AlignThreshold(min.chars=4))), + rdsf(1100) # same as earlier + ) [1] TRUE > all.equal( + as.character(diffChr(chr.7, chr.8, align=AlignThreshold(min.chars=5))), + rdsf(1200) # same as above + ) [1] TRUE > > ## Normally this would not align, but we allow symbols to count towards > ## alignment > chr.7a <- c("a b c e", "d [ f g") > chr.7b <- "D [ f g" > a1 <- AlignThreshold(threshold=0, min.chars=2, count.alnum.only=FALSE) > all.equal( + as.character(diffChr(chr.7a, chr.7b, align=a1, format='raw')), + structure( + c("< chr.7a > chr.7b ", "@@ 1,2 @@ @@ 1 @@ ", + "< a b c e ~ ", "< d [ f g > D [ f g "), len = 4L) + ) [1] TRUE > # corner case where alignment alog exits early because it runs out of B values > # to match A values to. > > b <- c('a b c e', 'x w z f', 'e f g h') > a <- c('z o o o', 'p o o o', 'A b c e') > al <- AlignThreshold(threshold=0, min.chars=0) > all.equal( + capture.output(show(diffChr(b, a, align=al, format='raw'))), txtf(500) + ) [1] TRUE > # - NAs ------------------------------------------------------------------------ > > all.equal( + as.character( + diffChr(c(NA, letters[1:3]), c(letters[1:3], LETTERS[1:2], NA)) + ), + rdsf(1300) + ) [1] TRUE > all.equal( + as.character( + diffChr(c(letters[1:3]), c(letters[1:3], LETTERS[1:2], NA)) + ), + rdsf(1400) + ) [1] TRUE > all.equal( + as.character( + diffChr(c(NA, letters[1:3]), c(letters[1:3], LETTERS[1:2])) + ), + rdsf(1500) + ) [1] TRUE > # - Nested dots issue 134, h/t Noam Ross --------------------------------------- > > fn <- function(target, current, ...) { + diffChr(target, current, ...) + } > all.equal( + as.character(fn("a", "b", format = "raw")), + structure( + c( + "< target > current ", + "@@ 1 @@ @@ 1 @@ ", + "< a > b "), len = 3L + ) + ) [1] TRUE > > # - Newlines in input, issue 135, h/t Flying Sheep ----------------------------- > > a <- 'A Time Series:\n[1] 1 2 3 4' > b <- 'A Time Series:\n[1] 9 4 1 4' > all.equal( + c(as.character(diffobj::diffChr(a, b, format = 'raw'))), + c("< a > b ", + "@@ 1,2 @@ @@ 1,2 @@ ", + " A Time Series: A Time Series:", + "< [1] 1 2 3 4 > [1] 9 4 1 4 ") + ) [1] TRUE > > # - Attributes causing dispatch in guides, issue 142 --------------------------- > > zlold <- c("0x0000, 0x001F", "0x007F, 0x009F", "0x0300, 0x036F") > zlnew <- structure( + c("0x0000, 0x001F", "0x008F, 0x009F", "0x0300, 0x036F"), .Dim = 3L + ) > diffChr(zlold, zlnew) # no warning < zlold > zlnew @@ 1,3 @@  @@ 1,3 @@  0x0000, 0x001F 0x0000, 0x001F < 0x007F, 0x009F > 0x008F, 0x009F 0x0300, 0x036F 0x0300, 0x036F > > # - do.call, issue 158 --------------------------------------------------------- > > do.call(diffChr, list(1:2, 3:4, format='raw')) < 1:2 > 3:4 @@ 1,2 @@ @@ 1,2 @@ < 1 > 3 < 2 > 4 > > > proc.time() user system elapsed 2.53 0.17 3.12