R Under development (unstable) (2023-11-28 r85645 ucrt) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ### 'R CMD check' does not Reliably compare the output of examples with > ### Rout.save file. > > library(re2) > > ############################################################ > ### count > > color <- c("yellowgreen", "steelblue", "goldenrod", "forestgreen") > stopifnot(re2_count(color, "e") == c(3, 3, 1, 3)) > stopifnot(re2_count(color, "r") == c(1, 0, 1, 2)) > > # Regular expression vs literal string) > stopifnot(re2_count(c("..", "a...", "foo.b"), ".") == c(2, 4, 5)) > stopifnot(re2_count(c("..", "a...", "foo.b"), re2_regexp(".", literal = TRUE)) + == c(2, 3, 1)) > > ############################################################ > ### detect > > ## Character vector input > s <- c("barbazbla", "foobar", "not present here ") > pat <- "(foo)|(bar)baz" > stopifnot(re2_detect(s, pat) == c(TRUE, TRUE, FALSE)) > > ## Use precompiled regexp > re <- re2_regexp("(foo)|(bAR)baz", case_sensitive = FALSE) > stopifnot(re2_detect(s, re) == c(TRUE, TRUE, FALSE)) > > ############################################################ > ### extract_replace > > # Returns extracted string with substitutions > stopifnot(re2_extract_replace( + "bunny@wunnies.pl", + "(.*)@([^.]*)", + "\\2!\\1" + ) == c("wunnies!bunny")) > > # Case insensitive > stopifnot(re2_extract_replace( + "BUNNY@wunnies.pl", + re2_regexp("(b.*)@([^.]*)", case_sensitive = FALSE), 
+ "\\2!\\1" + ) == c("wunnies!BUNNY")) > > # Max submatch too large (1 match group, 2 submatches needed) > stopifnot(re2_extract_replace("foo", "f(o+)", "\\1\\2") == c("")) > > ############################################################ > ### locate > > r1 <- c( + 12, 11, + 10, 9, + 10, 9, + 12, 11 + ) > r2 <- c( + 3, 3, + 5, 5, + 3, 3, + NA, NA + ) > r3 <- c( + 2, 2, + 3, 3, + 5, 5, + 4, 4 + ) > r4 <- c( + 3, 4, + 1, 2, + 3, 4, + 5, 6 + ) > color <- c("yellowgreen", "steelblue", "goldenrod", "forestgreen") > stopifnot(all(c(t(re2_locate(color, "$"))) == r1)) > stopifnot(all(na.omit(c(t(re2_locate(color, "l")))) == na.omit(r2))) > stopifnot(all(c(t(re2_locate(color, "e"))) == r3)) > > # String length can be a multiple of pattern length > stopifnot(all(c(t(re2_locate(color, c("l(l|d)?", "st")))) == r4)) > > # Locate all occurrences > r <- re2_locate_all(color, "l") > stopifnot(all(c(t(r[[1]])) == c(3, 3, 4, 4))) > stopifnot(all(c(t(r[[2]])) == c(5, 5, 7, 7))) > stopifnot(all(c(t(r[[3]])) == c(3, 3))) > > r <- re2_locate_all(color, "e") > stopifnot(all(c(t(r[[1]])) == c(2, 2, 9, 9, 10, 10))) > stopifnot(all(c(t(r[[2]])) == c(3, 3, 4, 4, 9, 9))) > stopifnot(all(c(t(r[[3]])) == c(5, 5))) > stopifnot(all(c(t(r[[4]])) == c(4, 4, 9, 9, 10, 10))) > > r <- re2_locate_all(color, ".") > stopifnot(all(c(t(r[[1]])) + == c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, + 7, 8, 8, 9, 9, 10, 10, 11, 11))) > > ############################################################ > ### which > > color <- c("yellowgreen", "steelblue", "GOLDENROD", "forestgreen") > stopifnot(all(re2_which(color, "o") == c(1, 4))) > stopifnot(all(re2_subset(color, "o") == c("yellowgreen", "forestgreen"))) > > stopifnot(all(re2_which(c("x", "y", NA, "foo", ""), ".") == c(1, 2, 4))) > stopifnot(all(re2_subset(c("x", "y", NA, "foo", ""), ".") + == c("x", "y", "foo"))) > > # Use precompiled regexp > re <- re2_regexp("[a-z]") > stopifnot(all(re2_which(color, re) == c(1, 2, 4))) > stopifnot(all(re2_subset(color, re) + == 
c("yellowgreen", "steelblue", "forestgreen"))) > > re <- re2_regexp("[a-z]", case_sensitive = FALSE) > stopifnot(all(re2_which(color, re) == c(1, 2, 3, 4))) > stopifnot(all(re2_subset(color, re) + == c("yellowgreen", "steelblue", "GOLDENROD", "forestgreen"))) > # Vector of patterns > stopifnot(all(re2_which(color, c("^o", "bl.e$", re, "$")) == c(2, 3, 4))) > > ############################################################ > ### replace > > string <- c("yabba dabba doo", "famabbb sb") > stopifnot(all(re2_replace(string, "b+", "d") + == c("yada dabba doo", "famad sb"))) > stopifnot(all(re2_replace_all(string, "b+", "d") + == c("yada dada doo", "famad sd"))) > # Rearrange matching groups in replaced string > stopifnot(all(re2_replace( + "boris@kremvax.ru", + "(.*)@([^.]*)", "\\2!\\1" + ) == c("kremvax!boris.ru"))) > > # Use complied pattern > string <- "the quick brown fox jumps over the lazy dogs." > re <- re2_regexp("(qu|[b-df-hj-np-tv-z]*)([a-z]+)") > rewrite <- "\\2\\1ay" > stopifnot(all(re2_replace(string, re, rewrite) + == c("ethay quick brown fox jumps over the lazy dogs."))) > stopifnot(all(re2_replace_all(string, re, rewrite) + == c("ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday."))) > > string <- "abcd.efghi@google.com" > re <- re2_regexp("\\w+") > rewrite <- "\\0-NOSPAM" > stopifnot(all(re2_replace(string, re, rewrite) + == c("abcd-NOSPAM.efghi@google.com"))) > stopifnot(all(re2_replace_all(string, re, rewrite) + == c("abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM"))) > > string <- "aba\naba" > re <- re2_regexp("a.*a") > rewrite <- "(\\0)" > stopifnot(all(re2_replace(string, re, rewrite) == c("(aba)\naba"))) > stopifnot(all(re2_replace_all(string, re, rewrite) == c("(aba)\n(aba)"))) > > # Vectorize string and pattern > string <- c("ababababab", "bbbbbb", "bbbbbb", "aaaaa") > pattern <- c("b", "b+", "b*", "b*") > rewrite <- "bb" > stopifnot(all(re2_replace(string, pattern, rewrite) + == c("abbabababab", "bb", "bb", "bbaaaaa"))) > 
stopifnot(all(re2_replace_all(string, pattern, rewrite) + == c("abbabbabbabbabb", "bb", "bb", "bbabbabbabbabbabb"))) > > ############################################################ > ### regexp > > > re2p <- re2_regexp("hello world") > stopifnot(mode(re2p) == "externalptr") > > ## UTF-8 and matching interface > # By default, pattern and input text are interpreted as UTF-8. > # The Latin1 option causes them to be interpreted as Latin-1. > x <- "fa\xE7ile" > Encoding(x) <- "latin1" > stopifnot(re2_detect(x, re2_regexp("fa\xE7", encoding = "Latin1")) == TRUE) > > ## Case insensitive > stopifnot(re2_detect("fOobar ", re2_regexp("Foo", case_sensitive = FALSE)) + == TRUE) > > ## Literal string (as opposed to regular expression) > ## Matches only when 'literal' option is TRUE > stopifnot(re2_detect("foo\\$bar", re2_regexp("foo\\$b", literal = TRUE)) + == TRUE) > stopifnot(re2_detect("foo\\$bar", re2_regexp("foo\\$b", literal = FALSE)) + == FALSE) > > ## Use of never_nl > re <- re2_regexp("(abc(.|\n)*def)", never_nl = FALSE) > stopifnot(re2_match("abc\ndef\n", re)[1, 1:3] == c("abc\ndef", "abc\ndef", "\n")) > re <- re2_regexp("(abc(.|\n)*def)", never_nl = TRUE) > stopifnot(all(is.na(re2_match("abc\ndef\n", re)[1, 1:3]))) > > > ############################################################ > ### split > > panagram <- c( + "The quick brown fox jumps over the lazy dog", + "How vexingly quick daft zebras jump!" 
+ ) > > r <- re2_split(panagram, " quick | over | zebras ") > list1 <- list( + c("The", "brown fox jumps", "the lazy dog"), + c("How vexingly", "daft", "jump!") + ) > stopifnot(identical(r, list1)) > > r <- re2_split(panagram, " quick | over | zebras ", simplify = TRUE) > m1 <- rbind( + c("The", "brown fox jumps", "the lazy dog"), + c("How vexingly", "daft", "jump!") + ) > stopifnot(r == m1) > > # Use compiled regexp > re <- re2_regexp("quick | over |how ", case_sensitive = FALSE) > r <- re2_split(panagram, re) > list2 <- list( + c("The ", "brown fox jumps", "the lazy dog"), + c("", "vexingly ", "daft zebras jump!") + ) > stopifnot(identical(r, list2)) > > r <- re2_split(panagram, re, simplify = TRUE) > m2 <- rbind( + c("The ", "brown fox jumps", "the lazy dog"), + c("", "vexingly ", "daft zebras jump!") + ) > stopifnot(r == m2) > > # Restrict number of matches > r <- re2_split(panagram, " quick | over | zebras ", n = 2) > list3 <- list( + c("The", "brown fox jumps over the lazy dog"), + c("How vexingly", "daft zebras jump!") + ) > stopifnot(identical(r, list3)) > > > ############################################################ > ### match > > > ## Substring extraction > strings <- c("barbazbla", "foobar") > pattern <- "(foo)|(?P<TestGroup>bar)baz" > > result <- re2_match(strings, pattern) > m <- rbind( + c("barbaz", NA, "bar"), + c("foo", "foo", NA) + ) > > lvec <- function(lst) { + na.omit(unlist(lst)) + } > mvec <- function(mat) { + na.omit(c(mat)) + } > stopifnot(mvec(result) == mvec(m)) > stopifnot(is.matrix(result)) > stopifnot(names(result) == c(".0", ".1", "TestGroup")) > > result <- re2_match(strings, pattern, simplify = FALSE) > lst <- list( + rbind( + c("barbaz", NA, "bar") + ), + rbind( + c("foo", "foo", NA) + ) + ) > stopifnot(lvec(result) == lvec(lst)) > stopifnot(is.list(result)) > stopifnot(is.character(result[[1]])) > > ## Compile regexp > re <- re2_regexp("(foo)|(BaR)baz", case_sensitive = FALSE) > r <- re2_match(strings, re) > m <- rbind( + c("barbaz", NA,
"bar"), + c("foo", "foo", NA) + ) > stopifnot(mvec(r) == mvec(m)) > > strings <- c( + "Home: 743 733 5365", "373-733-5753 ", "foobar", + "733.335.3457 and Work: 573-433-7577 " + ) > re <- re2_regexp("([0-9]{3})[- .]([0-9]{3})[- .]([0-9]{4})") > r <- re2_match(strings, re) > m <- rbind( + c("743 733 5365", "743", "733", "5365"), + c("373-733-5753", "373", "733", "5753"), + c(NA, NA, NA, NA), + c("733.335.3457", "733", "335", "3457") + ) > stopifnot(mvec(r) == mvec(m)) > > ## Vectorized over patterns > r <- re2_match(strings, c(re, "53 $", "^foo", re)) > m <- rbind( + c("743 733 5365", "743", "733", "5365"), + c("53 ", NA, NA, NA), + c("foo", NA, NA, NA), + c("733.335.3457", "733", "335", "3457") + ) > stopifnot(mvec(r) == mvec(m)) > > ## Match all occurances, not just the first > r <- re2_match_all(strings, re) > lst <- list( + rbind( + c("743 733 5365", "743", "733", "5365") + ), + rbind( + c("373-733-5753", "373", "733", "5753") + ), + rbind( + c("733.335.3457", "733", "335", "3457"), + c("573-433-7577", "573", "433", "7577") + ) + ) > stopifnot(unlist(r) == unlist(lst)) > > r <- re2_match_all("ruby:1234 68 red:92 blue:", "(\\w+):(\\d+)") > m <- rbind( + c("ruby:1234", "ruby", "1234"), + c("red:92", "red", "92") + ) > stopifnot(r[[1]] == m) > > ## Vectorized over patterns (matching all occurances) > > r <- re2_match_all(strings, c(re, "53 $", "^foo", re)) > lst <- list( + rbind( + c("743 733 5365", "743", "733", "5365") + ), + rbind( + c("53 ") + ), + rbind( + c("foo") + ), + rbind( + c("733.335.3457", "733", "335", "3457"), + c("573-433-7577", "573", "433", "7577") + ) + ) > stopifnot(unlist(r) == unlist(lst)) > > proc.time() user system elapsed 0.39 0.06 0.37