R Under development (unstable) (2024-01-07 r85787 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > if (require(RUnit)) { + library(FeatureHashing) + df <- data.frame(b = c("1,2", "1,,3", ",2,3"), stringsAsFactors = FALSE) + m <- hashed.model.matrix(~ split(b), df, create.mapping = TRUE) + mapping <- hash.mapping(m) + checkTrue(all.equal(sort(names(mapping)), paste("b", 1:3, sep="")), "the default split is incorrect") + m <- hashed.model.matrix(~ split(b, delim = "2"), df, create.mapping = TRUE) + mapping <- hash.mapping(m) + checkTrue(all.equal(sort(names(mapping)), + sort(paste("b", unique(unlist(strsplit(df$b, "2", fixed = TRUE))), sep="")) + ), "the delim of split is incorrect") + m <- hashed.model.matrix(~ split(b, delim = ",", type = "count"), df, create.mapping = TRUE) + mapping <- hash.mapping(m) + checkTrue(all.equal(sort(names(mapping)), paste("b", 1:3, sep="")), "the default split is incorrect") + for(key in names(mapping)) { + pattern <- substring(key, 2, nchar(key)) + checkTrue(all.equal(sum(grepl(pattern, df$b, fixed=TRUE)), sum(abs(m[,mapping[[key]]]))), + "The value is inconsistent") + } + } Loading required package: RUnit > > proc.time() user system elapsed 1.92 0.28 2.18