R Under development (unstable) (2024-01-23 r85822 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > source("incl/start.R") Loading required package: R.oo Loading required package: R.methodsS3 R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help. R.oo v1.26.0 successfully loaded. See ?R.oo for help. Attaching package: 'R.oo' The following object is masked from 'package:R.methodsS3': throw The following objects are masked from 'package:methods': getClasses, getMethods The following objects are masked from 'package:base': attach, detach, load, save R.filesets v2.15.1 successfully loaded. See ?R.filesets for help. Attaching package: 'R.filesets' The following objects are masked from 'package:base': append, readLines > > message("*** TabularTextFileSet ...") *** TabularTextFileSet ... > > # Setup a file set consisting of all *.dat tab-delimited files > # in a particular directory > pathA <- system.file("exData", "dataSetA,original", package="R.filesets") > ds <- TabularTextFileSet$byPath(pathA, pattern="[.]dat$") > print(ds) TabularTextFileSet: Name: dataSetA Tags: original Full name: dataSetA,original Number of files: 5 Names: fileA, fileB, fileC, fileD, fileE [5] Path (to the first file): ../../lib/R.filesets/exData/dataSetA,original Total file size: 905 B (905 bytes) > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Extract one column with a particular name (one per file) > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Read column 'y' and a subset of the rows from each of the > # tab-delimited files and combine into a matrix > rows <- c(3:5, 8, 2) > data <- extractMatrix(ds, column="y", colClasses="integer", rows=rows, drop=TRUE) > print(data) fileA fileB fileC fileD fileE [1,] 3 5 6 7 7 [2,] 4 6 7 6 6 [3,] 5 10 10 10 10 [4,] 8 43 4 24 24 [5,] 2 2 2 3 3 > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Read data frames from each of the files > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > dataList <- lapply(ds, FUN=readDataFrame) > print(dataList) $`fileA,20100112` x y fac char 1 1 1 A a 2 1 2 A b 3 1 3 B c 4 1 4 A d 5 1 5 C e 6 1 6 A f 7 1 7 A g 8 1 8 C h 9 1 9 C i 10 1 10 A j $`fileB,other,tags` x y fac char 1 1 1 A r 2 1 2 B b 3 1 5 D g 4 1 6 A q 5 1 10 A f 6 2 12 D a 7 2 7 E g 8 2 43 C d 9 2 9 F g 10 3 12 A j $`fileC,inverted` x y fac char 1 1 3 D r 2 2 2 A b 3 2 6 D g 4 1 7 B q 5 3 10 B f 6 1 2 B a 7 2 24 E g 8 1 4 E d 9 1 1 G g 10 1 2 A j $`fileD,3cols` x y char 1 1 1 r 2 4 3 t 3 2 7 g 4 1 6 s 5 6 10 f 6 6 2 a 7 2 4 e 8 7 24 c 9 8 1 g 10 1 2 j $`fileE,headerArgs` x y char 1 1 1 r 2 4 3 t 3 2 7 g 4 1 6 s 5 6 10 f 6 6 2 a 7 2 4 e 8 7 24 c 9 8 1 g 10 1 2 j > > rows <- c(3:5, 8, 2) > dataList <- lapply(ds, FUN=readDataFrame, rows=rows) > print(dataList) $`fileA,20100112` x y fac char 3 1 3 B c 4 1 4 A d 5 1 5 C e 8 1 8 C h 2 1 2 A b $`fileB,other,tags` x y fac char 3 1 5 D g 4 1 6 A q 5 1 10 A f 8 2 43 C d 2 1 2 B b $`fileC,inverted` x y fac char 3 2 6 D g 4 1 7 B q 5 3 10 B f 8 1 4 E d 2 2 2 A b $`fileD,3cols` x y char 3 2 7 g 4 1 6 s 5 6 10 f 8 7 24 c 2 4 3 t $`fileE,headerArgs` x y char 3 2 7 g 4 1 6 s 5 6 10 f 8 7 24 c 2 4 3 t > > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Read common columns and stack into one data frame > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > colNames <- Reduce(intersect, lapply(ds, getColumnNames)) > cat("Common column names:\n") Common column names: > print(colNames) [1] "x" "y" "char" > > # Read the *common* columns "as is" (hence 'NA') > colClasses <- rep(NA, times=length(colNames)) > names(colClasses) <- colNames > cat("Column class patterns:\n") Column class patterns: > print(colClasses) x y char NA NA NA > > data <- readDataFrame(ds, colClasses=colClasses, verbose=TRUE) Reading data set as data frame... Reading all data files... Number of files: 5 Reading all data files...done Combining all data... Combining all data...done Reading data set as data frame...done > print(data) x y char 1 1 1 a 2 1 2 b 3 1 3 c 4 1 4 d 5 1 5 e 6 1 6 f 7 1 7 g 8 1 8 h 9 1 9 i 10 1 10 j 11 1 1 r 12 1 2 b 13 1 5 g 14 1 6 q 15 1 10 f 16 2 12 a 17 2 7 g 18 2 43 d 19 2 9 g 20 3 12 j 21 1 3 r 22 2 2 b 23 2 6 g 24 1 7 q 25 3 10 f 26 1 2 a 27 2 24 g 28 1 4 d 29 1 1 g 30 1 2 j 31 1 1 r 32 4 3 t 33 2 7 g 34 1 6 s 35 6 10 f 36 6 2 a 37 2 4 e 38 7 24 c 39 8 1 g 40 1 2 j 41 1 1 r 42 4 3 t 43 2 7 g 44 1 6 s 45 6 10 f 46 6 2 a 47 2 4 e 48 7 24 c 49 8 1 g 50 1 2 j > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Translate column names on the fly > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > lapply(ds, FUN=setColumnNamesTranslator, function(names, ...) toupper(names)) $`fileA,20100112` TabularTextFile: Name: fileA Tags: 20100112 Full name: fileA,20100112 Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileA,20100112.dat File size: 288 B (288 bytes) Number of data rows: 10 Columns [4]: 'X', 'Y', 'FAC', 'CHAR' Number of text lines: 18 $`fileB,other,tags` TabularTextFile: Name: fileB Tags: other,tags Full name: fileB,other,tags Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileB,other,tags.dat File size: 108 B (108 bytes) Number of data rows: 10 Columns [4]: 'X', 'Y', 'FAC', 'CHAR' Number of text lines: 11 $`fileC,inverted` TabularTextFile: Name: fileC Tags: inverted Full name: fileC,inverted Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileC,inverted.dat File size: 166 B (166 bytes) Number of data rows: 10 Columns [4]: 'X', 'Y', 'FAC', 'CHAR' Number of text lines: 13 $`fileD,3cols` TabularTextFile: Name: fileD Tags: 3cols Full name: fileD,3cols Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileD,3cols.dat File size: 142 B (142 bytes) Number of data rows: 10 Columns [3]: 'X', 'Y', 'CHAR' Number of text lines: 13 $`fileE,headerArgs` TabularTextFile: Name: fileE Tags: headerArgs Full name: fileE,headerArgs Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileE,headerArgs.dat File size: 201 B (201 bytes) Number of data rows: 10 Columns [3]: 'X', 'Y', 'CHAR' Number of text lines: 14 > data <- readDataFrame(ds, colClasses=c("(X|Y)"="integer", "CHAR"="character")) > print(data) X Y CHAR 1 1 1 a 2 1 2 b 3 1 3 c 4 1 4 d 5 1 5 e 6 1 6 f 7 1 7 g 8 1 8 h 9 1 9 i 10 1 10 j 11 1 1 r 12 1 2 b 13 1 5 g 14 1 6 q 15 1 10 f 16 2 12 a 17 2 7 g 18 2 43 d 19 2 9 g 20 3 12 j 21 1 3 r 22 2 2 b 23 2 6 g 24 1 7 q 25 3 10 f 26 1 2 a 27 2 24 g 28 1 4 d 29 1 1 g 30 1 2 j 31 1 1 r 32 4 3 t 33 2 7 g 34 1 6 s 35 6 10 f 36 6 2 a 37 2 4 e 38 7 24 c 39 8 1 g 40 1 2 j 41 1 1 r 42 4 3 t 43 2 7 g 44 1 6 s 45 6 10 f 46 6 2 a 47 2 4 e 48 7 24 c 49 8 1 g 50 1 2 j > > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # ADVANCED: Translation of fullnames > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > message("- Translation of fullnames") - Translation of fullnames > > ## Extra sanity checks to troubleshoot stall on CRAN MS Windows servers > path <- getPath(ds) > cat(sprintf("Data set path: %s\n", sQuote(path))) Data set path: '../../lib/R.filesets/exData/dataSetA,original' > stopifnot(length(path) > 0) > pattern <- ",fullnames[.]txt$" > cat(sprintf("Pattern: %s\n", sQuote(path))) Pattern: '../../lib/R.filesets/exData/dataSetA,original' > files <- dir(pattern = pattern, path = path, full.names = TRUE, all.files = TRUE) > cat(sprintf("Data set files [n = %d]:\n", length(files))) Data set files [n = 1]: > print(files) [1] "../../lib/R.filesets/exData/dataSetA,original/all,fullnames.txt" > stopifnot(length(files) > 0) > > fnts <- TabularTextFileSet$byPath(path, pattern = pattern) > print(fnts) TabularTextFileSet: Name: dataSetA Tags: original Full name: dataSetA,original Number of files: 1 Names: all [1] Path (to the first file): ../../lib/R.filesets/exData/dataSetA,original Total file size: 71 B (71 bytes) > str(as.list(fnts)) List of 1 $ all,fullnames: 'TabularTextFile' Named logi fileA sampleA ..- attr(*, ".env")= > > cat("Data set before applying fullname translator:\n") Data set before applying fullname translator: > print(ds) TabularTextFileSet: Name: dataSetA Tags: original Full name: dataSetA,original Number of files: 5 Names: fileA, fileB, fileC, fileD, fileE [5] Path (to the first file): ../../lib/R.filesets/exData/dataSetA,original Total file size: 905 B (905 bytes) > appendFullNamesTranslator(ds, as.list(fnts)) > > cat("Data set after applying fullname translator:\n") Data set after applying fullname translator: > print(ds) TabularTextFileSet: Name: dataSetA Tags: original Full name: dataSetA,original Number of files: 5 Names: sampleA, sampleB, sampleC, fileD, fileE [5] Path (to the first file): ../../lib/R.filesets/exData/dataSetA,original Total file size: 905 B (905 bytes) > > cat("Default fullnames:\n") Default fullnames: > print(head(getFullNames(ds, translate=FALSE))) [1] "fileA,20100112" "fileB,other,tags" "fileC,inverted" "fileD,3cols" [5] "fileE,headerArgs" > cat("Translated fullnames:\n") Translated fullnames: > print(head(getFullNames(ds))) [1] "sampleA,20100112" "sampleB,other,tags" "sampleC,fileC,inverted" [4] "fileD,3cols" "fileE,headerArgs" > > cat("Default fullnames:\n") Default fullnames: > print(getFullNames(ds, translate=FALSE)) [1] "fileA,20100112" "fileB,other,tags" "fileC,inverted" "fileD,3cols" [5] "fileE,headerArgs" > cat("Translated fullnames:\n") Translated fullnames: > print(getFullNames(ds)) [1] "sampleA,20100112" "sampleB,other,tags" "sampleC,fileC,inverted" [4] "fileD,3cols" "fileE,headerArgs" > > message("*** TabularTextFileSet ... DONE") *** TabularTextFileSet ... DONE > > source("incl/end.R") > > proc.time() user system elapsed 2.46 0.12 2.57