R Under development (unstable) (2024-01-23 r85822 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > source("incl/start.R") Loading required package: R.oo Loading required package: R.methodsS3 R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help. R.oo v1.26.0 successfully loaded. See ?R.oo for help. Attaching package: 'R.oo' The following object is masked from 'package:R.methodsS3': throw The following objects are masked from 'package:methods': getClasses, getMethods The following objects are masked from 'package:base': attach, detach, load, save R.filesets v2.15.1 successfully loaded. See ?R.filesets for help. Attaching package: 'R.filesets' The following objects are masked from 'package:base': append, readLines > > message("*** TabularTextFile") *** TabularTextFile > > pathA <- system.file("exData", "dataSetA,original", package="R.filesets") > pathB <- system.file("exData", "dataSetB", package="R.filesets") > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # File #1 - regular tab-delimited file > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > db <- TabularTextFile("fileB,other,tags.dat", path=pathA) > print(db) TabularTextFile: Name: fileB Tags: other,tags Full name: fileB,other,tags Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileB,other,tags.dat File size: 108 B (108 bytes) Number of data rows: 10 Columns [4]: 'x', 'y', 'fac', 'char' Number of text lines: 11 > > # Read all data > data <- readDataFrame(db, verbose=TRUE) Reading TabularTextFile... Arguments inferred from file header: $header [1] TRUE $colClasses x y fac char NA NA NA NA $skip [1] 0 $sep [1] "\t" $quote [1] "\"" $fill [1] FALSE $comment.char [1] "#" $check.names [1] FALSE $na.strings [1] "---" "NA" $stringsAsFactors [1] FALSE Column names (4): x, y, fac, char Matching column names:... Column classes (4): NA, NA, NA, NA Matching column names:...done Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileB,other,tags.dat Calling read.table()... Arguments used to read tabular file: [[1]] A connection with description "../../lib/R.filesets/exData/dataSetA,original/fileB,other,tags.dat" class "file" mode "r" text "text" opened "opened" can read "yes" can write "no" $header [1] TRUE $colClasses [1] NA NA NA NA $skip [1] 0 $sep [1] "\t" $quote [1] "\"" $fill [1] FALSE $comment.char [1] "#" $check.names [1] FALSE $na.strings [1] "---" "NA" $stringsAsFactors [1] FALSE Raw data read by read.table(): 'data.frame': 10 obs. of 4 variables: $ x : int 1 1 1 1 1 2 2 2 2 3 $ y : int 1 2 5 6 10 12 7 43 9 12 $ fac : chr "A" "B" "D" "A" ... $ char: chr "r" "b" "g" "q" ... Number of rows read: 10 Number of columns read: 4 'data.frame': 10 obs. of 4 variables: $ x : int 1 1 1 1 1 2 2 2 2 3 $ y : int 1 2 5 6 10 12 7 43 9 12 $ fac : chr "A" "B" "D" "A" ... $ char: chr "r" "b" "g" "q" ... Calling read.table()...done Reading TabularTextFile...done > print(data) x y fac char 1 1 1 A r 2 1 2 B b 3 1 5 D g 4 1 6 A q 5 1 10 A f 6 2 12 D a 7 2 7 E g 8 2 43 C d 9 2 9 F g 10 3 12 A j > > # Read columns > dataC <- readColumns(db, verbose=TRUE) Reading columns... Argument 'columns': 1, 2, 3, 4 Argument 'colClasses': NULL Column names': x, y, fac, char Column classes: ^x$ ^y$ ^fac$ ^char$ "character" "character" "character" "character" Subsetting columns... 'data.frame': 10 obs. of 4 variables: $ x : chr "1" "1" "1" "1" ... $ y : chr "1" "2" "5" "6" ... $ fac : chr "A" "B" "D" "A" ... $ char: chr "r" "b" "g" "q" ... Columns to keep: 1, 2, 3, 4 Subsetting columns...done Reading columns...done > print(dataC) x y fac char 1 1 1 A r 2 1 2 B b 3 1 5 D g 4 1 6 A q 5 1 10 A f 6 2 12 D a 7 2 7 E g 8 2 43 C d 9 2 9 F g 10 3 12 A j > > # Extract a particular column by its name > dataY <- extractMatrix(db, column="y", colClasses="integer") > > # Validate > stopifnot(identical(dataY[,1], data$y)) > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # File #2 - with header comments > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > db <- TabularTextFile("fileA,20100112.dat", path=pathA) > print(db) TabularTextFile: Name: fileA Tags: 20100112 Full name: fileA,20100112 Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileA,20100112.dat File size: 288 B (288 bytes) Number of data rows: 10 Columns [4]: 'x', 'y', 'fac', 'char' Number of text lines: 18 > > # Read all data > data <- readDataFrame(db) > print(data) x y fac char 1 1 1 A a 2 1 2 A b 3 1 3 B c 4 1 4 A d 5 1 5 C e 6 1 6 A f 7 1 7 A g 8 1 8 C h 9 1 9 C i 10 1 10 A j > > # Read columns 'x', 'y', and 'char' > data <- readDataFrame(db, colClasses=c("(x|y)"="integer", "char"="character")) > print(data) x y char 1 1 1 a 2 1 2 b 3 1 3 c 4 1 4 d 5 1 5 e 6 1 6 f 7 1 7 g 8 1 8 h 9 1 9 i 10 1 10 j > > # Translate column names on the fly > db <- setColumnNamesTranslator(db, function(names, ...) toupper(names)) > data <- readDataFrame(db, colClasses=c("(X|Y)"="integer", "CHAR"="character")) > print(data) X Y CHAR 1 1 1 a 2 1 2 b 3 1 3 c 4 1 4 d 5 1 5 e 6 1 6 f 7 1 7 g 8 1 8 h 9 1 9 i 10 1 10 j > > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # File #3 - column names in header comments > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > db <- TabularTextFile("fileE,headerArgs.dat", path=pathA) > print(db) TabularTextFile: Name: fileE Tags: headerArgs Full name: fileE,headerArgs Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileE,headerArgs.dat File size: 201 B (201 bytes) Number of data rows: 10 Columns [3]: 'x', 'y', 'char' Number of text lines: 14 > > # Read all data > data <- readDataFrame(db) > print(data) x y char 1 1 1 r 2 4 3 t 3 2 7 g 4 1 6 s 5 6 10 f 6 6 2 a 7 2 4 e 8 7 24 c 9 8 1 g 10 1 2 j > > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # File #4 - with neither column names nor header comments > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > db <- TabularTextFile("fileF,noHeader.dat", path=pathB, columnNames=FALSE) > print(db) TabularTextFile: Name: fileF Tags: noHeader Full name: fileF,noHeader Pathname: ../../lib/R.filesets/exData/dataSetB/fileF,noHeader.dat File size: 72 B (72 bytes) Number of data rows: 10 Columns [NA]: Number of text lines: 10 > > # Read all data > data <- readDataFrame(db) > print(data) V1 V2 V3 1 1 1 r 2 4 3 t 3 2 7 g 4 1 6 s 5 6 10 f 6 6 2 a 7 2 4 e 8 7 24 c 9 8 1 g 10 1 2 j > str(data) 'data.frame': 10 obs. of 3 variables: $ V1: int 1 4 2 1 6 6 2 7 8 1 $ V2: int 1 3 7 6 10 2 4 24 1 2 $ V3: chr "r" "t" "g" "s" ... > > # Use column classes > colClasses <- rep(NA_character_, times=nbrOfColumns(db)) > colClasses[length(colClasses)] <- "NULL" > data <- readDataFrame(db, colClasses=colClasses) > print(data) V1 V2 1 1 1 2 4 3 3 2 7 4 1 6 5 6 10 6 6 2 7 2 4 8 7 24 9 8 1 10 1 2 > str(data) 'data.frame': 10 obs. of 2 variables: $ V1: int 1 4 2 1 6 6 2 7 8 1 $ V2: int 1 3 7 6 10 2 4 24 1 2 > > # Sanity check > stopifnot(ncol(data) == nbrOfColumns(db) - 1L) > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # File #5 - with and without newline for the last line > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > df1 <- TabularTextFile("fileG,EOL.txt", path=pathB) > print(df1) TabularTextFile: Name: fileG Tags: EOL Full name: fileG,EOL Pathname: ../../lib/R.filesets/exData/dataSetB/fileG,EOL.txt File size: 122 B (122 bytes) Number of data rows: 10 Columns [5]: 'index', 'x', 'y', 'fac', 'char' Number of text lines: 11 > data1 <- readDataFrame(df1) > > df2 <- TabularTextFile("fileG,noEOL.txt", path=pathB) > print(df2) TabularTextFile: Name: fileG Tags: noEOL Full name: fileG,noEOL Pathname: ../../lib/R.filesets/exData/dataSetB/fileG,noEOL.txt File size: 121 B (121 bytes) Number of data rows: 10 Columns [5]: 'index', 'x', 'y', 'fac', 'char' Number of text lines: 11 > data2 <- readDataFrame(df2) > > # Sanity checks > stopifnot(identical(data2, data1)) > > source("incl/end.R") > > proc.time() user system elapsed 1.32 0.07 1.39