R Under development (unstable) (2024-01-23 r85822 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> source("incl/start.R")
Loading required package: R.oo
Loading required package: R.methodsS3
R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
R.oo v1.26.0 successfully loaded. See ?R.oo for help.

Attaching package: 'R.oo'

The following object is masked from 'package:R.methodsS3':

    throw

The following objects are masked from 'package:methods':

    getClasses, getMethods

The following objects are masked from 'package:base':

    attach, detach, load, save

R.filesets v2.15.1 successfully loaded. See ?R.filesets for help.

Attaching package: 'R.filesets'

The following objects are masked from 'package:base':

    append, readLines

> 
> message("*** TabularTextFile")
*** TabularTextFile
> 
> pathA <- system.file("exData", "dataSetA,original", package="R.filesets")
> pathB <- system.file("exData", "dataSetB", package="R.filesets")
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # File #1 - regular tab-delimited file
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> db <- TabularTextFile("fileB,other,tags.dat", path=pathA)
> print(db)
TabularTextFile:
Name: fileB
Tags: other,tags
Full name: fileB,other,tags
Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileB,other,tags.dat
File size: 108 B (108 bytes)
Number of data rows: 10
Columns [4]: 'x', 'y', 'fac', 'char'
Number of text lines: 11
> 
> # Read all data
> data <- readDataFrame(db, verbose=TRUE)
Reading TabularTextFile...
 Arguments inferred from file header:
 $header
 [1] TRUE
 
 $colClasses
    x    y  fac char 
   NA   NA   NA   NA 
 
 $skip
 [1] 0
 
 $sep
 [1] "\t"
 
 $quote
 [1] "\""
 
 $fill
 [1] FALSE
 
 $comment.char
 [1] "#"
 
 $check.names
 [1] FALSE
 
 $na.strings
 [1] "---" "NA" 
 
 $stringsAsFactors
 [1] FALSE
 
 Column names (4):
 x, y, fac, char
 Matching column names:...
  Column classes (4):
  NA, NA, NA, NA
 Matching column names:...done
 Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileB,other,tags.dat
 Calling read.table()...
  Arguments used to read tabular file:
  [[1]]
  A connection with                                                                                
  description "../../lib/R.filesets/exData/dataSetA,original/fileB,other,tags.dat"
  class       "file"                                                              
  mode        "r"                                                                 
  text        "text"                                                              
  opened      "opened"                                                            
  can read    "yes"                                                               
  can write   "no"                                                                
  
  $header
  [1] TRUE
  
  $colClasses
  [1] NA NA NA NA
  
  $skip
  [1] 0
  
  $sep
  [1] "\t"
  
  $quote
  [1] "\""
  
  $fill
  [1] FALSE
  
  $comment.char
  [1] "#"
  
  $check.names
  [1] FALSE
  
  $na.strings
  [1] "---" "NA" 
  
  $stringsAsFactors
  [1] FALSE
  
  Raw data read by read.table():
  'data.frame':	10 obs. of  4 variables:
   $ x   : int  1 1 1 1 1 2 2 2 2 3
   $ y   : int  1 2 5 6 10 12 7 43 9 12
   $ fac : chr  "A" "B" "D" "A" ...
   $ char: chr  "r" "b" "g" "q" ...
  Number of rows read: 10
  Number of columns read: 4
  'data.frame':	10 obs. of  4 variables:
   $ x   : int  1 1 1 1 1 2 2 2 2 3
   $ y   : int  1 2 5 6 10 12 7 43 9 12
   $ fac : chr  "A" "B" "D" "A" ...
   $ char: chr  "r" "b" "g" "q" ...
 Calling read.table()...done
Reading TabularTextFile...done
> print(data)
   x  y fac char
1  1  1   A    r
2  1  2   B    b
3  1  5   D    g
4  1  6   A    q
5  1 10   A    f
6  2 12   D    a
7  2  7   E    g
8  2 43   C    d
9  2  9   F    g
10 3 12   A    j
> 
> # Read columns
> dataC <- readColumns(db, verbose=TRUE)
Reading columns...
 Argument 'columns': 1, 2, 3, 4
 Argument 'colClasses':
 NULL
 Column names': x, y, fac, char
 Column classes:
         ^x$         ^y$       ^fac$      ^char$ 
 "character" "character" "character" "character" 
 Subsetting columns...
  'data.frame':	10 obs. of  4 variables:
   $ x   : chr  "1" "1" "1" "1" ...
   $ y   : chr  "1" "2" "5" "6" ...
   $ fac : chr  "A" "B" "D" "A" ...
   $ char: chr  "r" "b" "g" "q" ...
  Columns to keep: 1, 2, 3, 4
 Subsetting columns...done
Reading columns...done
> print(dataC)
   x  y fac char
1  1  1   A    r
2  1  2   B    b
3  1  5   D    g
4  1  6   A    q
5  1 10   A    f
6  2 12   D    a
7  2  7   E    g
8  2 43   C    d
9  2  9   F    g
10 3 12   A    j
> 
> # Extract a particular column by its name
> dataY <- extractMatrix(db, column="y", colClasses="integer")
> 
> # Validate
> stopifnot(identical(dataY[,1], data$y))
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # File #2 - with header comments
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> db <- TabularTextFile("fileA,20100112.dat", path=pathA)
> print(db)
TabularTextFile:
Name: fileA
Tags: 20100112
Full name: fileA,20100112
Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileA,20100112.dat
File size: 288 B (288 bytes)
Number of data rows: 10
Columns [4]: 'x', 'y', 'fac', 'char'
Number of text lines: 18
> 
> # Read all data
> data <- readDataFrame(db)
> print(data)
   x  y fac char
1  1  1   A    a
2  1  2   A    b
3  1  3   B    c
4  1  4   A    d
5  1  5   C    e
6  1  6   A    f
7  1  7   A    g
8  1  8   C    h
9  1  9   C    i
10 1 10   A    j
> 
> # Read columns 'x', 'y', and 'char'
> data <- readDataFrame(db, colClasses=c("(x|y)"="integer", "char"="character"))
> print(data)
   x  y char
1  1  1    a
2  1  2    b
3  1  3    c
4  1  4    d
5  1  5    e
6  1  6    f
7  1  7    g
8  1  8    h
9  1  9    i
10 1 10    j
> 
> # Translate column names on the fly
> db <- setColumnNamesTranslator(db, function(names, ...) toupper(names))
> data <- readDataFrame(db, colClasses=c("(X|Y)"="integer", "CHAR"="character"))
> print(data)
   X  Y CHAR
1  1  1    a
2  1  2    b
3  1  3    c
4  1  4    d
5  1  5    e
6  1  6    f
7  1  7    g
8  1  8    h
9  1  9    i
10 1 10    j
> 
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # File #3 - column names in header comments
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> db <- TabularTextFile("fileE,headerArgs.dat", path=pathA)
> print(db)
TabularTextFile:
Name: fileE
Tags: headerArgs
Full name: fileE,headerArgs
Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileE,headerArgs.dat
File size: 201 B (201 bytes)
Number of data rows: 10
Columns [3]: 'x', 'y', 'char'
Number of text lines: 14
> 
> # Read all data
> data <- readDataFrame(db)
> print(data)
   x  y char
1  1  1    r
2  4  3    t
3  2  7    g
4  1  6    s
5  6 10    f
6  6  2    a
7  2  4    e
8  7 24    c
9  8  1    g
10 1  2    j
> 
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # File #4 - with neither column names nor header comments
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> db <- TabularTextFile("fileF,noHeader.dat", path=pathB, columnNames=FALSE)
> print(db)
TabularTextFile:
Name: fileF
Tags: noHeader
Full name: fileF,noHeader
Pathname: ../../lib/R.filesets/exData/dataSetB/fileF,noHeader.dat
File size: 72 B (72 bytes)
Number of data rows: 10
Columns [NA]: <not reading column names>
Number of text lines: 10
> 
> # Read all data
> data <- readDataFrame(db)
> print(data)
   V1 V2 V3
1   1  1  r
2   4  3  t
3   2  7  g
4   1  6  s
5   6 10  f
6   6  2  a
7   2  4  e
8   7 24  c
9   8  1  g
10  1  2  j
> str(data)
'data.frame':	10 obs. of  3 variables:
 $ V1: int  1 4 2 1 6 6 2 7 8 1
 $ V2: int  1 3 7 6 10 2 4 24 1 2
 $ V3: chr  "r" "t" "g" "s" ...
> 
> # Use column classes
> colClasses <- rep(NA_character_, times=nbrOfColumns(db))
> colClasses[length(colClasses)] <- "NULL"
> data <- readDataFrame(db, colClasses=colClasses)
> print(data)
   V1 V2
1   1  1
2   4  3
3   2  7
4   1  6
5   6 10
6   6  2
7   2  4
8   7 24
9   8  1
10  1  2
> str(data)
'data.frame':	10 obs. of  2 variables:
 $ V1: int  1 4 2 1 6 6 2 7 8 1
 $ V2: int  1 3 7 6 10 2 4 24 1 2
> 
> # Sanity check
> stopifnot(ncol(data) == nbrOfColumns(db) - 1L)
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # File #5 - with and without newline for the last line
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> df1 <- TabularTextFile("fileG,EOL.txt", path=pathB)
> print(df1)
TabularTextFile:
Name: fileG
Tags: EOL
Full name: fileG,EOL
Pathname: ../../lib/R.filesets/exData/dataSetB/fileG,EOL.txt
File size: 122 B (122 bytes)
Number of data rows: 10
Columns [5]: 'index', 'x', 'y', 'fac', 'char'
Number of text lines: 11
> data1 <- readDataFrame(df1)
> 
> df2 <- TabularTextFile("fileG,noEOL.txt", path=pathB)
> print(df2)
TabularTextFile:
Name: fileG
Tags: noEOL
Full name: fileG,noEOL
Pathname: ../../lib/R.filesets/exData/dataSetB/fileG,noEOL.txt
File size: 121 B (121 bytes)
Number of data rows: 10
Columns [5]: 'index', 'x', 'y', 'fac', 'char'
Number of text lines: 11
> data2 <- readDataFrame(df2)
> 
> # Sanity checks
> stopifnot(identical(data2, data1))
> 
> source("incl/end.R")
> 
> proc.time()
   user  system elapsed 
   1.32    0.07    1.39