R Under development (unstable) (2024-01-23 r85822 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> source("incl/start.R")
Loading required package: R.oo
Loading required package: R.methodsS3
R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
R.oo v1.26.0 successfully loaded. See ?R.oo for help.

Attaching package: 'R.oo'

The following object is masked from 'package:R.methodsS3':

    throw

The following objects are masked from 'package:methods':

    getClasses, getMethods

The following objects are masked from 'package:base':

    attach, detach, load, save

R.filesets v2.15.1 successfully loaded. See ?R.filesets for help.

Attaching package: 'R.filesets'

The following objects are masked from 'package:base':

    append, readLines

> 
> message("*** TabularTextFileSet ...")
*** TabularTextFileSet ...
> 
> # Setup a file set consisting of all *.dat tab-delimited files
> # in a particular directory
> pathA <- system.file("exData", "dataSetA,original", package="R.filesets")
> ds <- TabularTextFileSet$byPath(pathA, pattern="[.]dat$")
> print(ds)
TabularTextFileSet:
Name: dataSetA
Tags: original
Full name: dataSetA,original
Number of files: 5
Names: fileA, fileB, fileC, fileD, fileE [5]
Path (to the first file): ../../lib/R.filesets/exData/dataSetA,original
Total file size: 905 B (905 bytes)
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # Extract one column with a particular name (one per file)
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # Read column 'y' and a subset of the rows from each of the
> # tab-delimited files and combine into a matrix
> rows <- c(3:5, 8, 2)
> data <- extractMatrix(ds, column="y", colClasses="integer", rows=rows, drop=TRUE)
> print(data)
     fileA fileB fileC fileD fileE
[1,]     3     5     6     7     7
[2,]     4     6     7     6     6
[3,]     5    10    10    10    10
[4,]     8    43     4    24    24
[5,]     2     2     2     3     3
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # Read data frames from each of the files
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> dataList <- lapply(ds, FUN=readDataFrame)
> print(dataList)
$`fileA,20100112`
   x  y fac char
1  1  1   A    a
2  1  2   A    b
3  1  3   B    c
4  1  4   A    d
5  1  5   C    e
6  1  6   A    f
7  1  7   A    g
8  1  8   C    h
9  1  9   C    i
10 1 10   A    j

$`fileB,other,tags`
   x  y fac char
1  1  1   A    r
2  1  2   B    b
3  1  5   D    g
4  1  6   A    q
5  1 10   A    f
6  2 12   D    a
7  2  7   E    g
8  2 43   C    d
9  2  9   F    g
10 3 12   A    j

$`fileC,inverted`
   x  y fac char
1  1  3   D    r
2  2  2   A    b
3  2  6   D    g
4  1  7   B    q
5  3 10   B    f
6  1  2   B    a
7  2 24   E    g
8  1  4   E    d
9  1  1   G    g
10 1  2   A    j

$`fileD,3cols`
   x  y char
1  1  1    r
2  4  3    t
3  2  7    g
4  1  6    s
5  6 10    f
6  6  2    a
7  2  4    e
8  7 24    c
9  8  1    g
10 1  2    j

$`fileE,headerArgs`
   x  y char
1  1  1    r
2  4  3    t
3  2  7    g
4  1  6    s
5  6 10    f
6  6  2    a
7  2  4    e
8  7 24    c
9  8  1    g
10 1  2    j

> 
> rows <- c(3:5, 8, 2)
> dataList <- lapply(ds, FUN=readDataFrame, rows=rows)
> print(dataList)
$`fileA,20100112`
  x y fac char
3 1 3   B    c
4 1 4   A    d
5 1 5   C    e
8 1 8   C    h
2 1 2   A    b

$`fileB,other,tags`
  x  y fac char
3 1  5   D    g
4 1  6   A    q
5 1 10   A    f
8 2 43   C    d
2 1  2   B    b

$`fileC,inverted`
  x  y fac char
3 2  6   D    g
4 1  7   B    q
5 3 10   B    f
8 1  4   E    d
2 2  2   A    b

$`fileD,3cols`
  x  y char
3 2  7    g
4 1  6    s
5 6 10    f
8 7 24    c
2 4  3    t

$`fileE,headerArgs`
  x  y char
3 2  7    g
4 1  6    s
5 6 10    f
8 7 24    c
2 4  3    t

> 
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # Read common columns and stack into one data frame
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> colNames <- Reduce(intersect, lapply(ds, getColumnNames))
> cat("Common column names:\n")
Common column names:
> print(colNames)
[1] "x"    "y"    "char"
> 
> # Read the *common* columns "as is" (hence 'NA')
> colClasses <- rep(NA, times=length(colNames))
> names(colClasses) <- colNames
> cat("Column class patterns:\n")
Column class patterns:
> print(colClasses)
   x    y char 
  NA   NA   NA 
> 
> data <- readDataFrame(ds, colClasses=colClasses, verbose=TRUE)
Reading data set as data frame...
 Reading all data files...
  Number of files: 5
 Reading all data files...done
 Combining all data...
 Combining all data...done
Reading data set as data frame...done
> print(data)
   x  y char
1  1  1    a
2  1  2    b
3  1  3    c
4  1  4    d
5  1  5    e
6  1  6    f
7  1  7    g
8  1  8    h
9  1  9    i
10 1 10    j
11 1  1    r
12 1  2    b
13 1  5    g
14 1  6    q
15 1 10    f
16 2 12    a
17 2  7    g
18 2 43    d
19 2  9    g
20 3 12    j
21 1  3    r
22 2  2    b
23 2  6    g
24 1  7    q
25 3 10    f
26 1  2    a
27 2 24    g
28 1  4    d
29 1  1    g
30 1  2    j
31 1  1    r
32 4  3    t
33 2  7    g
34 1  6    s
35 6 10    f
36 6  2    a
37 2  4    e
38 7 24    c
39 8  1    g
40 1  2    j
41 1  1    r
42 4  3    t
43 2  7    g
44 1  6    s
45 6 10    f
46 6  2    a
47 2  4    e
48 7 24    c
49 8  1    g
50 1  2    j
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # Translate column names on the fly
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> lapply(ds, FUN=setColumnNamesTranslator, function(names, ...) toupper(names))
$`fileA,20100112`
TabularTextFile:
Name: fileA
Tags: 20100112
Full name: fileA,20100112
Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileA,20100112.dat
File size: 288 B (288 bytes)
Number of data rows: 10
Columns [4]: 'X', 'Y', 'FAC', 'CHAR'
Number of text lines: 18

$`fileB,other,tags`
TabularTextFile:
Name: fileB
Tags: other,tags
Full name: fileB,other,tags
Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileB,other,tags.dat
File size: 108 B (108 bytes)
Number of data rows: 10
Columns [4]: 'X', 'Y', 'FAC', 'CHAR'
Number of text lines: 11

$`fileC,inverted`
TabularTextFile:
Name: fileC
Tags: inverted
Full name: fileC,inverted
Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileC,inverted.dat
File size: 166 B (166 bytes)
Number of data rows: 10
Columns [4]: 'X', 'Y', 'FAC', 'CHAR'
Number of text lines: 13

$`fileD,3cols`
TabularTextFile:
Name: fileD
Tags: 3cols
Full name: fileD,3cols
Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileD,3cols.dat
File size: 142 B (142 bytes)
Number of data rows: 10
Columns [3]: 'X', 'Y', 'CHAR'
Number of text lines: 13

$`fileE,headerArgs`
TabularTextFile:
Name: fileE
Tags: headerArgs
Full name: fileE,headerArgs
Pathname: ../../lib/R.filesets/exData/dataSetA,original/fileE,headerArgs.dat
File size: 201 B (201 bytes)
Number of data rows: 10
Columns [3]: 'X', 'Y', 'CHAR'
Number of text lines: 14

> data <- readDataFrame(ds, colClasses=c("(X|Y)"="integer", "CHAR"="character"))
> print(data)
   X  Y CHAR
1  1  1    a
2  1  2    b
3  1  3    c
4  1  4    d
5  1  5    e
6  1  6    f
7  1  7    g
8  1  8    h
9  1  9    i
10 1 10    j
11 1  1    r
12 1  2    b
13 1  5    g
14 1  6    q
15 1 10    f
16 2 12    a
17 2  7    g
18 2 43    d
19 2  9    g
20 3 12    j
21 1  3    r
22 2  2    b
23 2  6    g
24 1  7    q
25 3 10    f
26 1  2    a
27 2 24    g
28 1  4    d
29 1  1    g
30 1  2    j
31 1  1    r
32 4  3    t
33 2  7    g
34 1  6    s
35 6 10    f
36 6  2    a
37 2  4    e
38 7 24    c
39 8  1    g
40 1  2    j
41 1  1    r
42 4  3    t
43 2  7    g
44 1  6    s
45 6 10    f
46 6  2    a
47 2  4    e
48 7 24    c
49 8  1    g
50 1  2    j
> 
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # ADVANCED: Translation of fullnames
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> message("- Translation of fullnames")
- Translation of fullnames
> 
> ## Extra sanity checks to troubleshoot stall on CRAN MS Windows servers
> path <- getPath(ds)
> cat(sprintf("Data set path: %s\n", sQuote(path)))
Data set path: '../../lib/R.filesets/exData/dataSetA,original'
> stopifnot(length(path) > 0)
> pattern <- ",fullnames[.]txt$"
> cat(sprintf("Pattern: %s\n", sQuote(pattern)))
Pattern: ',fullnames[.]txt$'
> files <- dir(pattern = pattern, path = path, full.names = TRUE, all.files = TRUE)
> cat(sprintf("Data set files [n = %d]:\n", length(files)))
Data set files [n = 1]:
> print(files)
[1] "../../lib/R.filesets/exData/dataSetA,original/all,fullnames.txt"
> stopifnot(length(files) > 0)
> 
> fnts <- TabularTextFileSet$byPath(path, pattern = pattern)
> print(fnts)
TabularTextFileSet:
Name: dataSetA
Tags: original
Full name: dataSetA,original
Number of files: 1
Names: all [1]
Path (to the first file): ../../lib/R.filesets/exData/dataSetA,original
Total file size: 71 B (71 bytes)
> str(as.list(fnts))
List of 1
 $ all,fullnames: 'TabularTextFile' Named logi fileA sampleA
  ..- attr(*, ".env")=<environment: 0x0000028261f26180> 
> 
> cat("Data set before applying fullname translator:\n")
Data set before applying fullname translator:
> print(ds)
TabularTextFileSet:
Name: dataSetA
Tags: original
Full name: dataSetA,original
Number of files: 5
Names: fileA, fileB, fileC, fileD, fileE [5]
Path (to the first file): ../../lib/R.filesets/exData/dataSetA,original
Total file size: 905 B (905 bytes)
> appendFullNamesTranslator(ds, as.list(fnts))
> 
> cat("Data set after applying fullname translator:\n")
Data set after applying fullname translator:
> print(ds)
TabularTextFileSet:
Name: dataSetA
Tags: original
Full name: dataSetA,original
Number of files: 5
Names: sampleA, sampleB, sampleC, fileD, fileE [5]
Path (to the first file): ../../lib/R.filesets/exData/dataSetA,original
Total file size: 905 B (905 bytes)
> 
> cat("Default fullnames:\n")
Default fullnames:
> print(head(getFullNames(ds, translate=FALSE)))
[1] "fileA,20100112"   "fileB,other,tags" "fileC,inverted"   "fileD,3cols"     
[5] "fileE,headerArgs"
> cat("Translated fullnames:\n")
Translated fullnames:
> print(head(getFullNames(ds)))
[1] "sampleA,20100112"       "sampleB,other,tags"     "sampleC,fileC,inverted"
[4] "fileD,3cols"            "fileE,headerArgs"      
> 
> cat("Default fullnames:\n")
Default fullnames:
> print(getFullNames(ds, translate=FALSE))
[1] "fileA,20100112"   "fileB,other,tags" "fileC,inverted"   "fileD,3cols"     
[5] "fileE,headerArgs"
> cat("Translated fullnames:\n")
Translated fullnames:
> print(getFullNames(ds))
[1] "sampleA,20100112"       "sampleB,other,tags"     "sampleC,fileC,inverted"
[4] "fileD,3cols"            "fileE,headerArgs"      
> 
> message("*** TabularTextFileSet ... DONE")
*** TabularTextFileSet ... DONE
> 
> source("incl/end.R")
> 
> proc.time()
   user  system elapsed 
   2.46    0.12    2.57