R Under development (unstable) (2024-02-01 r85851 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > #### Artificial example with 3 or 4 true components : > library(sca) > > ## Better Sig (eigen space), but *still* BLAS/Lapack version etc platform diffs > ## > mvrnorm <- MASS::mvrnorm # (and we see MASS in sessionInfo) > Sig <- function(p, rho) toeplitz(rho^(seq_len(p)-1L)) > rmvN <- function(n,p, rho) mvrnorm(n, mu=rep(0,p), Sigma = Sig(p, rho)) > > ## platform details > sessionInfo() R Under development (unstable) (2024-02-01 r85851 ucrt) Platform: x86_64-w64-mingw32/x64 Running under: Windows Server 2022 x64 (build 20348) Matrix products: default locale: [1] LC_COLLATE=C LC_CTYPE=German_Germany.utf8 [3] LC_MONETARY=C LC_NUMERIC=C [5] LC_TIME=C time zone: Europe/Berlin tzcode source: internal attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] sca_0.9-2 loaded via a namespace (and not attached): [1] MASS_7.3-60.2 compiler_4.4.0 > str(.Machine) List of 29 $ double.eps : num 2.22e-16 $ double.neg.eps : num 1.11e-16 $ double.xmin : num 2.23e-308 $ double.xmax : num 1.8e+308 $ double.base : int 2 $ double.digits : int 53 $ double.rounding : int 5 $ double.guard : int 0 $ double.ulp.digits : int -52 $ double.neg.ulp.digits : int -53 $ double.exponent : int 11 $ double.min.exp : int -1022 $ double.max.exp : int 1024 $ integer.max : int 2147483647 $ sizeof.long : int 4 $ sizeof.longlong : int 8 $ sizeof.longdouble : int 16 $ sizeof.pointer : int 8 $ sizeof.time_t : int 8 $ longdouble.eps : num 1.08e-19 $ longdouble.neg.eps : num 5.42e-20 $ longdouble.digits : int 64 $ longdouble.rounding : int 5 $ longdouble.guard : int 0 $ longdouble.ulp.digits : int -63 $ longdouble.neg.ulp.digits: int -64 $ longdouble.exponent : int 15 $ longdouble.min.exp : int -16382 $ longdouble.max.exp : int 16384 > > set.seed(253) > ## random matrix > mr <- matrix(rnorm(1000), 50, 20) > > .proctime00 <- proc.time() > > (scr0 <- sca(cor(mr))) ------------------------------------------------------------ Simple Component Analysis ------------------------------------------------------------ Optimality criterion : corrected sum of variances Clustering procedure : median linkage Within-block differences : TRUE Possible invertion of signs : FALSE Number of block-components : 1 Number of diff.-components : 4 ------------------------------------------------------------ Simple matrix: B1 D2 D3 D4 D5 V1 1 1 7 7 3 V2 1 1 0 0 3 V3 1 1 7 0 -5 V4 1 0 7 0 -5 V5 1 -1 -8 7 3 V6 1 1 -8 -6 -5 V7 1 -1 0 -6 3 V8 1 0 -8 7 3 V9 1 -1 -8 0 -5 V10 1 -1 7 0 3 V11 1 1 -8 0 3 V12 1 1 -8 0 0 V13 1 0 0 -6 3 V14 1 0 7 7 3 V15 1 0 7 7 0 V16 1 0 0 7 -5 V17 1 -1 -8 -6 0 V18 1 0 7 -6 3 V19 1 -1 7 -6 0 V20 1 0 0 -6 -5 ------------------------------------------------------------ Variance principal components: 11.83 % 9.44 % 9.14 % 8.75 % 7.62 % Variance simple components : 4.72 % 10.91 % 8.58 % 8.43 % 7.76 % ------------------------------------------------------------ Extracted variability PCA: 46.78 % Extracted variability SCA: 39.45 % Optimality SCA : 84.33 % ------------------------------------------------------------ Correlations simple components: B1 D2 D3 D4 D5 B1 1.00 0.11 -0.01 0.06 0.16 D2 0.11 1.00 -0.05 0.17 -0.11 D3 -0.01 -0.05 1.00 0.07 0.06 D4 0.06 0.17 0.07 1.00 0.13 D5 0.16 -0.11 0.06 0.13 1.00 ------------------------------------------------------------ Max (abs) correlation: 0.17 ( D2 - D4 ) ------------------------------------------------------------ > if(FALSE) + dput(sapply(scr0$allcrit[1:5], signif)) > acrit5 <- + list(varpc = c(0.118292, 0.0944164, 0.0913502, 0.0875232, 0.0762098), + varsc = c(B1 = 0.0472054, D2 = 0.109121, D3 = 0.0858094, + D4 = 0.0842553, D5 = 0.0775506), + cumpc = 0.467791, cumsc = 0.394491, opt = 0.843305) > all.equal(acrit5, scr0$allcrit[1:5], tolerance = 0) [1] "Component \"varpc\": Mean relative difference: 1.100458e-06" [2] "Component \"varsc\": Mean relative difference: 3.042223e-07" [3] "Component \"cumpc\": Mean relative difference: 5.666106e-07" [4] "Component \"cumsc\": Mean relative difference: 6.540004e-07" [5] "Component \"opt\": Mean relative difference: 7.408353e-08" > ## ^^ to gauge this: > with(scr0, + stopifnot( + nblock == 1, ndiff == 4, + all.equal(acrit5, scr0$allcrit[1:5], tolerance = .001))) > > > scr <- sca(cor(mr), q = 5, corblocks = 0.12) > stopifnot(all.equal(scr, scr0)) > > > > ##- Nr. 1 --- p = 3+2+4 = 9 ------------------ > > set.seed(1324) # re-setting random seed - still difference 32bit - 64bit > m3b <- cbind(rmvN(512, 3, 0.7), + rmvN(512, 2, 0.9), + rmvN(512, 4, 0.8)) > ## Show near block-structure of cor. matrix : > C3b <- cor(m3b) > symnum(C3b, lower.tri = FALSE) # (already small platform differ.!!) [1,] 1 , . [2,] , 1 , [3,] . , 1 [4,] 1 * [5,] * 1 [6,] 1 + , . [7,] + 1 + , [8,] , + 1 , [9,] . , , 1 attr(,"legend") [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1 > > b01 <- Matrix::bdiag(matrix(TRUE, 3,3), + matrix(TRUE, 2,2), + matrix(TRUE, 4,4)) > stopifnot(identical(b01, as(C3b > 0.4, "CsparseMatrix"))) > > sc3b <- sca(C3b) > sc3b ------------------------------------------------------------ Simple Component Analysis ------------------------------------------------------------ Optimality criterion : corrected sum of variances Clustering procedure : median linkage Within-block differences : TRUE Possible invertion of signs : FALSE Number of block-components : 3 Number of diff.-components : 2 ------------------------------------------------------------ Simple matrix: B1 B2 B3 D4 D5 V1 0 1 0 1 0 V2 0 1 0 0 0 V3 0 1 0 -1 0 V4 0 0 1 0 0 V5 0 0 1 0 0 V6 1 0 0 0 1 V7 1 0 0 0 1 V8 1 0 0 0 0 V9 1 0 0 0 -2 ------------------------------------------------------------ Variance principal components: 34.90 % 24.75 % 20.97 % 6.36 % 5.64 % Variance simple components : 34.44 % 24.68 % 21.14 % 5.97 % 5.70 % ------------------------------------------------------------ Extracted variability PCA: 92.63 % Extracted variability SCA: 91.63 % Optimality SCA : 98.92 % ------------------------------------------------------------ Correlations simple components: B1 B2 B3 D4 D5 B1 1.00 -0.04 0.05 -0.09 0.07 B2 -0.04 1.00 -0.04 -0.01 -0.03 B3 0.05 -0.04 1.00 0.02 -0.10 D4 -0.09 -0.01 0.02 1.00 -0.06 D5 0.07 -0.03 -0.10 -0.06 1.00 ------------------------------------------------------------ Max (abs) correlation: 0.1 ( B3 - D5 ) ------------------------------------------------------------ > ## TODO: check for "parts" > > sc3c.1 <- sca(C3b, corblocks = 0.1) > ## -> gives the 3 "true" block components > stopifnot(all.equal(sc3b, sc3c.1)) > > > ##- Nr. 2 --- p = 12+6+2+10 = 30 ------------------ > > set.seed(21262) > m4b <- cbind(rmvN(500, 12, 0.7), + rmvN(500, 6, 0.9), + rmvN(500, 2, 0.9), + rmvN(500, 10, 0.8)) > C4 <- cor(m4b) > ## Show near block-structure of cor. matrix > symnum(C4, lower.tri = FALSE) # even here, M1mac differs slightly ! [1,] 1 , . . [2,] , 1 , . . [3,] . , 1 , . . [4,] . . , 1 , . . [5,] . . , 1 , . [6,] . . , 1 , . [7,] . . , 1 , . . [8,] . , 1 , . . [9,] . , 1 , . . [10,] . . , 1 , . [11,] . . , 1 , [12,] . . , 1 [13,] 1 * + , , , [14,] * 1 + + , , [15,] + + 1 * + , [16,] , + * 1 + + [17,] , , + + 1 * [18,] , , , + * 1 [19,] 1 + [20,] + 1 [21,] 1 + , . . . [22,] + 1 , , . . [23,] , , 1 , , . . [24,] . , , 1 , . . . . [25,] . . , , 1 , . . . [26,] . . . . , 1 , , . . [27,] . . . , 1 + , . [28,] . . , + 1 + , [29,] . . . , + 1 , [30,] . . , , 1 attr(,"legend") [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1 > C4[3,6] [1] 0.398603 > sc4b <- sca(C4) > sc4b ------------------------------------------------------------ Simple Component Analysis ------------------------------------------------------------ Optimality criterion : corrected sum of variances Clustering procedure : median linkage Within-block differences : TRUE Possible invertion of signs : FALSE Number of block-components : 5 Number of diff.-components : 0 ------------------------------------------------------------ Simple matrix: B1 B2 B3 B4 B5 V1 0 0 1 0 0 V2 0 0 1 0 0 V3 0 0 1 0 0 V4 0 0 1 0 0 V5 0 0 1 0 0 V6 0 0 1 0 0 V7 0 0 0 1 0 V8 0 0 0 1 0 V9 0 0 0 1 0 V10 0 0 0 1 0 V11 0 0 0 1 0 V12 0 0 0 1 0 V13 0 1 0 0 0 V14 0 1 0 0 0 V15 0 1 0 0 0 V16 0 1 0 0 0 V17 0 1 0 0 0 V18 0 1 0 0 0 V19 0 0 0 0 1 V20 0 0 0 0 1 V21 1 0 0 0 0 V22 1 0 0 0 0 V23 1 0 0 0 0 V24 1 0 0 0 0 V25 1 0 0 0 0 V26 1 0 0 0 0 V27 1 0 0 0 0 V28 1 0 0 0 0 V29 1 0 0 0 0 V30 1 0 0 0 0 ------------------------------------------------------------ Variance principal components: 17.77 % 16.92 % 15.31 % 9.17 % 7.10 % Variance simple components : 17.47 % 16.62 % 11.90 % 11.30 % 6.29 % ------------------------------------------------------------ Extracted variability PCA: 66.26 % Extracted variability SCA: 62.50 % Optimality SCA : 94.32 % ------------------------------------------------------------ Correlations simple components: B1 B2 B3 B4 B5 B1 1.00 0.00 -0.01 -0.01 -0.02 B2 0.00 1.00 0.01 -0.06 0.06 B3 -0.01 0.01 1.00 0.29 -0.04 B4 -0.01 -0.06 0.29 1.00 0.03 B5 -0.02 0.06 -0.04 0.03 1.00 ------------------------------------------------------------ Max (abs) correlation: 0.29 ( B3 - B4 ) ------------------------------------------------------------ > ## TODO: check for "parts" > stopifnot(with(sc4b, nblock == 5, ndiff == 0)) > > ## Different than sc4b : > sc4c.1 <- sca(C4, corblocks = 0.1) > ## -> gives the 4 "true" block components > sc4c.1 ------------------------------------------------------------ Simple Component Analysis ------------------------------------------------------------ Optimality criterion : corrected sum of variances Clustering procedure : median linkage Within-block differences : TRUE Possible invertion of signs : FALSE Number of block-components : 4 Number of diff.-components : 1 ------------------------------------------------------------ Simple matrix: B1 B2 B3 B4 D5 V1 0 0 1 0 1 V2 0 0 1 0 1 V3 0 0 1 0 1 V4 0 0 1 0 1 V5 0 0 1 0 1 V6 0 0 1 0 0 V7 0 0 1 0 0 V8 0 0 1 0 -1 V9 0 0 1 0 -1 V10 0 0 1 0 -1 V11 0 0 1 0 -1 V12 0 0 1 0 -1 V13 0 1 0 0 0 V14 0 1 0 0 0 V15 0 1 0 0 0 V16 0 1 0 0 0 V17 0 1 0 0 0 V18 0 1 0 0 0 V19 0 0 0 1 0 V20 0 0 0 1 0 V21 1 0 0 0 0 V22 1 0 0 0 0 V23 1 0 0 0 0 V24 1 0 0 0 0 V25 1 0 0 0 0 V26 1 0 0 0 0 V27 1 0 0 0 0 V28 1 0 0 0 0 V29 1 0 0 0 0 V30 1 0 0 0 0 ------------------------------------------------------------ Variance principal components: 17.77 % 16.92 % 15.31 % 9.17 % 7.10 % Variance simple components : 17.47 % 16.62 % 14.99 % 6.29 % 8.62 % ------------------------------------------------------------ Extracted variability PCA: 66.26 % Extracted variability SCA: 63.89 % Optimality SCA : 96.41 % ------------------------------------------------------------ Correlations simple components: B1 B2 B3 B4 D5 B1 1.00 0.00 -0.01 -0.02 0.00 B2 0.00 1.00 -0.03 0.06 0.06 B3 -0.01 -0.03 1.00 -0.01 0.03 B4 -0.02 0.06 -0.01 1.00 -0.05 D5 0.00 0.06 0.03 -0.05 1.00 ------------------------------------------------------------ Max (abs) correlation: 0.06 ( B2 - D5 ) ------------------------------------------------------------ > ## TODO: check for "parts" > stopifnot(with(sc4c.1, nblock == 4, ndiff == 1)) > str(sc4c.1) List of 10 $ simplemat : num [1:30, 1:5] 0 0 0 0 0 0 0 0 0 0 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:30] "V1" "V2" "V3" "V4" ... .. ..$ : chr [1:5] "B1" "B2" "B3" "B4" ... $ loadings : num [1:30, 1:5] 0 0 0 0 0 0 0 0 0 0 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:30] "V1" "V2" "V3" "V4" ... .. ..$ : chr [1:5] "B1" "B2" "B3" "B4" ... $ allcrit :List of 7 ..$ varpc : num [1:5] 0.1777 0.1692 0.1531 0.0917 0.071 ..$ varsc : Named num [1:5] 0.1747 0.1662 0.1499 0.0629 0.0862 .. ..- attr(*, "names")= chr [1:5] "B1" "B2" "B3" "B4" ... ..$ cumpc : num 0.663 ..$ cumsc : num 0.639 ..$ opt : num 0.964 ..$ corsc : num [1:5, 1:5] 1 -0.003284 -0.008187 -0.017572 0.000471 ... .. ..- attr(*, "dimnames")=List of 2 .. .. ..$ : chr [1:5] "B1" "B2" "B3" "B4" ... .. .. ..$ : chr [1:5] "B1" "B2" "B3" "B4" ... ..$ maxcor:List of 3 .. ..$ row: chr "B2" .. ..$ col: chr "D5" .. ..$ val: num 0.0638 $ nblock : num 4 $ ndiff : num 1 $ criterion : chr "csv" $ cluster : chr "median" $ withinblock: logi TRUE $ invertsigns: logi FALSE $ vardata : num [1:30, 1:30] 1 0.732 0.523 0.408 0.257 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:30] "V1" "V2" "V3" "V4" ... .. ..$ : chr [1:30] "V1" "V2" "V3" "V4" ... - attr(*, "class")= chr "simpcomp" > > sc4d <- sca(C4, corblocks = 0.1, invertsigns=TRUE) Warning message: In sca(C4, corblocks = 0.1, invertsigns = TRUE) : some rows and columns in 'S' have been inverted to avoid negative values > ## ^^^^^^^^^^^^^^^^^ (==> quite a bit worse !) > ## TODO: check for "parts" > stopifnot(with(sc4d, nblock == 1, ndiff == 4)) > sc4d ------------------------------------------------------------ Simple Component Analysis ------------------------------------------------------------ Optimality criterion : corrected sum of variances Clustering procedure : median linkage Within-block differences : TRUE Possible invertion of signs : TRUE Number of block-components : 1 Number of diff.-components : 4 ------------------------------------------------------------ Simple matrix: B1 D2 D3 D4 D5 -V1 1 0 4 7 0 V2 1 0 -9 -8 0 V3 1 0 -9 -8 0 V4 1 0 -9 -8 0 V5 1 0 -9 -8 0 V6 1 0 -9 0 0 V7 1 0 -9 7 0 V8 1 0 -9 7 0 V9 1 0 -9 7 0 -V10 1 0 4 -8 -2 -V11 1 0 4 -8 -2 -V12 1 0 4 -8 0 V13 1 5 4 0 0 V14 1 5 4 0 0 V15 1 5 4 0 0 V16 1 5 4 0 0 V17 1 5 4 0 0 V18 1 5 4 0 0 V19 1 0 0 0 0 V20 1 0 0 0 0 -V21 1 -3 0 7 -2 -V22 1 -3 0 7 -2 -V23 1 -3 4 7 -2 -V24 1 -3 4 7 -2 -V25 1 -3 4 0 0 -V26 1 -3 4 0 0 -V27 1 -3 4 0 3 -V28 1 -3 4 0 3 -V29 1 -3 4 0 3 -V30 1 -3 4 0 3 ------------------------------------------------------------ Variance principal components: 17.77 % 16.92 % 15.31 % 9.17 % 7.10 % Variance simple components : 13.25 % 16.88 % 15.30 % 9.35 % 6.68 % ------------------------------------------------------------ Extracted variability PCA: 66.26 % Extracted variability SCA: 59.64 % Optimality SCA : 90.01 % ------------------------------------------------------------ Correlations simple components: B1 D2 D3 D4 D5 B1 1.00 -0.02 0.13 0.12 0.12 D2 -0.02 1.00 0.06 -0.29 -0.21 D3 0.13 0.06 1.00 0.10 0.03 D4 0.12 -0.29 0.10 1.00 -0.04 D5 0.12 -0.21 0.03 -0.04 1.00 ------------------------------------------------------------ Max (abs) correlation: 0.29 ( D2 - D4 ) ------------------------------------------------------------ > > > cat('Time elapsed: ',proc.time() - .proctime00,'\n') Time elapsed: 2.27 0.19 2.46 NA NA > > proc.time() user system elapsed 2.48 0.29 2.78