R Under development (unstable) (2024-06-18 r86781 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> 
> #### Testing  medcouple mc() and related functions
> 
> ### here, we do "strict tests" -- hence no *.Rout.save
> ### hence, can also produce non-reproducible output such as timing
> 
> library(robustbase)
> for(f in system.file("xtraR", c("mcnaive.R", # -> mcNaive()
+                                 "styleData.R",  # -> smallD  list of small datasets
+ 			      "platform-sessionInfo.R"), # -> moreSessionInfo()
+                      package = "robustbase", mustWork=TRUE)) {
+     cat("source(",f,"):\n", sep="")
+     source(f)
+ }
source(D:/RCompile/CRANincoming/R-devel/lib/robustbase/xtraR/mcnaive.R):
source(D:/RCompile/CRANincoming/R-devel/lib/robustbase/xtraR/styleData.R):
source(D:/RCompile/CRANincoming/R-devel/lib/robustbase/xtraR/platform-sessionInfo.R):
> 
> ## instead of relying on  system.file("test-tools-1.R", package="Matrix"):
> source(system.file("xtraR/test-tools.R", package = "robustbase")) # assert.EQ() etc
> 
> assertEQm12 <- function(x,y, giveRE=TRUE, ...)
+     assert.EQ(x,y, tol = 1e-12, giveRE=giveRE, ...)
> ## ^^ shows *any* difference ("tol = 0") unless there is no difference at all
> ##
> c.time <- function(...) cat('Time elapsed: ', ..., '\n')
> S.time <- function(expr) c.time(system.time(expr))
> DO <- function(...) S.time(stopifnot(...))
> 
> ## from {sfsmisc}:
> lseq <- function(from, to, length) exp(seq(log(from), log(to), length.out = length))
> 
> 
> mS <- moreSessionInfo(print.=TRUE)
List of 5
 $ sizeof.long      : int 4
 $ sizeof.longlong  : int 8
 $ sizeof.longdouble: int 16
 $ sizeof.pointer   : int 8
 $ sizeof.time_t    : int 8
64 bit platform type 'windows'  ==> onWindows: TRUE
arch: x86-64
osVersion (0): Windows Server 2022 x64 (build 20348) 
osVersion: 'Windows Server 2022 x64 (build 20348)' 
 +  BLAS "is" Lapack: TRUE | BLAS=OpenBLAS: FALSE | Lapack=OpenBLAS: FALSE 
strictR: FALSE 
> 
> (doExtras <- robustbase:::doExtras())# TRUE if interactive() or activated by envvar
[1] FALSE
> 
> if(!dev.interactive(orNone=TRUE)) pdf("mc-strict.pdf")
> 
> tools::assertCondition(mc(1:11), "message") # change of default to  doScale=FALSE
The default of 'doScale' is FALSE now for stability;
  set options(mc_doScale_quiet=TRUE) to suppress this (once per session) message
> 
> smlMC  <- vapply(smallD, mc, pi)
> smlMCo <- vapply(smallD, mc, pi, doScale=TRUE, c.huberize=Inf)
> yI <- c("yI", "yI."); notI  <- setdiff(names(smallD), yI)
> yI2 <- c(yI, "x3I");  notI2 <- setdiff(names(smallD), yI2)
> assert.EQ(smlMC [notI],
+           smlMCo[notI], tol = 4e-11, giveRE=TRUE)
Mean relative difference: 9.070078e-12
> ## above small diff. is from 'x3I';  dropping that, too, leaves no differences
> table(smlMC [notI2] == smlMCo[notI2])

TRUE 
  20 
> 
> n.set <- c(1:99, 1e5L+ 0:1) # large n gave integer overflow in earlier versions
> DO(0 == sapply(n.set, function(n) mc(seq_len(n))))
Time elapsed:  0.11 0 0.11 NA NA 
> DO(0 == sapply(n.set, function(n) mc(seq_len(n), doRefl=FALSE)))
Time elapsed:  0.13 0 0.12 NA NA 
> 
> DO(0 == sapply(1:100, function(n) mcNaive(seq_len(n), "simple")))
Time elapsed:  0.09 0 0.1 NA NA 
> DO(0 == sapply(1:100, function(n) mcNaive(seq_len(n), "h.use" )))
Time elapsed:  0.01 0 0.01 NA NA 
> 
> 
> x1 <- c(1, 2, 7, 9, 10)
> mcNaive(x1) # = -1/3
[1] -0.3333333
> assertEQm12(-1/3, mcNaive(x1, "simple"))
> assertEQm12(-1/3, mcNaive(x1, "h.use"))
> assertEQm12(-1/3, mc(x1))
> 
> x2 <- c(-1, 0, 0, 0, 1, 2)
> mcNaive(x2, meth="simple") # = 0 - which is wrong
[1] 0
> mcNaive(x2, meth="h.use")  # = 1/6 = 0.16666
[1] 0.1666667
> assertEQm12(1/6, mc(x2))
> assertEQm12(1/6, mcNaive(x2, "h.use"))
> 
> x4 <- c(1:5,7,10,15,25, 1e15) ## - bombed in original algo
> mcNaive(x4,"h.use") # 0.5833333
[1] 0.5833333
> assertEQm12( 7/12, mcNaive(x4, "h.use"))
> assertEQm12( 7/12, mcNaive(x4, "simple"))
> assertEQm12( 7/12, mc( x4, doRefl= FALSE))
> assertEQm12(-7/12, mc(-x4, doRefl= FALSE))
> 
> xx <- c(-3, -3, -2, -2, -1, rep(0, 6), 1, 1, 1, 2, 2, 3, 3, 5)
> stopifnot(exprs = {
+     mc(xx, doScale=TRUE , c.huberize = Inf) == 0 ## old mc()
+     mc(xx) == 0
+     mc(xx,  doReflect=FALSE) == 0
+    -mc(-xx, doReflect=FALSE) == 0
+     mcNaive(xx, "h.use" ) == 0
+     mcNaive(xx, "simple") == 0
+ })
> 
> set.seed(17)
> for(n in 3:50) {
+     cat(" ")
+     for(k in 1:5) {
+ 	x <- rlnorm(n)
+ 	mc1 <- mc(x)
+ 	mc2 <- mcNaive(x, method = "simple")
+ 	mc3 <- mcNaive(x, method = "h.use" )
+ 	stopifnot(all.equal(mc1, mc3, tolerance = 1e-10),# 1e-12 not quite ok
+ 		  mc2 == mc3)
+ 	cat(".")
+     }
+ };  cat("\n")
 ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... .....
> 
> 
> ###----  Strict tests of adjOutlyingness():
> ###                      ================= changed after long-standing bug fix in Oct.2014
> 
> ## For longley, note n < 4p and hence "random nonsense" numbers
> set.seed(1);  S.time(a1.1 <- adjOutlyingness(longley))
Time elapsed:  0.17 0 0.17 NA NA 
> set.seed(11); S.time(a1.2 <- adjOutlyingness(longley))
Time elapsed:  0.23 0 0.23 NA NA 
> ##
> set.seed(2); S.time(a2 <- adjOutlyingness(hbk)) # 75 x 4
Time elapsed:  0.17 0.02 0.19 NA NA 
> set.seed(3); S.time(a3 <- adjOutlyingness(hbk[, 1:3]))# the 'X' space
Time elapsed:  0.19 0.01 0.2 NA NA 
> set.seed(4); S.time(a4 <- adjOutlyingness(milk)) # obs.63 = obs.64
Time elapsed:  0.23 0 0.23 NA NA 
> set.seed(5); S.time(a5 <- adjOutlyingness(wood)) # 20 x 6  ==> n < 4p
Time elapsed:  0.16 0 0.16 NA NA 
> set.seed(6); S.time(a6 <- adjOutlyingness(wood[, 1:5]))# ('X' space) 20 x 5: n = 4p (ok!)
Time elapsed:  0.14 0 0.15 NA NA 
> 
> ## 32-bit <-> 64-bit different results {tested on Linux only}
> is32 <- .Machine$sizeof.pointer == 4 ## <- should work for Linux/MacOS/Windows
> isMac <- Sys.info()[["sysname"]] == "Darwin"
> isSun <- Sys.info()[["sysname"]] == "SunOS"
> Rnk <- function(u) rank(unname(u), ties.method = "first")
> ## to use for testing below:
> cat("\nRnk(a3 $ adjout): "); dput(Rnk(a3$adjout), control= {})

Rnk(a3 $ adjout): c(62, 64, 69, 71, 70, 66, 65, 63, 68, 67, 73, 75, 72, 74, 35, 
60, 55, 4, 22, 36, 6, 33, 34, 28, 53, 16, 13, 9, 27, 31, 49, 
39, 20, 50, 14, 2, 24, 40, 54, 21, 17, 37, 52, 23, 58, 19, 61, 
11, 25, 8, 46, 59, 48, 47, 29, 44, 43, 42, 7, 30, 18, 51, 41, 
15, 10, 38, 3, 56, 57, 5, 1, 12, 26, 32, 45)
> cat("\nRnk(a4 $ adjout): "); dput(Rnk(a4$adjout), control= {})

Rnk(a4 $ adjout): c(73, 77, 67, 55, 50, 63, 26, 47, 45, 4, 64, 84, 79, 78, 83, 
72, 75, 68, 65, 60, 52, 39, 16, 61, 42, 33, 80, 71, 30, 2, 35, 
51, 1, 69, 59, 15, 38, 62, 66, 40, 76, 32, 56, 85, 21, 43, 70, 
41, 27, 58, 11, 53, 37, 8, 34, 3, 29, 49, 46, 14, 5, 44, 18, 
19, 25, 10, 7, 24, 28, 86, 36, 23, 54, 74, 82, 17, 81, 9, 6, 
48, 12, 31, 20, 13, 57, 22)
> 
> (i.a4Out <- which( ! a4$nonOut)) # the outliers -- varies "wildly"
[1] 70
> stopifnot(70 %in% i.a4Out)
> {
+     if(is32 && !isMac)
+         all.equal(i.a4Out, c(1, 2, 41, 70))
+     ## and this is "typically" true, but not for a 64-bit Linux version bypassing BLAS in matprod
+     else if(isSun || isMac)
+         TRUE
+     else if(grepl("^Fedora", osVersion) && !is32)
+         identical(i.a4Out, 70L) # since Dec 2020 (F 32)
+     else
+         all.equal(i.a4Out, c(9:19, 23:27,57, 59, 70, 77))
+ }
[1] "Numeric: lengths (1, 20) differ"
> 
> ## only for ATLAS (BLAS/Lapack), not all are TRUE; which ones [but n < 4p]
> if(!all(a5$nonOut))
+   print(which(!a5$nonOut)) # if we know, enable check below
> 
> stopifnot(exprs = {
+     which(!a2$nonOut) == 1:14
+     which(!a3$nonOut) == 1:14
+     ## 'longley', 'wood' have no outliers in the "adjOut" sense:
+     if(doExtras && !isMac) { ## longley also has n < 4p (!)
+         if(mS$ strictR)
+             sum(a1.2$nonOut) >= 15 # sum(.) = 16 [nb-mm3, Oct.2014]
+         else ## however, openBLAS Fedora Linux /usr/bin/R gives sum(a1.2$nonOut) = 13
+             sum(a1.2$nonOut) >= 13
+     } else TRUE
+     if(doExtras) { ## have n < 4p (!)
+         if(mS$ strictR) a5$nonOut
+         else ## not for ATLAS
+             sum(a5$nonOut) >= 18 # 18: OpenBLAS
+     } else TRUE
+     a6$nonOut[-20]
+     ## hbk (n = 75, p = 3) should be "stable" (but isn't quite)
+     abs(Rnk(a3$adjout) -
+         c(62, 64, 69, 71, 70,    66, 65, 63, 68, 67,    73, 75, 72, 74, 35,
+           60, 55,  4, 22, 36,     6, 33, 34, 28, 53,    16, 13,  9, 27, 31,
+           49, 39, 20, 50, 14,     2, 24, 40, 54, 21,    17, 37, 52, 23, 58,
+           19, 61, 11, 25,  8,    46, 59, 48, 47, 29,    44, 43, 42,  7, 30,
+           18, 51, 41, 15, 10,    38,  3, 56, 57,  5,     1, 12, 26, 32, 45)
+         ) <= 3 ## all 0 on 64-bit (F 32) Linux
+ })
> 
> ## milk (n = 86) : -- Quite platform dependent!
> r <- Rnk(a4$adjout)
> r64 <- ## the 64-bit (ubuntu 14.04, nb-mm3) values:
+     c(65, 66, 61, 56, 47,   51, 19, 37, 74, 67,   79, 86, 83, 84, 85,
+       82, 81, 73, 80, 55,   27,  3, 70, 68, 78,   76, 77, 53, 48,  8,
+       29, 33,	 6, 32, 28,   31, 36, 40, 22, 58,   64, 52, 39, 63, 44,
+       30, 57, 46, 43, 45,   25, 54, 12,  1,  9,    2, 71, 14, 75, 23,
+       4, 10, 34, 35, 17,   24, 15, 20, 38, 72,   42, 13, 50, 60, 62,
+       26, 69, 18,  5, 21,    7, 49, 11, 41, 59,   16)
> r32 <- ## Linux 32bit (florence: 3.14.8-100.fc19.i686.PAE)
+     c(78, 79, 72, 66, 52,   61, 22, 41, 53, 14,   74, 85, 82, 83, 84,
+       80, 81, 56, 73, 65,   30,  3, 16, 17, 68,   57, 58, 63, 54,  8,
+       32, 37,  6, 36, 31,   35, 40, 44, 25, 69,   77, 62, 43, 76, 48,
+       34, 67, 51, 47, 49,   28, 64, 12,  1,  9,    2, 33, 15, 59, 26,
+       4, 10, 38, 39, 20,   27, 18, 23, 42, 86,   46, 13, 60, 71, 75,
+       29, 50, 21,  5, 24,    7, 55, 11, 45, 70,   19)
> d <- (r - if (is32) r32 else r64)
> cbind(r, d)
       r   d
 [1,] 73   8
 [2,] 77  11
 [3,] 67   6
 [4,] 55  -1
 [5,] 50   3
 [6,] 63  12
 [7,] 26   7
 [8,] 47  10
 [9,] 45 -29
[10,]  4 -63
[11,] 64 -15
[12,] 84  -2
[13,] 79  -4
[14,] 78  -6
[15,] 83  -2
[16,] 72 -10
[17,] 75  -6
[18,] 68  -5
[19,] 65 -15
[20,] 60   5
[21,] 52  25
[22,] 39  36
[23,] 16 -54
[24,] 61  -7
[25,] 42 -36
[26,] 33 -43
[27,] 80   3
[28,] 71  18
[29,] 30 -18
[30,]  2  -6
[31,] 35   6
[32,] 51  18
[33,]  1  -5
[34,] 69  37
[35,] 59  31
[36,] 15 -16
[37,] 38   2
[38,] 62  22
[39,] 66  44
[40,] 40 -18
[41,] 76  12
[42,] 32 -20
[43,] 56  17
[44,] 85  22
[45,] 21 -23
[46,] 43  13
[47,] 70  13
[48,] 41  -5
[49,] 27 -16
[50,] 58  13
[51,] 11 -14
[52,] 53  -1
[53,] 37  25
[54,]  8   7
[55,] 34  25
[56,]  3   1
[57,] 29 -42
[58,] 49  35
[59,] 46 -29
[60,] 14  -9
[61,]  5   1
[62,] 44  34
[63,] 18 -16
[64,] 19 -16
[65,] 25   8
[66,] 10 -14
[67,]  7  -8
[68,] 24   4
[69,] 28 -10
[70,] 86  14
[71,] 36  -6
[72,] 23  10
[73,] 54   4
[74,] 74  14
[75,] 82  20
[76,] 17  -9
[77,] 81  12
[78,]  9  -9
[79,]  6   1
[80,] 48  27
[81,] 12   5
[82,] 31 -18
[83,] 20   9
[84,] 13 -28
[85,] 57  -2
[86,] 22   6
>        table(abs(d))

 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 20 22 23 25 27 28 29 31 
 5  4  2  3  5  7  3  3  4  4  1  3  3  4  2  4  1  5  2  2  1  3  1  1  2  1 
34 35 36 37 42 43 44 54 63 
 1  1  2  1  1  1  1  1  1 
> cumsum(table(abs(d))) # <=> unscaled ecdf(d)
 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 20 22 23 25 27 28 29 31 
 5  9 11 14 19 26 29 32 36 40 41 44 47 51 53 57 58 63 65 67 68 71 72 73 75 76 
34 35 36 37 42 43 44 54 63 
77 78 80 81 82 83 84 85 86 
> 
> ## For the biggest part (79 out of 86), the ranks are "close":
> ## 2014: still true, but in a different sense..
> ##       ^ typically, but e.g., *not* when using non-BLAS matprod():
> sum(abs(d) <= 17) >= 78
[1] FALSE
> sum(abs(d) <= 13) >= 75
[1] FALSE
> 
> 
> ## check of adjOutlyingness *free* bug
> ## reported by Kaveh Vakili <Kaveh.Vakili@wis.kuleuven.be>
> set.seed(-37665251)
> X <- matrix(rnorm(100*5),   100, 5)
> Z <- matrix(rnorm(10*5)/100, 10, 5)
> Z[,1] <- Z[,1] + 5
> X[91:100,] <- Z # if anything these should be outliers, but ...
> for (i in 1:10) {
+     ## this would produce an error in the 6th iteration
+     aa <- adjOutlyingness(x=X, ndir=250)
+     if(any(is.out <- !aa$nonOut))
+         cat("'outliers' at obs.", paste(which(is.out), collapse=", "),"\n")
+     stopifnot(1/4 < aa$adjout & aa$adjout < 16)
+ }
'outliers' at obs. 6, 33, 39, 44, 48, 90 
'outliers' at obs. 16, 22, 88 
'outliers' at obs. 12, 19, 42, 50, 58, 88, 90 
'outliers' at obs. 42, 53, 62, 72, 88, 90 
'outliers' at obs. 90 
'outliers' at obs. 88 
> 
> ## Check "high"-dimensional Noise ... typically mc() did *not* converge for some re-centered columns
> ## Example by Valentin Todorov:
> n <- 50
> p <- 30
> set.seed(1) # MM
> a <- matrix(rnorm(n * p), nrow=n, ncol=p)
> str(a)
 num [1:50, 1:30] -0.626 0.184 -0.836 1.595 0.33 ...
> kappa(a) # 20.42 (~ 10--20 or so; definitely not close to singular)
[1] 20.42296
> a.a <- adjOutlyingness(a, mcScale=FALSE, # <- my own recommendation
+                        trace.lev=1)
keeping *all*  250  normalized directions
med <- colMedians(Y):  250  values; summary():
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
0.002157 0.052337 0.106917 0.128326 0.175055 0.582199 
Columnwise mc() got  250  values; summary():
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-1.00000 -0.01316  0.00000 -0.06105  0.00000  1.00000 
250 lower & upper Y (:= X - med(.)) values:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.18087 0.01166 2.91541 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.05395 0.00000 2.50093 
outlyingnesses for all directions (of which max(.) will be chosen:
          0%          25%          50%          75%         100% 
0.000000e+00 1.363636e-01 5.515630e-01 1.755753e+14 9.614765e+15 
> a.s <- adjOutlyingness(a, mcScale=TRUE, trace.lev=1)
keeping *all*  250  normalized directions
med <- colMedians(Y):  250  values; summary():
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0006641 0.0700928 0.1242860 0.1483769 0.2124847 0.5023069 
Columnwise mc() got  250  values; summary():
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-1.00000 -0.07290  0.00000 -0.09323  0.00000  1.00000 
250 lower & upper Y (:= X - med(.)) values:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.25788 0.04611 3.32822 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.06039 0.00000 3.08440 
outlyingnesses for all directions (of which max(.) will be chosen:
          0%          25%          50%          75%         100% 
0.000000e+00 1.466278e-01 5.318217e-01 1.721839e+14 6.502497e+15 
> ## a.a :
> str(a.a) # high 'adjout' values "all similar" -> no outliers .. hmm .. ???
List of 7
 $ adjout      : num [1:50] 2.53e+15 3.63e+15 5.67e+15 3.00e+15 5.25e+15 ...
 $ iter        : int 250
 $ ndir.final  : int 250
 $ MCadjout    : num 0.237
 $ Qalph.adjout: Named num [1:2] 2.89e+15 4.79e+15
  ..- attr(*, "names")= chr [1:2] "25%" "75%"
 $ cutoff      : Named num 1.06e+16
  ..- attr(*, "names")= chr "75%"
 $ nonOut      : logi [1:50] TRUE TRUE TRUE TRUE TRUE TRUE ...
> (hdOut <- which( ! a.a$nonOut)) ## indices of "outlier" -- very platform dependent !
integer(0)
> a.a$MCadjout; all.equal(a.a$MCadjout, 0.136839766177,
[1] 0.2371803
+           tol = 1e-12) # seen 7.65e-14   and "big" differences on non-default platforms
[1] "Mean relative difference: 0.4230559"
> ## a.s :
> which(! a.s$nonOut ) # none  [all TRUE]
integer(0)
> a.s$MCadjout # platform dependent; saw
[1] 0.07089226
> all.equal(a.s$MCadjout, 0.32284906741568, tol = 1e-13) # seen 2.2e-15 ..
[1] "Mean relative difference: 3.55408"
>                                         # and big diffs on non-default platforms
> ##
> ## The adjout values are all > 10^15  !!!   why ??
> ## Now (2021) I know: n < 4*p ==> can find 1D-projection where 1 of the 2 {Q3-Q2, Q2-Q1} is 0 !
> ##---------------------------------------------------------------------------------------------
> 
> 
> ###-- Back to  mc()  checks for "hard" cases
> ###           =====  -----------------------
> 
> ## "large n" (this did overflow sum_p, sum_q  earlier ==> had inf.loop):
> set.seed(3); x <- rnorm(2e5)
> (mx <- mc(x, trace.lev=3))
mc_C_d(z[1:200000], trace_lev=3, scale=F): Median = -6.55715e-05 (not at the border)
   x1[] := {x | x_j >= x_eps = 6.55715e-19}    has 100000 ('j-1') entries
'median-x' {x | -eps < x_i <= eps} has 0 (= 'k') entries
  now allocating 2+5 work arrays of size (1+) h2=100000 each:
 (h1,h2, nr, knew) = (100000,100000, 10000000000, 5000000001)
   it= 1, whimed(*, n=100000)= -0.0019683 sum_(p,q)= (5007620130,5007620131); sum_p >= kn
   it= 2, whimed(*, n= 99999)=     0.4221 sum_(p,q)= (2453285443,2453285444); s_p < kn ?<=? s_q: no
   it= 3, whimed(*, n= 99999)=    0.21774 sum_(p,q)= (3629582104,3629582105); s_p < kn ?<=? s_q: no
   it= 4, whimed(*, n= 99999)=    0.11138 sum_(p,q)= (4288398219,4288398220); s_p < kn ?<=? s_q: no
   it= 5, whimed(*, n= 99998)=   0.055486 sum_(p,q)= (4641942845,4641942846); s_p < kn ?<=? s_q: no
   it= 6, whimed(*, n= 99994)=   0.026873 sum_(p,q)= (4823919585,4823919586); s_p < kn ?<=? s_q: no
   it= 7, whimed(*, n= 99987)=   0.012451 sum_(p,q)= (4915754726,4915754727); s_p < kn ?<=? s_q: no
   it= 8, whimed(*, n= 99971)=  0.0052434 sum_(p,q)= (4961674479,4961674480); s_p < kn ?<=? s_q: no
   it= 9, whimed(*, n= 99924)=  0.0016334 sum_(p,q)= (4984674126,4984674127); s_p < kn ?<=? s_q: no
   it=10, whimed(*, n= 99842)= -0.0001688 sum_(p,q)= (4996158141,4996158142); s_p < kn ?<=? s_q: no
   it=11, whimed(*, n= 99686)= -0.0010715 sum_(p,q)= (5001907425,5001907426); sum_p >= kn
   it=12, whimed(*, n= 99377)=-0.00062346 sum_(p,q)= (4999053587,4999053588); s_p < kn ?<=? s_q: no
   it=13, whimed(*, n= 98730)=-0.00085064 sum_(p,q)= (5000499912,5000499913); sum_p >= kn
   it=14, whimed(*, n= 97586)= -0.0007403 sum_(p,q)= (4999796905,4999796906); s_p < kn ?<=? s_q: no
   it=15, whimed(*, n= 94851)=-0.00079865 sum_(p,q)= (5000168479,5000168480); sum_p >= kn
   it=16, whimed(*, n= 89663)=-0.00077229 sum_(p,q)= (4999999851,4999999852); s_p < kn ?<=? s_q: no
  not found [it=16,  (nr,nl) = (5000168479,4999999852)], -> (knew-nl-1, j) = (148, 168627)
converged in 16 iterations
[1] -0.0007723158
> stopifnot(print(abs(mx - -0.000772315846101988)) < 1e-15)
[1] 2.049142e-17
> 					# 3.252e-19, 64b Linux
> 					# 1.198e-16, 32b Windows
> 
> ### Some platform info :
> local({ nms <- names(Si <- Sys.info())
+         dropNms <- c("nodename", "machine", "login")
+         structure(Si[c("nodename", nms[is.na(match(nms, dropNms))])],
+                   class="simple.list") })
               _          
nodename       CRANWIN3   
sysname        Windows    
release        Server x64 
version        build 20348
user           CRAN       
effective_user CRAN       
> 
> if(identical(1L, grep("linux", R.version[["os"]]))) { ##----- Linux - only ----
+     ##
+     Sys.procinfo <- function(procfile)
+     {
+         l2 <- strsplit(readLines(procfile),"[ \t]*:[ \t]*")
+         r <- sapply(l2[sapply(l2, length) == 2],
+                     function(c2)structure(c2[2], names= c2[1]))
+         attr(r,"Name") <- procfile
+         class(r) <- "simple.list"
+         r
+     }
+     ##
+     Scpu <- Sys.procinfo("/proc/cpuinfo")
+     Smem <- Sys.procinfo("/proc/meminfo")
+     print(Scpu[c("model name", "cpu MHz", "cache size", "bogomips")])
+     print(Smem[c("MemTotal", "SwapTotal")])
+ }
> 
> ##' Checking the breakdown point of mc() --- Hubert et al. theory said : 25%
> ##' using non-default  doReflect=FALSE  as that corresponds to original Hubert et al.
> ##'
> ##' @title Medcouple mc() checking
> ##' @param x
> ##' @param Xfun
> ##' @param eps
> ##' @param NAiferror
> ##' @param doReflect
> ##' @param ...
> ##' @return mc(*,..) or NaN in case mc() signals an error [non-convergence]
> ##' @author Martin Maechler
> mcX <- function(x, Xfun, eps=0, NAiferror=FALSE, doReflect=FALSE, ...) {
+     stopifnot(is.numeric(x), is.function(Xfun), "eps" %in% names(formals(Xfun)))
+     myFun <-
+ 	if(NAiferror)
+ 	    function(u) tryCatch(mc(Xfun(u, eps=eps), doReflect=doReflect, ...),
+ 				 error = function(e) NaN)
+ 	else
+ 	    function(u) mc(Xfun(u, eps=eps), doReflect=doReflect, ...)
+     vapply(x, myFun, 1.)
+ }
> 
> X1. <- function(u, eps=0) c(1,2,3, 7+(-10:10)*eps, u + (-1:1)*eps)
> ## ==> This *did* breakdown [but points not "in general position"]:
> ## but now is stable:
> r.mc1 <- curve(mcX(x, X1.), 10, 1e35, log="x", n=1001)
> stopifnot(r.mc1$y == 0) # now stable
> if(FALSE) {
+ rt1 <- uniroot(function(x) mcX(exp(x), X1.) - 1/2, lower=0, upper=500)
+ exp(rt1$root) #  4.056265e+31
+ }
> 
> ## eps > 0  ==> No duplicated points ==> theory says breakdown point = 0.25
> ## -------  but get big numerical problems:
> if(FALSE) { # ==> convergence problem [also in maxit = 1e5] .. really an *inf* loop!
+ r.mc1.1  <- curve(mcX(x, X1., eps= .1  ), 10, 1e35, log="x", n=1001)
+ r.mc1.2  <- curve(mcX(x, X1., eps= .01 ), 10, 1e35, log="x", n=1001)
+ r.mc1.3  <- curve(mcX(x, X1., eps= .001), 10, 1e35, log="x", n=1001)
+ r.mc1.5  <- curve(mcX(x, X1., eps= 1e-5), 10, 1e35, log="x", n=1001)
+ r.mc1.8  <- curve(mcX(x, X1., eps= 1e-8), 10, 1e35, log="x", n=1001)
+ r.mc1.15 <- curve(mcX(x, X1., eps=1e-15), 10, 1e35, log="x", n=1001)# still!
+ }
> ## practically identical to  eps = 0 where we have breakdown (see above)
> r.mc1.16 <- curve(mcX(x, X1., eps=1e-16), 10, 1e35, log="x", n=1001)
> all.equal(r.mc1, r.mc1.16, tol=1e-15)#-> TRUE
[1] TRUE
> 
> ## Quite bad case: Non convergence
> X2. <- function(u) c(1:3, seq(6, 8, by = 1/8), u, u, u)
> ## try(mc(X2.(4.3e31)))## -> error: no convergence
> ## but now
> stopifnot(exprs = {
+     all.equal(1/30, mc(X2.(4.3e31)), tol=1e-12)
+     all.equal(1/30, mc(X2.(4.3e31), eps1=1e-7, eps2=1e-100), tol=1e-12)
+ })
> ## related, more direct:
> X3. <- function(u) c(10*(1:3), 60:80, (4:6)*u)
> stopifnot(0 == mc(X3.(1e31), trace=5)) # fine convergence in one iter.
mc_C_d(z[1:27], trace_lev=5, scale=F): Median = 70 (not at the border)
   x1[] := {x | x_j >= x_eps = 7e-13}    has 13 ('j-1') entries
'median-x' {x | -eps < x_i <= eps} has 1 (= 'k') entries
  now allocating 2+5 work arrays of size (1+) h2=14 each:
 (h1,h2, nr, knew) = (14,14, 196, 99)
 before whimed(): work and iwt, each [0:(14-1)]:
        1 0.714286      0.5 0.333333      0.2 0.0909091        0 -0.0769231 -0.142857     -0.2    -0.25 -0.73913 -0.785714 -0.818182
       14       14       14       14       14       14       14       14       14       14       14       14       14       14
   it= 1, whimed(*, n=    14)=          0 
j=  1, i= 14, x[j]=1.2607e+12, x2[i]=-60, h=1
j=  2, i= 14, x[j]=1.2607e+12, x2[i]=-60, h=1
j=  3, i= 14, x[j]=1.2607e+12, x2[i]=-60, h=1
j=  4, i= 10, x[j]=10, x2[i]=-9, h=0.0526316
j=  5, i=  9, x[j]=9, x2[i]=-8, h=0.0588235
j=  6, i=  8, x[j]=8, x2[i]=-7, h=0.0666667
j=  7, i=  7, x[j]=7, x2[i]=-6, h=0.0769231
j=  8, i=  6, x[j]=6, x2[i]=-5, h=0.0909091
j=  9, i=  5, x[j]=5, x2[i]=-4, h=0.111111
j= 10, i=  4, x[j]=4, x2[i]=-3, h=0.142857
j= 11, i=  3, x[j]=3, x2[i]=-2, h=0.2
j= 12, i=  2, x[j]=2, x2[i]=-1, h=0.333333
j= 13, i=  1, x[j]=1, x2[i]=-0, h=1
    p[1:14]: 13 12 11 10  9  8  7  6  5  4  3  3  3  3 sum=  97
    q[1:14]: 15 14 13 12 11 10  9  8  7  6  5  4  4  4 sum= 108
; s_p < kn ?<=? s_q: TRUE
converged in 1 iterations
mc_C_d(z[1:27], trace_lev=5, scale=F): Median = -70 (not at the border)
   x1[] := {x | x_j >= x_eps = 7e-13}    has 13 ('j-1') entries
'median-x' {x | -eps < x_i <= eps} has 1 (= 'k') entries
  now allocating 2+5 work arrays of size (1+) h2=14 each:
 (h1,h2, nr, knew) = (14,14, 196, 99)
 before whimed(): work and iwt, each [0:(14-1)]:
        1 0.714286      0.5 0.333333      0.2 0.0909091        0 -0.0769231 -0.142857     -0.2    -0.25       -1       -1       -1
       14       14       14       14       14       14       14       14       14       14       14       14       14       14
   it= 1, whimed(*, n=    14)=          0 
j=  1, i= 11, x[j]=60, x2[i]=-10, h=0.714286
j=  2, i= 11, x[j]=50, x2[i]=-10, h=0.666667
j=  3, i= 11, x[j]=40, x2[i]=-10, h=0.6
j=  4, i= 10, x[j]=10, x2[i]=-9, h=0.0526316
j=  5, i=  9, x[j]=9, x2[i]=-8, h=0.0588235
j=  6, i=  8, x[j]=8, x2[i]=-7, h=0.0666667
j=  7, i=  7, x[j]=7, x2[i]=-6, h=0.0769231
j=  8, i=  6, x[j]=6, x2[i]=-5, h=0.0909091
j=  9, i=  5, x[j]=5, x2[i]=-4, h=0.111111
j= 10, i=  4, x[j]=4, x2[i]=-3, h=0.142857
j= 11, i=  3, x[j]=3, x2[i]=-2, h=0.2
j= 12, i=  2, x[j]=2, x2[i]=-1, h=0.333333
j= 13, i=  1, x[j]=1, x2[i]=-0, h=1
    p[1:14]: 13 12 11 10  9  8  7  6  5  4  3  0  0  0 sum=  88
    q[1:14]: 15 14 13 12 11 10  9  8  7  6  5  1  1  1 sum=  99
; s_p < kn ?<=? s_q: TRUE
converged in 1 iterations
> stopifnot(0 == mc(X3.(1e32), trace=3)) # did *not* converge
mc_C_d(z[1:27], trace_lev=3, scale=F): Median = 70 (not at the border)
   x1[] := {x | x_j >= x_eps = 7e-13}    has 13 ('j-1') entries
'median-x' {x | -eps < x_i <= eps} has 1 (= 'k') entries
  now allocating 2+5 work arrays of size (1+) h2=14 each:
 (h1,h2, nr, knew) = (14,14, 196, 99)
   it= 1, whimed(*, n=    14)=          0 sum_(p,q)= (97,108); s_p < kn ?<=? s_q: TRUE
converged in 1 iterations
mc_C_d(z[1:27], trace_lev=3, scale=F): Median = -70 (not at the border)
   x1[] := {x | x_j >= x_eps = 7e-13}    has 13 ('j-1') entries
'median-x' {x | -eps < x_i <= eps} has 1 (= 'k') entries
  now allocating 2+5 work arrays of size (1+) h2=14 each:
 (h1,h2, nr, knew) = (14,14, 196, 99)
   it= 1, whimed(*, n=    14)=          0 sum_(p,q)= (88,99); s_p < kn ?<=? s_q: TRUE
converged in 1 iterations
> 
> ### TODO : find example with *smaller* sample size -- with no convergence
> X4. <- function(u, eps, ...) c(10, 70:75, (2:3)*u)
> mc(X4.(1e34))# "fine"
[1] 0.3333333
> ## now stable too:
> r.mc4 <- curve(mcX(x, X4.), 100, 1e35, log="x", n=2^12)
> stopifnot(abs(1/3 - r.mc4$y) < 1e-15)
> 
> X5. <- function(u) c(10*(1:3), 70:78, (4:6)*u)
> stopifnot(all.equal(4/15, mc(X5.(1e32), maxit=1000)))
> 
> X5. <- function(u, eps,...) c(5*(1:12), (4:6)*u)
> str(r.mc5 <- mc(X5.(1e32), doReflect=FALSE, full.result = TRUE))
 num 0.2
 - attr(*, "mcComp")=List of 4
  ..$ medc     : num 0.2
  ..$ eps      : num [1:2] 1e-14 1e-15
  ..$ iter     : int 2
  ..$ converged: logi TRUE
> ## Now, stable:
> stopifnot(all.equal(1/5, c(r.mc5))) ## was 1; platform dependent ..
> stopifnot(all.equal(4/15, mc(X5.(5e31)))) # had  no convergence w/ maxit=10000
> r.mc5Sml <- curve(mcX(x, X5.), 1,  100, log="x", n=1024) ## quite astonishing
> x <- lseq(1, 1e200, 2^11)
> mc5L <- mcX(x, X5.)
> table(err <- abs(0.2 - mc5L[x >= 24])) # I see all 0!

   0 
2033 
> stopifnot(abs(err) < 1e-15)
> 
> c.time(proc.time())
Time elapsed:  8.59 0.25 8.89 NA NA 
> 
> summary(warnings()) # seen 15 x  In mcComp(....) :
No warnings
> ## maximal number of iterations (100 =? 100) reached prematurely
> 
> 
> proc.time()
   user  system elapsed 
   8.59    0.25    8.89