R Under development (unstable) (2024-06-18 r86781 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > #### Testing medcouple mc() and related functions > > ### here, we do "strict tests" -- hence no *.Rout.save > ### hence, can also produce non-reproducible output such as timing > > library(robustbase) > for(f in system.file("xtraR", c("mcnaive.R", # -> mcNaive() + "styleData.R", # -> smallD list of small datasets + "platform-sessionInfo.R"), # -> moreSessionInfo() + package = "robustbase", mustWork=TRUE)) { + cat("source(",f,"):\n", sep="") + source(f) + } source(D:/RCompile/CRANincoming/R-devel/lib/robustbase/xtraR/mcnaive.R): source(D:/RCompile/CRANincoming/R-devel/lib/robustbase/xtraR/styleData.R): source(D:/RCompile/CRANincoming/R-devel/lib/robustbase/xtraR/platform-sessionInfo.R): > > ## instead of relying on system.file("test-tools-1.R", package="Matrix"): > source(system.file("xtraR/test-tools.R", package = "robustbase")) # assert.EQ() etc > > assertEQm12 <- function(x,y, giveRE=TRUE, ...) + assert.EQ(x,y, tol = 1e-12, giveRE=giveRE, ...) > ## ^^ shows *any* difference ("tol = 0") unless there is no difference at all > ## > c.time <- function(...) cat('Time elapsed: ', ..., '\n') > S.time <- function(expr) c.time(system.time(expr)) > DO <- function(...) S.time(stopifnot(...)) > > ## from {sfsmisc}: > lseq <- function(from, to, length) exp(seq(log(from), log(to), length.out = length)) > > > mS <- moreSessionInfo(print.=TRUE) List of 5 $ sizeof.long : int 4 $ sizeof.longlong : int 8 $ sizeof.longdouble: int 16 $ sizeof.pointer : int 8 $ sizeof.time_t : int 8 64 bit platform type 'windows' ==> onWindows: TRUE arch: x86-64 osVersion (0): Windows Server 2022 x64 (build 20348) osVersion: 'Windows Server 2022 x64 (build 20348)' + BLAS "is" Lapack: TRUE | BLAS=OpenBLAS: FALSE | Lapack=OpenBLAS: FALSE strictR: FALSE > > (doExtras <- robustbase:::doExtras())# TRUE if interactive() or activated by envvar [1] FALSE > > if(!dev.interactive(orNone=TRUE)) pdf("mc-strict.pdf") > > tools::assertCondition(mc(1:11), "message") # change of default to doScale=FALSE The default of 'doScale' is FALSE now for stability; set options(mc_doScale_quiet=TRUE) to suppress this (once per session) message > > smlMC <- vapply(smallD, mc, pi) > smlMCo <- vapply(smallD, mc, pi, doScale=TRUE, c.huberize=Inf) > yI <- c("yI", "yI."); notI <- setdiff(names(smallD), yI) > yI2 <- c(yI, "x3I"); notI2 <- setdiff(names(smallD), yI2) > assert.EQ(smlMC [notI], + smlMCo[notI], tol = 4e-11, giveRE=TRUE) Mean relative difference: 9.070078e-12 > ## above small diff. is from 'x3I'; dropping that, too, leaves no differences > table(smlMC [notI2] == smlMCo[notI2]) TRUE 20 > > n.set <- c(1:99, 1e5L+ 0:1) # large n gave integer overflow in earlier versions > DO(0 == sapply(n.set, function(n) mc(seq_len(n)))) Time elapsed: 0.11 0 0.11 NA NA > DO(0 == sapply(n.set, function(n) mc(seq_len(n), doRefl=FALSE))) Time elapsed: 0.13 0 0.12 NA NA > > DO(0 == sapply(1:100, function(n) mcNaive(seq_len(n), "simple"))) Time elapsed: 0.09 0 0.1 NA NA > DO(0 == sapply(1:100, function(n) mcNaive(seq_len(n), "h.use" ))) Time elapsed: 0.01 0 0.01 NA NA > > > x1 <- c(1, 2, 7, 9, 10) > mcNaive(x1) # = -1/3 [1] -0.3333333 > assertEQm12(-1/3, mcNaive(x1, "simple")) > assertEQm12(-1/3, mcNaive(x1, "h.use")) > assertEQm12(-1/3, mc(x1)) > > x2 <- c(-1, 0, 0, 0, 1, 2) > mcNaive(x2, meth="simple") # = 0 - which is wrong [1] 0 > mcNaive(x2, meth="h.use") # = 1/6 = 0.16666 [1] 0.1666667 > assertEQm12(1/6, mc(x2)) > assertEQm12(1/6, mcNaive(x2, "h.use")) > > x4 <- c(1:5,7,10,15,25, 1e15) ## - bombed in original algo > mcNaive(x4,"h.use") # 0.5833333 [1] 0.5833333 > assertEQm12( 7/12, mcNaive(x4, "h.use")) > assertEQm12( 7/12, mcNaive(x4, "simple")) > assertEQm12( 7/12, mc( x4, doRefl= FALSE)) > assertEQm12(-7/12, mc(-x4, doRefl= FALSE)) > > xx <- c(-3, -3, -2, -2, -1, rep(0, 6), 1, 1, 1, 2, 2, 3, 3, 5) > stopifnot(exprs = { + mc(xx, doScale=TRUE , c.huberize = Inf) == 0 ## old mc() + mc(xx) == 0 + mc(xx, doReflect=FALSE) == 0 + -mc(-xx, doReflect=FALSE) == 0 + mcNaive(xx, "h.use" ) == 0 + mcNaive(xx, "simple") == 0 + }) > > set.seed(17) > for(n in 3:50) { + cat(" ") + for(k in 1:5) { + x <- rlnorm(n) + mc1 <- mc(x) + mc2 <- mcNaive(x, method = "simple") + mc3 <- mcNaive(x, method = "h.use" ) + stopifnot(all.equal(mc1, mc3, tolerance = 1e-10),# 1e-12 not quite ok + mc2 == mc3) + cat(".") + } + }; cat("\n") ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... ..... > > > ###---- Strict tests of adjOutlyingness(): > ### ================= changed after long-standing bug fix in Oct.2014 > > ## For longley, note n < 4p and hence "random nonsense" numbers > set.seed(1); S.time(a1.1 <- adjOutlyingness(longley)) Time elapsed: 0.17 0 0.17 NA NA > set.seed(11); S.time(a1.2 <- adjOutlyingness(longley)) Time elapsed: 0.23 0 0.23 NA NA > ## > set.seed(2); S.time(a2 <- adjOutlyingness(hbk)) # 75 x 4 Time elapsed: 0.17 0.02 0.19 NA NA > set.seed(3); S.time(a3 <- adjOutlyingness(hbk[, 1:3]))# the 'X' space Time elapsed: 0.19 0.01 0.2 NA NA > set.seed(4); S.time(a4 <- adjOutlyingness(milk)) # obs.63 = obs.64 Time elapsed: 0.23 0 0.23 NA NA > set.seed(5); S.time(a5 <- adjOutlyingness(wood)) # 20 x 6 ==> n < 4p Time elapsed: 0.16 0 0.16 NA NA > set.seed(6); S.time(a6 <- adjOutlyingness(wood[, 1:5]))# ('X' space) 20 x 5: n = 4p (ok!) Time elapsed: 0.14 0 0.15 NA NA > > ## 32-bit <-> 64-bit different results {tested on Linux only} > is32 <- .Machine$sizeof.pointer == 4 ## <- should work for Linux/MacOS/Windows > isMac <- Sys.info()[["sysname"]] == "Darwin" > isSun <- Sys.info()[["sysname"]] == "SunOS" > Rnk <- function(u) rank(unname(u), ties.method = "first") > ## to use for testing below: > cat("\nRnk(a3 $ adjout): "); dput(Rnk(a3$adjout), control= {}) Rnk(a3 $ adjout): c(62, 64, 69, 71, 70, 66, 65, 63, 68, 67, 73, 75, 72, 74, 35, 60, 55, 4, 22, 36, 6, 33, 34, 28, 53, 16, 13, 9, 27, 31, 49, 39, 20, 50, 14, 2, 24, 40, 54, 21, 17, 37, 52, 23, 58, 19, 61, 11, 25, 8, 46, 59, 48, 47, 29, 44, 43, 42, 7, 30, 18, 51, 41, 15, 10, 38, 3, 56, 57, 5, 1, 12, 26, 32, 45) > cat("\nRnk(a4 $ adjout): "); dput(Rnk(a4$adjout), control= {}) Rnk(a4 $ adjout): c(73, 77, 67, 55, 50, 63, 26, 47, 45, 4, 64, 84, 79, 78, 83, 72, 75, 68, 65, 60, 52, 39, 16, 61, 42, 33, 80, 71, 30, 2, 35, 51, 1, 69, 59, 15, 38, 62, 66, 40, 76, 32, 56, 85, 21, 43, 70, 41, 27, 58, 11, 53, 37, 8, 34, 3, 29, 49, 46, 14, 5, 44, 18, 19, 25, 10, 7, 24, 28, 86, 36, 23, 54, 74, 82, 17, 81, 9, 6, 48, 12, 31, 20, 13, 57, 22) > > (i.a4Out <- which( ! a4$nonOut)) # the outliers -- varies "wildly" [1] 70 > stopifnot(70 %in% i.a4Out) > { + if(is32 && !isMac) + all.equal(i.a4Out, c(1, 2, 41, 70)) + ## and this is "typically" true, but not for a 64-bit Linux version bypassing BLAS in matprod + else if(isSun || isMac) + TRUE + else if(grepl("^Fedora", osVersion) && !is32) + identical(i.a4Out, 70L) # since Dec 2020 (F 32) + else + all.equal(i.a4Out, c(9:19, 23:27,57, 59, 70, 77)) + } [1] "Numeric: lengths (1, 20) differ" > > ## only for ATLAS (BLAS/Lapack), not all are TRUE; which ones [but n < 4p] > if(!all(a5$nonOut)) + print(which(!a5$nonOut)) # if we know, enable check below > > stopifnot(exprs = { + which(!a2$nonOut) == 1:14 + which(!a3$nonOut) == 1:14 + ## 'longley', 'wood' have no outliers in the "adjOut" sense: + if(doExtras && !isMac) { ## longley also has n < 4p (!) + if(mS$ strictR) + sum(a1.2$nonOut) >= 15 # sum(.) = 16 [nb-mm3, Oct.2014] + else ## however, openBLAS Fedora Linux /usr/bin/R gives sum(a1.2$nonOut) = 13 + sum(a1.2$nonOut) >= 13 + } else TRUE + if(doExtras) { ## have n < 4p (!) + if(mS$ strictR) a5$nonOut + else ## not for ATLAS + sum(a5$nonOut) >= 18 # 18: OpenBLAS + } else TRUE + a6$nonOut[-20] + ## hbk (n = 75, p = 3) should be "stable" (but isn't quite) + abs(Rnk(a3$adjout) - + c(62, 64, 69, 71, 70, 66, 65, 63, 68, 67, 73, 75, 72, 74, 35, + 60, 55, 4, 22, 36, 6, 33, 34, 28, 53, 16, 13, 9, 27, 31, + 49, 39, 20, 50, 14, 2, 24, 40, 54, 21, 17, 37, 52, 23, 58, + 19, 61, 11, 25, 8, 46, 59, 48, 47, 29, 44, 43, 42, 7, 30, + 18, 51, 41, 15, 10, 38, 3, 56, 57, 5, 1, 12, 26, 32, 45) + ) <= 3 ## all 0 on 64-bit (F 32) Linux + }) > > ## milk (n = 86) : -- Quite platform dependent! > r <- Rnk(a4$adjout) > r64 <- ## the 64-bit (ubuntu 14.04, nb-mm3) values: + c(65, 66, 61, 56, 47, 51, 19, 37, 74, 67, 79, 86, 83, 84, 85, + 82, 81, 73, 80, 55, 27, 3, 70, 68, 78, 76, 77, 53, 48, 8, + 29, 33, 6, 32, 28, 31, 36, 40, 22, 58, 64, 52, 39, 63, 44, + 30, 57, 46, 43, 45, 25, 54, 12, 1, 9, 2, 71, 14, 75, 23, + 4, 10, 34, 35, 17, 24, 15, 20, 38, 72, 42, 13, 50, 60, 62, + 26, 69, 18, 5, 21, 7, 49, 11, 41, 59, 16) > r32 <- ## Linux 32bit (florence: 3.14.8-100.fc19.i686.PAE) + c(78, 79, 72, 66, 52, 61, 22, 41, 53, 14, 74, 85, 82, 83, 84, + 80, 81, 56, 73, 65, 30, 3, 16, 17, 68, 57, 58, 63, 54, 8, + 32, 37, 6, 36, 31, 35, 40, 44, 25, 69, 77, 62, 43, 76, 48, + 34, 67, 51, 47, 49, 28, 64, 12, 1, 9, 2, 33, 15, 59, 26, + 4, 10, 38, 39, 20, 27, 18, 23, 42, 86, 46, 13, 60, 71, 75, + 29, 50, 21, 5, 24, 7, 55, 11, 45, 70, 19) > d <- (r - if (is32) r32 else r64) > cbind(r, d) r d [1,] 73 8 [2,] 77 11 [3,] 67 6 [4,] 55 -1 [5,] 50 3 [6,] 63 12 [7,] 26 7 [8,] 47 10 [9,] 45 -29 [10,] 4 -63 [11,] 64 -15 [12,] 84 -2 [13,] 79 -4 [14,] 78 -6 [15,] 83 -2 [16,] 72 -10 [17,] 75 -6 [18,] 68 -5 [19,] 65 -15 [20,] 60 5 [21,] 52 25 [22,] 39 36 [23,] 16 -54 [24,] 61 -7 [25,] 42 -36 [26,] 33 -43 [27,] 80 3 [28,] 71 18 [29,] 30 -18 [30,] 2 -6 [31,] 35 6 [32,] 51 18 [33,] 1 -5 [34,] 69 37 [35,] 59 31 [36,] 15 -16 [37,] 38 2 [38,] 62 22 [39,] 66 44 [40,] 40 -18 [41,] 76 12 [42,] 32 -20 [43,] 56 17 [44,] 85 22 [45,] 21 -23 [46,] 43 13 [47,] 70 13 [48,] 41 -5 [49,] 27 -16 [50,] 58 13 [51,] 11 -14 [52,] 53 -1 [53,] 37 25 [54,] 8 7 [55,] 34 25 [56,] 3 1 [57,] 29 -42 [58,] 49 35 [59,] 46 -29 [60,] 14 -9 [61,] 5 1 [62,] 44 34 [63,] 18 -16 [64,] 19 -16 [65,] 25 8 [66,] 10 -14 [67,] 7 -8 [68,] 24 4 [69,] 28 -10 [70,] 86 14 [71,] 36 -6 [72,] 23 10 [73,] 54 4 [74,] 74 14 [75,] 82 20 [76,] 17 -9 [77,] 81 12 [78,] 9 -9 [79,] 6 1 [80,] 48 27 [81,] 12 5 [82,] 31 -18 [83,] 20 9 [84,] 13 -28 [85,] 57 -2 [86,] 22 6 > table(abs(d)) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 20 22 23 25 27 28 29 31 5 4 2 3 5 7 3 3 4 4 1 3 3 4 2 4 1 5 2 2 1 3 1 1 2 1 34 35 36 37 42 43 44 54 63 1 1 2 1 1 1 1 1 1 > cumsum(table(abs(d))) # <=> unscaled ecdf(d) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 20 22 23 25 27 28 29 31 5 9 11 14 19 26 29 32 36 40 41 44 47 51 53 57 58 63 65 67 68 71 72 73 75 76 34 35 36 37 42 43 44 54 63 77 78 80 81 82 83 84 85 86 > > ## For the biggest part (79 out of 86), the ranks are "close": > ## 2014: still true, but in a different sense.. > ## ^ typically, but e.g., *not* when using non-BLAS matprod(): > sum(abs(d) <= 17) >= 78 [1] FALSE > sum(abs(d) <= 13) >= 75 [1] FALSE > > > ## check of adjOutlyingness *free* bug > ## reported by Kaveh Vakili > set.seed(-37665251) > X <- matrix(rnorm(100*5), 100, 5) > Z <- matrix(rnorm(10*5)/100, 10, 5) > Z[,1] <- Z[,1] + 5 > X[91:100,] <- Z # if anything these should be outliers, but ... > for (i in 1:10) { + ## this would produce an error in the 6th iteration + aa <- adjOutlyingness(x=X, ndir=250) + if(any(is.out <- !aa$nonOut)) + cat("'outliers' at obs.", paste(which(is.out), collapse=", "),"\n") + stopifnot(1/4 < aa$adjout & aa$adjout < 16) + } 'outliers' at obs. 6, 33, 39, 44, 48, 90 'outliers' at obs. 16, 22, 88 'outliers' at obs. 12, 19, 42, 50, 58, 88, 90 'outliers' at obs. 42, 53, 62, 72, 88, 90 'outliers' at obs. 90 'outliers' at obs. 88 > > ## Check "high"-dimensional Noise ... typically mc() did *not* converge for some re-centered columns > ## Example by Valentin Todorov: > n <- 50 > p <- 30 > set.seed(1) # MM > a <- matrix(rnorm(n * p), nrow=n, ncol=p) > str(a) num [1:50, 1:30] -0.626 0.184 -0.836 1.595 0.33 ... > kappa(a) # 20.42 (~ 10--20 or so; definitely not close to singular) [1] 20.42296 > a.a <- adjOutlyingness(a, mcScale=FALSE, # <- my own recommendation + trace.lev=1) keeping *all* 250 normalized directions med <- colMedians(Y): 250 values; summary(): Min. 1st Qu. Median Mean 3rd Qu. Max. 0.002157 0.052337 0.106917 0.128326 0.175055 0.582199 Columnwise mc() got 250 values; summary(): Min. 1st Qu. Median Mean 3rd Qu. Max. -1.00000 -0.01316 0.00000 -0.06105 0.00000 1.00000 250 lower & upper Y (:= X - med(.)) values: Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00000 0.00000 0.00000 0.18087 0.01166 2.91541 Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00000 0.00000 0.00000 0.05395 0.00000 2.50093 outlyingnesses for all directions (of which max(.) will be chosen: 0% 25% 50% 75% 100% 0.000000e+00 1.363636e-01 5.515630e-01 1.755753e+14 9.614765e+15 > a.s <- adjOutlyingness(a, mcScale=TRUE, trace.lev=1) keeping *all* 250 normalized directions med <- colMedians(Y): 250 values; summary(): Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0006641 0.0700928 0.1242860 0.1483769 0.2124847 0.5023069 Columnwise mc() got 250 values; summary(): Min. 1st Qu. Median Mean 3rd Qu. Max. -1.00000 -0.07290 0.00000 -0.09323 0.00000 1.00000 250 lower & upper Y (:= X - med(.)) values: Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00000 0.00000 0.00000 0.25788 0.04611 3.32822 Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00000 0.00000 0.00000 0.06039 0.00000 3.08440 outlyingnesses for all directions (of which max(.) will be chosen: 0% 25% 50% 75% 100% 0.000000e+00 1.466278e-01 5.318217e-01 1.721839e+14 6.502497e+15 > ## a.a : > str(a.a) # high 'adjout' values "all similar" -> no outliers .. hmm .. ??? List of 7 $ adjout : num [1:50] 2.53e+15 3.63e+15 5.67e+15 3.00e+15 5.25e+15 ... $ iter : int 250 $ ndir.final : int 250 $ MCadjout : num 0.237 $ Qalph.adjout: Named num [1:2] 2.89e+15 4.79e+15 ..- attr(*, "names")= chr [1:2] "25%" "75%" $ cutoff : Named num 1.06e+16 ..- attr(*, "names")= chr "75%" $ nonOut : logi [1:50] TRUE TRUE TRUE TRUE TRUE TRUE ... > (hdOut <- which( ! a.a$nonOut)) ## indices of "outlier" -- very platform dependent ! integer(0) > a.a$MCadjout; all.equal(a.a$MCadjout, 0.136839766177, [1] 0.2371803 + tol = 1e-12) # seen 7.65e-14 and "big" differences on non-default platforms [1] "Mean relative difference: 0.4230559" > ## a.s : > which(! a.s$nonOut ) # none [all TRUE] integer(0) > a.s$MCadjout # platform dependent; saw [1] 0.07089226 > all.equal(a.s$MCadjout, 0.32284906741568, tol = 1e-13) # seen 2.2e-15 .. [1] "Mean relative difference: 3.55408" > # and big diffs on non-default platforms > ## > ## The adjout values are all > 10^15 !!! why ?? > ## Now (2021) I know: n < 4*p ==> can find 1D-projection where 1 of the 2 {Q3-Q2, Q2-Q1} is 0 ! > ##--------------------------------------------------------------------------------------------- > > > ###-- Back to mc() checks for "hard" cases > ### ===== ----------------------- > > ## "large n" (this did overflow sum_p, sum_q earlier ==> had inf.loop): > set.seed(3); x <- rnorm(2e5) > (mx <- mc(x, trace.lev=3)) mc_C_d(z[1:200000], trace_lev=3, scale=F): Median = -6.55715e-05 (not at the border) x1[] := {x | x_j >= x_eps = 6.55715e-19} has 100000 ('j-1') entries 'median-x' {x | -eps < x_i <= eps} has 0 (= 'k') entries now allocating 2+5 work arrays of size (1+) h2=100000 each: (h1,h2, nr, knew) = (100000,100000, 10000000000, 5000000001) it= 1, whimed(*, n=100000)= -0.0019683 sum_(p,q)= (5007620130,5007620131); sum_p >= kn it= 2, whimed(*, n= 99999)= 0.4221 sum_(p,q)= (2453285443,2453285444); s_p < kn ?<=? s_q: no it= 3, whimed(*, n= 99999)= 0.21774 sum_(p,q)= (3629582104,3629582105); s_p < kn ?<=? s_q: no it= 4, whimed(*, n= 99999)= 0.11138 sum_(p,q)= (4288398219,4288398220); s_p < kn ?<=? s_q: no it= 5, whimed(*, n= 99998)= 0.055486 sum_(p,q)= (4641942845,4641942846); s_p < kn ?<=? s_q: no it= 6, whimed(*, n= 99994)= 0.026873 sum_(p,q)= (4823919585,4823919586); s_p < kn ?<=? s_q: no it= 7, whimed(*, n= 99987)= 0.012451 sum_(p,q)= (4915754726,4915754727); s_p < kn ?<=? s_q: no it= 8, whimed(*, n= 99971)= 0.0052434 sum_(p,q)= (4961674479,4961674480); s_p < kn ?<=? s_q: no it= 9, whimed(*, n= 99924)= 0.0016334 sum_(p,q)= (4984674126,4984674127); s_p < kn ?<=? s_q: no it=10, whimed(*, n= 99842)= -0.0001688 sum_(p,q)= (4996158141,4996158142); s_p < kn ?<=? s_q: no it=11, whimed(*, n= 99686)= -0.0010715 sum_(p,q)= (5001907425,5001907426); sum_p >= kn it=12, whimed(*, n= 99377)=-0.00062346 sum_(p,q)= (4999053587,4999053588); s_p < kn ?<=? s_q: no it=13, whimed(*, n= 98730)=-0.00085064 sum_(p,q)= (5000499912,5000499913); sum_p >= kn it=14, whimed(*, n= 97586)= -0.0007403 sum_(p,q)= (4999796905,4999796906); s_p < kn ?<=? s_q: no it=15, whimed(*, n= 94851)=-0.00079865 sum_(p,q)= (5000168479,5000168480); sum_p >= kn it=16, whimed(*, n= 89663)=-0.00077229 sum_(p,q)= (4999999851,4999999852); s_p < kn ?<=? s_q: no not found [it=16, (nr,nl) = (5000168479,4999999852)], -> (knew-nl-1, j) = (148, 168627) converged in 16 iterations [1] -0.0007723158 > stopifnot(print(abs(mx - -0.000772315846101988)) < 1e-15) [1] 2.049142e-17 > # 3.252e-19, 64b Linux > # 1.198e-16, 32b Windows > > ### Some platform info : > local({ nms <- names(Si <- Sys.info()) + dropNms <- c("nodename", "machine", "login") + structure(Si[c("nodename", nms[is.na(match(nms, dropNms))])], + class="simple.list") }) _ nodename CRANWIN3 sysname Windows release Server x64 version build 20348 user CRAN effective_user CRAN > > if(identical(1L, grep("linux", R.version[["os"]]))) { ##----- Linux - only ---- + ## + Sys.procinfo <- function(procfile) + { + l2 <- strsplit(readLines(procfile),"[ \t]*:[ \t]*") + r <- sapply(l2[sapply(l2, length) == 2], + function(c2)structure(c2[2], names= c2[1])) + attr(r,"Name") <- procfile + class(r) <- "simple.list" + r + } + ## + Scpu <- Sys.procinfo("/proc/cpuinfo") + Smem <- Sys.procinfo("/proc/meminfo") + print(Scpu[c("model name", "cpu MHz", "cache size", "bogomips")]) + print(Smem[c("MemTotal", "SwapTotal")]) + } > > ##' Checking the breakdown point of mc() --- Hubert et al. theory said : 25% > ##' using non-default doReflect=FALSE as that corresponds to original Hubert et al. > ##' > ##' @title Medcouple mc() checking > ##' @param x > ##' @param Xfun > ##' @param eps > ##' @param NAiferror > ##' @param doReflect > ##' @param ... > ##' @return mc(*,..) or NaN in case mc() signals an error [non-convergence] > ##' @author Martin Maechler > mcX <- function(x, Xfun, eps=0, NAiferror=FALSE, doReflect=FALSE, ...) { + stopifnot(is.numeric(x), is.function(Xfun), "eps" %in% names(formals(Xfun))) + myFun <- + if(NAiferror) + function(u) tryCatch(mc(Xfun(u, eps=eps), doReflect=doReflect, ...), + error = function(e) NaN) + else + function(u) mc(Xfun(u, eps=eps), doReflect=doReflect, ...) + vapply(x, myFun, 1.) + } > > X1. <- function(u, eps=0) c(1,2,3, 7+(-10:10)*eps, u + (-1:1)*eps) > ## ==> This *did* breakdown [but points not "in general position"]: > ## but now is stable: > r.mc1 <- curve(mcX(x, X1.), 10, 1e35, log="x", n=1001) > stopifnot(r.mc1$y == 0) # now stable > if(FALSE) { + rt1 <- uniroot(function(x) mcX(exp(x), X1.) - 1/2, lower=0, upper=500) + exp(rt1$root) # 4.056265e+31 + } > > ## eps > 0 ==> No duplicated points ==> theory says breakdown point = 0.25 > ## ------- but get big numerical problems: > if(FALSE) { # ==> convergence problem [also in maxit = 1e5] .. really an *inf* loop! + r.mc1.1 <- curve(mcX(x, X1., eps= .1 ), 10, 1e35, log="x", n=1001) + r.mc1.2 <- curve(mcX(x, X1., eps= .01 ), 10, 1e35, log="x", n=1001) + r.mc1.3 <- curve(mcX(x, X1., eps= .001), 10, 1e35, log="x", n=1001) + r.mc1.5 <- curve(mcX(x, X1., eps= 1e-5), 10, 1e35, log="x", n=1001) + r.mc1.8 <- curve(mcX(x, X1., eps= 1e-8), 10, 1e35, log="x", n=1001) + r.mc1.15 <- curve(mcX(x, X1., eps=1e-15), 10, 1e35, log="x", n=1001)# still! + } > ## practically identical to eps = 0 where we have breakdown (see above) > r.mc1.16 <- curve(mcX(x, X1., eps=1e-16), 10, 1e35, log="x", n=1001) > all.equal(r.mc1, r.mc1.16, tol=1e-15)#-> TRUE [1] TRUE > > ## Quite bad case: Non convergence > X2. <- function(u) c(1:3, seq(6, 8, by = 1/8), u, u, u) > ## try(mc(X2.(4.3e31)))## -> error: no convergence > ## but now > stopifnot(exprs = { + all.equal(1/30, mc(X2.(4.3e31)), tol=1e-12) + all.equal(1/30, mc(X2.(4.3e31), eps1=1e-7, eps2=1e-100), tol=1e-12) + }) > ## related, more direct: > X3. <- function(u) c(10*(1:3), 60:80, (4:6)*u) > stopifnot(0 == mc(X3.(1e31), trace=5)) # fine convergence in one iter. mc_C_d(z[1:27], trace_lev=5, scale=F): Median = 70 (not at the border) x1[] := {x | x_j >= x_eps = 7e-13} has 13 ('j-1') entries 'median-x' {x | -eps < x_i <= eps} has 1 (= 'k') entries now allocating 2+5 work arrays of size (1+) h2=14 each: (h1,h2, nr, knew) = (14,14, 196, 99) before whimed(): work and iwt, each [0:(14-1)]: 1 0.714286 0.5 0.333333 0.2 0.0909091 0 -0.0769231 -0.142857 -0.2 -0.25 -0.73913 -0.785714 -0.818182 14 14 14 14 14 14 14 14 14 14 14 14 14 14 it= 1, whimed(*, n= 14)= 0 j= 1, i= 14, x[j]=1.2607e+12, x2[i]=-60, h=1 j= 2, i= 14, x[j]=1.2607e+12, x2[i]=-60, h=1 j= 3, i= 14, x[j]=1.2607e+12, x2[i]=-60, h=1 j= 4, i= 10, x[j]=10, x2[i]=-9, h=0.0526316 j= 5, i= 9, x[j]=9, x2[i]=-8, h=0.0588235 j= 6, i= 8, x[j]=8, x2[i]=-7, h=0.0666667 j= 7, i= 7, x[j]=7, x2[i]=-6, h=0.0769231 j= 8, i= 6, x[j]=6, x2[i]=-5, h=0.0909091 j= 9, i= 5, x[j]=5, x2[i]=-4, h=0.111111 j= 10, i= 4, x[j]=4, x2[i]=-3, h=0.142857 j= 11, i= 3, x[j]=3, x2[i]=-2, h=0.2 j= 12, i= 2, x[j]=2, x2[i]=-1, h=0.333333 j= 13, i= 1, x[j]=1, x2[i]=-0, h=1 p[1:14]: 13 12 11 10 9 8 7 6 5 4 3 3 3 3 sum= 97 q[1:14]: 15 14 13 12 11 10 9 8 7 6 5 4 4 4 sum= 108 ; s_p < kn ?<=? s_q: TRUE converged in 1 iterations mc_C_d(z[1:27], trace_lev=5, scale=F): Median = -70 (not at the border) x1[] := {x | x_j >= x_eps = 7e-13} has 13 ('j-1') entries 'median-x' {x | -eps < x_i <= eps} has 1 (= 'k') entries now allocating 2+5 work arrays of size (1+) h2=14 each: (h1,h2, nr, knew) = (14,14, 196, 99) before whimed(): work and iwt, each [0:(14-1)]: 1 0.714286 0.5 0.333333 0.2 0.0909091 0 -0.0769231 -0.142857 -0.2 -0.25 -1 -1 -1 14 14 14 14 14 14 14 14 14 14 14 14 14 14 it= 1, whimed(*, n= 14)= 0 j= 1, i= 11, x[j]=60, x2[i]=-10, h=0.714286 j= 2, i= 11, x[j]=50, x2[i]=-10, h=0.666667 j= 3, i= 11, x[j]=40, x2[i]=-10, h=0.6 j= 4, i= 10, x[j]=10, x2[i]=-9, h=0.0526316 j= 5, i= 9, x[j]=9, x2[i]=-8, h=0.0588235 j= 6, i= 8, x[j]=8, x2[i]=-7, h=0.0666667 j= 7, i= 7, x[j]=7, x2[i]=-6, h=0.0769231 j= 8, i= 6, x[j]=6, x2[i]=-5, h=0.0909091 j= 9, i= 5, x[j]=5, x2[i]=-4, h=0.111111 j= 10, i= 4, x[j]=4, x2[i]=-3, h=0.142857 j= 11, i= 3, x[j]=3, x2[i]=-2, h=0.2 j= 12, i= 2, x[j]=2, x2[i]=-1, h=0.333333 j= 13, i= 1, x[j]=1, x2[i]=-0, h=1 p[1:14]: 13 12 11 10 9 8 7 6 5 4 3 0 0 0 sum= 88 q[1:14]: 15 14 13 12 11 10 9 8 7 6 5 1 1 1 sum= 99 ; s_p < kn ?<=? s_q: TRUE converged in 1 iterations > stopifnot(0 == mc(X3.(1e32), trace=3)) # did *not* converge mc_C_d(z[1:27], trace_lev=3, scale=F): Median = 70 (not at the border) x1[] := {x | x_j >= x_eps = 7e-13} has 13 ('j-1') entries 'median-x' {x | -eps < x_i <= eps} has 1 (= 'k') entries now allocating 2+5 work arrays of size (1+) h2=14 each: (h1,h2, nr, knew) = (14,14, 196, 99) it= 1, whimed(*, n= 14)= 0 sum_(p,q)= (97,108); s_p < kn ?<=? s_q: TRUE converged in 1 iterations mc_C_d(z[1:27], trace_lev=3, scale=F): Median = -70 (not at the border) x1[] := {x | x_j >= x_eps = 7e-13} has 13 ('j-1') entries 'median-x' {x | -eps < x_i <= eps} has 1 (= 'k') entries now allocating 2+5 work arrays of size (1+) h2=14 each: (h1,h2, nr, knew) = (14,14, 196, 99) it= 1, whimed(*, n= 14)= 0 sum_(p,q)= (88,99); s_p < kn ?<=? s_q: TRUE converged in 1 iterations > > ### TODO : find example with *smaller* sample size -- with no convergence > X4. <- function(u, eps, ...) c(10, 70:75, (2:3)*u) > mc(X4.(1e34))# "fine" [1] 0.3333333 > ## now stable too: > r.mc4 <- curve(mcX(x, X4.), 100, 1e35, log="x", n=2^12) > stopifnot(abs(1/3 - r.mc4$y) < 1e-15) > > X5. <- function(u) c(10*(1:3), 70:78, (4:6)*u) > stopifnot(all.equal(4/15, mc(X5.(1e32), maxit=1000))) > > X5. <- function(u, eps,...) c(5*(1:12), (4:6)*u) > str(r.mc5 <- mc(X5.(1e32), doReflect=FALSE, full.result = TRUE)) num 0.2 - attr(*, "mcComp")=List of 4 ..$ medc : num 0.2 ..$ eps : num [1:2] 1e-14 1e-15 ..$ iter : int 2 ..$ converged: logi TRUE > ## Now, stable: > stopifnot(all.equal(1/5, c(r.mc5))) ## was 1; platform dependent .. > stopifnot(all.equal(4/15, mc(X5.(5e31)))) # had no convergence w/ maxit=10000 > r.mc5Sml <- curve(mcX(x, X5.), 1, 100, log="x", n=1024) ## quite astonishing > x <- lseq(1, 1e200, 2^11) > mc5L <- mcX(x, X5.) > table(err <- abs(0.2 - mc5L[x >= 24])) # I see all 0! 0 2033 > stopifnot(abs(err) < 1e-15) > > c.time(proc.time()) Time elapsed: 8.59 0.25 8.89 NA NA > > summary(warnings()) # seen 15 x In mcComp(....) : No warnings > ## maximal number of iterations (100 =? 100) reached prematurely > > > proc.time() user system elapsed 8.59 0.25 8.89