R Under development (unstable) (2024-01-29 r85841 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > dodata <- function(nrep=1, time=FALSE, short=FALSE, full=TRUE, method = c("FASTMVE","MASS")){ + ##@bdescr + ## Test the function covMve() on the literature datasets: + ## + ## Call covMve() for all regression datasets available in rrco/robustbasev and print: + ## - execution time (if time == TRUE) + ## - objective fucntion + ## - best subsample found (if short == false) + ## - outliers identified (with cutoff 0.975) (if short == false) + ## - estimated center and covarinance matrix if full == TRUE) + ## + ##@edescr + ## + ##@in nrep : [integer] number of repetitions to use for estimating the + ## (average) execution time + ##@in time : [boolean] whether to evaluate the execution time + ##@in short : [boolean] whether to do short output (i.e. only the + ## objective function value). If short == FALSE, + ## the best subsample and the identified outliers are + ## printed. See also the parameter full below + ##@in full : [boolean] whether to print the estimated cente and covariance matrix + ##@in method : [character] select a method: one of (FASTMCD, MASS) + + domve <- function(x, xname, nrep=1){ + n <- dim(x)[1] + p <- dim(x)[2] + alpha <- 0.5 + h <- h.alpha.n(alpha, n, p) + if(method == "MASS"){ + mve <- cov.mve(x, quantile.used=h) + quan <- h #default: floor((n+p+1)/2) + crit <- mve$crit + best <- mve$best + mah <- mahalanobis(x, mve$center, mve$cov) + quantiel <- qchisq(0.975, p) + wt <- as.numeric(mah < quantiel) + } + else{ + mve <- CovMve(x, trace=FALSE) + quan <- as.integer(mve@quan) + crit <- log(mve@crit) + best <- mve@best + wt <- mve@wt + } + + + if(time){ + xtime <- system.time(dorep(x, nrep, method))[1]/nrep + xres <- sprintf("%3d %3d %3d %12.6f %10.3f\n", dim(x)[1], dim(x)[2], quan, crit, xtime) + } + else{ + xres <- sprintf("%3d %3d %3d %12.6f\n", dim(x)[1], dim(x)[2], quan, crit) + } + + lpad<-lname-nchar(xname) + cat(pad.right(xname,lpad), xres) + + if(!short){ + cat("Best subsample: \n") + print(best) + + ibad <- which(wt == 0) + names(ibad) <- NULL + nbad <- length(ibad) + cat("Outliers: ", nbad, "\n") + if(nbad > 0) + print(ibad) + if(full){ + cat("-------------\n") + show(mve) + } + cat("--------------------------------------------------------\n") + } + } + + options(digits = 5) + set.seed(101) # <<-- sub-sampling algorithm now based on R's RNG and seed + + lname <- 20 + + ## VT::15.09.2013 - this will render the output independent + ## from the version of the package + suppressPackageStartupMessages(library(rrcov)) + + method <- match.arg(method) + if(method == "MASS") + library(MASS) + + + data(heart) + data(starsCYG) + data(phosphor) + data(stackloss) + data(coleman) + data(salinity) + data(wood) + + data(hbk) + + data(Animals, package = "MASS") + brain <- Animals[c(1:24, 26:25, 27:28),] + data(milk) + data(bushfire) + + tmp <- sys.call() + cat("\nCall: ", deparse(substitute(tmp)),"\n") + + cat("Data Set n p Half LOG(obj) Time\n") + cat("========================================================\n") + domve(heart[, 1:2], data(heart), nrep) + domve(starsCYG, data(starsCYG), nrep) + domve(data.matrix(subset(phosphor, select = -plant)), data(phosphor), nrep) + domve(stack.x, data(stackloss), nrep) + domve(data.matrix(subset(coleman, select = -Y)), data(coleman), nrep) + domve(data.matrix(subset(salinity, select = -Y)), data(salinity), nrep) + domve(data.matrix(subset(wood, select = -y)), data(wood), nrep) + domve(data.matrix(subset(hbk, select = -Y)),data(hbk), nrep) + + domve(brain, "Animals", nrep) + domve(milk, data(milk), nrep) + domve(bushfire, data(bushfire), nrep) + cat("========================================================\n") + } > > dogen <- function(nrep=1, eps=0.49, method=c("FASTMVE", "MASS")){ + + domve <- function(x, nrep=1){ + gc() + xtime <- system.time(dorep(x, nrep, method))[1]/nrep + cat(sprintf("%6d %3d %10.2f\n", dim(x)[1], dim(x)[2], xtime)) + xtime + } + + set.seed(1234) + + ## VT::15.09.2013 - this will render the output independent + ## from the version of the package + suppressPackageStartupMessages(library(rrcov)) + library(MASS) + + method <- match.arg(method) + + ap <- c(2, 5, 10, 20, 30) + an <- c(100, 500, 1000, 10000, 50000) + + tottime <- 0 + cat(" n p Time\n") + cat("=====================\n") + for(i in 1:length(an)) { + for(j in 1:length(ap)) { + n <- an[i] + p <- ap[j] + if(5*p <= n){ + xx <- gendata(n, p, eps) + X <- xx$X + tottime <- tottime + domve(X, nrep) + } + } + } + + cat("=====================\n") + cat("Total time: ", tottime*nrep, "\n") + } > > docheck <- function(n, p, eps){ + xx <- gendata(n,p,eps) + mve <- CovMve(xx$X) + check(mve, xx$xind) + } > > check <- function(mcd, xind){ + ## check if mcd is robust w.r.t xind, i.e. check how many of xind + ## did not get zero weight + mymatch <- xind %in% which(mcd@wt == 0) + length(xind) - length(which(mymatch)) + } > > dorep <- function(x, nrep=1, method=c("FASTMVE","MASS")){ + + method <- match.arg(method) + for(i in 1:nrep) + if(method == "MASS") + cov.mve(x) + else + CovMve(x) + } > > #### gendata() #### > # Generates a location contaminated multivariate > # normal sample of n observations in p dimensions > # (1-eps)*Np(0,Ip) + eps*Np(m,Ip) > # where > # m = (b,b,...,b) > # Defaults: eps=0 and b=10 > # > gendata <- function(n,p,eps=0,b=10){ + + if(missing(n) || missing(p)) + stop("Please specify (n,p)") + if(eps < 0 || eps >= 0.5) + stop(message="eps must be in [0,0.5)") + X <- mvrnorm(n,rep(0,p),diag(1,nrow=p,ncol=p)) + nbad <- as.integer(eps * n) + if(nbad > 0){ + Xbad <- mvrnorm(nbad,rep(b,p),diag(1,nrow=p,ncol=p)) + xind <- sample(n,nbad) + X[xind,] <- Xbad + } + list(X=X, xind=xind) + } > > pad.right <- function(z, pads) + { + ### Pads spaces to right of text + padding <- paste(rep(" ", pads), collapse = "") + paste(z, padding, sep = "") + } > > whatis<-function(x){ + if(is.data.frame(x)) + cat("Type: data.frame\n") + else if(is.matrix(x)) + cat("Type: matrix\n") + else if(is.vector(x)) + cat("Type: vector\n") + else + cat("Type: don't know\n") + } > > ## VT::15.09.2013 - this will render the output independent > ## from the version of the package > suppressPackageStartupMessages(library(rrcov)) > > dodata() Call: dodata() Data Set n p Half LOG(obj) Time ======================================================== heart 12 2 7 3.827606 Best subsample: [1] 1 4 7 8 9 10 11 Outliers: 3 [1] 2 6 12 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: height weight 34.9 27.0 Robust Estimate of Covariance: height weight height 142 217 weight 217 350 -------------------------------------------------------- starsCYG 47 2 25 -2.742997 Best subsample: [1] 2 4 6 8 12 13 16 23 24 25 26 28 31 32 33 37 38 39 41 42 43 44 45 46 47 Outliers: 7 [1] 7 9 11 14 20 30 34 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: log.Te log.light 4.41 4.93 Robust Estimate of Covariance: log.Te log.light log.Te 0.0173 0.0578 log.light 0.0578 0.3615 -------------------------------------------------------- phosphor 18 2 10 4.443101 Best subsample: [1] 3 5 8 9 11 12 13 14 15 17 Outliers: 3 [1] 1 6 10 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: inorg organic 15.2 39.4 Robust Estimate of Covariance: inorg organic inorg 188 230 organic 230 339 -------------------------------------------------------- stackloss 21 3 12 3.327582 Best subsample: [1] 4 5 6 7 8 9 10 11 12 13 14 20 Outliers: 3 [1] 1 2 3 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: Air.Flow Water.Temp Acid.Conc. 56.7 20.2 85.5 Robust Estimate of Covariance: Air.Flow Water.Temp Acid.Conc. Air.Flow 34.31 11.07 23.54 Water.Temp 11.07 9.23 7.85 Acid.Conc. 23.54 7.85 47.35 -------------------------------------------------------- coleman 20 5 13 2.065143 Best subsample: [1] 1 3 4 5 7 8 11 14 16 17 18 19 20 Outliers: 5 [1] 2 6 9 10 13 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: salaryP fatherWc sstatus teacherSc motherLev 2.79 44.26 3.59 25.08 6.38 Robust Estimate of Covariance: salaryP fatherWc sstatus teacherSc motherLev salaryP 0.2920 1.1188 2.0421 0.3487 0.0748 fatherWc 1.1188 996.7540 338.6587 7.1673 23.1783 sstatus 2.0421 338.6587 148.2501 4.4894 7.8135 teacherSc 0.3487 7.1673 4.4894 0.9082 0.3204 motherLev 0.0748 23.1783 7.8135 0.3204 0.6024 -------------------------------------------------------- salinity 28 3 16 2.002555 Best subsample: [1] 1 7 8 9 12 13 14 18 19 20 21 22 25 26 27 28 Outliers: 5 [1] 5 11 16 23 24 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: X1 X2 X3 10.2 3.1 22.4 Robust Estimate of Covariance: X1 X2 X3 X1 14.387 1.153 -4.072 X2 1.153 5.005 -0.954 X3 -4.072 -0.954 2.222 -------------------------------------------------------- wood 20 5 13 -5.471407 Best subsample: [1] 1 2 3 5 9 10 12 13 14 15 17 18 20 Outliers: 5 [1] 4 6 8 11 19 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: x1 x2 x3 x4 x5 0.576 0.123 0.531 0.538 0.889 Robust Estimate of Covariance: x1 x2 x3 x4 x5 x1 7.45e-03 1.11e-03 1.83e-03 -2.90e-05 -5.65e-04 x2 1.11e-03 3.11e-04 7.68e-04 3.37e-05 3.85e-05 x3 1.83e-03 7.68e-04 4.30e-03 -9.96e-04 -6.27e-05 x4 -2.90e-05 3.37e-05 -9.96e-04 3.02e-03 1.91e-03 x5 -5.65e-04 3.85e-05 -6.27e-05 1.91e-03 2.25e-03 -------------------------------------------------------- hbk 75 3 39 1.096831 Best subsample: [1] 15 17 18 19 20 21 24 27 28 30 32 33 35 36 40 41 42 43 44 46 48 49 50 53 54 [26] 55 56 58 59 64 65 66 67 70 71 72 73 74 75 Outliers: 14 [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: X1 X2 X3 1.48 1.86 1.73 Robust Estimate of Covariance: X1 X2 X3 X1 1.695 0.230 0.265 X2 0.230 1.679 0.119 X3 0.265 0.119 1.683 -------------------------------------------------------- Animals 28 2 15 8.945423 Best subsample: [1] 1 3 4 5 10 11 17 18 21 22 23 24 26 27 28 Outliers: 9 [1] 2 6 7 9 12 14 15 16 25 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: body brain 48.3 127.3 Robust Estimate of Covariance: body brain body 10767 16872 brain 16872 46918 -------------------------------------------------------- milk 86 8 47 -1.160085 Best subsample: [1] 4 5 7 8 9 10 11 19 21 22 23 24 26 30 31 33 34 35 36 37 38 39 42 43 45 [26] 46 54 56 57 59 60 61 62 63 64 65 66 67 69 72 76 78 79 81 82 83 85 Outliers: 18 [1] 1 2 3 12 13 14 15 16 17 18 20 27 41 44 47 70 74 75 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: X1 X2 X3 X4 X5 X6 X7 X8 1.03 35.91 33.02 26.08 25.06 24.99 122.93 14.38 Robust Estimate of Covariance: X1 X2 X3 X4 X5 X6 X7 X1 6.00e-07 1.51e-04 3.34e-04 3.09e-04 2.82e-04 2.77e-04 1.09e-03 X2 1.51e-04 2.03e+00 3.83e-01 3.04e-01 2.20e-01 3.51e-01 2.18e+00 X3 3.34e-04 3.83e-01 1.58e+00 1.21e+00 1.18e+00 1.20e+00 1.60e+00 X4 3.09e-04 3.04e-01 1.21e+00 9.82e-01 9.39e-01 9.53e-01 1.36e+00 X5 2.82e-04 2.20e-01 1.18e+00 9.39e-01 9.67e-01 9.52e-01 1.34e+00 X6 2.77e-04 3.51e-01 1.20e+00 9.53e-01 9.52e-01 9.92e-01 1.38e+00 X7 1.09e-03 2.18e+00 1.60e+00 1.36e+00 1.34e+00 1.38e+00 6.73e+00 X8 3.33e-05 2.92e-01 2.65e-01 1.83e-01 1.65e-01 1.76e-01 5.64e-01 X8 X1 3.33e-05 X2 2.92e-01 X3 2.65e-01 X4 1.83e-01 X5 1.65e-01 X6 1.76e-01 X7 5.64e-01 X8 1.80e-01 -------------------------------------------------------- bushfire 38 5 22 5.644315 Best subsample: [1] 1 2 3 4 5 6 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 Outliers: 15 [1] 7 8 9 10 11 29 30 31 32 33 34 35 36 37 38 ------------- Call: CovMve(x = x, trace = FALSE) -> Method: Minimum volume ellipsoid estimator Robust Estimate of Location: V1 V2 V3 V4 V5 107 147 263 215 277 Robust Estimate of Covariance: V1 V2 V3 V4 V5 V1 519 375 -2799 -619 -509 V2 375 320 -1671 -342 -289 V3 -2799 -1671 18373 4314 3480 V4 -619 -342 4314 1076 854 V5 -509 -289 3480 854 683 -------------------------------------------------------- ======================================================== > > proc.time() user system elapsed 0.48 0.17 0.64