R Under development (unstable) (2023-11-28 r85645 ucrt) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ### tests methods argument of lmrob.control > > library(robustbase) > > data(stackloss) > cat("doExtras:", doExtras <- robustbase:::doExtras(),"\n") doExtras: FALSE > > str(ctrl2 <- lmrob.control(trace.lev = if(doExtras) 2 else 0)) List of 34 $ setting : NULL $ seed : int(0) $ nResample : num 500 $ psi : chr "bisquare" $ tuning.chi : num 1.55 $ bb : num 0.5 $ tuning.psi : num 4.69 $ max.it : num 50 $ groups : num 5 $ n.group : num 400 $ best.r.s : int 2 $ k.fast.s : int 1 $ k.max : int 200 $ maxit.scale : int 200 $ k.m_s : int 20 $ refine.tol : num 1e-07 $ rel.tol : num 1e-07 $ scale.tol : num 1e-10 $ solve.tol : num 1e-07 $ zero.tol : num 1e-10 $ trace.lev : num 0 $ mts : int 1000 $ subsampling : chr "nonsingular" $ compute.rd : logi FALSE $ method : chr "MM" $ numpoints : int 10 $ cov : chr ".vcov.avar1" $ split.type : chr "f" $ fast.s.large.n : num 2000 $ eps.outlier :function (nobs) $ eps.x :function (maxx) $ compute.outlier.stats: chr "SM" $ warn.limit.reject : num 0.5 $ warn.limit.meanrw : num 0.5 - attr(*, "class")= chr "lmrobCtrl" > > ## S > set.seed(0) > summary(m0 <- lmrob(stack.loss ~ ., data = stackloss, method = "S", + compute.outlier.stats = "S")) Call: lmrob(formula = stack.loss ~ ., data = stackloss, method = "S", compute.outlier.stats = "S") \--> method = "S" Residuals: Min 1Q Median 3Q Max -9.46226 -0.82076 0.02249 0.80806 8.31829 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -36.92542 5.41708 -6.816 3.0e-06 *** Air.Flow 0.84957 0.07892 10.765 5.2e-09 *** Water.Temp 0.43047 0.19507 2.207 0.0414 * Acid.Conc. -0.07354 0.07216 -1.019 0.3224 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Robust residual standard error: 1.912 Multiple R-squared: 0.9863, Adjusted R-squared: 0.9839 Robustness weights: 5 observations c(1,3,4,13,21) are outliers with |weight| = 0 ( < 0.0048); one weight is ~= 1. The remaining 15 ones are summarized as Min. 1st Qu. Median Mean 3rd Qu. Max. 0.4126 0.7595 0.8726 0.8270 0.9718 0.9986 Algorithmic parameters: tuning.chi bb tuning.psi refine.tol 1.548e+00 5.000e-01 4.685e+00 1.000e-07 rel.tol scale.tol solve.tol zero.tol 1.000e-07 1.000e-10 1.000e-07 1.000e-10 eps.outlier eps.x warn.limit.reject warn.limit.meanrw 4.762e-03 1.692e-10 5.000e-01 5.000e-01 nResample max.it best.r.s k.fast.s k.max 500 50 2 1 200 maxit.scale trace.lev mts compute.rd fast.s.large.n 200 0 1000 0 2000 psi subsampling cov "bisquare" "nonsingular" ".vcov.w" compute.outlier.stats "S" seed : int(0) > set.seed(0) > m0a <- lmrob.S(m0$x, stack.loss, ctrl2) > > all.equal(m0 [c('coefficients', 'scale', 'rweights')], + m0a[c('coefficients', 'scale', 'rweights')]) [1] TRUE > > ## MM > set.seed(0) > summary(m1 <- lmrob(stack.loss ~ ., data = stackloss, method = "MM", + compute.outlier.stats = "S")) Call: lmrob(formula = stack.loss ~ ., data = stackloss, method = "MM", compute.outlier.stats = "S") \--> method = "MM" Residuals: Min 1Q Median 3Q Max -10.50974 -1.43819 -0.09134 1.02503 7.23113 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -41.52462 5.29780 -7.838 4.82e-07 *** Air.Flow 0.93885 0.11743 7.995 3.68e-07 *** Water.Temp 0.57955 0.26296 2.204 0.0416 * Acid.Conc. -0.11292 0.06989 -1.616 0.1246 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Robust residual standard error: 1.912 Multiple R-squared: 0.9593, Adjusted R-squared: 0.9521 Convergence in 17 IRWLS iterations Robustness weights: observation 21 is an outlier with |weight| = 0 ( < 0.0048); 2 weights are ~= 1. The remaining 18 ones are summarized as Min. 1st Qu. Median Mean 3rd Qu. Max. 0.1215 0.8757 0.9428 0.8721 0.9797 0.9978 Algorithmic parameters: tuning.chi bb tuning.psi refine.tol 1.548e+00 5.000e-01 4.685e+00 1.000e-07 rel.tol scale.tol solve.tol zero.tol 1.000e-07 1.000e-10 1.000e-07 1.000e-10 eps.outlier eps.x warn.limit.reject warn.limit.meanrw 4.762e-03 1.692e-10 5.000e-01 5.000e-01 nResample max.it best.r.s k.fast.s k.max 500 50 2 1 200 maxit.scale trace.lev mts compute.rd fast.s.large.n 200 0 1000 0 2000 psi subsampling cov "bisquare" "nonsingular" ".vcov.avar1" compute.outlier.stats "S" seed : int(0) > > set.seed(0) > m2 <- update(m1, method = "SM") > > all.equal(m1[c('coefficients', 'scale', 'cov')], + m2[c('coefficients', 'scale', 'cov')]) [1] TRUE > > set.seed(0) > m3 <- update(m0, method = "SM", cov = '.vcov.w') > > ## SMD > set.seed(0) > summary(m4 <- lmrob(stack.loss ~ ., data = stackloss, method = "SMD", psi = 'bisquare', + compute.outlier.stats = "S")) Call: lmrob(formula = stack.loss ~ ., data = stackloss, method = "SMD", psi = "bisquare", compute.outlier.stats = "S") \--> method = "SMD" Residuals: Min 1Q Median 3Q Max -10.50974 -1.43819 -0.09134 1.02503 7.23113 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -41.5246 8.9525 -4.638 0.000235 *** Air.Flow 0.9388 0.1175 7.990 3.71e-07 *** Water.Temp 0.5796 0.3199 1.812 0.087756 . Acid.Conc. -0.1129 0.1176 -0.960 0.350512 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Robust residual standard error: 2.651 Multiple R-squared: 0.9593, Adjusted R-squared: 0.9521 Convergence in 17 IRWLS iterations Robustness weights: observation 21 is an outlier with |weight| = 0 ( < 0.0048); 2 weights are ~= 1. The remaining 18 ones are summarized as Min. 1st Qu. Median Mean 3rd Qu. Max. 0.1215 0.8757 0.9428 0.8721 0.9797 0.9978 Algorithmic parameters: tuning.chi bb tuning.psi refine.tol 1.548e+00 5.000e-01 4.685e+00 1.000e-07 rel.tol scale.tol solve.tol zero.tol 1.000e-07 1.000e-10 1.000e-07 1.000e-10 eps.outlier eps.x warn.limit.reject warn.limit.meanrw 4.762e-03 1.692e-10 5.000e-01 5.000e-01 nResample max.it best.r.s k.fast.s k.max 500 50 2 1 200 maxit.scale trace.lev mts compute.rd numpoints 200 0 1000 0 10 fast.s.large.n 2000 psi subsampling cov "bisquare" "nonsingular" ".vcov.w" compute.outlier.stats "S" seed : int(0) > summary(m4a <- lmrob..D..fit(m3)) Call: lmrob(formula = stack.loss ~ ., data = stackloss, method = "SMD", compute.outlier.stats = "S", cov = ".vcov.w") \--> method = "MM" Residuals: Min 1Q Median 3Q Max -10.50974 -1.43819 -0.09134 1.02503 7.23113 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -41.5246 9.3676 -4.433 0.000365 *** Air.Flow 0.9388 0.1230 7.636 6.84e-07 *** Water.Temp 0.5796 0.3348 1.731 0.101505 Acid.Conc. -0.1129 0.1231 -0.917 0.371736 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Robust residual standard error: 2.651 Multiple R-squared: 0.9593, Adjusted R-squared: 0.9521 Convergence in 17 IRWLS iterations Robustness weights: observation 21 is an outlier with |weight| = 0 ( < 0.0048); 2 weights are ~= 1. The remaining 18 ones are summarized as Min. 1st Qu. Median Mean 3rd Qu. Max. 0.1215 0.8757 0.9428 0.8721 0.9797 0.9978 Algorithmic parameters: tuning.chi bb tuning.psi refine.tol 1.548e+00 5.000e-01 4.685e+00 1.000e-07 rel.tol scale.tol solve.tol zero.tol 1.000e-07 1.000e-10 1.000e-07 1.000e-10 eps.outlier eps.x warn.limit.reject warn.limit.meanrw 4.762e-03 1.692e-10 5.000e-01 5.000e-01 nResample max.it best.r.s k.fast.s k.max 500 50 2 1 200 maxit.scale trace.lev mts compute.rd fast.s.large.n 200 0 1000 0 2000 psi subsampling cov "bisquare" "nonsingular" ".vcov.w" compute.outlier.stats "S" seed : int(0) > > ## rearrange m4a and update call > m4a <- m4a[names(m4)] > class(m4a) <- class(m4) > m4a$call <- m4$call > > all.equal(m4, m4a, check.environment = FALSE) [1] "Component \"control\": Component \"method\": 1 string mismatch" [2] "Component \"init\": Component \"control\": Component \"method\": 1 string mismatch" [3] "Component \"cov\": Attributes: < Component \"corrfact\": Mean relative difference: 0.1167673 >" [4] "Component \"cov\": Attributes: < Component \"scorr\": Mean relative difference: 0.01959345 >" [5] "Component \"cov\": Mean relative difference: 0.09488594" > > ## SMDM > set.seed(0) > summary(m5 <- lmrob(stack.loss ~ ., data = stackloss, method = "SMDM", psi = 'bisquare', + compute.outlier.stats = "S")) Call: lmrob(formula = stack.loss ~ ., data = stackloss, method = "SMDM", psi = "bisquare", compute.outlier.stats = "S") \--> method = "SMDM" Residuals: Min 1Q Median 3Q Max -9.6746 -1.7721 0.1346 1.2041 6.6080 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -41.9398 9.7719 -4.292 0.000494 *** Air.Flow 0.8747 0.1231 7.107 1.76e-06 *** Water.Temp 0.8099 0.3363 2.408 0.027656 * Acid.Conc. -0.1188 0.1284 -0.926 0.367655 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Robust residual standard error: 2.651 Multiple R-squared: 0.9384, Adjusted R-squared: 0.9275 Convergence in 17 IRWLS iterations Robustness weights: 2 weights are ~= 1. The remaining 19 ones are summarized as Min. 1st Qu. Median Mean 3rd Qu. Max. 0.1546 0.9139 0.9597 0.8874 0.9866 0.9966 Algorithmic parameters: tuning.chi bb tuning.psi refine.tol 1.548e+00 5.000e-01 4.685e+00 1.000e-07 rel.tol scale.tol solve.tol zero.tol 1.000e-07 1.000e-10 1.000e-07 1.000e-10 eps.outlier eps.x warn.limit.reject warn.limit.meanrw 4.762e-03 1.692e-10 5.000e-01 5.000e-01 nResample max.it best.r.s k.fast.s k.max 500 50 2 1 200 maxit.scale trace.lev mts compute.rd numpoints 200 0 1000 0 10 fast.s.large.n 2000 psi subsampling cov "bisquare" "nonsingular" ".vcov.w" compute.outlier.stats "S" seed : int(0) > summary(m5a <- lmrob..M..fit(obj=m4)) Call: lmrob(formula = stack.loss ~ ., data = stackloss, method = "SMDM", psi = "bisquare", compute.outlier.stats = "S") \--> method = "SMD" Residuals: Min 1Q Median 3Q Max -9.6746 -1.7721 0.1346 1.2041 6.6080 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -41.9398 9.7719 -4.292 0.000494 *** Air.Flow 0.8747 0.1231 7.107 1.76e-06 *** Water.Temp 0.8099 0.3363 2.408 0.027656 * Acid.Conc. -0.1188 0.1284 -0.926 0.367655 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Robust residual standard error: 2.651 Multiple R-squared: 0.9384, Adjusted R-squared: 0.9275 Convergence in 17 IRWLS iterations Robustness weights: 2 weights are ~= 1. The remaining 19 ones are summarized as Min. 1st Qu. Median Mean 3rd Qu. Max. 0.1546 0.9139 0.9597 0.8874 0.9866 0.9966 Algorithmic parameters: tuning.chi bb tuning.psi refine.tol 1.548e+00 5.000e-01 4.685e+00 1.000e-07 rel.tol scale.tol solve.tol zero.tol 1.000e-07 1.000e-10 1.000e-07 1.000e-10 eps.outlier eps.x warn.limit.reject warn.limit.meanrw 4.762e-03 1.692e-10 5.000e-01 5.000e-01 nResample max.it best.r.s k.fast.s k.max 500 50 2 1 200 maxit.scale trace.lev mts compute.rd numpoints 200 0 1000 0 10 fast.s.large.n 2000 psi subsampling cov "bisquare" "nonsingular" ".vcov.w" compute.outlier.stats "S" seed : int(0) > > ## rearrange m5a > m5a <- m5a[names(m5)] > class(m5a) <- class(m5) > > all.equal(m5, m5a, check.environment = FALSE) #-> 3 string mismatch [1] "Component \"control\": Component \"method\": 1 string mismatch" [2] "Component \"init\": Component \"control\": Component \"method\": 1 string mismatch" [3] "Component \"init\": Component \"init\": Component \"control\": Component \"method\": 1 string mismatch" > > ## Fast S large n strategy (sped up) > model <- model.frame(LNOx ~ . ,data = NOxEmissions) > control <- lmrob.control(fast.s.large.n = 10, n.group = 341, groups = 2) > set.seed(0) > try(ret <- lmrob.S(model.matrix(model, NOxEmissions)[1:682,], NOxEmissions$LNOx[1:682], control)) Error in lmrob.S(model.matrix(model, NOxEmissions)[1:682, ], NOxEmissions$LNOx[1:682], : Fast S large n strategy failed. Use control parameter 'fast.s.large.n = Inf'. In addition: Warning message: In lmrob.S(model.matrix(model, NOxEmissions)[1:682, ], NOxEmissions$LNOx[1:682], : 'control$n.group' is not much larger than 'p', probably too small > ## do what the error says > control <- lmrob.control(fast.s.large.n = Inf) > try(ret <- lmrob.S(model.matrix(model, NOxEmissions)[1:682,], NOxEmissions$LNOx[1:682], control)) Error in lmrob.S(model.matrix(model, NOxEmissions)[1:682, ], NOxEmissions$LNOx[1:682], : DGEEQU: column 30 of the design matrix is exactly zero. > ##-> Error ...... DGEEQU: column 30 of the design matrix is exactly zero. > ## > ## still fails, but this error is to be expected since only a part > ## of the design matrix is given > > proc.time() user system elapsed 0.50 0.09 0.57