R Under development (unstable) (2024-08-15 r87022 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ## test handing of NA coefficients / singular fits > ## also check: > ## -- what would have to be done if class "lm" was added. > ## -- general compatibility to class lm. > require(robustbase) Loading required package: robustbase > options(digits = 5)# -> higher chance of platform independence > > ## generate simple example data (almost as in ./weights.R ) > data <- expand.grid(x1=letters[1:3], x2=LETTERS[1:3], rep=1:3) > set.seed(1) > data$y <- rnorm(nrow(data)) > ## drop all combinations of one interaction: > data <- subset(data, x1 != 'c' | (x2 != 'B' & x2 != 'C')) > ## add collinear variables > data$x3 <- rnorm(nrow(data)) > data$x4 <- rnorm(nrow(data)) > data$x5 <- data$x3 + data$x4 > ## add some NA terms > data$y[1] <- NA > data$x4[2:3] <- NA ## to test anova > > ## Classical models start with 'cm', robust just with 'rm' (or just 'm'): > cm0 <- lm (y ~ x1*x2 + x3, data) > cm1 <- lm (y ~ x1*x2 + x3 + x4 + x5, data) > set.seed(2) > rm1 <- lmrob(y ~ x1*x2 + x3 + x4 + x5, data) > m3 <- lmrob(y ~ x1*x2 + x3 + x4, data) # same column space as rm1 > rm0 <- lmrob(y ~ x1*x2 + x3, data) > > ## clean version of rm1 (to check predict) > data2 <- data.frame(y=data$y[-(1:3)], rm1$x[,!is.na(rm1$coef)]) > set.seed(2) > rm1c <- lmrob(y ~ x1b + x1c + x2B + x2C + x3 + x4 + x1b:x2B + x1b:x2C, data2) > > ## add class lm to rm1 (for now) > class(rm1) <- c(class(rm1), "lm") > class(rm0) <- c(class(rm0), "lm") > > ## the full matrix (data) should be returned by model matrix (frame) > stopifnot(all.equal(model.matrix(cm1), model.matrix(rm1)), + all.equal(model.frame (cm1), model.frame (rm1))) > ## qr decomposition should be for the full data and pivots identical lm result > qr.cm1 <- qr(cm1)$qr > qr.rm1 <- rm1$qr$qr > stopifnot(NCOL(qr.rm1) == NCOL(qr.cm1), + NROW(qr.rm1) == NROW(qr.cm1), + length(rm1$qr$qraux) == length(qr(cm1)$qraux), + all.equal(rm1$qr$pivot, qr(cm1)$pivot), + all.equal(dimnames(qr.rm1),dimnames(qr.cm1))) > ## the alias function should return the same result > stopifnot(all.equal(alias(cm1), alias(rm1))) > > #### > ## these helper functions should print NAs for the dropped coefficients > print(rm1) Call: lmrob(formula = y ~ x1 * x2 + x3 + x4 + x5, data = data) \--> method = "MM" Coefficients: (Intercept) x1b x1c x2B x2C x3 0.4381 0.5968 0.0344 0.2012 0.1789 -0.1320 x4 x5 x1b:x2B x1c:x2B x1b:x2C x1c:x2C -0.2155 NA -1.8763 NA -0.8651 NA > summary(rm1) -> s1 > confint(rm1) -> ci1 > stopifnot(identical(is.na(coef(cm1)), apply(ci1, 1L, anyNA)), + identical(sigma(rm1), s1$ sigma), + identical(vcov(rm1, complete=FALSE), s1$ cov ), + TRUE) > > print(s1, showAlgo=FALSE) Call: lmrob(formula = y ~ x1 * x2 + x3 + x4 + x5, data = data) \--> method = "MM" Residuals: Min 1Q Median 3Q Max -1.4584 -0.3556 0.0246 0.3651 1.0296 Coefficients: (3 not defined because of singularities) Estimate Std. Error t value Pr(>|t|) (Intercept) 0.4381 0.5443 0.80 0.44 x1b 0.5968 0.6423 0.93 0.38 x1c 0.0344 0.6880 0.05 0.96 x2B 0.2012 0.7164 0.28 0.79 x2C 0.1789 0.6871 0.26 0.80 x3 -0.1320 0.4155 -0.32 0.76 x4 -0.2155 0.1694 -1.27 0.24 x5 NA NA NA NA x1b:x2B -1.8763 1.2153 -1.54 0.16 x1c:x2B NA NA NA NA x1b:x2C -0.8651 0.7466 -1.16 0.28 x1c:x2C NA NA NA NA Robust residual standard error: 0.927 (3 observations deleted due to missingness) Multiple R-squared: 0.338, Adjusted R-squared: -0.251 Convergence in 15 IRWLS iterations Robustness weights: 2 weights are ~= 1. The remaining 16 ones are summarized as Min. 1st Qu. Median Mean 3rd Qu. Max. 0.787 0.937 0.985 0.952 0.988 0.994 > ci1 2.5 % 97.5 % (Intercept) -0.79333 1.66946 x1b -0.85607 2.04973 x1c -1.52188 1.59076 x2B -1.41948 1.82189 x2C -1.37549 1.73320 x3 -1.07182 0.80783 x4 -0.59863 0.16756 x5 NA NA x1b:x2B -4.62539 0.87283 x1c:x2B NA NA x1b:x2C -2.55391 0.82381 x1c:x2C NA NA > ## drop1 should return df = 0 > #drop1(rm1) ## drop.lm does not return valid results (yet)! > > #### > ## methods that should just drop the NA coefficients > ## m3 is actually the same as rm1, so anova should raise an error > tools::assertError(anova(rm1, m3, test="Wald")) > tools::assertError(anova(rm1, m3, test="Deviance")) > ## but comparing rm1 and rm0 should be ok > anova(rm1, rm0, test="Wald") Robust Wald Test Table Model 1: y ~ x1 * x2 + x3 + x4 + x5 Model 2: y ~ x1 * x2 + x3 Largest model fitted by lmrob(), i.e. SM pseudoDf Test.Stat Df Pr(>chisq) 1 6 2 10 1.62 1 0.2 > anova(rm1, rm0, test="Deviance") Robust Deviance Table Model 1: y ~ x1 * x2 + x3 + x4 + x5 Model 2: y ~ x1 * x2 + x3 Largest model fitted by lmrob(), i.e. SM pseudoDf Test.Stat Df Pr(>chisq) 1 6 2 10 1.4 1 0.24 > ## commands with single #: > ## they do (or might) not return sensible results for robust fits > ## and need to be checked again > #cooks.distance(rm1) > #deviance(rm1) > #dfbeta(rm1) > #dfbetas(rm1) > #effects(rm1) ## fails > #extractAIC(rm1) > #influence(rm1) > stopifnot(all.equal(hv1 <- hatvalues(rm1), .lmrob.hat(wqr=rm1$qr), tol=1e-15), + all.equal(hv1, stats:::hatvalues.lm(rm1), tol=1e-15), + all.equal(hat(cm1$qr), unname(hatvalues(cm1)), tol=1e-15), + all.equal(unname(hv1), hat(rm1$qr), tol=1e-15), + ## ditto : + all.equal(hv1c <- hatvalues(rm1c), stats:::hatvalues.lm(rm1c), tol=1e-15)) > > ## kappa() & labels() : > stopifnot(is.infinite(kr1 <- kappa(rm1)), kr1 == kappa(cm1), # = +Inf both + identical(labels(rm1), labels(cm1))) > logLik(rm1)# well, and what does it mean? 'log Lik.' -17.67 (df=10) > ## plot(rm1, which=1) ## plot.lmrob() fails "singular covariance" .. FIXME! > par(mfrow=c(2,2)) > plot(rm1, which=2:4) > stopifnot(all.equal(predict(rm1), predict(rm1c), tol=1e-15), + all.equal(predict(rm1, se.fit=TRUE, interval="confidence"), + predict(rm1c, se.fit=TRUE, interval="confidence"), tol=4e-15)) # seen 1.3e-15 (ATLAS) > predict(rm1, type="terms", se.fit=TRUE, interval="confidence") $fit x1 x2 x3 x4 x5 x1:x2 4 -0.26908 0.074520 -0.166290 0.17233795 0 0.45689 5 0.32774 0.074520 0.026620 -0.03309916 0 -1.41939 7 -0.26908 0.052168 -0.038119 0.28384254 0 0.45689 8 0.32774 0.052168 0.020155 -0.26844357 0 -0.40816 10 -0.26908 -0.126688 0.194821 -0.38642275 0 0.45689 11 0.32774 -0.126688 0.067831 0.11957373 0 0.45689 12 -0.23465 -0.126688 0.065098 0.26547275 0 0.45689 13 -0.26908 0.074520 0.020882 -0.08237063 0 0.45689 14 0.32774 0.074520 -0.132148 0.06953345 0 -1.41939 16 -0.26908 0.052168 -0.087685 -0.47721028 0 0.45689 17 0.32774 0.052168 0.034769 0.04888197 0 -0.40816 19 -0.26908 -0.126688 0.046496 -0.10823918 0 0.45689 20 0.32774 -0.126688 -0.078945 0.03438888 0 0.45689 21 -0.23465 -0.126688 -0.060426 0.20062634 0 0.45689 22 -0.26908 0.074520 0.103967 -0.00026715 0 0.45689 23 0.32774 0.074520 0.106440 0.42945756 0 -1.41939 25 -0.26908 0.052168 -0.035072 -0.27545520 0 0.45689 26 0.32774 0.052168 -0.088392 0.00739277 0 -0.40816 attr(,"constant") [1] 0.32347 $se.fit x1 x2 x3 x4 x5 x1:x2 4 0.35192 0.42010 0.523390 0.13540939 0 0.29013 5 0.29582 0.42010 0.083786 0.02600668 0 0.95012 7 0.35192 0.40345 0.119979 0.22302078 0 0.29013 8 0.29582 0.40345 0.063436 0.21092151 0 0.53827 10 0.35192 0.40191 0.613190 0.30362011 0 0.29013 11 0.29582 0.40191 0.213494 0.09395148 0 0.29013 12 0.40411 0.40191 0.204892 0.20858727 0 0.29013 13 0.35192 0.42010 0.065724 0.06472026 0 0.29013 14 0.29582 0.42010 0.415930 0.05463383 0 0.95012 16 0.35192 0.40345 0.275984 0.37495370 0 0.29013 17 0.29582 0.40345 0.109434 0.03840755 0 0.53827 19 0.35192 0.40191 0.146343 0.08504570 0 0.29013 20 0.29582 0.40191 0.248476 0.02702003 0 0.29013 21 0.40411 0.40191 0.190187 0.15763614 0 0.29013 22 0.35192 0.42010 0.327230 0.00020991 0 0.29013 23 0.29582 0.42010 0.335015 0.33743343 0 0.95012 25 0.35192 0.40345 0.110386 0.21643068 0 0.29013 26 0.29582 0.40345 0.278210 0.00580864 0 0.53827 $lwr x1 x2 x3 x4 x5 x1:x2 4 -1.06517 -0.87582 -1.35028 -0.1339794 0 -0.19943 5 -0.34144 -0.87582 -0.16292 -0.0919303 0 -3.56872 7 -1.06517 -0.86049 -0.30953 -0.2206655 0 -0.19943 8 -0.34144 -0.86049 -0.12335 -0.7455812 0 -1.62581 10 -1.06517 -1.03588 -1.19231 -1.0732591 0 -0.19943 11 -0.34144 -1.03588 -0.41513 -0.0929593 0 -0.19943 12 -1.14880 -1.03588 -0.39840 -0.2063844 0 -0.19943 13 -1.06517 -0.87582 -0.12780 -0.2287780 0 -0.19943 14 -0.34144 -0.87582 -1.07305 -0.0540569 0 -3.56872 16 -1.06517 -0.86049 -0.71200 -1.3254145 0 -0.19943 17 -0.34144 -0.86049 -0.21279 -0.0380019 0 -1.62581 19 -1.06517 -1.03588 -0.28455 -0.3006259 0 -0.19943 20 -0.34144 -1.03588 -0.64104 -0.0267347 0 -0.19943 21 -1.14880 -1.03588 -0.49066 -0.1559714 0 -0.19943 22 -1.06517 -0.87582 -0.63628 -0.0007420 0 -0.19943 23 -0.34144 -0.87582 -0.65142 -0.3338699 0 -3.56872 25 -1.06517 -0.86049 -0.28478 -0.7650554 0 -0.19943 26 -0.34144 -0.86049 -0.71775 -0.0057473 0 -1.62581 attr(,"constant") [1] 0.32347 $upr x1 x2 x3 x4 x5 x1:x2 4 0.52701 1.02486 1.01770 0.47865527 0 1.11321 5 0.99693 1.02486 0.21616 0.02573203 0 0.72993 7 0.52701 0.96483 0.23329 0.78835059 0 1.11321 8 0.99693 0.96483 0.16366 0.20869402 0 0.80949 10 0.52701 0.78250 1.58195 0.30041366 0 1.11321 11 0.99693 0.78250 0.55079 0.33210673 0 1.11321 12 0.67950 0.78250 0.52860 0.73732993 0 1.11321 13 0.52701 1.02486 0.16956 0.06403677 0 1.11321 14 0.99693 1.02486 0.80875 0.19312376 0 0.72993 16 0.52701 0.96483 0.53663 0.37099391 0 1.11321 17 0.99693 0.96483 0.28233 0.13576588 0 0.80949 19 0.52701 0.78250 0.37755 0.08414755 0 1.11321 20 0.99693 0.78250 0.48315 0.09551244 0 1.11321 21 0.67950 0.78250 0.36981 0.55722407 0 1.11321 22 0.52701 1.02486 0.84421 0.00020769 0 1.11321 23 0.99693 1.02486 0.86430 1.19278501 0 0.72993 25 0.52701 0.96483 0.21464 0.21414502 0 1.11321 26 0.99693 0.96483 0.54096 0.02053283 0 0.80949 attr(,"constant") [1] 0.32347 $df [1] 9 $residual.scale [1] 0.92726 > #proj(rm1) ## fails "FIXME" > residuals(rm1) 4 5 7 8 10 11 12 13 1.003436 1.029645 -0.321738 0.691394 -0.498376 0.342960 -0.359752 -1.145548 14 16 17 19 20 21 22 23 -1.458427 -0.043483 -0.395061 0.498376 -0.342960 0.359752 0.092640 0.232325 25 26 0.366908 -0.270349 > #rstandard(rm1) > #rstudent(rm1) > #simulate(rm1) ## just $weights needs to be changed to prior weights > V1 <- vcov(rm1, complete=FALSE) > ## but don't show the "eigen" part {vectors may flip sign}: > attributes(V1) <- attributes(V1)[c("dim","dimnames", "weights")]; V1 (Intercept) x1b x1c x2B x2C x3 (Intercept) 0.296312 -0.321429 -0.338842 -0.238010 -0.3289125 0.1357438 x1b -0.321429 0.412501 0.369763 0.253038 0.3616767 -0.1594475 x1c -0.338842 0.369763 0.473317 0.274811 0.3497592 -0.1464335 x2B -0.238010 0.253038 0.274811 0.513277 0.2342086 -0.0640599 x2C -0.328913 0.361677 0.349759 0.234209 0.4721185 -0.2294044 x3 0.135744 -0.159448 -0.146434 -0.064060 -0.2294044 0.1726038 x4 -0.035258 0.039598 0.060587 0.035359 0.0024864 0.0037187 x1b:x2B 0.305162 -0.399754 -0.273260 -0.557840 -0.5219539 0.3087350 x1b:x2C 0.321423 -0.414159 -0.349092 -0.233097 -0.4394078 0.2039253 x4 x1b:x2B x1b:x2C (Intercept) -0.0352579 0.305162 0.321423 x1b 0.0395980 -0.399754 -0.414159 x1c 0.0605871 -0.273260 -0.349092 x2B 0.0353593 -0.557840 -0.233097 x2C 0.0024864 -0.521954 -0.439408 x3 0.0037187 0.308735 0.203925 x4 0.0286797 0.063743 -0.012435 x1b:x2B 0.0637434 1.476860 0.498060 x1b:x2C -0.0124347 0.498060 0.557368 attr(,"weights") 4 5 7 8 10 11 12 13 14 16 0.89614 0.89081 0.98906 0.94998 0.97385 0.98757 0.98633 0.86577 0.78729 0.99980 17 19 20 21 22 23 25 26 0.98353 0.97385 0.98757 0.98633 0.99909 0.99429 0.98578 0.99227 > set.seed(12); sc <- simulate(cm1, 64) > set.seed(12); rc <- simulate(rm1, 64) > > stopifnot(all.equal(sqrt(diag(V1)), coef(summary(rm1))[,"Std. Error"], tol=1e-15), + all.equal(sc, rc, tolerance = 0.08),# dimension *and* approx. values (no NA) + identical(variable.names(rm1), variable.names(cm1)), + all.equal(residuals(rm1), residuals(cm1), tolerance = 0.05),# incl. names + all.equal(rstudent (rm1), rstudent (cm1), tolerance = 0.06), + identical(dimnames(rm1), dimnames(cm1)), + all.equal(dummy.coef(rm1), dummy.coef(cm1), tolerance= .5)) ## check mostly structure > > ## other helper functions > stopifnot(identical(case.names(rm1), case.names(cm1)), + all.equal(family(rm1), family(cm1)),# identical() upto environment + identical(formula(rm1), formula(cm1)), + nobs(rm1) == nobs(cm1)) > #add1(rm0, ~ . + x3 + x4 + x5) ## does not return valid results (yet)! > > > ## test other initial estimators > lmrob(y ~ x1*x2 + x3 + x4 + x5, data, init="M-S") Call: lmrob(formula = y ~ x1 * x2 + x3 + x4 + x5, data = data, init = "M-S") \--> method = "M-SM" Coefficients: (Intercept) x1b x1c x2B x2C x3 0.4358 0.5996 0.0346 0.2005 0.1877 -0.1395 x4 x5 x1b:x2B x1c:x2B x1b:x2C x1c:x2C -0.2185 NA -1.8957 NA -0.8698 NA Warning message: In lmrob.M.S(x, y, control, mf = mf) : Skipping design matrix equilibration (DGEEQU): row 12 is exactly zero. > lmrob(y ~ x1*x2 + x3 + x4 + x5, data, init=lmrob.lar) Call: lmrob(formula = y ~ x1 * x2 + x3 + x4 + x5, data = data, init = lmrob.lar) \--> method = "lM" Coefficients: (Intercept) x1b x1c x2B x2C x3 0.561131 0.444339 0.000184 0.530303 -0.251794 0.236541 x4 x5 x1b:x2B x1c:x2B x1b:x2C x1c:x2C -0.082680 NA -1.298418 NA -0.597602 NA > > ## test all zero design matrix > data <- data.frame(y=1:10,x1=0,x2=0,os=2,w=c(0.5, 1)) > (m5 <- lmrob(y ~ 1+x1+x2+offset(os), data, weights=w)) Call: lmrob(formula = y ~ 1 + x1 + x2 + offset(os), data = data, weights = w) \--> method = "MM" Coefficients: (Intercept) x1 x2 3.64 NA NA > (sm5 <- summary(m5)) Call: lmrob(formula = y ~ 1 + x1 + x2 + offset(os), data = data, weights = w) \--> method = "MM" Weighted Residuals: Min 1Q Median 3Q Max -3.6412 -1.8110 -0.0473 2.0093 4.3588 Coefficients: (2 not defined because of singularities) Estimate Std. Error t value Pr(>|t|) (Intercept) 3.64 1.03 3.53 0.0064 ** x1 NA NA NA NA x2 NA NA NA NA --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Robust residual standard error: 3.24 Convergence in 8 IRWLS iterations Robustness weights: 1 2 3 4 5 6 7 8 9 10 0.909 0.889 0.970 0.977 0.998 0.999 0.992 0.952 0.952 0.842 Algorithmic parameters: tuning.chi bb tuning.psi refine.tol 1.55e+00 5.00e-01 4.69e+00 1.00e-07 rel.tol scale.tol solve.tol zero.tol 1.00e-07 1.00e-10 1.00e-07 1.00e-10 eps.outlier eps.x warn.limit.reject warn.limit.meanrw 1.00e-02 1.82e-12 5.00e-01 5.00e-01 nResample max.it best.r.s k.fast.s k.max 500 50 2 1 200 maxit.scale trace.lev mts compute.rd fast.s.large.n 200 0 1000 0 2000 psi subsampling cov "bisquare" "nonsingular" ".vcov.avar1" compute.outlier.stats "SM" seed : int(0) > (m6 <- lmrob(y ~ 0+x1+x2+offset(os), data, weights=w)) Call: lmrob(formula = y ~ 0 + x1 + x2 + offset(os), data = data, weights = w) \--> method = "MM" Coefficients: x1 x2 NA NA > (sm6 <- summary(m6)) Call: lmrob(formula = y ~ 0 + x1 + x2 + offset(os), data = data, weights = w) \--> method = "MM" Weighted Residuals: Min 1Q Median 3Q Max -3.83 -1.37 1.09 3.54 6.00 Coefficients: (2 not defined because of singularities) Estimate Std. Error t value Pr(>|t|) x1 NA NA NA NA x2 NA NA NA NA Robust residual standard error: NA Convergence in 0 IRWLS iterations Robustness weights: [1] NA NA NA NA NA NA NA NA NA NA Algorithmic parameters: tuning.psi rel.tol scale.tol solve.tol 4.69e+00 1.00e-07 1.00e-10 1.00e-07 zero.tol eps.outlier warn.limit.reject warn.limit.meanrw 1.00e-10 1.00e-02 5.00e-01 5.00e-01 max.it maxit.scale trace.lev compute.rd fast.s.large.n 50 200 0 0 2000 eps.x 0 psi cov compute.outlier.stats "bisquare" ".vcov.avar1" "SM" seed : int(0) > > sc5 <- summary(cm5 <- lm(y ~ 1+x1+x2+offset(os), data, weights=w)) > sc6 <- summary(cm6 <- lm(y ~ 0+x1+x2+offset(os), data, weights=w)) > > if(getRversion() <= "3.5.1" && as.numeric(R.version$`svn rev`) < 74993) + ## in the past, lm() returned logical empty matrix + storage.mode(sc6$coefficients) <- "double" > > stopifnot(all.equal(coef(m5), coef(cm5), tolerance = 0.01), + all.equal(coef(m6), coef(cm6), tolerance = 1e-14), + all.equal(coef(sm5), coef(sc5), tolerance = 0.05), + all.equal(coef(sm6), coef(sc6), tolerance = 1e-14), + identical(sm5$df, sc5$df), + identical(sm6$df, sc6$df)) > > proc.time() user system elapsed 0.29 0.07 0.35