R Under development (unstable) (2024-06-18 r86781 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> ## test handing of NA coefficients / singular fits
> ## also check:
> ## -- what would have to be done if class "lm" was added.
> ## -- general compatibility to class lm.
> require(robustbase)
Loading required package: robustbase
> options(digits = 5)# -> higher chance of platform independence
> 
> ## generate simple example data (almost as in ./weights.R )
> data <- expand.grid(x1=letters[1:3], x2=LETTERS[1:3], rep=1:3)
> set.seed(1)
> data$y <- rnorm(nrow(data))
> ## drop all combinations of one interaction:
> data <- subset(data, x1 != 'c' | (x2 != 'B' & x2 != 'C'))
> ## add collinear variables
> data$x3 <- rnorm(nrow(data))
> data$x4 <- rnorm(nrow(data))
> data$x5 <- data$x3 + data$x4
> ## add some NA terms
> data$y[1] <- NA
> data$x4[2:3] <- NA ## to test anova
> 
> ## Classical models start with 'cm', robust just with  'rm' (or just 'm'):
> cm0 <- lm   (y ~ x1*x2 + x3,	       data)
> cm1 <- lm   (y ~ x1*x2 + x3 + x4 + x5, data)
> set.seed(2)
> rm1 <- lmrob(y ~ x1*x2 + x3 + x4 + x5, data)
> m3  <- lmrob(y ~ x1*x2 + x3 + x4,      data) # same column space as rm1
> rm0 <- lmrob(y ~ x1*x2 + x3,	       data)
> 
> ## clean version of rm1 (to check predict)
> data2 <- data.frame(y=data$y[-(1:3)], rm1$x[,!is.na(rm1$coef)])
> set.seed(2)
> rm1c <- lmrob(y ~ x1b + x1c + x2B + x2C + x3 + x4 + x1b:x2B + x1b:x2C, data2)
> 
> ## add class lm to rm1 (for now)
> class(rm1) <- c(class(rm1), "lm")
> class(rm0) <- c(class(rm0), "lm")
> 
> ## the full matrix (data) should be returned by model matrix (frame)
> stopifnot(all.equal(model.matrix(cm1), model.matrix(rm1)),
+           all.equal(model.frame (cm1), model.frame (rm1)))
> ## qr decomposition should be for the full data and pivots identical lm result
> qr.cm1 <- qr(cm1)$qr
> qr.rm1 <- rm1$qr$qr
> stopifnot(NCOL(qr.rm1) == NCOL(qr.cm1),
+           NROW(qr.rm1) == NROW(qr.cm1),
+           length(rm1$qr$qraux) == length(qr(cm1)$qraux),
+           all.equal(rm1$qr$pivot, qr(cm1)$pivot),
+           all.equal(dimnames(qr.rm1),dimnames(qr.cm1)))
> ## the alias function should return the same result
> stopifnot(all.equal(alias(cm1), alias(rm1)))
> 
> ####
> ## these helper functions should print NAs for the dropped coefficients
>   print(rm1)

Call:
lmrob(formula = y ~ x1 * x2 + x3 + x4 + x5, data = data)
 \--> method = "MM"
Coefficients:
(Intercept)          x1b          x1c          x2B          x2C           x3  
     0.4381       0.5968       0.0344       0.2012       0.1789      -0.1320  
         x4           x5      x1b:x2B      x1c:x2B      x1b:x2C      x1c:x2C  
    -0.2155           NA      -1.8763           NA      -0.8651           NA  

> summary(rm1) -> s1
> confint(rm1) -> ci1
> stopifnot(identical(is.na(coef(cm1)), apply(ci1, 1L, anyNA)),
+ 	  identical(sigma(rm1),                 s1$ sigma),
+ 	  identical(vcov(rm1, complete=FALSE),  s1$ cov  ),
+ 	  TRUE)
> 
> print(s1, showAlgo=FALSE)

Call:
lmrob(formula = y ~ x1 * x2 + x3 + x4 + x5, data = data)
 \--> method = "MM"
Residuals:
    Min      1Q  Median      3Q     Max 
-1.4584 -0.3556  0.0246  0.3651  1.0296 

Coefficients: (3 not defined because of singularities)
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   0.4381     0.5443    0.80     0.44
x1b           0.5968     0.6423    0.93     0.38
x1c           0.0344     0.6880    0.05     0.96
x2B           0.2012     0.7164    0.28     0.79
x2C           0.1789     0.6871    0.26     0.80
x3           -0.1320     0.4155   -0.32     0.76
x4           -0.2155     0.1694   -1.27     0.24
x5                NA         NA      NA       NA
x1b:x2B      -1.8763     1.2153   -1.54     0.16
x1c:x2B           NA         NA      NA       NA
x1b:x2C      -0.8651     0.7466   -1.16     0.28
x1c:x2C           NA         NA      NA       NA

Robust residual standard error: 0.927 
  (3 observations deleted due to missingness)
Multiple R-squared:  0.338,	Adjusted R-squared:  -0.251 
Convergence in 15 IRWLS iterations

Robustness weights: 
 2 weights are ~= 1. The remaining 16 ones are summarized as
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.787   0.937   0.985   0.952   0.988   0.994 
> ci1
               2.5 %  97.5 %
(Intercept) -0.79333 1.66946
x1b         -0.85607 2.04973
x1c         -1.52188 1.59076
x2B         -1.41948 1.82189
x2C         -1.37549 1.73320
x3          -1.07182 0.80783
x4          -0.59863 0.16756
x5                NA      NA
x1b:x2B     -4.62539 0.87283
x1c:x2B           NA      NA
x1b:x2C     -2.55391 0.82381
x1c:x2C           NA      NA
> ## drop1 should return df = 0
> #drop1(rm1) ## drop.lm does not return valid results (yet)!
> 
> ####
> ## methods that should just drop the NA coefficients
> ## m3 is actually the same as rm1, so anova should raise an error
> tools::assertError(anova(rm1, m3, test="Wald"))
> tools::assertError(anova(rm1, m3, test="Deviance"))
> ## but comparing rm1 and rm0 should be ok
> anova(rm1, rm0, test="Wald")
Robust Wald Test Table

Model 1: y ~ x1 * x2 + x3 + x4 + x5
Model 2: y ~ x1 * x2 + x3
Largest model fitted by lmrob(), i.e. SM

  pseudoDf Test.Stat Df Pr(>chisq)
1        6                        
2       10      1.62  1        0.2
> anova(rm1, rm0, test="Deviance")
Robust Deviance Table

Model 1: y ~ x1 * x2 + x3 + x4 + x5
Model 2: y ~ x1 * x2 + x3
Largest model fitted by lmrob(), i.e. SM

  pseudoDf Test.Stat Df Pr(>chisq)
1        6                        
2       10       1.4  1       0.24
> ## commands with single #:
> ## they do (or might) not return sensible results for robust fits
> ## and need to be checked again
> #cooks.distance(rm1)
> #deviance(rm1)
> #dfbeta(rm1)
> #dfbetas(rm1)
> #effects(rm1) ## fails
> #extractAIC(rm1)
> #influence(rm1)
> stopifnot(all.equal(hv1 <- hatvalues(rm1), .lmrob.hat(wqr=rm1$qr), tol=1e-15),
+           all.equal(hv1, stats:::hatvalues.lm(rm1), tol=1e-15),
+           all.equal(hat(cm1$qr), unname(hatvalues(cm1)), tol=1e-15),
+           all.equal(unname(hv1), hat(rm1$qr), tol=1e-15),
+           ## ditto :
+           all.equal(hv1c <- hatvalues(rm1c), stats:::hatvalues.lm(rm1c), tol=1e-15))
> 
> ## kappa() & labels() :
> stopifnot(is.infinite(kr1 <- kappa(rm1)), kr1 == kappa(cm1), # = +Inf both
+           identical(labels(rm1), labels(cm1)))
> logLik(rm1)# well, and what does it mean?
'log Lik.' -17.67 (df=10)
> ## plot(rm1, which=1) ## plot.lmrob() fails "singular covariance" .. FIXME!
> par(mfrow=c(2,2))
> plot(rm1, which=2:4)
> stopifnot(all.equal(predict(rm1), predict(rm1c), tol=1e-15),
+           all.equal(predict(rm1,  se.fit=TRUE, interval="confidence"),
+ 		    predict(rm1c, se.fit=TRUE, interval="confidence"), tol=4e-15)) # seen 1.3e-15 (ATLAS)
> predict(rm1, type="terms", se.fit=TRUE, interval="confidence")
$fit
         x1        x2        x3          x4 x5    x1:x2
4  -0.26908  0.074520 -0.166290  0.17233795  0  0.45689
5   0.32774  0.074520  0.026620 -0.03309916  0 -1.41939
7  -0.26908  0.052168 -0.038119  0.28384254  0  0.45689
8   0.32774  0.052168  0.020155 -0.26844357  0 -0.40816
10 -0.26908 -0.126688  0.194821 -0.38642275  0  0.45689
11  0.32774 -0.126688  0.067831  0.11957373  0  0.45689
12 -0.23465 -0.126688  0.065098  0.26547275  0  0.45689
13 -0.26908  0.074520  0.020882 -0.08237063  0  0.45689
14  0.32774  0.074520 -0.132148  0.06953345  0 -1.41939
16 -0.26908  0.052168 -0.087685 -0.47721028  0  0.45689
17  0.32774  0.052168  0.034769  0.04888197  0 -0.40816
19 -0.26908 -0.126688  0.046496 -0.10823918  0  0.45689
20  0.32774 -0.126688 -0.078945  0.03438888  0  0.45689
21 -0.23465 -0.126688 -0.060426  0.20062634  0  0.45689
22 -0.26908  0.074520  0.103967 -0.00026715  0  0.45689
23  0.32774  0.074520  0.106440  0.42945756  0 -1.41939
25 -0.26908  0.052168 -0.035072 -0.27545520  0  0.45689
26  0.32774  0.052168 -0.088392  0.00739277  0 -0.40816
attr(,"constant")
[1] 0.32347

$se.fit
        x1      x2       x3         x4 x5   x1:x2
4  0.35192 0.42010 0.523390 0.13540939  0 0.29013
5  0.29582 0.42010 0.083786 0.02600668  0 0.95012
7  0.35192 0.40345 0.119979 0.22302078  0 0.29013
8  0.29582 0.40345 0.063436 0.21092151  0 0.53827
10 0.35192 0.40191 0.613190 0.30362011  0 0.29013
11 0.29582 0.40191 0.213494 0.09395148  0 0.29013
12 0.40411 0.40191 0.204892 0.20858727  0 0.29013
13 0.35192 0.42010 0.065724 0.06472026  0 0.29013
14 0.29582 0.42010 0.415930 0.05463383  0 0.95012
16 0.35192 0.40345 0.275984 0.37495370  0 0.29013
17 0.29582 0.40345 0.109434 0.03840755  0 0.53827
19 0.35192 0.40191 0.146343 0.08504570  0 0.29013
20 0.29582 0.40191 0.248476 0.02702003  0 0.29013
21 0.40411 0.40191 0.190187 0.15763614  0 0.29013
22 0.35192 0.42010 0.327230 0.00020991  0 0.29013
23 0.29582 0.42010 0.335015 0.33743343  0 0.95012
25 0.35192 0.40345 0.110386 0.21643068  0 0.29013
26 0.29582 0.40345 0.278210 0.00580864  0 0.53827

$lwr
         x1       x2       x3         x4 x5    x1:x2
4  -1.06517 -0.87582 -1.35028 -0.1339794  0 -0.19943
5  -0.34144 -0.87582 -0.16292 -0.0919303  0 -3.56872
7  -1.06517 -0.86049 -0.30953 -0.2206655  0 -0.19943
8  -0.34144 -0.86049 -0.12335 -0.7455812  0 -1.62581
10 -1.06517 -1.03588 -1.19231 -1.0732591  0 -0.19943
11 -0.34144 -1.03588 -0.41513 -0.0929593  0 -0.19943
12 -1.14880 -1.03588 -0.39840 -0.2063844  0 -0.19943
13 -1.06517 -0.87582 -0.12780 -0.2287780  0 -0.19943
14 -0.34144 -0.87582 -1.07305 -0.0540569  0 -3.56872
16 -1.06517 -0.86049 -0.71200 -1.3254145  0 -0.19943
17 -0.34144 -0.86049 -0.21279 -0.0380019  0 -1.62581
19 -1.06517 -1.03588 -0.28455 -0.3006259  0 -0.19943
20 -0.34144 -1.03588 -0.64104 -0.0267347  0 -0.19943
21 -1.14880 -1.03588 -0.49066 -0.1559714  0 -0.19943
22 -1.06517 -0.87582 -0.63628 -0.0007420  0 -0.19943
23 -0.34144 -0.87582 -0.65142 -0.3338699  0 -3.56872
25 -1.06517 -0.86049 -0.28478 -0.7650554  0 -0.19943
26 -0.34144 -0.86049 -0.71775 -0.0057473  0 -1.62581
attr(,"constant")
[1] 0.32347

$upr
        x1      x2      x3         x4 x5   x1:x2
4  0.52701 1.02486 1.01770 0.47865527  0 1.11321
5  0.99693 1.02486 0.21616 0.02573203  0 0.72993
7  0.52701 0.96483 0.23329 0.78835059  0 1.11321
8  0.99693 0.96483 0.16366 0.20869402  0 0.80949
10 0.52701 0.78250 1.58195 0.30041366  0 1.11321
11 0.99693 0.78250 0.55079 0.33210673  0 1.11321
12 0.67950 0.78250 0.52860 0.73732993  0 1.11321
13 0.52701 1.02486 0.16956 0.06403677  0 1.11321
14 0.99693 1.02486 0.80875 0.19312376  0 0.72993
16 0.52701 0.96483 0.53663 0.37099391  0 1.11321
17 0.99693 0.96483 0.28233 0.13576588  0 0.80949
19 0.52701 0.78250 0.37755 0.08414755  0 1.11321
20 0.99693 0.78250 0.48315 0.09551244  0 1.11321
21 0.67950 0.78250 0.36981 0.55722407  0 1.11321
22 0.52701 1.02486 0.84421 0.00020769  0 1.11321
23 0.99693 1.02486 0.86430 1.19278501  0 0.72993
25 0.52701 0.96483 0.21464 0.21414502  0 1.11321
26 0.99693 0.96483 0.54096 0.02053283  0 0.80949
attr(,"constant")
[1] 0.32347

$df
[1] 9

$residual.scale
[1] 0.92726

> #proj(rm1) ## fails "FIXME"
> residuals(rm1)
        4         5         7         8        10        11        12        13 
 1.003436  1.029645 -0.321738  0.691394 -0.498376  0.342960 -0.359752 -1.145548 
       14        16        17        19        20        21        22        23 
-1.458427 -0.043483 -0.395061  0.498376 -0.342960  0.359752  0.092640  0.232325 
       25        26 
 0.366908 -0.270349 
> #rstandard(rm1)
> #rstudent(rm1)
> #simulate(rm1) ## just $weights needs to be changed to prior weights
> V1 <- vcov(rm1, complete=FALSE)
> ## but don't show the "eigen" part {vectors may flip sign}:
> attributes(V1) <- attributes(V1)[c("dim","dimnames", "weights")]; V1
            (Intercept)       x1b       x1c       x2B        x2C         x3
(Intercept)    0.296312 -0.321429 -0.338842 -0.238010 -0.3289125  0.1357438
x1b           -0.321429  0.412501  0.369763  0.253038  0.3616767 -0.1594475
x1c           -0.338842  0.369763  0.473317  0.274811  0.3497592 -0.1464335
x2B           -0.238010  0.253038  0.274811  0.513277  0.2342086 -0.0640599
x2C           -0.328913  0.361677  0.349759  0.234209  0.4721185 -0.2294044
x3             0.135744 -0.159448 -0.146434 -0.064060 -0.2294044  0.1726038
x4            -0.035258  0.039598  0.060587  0.035359  0.0024864  0.0037187
x1b:x2B        0.305162 -0.399754 -0.273260 -0.557840 -0.5219539  0.3087350
x1b:x2C        0.321423 -0.414159 -0.349092 -0.233097 -0.4394078  0.2039253
                    x4   x1b:x2B   x1b:x2C
(Intercept) -0.0352579  0.305162  0.321423
x1b          0.0395980 -0.399754 -0.414159
x1c          0.0605871 -0.273260 -0.349092
x2B          0.0353593 -0.557840 -0.233097
x2C          0.0024864 -0.521954 -0.439408
x3           0.0037187  0.308735  0.203925
x4           0.0286797  0.063743 -0.012435
x1b:x2B      0.0637434  1.476860  0.498060
x1b:x2C     -0.0124347  0.498060  0.557368
attr(,"weights")
      4       5       7       8      10      11      12      13      14      16 
0.89614 0.89081 0.98906 0.94998 0.97385 0.98757 0.98633 0.86577 0.78729 0.99980 
     17      19      20      21      22      23      25      26 
0.98353 0.97385 0.98757 0.98633 0.99909 0.99429 0.98578 0.99227 
> set.seed(12); sc <- simulate(cm1, 64)
> set.seed(12); rc <- simulate(rm1, 64)
> 
> stopifnot(all.equal(sqrt(diag(V1)), coef(summary(rm1))[,"Std. Error"], tol=1e-15),
+ 	  all.equal(sc, rc, tolerance = 0.08),# dimension *and* approx. values (no NA)
+ 	  identical(variable.names(rm1), variable.names(cm1)),
+ 	  all.equal(residuals(rm1), residuals(cm1), tolerance = 0.05),# incl. names
+ 	  all.equal(rstudent (rm1), rstudent (cm1), tolerance = 0.06),
+ 	  identical(dimnames(rm1), dimnames(cm1)),
+ 	  all.equal(dummy.coef(rm1), dummy.coef(cm1), tolerance= .5)) ## check mostly structure
> 
> ## other helper functions
> stopifnot(identical(case.names(rm1), case.names(cm1)),
+           all.equal(family(rm1), family(cm1)),# identical() upto environment
+           identical(formula(rm1), formula(cm1)),
+           nobs(rm1) == nobs(cm1))
> #add1(rm0, ~ . + x3 + x4 + x5) ## does not return valid results (yet)!
> 
> 
> ## test other initial estimators
> lmrob(y ~ x1*x2 + x3 + x4 + x5, data, init="M-S")

Call:
lmrob(formula = y ~ x1 * x2 + x3 + x4 + x5, data = data, init = "M-S")
 \--> method = "M-SM"
Coefficients:
(Intercept)          x1b          x1c          x2B          x2C           x3  
     0.4358       0.5996       0.0346       0.2005       0.1877      -0.1395  
         x4           x5      x1b:x2B      x1c:x2B      x1b:x2C      x1c:x2C  
    -0.2185           NA      -1.8957           NA      -0.8698           NA  

Warning message:
In lmrob.M.S(x, y, control, mf = mf) :
   Skipping design matrix equilibration (DGEEQU): row 12 is exactly zero.
> lmrob(y ~ x1*x2 + x3 + x4 + x5, data, init=lmrob.lar)

Call:
lmrob(formula = y ~ x1 * x2 + x3 + x4 + x5, data = data, init = lmrob.lar)
 \--> method = "lM"
Coefficients:
(Intercept)          x1b          x1c          x2B          x2C           x3  
   0.561131     0.444339     0.000184     0.530303    -0.251794     0.236541  
         x4           x5      x1b:x2B      x1c:x2B      x1b:x2C      x1c:x2C  
  -0.082680           NA    -1.298418           NA    -0.597602           NA  

> 
> ## test all zero design matrix
> data <- data.frame(y=1:10,x1=0,x2=0,os=2,w=c(0.5, 1))
> (m5 <- lmrob(y ~ 1+x1+x2+offset(os), data, weights=w))

Call:
lmrob(formula = y ~ 1 + x1 + x2 + offset(os), data = data, weights = w)
 \--> method = "MM"
Coefficients:
(Intercept)           x1           x2  
       3.64           NA           NA  

> (sm5 <- summary(m5))

Call:
lmrob(formula = y ~ 1 + x1 + x2 + offset(os), data = data, weights = w)
 \--> method = "MM"
Weighted Residuals:
    Min      1Q  Median      3Q     Max 
-3.6412 -1.8110 -0.0473  2.0093  4.3588 

Coefficients: (2 not defined because of singularities)
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)     3.64       1.03    3.53   0.0064 **
x1                NA         NA      NA       NA   
x2                NA         NA      NA       NA   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Robust residual standard error: 3.24 
Convergence in 8 IRWLS iterations

Robustness weights: 
    1     2     3     4     5     6     7     8     9    10 
0.909 0.889 0.970 0.977 0.998 0.999 0.992 0.952 0.952 0.842 
Algorithmic parameters: 
       tuning.chi                bb        tuning.psi        refine.tol 
         1.55e+00          5.00e-01          4.69e+00          1.00e-07 
          rel.tol         scale.tol         solve.tol          zero.tol 
         1.00e-07          1.00e-10          1.00e-07          1.00e-10 
      eps.outlier             eps.x warn.limit.reject warn.limit.meanrw 
         1.00e-02          1.82e-12          5.00e-01          5.00e-01 
     nResample         max.it       best.r.s       k.fast.s          k.max 
           500             50              2              1            200 
   maxit.scale      trace.lev            mts     compute.rd fast.s.large.n 
           200              0           1000              0           2000 
                  psi           subsampling                   cov 
           "bisquare"         "nonsingular"         ".vcov.avar1" 
compute.outlier.stats 
                 "SM" 
seed : int(0) 
> (m6 <- lmrob(y ~ 0+x1+x2+offset(os), data, weights=w))

Call:
lmrob(formula = y ~ 0 + x1 + x2 + offset(os), data = data, weights = w)
 \--> method = "MM"
Coefficients:
x1  x2  
NA  NA  

> (sm6 <- summary(m6))

Call:
lmrob(formula = y ~ 0 + x1 + x2 + offset(os), data = data, weights = w)
 \--> method = "MM"
Weighted Residuals:
   Min     1Q Median     3Q    Max 
 -3.83  -1.37   1.09   3.54   6.00 

Coefficients: (2 not defined because of singularities)
   Estimate Std. Error t value Pr(>|t|)
x1       NA         NA      NA       NA
x2       NA         NA      NA       NA

Robust residual standard error: NA 
Convergence in 0 IRWLS iterations

Robustness weights: 
 [1] NA NA NA NA NA NA NA NA NA NA
Algorithmic parameters: 
       tuning.psi           rel.tol         scale.tol         solve.tol 
         4.69e+00          1.00e-07          1.00e-10          1.00e-07 
         zero.tol       eps.outlier warn.limit.reject warn.limit.meanrw 
         1.00e-10          1.00e-02          5.00e-01          5.00e-01 
        max.it    maxit.scale      trace.lev     compute.rd fast.s.large.n 
            50            200              0              0           2000 
         eps.x 
             0 
                  psi                   cov compute.outlier.stats 
           "bisquare"         ".vcov.avar1"                  "SM" 
seed : int(0) 
> 
> sc5 <- summary(cm5 <- lm(y ~ 1+x1+x2+offset(os), data, weights=w))
> sc6 <- summary(cm6 <- lm(y ~ 0+x1+x2+offset(os), data, weights=w))
> 
> if(getRversion() <= "3.5.1" && as.numeric(R.version$`svn rev`) < 74993)
+     ## in the past, lm() returned logical empty matrix
+     storage.mode(sc6$coefficients) <- "double"
> 
> stopifnot(all.equal(coef(m5), coef(cm5), tolerance = 0.01),
+           all.equal(coef(m6), coef(cm6), tolerance = 1e-14),
+           all.equal(coef(sm5), coef(sc5), tolerance = 0.05),
+           all.equal(coef(sm6), coef(sc6), tolerance = 1e-14),
+           identical(sm5$df, sc5$df),
+           identical(sm6$df, sc6$df))
> 
> proc.time()
   user  system elapsed 
   0.46    0.09    0.54