## test handing of weights and offset argument require(robustbase) ## generate simple example data (extension of the one in ./NAcoef.R ) data <- expand.grid(x1=letters[1:3], x2=LETTERS[1:4], rep=1:3, KEEP.OUT.ATTRS = FALSE) ## generate offset column data$os <- 1:nrow(data) set.seed(1) data$y <- data$os + rnorm(nrow(data)) ## add collinear variables data$x3 <- rnorm(nrow(data)) data$x4 <- rnorm(nrow(data)) data$x5 <- data$x3 + data$x4 ## lm() will have 'x5' "aliased" (and give coef = NA) ## add some NA terms data$y[1] <- NA data$x4[2:3] <- NA ## to test anova ## generate weights ## some obs with weight 0 data$weights <- as.numeric(with(data, x1 != 'c' | (x2 != 'B' & x2 != 'C'))) ## some obs with weight 2 data$weights[data$x1 == 'b'] <- 2 ## data2 := {data + weights}, encoded in "data2" (-> "ok" for coef(), not for SE) data2 <- rbind(subset(data, weights > 0), subset(data, weights == 2)) ## using these parameters we're essentially forcing lmrob() to ## fit a classic model --> easier to compare to lm() ctrl <- lmrob.control(psi="optimal", tuning.chi = 20, bb = 0.0003846154, tuning.psi=20, method="SM", cov=".vcov.w") ## SM = MM == the case where .vcov.avar1 was also defined for ## Classical models start with 'cm', robust just with 'rm' (or just 'm'), ## "." - models drop 'x5' (which is aliased / extraneous by construction) : (cm0 <- lm (y ~ x1*x2 + x3 + x4 + x5 + offset(os), data)) (cm0.<- lm (y ~ x1*x2 + x3 + x4 + offset(os), data)) (cm1 <- lm (y ~ x1*x2 + x3 + x4 + x5 + offset(os), data, weights=weights)) (cm1.<- lm (y ~ x1*x2 + x3 + x4 + offset(os), data, weights=weights)) (cm2 <- lm (y ~ x1*x2 + x3 + x4 + x5, data2, offset=os)) (cm2.<- lm (y ~ x1*x2 + x3 + x4, data2, offset=os)) (rm0 <- lmrob(y ~ x1*x2 + x3 + x4 + x5 + offset(os), data, control=ctrl)) (rm0.<- lmrob(y ~ x1*x2 + x3 + x4 + offset(os), data, control=ctrl)) set.seed(2) (rm1 <- lmrob(y ~ x1*x2 + x3 + x4 + x5 + offset(os), data, weights=weights, control=ctrl)) set.seed(2) (rm1.<- lmrob(y ~ x1*x2 + x3 + x4 + offset(os), data, weights=weights, control=ctrl)) set.seed(2) (rm2 <- lmrob(y ~ x1*x2 + x3 + x4 + x5, data2, offset=os, control=ctrl)) set.seed(2) (rm2.<- lmrob(y ~ x1*x2 + x3 + x4, data2, offset=os, control=ctrl)) sc0 <- summary(cm0) sc1 <- summary(cm1) sc2 <- summary(cm2) (sr0 <- summary(rm0)) (sr1 <- summary(rm1)) (sr2 <- summary(rm2)) ## test Estimates, Std. Errors, ... nc <- names(coef(cm1)) nc. <- setdiff(nc, "x5") # those who are "valid" stopifnot(exprs = { all.equal(coef(cm0.),coef(cm0)[nc.]) all.equal(coef(cm1.),coef(cm1)[nc.]) all.equal(coef(cm2.),coef(cm2)[nc.]) all.equal(coef(cm1), coef(cm2)) all.equal(coef(rm1), coef(rm2)) all.equal(coef(sc0), coef(sr0)) all.equal(coef(sc1), coef(sr1)) all.equal(coef(sc2), coef(sr2)) }) ## test class "lm" methods that do not depend on weights meths1 <- c("family", "formula", "labels", "model.matrix", "na.action", "terms") for (meth in meths1) stopifnot(all.equal(do.call(meth, list(rm0)), do.call(meth, list(rm1)))) ## class "lm" methods that depend on weights ## FIXME: meths2 <- c(#"AIC", "alias", #"BIC", "case.names", "coef", "confint", #"cooks.distance", #"deviance", "df.residual", #"dfbeta", #"dfbetas", #"drop1", "dummy.coef", #"effects", #"extractAIC", #"hatvalues", #"influence", "kappa", #"logLik", #"model.frame", ## disable because of zero.weights attribute "nobs", "predict", #"proj", #"rstandard", #"rstudent", #"simulate", ##"summary", ## see above "variable.names", ##"vcov", ## see below "weights") op <- options(warn = 1)# print immediately for (meth in meths2) { cat(meth,":") .SW. <- if(meth == "weights") suppressWarnings else identity # for suppressing ## No weights defined for this object. Use type="robustness" .... stopifnot(all.equal(do.call(meth, list(cm1)), do.call(meth, list(rm1))), all.equal(do.call(meth, list(cm2)), .SW.(do.call(meth, list(rm2))))) cat("\n") } options(op)# reverting ## further tests: anova(rm1, update(rm1, ~ . - x4 - x5)) anova(rm2, update(rm2, ~ . - x4 - x5)) stopifnot(all.equal(fitted(cm0), fitted(rm0)), all.equal(fitted(cm1), fitted(rm1)), all.equal(fitted(cm2), fitted(rm2))) nd <- expand.grid(x1=letters[1:3], x2=LETTERS[1:4]) set.seed(3) nd$x3 <- rnorm(nrow(nd)) nd$x4 <- rnorm(nrow(nd)) nd$x5 <- rnorm(nrow(nd)) # (*not* the sum x3+x4 !) nd$os <- nrow(nd):1 wts <- runif(nrow(nd)) stopifnot(exprs = { all.equal(predict(cm0, nd, interval="prediction"), predict(rm0, nd, interval="prediction")) all.equal(predict(cm1, nd, interval="prediction"), predict(rm1, nd, interval="prediction")) all.equal(predict(cm2, nd, interval="prediction"), predict(rm2, nd, interval="prediction")) all.equal(predict(cm0, nd, interval="prediction", weights=wts), predict(rm0, nd, interval="prediction", weights=wts)) all.equal(predict(cm1, nd, interval="prediction", weights=wts), predict(rm1, nd, interval="prediction", weights=wts)) all.equal(predict(cm2, nd, interval="prediction", weights=wts), predict(rm2, nd, interval="prediction", weights=wts), tolerance=1e-7) }) ## Padding can lead to differing values here ## so test only full rank part qrEQ <- function(m1, m2) { q1 <- qr(m1) q2 <- qr(m2) r <- 1:q1$rank stopifnot(q1$rank == q2$rank, all.equal(q1$pivot, q2$pivot), all.equal(q1$qraux[r],q2$qraux[r]), all.equal(q1$qr[r,r], q2$qr[r,r])) } qrEQ(cm0, rm0) qrEQ(cm1, rm1) qrEQ(cm2, rm2) stopifnot(all.equal(residuals(cm0), residuals(rm0)), all.equal(residuals(cm1), residuals(rm1)), all.equal(residuals(cm2), residuals(rm2)), all.equal(resid(cm0, type="pearson"), resid(rm0, type="pearson")), all.equal(resid(cm1, type="pearson"), resid(rm1, type="pearson")), all.equal(resid(cm2, type="pearson"), resid(rm2, type="pearson"))) ## R 3.5.0: vcov(*, complete=TRUE) new default ==> same NA's as coef() if(interactive()) withAutoprint({ op <- options(width = 130, digits = 2) # --> vcov() rows fit on 1 line vcov(cm0) # 'x5' is NA vcov(cm2) # 'x5', 'x1c:2B', 'x1c:2C' rows & columns are NA options(op) }) (no.C <- is.na(match("complete", names(formals(stats:::vcov.lm))))) ## temporary _FIXME_ vcovC <- if(no.C) function(M, ...) vcov(M, complete=FALSE, ...) else vcov # (complete=TRUE) stopifnot(all.equal(vcov(cm0), vcovC(rm0), check.attributes=FALSE), all.equal(vcov(cm1), vcovC(rm1), check.attributes=FALSE), all.equal(vcov(cm2), vcovC(rm2), check.attributes=FALSE)) ## "clean": cln <- function(vc) structure(vc, weights=NULL, eigen=NULL) ## .vcov.avar1() is not recommended here, but also should work with singular / NA coef case: ok0 <- !is.na(coef(rm0)) vr0.NA<- vcov(rm0, cov=".vcov.avar1", complete=NA) # "almost singular" warning vr0.T <- vcov(rm0, cov=".vcov.avar1", complete=TRUE) vr0.F <- vcov(rm0, cov=".vcov.avar1", complete=FALSE) stopifnot(identical(dim(vr0.NA), dim(vr0.T)), identical(dim(vr0.F), dim(vr0.T) - 1L), dim(vr0.F) == 14, all.equal(cln(vr0.F), vr0.T[ok0,ok0], tol = 1e-15)) if(!no.C) { vc0.T <- vcov(cm0, complete=TRUE) vc0.F <- vcov(cm0, complete=FALSE) } ok1 <- !is.na(coef(rm1)) ## cannot work because init/fit residuals are not of full length tools::assertError(vr1.NA<- vcov(rm1, cov=".vcov.avar1", complete=NA)) tools::assertError(vr1.T <- vcov(rm1, cov=".vcov.avar1", complete=TRUE )) tools::assertError(vr1.F <- vcov(rm1, cov=".vcov.avar1", complete=FALSE)) ## instead, must refit rm1. <- update(rm1, control = within(ctrl, cov <- ".vcov.avar1")) vr1.NA<- vcov(rm1., complete=NA) vr1.T <- vcov(rm1., complete=TRUE) vr1.F <- vcov(rm1., complete=FALSE) stopifnot(identical(vr1.F, vr1.NA), # in this case identical(dim(vr1.F), dim(vr1.T) - 3L), dim(vr1.F) == 12, isSymmetric(vr1.T), identical(rownames(vr1.F), rownames(vr1.T)[ok1]), all.equal(cln(vr1.F), vr1.T[ok1,ok1], tol=1e-15)) if(FALSE) ## ERROR "exact singular" (probably *NOT* to fix, as TRUE/FALSE do work !) vr2.NA<- vcov(rm2, cov=".vcov.avar1", complete=NA) # "almost singular" warning vr2.T <- vcov(rm2, cov=".vcov.avar1", complete=TRUE) vr2.F <- vcov(rm2, cov=".vcov.avar1", complete=FALSE) stopifnot(TRUE, # identical(dim(vr2.NA), dim(vr2.T)), identical(dim(vr2.F), dim(vr2.T) - 3L), dim(vr2.F) == 12, identical(rownames(vr2.F), rownames(vr1.F)), identical(rownames(vr2.T), rownames(vr1.T)), all.equal(cln(vr2.F), vr2.T[ok1,ok1], tol=1e-15)) ## Hmm, the supposedly heteroscedastic-robust estimates *are* very different: all.equal(vcov(cm0), vcovC(rm0, cov = ".vcov.avar1"), check.attributes=FALSE) # rel.diff. 0.5367564 if(FALSE) # does not make sense all.equal(vcov(cm1), vcovC(rm1, cov = ".vcov.avar1"), check.attributes=FALSE) all.equal(vcov(cm2), vcovC(rm2, cov = ".vcov.avar1"), check.attributes=FALSE) # rel.diff. 0.5757642 ## Null fits (rank(X)==0) are tested in NAcoef.R ## testing weight=0 bug lmrob(y ~ x3, data, weights=weights)