R Under development (unstable) (2023-06-09 r84528 ucrt) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(robustbase) > > ### Binomial example with *small* ni > > N <- 51 > set.seed(123) > table(ni <- rpois(N, lam=4))# has 4 '1's, (no '0') 1 2 3 4 5 6 7 8 10 4 8 10 9 7 4 5 3 1 > n0 <- ni; n0[print(which(ni == 1)[1:2])] <- 0 # has two '0's [1] 6 18 > x <- seq(0,1, length=N) > pr.x <- plogis(5*(x - 1/2)) > k <- rbinom(N, size = ni, prob = pr.x) > k0 <- rbinom(N, size = n0, prob = pr.x) > cbind(k,ni, k0,n0) k ni k0 n0 [1,] 0 3 0 3 [2,] 1 6 2 6 [3,] 0 3 0 3 [4,] 1 6 2 6 [5,] 0 7 2 7 [6,] 0 1 0 0 [7,] 1 4 1 4 [8,] 2 7 1 7 [9,] 0 4 0 4 [10,] 1 4 2 4 [11,] 0 8 1 8 [12,] 0 4 0 4 [13,] 0 5 3 5 [14,] 2 4 1 4 [15,] 0 2 0 2 [16,] 3 7 2 7 [17,] 2 3 2 3 [18,] 1 1 0 0 [19,] 1 3 1 3 [20,] 4 8 2 8 [21,] 3 7 3 7 [22,] 3 5 1 5 [23,] 0 5 2 5 [24,] 4 10 3 10 [25,] 1 5 2 5 [26,] 2 5 5 5 [27,] 2 4 3 4 [28,] 3 4 4 4 [29,] 3 3 3 3 [30,] 2 2 1 2 [31,] 4 8 5 8 [32,] 5 7 3 7 [33,] 3 5 3 5 [34,] 6 6 3 6 [35,] 1 1 1 1 [36,] 1 4 3 4 [37,] 3 5 3 5 [38,] 1 2 1 2 [39,] 3 3 1 3 [40,] 2 2 2 2 [41,] 2 2 2 2 [42,] 3 3 3 3 [43,] 2 3 3 3 [44,] 3 3 3 3 [45,] 2 2 1 2 [46,] 1 2 2 2 [47,] 2 2 2 2 [48,] 4 4 4 4 [49,] 3 3 3 3 [50,] 5 6 5 6 [51,] 1 1 1 1 > g1 <- glm(cbind(k , ni-k ) ~ x, family = binomial) > coef(summary(g1))[,1:2] Estimate Std. Error (Intercept) -2.515884 0.3784211 x 5.123650 0.7344629 > g0 <- glm(cbind(k0, n0-k0) ~ x, family = binomial)# works too > g0. <- glm(cbind(k0, n0-k0) ~ x, family = binomial, subset = n0 > 0) > ## all.equal(g0, g0.) > stopifnot(all.equal(print(coef(summary(g0))), coef(summary(g0.)))) Estimate Std. Error z value Pr(>|z|) (Intercept) -1.913157 0.3346560 -5.716786 1.085574e-08 x 4.061024 0.6512647 6.235596 4.500620e-10 > > > rg1 <- glmrob(cbind(k , ni-k ) ~ x, family = binomial) > rg1. <- glmrob(cbind(k , ni-k ) ~ x, family = binomial, + acc = 1e-10) # default is just 1e-4 > > stopifnot(all.equal(unname(coef(rg1.)), c(-2.37585864, 4.902389143), tolerance=1e-9), + all.equal(coef(rg1), coef(rg1.), tolerance=1e-4), + all.equal(vcov(rg1.), vcov(rg1), tolerance = 1e-4)) > rg1$iter [1] 5 > which(rg1.$w.r != 1) ## 7 of them : [1] 11 18 23 29 34 36 46 > str(rg1.["family" != names(rg1.)]) List of 27 $ coefficients : Named num [1:2] -2.38 4.9 ..- attr(*, "names")= chr [1:2] "(Intercept)" "x" $ residuals : Named num [1:51] -0.528 0.622 -0.582 0.435 -0.981 ... ..- attr(*, "names")= chr [1:51] "1" "2" "3" "4" ... $ fitted.values : Named num [1:51] 0.085 0.093 0.102 0.111 0.121 ... ..- attr(*, "names")= chr [1:51] "1" "2" "3" "4" ... $ w.r : num [1:51] 1 1 1 1 1 1 1 1 1 1 ... $ w.x : num [1:51] 1 1 1 1 1 1 1 1 1 1 ... $ ni : num [1:51] 3 6 3 6 7 1 4 7 4 4 ... $ dispersion : num 1 $ cov : num [1:2, 1:2] 0.144 -0.253 -0.253 0.55 ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:2] "(Intercept)" "x" .. ..$ : chr [1:2] "(Intercept)" "x" $ matM : num [1:2, 1:2] 0.625 0.287 0.287 0.163 ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:2] "(Intercept)" "x" .. ..$ : chr [1:2] "(Intercept)" "x" $ matQ : num [1:2, 1:2] 0.551 0.252 0.252 0.143 ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:2] "(Intercept)" "x" .. ..$ : chr [1:2] "(Intercept)" "x" $ tcc : num 1.34 $ linear.predictors: Named num [1:51] -2.38 -2.28 -2.18 -2.08 -1.98 ... ..- attr(*, "names")= chr [1:51] "1" "2" "3" "4" ... $ deviance : NULL $ iter : int 11 $ y : Named num [1:51] 0 0.167 0 0.167 0 ... ..- attr(*, "names")= chr [1:51] "1" "2" "3" "4" ... $ converged : logi TRUE $ model :'data.frame': 51 obs. of 2 variables: ..$ cbind(k, ni - k): int [1:51, 1:2] 0 1 0 1 0 0 1 2 0 1 ... .. ..- attr(*, "dimnames")=List of 2 .. .. ..$ : NULL .. .. ..$ : chr [1:2] "k" "" ..$ x : num [1:51] 0 0.02 0.04 0.06 0.08 0.1 0.12 0.14 0.16 0.18 ... ..- attr(*, "terms")=Classes 'terms', 'formula' language cbind(k, ni - k) ~ x .. .. ..- attr(*, "variables")= language list(cbind(k, ni - k), x) .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1 .. .. .. ..- attr(*, "dimnames")=List of 2 .. .. .. .. ..$ : chr [1:2] "cbind(k, ni - k)" "x" .. .. .. .. ..$ : chr "x" .. .. ..- attr(*, "term.labels")= chr "x" .. .. ..- attr(*, "order")= int 1 .. .. ..- attr(*, "intercept")= int 1 .. .. ..- attr(*, "response")= int 1 .. .. ..- attr(*, ".Environment")= .. .. ..- attr(*, "predvars")= language list(cbind(k, ni - k), x) .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "nmatrix.2" "numeric" .. .. .. ..- attr(*, "names")= chr [1:2] "cbind(k, ni - k)" "x" $ call : language glmrob(formula = cbind(k, ni - k) ~ x, family = binomial, acc = 1e-10) $ formula :Class 'formula' language cbind(k, ni - k) ~ x .. ..- attr(*, ".Environment")= $ terms :Classes 'terms', 'formula' language cbind(k, ni - k) ~ x .. ..- attr(*, "variables")= language list(cbind(k, ni - k), x) .. ..- attr(*, "factors")= int [1:2, 1] 0 1 .. .. ..- attr(*, "dimnames")=List of 2 .. .. .. ..$ : chr [1:2] "cbind(k, ni - k)" "x" .. .. .. ..$ : chr "x" .. ..- attr(*, "term.labels")= chr "x" .. ..- attr(*, "order")= int 1 .. ..- attr(*, "intercept")= int 1 .. ..- attr(*, "response")= int 1 .. ..- attr(*, ".Environment")= .. ..- attr(*, "predvars")= language list(cbind(k, ni - k), x) .. ..- attr(*, "dataClasses")= Named chr [1:2] "nmatrix.2" "numeric" .. .. ..- attr(*, "names")= chr [1:2] "cbind(k, ni - k)" "x" $ data : $ offset : NULL $ control :List of 4 ..$ acc : num 1e-10 ..$ test.acc: chr "coef" ..$ maxit : num 50 ..$ tcc : num 1.34 $ method : chr "Mqle" $ prior.weights : num [1:51] 1 1 1 1 1 1 1 1 1 1 ... $ contrasts : NULL $ xlevels : Named list() > > rg2 <- glmrob(cbind(k , ni-k ) ~ x, family = binomial, + acc = 1e-10, tcc = 3) # large cutoff: almost classical > vcov(rg2) # << already close to limit (Intercept) x (Intercept) 0.1430407 -0.2501886 x -0.2501886 0.5388665 > rg10 <- glmrob(cbind(k , ni-k ) ~ x, family = binomial, tcc = 10) > rgL <- glmrob(cbind(k , ni-k ) ~ x, family = binomial, tcc = 100) > > no.comp <- - match(c("call", "data", "family", "control", "tcc"), names(rg10)) > stopifnot(all.equal(rg10[no.comp], rgL[no.comp], tolerance= 1e-14)) > > vcov(rgL) # is now the same as the following: (Intercept) x (Intercept) 0.1432102 -0.2504843 x -0.2504843 0.5394659 > if(FALSE) { ## tcc=Inf fails: non-convergence / singular matrix from GOTO/Atlas3 + rgI <- glmrob(cbind(k , ni-k ) ~ x, family = binomial, tcc = Inf) + ## tcc = Inf still *FAILS* (!) + stopifnot(all.equal(rgL[no.comp], rgI[no.comp], tolerance= 0)) + ## and is quite close to the classic one: + (all.equal(vcov(rgI), vcov(g1))) + } > > rg0 <- glmrob(cbind(k0, n0-k0) ~ x, family = binomial) Warning message: In glmrobMqle(X = X, y = Y, weights = weights, start = start, offset = offset, : fitted probabilities numerically 0 or 1 occurred > ## --> warning.. > rg0. <- glmrob(cbind(k0, n0-k0) ~ x, family = binomial, subset = n0 > 0) > > coef(summary(rg0)) # not yet good (cf. 'g0' above!) -- but the one of rg0. is Estimate Std. Error z value Pr(>|z|) (Intercept) -1.852918 NaN NaN NaN x 3.847520 NaN NaN NaN > stopifnot(all.equal(coef(rg0), coef(rg0.))) > > > ### Example where all ni >= 3 -- works better, now also correct as.var. !! > ### ----------------- ======= > > min(n3 <- ni + 2)# = 3 [1] 3 > k3 <- rbinom(N, size = n3, prob = pr.x) > g3 <- glm(cbind(k3 , n3-k3) ~ x, family = binomial) > (cfg <- coef(summary(g3))[,1:2]) Estimate Std. Error (Intercept) -2.945565 0.3420351 x 5.546417 0.6259260 > stopifnot(all.equal(sqrt(diag(vcov(g3))), cfg[,2])) > > rg3 <- glmrob(cbind(k3 , n3-k3) ~ x, family = binomial) > (s3 <- summary(rg3)) Call: glmrob(formula = cbind(k3, n3 - k3) ~ x, family = binomial) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -2.8901 0.3499 -8.260 <2e-16 *** x 5.5039 0.6447 8.537 <2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Robustness weights w.r * w.x: 48 weights are ~= 1. The remaining 3 ones are 42 44 51 0.5127 0.7846 0.7388 Number of observations: 51 Fitted by method 'Mqle' (in 5 iterations) (Dispersion parameter for binomial family taken to be 1) No deviance values available Algorithmic parameters: acc tcc 0.0001 1.3450 maxit 50 test.acc "coef" > summary(rg3$w.r) Min. 1st Qu. Median Mean 3rd Qu. Max. 0.5127 1.0000 1.0000 0.9811 1.0000 1.0000 > rg3.5 <- glmrob(cbind(k3 , n3-k3) ~ x, family = binomial, tcc = 5) > (s3.5 <- summary(rg3.5)) Call: glmrob(formula = cbind(k3, n3 - k3) ~ x, family = binomial, tcc = 5) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -2.9454 0.3420 -8.611 <2e-16 *** x 5.5461 0.6259 8.861 <2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Robustness weights w.r * w.x: All 51 weights are ~= 1. Number of observations: 51 Fitted by method 'Mqle' (in 1 iterations) (Dispersion parameter for binomial family taken to be 1) No deviance values available Algorithmic parameters: acc 1e-04 maxit tcc 50 5 test.acc "coef" > summary(rg3.5$w.r)# all 1 Min. 1st Qu. Median Mean 3rd Qu. Max. 1 1 1 1 1 1 > stopifnot(all.equal(coef(s3)[,1:2], coef(s3.5)[,1:2], tolerance = 0.02)) > > rg3.15 <- glmrob(cbind(k3 , n3-k3) ~ x, family = binomial, tcc = 15, acc=1e-10) > (s3.15 <- summary(rg3.15)) Call: glmrob(formula = cbind(k3, n3 - k3) ~ x, family = binomial, tcc = 15, acc = 1e-10) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -2.9456 0.3420 -8.612 <2e-16 *** x 5.5464 0.6259 8.861 <2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Robustness weights w.r * w.x: All 51 weights are ~= 1. Number of observations: 51 Fitted by method 'Mqle' (in 1 iterations) (Dispersion parameter for binomial family taken to be 1) No deviance values available Algorithmic parameters: acc 1e-10 maxit tcc 50 15 test.acc "coef" > > stopifnot(all.equal(coef(s3.15)[,1:2], cfg, tolerance = 1e-5),# 2e-6 + all.equal(cfg[,"Estimate"], rg3.15$coeff, tolerance= 1e-8) # 6.05e-10 + ) > ##rg3.15$eff # == 1 > > ## doesn't change any more: > rg3.1000 <- glmrob(cbind(k3 , n3-k3) ~ x, family = binomial, tcc = 1000, + acc=1e-10) > stopifnot(all.equal(rg3.1000[no.comp], + rg3.15 [no.comp], tol = 1e-13)) > > cat('Time elapsed: ', proc.time(),'\n') # for ``statistical reasons'' Time elapsed: 0.28 0.01 0.29 NA NA > > proc.time() user system elapsed 0.28 0.01 0.29