R Under development (unstable) (2025-02-23 r87804 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2025 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> require("gamboostLSS")
Loading required package: gamboostLSS
Loading required package: mboost
Loading required package: parallel
Loading required package: stabs

Attaching package: 'gamboostLSS'

The following object is masked from 'package:stats':

    model.weights

>
> ### negbin dist, linear ###
>
> set.seed(2611)
> x1 <- rnorm(1000)
> x2 <- rnorm(1000)
> x3 <- rnorm(1000)
> x4 <- rnorm(1000)
> x5 <- rnorm(1000)
> x6 <- rnorm(1000)
> mu <- exp(1.5 + x1^2 + 0.5 * x2 - 3 * sin(x3) - 1 * x4)
> sigma <- exp(-0.2 * x4 + 0.2 * x5 + 0.4 * x6)
> y <- numeric(1000)
> for (i in 1:1000)
+   y[i] <- rnbinom(1, size = sigma[i], mu = mu[i])
> dat <- data.frame(x1, x2, x3, x4, x5, x6, y)
>
> # fit models at mstop = number of distribution parameters + 1
>
> # glmboost
> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 3), method = "noncyclic")
>
> # linear base-learner with bols
> model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 3), method = "noncyclic",
+                      baselearner = "bols")
>
> # nonlinear bbs base-learner
>
> model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 3), method = "noncyclic",
+                      baselearner = "bbs")
Warning messages:
1: In .qr.rank.def.warn(r) :
  matrix is structurally rank deficient; using augmented matrix with additional 2 row(s) of zeros
2: In .qr.rank.def.warn(r) :
  matrix is structurally rank deficient; using augmented matrix with additional 2 row(s) of zeros
>
> # reducing mstop and increasing it again afterwards should yield the same fit
>
> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 50), method = "noncyclic")
>
> m_co <- coef(model)
>
> mstop(model) <- 5
> mstop(model) <- 50
>
> stopifnot(all.equal(m_co, coef(model)))
>
>
> model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 50), method = "noncyclic",
+                      baselearner = "bols")
>
> m_co <- coef(model)
>
> mstop(model) <- 5
> mstop(model) <- 50
>
> stopifnot(all.equal(m_co, coef(model)))
>
>
> model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 50), method = "noncyclic",
+                      baselearner = "bbs")
Warning messages:
1: In .qr.rank.def.warn(r) :
  matrix is structurally rank deficient; using augmented matrix with additional 2 row(s) of zeros
2: In .qr.rank.def.warn(r) :
  matrix is structurally rank deficient; using augmented matrix with additional 2 row(s) of zeros
>
> m_co <- coef(model)
>
> mstop(model) <- 5
> mstop(model) <- 50
>
> stopifnot(all.equal(m_co, coef(model)))
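## ---------------------------------------------------------------------------
## Editor's note -- a minimal sketch, not executed in this run: the subset
## operator "[" is assumed (as in mboost) to be an equivalent way to change
## the number of boosting iterations; note that it modifies the model in
## place rather than returning an independent copy.
model <- model[5]     # reduce the fit to 5 boosting iterations
model <- model[50]    # grow it back to 50 iterations
stopifnot(all.equal(m_co, coef(model)))   # same fit as before the reduction
## ---------------------------------------------------------------------------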
>
>
> model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 50), method = "noncyclic",
+                      baselearner = "bbs")
Warning messages:
1: In .qr.rank.def.warn(r) :
  matrix is structurally rank deficient; using augmented matrix with additional 2 row(s) of zeros
2: In .qr.rank.def.warn(r) :
  matrix is structurally rank deficient; using augmented matrix with additional 2 row(s) of zeros
>
> m_co <- coef(model)
>
> mstop(model) <- 5
> mstop(model) <- 50
>
> stopifnot(all.equal(m_co, coef(model)))
>
> ## check cvrisk for noncyclic models
> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 3), method = "noncyclic")
> cvr1 <- cvrisk(model, grid = 1:50, cv(model.weights(model), B = 5))
Starting cross-validation...
[Fold: 1]
[  1] ........................................ -- risk: 1838.501
[ 41] .........
Final risk: 1842.375

[Fold: 2]
[  1] ........................................ -- risk: 1737.159
[ 41] .........
Final risk: 1734.726

[Fold: 3]
[  1] ........................................ -- risk: 1840.021
[ 41] .........
Final risk: 1834.083

[Fold: 4]
[  1] ........................................ -- risk: 1644.871
[ 41] .........
Final risk: 1642.941

[Fold: 5]
[  1] ........................................ -- risk: 1748.443
[ 41] .........
Final risk: 1745.234
> cvr1

	 Cross-validated glmboostLSS(formula = y ~ ., data = dat, families = NBinomialLSS(),
	     control = boost_control(mstop = 3), method = "noncyclic")

       1        2        3        4        5        6        7        8 
4.857889 4.857889 4.856947 4.855428 4.854873 4.855378 4.852273 4.851972 
       9       10       11       12       13       14       15       16 
4.852622 4.849785 4.845720 4.844206 4.844934 4.841169 4.839790 4.839599 
      17       18       19       20       21       22       23       24 
4.837818 4.835607 4.835725 4.834000 4.832272 4.830426 4.829887 4.828157 
      25       26       27       28       29       30       31       32 
4.826913 4.826344 4.824523 4.822832 4.821722 4.820891 4.819435 4.818253 
      33       34       35       36       37       38       39       40 
4.817339 4.816674 4.815388 4.813377 4.813263 4.812566 4.810793 4.809623 
      41       42       43       44       45       46       47       48 
4.809436 4.807793 4.806741 4.806119 4.805959 4.805148 4.803171 4.802931 
      49       50 
4.802431 4.801477 

	 Optimal number of boosting iterations: 50
> plot(cvr1)
>
> risk(model, merge = TRUE)
      mu    sigma    sigma    sigma       mu 
4755.327 4755.327 4752.028 4749.214 4746.600 
> risk(model, merge = FALSE)
$mu
[1] 4755.327 4746.600

$sigma
[1] 4755.327 4752.028 4749.214

attr(,"class")
[1] "inbag"
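## ---------------------------------------------------------------------------
## Editor's note -- a minimal sketch, not executed in this run: the
## cross-validated risk above would typically be used to stop the model at
## the estimated optimal iteration, assuming mstop() on a cvrisk result of a
## noncyclic model returns that single optimal value.
mstop(model) <- mstop(cvr1)   # set the model to the cv-optimal mstop (here 50)
## ---------------------------------------------------------------------------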
>
>
> ## test that mstop = 0 is possible
> compare_models <- function(m1, m2) {
+   stopifnot(all.equal(coef(m1), coef(m2), check.environment = FALSE))
+   stopifnot(all.equal(predict(m1), predict(m2), check.environment = FALSE))
+   stopifnot(all.equal(fitted(m1), fitted(m2), check.environment = FALSE))
+   stopifnot(all.equal(selected(m1), selected(m2), check.environment = FALSE))
+   stopifnot(all.equal(risk(m1), risk(m2), check.environment = FALSE))
+   ## remove obvious differences from the objects before the full comparison
+   m1$control <- m2$control <- NULL
+   m1$call <- m2$call <- NULL
+   if (!isTRUE(all.equal(m1, m2, check.environment = FALSE)))
+     stop("Objects of offset model + 1 step and model with 1 step not identical")
+   invisible(NULL)
+ }
>
> # set up models
> mod <- glmboostLSS(y ~ ., data = dat, method = "noncyclic", control = boost_control(mstop = 0))
> mod2 <- glmboostLSS(y ~ ., data = dat, method = "noncyclic", control = boost_control(mstop = 1))
> mod3 <- glmboostLSS(y ~ ., data = dat, method = "noncyclic", control = boost_control(mstop = 1))
>
> lapply(coef(mod), function(x) stopifnot(is.null(x)))
$mu
NULL

$sigma
NULL

>
> mstop(mod3) <- 0
> mapply(compare_models, m1 = mod, m2 = mod3)
$mu
NULL

$sigma
NULL

>
> mstop(mod) <- 1
> mapply(compare_models, m1 = mod, m2 = mod2)
$mu
NULL

$sigma
NULL

>
> mstop(mod3) <- 1
> mapply(compare_models, m1 = mod, m2 = mod3)
$mu
NULL

$sigma
NULL

>
> ## check selected
> set.seed(1907)
> x1 <- rnorm(500)
> x2 <- rnorm(500)
> x3 <- rnorm(500)
> x4 <- rnorm(500)
> x5 <- rnorm(500)
> x6 <- rnorm(500)
> mu <- exp(1.5 + 1 * x1 + 0.5 * x2 - 0.5 * x3 - 1 * x4)
> sigma <- exp(-0.4 * x3 - 0.2 * x4 + 0.2 * x5 + 0.4 * x6)
> y <- numeric(500)
> for (i in 1:500)
+   y[i] <- rnbinom(1, size = sigma[i], mu = mu[i])
> dat <- data.frame(x1, x2, x3, x4, x5, x6, y)
>
> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 10),
+                      center = TRUE, method = "cyclic")
> selected(model) # ok (at least in principle)
$mu
[1] 2 2 5 2 5 2 5 5 2 5

$sigma
[1] 5 5 4 5 4 3 5 4 3 2

> selected(model, merge = TRUE) # ok
   mu sigma    mu sigma    mu sigma    mu sigma    mu sigma    mu sigma    mu 
    2     5     2     5     5     4     2     5     5     4     2     3     5 
sigma    mu sigma    mu sigma    mu sigma 
    5     5     4     2     3     5     2 
>
> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 10),
+                      center = TRUE, method = "noncyclic")
> selected(model) # ok (at least in principle)
$mu
[1] 2 2 5 2 5 2 5 2 5 5

$sigma
NULL

> selected(model, merge = TRUE) ## BROKEN
mu mu mu mu mu mu mu mu mu mu 
 2  2  5  2  5  2  5  2  5  5 
>
> ## with informative sigma:
> sigma <- exp(-0.4 * x3 - 0.2 * x4 + 0.2 * x5 + 1 * x6)
> y <- numeric(500)
> for (i in 1:500)
+   y[i] <- rnbinom(1, size = sigma[i], mu = mu[i])
> dat <- data.frame(x1, x2, x3, x4, x5, x6, y)
>
> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 20),
+                      center = TRUE, method = "cyclic")
> selected(model) # ok (at least in principle)
$mu
 [1] 5 5 5 5 5 2 5 5 2 5 2 5 2 5 2 5 5 2 5 2

$sigma
 [1] 4 5 4 5 4 5 4 5 4 5 7 4 5 2 7 4 5 2 7 4

> selected(model, merge = TRUE) # ok
   mu sigma    mu sigma    mu sigma    mu sigma    mu sigma    mu sigma    mu 
    5     4     5     5     5     4     5     5     5     4     2     5     5 
sigma    mu sigma    mu sigma    mu sigma    mu sigma    mu sigma    mu sigma 
    4     5     5     2     4     5     5     2     7     5     4     2     5 
   mu sigma    mu sigma    mu sigma    mu sigma    mu sigma    mu sigma    mu 
    5     2     2     7     5     4     5     5     2     2     5     7     2 
sigma 
    4 
>
> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
+                      control = boost_control(mstop = 20),
+                      center = TRUE, method = "noncyclic")
> selected(model) # ok (at least in principle)
$mu
 [1] 5 5 5 5 5 2 5 2 5 2 5 2 5 5 2 5

$sigma
[1] 5 4 5 4

> selected(model, merge = TRUE) ## BROKEN
   mu    mu    mu    mu    mu    mu    mu    mu    mu    mu    mu    mu sigma 
    5     5     5     5     5     2     5     2     5     2     5     2     5 
   mu sigma    mu sigma    mu sigma    mu 
    5     4     5     5     2     4     5 
>
>
> ## check that the merged risk contains no NAs after reducing mstop to 0
> ## and increasing it again
> stopifnot(all(!is.na(risk(model, merge = TRUE))))
> mstop(model) <- 0
> stopifnot(all(!is.na(risk(model, merge = TRUE))))
> mstop(model) <- 10
> stopifnot(all(!is.na(risk(model, merge = TRUE))))
>
>
>
> proc.time()
   user  system elapsed 
   6.84    0.45    7.28 
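## ---------------------------------------------------------------------------
## Editor's note -- a hedged sketch, not executed in this run, related to the
## selected(..., merge = TRUE) calls flagged as "BROKEN" above.
## merge_selected() is a hypothetical helper: it assumes that the names of
## risk(model, merge = TRUE) record which parameter was updated in each
## noncyclic iteration and that its first length(model) entries are the
## offset values.
merge_selected <- function(model) {
  ## parameter updated at each iteration, offsets dropped
  path <- names(risk(model, merge = TRUE))[-seq_along(model)]
  sel <- selected(model)          # per-parameter selection paths
  out <- integer(length(path))
  for (p in names(sel))
    if (!is.null(sel[[p]]))
      out[path == p] <- sel[[p]]  # fill the iterations that updated parameter p
  names(out) <- path
  out
}
## Applied to a noncyclic fit, this should give a vector of length
## mstop(model) with names "mu"/"sigma", i.e. the merged selection path.
## ---------------------------------------------------------------------------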