R Under development (unstable) (2024-11-07 r87302 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library("model4you") Loading required package: partykit Loading required package: grid Loading required package: libcoin Loading required package: mvtnorm > library("survival") > > ### survreg > set.seed(1) > data(GBSG2, package = "TH.data") > > ## base model > bmod <- survreg(Surv(time, cens) ~ horTh, data = GBSG2, model = TRUE) > survreg_plot(bmod) > grid.newpage() > coeftable.survreg(bmod) > > > ## partitioned model > tr <- pmtree(bmod) No data given. I'm using data set GBSG2 from the current environment parent.frame(). Please check if that is what you want. > plot(tr, terminal_panel = node_pmterminal(tr, plotfun = survreg_plot, + confint = TRUE)) > summary(tr) Stratified model for node(s) 2, 4, 5 Model call: survreg(formula = Surv(time, cens) ~ horTh, data = GBSG2, model = TRUE) Coefficients: node 2 node 4 node 5 (Intercept) 7.8590 7.2936 6.981 horThyes 0.4001 0.3039 0.163 Number of obervations: node 2 node 4 node 5 376 223 87 Objective function: (1093.18) + (1037.81) + (457.96) = 2588.95> summary(tr, node = 1:2) Stratified model for node(s) 1, 2 Model call: survreg(formula = Surv(time, cens) ~ horTh, data = GBSG2, model = TRUE) Coefficients: node 1 node 2 (Intercept) 7.608 7.8590 horThyes 0.306 0.4001 Number of obervations: node 1 node 2 686 376 Objective function: node 1 node 2 2632.096 1093.182 > > coef(tr) (Intercept) horThyes 2 7.859026 0.4001011 4 7.293645 0.3038557 5 6.980875 0.1630104 > coef(tr, node = 1) (Intercept) horThyes 1 7.608449 0.3059506 > coef(bmod) (Intercept) horThyes 7.6084486 0.3059506 > > logLik(bmod) 'log Lik.' -2632.096 (df=3) > logLik(tr) 'log Lik.' -2588.953 (df=9) > > ## alternative table in plot > plot(tr, terminal_panel = node_pmterminal(tr, plotfun = survreg_plot, + confint = TRUE, coeftable = coeftable.survreg)) > > > ### glm binomial > set.seed(2) > n <- 1000 > trt <- factor(rep(1:2, each = n/2)) > age <- sample(40:60, size = n, replace = TRUE) > eff <- -1 + I(trt == 2) + 1 * I(trt == 2) * I(age > 50) > expit <- function(x) 1/(1 + exp(-x)) > > success <- rbinom(n = n, size = 1, prob = expit(eff)) > > dat <- data.frame(success, trt, age) > library("plyr") > dattab <- ddply(.data = dat, .variables = .(trt, age), + function(x) data.frame(nsuccess = sum(x$success), + nfailure = NROW(x) - sum(x$success))) > > bmod1 <- glm(success ~ trt, family = binomial) > bmod2 <- glm(success ~ trt, family = "binomial") > bmod3 <- glm(success ~ trt, data = dat, family = binomial) > bmod4 <- glm(cbind(nsuccess, nfailure) ~ trt, data = dattab, family = binomial) > > (tr1 <- pmtree(bmod1, data = dat)) [1] root | [2] age <= 50: n = 502 | (Intercept) trt2 | -1.162126 1.162126 | [3] age > 50: n = 498 | (Intercept) trt2 | -1.034074 1.980880 Number of inner nodes: 1 Number of terminal nodes: 2 Number of parameters per node: 2 Objective function: 601.9864 > (tr2 <- pmtree(bmod2, data = dat)) [1] root | [2] age <= 50: n = 502 | (Intercept) trt2 | -1.162126 1.162126 | [3] age > 50: n = 498 | (Intercept) trt2 | -1.034074 1.980880 Number of inner nodes: 1 Number of terminal nodes: 2 Number of parameters per node: 2 Objective function: 601.9864 > (tr3 <- pmtree(bmod3)) No data given. I'm using data set dat from the current environment parent.frame(). Please check if that is what you want. [1] root | [2] age <= 50: n = 502 | (Intercept) trt2 | -1.162126 1.162126 | [3] age > 50: n = 498 | (Intercept) trt2 | -1.034074 1.980880 Number of inner nodes: 1 Number of terminal nodes: 2 Number of parameters per node: 2 Objective function: 601.9864 > (tr4 <- pmtree(bmod4)) No data given. I'm using data set dattab from the current environment parent.frame(). Please check if that is what you want. [1] root | [2] age <= 50: n = 22 | (Intercept) trt2 | -1.162126 1.162126 | [3] age > 50: n = 20 | (Intercept) trt2 | -1.034074 1.980880 Number of inner nodes: 1 Number of terminal nodes: 2 Number of parameters per node: 2 Objective function: 91.09863 > > (mtr1 <- glmtree(success ~ trt | age, data = dat, family = binomial)) Generalized linear model tree (family: binomial) Model formula: success ~ trt | age Fitted party: [1] root | [2] age <= 50: n = 502 | (Intercept) trt2 | -1.162126 1.162126 | [3] age > 50: n = 498 | (Intercept) trt2 | -1.034074 1.980880 Number of inner nodes: 1 Number of terminal nodes: 2 Number of parameters per node: 2 Objective function (negative log-likelihood): 601.9864 > # (mtr2 <- glmtree(cbind(nsuccess, nfailure) ~ trt | age, data = dattab, family = binomial)) > > library("strucchange") Loading required package: zoo Attaching package: 'zoo' The following objects are masked from 'package:base': as.Date, as.Date.numeric Loading required package: sandwich > sctest(tr3) $`1` age statistic 1.446675e+01 p.value 7.220786e-04 $`2` age statistic 2.0885768 p.value 0.3519422 $`3` age statistic 4.1541909 p.value 0.1252936 > sctest(tr4) $`1` age statistic 7.19313420 p.value 0.02741768 $`2` age statistic 2.7056665 p.value 0.2585068 $`3` age statistic 3.6772322 p.value 0.1590374 > > ## check logLik > logLik(mtr1) 'log Lik.' -601.9864 (df=5) > logLik(tr1) 'log Lik.' -601.9864 (df=4) > > sum(objfun(tr1, newdata = dat)) [1] -601.9864 > objfun(tr1, newdata = dat, sum = TRUE) [1] -601.9864 > sum(objfun(tr1)) [1] -601.9864 > objfun(tr1, sum = TRUE) [1] -601.9864 > > > ## variable importance > logLik(tr1) 'log Lik.' -601.9864 (df=4) > logLik(tr1, perm = "age") 'log Lik.' -623.9381 (df=NA) > a1 <- predict.party(tr1, perm = "age", type = "node") > a2 <- predict(tr1, perm = "age", type = "node") > a3 <- predict(tr1, perm = 3, type = "node") > b <- predict.party(tr1, type = "node") > varimp(tr1, nperm = 5) 'log Lik.' 24.60555 (df=4) > > > library("ggplot2") > ofs <- data.frame(objfun_bmod1 = objfun(bmod1), + objfun_tr1 = objfun(tr1)) > ggplot(ofs, aes(objfun_bmod1, objfun_tr1)) + geom_jitter(alpha = 0.3) > > > > ### linear model and missings > data("MathExam14W", package = "psychotools") > > ## scale points achieved to [0, 100] percent > MathExam14W$tests <- 100 * MathExam14W$tests/26 > MathExam14W$pcorrect <- 100 * MathExam14W$nsolved/13 > > ## select variables to be used > MathExam <- MathExam14W[ , c("pcorrect", "group", "tests", "study", + "attempt", "semester", "gender")] > > ## compute base model > bmod_math <- lm(pcorrect ~ group, data = MathExam) > > ## compute tree > (tr_math <- pmtree(bmod_math, control = ctree_control(maxdepth = 2))) No data given. I'm using data set MathExam from the current environment parent.frame(). Please check if that is what you want. [1] root | [2] tests <= 84.61538 | | [3] tests <= 57.69231: n = 121 | | (Intercept) group2 | | 40.694789 -4.580056 | | [4] tests > 57.69231: n = 425 | | (Intercept) group2 | | 55.251855 -2.012988 | [5] tests > 84.61538 | | [6] tests <= 92.30769: n = 97 | | (Intercept) group2 | | 66.730769 -3.033063 | | [7] tests > 92.30769: n = 86 | | (Intercept) group2 | | 90.32967 -13.25576 Number of inner nodes: 3 Number of terminal nodes: 4 Number of parameters per node: 2 Objective function: -276167.6 > > ## create data with NAs > Math_mx <- Math_mz <- MathExam > Math_mx$group[1:2] <- NA > Math_mz$tests[1:20] <- NA > > bmod_math_mx <- lm(pcorrect ~ group, data = Math_mx) > > (tr_math_mx1 <- pmtree(bmod_math, control = ctree_control(maxdepth = 2), data = Math_mx)) [1] root | [2] tests <= 84.61538 | | [3] tests <= 57.69231: n = 120 | | (Intercept) group2 | | 40.10088 -3.98615 | | [4] tests > 57.69231: n = 424 | | (Intercept) group2 | | 55.180534 -1.941667 | [5] tests > 84.61538 | | [6] tests <= 92.30769: n = 97 | | (Intercept) group2 | | 66.730769 -3.033063 | | [7] tests > 92.30769: n = 86 | | (Intercept) group2 | | 90.32967 -13.25576 Number of inner nodes: 3 Number of terminal nodes: 4 Number of parameters per node: 2 Objective function: -274637.2 Warning message: In .prepare_args(model = model, data = data, zformula = zformula, : NAs in model variables (pcorrect, group). Omitting rows with NAs. > (tr_math_mx2 <- pmtree(bmod_math_mx, control = ctree_control(maxdepth = 2))) No data given. I'm using data set Math_mx from the current environment parent.frame(). Please check if that is what you want. [1] root | [2] tests <= 84.61538 | | [3] tests <= 57.69231: n = 120 | | (Intercept) group2 | | 40.10088 -3.98615 | | [4] tests > 57.69231: n = 424 | | (Intercept) group2 | | 55.180534 -1.941667 | [5] tests > 84.61538 | | [6] tests <= 92.30769: n = 97 | | (Intercept) group2 | | 66.730769 -3.033063 | | [7] tests > 92.30769: n = 86 | | (Intercept) group2 | | 90.32967 -13.25576 Number of inner nodes: 3 Number of terminal nodes: 4 Number of parameters per node: 2 Objective function: -274637.2 Warning message: In .prepare_args(model = model, data = data, zformula = zformula, : NAs in model variables (pcorrect, group). Omitting rows with NAs. > (tr_math_mz <- pmtree(bmod_math, control = ctree_control(maxdepth = 2), data = Math_mz)) [1] root | [2] tests <= 84.61538 | | [3] tests <= 57.69231: n = 122 | | (Intercept) group2 | | 41.514042 -5.399309 | | [4] tests > 57.69231: n = 422 | | (Intercept) group2 | | 55.273592 -2.034726 | [5] tests > 84.61538 | | [6] tests <= 92.30769: n = 98 | | (Intercept) group2 | | 65.290807 -1.593101 | | [7] tests > 92.30769: n = 87 | | (Intercept) group2 | | 89.52991 -12.45601 Number of inner nodes: 3 Number of terminal nodes: 4 Number of parameters per node: 2 Objective function: -279321 > > > ## check logLik > (tr_math_mob <- lmtree(pcorrect ~ group | ., data = MathExam, maxdepth = 2)) Linear model tree Model formula: pcorrect ~ group | . Fitted party: [1] root | [2] tests <= 84.61538: n = 546 | (Intercept) group2 | 51.767152 -2.048578 | [3] tests > 84.61538: n = 183 | (Intercept) group2 | 77.743590 -7.729345 Number of inner nodes: 1 Number of terminal nodes: 2 Number of parameters per node: 2 Objective function (residual sum of squares): 315116.8 > > logLik(bmod_math) 'log Lik.' -3321.166 (df=3) > logLik(tr_math) 'log Lik.' -3195.249 (df=12) > logLik(tr_math_mob) 'log Lik.' -3245.756 (df=7) > > sum(bmod_math$residuals^2) [1] 386684.1 > > ## varimp > of <- function(x, newdata = NULL, weights = NULL, + perm = NULL, ...) { + - objfun(x, newdata = newdata, weights = weights, perm = perm, sum = TRUE, ...) + } > varimp(tr_math, nperm = 2, risk = of) tests 170701.1 > > # deeper tree > w <- rep(1, nrow(Math_mx)) > w[5:100] <- 0 > tr_math_d <- pmtree(bmod_math, data = Math_mx, weights = w, + control = ctree_control(alpha = 0.7)) Warning message: In .prepare_args(model = model, data = data, zformula = zformula, : NAs in model variables (pcorrect, group). Omitting rows with NAs. > varimp(tr_math_d, risk = of) tests attempt gender study semester 234737.534 69050.999 11758.219 1055.642 2806.542 > > > ### check different formulas > set.seed(1212) > n <- 90 > d1 <- d2 <- d3 <- data.frame(y = abs(rnorm(n) + 5), x = rep(1:(n/15), each = 15), #1:n - 10, + trt = rep(1:3, each = n/3), z1 = rnorm(n)) > d2$trt <- factor(d2$trt) > d3$trt <- ordered(d3$trt) > > f <- list( + y ~ 1, + y ~ x, + y ~ trt, + y ~ trt + x, + y ~ trt + offset(x), + y ~ trt + x + offset(x), + y ~ trt + offset(as.numeric(trt)), + y ~ factor(trt), + y ~ factor(trt) + offset(x), + y ~ factor(x > as.numeric(trt)), + # y ~ interaction(x, trt), + y ~ 0 + trt + ) > > try_pmtree <- function(bmod, data) { + "pmtree" %in% class(tryCatch(pmtree(bmod, data = data), + error = function(e) e, + warning = function(w) w)) + # "pmtree" %in% class(try(pmtree(bmod, data = data), silent = TRUE)) + } > > run_lm <- function(formula, data, ...) { + eval(substitute(lm(fm, data = data, ...), list(fm = formula))) + } > > run_survreg <- function(formula, data, ...) { + eval(substitute(survreg(fm, data = data, ...), list(fm = formula))) + } > > run_coxph <- function(formula, data, ...) { + eval(substitute(coxph(fm, data = data, ...), list(fm = formula))) + } > > ## expected results > ok1 <- list(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE) > ok2 <- list(FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE) > ok3 <- ok2 > > > ## checks with lm > lm1 <- lapply(f, run_lm, data = d1, model = FALSE) > identical(lapply(lm1, try_pmtree, data = d1), ok1) [1] TRUE > > lm2 <- lapply(f, run_lm, data = d2, model = FALSE) > identical(lapply(lm2, try_pmtree, data = d2), ok2) [1] TRUE > > lm3 <- lapply(f, run_lm, data = d3, model = FALSE) > identical(lapply(lm3, try_pmtree, data = d3), ok3) [1] TRUE > > ## checks with survreg > library("survival") > d1$y <- d2$y <- d3$y <- Surv(d1$y + 0.5) > > survreg1 <- lapply(f, run_survreg, data = d1, model = FALSE) Warning message: In survreg.fit(X, Y, weights, offset, init = init, controlvals = control, : Ran out of iterations and did not converge > identical(lapply(survreg1, try_pmtree, data = d1), ok1) [1] TRUE > > survreg2 <- lapply(f, run_survreg, data = d2, model = FALSE) > identical(lapply(survreg2, try_pmtree, data = d2), ok2) [1] TRUE > > survreg3 <- lapply(f, run_survreg, data = d3, model = FALSE) > identical(lapply(survreg3, try_pmtree, data = d3), ok3) [1] TRUE > > > > ## checks with coxph > coxph1 <- lapply(f, run_coxph, data = d1, model = FALSE) > identical(lapply(coxph1, try_pmtree, data = d1), ok1) [1] TRUE > > coxph2 <- lapply(f, run_coxph, data = d2, model = FALSE) > identical(lapply(coxph2, try_pmtree, data = d2), ok2) [1] TRUE > > coxph3 <- lapply(f, run_coxph, data = d3, model = FALSE) > identical(lapply(coxph3, try_pmtree, data = d3), ok3) [1] TRUE > > > > proc.time() user system elapsed 11.96 0.89 12.87