R Under development (unstable) (2024-07-17 r86903 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library("ipred")
> library("mlbench")
> library("MASS")
> library("survival")
> 
> suppressWarnings(RNGversion("3.5.3"))
> 
> actversion <- paste(R.version$major, R.version$minor, sep=".")
> thisversion <- "1.7.0"
> 
> #if (compareVersion(actversion, thisversion) >= 0) {
> #  RNGversion("1.6.2")
> #}
> set.seed(29081975)
> 
> 
> # Classification
> 
> learn <- as.data.frame(mlbench.twonorm(200))
> test <- as.data.frame(mlbench.twonorm(100))
> 
> # bagging
> 
> mod <- bagging(classes ~ ., data=learn, coob=TRUE, nbagg=10)
> mod

Bagging classification trees with 10 bootstrap replications 

Call: bagging.data.frame(formula = classes ~ ., data = learn, coob = TRUE, 
    nbagg = 10)

Out-of-bag estimate of misclassification error:  0.195 

> predict(mod)[1:10]
 [1] 1 1 1 2 2 2 1 1 1 1
Levels: 1 2
> 
> # Double-Bagging
> 
> comb.lda <- list(list(model=lda, predict=function(obj, newdata)
+                  predict(obj, newdata)$x))
> 
> mod <- bagging(classes ~ ., data=learn, comb=comb.lda, nbagg=10)
> mod

Bagging classification trees with 10 bootstrap replications 

Call: bagging.data.frame(formula = classes ~ ., data = learn, comb = comb.lda, 
    nbagg = 10)

> predict(mod, newdata=test[1:10,])
 [1] 1 1 1 1 1 2 1 2 1 2
Levels: 1 2
> predict(mod, newdata=test[1:10,], agg="aver")
 [1] 1 1 1 1 2 2 1 2 1 2
Levels: 1 2
> predict(mod, newdata=test[1:10,], agg="wei")
 [1] 1 1 1 1 2 2 1 2 1 2
Levels: 1 2
> predict(mod, newdata=test[1:10,], type="prob")
        1   2
 [1,] 1.0 0.0
 [2,] 0.9 0.1
 [3,] 1.0 0.0
 [4,] 1.0 0.0
 [5,] 0.5 0.5
 [6,] 0.0 1.0
 [7,] 1.0 0.0
 [8,] 0.1 0.9
 [9,] 1.0 0.0
[10,] 0.0 1.0
> predict(mod, newdata=test[1:10,], type="prob", agg="aver")
        1   2
 [1,] 1.0 0.0
 [2,] 0.9 0.1
 [3,] 1.0 0.0
 [4,] 1.0 0.0
 [5,] 0.5 0.5
 [6,] 0.0 1.0
 [7,] 1.0 0.0
 [8,] 0.1 0.9
 [9,] 1.0 0.0
[10,] 0.0 1.0
> predict(mod, newdata=test[1:10,], type="prob", agg="wei")
                1           2
 [1,] 1.000000000 0.000000000
 [2,] 0.996441281 0.003558719
 [3,] 1.000000000 0.000000000
 [4,] 1.000000000 0.000000000
 [5,] 0.484359233 0.515640767
 [6,] 0.000000000 1.000000000
 [7,] 1.000000000 0.000000000
 [8,] 0.001138952 0.998861048
 [9,] 1.000000000 0.000000000
[10,] 0.000000000 1.000000000
> 
> mypredict.lda <- function(object, newdata)
+   predict(object, newdata = newdata)$class
> 
> errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda)

Call:
errorest.data.frame(formula = classes ~ ., data = learn, model = lda, 
    predict = mypredict.lda)

	 10-fold cross-validation estimator of misclassification error 

Misclassification error:  0.035 

> errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda,
+          est.para=control.errorest(k=5, random=FALSE))

Call:
errorest.data.frame(formula = classes ~ ., data = learn, model = lda, 
    predict = mypredict.lda, est.para = control.errorest(k = 5, 
        random = FALSE))

	 5-fold cross-validation estimator of misclassification error 

Misclassification error:  0.04 
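> 
> # Note: errorest() expects the predict wrapper to return the predicted
> # class labels themselves, which is why mypredict.lda extracts the $class
> # component of predict.lda's return value. The same pattern should carry
> # over to other MASS classifiers (an illustrative sketch, not part of the
> # original test and not run here):
> # mypredict.qda <- function(object, newdata)
> #   predict(object, newdata = newdata)$class
> # errorest(classes ~ ., data=learn, model=qda, predict=mypredict.qda)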
> 
> lapply(errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda,
+        est.para=control.errorest(k=5, random=FALSE, getmodels=TRUE))$models, class)
[[1]]
[1] "lda"

[[2]]
[1] "lda"

[[3]]
[1] "lda"

[[4]]
[1] "lda"

[[5]]
[1] "lda"

> errorest(classes ~ ., data=learn, model=bagging,
+          est.para=control.errorest(k=2), nbagg=10)

Call:
errorest.data.frame(formula = classes ~ ., data = learn, model = bagging, 
    est.para = control.errorest(k = 2), nbagg = 10)

	 2-fold cross-validation estimator of misclassification error 

Misclassification error:  0.12 

> errorest(classes ~ ., data=learn, model=bagging,
+          est.para=control.errorest(k=2), nbagg=10, comb=comb.lda)

Call:
errorest.data.frame(formula = classes ~ ., data = learn, model = bagging, 
    est.para = control.errorest(k = 2), nbagg = 10, comb = comb.lda)

	 2-fold cross-validation estimator of misclassification error 

Misclassification error:  0.055 

> errorest(classes ~ ., data=learn, model=lda,
+          predict=mypredict.lda, estimator="boot")

Call:
errorest.data.frame(formula = classes ~ ., data = learn, model = lda, 
    predict = mypredict.lda, estimator = "boot")

	 Bootstrap estimator of misclassification error 
	 with 25 bootstrap replications

Misclassification error:  0.038 
Standard deviation:  0.0023 

> errorest(classes ~ ., data=learn, model=lda,
+          predict=mypredict.lda, estimator="632plus")

Call:
errorest.data.frame(formula = classes ~ ., data = learn, model = lda, 
    predict = mypredict.lda, estimator = "632plus")

	 .632+ Bootstrap estimator of misclassification error 
	 with 25 bootstrap replications

Misclassification error:  0.0336 

> 
> # Regression
> 
> learn <- as.data.frame(mlbench.friedman1(100))
> test <- as.data.frame(mlbench.friedman1(100))
> 
> # bagging
> 
> mod <- bagging(y ~ ., data=learn, coob=TRUE, nbagg=10)
> mod

Bagging regression trees with 10 bootstrap replications 

Call: bagging.data.frame(formula = y ~ ., data = learn, coob = TRUE, 
    nbagg = 10)

Out-of-bag estimate of root mean squared error:  3.0773 

> predict(mod)[1:10]
 [1] 13.367299 15.465575 10.728200 21.426540 16.379882 16.992572        NA
 [8] 12.899667  8.096002 16.070858
> 
> predict(mod, newdata=test[1:10,])
 [1] 13.90686 14.94293 12.94732 11.67369 16.63664 14.42252 16.01717 12.34594
 [9] 11.96764 22.14124
> predict(mod, newdata=test[1:10,], agg="aver")
 [1] 13.90686 14.94293 12.94732 11.67369 16.63664 14.42252 16.01717 12.34594
 [9] 11.96764 22.14124
> predict(mod, newdata=test[1:10,], agg="wei")
 [1] 13.96527 14.95040 12.92685 11.61045 16.74963 14.59937 16.46226 12.44494
 [9] 12.20556 22.00779
> errorest(y ~ ., data=learn, model=lm)

Call:
errorest.data.frame(formula = y ~ ., data = learn, model = lm)

	 10-fold cross-validation estimator of root mean squared error 

Root mean squared error:  2.7385 

> errorest(y ~ ., data=learn, model=lm,
+          est.para=control.errorest(k=5, random=FALSE))

Call:
errorest.data.frame(formula = y ~ ., data = learn, model = lm, 
    est.para = control.errorest(k = 5, random = FALSE))

	 5-fold cross-validation estimator of root mean squared error 

Root mean squared error:  2.7941 

> lapply(errorest(y ~ ., data=learn, model=lm,
+        est.para=control.errorest(k=5, random=FALSE, getmodels=TRUE))$models, class)
[[1]]
[1] "lm"

[[2]]
[1] "lm"

[[3]]
[1] "lm"

[[4]]
[1] "lm"

[[5]]
[1] "lm"

> errorest(y ~ ., data=learn, model=lm, estimator="boot")

Call:
errorest.data.frame(formula = y ~ ., data = learn, model = lm, 
    estimator = "boot")

	 Bootstrap estimator of root mean squared error 
	 with 25 bootstrap replications

Root mean squared error:  2.7966 
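> 
> # Note: called without newdata, predict(mod) returns out-of-bag
> # predictions, which are NA for observations that happened to be drawn
> # into every bootstrap sample (as for observation 7 above). The reported
> # criterion is the root mean squared error; a hand check against the
> # hold-out sample would look like this (a sketch, not run in this test):
> # sqrt(mean((test$y - predict(mod, newdata=test))^2))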
> 
> # survival
> 
> learn <- rsurv(100, model="C")
> test <- rsurv(100, model="C")
> 
> mod <- bagging(Surv(time, cens) ~ ., data=learn, nbagg=10)
> mod

Bagging survival trees with 10 bootstrap replications 

Call: bagging.data.frame(formula = Surv(time, cens) ~ ., data = learn, 
    nbagg = 10)

> predict(mod, newdata=test[1:10,])
[[1]]
Call: survfit(formula = Surv(agglsample[[j]], aggcens[[j]]) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 117    117 0.0751   0.064   0.123

[[2]]
Call: survfit(formula = Surv(agglsample[[j]], aggcens[[j]]) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 134    134  0.028  0.0247  0.0321

[[3]]
Call: survfit(formula = Surv(agglsample[[j]], aggcens[[j]]) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 142    142  0.215   0.179   0.241

[[4]]
Call: survfit(formula = Surv(agglsample[[j]], aggcens[[j]]) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 131    131 0.0216  0.0205   0.028

[[5]]
Call: survfit(formula = Surv(agglsample[[j]], aggcens[[j]]) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 145    145  0.217   0.168   0.251

[[6]]
Call: survfit(formula = Surv(agglsample[[j]], aggcens[[j]]) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 105    105 0.0476  0.0421  0.0496

[[7]]
Call: survfit(formula = Surv(agglsample[[j]], aggcens[[j]]) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 125    125  0.181   0.156   0.218

[[8]]
Call: survfit(formula = Surv(agglsample[[j]], aggcens[[j]]) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 139    139  0.179   0.156   0.217

[[9]]
Call: survfit(formula = Surv(agglsample[[j]], aggcens[[j]]) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 131    131  0.181   0.179   0.251

[[10]]
Call: survfit(formula = Surv(agglsample[[j]], aggcens[[j]]) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 120    120  0.028  0.0247  0.0395

> 
> #errorest(Surv(time, cens) ~ ., data=learn, model=bagging, 
> #         est.para=list(k=2, random=FALSE), nbagg=5)
> #errorest(Surv(time, cens) ~ ., data=learn, model=bagging, 
> #         estimator="boot", nbagg=5, est.para=list(nboot=5))
> #insert control.errorest
> errorest(Surv(time, cens) ~ ., data=learn, model=bagging,
+          est.para=control.errorest(k=2, random=FALSE), nbagg=5)

Call:
errorest.data.frame(formula = Surv(time, cens) ~ ., data = learn, 
    model = bagging, est.para = control.errorest(k = 2, random = FALSE), 
    nbagg = 5)

	 2-fold cross-validation estimator of Brier's score 

Brier's score:  0.0874 

> errorest(Surv(time, cens) ~ ., data=learn, model=bagging,
+          estimator="boot", nbagg=5, est.para=control.errorest(nboot=5))

Call:
errorest.data.frame(formula = Surv(time, cens) ~ ., data = learn, 
    model = bagging, estimator = "boot", est.para = control.errorest(nboot = 5), 
    nbagg = 5)

	 Bootstrap estimator of Brier's score 
	 with 5 bootstrap replications

Brier's score:  0.0956 

> 
> #lapply(errorest(Surv(time, cens) ~ ., data=learn, model=bagging, 
> #       estimator="cv", nbagg=1, est.para=list(k=2, random=FALSE, 
> #       getmodels=TRUE))$models, class)
> #insert control.errorest
> lapply(errorest(Surv(time, cens) ~ ., data=learn, model=bagging,
+        estimator="cv", nbagg=1, est.para=control.errorest(k=2, random=FALSE,
+        getmodels=TRUE))$models, class)
[[1]]
[1] "survbagg"

[[2]]
[1] "survbagg"

> 
> # bundling for regression
> 
> learn <- as.data.frame(mlbench.friedman1(100))
> test <- as.data.frame(mlbench.friedman1(100))
> 
> comb <- list(list(model=lm, predict=predict.lm))
> 
> modc <- bagging(y ~ ., data=learn, nbagg=10, comb=comb)
> modc

Bagging regression trees with 10 bootstrap replications 

Call: bagging.data.frame(formula = y ~ ., data = learn, nbagg = 10, 
    comb = comb)

> predict(modc, newdata=learn)[1:10]
 [1]  7.192725 20.401036  7.607863 14.909765 15.721930  8.544955 16.968957
 [8] 16.818052 12.692932 14.218597
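> 
> # Note: the comb argument is a list of model/predict pairs, so any fitting
> # function with a formula interface and a numeric predict method should be
> # usable in place of lm. A variant bundling a robust linear model instead
> # (an assumption about the interface, not part of the original test and
> # not run here):
> # comb.rlm <- list(list(model=MASS::rlm, predict=predict))
> # bagging(y ~ ., data=learn, nbagg=10, comb=comb.rlm)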
> 
> # bundling for survival
> 
> while(FALSE) {
+   data("GBSG2", package = "ipred")
+   rcomb <- list(list(model=coxph, predict=predict))
+ 
+   mods <- bagging(Surv(time,cens) ~ ., data=GBSG2, nbagg=10, 
+                   comb=rcomb, control=rpart.control(xval=0))
+   predict(mods, newdata=GBSG2[1:3,])
+ 
+   # test for method dispatch on integer valued responses
+   y <- sample(1:100, 100)
+   class(y)
+   x <- matrix(rnorm(100*5), ncol=5)
+   mydata <- as.data.frame(cbind(y, x))
+ 
+   cv(y, y ~ ., data=mydata, model=lm, predict=predict)
+   bootest(y, y ~ ., data=mydata, model=lm, predict=predict)
+   bagging(y ~., data=mydata, nbagg=10)
+ }
> 
> proc.time()
   user  system elapsed 
   4.96    0.39    5.35 