R Under development (unstable) (2023-12-18 r85702 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> ## Test for boot_nlme
> require(nlme)
Loading required package: nlme
> require(nlraa)
Loading required package: nlraa
> require(car)
Loading required package: car
Loading required package: carData
> 
> ## I only run this on my computer and occasionally
> run.boot.test <- Sys.info()[["user"]] == "fernandomiguez" && FALSE
> 
> data(barley, package = "nlraa")
> set.seed(101)
> ## Does Boot produce 'good' confidence intervals?
> 
> if(run.boot.test){
+ 
+   fit.nls <- nls(yield ~ SSlinp(NF, a, b, xs), data = barley)
+ 
+   ## Profiled confidence intervals
+   confint(fit.nls)
+ 
+   ## Does the bootstrap as implemented in car underestimate confidence intervals?
+   fit.nls.bt <- Boot(fit.nls)
+   ## It is not possible to parallelize the previous code; why?
+ 
+   fit.gnls <- gnls(yield ~ SSlinp(NF, a, b, xs), data = barley)
+ 
+   intervals(fit.gnls)
+ 
+   ## This takes 9.3 seconds
+   system.time(fit.gnls.bt <- boot_nlme(fit.gnls, R = 999, cores = 4))
+ 
+   ## This also takes less than 10 seconds
+   fit.gnls.bt2 <- boot_nlme(fit.gnls, R = 999, psim = 0, cores = 4)
+ 
+   ## Compare confidence intervals
+   confint(fit.nls)      ## nls profile
+   confint(fit.nls.bt)   ## Boot
+   confint(fit.gnls.bt)  ## boot_nlme psim = 1
+   confint(fit.gnls.bt2) ## boot_nlme psim = 0
+   ## These intervals do not agree exactly because different assumptions
+   ## are made and I'm running the bootstrap a relatively small number of times
+ }
> 
> if(run.boot.test){
+ 
+   ## Simple test for barley and an object of class 'gnls'
+   ## Simplify the dataset to make the setup simpler
+   barley2 <- subset(barley, year < 1974)
+ 
+   fit.lp.gnls2 <- gnls(yield ~ SSlinp(NF, a, b, xs), data = barley2)
+ 
+   intervals(fit.lp.gnls2)
+ 
+   ## Compare this to the bootstrapping approach
+   ## This takes about 13 seconds
+   system.time(fit.lp.gnls2.bt <- boot_nlme(fit.lp.gnls2, R = 2000, cores = 4))
+ 
+   summary(fit.lp.gnls2.bt) ## Bias is low, which is good
+ 
+   confint(fit.lp.gnls2.bt, type = "perc")
+ 
+   hist(fit.lp.gnls2.bt, 1, ci = "perc")
+   hist(fit.lp.gnls2.bt, 2, ci = "perc")
+   hist(fit.lp.gnls2.bt, 3, ci = "perc")
+ 
+   ## Testing the bootstrap function with an object which
+   ## contains factors
+ 
+   barley2$year.f <- as.factor(barley2$year)
+ 
+   cfs <- coef(fit.lp.gnls2)
+ 
+   fit.lp.gnls3 <- update(fit.lp.gnls2,
+                          params = list(a + b + xs ~ year.f),
+                          start = c(cfs[1], 0, 0, 0,
+                                    cfs[2], 0, 0, 0,
+                                    cfs[3], 0, 0, 0))
+ 
+   intervals(fit.lp.gnls3)
+ 
+   ## This takes 13 seconds on a Mac (not bad) and 21 seconds on Windows
+   system.time(fit.lp.gnls3.bt <- boot_nlme(fit.lp.gnls3, R = 3000, cores = 4))
+ 
+   summary(fit.lp.gnls3.bt)
+ 
+   confint(fit.lp.gnls3.bt, type = "perc")
+ 
+   hist(fit.lp.gnls3.bt, 1, ci = "perc")
+   hist(fit.lp.gnls3.bt, 2, ci = "perc")
+   hist(fit.lp.gnls3.bt, 3, ci = "perc")
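+ 
+   ## A sketch of a manual check: assuming boot_nlme returns a 'boot'-type object
+   ## that stores the bootstrap replicates in element 't', the percentile interval
+   ## reported by confint(..., type = "perc") can be approximated directly from the
+   ## replicates; the column index (1 = first parameter) and the 2.5/97.5%
+   ## probabilities are illustrative choices.
+   quantile(fit.lp.gnls3.bt$t[, 1], probs = c(0.025, 0.975), na.rm = TRUE)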
+ 
+   ## Testing the function for 'nlme'
+   barley$year.f <- as.factor(barley$year)
+ 
+   barleyG <- groupedData(yield ~ NF | year.f, data = barley)
+ 
+   fitL.bar <- nlsList(yield ~ SSlinp(NF, a, b, xs), data = barleyG)
+ 
+   fit.bar.nlme <- nlme(fitL.bar, random = pdDiag(a + b + xs ~ 1))
+ 
+   plot(augPred(fit.bar.nlme, level = 0:1))
+   ## Confidence intervals for the model's fixed parameters
+   intervals(fit.bar.nlme, which = "fixed")
+ 
+   ## Bootstrap for the asymptote
+   fna <- function(x) fixef(x)[1] + fixef(x)[2] * fixef(x)[3]
+ 
+   ## This takes much longer: 215-237 seconds on my laptop
+   system.time(fit.bar.nlme.bt <- boot_nlme(fit.bar.nlme, f = fna, R = 2000, cores = 4))
+ 
+   confint(fit.bar.nlme.bt, type = "perc")
+ 
+   ## This is good because the estimate on the original data is
+   ## 348.9912, which is in the middle of the confidence interval
+   hist(fit.bar.nlme.bt, 1, ci = "perc")
+ 
+ }
> 
> ## Trying to fit the model using different approaches
> ## This time with the SSasymp function
> 
> if(run.boot.test){
+ 
+   fmL1 <- nlsList(yield ~ SSasymp(NF, Asym, R0, lrc), data = barleyG)
+ 
+   fm1 <- nlme(fmL1)
+ 
+   fm2 <- update(fm1, random = pdDiag(Asym + R0 + lrc ~ 1))
+   ## Arguably the second model is better
+ 
+   ## fm3 <- nlmer(yield ~ SSasymp(NF, Asym, R0, lrc) ~ (Asym | year.f) + (R0 | year.f), data = barley,
+   ##             start = getInitial(yield ~ SSasymp(NF, Asym, R0, lrc), data = barley))
+   ## This does not fail now, but it did before.
+ 
+   ## This is very slow: 17 seconds for 10 attempts
+   ## system.time(fm1.bt <- boot_nlme(fm1, R = 10, cores = 4))
+   ## What about the second model?
+   ## This one takes about 39 seconds (Mac and Windows?)
+   system.time(fm2.bt <- boot_nlme(fm2, R = 1000, cores = 4))
+ 
+   summary(fm2.bt)
+ 
+   confint(fm2.bt)
+ 
+   hist(fm2.bt, 1)
+   hist(fm2.bt, 2)
+   hist(fm2.bt, 3)
+ 
+   ## I feel validated! These intervals are very, very close to the ones
+   ## obtained through the normal approximation.
+   ## I wrote the previous statement a long time ago; I don't know
+   ## whether it is still true (2020-05-22)
+ 
+   ## rstanarm failed with this example; chains did not converge (2020-05-22)
+   ## fm3 <- stan_nlmer(yield ~ SSasymp(NF, Asym, R0, lrc) ~ Asym + R0 + lrc | year.f,
+   ##                   data = barley, cores = 2)
+ 
+   ## What is the impact of psim = 2 vs. psim = 3 on bootstrapping?
+   data(barley, package = "nlraa")
+   barley$year.f <- as.factor(barley$year)
+   barleyG <- groupedData(yield ~ NF | year.f, data = barley)
+   fmL1 <- nlsList(yield ~ SSasymp(NF, Asym, R0, lrc), data = barleyG)
+   fm1 <- nlme(fmL1)
+   fm2 <- update(fm1, random = pdDiag(Asym + R0 + lrc ~ 1))
+   fm3 <- update(fm1, random = pdDiag(Asym + R0 ~ 1))
+ 
+   anova(fm2, fm3)
+   ## Bootstrap the random effects
+   vcov_est <- function(x) diag(var_cov(x, type = "random"))
+ 
+   system.time(fm3.vc.bt2 <- boot_nlme(fm3, vcov_est, psim = 2, cores = 4))
+ 
+   fm3.vc.bt2
+ 
+   hist(fm3.vc.bt2, 1, ci = "perc")
+   hist(fm3.vc.bt2, 2, ci = "perc")
+ 
+   system.time(fm3.vc.bt3 <- boot_nlme(fm3, vcov_est, psim = 3, cores = 4))
+ 
+   fm3.vc.bt3
+ 
+   hist(fm3.vc.bt3, 1, ci = "perc")
+   hist(fm3.vc.bt3, 2, ci = "perc")
+ 
+   ## Does this show that this method results in biased estimates of the
+   ## variance components? The bootstrap has substantial bias.
+   ## Is it because I attempt to fix the variance (I use empirical = TRUE in MASS::mvrnorm)?
+   ## I could either set empirical to FALSE or simulate new variance components...
+   ## (a direct check of the bias is sketched at the end of this file)
+ 
+ }
> 
> proc.time()
   user  system elapsed
   1.20    0.18    1.37
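
## A possible follow-up to the bias question in the last block (a sketch, assuming
## boot_nlme returns a 'boot'-type object with the original estimate in 't0' and
## the replicates in 't'): the bootstrap bias of the variance components could be
## inspected directly as the mean of the replicates minus the original estimate:
## colMeans(fm3.vc.bt2$t, na.rm = TRUE) - fm3.vc.bt2$t0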