R Under development (unstable) (2023-12-18 r85702 ucrt) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ## Testing simulate_nlme levels > require(nlme) Loading required package: nlme > require(nlraa) Loading required package: nlraa > require(ggplot2) Loading required package: ggplot2 > require(car) Loading required package: car Loading required package: carData > > run.test.simulate.nlme <- Sys.info()[["user"]] == "fernandomiguez" && FALSE > > if(run.test.simulate.nlme){ + + data(Orange) + + fitL <- nlsList(circumference ~ SSlogis(age, Asym, xmid, scal), data = Orange) + + fmm <- nlme(fitL, random = pdDiag(Asym + xmid + scal ~ 1)) + + ## This first simulation is at the level of population (fixed, level = 0) + ## It also means that psim = 0, so there is no sampling from mvrnorm + sim00 <- simulate_nlme(fmm, nsim = 100, psim = 0, level = 0, value = "data.frame") + ## Level one means that we simlate at the level of individual Tree (or BLUP in this case) + ## But psim = 0, so this is the same as predict(x, level = 0) + sim01 <- simulate_nlme(fmm, nsim = 100, psim = 0, level = 1, value = "data.frame") + + ## What if I simulate? + sim10 <- simulate_nlme(fmm, nsim = 100, psim = 1, level = 0, value = "data.frame") + sim11 <- simulate_nlme(fmm, nsim = 100, psim = 1, level = 1, value = "data.frame") + + ## Level 2? + ## Not sure if it is correct, but it seems to work as intended + ## The line below does not make sense because we are adding error + ## at the population level + ## sim20 <- simulate_nlme(fmm, nsim = 100, psim = 2, level = 0) + sim21 <- simulate_nlme(fmm, nsim = 100, psim = 2, level = 1, value = "data.frame") + + ggplot(data = sim00, aes(x = age, y = circumference)) + + geom_point() + + geom_line(aes(y = sim.y, group = ii)) + + ggtitle("psim = 0, level = 0") + + ggplot(data = sim01) + + geom_point(aes(x = age, y = circumference, color = Tree)) + + geom_line(aes(y = sim.y, x = age, color = Tree)) + + theme(legend.position = "none") + + ggtitle("psim = 0, level = 1") + + ggplot(data = sim10) + + geom_line(aes(y = sim.y, x = age, group = ii), color = "gray", alpha = 0.4) + + geom_point(aes(x = age, y = circumference)) + + theme(legend.position = "none") + + ggtitle("psim = 1, level = 0") + + sim11$Tree_ID <- with(sim11, paste0(Tree,"_",ii)) + + ggplot(data = sim11) + + facet_wrap(~ Tree) + + geom_line(aes(y = sim.y, x = age, color = Tree, group = Tree_ID), alpha = 0.4) + + geom_point(aes(x = age, y = circumference)) + + theme(legend.position = "none") + + ggtitle("psim = 1, level = 1") + + ## Test with Soybean example + data(Soybean) + + fmsL <- nlsList(SSlogis, Soybean) + fmsm <- nlme(fmsL) + + fxf <- fixef(fmsm) + fmsm2 <- update(fmsm, fixed = list(Asym + xmid + scal ~ Variety), + start = c(fxf[1], 0, fxf[2], 0, fxf[3], 0), + random = pdDiag(Asym + xmid + scal ~ 1)) + + fmsm.s <- simulate_nlme(fmsm2, nsim = 100, value = "data.frame") + + fmsm.s$ID <- with(fmsm.s, paste0(ii,"_",Plot)) + + ggplot(data = fmsm.s, aes(x = Time, y = weight)) + + facet_grid(Variety ~ Year) + + geom_line(aes(x = Time, y = sim.y, group = ID, color = Variety), alpha = 0.5) + + geom_point() + + ## Test with CO2 example + data(CO2) + + fmL <- nlsList(SSasymp, CO2) + fmcm <- nlme(fmL, random = pdDiag(Asym + R0 ~ 1)) + + fxf <- fixef(fmcm) + fmcm2 <- update(fmcm, fixed = list(Asym + R0 + lrc ~ Treatment), + start = c(fxf[1], 0, fxf[2], 0, fxf[3], 0)) + + fmcm2.s <- simulate_nlme(fmcm2, nsim = 100, value = "data.frame") + + fmcm2.s$ID <- with(fmcm2.s, paste0(ii,"_",Plant)) + + ggplot(data = fmcm2.s, aes(x = conc, y = uptake)) + + facet_grid(Type ~ Treatment) + + geom_line(aes(x = conc, y = sim.y, group = ID, color = Treatment), alpha = 0.5) + + geom_point() + + ## What about bootstrap? + system.time(fmcm2.bt <- boot_nlme(fmcm2, cores = 4)) ## 35 seconds on my laptop MacBook Pro + + hist(fmcm2.bt, 2) + + ## predictions + ## Incorporate the fixed effect of type + fxf2 <- fixef(fmcm2) + fmcm3 <- update(fmcm2, fixed = list(Asym + R0 + lrc ~ Treatment + Type), + start = c(fxf2[1:2], 0, fxf2[3:4], 0, fxf2[5:6], 0), + weights = varPower()) + + fxf3 <- fixef(fmcm3) + fmcm4 <- update(fmcm3, fixed = list(Asym + R0 + lrc ~ Treatment + Type + Treatment:Type), + start = c(fxf3[1:3], 0, fxf3[4:6], 0, fxf3[7:9], 0)) + + prd_fun <- function(x) predict(x, level = 0) + + fmcm4.bt2 <- boot_nlme(fmcm4, prd_fun, cores = 4) + + CO2$lwr <- apply(t(fmcm4.bt2$t), 1, quantile, probs = 0.05, na.rm = TRUE) + CO2$upr <- apply(t(fmcm4.bt2$t), 1, quantile, probs = 0.95, na.rm = TRUE) + + CO2$ID <- with(CO2, paste0(Treatment,"_",Type)) + + ggplot(data = CO2, aes(x = conc, y = uptake)) + + facet_grid( Type ~ Treatment) + + geom_ribbon(aes(x = conc, ymin = lwr, ymax = upr, group = ID, fill = Treatment), alpha = 0.5) + + geom_point() + + ## Trying the nested approach + fmcm4 <- update(fmcm2, random = list(Type = pdDiag(Asym + R0 ~ 1), + Plot = list(Asym + R0 ~ 1)), + groups = ~Type/Plant) + + prd0 <- predict(fmcm4, newdata = CO2, level = 1) + + fmcm4.s <- simulate_nlme(fmcm4, nsim = 100, value = "data.frame") + ## This shows that while predict.nlme fails, simulate_nlme works + ## (or seems to work) + + fmcm4.s$ID <- with(fmcm4.s, paste0(ii,"_",Plant)) + + ggplot(data = fmcm4.s, aes(x = conc, y = uptake)) + + facet_grid( Type ~ Treatment) + + geom_line(aes(x = conc, y = sim.y, group = ID, color = Treatment), alpha = 0.5) + + geom_point() + + ## GAMS don't quite get it right + library(mgcv) + CO2$id <- as.factor(with(CO2, paste0(Type, "_", Treatment))) + fit.gam <- gam(uptake ~ Type*Treatment + s(conc, k = 3, by = id), data = CO2) + # + ggplot(data = CO2, aes(x = conc, y = uptake)) + + facet_grid( Type ~ Treatment) + + geom_line(aes(y = fitted(fit.gam))) + + geom_point() + + ## Compared to gnls model + fit.lis <- nlsLMList(uptake ~ SSnrh(conc, asym, phi, theta, rd), data = CO2) + + plot(fit.lis) + + fmco2 <- nlme(fit.lis, random = pdDiag(asym + phi + rd ~ 1)) + + fmco2.2 <- update(fmco2, fixed = asym + phi + theta + rd ~ Type, + start = c(fixef(fmco2)[1], 0, + fixef(fmco2)[2], 0, + fixef(fmco2)[3], 0, + fixef(fmco2)[4], 0)) + + fmco2.3 <- update(fmco2, fixed = asym + phi + theta + rd ~ Type + Treatment, + start = c(fixef(fmco2.2)[1:2], 0, + fixef(fmco2.2)[3:4], 0, + fixef(fmco2.2)[5:6], 0, + fixef(fmco2.2)[7:8], 0)) + + ## Turns out this model has too many parameters and they are not stable + ## for this reason the predictions are all over the place + fmco2.4 <- update(fmco2, fixed = asym + phi + theta + rd ~ Type + Treatment + Type:Treatment, + start = c(fixef(fmco2.3)[1:3], 0, + fixef(fmco2.3)[4:6], 0, + fixef(fmco2.3)[7:9], 0, + fixef(fmco2.3)[10:12], 0)) + + prd <- predict_nlme(fmco2.4, interval = "conf", level = 0.9) + CO2A <- cbind(CO2, prd) + + CO2AS <- subset(CO2A, Type == "Quebec") + ggplot(data = CO2AS, aes(x = conc, y = uptake)) + + facet_grid( Type ~ Treatment) + + geom_line(aes(y = Estimate)) + + geom_ribbon(aes(ymin = Q5, ymax = Q95), fill = "purple", alpha = 0.3) + + geom_point() + + ## A simpler model is better + fmco2.5 <- update(fmco2.4, fixed = list(asym + phi ~ Type + Treatment + Type:Treatment, + theta + rd ~ 1), + start = c(fixef(fmco2.3)[1:3], 0, + fixef(fmco2.3)[4:6], 0, + fixef(fmco2.3)[7], + fixef(fmco2.3)[10])) + + prd <- predict_nlme(fmco2.5, interval = "conf", level = 0.9) + CO2A <- cbind(CO2, prd) + + ggplot(data = CO2A, aes(x = conc, y = uptake)) + + facet_grid( Type ~ Treatment) + + geom_line(aes(y = Estimate)) + + geom_ribbon(aes(ymin = Q5, ymax = Q95), fill = "purple", alpha = 0.3) + + geom_point() + + fit.gnls <- gnls(uptake ~ SSnrh(conc, asym, phi, theta, rd), data = CO2, + params = list(asym + phi ~ Type*Treatment, theta + rd ~ 1), + start = c(35.35, 0, 0, 0, 0.1577, 0, 0, 0, 0.955, 2.35)) + + ## This shows that a 'good' nls model is better than a 'GAM' but 'gams' + ## are much, much more flexible + IC_tab(fit.gnls, fit.gam) + + ## Testing the implementation of psim = 3 for nlme objects + ## Picking up the previous Soybean model + data(Soybean) + + fmsL <- nlsList(SSlogis, Soybean) + fmsm <- nlme(fmsL) + + fxf <- fixef(fmsm) + fmsm2 <- update(fmsm, fixed = list(Asym + xmid + scal ~ Variety), + start = c(fxf[1], 0, fxf[2], 0, fxf[3], 0), + random = pdDiag(Asym + xmid + scal ~ 1)) + + ## Ok, this seems to work + sim2.psim2 <- simulate_nlme(fmsm2, nsim = 1, + psim = 2, level = 1, + value = "data.frame") + + sim2.psim2$id <- with(sim2.psim2, paste(Plot, Variety, sep = "_")) + + ggplot(data = sim2.psim2, aes(x = Time, y = weight)) + + geom_line(aes(x = Time, y = sim.y, group = id), color = "gray", alpha = 0.4) + + geom_point() + + ## psim = 3 is not specific for a given subject + sim2.psim3 <- simulate_nlme(fmsm2, nsim = 10, + psim = 3, level = 1, + value = "data.frame") + + sim2.psim3$id <- with(sim2.psim3, paste(Plot, Variety, ii, sep = "_")) + + ggplot(data = sim2.psim3, aes(x = Time, y = weight)) + + geom_line(aes(x = Time, y = sim.y, group = id), color = "gray", alpha = 0.3) + + geom_point() + + ## Since psim = 3 is not specific to an individual subject + ## we should not expect for the lines to vary around the observed data + ## in each individual panel. If that is what you want then + ## you need to use psim = 2 + ggplot(data = sim2.psim3, aes(x = Time, y = weight)) + + facet_wrap(~ Variety * Plot) + + geom_line(aes(x = Time, y = sim.y, group = ii), color = "gray", alpha = 0.3) + + geom_point() + + fmsm3 <- update(fmsm2, random = list(Year = pdDiag(Asym + xmid + scal ~ 1), + Plot = pdDiag(Asym + xmid + scal ~ 1)), + groups = ~Year/Plot) + + ## I won't check if this gives you the right answer at the moment, but it runs + sim3.psim3 <- simulate_nlme(fmsm3, nsim = 10, psim = 3, value = "data.frame") + + sim3.psim3$id <- with(sim3.psim3, paste(Plot, Variety, Year, ii, sep = "_")) + + ggplot(data = sim3.psim3, aes(x = Time, y = weight)) + + geom_point() + + geom_line(aes(x = Time, y = sim.y, group = id), alpha = 0.1) + + ggplot(data = sim3.psim3, aes(x = Time, y = weight)) + + facet_wrap(~ Year * Variety) + + geom_point() + + geom_line(aes(x = Time, y = sim.y, group = id), alpha = 0.1) + + varT <- var(Soybean$weight) + + sim3.psim2.2 <- simulate_nlme(fmsm3, nsim = 2e3, psim = 2) + sim3.psim3.2 <- simulate_nlme(fmsm3, nsim = 2e3, psim = 3) + + varT.psim2 <- apply(sim3.psim2.2, 2, var, na.rm = TRUE) + varT.psim3 <- apply(sim3.psim3.2, 2, var, na.rm = TRUE) + + ggplot() + + geom_density(aes(x = varT.psim2, color = "psim = 2")) + + geom_density(aes(x = varT.psim3, color = "psim = 3")) + + geom_vline(xintercept = varT) + ## Unexpectedly, this shows that the variance for psim = 3 + ## is lower than the variance for psim2, perhaps I need to do more simulations? + ## Try a different dataset + + data(Orange) + fitL <- nlsList(circumference ~ SSlogis(age, Asym, xmid, scal), data = Orange) + fmm <- nlme(fitL, random = pdDiag(Asym + xmid + scal ~ 1)) + + varT <- var(Orange$circumference) + fmm.sim.psim2 <- simulate_nlme(fmm, nsim = 1e3, psim = 2) + fmm.sim.psim3 <- simulate_nlme(fmm, nsim = 1e3, psim = 3) + + varT.psim2 <- apply(fmm.sim.psim2, 2, var, na.rm = TRUE) + varT.psim3 <- apply(fmm.sim.psim3, 2, var, na.rm = TRUE) + + ggplot() + + geom_density(aes(x = varT.psim2, color = "psim = 2")) + + geom_density(aes(x = varT.psim3, color = "psim = 3")) + + geom_vline(xintercept = varT) + + ## Prediction Interval for the mean of a NEW Tree + nprd <- simulate_nlme(fmm, nsim = 1e3, psim = 3, value = "data.frame") + + ## First step: remove the observation-level variation + nprdA1 <- aggregate(sim.y ~ ii + age + Tree, data = nprd, FUN = mean) + + nprdA <- aggregate(sim.y ~ age, data = nprdA1, FUN = median) + nprdA$lwr <- aggregate(sim.y ~ age, data = nprdA1, FUN = quantile, probs = 0.05)$sim.y + nprdA$upr <- aggregate(sim.y ~ age, data = nprdA1, FUN = quantile, probs = 0.95)$sim.y + + ggplot() + + geom_point(data = Orange, aes(x = age, y = circumference, color = Tree)) + + geom_line(data = nprdA, aes(x = age, y = sim.y)) + + geom_ribbon(data = nprdA, aes(x = age, ymin = lwr, ymax = upr), alpha = 0.3) + + ggtitle("90% Prediction Interval for a NEW Tree regression") + } > > proc.time() user system elapsed 1.92 0.26 2.17