R Under development (unstable) (2023-12-18 r85702 ucrt) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # Testing simulate_gam > require(ggplot2) Loading required package: ggplot2 > require(nlraa) Loading required package: nlraa > require(nlme) Loading required package: nlme > require(mgcv) Loading required package: mgcv This is mgcv 1.9-0. For overview type 'help("mgcv-package")'. > > if(Sys.info()[["user"]] == "fernandomiguez"){ + + y <- c(12, 14, 33, 50, 67, 74, 123, 141, 165, 204, 253, 246, 240) + t <- 1:13 + + dat <- data.frame(y = y, t = t) + + ggplot(data = dat, aes(x = t, y = y)) + geom_point() + + ## From page 132 form GAM book by Simon Wood + m1 <- gam(y ~ t + I(t^2), data = dat, family = poisson) + + ggplot(data = dat, aes(x = t, y = y)) + + geom_point() + + geom_line(aes(y = fitted(m1))) + + m1.sim <- simulate_gam(m1, nsim = 1e3) + + m1.sims <- summary_simulate(m1.sim) + datA <- cbind(dat, m1.sims) + + ggplot(data = datA, aes(x = t, y = y)) + + geom_point() + + geom_line(aes(y = Estimate)) + + geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3) + + ggtitle("Simulate + summary_simulate method") + + ## Compare to native method from mgcv package + m1.prd <- predict(m1, se.fit = TRUE, type = "response") + + m1.prdd <- data.frame(dat, prd = m1.prd$fit, + lwr = m1.prd$fit - 1.96 * m1.prd$se.fit, + upr = m1.prd$fit + 1.96 * m1.prd$se.fit) + + ggplot(data = m1.prdd, aes(x = t, y = y)) + + geom_point() + + geom_line(aes(y = prd)) + + geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "purple", alpha = 0.3) + + ggtitle("Built-in predict.gam method") + + ## Prediction. The default method uses 'simulate.lm' under the hood + m1.simP <- simulate_gam(m1, nsim = 1e3, psim = 2) + + m1.simPs <- summary_simulate(m1.simP) + datAP <- cbind(dat, m1.simPs) + + ggplot(data = datAP, aes(x = t, y = y)) + + geom_point() + + geom_line(aes(y = Estimate)) + + geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3) + + ## This method includes the uncertainty in the coefficients and the residuals + m1.simP2 <- simulate_gam(m1, nsim = 1e3, psim = 2, resid.type = "resample") + + m1.simP2s <- summary_simulate(m1.simP2) + datAP2 <- cbind(dat, m1.simP2s) + + ggplot(data = datAP2, aes(x = t, y = y)) + + geom_point() + + geom_line(aes(y = Estimate)) + + geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3) + + ## Wild residuals + m1.simPW <- simulate_gam(m1, nsim = 1e3, psim = 2, resid.type = "wild") + + m1.simPWs <- summary_simulate(m1.simPW) + datAPW <- cbind(dat, m1.simPWs) + + ggplot(data = datAPW, aes(x = t, y = y)) + + geom_point() + + geom_line(aes(y = Estimate)) + + geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3) + + ## Dataset Soybean + data(Soybean) + + fms.G <- gam(weight ~ Time + s(Time), data = Soybean) + fms.C <- lm(weight ~ Time + I(Time^2) + I(Time^3), data = Soybean) + fms.B <- nls(weight ~ SSbgrp(Time, w.max, lt.e, ldt), data = Soybean) + + ## AIC strongly favors GAM and BIC strongly favors beta + IC_tab(fms.G, fms.C, fms.B, criteria = "AIC") + IC_tab(fms.G, fms.C, fms.B, criteria = "BIC") + + ggplot(data = Soybean, aes(x = Time, y = weight)) + + geom_point() + + geom_line(aes(y = fitted(fms.C), color = "Cubic")) + + geom_line(aes(y = fitted(fms.G), color = "GAM")) + + geom_line(aes(y = fitted(fms.B), color = "Beta")) + + ## Prediction based on GAM + prd <- predict_gam(fms.G, interval = "confidence") + + SoybeanAG <- cbind(Soybean, prd) + + ggplot(data = SoybeanAG, aes(x = Time, y = weight)) + + geom_point() + + geom_line(aes(y = fitted(fms.G), color = "GAM")) + + geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3) + + ## Prediction based on beta + prdc <- predict_nls(fms.B, interval = "confidence") + prdp <- predict_nls(fms.B, interval = "prediction") + colnames(prdp) <- paste0("p", c("Estimate", "Est.Error", "Q2.5", "Q97.5")) + + SoybeanAB <- cbind(Soybean, prdc, prdp) + + ggplot(data = SoybeanAB, aes(x = Time, y = weight)) + + geom_point() + + geom_line(aes(y = fitted(fms.B), color = "beta")) + + geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3) + + geom_ribbon(aes(ymin = pQ2.5, ymax = pQ97.5), fill = "purple", alpha = 0.2) + + ## Clearly a gnls model would be better + fms.Bg <- gnls(weight ~ SSbgrp(Time, w.max, lt.e, ldt), data = Soybean, + weights = varPower()) + + IC_tab(fms.G, fms.C, fms.B, fms.Bg, criteria = "AIC") + + prdc.bg <- predict_nlme(fms.Bg, interval = "confidence") + prdp.bg <- predict_nlme(fms.Bg, interval = "prediction") + colnames(prdp.bg) <- paste0("p", c("Estimate", "Est.Error", "Q2.5", "Q97.5")) + + SoybeanBG <- cbind(Soybean, prdc.bg, prdp.bg) + + ggplot(data = SoybeanBG, aes(x = Time, y = weight)) + + geom_point() + + geom_line(aes(y = fitted(fms.Bg), color = "mean")) + + geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5, color = "confidence"), fill = "purple", alpha = 0.3) + + geom_ribbon(aes(ymin = pQ2.5, ymax = pQ97.5, color = "prediction"), fill = "purple", alpha = 0.2) + + ggtitle("Beta growth with increasing variance is the best model") + + ## Looking at data barley + data(barley) + fgb0 <- gam(yield ~ s(NF, k = 5), data = barley) + + fgb0p <- predict_gam(fgb0, interval = "conf") + barleyA <- cbind(barley, fgb0p) + + prd <- predict(fgb0, se = TRUE) + barleyG <- barley + barleyG$fit <- prd$fit + barleyG$lwr <- prd$fit - 1.96 * prd$se.fit + barleyG$upr <- prd$fit + 1.96 * prd$se.fit + + barleyAG <- merge(barleyA, barleyG) + + ## It is hard to see, but they are identical + ggplot(data = barleyAG, aes(x = NF, y = yield)) + + geom_point() + + geom_line(aes(y = fit)) + + geom_line(aes(y = Estimate), linetype = 2) + + geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.2) + + geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "yellow", alpha = 0.1) + + ## What if we include a random effect of year? + barley$year.f <- as.factor(barley$year) + + fgb1 <- gam(yield ~ s(NF, k = 5) + s(year.f, bs = "re"), data = barley) + + fgb1p <- predict_gam(fgb1, interval = "conf") + barleyA <- cbind(barley, fgb1p) + + prd <- predict(fgb1, se = TRUE) + barleyG <- barley + barleyG$fit <- prd$fit + barleyG$lwr <- prd$fit - 1.96 * prd$se.fit + barleyG$upr <- prd$fit + 1.96 * prd$se.fit + + barleyAG <- merge(barleyA, barleyG) + + ## It is hard to see, but they are identical + ggplot(data = barleyAG, aes(x = NF, y = yield)) + + facet_wrap(~year.f) + + geom_point() + + geom_line(aes(y = fit)) + + geom_line(aes(y = Estimate), linetype = 2) + + geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "blue", alpha = 0.3) + + geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "white", alpha = 0.3) + + f1 <- function(){ + dat <- data.frame(x = rnorm(10), y = rnorm(10)) + fm00 <- mgcv::gam(y ~ x, data = dat) + ans <- simulate_gam(fm00) + ans + } + ## For GAM objects it works regardless because I use 'predict.gam' under the hood and method = "lpmatrix" + res1 <- f1() + + } > > > proc.time() user system elapsed 1.31 0.25 1.54