### test-manual-tutorial.R --- ##---------------------------------------------------------------------- ## Author: Brice Ozenne ## Created: nov 13 2021 (16:47) ## Version: ## Last-Updated: aug 1 2023 (14:05) ## By: Brice Ozenne ## Update #: 28 ##---------------------------------------------------------------------- ## ### Commentary: ## ### Change Log: ##---------------------------------------------------------------------- ## ### Code: if(FALSE){ library(testthat) library(lattice) library(psych) library(emmeans) library(LMMstar) } context("Check lmm on Julie tutorial") LMMstar.options(optimizer = "FS", method.numDeriv = "simple", precompute.moments = TRUE) test.practical <- FALSE ## * section 4: Preparing data for analysis data("gastricbypassW", package = "LMMstar") wide <- gastricbypassW long <- reshape(wide, direction='long', idvar='id', varying=list( c('weight1','weight2','weight3','weight4'), c('glucagonAUC1', 'glucagonAUC2', 'glucagonAUC3', 'glucagonAUC4') ), v.names=c('weight','glucagonAUC'), timevar='visit') # Make a categorical version of the time variable: time.names <- c('-3 month','-1 week','+1 week','+3 month') long$time <- factor(long$visit, labels=time.names) ## * section 5: Descriptive statistics test_that("summarize", { if(test.practical==FALSE){skip('Not run to save time in the check')} ss1 <- summarize(weight ~ time, data = long) GS <- data.frame("outcome" = c("weight", "weight", "weight", "weight"), "time" = as.factor(c("-3 month", "-1 week", "+1 week", "+3 month")), "observed" = c(20, 20, 20, 20), "missing" = c(0, 0, 0, 0), "pc.missing" = c(0, 0, 0, 0), "mean" = c(128.970, 121.240, 115.700, 102.365), "sd" = c(20.26937, 18.91019, 18.27532, 17.05389), "min" = c(100.9, 95.7, 89.9, 78.8), "q1" = c(115.300, 107.775, 102.225, 90.400), "median" = c(123.1, 114.5, 110.6, 98.5), "q3" = c(139.825, 134.525, 128.375, 108.250), "max" = c(173.0, 162.2, 155.0, 148.0)) expect_equivalent(ss1,GS, tol = 1e-3) ss2 <- summarize(weight ~ time | id, data = long) }) ## * section 6: about linear mixed models long$time <- relevel(long$time, ref="-3 month") ## ** Display of the parametrisation test_that("plot parametrisation", { if(test.practical==FALSE){skip('Not run to save time in the check')} fit.main <- lmm(weight~time, repetition=~visit|id, structure="UN", df=TRUE, data=long) ggParam <- autoplot(fit.main, ci = FALSE, mean.size = c(4, 1.5), size.text = 20)$plot ggParam <- ggParam + coord_cartesian(ylim = c(0,1.2*coef(fit.main)[1]), xlim = c(0.5,4)) ## ggParam <- ggParam + geom_curve(aes(x = 1, y = 0, xend = 1, yend = 125), ## arrow = arrow(length = unit(0.03, "npc"), type="closed"), ## colour = "#EC7014", size = 1.2, angle = -90) ggParam <- ggParam + geom_curve(aes(x = 1, y = 0, xend = 1, yend = 0.99*coef(fit.main)[1]), arrow = arrow(length = unit(0.03, "npc"), type="closed"), colour = "#EC7014", size = 1, angle = -90, curvature = -0.25) ggParam <- ggParam + geom_curve(aes(x = 1, y = 1.01*coef(fit.main)[1], xend = 2, yend = 1.02*(coef(fit.main)[1]+coef(fit.main)[2])), arrow = arrow(length = unit(0.03, "npc"), type="closed"), colour = "blue", size = 1, angle = -90, curvature = -0.25) ggParam <- ggParam + geom_curve(aes(x = 1, y = 1.01*coef(fit.main)[1], xend = 3, yend = 1.02*(coef(fit.main)[1]+coef(fit.main)[3])), arrow = arrow(length = unit(0.03, "npc"), type="closed"), colour = "purple", size = 1, angle = -90, curvature = -0.25) ggParam <- ggParam + geom_curve(aes(x = 1, y = 1.01*coef(fit.main)[1], xend = 4, yend = 1.02*(coef(fit.main)[1]+coef(fit.main)[4])), arrow = arrow(length = unit(0.03, "npc"), type="closed"), colour = "darkgreen", size = 1, angle = -90, curvature = -0.25) ggParam <- ggParam + geom_text(mapping = aes(x = 0.9, y = coef(fit.main)[1]/2, label = "beta[1]"), parse = TRUE,colour = "#EC7014", size = 8) ggParam <- ggParam + geom_text(mapping = aes(x = 1.5, y = 0.9*coef(fit.main)[1], label = "beta[2]"), parse = TRUE,colour = "blue", size = 8) ggParam <- ggParam + geom_text(mapping = aes(x = 2.3, y = 1*coef(fit.main)[1], label = "beta[3]"), parse = TRUE,colour = "purple", size = 8) ggParam <- ggParam + geom_text(mapping = aes(x = 3.5, y = 1.05*coef(fit.main)[1], label = "beta[4]"), parse = TRUE,colour = "darkgreen", size = 8) ggParam <- ggParam + scale_x_discrete(breaks=1:4, labels=sort(unique(long$time))) ## ggParam <- ggParam + scale_y_continuous(breaks=as.double(c(0,50,100,sort(coef(fit.main)[1]+c(0,coef(fit.main)[-1])),150)), ## labels=list(bquote(0),bquote(50),bquote(100),bquote(mu[1]),bquote(mu[2]),bquote(mu[3]),bquote(mu[4]),bquote(150))) ggParam <- ggParam + scale_y_continuous(breaks=as.double(c(0,50,100,coef(fit.main)[1]+c(0,coef(fit.main)[-1]),150)), labels=c(0,50,100, expression(mu[1]), expression(mu[2]), expression(mu[3]), expression(mu[4]), 150)) ggParam <- ggParam + theme(panel.grid.minor = element_blank()) ggParam }) ## ggsave(ggParam, filename = "figures/gg-explanation-table.png", width = 10) ## ** Dynamic predictions test_that("Dynamic predictions", { if(test.practical==FALSE){skip('Not run to save time in the check')} fit.main <- lmm(weight~time, repetition=~visit|id, structure="UN", df=TRUE, data=long) newd <- rbind(data.frame(id = 1:100, time = "-3 month", visit = 1, weight = seq(100,175,length.out = 100)), data.frame(id = 1:100, time = "-1 week", visit = 2, weight = NA)) ## sigma(lm(weight2 ~ weight1, data = gastricbypassW)) ## head(dfFit) ## sqrt(prod(coef(fit.main, effects = "all")[c("sigma","k.2")])^2*(1-coef(fit.main, effects = "all")[c("rho(1,2)")]^2)) dfFit <- merge(newd[newd$time=="-3 month",c("id","weight")], cbind(res = predict(fit.main, newdata = newd, type = "dynamic", se = "res", keep.newdata = TRUE)[newd$time=="-1 week",c("id","estimate","lower","upper")], total = predict(fit.main, newdata = newd, type = "dynamic", se = "total", keep.newdata = TRUE)[newd$time=="-1 week",c("estimate","lower","upper")]), by.x = "id", by.y = "res.id") dfData <- reshape2::dcast(data = long[long$time %in% c("-3 month","-1 week"),], formula = id~time, value.var = "weight") colnames(dfData) <- c("id","w1","w2") ggDyn <- ggplot() ggDyn <- ggDyn + geom_point(data = dfData, aes(x=w1,y=w2)) ggDyn <- ggDyn + geom_line(data = dfFit, aes(x=weight,y=res.estimate)) ggDyn <- ggDyn + geom_line(data = dfFit, aes(x=weight,y=res.upper, color = "residual variance"), linetype = 2, linewidth = 1.1) ggDyn <- ggDyn + geom_line(data = dfFit, aes(x=weight,y=res.lower, color = "residual variance"), linetype = 2, linewidth = 1.1) ggDyn <- ggDyn + geom_line(data = dfFit, aes(x=weight,y=total.upper, color = "residual variance and estimation uncertainty"), linetype = 3, linewidth = 1.1) ggDyn <- ggDyn + geom_line(data = dfFit, aes(x=weight,y=total.lower, color = "residual variance and estimation uncertainty"), linetype = 3, linewidth = 1.1) ggDyn <- ggDyn + labs(x = "weight 3 months before", y = "weight 1 week before", color = "95% prediction limit accounting for") ggDyn <- ggDyn + theme(legend.position = "bottom", legend.direction = "vertical", text = element_text(size=15), axis.line = element_line(size = 1.25), axis.ticks = element_line(size = 2), axis.ticks.length=unit(.25, "cm")) ggDyn ## ggsave(ggDyn, filename = "figures/gg-dynamic-prediction.png", width = 10) }) ## ** residual plot test_that("Residuals", { if(test.practical==FALSE){skip('Not run to save time in the check')} fit.main <- lmm(weight~time, repetition=~visit|id, structure="UN", df=TRUE, data=long) df.allres <- residuals(fit.main, type = "all", keep.data = TRUE) gg.res1 <- ggplot(df.allres, aes(x=time,y = weight, group = id, color = id)) + geom_line() + geom_point() gg.res2 <- ggplot(df.allres, aes(x=time,y = r.response, group = id, color = id)) + geom_line() + geom_point() + ylab("raw residuals") gg.res3 <- ggplot(df.allres, aes(x=time,y = r.studentized, group = id, color = id)) + geom_line() + geom_point() + ylab("studentized residuals") gg.res4 <- ggplot(df.allres, aes(x=time,y = r.normalized, group = id, color = id)) + geom_line() + geom_point() + ylab("scaled residuals") library(ggpubr) gg.res <- ggarrange(gg.res1,gg.res2,gg.res3,gg.res4, legend = "none") gg.res ## ggsave(gg.res, filename = "figures/gg-residuals.png", width = 10) GS <- data.frame("visit" = c(1, 2, 3, 4), "weight" = c(165.2, 153.4, 149.2, 132.0), "fitted" = c(128.970, 121.240, 115.700, 102.365), "r.response" = c(36.230, 32.160, 33.500, 29.635), "r.pearson" = c(1.787422, 1.700667, 1.833070, 1.737725), "r.studentized" = c(1.833856, 1.744848, 1.880690, 1.782868), "r.normalized" = c( 1.7874218, -0.4780950, 1.8805374, -0.4578838)) expect_equivalent(GS, df.allres[df.allres$id=="2", c("visit","weight","fitted","r.response","r.pearson","r.studentized","r.normalized")], tol = 1e-5) }) ## * section 7: Analysis and interpretation of the linear mixed model ## Set reference point (intercept) for time factor: long$time <- relevel(long$time, ref="-3 month") test_that("Extactors for lmm", { if(test.practical==FALSE){skip('Not run to save time in the check')} ## ** section 7.1 fit.main <- lmm(weight~time, repetition=~visit|id, structure="UN", df=TRUE, data=long) ## Model summary, loads of information summary(fit.main) ## Display fitted values plot(fit.main) ## Extract estimates and confidence intervals confint(fit.main) GS <- data.frame("estimate" = c(128.97, -7.73, -13.27, -26.605), "se" = c(4.53237977, 0.69744264, 0.83920784, 1.54937331), "df" = c(18.9805546, 18.97430883, 18.96889869, 18.96352408), "lower" = c(119.48296228, -9.18989801, -15.02667713, -29.84829784), "upper" = c(138.45703772, -6.27010199, -11.51332287, -23.36170216), "p.value" = c(0, 9.95e-10, 2.23e-12, 5.17e-13)) expect_equivalent(model.tables(fit.main), GS, tol = 1e-5) ## Extract covariance matrix: sigma(fit.main) ## F-test: fitAnova.main <- anova(fit.main, ci = TRUE) fitAnova.main expect_equivalent(fitAnova.main$multivariate[,c("statistic","df.num","df.denom","p.value")], data.frame("statistic" = c(121.65995198), "df.num" = c(3), "df.denom" = c(18.97809203), "p.value" = c(1.427969e-12)), tol = 1e-5 ) round(coef(fit.main, effects = "correlation"),3) round(coef(fit.main, effects = "variance", transform.k = "sd"),2) ## Predictions ## Make a dataset with covariate values for prediction: pred <- long[,c('visit','time')] ## Reduce to one of each value: pred <- unique(pred) ## Add predicted means to the dataframe: pred <- cbind(pred, predict(fit.main, newdata=pred)) ## Plot predicted means xyplot(estimate~time, data=pred, type='b') xyplot(se~time, data=pred, type='b') ## OPTIONAL: Picture including 95% CIs (emmeans-package) emmip(fit.main, ~time, CIs=TRUE, xlab='Time', ylab='Mean weight') ## Residual diagnostics par(mfrow=c(2,2)) plot(fitted(fit.main), residuals(fit.main, type='studentized'), main='Studentized residuals') abline(h=0) qqnorm(residuals(fit.main, type='studentized')) abline(0,1) # Note: Studentized residuals are slightly skewed. plot(fitted(fit.main), residuals(fit.main, type='scaled'), main='Scaled residuals') abline(h=0) qqnorm(residuals(fit.main, type='scaled')) abline(0,1) ## Note: Scaled residuals look good. ##residuals(fit.main, format = "long", type = "normalized", plot = "scatterplot") plot(fit.main, type = "qqplot", engine.qqplot = "qqtest", by.repetition = TRUE) plot(fit.main, type = "qqplot", engine.qqplot = "qqtest", by.repetition = FALSE) ## ** section 7.5 ## Fit the model without an intercept using -1 in the model formula: fit.means <- lmm(weight~-1+time, repetition=~visit|id, structure="UN", data=long, df=TRUE) ## Extract estimated means and confidence intervals: GS <- data.frame("estimate" = c(128.97, 121.24, 115.7, 102.365), "lower" = c(119.48296232, 112.39029937, 107.14759895, 94.38463625), "upper" = c(138.45703768, 130.08970063, 124.25240105, 110.34536375)) expect_equivalent(confint(fit.means)[c("estimate","lower","upper")], GS, tol = 1e-6) }) ## * Appendix D test_that("log transformation for lmm", { if(test.practical==FALSE){skip('Not run to save time in the check')} ## Add log2-transformed weights to the long data: long$log2weight <- log2(long$weight) ## Fit the linear mixed model fit.log <- lmm(log2weight~time, repetition=~visit|id, structure="UN", df=TRUE, data=long) ## Estimates and CIs on log2-scale: test <- confint(fit.log, backtransform = function(x){2^x}) test ## Back-transformed estimates and CIs: expect_equivalent(2^confint(fit.log)[,c("estimate","lower","upper")], test[,c("estimate","lower","upper")], tol = 1e-5) ## Save predicted population means on log2-scale and plot them pred.log <- long[,c('visit','time')] pred.log <- unique(pred.log) pred.log <- cbind(pred.log, predict(fit.log, newdata=pred.log)) xyplot(estimate~time, type='b', data=pred.log) # mean on log2-scale xyplot(2^estimate~time, type='b', data=pred.log) # back-transformed, i.e. geometric means or medians ## Residual diagnostics: plot(fitted(fit.log), residuals(fit.log, type='studentized')) abline(h=0) qqnorm(residuals(fit.log, type='studentized')) abline(0,1) plot(fitted(fit.log), residuals(fit.log, type='scaled')) abline(h=0) qqnorm(residuals(fit.log, type='scaled')) abline(0,1) ## Note: slightly better fit. }) ##---------------------------------------------------------------------- ### test-manual-tutorial.R ends here