R Under development (unstable) (2023-09-10 r85120 ucrt) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > #-------------------------------------------------------------------------------------------------------------------------- > # test code for survPen package > #-------------------------------------------------------------------------------------------------------------------------- > > > library(survPen) > library(splines) > data(datCancer) # simulated dataset with 2000 individuals diagnosed with cervical cancer > > > cat("\n","______________________________________________________________________________________________","\n") ______________________________________________________________________________________________ > cat("\n","survPen test code","\n") survPen test code > cat("\n","______________________________________________________________________________________________","\n") ______________________________________________________________________________________________ > > > #-------------------------------------------------------- example 0 > # Comparison between restricted cubic splines and penalized restricted cubic splines > cat("\n","______________________________________________________________________________________________","\n") ______________________________________________________________________________________________ > cat("\n","example 0","\n") example 0 > cat("\n","Comparison between restricted cubic splines and penalized restricted cubic splines","\n") Comparison between restricted cubic splines and penalized restricted cubic splines > > > > # unpenalized > f <- ~ns(fu,knots=c(0.25, 0.5, 1, 2, 4),Boundary.knots=c(0,5)) > > mod <- survPen(f,data=datCancer,t1=fu,event=dead) > > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","summary of the unpenalized model","\n") summary of the unpenalized model > print(summary(mod)) hazard model Call: survPen(formula = f, data = datCancer, t1 = fu, event = dead) Parametric coefficients: Estimate (Intercept) -1.880242 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))1 -0.029509 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))2 0.012000 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))3 -0.726988 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))4 -0.612175 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))5 -1.876409 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))6 -1.644222 Std. Error (Intercept) 0.229846 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))1 0.256168 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))2 0.301039 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))3 0.314238 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))4 0.312224 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))5 0.558958 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))6 0.321442 z value (Intercept) -8.1804 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))1 -0.1152 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))2 0.0399 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))3 -2.3135 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))4 -1.9607 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))5 -3.3570 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))6 -5.1151 Pr(>|z|) (Intercept) 2.828e-16 *** ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))1 0.908291 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))2 0.968204 ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))3 0.020696 * ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))4 0.049915 * ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))5 0.000788 *** ns(fu, knots = c(0.25, 0.5, 1, 2, 4), Boundary.knots = c(0, 5))6 3.135e-07 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 likelihood = -2319 Number of parameters = 7 converged= TRUE > > # penalized > f.pen <- ~ smf(fu,knots=c(0,0.25, 0.5, 1, 2, 4,5)) # careful here: the boundary knots are included > > mod.pen <- survPen(f.pen,data=datCancer,t1=fu,event=dead) > > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","summary of the penalized model","\n") summary of the penalized model > print(summary(mod.pen)) penalized hazard model Call: survPen(formula = f.pen, data = datCancer, t1 = fu, event = dead) Parametric coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -3.04226 0.12344 -24.645 < 2.2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 log-likelihood = -2320.8, penalized log-likelihood = -2321.7 Number of parameters = 7, effective degrees of freedom = 3.6549 LAML = 2326.3 Smoothing parameter(s): smf(fu) 1690.5 edf of smooth terms: smf(fu) 2.6549 converged= TRUE > > > # predictions > > new.time <- seq(0,5,length=50) > pred <- predict(mod,data.frame(fu=new.time)) > pred.pen <- predict(mod.pen,data.frame(fu=new.time)) > > pdf("compar_unpen_pen.pdf",height=5,width=6) > par(mfrow=c(1,1)) > plot(new.time,pred$haz,type="l",ylim=c(0,0.2),main="hazard vs time", + xlab="time since diagnosis (years)",ylab="hazard",col="red") > lines(new.time,pred.pen$haz,col="blue3") > legend("topright",legend=c("unpenalized","penalized"), + col=c("red","blue3"),lty=rep(1,2)) > > dev.off() null device 1 > > > #-------------------------------------------------------- example 1 > # hazard models with unpenalized formulas compared to a penalized tensor product smooth > cat("\n","______________________________________________________________________________________________","\n") ______________________________________________________________________________________________ > cat("\n","example 1","\n") example 1 > cat("\n","hazard models with unpenalized formulas compared to a penalized tensor product smooth","\n") hazard models with unpenalized formulas compared to a penalized tensor product smooth > > > # constant hazard model > f.cst <- ~1 > mod.cst <- survPen(f.cst,data=datCancer,t1=fu,event=dead) > > # piecewise constant hazard model > f.pwcst <- ~cut(fu,breaks=seq(0,5,by=0.5),include.lowest=TRUE) > mod.pwcst <- survPen(f.pwcst,data=datCancer,t1=fu,event=dead,n.legendre=200) > # we increase the number of points for Gauss-Legendre quadrature to make sure that the cumulative > # hazard is properly approximated > > # linear effect of time > f.lin <- ~fu > mod.lin <- survPen(f.lin,data=datCancer,t1=fu,event=dead) > > # linear effect of time and age with proportional effect of age > f.lin.age <- ~fu+age > mod.lin.age <- survPen(f.lin.age,data=datCancer,t1=fu,event=dead) > > # linear effect of time and age with time-dependent effect of age (linear) > f.lin.inter.age <- ~fu*age > mod.lin.inter.age <- survPen(f.lin.inter.age,data=datCancer,t1=fu,event=dead) > > # cubic B-spline of time with a knot at 1 year, linear effect of age and time-dependent effect > # of age with a quadratic B-spline of time with a knot at 1 year > f.spline.inter.age <- ~bs(fu,knots=c(1),Boundary.knots=c(0,5))+age+ + age:bs(fu,knots=c(1),Boundary.knots=c(0,5),degree=2) > # here, bs indicates an unpenalized cubic spline > > mod.spline.inter.age <- survPen(f.spline.inter.age,data=datCancer,t1=fu,event=dead) > > > # tensor of time and age > f.tensor <- ~tensor(fu,age) > mod.tensor <- survPen(f.tensor,data=datCancer,t1=fu,event=dead) > > > # predictions of the models at age 60 > > new.time <- seq(0,5,length=50) > pred.cst <- predict(mod.cst,data.frame(fu=new.time)) > pred.pwcst <- predict(mod.pwcst,data.frame(fu=new.time)) > pred.lin <- predict(mod.lin,data.frame(fu=new.time)) > pred.lin.age <- predict(mod.lin.age,data.frame(fu=new.time,age=60)) > pred.lin.inter.age <- predict(mod.lin.inter.age,data.frame(fu=new.time,age=60)) > pred.spline.inter.age <- predict(mod.spline.inter.age,data.frame(fu=new.time,age=60)) > pred.tensor <- predict(mod.tensor,data.frame(fu=new.time,age=60)) > > lwd1 <- 2 > > pdf("compar_several_mods.pdf",height=5,width=6) > par(mfrow=c(1,1)) > plot(new.time,pred.cst$haz,type="l",ylim=c(0,0.2),main="hazard vs time", + xlab="years since diagnosis",ylab="hazard",col="blue3",lwd=lwd1) > segments(x0=new.time[1:49],x1=new.time[2:50],y0=pred.pwcst$haz[1:49],col="lightblue2",lwd=lwd1) > lines(new.time,pred.lin$haz,col="green3",lwd=lwd1) > lines(new.time,pred.lin.age$haz,col="yellow",lwd=lwd1) > lines(new.time,pred.lin.inter.age$haz,col="orange",lwd=lwd1) > lines(new.time,pred.spline.inter.age$haz,col="red",lwd=lwd1) > lines(new.time,pred.tensor$haz,col="black",lwd=lwd1) > legend("topright", + legend=c("cst","pwcst","lin","lin.age","lin.inter.age","spline.inter.age","tensor"), + col=c("blue3","lightblue2","green3","yellow","orange","red","black"), + lty=rep(1,7),lwd=rep(lwd1,7)) > > dev.off() null device 1 > > # you can also calculate the hazard yourself with the lpmatrix option. > # For example, compare the following predictions: > haz.tensor <- pred.tensor$haz > > X.tensor <- predict(mod.tensor,data.frame(fu=new.time,age=60),type="lpmatrix") > haz.tensor.lpmatrix <- exp(X.tensor%*%mod.tensor$coefficients) > > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","difference between standard and lpmatrix predictions","\n") difference between standard and lpmatrix predictions > print(summary(as.numeric(haz.tensor.lpmatrix - haz.tensor))) Min. 1st Qu. Median Mean 3rd Qu. Max. -1.665e-16 -5.551e-17 0.000e+00 -4.163e-19 5.378e-17 2.498e-16 > > #---------------- The 95% confidence intervals can be calculated like this: > > # standard errors from the Bayesian covariance matrix Vp > std <- sqrt(rowSums((X.tensor%*%mod.tensor$Vp)*X.tensor)) > > qt.norm <- stats::qnorm(1-(1-0.95)/2) > haz.inf <- as.vector(exp(X.tensor%*%mod.tensor$coefficients-qt.norm*std)) > haz.sup <- as.vector(exp(X.tensor%*%mod.tensor$coefficients+qt.norm*std)) > > # checking that they are similar to the ones given by the predict function > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","difference between standard and lpmatrix confidence intervals","\n") difference between standard and lpmatrix confidence intervals > summary(haz.inf - pred.tensor$haz.inf) Min. 1st Qu. Median Mean 3rd Qu. Max. -1.527e-16 -4.163e-17 0.000e+00 1.249e-18 4.163e-17 2.220e-16 > summary(haz.sup - pred.tensor$haz.sup) Min. 1st Qu. Median Mean 3rd Qu. Max. -2.220e-16 -8.327e-17 0.000e+00 -4.302e-18 6.939e-17 2.776e-16 > > > > > #-------------------------------------------------------- example 2 > cat("\n","______________________________________________________________________________________________","\n") ______________________________________________________________________________________________ > cat("\n","example 2","\n") example 2 > cat("\n","smoothing parameter estimation, excess hazard and left truncation","\n") smoothing parameter estimation, excess hazard and left truncation > > # model : unidimensional penalized spline for time since diagnosis with 5 knots > f1 <- ~smf(fu,df=5) > # when knots are not specified, quantiles are used. For example, for the term "smf(x,df=df1)", > # the vector of knots will be: quantile(unique(x),seq(0,1,length=df1)) > > # you can specify your own knots if you want > # f1 <- ~smf(fu,knots=c(0,1,3,6,8)) > > # hazard model > mod1 <- survPen(f1,data=datCancer,t1=fu,event=dead,expected=NULL,method="LAML") > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","summary of the LAML model","\n") summary of the LAML model > print(summary(mod1)) penalized hazard model Call: survPen(formula = f1, data = datCancer, t1 = fu, event = dead, expected = NULL, method = "LAML") Parametric coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -3.02234 0.11124 -27.17 < 2.2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 log-likelihood = -2321.3, penalized log-likelihood = -2322 Number of parameters = 5, effective degrees of freedom = 3.4489 LAML = 2326.6 Smoothing parameter(s): smf(fu) 133.54 edf of smooth terms: smf(fu) 2.4489 converged= TRUE > > # to see where the knots were placed > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","default knots location","\n") default knots location > print(mod1$list.smf) [[1]] $term [1] "fu" $dim [1] 1 $knots $knots$fu 0% 25% 50% 75% 100% 0.001455958 0.732075578 1.425060350 2.570586727 5.000000000 $df [1] 5 $by [1] "NULL" $same.rho [1] FALSE $name [1] "smf(fu)" attr(,"class") [1] "smf.smooth.spec" > > # with LCV instead of LAML > mod1bis <- survPen(f1,data=datCancer,t1=fu,event=dead,expected=NULL,method="LCV") > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","summary of the LCV model","\n") summary of the LCV model > print(summary(mod1bis)) penalized hazard model Call: survPen(formula = f1, data = datCancer, t1 = fu, event = dead, expected = NULL, method = "LCV") Parametric coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -3.01983 0.10967 -27.535 < 2.2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 log-likelihood = -2321.3, penalized log-likelihood = -2322.1 Number of parameters = 5, effective degrees of freedom = 3.3722 LCV = 2324.7 Smoothing parameter(s): smf(fu) 157.76 edf of smooth terms: smf(fu) 2.3722 converged= TRUE > > # hazard model taking into account left truncation (not representative of cancer data, > # the begin variable was simulated for illustration purposes only) > mod2 <- survPen(f1,data=datCancer,t0=begin,t1=fu,event=dead,expected=NULL,method="LAML") > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","summary of the left-truncated model","\n") summary of the left-truncated model > print(summary(mod2)) penalized hazard model Call: survPen(formula = f1, data = datCancer, t1 = fu, t0 = begin, event = dead, expected = NULL, method = "LAML") Parametric coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -2.917629 0.073679 -39.599 < 2.2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 log-likelihood = -2274.3, penalized log-likelihood = -2274.3 Number of parameters = 5, effective degrees of freedom = 2.0817 LAML = 2277.6 Smoothing parameter(s): smf(fu) 11775 edf of smooth terms: smf(fu) 1.0817 converged= TRUE > > # excess hazard model > mod3 <- survPen(f1,data=datCancer,t1=fu,event=dead,expected=rate,method="LAML") > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","summary of the excess hazard model","\n") summary of the excess hazard model > print(summary(mod3)) penalized excess hazard model Call: survPen(formula = f1, data = datCancer, t1 = fu, event = dead, expected = rate, method = "LAML") Parametric coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -3.44113 0.16832 -20.444 < 2.2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 log-likelihood = -2169.3, penalized log-likelihood = -2170.3 Number of parameters = 5, effective degrees of freedom = 4.0414 LAML = 2175.5 Smoothing parameter(s): smf(fu) 24.923 edf of smooth terms: smf(fu) 3.0414 converged= TRUE > > # compare the predictions of the models > new.time <- seq(0,5,length=50) > pred1 <- predict(mod1,data.frame(fu=new.time)) > pred1bis <- predict(mod1bis,data.frame(fu=new.time)) > pred2 <- predict(mod2,data.frame(fu=new.time)) > pred3 <- predict(mod3,data.frame(fu=new.time)) > > # LAML vs LCV > pdf("compar_LAML_LCV.pdf",height=5,width=10) > par(mfrow=c(1,2)) > plot(new.time,pred1$haz,type="l",ylim=c(0,0.2),main="LCV vs LAML", + xlab="years since diagnosis",ylab="hazard") > lines(new.time,pred1bis$haz,col="blue3",lty=2) > legend("topright",legend=c("LAML","LCV"),col=c("black","blue3"),lty=c(1,2)) > > plot(new.time,pred1$surv,type="l",ylim=c(0,1),main="LCV vs LAML", + xlab="years since diagnosis",ylab="survival") > lines(new.time,pred1bis$surv,col="blue3",lty=2) > > dev.off() null device 1 > > # hazard vs excess hazard > pdf("compar_total_excess.pdf",height=5,width=10) > par(mfrow=c(1,2)) > plot(new.time,pred1$haz,type="l",ylim=c(0,0.2),main="hazard vs excess hazard", + xlab="years since diagnosis",ylab="hazard") > lines(new.time,pred3$haz,col="green3") > legend("topright",legend=c("overall","excess"),col=c("black","green3"),lty=c(1,1)) > > plot(new.time,pred1$surv,type="l",ylim=c(0,1),main="survival vs net survival", + xlab="time",ylab="survival") > lines(new.time,pred3$surv,col="green3") > legend("topright",legend=c("overall survival","net survival"), col=c("black","green3"), lty=c(1,1)) > > dev.off() null device 1 > > > # hazard vs excess hazard with 95% Bayesian confidence intervals (based on Vp matrix, > # see predict.survPen) > pdf("compar_total_excess_CI.pdf",height=5,width=6) > > par(mfrow=c(1,1)) > plot(new.time,pred1$haz,type="l",ylim=c(0,0.2),main="hazard vs excess hazard", + xlab="years since diagnosis",ylab="hazard") > lines(new.time,pred3$haz,col="green3") > legend("topright",legend=c("overall","excess"),col=c("black","green3"),lty=c(1,1)) > > lines(new.time,pred1$haz.inf,lty=2) > lines(new.time,pred1$haz.sup,lty=2) > > lines(new.time,pred3$haz.inf,lty=2,col="green3") > lines(new.time,pred3$haz.sup,lty=2,col="green3") > > dev.off() null device 1 > > > #-------------------------------------------------------- example 3 > # models: tensor product smooth vs tensor product interaction of time since diagnosis and > # age at diagnosis. Smoothing parameters are estimated via LAML maximization > cat("\n","______________________________________________________________________________________________","\n") ______________________________________________________________________________________________ > cat("\n","example 3","\n") example 3 > cat("\n","tensor product smooth vs tensor product interaction","\n") tensor product smooth vs tensor product interaction > > > f2 <- ~tensor(fu,age,df=c(5,5)) > > f3 <- ~tint(fu,df=5)+tint(age,df=5)+tint(fu,age,df=c(5,5)) > > # hazard model > mod4 <- survPen(f2,data=datCancer,t1=fu,event=dead) > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","summary of the tensor model","\n") summary of the tensor model > print(summary(mod4)) penalized hazard model Call: survPen(formula = f2, data = datCancer, t1 = fu, event = dead) Parametric coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -3.31334 0.17612 -18.813 < 2.2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 log-likelihood = -2106.2, penalized log-likelihood = -2110 Number of parameters = 25, effective degrees of freedom = 11.69 LAML = 2121.7 Smoothing parameter(s): tensor(fu,age).1 tensor(fu,age).2 0.77927 21.67000 edf of smooth terms: tensor(fu,age) 10.69 converged= TRUE > > mod5 <- survPen(f3,data=datCancer,t1=fu,event=dead) > cat("\n","--------------------------------","\n") -------------------------------- > cat("\n","summary of the tint model","\n") summary of the tint model > print(summary(mod5)) penalized hazard model Call: survPen(formula = f3, data = datCancer, t1 = fu, event = dead) Parametric coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -3.23237 0.15164 -21.316 < 2.2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 log-likelihood = -2106.4, penalized log-likelihood = -2109.6 Number of parameters = 25, effective degrees of freedom = 10.462 LAML = 2122.7 Smoothing parameter(s): tint(fu) tint(age) tint(fu,age).1 tint(fu,age).2 9.4054e-01 6.4517e+00 1.8307e-01 1.2800e+05 edf of smooth terms: tint(fu) tint(age) tint(fu,age) 3.5664 2.5192 3.3764 converged= TRUE > > # predictions > new.age <- seq(50,90,length=50) > new.time <- seq(0,7,length=50) > > Z4 <- outer(new.time,new.age,function(t,a) predict(mod4,data.frame(fu=t,age=a))$haz) > Z5 <- outer(new.time,new.age,function(t,a) predict(mod5,data.frame(fu=t,age=a))$haz) > > # color settings > col.pal <- colorRampPalette(c("white", "red")) > colors <- col.pal(100) > > facet <- function(z){ + + facet.center <- (z[-1, -1] + z[-1, -ncol(z)] + z[-nrow(z), -1] + z[-nrow(z), -ncol(z)])/4 + cut(facet.center, 100) + + } > > # plot the hazard surfaces for both models > pdf("compar_tensor_tint.pdf",height=5,width=10) > par(mfrow=c(1,2)) > persp(new.time,new.age,Z4,col=colors[facet(Z4)],main="tensor",theta=30, + xlab="years since diagnosis",ylab="age at diagnosis",zlab="excess hazard",ticktype="detailed") > persp(new.time,new.age,Z5,col=colors[facet(Z5)],main="tint",theta=30, + xlab="years since diagnosis",ylab="age at diagnosis",zlab="excess hazard",ticktype="detailed") > dev.off() null device 1 > > > > > ################################################################################################################# > > > proc.time() user system elapsed 10.12 2.14 12.25