#-------------------------------------------------------------------------------------------------------------------------- # test code for survPen package #-------------------------------------------------------------------------------------------------------------------------- library(survPen) library(splines) data(datCancer) # simulated dataset with 2000 individuals diagnosed with cervical cancer cat("\n","______________________________________________________________________________________________","\n") cat("\n","survPen test code","\n") cat("\n","______________________________________________________________________________________________","\n") #-------------------------------------------------------- example 0 # Comparison between restricted cubic splines and penalized restricted cubic splines cat("\n","______________________________________________________________________________________________","\n") cat("\n","example 0","\n") cat("\n","Comparison between restricted cubic splines and penalized restricted cubic splines","\n") # unpenalized f <- ~ns(fu,knots=c(0.25, 0.5, 1, 2, 4),Boundary.knots=c(0,5)) mod <- survPen(f,data=datCancer,t1=fu,event=dead) cat("\n","--------------------------------","\n") cat("\n","summary of the unpenalized model","\n") print(summary(mod)) # penalized f.pen <- ~ smf(fu,knots=c(0,0.25, 0.5, 1, 2, 4,5)) # careful here: the boundary knots are included mod.pen <- survPen(f.pen,data=datCancer,t1=fu,event=dead) cat("\n","--------------------------------","\n") cat("\n","summary of the penalized model","\n") print(summary(mod.pen)) # predictions new.time <- seq(0,5,length=50) pred <- predict(mod,data.frame(fu=new.time)) pred.pen <- predict(mod.pen,data.frame(fu=new.time)) pdf("compar_unpen_pen.pdf",height=5,width=6) par(mfrow=c(1,1)) plot(new.time,pred$haz,type="l",ylim=c(0,0.2),main="hazard vs time", xlab="time since diagnosis (years)",ylab="hazard",col="red") lines(new.time,pred.pen$haz,col="blue3") legend("topright",legend=c("unpenalized","penalized"), col=c("red","blue3"),lty=rep(1,2)) dev.off() #-------------------------------------------------------- example 1 # hazard models with unpenalized formulas compared to a penalized tensor product smooth cat("\n","______________________________________________________________________________________________","\n") cat("\n","example 1","\n") cat("\n","hazard models with unpenalized formulas compared to a penalized tensor product smooth","\n") # constant hazard model f.cst <- ~1 mod.cst <- survPen(f.cst,data=datCancer,t1=fu,event=dead) # piecewise constant hazard model f.pwcst <- ~cut(fu,breaks=seq(0,5,by=0.5),include.lowest=TRUE) mod.pwcst <- survPen(f.pwcst,data=datCancer,t1=fu,event=dead,n.legendre=200) # we increase the number of points for Gauss-Legendre quadrature to make sure that the cumulative # hazard is properly approximated # linear effect of time f.lin <- ~fu mod.lin <- survPen(f.lin,data=datCancer,t1=fu,event=dead) # linear effect of time and age with proportional effect of age f.lin.age <- ~fu+age mod.lin.age <- survPen(f.lin.age,data=datCancer,t1=fu,event=dead) # linear effect of time and age with time-dependent effect of age (linear) f.lin.inter.age <- ~fu*age mod.lin.inter.age <- survPen(f.lin.inter.age,data=datCancer,t1=fu,event=dead) # cubic B-spline of time with a knot at 1 year, linear effect of age and time-dependent effect # of age with a quadratic B-spline of time with a knot at 1 year f.spline.inter.age <- ~bs(fu,knots=c(1),Boundary.knots=c(0,5))+age+ age:bs(fu,knots=c(1),Boundary.knots=c(0,5),degree=2) # here, bs indicates an unpenalized cubic spline mod.spline.inter.age <- survPen(f.spline.inter.age,data=datCancer,t1=fu,event=dead) # tensor of time and age f.tensor <- ~tensor(fu,age) mod.tensor <- survPen(f.tensor,data=datCancer,t1=fu,event=dead) # predictions of the models at age 60 new.time <- seq(0,5,length=50) pred.cst <- predict(mod.cst,data.frame(fu=new.time)) pred.pwcst <- predict(mod.pwcst,data.frame(fu=new.time)) pred.lin <- predict(mod.lin,data.frame(fu=new.time)) pred.lin.age <- predict(mod.lin.age,data.frame(fu=new.time,age=60)) pred.lin.inter.age <- predict(mod.lin.inter.age,data.frame(fu=new.time,age=60)) pred.spline.inter.age <- predict(mod.spline.inter.age,data.frame(fu=new.time,age=60)) pred.tensor <- predict(mod.tensor,data.frame(fu=new.time,age=60)) lwd1 <- 2 pdf("compar_several_mods.pdf",height=5,width=6) par(mfrow=c(1,1)) plot(new.time,pred.cst$haz,type="l",ylim=c(0,0.2),main="hazard vs time", xlab="years since diagnosis",ylab="hazard",col="blue3",lwd=lwd1) segments(x0=new.time[1:49],x1=new.time[2:50],y0=pred.pwcst$haz[1:49],col="lightblue2",lwd=lwd1) lines(new.time,pred.lin$haz,col="green3",lwd=lwd1) lines(new.time,pred.lin.age$haz,col="yellow",lwd=lwd1) lines(new.time,pred.lin.inter.age$haz,col="orange",lwd=lwd1) lines(new.time,pred.spline.inter.age$haz,col="red",lwd=lwd1) lines(new.time,pred.tensor$haz,col="black",lwd=lwd1) legend("topright", legend=c("cst","pwcst","lin","lin.age","lin.inter.age","spline.inter.age","tensor"), col=c("blue3","lightblue2","green3","yellow","orange","red","black"), lty=rep(1,7),lwd=rep(lwd1,7)) dev.off() # you can also calculate the hazard yourself with the lpmatrix option. # For example, compare the following predictions: haz.tensor <- pred.tensor$haz X.tensor <- predict(mod.tensor,data.frame(fu=new.time,age=60),type="lpmatrix") haz.tensor.lpmatrix <- exp(X.tensor%*%mod.tensor$coefficients) cat("\n","--------------------------------","\n") cat("\n","difference between standard and lpmatrix predictions","\n") print(summary(as.numeric(haz.tensor.lpmatrix - haz.tensor))) #---------------- The 95% confidence intervals can be calculated like this: # standard errors from the Bayesian covariance matrix Vp std <- sqrt(rowSums((X.tensor%*%mod.tensor$Vp)*X.tensor)) qt.norm <- stats::qnorm(1-(1-0.95)/2) haz.inf <- as.vector(exp(X.tensor%*%mod.tensor$coefficients-qt.norm*std)) haz.sup <- as.vector(exp(X.tensor%*%mod.tensor$coefficients+qt.norm*std)) # checking that they are similar to the ones given by the predict function cat("\n","--------------------------------","\n") cat("\n","difference between standard and lpmatrix confidence intervals","\n") summary(haz.inf - pred.tensor$haz.inf) summary(haz.sup - pred.tensor$haz.sup) #-------------------------------------------------------- example 2 cat("\n","______________________________________________________________________________________________","\n") cat("\n","example 2","\n") cat("\n","smoothing parameter estimation, excess hazard and left truncation","\n") # model : unidimensional penalized spline for time since diagnosis with 5 knots f1 <- ~smf(fu,df=5) # when knots are not specified, quantiles are used. For example, for the term "smf(x,df=df1)", # the vector of knots will be: quantile(unique(x),seq(0,1,length=df1)) # you can specify your own knots if you want # f1 <- ~smf(fu,knots=c(0,1,3,6,8)) # hazard model mod1 <- survPen(f1,data=datCancer,t1=fu,event=dead,expected=NULL,method="LAML") cat("\n","--------------------------------","\n") cat("\n","summary of the LAML model","\n") print(summary(mod1)) # to see where the knots were placed cat("\n","--------------------------------","\n") cat("\n","default knots location","\n") print(mod1$list.smf) # with LCV instead of LAML mod1bis <- survPen(f1,data=datCancer,t1=fu,event=dead,expected=NULL,method="LCV") cat("\n","--------------------------------","\n") cat("\n","summary of the LCV model","\n") print(summary(mod1bis)) # hazard model taking into account left truncation (not representative of cancer data, # the begin variable was simulated for illustration purposes only) mod2 <- survPen(f1,data=datCancer,t0=begin,t1=fu,event=dead,expected=NULL,method="LAML") cat("\n","--------------------------------","\n") cat("\n","summary of the left-truncated model","\n") print(summary(mod2)) # excess hazard model mod3 <- survPen(f1,data=datCancer,t1=fu,event=dead,expected=rate,method="LAML") cat("\n","--------------------------------","\n") cat("\n","summary of the excess hazard model","\n") print(summary(mod3)) # compare the predictions of the models new.time <- seq(0,5,length=50) pred1 <- predict(mod1,data.frame(fu=new.time)) pred1bis <- predict(mod1bis,data.frame(fu=new.time)) pred2 <- predict(mod2,data.frame(fu=new.time)) pred3 <- predict(mod3,data.frame(fu=new.time)) # LAML vs LCV pdf("compar_LAML_LCV.pdf",height=5,width=10) par(mfrow=c(1,2)) plot(new.time,pred1$haz,type="l",ylim=c(0,0.2),main="LCV vs LAML", xlab="years since diagnosis",ylab="hazard") lines(new.time,pred1bis$haz,col="blue3",lty=2) legend("topright",legend=c("LAML","LCV"),col=c("black","blue3"),lty=c(1,2)) plot(new.time,pred1$surv,type="l",ylim=c(0,1),main="LCV vs LAML", xlab="years since diagnosis",ylab="survival") lines(new.time,pred1bis$surv,col="blue3",lty=2) dev.off() # hazard vs excess hazard pdf("compar_total_excess.pdf",height=5,width=10) par(mfrow=c(1,2)) plot(new.time,pred1$haz,type="l",ylim=c(0,0.2),main="hazard vs excess hazard", xlab="years since diagnosis",ylab="hazard") lines(new.time,pred3$haz,col="green3") legend("topright",legend=c("overall","excess"),col=c("black","green3"),lty=c(1,1)) plot(new.time,pred1$surv,type="l",ylim=c(0,1),main="survival vs net survival", xlab="time",ylab="survival") lines(new.time,pred3$surv,col="green3") legend("topright",legend=c("overall survival","net survival"), col=c("black","green3"), lty=c(1,1)) dev.off() # hazard vs excess hazard with 95% Bayesian confidence intervals (based on Vp matrix, # see predict.survPen) pdf("compar_total_excess_CI.pdf",height=5,width=6) par(mfrow=c(1,1)) plot(new.time,pred1$haz,type="l",ylim=c(0,0.2),main="hazard vs excess hazard", xlab="years since diagnosis",ylab="hazard") lines(new.time,pred3$haz,col="green3") legend("topright",legend=c("overall","excess"),col=c("black","green3"),lty=c(1,1)) lines(new.time,pred1$haz.inf,lty=2) lines(new.time,pred1$haz.sup,lty=2) lines(new.time,pred3$haz.inf,lty=2,col="green3") lines(new.time,pred3$haz.sup,lty=2,col="green3") dev.off() #-------------------------------------------------------- example 3 # models: tensor product smooth vs tensor product interaction of time since diagnosis and # age at diagnosis. Smoothing parameters are estimated via LAML maximization cat("\n","______________________________________________________________________________________________","\n") cat("\n","example 3","\n") cat("\n","tensor product smooth vs tensor product interaction","\n") f2 <- ~tensor(fu,age,df=c(5,5)) f3 <- ~tint(fu,df=5)+tint(age,df=5)+tint(fu,age,df=c(5,5)) # hazard model mod4 <- survPen(f2,data=datCancer,t1=fu,event=dead) cat("\n","--------------------------------","\n") cat("\n","summary of the tensor model","\n") print(summary(mod4)) mod5 <- survPen(f3,data=datCancer,t1=fu,event=dead) cat("\n","--------------------------------","\n") cat("\n","summary of the tint model","\n") print(summary(mod5)) # predictions new.age <- seq(50,90,length=50) new.time <- seq(0,7,length=50) Z4 <- outer(new.time,new.age,function(t,a) predict(mod4,data.frame(fu=t,age=a))$haz) Z5 <- outer(new.time,new.age,function(t,a) predict(mod5,data.frame(fu=t,age=a))$haz) # color settings col.pal <- colorRampPalette(c("white", "red")) colors <- col.pal(100) facet <- function(z){ facet.center <- (z[-1, -1] + z[-1, -ncol(z)] + z[-nrow(z), -1] + z[-nrow(z), -ncol(z)])/4 cut(facet.center, 100) } # plot the hazard surfaces for both models pdf("compar_tensor_tint.pdf",height=5,width=10) par(mfrow=c(1,2)) persp(new.time,new.age,Z4,col=colors[facet(Z4)],main="tensor",theta=30, xlab="years since diagnosis",ylab="age at diagnosis",zlab="excess hazard",ticktype="detailed") persp(new.time,new.age,Z5,col=colors[facet(Z5)],main="tint",theta=30, xlab="years since diagnosis",ylab="age at diagnosis",zlab="excess hazard",ticktype="detailed") dev.off() #################################################################################################################