R version 4.4.0 RC (2024-04-16 r86458 ucrt) -- "Puppy Cup" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > > # > # Tests from the appendix of Therneau and Grambsch > # b. Data set 1 and Efron estimate > # > test1 <- data.frame(time= c(9, 3,1,1,6,6,8), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > > byhand <- function(beta, newx=0) { + r <- exp(beta) + loglik <- 2*beta - (log(3*r +3) + log((r+5)/2) + log(r+3)) + u <- (30 + 23*r - r^3)/ ((r+1)*(r+3)*(r+5)) + tfun <- function(x) x - x^2 + imat <- tfun(r/(r+1)) + tfun(r/(r+5)) + tfun(r/(r+3)) + + # The matrix of weights, one row per obs, one col per time + # Time of 1, 6, 6+0 (second death), and 9 + wtmat <- matrix(c(1,1,1,1,1,1, + 0,0,1,1,1,1, + 0,0,.5, .5, 1,1, + 0,0,0,0,0,1), ncol=4) + wtmat <- diag(c(r,r,r,1,1,1)) %*% wtmat + + x <- c(1,1,1,0,0,0) + status <- c(1,0,1,1,0,1) + xbar <- colSums(wtmat*x)/ colSums(wtmat) + haz <- 1/ colSums(wtmat) # one death at each of the times + + hazmat <- wtmat %*% diag(haz) #each subject's hazard over time + mart <- status - rowSums(hazmat) + + a <- r+1; b<- r+3; d<- r+5 # 'c' in the book, 'd' here + score <- c((2*r + 3)/ (3*a^2), + -r/ (3*a^2), + (675+ r*(1305 +r*(756 + r*(-4 +r*(-79 -13*r)))))/(3*(a*b*d)^2), + r*(1/(3*a^2) - a/(2*b^2) - b/(2*d^2)), + 2*r*(177 + r*(282 +r*(182 + r*(50 + 5*r)))) /(3*(a*b*d)^2), + 2*r*(177 + r*(282 +r*(182 + r*(50 + 5*r)))) /(3*(a*b*d)^2)) + + # Schoenfeld residual + d <- mean(xbar[2:3]) + scho <- c(1/(r+1), 1- d, 0- d , 0) + + surv <- exp(-cumsum(haz)* exp(beta*newx))[c(1,3,4)] + varhaz.g <- cumsum(haz^2) # since all numerators are 1 + + varhaz.d <- cumsum((newx-xbar) * haz) + + varhaz <- (varhaz.g + varhaz.d^2/ imat) * exp(2*beta*newx) + + list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=haz, + mart=mart, score=score, var.g=varhaz.g, var.d=varhaz.d, + scho=scho, surv=surv, var=varhaz[c(1,3,4)]) + } > > > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > > fit0 <-coxph(Surv(time, status) ~x, test1, iter=0) > truth0 <- byhand(0,0) > aeq(truth0$loglik, fit0$loglik[1]) [1] TRUE > aeq(1/truth0$imat, fit0$var) [1] TRUE > aeq(truth0$mart, fit0$resid[c(2:6,1)]) [1] TRUE > aeq(resid(fit0), c(-3/4, NA, 5/6, -1/6, 5/12, 5/12, -3/4)) [1] TRUE > aeq(truth0$scho, resid(fit0, 'schoen')) [1] TRUE > aeq(truth0$score, resid(fit0, 'score')[c(3:7,1)]) [1] TRUE > sfit <- survfit(fit0, list(x=0), censor=FALSE) > aeq(sfit$std.err^2, truth0$var) [1] TRUE > aeq(sfit$surv, truth0$surv) [1] TRUE > > fit <- coxph(Surv(time, status) ~x, test1, eps=1e-8, nocenter=NULL) > aeq(round(fit$coef,6), 1.676857) [1] TRUE > truebeta <- log(cos(acos((45/23)*sqrt(3/23))/3) * 2* sqrt(23/3)) > truth <- byhand(truebeta, 0) > aeq(truth$loglik, fit$loglik[2]) [1] TRUE > aeq(1/truth$imat, fit$var) [1] TRUE > aeq(truth$mart, fit$resid[c(2:6,1)]) [1] TRUE > aeq(truth$scho, resid(fit, 'schoen')) [1] TRUE > aeq(truth$score, resid(fit, 'score')[c(3:7,1)]) [1] TRUE > > # Per comments in the source code, the below is expected to fail for Efron > # at the tied death times. (When predicting for new data, predict > # treats a time in the new data set that exactly matches one in the original > # as being just after the original, i.e., experiences the full hazard > # jump there, in the same way that censors do.) > expect <- predict(fit, type='expected', newdata=test1) #force recalc > use <- !(test1$time==6 | is.na(test1$status)) > aeq(test1$status[use] - resid(fit)[use], expect[use]) [1] TRUE > > sfit <- survfit(fit, list(x=0), censor=FALSE) > aeq(sfit$surv, truth$surv) [1] TRUE > aeq(sfit$std.err^2, truth$var) [1] TRUE > > # > # Done with the formal test, now print out lots of bits > # > resid(fit) 1 2 3 4 5 6 7 -0.3655434 NA 0.7191707 -0.2808293 -0.4383414 0.7310869 -0.3655434 > resid(fit, 'scor') 1 2 3 4 5 6 7 0.2208584 NA 0.1132780 -0.0442340 -0.1029199 -0.4078409 0.2208584 > resid(fit, 'scho') 1 6 6 9 0.157512 0.421244 -0.578756 0.000000 > > predict(fit, type='lp') [1] -0.8384287 NA 0.8384287 0.8384287 0.8384287 -0.8384287 -0.8384287 > predict(fit, type='risk') [1] 0.4323894 NA 2.3127302 2.3127302 2.3127302 0.4323894 0.4323894 > predict(fit, type='expected') 1 2 3 4 5 6 7 1.3655434 NA 0.2808293 0.2808293 1.4383414 0.2689131 0.3655434 > predict(fit, type='terms') x 1 -0.8384287 2 NA 3 0.8384287 4 0.8384287 5 0.8384287 6 -0.8384287 7 -0.8384287 > predict(fit, type='lp', se.fit=T) $fit 1 2 3 4 5 6 7 -0.8384287 NA 0.8384287 0.8384287 0.8384287 -0.8384287 -0.8384287 $se.fit 1 2 3 4 5 6 7 0.6388078 NA 0.6388078 0.6388078 0.6388078 0.6388078 0.6388078 > predict(fit, type='risk', se.fit=T) $fit 1 2 3 4 5 6 7 0.4323894 NA 2.3127302 2.3127302 2.3127302 0.4323894 0.4323894 $se.fit 1 2 3 4 5 6 7 0.4200565 NA 0.9714774 0.9714774 0.9714774 0.4200565 0.4200565 > predict(fit, type='expected', se.fit=T) $fit 1 2 3 4 5 6 7 1.3655434 NA 0.2808293 0.2808293 1.4383414 0.2689131 0.3655434 $se.fit [1] 1.0649293 NA 0.2864593 0.2864593 1.5922983 0.3661617 0.3661617 > predict(fit, type='terms', se.fit=T) $fit x 1 -0.8384287 2 NA 3 0.8384287 4 0.8384287 5 0.8384287 6 -0.8384287 7 -0.8384287 $se.fit x 1 0.6388078 2 NA 3 0.6388078 4 0.6388078 5 0.6388078 6 0.6388078 7 0.6388078 > > summary(survfit(fit)) Call: survfit(formula = fit) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 0.8857 0.117 0.683036 1 6 4 2 0.4294 0.237 0.145743 1 9 1 1 0.0425 0.116 0.000198 1 > summary(survfit(fit, list(x=2))) Call: survfit(formula = fit, newdata = list(x = 2)) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 2.23e-01 5.97e-01 1.16e-03 1 6 4 2 2.87e-05 5.69e-04 3.96e-22 1 9 1 1 1.08e-17 1.04e-15 1.07e-99 1 > > proc.time() user system elapsed 0.87 0.12 0.98