R version 4.4.0 RC (2024-04-16 r86458 ucrt) -- "Puppy Cup"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(survival)
> # Compare two objects with all.equal() after as.vector(), so that
> #  attributes such as dim do not take part in the comparison
> aeq <- function(x,y) all.equal(as.vector(x), as.vector(y))
> 
> test1 <- data.frame(time=  c(9, 3,1,1,6,6,8),
+                     status=c(1,NA,1,0,1,1,0),
+                     x=     c(0, 2,1,1,1,0,0))
> 
> # Verify that cox.zph computes a score test
> # First for the Breslow estimate
> r <- (3 + sqrt(33))/2  # exp(beta-hat): the fitted coefficient equals log(r)
> U <- c(1/(r+1), 3/(r+3), -r/(r+3), 0)  # score statistic, one term per death time
> imat <- c(r/(r+1)^2, 3*r/(r+3)^2, 3*r/(r+3)^2, 0)  # information matrix
> g <- c(1, 6, 6, 9)   # death times
> 
> u2 <- c(sum(U), sum(g*U)) # first derivative
> i2 <- matrix(c(sum(imat), sum(g*imat), sum(g*imat), sum(g^2*imat)),
+              2,2)         # second derivative
> sctest <- solve(i2, u2) %*% u2
> 
> # Verify that centering makes no difference for the test (though i2 changes)
> g2 <- g - mean(g)
> u2b <- c(sum(U), sum(g2*U))
> i2b <- matrix(c(sum(imat), sum(g2*imat), sum(g2*imat), sum(g2^2*imat)),
+               2,2)
> sctest2 <- solve(i2b, u2b) %*% u2b
> all.equal(sctest, sctest2)
[1] TRUE
> 
> # Now check the program
> fit1 <- coxph(Surv(time, status) ~ x, test1, ties='breslow')
> aeq(fit1$coef, log(r))
[1] TRUE
> zp1 <- cox.zph(fit1, transform='identity', global=FALSE)
> aeq(zp1$table[,1], sctest)
[1] TRUE
> aeq(zp1$y, resid(fit1, type="scaledsch"))
[1] TRUE
> 
> # Same model in (start, stop] form with every start time equal to 0
> dummy <- rep(0, nrow(test1))
> fit1b <- coxph(Surv(dummy, time, status) ~ x, test1, ties='breslow')
> aeq(fit1b$coef, log(r))
[1] TRUE
> zp1b <- cox.zph(fit1b, transform='identity', global=FALSE)
> aeq(zp1b$table[,1], sctest)
[1] TRUE
> # the pair of tied times gets reversed in the zph result
> #  but since the 'y' values are only used to plot it doesn't matter
> aeq(zp1b$y[c(1,3,2,4)], resid(fit1b, type="scaledsch"))
[1] TRUE
> 
> # log time check
> g3 <- log(g) - mean(log(g))
> u3 <- c(sum(U), sum(g3*U)) # first derivative
> i3 <- matrix(c(sum(imat), sum(g3*imat), sum(g3*imat), sum(g3^2*imat)),
+              2,2)          # second derivative
> sctest3 <- solve(i3, u3) %*% u3
> zp3 <- cox.zph(fit1, transform='log', global=FALSE)
> aeq(zp3$table[,1], sctest3)
[1] TRUE
> 
> # Efron approximation
> phi <- acos((45/23)*sqrt(3/23))
> r <- 2*sqrt(23/3)* cos(phi/3)  # exp(beta-hat): the fitted coefficient equals log(r)
> U <- c(1/(r+1), 3/(r+3), -r/(r+5), 0)  # score statistic, one term per death time
> imat <- c(r/(r+1)^2, 3*r/(r+3)^2, 5*r/(r+5)^2, 0)  # information matrix
> 
> u4 <- c(sum(U), sum(g3*U)) # first derivative
> i4 <- matrix(c(sum(imat), sum(g3*imat), sum(g3*imat), sum(g3^2*imat)),
+              2,2)          # second derivative
> sctest4 <- solve(i4, u4) %*% u4
> 
> fit4 <- coxph(Surv(time, status) ~ x, test1, ties='efron')
> aeq(fit4$coef, log(r))
[1] TRUE
> zp4 <- cox.zph(fit4, transform='log', global=FALSE)
> aeq(zp4$table[,1], sctest4)
[1] TRUE
> aeq(zp4$y, resid(fit4, type="scaledsch"))
[1] TRUE
> 
> fit5 <- coxph(Surv(dummy, time, status) ~ x, test1, ties="efron")
> aeq(fit5$coef, log(r))
[1] TRUE
> zp5 <- cox.zph(fit5, transform="log", global=FALSE)
> aeq(zp5$table[,1], sctest4)
[1] TRUE
> 
> # Artificial stratification
> test2 <- rbind(test1, test1)
> test2$group <- rep(letters[1:2], each=nrow(test1))
> # U, imat, and sctest will all double
> dummy <- c(dummy, dummy)
> fit6 <- coxph(Surv(dummy, time, status) ~ x + strata(group), test2)
> aeq(fit6$coef, log(r))
[1] TRUE
> # 'global' is spelled out in full rather than relying on partial matching
> zp6 <- cox.zph(fit6, transform="log", global=FALSE)
> aeq(zp6$table[,1], 2*sctest4)
[1] TRUE
> 
> # A multi-state check, 2 covariates
> # Verify that the multi-state result = the single state Cox models
> etime <- with(mgus2, ifelse(pstat==0, futime, ptime))
> event <- with(mgus2, ifelse(pstat==0, 2*death, 1))
> event <- factor(event, 0:2, labels=c("censor", "pcm", "death"))
> table(event)
event
censor    pcm  death 
   409    115    860 
> 
> ct1 <- coxph(Surv(etime, event) ~ sex + age, mgus2, id=id)
> ct2 <- coxph(Surv(etime, event=='pcm') ~ sex + age, mgus2)
> ct3 <- coxph(Surv(etime, event=='death') ~ sex + age, mgus2)
> 
> zp1 <- cox.zph(ct1, transform='identity')
> zp2 <- cox.zph(ct2, transform='identity')
> zp3 <- cox.zph(ct3, transform='identity')
> aeq(zp1$table[1:2,], zp2$table[1:2,])
[1] TRUE
> aeq(zp1$table[3:4,], zp3$table[1:2,])
[1] TRUE
> 
> # Now add a starting time of zero
> dummy <- rep(0, nrow(mgus2))
> ct4 <- coxph(Surv(dummy, etime, event) ~ sex + age, mgus2, id=id)
> ct5 <- coxph(Surv(dummy, etime, event=='pcm') ~ sex + age, mgus2)
> ct6 <- coxph(Surv(dummy, etime, event=='death') ~ sex + age, mgus2)
> zp4 <- cox.zph(ct4, transform='identity')
> zp5 <- cox.zph(ct5, transform='identity')
> zp6 <- cox.zph(ct6, transform='identity')
> aeq(zp4$table[1:2,], zp5$table[1:2,])
[1] TRUE
> aeq(zp4$table[3:4,], zp6$table[1:2,])
[1] TRUE
> 
> 
> # Direct check of a multivariate model with start, stop data
> p1 <- pbcseq[!duplicated(pbcseq$id), 1:6]
> pdata <- tmerge(p1[, c("id", "trt", "age", "sex")], p1, id=id,
+                 death = event(futime, status==2))
> pdata <- tmerge(pdata, pbcseq, id=id, bili=tdc(day, bili),
+                 edema = tdc(day, edema), albumin=tdc(day, albumin),
+                 protime = tdc(day, protime))
> pfit <- coxph(Surv(tstart, tstop, death) ~ log(bili) + albumin + edema +
+                   age + log(protime), data = pdata, ties='efron')
> zp7 <- cox.zph(pfit, transform='log', global=FALSE)
> 
> # direct(fit): compute, from coxph.detail() output, one score test per
> #  coefficient for adding the time-dependent term gtime * x[,j] to the model.
> #  Returns a list with
> #    sctest  the score statistics, one per coefficient of 'fit'
> #    u       c(colSums(dt$score), unew), the first derivative vector
> #    imat    the second derivative matrix assembled from temp1, temp2, temp3
> direct <- function(fit) {
+     nvar <- length(fit$coef)
+     dt <- coxph.detail(fit)
+     # centered log of the event times reported by coxph.detail
+     gtime <- log(dt$time) - mean(log(dt$time))
+     # key idea: at any event time I have a first derivative vector
+     #  c(dt$score[i,], gtime[i]* dt$score[i,])
+     # and second derivative matrix
+     #   dt$imat[,,i]             gtime[i] * dt$imat[,,i]
+     #   gtime[i]*dt$imat[,,i]    gtime[i]^2 * dt$imat[,,i]
+     # for the expanded model, where imat[,,i] is symmetric,
+     #  and colSums(dt$score) =0 (since the model converged)
+     #
+     # Create score tests for adding one time-dependent variable
+     #  gtime * x[,j] at a time: first derivative of this test is
+     #  c(dt$score[i,], gtime[i]* dt$score[i,j])
+     #  and etc.
+     unew <- colSums(gtime * dt$score)
+     # sums of imat over its third index, weighted by 1, gtime and gtime^2
+     temp1 <- apply(dt$imat, 1:2, sum)
+     temp2 <- apply(dt$imat, 1:2, function(x) sum(x*gtime))
+     temp3 <- apply(dt$imat, 1:2, function(x) sum(x * gtime^2))
+ 
+     score <- double(nvar)
+     smat <- matrix(0., nvar+1, nvar+1)   # second deriv matrix for the test
+     smat[1:nvar, 1:nvar] <- temp1
+     for (i in 1:nvar) {
+         # overwrite the last row and column with the terms for variable i
+         smat[nvar+1,] <- c(temp2[i,], temp3[i,i])
+         smat[,nvar+1] <- c(temp2[,i], temp3[i,i])
+         # the first nvar score elements are taken as 0 (see note above)
+         utemp <- c(rep(0,nvar), unew[i])
+         score[i] <- solve(smat, utemp) %*% utemp
+     }
+     list(sctest = score, u= c(colSums(dt$score), unew),
+          imat=cbind(rbind(temp1, temp2), rbind(temp2, temp3)))
+ }
> 
> aeq(zp7$table[,1], direct(pfit)$sctest)
[1] TRUE
> 
> # Last, make sure that NA coefficients are ignored
> d1 <- survSplit(Surv(time, status) ~ ., veteran, cut=150, episode="epoch")
> fit <- coxph(Surv(tstart, time, status) ~ celltype:strata(epoch) + age, d1)
> zz <- cox.zph(fit)
> 
> fit2 <- coxph(Surv(tstart, time, status) ~ celltype:strata(epoch) + age, d1,
+               x=TRUE)
> zz2 <- cox.zph(fit2)
> 
> x2 <- fit2$x[, !is.na(fit$coefficients)][,-1]
> fit3 <- coxph(Surv(tstart, time, status) ~ age + x2, d1)
> all.equal(fit3$loglik, fit2$loglik)
[1] TRUE
> zz3 <- cox.zph(fit3)
> 
> all.equal(unclass(zz)[1:7], unclass(zz2)[1:7]) #ignore the call component
[1] TRUE
> all.equal(as.vector(zz$table), as.vector(zz3$table))  # variable names change
[1] TRUE
> 
> proc.time()
   user  system elapsed 
   1.01    0.18    1.20 