R version 4.4.0 RC (2024-04-16 r86458 ucrt) -- "Puppy Cup"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(survival)
> aeq <- function(x,y) all.equal(as.vector(x), as.vector(y))
> 
> test1 <- data.frame(time=  c(9, 3,1,1,6,6,8),
+                     status=c(1,NA,1,0,1,1,0),
+                     x=     c(0, 2,1,1,1,0,0))
> 
> # Verify that cox.zph computes a score test
> # First for the Breslow estimate
> r <- (3 + sqrt(33))/2   # actual MLE for log(beta)
> U <- c(1/(r+1), 3/(r+3), -r/(r+3), 0)   # score statistic
> imat <- c(r/(r+1)^2, 3*r/(r+3)^2, 3*r/(r+3)^2, 0)  # information matrix
> g = c(1, 6, 6, 9)  # death times
> 
> u2 <- c(sum(U), sum(g*U))  # first derivative
> i2 <- matrix(c(sum(imat), sum(g*imat), sum(g*imat), sum(g^2*imat)),
+                2,2)  # second derivative
> sctest <- solve(i2, u2) %*% u2
> 
> # Verify that centering makes no difference for the test (though i2 changes)
> g2 <- g - mean(g)
> u2b <- c(sum(U), sum(g2*U))
> i2b <- matrix(c(sum(imat), sum(g2*imat), sum(g2*imat), sum(g2^2*imat)),
+                2,2) 
> sctest2 <- solve(i2b, u2b) %*% u2b
> all.equal(sctest, sctest2)
[1] TRUE
> 
> # Now check the program
> fit1 <- coxph(Surv(time, status) ~ x, test1, ties='breslow')
> aeq(fit1$coef, log(r))
[1] TRUE
> zp1 <- cox.zph(fit1, transform='identity', global=FALSE)
> aeq(zp1$table[,1], sctest)
[1] TRUE
> aeq(zp1$y, resid(fit1, type="scaledsch"))
[1] TRUE
> 
> dummy <- rep(0, nrow(test1))
> fit1b <- coxph(Surv(dummy, time, status) ~ x, test1, ties='breslow')
> aeq(fit1b$coef, log(r))
[1] TRUE
> zp1b <- cox.zph(fit1b, transform='identity', global=FALSE)
> aeq(zp1b$table[,1], sctest)
[1] TRUE
> # the pair of tied times gets reversed in the zph result
> #  but since the 'y' values are only used to plot it doesn't matter
> aeq(zp1b$y[c(1,3,2,4)], resid(fit1b, type="scaledsch"))  
[1] TRUE
> 
> # log time check
> g3 <- log(g) - mean(log(g))
> u3 <- c(sum(U), sum(g3*U))  # first derivative
> i3 <- matrix(c(sum(imat), sum(g3*imat), sum(g3*imat), sum(g3^2*imat)),
+                2,2)  # second derivative
> sctest3 <- solve(i3, u3) %*% u3
> zp3 <- cox.zph(fit1, transform='log', global=FALSE)
> aeq(zp3$table[,1], sctest3)
[1] TRUE
> 
> # Efron approximation
> phi <- acos((45/23)*sqrt(3/23))
> r <- 2*sqrt(23/3)* cos(phi/3)   # actual MLE for log(beta)
> U <- c(1/(r+1), 3/(r+3), -r/(r+5), 0)   # score statistic
> imat <- c(r/(r+1)^2, 3*r/(r+3)^2, 5*r/(r+5)^2, 0)  # information matrix
> 
> u4 <- c(sum(U), sum(g3*U))  # first derivative
> i4 <- matrix(c(sum(imat), sum(g3*imat), sum(g3*imat), sum(g3^2*imat)),
+                2,2)  # second derivative
> sctest4 <- solve(i4, u4) %*% u4
> 
> fit4 <- coxph(Surv(time, status) ~ x, test1, ties='efron')
> aeq(fit4$coef, log(r))
[1] TRUE
> zp4 <- cox.zph(fit4, transform='log', global=FALSE)
> aeq(zp4$table[,1], sctest4)
[1] TRUE
> aeq(zp4$y, resid(fit4, type="scaledsch"))
[1] TRUE
> 
> fit5 <-  coxph(Surv(dummy, time, status) ~ x, test1, ties="efron")
> aeq(fit5$coef, log(r))
[1] TRUE
> zp5 <- cox.zph(fit5, transform="log", global=FALSE)
> aeq(zp5$table[,1], sctest4)
[1] TRUE
> 
> # Artificial stratification
> test2 <- rbind(test1, test1)
> test2$group <- rep(letters[1:2], each=nrow(test1))
> # U, imat, and sctest will all double
> dummy <- c(dummy, dummy)
> fit6 <- coxph(Surv(dummy, time, status) ~ x + strata(group), test2)
> aeq(fit6$coef, log(r))
[1] TRUE
> zp6 <- cox.zph(fit6, transform="log", globa=FALSE)
> aeq(zp6$table[,1], 2*sctest4)
[1] TRUE
> 
> # A multi-state check, 2 covariates
> #  Verify that the multi-state result = the single state Cox models
> etime <- with(mgus2, ifelse(pstat==0, futime, ptime))
> event <- with(mgus2, ifelse(pstat==0, 2*death, 1))
> event <- factor(event, 0:2, labels=c("censor", "pcm", "death"))
> table(event)
event
censor    pcm  death 
   409    115    860 
> 
> ct1 <- coxph(Surv(etime, event) ~ sex + age, mgus2, id=id)
> ct2 <- coxph(Surv(etime, event=='pcm') ~ sex + age, mgus2)
> ct3 <- coxph(Surv(etime, event=='death') ~ sex + age, mgus2)
> 
> zp1 <- cox.zph(ct1, transform='identity')
> zp2 <- cox.zph(ct2, transform='identity')
> zp3 <- cox.zph(ct3, transform='identity')
> aeq(zp1$table[1:2,], zp2$table[1:2,])
[1] TRUE
> aeq(zp1$table[3:4,], zp3$table[1:2,])
[1] TRUE
> 
> # Now add a starting time of zero
> dummy <- rep(0, nrow(mgus2))
> ct4 <- coxph(Surv(dummy, etime, event) ~ sex + age, mgus2, id=id)
> ct5 <- coxph(Surv(dummy, etime, event=='pcm') ~ sex + age, mgus2)
> ct6 <- coxph(Surv(dummy, etime, event=='death') ~ sex + age, mgus2)
> zp4 <- cox.zph(ct4, transform='identity')
> zp5 <- cox.zph(ct5, transform='identity')
> zp6 <- cox.zph(ct6, transform='identity')
> aeq(zp4$table[1:2,], zp5$table[1:2,])
[1] TRUE
> aeq(zp4$table[3:4,], zp6$table[1:2,])
[1] TRUE
> 
> 
> # Direct check of a multivariate model with start, stop data
> p1 <- pbcseq[!duplicated(pbcseq$id), 1:6]
> pdata <- tmerge(p1[, c("id", "trt", "age", "sex")], p1, id=id,
+                 death = event(futime, status==2))
> pdata <- tmerge(pdata, pbcseq, id=id, bili=tdc(day, bili),
+                 edema = tdc(day, edema), albumin=tdc(day, albumin),
+                 protime = tdc(day, protime))
> pfit <- coxph(Surv(tstart, tstop, death) ~ log(bili) + albumin + edema +
+                   age + log(protime), data = pdata, ties='efron')
> zp7  <- cox.zph(pfit, transform='log', global=FALSE)
> 
> direct <- function(fit) {
+     nvar <- length(fit$coef)
+     dt <- coxph.detail(fit)
+     gtime <- log(dt$time) - mean(log(dt$time))
+     # key idea: at any event time I have a first deriviative vector
+     #     c(dt$score[i,],  gtime[i]* dt$score[i,]) 
+     # and second derivative matrix
+     #     dt$imat[,,i]              gtime[i]   * dt$imat[,,i]
+     #     gtime[i]*dt$imat[,,i]     gtime[i]^2 * dt$imat[,,i]
+     # for the expanded model, where imat[,,i] is symmetric, 
+     # and colSums(dt$score) =0  (since the model converged)
+     # 
+     # Create score tests for adding one time-dependent variable 
+     #  gtime * x[,j] at a time: first derivative of this test is
+     #     c(dt$score[i,],  gtime[i]* dt$score[i,j]) 
+     #  and etc.
+     unew  <-   colSums(gtime * dt$score)
+     temp1 <- apply(dt$imat, 1:2, sum)
+     temp2 <- apply(dt$imat, 1:2, function(x) sum(x*gtime))
+     temp3 <- apply(dt$imat, 1:2, function(x) sum(x * gtime^2))
+ 
+     score <- double(nvar)
+     smat  <- matrix(0., nvar+1, nvar+1)  # second deriv matrix for the test
+     smat[1:nvar, 1:nvar] <- temp1
+     for (i in 1:nvar) {
+         smat[nvar+1,] <- c(temp2[i,], temp3[i,i])
+         smat[,nvar+1] <- c(temp2[,i], temp3[i,i])
+         utemp <- c(rep(0,nvar), unew[i])
+         score[i] <- solve(smat, utemp) %*% utemp
+     }  
+     list(sctest = score, u= c(colSums(dt$score), unew), 
+          imat=cbind(rbind(temp1, temp2), rbind(temp2, temp3)))
+ }   
> 
> aeq(zp7$table[,1], direct(pfit)$sctest)
[1] TRUE
> 
> # Last, make sure that NA coefficients are ignored
> d1 <- survSplit(Surv(time, status) ~ ., veteran, cut=150, episode="epoch")
> fit <- coxph(Surv(tstart, time, status) ~ celltype:strata(epoch) + age, d1)
> zz <- cox.zph(fit)
> 
> fit2 <- coxph(Surv(tstart, time, status) ~ celltype:strata(epoch) + age, d1,
+               x=TRUE)
> zz2 <- cox.zph(fit2)
> 
> x2 <- fit2$x[, !is.na(fit$coefficients)][,-1]
> fit3 <- coxph(Surv(tstart, time, status) ~ age + x2, d1)
> all.equal(fit3$loglik, fit2$loglik)
[1] TRUE
> zz3 <- cox.zph(fit3)
> 
> all.equal(unclass(zz)[1:7], unclass(zz2)[1:7]) #ignore the call component
[1] TRUE
> all.equal(as.vector(zz$table), as.vector(zz3$table)) # variable names change
[1] TRUE
> 
> proc.time()
   user  system elapsed 
   1.01    0.18    1.20