R Under development (unstable) (2024-12-13 r87441 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(survival)
> aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...)
> 
> # Test the survival curve for a fit with shared hazards.
> # Use the pbcseq data set, and turn bilirubin into a time-dependent state with
> #  4 levels, and a shared baseline hazard for the 4 transitions to death.
> # The subtlety is that coefficients for a shared (proportional) baseline hazard
> #  are attached to a state, not to an observation. 
> # (A bilirubin value of <1 is normal.)
> pbc1 <- pbcseq
> pbc1$bili4 <- cut(pbc1$bili, c(0,1, 2,4, 100), 
+                   c("normal", "1-2", "2-4", ">4"))
> ptemp <- subset(pbc1, !duplicated(id))  # first row of each
> 
> pbc2 <- tmerge(ptemp[, c("id", "age", "sex")], ptemp, id,
+                death= event(futime, status==2))
> 
> pbc2 <- tmerge(pbc2, pbc1, id=id, bili = tdc(day, bili),
+                  bili4 = tdc(day, bili4), bstat = event(day, as.numeric(bili4)))
> btemp <- with(pbc2, ifelse(death, 5, bstat))
> 
> # a row with the same starting and ending bili4 level is not an event
> b2 <- ifelse(((as.numeric(pbc2$bili4)) == btemp), 0, btemp)
> pbc2$bstat <- factor(b2, 0:5,
+                      c("censor", "normal", "1-2", "2-4", ">4", "death"))
> check1 <- survcheck(Surv(tstart, tstop, bstat) ~ 1, istate= bili4,
+                     id = id, data=pbc2)
> check1$transitions
        to
from     normal 1-2 2-4  >4 death (censored)
  normal      0  81  10   3     9         77
  1-2        61   0  68  15     9         36
  2-4         2  33   0  94    12         24
  >4          1   3  28   0   110         35
  death       0   0   0   0     0          0
> 
> fit2 <- coxph(list(Surv(tstart, tstop, bstat) ~ 1,
+                    c(1:4):5 ~ age / common + shared), id= id, istate=bili4,
+               data=pbc2)
> 
> # Before we tackle fit2, start small with just 9 subjects, coefs fixed to 
> # simple values to make hand computation easier.  There are no transitions
> # from state 3 to death in this subset.  Since it is a shared hazard the
> # subjects in state 3 ARE at risk and so are found in the denominator of the
> # hazard, but since none of the progress the MLE for that ph coef is -infinity. 
> # We set it to -1.  
> pbc3 <- subset(pbc2, id < 10)
> pbc3$age <- round(pbc3$age)  # easier to do "by hand" sums
> fit3 <- coxph(list(Surv(tstart, tstop, bstat) ~ 1, 
+                    c(1:4):5 ~ age / common + shared),  x=TRUE,
+               id= id, istate=bili4, data=pbc3, init= c(.05, .6, -1, 1.1), iter=0)
> # a mixed p0 gives a stronger test than our usual (1, 0,0,0,0)
> surv3 <- survfit(fit3, newdata=list(age=50), p0=c(.4, .3, .2, .1, 0))
> 
> etime <- sort(unique(pbc3$tstop[pbc3$bstat != "censor"]))
> # At event time 1 (182), all 9 are at risk, (3,3,2,1) in initial states 1-4
> atrisk <- pbc3$tstart < etime[1] & pbc3$tstop >= etime[1]  # all 9 at risk
> table(pbc3$bili4[atrisk])

normal    1-2    2-4     >4 
     3      3      2      1 
> 
> #  One event occurs at 182, a 2:1 transition  (1-2 to normal)
> #  Risk scores for the non-death transitions are all exp(0) =1,
> #  so the hazard matrix H will have second row of (1/3, -1/3, 0,0,0) and all
> #  other rows are 0. 
> with(subset(pbc3, tstop== 182), table(istate= bili4, state=bstat))
        state
istate   censor normal 1-2 2-4 >4 death
  normal      0      0   0   0  0     0
  1-2         0      1   0   0  0     0
  2-4         0      0   0   0  0     0
  >4          0      0   0   0  0     0
> 
> # The next four events are from 3:4, 3:2, 2:3, and 1:2, so also have 
> #  simple transtions, i.e., no covariates so all risk scores are exp(0) =1
> #
> hmat <- array(0, dim=c(5,5,6))  # first 6 hazard matrices, start with 3,3,2,1
> hmat[2,1,1] <- 1/3; hmat[2,2,1] <- -1/3   # new count= 4,2,2,1
> hmat[3,4,2] <- 1/2; hmat[3,3,2] <- -1/2   # new count= 4,2,1,2
> hmat[3,2,3] <- 1  ; hmat[3,3,3] <- -1     # new count= 4,3,0,2
> hmat[2,3,4] <- 1/3; hmat[2,2,4] <- -1/3   # new count= 4,2,1,2
> hmat[1,2,5] <- 1/4; hmat[1,1,5] <- -1/4   # new count= 3,3,1,2
> 
> # Event 6 is a transition from state 4 to death, at day 400
> # For the shared hazard, the denominator is all those in states 1,2,3, or 4.
> atrisk <- with(pbc3, tstart < etime[6] & tstop >= etime[6])
> table(pbc3$bili4[atrisk]) # current states just before time 6

normal    1-2    2-4     >4 
     3      3      1      2 
>  
> adata <- subset(pbc3, atrisk)
> eta <- with(adata, .05*(age-50) + .6*(bili4=="1-2") + 1.1*(bili4 == ">4") -
+                    1*(bili4=="2-4"))
> cbind(adata[,c('id', 'age', 'tstop', 'bili4', 'bstat')], eta, risk=exp(eta)) 
   id age tstop  bili4  bstat   eta      risk
2   1  59   400     >4  death  1.55 4.7114702
5   2  56   768 normal    1-2  0.30 1.3498588
14  3  70   743    1-2 censor  1.60 4.9530324
18  4  55   729    1-2    2-4  0.85 2.3396469
25  5  38   769    2-4     >4 -1.60 0.2018965
30  6  66   737 normal censor  0.80 2.2255409
36  7  56   545    1-2 normal  0.90 2.4596031
44  8  53   795 normal censor  0.15 1.1618342
52  9  43   723     >4 censor  0.75 2.1170000
> basehaz <- 1/sum(exp(eta))
> hmat[1,5,6] <- basehaz;            hmat[1,1,6] <- -basehaz
> hmat[2,5,6] <- basehaz * exp(.6);  hmat[2,2,6] <- -basehaz*exp(.6)
> hmat[3,5,6] <- basehaz * exp(-1);  hmat[3,3,6] <- -basehaz*exp(-1)
> hmat[4,5,6] <- basehaz * exp(1.1); hmat[4,4,6] <- -basehaz*exp(1.1)
> # double check: sum of per-subject hazards at this time point = number of
> #  events at this time point 
> sum(basehaz * exp(eta)) ==1
[1] TRUE
> 
> tmat <- array(0., dim= dim(hmat))  # transition matrices
> pstate <- matrix((4:0)/10, nrow=1)
> for (i in 1:6) {
+     tmat[,,i] <- as.matrix(Matrix::expm(hmat[,,i]))
+     pstate <- rbind(pstate, pstate[i,]%*% tmat[,,i])
+ }
> 
> dtime <- which(surv3$time %in% etime)  # skip censored rows
> aeq(surv3$pstate[dtime[1:6],1,], pstate[-1,])
[1] TRUE
> 
> #
> # A function to do the above "by hand" calculations, over all time points
> # It is verified for the particular fit we did, but written for
> #  more generality.
> # fit: a multi-state fit, with shared baselines
> # istate: the inital state for each row of data
> # p0: starting dist for compuation
> # x0: curve for this set of covariates
> #  
> mysurv <- function(fit, istate, p0, x0, debug=0) {
+     if (!inherits(fit, 'coxphms')) stop("invalid fit")
+     smap <- fit$smap
+     from <- as.numeric(sub(":.*$", "", colnames(smap)))
+     to   <- as.numeric(sub("^.*:", "", colnames(smap)))
+     shared <- duplicated(smap[1,])
+     nshare <- sum(shared)
+     bcoef <- rep(1, ncol(smap))   # coefficients for shared baseline
+     beta <- coef(fit, matrix=TRUE)
+     if (nshare >0) {
+         # coefficients for shared baseline will be the last nshare of them
+         i <- seq(length=nshare, to=length(fit$coefficients))
+         bcoef[shared] <- exp(fit$coefficients[i])
+         # remove shared coef rows from beta
+         phrow <- apply(fit$cmap, 1, function(x) any(x %in% i))
+         beta <- beta[!phrow,, drop=FALSE]
+     }
+           
+     # Make the values for istate and state match the 1:2, etc of the fit,
+     #  i.e., the order of fit$states
+     # istate and state are used in tables, using factors makes sure the result
+     #  is always the right size
+     nstate <- length(fit$states)
+     state <-  factor(fit$y[,3], 1:nstate)  # endpoint of a transition
+     if (length(istate) != nrow(fit$y)) stop ("mismatched istate")
+     istate <- factor(as.character(istate), fit$states)
+ 
+     # set up output
+     ntran <- ncol(smap)    # number of transitions
+     utime <- sort(unique(fit$y[!is.na(state), 2])) # unique event times
+     ntime <- length(utime)
+     tmat <- matrix(0, nstate, nstate)  # transtion matrix at this time point
+     pmat <- diag(nstate)               # product of transitions
+     nrisk <- matrix(0., ntime, nstate) #number at risk
+     wtrisk<- matrix(0., ntime, ntran)  # weighted number per transtion 
+     nevent <- matrix(0L, ntime, nstate)  # number of events of each type
+     pstate <- matrix(0L, ntime, nstate)  # probability in state
+     hmat <- matrix(0., nstate, nstate)   # working matrix of hazards
+ 
+     # eta is a matrix of (x for subject - x0) %*% coef, one row per subject,
+     #  one column per transition
+     eta <- (fit$x - rep(x0, each= nrow(fit$y))) %*% beta
+     rwt <- exp(eta)  # the risk weight for each obs
+ 
+     t1 <- fit$y[,1]
+     t2 <- fit$y[,2]
+     for (i in 1:ntime) {
+         atrisk <- (t1 < utime[i] &  utime[i] <= t2) # risk set at this time
+         event <- which(utime[i] == t2)  # potential events, at this time
+         nrisk[i,]  <- c(table(istate[atrisk]))   # number at risk in each state
+         nevent[i,] <- c(table(state[event]))
+         # The linear predictor and hence the number at risk is different for
+         #  every transition. Also, some will not be at risk for the transition.
+         #
+         for (k in 1:ntran) { 
+             atrisk2 <- (atrisk & (as.numeric(istate) == from[k]))
+             wtrisk[i,k] <- sum(rwt[atrisk2,k])
+             }
+         dtemp <- table(istate[event], state[event]) #censors don't count
+ 
+         # fill in hmat, one hazard at a time
+         hmat <- 0*hmat 
+         for (j in unique(smap)) {
+             # for each baseline hazard
+             k <- which(smap == j)  # transitons that share this hazard
+             deaths <- sum(dtemp[cbind(from[k], to[k])])     # total events
+             if (deaths==0) hmat[cbind(from[k], to[k])] <- 0 # avoid 0/0
+             else {
+                 hazard <- deaths/ sum(wtrisk[i, k] * bcoef[k]) #shared baseline
+                 hmat[cbind(from[k], to[k])] <- hazard * bcoef[k] # PH
+             }
+         }
+         diag(hmat) <- diag(hmat) - rowSums(hmat)   # rows sum to zero
+         tmat <- as.matrix(Matrix::expm(hmat))      # transtion matrix
+ #        if (i >= debug) browser()
+         pmat <- pmat %*% tmat
+         pstate[i,] <- drop(p0 %*% pmat)
+     }
+     list(time=utime, nrisk=nrisk, nevent=nevent, pstate=pstate,  
+          wtrisk= wtrisk, P=pmat)
+ }
> 
> test3 <- mysurv(fit3, pbc3$bili4, p0= 4:0/10,  x0 =50)
> aeq(test3$pstate, surv3$pstate[match(test3$time, surv3$time),1,])
[1] TRUE
> 
> # Now with the full data set
> fit2 <- coxph(list(Surv(tstart, tstop, bstat) ~ 1,
+                    c(1:4):5 ~ age / common + shared), id= id, istate=bili4,
+               data=pbc2, ties='breslow', x=TRUE)
> surv2 <- survfit(fit2, newdata=list(age=50), p0=c(.4, .3, .2, .1, 0))
> test2 <- mysurv(fit2, pbc2$bili4, p0= 4:0/10, fit2, x0 =50)
> aeq(test2$pstate, surv2$pstate[match(test2$time, surv2$time),1,])
[1] TRUE
> 
> 
> if (FALSE){
+     # for testing, make a plot
+     xfun <- function(i) {
+         j <- match(test2$time[i], surv2$time)
+         all.equal(test2$pstate[i,], surv2$pstate[j,1,])
+     }
+     plot(surv2, col=1:5, lwd=2)
+     matpoints(test2$time, test2$pstate, col=1:5, pch='o')
+ }
> 
> 
> proc.time()
   user  system elapsed 
   1.71    0.21    1.92