R Under development (unstable) (2024-02-13 r85898 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # A tiny multi-state example > # > library(survival) > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > mtest <- data.frame(id= c(1, 1, 1, 2, 3, 4, 4, 4, 5, 5), + t1= c(0, 4, 9, 0, 2, 0, 2, 8, 1, 3), + t2= c(4, 9, 10, 5, 9, 2, 8, 9, 3, 11), + st= c(1, 2, 1, 2, 3, 1, 3, 0, 2, 0)) > > mtest$state <- factor(mtest$st, 0:3, c("censor", "a", "b", "c")) > > if (FALSE) { + # this graph is very useful when debugging + temp <- survcheck(Surv(t1, t2, state) ~1, mtest, id=id) + plot(c(0,11), c(1,5.1), type='n', xlab="Time", ylab= "Subject") + with(mtest, segments(t1+.1, id, t2, id, col=as.numeric(temp$istate))) + event <- subset(mtest, state!='censor') + text(event$t2, event$id+.2, as.character(event$state)) + } > > mtest <- mtest[c(1,3,2,4,5,7,6,10, 9, 8),] #not in time order > > mfit <- survfit(Surv(t1, t2, state) ~ 1, mtest, id=id) > > # True results > # > #time state probabilities > # entry a b c entry a b c > # > #0 124 1 0 0 0 > #1+ 1245 > #2+ 1235 4 3/4 1/4 0 0 4 -> a, add 3 > #3+ 123 4 5 9/16 1/4 3/16 0 5 -> b > #4+ 23 14 5 6/16 7/16 3/16 0 1 -> a > #5+ 3 14 5 3/16 7/16 6/16 0 2 -> b, exits > #8+ 3 1 5 4 3/16 7/32 6/16 7/32 4 -> c > #9+ 15 0 0 19/32 13/32 1->b, 3->c & exit > # 10+ 1 5 19/64 19/64 13/32 1->a > > # In mfit, the "entry" state is first in the matrices, when this function was > # first created it was the last. > swap <- c(4,1,2,3) # at one time it was last > all.equal(mfit$n.risk, matrix(c(4,4,3,2,1,1,0,0, + 0,1,1,2,2,1,0,0, + 0,0,1,1,1,1,2,1, + 0,0,0,0,0,1,0,0), ncol=4)) [1] TRUE > all.equal(mfit$pstate, matrix(c(24, 18, 12, 6, 6, 0, 0, 0, + 8, 8, 14, 14, 7, 0, 9.5, 9.5, + 0, 6, 6, 12, 12,19,9.5, 9.5, + 0, 0, 0, 0, 7, 13, 13, 13)/32, ncol=4)) [1] TRUE > all.equal(mfit$n.event, matrix(c(0,0,0,0,0,0,0,0, + 1,0,1,0,0,0,1,0, + 0,1,0,1,0,1,0,0, + 0,0,0,0,1,1,0,0), ncol=4)) [1] TRUE > all.equal(mfit$time, c(2, 3, 4, 5, 8, 9, 10, 11)) [1] TRUE > > > # Somewhat more complex. > # Scramble the input data > # Not everyone starts at the same time or in the same state > # Case weights > # > tdata <- data.frame(id= c(1, 1, 1, 2, 3, 4, 4, 4, 5, 5), + t1= c(0, 4, 9, 1, 2, 0, 2, 8, 1, 3), + t2= c(4, 9, 10, 5, 9, 2, 8, 9, 3, 11), + st= c(1, 2, 1, 2, 3, 1, 3, 0, 3, 0), + i0= c(4, 1, 2, 1, 4, 4, 1, 3, 2, 3), + wt= 1:10) > > tdata$st <- factor(tdata$st, c(0:3), + labels=c("censor", "1", "2", "3")) > tdata$i0 <- factor(tdata$i0, 1:4, + labels=c("1", "2", "3", "entry")) > > tfun <- function(data=tdata) { + reorder <- c(10, 9, 1, 2, 5, 4, 3, 7, 8, 6) + new <- data[reorder,] + new + } > > # These weight vectors are in the order of tdata > # w[9] is the weight for subject 5 at time 1.5, for instance > # p0 is defined as all those at risk just before the first event, which in > # this data set is entry:a at time 2 for id=4; id 1,2,4,5 at risk > p0 <- function(w) c(w[4], w[9], 0, w[1]+ w[6])/ (w[1]+ w[4] + w[6] + w[9]) > > # aj2 = Aalen-Johansen H matrix at time 2, etc. > aj2 <- function(w) { + rbind(c(1, 0, 0, 0), # state a (1) stays put + c(0, 1, 0, 0), + c(0, 0, 1, 0), + c(w[6], 0, 0, w[1])/(w[1] + w[6])) #subject 4 moves to 'a' + } > aj3 <- function(w) rbind(c(1, 0, 0, 0), + c(0, 0, 1, 0), # 5 moves from b to c + c(0, 0, 1, 0), + c(0, 0, 0, 1)) > aj4 <- function(w) rbind(c(1, 0, 0, 0), + c(0, 1, 0, 0), + c(0, 0, 1, 0), + c(w[1], 0, 0, w[5])/(w[1] + w[5])) #1 moves from 4 to a > aj5 <- function(w) rbind(c(w[2]+w[7], w[4], 0, 0)/(w[2]+ w[4] + w[7]), #2 to b + c(0, 1, 0, 0), + c(0, 0, 1, 0), + c(0, 0, 0, 1)) > aj8 <- function(w) rbind(c(w[2], 0, w[7], 0)/(w[2]+ w[7]), # 4 to c + c(0, 1, 0, 0), + c(0, 0, 1, 0), + c(0, 0, 0, 1)) > aj9 <- function(w) rbind(c(0, 1, 0, 0), # 1 to b + c(0, 1, 0, 0), + c(0, 0, 1, 0), + c(0, 0, 1 ,0)) # 3 to c > aj10 <- function(w)rbind(c(1, 0, 0, 0), + c(1, 0, 0, 0), #1 back to a + c(0, 0, 1, 0), + c(0, 0, 0, 1)) > > #time state > # a b c entry > # > #1 2 5 14 initial distribution > #2 24 5 1 4 -> a, add 3 > #3 24 5 13 5 from b to c > #4 124 5 3 1 -> a > #5 14 5 3 2 -> b, exits > #8 1 45 3 4 -> c > #9 1 45 1->b, 3->c & exit > #10 1 45 1->a > > # P is a product of matrices > dopstate <- function(w) { + p1 <- p0(w) + p2 <- p1 %*% aj2(w) + p3 <- p2 %*% aj3(w) + p4 <- p3 %*% aj4(w) + p5 <- p4 %*% aj5(w) + p8 <- p5 %*% aj8(w) + p9 <- p8 %*% aj9(w) + p10<- p9 %*% aj10(w) + rbind(p2, p3, p4, p5, p8, p9, p10, p10) + } > > # Check the pstate estimate > w1 <- rep(1,10) > mtest2 <- tfun(tdata) # scrambled order > mfit2 <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0) # ordered > aeq(mfit2$pstate, dopstate(w1)[,swap]) [1] TRUE > aeq(mfit2$p0, p0(w1)[swap]) [1] TRUE > > mfit2b <- survfit(Surv(t1, t2, st) ~ 1, mtest2, id=id, istate=i0)#scrambled > aeq(mfit2b$pstate, dopstate(w1)[,swap]) [1] TRUE > aeq(mfit2b$p0, p0(w1)[swap]) [1] TRUE > > mfit2b$call <- mfit2$call <- NULL > all.equal(mfit2b, mfit2) [1] TRUE > aeq(mfit2$transitions, c(2,0,1,0, 0,2,0,0, 1,1,1,0, 0,0,0,2)) [1] TRUE > > # Now the harder one, where subjects change weights > mfit3 <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0, + weights=wt, influence=TRUE) > aeq(mfit3$p0, p0(1:10)[swap]) [1] TRUE > aeq(mfit3$pstate, dopstate(1:10)[,swap]) [1] TRUE > > > # The derivative of a matrix product AB is (dA)B + A(dB) where dA is the > # elementwise derivative of A and etc for B. > # dp0 creates the derivatives of p0 with respect to each subject, a 5 by 4 > # matrix > # All the functions below are hand coded for a weight vector that is in > # exactly the same order as the rows of mtest. > # Since p0 = (w[4], w[9], 0, w[1]+ w[6])/ (w[1]+ w[4] + w[6] + w[9]) > # and subject id is 1,1,1, 2, 3, 4,4,4, 5,5 > # we get the derivative below > # > dp0 <- function(w) { + p <- p0(w) + w0 <- w[c(1,4,6,9)] # the 4 obs at the start, subjects 1, 2, 4, 5 + rbind(c(0, 0, 0, 1) - p, # subject 1 affects p[4] + c(1, 0, 0, 0) - p, # subject 2 affects p[1] + 0, # subject 3 affects none + c(0, 0, 0, 1) - p, # subject 4 affect p[4] + c(0, 1, 0, 0) - p)/ # subject 5 affects p[2] + sum(w0) + } > > > dp2 <- function(w) { + h2 <- aj2(w) # H matrix at time 2 + part1 <- dp0(w) %*% h2 + + # 1 and 4 in state 4, obs 4 moves from entry to a + mult <- p0(w)[4]/(w[1] + w[6]) #p(t-) / weights in state + part2 <- rbind((c(0,0,0,1)- h2[4,]) * mult, + 0, + 0, + (c(1,0,0,0) - h2[4,]) * mult, + 0) + part1 + part2 + } > > dp3 <- function(w) { + dp2(w) %*% aj3(w) + } > > dp4 <- function(w) { + h4 <- aj4(w) # H matrix at time 4 + part1 <- dp3(w) %*% h4 + + # subjects 1 and 3 in state 4, obs 1 and 5, 1 moves to a + mult <- dopstate(w)[2,4]/ (w[1] + w[5]) # p_4(time 4-0) / wt + part2 <- rbind((c(1,0,0,0)- h4[4,]) * mult, + 0, + (c(0,0,0,1)- h4[4,]) * mult, + 0, + 0) + part1 + part2 + } > dp5 <- function(w) { + h5 <- aj5(w) # H matrix at time 5 + part1 <- dp4(w) %*% h5 + + # subjects 124 in state 1, obs 2,4,7, 2 goes to 2 + mult <- dopstate(w)[3,1]/ (denom <- w[2] + w[4] + w[7]) + part2 <- rbind((c(1,0,0,0)- h5[1,]) * mult, + (c(0,1,0,0)- h5[1,]) * mult, + 0, + (c(1,0,0,0)- h5[1,]) * mult, + 0) + part1 + part2 + } > dp8 <- function(w) { + h8 <- aj8(w) # H matrix at time 8 + part1 <- dp5(w) %*% h8 + + # subjects 14 in state 1, obs 2 &7, 4 goes to c + mult <- dopstate(w)[4, 1]/ (w[2] + w[7]) + part2 <- rbind((c(1,0,0,0)- h8[1,]) * mult, + 0, + 0, + (c(0,0,1,0)- h8[1,]) * mult, + 0) + part1 + part2 + } > dp9 <- function(w) dp8(w) %*% aj9(w) > dp10<- function(w) dp9(w) %*% aj10(w) > > w1 <- 1:10 > aeq(mfit3$influence[,1,], dp0(w1)[,swap]) [1] TRUE > aeq(mfit3$influence[,2,], dp2(w1)[,swap]) [1] TRUE > aeq(mfit3$influence[,3,], dp3(w1)[,swap]) [1] TRUE > aeq(mfit3$influence[,4,], dp4(w1)[,swap]) [1] TRUE > aeq(mfit3$influence[,5,], dp5(w1)[,swap]) [1] TRUE > aeq(mfit3$influence[,6,], dp8(w1)[,swap]) [1] TRUE > aeq(mfit3$influence[,7,], dp9(w1)[,swap]) [1] TRUE > aeq(mfit3$influence[,8,], dp10(w1)[,swap]) [1] TRUE > aeq(mfit3$influence[,9,], dp10(w1)[,swap]) # no changes at time 11 [1] TRUE > > > # The cumulative hazard at each time point is remapped from a matrix > # into a vector (in survfit) > # First check out the names > nstate <- length(mfit3$states) > temp <- matrix(0, nstate, nstate) > indx1 <- match(rownames(mfit3$transitions), mfit3$states) > indx2 <- match(colnames(mfit3$transitions), mfit3$states, nomatch=0) > temp[indx1, indx2] <- mfit3$transitions[, indx2>0] > # temp is an nstate by nstate version of the transitions matrix > from <- row(temp)[temp>0] > to <- col(temp)[temp>0] > > all.equal(colnames(mfit3$cumhaz), paste(from, to, sep='.')) [1] TRUE > > > hazard <- function(fit, i, indx=which(temp>0)) { + nstate <- length(fit$states) + cmat <- matrix(0, nstate, nstate) + if (i==1) cmat[indx] <- fit$cumhaz[i,] + else cmat[indx] <- fit$cumhaz[i,] - fit$cumhaz[i-1,] + + diag(cmat) <- 1- rowSums(cmat) + cmat + } > > aeq(hazard(mfit3, 1), aj2(w1)[swap, swap]) [1] TRUE > aeq(hazard(mfit3, 2), aj3(w1)[swap, swap]) [1] TRUE > aeq(hazard(mfit3, 3), aj4(w1)[swap, swap]) [1] TRUE > aeq(hazard(mfit3, 4), aj5(w1)[swap, swap]) [1] TRUE > aeq(hazard(mfit3, 5), aj8(w1)[swap, swap]) [1] TRUE > aeq(hazard(mfit3, 6), aj9(w1)[swap, swap]) [1] TRUE > > proc.time() user system elapsed 0.96 0.12 1.07