R version 4.4.0 RC (2024-04-16 r86458 ucrt) -- "Puppy Cup"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(survival)
> aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...)
> #
> # Tests of the residuals.survfit function
> #
> # The influence argument of survfit returns all the residuals at every time
> #  point, but for large data sets the result will be huge.  This function uses
> #  a different algorithm which will be faster when the number of time
> #  points being reported out is small. 
> 
> # Start with small data sets and work up.  First simple survival.
> test1 <- data.frame(time=  c(9, 3,1,1,6,6,8),
+                     status=c(1,NA,1,0,1,1,0),
+                     x=     c(0, 2,1,1,1,0,0))
> indx <- order(test1$time[!is.na(test1$status)])
> 
> s1 <- survfit(Surv(time, status) ~1, test1, influence=3)
> # true influence for survival and hazard, in time order
> inf1 <- matrix(c(-20, rep(4,5), -10, 2, -13, -13, 17, 17,
+                  rep(0,6))/144, ncol=3,
+                dimnames=list(1:6, c(1,6,9)))
> inf2 <- matrix(c(10, rep(-2,5), 10, -2, 7,7, -11, -11)/72,
+                ncol=2)
> aeq(s1$influence.surv[indx,], inf1[, c(1,2,2,3)])
[1] TRUE
> aeq(s1$influence.chaz[indx,], inf2[,c(1,2,2,2)])
[1] TRUE
> 
> r1 <- resid(s1, times=c(0, 3, 5, 8, 10))
> all(r1[,1] ==0)
[1] TRUE
> aeq(r1[indx,2:5], inf1[,c(1,1,2,3)])
[1] TRUE
> 
> r2 <- resid(s1, times=c(0, 3, 5, 8, 10), type="cumhaz")
> all(r2[,1] ==0)
[1] TRUE
> aeq(r2[indx,2:5], inf2[,c(1,1,2,2)])
[1] TRUE
> 
> # AUC is a sum of rectangles, height= S, width based on time points,
> #  so the leverage is a weighted sum of dfbeta values for S
> r3 <- resid(s1, times=c(1,4, 8, 10), type="sojourn")
> inf3 <- inf1 %*% cbind(c(0,0,0), c(3,0,0), c(5,2,0), c(5,3,1))
> aeq(r3[indx,], inf3)
[1] TRUE
> 
> # exp(Nelson-Aalen) 
> s2 <- survfit(Surv(time, status) ~1, test1, stype=2, influence=3)
> r4 <- resid(s2, times=c(0, 3, 5, 8, 10), type="pstate")
> inf4 <- -inf2[, c(1,2,2)] %*% diag(s2$surv[c(1,2,4)])
> aeq(r4[indx,2:5], inf4[,c(1,1,2,3)])
[1] TRUE
> aeq(s2$influence.surv[indx,], inf4[,c(1,2,2,3)])
[1] TRUE
> 
> r5 <- resid(s2, times=c(1,4, 8, 10), type="sojourn") 
> inf5 <- inf4 %*% cbind(c(0,0,0), c(3,0,0), c(5,2,0), c(5,3,1))
> aeq(r5[indx,], inf5)
[1] TRUE
> 
> # Fleming-Harrington 
> # This one is hard, the code still fails
> s3 <- survfit(Surv(time, status) ~1, test1, ctype=2, influence=2)
> inf6 <-  matrix(c( rep(c(5, -1), c(1, 5))/36, c(5,-1)/36, 
+                   c(21,21,-29, -29)/144), ncol=2)
> # r6 <- resid(s3, times =c(0, 3, 5, 8, 10), type="cumhaz")
> 
> # Part 2: single state, with start/stop data, multiple curves,
> #   second curve is identical to test1
> #   Then put it out of order.  
> 
> test2 <- data.frame(t1  =c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8,
+                            0,0,0,0,0,0),
+                     t2  =c(2, 3, 6, 7, 8, 9, 9, 9,14, 17,
+                            9, 1, 1, 6, 6, 8),
+                     event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
+                             1, 1, 0, 1, 1, 0),
+                     x    = rep(1:2, c(10, 6)),
+                     id   = 1:16)
> 
> s4 <- survfit(Surv(t1, t2, event) ~ x, test2, influence=TRUE)
> r6 <- resid(s4, time=c(4, 8, 10), type="surv")
> aeq(r6[1:10,], s4$influence.surv[[1]][,c(2, 5, 6)])
[1] TRUE
> aeq(r6[11:16,],s4$influence.surv[[2]][,c(1,3, 4)])
[1] TRUE
> aeq(r6[11:16,2:3], r1[,4:5])
[1] TRUE
> 
> r7 <- resid(s4, time=c(4, 8, 10), type="cumhaz")
> aeq(r7[1:10,], s4$influence.chaz[[1]][,c(2, 5, 6)])
[1] TRUE
> aeq(r7[11:16,],s4$influence.chaz[[2]][,c(1,3, 4)])
[1] TRUE
> aeq(r7[11:16, 2:3], r2[,4:5])
[1] TRUE
> 
> # Compute the AUC at times 8 and 10, the first is a reporting time, the
> #  second is in between
> r8 <- resid(s4, time= c(8, 10), type="auc")
> aeq(r8[11:16,], r3[,3:4])
[1] TRUE
> 
> # curve1:
> inf1 <- s4$influence.surv[[1]]
> d1 <- inf1[,1:4] %*% diff(s4$time[1:5])
> d2 <- inf1[,1:6] %*% diff(c(s4$time[1:6], 10))
> aeq(cbind(d1, d2), r8[1:10,])
[1] TRUE
> 
> # curve2:
> inf2 <- s4$influence.surv[[2]]
> d3 <- inf2[,1:2] %*% diff(s4$time[9:11])
> d4 <- inf2[,1:4] %*% diff(c(s4$time[9:12], 10))
> aeq(cbind(d3, d4), r8[11:16,])
[1] TRUE
> 
> # scramble the data
> reord <- c(1,3,5,7,9,11,13, 15,2,4,6,8,10,12,14,16)
> test2b <-test2[reord,]
> s5 <- survfit(Surv(t1, t2, event) ~x, test2b, influence=TRUE)
> r9 <- resid(s5, time=c(4, 8, 10), type="surv")
> aeq(r6[reord,], r9)
[1] TRUE
>  
> # 
> # For multistate use the same data set as mstate.R, where results have been
> #  worked out by hand.  Except, make it harder by adding an initial state.
> #
> tdata <- data.frame(id= LETTERS[3*c(1, 1, 1,  2,  3,  4, 4, 4,  5,  5)],
+                     t1= c(0, 4, 9,  1,  2,  0, 2, 8,  1,  3),
+                     t2= c(4, 9, 10, 5,  9,  2, 8, 9,  3, 11),
+                     st= c(1, 2,  1, 2,  3,  1, 3, 0,  3,  0),
+                     i0= c(1, 2,  3, 2,  1,  1, 2, 4,  3,  4),
+                     wt= 1:10)
> 
> tdata$st <- factor(tdata$st, c(0:3),
+                     labels=c("censor", "a", "b", "c"))
> tdata$i0 <- factor(tdata$i0, 1:4,
+                     labels=c("entry", "a", "b", "c"))  
> if (FALSE) {
+     #useful picture  
+     check <- survcheck(Surv(t1, t2, st) ~1, tdata, istate=i0, id=id)
+     plot(c(0,11), c(1,5.5), type='n', xlab="Time", ylab= "Subject")
+     tdata$idx <- as.numeric(factor(tdata$id))
+     with(tdata, segments(t1+.1, idx, t2, idx, col=as.numeric(check$istate)))
+     with(subset(tdata, st!= "censor"),
+           text(t2, idx+.15, as.character(st)))
+     with(tdata, text((t1+t2)/2, idx+.25, wt))
+     with(subset(tdata, !duplicated(id)),
+            text(t1, idx+.15, as.character(i0)))
+     #segments are colored by current state, case weight in center, events at ends
+     abline(v=c(2:5, 8:11), lty=3, col='gray')
+ }
>  
> tfun <- function(data=tdata) {
+     reorder <- c(10, 9, 1, 2, 5, 4, 3, 7, 8, 6)
+     new <- data[reorder,]
+     new
+ }
> mtest2 <- tfun(tdata)  # scrambled version
> 
> mfit1 <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0,
+                  influence=1)
> 
> test1 <- resid(mfit1, times= mfit1$time, collapse=TRUE)
> aeq(test1, aperm(mfit1$influence, c(1,3,2)))
[1] TRUE
> aeq(sqrt(apply(test1^2, 2:3, sum)), t(mfit1$std.err))
[1] TRUE
> 
> test1a <- resid(mfit1, times=c(3, 7, 9), method=1, collapse=TRUE)
> minf <- aperm(mfit1$influence, c(1,3,2)) # influence has time second, resid third
> aeq(test1a, minf[,,c(2,4,6)])  # interpolated times work
[1] TRUE
> 
> test2 <- resid(mfit1, times= mfit1$time, collapse=TRUE, type="cumhaz")
> aeq(sqrt(apply(test2^2, 2:3, sum)), t(mfit1$std.chaz))
[1] TRUE
> test3 <- resid(mfit1, times= mfit1$time, collapse=TRUE, type="auc")
> aeq(sqrt(apply(test3^2, 2:3, sum)), t(mfit1$std.auc))
[1] TRUE
> 
> # Do a couple AUC by hand
> atime <- c(1, 5.6, 8.1, 15)
> test4 <- resid(mfit1, times=atime, type="auc", collapse=TRUE)
> all(test4[,,1] ==0) # before the first time
[1] TRUE
> # 5.6 covers rectangles of widths 1,1,1, and .6 after times 2, 3,4 and 5
> temp <- apply(test1, 1:2, function(x) sum(x*c(1,1,1, .6, 0,0,0,0)))
> aeq(temp, test4[,,2])
[1] TRUE
> temp <- apply(test1, 1:2, function(x) sum(x*c(1,1,1,  3, .1, 0, 0, 0)))
> aeq(temp, test4[,,3])
[1] TRUE
> temp <- apply(test1, 1:2, function(x) sum(x*c(1,1,1,  3, 1, 1, 1, 4)))
> aeq(temp, test4[,,4])
[1] TRUE
> 
> #
> # competing risks
> #
> mdata <- mgus2
> mdata$etime <- with(mdata, ifelse(pstat==1, ptime, futime))
> temp <- with(mdata, ifelse(pstat==1, 1, 2*death))
> mdata$event <- factor(temp, 0:2, c("censor", "PCM", "Death"))
> mfit <- survfit(Surv(etime, event) ~1, mdata, influence=1)
> rr <- resid(mfit, time=360)
> index <- sum(mfit$time <= 360)
> aeq(mfit$influence.pstate[,index,], rr)
[1] TRUE
> 
> 
> proc.time()
   user  system elapsed 
   0.89    0.10    0.96