R version 4.4.0 beta (2024-04-15 r86425 ucrt) -- "Puppy Cup" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > > # This is a test of the influence matrix for an Andersen-Gill fit, using the > # formulas found in the methods document, and implemented in the survfitaj.c > # code. As much as anything it was a help in debugging -- both the mathematics > # and the program. > # The test case below has tied events, tied event/censoring, entry in mutiple > # states, staggered entry, repeated events for a subject, varying case weights > # within a subject, ... on purpose > > tdata <- data.frame(id= c(1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 5, 6, 6), + t1= c(0, 4, 9, 1, 5, 2, 0, 2, 5, 8, 1, 3, 3, 5), + t2= c(4, 9, 10, 5, 7, 9, 2, 5, 8, 9, 3, 11, 5, 8), + st= c(2, 3, 2, 3, 1, 2, 2, 4, 4, 1, 3, 1, 3, 2), + i0= c(1, 2, 3, 2, 3, 1, 1, 2, 4, 4, 4, 3, 2, 3), + wt= c(1:8, 8:3)) > > tdata$st <- factor(tdata$st, c(1:4), + labels=c("censor", "a", "b", "c")) > tdata$i0 <- factor(tdata$i0, 1:4, + labels=c("entry","a", "b", "c")) > check <- survcheck(Surv(t1, t2,st) ~1, tdata, id=id, istate=i0) > > if (FALSE) { + #useful picture + plot(c(0,11), c(1,6.5), type='n', xlab="Time", ylab= "Subject") + with(tdata, segments(t1+.1, id, t2, id, col=as.numeric(check$istate))) + with(subset(tdata, st!= "censor"), + text(t2, id+.15, as.character(st))) + with(tdata, text((t1+t2)/2, id+.25, wt)) + with(subset(tdata, !duplicated(id)), + text(t1, id+.15, as.character(i0))) + #segments are colored by current state, case weight in center, events at ends + abline(v=c(2:5, 8:11), lty=3, col='gray') + } > > # Compute the unweighted per observation leverages, using the approach in > # the methods document, as a check of both it and the C code. > # These IJ residuals can be directly verified using emprical derivatives, > # and collapsed to test the weighted+collapsed results from survfitAJ. > # > survfitaj <- function(t1, t2, state, istate=NULL, wt, id, p0, start.time=NULL, + debug = FALSE) { + check <- survcheck(Surv(t1, t2, state) ~ 1, id=id, istate=istate) + if (any(check$flag >0)) stop("failed survcheck") + states <- check$states + nstate <- length(states) + istate <- check$istate # will have the correct levels + isn <- as.numeric(istate) + n <- length(t1) + if (length(t2) !=n || length(state) !=n || length(istate) !=n || + length(wt) !=n || length(id) !=n) stop("input error") + + newstate <- factor(state, unique(c(levels(state)[1], states))) + Y <- Surv(t1, t2, newstate) # makes the levels match up + position <- survival:::survflag(Y, id) + + uid <- unique(id) + nid <- length(uid) + id <- match(id, uid) # turn it into 1,2,... + event <- (Y[,3] >0) + + U <- A <- matrix(0, n, nstate) # per observation influence, unweighted + if (missing(p0)) { + if (!missing(start.time)) t0 <- start.time + else { + if (all(Y[, 3] ==0)) t0 <- min(Y[, 2]) # no events! + else t0 <- min(Y[event, 2]) + } + atrisk <- (Y[,1] < t0 & Y[,2] >= t0) + wtsum <- sum(wt[atrisk]) # weights at that time + p0 <- tapply(wt[atrisk], istate[atrisk], sum) / wtsum + p0 <- ifelse(is.na(p0), 0, p0) #if a state has no one, tapply =NA + if (all(p0 <1)) { # compute intitial leverage + for (j in 1:nstate) { + U[atrisk,j] <- (ifelse(istate[atrisk]==states[j], 1, 0) + - p0[j])/wtsum + } + } + } else { + if (missing(start.time)) t0 <- 0 else t0 <- start.time + } + + utime <- sort(unique(c(0, Y[event | position>1, 2]))) + + ntime <- length(utime) + phat <- matrix(0, ntime, nstate) + phat[1,] <- p0 + n.risk <- matrix(0, ntime, nstate) + n.risk[1,] <- table(istate[Y[,1]< start.time & Y[,2] > start.time]) + + # count the number of transitions, and make an index to them + temp <- table(istate[event], factor(Y[event,3], 1:nstate, states)) + trmat <- cbind(from= row(temp)[temp>0], to= col(temp)[temp>0]) + nhaz <- nrow(trmat) + n.event <- matrix(0, ntime, nhaz) + C <- matrix(0, n, nhaz) + chaz <- matrix(0, ntime, nhaz) + + hash <- trmat %*% c(1,10) + tindx <- match(isn + 10*Y[,3], hash, nomatch=0) #index to transitions + + # at this point I have the initial inflence matrices (U= pstate, + # C= cumhaz, A= auc). The auc and cumhaz are 0 at the starting point + # so their influence is 0. + + Usave <- array(0, dim=c(dim(U), ntime)) + Usave[,,1] <- U + Csave <- array(0, dim= c(dim(C), ntime)) #chaz and AUC are 0 at start.time + Asave <- array(0, dim= c(dim(A), ntime)) + + for (it in 2:ntime) { + # AUC + if (it==2) delta <- utime[it]- t0 + else delta <- utime[it] - utime[it-1] + A <- A + delta* U + + # count noses + atrisk <- (t1 < utime[it] & t2 >= utime[it]) + temp <- tapply(wt[atrisk], istate[atrisk], sum) + n.risk[it,] <- ifelse(is.na(temp), 0, temp) + event <- (Y[,2]== utime[it] & Y[,3]>0) + temp <- tapply(wt[event], factor(tindx[event], 1:nhaz), sum) + n.event[it,] <- ifelse(is.na(temp), 0, temp) + + + # Add events to C and create the H matrix + H <- diag(nstate) + for (i in which(event)) { + j <- isn[i] # from, to, and transition indices + k <- Y[i,3] + jk <- match(j+10*k, hash) + C[i, jk] <- C[i, jk] + 1/n.risk[it,j] + if (j!=k) { + H[j,j] <- H[j,j] - wt[i]/n.risk[it,j] + H[j,k] <- H[j,k] + wt[i]/n.risk[it,j] + } + } + + U <- U %*% H + phat[it,] <- phat[it-1,] %*% H + + if (debug) browser() + # Add events to U + for (i in which(event)) { + j <- isn[i] # from, to, and transition indices + k <- Y[i,3] + if (j != k) { + U[i,j] <- U[i,j] - phat[it-1,j]/n.risk[it,j] + U[i,k] <- U[i,k] + phat[it-1,j]/n.risk[it,j] + } + } + + if (debug) browser() + # now the hazard part + for (h in which(n.event[it,] >0)) { + j <- trmat[h,1] + k <- trmat[h,2] + haz <- n.event[it,h]/n.risk[it, j] + h2 <- haz/n.risk[it,j] + who <- (atrisk & isn ==j) # at risk, currently in state j + + C[who,h] <- C[who,h] - h2 + if (j != k) { + U[who,j] <- U[who,j] + h2 * phat[it-1,j] + U[who,k] <- U[who,k] - h2 * phat[it-1,j] + } + } + if (debug) browser() + Usave[,,it] <- U + Csave[,,it] <- C + Asave[,,it] <- A + } + colnames(n.event) <- paste(trmat[,1], trmat[,2], sep=':') + colnames(n.risk) <- check$states + colnames(phat) <- check$states + + list(time = utime, n.risk= n.risk, n.event=n.event, pstate= phat, + C=Csave, U=Usave, A=Asave) + } > > mfit <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0, + weights=wt, influence=TRUE) > mtest <- with(tdata, survfitaj(t1, t2, st, i0, wt, id)) > # mtest <- with(tdata, survfitaj(t1, t2, st, i0, wt, id, debug=TRUE)) > > # p0 and U0 from the methods document > p0 <- c(8, 4,0,6)/ 18 > U0 <- rbind(c(1,0,0,0) - p0, 0, 0, + c(0,1,0,0) - p0, 0, + 0, + c(1,0,0,0) - p0, 0, 0, 0, + c(0,0,0,1) - p0, 0, + 0, 0) /18 > > aeq(mtest$pstate[1,], p0) [1] TRUE > aeq(mtest$U[,,1], U0) [1] TRUE > aeq(mtest$time[-1], mfit$time) # mtest includes U(2-eps) as 'time 0' [1] TRUE > aeq(mtest$pstate[-1,], mfit$pstate) [1] TRUE > aeq(mfit$p0, p0) [1] TRUE > aeq(mfit$i0, rowsum(U0*tdata$wt, tdata$id)) [1] TRUE > > # direct check that mtest has the correct answer > eps <- 1e-6 > delta <- array(0, dim= c(nrow(tdata), dim(mfit$pstate))) > deltaC<- array(0, dim= c(nrow(tdata), dim(mfit$cumhaz))) > for (i in 1:nrow(tdata)) { + twt <- tdata$wt + twt[i] <- twt[i] + eps + tfit <- survfit(Surv(t1, t2, st) ~1, tdata, id=id, istate=i0, + weights= twt) + delta[i,,] <- (tfit$pstate - mfit$pstate)/eps + deltaC[i,,] <-(tfit$cumhaz - mfit$cumhaz)/eps + } > temp <- aperm(mtest$U, c(1,3,2)) # drop time 0, put state last > all.equal(temp[,-1,], delta, tol=eps/2) [1] TRUE > > tempC <-aperm(mtest$C, c(1,3,2)) > all.equal(tempC[,-1,], deltaC, tol= eps/2) [1] TRUE > > # Now check mfit, which returns the weighted collapsed values > BD <- t(model.matrix(~ factor(id) -1, tdata)) %*% diag(tdata$wt) > rownames(BD) <- 1:6 > > collapse <- function(U, cmat=BD) { + # for each time point, replace the inflence matrix U with BDU + if (is.matrix(U)) BD %*% U + else { + dd <- dim(U) + temp <- cmat %*% matrix(U, nrow = dd[1]) #fake out matrix multiply + array(temp, dim= c(nrow(temp), dd[2:3])) + } + } > > sqsum <- function(x) sqrt(sum(x^2)) > temp <- collapse(mtest$U[,,-1]) # mtest has time 0, mfit does not > # mfit$influence is in id, time, state order > aeq(aperm(temp, c(1,3,2)), mfit$influence) # mtest has time 0, mfit does not [1] TRUE > > setemp <- apply(collapse(mtest$U[,,-1]), 2:3, sqsum) > aeq(t(setemp), mfit$std.err) [1] TRUE > > ctemp <- apply(collapse(mtest$C[,,-1]), 2:3, sqsum) > aeq(t(ctemp), mfit$std.chaz) [1] TRUE > > atemp <- apply(collapse(mtest$A[,,-1]), 2:3, sqsum) > aeq(t(atemp), mfit$std.auc) [1] TRUE > > > # check residuals > rr1 <- resid(mfit, times=mfit$time, type='pstate') > aeq(rr1, mtest$U[,,-1]) [1] TRUE > rr2 <- resid(mfit, times=mfit$time, type='auc') > aeq(rr2, mtest$A[,,-1]) [1] TRUE > > > > proc.time() user system elapsed 0.96 0.14 1.09