#
# Test residuals for a coxphms model.
# This is done by explicitly fitting the three individual transitions and 
#  computing the residuals on those. 
# Use the same myeloid data set as multi2.R
library(survival)
aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...)
table2 <- function(...) table(..., useNA= "ifany")
tdata <- tmerge(myeloid[,1:4], myeloid, id=id, death=event(futime,death),
                priortx = tdc(txtime), sct= event(txtime))
tdata$event <- factor(with(tdata, sct + 2*death), 0:2,
                      c("censor", "sct", "death"))

# Make the processing a little harder by making sex missing for 3 people,
#  use it as a covariate for transitions to death, but not entry to sct
#  flt3 missing for 3, a covariate for entry:sct and sct:death, but not 1:3.
# Subject 275 for instance has two rows (427,428): entry to sct, then sct to 
#  censor, missing sex. They will not be omitted from the data frame since 
#  they are at risk for entry:sct, but not sct:death. Subject 271 is missing
#  flt for both entry:sct and sct:death, but will be counted in the risk set
#  for an entry:death transtion. Only one subject, 273, is deleted completely.
# The 646 subject/1009 obs data set is first thinned down to 645/1006; this is
#  what shows as 'deleted due to missing' in the printout.  Rows with a
#  prior stem cell transplant (priortx) and missing sex or flt have
#  nowhere to go, nor id 273 who is missing both.
tdata$sex[tdata$id %in% 273:275] <- NA   # obs 425 to 428
tdata$flt3[tdata$id %in% 271:273] <- NA  # obs 422 to 425

check <- survcheck(Surv(tstart, tstop, event) ~ 1, tdata, id=id)
fit <- coxph(list(Surv(tstart, tstop, event) ~ trt, 
                  1:3 + 2:3 ~ sex,
                  1:2 + 2:3 ~ flt3), tdata, id=id)

aeq(check$transitions, fit$transitions + c(0,0,0, 1,1,0, 0,1,0))
# The above is due to the difference between coxph, which has the more 
#  sophisticated list form of a formula, and survfit/survcheck which do not.
# survcheck with ~1 on the right uses all 1009 obs, 3 more obs than fit, but 
#  with ~sex on the right it will use fewer rows

# Multi-state now defaults to breslow rather than efron
#  The id option allows for collapse=TRUE later
fit12 <- coxph(Surv(tstart, tstop, event=='sct') ~ trt + flt3, tdata, id=id,
               subset=(priortx==0), iter=4, x=TRUE, method='breslow')
fit13 <- coxph(Surv(tstart, tstop, event=='death') ~ trt + sex, tdata, id =id,
               subset=(priortx==0), iter=4, x=TRUE, method= 'breslow')
fit23 <- coxph(Surv(tstart, tstop, event=='death') ~ trt + sex + flt3, 
               tdata, id=id,
               subset=(priortx==1), iter=4, x=TRUE, method="breslow")

# martingale residuals
# one row per retained obs, one col per transition
rr1 <- resid(fit)
aeq(dim(rr1), c(nrow(tdata)-3, 3))
#
# Obs 1-2 start in state (s0) so contribute to the 1:2 and 1:3 transitions,
#  columns 1 and 2 of rr1, while rr1[1:2, 3] =0.
all(rr1[1:2,3] ==0)
 
# Comparing the overall fit to the per-transaction ones is a bit of nuisance
# rr1 = resid(fit) has 1006 rows and 3 columns. 
# Rows where priortx=0 are not at risk for a 2:3 transition, the 2:3 colum will
#  be deterministicly 0 in rr1.  Where priortx==1 the 1:2 and 1:3 columns are 0.
mf2 <- tdata[-fit$na.action, ] # the 1006 subset
dim(mf2)
all(rr1[mf2$priortx==0, 3] ==0)
all(rr1[mf2$priortx==1, 1:2] ==0)

# fit12 has 643 obs while mf2 has 645 with priortx==0, two of those have missing
#  flt.  To match the fit residuals with fit12 we need to pick off the proper
#  643 rows, indx1 below.  The 1:3 transition is similar, but with sex.
with(mf2, table2(priortx))
with(mf2, table2(priortx, flt3))
indx1 <- (mf2$priortx==0 & !is.na(mf2$flt3))
aeq(rr1[indx1, 1], resid(fit12))
indx2 <- (mf2$priortx==0 & !is.na(mf2$sex))
aeq(rr1[indx2, 2], resid(fit13))
indx3 <- (mf2$priortx==1 & !is.na(mf2$sex) & !is.na(mf2$flt3))
aeq(rr1[indx3, 3], resid(fit23))

# dfbeta residuals have dim of (data row, coefficient)
rr3 <- resid(fit, type='dfbeta')
aeq(rr3[indx1, 1:3], resid(fit12, type='dfbeta'))
aeq(rr3[indx2, 4:5], resid(fit13, type='dfbeta'))
aeq(rr3[indx3, 6:9], resid(fit23, type='dfbeta'))

# The collapsed dfbeta have subject as the first dimension.
# For the overall fit there are 625 subjects, 2 of which (271 and 275) will
#  have 0 for the the "1:2" variables.
rr3b <- resid(fit, type='dfbeta', collapse=TRUE)
test12 <- resid(fit12, type='dfbeta', collapse= TRUE)
indx12 <- match(rownames(test12), rownames(rr3b))
all(rr3b[-indx12, 1:3] ==0)
aeq(rr3b[indx12, 1:3], test12)  #colnames won't match

test13 <- resid(fit13, type='dfbeta', collapse= TRUE)
indx13 <- match(rownames(test13), rownames(rr3b))
aeq(rr3b[indx13, 4:5], test13)  

test23 <- resid(fit23, type='dfbeta', collapse= TRUE)
indx23 <- match(rownames(test23), rownames(rr3b))
aeq(rr3b[indx23, 6:9], test23) 

# More complex formula
fit2 <- coxph(list(Surv(tstart, tstop, event) ~ trt,
                   1:3 + 2:3 ~ trt + strata(sex),
                   1:2 + 2:3 ~ flt3:sex), tdata, id=id)