options(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # Tests of the weighted Cox model # # Similar data set to test1, but add weights, # a double-death/censor tied time # a censored last subject # The latter two are cases covered only feebly elsewhere. # # The data set testw2 has the same data, but done via replication # aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) testw1 <- data.frame(time= c(1,1,2,2,2,2,3,4,5), status= c(1,0,1,1,1,0,0,1,0), x= c(2,0,1,1,0,1,0,1,0), wt = c(1,2,3,4,3,2,1,2,1)) xx <- c(1,2,3,4,3,2,1,2,1) testw2 <- data.frame(time= rep(c(1,1,2,2,2,2,3,4,5), xx), status= rep(c(1,0,1,1,1,0,0,1,0), xx), x= rep(c(2,0,1,1,0,1,0,1,0), xx), id= rep(1:9, xx)) indx <- match(1:9, testw2$id) testw2 <- data.frame(time= rep(c(1,1,2,2,2,2,3,4,5), xx), status= rep(c(1,0,1,1,1,0,0,1,0), xx), x= rep(c(2,0,1,1,0,1,0,1,0), xx), id= rep(1:9, xx)) indx <- match(1:9, testw2$id) fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='breslow', iter=0) fit0b <- coxph(Surv(time, status) ~x, testw2, ties='breslow', iter=0) fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='breslow') fitb <- coxph(Surv(time, status) ~x, testw2, ties='breslow') texp <- function(beta) { # expected, Breslow estimate r <- exp(beta) temp <- cumsum(c(1/(r^2 + 11*r +7), 10/(11*r +5), 2/(2*r+1))) c(r^2, 1,r,r,1,r,1,r,1)* temp[c(1,1,2,2,2,2,2,3,3)] } aeq(texp(0), c(1/19, 1/19, rep(103/152, 5), rep(613/456,2))) #verify texp() xbar <- function(beta) { # xbar, Breslow estimate r <- exp(beta) temp <- r* rep(c(2*r + 11, 11/10, 1), c(2, 5, 2)) temp * texp(beta) } fit0 summary(fit) aeq(resid(fit0), testw1$status - texp(0)) resid(fit0, type='score') resid(fit0, type='scho') aeq(resid(fit0, type='mart'), (resid(fit0b, type='mart'))[indx]) aeq(resid(fit0, type='scor'), (resid(fit0b, type='scor'))[indx]) aeq(unique(resid(fit0, type='scho')), unique(resid(fit0b, type='scho'))) aeq(resid(fit, type='mart'), testw1$status - texp(fit$coef)) resid(fit, type='score') resid(fit, type='scho') aeq(resid(fit, type='mart'), (resid(fitb, type='mart'))[indx]) aeq(resid(fit, type='scor'), (resid(fitb, type='scor'))[indx]) aeq(unique(resid(fit, type='scho')), unique(resid(fitb, type='scho'))) rr1 <- resid(fit, type='mart') rr2 <- resid(fit, type='mart', weighted=T) aeq(rr2/rr1, testw1$wt) rr1 <- resid(fit, type='score') rr2 <- resid(fit, type='score', weighted=T) aeq(rr2/rr1, testw1$wt) fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='efron') fit resid(fit, type='mart') resid(fit, type='score') resid(fit, type='scho') # Tests of the weighted Cox model, AG form of the data # Same solution as doweight1.s # testw3 <- data.frame(id = c( 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 8, 8, 9), begin= c( 0, 5, 0, 0,10,15, 0, 0,14, 0, 0, 0,23, 0), time= c( 5,10,10,10,15,20,20,14,20,20,30,23,40,50), status= c( 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0), x= c( 2, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0), wt = c( 1, 1, 2, 3, 3, 3, 4, 3, 3, 2, 1, 2, 2, 1)) fit0 <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, ties='breslow', iter=0) fit <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, ties='breslow') fit0 summary(fit) resid(fit0, type='mart', collapse=testw3$id) resid(fit0, type='score', collapse=testw3$id) resid(fit0, type='scho') resid(fit, type='mart', collapse=testw3$id) resid(fit, type='score', collapse=testw3$id) resid(fit, type='scho') fit0 <- coxph(Surv(begin, time, status) ~x,testw3, weights=wt, iter=0) resid(fit0, 'mart', collapse=testw3$id) resid(coxph(Surv(begin, time, status) ~1, testw3, weights=wt) , collapse=testw3$id) #Null model fit <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, ties='efron') fit resid(fit, type='mart', collapse=testw3$id) resid(fit, type='score', collapse=testw3$id) resid(fit, type='scho') # # Check out the impact of weights on the dfbetas # Am I computing them correctly? # wtemp <- rep(1,26) wtemp[c(5,10,15)] <- 2:4 fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp) rr <- resid(fit, 'dfbeta') fit1 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp, subset=(-5)) fit2 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp, subset=(-10)) fit3 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp, subset=(-15)) # # Effect of case weights on expected survival curves post Cox model # fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='breslow', iter=0) fit0b <- coxph(Surv(time, status) ~x, testw2, ties='breslow', iter=0) surv1 <- survfit(fit0, newdata=list(x=0)) surv2 <- survfit(fit0b, newdata=list(x=0)) aeq(surv1$surv, surv2$surv) # # Check out the Efron approx. # fit0 <- coxph(Surv(time, status) ~x,testw1, weights=wt, iter=0) fit <- coxph(Surv(time, status) ~x,testw1, weights=wt) resid(fit0, 'mart') resid(coxph(Surv(time, status) ~1, testw1, weights=wt)) #Null model # lfun is the known log-likelihood for this data set, worked out in the # appendix of Therneau and Grambsch # ufun is the score vector and ifun the information matrix lfun <- function(beta) { r <- exp(beta) a <- 7*r +3 b <- 4*r +2 11*beta - ( log(r^2 + 11*r +7) + (10/3)*(log(a+b) + log(2*a/3 +b) + log(a/3 +b)) + 2*log(2*r +1)) } aeq(fit0$log[1], lfun(0)) aeq(fit$log[2], lfun(fit$coef)) ufun <- function(beta, efron=T) { #score statistic r <- exp(beta) xbar1 <- (2*r^2+11*r)/(r^2+11*r +7) xbar2 <- 11*r/(11*r +5) xbar3 <- 2*r/(2*r +1) xbar2b<- 26*r/(26*r+12) xbar2c<- 19*r/(19*r + 9) temp <- 11 - (xbar1 + 2*xbar3) if (efron) temp - (10/3)*(xbar2 + xbar2b + xbar2c) else temp - 10*xbar2 } print(ufun(fit$coef) < 1e-4) # Should be true ifun <- function(beta, efron=T) { # information matrix r <- exp(beta) xbar1 <- (2*r^2+11*r)/(r^2+11*r +7) xbar2 <- 11*r/(11*r +5) xbar3 <- 2*r/(2*r +1) xbar2b<- 26*r/(26*r+12) xbar2c<- 19*r/(19*r + 9) temp <- ((4*r^2 + 11*r)/(r^2+11*r +7) - xbar1^2) + 2*(xbar3 - xbar3^2) if (efron) temp + (10/3)*((xbar2- xbar2^2) + (xbar2b - xbar2b^2) + (xbar2c -xbar2c^2)) else temp + 10 * (xbar2- xbar2^2) } aeq(fit0$var, 1/ifun(0)) aeq(fit$var, 1/ifun(fit$coef)) # Make sure that the weights pass through the residuals correctly rr1 <- resid(fit, type='mart') rr2 <- resid(fit, type='mart', weighted=T) aeq(rr2/rr1, testw1$wt) rr1 <- resid(fit, type='score') rr2 <- resid(fit, type='score', weighted=T) aeq(rr2/rr1, testw1$wt) # # Look at the individual components # dt0 <- coxph.detail(fit0) dt <- coxph.detail(fit) aeq(sum(dt$score), ufun(fit$coef)) #score statistic aeq(sum(dt0$score), ufun(0)) aeq(dt0$hazard, c(1/19, (10/3)*(1/16 + 1/(6+20/3) + 1/(6+10/3)), 2/3)) rm(fit, fit0, rr1, rr2, dt, dt0) # # Effect of weights on the robust variance # test1 <- data.frame(time= c(9, 3,1,1,6,6,8), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0), wt= c(3,0,1,1,1,1,1), id= 1:7) testx <- data.frame(time= c(4,4,4,1,1,2,2,3), status=c(1,1,1,1,0,1,1,0), x= c(0,0,0,1,1,1,0,0), wt= c(1,1,1,1,1,1,1,1), id= 1:8) fit1 <- coxph(Surv(time, status) ~x, cluster=id, test1, ties='breslow', weights=wt) fit2 <- coxph(Surv(time, status) ~x, cluster=id, testx, ties='breslow') db1 <- resid(fit1, 'dfbeta', weighted=F) db1 <- db1[-2] #toss the missing db2 <- resid(fit2, 'dfbeta') aeq(db1, db2[3:8]) W <- c(3,1,1,1,1,1) #Weights, after removal of the missing value aeq(fit2$var, sum(db1*db1*W)) aeq(fit1$var, sum(db1*db1*W*W))