R version 4.4.0 beta (2024-04-15 r86425 ucrt) -- "Puppy Cup"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> options(na.action=na.exclude) # preserve missings
> options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type
> library(survival)
> 
> # Tests of the weighted Cox model
> #
> # Similar data set to test1, but add weights,
> #                                    a double-death/censor tied time
> #                                    a censored last subject
> # The latter two are cases covered only feebly elsewhere.
> # 
> # The data set testw2 has the same data, but done via replication
> #
> aeq <- function(x,y) all.equal(as.vector(x), as.vector(y))
> 
> testw1 <- data.frame(time=  c(1,1,2,2,2,2,3,4,5),
+ 		    status= c(1,0,1,1,1,0,0,1,0),
+ 		    x=      c(2,0,1,1,0,1,0,1,0),
+ 		    wt =    c(1,2,3,4,3,2,1,2,1))
> xx <- c(1,2,3,4,3,2,1,2,1)
> testw2 <- data.frame(time=   rep(c(1,1,2,2,2,2,3,4,5), xx),
+ 		     status= rep(c(1,0,1,1,1,0,0,1,0), xx),
+ 		     x=      rep(c(2,0,1,1,0,1,0,1,0), xx),
+ 		     id=     rep(1:9, xx))
> indx <- match(1:9, testw2$id)
> testw2 <- data.frame(time=   rep(c(1,1,2,2,2,2,3,4,5), xx),
+ 		     status= rep(c(1,0,1,1,1,0,0,1,0), xx),
+ 		     x=      rep(c(2,0,1,1,0,1,0,1,0), xx),
+ 		     id=     rep(1:9, xx))
> indx <- match(1:9, testw2$id)
> 
> fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt,
+ 		    method='breslow', iter=0)
> fit0b <- coxph(Surv(time, status) ~x, testw2, ties='breslow', iter=0)
> fit  <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='breslow')
> fitb <- coxph(Surv(time, status) ~x, testw2, ties='breslow')
> 
> texp <- function(beta) {  # expected, Breslow estimate
+     r <- exp(beta)
+     temp <- cumsum(c(1/(r^2 + 11*r +7), 10/(11*r +5), 2/(2*r+1)))
+     c(r^2, 1,r,r,1,r,1,r,1)* temp[c(1,1,2,2,2,2,2,3,3)]
+     }
> aeq(texp(0),  c(1/19, 1/19, rep(103/152, 5), rep(613/456,2))) #verify texp()
[1] TRUE
> 
> xbar <- function(beta) { # xbar, Breslow estimate
+     r <- exp(beta)
+     temp <- r* rep(c(2*r + 11, 11/10, 1), c(2, 5, 2))
+     temp * texp(beta)
+     }
> 
> fit0
Call:
coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, 
    method = "breslow", iter = 0)

    coef exp(coef) se(coef) z p
x 0.0000    1.0000   0.5858 0 1

Likelihood ratio test=0  on 1 df, p=1
n= 9, number of events= 5 
> summary(fit)
Call:
coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, 
    ties = "breslow")

  n= 9, number of events= 5 

    coef exp(coef) se(coef)     z Pr(>|z|)
x 0.8596    2.3621   0.7131 1.205    0.228

  exp(coef) exp(-coef) lower .95 upper .95
x     2.362     0.4233    0.5839     9.556

Concordance= 0.637  (se = 0.161 )
Likelihood ratio test= 1.69  on 1 df,   p=0.2
Wald test            = 1.45  on 1 df,   p=0.2
Score (logrank) test = 1.52  on 1 df,   p=0.2

> aeq(resid(fit0), testw1$status - texp(0))
[1] TRUE
> resid(fit0, type='score')
          1           2           3           4           5           6 
 1.24653740  0.03601108  0.10056700  0.10056700 -0.22180142 -0.21193300 
          7           8           9 
 0.46569858 -0.10082189  0.91014302 
> resid(fit0, type='scho')
         1          2          2          2          4 
 1.3157895  0.3125000  0.3125000 -0.6875000  0.3333333 
> 
> aeq(resid(fit0, type='mart'), (resid(fit0b, type='mart'))[indx])
[1] TRUE
> aeq(resid(fit0, type='scor'), (resid(fit0b, type='scor'))[indx])
[1] TRUE
> aeq(unique(resid(fit0, type='scho')), unique(resid(fit0b, type='scho')))
[1] TRUE
> 
> 
> aeq(resid(fit, type='mart'), testw1$status - texp(fit$coef))
[1] TRUE
> resid(fit, type='score')
          1           2           3           4           5           6 
 0.88681615  0.02497653  0.03608964  0.03608964 -0.54297652 -0.12528780 
          7           8           9 
 0.29564605 -0.09476911  0.58400064 
> resid(fit, type='scho')
         1          2          2          2          4 
 1.0368337  0.1613774  0.1613774 -0.8386226  0.1746960 
> aeq(resid(fit, type='mart'), (resid(fitb, type='mart'))[indx])
[1] TRUE
> aeq(resid(fit, type='scor'), (resid(fitb, type='scor'))[indx])
[1] TRUE
> aeq(unique(resid(fit, type='scho')), unique(resid(fitb, type='scho')))
[1] TRUE
> rr1 <- resid(fit, type='mart')
> rr2 <- resid(fit, type='mart', weighted=T)
> aeq(rr2/rr1, testw1$wt)
[1] TRUE
> 
> rr1 <- resid(fit, type='score')
> rr2 <- resid(fit, type='score', weighted=T)
> aeq(rr2/rr1, testw1$wt)
[1] TRUE
> 
> fit  <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='efron')
> fit
Call:
coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, 
    ties = "efron")

    coef exp(coef) se(coef)     z     p
x 0.8726    2.3931   0.7126 1.225 0.221

Likelihood ratio test=1.75  on 1 df, p=0.1858
n= 9, number of events= 5 
> resid(fit, type='mart')
          1           2           3           4           5           6 
 0.85334536 -0.02560716  0.32265266  0.32265266  0.71696234 -1.07772629 
          7           8           9 
-0.45034077 -0.90490339 -0.79598658 
> resid(fit, type='score')
          1           2           3           4           5           6 
 0.88116056  0.02477248  0.06057806  0.06057806 -0.59724033 -0.16737066 
          7           8           9 
 0.38040295 -0.13750290  0.66631324 
> resid(fit, type='scho')
         1          2          2          2          4 
 1.0325955  0.1621759  0.1621759 -0.8378241  0.1728229 
> 
> # Tests of the weighted Cox model, AG form of the data
> #   Same solution as doweight1.s
> #
> testw3 <- data.frame(id  =  c( 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 8, 8, 9),
+ 		     begin= c( 0, 5, 0, 0,10,15, 0, 0,14, 0, 0, 0,23, 0),
+ 		     time=  c( 5,10,10,10,15,20,20,14,20,20,30,23,40,50),
+ 		    status= c( 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0),
+ 		    x=      c( 2, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0),
+ 		    wt =    c( 1, 1, 2, 3, 3, 3, 4, 3, 3, 2, 1, 2, 2, 1))
> 
> fit0 <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt,
+ 		    ties='breslow', iter=0)
> fit  <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, ties='breslow')
> fit0
Call:
coxph(formula = Surv(begin, time, status) ~ x, data = testw3, 
    weights = wt, ties = "breslow", iter = 0)

    coef exp(coef) se(coef) z p
x 0.0000    1.0000   0.5858 0 1

Likelihood ratio test=0  on 1 df, p=1
n= 14, number of events= 5 
> summary(fit)
Call:
coxph(formula = Surv(begin, time, status) ~ x, data = testw3, 
    weights = wt, ties = "breslow")

  n= 14, number of events= 5 

    coef exp(coef) se(coef)     z Pr(>|z|)
x 0.8596    2.3621   0.7131 1.205    0.228

  exp(coef) exp(-coef) lower .95 upper .95
x     2.362     0.4233    0.5839     9.556

Concordance= 0.637  (se = 0.172 )
Likelihood ratio test= 1.69  on 1 df,   p=0.2
Wald test            = 1.45  on 1 df,   p=0.2
Score (logrank) test = 1.52  on 1 df,   p=0.2

> resid(fit0, type='mart', collapse=testw3$id)
          1           2           3           4           5           6 
 0.94736842 -0.05263158  0.32236842  0.32236842  0.32236842 -0.67763158 
          7           8           9 
-0.67763158 -0.34429825 -1.34429825 
> resid(fit0, type='score', collapse=testw3$id)
          1           2           3           4           5           6 
 1.24653740  0.03601108  0.10056700  0.10056700 -0.22180142 -0.21193300 
          7           8           9 
 0.46569858 -0.10082189  0.91014302 
> resid(fit0, type='scho')
        10         20         20         20         40 
 1.3157895  0.3125000  0.3125000 -0.6875000  0.3333333 
> 
> resid(fit, type='mart', collapse=testw3$id)
          1           2           3           4           5           6 
 0.85531186 -0.02593169  0.17636221  0.17636221  0.65131344 -0.82363779 
          7           8           9 
-0.34868656 -0.64894181 -0.69807852 
> resid(fit, type='score', collapse=testw3$id)
          1           2           3           4           5           6 
 0.88681615  0.02497653  0.03608964  0.03608964 -0.54297652 -0.12528780 
          7           8           9 
 0.29564605 -0.09476911  0.58400064 
> resid(fit, type='scho')
        10         20         20         20         40 
 1.0368337  0.1613774  0.1613774 -0.8386226  0.1746960 
> fit0 <- coxph(Surv(begin, time, status) ~x,testw3, weights=wt, iter=0)
> resid(fit0, 'mart', collapse=testw3$id)
          1           2           3           4           5           6 
 0.94736842 -0.05263158  0.44454887  0.44454887  0.44454887 -0.88126566 
          7           8           9 
-0.88126566 -0.54793233 -1.54793233 
> resid(coxph(Surv(begin, time, status) ~1, testw3, weights=wt)
+ 		      , collapse=testw3$id)  #Null model
          1           2           3           4           5           6 
 0.94736842 -0.05263158  0.44454887  0.44454887  0.44454887 -0.88126566 
          7           8           9 
-0.88126566 -0.54793233 -1.54793233 
> 
> fit  <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, ties='efron')
> fit
Call:
coxph(formula = Surv(begin, time, status) ~ x, data = testw3, 
    weights = wt, ties = "efron")

    coef exp(coef) se(coef)     z     p
x 0.8726    2.3931   0.7126 1.225 0.221

Likelihood ratio test=1.75  on 1 df, p=0.1858
n= 14, number of events= 5 
> resid(fit, type='mart', collapse=testw3$id)
          1           2           3           4           5           6 
 0.85334536 -0.02560716  0.32265266  0.32265266  0.71696234 -1.07772629 
          7           8           9 
-0.45034077 -0.90490339 -0.79598658 
> resid(fit, type='score', collapse=testw3$id)
          1           2           3           4           5           6 
 0.88116056  0.02477248  0.06057806  0.06057806 -0.59724033 -0.16737066 
          7           8           9 
 0.38040295 -0.13750290  0.66631324 
> resid(fit, type='scho')
        10         20         20         20         40 
 1.0325955  0.1621759  0.1621759 -0.8378241  0.1728229 
> #
> # Check out the impact of weights on the dfbetas
> #    Am I computing them correctly?
> #
> wtemp <- rep(1,26)
> wtemp[c(5,10,15)] <- 2:4
> fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp)
> rr <- resid(fit, 'dfbeta')
> 
> fit1 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp,
+ 	         subset=(-5))
> fit2 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp,
+ 	         subset=(-10))
> fit3 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp,
+ 	         subset=(-15))
> 
> #
> # Effect of case weights on expected survival curves post Cox model
> #
> fit0  <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='breslow',
+ 	       iter=0)
> fit0b <- coxph(Surv(time, status) ~x, testw2, ties='breslow', iter=0)
> 
> surv1 <- survfit(fit0, newdata=list(x=0))
> surv2 <- survfit(fit0b, newdata=list(x=0))
> aeq(surv1$surv, surv2$surv)
[1] TRUE
> #
> # Check out the Efron approx. 
> #
> 
> fit0 <- coxph(Surv(time, status) ~x,testw1, weights=wt, iter=0)
> fit  <- coxph(Surv(time, status) ~x,testw1, weights=wt)
> resid(fit0, 'mart')
          1           2           3           4           5           6 
 0.94736842 -0.05263158  0.44454887  0.44454887  0.44454887 -0.88126566 
          7           8           9 
-0.88126566 -0.54793233 -1.54793233 
> resid(coxph(Surv(time, status) ~1, testw1, weights=wt))  #Null model
          1           2           3           4           5           6 
 0.94736842 -0.05263158  0.44454887  0.44454887  0.44454887 -0.88126566 
          7           8           9 
-0.88126566 -0.54793233 -1.54793233 
> 
> # lfun is the known log-likelihood for this data set, worked out in the
> #   appendix of Therneau and Grambsch
> # ufun is the score vector and ifun the information matrix
> lfun <- function(beta) {
+     r <- exp(beta)
+     a <- 7*r +3
+     b <- 4*r +2
+     11*beta - ( log(r^2 + 11*r +7) + 
+ 	(10/3)*(log(a+b) + log(2*a/3 +b) + log(a/3 +b)) + 2*log(2*r +1))
+     }
> aeq(fit0$log[1], lfun(0))
[1] TRUE
> aeq(fit$log[2], lfun(fit$coef))
[1] TRUE
> 
> ufun <- function(beta, efron=T) {  #score statistic
+     r <- exp(beta)
+     xbar1 <- (2*r^2+11*r)/(r^2+11*r +7)
+     xbar2 <- 11*r/(11*r +5)
+     xbar3 <-  2*r/(2*r +1)
+     xbar2b<- 26*r/(26*r+12)
+     xbar2c<- 19*r/(19*r + 9)
+     temp <- 11 - (xbar1 + 2*xbar3)
+     if (efron) temp - (10/3)*(xbar2 + xbar2b + xbar2c)
+     else       temp - 10*xbar2
+     }
> print(ufun(fit$coef) < 1e-4)  # Should be true
   x 
TRUE 
> 
> ifun <- function(beta, efron=T) {  # information matrix
+     r <- exp(beta)
+     xbar1 <- (2*r^2+11*r)/(r^2+11*r +7)
+     xbar2 <- 11*r/(11*r +5)
+     xbar3 <-  2*r/(2*r +1)
+     xbar2b<- 26*r/(26*r+12)
+     xbar2c<- 19*r/(19*r + 9)
+     temp <- ((4*r^2 + 11*r)/(r^2+11*r +7) - xbar1^2) +
+ 	    2*(xbar3 - xbar3^2)
+     if (efron) temp + (10/3)*((xbar2- xbar2^2) + (xbar2b - xbar2b^2) +
+ 			      (xbar2c -xbar2c^2))
+     else       temp + 10 * (xbar2- xbar2^2)
+     }
> 
> aeq(fit0$var, 1/ifun(0))
[1] TRUE
> aeq(fit$var, 1/ifun(fit$coef))
[1] TRUE
> 
> 
>       
> # Make sure that the weights pass through the residuals correctly
> rr1 <- resid(fit, type='mart')
> rr2 <- resid(fit, type='mart', weighted=T)
> aeq(rr2/rr1, testw1$wt)
[1] TRUE
> rr1 <- resid(fit, type='score')
> rr2 <- resid(fit, type='score', weighted=T)
> aeq(rr2/rr1, testw1$wt)
[1] TRUE
> 
> #
> # Look at the individual components
> #
> dt0 <- coxph.detail(fit0)
> dt <- coxph.detail(fit)
> aeq(sum(dt$score), ufun(fit$coef))  #score statistic
[1] TRUE
> aeq(sum(dt0$score), ufun(0))
[1] TRUE
> aeq(dt0$hazard, c(1/19, (10/3)*(1/16 + 1/(6+20/3) + 1/(6+10/3)), 2/3))
[1] TRUE
> 
> 
> 
> rm(fit, fit0, rr1, rr2, dt, dt0)
> #
> # Effect of weights on the robust variance
> #
> test1 <- data.frame(time=  c(9, 3,1,1,6,6,8),
+                     status=c(1,NA,1,0,1,1,0),
+                     x=     c(0, 2,1,1,1,0,0),
+ 		    wt=    c(3,0,1,1,1,1,1),
+                     id=    1:7)
> testx <- data.frame(time=  c(4,4,4,1,1,2,2,3),
+                     status=c(1,1,1,1,0,1,1,0),
+                     x=     c(0,0,0,1,1,1,0,0),
+ 		    wt=    c(1,1,1,1,1,1,1,1),
+                     id=    1:8)
>  
> fit1 <- coxph(Surv(time, status) ~x, cluster=id, test1, ties='breslow',
+               weights=wt)
> fit2 <- coxph(Surv(time, status) ~x, cluster=id, testx, ties='breslow')
> 
> db1 <- resid(fit1, 'dfbeta', weighted=F)
> db1 <- db1[-2]         #toss the missing
> db2 <- resid(fit2, 'dfbeta')
> aeq(db1, db2[3:8])
[1] TRUE
> 
> W <- c(3,1,1,1,1,1)   #Weights, after removal of the missing value
> aeq(fit2$var, sum(db1*db1*W))
[1] TRUE
> aeq(fit1$var, sum(db1*db1*W*W))
[1] TRUE
> 
> 
> proc.time()
   user  system elapsed 
   0.96    0.09    1.04