R version 4.4.0 RC (2024-04-16 r86458 ucrt) -- "Puppy Cup" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # > # Test aareg, for some simple data where the answers can be computed > # in closed form > # > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > > test1 <- data.frame(time= c(4, 3,1,1,2,2,3), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0), + wt= c(1, 1:6)) > > tfit <- aareg(Surv(time, status) ~ x, test1) > aeq(tfit$times, c(1,2,2)) [1] TRUE > aeq(tfit$nrisk, c(6,4,4)) [1] TRUE > aeq(tfit$coefficient, matrix(c(0,0,1/3, 1/3, 1, -1/3), ncol=2)) [1] TRUE > aeq(tfit$tweight, matrix(c(3,3,3, 3/2, 3/4, 3/4), ncol=2)) [1] TRUE > aeq(tfit$test.statistic, c(1,1)) [1] TRUE > aeq(tfit$test.var, c(1, -1/4, -1/4, 1/4 + 9/16 + 1/16)) [1] TRUE > > tfit <- aareg(Surv(time, status) ~ x, test1, test='nrisk') > aeq(tfit$tweight, matrix(c(3,3,3, 3/2, 3/4, 3/4), ncol=2)) #should be as before [1] TRUE > aeq(tfit$test.statistic, c(4/3, 6/3+ 4 - 4/3)) [1] TRUE > aeq(tfit$test.var, c(16/9, -16/9, -16/9, 36/9 + 16 + 16/9)) [1] TRUE > > # In the 1-variable case, this is the same as the default Aalen weight > tfit <- aareg(Surv(time, status) ~ x, test1, test='variance') > aeq(tfit$test.statistic, c(1,1)) [1] TRUE > aeq(tfit$test.var, c(1, -1/4, -1/4, 1/4 + 9/16 + 1/16)) [1] TRUE > > # > # Repeat the above, with case weights > # > tfit <- aareg(Surv(time, status) ~x, test1, weights=wt) > aeq(tfit$times, c(1,2,2)) [1] TRUE > aeq(tfit$nrisk, c(21,16,16)) [1] TRUE > aeq(tfit$coefficient, matrix(c(0,0,5/12, 2/9, 1, -5/12), ncol=2)) [1] TRUE > aeq(tfit$tweight, matrix(c(12,12,12, 36/7, 3,3), ncol=2)) [1] TRUE > aeq(tfit$test.statistic, c(5, 72/63 + 3 - 15/12)) [1] TRUE > aeq(tfit$test.var, c(25, -25/4, -25/4, (72/63)^2 + 9 + (5/4)^2)) [1] TRUE > > tfit <- aareg(Surv(time, status) ~x, test1, weights=wt, test='nrisk') > aeq(tfit$test.statistic, c(20/3, 42/9 + 16 - 16*5/12)) [1] TRUE > aeq(tfit$test.var, c(400/9, -400/9, -400/9, + (42/9)^2 + 16^2 + (16*5/12)^2)) [1] TRUE > > # > # Make a test data set with no NAs, in sorted order, no ties, > # 15 observations > tdata <- lung[15:29, c('time', 'status', 'age', 'sex', 'ph.ecog')] > tdata$status <- tdata$status -1 > tdata <- tdata[order(tdata$time, tdata$status),] > row.names(tdata) <- 1:15 > tdata$status[8] <- 0 #for some variety > > afit <- aareg(Surv(time, status) ~ age + sex + ph.ecog, tdata, nmin=6) > # > # Now, do it "by hand" > cfit <- coxph(Surv(time, status) ~ age + sex + ph.ecog, tdata, iter=0, + method='breslow') > dt1 <- coxph.detail(cfit) > sch1 <- resid(cfit, type='schoen') > > # First estimate of Aalen: from the Cox computations, first 9 > # The first and last cols of the ninth are somewhat unstable (approx =0) > mine <- rbind(solve(dt1$imat[,,1], sch1[1,]), + solve(dt1$imat[,,2], sch1[2,]), + solve(dt1$imat[,,3], sch1[3,]), + solve(dt1$imat[,,4], sch1[4,]), + solve(dt1$imat[,,5], sch1[5,]), + solve(dt1$imat[,,6], sch1[6,]), + solve(dt1$imat[,,7], sch1[7,]), + solve(dt1$imat[,,8], sch1[8,]), + solve(dt1$imat[,,9], sch1[9,])) > mine <- diag(1/dt1$nrisk[1:9]) %*% mine > > aeq(mine, afit$coef[1:9, -1]) [1] TRUE > > # > # Check out the dfbeta matrix from aareg > # Note that it is kept internally in time order, not data set order > # Those who want residuals should use the resid function! > > # > # First, the simple test case where I know the anwers > # > afit <- aareg(Surv(time, status) ~ x, test1, dfbeta=T) > temp <- c(rep(0,6), #intercepts at time 1 + c(2,-1,-1,0,0,0)/9, #alpha at time 1 + c(0,0,0,2, -1, -1)/9, #intercepts at time 2 + c(0,0,0,-2,1,1)/9) #alpha at time 2 > aeq(afit$dfbeta, temp) [1] TRUE > > # > #Now a multivariate data set > # > afit <- aareg(Surv(time, status) ~ age + sex + ph.ecog, lung, dfbeta=T) > > ord <- order(lung$time, -lung$status) > cfit <- coxph(Surv(time, status) ~ age + sex + ph.ecog, lung[ord,], + method='breslow', iter=0, x=T) > cdt <- coxph.detail(cfit, riskmat=T) > > # an arbitrary list of times > acoef <- rowsum(afit$coef, afit$times) #per death time coefs > indx <- match(cdt$time, afit$times) > for (i in c(2,5,27,54,101, 135)) { + lwho <- (cdt$riskmat[,i]==1) + lmx <- cfit$x[lwho,] + lmy <- 1*( cfit$y[lwho,2]==1 & cfit$y[lwho,1] == cdt$time[i]) + fit <- lm(lmy~ lmx) + cat("i=", i, "coef=", aeq(fit$coef, acoef[i,])) + + rr <- diag(resid(fit)) + zz <- cbind(1,lmx) + zzinv <- solve(t(zz) %*% zz) + cat(" twt=", aeq(1/(diag(zzinv)), afit$tweight[indx[i],])) + + df <- t(zzinv %*% t(zz) %*% rr) + cat(" dfbeta=", aeq(df, afit$dfbeta[lwho,,i]), "\n") + } i= 2 coef= TRUE twt= TRUE dfbeta= TRUE i= 5 coef= TRUE twt= TRUE dfbeta= TRUE i= 27 coef= TRUE twt= TRUE dfbeta= TRUE i= 54 coef= TRUE twt= TRUE dfbeta= TRUE i= 101 coef= TRUE twt= TRUE dfbeta= TRUE i= 135 coef= TRUE twt= TRUE dfbeta= TRUE > > > # Repeat it with case weights > ww <- rep(1:5, length=nrow(lung))/ 3.0 > afit <- aareg(Surv(time, status) ~ age + sex + ph.ecog, lung, dfbeta=T, + weights=ww) > cfit <- coxph(Surv(time, status) ~ age + sex + ph.ecog, lung[ord,], + method='breslow', iter=0, x=T, weight=ww[ord]) > cdt <- coxph.detail(cfit, riskmat=T) > > acoef <- rowsum(afit$coef, afit$times) #per death time coefs > for (i in c(2,5,27,54,101, 135)) { + who <- (cdt$riskmat[,i]==1) + x <- cfit$x[who,] + y <- 1*( cfit$y[who,2]==1 & cfit$y[who,1] == cdt$time[i]) + w <- cfit$weight[who] + fit <- lm(y~x, weights=w) + cat("i=", i, "coef=", aeq(fit$coef, acoef[i,])) + + rr <- diag(resid(fit)) + zz <- cbind(1,x) + zzinv <- solve(t(zz)%*% (w*zz)) + cat(" twt=", aeq(1/(diag(zzinv)), afit$tweight[indx[i],])) + + df <- t(zzinv %*% t(zz) %*% (w*rr)) + cat(" dfbeta=", aeq(df, afit$dfbeta[who,,i]), "\n") + } i= 2 coef= TRUE twt= TRUE dfbeta= TRUE i= 5 coef= TRUE twt= TRUE dfbeta= TRUE i= 27 coef= TRUE twt= TRUE dfbeta= TRUE i= 54 coef= TRUE twt= TRUE dfbeta= TRUE i= 101 coef= TRUE twt= TRUE dfbeta= TRUE i= 135 coef= TRUE twt= TRUE dfbeta= TRUE > > # > # Check that the test statistic computed within aareg and > # the one recomputed within summary.aareg are the same. > # Of course, they could both be wrong, but at least they'll agree! > # If the maxtime argument is used in summary, it recomputes the test, > # even if we know that it wouldn't have had to. > # > # Because the 1-variable and >1 variable case have different code, test > # them both. > # > afit <- aareg(Surv(time, status) ~ age, lung, dfbeta=T) > asum <- summary(afit, maxtime=max(afit$times)) > aeq(afit$test.stat, asum$test.stat) [1] TRUE > aeq(afit$test.var, asum$test.var) [1] TRUE > aeq(afit$test.var2, asum$test.var2) [1] TRUE > > print(afit) Call: aareg(formula = Surv(time, status) ~ age, data = lung, dfbeta = T) n= 228 139 out of 139 unique event times used slope coef se(coef) robust se z p Intercept -0.000872 -0.000905 4.26e-03 4.13e-03 -0.219 0.8270 age 0.000110 0.000142 6.96e-05 6.75e-05 2.110 0.0351 Chisq=4.44 on 1 df, p=0.0351; test weights=aalen > > afit <- aareg(Surv(time, status) ~ age, lung, dfbeta=T, test='nrisk') > asum <- summary(afit, maxtime=max(afit$times)) > aeq(afit$test.stat, asum$test.stat) [1] TRUE > aeq(afit$test.var, asum$test.var) [1] TRUE > aeq(afit$test.var2, asum$test.var2) [1] TRUE > > summary(afit) slope coef se(coef) robust se z p Intercept -0.000954 -0.117 0.53500 0.53300 -0.219 0.8260 age 0.000105 0.018 0.00875 0.00873 2.060 0.0398 Chisq=4.23 on 1 df, p=0.0398; test weights=nrisk > > # > # Mulitvariate > # > afit <- aareg(Surv(time, status) ~ age + sex + ph.karno + pat.karno, lung, + dfbeta=T) > asum <- summary(afit, maxtime=max(afit$times)) > aeq(afit$test.stat, asum$test.stat) [1] TRUE > aeq(afit$test.var, asum$test.var) [1] TRUE > aeq(afit$test.var2, asum$test.var2) [1] TRUE > > print(afit) Call: aareg(formula = Surv(time, status) ~ age + sex + ph.karno + pat.karno, data = lung, dfbeta = T) n=224 (4 observations deleted due to missingness) 132 out of 136 unique event times used slope coef se(coef) robust se z p Intercept 2.15e-02 0.025000 8.45e-03 7.72e-03 3.25 0.00117 age 3.09e-05 0.000076 7.32e-05 6.49e-05 1.17 0.24100 sex -2.96e-03 -0.004020 1.25e-03 1.23e-03 -3.27 0.00109 ph.karno -6.77e-05 -0.000083 6.69e-05 8.30e-05 -1.00 0.31700 pat.karno -1.01e-04 -0.000112 5.59e-05 5.70e-05 -1.96 0.05010 Chisq=23.36 on 4 df, p=0.000107; test weights=aalen > > afit <- aareg(Surv(time, status) ~ age + sex + ph.karno + pat.karno, lung, + dfbeta=T, test='nrisk') > asum <- summary(afit, maxtime=max(afit$times)) > aeq(afit$test.stat, asum$test.stat) [1] TRUE > aeq(afit$test.var, asum$test.var) [1] TRUE > aeq(afit$test.var2, asum$test.var2) [1] TRUE > > summary(afit) slope coef se(coef) robust se z p Intercept 2.12e-02 3.0600 1.04000 0.95600 3.20 0.00138 age 3.18e-05 0.0107 0.00928 0.00818 1.31 0.19100 sex -2.99e-03 -0.4940 0.15300 0.15200 -3.26 0.00112 ph.karno -8.37e-05 -0.0113 0.00783 0.00965 -1.17 0.24100 pat.karno -8.50e-05 -0.0133 0.00724 0.00767 -1.73 0.08320 Chisq=22.39 on 4 df, p=0.000168; test weights=nrisk > > # Weights play no role in the final computation of the test statistic, given > # the coefficient matrix, nrisk, and dfbeta as inputs. (Weights do > # change the inputs). So there is no need to reprise the above with > # case weights. > > proc.time() user system elapsed 1.06 0.17 1.23