R Under development (unstable) (2023-08-12 r84939 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> options(na.action=na.exclude) # preserve missings
> options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type
> library(survival)
> 
> #
> # Reproduce example 1 in the SAS lifereg documentation
> #
> 
> # this fit doesn't give the same log-lik that they claim
> fit1 <- survreg(Surv(time, status) ~ I(1000/(273.2+temp)), imotor,
+ 		subset=(temp>150), dist='lognormal')
> summary(fit1)

Call:
survreg(formula = Surv(time, status) ~ I(1000/(273.2 + temp)), 
    data = imotor, subset = (temp > 150), dist = "lognormal")
                         Value Std. Error     z       p
(Intercept)            -10.471      2.772 -3.78 0.00016
I(1000/(273.2 + temp))   8.322      1.284  6.48 9.1e-11
Log(scale)              -0.504      0.183 -2.75 0.00596

Scale= 0.604 

Log Normal distribution
Loglik(model)= -145.9   Loglik(intercept only)= -155
	Chisq= 18.3 on 1 degrees of freedom, p= 1.9e-05 
Number of Newton-Raphson Iterations: 6 
n= 30 

> 
> # This one, with the loglik on the transformed scale (the inappropriate
> #   scale, Ripley & Venables would argue) does agree.
> # All coefs are of course identical.
> fit2 <- survreg(Surv(log(time), status) ~ I(1000/(273.2+temp)), imotor,
+ 		subset=(temp>150), dist='gaussian')
> 
> 
> # Give the quantile estimates, which is the lower half of "output 48.1.5"
> #  in the SAS 9.2 manual
> 
> pp1 <- predict(fit1, newdata=list(temp=c(130,150)), p=c(.1, .5, .9),
+ 		      type='quantile', se=T)
> pp2 <- predict(fit1, newdata=list(temp=c(130,150)), p=c(.1, .5, .9),
+ 		      type='uquantile', se=T)
> pp1
$fit
          [,1]      [,2]     [,3]
[1,] 12033.185 26095.677 56592.20
[2,]  4536.877  9838.864 21336.98

$se.fit
         [,1]      [,2]      [,3]
[1,] 5482.338 11359.450 26036.917
[2,] 1443.072  2901.155  7172.343

> 
> temp130 <- matrix(0, nrow=3, ncol=6)
> temp130[,1] <- pp1$fit[1,]
> temp130[,2] <- pp1$se.fit[1,]
> temp130[,3] <- pp2$fit[1,]
> temp130[,4] <- pp2$se.fit[1,]
> temp130[,5] <- exp(pp2$fit[1,] - 1.64*pp2$se.fit[1,])
> temp130[,6] <- exp(pp2$fit[1,] + 1.64*pp2$se.fit[1,])
> dimnames(temp130) <- list(c("p=.1", "p=.2", "p=.3"),
+      c("Time", "se(time)", "log(time)", "se[log(time)]", 
+        "lower 90", "upper 90"))
> print(temp130)
         Time  se(time) log(time) se[log(time)]  lower 90  upper 90
p=.1 12033.18  5482.338  9.395424     0.4556015  5700.089  25402.68
p=.2 26095.68 11359.450 10.169525     0.4353001 12779.950  53285.37
p=.3 56592.20 26036.917 10.943626     0.4600796 26611.422 120349.71
> 
> # A set of examples, copied from the manual pages of SAS procedure
> #  "reliability", which is part of their QC product.
> #
> 
> color <- c("black", "red", "green", "blue", "magenta", "red4",
+                         "orange", "DarkGreen", "cyan2", "DarkViolet")
> palette(color)
> pdf(file='reliability.pdf')
> 
> #
> # Insulating fluids example
> #
> 
> # Adding a -1 to the fit just causes the each group to have it's own
> # intercept, rather than a global intercept + constrasts.  The strata
> # statement allows each to have a separate scale
> ffit <- survreg(Surv(time) ~ factor(voltage) + strata(voltage) -1, ifluid)
> 
> # Get predicted quantiles at each of the voltages
> # By default predict() would give a line of results for each observation,
> #  I only want the unique set of x's, i.e., only 4 cases
> uvolt <- sort(unique(ifluid$voltage))      #the unique levels
> plist <- c(1, 2, 5, 1:9 *10, 95, 99)/100
> pred  <- predict(ffit, type='quantile', p=plist,
+                  newdata=data.frame(voltage=uvolt))
> tfun <- function(x) log(-log(1-x))
> 
> matplot(t(pred), tfun(plist), type='l', log='x', lty=1,
+         col=1:4, yaxt='n',
+         xlab="Predicted", ylab="Quantile")
> axis(2, tfun(plist), format(100*plist), adj=1)
> 
> kfit <- survfit(Surv(time) ~ voltage, ifluid, type='fleming') #KM fit
> for (i in 1:4) {
+     temp <- kfit[i]
+     points(temp$time, tfun(1-temp$surv), col=i, pch=i)
+     }
> 
> # Now a table
> temp <- array(0, dim=c(4,4,4))  #4 groups by 4 parameters by 4 stats
> temp[,1,1] <- ffit$coef         # "EV Location" in SAS manual
> temp[,2,1] <- ffit$scale        # "EV scale"
> temp[,3,1] <- exp(ffit$coef)    # "Weibull Scale"
> temp[,4,1] <- 1/ffit$scale      # "Weibull Shape"
>  
> temp[,1,2] <- sqrt(diag(ffit$var))[1:4]   #standard error
> temp[,2,2] <- sqrt(diag(ffit$var))[5:8] * ffit$scale
> temp[,3,2] <- temp[,1,2] * temp[,3,1]
> temp[,4,2] <- temp[,2,2] / (temp[,2,1])^2
> 
> temp[,1,3] <- temp[,1,1] - 1.96*temp[,1,2]  #lower conf limits 
> temp[,1,4]  <- temp[,1,1] + 1.96*temp[,1,2] # upper
> # log(scale) is the natural parameter, in which the routine did its fitting
> #  and on which the std errors were computed
> temp[,2, 3] <- exp(log(ffit$scale) - 1.96*sqrt(diag(ffit$var))[5:8])
> temp[,2, 4] <- exp(log(ffit$scale) + 1.96*sqrt(diag(ffit$var))[5:8])
> 
> temp[,3, 3:4] <- exp(temp[,1,3:4])
> temp[,4, 3:4] <- 1/temp[,2,4:3]
> 
> dimnames(temp) <- list(uvolt, c("EV Location", "EV Scale", "Weibull scale",
+                                 "Weibull shape"),
+                        c("Estimate", "SE", "lower 95% CI", "uppper 95% CI"))
> print(aperm(temp, c(2,3,1)), digits=5)
, , 26

               Estimate         SE lower 95% CI uppper 95% CI
EV Location     6.86249    1.10404      4.69857        9.0264
EV Scale        1.83423    0.96114      0.65677        5.1227
Weibull scale 955.74665 1055.18620    109.78973     8320.0103
Weibull shape   0.54519    0.28568      0.19521        1.5226

, , 30

              Estimate       SE lower 95% CI uppper 95% CI
EV Location    4.35133  0.30151      3.76037        4.9423
EV Scale       0.94446  0.22544      0.59156        1.5079
Weibull scale 77.58159 23.39176     42.96420      140.0911
Weibull shape  1.05881  0.25274      0.66318        1.6904

, , 34

              Estimate      SE lower 95% CI uppper 95% CI
EV Location    2.50326 0.31476      1.88632        3.1202
EV Scale       1.29732 0.22895      0.91796        1.8334
Weibull scale 12.22222 3.84707      6.59509       22.6506
Weibull shape  0.77082 0.13603      0.54542        1.0894

, , 38

                Estimate      SE lower 95% CI uppper 95% CI
EV Location   0.00092629 0.27318     -0.53450       0.53635
EV Scale      0.73367610 0.20380      0.42565       1.26460
Weibull scale 1.00092672 0.27343      0.58596       1.70976
Weibull shape 1.36299929 0.37861      0.79077       2.34933

> 
> rm(temp, uvolt, plist, pred, ffit, kfit) 
> 
> #####################################################################
> # Turbine cracks data
> crack2 <- with(cracks, data.frame(day1=c(NA, days), day2=c(days, NA),
+                                   n=c(fail, 167-sum(fail))))
> cfit <- survreg(Surv(day1, day2, type='interval2') ~1, 
+                 dist='weibull', data=crack2, weight=n)
> 
> summary(cfit)

Call:
survreg(formula = Surv(day1, day2, type = "interval2") ~ 1, data = crack2, 
    weights = n, dist = "weibull")
              Value Std. Error      z       p
(Intercept)  7.6880     0.0744 103.30 < 2e-16
Log(scale)  -0.3953     0.0987  -4.01 6.2e-05

Scale= 0.674 

Weibull distribution
Loglik(model)= -309.6   Loglik(intercept only)= -309.6
Number of Newton-Raphson Iterations: 5 
n= 9 

> #Their output also has Wiebull scale = exp(cfit$coef), shape = 1/(cfit$scale)
> 
> # Draw the SAS plot
> #  The "type=fleming" argument reflects that they estimate hazards rather than
> #  survival, and forces a Nelson-Aalen hazard estimate
> #
> plist <-  c(1, 2, 5, 1:8 *10)/100
> plot(qsurvreg(plist, cfit$coef, cfit$scale), tfun(plist), log='x',
+      yaxt='n', type='l',
+      xlab="Weibull Plot for Time", ylab="Percent")
> axis(2, tfun(plist), format(100*plist), adj=1)
> 
> kfit <- survfit(Surv(day1, day2, type='interval2') ~1, data=crack2,
+                 weight=n, type='fleming')
> # Only plot point where n.event > 0 
> # Why?  I'm trying to match them.  Personally, all should be plotted.
> who <- (kfit$n.event > 0)
> points(kfit$time[who], tfun(1-kfit$surv[who]), pch='+')
> points(kfit$time[who], tfun(1-kfit$upper[who]), pch='-')
> points(kfit$time[who], tfun(1-kfit$lower[who]), pch='-')
> 
> text(rep(3,6), seq(.5, -1.0, length=6), 
+          c("Scale", "Shape", "Right Censored", "Left Censored", 
+            "Interval Censored", "Fit"), adj=0)
> text(rep(9,6), seq(.5, -1.0, length=6), 
+          c(format(round(exp(cfit$coef), 2)),
+            format(round(1/cfit$scale, 2)),
+            format(tapply(crack2$n, cfit$y[,3], sum)), "ML"), adj=1)
> 
> # Now a portion of his percentiles table
> #  I don't get the same SE as SAS, I haven't checked out why.  The
> #  estimates and se for the underlying Weibull model are the same.
> temp <- predict(cfit, type='quantile', p=plist, se=T)
> tempse <- sqrt(temp$se[1,])
> mat <- cbind(temp$fit[1,], tempse, 
+              temp$fit[1,] -1.96*tempse, temp$fit[1,] + 1.96*tempse)
> dimnames(mat) <- list(plist*100, c("Estimate", "SE", "Lower .95", "Upper .95"))
> print(mat)
     Estimate        SE  Lower .95 Upper .95
1    98.47183  5.321676   88.04134  108.9023
2   157.59360  6.186246  145.46856  169.7186
5   295.17088  7.377034  280.71189  309.6299
10  479.31737  8.227489  463.19149  495.4432
20  794.55351  8.951640  777.00829  812.0987
30 1089.70373  9.404613 1071.27068 1108.1368
40 1387.95516  9.983696 1368.38712 1407.5232
50 1704.71015 10.888487 1683.36872 1726.0516
60 2057.23906 12.217230 2033.29329 2081.1848
70 2472.58593 14.035515 2445.07632 2500.0955
80 3006.43572 16.510569 2974.07501 3038.7964
> 
> #
> # The cracks data has a particularly easy estimate, so use
> # it to double check code
> time <- c(crack2$day2[1], (crack2$day1 + crack2$day2)[2:8]/2, 
+           crack2$day1[9])
> cdf  <- cumsum(crack2$n)/sum(crack2$n)
> all.equal(kfit$time, time)
[1] TRUE
> all.equal(kfit$surv, 1-cdf[c(1:8,8)]) 
[1] TRUE
> rm(time, cdf, kfit)
> 
> 
> #######################################################
> #
> # Replacement of valve seats in diesel engines
> #   The input data has id, time, and an indicator of whether there was an
> #   event at that time: 0=no, 1=yes.  No one has an event at their last time.
> #  The input data has two engines with dual failures: 328 loses 2 valves at 
> #    time 653, and number 402 loses 2 at time 139.  For each, fudge the first
> #    time to be .1 days earlier. 
> #
> ties <- which(diff(valveSeat$time)==0 & diff(valveSeat$id)==0)
> temp <- valveSeat$time
> temp[ties] <- temp[ties] - .1
> n <- length(temp)
> first <- !duplicated(valveSeat$id)
> vtemp <- with(valveSeat, data.frame(id =id, 
+                                     time1= ifelse(first, 0, c(0, temp[-n])),
+                                     time2= temp, status=status))
> 
> kfit <- survfit(Surv(time1, time2, status) ~1, vtemp, id=id)
> 
> plot(kfit, fun='cumhaz', ylab="Sample Mean Cumulative Failures", xlab='Time')
> title("Valve replacement data")
> 
> # The summary.survfit function doesn't have an option for printing out
> #   cumulative hazards instead of survival --- need to add that
> #   so I just reprise the central code of print.summary.survfit
> xx <- summary(kfit)
> temp <- cbind(xx$time, xx$n.risk, xx$n.event, xx$cumhaz, 
+               xx$std.chaz, -log(xx$upper), -log(xx$lower))
> dimnames(temp) <- list(rep("", nrow(temp)),
+                        c("time", "n.risk", "n.event", "Cum haz", "std.err",
+                          "lower 95%", "upper 95%"))
> print(temp, digits=2)
 time n.risk n.event Cum haz std.err lower 95% upper 95%
   61     41       1   0.024   0.024    0.0000     0.073
   76     41       1   0.049   0.034    0.0000     0.117
   84     41       1   0.073   0.041    0.0000     0.156
   87     41       1   0.098   0.046    0.0057     0.192
   92     41       1   0.122   0.051    0.0208     0.226
   98     41       1   0.146   0.055    0.0373     0.259
  120     41       1   0.171   0.059    0.0548     0.291
  139     41       1   0.195   0.062    0.0732     0.322
  139     41       1   0.220   0.073    0.0750     0.369
  165     41       1   0.244   0.075    0.0954     0.398
  166     41       1   0.268   0.077    0.1163     0.427
  202     41       1   0.293   0.079    0.1376     0.455
  206     41       1   0.317   0.088    0.1452     0.497
  249     41       1   0.341   0.089    0.1675     0.524
  254     41       1   0.366   0.090    0.1903     0.551
  258     41       1   0.390   0.090    0.2133     0.577
  265     41       1   0.415   0.091    0.2368     0.603
  276     41       1   0.439   0.098    0.2479     0.641
  298     41       1   0.463   0.110    0.2490     0.689
  323     41       1   0.488   0.110    0.2734     0.714
  326     41       1   0.512   0.110    0.2981     0.739
  328     41       1   0.537   0.115    0.3124     0.774
  344     41       1   0.561   0.115    0.3376     0.798
  348     41       1   0.585   0.124    0.3430     0.842
  349     41       1   0.610   0.124    0.3686     0.866
  367     41       1   0.634   0.123    0.3945     0.890
  377     41       1   0.659   0.132    0.4018     0.932
  404     40       1   0.684   0.136    0.4189     0.965
  408     40       1   0.709   0.140    0.4365     0.998
  410     40       1   0.734   0.139    0.4631     1.022
  449     40       1   0.759   0.143    0.4813     1.055
  479     40       1   0.784   0.146    0.4998     1.087
  497     40       1   0.809   0.149    0.5187     1.118
  538     40       1   0.834   0.152    0.5380     1.150
  539     40       1   0.859   0.155    0.5576     1.181
  561     40       1   0.884   0.162    0.5696     1.220
  563     40       1   0.909   0.164    0.5898     1.250
  570     40       1   0.934   0.170    0.6029     1.287
  573     40       1   0.959   0.169    0.6310     1.310
  581     38       1   0.985   0.171    0.6532     1.341
  586     34       1   1.014   0.174    0.6777     1.376
  604     22       1   1.060   0.185    0.7009     1.446
  621     17       1   1.119   0.208    0.7144     1.554
  635     16       1   1.181   0.207    0.7795     1.618
  640     16       1   1.244   0.226    0.8033     1.723
  646     13       1   1.320   0.229    0.8775     1.809
  653      9       1   1.432   0.252    0.9394     1.983
  653      9       1   1.543   0.312    0.9201     2.238
> 
> # Note that I have the same estimates but different SE's than SAS.  We are using 
> #  a different estimator. It's a statistical argument as to which is
> #  better (one could defend both sides): SAS the more standard estimate found
> #  in the reliability literature, mine the estimate from statistics literature
> rm(temp, kfit, xx)
>                     
> ######################################################
> # Turbine data, lognormal fit
> turbine <- read.table('data.turbine', 
+                       col.names=c("time1", "time2", "n"))
> 
> tfit <- survreg(Surv(time1, time2, type='interval2') ~1, turbine,
+                 dist='lognormal', weights=n, subset=(n>0))
> 
> summary(tfit)

Call:
survreg(formula = Surv(time1, time2, type = "interval2") ~ 1, 
    data = turbine, weights = n, subset = (n > 0), dist = "lognormal")
              Value Std. Error     z      p
(Intercept)  3.6999     0.0708 52.23 <2e-16
Log(scale)  -0.3287     0.1232 -2.67 0.0076

Scale= 0.72 

Log Normal distribution
Loglik(model)= -190.7   Loglik(intercept only)= -190.7
Number of Newton-Raphson Iterations: 6 
n= 21 

> 
> # Now, do his plot, but put bootstrap confidence bands on it!
> #  First, make a simple data set without weights
> tdata <- turbine[rep(1:nrow(turbine), turbine$n),]
> 
> qstat <- function(data) {
+     temp <- survreg(Surv(time1, time2, type='interval2') ~1, data=data,
+                     dist='lognormal')
+     qsurvreg(plist, temp$coef, temp$scale, dist='lognormal')
+     }
> 
> {if (exists('bootstrap')) {
+     set.seed(1953)  # a good year :-)
+     bfit <- bootstrap(tdata, qstat, B=1000)
+     bci <- limits.bca(bfit, probs=c(.025, .975))
+     }
+ else {
+     values <- matrix(0, nrow=1000, ncol=length(plist))
+     n <- nrow(tdata)
+     for (i in 1:1000) {
+         subset <- sample(1:n, n, replace=T)
+         values[i,] <- qstat(tdata[subset,])
+         }
+     bci <- t(apply(values,2, quantile, c(.05, .95)))
+     }
+  }
> xmat <- cbind(qsurvreg(plist, tfit$coef, tfit$scale, dist='lognormal'),
+               bci)
> 
> 
> matplot(xmat, qnorm(plist), 
+         type='l', lty=c(1,2,2), col=c(1,1,1), 
+         log='x', yaxt='n', ylab='Percent', 
+         xlab='Time of Cracking (Hours x 100)')
> axis(2, qnorm(plist), format(100*plist), adj=1)
> title("Turbine Data")
> kfit <- survfit(Surv(time1, time2, type='interval2') ~1, data=tdata)
> points(kfit$time, qnorm(1-kfit$surv), pch='+')
> 
> dev.off()  #close the plot file
null device 
          1 
> 
> 
> proc.time()
   user  system elapsed 
   3.39    0.29    3.68