R Under development (unstable) (2024-10-07 r87211 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(ISwR)
> .make.epsf <- Sys.getenv("EPSF")=="y"
> ps.options(height=3.5, width=4.4, pointsize=8, horiz=F)
> if (.make.epsf) X11(height=3.5,width=4.4,pointsize=8) else postscript()
> dev.copy2eps <- function(...) invisible(grDevices::dev.copy2eps(...))
> par(mar=c(4,4,3,2)+.1)
> options(width=66, useFancyQuotes="TeX")
> suppressWarnings(RNGversion("1.5.1")) #Yes, Kinderman-Ramage was buggy...
> set.seed(310367)
> #Rprof(interval=.001)
> plot(rnorm(500))
> 2 + 2
[1] 4
> exp(-2)
[1] 0.1353353
> rnorm(15)
 [1] -0.18326112 -0.59753287 -0.67017905  0.16075723  1.28199575
 [6]  0.07976977  0.13683303  0.77155246  0.85986694 -1.01506772
[11] -0.49448567  0.52433026  1.07732656  1.09748097 -1.09318582
> x <- 2
> x
[1] 2
> x + x
[1] 4
> weight <- c(60, 72, 57, 90, 95, 72)
> weight
[1] 60 72 57 90 95 72
> height <- c(1.75, 1.80, 1.65, 1.90, 1.74, 1.91)
> bmi <- weight/height^2
> bmi
[1] 19.59184 22.22222 20.93664 24.93075 31.37799 19.73630
> sum(weight)
[1] 446
> sum(weight)/length(weight)
[1] 74.33333
> xbar <- sum(weight)/length(weight)
> weight - xbar
[1] -14.333333  -2.333333 -17.333333  15.666667  20.666667
[6]  -2.333333
> (weight - xbar)^2
[1] 205.444444   5.444444 300.444444 245.444444 427.111111
[6]   5.444444
> sum((weight - xbar)^2)
[1] 1189.333
> sqrt(sum((weight - xbar)^2)/(length(weight) - 1))
[1] 15.42293
> mean(weight)
[1] 74.33333
> sd(weight)
[1] 15.42293
> t.test(bmi, mu=22.5)

	One Sample t-test

data:  bmi
t = 0.34488, df = 5, p-value = 0.7442
alternative hypothesis: true mean is not equal to 22.5
95 percent confidence interval:
 18.41734 27.84791
sample estimates:
mean of x 
 23.13262 

> plot(height,weight)
> if (.make.epsf) dev.copy2eps(file="h-w.ps")
> plot(height, weight, pch=2)
> if (.make.epsf) dev.copy2eps(file="h-w-triangle.ps")
> hh <- c(1.65, 1.70, 1.75, 1.80, 1.85, 1.90)
> lines(hh, 22.5 * hh^2)
> if (.make.epsf) dev.copy2eps(file="h-w-line.ps")
> args(plot.default)
function (x, y = NULL, type = "p", xlim = NULL, ylim = NULL, 
    log = "", main = NULL, sub = NULL, xlab = NULL, ylab = NULL, 
    ann = par("ann"), axes = TRUE, frame.plot = axes, panel.first = NULL, 
    panel.last = NULL, asp = NA, xgap.axis = NA, ygap.axis = NA, 
    ...) 
NULL
> c("Huey","Dewey","Louie")
[1] "Huey"  "Dewey" "Louie"
> c('Huey','Dewey','Louie')
[1] "Huey"  "Dewey" "Louie"
> c(T,T,F,T)
[1]  TRUE  TRUE FALSE  TRUE
> bmi > 25
[1] FALSE FALSE FALSE FALSE  TRUE FALSE
> cat(c("Huey","Dewey","Louie"))
Huey Dewey Louie> cat("Huey","Dewey","Louie", "\n")
Huey Dewey Louie 
> cat("What is \"R\"?\n")
What is "R"?
> c(42,57,12,39,1,3,4)
[1] 42 57 12 39  1  3  4
> x <- c(1, 2, 3)
> y <- c(10, 20)
> c(x, y, 5)
[1]  1  2  3 10 20  5
> x <- c(red="Huey", blue="Dewey", green="Louie")
> x
    red    blue   green 
 "Huey" "Dewey" "Louie" 
> names(x)
[1] "red"   "blue"  "green"
> c(FALSE, 3)
[1] 0 3
> c(pi, "abc")
[1] "3.14159265358979" "abc"             
> c(FALSE, "abc")
[1] "FALSE" "abc"  
> seq(4,9)
[1] 4 5 6 7 8 9
> seq(4,10,2)
[1]  4  6  8 10
> 4:9
[1] 4 5 6 7 8 9
> oops <- c(7,9,13)
> rep(oops,3)
[1]  7  9 13  7  9 13  7  9 13
> rep(oops,1:3)
[1]  7  9  9 13 13 13
> rep(1:2,c(10,15))
 [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
> x <- 1:12
> dim(x) <- c(3,4)
> x
     [,1] [,2] [,3] [,4]
[1,]    1    4    7   10
[2,]    2    5    8   11
[3,]    3    6    9   12
> matrix(1:12,nrow=3,byrow=T)
     [,1] [,2] [,3] [,4]
[1,]    1    2    3    4
[2,]    5    6    7    8
[3,]    9   10   11   12
> x <- matrix(1:12,nrow=3,byrow=T)
> rownames(x) <- LETTERS[1:3]
> x
  [,1] [,2] [,3] [,4]
A    1    2    3    4
B    5    6    7    8
C    9   10   11   12
> t(x)
     A B  C
[1,] 1 5  9
[2,] 2 6 10
[3,] 3 7 11
[4,] 4 8 12
> cbind(A=1:4,B=5:8,C=9:12)
     A B  C
[1,] 1 5  9
[2,] 2 6 10
[3,] 3 7 11
[4,] 4 8 12
> rbind(A=1:4,B=5:8,C=9:12)
  [,1] [,2] [,3] [,4]
A    1    2    3    4
B    5    6    7    8
C    9   10   11   12
> pain <- c(0,3,2,2,1)
> fpain <- factor(pain,levels=0:3)
> levels(fpain) <- c("none","mild","medium","severe")
> fpain
[1] none   severe medium medium mild  
Levels: none mild medium severe
> as.numeric(fpain)
[1] 1 4 3 3 2
> levels(fpain)
[1] "none"   "mild"   "medium" "severe"
> intake.pre <- c(5260,5470,5640,6180,6390,
+ 6515,6805,7515,7515,8230,8770)
> intake.post <- c(3910,4220,3885,5160,5645,
+ 4680,5265,5975,6790,6900,7335)
> mylist <- list(before=intake.pre,after=intake.post)
> mylist
$before
 [1] 5260 5470 5640 6180 6390 6515 6805 7515 7515 8230 8770

$after
 [1] 3910 4220 3885 5160 5645 4680 5265 5975 6790 6900 7335

> mylist$before
 [1] 5260 5470 5640 6180 6390 6515 6805 7515 7515 8230 8770
> d <- data.frame(intake.pre,intake.post)
> d
   intake.pre intake.post
1        5260        3910
2        5470        4220
3        5640        3885
4        6180        5160
5        6390        5645
6        6515        4680
7        6805        5265
8        7515        5975
9        7515        6790
10       8230        6900
11       8770        7335
> d$intake.pre
 [1] 5260 5470 5640 6180 6390 6515 6805 7515 7515 8230 8770
> intake.pre[5]
[1] 6390
> intake.pre[c(3,5,7)]
[1] 5640 6390 6805
> v <- c(3,5,7)
> intake.pre[v]
[1] 5640 6390 6805
> intake.pre[1:5]
[1] 5260 5470 5640 6180 6390
> intake.pre[-c(3,5,7)]
[1] 5260 5470 6180 6515 7515 7515 8230 8770
> intake.post[intake.pre > 7000]
[1] 5975 6790 6900 7335
> intake.post[intake.pre > 7000 & intake.pre <= 8000]
[1] 5975 6790
> intake.pre > 7000 & intake.pre <= 8000
 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE
[11] FALSE
> d <- data.frame(intake.pre,intake.post)
> d[5,1]
[1] 6390
> d[5,]
  intake.pre intake.post
5       6390        5645
> d[d$intake.pre>7000,]
   intake.pre intake.post
8        7515        5975
9        7515        6790
10       8230        6900
11       8770        7335
> sel <- d$intake.pre>7000
> sel
 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE
[11]  TRUE
> d[sel,]
   intake.pre intake.post
8        7515        5975
9        7515        6790
10       8230        6900
11       8770        7335
> d[1:2,]
  intake.pre intake.post
1       5260        3910
2       5470        4220
> head(d)
  intake.pre intake.post
1       5260        3910
2       5470        4220
3       5640        3885
4       6180        5160
5       6390        5645
6       6515        4680
> energy
   expend stature
1    9.21   obese
2    7.53    lean
3    7.48    lean
4    8.08    lean
5    8.09    lean
6   10.15    lean
7    8.40    lean
8   10.88    lean
9    6.13    lean
10   7.90    lean
11  11.51   obese
12  12.79   obese
13   7.05    lean
14  11.85   obese
15   9.97   obese
16   7.48    lean
17   8.79   obese
18   9.69   obese
19   9.68   obese
20   7.58    lean
21   9.19   obese
22   8.11    lean
> exp.lean <- energy$expend[energy$stature=="lean"]
> exp.obese <- energy$expend[energy$stature=="obese"]
> l <- split(energy$expend, energy$stature)
> l
$lean
 [1]  7.53  7.48  8.08  8.09 10.15  8.40 10.88  6.13  7.90  7.05
[11]  7.48  7.58  8.11

$obese
[1]  9.21 11.51 12.79 11.85  9.97  8.79  9.69  9.68  9.19

> lapply(thuesen, mean, na.rm=T)
$blood.glucose
[1] 10.3

$short.velocity
[1] 1.325652

> sapply(thuesen, mean, na.rm=T)
 blood.glucose short.velocity 
     10.300000       1.325652 
> replicate(10,mean(rexp(20)))
 [1] 1.0677019 1.2166898 0.8923216 1.1281207 0.9636017 0.8406877
 [7] 1.3357814 0.8249408 0.9488707 0.5724575
> m <- matrix(rnorm(12),4)
> m
           [,1]      [,2]        [,3]
[1,] -2.5710730 0.2524470 -0.16886795
[2,]  0.5509498 1.5430648  0.05359794
[3,]  2.4002722 0.1624704 -1.23407417
[4,]  1.4791103 0.9484525 -0.84670929
> apply(m, 2, min)
[1] -2.5710730  0.1624704 -1.2340742
> tapply(energy$expend, energy$stature, median)
 lean obese 
 7.90  9.69 
> intake$post
 [1] 3910 4220 3885 5160 5645 4680 5265 5975 6790 6900 7335
> sort(intake$post)
 [1] 3885 3910 4220 4680 5160 5265 5645 5975 6790 6900 7335
> order(intake$post)
 [1]  3  1  2  6  4  7  5  8  9 10 11
> o <- order(intake$post)
> intake$post[o]
 [1] 3885 3910 4220 4680 5160 5265 5645 5975 6790 6900 7335
> intake$pre[o]
 [1] 5640 5260 5470 6515 6180 6805 6390 7515 7515 8230 8770
> intake.sorted <- intake[o,]
> save.image("ch1.RData")
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> load("ch1.RData")
> .foo <- dev.copy2eps
> rm(dev.copy2eps)
> ls()
 [1] "bmi"           "d"             "exp.lean"     
 [4] "exp.obese"     "fpain"         "height"       
 [7] "hh"            "intake.post"   "intake.pre"   
[10] "intake.sorted" "l"             "m"            
[13] "mylist"        "o"             "oops"         
[16] "pain"          "sel"           "v"            
[19] "weight"        "x"             "xbar"         
[22] "y"            
> dev.copy2eps <- .foo
> rm(height, weight)
> sink("myfile")
> ls()
> sink()
> attach(thuesen)
> blood.glucose
 [1] 15.3 10.8  8.1 19.5  7.2  5.3  9.3 11.1  7.5 12.2  6.7  5.2
[13] 19.0 15.1  6.7  8.6  4.2 10.3 12.5 16.1 13.3  4.9  8.8  9.5
> search()
 [1] ".GlobalEnv"        "thuesen"           "package:ISwR"     
 [4] "package:stats"     "package:graphics"  "package:grDevices"
 [7] "package:utils"     "package:datasets"  "package:methods"  
[10] "Autoloads"         "package:base"     
> detach()
> search()
 [1] ".GlobalEnv"        "package:ISwR"      "package:stats"    
 [4] "package:graphics"  "package:grDevices" "package:utils"    
 [7] "package:datasets"  "package:methods"   "Autoloads"        
[10] "package:base"     
> thue2 <- subset(thuesen,blood.glucose<7)
> thue2
   blood.glucose short.velocity
6            5.3           1.49
11           6.7           1.25
12           5.2           1.19
15           6.7           1.52
17           4.2           1.12
22           4.9           1.03
> thue3 <- transform(thuesen,log.gluc=log(blood.glucose))
> thue3
   blood.glucose short.velocity log.gluc
1           15.3           1.76 2.727853
2           10.8           1.34 2.379546
3            8.1           1.27 2.091864
4           19.5           1.47 2.970414
5            7.2           1.27 1.974081
6            5.3           1.49 1.667707
7            9.3           1.31 2.230014
8           11.1           1.09 2.406945
9            7.5           1.18 2.014903
10          12.2           1.22 2.501436
11           6.7           1.25 1.902108
12           5.2           1.19 1.648659
13          19.0           1.95 2.944439
14          15.1           1.28 2.714695
15           6.7           1.52 1.902108
16           8.6             NA 2.151762
17           4.2           1.12 1.435085
18          10.3           1.37 2.332144
19          12.5           1.19 2.525729
20          16.1           1.05 2.778819
21          13.3           1.32 2.587764
22           4.9           1.03 1.589235
23           8.8           1.12 2.174752
24           9.5           1.70 2.251292
> thue4 <- within(thuesen,{
+    log.gluc <- log(blood.glucose)
+    m <- mean(log.gluc)
+    centered.log.gluc <- log.gluc - m
+    rm(m)
+ })
> thue4
   blood.glucose short.velocity centered.log.gluc log.gluc
1           15.3           1.76       0.481879807 2.727853
2           10.8           1.34       0.133573113 2.379546
3            8.1           1.27      -0.154108960 2.091864
4           19.5           1.47       0.724441444 2.970414
5            7.2           1.27      -0.271891996 1.974081
6            5.3           1.49      -0.578266201 1.667707
7            9.3           1.31      -0.015958621 2.230014
8           11.1           1.09       0.160972087 2.406945
9            7.5           1.18      -0.231070001 2.014903
10          12.2           1.22       0.255462930 2.501436
11           6.7           1.25      -0.343865495 1.902108
12           5.2           1.19      -0.597314396 1.648659
13          19.0           1.95       0.698465958 2.944439
14          15.1           1.28       0.468721722 2.714695
15           6.7           1.52      -0.343865495 1.902108
16           8.6             NA      -0.094210818 2.151762
17           4.2           1.12      -0.810888496 1.435085
18          10.3           1.37       0.086170874 2.332144
19          12.5           1.19       0.279755623 2.525729
20          16.1           1.05       0.532846250 2.778819
21          13.3           1.32       0.341791014 2.587764
22           4.9           1.03      -0.656737817 1.589235
23           8.8           1.12      -0.071221300 2.174752
24           9.5           1.70       0.005318777 2.251292
> d <- par(mar=c(5,4,4,2)+.1)
> x <- runif(50,0,2)
> y <- runif(50,0,2)
> plot(x, y, main="Main title", sub="subtitle",
+      xlab="x-label", ylab="y-label")
> text(0.6,0.6,"text at (0.6,0.6)")
> abline(h=.6,v=.6)
> for (side in 1:4) mtext(-1:4,side=side,at=.7,line=-1:4)
> mtext(paste("side",1:4), side=1:4, line=-1,font=2)
> if (.make.epsf) dev.copy2eps(file="layout.ps")
> par(d)
> plot(x, y, type="n", xlab="", ylab="", axes=F)
> points(x,y)
> axis(1)
> axis(2,at=seq(0.2,1.8,0.2))
> box()
> title(main="Main title", sub="subtitle",
+     xlab="x-label", ylab="y-label")
> set.seed(1234) #make it happen....
> x <- rnorm(100)
> hist(x,freq=F)
> curve(dnorm(x),add=T)
> h <- hist(x, plot=F)
> ylim <- range(0, h$density, dnorm(0))
> hist(x, freq=F, ylim=ylim)
> curve(dnorm(x), add=T)
> if (.make.epsf) dev.copy2eps(file="hist+norm.ps")
> hist.with.normal <- function(x, xlab=deparse(substitute(x)),...)
+ {
+     h <- hist(x, plot=F, ...)
+     s <- sd(x)
+     m <- mean(x)
+     ylim <- range(0,h$density,dnorm(0,sd=s))
+     hist(x, freq=F, ylim=ylim, xlab=xlab, ...)
+     curve(dnorm(x,m,s), add=T)
+ }
> hist.with.normal(rnorm(200))
> y <- 12345
> x <- y/2
> while (abs(x*x-y) > 1e-10) x <- (x + y/x)/2
> x
[1] 111.1081
> x^2
[1] 12345
> x <- y/2
> repeat{
+     x <- (x + y/x)/2
+     if (abs(x*x-y) < 1e-10) break
+ }
> x
[1] 111.1081
> x <- seq(0, 1,.05)
> plot(x, x, ylab="y", type="l")
> for ( j in 2:8 ) lines(x, x^j)
> t.test(bmi, mu=22.5)$p.value
[1] 0.7442183
> print
function (x, ...) 
UseMethod("print")
<bytecode: 0x000001583f494b58>
<environment: namespace:base>
> ## length(methods("print")) # quoted in text 
> ## (this test zapped 2024-10-01 due to platform dependency)
> thuesen2 <- read.table(
+    system.file("rawdata","thuesen.txt",package="ISwR"), header=T)
> thuesen2
   blood.glucose short.velocity
1           15.3           1.76
2           10.8           1.34
3            8.1           1.27
4           19.5           1.47
5            7.2           1.27
6            5.3           1.49
7            9.3           1.31
8           11.1           1.09
9            7.5           1.18
10          12.2           1.22
11           6.7           1.25
12           5.2           1.19
13          19.0           1.95
14          15.1           1.28
15           6.7           1.52
16           8.6             NA
17           4.2           1.12
18          10.3           1.37
19          12.5           1.19
20          16.1           1.05
21          13.3           1.32
22           4.9           1.03
23           8.8           1.12
24           9.5           1.70
> levels(secretin$time)
[1] "20"  "30"  "60"  "90"  "pre"
> ## IGNORE_RDIFF_BEGIN
> # keep CRAN happy - this output is obviously system-dependent
> system.file("rawdata", "thuesen.txt", package="ISwR")
[1] "D:/RCompile/CRANincoming/R-devel/lib/ISwR/rawdata/thuesen.txt"
> ## IGNORE_RDIFF_END
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> sample(1:40,5)
[1] 19  8 33 21 31
> sample(c("H","T"), 10, replace=T)
 [1] "T" "T" "H" "T" "T" "T" "H" "T" "H" "H"
> sample(c("succ", "fail"), 10, replace=T, prob=c(0.9, 0.1))
 [1] "fail" "fail" "succ" "succ" "fail" "succ" "succ" "succ"
 [9] "succ" "succ"
> 1/prod(40:36)
[1] 1.266449e-08
> prod(5:1)/prod(40:36)
[1] 1.519738e-06
> 1/choose(40,5)
[1] 1.519738e-06
> x <- seq(-4,4,0.1)
> plot(x,dnorm(x),type="l")
> if (.make.epsf) dev.copy2eps(file="bellcurve.ps")
> x <- 0:50
> plot(x,dbinom(x,size=50,prob=.33),type="h")
> if (.make.epsf) dev.copy2eps(file="binomdist.ps")
> 1-pnorm(160,mean=132,sd=13)
[1] 0.01562612
> pbinom(16,size=20,prob=.5)
[1] 0.9987116
> 1-pbinom(15,size=20,prob=.5)
[1] 0.005908966
> 1-pbinom(15,20,.5)+pbinom(4,20,.5)
[1] 0.01181793
> xbar <- 83
> sigma <- 12
> n <- 5
> sem <- sigma/sqrt(n)
> sem
[1] 5.366563
> xbar + sem * qnorm(0.025)
[1] 72.48173
> xbar + sem * qnorm(0.975)
[1] 93.51827
> set.seed(310367)
> rnorm(10)
 [1] -0.2996466 -0.1718510 -0.1955634  1.2280843 -2.6074190
 [6] -0.2999453 -0.4655102 -1.5680666  1.2545876 -1.8028839
> rnorm(10)
 [1]  1.7082495  0.1432875 -1.0271750 -0.9246647  0.6402383
 [6]  0.7201677 -0.3071239  1.2090712  0.8699669  0.5882753
> rnorm(10,mean=7,sd=5)
 [1]  8.934983  8.611855  4.675578  3.670129  4.223117  5.484290
 [7] 12.141946  8.057541 -2.893164 13.590586
> rbinom(10,size=20,prob=.5)
 [1] 12 11 10  8 11  8 11  8  8 13
>  ## no data sets used by exercises
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> x <- rnorm(50)
> mean(x)
[1] -0.1565061
> sd(x)
[1] 1.175373
> var(x)
[1] 1.381502
> median(x)
[1] -0.3560654
> quantile(x)
        0%        25%        50%        75%       100% 
-2.1565603 -0.9689894 -0.3560654  0.6133640  2.9887241 
> pvec <- seq(0,1,0.1)
> pvec
 [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
> quantile(x,pvec)
         0%         10%         20%         30%         40% 
-2.15656030 -1.54828008 -1.13398001 -0.79701920 -0.62973546 
        50%         60%         70%         80%         90% 
-0.35606539 -0.05965905  0.36950570  0.85233024  1.29837806 
       100% 
 2.98872414 
> attach(juul)
>  mean(igf1)
[1] NA
> mean(igf1,na.rm=T)
[1] 340.168
> sum(!is.na(igf1))
[1] 1018
> summary(igf1)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
   25.0   202.2   313.5   340.2   462.8   915.0     321 
> summary(juul)
      age            menarche          sex             igf1      
 Min.   : 0.170   Min.   :1.000   Min.   :1.000   Min.   : 25.0  
 1st Qu.: 9.053   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:202.2  
 Median :12.560   Median :1.000   Median :2.000   Median :313.5  
 Mean   :15.095   Mean   :1.476   Mean   :1.534   Mean   :340.2  
 3rd Qu.:16.855   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:462.8  
 Max.   :83.000   Max.   :2.000   Max.   :2.000   Max.   :915.0  
 NA's   :5        NA's   :635     NA's   :5       NA's   :321    
     tanner        testvol      
 Min.   :1.00   Min.   : 1.000  
 1st Qu.:1.00   1st Qu.: 1.000  
 Median :2.00   Median : 3.000  
 Mean   :2.64   Mean   : 7.896  
 3rd Qu.:5.00   3rd Qu.:15.000  
 Max.   :5.00   Max.   :30.000  
 NA's   :240    NA's   :859     
> detach(juul)
> juul$sex <- factor(juul$sex,labels=c("M","F"))
> juul$menarche <- factor(juul$menarche,labels=c("No","Yes"))
> juul$tanner <- factor(juul$tanner,
+                       labels=c("I","II","III","IV","V"))
> attach(juul)
> summary(juul)
      age         menarche     sex           igf1      
 Min.   : 0.170   No  :369   M   :621   Min.   : 25.0  
 1st Qu.: 9.053   Yes :335   F   :713   1st Qu.:202.2  
 Median :12.560   NA's:635   NA's:  5   Median :313.5  
 Mean   :15.095                         Mean   :340.2  
 3rd Qu.:16.855                         3rd Qu.:462.8  
 Max.   :83.000                         Max.   :915.0  
 NA's   :5                              NA's   :321    
  tanner       testvol      
 I   :515   Min.   : 1.000  
 II  :103   1st Qu.: 1.000  
 III : 72   Median : 3.000  
 IV  : 81   Mean   : 7.896  
 V   :328   3rd Qu.:15.000  
 NA's:240   Max.   :30.000  
            NA's   :859     
> hist(x)
> if (.make.epsf) dev.copy2eps(file="hist.ps")
> mid.age <- c(2.5,7.5,13,16.5,17.5,19,22.5,44.5,70.5)
> acc.count <- c(28,46,58,20,31,64,149,316,103)
> age.acc <- rep(mid.age,acc.count)
> brk <- c(0,5,10,16,17,18,20,25,60,80)
> hist(age.acc,breaks=brk)
> if (.make.epsf) dev.copy2eps(file="hist-acc-right.ps")
> n <- length(x)
> plot(sort(x),(1:n)/n,type="s",ylim=c(0,1))
> if (.make.epsf) dev.copy2eps(file="empdist.ps")
> qqnorm(x)
> if (.make.epsf) dev.copy2eps(file="qqnorm.ps")
> par(mfrow=c(1,2))
> boxplot(IgM)
> boxplot(log(IgM))
> par(mfrow=c(1,1))
> if (.make.epsf) dev.copy2eps(file="boxplot-IgM.ps")
> attach(red.cell.folate)
> tapply(folate,ventilation,mean)
N2O+O2,24h  N2O+O2,op     O2,24h 
  316.6250   256.4444   278.0000 
> tapply(folate,ventilation,sd)
N2O+O2,24h  N2O+O2,op     O2,24h 
  58.71709   37.12180   33.75648 
> tapply(folate,ventilation,length)
N2O+O2,24h  N2O+O2,op     O2,24h 
         8          9          5 
> xbar <- tapply(folate, ventilation, mean)
> s <- tapply(folate, ventilation, sd)
> n <- tapply(folate, ventilation, length)
> cbind(mean=xbar, std.dev=s, n=n)
               mean  std.dev n
N2O+O2,24h 316.6250 58.71709 8
N2O+O2,op  256.4444 37.12180 9
O2,24h     278.0000 33.75648 5
> tapply(igf1, tanner, mean)
  I  II III  IV   V 
 NA  NA  NA  NA  NA 
> tapply(igf1, tanner, mean, na.rm=T)
       I       II      III       IV        V 
207.4727 352.6714 483.2222 513.0172 465.3344 
> aggregate(juul[c("age","igf1")],
+           list(sex=juul$sex), mean, na.rm=T)
  sex      age     igf1
1   M 15.38436 310.8866
2   F 14.84363 368.1006
> aggregate(juul[c("age","igf1")], juul["sex"], mean, na.rm=T)
  sex      age     igf1
1   M 15.38436 310.8866
2   F 14.84363 368.1006
> by(juul, juul["sex"], summary)
sex: M
      age        menarche   sex          igf1        tanner   
 Min.   : 0.17   No  :  0   M:621   Min.   : 29.0   I   :291  
 1st Qu.: 8.85   Yes :  0   F:  0   1st Qu.:176.0   II  : 55  
 Median :12.38   NA's:621           Median :280.0   III : 34  
 Mean   :15.38                      Mean   :310.9   IV  : 41  
 3rd Qu.:16.77                      3rd Qu.:430.2   V   :124  
 Max.   :83.00                      Max.   :915.0   NA's: 76  
                                    NA's   :145               
    testvol      
 Min.   : 1.000  
 1st Qu.: 1.000  
 Median : 3.000  
 Mean   : 7.896  
 3rd Qu.:15.000  
 Max.   :30.000  
 NA's   :141     
------------------------------------------------- 
sex: F
      age        menarche   sex          igf1        tanner   
 Min.   : 0.25   No  :369   M:  0   Min.   : 25.0   I   :224  
 1st Qu.: 9.30   Yes :335   F:713   1st Qu.:233.0   II  : 48  
 Median :12.80   NA's:  9           Median :352.0   III : 38  
 Mean   :14.84                      Mean   :368.1   IV  : 40  
 3rd Qu.:16.93                      3rd Qu.:483.0   V   :204  
 Max.   :75.12                      Max.   :914.0   NA's:159  
                                    NA's   :176               
    testvol   
 Min.   : NA  
 1st Qu.: NA  
 Median : NA  
 Mean   :NaN  
 3rd Qu.: NA  
 Max.   : NA  
 NA's   :713  
> attach(energy)
> expend.lean <- expend[stature=="lean"]
> expend.obese <- expend[stature=="obese"]
> par(mfrow=c(2,1))
> hist(expend.lean,breaks=10,xlim=c(5,13),ylim=c(0,4),col="white")
> hist(expend.obese,breaks=10,xlim=c(5,13),ylim=c(0,4),col="grey")
> par(mfrow=c(1,1))
> if (.make.epsf) dev.copy2eps(file="expend-hist-2on1.ps")
> boxplot(expend ~ stature)
> if (.make.epsf) dev.copy2eps(file="boxplots-expend-stat.ps")
> boxplot(expend.lean,expend.obese)
> opar <- par(mfrow=c(2,2), mex=0.8, mar=c(3,3,2,1)+.1)
> stripchart(expend ~ stature)
> stripchart(expend ~ stature, method="stack")
> stripchart(expend ~ stature, method="jitter")
> stripchart(expend ~ stature, method="jitter", jitter=.03)
> par(opar)
> if (.make.epsf) dev.copy2eps(file="stripcharts-expend-stat.ps")
> caff.marital <- matrix(c(652,1537,598,242,36,46,38,21,218
+ ,327,106,67),
+ nrow=3,byrow=T)
> caff.marital
     [,1] [,2] [,3] [,4]
[1,]  652 1537  598  242
[2,]   36   46   38   21
[3,]  218  327  106   67
> colnames(caff.marital) <- c("0","1-150","151-300",">300")
> rownames(caff.marital) <- c("Married","Prev.married","Single")
> caff.marital
               0 1-150 151-300 >300
Married      652  1537     598  242
Prev.married  36    46      38   21
Single       218   327     106   67
> names(dimnames(caff.marital)) <- c("marital","consumption")
> caff.marital
              consumption
marital          0 1-150 151-300 >300
  Married      652  1537     598  242
  Prev.married  36    46      38   21
  Single       218   327     106   67
> as.data.frame(as.table(caff.marital))
        marital consumption Freq
1       Married           0  652
2  Prev.married           0   36
3        Single           0  218
4       Married       1-150 1537
5  Prev.married       1-150   46
6        Single       1-150  327
7       Married     151-300  598
8  Prev.married     151-300   38
9        Single     151-300  106
10      Married        >300  242
11 Prev.married        >300   21
12       Single        >300   67
> table(sex)
sex
  M   F 
621 713 
> table(sex,menarche)
   menarche
sex  No Yes
  M   0   0
  F 369 335
> table(menarche,tanner)
        tanner
menarche   I  II III  IV   V
     No  221  43  32  14   2
     Yes   1   1   5  26 202
> xtabs(~ tanner + sex, data=juul)
      sex
tanner   M   F
   I   291 224
   II   55  48
   III  34  38
   IV   41  40
   V   124 204
> xtabs(~ dgn + diab + coma, data=stroke)
, , coma = No

     diab
dgn    No Yes
  ICH  53   6
  ID  143  21
  INF 411  64
  SAH  38   0

, , coma = Yes

     diab
dgn    No Yes
  ICH  19   1
  ID   23   3
  INF  23   2
  SAH   9   0

> ftable(coma + diab ~ dgn, data=stroke)
    coma  No     Yes    
    diab  No Yes  No Yes
dgn                     
ICH       53   6  19   1
ID       143  21  23   3
INF      411  64  23   2
SAH       38   0   9   0
> t(caff.marital)
           marital
consumption Married Prev.married Single
    0           652           36    218
    1-150      1537           46    327
    151-300     598           38    106
    >300        242           21     67
> tanner.sex <- table(tanner,sex)
> tanner.sex
      sex
tanner   M   F
   I   291 224
   II   55  48
   III  34  38
   IV   41  40
   V   124 204
> margin.table(tanner.sex,1)
tanner
  I  II III  IV   V 
515 103  72  81 328 
> margin.table(tanner.sex,2)
sex
  M   F 
545 554 
> prop.table(tanner.sex,1)
      sex
tanner         M         F
   I   0.5650485 0.4349515
   II  0.5339806 0.4660194
   III 0.4722222 0.5277778
   IV  0.5061728 0.4938272
   V   0.3780488 0.6219512
> tanner.sex/sum(tanner.sex)
      sex
tanner          M          F
   I   0.26478617 0.20382166
   II  0.05004550 0.04367607
   III 0.03093722 0.03457689
   IV  0.03730664 0.03639672
   V   0.11282985 0.18562329
> total.caff <- margin.table(caff.marital,2)
> total.caff
consumption
      0   1-150 151-300    >300 
    906    1910     742     330 
> barplot(total.caff, col="white")
> if (.make.epsf) dev.copy2eps(file="simple-bar.ps")
> par(mfrow=c(2,2))
> barplot(caff.marital, col="white")
> barplot(t(caff.marital), col="white")
> barplot(t(caff.marital), col="white", beside=T)
> barplot(prop.table(t(caff.marital),2), col="white", beside=T)
> par(mfrow=c(1,1))
> if (.make.epsf) dev.copy2eps(file="mat-4-bar.ps")
> barplot(prop.table(t(caff.marital),2),beside=T,
+ legend.text=colnames(caff.marital),
+ col=c("white","grey80","grey50","black"))
> if (.make.epsf) dev.copy2eps(file="pretty-bar.ps")
> dotchart(t(caff.marital), lcolor="black")
> if (.make.epsf) dev.copy2eps(file="dotchart.ps")
> opar <- par(mfrow=c(2,2),mex=0.8, mar=c(1,1,2,1))
> slices <- c("white","grey80","grey50","black")
> pie(caff.marital["Married",], main="Married", col=slices)
> pie(caff.marital["Prev.married",],
+          main="Previously married", col=slices)
> pie(caff.marital["Single",], main="Single", col=slices)
> par(opar)
> if (.make.epsf) dev.copy2eps(file="pie.ps")
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> daily.intake <- c(5260,5470,5640,6180,6390,6515,
+ 6805,7515,7515,8230,8770)
> mean(daily.intake)
[1] 6753.636
> sd(daily.intake)
[1] 1142.123
> quantile(daily.intake)
  0%  25%  50%  75% 100% 
5260 5910 6515 7515 8770 
> t.test(daily.intake,mu=7725)

	One Sample t-test

data:  daily.intake
t = -2.8208, df = 10, p-value = 0.01814
alternative hypothesis: true mean is not equal to 7725
95 percent confidence interval:
 5986.348 7520.925
sample estimates:
mean of x 
 6753.636 

> t.test(daily.intake,mu=7725)

	One Sample t-test

data:  daily.intake
t = -2.8208, df = 10, p-value = 0.01814
alternative hypothesis: true mean is not equal to 7725
95 percent confidence interval:
 5986.348 7520.925
sample estimates:
mean of x 
 6753.636 

> wilcox.test(daily.intake, mu=7725)

	Wilcoxon signed rank test with continuity correction

data:  daily.intake
V = 8, p-value = 0.0293
alternative hypothesis: true location is not equal to 7725

Warning message:
In wilcox.test.default(daily.intake, mu = 7725) :
  cannot compute exact p-value with ties
> attach(energy)
> energy
   expend stature
1    9.21   obese
2    7.53    lean
3    7.48    lean
4    8.08    lean
5    8.09    lean
6   10.15    lean
7    8.40    lean
8   10.88    lean
9    6.13    lean
10   7.90    lean
11  11.51   obese
12  12.79   obese
13   7.05    lean
14  11.85   obese
15   9.97   obese
16   7.48    lean
17   8.79   obese
18   9.69   obese
19   9.68   obese
20   7.58    lean
21   9.19   obese
22   8.11    lean
> t.test(expend~stature)

	Welch Two Sample t-test

data:  expend by stature
t = -3.8555, df = 15.919, p-value = 0.001411
alternative hypothesis: true difference in means between group lean and group obese is not equal to 0
95 percent confidence interval:
 -3.459167 -1.004081
sample estimates:
 mean in group lean mean in group obese 
           8.066154           10.297778 

> t.test(expend~stature, var.equal=T)

	Two Sample t-test

data:  expend by stature
t = -3.9456, df = 20, p-value = 0.000799
alternative hypothesis: true difference in means between group lean and group obese is not equal to 0
95 percent confidence interval:
 -3.411451 -1.051796
sample estimates:
 mean in group lean mean in group obese 
           8.066154           10.297778 

> var.test(expend~stature)

	F test to compare two variances

data:  expend by stature
F = 0.78445, num df = 12, denom df = 8, p-value = 0.6797
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.1867876 2.7547991
sample estimates:
ratio of variances 
          0.784446 

> wilcox.test(expend~stature)

	Wilcoxon rank sum test with continuity correction

data:  expend by stature
W = 12, p-value = 0.002122
alternative hypothesis: true location shift is not equal to 0

Warning message:
In wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...) :
  cannot compute exact p-value with ties
> attach(intake)
> intake
    pre post
1  5260 3910
2  5470 4220
3  5640 3885
4  6180 5160
5  6390 5645
6  6515 4680
7  6805 5265
8  7515 5975
9  7515 6790
10 8230 6900
11 8770 7335
> post - pre
 [1] -1350 -1250 -1755 -1020  -745 -1835 -1540 -1540  -725 -1330
[11] -1435
> t.test(pre, post, paired=T)

	Paired t-test

data:  pre and post
t = 11.941, df = 10, p-value = 3.059e-07
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 1074.072 1566.838
sample estimates:
mean difference 
       1320.455 

> t.test(pre, post) #WRONG!

	Welch Two Sample t-test

data:  pre and post
t = 2.6242, df = 19.92, p-value = 0.01629
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
  270.5633 2370.3458
sample estimates:
mean of x mean of y 
 6753.636  5433.182 

> wilcox.test(pre, post, paired=T)

	Wilcoxon signed rank test with continuity correction

data:  pre and post
V = 66, p-value = 0.00384
alternative hypothesis: true location shift is not equal to 0

Warning message:
In wilcox.test.default(pre, post, paired = T) :
  cannot compute exact p-value with ties
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> attach(thuesen)
> lm(short.velocity~blood.glucose)

Call:
lm(formula = short.velocity ~ blood.glucose)

Coefficients:
  (Intercept)  blood.glucose  
      1.09781        0.02196  

> summary(lm(short.velocity~blood.glucose))

Call:
lm(formula = short.velocity ~ blood.glucose)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.40141 -0.14760 -0.02202  0.03001  0.43490 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)    1.09781    0.11748   9.345 6.26e-09 ***
blood.glucose  0.02196    0.01045   2.101   0.0479 *  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.2167 on 21 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.1737,	Adjusted R-squared:  0.1343 
F-statistic: 4.414 on 1 and 21 DF,  p-value: 0.0479

> summary(lm(short.velocity~blood.glucose))

Call:
lm(formula = short.velocity ~ blood.glucose)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.40141 -0.14760 -0.02202  0.03001  0.43490 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)    1.09781    0.11748   9.345 6.26e-09 ***
blood.glucose  0.02196    0.01045   2.101   0.0479 *  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.2167 on 21 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.1737,	Adjusted R-squared:  0.1343 
F-statistic: 4.414 on 1 and 21 DF,  p-value: 0.0479

> plot(blood.glucose,short.velocity)
> abline(lm(short.velocity~blood.glucose))
> if (.make.epsf) dev.copy2eps(file="velo-gluc-line.ps")
> lm.velo <- lm(short.velocity~blood.glucose)
> fitted(lm.velo)
       1        2        3        4        5        6        7 
1.433841 1.335010 1.275711 1.526084 1.255945 1.214216 1.302066 
       8        9       10       11       12       13       14 
1.341599 1.262534 1.365758 1.244964 1.212020 1.515103 1.429449 
      15       17       18       19       20       21       22 
1.244964 1.190057 1.324029 1.372346 1.451411 1.389916 1.205431 
      23       24 
1.291085 1.306459 
> resid(lm.velo)
           1            2            3            4            5 
 0.326158532  0.004989882 -0.005711308 -0.056084062  0.014054962 
           6            7            8            9           10 
 0.275783754  0.007933665 -0.251598875 -0.082533795 -0.145757649 
          11           12           13           14           15 
 0.005036223 -0.022019994  0.434897199 -0.149448964  0.275036223 
          17           18           19           20           21 
-0.070057471  0.045971143 -0.182346406 -0.401411486 -0.069916424 
          22           23           24 
-0.175431237 -0.171085074  0.393541161 
> options(error=expression(NULL))
> plot(blood.glucose,short.velocity)
> lines(blood.glucose,fitted(lm.velo))
Error in xy.coords(x, y) : 'x' and 'y' lengths differ
Calls: lines -> lines.default -> plot.xy -> xy.coords
> options(error=NULL)
> lines(blood.glucose[!is.na(short.velocity)],fitted(lm.velo))
> cc <- complete.cases(thuesen)
> options(na.action=na.exclude)
> lm.velo <- lm(short.velocity~blood.glucose)
> fitted(lm.velo)
       1        2        3        4        5        6        7 
1.433841 1.335010 1.275711 1.526084 1.255945 1.214216 1.302066 
       8        9       10       11       12       13       14 
1.341599 1.262534 1.365758 1.244964 1.212020 1.515103 1.429449 
      15       16       17       18       19       20       21 
1.244964       NA 1.190057 1.324029 1.372346 1.451411 1.389916 
      22       23       24 
1.205431 1.291085 1.306459 
> segments(blood.glucose,fitted(lm.velo),
+          blood.glucose,short.velocity)
> if (.make.epsf) dev.copy2eps(file="velo-gluc-seg.ps")
> plot(fitted(lm.velo),resid(lm.velo))
> if (.make.epsf) dev.copy2eps(file="velo-gluc-resid.ps")
> qqnorm(resid(lm.velo))
> if (.make.epsf) dev.copy2eps(file="velo-gluc-qqnorm.ps")
> predict(lm.velo)
       1        2        3        4        5        6        7 
1.433841 1.335010 1.275711 1.526084 1.255945 1.214216 1.302066 
       8        9       10       11       12       13       14 
1.341599 1.262534 1.365758 1.244964 1.212020 1.515103 1.429449 
      15       16       17       18       19       20       21 
1.244964       NA 1.190057 1.324029 1.372346 1.451411 1.389916 
      22       23       24 
1.205431 1.291085 1.306459 
> predict(lm.velo,int="c")
        fit      lwr      upr
1  1.433841 1.291371 1.576312
2  1.335010 1.240589 1.429431
3  1.275711 1.169536 1.381887
4  1.526084 1.306561 1.745607
5  1.255945 1.139367 1.372523
6  1.214216 1.069315 1.359118
7  1.302066 1.205244 1.398889
8  1.341599 1.246317 1.436881
9  1.262534 1.149694 1.375374
10 1.365758 1.263750 1.467765
11 1.244964 1.121641 1.368287
12 1.212020 1.065457 1.358583
13 1.515103 1.305352 1.724854
14 1.429449 1.290217 1.568681
15 1.244964 1.121641 1.368287
16       NA       NA       NA
17 1.190057 1.026217 1.353898
18 1.324029 1.230050 1.418008
19 1.372346 1.267629 1.477064
20 1.451411 1.295446 1.607377
21 1.389916 1.276444 1.503389
22 1.205431 1.053805 1.357057
23 1.291085 1.191084 1.391086
24 1.306459 1.210592 1.402326
> predict(lm.velo,int="p")
        fit       lwr      upr
1  1.433841 0.9612137 1.906469
2  1.335010 0.8745815 1.795439
3  1.275711 0.8127292 1.738693
4  1.526084 1.0248161 2.027352
5  1.255945 0.7904672 1.721423
6  1.214216 0.7408499 1.687583
7  1.302066 0.8411393 1.762993
8  1.341599 0.8809929 1.802205
9  1.262534 0.7979780 1.727090
10 1.365758 0.9037136 1.827802
11 1.244964 0.7777510 1.712177
12 1.212020 0.7381424 1.685898
13 1.515103 1.0180367 2.012169
14 1.429449 0.9577873 1.901111
15 1.244964 0.7777510 1.712177
16       NA        NA       NA
17 1.190057 0.7105546 1.669560
18 1.324029 0.8636906 1.784367
19 1.372346 0.9096964 1.834996
20 1.451411 0.9745421 1.928281
21 1.389916 0.9252067 1.854626
22 1.205431 0.7299634 1.680899
23 1.291085 0.8294798 1.752690
24 1.306459 0.8457315 1.767186
Warning message:
In predict.lm(lm.velo, int = "p") :
  predictions on current data refer to _future_ responses

> pred.frame <- data.frame(blood.glucose=4:20)
> pp <- predict(lm.velo, int="p", newdata=pred.frame)
> pc <- predict(lm.velo, int="c", newdata=pred.frame)
> plot(blood.glucose,short.velocity,
+      ylim=range(short.velocity, pp, na.rm=T))
> pred.gluc <- pred.frame$blood.glucose
> matlines(pred.gluc, pc, lty=c(1,2,2), col="black")
> matlines(pred.gluc, pp, lty=c(1,3,3), col="black")
> if (.make.epsf) dev.copy2eps(file="velo-gluc-final.ps")
> options(error=expression(NULL))
> cor(blood.glucose,short.velocity)
[1] NA
> options(error=NULL)
> cor(blood.glucose,short.velocity,use="complete.obs")
[1] 0.4167546
> cor(thuesen,use="complete.obs")
               blood.glucose short.velocity
blood.glucose      1.0000000      0.4167546
short.velocity     0.4167546      1.0000000
> cor.test(blood.glucose,short.velocity)

	Pearson's product-moment correlation

data:  blood.glucose and short.velocity
t = 2.101, df = 21, p-value = 0.0479
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.005496682 0.707429479
sample estimates:
      cor 
0.4167546 

> cor.test(blood.glucose,short.velocity,method="spearman")

	Spearman's rank correlation rho

data:  blood.glucose and short.velocity
S = 1380.4, p-value = 0.1392
alternative hypothesis: true rho is not equal to 0
sample estimates:
     rho 
0.318002 

Warning message:
In cor.test.default(blood.glucose, short.velocity, method = "spearman") :
  Cannot compute exact p-value with ties
> cor.test(blood.glucose,short.velocity,method="kendall")

	Kendall's rank correlation tau

data:  blood.glucose and short.velocity
z = 1.5604, p-value = 0.1187
alternative hypothesis: true tau is not equal to 0
sample estimates:
      tau 
0.2350616 

Warning message:
In cor.test.default(blood.glucose, short.velocity, method = "kendall") :
  Cannot compute exact p-value with ties
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> attach(red.cell.folate)
> summary(red.cell.folate)
     folate          ventilation
 Min.   :206.0   N2O+O2,24h:8   
 1st Qu.:249.5   N2O+O2,op :9   
 Median :274.0   O2,24h    :5   
 Mean   :283.2                  
 3rd Qu.:305.5                  
 Max.   :392.0                  
> anova(lm(folate~ventilation))
Analysis of Variance Table

Response: folate
            Df Sum Sq Mean Sq F value  Pr(>F)  
ventilation  2  15516  7757.9  3.7113 0.04359 *
Residuals   19  39716  2090.3                  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> attach(juul)
> anova(lm(igf1~tanner))  ## WRONG!
Analysis of Variance Table

Response: igf1
           Df   Sum Sq  Mean Sq F value    Pr(>F)    
tanner      1 10985605 10985605  686.07 < 2.2e-16 ***
Residuals 790 12649728    16012                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> juul$tanner <- factor(juul$tanner,
+                       labels=c("I","II","III","IV","V"))
> detach(juul)
> attach(juul)
> summary(tanner)
   I   II  III   IV    V NA's 
 515  103   72   81  328  240 
> anova(lm(igf1~tanner))
Analysis of Variance Table

Response: igf1
           Df   Sum Sq Mean Sq F value    Pr(>F)    
tanner      4 12696217 3174054  228.35 < 2.2e-16 ***
Residuals 787 10939116   13900                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> summary(lm(folate~ventilation))

Call:
lm(formula = folate ~ ventilation)

Residuals:
    Min      1Q  Median      3Q     Max 
-73.625 -35.361  -4.444  35.625  75.375 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)            316.62      16.16  19.588 4.65e-14 ***
ventilationN2O+O2,op   -60.18      22.22  -2.709   0.0139 *  
ventilationO2,24h      -38.62      26.06  -1.482   0.1548    
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 45.72 on 19 degrees of freedom
Multiple R-squared:  0.2809,	Adjusted R-squared:  0.2052 
F-statistic: 3.711 on 2 and 19 DF,  p-value: 0.04359

> pairwise.t.test(folate, ventilation, p.adj="bonferroni")

	Pairwise comparisons using t tests with pooled SD 

data:  folate and ventilation 

          N2O+O2,24h N2O+O2,op
N2O+O2,op 0.042      -        
O2,24h    0.464      1.000    

P value adjustment method: bonferroni 
> pairwise.t.test(folate,ventilation)

	Pairwise comparisons using t tests with pooled SD 

data:  folate and ventilation 

          N2O+O2,24h N2O+O2,op
N2O+O2,op 0.042      -        
O2,24h    0.310      0.408    

P value adjustment method: holm 
> oneway.test(folate~ventilation)

	One-way analysis of means (not assuming equal variances)

data:  folate and ventilation
F = 2.9704, num df = 2.000, denom df = 11.065, p-value =
0.09277

>  pairwise.t.test(folate,ventilation,pool.sd=F)

	Pairwise comparisons using t tests with non-pooled SD 

data:  folate and ventilation 

          N2O+O2,24h N2O+O2,op
N2O+O2,op 0.087      -        
O2,24h    0.321      0.321    

P value adjustment method: holm 
> xbar <- tapply(folate, ventilation, mean)
> s <- tapply(folate, ventilation, sd)
> n <- tapply(folate, ventilation, length)
> sem <- s/sqrt(n)
> stripchart(folate~ventilation, method="jitter",
+    jitter=0.05, pch=16, vert=T)
> arrows(1:3,xbar+sem,1:3,xbar-sem,angle=90,code=3,length=.1)
> lines(1:3,xbar,pch=4,type="b",cex=2)
> if (.make.epsf) dev.copy2eps(file="oneway.ps")
> bartlett.test(folate~ventilation)

	Bartlett test of homogeneity of variances

data:  folate by ventilation
Bartlett's K-squared = 2.0951, df = 2, p-value = 0.3508

> kruskal.test(folate~ventilation)

	Kruskal-Wallis rank sum test

data:  folate by ventilation
Kruskal-Wallis chi-squared = 4.1852, df = 2, p-value =
0.1234

> attach(heart.rate)
> heart.rate
    hr subj time
1   96    1    0
2  110    2    0
3   89    3    0
4   95    4    0
5  128    5    0
6  100    6    0
7   72    7    0
8   79    8    0
9  100    9    0
10  92    1   30
11 106    2   30
12  86    3   30
13  78    4   30
14 124    5   30
15  98    6   30
16  68    7   30
17  75    8   30
18 106    9   30
19  86    1   60
20 108    2   60
21  85    3   60
22  78    4   60
23 118    5   60
24 100    6   60
25  67    7   60
26  74    8   60
27 104    9   60
28  92    1  120
29 114    2  120
30  83    3  120
31  83    4  120
32 118    5  120
33  94    6  120
34  71    7  120
35  74    8  120
36 102    9  120
> gl(9,1,36)
 [1] 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9 1 2 3 4
[32] 5 6 7 8 9
Levels: 1 2 3 4 5 6 7 8 9
> gl(4,9,36,labels=c(0,30,60,120))
 [1] 0   0   0   0   0   0   0   0   0   30  30  30  30  30  30 
[16] 30  30  30  60  60  60  60  60  60  60  60  60  120 120 120
[31] 120 120 120 120 120 120
Levels: 0 30 60 120
> anova(lm(hr~subj+time))
Analysis of Variance Table

Response: hr
          Df Sum Sq Mean Sq F value    Pr(>F)    
subj       8 8966.6 1120.82 90.6391 4.863e-16 ***
time       3  151.0   50.32  4.0696   0.01802 *  
Residuals 24  296.8   12.37                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> interaction.plot(time, subj, hr)
> if (.make.epsf) dev.copy2eps(file="interaction-plot.ps")
> interaction.plot(ordered(time),subj,hr)
> friedman.test(hr~time|subj,data=heart.rate)

	Friedman rank sum test

data:  hr and time and subj
Friedman chi-squared = 8.5059, df = 3, p-value = 0.03664

> attach(thuesen)
> lm.velo <- lm(short.velocity~blood.glucose)
> anova(lm.velo)
Analysis of Variance Table

Response: short.velocity
              Df  Sum Sq  Mean Sq F value Pr(>F)  
blood.glucose  1 0.20727 0.207269   4.414 0.0479 *
Residuals     21 0.98610 0.046957                 
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> prop.test(39,215,.15)

	1-sample proportions test with continuity correction

data:  39 out of 215, null probability 0.15
X-squared = 1.425, df = 1, p-value = 0.2326
alternative hypothesis: true p is not equal to 0.15
95 percent confidence interval:
 0.1335937 0.2408799
sample estimates:
        p 
0.1813953 

> binom.test(39,215,.15)

	Exact binomial test

data:  39 and 215
number of successes = 39, number of trials = 215, p-value
= 0.2135
alternative hypothesis: true probability of success is not equal to 0.15
95 percent confidence interval:
 0.1322842 0.2395223
sample estimates:
probability of success 
             0.1813953 

> lewitt.machin.success <- c(9,4)
> lewitt.machin.total <- c(12,13)
> prop.test(lewitt.machin.success,lewitt.machin.total)

	2-sample test for equality of proportions with continuity
	correction

data:  lewitt.machin.success out of lewitt.machin.total
X-squared = 3.2793, df = 1, p-value = 0.07016
alternative hypothesis: two.sided
95 percent confidence interval:
 0.01151032 0.87310506
sample estimates:
   prop 1    prop 2 
0.7500000 0.3076923 

> matrix(c(9,4,3,9),2)
     [,1] [,2]
[1,]    9    3
[2,]    4    9
> lewitt.machin <- matrix(c(9,4,3,9),2)
> fisher.test(lewitt.machin)

	Fisher's Exact Test for Count Data

data:  lewitt.machin
p-value = 0.04718
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
  0.9006803 57.2549701
sample estimates:
odds ratio 
  6.180528 

> chisq.test(lewitt.machin)

	Pearson's Chi-squared test with Yates' continuity
	correction

data:  lewitt.machin
X-squared = 3.2793, df = 1, p-value = 0.07016

> caesar.shoe
    <4  4 4.5  5 5.5  6+
Yes  5  7   6  7   8  10
No  17 28  36 41  46 140
> caesar.shoe.yes <- caesar.shoe["Yes",]
> caesar.shoe.total <- margin.table(caesar.shoe,2)
> caesar.shoe.yes
 <4   4 4.5   5 5.5  6+ 
  5   7   6   7   8  10 
> caesar.shoe.total
 <4   4 4.5   5 5.5  6+ 
 22  35  42  48  54 150 
> prop.test(caesar.shoe.yes,caesar.shoe.total)

	6-sample test for equality of proportions without
	continuity correction

data:  caesar.shoe.yes out of caesar.shoe.total
X-squared = 9.2874, df = 5, p-value = 0.09814
alternative hypothesis: two.sided
sample estimates:
    prop 1     prop 2     prop 3     prop 4     prop 5     prop 6 
0.22727273 0.20000000 0.14285714 0.14583333 0.14814815 0.06666667 

Warning message:
In prop.test(caesar.shoe.yes, caesar.shoe.total) :
  Chi-squared approximation may be incorrect
> prop.trend.test(caesar.shoe.yes,caesar.shoe.total)

	Chi-squared Test for Trend in Proportions

data:  caesar.shoe.yes out of caesar.shoe.total ,
 using scores: 1 2 3 4 5 6
X-squared = 8.0237, df = 1, p-value = 0.004617

> caff.marital <- matrix(c(652,1537,598,242,36,46,38,21,218
+ ,327,106,67),
+ nrow=3,byrow=T)
> colnames(caff.marital) <- c("0","1-150","151-300",">300")
> rownames(caff.marital) <- c("Married","Prev.married","Single")
> caff.marital
               0 1-150 151-300 >300
Married      652  1537     598  242
Prev.married  36    46      38   21
Single       218   327     106   67
> chisq.test(caff.marital)

	Pearson's Chi-squared test

data:  caff.marital
X-squared = 51.656, df = 6, p-value = 2.187e-09

> chisq.test(caff.marital)$expected
                     0      1-150   151-300      >300
Married      705.83179 1488.01183 578.06533 257.09105
Prev.married  32.85648   69.26698  26.90895  11.96759
Single       167.31173  352.72119 137.02572  60.94136
> chisq.test(caff.marital)$observed
               0 1-150 151-300 >300
Married      652  1537     598  242
Prev.married  36    46      38   21
Single       218   327     106   67
> E <- chisq.test(caff.marital)$expected
> O <- chisq.test(caff.marital)$observed
> (O-E)^2/E
                      0    1-150   151-300      >300
Married       4.1055981 1.612783 0.6874502 0.8858331
Prev.married  0.3007537 7.815444 4.5713926 6.8171090
Single       15.3563704 1.875645 7.0249243 0.6023355
> attach(juul)
> chisq.test(tanner,sex)

	Pearson's Chi-squared test

data:  tanner and sex
X-squared = 28.867, df = 4, p-value = 8.318e-06

> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> curve(pt(x,25,ncp=3), from=0, to=6)
> abline(v=qt(.975,25))
> if (.make.epsf) dev.copy2eps(file="noncentral-t.ps")
> pt(qt(.975,25),25,ncp=3)
[1] 0.1779891
> power.t.test(delta=0.5, sd=2, sig.level = 0.01, power=0.9)

     Two-sample t test power calculation 

              n = 477.8021
          delta = 0.5
             sd = 2
      sig.level = 0.01
          power = 0.9
    alternative = two.sided

NOTE: n is number in *each* group

> power.t.test(n=450, delta=0.5, sd=2, sig.level = 0.01)

     Two-sample t test power calculation 

              n = 450
          delta = 0.5
             sd = 2
      sig.level = 0.01
          power = 0.8784433
    alternative = two.sided

NOTE: n is number in *each* group

> power.t.test(delta=0.5, sd=2, sig.level = 0.01, power=0.9,
+ alt="one.sided")

     Two-sample t test power calculation 

              n = 417.898
          delta = 0.5
             sd = 2
      sig.level = 0.01
          power = 0.9
    alternative = one.sided

NOTE: n is number in *each* group

> power.t.test(delta=10, sd=10*sqrt(2), power=0.85, type="paired")

     Paired t test power calculation 

              n = 19.96892
          delta = 10
             sd = 14.14214
      sig.level = 0.05
          power = 0.85
    alternative = two.sided

NOTE: n is number of *pairs*, sd is std.dev. of *differences* within pairs

> power.prop.test(power=.85,p1=.15,p2=.30)

     Two-sample comparison of proportions power calculation 

              n = 137.604
             p1 = 0.15
             p2 = 0.3
      sig.level = 0.05
          power = 0.85
    alternative = two.sided

NOTE: n is number in *each* group

> ### no data sets in exercises
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> age <- subset(juul, age >= 10 & age <= 16)$age
> range(age)
[1] 10.01 16.00
> agegr <- cut(age, seq(10,16,2), right=F, include.lowest=T)
> length(age)
[1] 502
> table(agegr)
agegr
[10,12) [12,14) [14,16] 
    190     168     144 
> agegr2 <- cut(age, seq(10,16,2), right=F)
> table(agegr2)
agegr2
[10,12) [12,14) [14,16) 
    190     168     142 
> q <- quantile(age, c(0, .25, .50, .75, 1))
> q
     0%     25%     50%     75%    100% 
10.0100 11.3825 12.6400 14.2275 16.0000 
> ageQ <- cut(age, q, include.lowest=T)
> table(ageQ)
ageQ
  [10,11.4] (11.4,12.6] (12.6,14.2]   (14.2,16] 
        126         125         125         126 
> levels(ageQ) <- c("1st", "2nd", "3rd", "4th")
> levels(agegr) <- c("10-11", "12-13", "14-15")
> pain <- c(0,3,2,2,1)
> fpain <- factor(pain,levels=0:3,
+        labels=c("none","mild","medium","severe"))
> text.pain <-  c("none","severe", "medium", "medium", "mild")
> factor(text.pain)
[1] none   severe medium medium mild  
Levels: medium mild none severe
> ftpain <- factor(text.pain)
> ftpain2 <- factor(ftpain,
+                   levels=c("none", "mild", "medium", "severe"))
> ftpain3 <- ftpain2
> levels(ftpain3) <- list(
+         none="none",
+         intermediate=c("mild","medium"),
+         severe="severe")
> ftpain3
[1] none         severe       intermediate intermediate
[5] intermediate
Levels: none intermediate severe
> ftpain4 <- ftpain2
> levels(ftpain4) <- c("none","intermediate","intermediate","severe")
> ftpain4
[1] none         severe       intermediate intermediate
[5] intermediate
Levels: none intermediate severe
> stroke <- read.csv2(
+   system.file("rawdata","stroke.csv", package="ISwR"),
+   na.strings=".")
> names(stroke) <- tolower(names(stroke))
> head(stroke)
  sex       died       dstr age dgn coma diab minf han
1   1  7.01.1991  2.01.1991  76 INF    0    0    1   0
2   1       <NA>  3.01.1991  58 INF    0    0    0   0
3   1  2.06.1991  8.01.1991  74 INF    0    0    1   1
4   0 13.01.1991 11.01.1991  77 ICH    0    1    0   1
5   0 23.01.1996 13.01.1991  76 INF    0    1    0   1
6   1 13.01.1991 13.01.1991  48 ICH    1    0    0   1
> stroke <- transform(stroke,
+    died = as.Date(died, format="%d.%m.%Y"),
+    dstr = as.Date(dstr, format="%d.%m.%Y"))
> summary(stroke$died)
        Min.      1st Qu.       Median         Mean      3rd Qu. 
"1991-01-07" "1992-03-14" "1993-01-23" "1993-02-15" "1993-11-04" 
        Max.         NA's 
"1996-02-22"        "338" 
> summary(stroke$dstr)
        Min.      1st Qu.       Median         Mean      3rd Qu. 
"1991-01-02" "1991-11-08" "1992-08-12" "1992-07-27" "1993-04-30" 
        Max. 
"1993-12-31" 
> summary(stroke$died - stroke$dstr)
  Length    Class     Mode 
     829 difftime  numeric 
> head(stroke$died - stroke$dstr)
Time differences in days
[1]    5   NA  145    2 1836    0
> o <- options(width=60) # minor cheat for visual purposes
> stroke <- transform(stroke,
+   end = pmin(died,  as.Date("1996-1-1"), na.rm = T),
+   dead = !is.na(died) & died < as.Date("1996-1-1"))
> head(stroke)
  sex       died       dstr age dgn coma diab minf han
1   1 1991-01-07 1991-01-02  76 INF    0    0    1   0
2   1       <NA> 1991-01-03  58 INF    0    0    0   0
3   1 1991-06-02 1991-01-08  74 INF    0    0    1   1
4   0 1991-01-13 1991-01-11  77 ICH    0    1    0   1
5   0 1996-01-23 1991-01-13  76 INF    0    1    0   1
6   1 1991-01-13 1991-01-13  48 ICH    1    0    0   1
         end  dead
1 1991-01-07  TRUE
2 1996-01-01 FALSE
3 1991-06-02  TRUE
4 1991-01-13  TRUE
5 1996-01-01 FALSE
6 1991-01-13  TRUE
> options(o); rm(o)
> stroke <- transform(stroke,
+   obstime = as.numeric(end - dstr, units="days")/365.25)
> rawstroke <- read.csv2(
+   system.file("rawdata","stroke.csv", package="ISwR"),
+   na.strings=".")
> ix <- c("DSTR", "DIED")
> rawstroke[ix] <- lapply(rawstroke[ix],
+                         as.Date, format="%d.%m.%Y")
> head(rawstroke)
  SEX       DIED       DSTR AGE DGN COMA DIAB MINF HAN
1   1 1991-01-07 1991-01-02  76 INF    0    0    1   0
2   1       <NA> 1991-01-03  58 INF    0    0    0   0
3   1 1991-06-02 1991-01-08  74 INF    0    0    1   1
4   0 1991-01-13 1991-01-11  77 ICH    0    1    0   1
5   0 1996-01-23 1991-01-13  76 INF    0    1    0   1
6   1 1991-01-13 1991-01-13  48 ICH    1    0    0   1
> ix <- 6:9
> rawstroke[ix] <- lapply(rawstroke[ix],
+                         factor, levels=0:1, labels=c("No","Yes"))
> strokesub <- ISwR::stroke[1:10,2:3]
> strokesub
         died       dstr
1  1991-01-07 1991-01-02
2        <NA> 1991-01-03
3  1991-06-02 1991-01-08
4  1991-01-13 1991-01-11
5        <NA> 1991-01-13
6  1991-01-13 1991-01-13
7  1993-12-01 1991-01-14
8  1991-12-12 1991-01-14
9        <NA> 1991-01-15
10 1993-11-10 1991-01-15
> strokesub <- transform(strokesub,
+   event = !is.na(died))
> strokesub <- transform(strokesub,
+  obstime = ifelse(event, died-dstr, as.Date("1996-1-1") - dstr))
> strokesub
         died       dstr event obstime
1  1991-01-07 1991-01-02  TRUE       5
2        <NA> 1991-01-03 FALSE    1824
3  1991-06-02 1991-01-08  TRUE     145
4  1991-01-13 1991-01-11  TRUE       2
5        <NA> 1991-01-13 FALSE    1814
6  1991-01-13 1991-01-13  TRUE       0
7  1993-12-01 1991-01-14  TRUE    1052
8  1991-12-12 1991-01-14  TRUE     332
9        <NA> 1991-01-15 FALSE    1812
10 1993-11-10 1991-01-15  TRUE    1030
> juulgrl <- subset(juul, sex==2, select=-c(testvol,sex))
> juulboy <- subset(juul, sex==1, select=-c(menarche,sex))
> juulgrl$sex <- factor("F")
> juulgrl$testvol <- NA
> juulboy$sex <- factor("M")
> juulboy$menarche <- NA
> juulall <- rbind(juulboy, juulgrl)
> names(juulall)
[1] "age"      "igf1"     "tanner"   "testvol"  "sex"     
[6] "menarche"
> levels(juulall$sex)
[1] "M" "F"
> head(nickel)
  id icd exposure      dob  age1st   agein  ageout
1  3   0        5 1889.019 17.4808 45.2273 92.9808
2  4 162        5 1885.978 23.1864 48.2684 63.2712
3  6 163       10 1881.255 25.2452 52.9917 54.1644
4  8 527        9 1886.340 24.7206 47.9067 69.6794
5  9 150        0 1879.500 29.9575 54.7465 76.8442
6 10 163        2 1889.915 21.2877 44.3314 62.5413
> head(ewrates)
  year age lung nasal other
1 1931  10    1     0  1269
2 1931  15    2     0  2201
3 1931  20    6     0  3116
4 1931  25   14     0  3024
5 1931  30   30     1  3188
6 1931  35   68     1  4165
> nickel <- transform(nickel,
+   agr = trunc(agein/5)*5,
+   ygr = trunc((dob+agein-1)/5)*5+1)
> mrg <- merge(nickel, ewrates,
+   by.x=c("agr","ygr"), by.y=c("age","year"))
> head(mrg,10)
   agr  ygr  id icd exposure      dob  age1st   agein  ageout
1   20 1931 273 154        0 1909.500 14.6913 24.7465 55.9302
2   20 1931 213 162        0 1910.129 14.2018 24.1177 63.0493
3   20 1931 546   0        0 1909.500 14.4945 24.7465 72.5000
4   20 1931 574 491        0 1909.729 14.0356 24.5177 70.6592
5   20 1931 110   0        0 1909.247 14.0302 24.9999 72.7534
6   20 1931 325 434        0 1910.500 14.0737 23.7465 43.0343
7   25 1931  56 502        2 1904.500 18.2917 29.7465 51.5847
8   25 1931 690 420        0 1906.500 17.2206 27.7465 55.1219
9   25 1931 443 420        0 1905.326 14.5562 28.9204 65.7616
10  25 1931 137 465        0 1905.386 19.0808 28.8601 74.2794
   lung nasal other
1     6     0  3116
2     6     0  3116
3     6     0  3116
4     6     0  3116
5     6     0  3116
6     6     0  3116
7    14     0  3024
8    14     0  3024
9    14     0  3024
10   14     0  3024
> head(alkfos)
  grp  c0  c3  c6  c9 c12 c18 c24
1   1 142 140 159 162 152 175 148
2   1 120 126 120 146 134 119 116
3   1 175 161 168 164 213 194 221
4   1 234 203 174 197 289 174 189
5   1  94 107 146 124 128  98 114
6   1 128  97 113 203  NA  NA  NA
> a2 <- alkfos
> names(a2) <- sub("c", "c.", names(a2))
> names(a2)
[1] "grp"  "c.0"  "c.3"  "c.6"  "c.9"  "c.12" "c.18" "c.24"
> a.long <- reshape(a2, varying=2:8, direction="long")
> head(a.long)
    grp time   c id
1.0   1    0 142  1
2.0   1    0 120  2
3.0   1    0 175  3
4.0   1    0 234  4
5.0   1    0  94  5
6.0   1    0 128  6
> tail(a.long)
      grp time   c id
38.24   2   24  95 38
39.24   2   24  NA 39
40.24   2   24 192 40
41.24   2   24  94 41
42.24   2   24 194 42
43.24   2   24 129 43
> o <- with(a.long, order(id, time))
> head(a.long[o,], 10)
     grp time   c id
1.0    1    0 142  1
1.3    1    3 140  1
1.6    1    6 159  1
1.9    1    9 162  1
1.12   1   12 152  1
1.18   1   18 175  1
1.24   1   24 148  1
2.0    1    0 120  2
2.3    1    3 126  2
2.6    1    6 120  2
> a.long2 <- na.omit(a.long)
> attr(a.long2, "reshapeLong") <- NULL
> a.wide2 <- reshape(a.long2, direction="wide", v.names="c",
+                  idvar="id", timevar="time")
> head(a.wide2)
    grp id c.0 c.3 c.6 c.9 c.12 c.18 c.24
1.0   1  1 142 140 159 162  152  175  148
2.0   1  2 120 126 120 146  134  119  116
3.0   1  3 175 161 168 164  213  194  221
4.0   1  4 234 203 174 197  289  174  189
5.0   1  5  94 107 146 124  128   98  114
6.0   1  6 128  97 113 203   NA   NA   NA
> l <- split(a.long$c, a.long$id)
> l[1:3]
$`1`
[1] 142 140 159 162 152 175 148

$`2`
[1] 120 126 120 146 134 119 116

$`3`
[1] 175 161 168 164 213 194 221

> l2 <- lapply(l, function(x) x / x[1])
> a.long$c.adj <- unsplit(l2, a.long$id)
> subset(a.long, id==1)
     grp time   c id     c.adj
1.0    1    0 142  1 1.0000000
1.3    1    3 140  1 0.9859155
1.6    1    6 159  1 1.1197183
1.9    1    9 162  1 1.1408451
1.12   1   12 152  1 1.0704225
1.18   1   18 175  1 1.2323944
1.24   1   24 148  1 1.0422535
> a.long$c.adj <- ave(a.long$c, a.long$id,
+     FUN = function(x) x / x[1])
> all.equal(unsplit(l2, a.long$id),  a.long$c.adj)
[1] TRUE
> l <- split(a.long, a.long$id)
> l2 <- lapply(l, transform, c.adj = c / c[1])
> a.long2 <- unsplit(l2, a.long$id)
> all.equal(a.long2$c.adj,  a.long$c.adj)
[1] TRUE
> head(nickel)
  id icd exposure      dob  age1st   agein  ageout agr  ygr
1  3   0        5 1889.019 17.4808 45.2273 92.9808  45 1931
2  4 162        5 1885.978 23.1864 48.2684 63.2712  45 1931
3  6 163       10 1881.255 25.2452 52.9917 54.1644  50 1931
4  8 527        9 1886.340 24.7206 47.9067 69.6794  45 1931
5  9 150        0 1879.500 29.9575 54.7465 76.8442  50 1931
6 10 163        2 1889.915 21.2877 44.3314 62.5413  40 1931
> entry <- pmax(nickel$agein, 60)
> exit <- pmin(nickel$ageout, 65)
> valid <- (entry < exit)
> entry <- entry[valid]
> exit  <- exit[valid]
> cens <- (nickel$ageout[valid] > 65)
> nickel60 <- nickel[valid,]
> nickel60$icd[cens] <- 0
> nickel60$agein <- entry
> nickel60$ageout <- exit
> nickel60$agr <- 60
> nickel60$ygr <- with(nickel60, trunc((dob+agein-1)/5)*5+1)
> head(nickel60)
  id icd exposure      dob  age1st agein  ageout agr  ygr
1  3   0        5 1889.019 17.4808    60 65.0000  60 1946
2  4 162        5 1885.978 23.1864    60 63.2712  60 1941
4  8   0        9 1886.340 24.7206    60 65.0000  60 1946
5  9   0        0 1879.500 29.9575    60 65.0000  60 1936
6 10 163        2 1889.915 21.2877    60 62.5413  60 1946
7 15 334        0 1890.500 23.2836    60 62.0000  60 1946
> trim <- function(start)
+ {
+   end   <- start + 5
+   entry <- pmax(nickel$agein, start)
+   exit  <- pmin(nickel$ageout, end)
+   valid <- (entry < exit)
+   cens  <- (nickel$ageout[valid] > end)
+   result <- nickel[valid,]
+   result$icd[cens] <- 0
+   result$agein <- entry[valid]
+   result$ageout <- exit[valid]
+   result$agr <- start
+   result$ygr <- with(result, trunc((dob+agein-1)/5)*5+1)
+   result
+ }
> head(trim(60))
  id icd exposure      dob  age1st agein  ageout agr  ygr
1  3   0        5 1889.019 17.4808    60 65.0000  60 1946
2  4 162        5 1885.978 23.1864    60 63.2712  60 1941
4  8   0        9 1886.340 24.7206    60 65.0000  60 1946
5  9   0        0 1879.500 29.9575    60 65.0000  60 1936
6 10 163        2 1889.915 21.2877    60 62.5413  60 1946
7 15 334        0 1890.500 23.2836    60 62.0000  60 1946
> nickel.expand <- do.call("rbind", lapply(seq(20,95,5), trim))
> head(nickel.expand)
     id icd exposure      dob  age1st   agein ageout agr  ygr
84  110   0        0 1909.247 14.0302 24.9999     25  20 1931
156 213   0        0 1910.129 14.2018 24.1177     25  20 1931
197 273   0        0 1909.500 14.6913 24.7465     25  20 1931
236 325   0        0 1910.500 14.0737 23.7465     25  20 1931
384 546   0        0 1909.500 14.4945 24.7465     25  20 1931
400 574   0        0 1909.729 14.0356 24.5177     25  20 1931
> subset(nickel.expand, id==4)
     id icd exposure      dob  age1st   agein  ageout agr  ygr
2     4   0        5 1885.978 23.1864 48.2684 50.0000  45 1931
2100  4   0        5 1885.978 23.1864 50.0000 55.0000  50 1931
2102  4   0        5 1885.978 23.1864 55.0000 60.0000  55 1936
2104  4 162        5 1885.978 23.1864 60.0000 63.2712  60 1941
> nickel.expand <- merge(nickel.expand, ewrates,
+   by.x=c("agr","ygr"), by.y=c("age","year"))
> head(nickel.expand)
  agr  ygr  id icd exposure      dob  age1st   agein ageout lung
1  20 1931 325   0        0 1910.500 14.0737 23.7465     25    6
2  20 1931 273   0        0 1909.500 14.6913 24.7465     25    6
3  20 1931 110   0        0 1909.247 14.0302 24.9999     25    6
4  20 1931 574   0        0 1909.729 14.0356 24.5177     25    6
5  20 1931 213   0        0 1910.129 14.2018 24.1177     25    6
6  20 1931 546   0        0 1909.500 14.4945 24.7465     25    6
  nasal other
1     0  3116
2     0  3116
3     0  3116
4     0  3116
5     0  3116
6     0  3116
> all.equal(nickel.expand, ISwR::nickel.expand)
[1] TRUE
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> par(mex=0.5)
> pairs(cystfibr, gap=0, cex.labels=0.9)
> if (.make.epsf) dev.copy2eps(file="cyst-fibr.ps",height=4.5,width=4.49)
> attach(cystfibr)
The following object is masked from package:ISwR:

    tlc

> if (exists("age",.GlobalEnv,inh=F)) rm(age)
> if (exists("height",.GlobalEnv,inh=F)) rm(height)
> if (exists("weight",.GlobalEnv,inh=F)) rm(weight)
> summary(lm(pemax~age+sex+height+weight+bmp+fev1+rv+frc+tlc))

Call:
lm(formula = pemax ~ age + sex + height + weight + bmp + fev1 + 
    rv + frc + tlc)

Residuals:
    Min      1Q  Median      3Q     Max 
-37.338 -11.532   1.081  13.386  33.405 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) 176.0582   225.8912   0.779    0.448
age          -2.5420     4.8017  -0.529    0.604
sex          -3.7368    15.4598  -0.242    0.812
height       -0.4463     0.9034  -0.494    0.628
weight        2.9928     2.0080   1.490    0.157
bmp          -1.7449     1.1552  -1.510    0.152
fev1          1.0807     1.0809   1.000    0.333
rv            0.1970     0.1962   1.004    0.331
frc          -0.3084     0.4924  -0.626    0.540
tlc           0.1886     0.4997   0.377    0.711

Residual standard error: 25.47 on 15 degrees of freedom
Multiple R-squared:  0.6373,	Adjusted R-squared:  0.4197 
F-statistic: 2.929 on 9 and 15 DF,  p-value: 0.03195

> 1-25.5^2/var(pemax)
[1] 0.4183949
> anova(lm(pemax~age+sex+height+weight+bmp+fev1+rv+frc+tlc))
Analysis of Variance Table

Response: pemax
          Df  Sum Sq Mean Sq F value   Pr(>F)   
age        1 10098.5 10098.5 15.5661 0.001296 **
sex        1   955.4   955.4  1.4727 0.243680   
height     1   155.0   155.0  0.2389 0.632089   
weight     1   632.3   632.3  0.9747 0.339170   
bmp        1  2862.2  2862.2  4.4119 0.053010 . 
fev1       1  1549.1  1549.1  2.3878 0.143120   
rv         1   561.9   561.9  0.8662 0.366757   
frc        1   194.6   194.6  0.2999 0.592007   
tlc        1    92.4    92.4  0.1424 0.711160   
Residuals 15  9731.2   648.7                    
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> 955.4+155.0+632.3+2862.2+1549.1+561.9+194.6+92.4
[1] 7002.9
> 7002.9/8
[1] 875.3625
> 875.36/648.7
[1] 1.349407
> 1-pf(1.349407,8,15)
[1] 0.2935148
> ## Not command output:
> m1<-lm(pemax~age+sex+height+weight+bmp+fev1+rv+frc+tlc)
> m2<-lm(pemax~age)
> anova(m1,m2)
Analysis of Variance Table

Model 1: pemax ~ age + sex + height + weight + bmp + fev1 + rv + frc + 
    tlc
Model 2: pemax ~ age
  Res.Df     RSS Df Sum of Sq      F Pr(>F)
1     15  9731.2                           
2     23 16734.2 -8   -7002.9 1.3493 0.2936
> summary(lm(pemax~age+sex+height+weight+bmp+fev1+rv+frc+tlc))

Call:
lm(formula = pemax ~ age + sex + height + weight + bmp + fev1 + 
    rv + frc + tlc)

Residuals:
    Min      1Q  Median      3Q     Max 
-37.338 -11.532   1.081  13.386  33.405 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) 176.0582   225.8912   0.779    0.448
age          -2.5420     4.8017  -0.529    0.604
sex          -3.7368    15.4598  -0.242    0.812
height       -0.4463     0.9034  -0.494    0.628
weight        2.9928     2.0080   1.490    0.157
bmp          -1.7449     1.1552  -1.510    0.152
fev1          1.0807     1.0809   1.000    0.333
rv            0.1970     0.1962   1.004    0.331
frc          -0.3084     0.4924  -0.626    0.540
tlc           0.1886     0.4997   0.377    0.711

Residual standard error: 25.47 on 15 degrees of freedom
Multiple R-squared:  0.6373,	Adjusted R-squared:  0.4197 
F-statistic: 2.929 on 9 and 15 DF,  p-value: 0.03195

> summary(lm(pemax~age+sex+height+weight+bmp+fev1+rv+frc))

Call:
lm(formula = pemax ~ age + sex + height + weight + bmp + fev1 + 
    rv + frc)

Residuals:
    Min      1Q  Median      3Q     Max 
-38.072 -10.067   0.113  13.526  36.990 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept) 221.8055   185.4350   1.196   0.2491  
age          -3.1346     4.4144  -0.710   0.4879  
sex          -4.6933    14.8363  -0.316   0.7558  
height       -0.5428     0.8428  -0.644   0.5286  
weight        3.3157     1.7672   1.876   0.0790 .
bmp          -1.9403     1.0047  -1.931   0.0714 .
fev1          1.0183     1.0392   0.980   0.3417  
rv            0.1857     0.1887   0.984   0.3396  
frc          -0.2605     0.4628  -0.563   0.5813  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 24.78 on 16 degrees of freedom
Multiple R-squared:  0.6339,	Adjusted R-squared:  0.4508 
F-statistic: 3.463 on 8 and 16 DF,  p-value: 0.01649

> summary(lm(pemax~age+sex+height+weight+bmp+fev1+rv))

Call:
lm(formula = pemax ~ age + sex + height + weight + bmp + fev1 + 
    rv)

Residuals:
    Min      1Q  Median      3Q     Max 
-39.425 -12.391   3.834  14.797  36.693 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept) 166.71822  154.31294   1.080   0.2951  
age          -1.81783    3.66773  -0.496   0.6265  
sex           0.10239   11.89990   0.009   0.9932  
height       -0.40981    0.79257  -0.517   0.6118  
weight        2.87386    1.55120   1.853   0.0814 .
bmp          -1.94971    0.98415  -1.981   0.0640 .
fev1          1.41526    0.74788   1.892   0.0756 .
rv            0.09567    0.09798   0.976   0.3425  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 24.28 on 17 degrees of freedom
Multiple R-squared:  0.6266,	Adjusted R-squared:  0.4729 
F-statistic: 4.076 on 7 and 17 DF,  p-value: 0.008452

> summary(lm(pemax~age+sex+height+weight+bmp+fev1))

Call:
lm(formula = pemax ~ age + sex + height + weight + bmp + fev1)

Residuals:
    Min      1Q  Median      3Q     Max 
-43.238  -7.403  -0.081  15.534  36.028 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept) 260.6313   120.5215   2.163   0.0443 *
age          -2.9062     3.4898  -0.833   0.4159  
sex          -1.2115    11.8083  -0.103   0.9194  
height       -0.6067     0.7655  -0.793   0.4384  
weight        3.3463     1.4719   2.273   0.0355 *
bmp          -2.3042     0.9136  -2.522   0.0213 *
fev1          1.0274     0.6329   1.623   0.1219  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 24.24 on 18 degrees of freedom
Multiple R-squared:  0.6057,	Adjusted R-squared:  0.4743 
F-statistic: 4.608 on 6 and 18 DF,  p-value: 0.00529

> summary(lm(pemax~age+sex+height+weight+bmp))

Call:
lm(formula = pemax ~ age + sex + height + weight + bmp)

Residuals:
    Min      1Q  Median      3Q     Max 
-43.194  -9.412  -2.425   9.157  40.112 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept) 280.4482   124.9556   2.244   0.0369 *
age          -3.0750     3.6352  -0.846   0.4081  
sex         -11.5281    10.3720  -1.111   0.2802  
height       -0.6853     0.7962  -0.861   0.4001  
weight        3.5546     1.5281   2.326   0.0312 *
bmp          -1.9613     0.9263  -2.117   0.0476 *
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 25.27 on 19 degrees of freedom
Multiple R-squared:  0.548,	Adjusted R-squared:  0.429 
F-statistic: 4.606 on 5 and 19 DF,  p-value: 0.006388

> summary(lm(pemax~age+height+weight+bmp))

Call:
lm(formula = pemax ~ age + height + weight + bmp)

Residuals:
    Min      1Q  Median      3Q     Max 
-41.501 -15.460  -2.838  11.082  42.991 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept) 274.5307   125.5745   2.186   0.0409 *
age          -3.0832     3.6566  -0.843   0.4091  
height       -0.6985     0.8008  -0.872   0.3934  
weight        3.6338     1.5354   2.367   0.0282 *
bmp          -1.9621     0.9317  -2.106   0.0480 *
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 25.41 on 20 degrees of freedom
Multiple R-squared:  0.5186,	Adjusted R-squared:  0.4223 
F-statistic: 5.386 on 4 and 20 DF,  p-value: 0.004137

> summary(lm(pemax~height+weight+bmp))

Call:
lm(formula = pemax ~ height + weight + bmp)

Residuals:
    Min      1Q  Median      3Q     Max 
-41.794 -11.764  -1.218  13.202  43.631 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept) 245.3936   119.8927   2.047   0.0534 .
height       -0.8264     0.7808  -1.058   0.3019  
weight        2.7717     1.1377   2.436   0.0238 *
bmp          -1.4876     0.7375  -2.017   0.0566 .
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 25.24 on 21 degrees of freedom
Multiple R-squared:  0.5015,	Adjusted R-squared:  0.4302 
F-statistic: 7.041 on 3 and 21 DF,  p-value: 0.00187

> summary(lm(pemax~weight+bmp))

Call:
lm(formula = pemax ~ weight + bmp)

Residuals:
    Min      1Q  Median      3Q     Max 
-42.924 -13.399   4.361  16.642  48.404 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 124.8297    37.4786   3.331 0.003033 ** 
weight        1.6403     0.3900   4.206 0.000365 ***
bmp          -1.0054     0.5814  -1.729 0.097797 .  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 25.31 on 22 degrees of freedom
Multiple R-squared:  0.4749,	Adjusted R-squared:  0.4271 
F-statistic: 9.947 on 2 and 22 DF,  p-value: 0.0008374

> summary(lm(pemax~weight))

Call:
lm(formula = pemax ~ weight)

Residuals:
   Min     1Q Median     3Q    Max 
-44.30 -22.69   2.23  15.91  48.41 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  63.5456    12.7016   5.003 4.63e-05 ***
weight        1.1867     0.3009   3.944 0.000646 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 26.38 on 23 degrees of freedom
Multiple R-squared:  0.4035,	Adjusted R-squared:  0.3776 
F-statistic: 15.56 on 1 and 23 DF,  p-value: 0.0006457

> summary(lm(pemax~age+weight+height))

Call:
lm(formula = pemax ~ age + weight + height)

Residuals:
    Min      1Q  Median      3Q     Max 
-43.675 -21.566   3.229  16.274  48.068 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) 64.65555   82.40935   0.785    0.441
age          1.56755    3.14363   0.499    0.623
weight       0.86949    0.85922   1.012    0.323
height      -0.07608    0.80278  -0.095    0.925

Residual standard error: 27.41 on 21 degrees of freedom
Multiple R-squared:  0.4118,	Adjusted R-squared:  0.3278 
F-statistic: 4.901 on 3 and 21 DF,  p-value: 0.009776

> summary(lm(pemax~age+height))

Call:
lm(formula = pemax ~ age + height)

Residuals:
    Min      1Q  Median      3Q     Max 
-44.817 -17.883   3.815  18.275  53.824 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  17.8600    68.2493   0.262    0.796
age           2.7178     2.9325   0.927    0.364
height        0.3397     0.6900   0.492    0.627

Residual standard error: 27.43 on 22 degrees of freedom
Multiple R-squared:  0.3831,	Adjusted R-squared:  0.3271 
F-statistic: 6.832 on 2 and 22 DF,  p-value: 0.00492

> summary(lm(pemax~age))

Call:
lm(formula = pemax ~ age)

Residuals:
    Min      1Q  Median      3Q     Max 
-48.666 -17.174   6.209  16.209  51.334 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)   50.408     16.657   3.026  0.00601 **
age            4.055      1.088   3.726  0.00111 **
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 26.97 on 23 degrees of freedom
Multiple R-squared:  0.3764,	Adjusted R-squared:  0.3492 
F-statistic: 13.88 on 1 and 23 DF,  p-value: 0.001109

> summary(lm(pemax~height))

Call:
lm(formula = pemax ~ height)

Residuals:
    Min      1Q  Median      3Q     Max 
-43.876 -19.306   1.787  18.170  61.464 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) -33.2757    40.0445  -0.831  0.41453   
height        0.9319     0.2596   3.590  0.00155 **
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 27.34 on 23 degrees of freedom
Multiple R-squared:  0.3591,	Adjusted R-squared:  0.3312 
F-statistic: 12.89 on 1 and 23 DF,  p-value: 0.001549

> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> attach(cystfibr)
The following object is masked from package:ISwR:

    tlc

> summary(lm(pemax~height+I(height^2)))

Call:
lm(formula = pemax ~ height + I(height^2))

Residuals:
    Min      1Q  Median      3Q     Max 
-51.411 -14.932  -2.288  12.787  44.933 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept) 615.36248  240.95580   2.554   0.0181 *
height       -8.08324    3.32052  -2.434   0.0235 *
I(height^2)   0.03064    0.01126   2.721   0.0125 *
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 24.18 on 22 degrees of freedom
Multiple R-squared:  0.5205,	Adjusted R-squared:  0.4769 
F-statistic: 11.94 on 2 and 22 DF,  p-value: 0.0003081

> pred.frame <- data.frame(height=seq(110,180,2))
> lm.pemax.hq <- lm(pemax~height+I(height^2))
> predict(lm.pemax.hq,interval="pred",newdata=pred.frame)
         fit      lwr      upr
1   96.90026 37.94461 155.8559
2   94.33611 36.82985 151.8424
3   92.01705 35.73077 148.3033
4   89.94307 34.66449 145.2217
5   88.11418 33.65007 142.5783
6   86.53038 32.70806 140.3527
7   85.19166 31.85979 138.5235
8   84.09803 31.12689 137.0692
9   83.24949 30.53064 135.9683
10  82.64604 30.09150 135.2006
11  82.28767 29.82873 134.7466
12  82.17439 29.76004 134.5887
13  82.30620 29.90143 134.7110
14  82.68309 30.26696 135.0992
15  83.30507 30.86877 135.7414
16  84.17214 31.71697 136.6273
17  85.28429 32.81967 137.7489
18  86.64153 34.18298 139.1001
19  88.24386 35.81103 140.6767
20  90.09127 37.70599 142.4766
21  92.18378 39.86804 144.4995
22  94.52137 42.29540 146.7473
23  97.10404 44.98431 149.2238
24  99.93180 47.92894 151.9347
25 103.00465 51.12145 154.8879
26 106.32259 54.55189 158.0933
27 109.88562 58.20818 161.5631
28 113.69373 62.07617 165.3113
29 117.74692 66.13965 169.3542
30 122.04521 70.38050 173.7099
31 126.58858 74.77888 178.3983
32 131.37704 79.31354 183.4405
33 136.41059 83.96224 188.8589
34 141.68922 88.70229 194.6761
35 147.21294 93.51117 200.9147
36 152.98174 98.36718 207.5963
> pp <- predict(lm.pemax.hq,newdata=pred.frame,interval="pred")
> pc <- predict(lm.pemax.hq,newdata=pred.frame,interval="conf")
> plot(height,pemax,ylim=c(0,200))
> matlines(pred.frame$height,pp,lty=c(1,2,2),col="black")
> matlines(pred.frame$height,pc,lty=c(1,3,3),col="black")
> if (.make.epsf) dev.copy2eps(file="pemax-height-quad.ps")
> x <- runif(20)
> y <- 2*x+rnorm(20,0,0.3)
> summary(lm(y~x))

Call:
lm(formula = y ~ x)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.35286 -0.19547 -0.02266  0.18194  0.56356 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -0.2224     0.1249  -1.781   0.0918 .  
x             2.4369     0.2039  11.953 5.38e-10 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.2502 on 18 degrees of freedom
Multiple R-squared:  0.8881,	Adjusted R-squared:  0.8819 
F-statistic: 142.9 on 1 and 18 DF,  p-value: 5.376e-10

> summary(lm(y~x-1))

Call:
lm(formula = y ~ x - 1)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.45178 -0.20224 -0.01095  0.06261  0.47787 

Coefficients:
  Estimate Std. Error t value Pr(>|t|)    
x  2.11227    0.09642   21.91 6.04e-15 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.2641 on 19 degrees of freedom
Multiple R-squared:  0.9619,	Adjusted R-squared:  0.9599 
F-statistic:   480 on 1 and 19 DF,  p-value: 6.039e-15

> anova(lm(y~x))
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value    Pr(>F)    
x          1  8.945  8.9450  142.87 5.376e-10 ***
Residuals 18  1.127  0.0626                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> anova(lm(y~x-1))
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value    Pr(>F)    
x          1 33.484  33.484  479.96 6.039e-15 ***
Residuals 19  1.326   0.070                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> model.matrix(pemax~height+weight)
   (Intercept) height weight
1            1    109   13.1
2            1    112   12.9
3            1    124   14.1
4            1    125   16.2
5            1    127   21.5
6            1    130   17.5
7            1    139   30.7
8            1    150   28.4
9            1    146   25.1
10           1    155   31.5
11           1    156   39.9
12           1    153   42.1
13           1    160   45.6
14           1    158   51.2
15           1    160   35.9
16           1    153   34.8
17           1    174   44.7
18           1    176   60.1
19           1    171   42.6
20           1    156   37.2
21           1    174   54.6
22           1    178   64.0
23           1    180   73.8
24           1    175   51.1
25           1    179   71.5
attr(,"assign")
[1] 0 1 2
> attach(red.cell.folate)
> model.matrix(folate~ventilation)
   (Intercept) ventilationN2O+O2,op ventilationO2,24h
1            1                    0                 0
2            1                    0                 0
3            1                    0                 0
4            1                    0                 0
5            1                    0                 0
6            1                    0                 0
7            1                    0                 0
8            1                    0                 0
9            1                    1                 0
10           1                    1                 0
11           1                    1                 0
12           1                    1                 0
13           1                    1                 0
14           1                    1                 0
15           1                    1                 0
16           1                    1                 0
17           1                    1                 0
18           1                    0                 1
19           1                    0                 1
20           1                    0                 1
21           1                    0                 1
22           1                    0                 1
attr(,"assign")
[1] 0 1 1
attr(,"contrasts")
attr(,"contrasts")$ventilation
[1] "contr.treatment"

> attach(fake.trypsin)
> summary(fake.trypsin)
    trypsin            grp        grpf   
 Min.   :-39.96   Min.   :1.000   1: 32  
 1st Qu.:119.52   1st Qu.:2.000   2:137  
 Median :167.59   Median :2.000   3: 38  
 Mean   :168.68   Mean   :2.583   4: 44  
 3rd Qu.:213.98   3rd Qu.:3.000   5: 16  
 Max.   :390.13   Max.   :6.000   6:  4  
> anova(lm(trypsin~grpf))
Analysis of Variance Table

Response: trypsin
           Df Sum Sq Mean Sq F value    Pr(>F)    
grpf        5 224103   44821  13.508 9.592e-12 ***
Residuals 265 879272    3318                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> anova(lm(trypsin~grp))
Analysis of Variance Table

Response: trypsin
           Df Sum Sq Mean Sq F value    Pr(>F)    
grp         1 206698  206698  62.009 8.451e-14 ***
Residuals 269 896677    3333                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> model1 <- lm(trypsin~grp)
> model2 <- lm(trypsin~grpf)
> anova(model1,model2)
Analysis of Variance Table

Model 1: trypsin ~ grp
Model 2: trypsin ~ grpf
  Res.Df    RSS Df Sum of Sq      F Pr(>F)
1    269 896677                           
2    265 879272  4     17405 1.3114 0.2661
> anova(lm(trypsin~grp+grpf))
Analysis of Variance Table

Response: trypsin
           Df Sum Sq Mean Sq F value    Pr(>F)    
grp         1 206698  206698 62.2959 7.833e-14 ***
grpf        4  17405    4351  1.3114    0.2661    
Residuals 265 879272    3318                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> xbar.trypsin <- tapply(trypsin,grpf,mean)
> stripchart(trypsin~grp, method="jitter",
+    jitter=.1, vertical=T, pch=20)
> lines(1:6,xbar.trypsin,type="b",pch=4,cex=2,lty=2)
> abline(lm(trypsin~grp))
> if (.make.epsf) dev.copy2eps(file="trypsin.ps")
> n <- c(32,137, 38,44,16,4)
> tryp.mean <- c(128,152,194,207,215,218)
> tryp.sd <-c(50.9,58.5,49.3,66.3,60,14)
> gr<-1:6
> anova(lm(tryp.mean~gr+factor(gr),weights=n))
Analysis of Variance Table

Response: tryp.mean
           Df Sum Sq Mean Sq F value Pr(>F)
gr          1 206698  206698     NaN    NaN
factor(gr)  4  17405    4351     NaN    NaN
Residuals   0      0     NaN               
Warning message:
In anova.lm(lm(tryp.mean ~ gr + factor(gr), weights = n)) :
  ANOVA F-tests on an essentially perfect fit are unreliable
> sum(tryp.sd^2*(n-1))
[1] 879271.9
> sum(n-1)
[1] 265
> sum(tryp.sd^2*(n-1))/sum(n-1)
[1] 3318.007
> 206698/3318.007 # F statistic for gr
[1] 62.29583
> 1-pf(206698/3318.007,1,265) # p-value
[1] 7.838175e-14
> 4351/3318.007   # F statistic for factor(gr)
[1] 1.311329
> 1-pf(4351/3318.007,4,265) # p-value
[1] 0.2660733
> attach(coking)
> anova(lm(time~width*temp))
Analysis of Variance Table

Response: time
           Df  Sum Sq Mean Sq F value    Pr(>F)    
width       2 123.143  61.572 222.102 3.312e-10 ***
temp        1  17.209  17.209  62.076 4.394e-06 ***
width:temp  2   5.701   2.851  10.283  0.002504 ** 
Residuals  12   3.327   0.277                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> tapply(time,list(width,temp),mean)
        1600     1900
4   3.066667 2.300000
8   7.166667 5.533333
12 10.800000 7.333333
> hellung
   glucose   conc diameter
1        1 631000     21.2
2        1 592000     21.5
3        1 563000     21.3
4        1 475000     21.0
5        1 461000     21.5
6        1 416000     21.3
7        1 385000     20.3
8        1 321000     22.7
9        1 302000     21.5
10       1 199000     22.2
11       1 195000     22.6
12       1 165000     22.5
13       1 137000     23.3
14       1 130000     23.5
15       1  90000     23.4
16       1  66000     23.6
17       1  55000     23.9
18       1  52000     23.6
19       1  46000     23.6
20       1  41000     24.5
21       1  38000     24.0
22       1  34000     25.4
23       1  32000     24.6
24       1  30000     24.9
25       1  28000     24.9
26       1  21000     24.4
27       1  20000     25.9
28       1  16000     25.5
29       1  14500     25.7
30       1  13500     25.9
31       1  11600     25.7
32       1  11000     26.3
33       2 630000     19.2
34       2 501000     19.5
35       2 332000     19.8
36       2 285000     21.0
37       2 201000     21.0
38       2 175000     21.0
39       2 129000     21.3
40       2 111000     20.5
41       2  78000     22.6
42       2  70000     22.7
43       2  69000     22.2
44       2  62000     22.7
45       2  35000     24.0
46       2  27000     23.6
47       2  24000     23.5
48       2  22000     23.3
49       2  14000     24.4
50       2  13000     24.3
51       2  11000     24.2
> summary(hellung)
    glucose           conc           diameter    
 Min.   :1.000   Min.   : 11000   Min.   :19.20  
 1st Qu.:1.000   1st Qu.: 27500   1st Qu.:21.40  
 Median :1.000   Median : 69000   Median :23.30  
 Mean   :1.373   Mean   :164326   Mean   :23.00  
 3rd Qu.:2.000   3rd Qu.:243000   3rd Qu.:24.35  
 Max.   :2.000   Max.   :631000   Max.   :26.30  
> hellung$glucose <- factor(hellung$glucose, labels=c("Yes","No"))
> summary(hellung)
 glucose       conc           diameter    
 Yes:32   Min.   : 11000   Min.   :19.20  
 No :19   1st Qu.: 27500   1st Qu.:21.40  
          Median : 69000   Median :23.30  
          Mean   :164326   Mean   :23.00  
          3rd Qu.:243000   3rd Qu.:24.35  
          Max.   :631000   Max.   :26.30  
> attach(hellung)
> plot(conc,diameter,pch=as.numeric(glucose))
> locator <- function(n)list(x=4e5,y=26)
> legend(locator(n=1),legend=c("glucose","no glucose"),pch=1:2)
> if (.make.epsf) dev.copy2eps(file="hellung-raw.ps")
> plot(conc,diameter,pch=as.numeric(glucose),log="x")
> if (.make.epsf) dev.copy2eps(file="hellung-logx.ps")
> plot(conc,diameter,pch=as.numeric(glucose),log="xy")
> tethym.gluc <- hellung[glucose=="Yes",]
> tethym.nogluc <- hellung[glucose=="No",]
> lm.nogluc <- lm(log10(diameter)~ log10(conc),data=tethym.nogluc)
> lm.gluc <- lm(log10(diameter)~ log10(conc),data=tethym.gluc)
> abline(lm.nogluc)
> abline(lm.gluc)
> if (.make.epsf) dev.copy2eps(file="hellung-loglog-lines.ps")
> summary(lm(log10(diameter)~ log10(conc), data=tethym.gluc))

Call:
lm(formula = log10(diameter) ~ log10(conc), data = tethym.gluc)

Residuals:
       Min         1Q     Median         3Q        Max 
-0.0267219 -0.0043361  0.0006891  0.0035489  0.0176077 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.63134    0.01345  121.29   <2e-16 ***
log10(conc) -0.05320    0.00272  -19.56   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.008779 on 30 degrees of freedom
Multiple R-squared:  0.9273,	Adjusted R-squared:  0.9248 
F-statistic: 382.5 on 1 and 30 DF,  p-value: < 2.2e-16

> summary(lm(log10(diameter)~ log10(conc), data=tethym.nogluc))

Call:
lm(formula = log10(diameter) ~ log10(conc), data = tethym.nogluc)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.021919 -0.004977  0.000056  0.005597  0.016625 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.634761   0.020209   80.89  < 2e-16 ***
log10(conc) -0.059677   0.004125  -14.47 5.48e-11 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.009532 on 17 degrees of freedom
Multiple R-squared:  0.9249,	Adjusted R-squared:  0.9205 
F-statistic: 209.3 on 1 and 17 DF,  p-value: 5.482e-11

> summary(lm(log10(diameter)~log10(conc)*glucose))

Call:
lm(formula = log10(diameter) ~ log10(conc) * glucose)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.026722 -0.004888  0.000056  0.003767  0.017608 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)            1.631344   0.013879 117.543   <2e-16 ***
log10(conc)           -0.053196   0.002807 -18.954   <2e-16 ***
glucoseNo              0.003418   0.023695   0.144    0.886    
log10(conc):glucoseNo -0.006480   0.004821  -1.344    0.185    
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.009059 on 47 degrees of freedom
Multiple R-squared:  0.9361,	Adjusted R-squared:  0.9321 
F-statistic: 229.6 on 3 and 47 DF,  p-value: < 2.2e-16

> summary(lm(log10(diameter)~log10(conc)+glucose))

Call:
lm(formula = log10(diameter) ~ log10(conc) + glucose)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.025243 -0.005733 -0.000195  0.004894  0.018914 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.642132   0.011417  143.83  < 2e-16 ***
log10(conc) -0.055393   0.002301  -24.07  < 2e-16 ***
glucoseNo   -0.028238   0.002647  -10.67 2.93e-14 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.009134 on 48 degrees of freedom
Multiple R-squared:  0.9337,	Adjusted R-squared:  0.9309 
F-statistic: 337.9 on 2 and 48 DF,  p-value: < 2.2e-16

> var.test(lm.gluc,lm.nogluc)

	F test to compare two variances

data:  lm.gluc and lm.nogluc
F = 0.84817, num df = 30, denom df = 17, p-value = 0.6731
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.3389901 1.9129940
sample estimates:
ratio of variances 
         0.8481674 

> anova(lm(log10(diameter)~ log10(conc)*glucose))
Analysis of Variance Table

Response: log10(diameter)
                    Df   Sum Sq  Mean Sq F value    Pr(>F)    
log10(conc)          1 0.046890 0.046890 571.436 < 2.2e-16 ***
glucose              1 0.009494 0.009494 115.698  2.89e-14 ***
log10(conc):glucose  1 0.000148 0.000148   1.807    0.1853    
Residuals           47 0.003857 0.000082                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> anova(lm(log10(diameter)~glucose+log10(conc)))
Analysis of Variance Table

Response: log10(diameter)
            Df   Sum Sq  Mean Sq F value    Pr(>F)    
glucose      1 0.008033 0.008033  96.278 4.696e-13 ***
log10(conc)  1 0.048351 0.048351 579.494 < 2.2e-16 ***
Residuals   48 0.004005 0.000083                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> anova(lm(log10(diameter)~log10(conc)+ glucose))
Analysis of Variance Table

Response: log10(diameter)
            Df   Sum Sq  Mean Sq F value    Pr(>F)    
log10(conc)  1 0.046890 0.046890  561.99 < 2.2e-16 ***
glucose      1 0.009494 0.009494  113.78 2.932e-14 ***
Residuals   48 0.004005 0.000083                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> t.test(log10(diameter)~glucose)

	Welch Two Sample t-test

data:  log10(diameter) by glucose
t = 2.7037, df = 36.31, p-value = 0.01037
alternative hypothesis: true difference in means between group Yes and group No is not equal to 0
95 percent confidence interval:
 0.006492194 0.045424241
sample estimates:
mean in group Yes  mean in group No 
         1.370046          1.344088 

> attach(thuesen)
> options(na.action="na.exclude")
> lm.velo <- lm(short.velocity~blood.glucose)
> opar <- par(mfrow=c(2,2), mex=0.6, mar=c(4,4,3,2)+.3)
> plot(lm.velo, which=1:4)
> par(opar)
> if (.make.epsf) dev.copy2eps(file="regr-diag.ps")
> opar <- par(mfrow=c(2,2), mex=0.6, mar=c(4,4,3,2)+.3)
> plot(rstandard(lm.velo))
> plot(rstudent(lm.velo))
> plot(dffits(lm.velo),type="l")
> matplot(dfbetas(lm.velo),type="l", col="black")
> lines(sqrt(cooks.distance(lm.velo)), lwd=2)
> par(opar)
> if (.make.epsf) dev.copy2eps(file="regr-diag2.ps")
> summary(lm(short.velocity~blood.glucose, subset=-13))

Call:
lm(formula = short.velocity ~ blood.glucose, subset = -13)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.31346 -0.11136 -0.01247  0.06043  0.40794 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)    1.18929    0.11061  10.752 9.22e-10 ***
blood.glucose  0.01082    0.01029   1.052    0.305    
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.193 on 20 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.05241,	Adjusted R-squared:  0.005026 
F-statistic: 1.106 on 1 and 20 DF,  p-value: 0.3055

> cookd <- cooks.distance(lm(pemax~height+weight))
> cookd <- cookd/max(cookd)
> cook.colors <- gray(1-sqrt(cookd))
> plot(height,weight,bg=cook.colors,pch=21,cex=1.5)
> points(height,weight,pch=1,cex=1.5)
> if (.make.epsf) dev.copy2eps(file="cookd-cyst-fibr.ps")
> attach(secher)
> rst <- rstudent(lm(log10(bwt)~log10(ad)+log10(bpd)))
> range(rst)
[1] -3.707509  3.674050
> rst <- rst/3.71
> plot(ad,bpd,log="xy",bg=gray(1-abs(rst)),
+      pch=ifelse(rst>0,24,25), cex=1.5)
> if (.make.epsf) dev.copy2eps(file="rstudent-secher.ps")
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> no.yes <- c("No","Yes")
> smoking <- gl(2,1,8,no.yes)
> obesity <- gl(2,2,8,no.yes)
> snoring <- gl(2,4,8,no.yes)
> n.tot <- c(60,17,8,2,187,85,51,23)
> n.hyp <- c(5,2,1,0,35,13,15,8)
> data.frame(smoking,obesity,snoring,n.tot,n.hyp)
  smoking obesity snoring n.tot n.hyp
1      No      No      No    60     5
2     Yes      No      No    17     2
3      No     Yes      No     8     1
4     Yes     Yes      No     2     0
5      No      No     Yes   187    35
6     Yes      No     Yes    85    13
7      No     Yes     Yes    51    15
8     Yes     Yes     Yes    23     8
> expand.grid(smoking=no.yes, obesity=no.yes, snoring=no.yes)
  smoking obesity snoring
1      No      No      No
2     Yes      No      No
3      No     Yes      No
4     Yes     Yes      No
5      No      No     Yes
6     Yes      No     Yes
7      No     Yes     Yes
8     Yes     Yes     Yes
> hyp.tbl <- cbind(n.hyp,n.tot-n.hyp)
> hyp.tbl
     n.hyp    
[1,]     5  55
[2,]     2  15
[3,]     1   7
[4,]     0   2
[5,]    35 152
[6,]    13  72
[7,]    15  36
[8,]     8  15
> glm(hyp.tbl~smoking+obesity+snoring,family=binomial("logit"))

Call:  glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = binomial("logit"))

Coefficients:
(Intercept)   smokingYes   obesityYes   snoringYes  
   -2.37766     -0.06777      0.69531      0.87194  

Degrees of Freedom: 7 Total (i.e. Null);  4 Residual
Null Deviance:	    14.13 
Residual Deviance: 1.618 	AIC: 34.54
> glm(hyp.tbl~smoking+obesity+snoring,binomial)

Call:  glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = binomial)

Coefficients:
(Intercept)   smokingYes   obesityYes   snoringYes  
   -2.37766     -0.06777      0.69531      0.87194  

Degrees of Freedom: 7 Total (i.e. Null);  4 Residual
Null Deviance:	    14.13 
Residual Deviance: 1.618 	AIC: 34.54
> prop.hyp <- n.hyp/n.tot
> glm.hyp <- glm(prop.hyp~smoking+obesity+snoring,
+                binomial,weights=n.tot)
> glm(hyp.tbl~smoking+obesity+snoring, binomial("logit"))

Call:  glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = binomial("logit"))

Coefficients:
(Intercept)   smokingYes   obesityYes   snoringYes  
   -2.37766     -0.06777      0.69531      0.87194  

Degrees of Freedom: 7 Total (i.e. Null);  4 Residual
Null Deviance:	    14.13 
Residual Deviance: 1.618 	AIC: 34.54
> glm.hyp <- glm(hyp.tbl~smoking+obesity+snoring,binomial)
> summary(glm.hyp)

Call:
glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = binomial)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -2.37766    0.38018  -6.254    4e-10 ***
smokingYes  -0.06777    0.27812  -0.244   0.8075    
obesityYes   0.69531    0.28509   2.439   0.0147 *  
snoringYes   0.87194    0.39757   2.193   0.0283 *  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 14.1259  on 7  degrees of freedom
Residual deviance:  1.6184  on 4  degrees of freedom
AIC: 34.537

Number of Fisher Scoring iterations: 4

> summary(glm(formula = hyp.tbl ~ smoking + obesity + snoring, family =
+ binomial))

Call:
glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = binomial)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -2.37766    0.38018  -6.254    4e-10 ***
smokingYes  -0.06777    0.27812  -0.244   0.8075    
obesityYes   0.69531    0.28509   2.439   0.0147 *  
snoringYes   0.87194    0.39757   2.193   0.0283 *  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 14.1259  on 7  degrees of freedom
Residual deviance:  1.6184  on 4  degrees of freedom
AIC: 34.537

Number of Fisher Scoring iterations: 4

> glm.hyp <- glm(hyp.tbl~obesity+snoring,binomial)
> summary(glm.hyp)

Call:
glm(formula = hyp.tbl ~ obesity + snoring, family = binomial)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -2.3921     0.3757  -6.366 1.94e-10 ***
obesityYes    0.6954     0.2851   2.440   0.0147 *  
snoringYes    0.8655     0.3967   2.182   0.0291 *  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 14.1259  on 7  degrees of freedom
Residual deviance:  1.6781  on 5  degrees of freedom
AIC: 32.597

Number of Fisher Scoring iterations: 4

> glm.hyp <- glm(hyp.tbl~smoking+obesity+snoring,binomial)
> anova(glm.hyp, test="Chisq")
Analysis of Deviance Table

Model: binomial, link: logit

Response: hyp.tbl

Terms added sequentially (first to last)


        Df Deviance Resid. Df Resid. Dev Pr(>Chi)   
NULL                        7    14.1259            
smoking  1   0.0022         6    14.1237 0.962724   
obesity  1   6.8274         5     7.2963 0.008977 **
snoring  1   5.6779         4     1.6184 0.017179 * 
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> glm.hyp <- glm(hyp.tbl~snoring+obesity+smoking,binomial)
> anova(glm.hyp, test="Chisq")
Analysis of Deviance Table

Model: binomial, link: logit

Response: hyp.tbl

Terms added sequentially (first to last)


        Df Deviance Resid. Df Resid. Dev Pr(>Chi)   
NULL                        7    14.1259            
snoring  1   6.7887         6     7.3372 0.009174 **
obesity  1   5.6591         5     1.6781 0.017365 * 
smoking  1   0.0597         4     1.6184 0.806938   
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> glm.hyp <- glm(hyp.tbl~obesity+snoring,binomial)
> anova(glm.hyp, test="Chisq")
Analysis of Deviance Table

Model: binomial, link: logit

Response: hyp.tbl

Terms added sequentially (first to last)


        Df Deviance Resid. Df Resid. Dev Pr(>Chi)   
NULL                        7    14.1259            
obesity  1   6.8260         6     7.2999 0.008984 **
snoring  1   5.6218         5     1.6781 0.017738 * 
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> drop1(glm.hyp, test="Chisq")
Single term deletions

Model:
hyp.tbl ~ obesity + snoring
        Df Deviance    AIC    LRT Pr(>Chi)  
<none>       1.6781 32.597                  
obesity  1   7.3372 36.256 5.6591  0.01737 *
snoring  1   7.2999 36.219 5.6218  0.01774 *
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> caesar.shoe
    <4  4 4.5  5 5.5  6+
Yes  5  7   6  7   8  10
No  17 28  36 41  46 140
> shoe.score <- 1:6
> shoe.score
[1] 1 2 3 4 5 6
> summary(glm(t(caesar.shoe)~shoe.score,binomial))

Call:
glm(formula = t(caesar.shoe) ~ shoe.score, family = binomial)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)   
(Intercept) -0.87058    0.40506  -2.149  0.03161 * 
shoe.score  -0.25971    0.09361  -2.774  0.00553 **
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 9.3442  on 5  degrees of freedom
Residual deviance: 1.7845  on 4  degrees of freedom
AIC: 27.616

Number of Fisher Scoring iterations: 4

> anova(glm(t(caesar.shoe)~shoe.score,binomial))
Analysis of Deviance Table

Model: binomial, link: logit

Response: t(caesar.shoe)

Terms added sequentially (first to last)


           Df Deviance Resid. Df Resid. Dev Pr(>Chi)   
NULL                           5     9.3442            
shoe.score  1   7.5597         4     1.7845 0.005969 **
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> caesar.shoe.yes <- caesar.shoe["Yes",]
> caesar.shoe.no <- caesar.shoe["No",]
> caesar.shoe.total <- caesar.shoe.yes+caesar.shoe.no
> prop.trend.test(caesar.shoe.yes,caesar.shoe.total)

	Chi-squared Test for Trend in Proportions

data:  caesar.shoe.yes out of caesar.shoe.total ,
 using scores: 1 2 3 4 5 6
X-squared = 8.0237, df = 1, p-value = 0.004617

> prop.test(caesar.shoe.yes,caesar.shoe.total)

	6-sample test for equality of proportions without
	continuity correction

data:  caesar.shoe.yes out of caesar.shoe.total
X-squared = 9.2874, df = 5, p-value = 0.09814
alternative hypothesis: two.sided
sample estimates:
    prop 1     prop 2     prop 3     prop 4     prop 5     prop 6 
0.22727273 0.20000000 0.14285714 0.14583333 0.14814815 0.06666667 

Warning message:
In prop.test(caesar.shoe.yes, caesar.shoe.total) :
  Chi-squared approximation may be incorrect
> confint(glm.hyp)
Waiting for profiling to be done...
                 2.5 %    97.5 %
(Intercept) -3.2101098 -1.718094
obesityYes   0.1254244  1.246789
snoringYes   0.1410076  1.715763
> confint.default(glm.hyp)
                  2.5 %    97.5 %
(Intercept) -3.12852108 -1.655631
obesityYes   0.13670388  1.254134
snoringYes   0.08801498  1.642902
> library(MASS)
> plot(profile(glm.hyp))
> if (.make.epsf) dev.copy2eps(file="profile-hyp.ps")
> exp(cbind(OR=coef(glm.hyp), confint(glm.hyp)))
Waiting for profiling to be done...
                    OR      2.5 %    97.5 %
(Intercept) 0.09143963 0.04035218 0.1794079
obesityYes  2.00454846 1.13362951 3.4791517
snoringYes  2.37609483 1.15143343 5.5609161
> juul$menarche <- factor(juul$menarche, labels=c("No","Yes"))
> juul$tanner <- factor(juul$tanner)
> juul.girl <- subset(juul,age>8 & age<20 &
+                     complete.cases(menarche))
> attach(juul.girl)
The following object is masked from package:MASS:

    menarche

> summary(glm(menarche~age,binomial))

Call:
glm(formula = menarche ~ age, family = binomial)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -20.0132     2.0284  -9.867   <2e-16 ***
age           1.5173     0.1544   9.829   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 719.39  on 518  degrees of freedom
Residual deviance: 200.66  on 517  degrees of freedom
AIC: 204.66

Number of Fisher Scoring iterations: 7

> summary(glm(menarche~age+tanner,binomial))

Call:
glm(formula = menarche ~ age + tanner, family = binomial)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -13.7758     2.7630  -4.986 6.17e-07 ***
age           0.8603     0.2311   3.723 0.000197 ***
tanner2      -0.5211     1.4846  -0.351 0.725609    
tanner3       0.8264     1.2377   0.668 0.504313    
tanner4       2.5645     1.2172   2.107 0.035132 *  
tanner5       5.1897     1.4140   3.670 0.000242 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 604.19  on 435  degrees of freedom
Residual deviance: 106.60  on 430  degrees of freedom
  (83 observations deleted due to missingness)
AIC: 118.6

Number of Fisher Scoring iterations: 8

> drop1(glm(menarche~age+tanner,binomial),test="Chisq")
Single term deletions

Model:
menarche ~ age + tanner
       Df Deviance    AIC    LRT  Pr(>Chi)    
<none>      106.60 118.60                     
age     1   124.50 134.50 17.901 2.327e-05 ***
tanner  4   161.88 165.88 55.282 2.835e-11 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> predict(glm.hyp)
         1          2          3          4          5          6 
-2.3920763 -2.3920763 -1.6966575 -1.6966575 -1.5266180 -1.5266180 
         7          8 
-0.8311991 -0.8311991 
> predict(glm.hyp, type="response")
         1          2          3          4          5          6 
0.08377892 0.08377892 0.15490233 0.15490233 0.17848906 0.17848906 
         7          8 
0.30339158 0.30339158 
> glm.menarche <- glm(menarche~age, binomial)
> Age <- seq(8,20,.1)
> newages <- data.frame(age=Age)
> predicted.probability <- predict(glm.menarche,
+                                  newages,type="resp")
> plot(predicted.probability ~ Age, type="l")
> if (.make.epsf) dev.copy2eps(file="menarche-fit.ps")
> fitted(glm.hyp)
         1          2          3          4          5          6 
0.08377892 0.08377892 0.15490233 0.15490233 0.17848906 0.17848906 
         7          8 
0.30339158 0.30339158 
> prop.hyp
[1] 0.08333333 0.11764706 0.12500000 0.00000000 0.18716578
[6] 0.15294118 0.29411765 0.34782609
> fitted(glm.hyp)*n.tot
         1          2          3          4          5          6 
 5.0267351  1.4242416  1.2392186  0.3098047 33.3774535 15.1715698 
         7          8 
15.4729705  6.9780063 
> data.frame(fit=fitted(glm.hyp)*n.tot,n.hyp,n.tot)
         fit n.hyp n.tot
1  5.0267351     5    60
2  1.4242416     2    17
3  1.2392186     1     8
4  0.3098047     0     2
5 33.3774535    35   187
6 15.1715698    13    85
7 15.4729705    15    51
8  6.9780063     8    23
> age.group <- cut(age,c(8,10,12,13,14,15,16,18,20))
> tb <- table(age.group,menarche)
> tb
         menarche
age.group  No Yes
  (8,10]  100   0
  (10,12]  97   4
  (12,13]  32  21
  (13,14]  22  20
  (14,15]   5  36
  (15,16]   0  31
  (16,18]   0 105
  (18,20]   0  46
> rel.freq <- prop.table(tb,1)[,2]
> rel.freq
    (8,10]    (10,12]    (12,13]    (13,14]    (14,15]    (15,16] 
0.00000000 0.03960396 0.39622642 0.47619048 0.87804878 1.00000000 
   (16,18]    (18,20] 
1.00000000 1.00000000 
> points(rel.freq ~ c(9,11,12.5,13.5,14.5,15.5,17,19),pch=5)
> if (.make.epsf) dev.copy2eps(file="menarche-fit+obs.ps")
> age.gr <- cut(age,c(8,12,13,14,20))
> summary(glm(menarche~age+age.gr,binomial))

Call:
glm(formula = menarche ~ age + age.gr, family = binomial)

Coefficients:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept)   -21.5683     5.0645  -4.259 2.06e-05 ***
age             1.6250     0.4416   3.680 0.000233 ***
age.gr(12,13]   0.7296     0.7856   0.929 0.353024    
age.gr(13,14]  -0.5219     1.1184  -0.467 0.640765    
age.gr(14,20]   0.2751     1.6065   0.171 0.864053    
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 719.39  on 518  degrees of freedom
Residual deviance: 192.61  on 514  degrees of freedom
AIC: 202.61

Number of Fisher Scoring iterations: 8

> anova(glm(menarche~age+age.gr,binomial))
Analysis of Deviance Table

Model: binomial, link: logit

Response: menarche

Terms added sequentially (first to last)


       Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
NULL                     518     719.39             
age     1   518.73       517     200.66  < 2e-16 ***
age.gr  3     8.06       514     192.61  0.04483 *  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> 1-pchisq(8.058,3)
[1] 0.04482811
> anova(glm(menarche~age+I(age^2)+I(age^3)+age.gr,binomial))
Analysis of Deviance Table

Model: binomial, link: logit

Response: menarche

Terms added sequentially (first to last)


         Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
NULL                       518     719.39              
age       1   518.73       517     200.66 < 2.2e-16 ***
I(age^2)  1     0.05       516     200.61  0.819906    
I(age^3)  1     8.82       515     191.80  0.002984 ** 
age.gr    3     3.34       512     188.46  0.342745    
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
Warning messages:
1: glm.fit: fitted probabilities numerically 0 or 1 occurred 
2: glm.fit: fitted probabilities numerically 0 or 1 occurred 
> glm.menarche <- glm(menarche~age+I(age^2)+I(age^3), binomial)
Warning message:
glm.fit: fitted probabilities numerically 0 or 1 occurred 
> predicted.probability <-
+     predict(glm.menarche, newages, type="resp")
> plot(predicted.probability ~ Age, type="l")
> points(rel.freq~c(9,11,12.5,13.5,14.5,15.5,17,19), pch=5)
> if (.make.epsf) dev.copy2eps(file="menarche-cubic.ps")
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> library(survival)

Attaching package: `survival'

The following object is masked from `package:ISwR':

    lung

> attach(melanom)
> names(melanom)
[1] "no"     "status" "days"   "ulc"    "thick"  "sex"   
> Surv(days, status==1)
  [1]   10+   30+   35+   99+  185   204   210   232   232+  279 
 [11]  295   355+  386   426   469   493+  529   621   629   659 
 [21]  667   718   752   779   793   817   826+  833   858   869 
 [31]  872   967   977   982  1041  1055  1062  1075  1156  1228 
 [41] 1252  1271  1312  1427+ 1435  1499+ 1506  1508+ 1510+ 1512+
 [51] 1516  1525+ 1542+ 1548  1557+ 1560  1563+ 1584  1605+ 1621 
 [61] 1627+ 1634+ 1641+ 1641+ 1648+ 1652+ 1654+ 1654+ 1667  1678+
 [71] 1685+ 1690  1710+ 1710+ 1726  1745+ 1762+ 1779+ 1787+ 1787+
 [81] 1793+ 1804+ 1812+ 1836+ 1839+ 1839+ 1854+ 1856+ 1860+ 1864+
 [91] 1899+ 1914+ 1919+ 1920+ 1927+ 1933  1942+ 1955+ 1956+ 1958+
[101] 1963+ 1970+ 2005+ 2007+ 2011+ 2024+ 2028+ 2038+ 2056+ 2059+
[111] 2061  2062  2075+ 2085+ 2102+ 2103  2104+ 2108  2112+ 2150+
[121] 2156+ 2165+ 2209+ 2227+ 2227+ 2256  2264+ 2339+ 2361+ 2387+
[131] 2388  2403+ 2426+ 2426+ 2431+ 2460+ 2467  2492+ 2493+ 2521+
[141] 2542+ 2559+ 2565  2570+ 2660+ 2666+ 2676+ 2738+ 2782  2787+
[151] 2984+ 3032+ 3040+ 3042  3067+ 3079+ 3101+ 3144+ 3152+ 3154+
[161] 3180+ 3182+ 3185+ 3199+ 3228+ 3229+ 3278+ 3297+ 3328+ 3330+
[171] 3338  3383+ 3384+ 3385+ 3388+ 3402+ 3441+ 3458+ 3459+ 3459+
[181] 3476+ 3523+ 3667+ 3695+ 3695+ 3776+ 3776+ 3830+ 3856+ 3872+
[191] 3909+ 3968+ 4001+ 4103+ 4119+ 4124+ 4207+ 4310+ 4390+ 4479+
[201] 4492+ 4668+ 4688+ 4926+ 5565+
> survfit(Surv(days,status==1)~1)
Call: survfit(formula = Surv(days, status == 1) ~ 1)

       n events median 0.95LCL 0.95UCL
[1,] 205     57     NA      NA      NA
> surv.all <- survfit(Surv(days,status==1)~1)
> summary(surv.all)
Call: survfit(formula = Surv(days, status == 1) ~ 1)

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
  185    201       1    0.995 0.00496        0.985        1.000
  204    200       1    0.990 0.00700        0.976        1.000
  210    199       1    0.985 0.00855        0.968        1.000
  232    198       1    0.980 0.00985        0.961        1.000
  279    196       1    0.975 0.01100        0.954        0.997
  295    195       1    0.970 0.01202        0.947        0.994
  386    193       1    0.965 0.01297        0.940        0.991
  426    192       1    0.960 0.01384        0.933        0.988
  469    191       1    0.955 0.01465        0.927        0.984
  529    189       1    0.950 0.01542        0.920        0.981
  621    188       1    0.945 0.01615        0.914        0.977
  629    187       1    0.940 0.01683        0.907        0.973
  659    186       1    0.935 0.01748        0.901        0.970
  667    185       1    0.930 0.01811        0.895        0.966
  718    184       1    0.925 0.01870        0.889        0.962
  752    183       1    0.920 0.01927        0.883        0.958
  779    182       1    0.915 0.01981        0.877        0.954
  793    181       1    0.910 0.02034        0.871        0.950
  817    180       1    0.904 0.02084        0.865        0.946
  833    178       1    0.899 0.02134        0.859        0.942
  858    177       1    0.894 0.02181        0.853        0.938
  869    176       1    0.889 0.02227        0.847        0.934
  872    175       1    0.884 0.02272        0.841        0.930
  967    174       1    0.879 0.02315        0.835        0.926
  977    173       1    0.874 0.02357        0.829        0.921
  982    172       1    0.869 0.02397        0.823        0.917
 1041    171       1    0.864 0.02436        0.817        0.913
 1055    170       1    0.859 0.02474        0.812        0.909
 1062    169       1    0.854 0.02511        0.806        0.904
 1075    168       1    0.849 0.02547        0.800        0.900
 1156    167       1    0.844 0.02582        0.794        0.896
 1228    166       1    0.838 0.02616        0.789        0.891
 1252    165       1    0.833 0.02649        0.783        0.887
 1271    164       1    0.828 0.02681        0.777        0.883
 1312    163       1    0.823 0.02713        0.772        0.878
 1435    161       1    0.818 0.02744        0.766        0.874
 1506    159       1    0.813 0.02774        0.760        0.869
 1516    155       1    0.808 0.02805        0.755        0.865
 1548    152       1    0.802 0.02837        0.749        0.860
 1560    150       1    0.797 0.02868        0.743        0.855
 1584    148       1    0.792 0.02899        0.737        0.851
 1621    146       1    0.786 0.02929        0.731        0.846
 1667    137       1    0.780 0.02963        0.725        0.841
 1690    134       1    0.775 0.02998        0.718        0.836
 1726    131       1    0.769 0.03033        0.712        0.831
 1933    110       1    0.762 0.03085        0.704        0.825
 2061     95       1    0.754 0.03155        0.694        0.818
 2062     94       1    0.746 0.03221        0.685        0.812
 2103     90       1    0.737 0.03290        0.676        0.805
 2108     88       1    0.729 0.03358        0.666        0.798
 2256     80       1    0.720 0.03438        0.656        0.791
 2388     75       1    0.710 0.03523        0.645        0.783
 2467     69       1    0.700 0.03619        0.633        0.775
 2565     63       1    0.689 0.03729        0.620        0.766
 2782     57       1    0.677 0.03854        0.605        0.757
 3042     52       1    0.664 0.03994        0.590        0.747
 3338     35       1    0.645 0.04307        0.566        0.735
> plot(surv.all)
> if (.make.epsf) dev.copy2eps(file="surv-all.ps")
> surv.bysex <- survfit(Surv(days,status==1)~sex)
> plot(surv.bysex)
> if (.make.epsf) dev.copy2eps(file="surv-bysex.ps")
> plot(surv.bysex, conf.int=T, col=c("black","gray"))
> survdiff(Surv(days,status==1)~sex)
Call:
survdiff(formula = Surv(days, status == 1) ~ sex)

        N Observed Expected (O-E)^2/E (O-E)^2/V
sex=1 126       28     37.1      2.25      6.47
sex=2  79       29     19.9      4.21      6.47

 Chisq= 6.5  on 1 degrees of freedom, p= 0.01 
> survdiff(Surv(days,status==1)~sex+strata(ulc))
Call:
survdiff(formula = Surv(days, status == 1) ~ sex + strata(ulc))

        N Observed Expected (O-E)^2/E (O-E)^2/V
sex=1 126       28     34.7      1.28      3.31
sex=2  79       29     22.3      1.99      3.31

 Chisq= 3.3  on 1 degrees of freedom, p= 0.07 
> summary(coxph(Surv(days,status==1)~sex))
Call:
coxph(formula = Surv(days, status == 1) ~ sex)

  n= 205, number of events= 57 

      coef exp(coef) se(coef)     z Pr(>|z|)  
sex 0.6622    1.9390   0.2651 2.498   0.0125 *
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

    exp(coef) exp(-coef) lower .95 upper .95
sex     1.939     0.5157     1.153      3.26

Concordance= 0.59  (se = 0.034 )
Likelihood ratio test= 6.15  on 1 df,   p=0.01
Wald test            = 6.24  on 1 df,   p=0.01
Score (logrank) test = 6.47  on 1 df,   p=0.01

> summary(coxph(Surv(days,status==1)~sex+log(thick)+strata(ulc)))
Call:
coxph(formula = Surv(days, status == 1) ~ sex + log(thick) + 
    strata(ulc))

  n= 205, number of events= 57 

             coef exp(coef) se(coef)     z Pr(>|z|)   
sex        0.3600    1.4333   0.2702 1.332   0.1828   
log(thick) 0.5599    1.7505   0.1784 3.139   0.0017 **
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

           exp(coef) exp(-coef) lower .95 upper .95
sex            1.433     0.6977     0.844     2.434
log(thick)     1.750     0.5713     1.234     2.483

Concordance= 0.673  (se = 0.039 )
Likelihood ratio test= 13.3  on 2 df,   p=0.001
Wald test            = 12.88  on 2 df,   p=0.002
Score (logrank) test = 12.98  on 2 df,   p=0.002

> plot(survfit(coxph(Surv(days,status==1)~
+              log(thick)+sex+strata(ulc))))
> if (.make.epsf) dev.copy2eps(file="surv-cox.ps")
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> names(eba1977)
[1] "city"  "age"   "pop"   "cases"
> attach(eba1977)
> fit <- glm(cases~city+age+offset(log(pop)), family=poisson)
> summary(fit)

Call:
glm(formula = cases ~ city + age + offset(log(pop)), family = poisson)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -5.6321     0.2003 -28.125  < 2e-16 ***
cityHorsens  -0.3301     0.1815  -1.818   0.0690 .  
cityKolding  -0.3715     0.1878  -1.978   0.0479 *  
cityVejle    -0.2723     0.1879  -1.450   0.1472    
age55-59      1.1010     0.2483   4.434 9.23e-06 ***
age60-64      1.5186     0.2316   6.556 5.53e-11 ***
age65-69      1.7677     0.2294   7.704 1.31e-14 ***
age70-74      1.8569     0.2353   7.891 3.00e-15 ***
age75+        1.4197     0.2503   5.672 1.41e-08 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 129.908  on 23  degrees of freedom
Residual deviance:  23.447  on 15  degrees of freedom
AIC: 137.84

Number of Fisher Scoring iterations: 5

> min(fitted(fit))
[1] 6.731286
> pchisq(deviance(fit), df.residual(fit), lower=F)
[1] 0.07509017
> pchisq(23.45, 15, lower=F)
[1] 0.07504166
> drop1(fit, test="Chisq")
Single term deletions

Model:
cases ~ city + age + offset(log(pop))
       Df Deviance    AIC     LRT Pr(>Chi)    
<none>      23.447 137.84                     
city    3   28.307 136.69   4.859   0.1824    
age     5  126.515 230.90 103.068   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> fit2 <- glm(cases~(city=="Fredericia")+age+offset(log(pop)),
+                  family=poisson)
> anova(fit, fit2, test="Chisq")
Analysis of Deviance Table

Model 1: cases ~ city + age + offset(log(pop))
Model 2: cases ~ (city == "Fredericia") + age + offset(log(pop))
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1        15     23.448                     
2        17     23.700 -2  -0.2526   0.8814
> drop1(fit2,  test="Chisq")
Single term deletions

Model:
cases ~ (city == "Fredericia") + age + offset(log(pop))
                     Df Deviance    AIC     LRT Pr(>Chi)    
<none>                    23.700 134.09                     
city == "Fredericia"  1   28.307 136.69   4.606  0.03185 *  
age                   5  127.117 227.50 103.417  < 2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> summary(fit2)

Call:
glm(formula = cases ~ (city == "Fredericia") + age + offset(log(pop)), 
    family = poisson)

Coefficients:
                         Estimate Std. Error z value Pr(>|z|)    
(Intercept)               -5.9589     0.1809 -32.947  < 2e-16 ***
city == "Fredericia"TRUE   0.3257     0.1481   2.200   0.0278 *  
age55-59                   1.1013     0.2483   4.436 9.17e-06 ***
age60-64                   1.5203     0.2316   6.564 5.23e-11 ***
age65-69                   1.7687     0.2294   7.712 1.24e-14 ***
age70-74                   1.8592     0.2352   7.904 2.71e-15 ***
age75+                     1.4212     0.2502   5.680 1.34e-08 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 129.91  on 23  degrees of freedom
Residual deviance:  23.70  on 17  degrees of freedom
AIC: 134.09

Number of Fisher Scoring iterations: 4

> cf <- coefficients(summary(fit2))
> est <- cf[,1]
> s.e. <- cf[,2]
> rr <- exp(cbind(est, est - s.e.*qnorm(.975), est
+                      + s.e.*qnorm(.975) ))
> colnames(rr) <- c("RateRatio", "CI.lo","CI.hi")
> rr
                           RateRatio       CI.lo        CI.hi
(Intercept)              0.002582626 0.001811788  0.003681423
city == "Fredericia"TRUE 1.384992752 1.036131057  1.851314957
age55-59                 3.008134852 1.849135187  4.893571521
age60-64                 4.573665854 2.904833526  7.201245496
age65-69                 5.863391064 3.740395488  9.191368903
age70-74                 6.418715646 4.047748963 10.178474731
age75+                   4.142034525 2.536571645  6.763637070
> exp(cbind(coef(fit2), confint(fit2)))
Waiting for profiling to be done...
                                           2.5 %       97.5 %
(Intercept)              0.002582626 0.001776585  0.003617461
city == "Fredericia"TRUE 1.384992752 1.029364554  1.841214351
age55-59                 3.008134852 1.843566451  4.902377896
age60-64                 4.573665854 2.912314202  7.248205161
age65-69                 5.863391064 3.752718512  9.256967983
age70-74                 6.418715646 4.053251339 10.234350656
age75+                   4.142034525 2.527103521  6.771889935
> head(nickel.expand)
  agr  ygr  id icd exposure      dob  age1st   agein ageout lung
1  20 1931 325   0        0 1910.500 14.0737 23.7465     25    6
2  20 1931 273   0        0 1909.500 14.6913 24.7465     25    6
3  20 1931 110   0        0 1909.247 14.0302 24.9999     25    6
4  20 1931 574   0        0 1909.729 14.0356 24.5177     25    6
5  20 1931 213   0        0 1910.129 14.2018 24.1177     25    6
6  20 1931 546   0        0 1909.500 14.4945 24.7465     25    6
  nasal other
1     0  3116
2     0  3116
3     0  3116
4     0  3116
5     0  3116
6     0  3116
> subset(nickel.expand, id==325)
    agr  ygr  id icd exposure    dob  age1st   agein  ageout lung
1    20 1931 325   0        0 1910.5 14.0737 23.7465 25.0000    6
13   25 1931 325   0        0 1910.5 14.0737 25.0000 30.0000   14
172  30 1936 325   0        0 1910.5 14.0737 30.0000 35.0000   30
391  35 1941 325   0        0 1910.5 14.0737 35.0000 40.0000   81
728  40 1946 325 434        0 1910.5 14.0737 40.0000 43.0343  236
    nasal other
1       0  3116
13      0  3024
172     1  3188
391     1  3549
728     3  3643
> nickel.expand <- within(nickel.expand,
+      lung.cancer <- as.numeric(icd %in% c(162,163)))
> attach(nickel.expand)
The following object is masked from package:ISwR:

    lung

> pyr <- tapply(ageout-agein,list(ygr,agr), sum)
> print(round(pyr), na.print="-")
     20 25  30  35  40  45  50  55  60  65  70  75 80
1931  3 86 268 446 446 431 455 323 159  23   4   -  -
1936  -  - 100 327 504 512 503 472 314 130  20   5  -
1941  -  -   0 105 336 481 482 445 368 235  80  14  3
1946  -  -   -   - 102 335 461 404 369 263 157  43 10
1951  -  -   -   -   -  95 299 415 334 277 181  92 31
1956  -  -   -   -   -   -  89 252 364 257 181 101 52
1961  -  -   -   -   -   -   -  71 221 284 150 104 44
1966  -  -   -   -   -   -   -   -  66 168 208  93 51
1971  -  -   -   -   -   -   -   -   -  57 133 131 54
1976  -  -   -   -   -   -   -   -   -   -  31  68 53
> count <- tapply(lung.cancer, list(ygr, agr), sum)
> print(count, na.print="-")
     20 25 30 35 40 45 50 55 60 65 70 75 80
1931  0  0  0  0  0  4  2  2  2  0  0  -  -
1936  -  -  0  0  2  3  4  6  5  1  0  0  -
1941  -  -  0  0  0  3  7  5  6  3  2  0  0
1946  -  -  -  -  0  0  8  7  6  2  2  0  0
1951  -  -  -  -  -  0  3  3  9  6  1  0  0
1956  -  -  -  -  -  -  0  4  3  6  1  2  0
1961  -  -  -  -  -  -  -  0  1  1  3  2  1
1966  -  -  -  -  -  -  -  -  2  0  0  1  0
1971  -  -  -  -  -  -  -  -  -  0  0  2  2
1976  -  -  -  -  -  -  -  -  -  -  0  1  1
> print(round(count/pyr*1000, 1), na.print="-")
     20 25 30 35 40  45   50   55   60   65   70   75   80
1931  0  0  0  0  0 9.3  4.4  6.2 12.6  0.0  0.0    -    -
1936  -  -  0  0  4 5.9  7.9 12.7 15.9  7.7  0.0  0.0    -
1941  -  -  0  0  0 6.2 14.5 11.2 16.3 12.8 25.0  0.0  0.0
1946  -  -  -  -  0 0.0 17.4 17.3 16.3  7.6 12.8  0.0  0.0
1951  -  -  -  -  - 0.0 10.0  7.2 27.0 21.7  5.5  0.0  0.0
1956  -  -  -  -  -   -  0.0 15.9  8.2 23.4  5.5 19.8  0.0
1961  -  -  -  -  -   -    -  0.0  4.5  3.5 19.9 19.3 22.8
1966  -  -  -  -  -   -    -    - 30.1  0.0  0.0 10.7  0.0
1971  -  -  -  -  -   -    -    -    -  0.0  0.0 15.2 36.8
1976  -  -  -  -  -   -    -    -    -    -  0.0 14.6 19.0
> expect.count <- tapply(lung/1e6*(ageout-agein),
+                        list(ygr,agr), sum)
> print(round(expect.count, 1), na.print="-")
     20 25 30 35  40  45  50  55  60  65  70  75  80
1931  0  0  0  0 0.1 0.1 0.2 0.2 0.1 0.0 0.0   -   -
1936  -  -  0  0 0.1 0.1 0.2 0.3 0.2 0.1 0.0 0.0   -
1941  -  -  0  0 0.1 0.2 0.3 0.4 0.4 0.2 0.1 0.0 0.0
1946  -  -  -  - 0.0 0.2 0.4 0.5 0.6 0.5 0.2 0.0 0.0
1951  -  -  -  -   - 0.1 0.4 0.8 0.9 0.8 0.5 0.2 0.0
1956  -  -  -  -   -   - 0.1 0.6 1.2 1.0 0.7 0.3 0.1
1961  -  -  -  -   -   -   - 0.2 0.8 1.4 0.7 0.5 0.1
1966  -  -  -  -   -   -   -   - 0.2 0.9 1.3 0.6 0.2
1971  -  -  -  -   -   -   -   -   - 0.3 0.9 1.0 0.3
1976  -  -  -  -   -   -   -   -   -   - 0.2 0.6 0.4
> expect.tot <- sum(lung/1e6*(ageout-agein))
> expect.tot
[1] 24.19893
> count.tot <- sum(lung.cancer)
> count.tot
[1] 137
> count.tot/expect.tot
[1] 5.661408
> fit <- glm(lung.cancer ~ 1, poisson,
+            offset = log((ageout-agein)*lung/1e6))
> summary(fit)

Call:
glm(formula = lung.cancer ~ 1, family = poisson, offset = log((ageout - 
    agein) * lung/1e+06))

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  1.73367    0.08544   20.29   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 1175.6  on 3723  degrees of freedom
Residual deviance: 1175.6  on 3723  degrees of freedom
AIC: 1451.6

Number of Fisher Scoring iterations: 7

> exp(coef(fit))
(Intercept) 
   5.661408 
> tapply(lung.cancer, agr, sum)
20 25 30 35 40 45 50 55 60 65 70 75 80 
 0  0  0  0  2 10 24 27 34 19  9  8  4 
> tapply(lung.cancer, ygr, sum)
1931 1936 1941 1946 1951 1956 1961 1966 1971 1976 
  10   21   26   25   22   16    8    3    4    2 
> detach()
> nickel.expand <- within(nickel.expand,{
+     A <- factor(agr)
+     Y <- factor(ygr)
+     lv <- levels(A)
+     lv[1:6] <- "< 50"
+     lv[11:13] <- "70+"
+     levels(A) <- lv
+     lv <- levels(Y)
+     lv[7:10] <- "1961ff"
+     levels(Y) <- lv
+     rm(lv)
+ })
> attach(nickel.expand)
The following object is masked from package:ISwR:

    lung

> fit <- glm(lung.cancer ~ A + Y, poisson,
+            offset=log((ageout-agein)*lung/1e6))
> drop1(fit, test="Chisq")
Single term deletions

Model:
lung.cancer ~ A + Y
       Df Deviance    AIC    LRT Pr(>Chi)    
<none>      1069.7 1367.7                    
A       5   1073.8 1361.8  4.082   0.5376    
Y       6   1118.5 1404.5 48.770 8.29e-09 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> fit <- glm(lung.cancer ~ Y - 1, poisson,
+            offset=log((ageout-agein)*lung/1e6))
> summary(fit)

Call:
glm(formula = lung.cancer ~ Y - 1, family = poisson, offset = log((ageout - 
    agein) * lung/1e+06))

Coefficients:
        Estimate Std. Error z value Pr(>|z|)    
Y1931     2.6178     0.3162   8.279  < 2e-16 ***
Y1936     3.0126     0.2182  13.805  < 2e-16 ***
Y1941     2.7814     0.1961  14.182  < 2e-16 ***
Y1946     2.2787     0.2000  11.394  < 2e-16 ***
Y1951     1.8038     0.2132   8.461  < 2e-16 ***
Y1956     1.3698     0.2500   5.479 4.27e-08 ***
Y1961ff   0.4746     0.2425   1.957   0.0504 .  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 1425.0  on 3724  degrees of freedom
Residual deviance: 1073.8  on 3717  degrees of freedom
AIC: 1361.8

Number of Fisher Scoring iterations: 7

> round(exp(coef(fit)), 1)
  Y1931   Y1936   Y1941   Y1946   Y1951   Y1956 Y1961ff 
   13.7    20.3    16.1     9.8     6.1     3.9     1.6 
> expect.count <-  tapply(lung/1e6*(ageout-agein), Y, sum)
> count <- tapply(lung.cancer, Y, sum)
> cbind(count=count, expect=round(expect.count,1),
+       SMR= round(count/expect.count, 1))
       count expect  SMR
1931      10    0.7 13.7
1936      21    1.0 20.3
1941      26    1.6 16.1
1946      25    2.6  9.8
1951      22    3.6  6.1
1956      16    4.1  3.9
1961ff    17   10.6  1.6
> detach()
> nickel.expand <- within(nickel.expand,{
+     TFE <- cut(agein-age1st, c(0,20,30,40,50,100), right=F)
+     AFE <- cut(age1st, c(0, 20, 27.5, 35, 100), right=F)
+     YFE <- cut(dob + age1st, c(0, 1910, 1915, 1920, 1925),right=F)
+     EXP <- cut(exposure, c(0, 0.5, 4.5, 8.5, 12.5, 25), right=F)
+ })
> attach(nickel.expand)
The following object is masked from package:ISwR:

    lung

> fit <- glm(lung.cancer ~ TFE + AFE + YFE + EXP, poisson,
+            offset=log((ageout-agein)*lung/1e6))
> drop1(fit, test="Chisq")
Single term deletions

Model:
lung.cancer ~ TFE + AFE + YFE + EXP
       Df Deviance    AIC    LRT  Pr(>Chi)    
<none>      1052.9 1356.9                     
TFE     4   1107.3 1403.3 54.425 4.287e-11 ***
AFE     3   1055.0 1353.0  2.079 0.5560839    
YFE     3   1058.1 1356.1  5.155 0.1608219    
EXP     4   1072.0 1368.0 19.072 0.0007606 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> summary(fit)

Call:
glm(formula = lung.cancer ~ TFE + AFE + YFE + EXP, family = poisson, 
    offset = log((ageout - agein) * lung/1e+06))

Coefficients:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)     2.36836    0.55716   4.251 2.13e-05 ***
TFE[20,30)     -0.21788    0.36022  -0.605 0.545284    
TFE[30,40)     -0.77184    0.36529  -2.113 0.034605 *  
TFE[40,50)     -1.87583    0.41707  -4.498 6.87e-06 ***
TFE[50,100)    -2.22142    0.55068  -4.034 5.48e-05 ***
AFE[20,27.5)    0.28506    0.31524   0.904 0.365868    
AFE[27.5,35)    0.21961    0.34011   0.646 0.518462    
AFE[35,100)    -0.10818    0.44412  -0.244 0.807556    
YFE[1910,1915)  0.04826    0.27193   0.177 0.859137    
YFE[1915,1920) -0.56397    0.37585  -1.501 0.133483    
YFE[1920,1925) -0.42520    0.30017  -1.417 0.156614    
EXP[0.5,4.5)    0.58373    0.21200   2.753 0.005897 ** 
EXP[4.5,8.5)    1.03175    0.28364   3.638 0.000275 ***
EXP[8.5,12.5)   1.18345    0.37406   3.164 0.001557 ** 
EXP[12.5,25)    1.28601    0.48236   2.666 0.007674 ** 
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 1175.6  on 3723  degrees of freedom
Residual deviance: 1052.9  on 3709  degrees of freedom
AIC: 1356.9

Number of Fisher Scoring iterations: 7

> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> ## IGNORE_RDIFF_BEGIN
> t <- 0:10
> y <- rnorm(11, mean=5*exp(-t/5), sd=.2)
> plot(y ~ t)
> if (.make.epsf) dev.copy2eps(file="nonlin-sim.ps")
> nlsout <- nls(y ~ A*exp(-alpha*t), start=c(A=2, alpha=0.05))
> summary(nlsout)

Formula: y ~ A * exp(-alpha * t)

Parameters:
      Estimate Std. Error t value Pr(>|t|)    
A      5.05784    0.17626   28.70 3.69e-10 ***
alpha  0.20282    0.01227   16.53 4.85e-08 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.2283 on 9 degrees of freedom

Number of iterations to convergence: 5 
Achieved convergence tolerance: 1.091e-08

> attach(subset(juul2, age<20 & age>5 & sex==1))
> plot(height ~ age)
> if (.make.epsf) dev.copy2eps(file="juul-a-h.ps")
> plot(log(5.3-log(height))~age)
Warning message:
In log(5.3 - log(height)) : NaNs produced
> if (.make.epsf) dev.copy2eps(file="gomp-dif.ps")
> lm(log(5.3-log(height))~age)

Call:
lm(formula = log(5.3 - log(height)) ~ age)

Coefficients:
(Intercept)          age  
     0.4200      -0.1538  

Warning message:
In log(5.3 - log(height)) : NaNs produced
> fit <- nls(height~alpha*exp(-beta*exp(-gamma*age)),
+            start=c(alpha=exp(5.3),beta=exp(0.42),gamma=0.15))
> summary(fit)

Formula: height ~ alpha * exp(-beta * exp(-gamma * age))

Parameters:
       Estimate Std. Error t value Pr(>|t|)    
alpha 2.428e+02  1.157e+01  20.978   <2e-16 ***
beta  1.176e+00  1.892e-02  62.149   <2e-16 ***
gamma 7.903e-02  8.569e-03   9.222   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 6.811 on 499 degrees of freedom

Number of iterations to convergence: 8 
Achieved convergence tolerance: 5.228e-06
  (3 observations deleted due to missingness)

> plot(age, height)
> newage <- seq(5,20,length=500)
> lines(newage, predict(fit,newdata=data.frame(age=newage)),lwd=2)
> if (.make.epsf) dev.copy2eps(file="gompertz.ps")
> 
> fit <- nls(log(height)~log(alpha*exp(-beta*exp(-gamma*age))),
+ start=c(alpha=exp(5.3),beta=exp(.12),gamma=.12))
> summary(fit)

Formula: log(height) ~ log(alpha * exp(-beta * exp(-gamma * age)))

Parameters:
       Estimate Std. Error t value Pr(>|t|)    
alpha 255.97696   15.03921  17.021   <2e-16 ***
beta    1.18949    0.02971  40.033   <2e-16 ***
gamma   0.07033    0.00811   8.673   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.04307 on 499 degrees of freedom

Number of iterations to convergence: 8 
Achieved convergence tolerance: 2.788e-06
  (3 observations deleted due to missingness)

> plot(age, log(height))
> lines(newage, predict(fit,newdata=data.frame(age=newage)),lwd=2)
> if (.make.epsf) dev.copy2eps(file="log-gompertz.ps")
> # count quoted in text, subtract 1 for SSD()
> length(ls(pattern="SS.*", "package:stats"))-1
[1] 10
> summary(nls(height~SSgompertz(age, Asym, b2, b3)))

Formula: height ~ SSgompertz(age, Asym, b2, b3)

Parameters:
      Estimate Std. Error t value Pr(>|t|)    
Asym 2.428e+02  1.157e+01   20.98   <2e-16 ***
b2   1.176e+00  1.892e-02   62.15   <2e-16 ***
b3   9.240e-01  7.918e-03  116.69   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 6.811 on 499 degrees of freedom

Number of iterations to convergence: 5 
Achieved convergence tolerance: 1.411e-06
  (3 observations deleted due to missingness)

> cf <- coef(nls(height ~ SSgompertz(age, Asym, b2, b3)))
> summary(nls(log(height) ~
+                log(as.vector(SSgompertz(age,Asym, b2, b3))),
+             start=as.list(cf)))

Formula: log(height) ~ log(as.vector(SSgompertz(age, Asym, b2, b3)))

Parameters:
      Estimate Std. Error t value Pr(>|t|)    
Asym 2.560e+02  1.504e+01   17.02   <2e-16 ***
b2   1.189e+00  2.971e-02   40.03   <2e-16 ***
b3   9.321e-01  7.559e-03  123.31   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.04307 on 499 degrees of freedom

Number of iterations to convergence: 5 
Achieved convergence tolerance: 5.487e-06
  (3 observations deleted due to missingness)

> par(mfrow=c(3,1))
> plot(profile(fit))
> if (.make.epsf) dev.copy2eps(file="gomp-prof.ps")
> confint(fit)
Waiting for profiling to be done...
             2.5%        97.5%
alpha 233.5007561 294.76267648
beta    1.1442683   1.27411439
gamma   0.0550577   0.08574974
> confint.default(fit)
             2.5 %      97.5 %
alpha 226.50064942 285.4532632
beta    1.13125577   1.2477285
gamma   0.05443818   0.0862269
> ## IGNORE_RDIFF_END
> rm(list=ls())
> while(search()[2] != "package:ISwR") detach()
> 
> proc.time()
   user  system elapsed 
   2.84    0.26    3.11