R Under development (unstable) (2024-09-30 r87204 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(ISwR) > .make.epsf <- Sys.getenv("EPSF")=="y" > ps.options(height=3.5, width=4.4, pointsize=8, horiz=F) > if (.make.epsf) X11(height=3.5,width=4.4,pointsize=8) else postscript() > dev.copy2eps <- function(...) invisible(grDevices::dev.copy2eps(...)) > par(mar=c(4,4,3,2)+.1) > options(width=66, useFancyQuotes="TeX") > suppressWarnings(RNGversion("1.5.1")) #Yes, Kinderman-Ramage was buggy... > set.seed(310367) > #Rprof(interval=.001) > plot(rnorm(500)) > 2 + 2 [1] 4 > exp(-2) [1] 0.1353353 > rnorm(15) [1] -0.18326112 -0.59753287 -0.67017905 0.16075723 1.28199575 [6] 0.07976977 0.13683303 0.77155246 0.85986694 -1.01506772 [11] -0.49448567 0.52433026 1.07732656 1.09748097 -1.09318582 > x <- 2 > x [1] 2 > x + x [1] 4 > weight <- c(60, 72, 57, 90, 95, 72) > weight [1] 60 72 57 90 95 72 > height <- c(1.75, 1.80, 1.65, 1.90, 1.74, 1.91) > bmi <- weight/height^2 > bmi [1] 19.59184 22.22222 20.93664 24.93075 31.37799 19.73630 > sum(weight) [1] 446 > sum(weight)/length(weight) [1] 74.33333 > xbar <- sum(weight)/length(weight) > weight - xbar [1] -14.333333 -2.333333 -17.333333 15.666667 20.666667 [6] -2.333333 > (weight - xbar)^2 [1] 205.444444 5.444444 300.444444 245.444444 427.111111 [6] 5.444444 > sum((weight - xbar)^2) [1] 1189.333 > sqrt(sum((weight - xbar)^2)/(length(weight) - 1)) [1] 15.42293 > mean(weight) [1] 74.33333 > sd(weight) [1] 15.42293 > t.test(bmi, mu=22.5) One Sample t-test data: bmi t = 0.34488, df = 5, p-value = 0.7442 alternative hypothesis: true mean is not equal to 22.5 95 percent confidence interval: 18.41734 27.84791 sample estimates: mean of x 23.13262 > plot(height,weight) > if (.make.epsf) dev.copy2eps(file="h-w.ps") > plot(height, weight, pch=2) > if (.make.epsf) dev.copy2eps(file="h-w-triangle.ps") > hh <- c(1.65, 1.70, 1.75, 1.80, 1.85, 1.90) > lines(hh, 22.5 * hh^2) > if (.make.epsf) dev.copy2eps(file="h-w-line.ps") > args(plot.default) function (x, y = NULL, type = "p", xlim = NULL, ylim = NULL, log = "", main = NULL, sub = NULL, xlab = NULL, ylab = NULL, ann = par("ann"), axes = TRUE, frame.plot = axes, panel.first = NULL, panel.last = NULL, asp = NA, xgap.axis = NA, ygap.axis = NA, ...) NULL > c("Huey","Dewey","Louie") [1] "Huey" "Dewey" "Louie" > c('Huey','Dewey','Louie') [1] "Huey" "Dewey" "Louie" > c(T,T,F,T) [1] TRUE TRUE FALSE TRUE > bmi > 25 [1] FALSE FALSE FALSE FALSE TRUE FALSE > cat(c("Huey","Dewey","Louie")) Huey Dewey Louie> cat("Huey","Dewey","Louie", "\n") Huey Dewey Louie > cat("What is \"R\"?\n") What is "R"? > c(42,57,12,39,1,3,4) [1] 42 57 12 39 1 3 4 > x <- c(1, 2, 3) > y <- c(10, 20) > c(x, y, 5) [1] 1 2 3 10 20 5 > x <- c(red="Huey", blue="Dewey", green="Louie") > x red blue green "Huey" "Dewey" "Louie" > names(x) [1] "red" "blue" "green" > c(FALSE, 3) [1] 0 3 > c(pi, "abc") [1] "3.14159265358979" "abc" > c(FALSE, "abc") [1] "FALSE" "abc" > seq(4,9) [1] 4 5 6 7 8 9 > seq(4,10,2) [1] 4 6 8 10 > 4:9 [1] 4 5 6 7 8 9 > oops <- c(7,9,13) > rep(oops,3) [1] 7 9 13 7 9 13 7 9 13 > rep(oops,1:3) [1] 7 9 9 13 13 13 > rep(1:2,c(10,15)) [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 > x <- 1:12 > dim(x) <- c(3,4) > x [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 > matrix(1:12,nrow=3,byrow=T) [,1] [,2] [,3] [,4] [1,] 1 2 3 4 [2,] 5 6 7 8 [3,] 9 10 11 12 > x <- matrix(1:12,nrow=3,byrow=T) > rownames(x) <- LETTERS[1:3] > x [,1] [,2] [,3] [,4] A 1 2 3 4 B 5 6 7 8 C 9 10 11 12 > t(x) A B C [1,] 1 5 9 [2,] 2 6 10 [3,] 3 7 11 [4,] 4 8 12 > cbind(A=1:4,B=5:8,C=9:12) A B C [1,] 1 5 9 [2,] 2 6 10 [3,] 3 7 11 [4,] 4 8 12 > rbind(A=1:4,B=5:8,C=9:12) [,1] [,2] [,3] [,4] A 1 2 3 4 B 5 6 7 8 C 9 10 11 12 > pain <- c(0,3,2,2,1) > fpain <- factor(pain,levels=0:3) > levels(fpain) <- c("none","mild","medium","severe") > fpain [1] none severe medium medium mild Levels: none mild medium severe > as.numeric(fpain) [1] 1 4 3 3 2 > levels(fpain) [1] "none" "mild" "medium" "severe" > intake.pre <- c(5260,5470,5640,6180,6390, + 6515,6805,7515,7515,8230,8770) > intake.post <- c(3910,4220,3885,5160,5645, + 4680,5265,5975,6790,6900,7335) > mylist <- list(before=intake.pre,after=intake.post) > mylist $before [1] 5260 5470 5640 6180 6390 6515 6805 7515 7515 8230 8770 $after [1] 3910 4220 3885 5160 5645 4680 5265 5975 6790 6900 7335 > mylist$before [1] 5260 5470 5640 6180 6390 6515 6805 7515 7515 8230 8770 > d <- data.frame(intake.pre,intake.post) > d intake.pre intake.post 1 5260 3910 2 5470 4220 3 5640 3885 4 6180 5160 5 6390 5645 6 6515 4680 7 6805 5265 8 7515 5975 9 7515 6790 10 8230 6900 11 8770 7335 > d$intake.pre [1] 5260 5470 5640 6180 6390 6515 6805 7515 7515 8230 8770 > intake.pre[5] [1] 6390 > intake.pre[c(3,5,7)] [1] 5640 6390 6805 > v <- c(3,5,7) > intake.pre[v] [1] 5640 6390 6805 > intake.pre[1:5] [1] 5260 5470 5640 6180 6390 > intake.pre[-c(3,5,7)] [1] 5260 5470 6180 6515 7515 7515 8230 8770 > intake.post[intake.pre > 7000] [1] 5975 6790 6900 7335 > intake.post[intake.pre > 7000 & intake.pre <= 8000] [1] 5975 6790 > intake.pre > 7000 & intake.pre <= 8000 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE [11] FALSE > d <- data.frame(intake.pre,intake.post) > d[5,1] [1] 6390 > d[5,] intake.pre intake.post 5 6390 5645 > d[d$intake.pre>7000,] intake.pre intake.post 8 7515 5975 9 7515 6790 10 8230 6900 11 8770 7335 > sel <- d$intake.pre>7000 > sel [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE [11] TRUE > d[sel,] intake.pre intake.post 8 7515 5975 9 7515 6790 10 8230 6900 11 8770 7335 > d[1:2,] intake.pre intake.post 1 5260 3910 2 5470 4220 > head(d) intake.pre intake.post 1 5260 3910 2 5470 4220 3 5640 3885 4 6180 5160 5 6390 5645 6 6515 4680 > energy expend stature 1 9.21 obese 2 7.53 lean 3 7.48 lean 4 8.08 lean 5 8.09 lean 6 10.15 lean 7 8.40 lean 8 10.88 lean 9 6.13 lean 10 7.90 lean 11 11.51 obese 12 12.79 obese 13 7.05 lean 14 11.85 obese 15 9.97 obese 16 7.48 lean 17 8.79 obese 18 9.69 obese 19 9.68 obese 20 7.58 lean 21 9.19 obese 22 8.11 lean > exp.lean <- energy$expend[energy$stature=="lean"] > exp.obese <- energy$expend[energy$stature=="obese"] > l <- split(energy$expend, energy$stature) > l $lean [1] 7.53 7.48 8.08 8.09 10.15 8.40 10.88 6.13 7.90 7.05 [11] 7.48 7.58 8.11 $obese [1] 9.21 11.51 12.79 11.85 9.97 8.79 9.69 9.68 9.19 > lapply(thuesen, mean, na.rm=T) $blood.glucose [1] 10.3 $short.velocity [1] 1.325652 > sapply(thuesen, mean, na.rm=T) blood.glucose short.velocity 10.300000 1.325652 > replicate(10,mean(rexp(20))) [1] 1.0677019 1.2166898 0.8923216 1.1281207 0.9636017 0.8406877 [7] 1.3357814 0.8249408 0.9488707 0.5724575 > m <- matrix(rnorm(12),4) > m [,1] [,2] [,3] [1,] -2.5710730 0.2524470 -0.16886795 [2,] 0.5509498 1.5430648 0.05359794 [3,] 2.4002722 0.1624704 -1.23407417 [4,] 1.4791103 0.9484525 -0.84670929 > apply(m, 2, min) [1] -2.5710730 0.1624704 -1.2340742 > tapply(energy$expend, energy$stature, median) lean obese 7.90 9.69 > intake$post [1] 3910 4220 3885 5160 5645 4680 5265 5975 6790 6900 7335 > sort(intake$post) [1] 3885 3910 4220 4680 5160 5265 5645 5975 6790 6900 7335 > order(intake$post) [1] 3 1 2 6 4 7 5 8 9 10 11 > o <- order(intake$post) > intake$post[o] [1] 3885 3910 4220 4680 5160 5265 5645 5975 6790 6900 7335 > intake$pre[o] [1] 5640 5260 5470 6515 6180 6805 6390 7515 7515 8230 8770 > intake.sorted <- intake[o,] > save.image("ch1.RData") > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > load("ch1.RData") > .foo <- dev.copy2eps > rm(dev.copy2eps) > ls() [1] "bmi" "d" "exp.lean" [4] "exp.obese" "fpain" "height" [7] "hh" "intake.post" "intake.pre" [10] "intake.sorted" "l" "m" [13] "mylist" "o" "oops" [16] "pain" "sel" "v" [19] "weight" "x" "xbar" [22] "y" > dev.copy2eps <- .foo > rm(height, weight) > sink("myfile") > ls() > sink() > attach(thuesen) > blood.glucose [1] 15.3 10.8 8.1 19.5 7.2 5.3 9.3 11.1 7.5 12.2 6.7 5.2 [13] 19.0 15.1 6.7 8.6 4.2 10.3 12.5 16.1 13.3 4.9 8.8 9.5 > search() [1] ".GlobalEnv" "thuesen" "package:ISwR" [4] "package:stats" "package:graphics" "package:grDevices" [7] "package:utils" "package:datasets" "package:methods" [10] "Autoloads" "package:base" > detach() > search() [1] ".GlobalEnv" "package:ISwR" "package:stats" [4] "package:graphics" "package:grDevices" "package:utils" [7] "package:datasets" "package:methods" "Autoloads" [10] "package:base" > thue2 <- subset(thuesen,blood.glucose<7) > thue2 blood.glucose short.velocity 6 5.3 1.49 11 6.7 1.25 12 5.2 1.19 15 6.7 1.52 17 4.2 1.12 22 4.9 1.03 > thue3 <- transform(thuesen,log.gluc=log(blood.glucose)) > thue3 blood.glucose short.velocity log.gluc 1 15.3 1.76 2.727853 2 10.8 1.34 2.379546 3 8.1 1.27 2.091864 4 19.5 1.47 2.970414 5 7.2 1.27 1.974081 6 5.3 1.49 1.667707 7 9.3 1.31 2.230014 8 11.1 1.09 2.406945 9 7.5 1.18 2.014903 10 12.2 1.22 2.501436 11 6.7 1.25 1.902108 12 5.2 1.19 1.648659 13 19.0 1.95 2.944439 14 15.1 1.28 2.714695 15 6.7 1.52 1.902108 16 8.6 NA 2.151762 17 4.2 1.12 1.435085 18 10.3 1.37 2.332144 19 12.5 1.19 2.525729 20 16.1 1.05 2.778819 21 13.3 1.32 2.587764 22 4.9 1.03 1.589235 23 8.8 1.12 2.174752 24 9.5 1.70 2.251292 > thue4 <- within(thuesen,{ + log.gluc <- log(blood.glucose) + m <- mean(log.gluc) + centered.log.gluc <- log.gluc - m + rm(m) + }) > thue4 blood.glucose short.velocity centered.log.gluc log.gluc 1 15.3 1.76 0.481879807 2.727853 2 10.8 1.34 0.133573113 2.379546 3 8.1 1.27 -0.154108960 2.091864 4 19.5 1.47 0.724441444 2.970414 5 7.2 1.27 -0.271891996 1.974081 6 5.3 1.49 -0.578266201 1.667707 7 9.3 1.31 -0.015958621 2.230014 8 11.1 1.09 0.160972087 2.406945 9 7.5 1.18 -0.231070001 2.014903 10 12.2 1.22 0.255462930 2.501436 11 6.7 1.25 -0.343865495 1.902108 12 5.2 1.19 -0.597314396 1.648659 13 19.0 1.95 0.698465958 2.944439 14 15.1 1.28 0.468721722 2.714695 15 6.7 1.52 -0.343865495 1.902108 16 8.6 NA -0.094210818 2.151762 17 4.2 1.12 -0.810888496 1.435085 18 10.3 1.37 0.086170874 2.332144 19 12.5 1.19 0.279755623 2.525729 20 16.1 1.05 0.532846250 2.778819 21 13.3 1.32 0.341791014 2.587764 22 4.9 1.03 -0.656737817 1.589235 23 8.8 1.12 -0.071221300 2.174752 24 9.5 1.70 0.005318777 2.251292 > d <- par(mar=c(5,4,4,2)+.1) > x <- runif(50,0,2) > y <- runif(50,0,2) > plot(x, y, main="Main title", sub="subtitle", + xlab="x-label", ylab="y-label") > text(0.6,0.6,"text at (0.6,0.6)") > abline(h=.6,v=.6) > for (side in 1:4) mtext(-1:4,side=side,at=.7,line=-1:4) > mtext(paste("side",1:4), side=1:4, line=-1,font=2) > if (.make.epsf) dev.copy2eps(file="layout.ps") > par(d) > plot(x, y, type="n", xlab="", ylab="", axes=F) > points(x,y) > axis(1) > axis(2,at=seq(0.2,1.8,0.2)) > box() > title(main="Main title", sub="subtitle", + xlab="x-label", ylab="y-label") > set.seed(1234) #make it happen.... > x <- rnorm(100) > hist(x,freq=F) > curve(dnorm(x),add=T) > h <- hist(x, plot=F) > ylim <- range(0, h$density, dnorm(0)) > hist(x, freq=F, ylim=ylim) > curve(dnorm(x), add=T) > if (.make.epsf) dev.copy2eps(file="hist+norm.ps") > hist.with.normal <- function(x, xlab=deparse(substitute(x)),...) + { + h <- hist(x, plot=F, ...) + s <- sd(x) + m <- mean(x) + ylim <- range(0,h$density,dnorm(0,sd=s)) + hist(x, freq=F, ylim=ylim, xlab=xlab, ...) + curve(dnorm(x,m,s), add=T) + } > hist.with.normal(rnorm(200)) > y <- 12345 > x <- y/2 > while (abs(x*x-y) > 1e-10) x <- (x + y/x)/2 > x [1] 111.1081 > x^2 [1] 12345 > x <- y/2 > repeat{ + x <- (x + y/x)/2 + if (abs(x*x-y) < 1e-10) break + } > x [1] 111.1081 > x <- seq(0, 1,.05) > plot(x, x, ylab="y", type="l") > for ( j in 2:8 ) lines(x, x^j) > t.test(bmi, mu=22.5)$p.value [1] 0.7442183 > print function (x, ...) UseMethod("print") > ## length(methods("print")) # quoted in text > ## (this test zapped 2024-10-01 due to platform dependency) > thuesen2 <- read.table( + system.file("rawdata","thuesen.txt",package="ISwR"), header=T) > thuesen2 blood.glucose short.velocity 1 15.3 1.76 2 10.8 1.34 3 8.1 1.27 4 19.5 1.47 5 7.2 1.27 6 5.3 1.49 7 9.3 1.31 8 11.1 1.09 9 7.5 1.18 10 12.2 1.22 11 6.7 1.25 12 5.2 1.19 13 19.0 1.95 14 15.1 1.28 15 6.7 1.52 16 8.6 NA 17 4.2 1.12 18 10.3 1.37 19 12.5 1.19 20 16.1 1.05 21 13.3 1.32 22 4.9 1.03 23 8.8 1.12 24 9.5 1.70 > levels(secretin$time) [1] "20" "30" "60" "90" "pre" > ## IGNORE_RDIFF_BEGIN > # keep CRAN happy - this output is obviously system-dependent > system.file("rawdata", "thuesen.txt", package="ISwR") [1] "D:/RCompile/CRANincoming/R-devel/lib/ISwR/rawdata/thuesen.txt" > ## IGNORE_RDIFF_END > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > sample(1:40,5) [1] 19 8 33 21 31 > sample(c("H","T"), 10, replace=T) [1] "T" "T" "H" "T" "T" "T" "H" "T" "H" "H" > sample(c("succ", "fail"), 10, replace=T, prob=c(0.9, 0.1)) [1] "fail" "fail" "succ" "succ" "fail" "succ" "succ" "succ" [9] "succ" "succ" > 1/prod(40:36) [1] 1.266449e-08 > prod(5:1)/prod(40:36) [1] 1.519738e-06 > 1/choose(40,5) [1] 1.519738e-06 > x <- seq(-4,4,0.1) > plot(x,dnorm(x),type="l") > if (.make.epsf) dev.copy2eps(file="bellcurve.ps") > x <- 0:50 > plot(x,dbinom(x,size=50,prob=.33),type="h") > if (.make.epsf) dev.copy2eps(file="binomdist.ps") > 1-pnorm(160,mean=132,sd=13) [1] 0.01562612 > pbinom(16,size=20,prob=.5) [1] 0.9987116 > 1-pbinom(15,size=20,prob=.5) [1] 0.005908966 > 1-pbinom(15,20,.5)+pbinom(4,20,.5) [1] 0.01181793 > xbar <- 83 > sigma <- 12 > n <- 5 > sem <- sigma/sqrt(n) > sem [1] 5.366563 > xbar + sem * qnorm(0.025) [1] 72.48173 > xbar + sem * qnorm(0.975) [1] 93.51827 > set.seed(310367) > rnorm(10) [1] -0.2996466 -0.1718510 -0.1955634 1.2280843 -2.6074190 [6] -0.2999453 -0.4655102 -1.5680666 1.2545876 -1.8028839 > rnorm(10) [1] 1.7082495 0.1432875 -1.0271750 -0.9246647 0.6402383 [6] 0.7201677 -0.3071239 1.2090712 0.8699669 0.5882753 > rnorm(10,mean=7,sd=5) [1] 8.934983 8.611855 4.675578 3.670129 4.223117 5.484290 [7] 12.141946 8.057541 -2.893164 13.590586 > rbinom(10,size=20,prob=.5) [1] 12 11 10 8 11 8 11 8 8 13 > ## no data sets used by exercises > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > x <- rnorm(50) > mean(x) [1] -0.1565061 > sd(x) [1] 1.175373 > var(x) [1] 1.381502 > median(x) [1] -0.3560654 > quantile(x) 0% 25% 50% 75% 100% -2.1565603 -0.9689894 -0.3560654 0.6133640 2.9887241 > pvec <- seq(0,1,0.1) > pvec [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 > quantile(x,pvec) 0% 10% 20% 30% 40% -2.15656030 -1.54828008 -1.13398001 -0.79701920 -0.62973546 50% 60% 70% 80% 90% -0.35606539 -0.05965905 0.36950570 0.85233024 1.29837806 100% 2.98872414 > attach(juul) > mean(igf1) [1] NA > mean(igf1,na.rm=T) [1] 340.168 > sum(!is.na(igf1)) [1] 1018 > summary(igf1) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 25.0 202.2 313.5 340.2 462.8 915.0 321 > summary(juul) age menarche sex igf1 Min. : 0.170 Min. :1.000 Min. :1.000 Min. : 25.0 1st Qu.: 9.053 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:202.2 Median :12.560 Median :1.000 Median :2.000 Median :313.5 Mean :15.095 Mean :1.476 Mean :1.534 Mean :340.2 3rd Qu.:16.855 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:462.8 Max. :83.000 Max. :2.000 Max. :2.000 Max. :915.0 NA's :5 NA's :635 NA's :5 NA's :321 tanner testvol Min. :1.00 Min. : 1.000 1st Qu.:1.00 1st Qu.: 1.000 Median :2.00 Median : 3.000 Mean :2.64 Mean : 7.896 3rd Qu.:5.00 3rd Qu.:15.000 Max. :5.00 Max. :30.000 NA's :240 NA's :859 > detach(juul) > juul$sex <- factor(juul$sex,labels=c("M","F")) > juul$menarche <- factor(juul$menarche,labels=c("No","Yes")) > juul$tanner <- factor(juul$tanner, + labels=c("I","II","III","IV","V")) > attach(juul) > summary(juul) age menarche sex igf1 Min. : 0.170 No :369 M :621 Min. : 25.0 1st Qu.: 9.053 Yes :335 F :713 1st Qu.:202.2 Median :12.560 NA's:635 NA's: 5 Median :313.5 Mean :15.095 Mean :340.2 3rd Qu.:16.855 3rd Qu.:462.8 Max. :83.000 Max. :915.0 NA's :5 NA's :321 tanner testvol I :515 Min. : 1.000 II :103 1st Qu.: 1.000 III : 72 Median : 3.000 IV : 81 Mean : 7.896 V :328 3rd Qu.:15.000 NA's:240 Max. :30.000 NA's :859 > hist(x) > if (.make.epsf) dev.copy2eps(file="hist.ps") > mid.age <- c(2.5,7.5,13,16.5,17.5,19,22.5,44.5,70.5) > acc.count <- c(28,46,58,20,31,64,149,316,103) > age.acc <- rep(mid.age,acc.count) > brk <- c(0,5,10,16,17,18,20,25,60,80) > hist(age.acc,breaks=brk) > if (.make.epsf) dev.copy2eps(file="hist-acc-right.ps") > n <- length(x) > plot(sort(x),(1:n)/n,type="s",ylim=c(0,1)) > if (.make.epsf) dev.copy2eps(file="empdist.ps") > qqnorm(x) > if (.make.epsf) dev.copy2eps(file="qqnorm.ps") > par(mfrow=c(1,2)) > boxplot(IgM) > boxplot(log(IgM)) > par(mfrow=c(1,1)) > if (.make.epsf) dev.copy2eps(file="boxplot-IgM.ps") > attach(red.cell.folate) > tapply(folate,ventilation,mean) N2O+O2,24h N2O+O2,op O2,24h 316.6250 256.4444 278.0000 > tapply(folate,ventilation,sd) N2O+O2,24h N2O+O2,op O2,24h 58.71709 37.12180 33.75648 > tapply(folate,ventilation,length) N2O+O2,24h N2O+O2,op O2,24h 8 9 5 > xbar <- tapply(folate, ventilation, mean) > s <- tapply(folate, ventilation, sd) > n <- tapply(folate, ventilation, length) > cbind(mean=xbar, std.dev=s, n=n) mean std.dev n N2O+O2,24h 316.6250 58.71709 8 N2O+O2,op 256.4444 37.12180 9 O2,24h 278.0000 33.75648 5 > tapply(igf1, tanner, mean) I II III IV V NA NA NA NA NA > tapply(igf1, tanner, mean, na.rm=T) I II III IV V 207.4727 352.6714 483.2222 513.0172 465.3344 > aggregate(juul[c("age","igf1")], + list(sex=juul$sex), mean, na.rm=T) sex age igf1 1 M 15.38436 310.8866 2 F 14.84363 368.1006 > aggregate(juul[c("age","igf1")], juul["sex"], mean, na.rm=T) sex age igf1 1 M 15.38436 310.8866 2 F 14.84363 368.1006 > by(juul, juul["sex"], summary) sex: M age menarche sex igf1 tanner Min. : 0.17 No : 0 M:621 Min. : 29.0 I :291 1st Qu.: 8.85 Yes : 0 F: 0 1st Qu.:176.0 II : 55 Median :12.38 NA's:621 Median :280.0 III : 34 Mean :15.38 Mean :310.9 IV : 41 3rd Qu.:16.77 3rd Qu.:430.2 V :124 Max. :83.00 Max. :915.0 NA's: 76 NA's :145 testvol Min. : 1.000 1st Qu.: 1.000 Median : 3.000 Mean : 7.896 3rd Qu.:15.000 Max. :30.000 NA's :141 ------------------------------------------------- sex: F age menarche sex igf1 tanner Min. : 0.25 No :369 M: 0 Min. : 25.0 I :224 1st Qu.: 9.30 Yes :335 F:713 1st Qu.:233.0 II : 48 Median :12.80 NA's: 9 Median :352.0 III : 38 Mean :14.84 Mean :368.1 IV : 40 3rd Qu.:16.93 3rd Qu.:483.0 V :204 Max. :75.12 Max. :914.0 NA's:159 NA's :176 testvol Min. : NA 1st Qu.: NA Median : NA Mean :NaN 3rd Qu.: NA Max. : NA NA's :713 > attach(energy) > expend.lean <- expend[stature=="lean"] > expend.obese <- expend[stature=="obese"] > par(mfrow=c(2,1)) > hist(expend.lean,breaks=10,xlim=c(5,13),ylim=c(0,4),col="white") > hist(expend.obese,breaks=10,xlim=c(5,13),ylim=c(0,4),col="grey") > par(mfrow=c(1,1)) > if (.make.epsf) dev.copy2eps(file="expend-hist-2on1.ps") > boxplot(expend ~ stature) > if (.make.epsf) dev.copy2eps(file="boxplots-expend-stat.ps") > boxplot(expend.lean,expend.obese) > opar <- par(mfrow=c(2,2), mex=0.8, mar=c(3,3,2,1)+.1) > stripchart(expend ~ stature) > stripchart(expend ~ stature, method="stack") > stripchart(expend ~ stature, method="jitter") > stripchart(expend ~ stature, method="jitter", jitter=.03) > par(opar) > if (.make.epsf) dev.copy2eps(file="stripcharts-expend-stat.ps") > caff.marital <- matrix(c(652,1537,598,242,36,46,38,21,218 + ,327,106,67), + nrow=3,byrow=T) > caff.marital [,1] [,2] [,3] [,4] [1,] 652 1537 598 242 [2,] 36 46 38 21 [3,] 218 327 106 67 > colnames(caff.marital) <- c("0","1-150","151-300",">300") > rownames(caff.marital) <- c("Married","Prev.married","Single") > caff.marital 0 1-150 151-300 >300 Married 652 1537 598 242 Prev.married 36 46 38 21 Single 218 327 106 67 > names(dimnames(caff.marital)) <- c("marital","consumption") > caff.marital consumption marital 0 1-150 151-300 >300 Married 652 1537 598 242 Prev.married 36 46 38 21 Single 218 327 106 67 > as.data.frame(as.table(caff.marital)) marital consumption Freq 1 Married 0 652 2 Prev.married 0 36 3 Single 0 218 4 Married 1-150 1537 5 Prev.married 1-150 46 6 Single 1-150 327 7 Married 151-300 598 8 Prev.married 151-300 38 9 Single 151-300 106 10 Married >300 242 11 Prev.married >300 21 12 Single >300 67 > table(sex) sex M F 621 713 > table(sex,menarche) menarche sex No Yes M 0 0 F 369 335 > table(menarche,tanner) tanner menarche I II III IV V No 221 43 32 14 2 Yes 1 1 5 26 202 > xtabs(~ tanner + sex, data=juul) sex tanner M F I 291 224 II 55 48 III 34 38 IV 41 40 V 124 204 > xtabs(~ dgn + diab + coma, data=stroke) , , coma = No diab dgn No Yes ICH 53 6 ID 143 21 INF 411 64 SAH 38 0 , , coma = Yes diab dgn No Yes ICH 19 1 ID 23 3 INF 23 2 SAH 9 0 > ftable(coma + diab ~ dgn, data=stroke) coma No Yes diab No Yes No Yes dgn ICH 53 6 19 1 ID 143 21 23 3 INF 411 64 23 2 SAH 38 0 9 0 > t(caff.marital) marital consumption Married Prev.married Single 0 652 36 218 1-150 1537 46 327 151-300 598 38 106 >300 242 21 67 > tanner.sex <- table(tanner,sex) > tanner.sex sex tanner M F I 291 224 II 55 48 III 34 38 IV 41 40 V 124 204 > margin.table(tanner.sex,1) tanner I II III IV V 515 103 72 81 328 > margin.table(tanner.sex,2) sex M F 545 554 > prop.table(tanner.sex,1) sex tanner M F I 0.5650485 0.4349515 II 0.5339806 0.4660194 III 0.4722222 0.5277778 IV 0.5061728 0.4938272 V 0.3780488 0.6219512 > tanner.sex/sum(tanner.sex) sex tanner M F I 0.26478617 0.20382166 II 0.05004550 0.04367607 III 0.03093722 0.03457689 IV 0.03730664 0.03639672 V 0.11282985 0.18562329 > total.caff <- margin.table(caff.marital,2) > total.caff consumption 0 1-150 151-300 >300 906 1910 742 330 > barplot(total.caff, col="white") > if (.make.epsf) dev.copy2eps(file="simple-bar.ps") > par(mfrow=c(2,2)) > barplot(caff.marital, col="white") > barplot(t(caff.marital), col="white") > barplot(t(caff.marital), col="white", beside=T) > barplot(prop.table(t(caff.marital),2), col="white", beside=T) > par(mfrow=c(1,1)) > if (.make.epsf) dev.copy2eps(file="mat-4-bar.ps") > barplot(prop.table(t(caff.marital),2),beside=T, + legend.text=colnames(caff.marital), + col=c("white","grey80","grey50","black")) > if (.make.epsf) dev.copy2eps(file="pretty-bar.ps") > dotchart(t(caff.marital), lcolor="black") > if (.make.epsf) dev.copy2eps(file="dotchart.ps") > opar <- par(mfrow=c(2,2),mex=0.8, mar=c(1,1,2,1)) > slices <- c("white","grey80","grey50","black") > pie(caff.marital["Married",], main="Married", col=slices) > pie(caff.marital["Prev.married",], + main="Previously married", col=slices) > pie(caff.marital["Single",], main="Single", col=slices) > par(opar) > if (.make.epsf) dev.copy2eps(file="pie.ps") > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > daily.intake <- c(5260,5470,5640,6180,6390,6515, + 6805,7515,7515,8230,8770) > mean(daily.intake) [1] 6753.636 > sd(daily.intake) [1] 1142.123 > quantile(daily.intake) 0% 25% 50% 75% 100% 5260 5910 6515 7515 8770 > t.test(daily.intake,mu=7725) One Sample t-test data: daily.intake t = -2.8208, df = 10, p-value = 0.01814 alternative hypothesis: true mean is not equal to 7725 95 percent confidence interval: 5986.348 7520.925 sample estimates: mean of x 6753.636 > t.test(daily.intake,mu=7725) One Sample t-test data: daily.intake t = -2.8208, df = 10, p-value = 0.01814 alternative hypothesis: true mean is not equal to 7725 95 percent confidence interval: 5986.348 7520.925 sample estimates: mean of x 6753.636 > wilcox.test(daily.intake, mu=7725) Wilcoxon signed rank test with continuity correction data: daily.intake V = 8, p-value = 0.0293 alternative hypothesis: true location is not equal to 7725 Warning message: In wilcox.test.default(daily.intake, mu = 7725) : cannot compute exact p-value with ties > attach(energy) > energy expend stature 1 9.21 obese 2 7.53 lean 3 7.48 lean 4 8.08 lean 5 8.09 lean 6 10.15 lean 7 8.40 lean 8 10.88 lean 9 6.13 lean 10 7.90 lean 11 11.51 obese 12 12.79 obese 13 7.05 lean 14 11.85 obese 15 9.97 obese 16 7.48 lean 17 8.79 obese 18 9.69 obese 19 9.68 obese 20 7.58 lean 21 9.19 obese 22 8.11 lean > t.test(expend~stature) Welch Two Sample t-test data: expend by stature t = -3.8555, df = 15.919, p-value = 0.001411 alternative hypothesis: true difference in means between group lean and group obese is not equal to 0 95 percent confidence interval: -3.459167 -1.004081 sample estimates: mean in group lean mean in group obese 8.066154 10.297778 > t.test(expend~stature, var.equal=T) Two Sample t-test data: expend by stature t = -3.9456, df = 20, p-value = 0.000799 alternative hypothesis: true difference in means between group lean and group obese is not equal to 0 95 percent confidence interval: -3.411451 -1.051796 sample estimates: mean in group lean mean in group obese 8.066154 10.297778 > var.test(expend~stature) F test to compare two variances data: expend by stature F = 0.78445, num df = 12, denom df = 8, p-value = 0.6797 alternative hypothesis: true ratio of variances is not equal to 1 95 percent confidence interval: 0.1867876 2.7547991 sample estimates: ratio of variances 0.784446 > wilcox.test(expend~stature) Wilcoxon rank sum test with continuity correction data: expend by stature W = 12, p-value = 0.002122 alternative hypothesis: true location shift is not equal to 0 Warning message: In wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...) : cannot compute exact p-value with ties > attach(intake) > intake pre post 1 5260 3910 2 5470 4220 3 5640 3885 4 6180 5160 5 6390 5645 6 6515 4680 7 6805 5265 8 7515 5975 9 7515 6790 10 8230 6900 11 8770 7335 > post - pre [1] -1350 -1250 -1755 -1020 -745 -1835 -1540 -1540 -725 -1330 [11] -1435 > t.test(pre, post, paired=T) Paired t-test data: pre and post t = 11.941, df = 10, p-value = 3.059e-07 alternative hypothesis: true mean difference is not equal to 0 95 percent confidence interval: 1074.072 1566.838 sample estimates: mean difference 1320.455 > t.test(pre, post) #WRONG! Welch Two Sample t-test data: pre and post t = 2.6242, df = 19.92, p-value = 0.01629 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: 270.5633 2370.3458 sample estimates: mean of x mean of y 6753.636 5433.182 > wilcox.test(pre, post, paired=T) Wilcoxon signed rank test with continuity correction data: pre and post V = 66, p-value = 0.00384 alternative hypothesis: true location shift is not equal to 0 Warning message: In wilcox.test.default(pre, post, paired = T) : cannot compute exact p-value with ties > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > attach(thuesen) > lm(short.velocity~blood.glucose) Call: lm(formula = short.velocity ~ blood.glucose) Coefficients: (Intercept) blood.glucose 1.09781 0.02196 > summary(lm(short.velocity~blood.glucose)) Call: lm(formula = short.velocity ~ blood.glucose) Residuals: Min 1Q Median 3Q Max -0.40141 -0.14760 -0.02202 0.03001 0.43490 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.09781 0.11748 9.345 6.26e-09 *** blood.glucose 0.02196 0.01045 2.101 0.0479 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.2167 on 21 degrees of freedom (1 observation deleted due to missingness) Multiple R-squared: 0.1737, Adjusted R-squared: 0.1343 F-statistic: 4.414 on 1 and 21 DF, p-value: 0.0479 > summary(lm(short.velocity~blood.glucose)) Call: lm(formula = short.velocity ~ blood.glucose) Residuals: Min 1Q Median 3Q Max -0.40141 -0.14760 -0.02202 0.03001 0.43490 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.09781 0.11748 9.345 6.26e-09 *** blood.glucose 0.02196 0.01045 2.101 0.0479 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.2167 on 21 degrees of freedom (1 observation deleted due to missingness) Multiple R-squared: 0.1737, Adjusted R-squared: 0.1343 F-statistic: 4.414 on 1 and 21 DF, p-value: 0.0479 > plot(blood.glucose,short.velocity) > abline(lm(short.velocity~blood.glucose)) > if (.make.epsf) dev.copy2eps(file="velo-gluc-line.ps") > lm.velo <- lm(short.velocity~blood.glucose) > fitted(lm.velo) 1 2 3 4 5 6 7 1.433841 1.335010 1.275711 1.526084 1.255945 1.214216 1.302066 8 9 10 11 12 13 14 1.341599 1.262534 1.365758 1.244964 1.212020 1.515103 1.429449 15 17 18 19 20 21 22 1.244964 1.190057 1.324029 1.372346 1.451411 1.389916 1.205431 23 24 1.291085 1.306459 > resid(lm.velo) 1 2 3 4 5 0.326158532 0.004989882 -0.005711308 -0.056084062 0.014054962 6 7 8 9 10 0.275783754 0.007933665 -0.251598875 -0.082533795 -0.145757649 11 12 13 14 15 0.005036223 -0.022019994 0.434897199 -0.149448964 0.275036223 17 18 19 20 21 -0.070057471 0.045971143 -0.182346406 -0.401411486 -0.069916424 22 23 24 -0.175431237 -0.171085074 0.393541161 > options(error=expression(NULL)) > plot(blood.glucose,short.velocity) > lines(blood.glucose,fitted(lm.velo)) Error in xy.coords(x, y) : 'x' and 'y' lengths differ Calls: lines -> lines.default -> plot.xy -> xy.coords > options(error=NULL) > lines(blood.glucose[!is.na(short.velocity)],fitted(lm.velo)) > cc <- complete.cases(thuesen) > options(na.action=na.exclude) > lm.velo <- lm(short.velocity~blood.glucose) > fitted(lm.velo) 1 2 3 4 5 6 7 1.433841 1.335010 1.275711 1.526084 1.255945 1.214216 1.302066 8 9 10 11 12 13 14 1.341599 1.262534 1.365758 1.244964 1.212020 1.515103 1.429449 15 16 17 18 19 20 21 1.244964 NA 1.190057 1.324029 1.372346 1.451411 1.389916 22 23 24 1.205431 1.291085 1.306459 > segments(blood.glucose,fitted(lm.velo), + blood.glucose,short.velocity) > if (.make.epsf) dev.copy2eps(file="velo-gluc-seg.ps") > plot(fitted(lm.velo),resid(lm.velo)) > if (.make.epsf) dev.copy2eps(file="velo-gluc-resid.ps") > qqnorm(resid(lm.velo)) > if (.make.epsf) dev.copy2eps(file="velo-gluc-qqnorm.ps") > predict(lm.velo) 1 2 3 4 5 6 7 1.433841 1.335010 1.275711 1.526084 1.255945 1.214216 1.302066 8 9 10 11 12 13 14 1.341599 1.262534 1.365758 1.244964 1.212020 1.515103 1.429449 15 16 17 18 19 20 21 1.244964 NA 1.190057 1.324029 1.372346 1.451411 1.389916 22 23 24 1.205431 1.291085 1.306459 > predict(lm.velo,int="c") fit lwr upr 1 1.433841 1.291371 1.576312 2 1.335010 1.240589 1.429431 3 1.275711 1.169536 1.381887 4 1.526084 1.306561 1.745607 5 1.255945 1.139367 1.372523 6 1.214216 1.069315 1.359118 7 1.302066 1.205244 1.398889 8 1.341599 1.246317 1.436881 9 1.262534 1.149694 1.375374 10 1.365758 1.263750 1.467765 11 1.244964 1.121641 1.368287 12 1.212020 1.065457 1.358583 13 1.515103 1.305352 1.724854 14 1.429449 1.290217 1.568681 15 1.244964 1.121641 1.368287 16 NA NA NA 17 1.190057 1.026217 1.353898 18 1.324029 1.230050 1.418008 19 1.372346 1.267629 1.477064 20 1.451411 1.295446 1.607377 21 1.389916 1.276444 1.503389 22 1.205431 1.053805 1.357057 23 1.291085 1.191084 1.391086 24 1.306459 1.210592 1.402326 > predict(lm.velo,int="p") fit lwr upr 1 1.433841 0.9612137 1.906469 2 1.335010 0.8745815 1.795439 3 1.275711 0.8127292 1.738693 4 1.526084 1.0248161 2.027352 5 1.255945 0.7904672 1.721423 6 1.214216 0.7408499 1.687583 7 1.302066 0.8411393 1.762993 8 1.341599 0.8809929 1.802205 9 1.262534 0.7979780 1.727090 10 1.365758 0.9037136 1.827802 11 1.244964 0.7777510 1.712177 12 1.212020 0.7381424 1.685898 13 1.515103 1.0180367 2.012169 14 1.429449 0.9577873 1.901111 15 1.244964 0.7777510 1.712177 16 NA NA NA 17 1.190057 0.7105546 1.669560 18 1.324029 0.8636906 1.784367 19 1.372346 0.9096964 1.834996 20 1.451411 0.9745421 1.928281 21 1.389916 0.9252067 1.854626 22 1.205431 0.7299634 1.680899 23 1.291085 0.8294798 1.752690 24 1.306459 0.8457315 1.767186 Warning message: In predict.lm(lm.velo, int = "p") : predictions on current data refer to _future_ responses > pred.frame <- data.frame(blood.glucose=4:20) > pp <- predict(lm.velo, int="p", newdata=pred.frame) > pc <- predict(lm.velo, int="c", newdata=pred.frame) > plot(blood.glucose,short.velocity, + ylim=range(short.velocity, pp, na.rm=T)) > pred.gluc <- pred.frame$blood.glucose > matlines(pred.gluc, pc, lty=c(1,2,2), col="black") > matlines(pred.gluc, pp, lty=c(1,3,3), col="black") > if (.make.epsf) dev.copy2eps(file="velo-gluc-final.ps") > options(error=expression(NULL)) > cor(blood.glucose,short.velocity) [1] NA > options(error=NULL) > cor(blood.glucose,short.velocity,use="complete.obs") [1] 0.4167546 > cor(thuesen,use="complete.obs") blood.glucose short.velocity blood.glucose 1.0000000 0.4167546 short.velocity 0.4167546 1.0000000 > cor.test(blood.glucose,short.velocity) Pearson's product-moment correlation data: blood.glucose and short.velocity t = 2.101, df = 21, p-value = 0.0479 alternative hypothesis: true correlation is not equal to 0 95 percent confidence interval: 0.005496682 0.707429479 sample estimates: cor 0.4167546 > cor.test(blood.glucose,short.velocity,method="spearman") Spearman's rank correlation rho data: blood.glucose and short.velocity S = 1380.4, p-value = 0.1392 alternative hypothesis: true rho is not equal to 0 sample estimates: rho 0.318002 Warning message: In cor.test.default(blood.glucose, short.velocity, method = "spearman") : Cannot compute exact p-value with ties > cor.test(blood.glucose,short.velocity,method="kendall") Kendall's rank correlation tau data: blood.glucose and short.velocity z = 1.5604, p-value = 0.1187 alternative hypothesis: true tau is not equal to 0 sample estimates: tau 0.2350616 Warning message: In cor.test.default(blood.glucose, short.velocity, method = "kendall") : Cannot compute exact p-value with ties > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > attach(red.cell.folate) > summary(red.cell.folate) folate ventilation Min. :206.0 N2O+O2,24h:8 1st Qu.:249.5 N2O+O2,op :9 Median :274.0 O2,24h :5 Mean :283.2 3rd Qu.:305.5 Max. :392.0 > anova(lm(folate~ventilation)) Analysis of Variance Table Response: folate Df Sum Sq Mean Sq F value Pr(>F) ventilation 2 15516 7757.9 3.7113 0.04359 * Residuals 19 39716 2090.3 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > attach(juul) > anova(lm(igf1~tanner)) ## WRONG! Analysis of Variance Table Response: igf1 Df Sum Sq Mean Sq F value Pr(>F) tanner 1 10985605 10985605 686.07 < 2.2e-16 *** Residuals 790 12649728 16012 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > juul$tanner <- factor(juul$tanner, + labels=c("I","II","III","IV","V")) > detach(juul) > attach(juul) > summary(tanner) I II III IV V NA's 515 103 72 81 328 240 > anova(lm(igf1~tanner)) Analysis of Variance Table Response: igf1 Df Sum Sq Mean Sq F value Pr(>F) tanner 4 12696217 3174054 228.35 < 2.2e-16 *** Residuals 787 10939116 13900 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > summary(lm(folate~ventilation)) Call: lm(formula = folate ~ ventilation) Residuals: Min 1Q Median 3Q Max -73.625 -35.361 -4.444 35.625 75.375 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 316.62 16.16 19.588 4.65e-14 *** ventilationN2O+O2,op -60.18 22.22 -2.709 0.0139 * ventilationO2,24h -38.62 26.06 -1.482 0.1548 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 45.72 on 19 degrees of freedom Multiple R-squared: 0.2809, Adjusted R-squared: 0.2052 F-statistic: 3.711 on 2 and 19 DF, p-value: 0.04359 > pairwise.t.test(folate, ventilation, p.adj="bonferroni") Pairwise comparisons using t tests with pooled SD data: folate and ventilation N2O+O2,24h N2O+O2,op N2O+O2,op 0.042 - O2,24h 0.464 1.000 P value adjustment method: bonferroni > pairwise.t.test(folate,ventilation) Pairwise comparisons using t tests with pooled SD data: folate and ventilation N2O+O2,24h N2O+O2,op N2O+O2,op 0.042 - O2,24h 0.310 0.408 P value adjustment method: holm > oneway.test(folate~ventilation) One-way analysis of means (not assuming equal variances) data: folate and ventilation F = 2.9704, num df = 2.000, denom df = 11.065, p-value = 0.09277 > pairwise.t.test(folate,ventilation,pool.sd=F) Pairwise comparisons using t tests with non-pooled SD data: folate and ventilation N2O+O2,24h N2O+O2,op N2O+O2,op 0.087 - O2,24h 0.321 0.321 P value adjustment method: holm > xbar <- tapply(folate, ventilation, mean) > s <- tapply(folate, ventilation, sd) > n <- tapply(folate, ventilation, length) > sem <- s/sqrt(n) > stripchart(folate~ventilation, method="jitter", + jitter=0.05, pch=16, vert=T) > arrows(1:3,xbar+sem,1:3,xbar-sem,angle=90,code=3,length=.1) > lines(1:3,xbar,pch=4,type="b",cex=2) > if (.make.epsf) dev.copy2eps(file="oneway.ps") > bartlett.test(folate~ventilation) Bartlett test of homogeneity of variances data: folate by ventilation Bartlett's K-squared = 2.0951, df = 2, p-value = 0.3508 > kruskal.test(folate~ventilation) Kruskal-Wallis rank sum test data: folate by ventilation Kruskal-Wallis chi-squared = 4.1852, df = 2, p-value = 0.1234 > attach(heart.rate) > heart.rate hr subj time 1 96 1 0 2 110 2 0 3 89 3 0 4 95 4 0 5 128 5 0 6 100 6 0 7 72 7 0 8 79 8 0 9 100 9 0 10 92 1 30 11 106 2 30 12 86 3 30 13 78 4 30 14 124 5 30 15 98 6 30 16 68 7 30 17 75 8 30 18 106 9 30 19 86 1 60 20 108 2 60 21 85 3 60 22 78 4 60 23 118 5 60 24 100 6 60 25 67 7 60 26 74 8 60 27 104 9 60 28 92 1 120 29 114 2 120 30 83 3 120 31 83 4 120 32 118 5 120 33 94 6 120 34 71 7 120 35 74 8 120 36 102 9 120 > gl(9,1,36) [1] 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9 1 2 3 4 [32] 5 6 7 8 9 Levels: 1 2 3 4 5 6 7 8 9 > gl(4,9,36,labels=c(0,30,60,120)) [1] 0 0 0 0 0 0 0 0 0 30 30 30 30 30 30 [16] 30 30 30 60 60 60 60 60 60 60 60 60 120 120 120 [31] 120 120 120 120 120 120 Levels: 0 30 60 120 > anova(lm(hr~subj+time)) Analysis of Variance Table Response: hr Df Sum Sq Mean Sq F value Pr(>F) subj 8 8966.6 1120.82 90.6391 4.863e-16 *** time 3 151.0 50.32 4.0696 0.01802 * Residuals 24 296.8 12.37 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > interaction.plot(time, subj, hr) > if (.make.epsf) dev.copy2eps(file="interaction-plot.ps") > interaction.plot(ordered(time),subj,hr) > friedman.test(hr~time|subj,data=heart.rate) Friedman rank sum test data: hr and time and subj Friedman chi-squared = 8.5059, df = 3, p-value = 0.03664 > attach(thuesen) > lm.velo <- lm(short.velocity~blood.glucose) > anova(lm.velo) Analysis of Variance Table Response: short.velocity Df Sum Sq Mean Sq F value Pr(>F) blood.glucose 1 0.20727 0.207269 4.414 0.0479 * Residuals 21 0.98610 0.046957 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > prop.test(39,215,.15) 1-sample proportions test with continuity correction data: 39 out of 215, null probability 0.15 X-squared = 1.425, df = 1, p-value = 0.2326 alternative hypothesis: true p is not equal to 0.15 95 percent confidence interval: 0.1335937 0.2408799 sample estimates: p 0.1813953 > binom.test(39,215,.15) Exact binomial test data: 39 and 215 number of successes = 39, number of trials = 215, p-value = 0.2135 alternative hypothesis: true probability of success is not equal to 0.15 95 percent confidence interval: 0.1322842 0.2395223 sample estimates: probability of success 0.1813953 > lewitt.machin.success <- c(9,4) > lewitt.machin.total <- c(12,13) > prop.test(lewitt.machin.success,lewitt.machin.total) 2-sample test for equality of proportions with continuity correction data: lewitt.machin.success out of lewitt.machin.total X-squared = 3.2793, df = 1, p-value = 0.07016 alternative hypothesis: two.sided 95 percent confidence interval: 0.01151032 0.87310506 sample estimates: prop 1 prop 2 0.7500000 0.3076923 > matrix(c(9,4,3,9),2) [,1] [,2] [1,] 9 3 [2,] 4 9 > lewitt.machin <- matrix(c(9,4,3,9),2) > fisher.test(lewitt.machin) Fisher's Exact Test for Count Data data: lewitt.machin p-value = 0.04718 alternative hypothesis: true odds ratio is not equal to 1 95 percent confidence interval: 0.9006803 57.2549701 sample estimates: odds ratio 6.180528 > chisq.test(lewitt.machin) Pearson's Chi-squared test with Yates' continuity correction data: lewitt.machin X-squared = 3.2793, df = 1, p-value = 0.07016 > caesar.shoe <4 4 4.5 5 5.5 6+ Yes 5 7 6 7 8 10 No 17 28 36 41 46 140 > caesar.shoe.yes <- caesar.shoe["Yes",] > caesar.shoe.total <- margin.table(caesar.shoe,2) > caesar.shoe.yes <4 4 4.5 5 5.5 6+ 5 7 6 7 8 10 > caesar.shoe.total <4 4 4.5 5 5.5 6+ 22 35 42 48 54 150 > prop.test(caesar.shoe.yes,caesar.shoe.total) 6-sample test for equality of proportions without continuity correction data: caesar.shoe.yes out of caesar.shoe.total X-squared = 9.2874, df = 5, p-value = 0.09814 alternative hypothesis: two.sided sample estimates: prop 1 prop 2 prop 3 prop 4 prop 5 prop 6 0.22727273 0.20000000 0.14285714 0.14583333 0.14814815 0.06666667 Warning message: In prop.test(caesar.shoe.yes, caesar.shoe.total) : Chi-squared approximation may be incorrect > prop.trend.test(caesar.shoe.yes,caesar.shoe.total) Chi-squared Test for Trend in Proportions data: caesar.shoe.yes out of caesar.shoe.total , using scores: 1 2 3 4 5 6 X-squared = 8.0237, df = 1, p-value = 0.004617 > caff.marital <- matrix(c(652,1537,598,242,36,46,38,21,218 + ,327,106,67), + nrow=3,byrow=T) > colnames(caff.marital) <- c("0","1-150","151-300",">300") > rownames(caff.marital) <- c("Married","Prev.married","Single") > caff.marital 0 1-150 151-300 >300 Married 652 1537 598 242 Prev.married 36 46 38 21 Single 218 327 106 67 > chisq.test(caff.marital) Pearson's Chi-squared test data: caff.marital X-squared = 51.656, df = 6, p-value = 2.187e-09 > chisq.test(caff.marital)$expected 0 1-150 151-300 >300 Married 705.83179 1488.01183 578.06533 257.09105 Prev.married 32.85648 69.26698 26.90895 11.96759 Single 167.31173 352.72119 137.02572 60.94136 > chisq.test(caff.marital)$observed 0 1-150 151-300 >300 Married 652 1537 598 242 Prev.married 36 46 38 21 Single 218 327 106 67 > E <- chisq.test(caff.marital)$expected > O <- chisq.test(caff.marital)$observed > (O-E)^2/E 0 1-150 151-300 >300 Married 4.1055981 1.612783 0.6874502 0.8858331 Prev.married 0.3007537 7.815444 4.5713926 6.8171090 Single 15.3563704 1.875645 7.0249243 0.6023355 > attach(juul) > chisq.test(tanner,sex) Pearson's Chi-squared test data: tanner and sex X-squared = 28.867, df = 4, p-value = 8.318e-06 > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > curve(pt(x,25,ncp=3), from=0, to=6) > abline(v=qt(.975,25)) > if (.make.epsf) dev.copy2eps(file="noncentral-t.ps") > pt(qt(.975,25),25,ncp=3) [1] 0.1779891 > power.t.test(delta=0.5, sd=2, sig.level = 0.01, power=0.9) Two-sample t test power calculation n = 477.8021 delta = 0.5 sd = 2 sig.level = 0.01 power = 0.9 alternative = two.sided NOTE: n is number in *each* group > power.t.test(n=450, delta=0.5, sd=2, sig.level = 0.01) Two-sample t test power calculation n = 450 delta = 0.5 sd = 2 sig.level = 0.01 power = 0.8784433 alternative = two.sided NOTE: n is number in *each* group > power.t.test(delta=0.5, sd=2, sig.level = 0.01, power=0.9, + alt="one.sided") Two-sample t test power calculation n = 417.898 delta = 0.5 sd = 2 sig.level = 0.01 power = 0.9 alternative = one.sided NOTE: n is number in *each* group > power.t.test(delta=10, sd=10*sqrt(2), power=0.85, type="paired") Paired t test power calculation n = 19.96892 delta = 10 sd = 14.14214 sig.level = 0.05 power = 0.85 alternative = two.sided NOTE: n is number of *pairs*, sd is std.dev. of *differences* within pairs > power.prop.test(power=.85,p1=.15,p2=.30) Two-sample comparison of proportions power calculation n = 137.604 p1 = 0.15 p2 = 0.3 sig.level = 0.05 power = 0.85 alternative = two.sided NOTE: n is number in *each* group > ### no data sets in exercises > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > age <- subset(juul, age >= 10 & age <= 16)$age > range(age) [1] 10.01 16.00 > agegr <- cut(age, seq(10,16,2), right=F, include.lowest=T) > length(age) [1] 502 > table(agegr) agegr [10,12) [12,14) [14,16] 190 168 144 > agegr2 <- cut(age, seq(10,16,2), right=F) > table(agegr2) agegr2 [10,12) [12,14) [14,16) 190 168 142 > q <- quantile(age, c(0, .25, .50, .75, 1)) > q 0% 25% 50% 75% 100% 10.0100 11.3825 12.6400 14.2275 16.0000 > ageQ <- cut(age, q, include.lowest=T) > table(ageQ) ageQ [10,11.4] (11.4,12.6] (12.6,14.2] (14.2,16] 126 125 125 126 > levels(ageQ) <- c("1st", "2nd", "3rd", "4th") > levels(agegr) <- c("10-11", "12-13", "14-15") > pain <- c(0,3,2,2,1) > fpain <- factor(pain,levels=0:3, + labels=c("none","mild","medium","severe")) > text.pain <- c("none","severe", "medium", "medium", "mild") > factor(text.pain) [1] none severe medium medium mild Levels: medium mild none severe > ftpain <- factor(text.pain) > ftpain2 <- factor(ftpain, + levels=c("none", "mild", "medium", "severe")) > ftpain3 <- ftpain2 > levels(ftpain3) <- list( + none="none", + intermediate=c("mild","medium"), + severe="severe") > ftpain3 [1] none severe intermediate intermediate [5] intermediate Levels: none intermediate severe > ftpain4 <- ftpain2 > levels(ftpain4) <- c("none","intermediate","intermediate","severe") > ftpain4 [1] none severe intermediate intermediate [5] intermediate Levels: none intermediate severe > stroke <- read.csv2( + system.file("rawdata","stroke.csv", package="ISwR"), + na.strings=".") > names(stroke) <- tolower(names(stroke)) > head(stroke) sex died dstr age dgn coma diab minf han 1 1 7.01.1991 2.01.1991 76 INF 0 0 1 0 2 1 3.01.1991 58 INF 0 0 0 0 3 1 2.06.1991 8.01.1991 74 INF 0 0 1 1 4 0 13.01.1991 11.01.1991 77 ICH 0 1 0 1 5 0 23.01.1996 13.01.1991 76 INF 0 1 0 1 6 1 13.01.1991 13.01.1991 48 ICH 1 0 0 1 > stroke <- transform(stroke, + died = as.Date(died, format="%d.%m.%Y"), + dstr = as.Date(dstr, format="%d.%m.%Y")) > summary(stroke$died) Min. 1st Qu. Median Mean 3rd Qu. "1991-01-07" "1992-03-14" "1993-01-23" "1993-02-15" "1993-11-04" Max. NA's "1996-02-22" "338" > summary(stroke$dstr) Min. 1st Qu. Median Mean 3rd Qu. "1991-01-02" "1991-11-08" "1992-08-12" "1992-07-27" "1993-04-30" Max. "1993-12-31" > summary(stroke$died - stroke$dstr) Length Class Mode 829 difftime numeric > head(stroke$died - stroke$dstr) Time differences in days [1] 5 NA 145 2 1836 0 > o <- options(width=60) # minor cheat for visual purposes > stroke <- transform(stroke, + end = pmin(died, as.Date("1996-1-1"), na.rm = T), + dead = !is.na(died) & died < as.Date("1996-1-1")) > head(stroke) sex died dstr age dgn coma diab minf han 1 1 1991-01-07 1991-01-02 76 INF 0 0 1 0 2 1 1991-01-03 58 INF 0 0 0 0 3 1 1991-06-02 1991-01-08 74 INF 0 0 1 1 4 0 1991-01-13 1991-01-11 77 ICH 0 1 0 1 5 0 1996-01-23 1991-01-13 76 INF 0 1 0 1 6 1 1991-01-13 1991-01-13 48 ICH 1 0 0 1 end dead 1 1991-01-07 TRUE 2 1996-01-01 FALSE 3 1991-06-02 TRUE 4 1991-01-13 TRUE 5 1996-01-01 FALSE 6 1991-01-13 TRUE > options(o); rm(o) > stroke <- transform(stroke, + obstime = as.numeric(end - dstr, units="days")/365.25) > rawstroke <- read.csv2( + system.file("rawdata","stroke.csv", package="ISwR"), + na.strings=".") > ix <- c("DSTR", "DIED") > rawstroke[ix] <- lapply(rawstroke[ix], + as.Date, format="%d.%m.%Y") > head(rawstroke) SEX DIED DSTR AGE DGN COMA DIAB MINF HAN 1 1 1991-01-07 1991-01-02 76 INF 0 0 1 0 2 1 1991-01-03 58 INF 0 0 0 0 3 1 1991-06-02 1991-01-08 74 INF 0 0 1 1 4 0 1991-01-13 1991-01-11 77 ICH 0 1 0 1 5 0 1996-01-23 1991-01-13 76 INF 0 1 0 1 6 1 1991-01-13 1991-01-13 48 ICH 1 0 0 1 > ix <- 6:9 > rawstroke[ix] <- lapply(rawstroke[ix], + factor, levels=0:1, labels=c("No","Yes")) > strokesub <- ISwR::stroke[1:10,2:3] > strokesub died dstr 1 1991-01-07 1991-01-02 2 1991-01-03 3 1991-06-02 1991-01-08 4 1991-01-13 1991-01-11 5 1991-01-13 6 1991-01-13 1991-01-13 7 1993-12-01 1991-01-14 8 1991-12-12 1991-01-14 9 1991-01-15 10 1993-11-10 1991-01-15 > strokesub <- transform(strokesub, + event = !is.na(died)) > strokesub <- transform(strokesub, + obstime = ifelse(event, died-dstr, as.Date("1996-1-1") - dstr)) > strokesub died dstr event obstime 1 1991-01-07 1991-01-02 TRUE 5 2 1991-01-03 FALSE 1824 3 1991-06-02 1991-01-08 TRUE 145 4 1991-01-13 1991-01-11 TRUE 2 5 1991-01-13 FALSE 1814 6 1991-01-13 1991-01-13 TRUE 0 7 1993-12-01 1991-01-14 TRUE 1052 8 1991-12-12 1991-01-14 TRUE 332 9 1991-01-15 FALSE 1812 10 1993-11-10 1991-01-15 TRUE 1030 > juulgrl <- subset(juul, sex==2, select=-c(testvol,sex)) > juulboy <- subset(juul, sex==1, select=-c(menarche,sex)) > juulgrl$sex <- factor("F") > juulgrl$testvol <- NA > juulboy$sex <- factor("M") > juulboy$menarche <- NA > juulall <- rbind(juulboy, juulgrl) > names(juulall) [1] "age" "igf1" "tanner" "testvol" "sex" [6] "menarche" > levels(juulall$sex) [1] "M" "F" > head(nickel) id icd exposure dob age1st agein ageout 1 3 0 5 1889.019 17.4808 45.2273 92.9808 2 4 162 5 1885.978 23.1864 48.2684 63.2712 3 6 163 10 1881.255 25.2452 52.9917 54.1644 4 8 527 9 1886.340 24.7206 47.9067 69.6794 5 9 150 0 1879.500 29.9575 54.7465 76.8442 6 10 163 2 1889.915 21.2877 44.3314 62.5413 > head(ewrates) year age lung nasal other 1 1931 10 1 0 1269 2 1931 15 2 0 2201 3 1931 20 6 0 3116 4 1931 25 14 0 3024 5 1931 30 30 1 3188 6 1931 35 68 1 4165 > nickel <- transform(nickel, + agr = trunc(agein/5)*5, + ygr = trunc((dob+agein-1)/5)*5+1) > mrg <- merge(nickel, ewrates, + by.x=c("agr","ygr"), by.y=c("age","year")) > head(mrg,10) agr ygr id icd exposure dob age1st agein ageout 1 20 1931 273 154 0 1909.500 14.6913 24.7465 55.9302 2 20 1931 213 162 0 1910.129 14.2018 24.1177 63.0493 3 20 1931 546 0 0 1909.500 14.4945 24.7465 72.5000 4 20 1931 574 491 0 1909.729 14.0356 24.5177 70.6592 5 20 1931 110 0 0 1909.247 14.0302 24.9999 72.7534 6 20 1931 325 434 0 1910.500 14.0737 23.7465 43.0343 7 25 1931 56 502 2 1904.500 18.2917 29.7465 51.5847 8 25 1931 690 420 0 1906.500 17.2206 27.7465 55.1219 9 25 1931 443 420 0 1905.326 14.5562 28.9204 65.7616 10 25 1931 137 465 0 1905.386 19.0808 28.8601 74.2794 lung nasal other 1 6 0 3116 2 6 0 3116 3 6 0 3116 4 6 0 3116 5 6 0 3116 6 6 0 3116 7 14 0 3024 8 14 0 3024 9 14 0 3024 10 14 0 3024 > head(alkfos) grp c0 c3 c6 c9 c12 c18 c24 1 1 142 140 159 162 152 175 148 2 1 120 126 120 146 134 119 116 3 1 175 161 168 164 213 194 221 4 1 234 203 174 197 289 174 189 5 1 94 107 146 124 128 98 114 6 1 128 97 113 203 NA NA NA > a2 <- alkfos > names(a2) <- sub("c", "c.", names(a2)) > names(a2) [1] "grp" "c.0" "c.3" "c.6" "c.9" "c.12" "c.18" "c.24" > a.long <- reshape(a2, varying=2:8, direction="long") > head(a.long) grp time c id 1.0 1 0 142 1 2.0 1 0 120 2 3.0 1 0 175 3 4.0 1 0 234 4 5.0 1 0 94 5 6.0 1 0 128 6 > tail(a.long) grp time c id 38.24 2 24 95 38 39.24 2 24 NA 39 40.24 2 24 192 40 41.24 2 24 94 41 42.24 2 24 194 42 43.24 2 24 129 43 > o <- with(a.long, order(id, time)) > head(a.long[o,], 10) grp time c id 1.0 1 0 142 1 1.3 1 3 140 1 1.6 1 6 159 1 1.9 1 9 162 1 1.12 1 12 152 1 1.18 1 18 175 1 1.24 1 24 148 1 2.0 1 0 120 2 2.3 1 3 126 2 2.6 1 6 120 2 > a.long2 <- na.omit(a.long) > attr(a.long2, "reshapeLong") <- NULL > a.wide2 <- reshape(a.long2, direction="wide", v.names="c", + idvar="id", timevar="time") > head(a.wide2) grp id c.0 c.3 c.6 c.9 c.12 c.18 c.24 1.0 1 1 142 140 159 162 152 175 148 2.0 1 2 120 126 120 146 134 119 116 3.0 1 3 175 161 168 164 213 194 221 4.0 1 4 234 203 174 197 289 174 189 5.0 1 5 94 107 146 124 128 98 114 6.0 1 6 128 97 113 203 NA NA NA > l <- split(a.long$c, a.long$id) > l[1:3] $`1` [1] 142 140 159 162 152 175 148 $`2` [1] 120 126 120 146 134 119 116 $`3` [1] 175 161 168 164 213 194 221 > l2 <- lapply(l, function(x) x / x[1]) > a.long$c.adj <- unsplit(l2, a.long$id) > subset(a.long, id==1) grp time c id c.adj 1.0 1 0 142 1 1.0000000 1.3 1 3 140 1 0.9859155 1.6 1 6 159 1 1.1197183 1.9 1 9 162 1 1.1408451 1.12 1 12 152 1 1.0704225 1.18 1 18 175 1 1.2323944 1.24 1 24 148 1 1.0422535 > a.long$c.adj <- ave(a.long$c, a.long$id, + FUN = function(x) x / x[1]) > all.equal(unsplit(l2, a.long$id), a.long$c.adj) [1] TRUE > l <- split(a.long, a.long$id) > l2 <- lapply(l, transform, c.adj = c / c[1]) > a.long2 <- unsplit(l2, a.long$id) > all.equal(a.long2$c.adj, a.long$c.adj) [1] TRUE > head(nickel) id icd exposure dob age1st agein ageout agr ygr 1 3 0 5 1889.019 17.4808 45.2273 92.9808 45 1931 2 4 162 5 1885.978 23.1864 48.2684 63.2712 45 1931 3 6 163 10 1881.255 25.2452 52.9917 54.1644 50 1931 4 8 527 9 1886.340 24.7206 47.9067 69.6794 45 1931 5 9 150 0 1879.500 29.9575 54.7465 76.8442 50 1931 6 10 163 2 1889.915 21.2877 44.3314 62.5413 40 1931 > entry <- pmax(nickel$agein, 60) > exit <- pmin(nickel$ageout, 65) > valid <- (entry < exit) > entry <- entry[valid] > exit <- exit[valid] > cens <- (nickel$ageout[valid] > 65) > nickel60 <- nickel[valid,] > nickel60$icd[cens] <- 0 > nickel60$agein <- entry > nickel60$ageout <- exit > nickel60$agr <- 60 > nickel60$ygr <- with(nickel60, trunc((dob+agein-1)/5)*5+1) > head(nickel60) id icd exposure dob age1st agein ageout agr ygr 1 3 0 5 1889.019 17.4808 60 65.0000 60 1946 2 4 162 5 1885.978 23.1864 60 63.2712 60 1941 4 8 0 9 1886.340 24.7206 60 65.0000 60 1946 5 9 0 0 1879.500 29.9575 60 65.0000 60 1936 6 10 163 2 1889.915 21.2877 60 62.5413 60 1946 7 15 334 0 1890.500 23.2836 60 62.0000 60 1946 > trim <- function(start) + { + end <- start + 5 + entry <- pmax(nickel$agein, start) + exit <- pmin(nickel$ageout, end) + valid <- (entry < exit) + cens <- (nickel$ageout[valid] > end) + result <- nickel[valid,] + result$icd[cens] <- 0 + result$agein <- entry[valid] + result$ageout <- exit[valid] + result$agr <- start + result$ygr <- with(result, trunc((dob+agein-1)/5)*5+1) + result + } > head(trim(60)) id icd exposure dob age1st agein ageout agr ygr 1 3 0 5 1889.019 17.4808 60 65.0000 60 1946 2 4 162 5 1885.978 23.1864 60 63.2712 60 1941 4 8 0 9 1886.340 24.7206 60 65.0000 60 1946 5 9 0 0 1879.500 29.9575 60 65.0000 60 1936 6 10 163 2 1889.915 21.2877 60 62.5413 60 1946 7 15 334 0 1890.500 23.2836 60 62.0000 60 1946 > nickel.expand <- do.call("rbind", lapply(seq(20,95,5), trim)) > head(nickel.expand) id icd exposure dob age1st agein ageout agr ygr 84 110 0 0 1909.247 14.0302 24.9999 25 20 1931 156 213 0 0 1910.129 14.2018 24.1177 25 20 1931 197 273 0 0 1909.500 14.6913 24.7465 25 20 1931 236 325 0 0 1910.500 14.0737 23.7465 25 20 1931 384 546 0 0 1909.500 14.4945 24.7465 25 20 1931 400 574 0 0 1909.729 14.0356 24.5177 25 20 1931 > subset(nickel.expand, id==4) id icd exposure dob age1st agein ageout agr ygr 2 4 0 5 1885.978 23.1864 48.2684 50.0000 45 1931 2100 4 0 5 1885.978 23.1864 50.0000 55.0000 50 1931 2102 4 0 5 1885.978 23.1864 55.0000 60.0000 55 1936 2104 4 162 5 1885.978 23.1864 60.0000 63.2712 60 1941 > nickel.expand <- merge(nickel.expand, ewrates, + by.x=c("agr","ygr"), by.y=c("age","year")) > head(nickel.expand) agr ygr id icd exposure dob age1st agein ageout lung 1 20 1931 325 0 0 1910.500 14.0737 23.7465 25 6 2 20 1931 273 0 0 1909.500 14.6913 24.7465 25 6 3 20 1931 110 0 0 1909.247 14.0302 24.9999 25 6 4 20 1931 574 0 0 1909.729 14.0356 24.5177 25 6 5 20 1931 213 0 0 1910.129 14.2018 24.1177 25 6 6 20 1931 546 0 0 1909.500 14.4945 24.7465 25 6 nasal other 1 0 3116 2 0 3116 3 0 3116 4 0 3116 5 0 3116 6 0 3116 > all.equal(nickel.expand, ISwR::nickel.expand) [1] TRUE > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > par(mex=0.5) > pairs(cystfibr, gap=0, cex.labels=0.9) > if (.make.epsf) dev.copy2eps(file="cyst-fibr.ps",height=4.5,width=4.49) > attach(cystfibr) The following object is masked from package:ISwR: tlc > if (exists("age",.GlobalEnv,inh=F)) rm(age) > if (exists("height",.GlobalEnv,inh=F)) rm(height) > if (exists("weight",.GlobalEnv,inh=F)) rm(weight) > summary(lm(pemax~age+sex+height+weight+bmp+fev1+rv+frc+tlc)) Call: lm(formula = pemax ~ age + sex + height + weight + bmp + fev1 + rv + frc + tlc) Residuals: Min 1Q Median 3Q Max -37.338 -11.532 1.081 13.386 33.405 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 176.0582 225.8912 0.779 0.448 age -2.5420 4.8017 -0.529 0.604 sex -3.7368 15.4598 -0.242 0.812 height -0.4463 0.9034 -0.494 0.628 weight 2.9928 2.0080 1.490 0.157 bmp -1.7449 1.1552 -1.510 0.152 fev1 1.0807 1.0809 1.000 0.333 rv 0.1970 0.1962 1.004 0.331 frc -0.3084 0.4924 -0.626 0.540 tlc 0.1886 0.4997 0.377 0.711 Residual standard error: 25.47 on 15 degrees of freedom Multiple R-squared: 0.6373, Adjusted R-squared: 0.4197 F-statistic: 2.929 on 9 and 15 DF, p-value: 0.03195 > 1-25.5^2/var(pemax) [1] 0.4183949 > anova(lm(pemax~age+sex+height+weight+bmp+fev1+rv+frc+tlc)) Analysis of Variance Table Response: pemax Df Sum Sq Mean Sq F value Pr(>F) age 1 10098.5 10098.5 15.5661 0.001296 ** sex 1 955.4 955.4 1.4727 0.243680 height 1 155.0 155.0 0.2389 0.632089 weight 1 632.3 632.3 0.9747 0.339170 bmp 1 2862.2 2862.2 4.4119 0.053010 . fev1 1 1549.1 1549.1 2.3878 0.143120 rv 1 561.9 561.9 0.8662 0.366757 frc 1 194.6 194.6 0.2999 0.592007 tlc 1 92.4 92.4 0.1424 0.711160 Residuals 15 9731.2 648.7 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > 955.4+155.0+632.3+2862.2+1549.1+561.9+194.6+92.4 [1] 7002.9 > 7002.9/8 [1] 875.3625 > 875.36/648.7 [1] 1.349407 > 1-pf(1.349407,8,15) [1] 0.2935148 > ## Not command output: > m1<-lm(pemax~age+sex+height+weight+bmp+fev1+rv+frc+tlc) > m2<-lm(pemax~age) > anova(m1,m2) Analysis of Variance Table Model 1: pemax ~ age + sex + height + weight + bmp + fev1 + rv + frc + tlc Model 2: pemax ~ age Res.Df RSS Df Sum of Sq F Pr(>F) 1 15 9731.2 2 23 16734.2 -8 -7002.9 1.3493 0.2936 > summary(lm(pemax~age+sex+height+weight+bmp+fev1+rv+frc+tlc)) Call: lm(formula = pemax ~ age + sex + height + weight + bmp + fev1 + rv + frc + tlc) Residuals: Min 1Q Median 3Q Max -37.338 -11.532 1.081 13.386 33.405 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 176.0582 225.8912 0.779 0.448 age -2.5420 4.8017 -0.529 0.604 sex -3.7368 15.4598 -0.242 0.812 height -0.4463 0.9034 -0.494 0.628 weight 2.9928 2.0080 1.490 0.157 bmp -1.7449 1.1552 -1.510 0.152 fev1 1.0807 1.0809 1.000 0.333 rv 0.1970 0.1962 1.004 0.331 frc -0.3084 0.4924 -0.626 0.540 tlc 0.1886 0.4997 0.377 0.711 Residual standard error: 25.47 on 15 degrees of freedom Multiple R-squared: 0.6373, Adjusted R-squared: 0.4197 F-statistic: 2.929 on 9 and 15 DF, p-value: 0.03195 > summary(lm(pemax~age+sex+height+weight+bmp+fev1+rv+frc)) Call: lm(formula = pemax ~ age + sex + height + weight + bmp + fev1 + rv + frc) Residuals: Min 1Q Median 3Q Max -38.072 -10.067 0.113 13.526 36.990 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 221.8055 185.4350 1.196 0.2491 age -3.1346 4.4144 -0.710 0.4879 sex -4.6933 14.8363 -0.316 0.7558 height -0.5428 0.8428 -0.644 0.5286 weight 3.3157 1.7672 1.876 0.0790 . bmp -1.9403 1.0047 -1.931 0.0714 . fev1 1.0183 1.0392 0.980 0.3417 rv 0.1857 0.1887 0.984 0.3396 frc -0.2605 0.4628 -0.563 0.5813 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 24.78 on 16 degrees of freedom Multiple R-squared: 0.6339, Adjusted R-squared: 0.4508 F-statistic: 3.463 on 8 and 16 DF, p-value: 0.01649 > summary(lm(pemax~age+sex+height+weight+bmp+fev1+rv)) Call: lm(formula = pemax ~ age + sex + height + weight + bmp + fev1 + rv) Residuals: Min 1Q Median 3Q Max -39.425 -12.391 3.834 14.797 36.693 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 166.71822 154.31294 1.080 0.2951 age -1.81783 3.66773 -0.496 0.6265 sex 0.10239 11.89990 0.009 0.9932 height -0.40981 0.79257 -0.517 0.6118 weight 2.87386 1.55120 1.853 0.0814 . bmp -1.94971 0.98415 -1.981 0.0640 . fev1 1.41526 0.74788 1.892 0.0756 . rv 0.09567 0.09798 0.976 0.3425 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 24.28 on 17 degrees of freedom Multiple R-squared: 0.6266, Adjusted R-squared: 0.4729 F-statistic: 4.076 on 7 and 17 DF, p-value: 0.008452 > summary(lm(pemax~age+sex+height+weight+bmp+fev1)) Call: lm(formula = pemax ~ age + sex + height + weight + bmp + fev1) Residuals: Min 1Q Median 3Q Max -43.238 -7.403 -0.081 15.534 36.028 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 260.6313 120.5215 2.163 0.0443 * age -2.9062 3.4898 -0.833 0.4159 sex -1.2115 11.8083 -0.103 0.9194 height -0.6067 0.7655 -0.793 0.4384 weight 3.3463 1.4719 2.273 0.0355 * bmp -2.3042 0.9136 -2.522 0.0213 * fev1 1.0274 0.6329 1.623 0.1219 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 24.24 on 18 degrees of freedom Multiple R-squared: 0.6057, Adjusted R-squared: 0.4743 F-statistic: 4.608 on 6 and 18 DF, p-value: 0.00529 > summary(lm(pemax~age+sex+height+weight+bmp)) Call: lm(formula = pemax ~ age + sex + height + weight + bmp) Residuals: Min 1Q Median 3Q Max -43.194 -9.412 -2.425 9.157 40.112 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 280.4482 124.9556 2.244 0.0369 * age -3.0750 3.6352 -0.846 0.4081 sex -11.5281 10.3720 -1.111 0.2802 height -0.6853 0.7962 -0.861 0.4001 weight 3.5546 1.5281 2.326 0.0312 * bmp -1.9613 0.9263 -2.117 0.0476 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 25.27 on 19 degrees of freedom Multiple R-squared: 0.548, Adjusted R-squared: 0.429 F-statistic: 4.606 on 5 and 19 DF, p-value: 0.006388 > summary(lm(pemax~age+height+weight+bmp)) Call: lm(formula = pemax ~ age + height + weight + bmp) Residuals: Min 1Q Median 3Q Max -41.501 -15.460 -2.838 11.082 42.991 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 274.5307 125.5745 2.186 0.0409 * age -3.0832 3.6566 -0.843 0.4091 height -0.6985 0.8008 -0.872 0.3934 weight 3.6338 1.5354 2.367 0.0282 * bmp -1.9621 0.9317 -2.106 0.0480 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 25.41 on 20 degrees of freedom Multiple R-squared: 0.5186, Adjusted R-squared: 0.4223 F-statistic: 5.386 on 4 and 20 DF, p-value: 0.004137 > summary(lm(pemax~height+weight+bmp)) Call: lm(formula = pemax ~ height + weight + bmp) Residuals: Min 1Q Median 3Q Max -41.794 -11.764 -1.218 13.202 43.631 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 245.3936 119.8927 2.047 0.0534 . height -0.8264 0.7808 -1.058 0.3019 weight 2.7717 1.1377 2.436 0.0238 * bmp -1.4876 0.7375 -2.017 0.0566 . --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 25.24 on 21 degrees of freedom Multiple R-squared: 0.5015, Adjusted R-squared: 0.4302 F-statistic: 7.041 on 3 and 21 DF, p-value: 0.00187 > summary(lm(pemax~weight+bmp)) Call: lm(formula = pemax ~ weight + bmp) Residuals: Min 1Q Median 3Q Max -42.924 -13.399 4.361 16.642 48.404 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 124.8297 37.4786 3.331 0.003033 ** weight 1.6403 0.3900 4.206 0.000365 *** bmp -1.0054 0.5814 -1.729 0.097797 . --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 25.31 on 22 degrees of freedom Multiple R-squared: 0.4749, Adjusted R-squared: 0.4271 F-statistic: 9.947 on 2 and 22 DF, p-value: 0.0008374 > summary(lm(pemax~weight)) Call: lm(formula = pemax ~ weight) Residuals: Min 1Q Median 3Q Max -44.30 -22.69 2.23 15.91 48.41 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 63.5456 12.7016 5.003 4.63e-05 *** weight 1.1867 0.3009 3.944 0.000646 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 26.38 on 23 degrees of freedom Multiple R-squared: 0.4035, Adjusted R-squared: 0.3776 F-statistic: 15.56 on 1 and 23 DF, p-value: 0.0006457 > summary(lm(pemax~age+weight+height)) Call: lm(formula = pemax ~ age + weight + height) Residuals: Min 1Q Median 3Q Max -43.675 -21.566 3.229 16.274 48.068 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 64.65555 82.40935 0.785 0.441 age 1.56755 3.14363 0.499 0.623 weight 0.86949 0.85922 1.012 0.323 height -0.07608 0.80278 -0.095 0.925 Residual standard error: 27.41 on 21 degrees of freedom Multiple R-squared: 0.4118, Adjusted R-squared: 0.3278 F-statistic: 4.901 on 3 and 21 DF, p-value: 0.009776 > summary(lm(pemax~age+height)) Call: lm(formula = pemax ~ age + height) Residuals: Min 1Q Median 3Q Max -44.817 -17.883 3.815 18.275 53.824 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 17.8600 68.2493 0.262 0.796 age 2.7178 2.9325 0.927 0.364 height 0.3397 0.6900 0.492 0.627 Residual standard error: 27.43 on 22 degrees of freedom Multiple R-squared: 0.3831, Adjusted R-squared: 0.3271 F-statistic: 6.832 on 2 and 22 DF, p-value: 0.00492 > summary(lm(pemax~age)) Call: lm(formula = pemax ~ age) Residuals: Min 1Q Median 3Q Max -48.666 -17.174 6.209 16.209 51.334 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 50.408 16.657 3.026 0.00601 ** age 4.055 1.088 3.726 0.00111 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 26.97 on 23 degrees of freedom Multiple R-squared: 0.3764, Adjusted R-squared: 0.3492 F-statistic: 13.88 on 1 and 23 DF, p-value: 0.001109 > summary(lm(pemax~height)) Call: lm(formula = pemax ~ height) Residuals: Min 1Q Median 3Q Max -43.876 -19.306 1.787 18.170 61.464 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -33.2757 40.0445 -0.831 0.41453 height 0.9319 0.2596 3.590 0.00155 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 27.34 on 23 degrees of freedom Multiple R-squared: 0.3591, Adjusted R-squared: 0.3312 F-statistic: 12.89 on 1 and 23 DF, p-value: 0.001549 > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > attach(cystfibr) The following object is masked from package:ISwR: tlc > summary(lm(pemax~height+I(height^2))) Call: lm(formula = pemax ~ height + I(height^2)) Residuals: Min 1Q Median 3Q Max -51.411 -14.932 -2.288 12.787 44.933 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 615.36248 240.95580 2.554 0.0181 * height -8.08324 3.32052 -2.434 0.0235 * I(height^2) 0.03064 0.01126 2.721 0.0125 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 24.18 on 22 degrees of freedom Multiple R-squared: 0.5205, Adjusted R-squared: 0.4769 F-statistic: 11.94 on 2 and 22 DF, p-value: 0.0003081 > pred.frame <- data.frame(height=seq(110,180,2)) > lm.pemax.hq <- lm(pemax~height+I(height^2)) > predict(lm.pemax.hq,interval="pred",newdata=pred.frame) fit lwr upr 1 96.90026 37.94461 155.8559 2 94.33611 36.82985 151.8424 3 92.01705 35.73077 148.3033 4 89.94307 34.66449 145.2217 5 88.11418 33.65007 142.5783 6 86.53038 32.70806 140.3527 7 85.19166 31.85979 138.5235 8 84.09803 31.12689 137.0692 9 83.24949 30.53064 135.9683 10 82.64604 30.09150 135.2006 11 82.28767 29.82873 134.7466 12 82.17439 29.76004 134.5887 13 82.30620 29.90143 134.7110 14 82.68309 30.26696 135.0992 15 83.30507 30.86877 135.7414 16 84.17214 31.71697 136.6273 17 85.28429 32.81967 137.7489 18 86.64153 34.18298 139.1001 19 88.24386 35.81103 140.6767 20 90.09127 37.70599 142.4766 21 92.18378 39.86804 144.4995 22 94.52137 42.29540 146.7473 23 97.10404 44.98431 149.2238 24 99.93180 47.92894 151.9347 25 103.00465 51.12145 154.8879 26 106.32259 54.55189 158.0933 27 109.88562 58.20818 161.5631 28 113.69373 62.07617 165.3113 29 117.74692 66.13965 169.3542 30 122.04521 70.38050 173.7099 31 126.58858 74.77888 178.3983 32 131.37704 79.31354 183.4405 33 136.41059 83.96224 188.8589 34 141.68922 88.70229 194.6761 35 147.21294 93.51117 200.9147 36 152.98174 98.36718 207.5963 > pp <- predict(lm.pemax.hq,newdata=pred.frame,interval="pred") > pc <- predict(lm.pemax.hq,newdata=pred.frame,interval="conf") > plot(height,pemax,ylim=c(0,200)) > matlines(pred.frame$height,pp,lty=c(1,2,2),col="black") > matlines(pred.frame$height,pc,lty=c(1,3,3),col="black") > if (.make.epsf) dev.copy2eps(file="pemax-height-quad.ps") > x <- runif(20) > y <- 2*x+rnorm(20,0,0.3) > summary(lm(y~x)) Call: lm(formula = y ~ x) Residuals: Min 1Q Median 3Q Max -0.35286 -0.19547 -0.02266 0.18194 0.56356 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -0.2224 0.1249 -1.781 0.0918 . x 2.4369 0.2039 11.953 5.38e-10 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.2502 on 18 degrees of freedom Multiple R-squared: 0.8881, Adjusted R-squared: 0.8819 F-statistic: 142.9 on 1 and 18 DF, p-value: 5.376e-10 > summary(lm(y~x-1)) Call: lm(formula = y ~ x - 1) Residuals: Min 1Q Median 3Q Max -0.45178 -0.20224 -0.01095 0.06261 0.47787 Coefficients: Estimate Std. Error t value Pr(>|t|) x 2.11227 0.09642 21.91 6.04e-15 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.2641 on 19 degrees of freedom Multiple R-squared: 0.9619, Adjusted R-squared: 0.9599 F-statistic: 480 on 1 and 19 DF, p-value: 6.039e-15 > anova(lm(y~x)) Analysis of Variance Table Response: y Df Sum Sq Mean Sq F value Pr(>F) x 1 8.945 8.9450 142.87 5.376e-10 *** Residuals 18 1.127 0.0626 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > anova(lm(y~x-1)) Analysis of Variance Table Response: y Df Sum Sq Mean Sq F value Pr(>F) x 1 33.484 33.484 479.96 6.039e-15 *** Residuals 19 1.326 0.070 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > model.matrix(pemax~height+weight) (Intercept) height weight 1 1 109 13.1 2 1 112 12.9 3 1 124 14.1 4 1 125 16.2 5 1 127 21.5 6 1 130 17.5 7 1 139 30.7 8 1 150 28.4 9 1 146 25.1 10 1 155 31.5 11 1 156 39.9 12 1 153 42.1 13 1 160 45.6 14 1 158 51.2 15 1 160 35.9 16 1 153 34.8 17 1 174 44.7 18 1 176 60.1 19 1 171 42.6 20 1 156 37.2 21 1 174 54.6 22 1 178 64.0 23 1 180 73.8 24 1 175 51.1 25 1 179 71.5 attr(,"assign") [1] 0 1 2 > attach(red.cell.folate) > model.matrix(folate~ventilation) (Intercept) ventilationN2O+O2,op ventilationO2,24h 1 1 0 0 2 1 0 0 3 1 0 0 4 1 0 0 5 1 0 0 6 1 0 0 7 1 0 0 8 1 0 0 9 1 1 0 10 1 1 0 11 1 1 0 12 1 1 0 13 1 1 0 14 1 1 0 15 1 1 0 16 1 1 0 17 1 1 0 18 1 0 1 19 1 0 1 20 1 0 1 21 1 0 1 22 1 0 1 attr(,"assign") [1] 0 1 1 attr(,"contrasts") attr(,"contrasts")$ventilation [1] "contr.treatment" > attach(fake.trypsin) > summary(fake.trypsin) trypsin grp grpf Min. :-39.96 Min. :1.000 1: 32 1st Qu.:119.52 1st Qu.:2.000 2:137 Median :167.59 Median :2.000 3: 38 Mean :168.68 Mean :2.583 4: 44 3rd Qu.:213.98 3rd Qu.:3.000 5: 16 Max. :390.13 Max. :6.000 6: 4 > anova(lm(trypsin~grpf)) Analysis of Variance Table Response: trypsin Df Sum Sq Mean Sq F value Pr(>F) grpf 5 224103 44821 13.508 9.592e-12 *** Residuals 265 879272 3318 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > anova(lm(trypsin~grp)) Analysis of Variance Table Response: trypsin Df Sum Sq Mean Sq F value Pr(>F) grp 1 206698 206698 62.009 8.451e-14 *** Residuals 269 896677 3333 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > model1 <- lm(trypsin~grp) > model2 <- lm(trypsin~grpf) > anova(model1,model2) Analysis of Variance Table Model 1: trypsin ~ grp Model 2: trypsin ~ grpf Res.Df RSS Df Sum of Sq F Pr(>F) 1 269 896677 2 265 879272 4 17405 1.3114 0.2661 > anova(lm(trypsin~grp+grpf)) Analysis of Variance Table Response: trypsin Df Sum Sq Mean Sq F value Pr(>F) grp 1 206698 206698 62.2959 7.833e-14 *** grpf 4 17405 4351 1.3114 0.2661 Residuals 265 879272 3318 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > xbar.trypsin <- tapply(trypsin,grpf,mean) > stripchart(trypsin~grp, method="jitter", + jitter=.1, vertical=T, pch=20) > lines(1:6,xbar.trypsin,type="b",pch=4,cex=2,lty=2) > abline(lm(trypsin~grp)) > if (.make.epsf) dev.copy2eps(file="trypsin.ps") > n <- c(32,137, 38,44,16,4) > tryp.mean <- c(128,152,194,207,215,218) > tryp.sd <-c(50.9,58.5,49.3,66.3,60,14) > gr<-1:6 > anova(lm(tryp.mean~gr+factor(gr),weights=n)) Analysis of Variance Table Response: tryp.mean Df Sum Sq Mean Sq F value Pr(>F) gr 1 206698 206698 NaN NaN factor(gr) 4 17405 4351 NaN NaN Residuals 0 0 NaN Warning message: In anova.lm(lm(tryp.mean ~ gr + factor(gr), weights = n)) : ANOVA F-tests on an essentially perfect fit are unreliable > sum(tryp.sd^2*(n-1)) [1] 879271.9 > sum(n-1) [1] 265 > sum(tryp.sd^2*(n-1))/sum(n-1) [1] 3318.007 > 206698/3318.007 # F statistic for gr [1] 62.29583 > 1-pf(206698/3318.007,1,265) # p-value [1] 7.838175e-14 > 4351/3318.007 # F statistic for factor(gr) [1] 1.311329 > 1-pf(4351/3318.007,4,265) # p-value [1] 0.2660733 > attach(coking) > anova(lm(time~width*temp)) Analysis of Variance Table Response: time Df Sum Sq Mean Sq F value Pr(>F) width 2 123.143 61.572 222.102 3.312e-10 *** temp 1 17.209 17.209 62.076 4.394e-06 *** width:temp 2 5.701 2.851 10.283 0.002504 ** Residuals 12 3.327 0.277 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > tapply(time,list(width,temp),mean) 1600 1900 4 3.066667 2.300000 8 7.166667 5.533333 12 10.800000 7.333333 > hellung glucose conc diameter 1 1 631000 21.2 2 1 592000 21.5 3 1 563000 21.3 4 1 475000 21.0 5 1 461000 21.5 6 1 416000 21.3 7 1 385000 20.3 8 1 321000 22.7 9 1 302000 21.5 10 1 199000 22.2 11 1 195000 22.6 12 1 165000 22.5 13 1 137000 23.3 14 1 130000 23.5 15 1 90000 23.4 16 1 66000 23.6 17 1 55000 23.9 18 1 52000 23.6 19 1 46000 23.6 20 1 41000 24.5 21 1 38000 24.0 22 1 34000 25.4 23 1 32000 24.6 24 1 30000 24.9 25 1 28000 24.9 26 1 21000 24.4 27 1 20000 25.9 28 1 16000 25.5 29 1 14500 25.7 30 1 13500 25.9 31 1 11600 25.7 32 1 11000 26.3 33 2 630000 19.2 34 2 501000 19.5 35 2 332000 19.8 36 2 285000 21.0 37 2 201000 21.0 38 2 175000 21.0 39 2 129000 21.3 40 2 111000 20.5 41 2 78000 22.6 42 2 70000 22.7 43 2 69000 22.2 44 2 62000 22.7 45 2 35000 24.0 46 2 27000 23.6 47 2 24000 23.5 48 2 22000 23.3 49 2 14000 24.4 50 2 13000 24.3 51 2 11000 24.2 > summary(hellung) glucose conc diameter Min. :1.000 Min. : 11000 Min. :19.20 1st Qu.:1.000 1st Qu.: 27500 1st Qu.:21.40 Median :1.000 Median : 69000 Median :23.30 Mean :1.373 Mean :164326 Mean :23.00 3rd Qu.:2.000 3rd Qu.:243000 3rd Qu.:24.35 Max. :2.000 Max. :631000 Max. :26.30 > hellung$glucose <- factor(hellung$glucose, labels=c("Yes","No")) > summary(hellung) glucose conc diameter Yes:32 Min. : 11000 Min. :19.20 No :19 1st Qu.: 27500 1st Qu.:21.40 Median : 69000 Median :23.30 Mean :164326 Mean :23.00 3rd Qu.:243000 3rd Qu.:24.35 Max. :631000 Max. :26.30 > attach(hellung) > plot(conc,diameter,pch=as.numeric(glucose)) > locator <- function(n)list(x=4e5,y=26) > legend(locator(n=1),legend=c("glucose","no glucose"),pch=1:2) > if (.make.epsf) dev.copy2eps(file="hellung-raw.ps") > plot(conc,diameter,pch=as.numeric(glucose),log="x") > if (.make.epsf) dev.copy2eps(file="hellung-logx.ps") > plot(conc,diameter,pch=as.numeric(glucose),log="xy") > tethym.gluc <- hellung[glucose=="Yes",] > tethym.nogluc <- hellung[glucose=="No",] > lm.nogluc <- lm(log10(diameter)~ log10(conc),data=tethym.nogluc) > lm.gluc <- lm(log10(diameter)~ log10(conc),data=tethym.gluc) > abline(lm.nogluc) > abline(lm.gluc) > if (.make.epsf) dev.copy2eps(file="hellung-loglog-lines.ps") > summary(lm(log10(diameter)~ log10(conc), data=tethym.gluc)) Call: lm(formula = log10(diameter) ~ log10(conc), data = tethym.gluc) Residuals: Min 1Q Median 3Q Max -0.0267219 -0.0043361 0.0006891 0.0035489 0.0176077 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.63134 0.01345 121.29 <2e-16 *** log10(conc) -0.05320 0.00272 -19.56 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.008779 on 30 degrees of freedom Multiple R-squared: 0.9273, Adjusted R-squared: 0.9248 F-statistic: 382.5 on 1 and 30 DF, p-value: < 2.2e-16 > summary(lm(log10(diameter)~ log10(conc), data=tethym.nogluc)) Call: lm(formula = log10(diameter) ~ log10(conc), data = tethym.nogluc) Residuals: Min 1Q Median 3Q Max -0.021919 -0.004977 0.000056 0.005597 0.016625 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.634761 0.020209 80.89 < 2e-16 *** log10(conc) -0.059677 0.004125 -14.47 5.48e-11 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.009532 on 17 degrees of freedom Multiple R-squared: 0.9249, Adjusted R-squared: 0.9205 F-statistic: 209.3 on 1 and 17 DF, p-value: 5.482e-11 > summary(lm(log10(diameter)~log10(conc)*glucose)) Call: lm(formula = log10(diameter) ~ log10(conc) * glucose) Residuals: Min 1Q Median 3Q Max -0.026722 -0.004888 0.000056 0.003767 0.017608 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.631344 0.013879 117.543 <2e-16 *** log10(conc) -0.053196 0.002807 -18.954 <2e-16 *** glucoseNo 0.003418 0.023695 0.144 0.886 log10(conc):glucoseNo -0.006480 0.004821 -1.344 0.185 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.009059 on 47 degrees of freedom Multiple R-squared: 0.9361, Adjusted R-squared: 0.9321 F-statistic: 229.6 on 3 and 47 DF, p-value: < 2.2e-16 > summary(lm(log10(diameter)~log10(conc)+glucose)) Call: lm(formula = log10(diameter) ~ log10(conc) + glucose) Residuals: Min 1Q Median 3Q Max -0.025243 -0.005733 -0.000195 0.004894 0.018914 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.642132 0.011417 143.83 < 2e-16 *** log10(conc) -0.055393 0.002301 -24.07 < 2e-16 *** glucoseNo -0.028238 0.002647 -10.67 2.93e-14 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.009134 on 48 degrees of freedom Multiple R-squared: 0.9337, Adjusted R-squared: 0.9309 F-statistic: 337.9 on 2 and 48 DF, p-value: < 2.2e-16 > var.test(lm.gluc,lm.nogluc) F test to compare two variances data: lm.gluc and lm.nogluc F = 0.84817, num df = 30, denom df = 17, p-value = 0.6731 alternative hypothesis: true ratio of variances is not equal to 1 95 percent confidence interval: 0.3389901 1.9129940 sample estimates: ratio of variances 0.8481674 > anova(lm(log10(diameter)~ log10(conc)*glucose)) Analysis of Variance Table Response: log10(diameter) Df Sum Sq Mean Sq F value Pr(>F) log10(conc) 1 0.046890 0.046890 571.436 < 2.2e-16 *** glucose 1 0.009494 0.009494 115.698 2.89e-14 *** log10(conc):glucose 1 0.000148 0.000148 1.807 0.1853 Residuals 47 0.003857 0.000082 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > anova(lm(log10(diameter)~glucose+log10(conc))) Analysis of Variance Table Response: log10(diameter) Df Sum Sq Mean Sq F value Pr(>F) glucose 1 0.008033 0.008033 96.278 4.696e-13 *** log10(conc) 1 0.048351 0.048351 579.494 < 2.2e-16 *** Residuals 48 0.004005 0.000083 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > anova(lm(log10(diameter)~log10(conc)+ glucose)) Analysis of Variance Table Response: log10(diameter) Df Sum Sq Mean Sq F value Pr(>F) log10(conc) 1 0.046890 0.046890 561.99 < 2.2e-16 *** glucose 1 0.009494 0.009494 113.78 2.932e-14 *** Residuals 48 0.004005 0.000083 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > t.test(log10(diameter)~glucose) Welch Two Sample t-test data: log10(diameter) by glucose t = 2.7037, df = 36.31, p-value = 0.01037 alternative hypothesis: true difference in means between group Yes and group No is not equal to 0 95 percent confidence interval: 0.006492194 0.045424241 sample estimates: mean in group Yes mean in group No 1.370046 1.344088 > attach(thuesen) > options(na.action="na.exclude") > lm.velo <- lm(short.velocity~blood.glucose) > opar <- par(mfrow=c(2,2), mex=0.6, mar=c(4,4,3,2)+.3) > plot(lm.velo, which=1:4) > par(opar) > if (.make.epsf) dev.copy2eps(file="regr-diag.ps") > opar <- par(mfrow=c(2,2), mex=0.6, mar=c(4,4,3,2)+.3) > plot(rstandard(lm.velo)) > plot(rstudent(lm.velo)) > plot(dffits(lm.velo),type="l") > matplot(dfbetas(lm.velo),type="l", col="black") > lines(sqrt(cooks.distance(lm.velo)), lwd=2) > par(opar) > if (.make.epsf) dev.copy2eps(file="regr-diag2.ps") > summary(lm(short.velocity~blood.glucose, subset=-13)) Call: lm(formula = short.velocity ~ blood.glucose, subset = -13) Residuals: Min 1Q Median 3Q Max -0.31346 -0.11136 -0.01247 0.06043 0.40794 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.18929 0.11061 10.752 9.22e-10 *** blood.glucose 0.01082 0.01029 1.052 0.305 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.193 on 20 degrees of freedom (1 observation deleted due to missingness) Multiple R-squared: 0.05241, Adjusted R-squared: 0.005026 F-statistic: 1.106 on 1 and 20 DF, p-value: 0.3055 > cookd <- cooks.distance(lm(pemax~height+weight)) > cookd <- cookd/max(cookd) > cook.colors <- gray(1-sqrt(cookd)) > plot(height,weight,bg=cook.colors,pch=21,cex=1.5) > points(height,weight,pch=1,cex=1.5) > if (.make.epsf) dev.copy2eps(file="cookd-cyst-fibr.ps") > attach(secher) > rst <- rstudent(lm(log10(bwt)~log10(ad)+log10(bpd))) > range(rst) [1] -3.707509 3.674050 > rst <- rst/3.71 > plot(ad,bpd,log="xy",bg=gray(1-abs(rst)), + pch=ifelse(rst>0,24,25), cex=1.5) > if (.make.epsf) dev.copy2eps(file="rstudent-secher.ps") > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > no.yes <- c("No","Yes") > smoking <- gl(2,1,8,no.yes) > obesity <- gl(2,2,8,no.yes) > snoring <- gl(2,4,8,no.yes) > n.tot <- c(60,17,8,2,187,85,51,23) > n.hyp <- c(5,2,1,0,35,13,15,8) > data.frame(smoking,obesity,snoring,n.tot,n.hyp) smoking obesity snoring n.tot n.hyp 1 No No No 60 5 2 Yes No No 17 2 3 No Yes No 8 1 4 Yes Yes No 2 0 5 No No Yes 187 35 6 Yes No Yes 85 13 7 No Yes Yes 51 15 8 Yes Yes Yes 23 8 > expand.grid(smoking=no.yes, obesity=no.yes, snoring=no.yes) smoking obesity snoring 1 No No No 2 Yes No No 3 No Yes No 4 Yes Yes No 5 No No Yes 6 Yes No Yes 7 No Yes Yes 8 Yes Yes Yes > hyp.tbl <- cbind(n.hyp,n.tot-n.hyp) > hyp.tbl n.hyp [1,] 5 55 [2,] 2 15 [3,] 1 7 [4,] 0 2 [5,] 35 152 [6,] 13 72 [7,] 15 36 [8,] 8 15 > glm(hyp.tbl~smoking+obesity+snoring,family=binomial("logit")) Call: glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = binomial("logit")) Coefficients: (Intercept) smokingYes obesityYes snoringYes -2.37766 -0.06777 0.69531 0.87194 Degrees of Freedom: 7 Total (i.e. Null); 4 Residual Null Deviance: 14.13 Residual Deviance: 1.618 AIC: 34.54 > glm(hyp.tbl~smoking+obesity+snoring,binomial) Call: glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = binomial) Coefficients: (Intercept) smokingYes obesityYes snoringYes -2.37766 -0.06777 0.69531 0.87194 Degrees of Freedom: 7 Total (i.e. Null); 4 Residual Null Deviance: 14.13 Residual Deviance: 1.618 AIC: 34.54 > prop.hyp <- n.hyp/n.tot > glm.hyp <- glm(prop.hyp~smoking+obesity+snoring, + binomial,weights=n.tot) > glm(hyp.tbl~smoking+obesity+snoring, binomial("logit")) Call: glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = binomial("logit")) Coefficients: (Intercept) smokingYes obesityYes snoringYes -2.37766 -0.06777 0.69531 0.87194 Degrees of Freedom: 7 Total (i.e. Null); 4 Residual Null Deviance: 14.13 Residual Deviance: 1.618 AIC: 34.54 > glm.hyp <- glm(hyp.tbl~smoking+obesity+snoring,binomial) > summary(glm.hyp) Call: glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = binomial) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -2.37766 0.38018 -6.254 4e-10 *** smokingYes -0.06777 0.27812 -0.244 0.8075 obesityYes 0.69531 0.28509 2.439 0.0147 * snoringYes 0.87194 0.39757 2.193 0.0283 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 14.1259 on 7 degrees of freedom Residual deviance: 1.6184 on 4 degrees of freedom AIC: 34.537 Number of Fisher Scoring iterations: 4 > summary(glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = + binomial)) Call: glm(formula = hyp.tbl ~ smoking + obesity + snoring, family = binomial) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -2.37766 0.38018 -6.254 4e-10 *** smokingYes -0.06777 0.27812 -0.244 0.8075 obesityYes 0.69531 0.28509 2.439 0.0147 * snoringYes 0.87194 0.39757 2.193 0.0283 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 14.1259 on 7 degrees of freedom Residual deviance: 1.6184 on 4 degrees of freedom AIC: 34.537 Number of Fisher Scoring iterations: 4 > glm.hyp <- glm(hyp.tbl~obesity+snoring,binomial) > summary(glm.hyp) Call: glm(formula = hyp.tbl ~ obesity + snoring, family = binomial) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -2.3921 0.3757 -6.366 1.94e-10 *** obesityYes 0.6954 0.2851 2.440 0.0147 * snoringYes 0.8655 0.3967 2.182 0.0291 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 14.1259 on 7 degrees of freedom Residual deviance: 1.6781 on 5 degrees of freedom AIC: 32.597 Number of Fisher Scoring iterations: 4 > glm.hyp <- glm(hyp.tbl~smoking+obesity+snoring,binomial) > anova(glm.hyp, test="Chisq") Analysis of Deviance Table Model: binomial, link: logit Response: hyp.tbl Terms added sequentially (first to last) Df Deviance Resid. Df Resid. Dev Pr(>Chi) NULL 7 14.1259 smoking 1 0.0022 6 14.1237 0.962724 obesity 1 6.8274 5 7.2963 0.008977 ** snoring 1 5.6779 4 1.6184 0.017179 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > glm.hyp <- glm(hyp.tbl~snoring+obesity+smoking,binomial) > anova(glm.hyp, test="Chisq") Analysis of Deviance Table Model: binomial, link: logit Response: hyp.tbl Terms added sequentially (first to last) Df Deviance Resid. Df Resid. Dev Pr(>Chi) NULL 7 14.1259 snoring 1 6.7887 6 7.3372 0.009174 ** obesity 1 5.6591 5 1.6781 0.017365 * smoking 1 0.0597 4 1.6184 0.806938 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > glm.hyp <- glm(hyp.tbl~obesity+snoring,binomial) > anova(glm.hyp, test="Chisq") Analysis of Deviance Table Model: binomial, link: logit Response: hyp.tbl Terms added sequentially (first to last) Df Deviance Resid. Df Resid. Dev Pr(>Chi) NULL 7 14.1259 obesity 1 6.8260 6 7.2999 0.008984 ** snoring 1 5.6218 5 1.6781 0.017738 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > drop1(glm.hyp, test="Chisq") Single term deletions Model: hyp.tbl ~ obesity + snoring Df Deviance AIC LRT Pr(>Chi) 1.6781 32.597 obesity 1 7.3372 36.256 5.6591 0.01737 * snoring 1 7.2999 36.219 5.6218 0.01774 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > caesar.shoe <4 4 4.5 5 5.5 6+ Yes 5 7 6 7 8 10 No 17 28 36 41 46 140 > shoe.score <- 1:6 > shoe.score [1] 1 2 3 4 5 6 > summary(glm(t(caesar.shoe)~shoe.score,binomial)) Call: glm(formula = t(caesar.shoe) ~ shoe.score, family = binomial) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -0.87058 0.40506 -2.149 0.03161 * shoe.score -0.25971 0.09361 -2.774 0.00553 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 9.3442 on 5 degrees of freedom Residual deviance: 1.7845 on 4 degrees of freedom AIC: 27.616 Number of Fisher Scoring iterations: 4 > anova(glm(t(caesar.shoe)~shoe.score,binomial)) Analysis of Deviance Table Model: binomial, link: logit Response: t(caesar.shoe) Terms added sequentially (first to last) Df Deviance Resid. Df Resid. Dev Pr(>Chi) NULL 5 9.3442 shoe.score 1 7.5597 4 1.7845 0.005969 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > caesar.shoe.yes <- caesar.shoe["Yes",] > caesar.shoe.no <- caesar.shoe["No",] > caesar.shoe.total <- caesar.shoe.yes+caesar.shoe.no > prop.trend.test(caesar.shoe.yes,caesar.shoe.total) Chi-squared Test for Trend in Proportions data: caesar.shoe.yes out of caesar.shoe.total , using scores: 1 2 3 4 5 6 X-squared = 8.0237, df = 1, p-value = 0.004617 > prop.test(caesar.shoe.yes,caesar.shoe.total) 6-sample test for equality of proportions without continuity correction data: caesar.shoe.yes out of caesar.shoe.total X-squared = 9.2874, df = 5, p-value = 0.09814 alternative hypothesis: two.sided sample estimates: prop 1 prop 2 prop 3 prop 4 prop 5 prop 6 0.22727273 0.20000000 0.14285714 0.14583333 0.14814815 0.06666667 Warning message: In prop.test(caesar.shoe.yes, caesar.shoe.total) : Chi-squared approximation may be incorrect > confint(glm.hyp) Waiting for profiling to be done... 2.5 % 97.5 % (Intercept) -3.2101098 -1.718094 obesityYes 0.1254244 1.246789 snoringYes 0.1410076 1.715763 > confint.default(glm.hyp) 2.5 % 97.5 % (Intercept) -3.12852108 -1.655631 obesityYes 0.13670388 1.254134 snoringYes 0.08801498 1.642902 > library(MASS) > plot(profile(glm.hyp)) > if (.make.epsf) dev.copy2eps(file="profile-hyp.ps") > exp(cbind(OR=coef(glm.hyp), confint(glm.hyp))) Waiting for profiling to be done... OR 2.5 % 97.5 % (Intercept) 0.09143963 0.04035218 0.1794079 obesityYes 2.00454846 1.13362951 3.4791517 snoringYes 2.37609483 1.15143343 5.5609161 > juul$menarche <- factor(juul$menarche, labels=c("No","Yes")) > juul$tanner <- factor(juul$tanner) > juul.girl <- subset(juul,age>8 & age<20 & + complete.cases(menarche)) > attach(juul.girl) The following object is masked from package:MASS: menarche > summary(glm(menarche~age,binomial)) Call: glm(formula = menarche ~ age, family = binomial) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -20.0132 2.0284 -9.867 <2e-16 *** age 1.5173 0.1544 9.829 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 719.39 on 518 degrees of freedom Residual deviance: 200.66 on 517 degrees of freedom AIC: 204.66 Number of Fisher Scoring iterations: 7 > summary(glm(menarche~age+tanner,binomial)) Call: glm(formula = menarche ~ age + tanner, family = binomial) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -13.7758 2.7630 -4.986 6.17e-07 *** age 0.8603 0.2311 3.723 0.000197 *** tanner2 -0.5211 1.4846 -0.351 0.725609 tanner3 0.8264 1.2377 0.668 0.504313 tanner4 2.5645 1.2172 2.107 0.035132 * tanner5 5.1897 1.4140 3.670 0.000242 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 604.19 on 435 degrees of freedom Residual deviance: 106.60 on 430 degrees of freedom (83 observations deleted due to missingness) AIC: 118.6 Number of Fisher Scoring iterations: 8 > drop1(glm(menarche~age+tanner,binomial),test="Chisq") Single term deletions Model: menarche ~ age + tanner Df Deviance AIC LRT Pr(>Chi) 106.60 118.60 age 1 124.50 134.50 17.901 2.327e-05 *** tanner 4 161.88 165.88 55.282 2.835e-11 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > predict(glm.hyp) 1 2 3 4 5 6 -2.3920763 -2.3920763 -1.6966575 -1.6966575 -1.5266180 -1.5266180 7 8 -0.8311991 -0.8311991 > predict(glm.hyp, type="response") 1 2 3 4 5 6 0.08377892 0.08377892 0.15490233 0.15490233 0.17848906 0.17848906 7 8 0.30339158 0.30339158 > glm.menarche <- glm(menarche~age, binomial) > Age <- seq(8,20,.1) > newages <- data.frame(age=Age) > predicted.probability <- predict(glm.menarche, + newages,type="resp") > plot(predicted.probability ~ Age, type="l") > if (.make.epsf) dev.copy2eps(file="menarche-fit.ps") > fitted(glm.hyp) 1 2 3 4 5 6 0.08377892 0.08377892 0.15490233 0.15490233 0.17848906 0.17848906 7 8 0.30339158 0.30339158 > prop.hyp [1] 0.08333333 0.11764706 0.12500000 0.00000000 0.18716578 [6] 0.15294118 0.29411765 0.34782609 > fitted(glm.hyp)*n.tot 1 2 3 4 5 6 5.0267351 1.4242416 1.2392186 0.3098047 33.3774535 15.1715698 7 8 15.4729705 6.9780063 > data.frame(fit=fitted(glm.hyp)*n.tot,n.hyp,n.tot) fit n.hyp n.tot 1 5.0267351 5 60 2 1.4242416 2 17 3 1.2392186 1 8 4 0.3098047 0 2 5 33.3774535 35 187 6 15.1715698 13 85 7 15.4729705 15 51 8 6.9780063 8 23 > age.group <- cut(age,c(8,10,12,13,14,15,16,18,20)) > tb <- table(age.group,menarche) > tb menarche age.group No Yes (8,10] 100 0 (10,12] 97 4 (12,13] 32 21 (13,14] 22 20 (14,15] 5 36 (15,16] 0 31 (16,18] 0 105 (18,20] 0 46 > rel.freq <- prop.table(tb,1)[,2] > rel.freq (8,10] (10,12] (12,13] (13,14] (14,15] (15,16] 0.00000000 0.03960396 0.39622642 0.47619048 0.87804878 1.00000000 (16,18] (18,20] 1.00000000 1.00000000 > points(rel.freq ~ c(9,11,12.5,13.5,14.5,15.5,17,19),pch=5) > if (.make.epsf) dev.copy2eps(file="menarche-fit+obs.ps") > age.gr <- cut(age,c(8,12,13,14,20)) > summary(glm(menarche~age+age.gr,binomial)) Call: glm(formula = menarche ~ age + age.gr, family = binomial) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -21.5683 5.0645 -4.259 2.06e-05 *** age 1.6250 0.4416 3.680 0.000233 *** age.gr(12,13] 0.7296 0.7856 0.929 0.353024 age.gr(13,14] -0.5219 1.1184 -0.467 0.640765 age.gr(14,20] 0.2751 1.6065 0.171 0.864053 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 719.39 on 518 degrees of freedom Residual deviance: 192.61 on 514 degrees of freedom AIC: 202.61 Number of Fisher Scoring iterations: 8 > anova(glm(menarche~age+age.gr,binomial)) Analysis of Deviance Table Model: binomial, link: logit Response: menarche Terms added sequentially (first to last) Df Deviance Resid. Df Resid. Dev Pr(>Chi) NULL 518 719.39 age 1 518.73 517 200.66 < 2e-16 *** age.gr 3 8.06 514 192.61 0.04483 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > 1-pchisq(8.058,3) [1] 0.04482811 > anova(glm(menarche~age+I(age^2)+I(age^3)+age.gr,binomial)) Analysis of Deviance Table Model: binomial, link: logit Response: menarche Terms added sequentially (first to last) Df Deviance Resid. Df Resid. Dev Pr(>Chi) NULL 518 719.39 age 1 518.73 517 200.66 < 2.2e-16 *** I(age^2) 1 0.05 516 200.61 0.819906 I(age^3) 1 8.82 515 191.80 0.002984 ** age.gr 3 3.34 512 188.46 0.342745 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Warning messages: 1: glm.fit: fitted probabilities numerically 0 or 1 occurred 2: glm.fit: fitted probabilities numerically 0 or 1 occurred > glm.menarche <- glm(menarche~age+I(age^2)+I(age^3), binomial) Warning message: glm.fit: fitted probabilities numerically 0 or 1 occurred > predicted.probability <- + predict(glm.menarche, newages, type="resp") > plot(predicted.probability ~ Age, type="l") > points(rel.freq~c(9,11,12.5,13.5,14.5,15.5,17,19), pch=5) > if (.make.epsf) dev.copy2eps(file="menarche-cubic.ps") > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > library(survival) Attaching package: `survival' The following object is masked from `package:ISwR': lung > attach(melanom) > names(melanom) [1] "no" "status" "days" "ulc" "thick" "sex" > Surv(days, status==1) [1] 10+ 30+ 35+ 99+ 185 204 210 232 232+ 279 [11] 295 355+ 386 426 469 493+ 529 621 629 659 [21] 667 718 752 779 793 817 826+ 833 858 869 [31] 872 967 977 982 1041 1055 1062 1075 1156 1228 [41] 1252 1271 1312 1427+ 1435 1499+ 1506 1508+ 1510+ 1512+ [51] 1516 1525+ 1542+ 1548 1557+ 1560 1563+ 1584 1605+ 1621 [61] 1627+ 1634+ 1641+ 1641+ 1648+ 1652+ 1654+ 1654+ 1667 1678+ [71] 1685+ 1690 1710+ 1710+ 1726 1745+ 1762+ 1779+ 1787+ 1787+ [81] 1793+ 1804+ 1812+ 1836+ 1839+ 1839+ 1854+ 1856+ 1860+ 1864+ [91] 1899+ 1914+ 1919+ 1920+ 1927+ 1933 1942+ 1955+ 1956+ 1958+ [101] 1963+ 1970+ 2005+ 2007+ 2011+ 2024+ 2028+ 2038+ 2056+ 2059+ [111] 2061 2062 2075+ 2085+ 2102+ 2103 2104+ 2108 2112+ 2150+ [121] 2156+ 2165+ 2209+ 2227+ 2227+ 2256 2264+ 2339+ 2361+ 2387+ [131] 2388 2403+ 2426+ 2426+ 2431+ 2460+ 2467 2492+ 2493+ 2521+ [141] 2542+ 2559+ 2565 2570+ 2660+ 2666+ 2676+ 2738+ 2782 2787+ [151] 2984+ 3032+ 3040+ 3042 3067+ 3079+ 3101+ 3144+ 3152+ 3154+ [161] 3180+ 3182+ 3185+ 3199+ 3228+ 3229+ 3278+ 3297+ 3328+ 3330+ [171] 3338 3383+ 3384+ 3385+ 3388+ 3402+ 3441+ 3458+ 3459+ 3459+ [181] 3476+ 3523+ 3667+ 3695+ 3695+ 3776+ 3776+ 3830+ 3856+ 3872+ [191] 3909+ 3968+ 4001+ 4103+ 4119+ 4124+ 4207+ 4310+ 4390+ 4479+ [201] 4492+ 4668+ 4688+ 4926+ 5565+ > survfit(Surv(days,status==1)~1) Call: survfit(formula = Surv(days, status == 1) ~ 1) n events median 0.95LCL 0.95UCL [1,] 205 57 NA NA NA > surv.all <- survfit(Surv(days,status==1)~1) > summary(surv.all) Call: survfit(formula = Surv(days, status == 1) ~ 1) time n.risk n.event survival std.err lower 95% CI upper 95% CI 185 201 1 0.995 0.00496 0.985 1.000 204 200 1 0.990 0.00700 0.976 1.000 210 199 1 0.985 0.00855 0.968 1.000 232 198 1 0.980 0.00985 0.961 1.000 279 196 1 0.975 0.01100 0.954 0.997 295 195 1 0.970 0.01202 0.947 0.994 386 193 1 0.965 0.01297 0.940 0.991 426 192 1 0.960 0.01384 0.933 0.988 469 191 1 0.955 0.01465 0.927 0.984 529 189 1 0.950 0.01542 0.920 0.981 621 188 1 0.945 0.01615 0.914 0.977 629 187 1 0.940 0.01683 0.907 0.973 659 186 1 0.935 0.01748 0.901 0.970 667 185 1 0.930 0.01811 0.895 0.966 718 184 1 0.925 0.01870 0.889 0.962 752 183 1 0.920 0.01927 0.883 0.958 779 182 1 0.915 0.01981 0.877 0.954 793 181 1 0.910 0.02034 0.871 0.950 817 180 1 0.904 0.02084 0.865 0.946 833 178 1 0.899 0.02134 0.859 0.942 858 177 1 0.894 0.02181 0.853 0.938 869 176 1 0.889 0.02227 0.847 0.934 872 175 1 0.884 0.02272 0.841 0.930 967 174 1 0.879 0.02315 0.835 0.926 977 173 1 0.874 0.02357 0.829 0.921 982 172 1 0.869 0.02397 0.823 0.917 1041 171 1 0.864 0.02436 0.817 0.913 1055 170 1 0.859 0.02474 0.812 0.909 1062 169 1 0.854 0.02511 0.806 0.904 1075 168 1 0.849 0.02547 0.800 0.900 1156 167 1 0.844 0.02582 0.794 0.896 1228 166 1 0.838 0.02616 0.789 0.891 1252 165 1 0.833 0.02649 0.783 0.887 1271 164 1 0.828 0.02681 0.777 0.883 1312 163 1 0.823 0.02713 0.772 0.878 1435 161 1 0.818 0.02744 0.766 0.874 1506 159 1 0.813 0.02774 0.760 0.869 1516 155 1 0.808 0.02805 0.755 0.865 1548 152 1 0.802 0.02837 0.749 0.860 1560 150 1 0.797 0.02868 0.743 0.855 1584 148 1 0.792 0.02899 0.737 0.851 1621 146 1 0.786 0.02929 0.731 0.846 1667 137 1 0.780 0.02963 0.725 0.841 1690 134 1 0.775 0.02998 0.718 0.836 1726 131 1 0.769 0.03033 0.712 0.831 1933 110 1 0.762 0.03085 0.704 0.825 2061 95 1 0.754 0.03155 0.694 0.818 2062 94 1 0.746 0.03221 0.685 0.812 2103 90 1 0.737 0.03290 0.676 0.805 2108 88 1 0.729 0.03358 0.666 0.798 2256 80 1 0.720 0.03438 0.656 0.791 2388 75 1 0.710 0.03523 0.645 0.783 2467 69 1 0.700 0.03619 0.633 0.775 2565 63 1 0.689 0.03729 0.620 0.766 2782 57 1 0.677 0.03854 0.605 0.757 3042 52 1 0.664 0.03994 0.590 0.747 3338 35 1 0.645 0.04307 0.566 0.735 > plot(surv.all) > if (.make.epsf) dev.copy2eps(file="surv-all.ps") > surv.bysex <- survfit(Surv(days,status==1)~sex) > plot(surv.bysex) > if (.make.epsf) dev.copy2eps(file="surv-bysex.ps") > plot(surv.bysex, conf.int=T, col=c("black","gray")) > survdiff(Surv(days,status==1)~sex) Call: survdiff(formula = Surv(days, status == 1) ~ sex) N Observed Expected (O-E)^2/E (O-E)^2/V sex=1 126 28 37.1 2.25 6.47 sex=2 79 29 19.9 4.21 6.47 Chisq= 6.5 on 1 degrees of freedom, p= 0.01 > survdiff(Surv(days,status==1)~sex+strata(ulc)) Call: survdiff(formula = Surv(days, status == 1) ~ sex + strata(ulc)) N Observed Expected (O-E)^2/E (O-E)^2/V sex=1 126 28 34.7 1.28 3.31 sex=2 79 29 22.3 1.99 3.31 Chisq= 3.3 on 1 degrees of freedom, p= 0.07 > summary(coxph(Surv(days,status==1)~sex)) Call: coxph(formula = Surv(days, status == 1) ~ sex) n= 205, number of events= 57 coef exp(coef) se(coef) z Pr(>|z|) sex 0.6622 1.9390 0.2651 2.498 0.0125 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 exp(coef) exp(-coef) lower .95 upper .95 sex 1.939 0.5157 1.153 3.26 Concordance= 0.59 (se = 0.034 ) Likelihood ratio test= 6.15 on 1 df, p=0.01 Wald test = 6.24 on 1 df, p=0.01 Score (logrank) test = 6.47 on 1 df, p=0.01 > summary(coxph(Surv(days,status==1)~sex+log(thick)+strata(ulc))) Call: coxph(formula = Surv(days, status == 1) ~ sex + log(thick) + strata(ulc)) n= 205, number of events= 57 coef exp(coef) se(coef) z Pr(>|z|) sex 0.3600 1.4333 0.2702 1.332 0.1828 log(thick) 0.5599 1.7505 0.1784 3.139 0.0017 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 exp(coef) exp(-coef) lower .95 upper .95 sex 1.433 0.6977 0.844 2.434 log(thick) 1.750 0.5713 1.234 2.483 Concordance= 0.673 (se = 0.039 ) Likelihood ratio test= 13.3 on 2 df, p=0.001 Wald test = 12.88 on 2 df, p=0.002 Score (logrank) test = 12.98 on 2 df, p=0.002 > plot(survfit(coxph(Surv(days,status==1)~ + log(thick)+sex+strata(ulc)))) > if (.make.epsf) dev.copy2eps(file="surv-cox.ps") > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > names(eba1977) [1] "city" "age" "pop" "cases" > attach(eba1977) > fit <- glm(cases~city+age+offset(log(pop)), family=poisson) > summary(fit) Call: glm(formula = cases ~ city + age + offset(log(pop)), family = poisson) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -5.6321 0.2003 -28.125 < 2e-16 *** cityHorsens -0.3301 0.1815 -1.818 0.0690 . cityKolding -0.3715 0.1878 -1.978 0.0479 * cityVejle -0.2723 0.1879 -1.450 0.1472 age55-59 1.1010 0.2483 4.434 9.23e-06 *** age60-64 1.5186 0.2316 6.556 5.53e-11 *** age65-69 1.7677 0.2294 7.704 1.31e-14 *** age70-74 1.8569 0.2353 7.891 3.00e-15 *** age75+ 1.4197 0.2503 5.672 1.41e-08 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poisson family taken to be 1) Null deviance: 129.908 on 23 degrees of freedom Residual deviance: 23.447 on 15 degrees of freedom AIC: 137.84 Number of Fisher Scoring iterations: 5 > min(fitted(fit)) [1] 6.731286 > pchisq(deviance(fit), df.residual(fit), lower=F) [1] 0.07509017 > pchisq(23.45, 15, lower=F) [1] 0.07504166 > drop1(fit, test="Chisq") Single term deletions Model: cases ~ city + age + offset(log(pop)) Df Deviance AIC LRT Pr(>Chi) 23.447 137.84 city 3 28.307 136.69 4.859 0.1824 age 5 126.515 230.90 103.068 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > fit2 <- glm(cases~(city=="Fredericia")+age+offset(log(pop)), + family=poisson) > anova(fit, fit2, test="Chisq") Analysis of Deviance Table Model 1: cases ~ city + age + offset(log(pop)) Model 2: cases ~ (city == "Fredericia") + age + offset(log(pop)) Resid. Df Resid. Dev Df Deviance Pr(>Chi) 1 15 23.448 2 17 23.700 -2 -0.2526 0.8814 > drop1(fit2, test="Chisq") Single term deletions Model: cases ~ (city == "Fredericia") + age + offset(log(pop)) Df Deviance AIC LRT Pr(>Chi) 23.700 134.09 city == "Fredericia" 1 28.307 136.69 4.606 0.03185 * age 5 127.117 227.50 103.417 < 2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > summary(fit2) Call: glm(formula = cases ~ (city == "Fredericia") + age + offset(log(pop)), family = poisson) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -5.9589 0.1809 -32.947 < 2e-16 *** city == "Fredericia"TRUE 0.3257 0.1481 2.200 0.0278 * age55-59 1.1013 0.2483 4.436 9.17e-06 *** age60-64 1.5203 0.2316 6.564 5.23e-11 *** age65-69 1.7687 0.2294 7.712 1.24e-14 *** age70-74 1.8592 0.2352 7.904 2.71e-15 *** age75+ 1.4212 0.2502 5.680 1.34e-08 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poisson family taken to be 1) Null deviance: 129.91 on 23 degrees of freedom Residual deviance: 23.70 on 17 degrees of freedom AIC: 134.09 Number of Fisher Scoring iterations: 4 > cf <- coefficients(summary(fit2)) > est <- cf[,1] > s.e. <- cf[,2] > rr <- exp(cbind(est, est - s.e.*qnorm(.975), est + + s.e.*qnorm(.975) )) > colnames(rr) <- c("RateRatio", "CI.lo","CI.hi") > rr RateRatio CI.lo CI.hi (Intercept) 0.002582626 0.001811788 0.003681423 city == "Fredericia"TRUE 1.384992752 1.036131057 1.851314957 age55-59 3.008134852 1.849135187 4.893571521 age60-64 4.573665854 2.904833526 7.201245496 age65-69 5.863391064 3.740395488 9.191368903 age70-74 6.418715646 4.047748963 10.178474731 age75+ 4.142034525 2.536571645 6.763637070 > exp(cbind(coef(fit2), confint(fit2))) Waiting for profiling to be done... 2.5 % 97.5 % (Intercept) 0.002582626 0.001776585 0.003617461 city == "Fredericia"TRUE 1.384992752 1.029364554 1.841214351 age55-59 3.008134852 1.843566451 4.902377896 age60-64 4.573665854 2.912314202 7.248205161 age65-69 5.863391064 3.752718512 9.256967983 age70-74 6.418715646 4.053251339 10.234350656 age75+ 4.142034525 2.527103521 6.771889935 > head(nickel.expand) agr ygr id icd exposure dob age1st agein ageout lung 1 20 1931 325 0 0 1910.500 14.0737 23.7465 25 6 2 20 1931 273 0 0 1909.500 14.6913 24.7465 25 6 3 20 1931 110 0 0 1909.247 14.0302 24.9999 25 6 4 20 1931 574 0 0 1909.729 14.0356 24.5177 25 6 5 20 1931 213 0 0 1910.129 14.2018 24.1177 25 6 6 20 1931 546 0 0 1909.500 14.4945 24.7465 25 6 nasal other 1 0 3116 2 0 3116 3 0 3116 4 0 3116 5 0 3116 6 0 3116 > subset(nickel.expand, id==325) agr ygr id icd exposure dob age1st agein ageout lung 1 20 1931 325 0 0 1910.5 14.0737 23.7465 25.0000 6 13 25 1931 325 0 0 1910.5 14.0737 25.0000 30.0000 14 172 30 1936 325 0 0 1910.5 14.0737 30.0000 35.0000 30 391 35 1941 325 0 0 1910.5 14.0737 35.0000 40.0000 81 728 40 1946 325 434 0 1910.5 14.0737 40.0000 43.0343 236 nasal other 1 0 3116 13 0 3024 172 1 3188 391 1 3549 728 3 3643 > nickel.expand <- within(nickel.expand, + lung.cancer <- as.numeric(icd %in% c(162,163))) > attach(nickel.expand) The following object is masked from package:ISwR: lung > pyr <- tapply(ageout-agein,list(ygr,agr), sum) > print(round(pyr), na.print="-") 20 25 30 35 40 45 50 55 60 65 70 75 80 1931 3 86 268 446 446 431 455 323 159 23 4 - - 1936 - - 100 327 504 512 503 472 314 130 20 5 - 1941 - - 0 105 336 481 482 445 368 235 80 14 3 1946 - - - - 102 335 461 404 369 263 157 43 10 1951 - - - - - 95 299 415 334 277 181 92 31 1956 - - - - - - 89 252 364 257 181 101 52 1961 - - - - - - - 71 221 284 150 104 44 1966 - - - - - - - - 66 168 208 93 51 1971 - - - - - - - - - 57 133 131 54 1976 - - - - - - - - - - 31 68 53 > count <- tapply(lung.cancer, list(ygr, agr), sum) > print(count, na.print="-") 20 25 30 35 40 45 50 55 60 65 70 75 80 1931 0 0 0 0 0 4 2 2 2 0 0 - - 1936 - - 0 0 2 3 4 6 5 1 0 0 - 1941 - - 0 0 0 3 7 5 6 3 2 0 0 1946 - - - - 0 0 8 7 6 2 2 0 0 1951 - - - - - 0 3 3 9 6 1 0 0 1956 - - - - - - 0 4 3 6 1 2 0 1961 - - - - - - - 0 1 1 3 2 1 1966 - - - - - - - - 2 0 0 1 0 1971 - - - - - - - - - 0 0 2 2 1976 - - - - - - - - - - 0 1 1 > print(round(count/pyr*1000, 1), na.print="-") 20 25 30 35 40 45 50 55 60 65 70 75 80 1931 0 0 0 0 0 9.3 4.4 6.2 12.6 0.0 0.0 - - 1936 - - 0 0 4 5.9 7.9 12.7 15.9 7.7 0.0 0.0 - 1941 - - 0 0 0 6.2 14.5 11.2 16.3 12.8 25.0 0.0 0.0 1946 - - - - 0 0.0 17.4 17.3 16.3 7.6 12.8 0.0 0.0 1951 - - - - - 0.0 10.0 7.2 27.0 21.7 5.5 0.0 0.0 1956 - - - - - - 0.0 15.9 8.2 23.4 5.5 19.8 0.0 1961 - - - - - - - 0.0 4.5 3.5 19.9 19.3 22.8 1966 - - - - - - - - 30.1 0.0 0.0 10.7 0.0 1971 - - - - - - - - - 0.0 0.0 15.2 36.8 1976 - - - - - - - - - - 0.0 14.6 19.0 > expect.count <- tapply(lung/1e6*(ageout-agein), + list(ygr,agr), sum) > print(round(expect.count, 1), na.print="-") 20 25 30 35 40 45 50 55 60 65 70 75 80 1931 0 0 0 0 0.1 0.1 0.2 0.2 0.1 0.0 0.0 - - 1936 - - 0 0 0.1 0.1 0.2 0.3 0.2 0.1 0.0 0.0 - 1941 - - 0 0 0.1 0.2 0.3 0.4 0.4 0.2 0.1 0.0 0.0 1946 - - - - 0.0 0.2 0.4 0.5 0.6 0.5 0.2 0.0 0.0 1951 - - - - - 0.1 0.4 0.8 0.9 0.8 0.5 0.2 0.0 1956 - - - - - - 0.1 0.6 1.2 1.0 0.7 0.3 0.1 1961 - - - - - - - 0.2 0.8 1.4 0.7 0.5 0.1 1966 - - - - - - - - 0.2 0.9 1.3 0.6 0.2 1971 - - - - - - - - - 0.3 0.9 1.0 0.3 1976 - - - - - - - - - - 0.2 0.6 0.4 > expect.tot <- sum(lung/1e6*(ageout-agein)) > expect.tot [1] 24.19893 > count.tot <- sum(lung.cancer) > count.tot [1] 137 > count.tot/expect.tot [1] 5.661408 > fit <- glm(lung.cancer ~ 1, poisson, + offset = log((ageout-agein)*lung/1e6)) > summary(fit) Call: glm(formula = lung.cancer ~ 1, family = poisson, offset = log((ageout - agein) * lung/1e+06)) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) 1.73367 0.08544 20.29 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poisson family taken to be 1) Null deviance: 1175.6 on 3723 degrees of freedom Residual deviance: 1175.6 on 3723 degrees of freedom AIC: 1451.6 Number of Fisher Scoring iterations: 7 > exp(coef(fit)) (Intercept) 5.661408 > tapply(lung.cancer, agr, sum) 20 25 30 35 40 45 50 55 60 65 70 75 80 0 0 0 0 2 10 24 27 34 19 9 8 4 > tapply(lung.cancer, ygr, sum) 1931 1936 1941 1946 1951 1956 1961 1966 1971 1976 10 21 26 25 22 16 8 3 4 2 > detach() > nickel.expand <- within(nickel.expand,{ + A <- factor(agr) + Y <- factor(ygr) + lv <- levels(A) + lv[1:6] <- "< 50" + lv[11:13] <- "70+" + levels(A) <- lv + lv <- levels(Y) + lv[7:10] <- "1961ff" + levels(Y) <- lv + rm(lv) + }) > attach(nickel.expand) The following object is masked from package:ISwR: lung > fit <- glm(lung.cancer ~ A + Y, poisson, + offset=log((ageout-agein)*lung/1e6)) > drop1(fit, test="Chisq") Single term deletions Model: lung.cancer ~ A + Y Df Deviance AIC LRT Pr(>Chi) 1069.7 1367.7 A 5 1073.8 1361.8 4.082 0.5376 Y 6 1118.5 1404.5 48.770 8.29e-09 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > fit <- glm(lung.cancer ~ Y - 1, poisson, + offset=log((ageout-agein)*lung/1e6)) > summary(fit) Call: glm(formula = lung.cancer ~ Y - 1, family = poisson, offset = log((ageout - agein) * lung/1e+06)) Coefficients: Estimate Std. Error z value Pr(>|z|) Y1931 2.6178 0.3162 8.279 < 2e-16 *** Y1936 3.0126 0.2182 13.805 < 2e-16 *** Y1941 2.7814 0.1961 14.182 < 2e-16 *** Y1946 2.2787 0.2000 11.394 < 2e-16 *** Y1951 1.8038 0.2132 8.461 < 2e-16 *** Y1956 1.3698 0.2500 5.479 4.27e-08 *** Y1961ff 0.4746 0.2425 1.957 0.0504 . --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poisson family taken to be 1) Null deviance: 1425.0 on 3724 degrees of freedom Residual deviance: 1073.8 on 3717 degrees of freedom AIC: 1361.8 Number of Fisher Scoring iterations: 7 > round(exp(coef(fit)), 1) Y1931 Y1936 Y1941 Y1946 Y1951 Y1956 Y1961ff 13.7 20.3 16.1 9.8 6.1 3.9 1.6 > expect.count <- tapply(lung/1e6*(ageout-agein), Y, sum) > count <- tapply(lung.cancer, Y, sum) > cbind(count=count, expect=round(expect.count,1), + SMR= round(count/expect.count, 1)) count expect SMR 1931 10 0.7 13.7 1936 21 1.0 20.3 1941 26 1.6 16.1 1946 25 2.6 9.8 1951 22 3.6 6.1 1956 16 4.1 3.9 1961ff 17 10.6 1.6 > detach() > nickel.expand <- within(nickel.expand,{ + TFE <- cut(agein-age1st, c(0,20,30,40,50,100), right=F) + AFE <- cut(age1st, c(0, 20, 27.5, 35, 100), right=F) + YFE <- cut(dob + age1st, c(0, 1910, 1915, 1920, 1925),right=F) + EXP <- cut(exposure, c(0, 0.5, 4.5, 8.5, 12.5, 25), right=F) + }) > attach(nickel.expand) The following object is masked from package:ISwR: lung > fit <- glm(lung.cancer ~ TFE + AFE + YFE + EXP, poisson, + offset=log((ageout-agein)*lung/1e6)) > drop1(fit, test="Chisq") Single term deletions Model: lung.cancer ~ TFE + AFE + YFE + EXP Df Deviance AIC LRT Pr(>Chi) 1052.9 1356.9 TFE 4 1107.3 1403.3 54.425 4.287e-11 *** AFE 3 1055.0 1353.0 2.079 0.5560839 YFE 3 1058.1 1356.1 5.155 0.1608219 EXP 4 1072.0 1368.0 19.072 0.0007606 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > summary(fit) Call: glm(formula = lung.cancer ~ TFE + AFE + YFE + EXP, family = poisson, offset = log((ageout - agein) * lung/1e+06)) Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) 2.36836 0.55716 4.251 2.13e-05 *** TFE[20,30) -0.21788 0.36022 -0.605 0.545284 TFE[30,40) -0.77184 0.36529 -2.113 0.034605 * TFE[40,50) -1.87583 0.41707 -4.498 6.87e-06 *** TFE[50,100) -2.22142 0.55068 -4.034 5.48e-05 *** AFE[20,27.5) 0.28506 0.31524 0.904 0.365868 AFE[27.5,35) 0.21961 0.34011 0.646 0.518462 AFE[35,100) -0.10818 0.44412 -0.244 0.807556 YFE[1910,1915) 0.04826 0.27193 0.177 0.859137 YFE[1915,1920) -0.56397 0.37585 -1.501 0.133483 YFE[1920,1925) -0.42520 0.30017 -1.417 0.156614 EXP[0.5,4.5) 0.58373 0.21200 2.753 0.005897 ** EXP[4.5,8.5) 1.03175 0.28364 3.638 0.000275 *** EXP[8.5,12.5) 1.18345 0.37406 3.164 0.001557 ** EXP[12.5,25) 1.28601 0.48236 2.666 0.007674 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poisson family taken to be 1) Null deviance: 1175.6 on 3723 degrees of freedom Residual deviance: 1052.9 on 3709 degrees of freedom AIC: 1356.9 Number of Fisher Scoring iterations: 7 > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > ## IGNORE_RDIFF_BEGIN > t <- 0:10 > y <- rnorm(11, mean=5*exp(-t/5), sd=.2) > plot(y ~ t) > if (.make.epsf) dev.copy2eps(file="nonlin-sim.ps") > nlsout <- nls(y ~ A*exp(-alpha*t), start=c(A=2, alpha=0.05)) > summary(nlsout) Formula: y ~ A * exp(-alpha * t) Parameters: Estimate Std. Error t value Pr(>|t|) A 5.05784 0.17626 28.70 3.69e-10 *** alpha 0.20282 0.01227 16.53 4.85e-08 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.2283 on 9 degrees of freedom Number of iterations to convergence: 5 Achieved convergence tolerance: 1.091e-08 > attach(subset(juul2, age<20 & age>5 & sex==1)) > plot(height ~ age) > if (.make.epsf) dev.copy2eps(file="juul-a-h.ps") > plot(log(5.3-log(height))~age) Warning message: In log(5.3 - log(height)) : NaNs produced > if (.make.epsf) dev.copy2eps(file="gomp-dif.ps") > lm(log(5.3-log(height))~age) Call: lm(formula = log(5.3 - log(height)) ~ age) Coefficients: (Intercept) age 0.4200 -0.1538 Warning message: In log(5.3 - log(height)) : NaNs produced > fit <- nls(height~alpha*exp(-beta*exp(-gamma*age)), + start=c(alpha=exp(5.3),beta=exp(0.42),gamma=0.15)) > summary(fit) Formula: height ~ alpha * exp(-beta * exp(-gamma * age)) Parameters: Estimate Std. Error t value Pr(>|t|) alpha 2.428e+02 1.157e+01 20.978 <2e-16 *** beta 1.176e+00 1.892e-02 62.149 <2e-16 *** gamma 7.903e-02 8.569e-03 9.222 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 6.811 on 499 degrees of freedom Number of iterations to convergence: 8 Achieved convergence tolerance: 5.228e-06 (3 observations deleted due to missingness) > plot(age, height) > newage <- seq(5,20,length=500) > lines(newage, predict(fit,newdata=data.frame(age=newage)),lwd=2) > if (.make.epsf) dev.copy2eps(file="gompertz.ps") > > fit <- nls(log(height)~log(alpha*exp(-beta*exp(-gamma*age))), + start=c(alpha=exp(5.3),beta=exp(.12),gamma=.12)) > summary(fit) Formula: log(height) ~ log(alpha * exp(-beta * exp(-gamma * age))) Parameters: Estimate Std. Error t value Pr(>|t|) alpha 255.97696 15.03921 17.021 <2e-16 *** beta 1.18949 0.02971 40.033 <2e-16 *** gamma 0.07033 0.00811 8.673 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.04307 on 499 degrees of freedom Number of iterations to convergence: 8 Achieved convergence tolerance: 2.788e-06 (3 observations deleted due to missingness) > plot(age, log(height)) > lines(newage, predict(fit,newdata=data.frame(age=newage)),lwd=2) > if (.make.epsf) dev.copy2eps(file="log-gompertz.ps") > # count quoted in text, subtract 1 for SSD() > length(ls(pattern="SS.*", "package:stats"))-1 [1] 10 > summary(nls(height~SSgompertz(age, Asym, b2, b3))) Formula: height ~ SSgompertz(age, Asym, b2, b3) Parameters: Estimate Std. Error t value Pr(>|t|) Asym 2.428e+02 1.157e+01 20.98 <2e-16 *** b2 1.176e+00 1.892e-02 62.15 <2e-16 *** b3 9.240e-01 7.918e-03 116.69 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 6.811 on 499 degrees of freedom Number of iterations to convergence: 5 Achieved convergence tolerance: 1.411e-06 (3 observations deleted due to missingness) > cf <- coef(nls(height ~ SSgompertz(age, Asym, b2, b3))) > summary(nls(log(height) ~ + log(as.vector(SSgompertz(age,Asym, b2, b3))), + start=as.list(cf))) Formula: log(height) ~ log(as.vector(SSgompertz(age, Asym, b2, b3))) Parameters: Estimate Std. Error t value Pr(>|t|) Asym 2.560e+02 1.504e+01 17.02 <2e-16 *** b2 1.189e+00 2.971e-02 40.03 <2e-16 *** b3 9.321e-01 7.559e-03 123.31 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.04307 on 499 degrees of freedom Number of iterations to convergence: 5 Achieved convergence tolerance: 5.487e-06 (3 observations deleted due to missingness) > par(mfrow=c(3,1)) > plot(profile(fit)) > if (.make.epsf) dev.copy2eps(file="gomp-prof.ps") > confint(fit) Waiting for profiling to be done... 2.5% 97.5% alpha 233.5007561 294.76267648 beta 1.1442683 1.27411439 gamma 0.0550577 0.08574974 > confint.default(fit) 2.5 % 97.5 % alpha 226.50064942 285.4532632 beta 1.13125577 1.2477285 gamma 0.05443818 0.0862269 > ## IGNORE_RDIFF_END > rm(list=ls()) > while(search()[2] != "package:ISwR") detach() > > proc.time() user system elapsed 2.40 0.31 2.71