R version 4.4.0 RC (2024-04-16 r86458 ucrt) -- "Puppy Cup" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > > fit1 <- lm(skips ~ Opening + Solder + Mask + PadType + Panel, + data=solder) > y1 <- yates(fit1, "Opening") > > temp <- levels(solder$Opening) > tpred <- matrix(0., nrow(solder), 3) > for (i in 1:3) { + tdata <- solder + tdata$Opening <- temp[i] + tpred[,i] <- predict(fit1, newdata=tdata) + } > all.equal(y1$estimate[,"pmm"], colMeans(tpred)) [1] TRUE > > # This fit is deficient: there are no Opening=L and Mask=A6 obs > # The MPV for Mask=A6 and Opening L will therefore be NA, as well > # as for all levels of Solder, but we can compute the others. > # Solder will be NA for all levels > fit2 <- lm(skips ~ Opening*Mask + Solder, + data=solder) > y2a <- yates(fit2, "Mask", population="factorial") > y2b <- yates(fit2, "Opening", population="factorial") > y2c <- yates(fit2, "Solder", population="factorial") > > # The predict.lm function gives correct predictions for estimable > # functions (all but L,A6) and nonsense for others. It knows that > # some are not estimable due to the NA coefficients, but not which ones, > # so always prints a warning. Hence the suppressWarnings call. > tdata <- do.call(expand.grid, fit2$xlevels[1:3]) > temp <- levels(solder$Mask) > tpreda <- matrix(0., nrow(tdata), length(temp), + dimnames=list(NULL, temp)) > for (i in seq(along=temp)) { + tdata$Mask <- temp[i] + suppressWarnings(tpreda[,i] <- predict(fit2, newdata=tdata)) + } > tpreda[,"A6"] <- NA # the A6 estimate is deficient > aeq(y2a$estimate[,"pmm"], colMeans(tpreda)) [1] TRUE > > tdata <- do.call(expand.grid, fit2$xlevels[1:3]) > temp <- levels(solder$Opening) > tpredb <- matrix(0., nrow(tdata), length(temp), + dimnames=list(NULL, temp)) > for (i in seq(along=temp)) { + tdata$Opening <- temp[i] + suppressWarnings(tpredb[,i] <- predict(fit2, newdata=tdata)) + } > tpredb[,"L"] <- NA > aeq(y2b$estimate[,"pmm"], colMeans(tpredb)) [1] TRUE > > # Solder should be all NA > all(is.na(y2c$estimate[,"pmm"])) [1] TRUE > > # Tests for Solder are defined for a non-factorial population, however. > # the [] below retains the factor structure of the variable, where the > # runs above did not. R gets prediction correct both ways. > y2d <- yates(fit2, ~Solder) > temp <- levels(solder$Solder) > tdata <- solder > tpredd <- matrix(0, nrow(tdata), length(temp), + dimnames=list(NULL, temp)) > for (i in seq(along=temp)) { + tdata$Solder[] <- temp[i] + suppressWarnings(tpredd[,i] <- predict(fit2, newdata=tdata)) + } > aeq(y2d$estimate$pmm, colMeans(tpredd)) [1] TRUE > > # > # Verify that the result is unchanged by how dummies are coded > # The coefs move all over the map, but predictions are unchanged > fit3 <- lm(skips ~ C(Opening, contr.helmert)*Mask + C(Solder, contr.SAS), + data=solder) > y3a <- yates(fit3, ~Mask, population='yates') > equal <- c("estimate", "test", "mvar") > all.equal(y3a[equal], y2a[equal]) [1] TRUE > > tdata <- do.call(expand.grid, fit2$xlevels[1:3]) # use orignal variable names > temp <- levels(solder$Mask) > cpred <- matrix(0., nrow(tdata), length(temp), + dimnames=list(NULL, temp)) > for (i in seq(along=temp)) { + tdata$Mask <- temp[i] + suppressWarnings(cpred[,i] <- predict(fit3, newdata=tdata)) + } > aeq(cpred[, temp!="A6"], tpreda[, temp!= "A6"]) # same predictions [1] TRUE > all.equal(y3a$estimate, y2a$estimate) [1] TRUE > > y3b <- yates(fit3, ~Opening, population='yates') > # column names will differ > all.equal(y3b$estimate, y2b$estimate, check.attributes=FALSE) [1] TRUE > > y3d <- yates(fit3, ~Solder) > for (i in 1:3) { + print(all.equal(y3d[[i]], y2d[[i]], check.attributes=FALSE)) + } [1] TRUE [1] TRUE [1] TRUE > > # Reprise this with a character variable in the model > sdata <- solder > sdata$Mask <- as.character(sdata$Mask) > fit4 <- lm(skips ~ Opening*Mask + Solder, data=sdata) > y4a <- yates(fit4, ~ Mask, population= "yates") > y4b <- yates(fit4, ~ Opening, population= "yates") > y4d <- yates(fit4, ~ Solder) > equal <- c("estimate", "tests", "mvar", "cmat") > all.equal(y2a[equal], y4a[equal]) # the "call" component differs [1] TRUE > all.equal(y2b[equal], y4b[equal]) [1] TRUE > all.equal(y2d[equal], y4d[equal]) [1] TRUE > > proc.time() user system elapsed 1.28 0.12 1.39