R Under development (unstable) (2024-11-03 r87286 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(sfsmisc) > > ###--------------- "Iris Example for ever" ---------------------------- > data(iris) > cl.true <- as.integer(iris[,"Species"]) > n <- length(cl.true) > stopifnot(cl.true == rep(1:3, each = 50)) > m.iris <- data.matrix(iris[, 1:4]) > > .proctime00 <- proc.time() > > ## Self Prediction: Not too good (2+4 and 3+3 misclass.) > table(diagDA(m.iris, cl.true, m.iris), cl.true) cl.true 1 2 3 1 50 0 0 2 0 48 4 3 0 2 46 > table(diagDA(m.iris, cl.true, m.iris, pool=FALSE), cl.true) cl.true 1 2 3 1 50 0 0 2 0 47 3 3 0 3 47 > > ## Crossvalidation: The same example as knn() & knn1() from "class" : > data(iris3) > train <- rbind(iris3[1:25,,1], iris3[1:25,,2], iris3[1:25,,3]) > test <- rbind(iris3[26:50,,1], iris3[26:50,,2], iris3[26:50,,3]) > cl <- rep(1:3, each = 25) > > pcl <- diagDA(train, cl, test) > table(pcl, cl)## 0 + 1 + 2 misclassified cl pcl 1 2 3 1 25 0 0 2 0 24 2 3 0 1 23 > ## knn ( k=1) has 0 + 1 + 3 > ## knn ( *, k=3) has 0 + 2 + 3 ==> ``diagDA() is best ..'' > > stopifnot(pcl == diagDA(train,cl, test, pool = FALSE)) > # i.e. quadratic identical here > > ### Test 'NA' in predict dat.fr > RNGversion("3.5.0")# -- so w/ sample() still stays unchanged: Warning message: In RNGkind("Mersenne-Twister", "Inversion", "Rounding") : non-uniform 'Rounding' sampler used > set.seed(753) > itr <- sample(n, 0.9 * n) > lrn <- m.iris[ itr,] > tst <- m.iris[-itr,] > dd <- dDA(lrn, cl.true[itr]) > pd0 <- predict(dd, tst) > > i.NA <- c(3:5,7,11) > j.NA <- sample(1:ncol(tst), size=length(i.NA), replace=TRUE) > tst[cbind(i.NA, j.NA)] <- NA > pdd <- predict(dd, tst) > pcl <- diagDA(lrn, cl.true[itr], tst) > stopifnot(length(pdd) == nrow(tst), + identical(pdd, pcl), + pdd[-i.NA] == pd0[-i.NA], + which(is.na(pdd)) == i.NA) > > ## Now do some (randomized) CV : > ## for each observation, count how often it's misclassified > M <- 200 > set.seed(234) > missCl <- integer(n) > for(m in 1:M) { + itr <- sample(n, 0.9 * n) + lrn <- m.iris[ itr,] + tst <- m.iris[-itr,] + pcl <- diagDA(lrn, cl.true[itr], tst) + stopifnot(pcl == predict(dDA(lrn, cl.true[itr]), tst)) + missCl <- missCl + as.integer(pcl != cl.true[ - itr]) + } > missCl ; mean(missCl) / M [1] 0 0 2 1 2 3 7 15 12 13 5 16 18 14 5 0 0 2 1 2 3 7 15 12 13 [26] 5 16 18 14 5 0 0 2 1 2 3 7 15 12 13 5 16 18 14 5 0 0 2 1 2 [51] 3 7 15 12 13 5 16 18 14 5 0 0 2 1 2 3 7 15 12 13 5 16 18 14 5 [76] 0 0 2 1 2 3 7 15 12 13 5 16 18 14 5 0 0 2 1 2 3 7 15 12 13 [101] 5 16 18 14 5 0 0 2 1 2 3 7 15 12 13 5 16 18 14 5 0 0 2 1 2 [126] 3 7 15 12 13 5 16 18 14 5 0 0 2 1 2 3 7 15 12 13 5 16 18 14 5 [1] 0.03766667 > > ## The "same" with 'pool=FALSE' : > missCl <- integer(n) > for(m in 1:M) { + itr <- sample(n, 0.9 * n) + lrn <- m.iris[ itr,] + tst <- m.iris[-itr,] + pcl <- diagDA(lrn, cl.true[itr], tst, pool=FALSE) + stopifnot(pcl == predict(dDA(lrn, cl.true[itr], pool=FALSE), tst)) + missCl <- missCl + as.integer(pcl != cl.true[ - itr]) + } > missCl ; mean(missCl) / M ## here somewhat worse than linear [1] 1 2 1 3 9 11 13 8 14 12 12 11 14 15 7 1 2 1 3 9 11 13 8 14 12 [26] 12 11 14 15 7 1 2 1 3 9 11 13 8 14 12 12 11 14 15 7 1 2 1 3 9 [51] 11 13 8 14 12 12 11 14 15 7 1 2 1 3 9 11 13 8 14 12 12 11 14 15 7 [76] 1 2 1 3 9 11 13 8 14 12 12 11 14 15 7 1 2 1 3 9 11 13 8 14 12 [101] 12 11 14 15 7 1 2 1 3 9 11 13 8 14 12 12 11 14 15 7 1 2 1 3 9 [126] 11 13 8 14 12 12 11 14 15 7 1 2 1 3 9 11 13 8 14 12 12 11 14 15 7 [1] 0.04433333 > > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 0.42 0 0.42 NA NA > > > proc.time() user system elapsed 0.57 0.01 0.59