R Under development (unstable) (2024-10-30 r87277 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > library(optbin) > > > ### Helper Functions > > # Short for cat(sprintf(fmt, ...)). Automatically appends \n to fmt. > catf <- function(fmt, ...) { + cat(sprintf(sprintf("%s\n", fmt), ...)) + } > > # Nearly-equal floating point test of two numbers, within the absolute > # accuracy acc. > fpnear <- function(x1, x2, acc=1e-7) { + return(abs(x1-x2) < acc) + } > > > ### optbin Functionality Tests > > # Test the optimal binning result res with nbin bins against the expected > # endpoints expend, threshold values expthr, the total, best (M)SE expse, > # and the average value per bin expavg. Returns TRUE if all tests pass, > # FALSE if any fail. > verify_binning <- function(nbin, res, expend, expthr, expse, expavg) { + passed <- TRUE + + if (nbin != res$numbins) { + catf(" expected %d bins but result has %d", nbin, res$numbins) + passed <- FALSE + } + + for (b in 1:nbin) { + if (expend[b] != res$breaks[b]) { + catf(" expected bin %d endpoint at %d but got %d", + b, expend[b], res$breaks[b]) + passed <- FALSE + } + } + + for (b in 1:nbin) { + if (!fpnear(expthr[b], res$thr[b])) { + catf(" expected bin %d threshold %.3f but got %.3f", + b, expthr[b], res$thr[b]) + passed <- FALSE + } + } + + if (!fpnear(expse, res$minse)) { + catf(" expected bin %d metric %.5f but got %.5f", + b, expse, res$minse) + passed <- FALSE + } + + for (b in 1:nbin) { + if (!fpnear(expavg[b], res$binavg[b])) { + catf(" expected bin %d average %.3f but got %.3f", + b, expavg[b], res$binavg[b]) + passed <- FALSE + } + } + + return(passed) + } > > # Compare two binning results, resA and resB. Any difference causes the > # test to fail. Returns TRUE if the two are the same, FALSE if there > # are differences. > compare_binning <- function(resA, resB) { + passed <- TRUE + + if (resA$numbins != resB$numbins) { + catf(" binning A has %d bins, B %d", resA$numbins, resB$numbins) + passed <- FALSE + } + + if (resA$metric != resB$metric) { + catf(" binning A used %s, B %s", resA$metric, resB$metric) + passed <- FALSE + } + + if (!fpnear(resA$minse, resB$minse)) { + catf(" binning A best %s %.5f, B %.5f", toupper(resA$metric), + resA$minse, resB$minse) + passed <- FALSE + } + + for (b in 1:resA$numbins) { + if (!fpnear(resA$thr[b], resB$thr[b])) { + catf(" bin %d threshold in A is %.5f, B %.5f", + b, resA$thr[b], resB$thr[b]) + passed <- FALSE + } + } + + for (b in 1:resA$numbins) { + if (!fpnear(resA$binavg[b], resB$binavg[b])) { + catf(" bin %d average in A is %.5f, B %.5f", + b, resA$binavg[b], resB$binavg[b]) + passed <- FALSE + } + } + + for (b in 1:resA$numbins) { + if (!fpnear(resA$thr[b], resB$thr[b])) { + catf(" bin %d (M)SE in A is %.5f, B %.5f", + b, resA$binse[b], resB$binse[b]) + passed <- FALSE + } + } + + for (b in 1:resA$numbins) { + if (resA$breaks[b] != resB$breaks[b]) { + catf(" bin %d endpoint in A is %d, B %d", + b, resA$breaks[b], resB$breaks[b]) + passed <- FALSE + } + } + + return(passed) + } > > > # Test 1: Small linear sequence. Check with 2, 3, and 4 bins. > optbin_test1 <- function() { + passed <- TRUE + + data <- c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0) + + catf(" running test set 1 with cache") + + b <- optbin(data, 2) + if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) { + passed <- FALSE + } + + b <- optbin(data, 3) + if (!verify_binning(3, b, c(3, 6, 10), c(3.0, 6.0, 10.0), + 9.0, c(2.0, 5.0, 8.5))) { + passed <- FALSE + } + + b <- optbin(data, 4) + if (!verify_binning(4, b, c(2, 4, 7, 10), c(2.0, 4.0, 7.0, 10.0), + 5.0, c(1.5, 3.5, 6.0, 9.0))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + catf(" running test set 1 without cache") + + b <- optbin(data, 2, max.cache=0) + if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) { + passed <- FALSE + } + + b <- optbin(data, 3, max.cache=0) + if (!verify_binning(3, b, c(3, 6, 10), c(3.0, 6.0, 10.0), + 9.0, c(2.0, 5.0, 8.5))) { + passed <- FALSE + } + + b <- optbin(data, 4, max.cache=0) + if (!verify_binning(4, b, c(2, 4, 7, 10), c(2.0, 4.0, 7.0, 10.0), + 5.0, c(1.5, 3.5, 6.0, 9.0))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + return(passed) + } > > # Test 2: Two normals, N(1, 0.25), N(2, 0.25). Check with 2, 3, and 4 bins. > optbin_test2 <- function() { + passed <- TRUE + + data <- c(0.86, 0.91, 0.92, 1.04, 1.23, 1.23, 1.30, 1.36, 1.79, 1.89, + 1.89, 2.34, 2.41, 2.58, 2.79, 2.93, 2.96, 3.21, 3.41, 3.55) + + catf(" running test set 2 with cache") + + b <- optbin(data, 2) + if (!verify_binning(2, b, c(11, 20), c(1.89, 3.55), + 2.956779797980, c(1.31090909, 2.90888888))) { + passed <- FALSE + } + + b <- optbin(data, 3) + if (!verify_binning(3, b, c(8, 14, 20), c(1.36, 2.58, 3.55), + 1.269070833333, c(1.10625, 2.15, 3.14166666))) { + passed <- FALSE + } + + b <- optbin(data, 4) + if (!verify_binning(4, b, c(8, 11, 17, 20), c(1.36, 1.89, 2.96, 3.55), + 0.686537500000, + c(1.10625, 1.85666666, 2.668333333, 3.39))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + catf(" running test set 2 without cache") + + b <- optbin(data, 2, max.cache=0) + if (!verify_binning(2, b, c(11, 20), c(1.89, 3.55), + 2.956779797980, c(1.31090909, 2.90888888))) { + passed <- FALSE + } + + b <- optbin(data, 3, max.cache=0) + if (!verify_binning(3, b, c(8, 14, 20), c(1.36, 2.58, 3.55), + 1.269070833333, c(1.10625, 2.15, 3.14166666))) { + passed <- FALSE + } + + b <- optbin(data, 4, max.cache=0) + if (!verify_binning(4, b, c(8, 11, 17, 20), c(1.36, 1.89, 2.96, 3.55), + 0.686537500000, + c(1.10625, 1.85666666, 2.668333333, 3.39))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + return(passed) + } > > # Test 3: Four constant bins. Check with 2 t/m 6 bins (tie resolution 5, 6). > optbin_test3 <- function() { + passed <- TRUE + + data <- c(1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, + 5.0, 5.0, 5.0, 5.0, 7.0, 7.0, 7.0, 7.0) + + catf(" running test set 3 with cache") + + b <- optbin(data, 2) + if (!verify_binning(2, b, c(8, 16), c(3.0, 7.0), 16.0, c(2.0, 6.0))) { + passed <- FALSE + } + + b <- optbin(data, 3) + if (!verify_binning(3, b, c(4, 8, 16), c(1.0, 3.0, 7.0), 8.0, + c(1.0, 3.0, 6.0))) { + passed <- FALSE + } + + b <- optbin(data, 4) + if (!verify_binning(4, b, c(4, 8, 12, 16), c(1.0, 3.0, 5.0, 7.0), + 0.0, c(1.0, 3.0, 5.0, 7.0))) { + passed <- FALSE + } + + b <- optbin(data, 5) + if (!verify_binning(5, b, c(2, 4, 8, 12, 16), c(1.0, 1.0, 3.0, 5.0, 7.0), + 0.0, c(1.0, 1.0, 3.0, 5.0, 7.0))) { + passed <- FALSE + } + + b <- optbin(data, 6) + if (!verify_binning(6, b, c(2, 4, 6, 8, 12, 16), + c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0), 0.0, + c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + catf(" running test set 3 without cache") + + b <- optbin(data, 2, max.cache=0) + if (!verify_binning(2, b, c(8, 16), c(3.0, 7.0), 16.0, c(2.0, 6.0))) { + passed <- FALSE + } + + b <- optbin(data, 3, max.cache=0) + if (!verify_binning(3, b, c(4, 8, 16), c(1.0, 3.0, 7.0), 8.0, + c(1.0, 3.0, 6.0))) { + passed <- FALSE + } + + b <- optbin(data, 4, max.cache=0) + if (!verify_binning(4, b, c(4, 8, 12, 16), c(1.0, 3.0, 5.0, 7.0), + 0.0, c(1.0, 3.0, 5.0, 7.0))) { + passed <- FALSE + } + + b <- optbin(data, 5, max.cache=0) + if (!verify_binning(5, b, c(2, 4, 8, 12, 16), c(1.0, 1.0, 3.0, 5.0, 7.0), + 0.0, c(1.0, 1.0, 3.0, 5.0, 7.0))) { + passed <- FALSE + } + + b <- optbin(data, 6, max.cache=0) + if (!verify_binning(6, b, c(2, 4, 6, 8, 12, 16), + c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0), 0.0, + c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + return(passed) + } > > # Test 4: Data where SE and MSE bins differ. Check with 2 and 3 bins. > optbin_test4 <- function() { + passed <- TRUE + + data <- c(1.00, 1.25, 1.50, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00, + 5.00, 8.00, 8.25, 8.50) + + catf(" running test set 2 with cache and using SE") + + b <- optbin(data, 2, metric='se') + if (!verify_binning(2, b, c(9, 13), c(3.0, 8.5), + 11.796875000000, c(2.0, 7.4375))) { + passed <- FALSE + } + + b <- optbin(data, 3, metric='se') + if (!verify_binning(3, b, c(8, 10, 13), c(2.75, 5.0, 8.50), + 4.750000000000, c(1.875, 4.0, 8.25))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + catf(" running test set 2 with cache and using MSE") + + b <- optbin(data, 2, metric='mse') + if (!verify_binning(2, b, c(10, 13), c(5.0, 8.5), + 1.226666666667, c(2.30, 8.25))) { + passed <- FALSE + } + + b <- optbin(data, 3, metric='mse') + if (!verify_binning(3, b, c(3, 10, 13), c(1.5, 5.0, 8.5), + 1.08333333333, c(1.25, 2.75, 8.25))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + catf(" running test set 2 without cache and using SE") + + b <- optbin(data, 2, metric='se', max.cache=0) + if (!verify_binning(2, b, c(9, 13), c(3.0, 8.5), + 11.796875000000, c(2.0, 7.4375))) { + passed <- FALSE + } + + b <- optbin(data, 3, metric='se', max.cache=0) + if (!verify_binning(3, b, c(8, 10, 13), c(2.75, 5.0, 8.50), + 4.750000000000, c(1.875, 4.0, 8.25))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + catf(" running test set 2 without cache and using MSE") + + b <- optbin(data, 2, metric='mse', max.cache=0) + if (!verify_binning(2, b, c(10, 13), c(5.0, 8.5), + 1.226666666667, c(2.30, 8.25))) { + passed <- FALSE + } + + b <- optbin(data, 3, metric='mse', max.cache=0) + if (!verify_binning(3, b, c(3, 10, 13), c(1.5, 5.0, 8.5), + 1.08333333333, c(1.25, 2.75, 8.25))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + return(passed) + } > > # Fill a large array with random data and compare the results with and > # without caching. > optbin_caching <- function() { + passed <- TRUE + + catf(" running random data with and without caching using SE") + + # 2520 is a multiple of 5, 7, 8, and 9 so all bins fall on endpoints. + set.seed(17) + data <- runif(2520) + + for (b in 2:10) { + bcache <- optbin(data, b) + bnocache <- optbin(data, b, max.cache=0) + if (!compare_binning(bcache, bnocache)) { + passed <- FALSE + } + } + + if (passed) { + catf(" tests passed") + } + catf(" running random data with and without caching using MSE") + + for (b in 2:10) { + bcache <- optbin(data, b, metric='mse') + bnocache <- optbin(data, b, metric='mse', max.cache=0) + if (!compare_binning(bcache, bnocache)) { + passed <- FALSE + } + } + + if (passed) { + catf(" tests passed") + } + return(passed) + } > > > ### assign.optbin Functionality Tests > > # Check that the count of values assigned to each bin match the bin size > # in points. Returns TRUE if all tests pass, FALSE if any fail. > assign_test1 <- function() { + passed <- TRUE + + catf(" running assign.optbin tests") + + set.seed(19) + data <- rnorm(1000, mean=2, sd=0.5) + + binned <- optbin(data, 10) + cnt <- table(assign.optbin(data, binned)) + stpt <- 1 + for (b in 1:10) { + expcnt <- binned$breaks[b] - stpt + 1 + if (cnt[b] != expcnt) { + catf(" %d values assigned to bin %d but range has %d", + cnt[b], b, expcnt) + passed <- FALSE + } + stpt <- binned$breaks[b] + 1 + } + + if (passed) { + catf(" tests passed") + } + catf(" running assign.optbin tests without extending upper bound") + + dmax <- max(data) + data2 <- c(data, dmax+1:10) + cnt <- table(assign.optbin(data2, binned)) + stpt <- 1 + for (b in 1:10) { + expcnt <- binned$breaks[b] - stpt + 1 + if (cnt[b] != expcnt) { + catf(" %d values assigned to bin %d but range has %d", + cnt[b], b, expcnt) + passed <- FALSE + } + stpt <- binned$breaks[b] + 1 + } + + if (passed) { + catf(" tests passed") + } + catf(" running assign.optbin tests while extending upper bound") + + cnt <- table(assign.optbin(data2, binned, extend.upper=TRUE)) + stpt <- 1 + for (b in 1:9) { + expcnt <- binned$breaks[b] - stpt + 1 + if (cnt[b] != expcnt) { + catf(" %d values assigned to bin %d but range has %d", + cnt[b], b, expcnt) + passed <- FALSE + } + stpt <- binned$breaks[b] + 1 + } + + b <- 10 + expcnt <- binned$breaks[b] - stpt + 1 + if (cnt[b] != (expcnt + 10)) { + catf(" %d values assigned to bin %d but expected %d", + cnt[b], b, expcnt+10) + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + return(passed) + } > > > ### Break The Part tests > > # Various error checks on input data to optbin. Returns TRUE if all tests > # pass, FALSE if any fail. > BTP_optbin <- function() { + passed <- TRUE + + catf(" running optbin BTP tests") + + b <- tryCatch(optbin(c(1, 2, 3, 4, 5, "blah", 6), -1), + error=function(e) { NULL }) + if (!is.null(b)) { + catf(" negative number of bins did not raise error") + passed <- FALSE + } + + b <- tryCatch(optbin(c(1, 2, 3, 4, 5, "blah", 6), 1), + error=function(e) { NULL }) + if (!is.null(b)) { + catf(" too few bins did not raise error") + passed <- FALSE + } + + b <- tryCatch(optbin(c(1, 2, 3, 4, 5, "blah", 6), 2), + error=function(e) { NULL }) + if (!is.null(b)) { + catf(" non-numeric entry in data without na.rm did not raise error") + passed <- FALSE + } + + b <- tryCatch(b <- optbin(c(1, 2, 3, 4, 5, "blah", 6), 2, na.rm=TRUE), + error=function(e) { NULL }) + if (is.null(b)) { + catf(" non-numeric entry in data with na.rm did raise error") + passed <- FALSE + } else if (!verify_binning(2, b, c(3,6), c(3.0, 6.0), 4, c(2.0, 5.0))) { + passed <- FALSE + } + + b <- tryCatch(optbin(c(1, 2, 3, 4, 5, 6), 10), + error=function(e) { NULL }) + if (!is.null(b)) { + catf(" too few values for number of bins did not raise error") + passed <- FALSE + } + + # This is test1 data. + data <- c(1.0, 2.0, 3.0, 4.0, NA, 5.0, 6.0, 7.0, NA, 8.0, 9.0, NA, 10.0) + + b <- tryCatch(optbin(data, 2), + error=function(e) { NULL }) + if (!is.null(b)) { + catf(" NAs in data without removing did not raise error") + passed <- FALSE + } + + b <- tryCatch(optbin(data, 2, na.rm=TRUE), + error=function(e) { NULL }) + if (is.null(b)) { + catf(" removed NAs still caused error") + passed <- FALSE + } else if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) { + passed <- FALSE + } + + b <- tryCatch(optbin(reverse(data), 2, is.sorted=TRUE), + error=function(e) { NULL }) + if (!is.null(b)) { + catf(" unsorted data after setting is.sorted TRUE did not raise error") + passed <- FALSE + } + + b <- tryCatch(optbin(data, 2, is.sorted=T, na.rm=TRUE), + error=function(e) { NULL }) + if (is.null(b)) { + catf(" pre-sorted data raised error") + passed <- FALSE + } else if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) { + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + return(passed) + } > > # Error checks on the arguments to plot.optbin. Returns TRUE if all tests > # pass, FALSE if any fail. > BTP_plot <- function() { + passed <- TRUE + + catf(" running plot.optbin BTP tests") + + data <- 1:10 + b <- optbin(data, 2) + + # These generate an error because we use them in the basic plot. + # plot doesn't return anything so our error function will generate an NA. + res <- tryCatch(plot(b2, ann=TRUE), error=function(e) { NA }) + if (!is.na(res)) { + catf(" passing ann to plot.optbin did not raise error") + passed <- FALSE + } + + res <- tryCatch(plot(b2, xaxt='s'), error=function(e) { NA }) + if (!is.na(res)) { + catf(" passing xaxt to plot.optbin did not raise error") + passed <- FALSE + } + + if (passed) { + catf(" tests passed") + } + return(passed) + } > > > > ### Top Level > > allpass <- TRUE > cat('\nStarting optbin verification\n') Starting optbin verification > > if (!optbin_test1()) { + allpass <- FALSE + } running test set 1 with cache tests passed running test set 1 without cache tests passed > if (!optbin_test2()) { + allpass <- FALSE + } running test set 2 with cache tests passed running test set 2 without cache tests passed > if (!optbin_test3()) { + allpass <- FALSE + } running test set 3 with cache tests passed running test set 3 without cache tests passed > if (!optbin_test4()) { + allpass <- FALSE + } running test set 2 with cache and using SE tests passed running test set 2 with cache and using MSE tests passed running test set 2 without cache and using SE tests passed running test set 2 without cache and using MSE tests passed > if (!optbin_caching()) { + allpass <- FALSE + } running random data with and without caching using SE tests passed running random data with and without caching using MSE tests passed > if (!suppressWarnings(BTP_optbin())) { + allpass <- FALSE + } running optbin BTP tests tests passed > > if (!assign_test1()) { + allpass <- FALSE + } running assign.optbin tests tests passed running assign.optbin tests without extending upper bound tests passed running assign.optbin tests while extending upper bound tests passed > > if (!BTP_plot()) { + allpass <- FALSE + } running plot.optbin BTP tests tests passed > > > if (allpass) { + cat('\nAll tests PASSED\n\n') + } else { + cat('\nSome test FAILED\n\n') + # This should cause the build check to fail. + error("test failure") + } All tests PASSED > > > proc.time() user system elapsed 2.42 0.48 2.89