library(optbin) ### Helper Functions # Short for cat(sprintf(fmt, ...)). Automatically appends \n to fmt. catf <- function(fmt, ...) { cat(sprintf(sprintf("%s\n", fmt), ...)) } # Nearly-equal floating point test of two numbers, within the absolute # accuracy acc. fpnear <- function(x1, x2, acc=1e-7) { return(abs(x1-x2) < acc) } ### optbin Functionality Tests # Test the optimal binning result res with nbin bins against the expected # endpoints expend, threshold values expthr, the total, best (M)SE expse, # and the average value per bin expavg. Returns TRUE if all tests pass, # FALSE if any fail. verify_binning <- function(nbin, res, expend, expthr, expse, expavg) { passed <- TRUE if (nbin != res$numbins) { catf(" expected %d bins but result has %d", nbin, res$numbins) passed <- FALSE } for (b in 1:nbin) { if (expend[b] != res$breaks[b]) { catf(" expected bin %d endpoint at %d but got %d", b, expend[b], res$breaks[b]) passed <- FALSE } } for (b in 1:nbin) { if (!fpnear(expthr[b], res$thr[b])) { catf(" expected bin %d threshold %.3f but got %.3f", b, expthr[b], res$thr[b]) passed <- FALSE } } if (!fpnear(expse, res$minse)) { catf(" expected bin %d metric %.5f but got %.5f", b, expse, res$minse) passed <- FALSE } for (b in 1:nbin) { if (!fpnear(expavg[b], res$binavg[b])) { catf(" expected bin %d average %.3f but got %.3f", b, expavg[b], res$binavg[b]) passed <- FALSE } } return(passed) } # Compare two binning results, resA and resB. Any difference causes the # test to fail. Returns TRUE if the two are the same, FALSE if there # are differences. compare_binning <- function(resA, resB) { passed <- TRUE if (resA$numbins != resB$numbins) { catf(" binning A has %d bins, B %d", resA$numbins, resB$numbins) passed <- FALSE } if (resA$metric != resB$metric) { catf(" binning A used %s, B %s", resA$metric, resB$metric) passed <- FALSE } if (!fpnear(resA$minse, resB$minse)) { catf(" binning A best %s %.5f, B %.5f", toupper(resA$metric), resA$minse, resB$minse) passed <- FALSE } for (b in 1:resA$numbins) { if (!fpnear(resA$thr[b], resB$thr[b])) { catf(" bin %d threshold in A is %.5f, B %.5f", b, resA$thr[b], resB$thr[b]) passed <- FALSE } } for (b in 1:resA$numbins) { if (!fpnear(resA$binavg[b], resB$binavg[b])) { catf(" bin %d average in A is %.5f, B %.5f", b, resA$binavg[b], resB$binavg[b]) passed <- FALSE } } for (b in 1:resA$numbins) { if (!fpnear(resA$thr[b], resB$thr[b])) { catf(" bin %d (M)SE in A is %.5f, B %.5f", b, resA$binse[b], resB$binse[b]) passed <- FALSE } } for (b in 1:resA$numbins) { if (resA$breaks[b] != resB$breaks[b]) { catf(" bin %d endpoint in A is %d, B %d", b, resA$breaks[b], resB$breaks[b]) passed <- FALSE } } return(passed) } # Test 1: Small linear sequence. Check with 2, 3, and 4 bins. optbin_test1 <- function() { passed <- TRUE data <- c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0) catf(" running test set 1 with cache") b <- optbin(data, 2) if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) { passed <- FALSE } b <- optbin(data, 3) if (!verify_binning(3, b, c(3, 6, 10), c(3.0, 6.0, 10.0), 9.0, c(2.0, 5.0, 8.5))) { passed <- FALSE } b <- optbin(data, 4) if (!verify_binning(4, b, c(2, 4, 7, 10), c(2.0, 4.0, 7.0, 10.0), 5.0, c(1.5, 3.5, 6.0, 9.0))) { passed <- FALSE } if (passed) { catf(" tests passed") } catf(" running test set 1 without cache") b <- optbin(data, 2, max.cache=0) if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) { passed <- FALSE } b <- optbin(data, 3, max.cache=0) if (!verify_binning(3, b, c(3, 6, 10), c(3.0, 6.0, 10.0), 9.0, c(2.0, 5.0, 8.5))) { passed <- FALSE } b <- optbin(data, 4, max.cache=0) if (!verify_binning(4, b, c(2, 4, 7, 10), c(2.0, 4.0, 7.0, 10.0), 5.0, c(1.5, 3.5, 6.0, 9.0))) { passed <- FALSE } if (passed) { catf(" tests passed") } return(passed) } # Test 2: Two normals, N(1, 0.25), N(2, 0.25). Check with 2, 3, and 4 bins. optbin_test2 <- function() { passed <- TRUE data <- c(0.86, 0.91, 0.92, 1.04, 1.23, 1.23, 1.30, 1.36, 1.79, 1.89, 1.89, 2.34, 2.41, 2.58, 2.79, 2.93, 2.96, 3.21, 3.41, 3.55) catf(" running test set 2 with cache") b <- optbin(data, 2) if (!verify_binning(2, b, c(11, 20), c(1.89, 3.55), 2.956779797980, c(1.31090909, 2.90888888))) { passed <- FALSE } b <- optbin(data, 3) if (!verify_binning(3, b, c(8, 14, 20), c(1.36, 2.58, 3.55), 1.269070833333, c(1.10625, 2.15, 3.14166666))) { passed <- FALSE } b <- optbin(data, 4) if (!verify_binning(4, b, c(8, 11, 17, 20), c(1.36, 1.89, 2.96, 3.55), 0.686537500000, c(1.10625, 1.85666666, 2.668333333, 3.39))) { passed <- FALSE } if (passed) { catf(" tests passed") } catf(" running test set 2 without cache") b <- optbin(data, 2, max.cache=0) if (!verify_binning(2, b, c(11, 20), c(1.89, 3.55), 2.956779797980, c(1.31090909, 2.90888888))) { passed <- FALSE } b <- optbin(data, 3, max.cache=0) if (!verify_binning(3, b, c(8, 14, 20), c(1.36, 2.58, 3.55), 1.269070833333, c(1.10625, 2.15, 3.14166666))) { passed <- FALSE } b <- optbin(data, 4, max.cache=0) if (!verify_binning(4, b, c(8, 11, 17, 20), c(1.36, 1.89, 2.96, 3.55), 0.686537500000, c(1.10625, 1.85666666, 2.668333333, 3.39))) { passed <- FALSE } if (passed) { catf(" tests passed") } return(passed) } # Test 3: Four constant bins. Check with 2 t/m 6 bins (tie resolution 5, 6). optbin_test3 <- function() { passed <- TRUE data <- c(1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 5.0, 5.0, 5.0, 5.0, 7.0, 7.0, 7.0, 7.0) catf(" running test set 3 with cache") b <- optbin(data, 2) if (!verify_binning(2, b, c(8, 16), c(3.0, 7.0), 16.0, c(2.0, 6.0))) { passed <- FALSE } b <- optbin(data, 3) if (!verify_binning(3, b, c(4, 8, 16), c(1.0, 3.0, 7.0), 8.0, c(1.0, 3.0, 6.0))) { passed <- FALSE } b <- optbin(data, 4) if (!verify_binning(4, b, c(4, 8, 12, 16), c(1.0, 3.0, 5.0, 7.0), 0.0, c(1.0, 3.0, 5.0, 7.0))) { passed <- FALSE } b <- optbin(data, 5) if (!verify_binning(5, b, c(2, 4, 8, 12, 16), c(1.0, 1.0, 3.0, 5.0, 7.0), 0.0, c(1.0, 1.0, 3.0, 5.0, 7.0))) { passed <- FALSE } b <- optbin(data, 6) if (!verify_binning(6, b, c(2, 4, 6, 8, 12, 16), c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0), 0.0, c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0))) { passed <- FALSE } if (passed) { catf(" tests passed") } catf(" running test set 3 without cache") b <- optbin(data, 2, max.cache=0) if (!verify_binning(2, b, c(8, 16), c(3.0, 7.0), 16.0, c(2.0, 6.0))) { passed <- FALSE } b <- optbin(data, 3, max.cache=0) if (!verify_binning(3, b, c(4, 8, 16), c(1.0, 3.0, 7.0), 8.0, c(1.0, 3.0, 6.0))) { passed <- FALSE } b <- optbin(data, 4, max.cache=0) if (!verify_binning(4, b, c(4, 8, 12, 16), c(1.0, 3.0, 5.0, 7.0), 0.0, c(1.0, 3.0, 5.0, 7.0))) { passed <- FALSE } b <- optbin(data, 5, max.cache=0) if (!verify_binning(5, b, c(2, 4, 8, 12, 16), c(1.0, 1.0, 3.0, 5.0, 7.0), 0.0, c(1.0, 1.0, 3.0, 5.0, 7.0))) { passed <- FALSE } b <- optbin(data, 6, max.cache=0) if (!verify_binning(6, b, c(2, 4, 6, 8, 12, 16), c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0), 0.0, c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0))) { passed <- FALSE } if (passed) { catf(" tests passed") } return(passed) } # Test 4: Data where SE and MSE bins differ. Check with 2 and 3 bins. optbin_test4 <- function() { passed <- TRUE data <- c(1.00, 1.25, 1.50, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00, 5.00, 8.00, 8.25, 8.50) catf(" running test set 2 with cache and using SE") b <- optbin(data, 2, metric='se') if (!verify_binning(2, b, c(9, 13), c(3.0, 8.5), 11.796875000000, c(2.0, 7.4375))) { passed <- FALSE } b <- optbin(data, 3, metric='se') if (!verify_binning(3, b, c(8, 10, 13), c(2.75, 5.0, 8.50), 4.750000000000, c(1.875, 4.0, 8.25))) { passed <- FALSE } if (passed) { catf(" tests passed") } catf(" running test set 2 with cache and using MSE") b <- optbin(data, 2, metric='mse') if (!verify_binning(2, b, c(10, 13), c(5.0, 8.5), 1.226666666667, c(2.30, 8.25))) { passed <- FALSE } b <- optbin(data, 3, metric='mse') if (!verify_binning(3, b, c(3, 10, 13), c(1.5, 5.0, 8.5), 1.08333333333, c(1.25, 2.75, 8.25))) { passed <- FALSE } if (passed) { catf(" tests passed") } catf(" running test set 2 without cache and using SE") b <- optbin(data, 2, metric='se', max.cache=0) if (!verify_binning(2, b, c(9, 13), c(3.0, 8.5), 11.796875000000, c(2.0, 7.4375))) { passed <- FALSE } b <- optbin(data, 3, metric='se', max.cache=0) if (!verify_binning(3, b, c(8, 10, 13), c(2.75, 5.0, 8.50), 4.750000000000, c(1.875, 4.0, 8.25))) { passed <- FALSE } if (passed) { catf(" tests passed") } catf(" running test set 2 without cache and using MSE") b <- optbin(data, 2, metric='mse', max.cache=0) if (!verify_binning(2, b, c(10, 13), c(5.0, 8.5), 1.226666666667, c(2.30, 8.25))) { passed <- FALSE } b <- optbin(data, 3, metric='mse', max.cache=0) if (!verify_binning(3, b, c(3, 10, 13), c(1.5, 5.0, 8.5), 1.08333333333, c(1.25, 2.75, 8.25))) { passed <- FALSE } if (passed) { catf(" tests passed") } return(passed) } # Fill a large array with random data and compare the results with and # without caching. optbin_caching <- function() { passed <- TRUE catf(" running random data with and without caching using SE") # 2520 is a multiple of 5, 7, 8, and 9 so all bins fall on endpoints. set.seed(17) data <- runif(2520) for (b in 2:10) { bcache <- optbin(data, b) bnocache <- optbin(data, b, max.cache=0) if (!compare_binning(bcache, bnocache)) { passed <- FALSE } } if (passed) { catf(" tests passed") } catf(" running random data with and without caching using MSE") for (b in 2:10) { bcache <- optbin(data, b, metric='mse') bnocache <- optbin(data, b, metric='mse', max.cache=0) if (!compare_binning(bcache, bnocache)) { passed <- FALSE } } if (passed) { catf(" tests passed") } return(passed) } ### assign.optbin Functionality Tests # Check that the count of values assigned to each bin match the bin size # in points. Returns TRUE if all tests pass, FALSE if any fail. assign_test1 <- function() { passed <- TRUE catf(" running assign.optbin tests") set.seed(19) data <- rnorm(1000, mean=2, sd=0.5) binned <- optbin(data, 10) cnt <- table(assign.optbin(data, binned)) stpt <- 1 for (b in 1:10) { expcnt <- binned$breaks[b] - stpt + 1 if (cnt[b] != expcnt) { catf(" %d values assigned to bin %d but range has %d", cnt[b], b, expcnt) passed <- FALSE } stpt <- binned$breaks[b] + 1 } if (passed) { catf(" tests passed") } catf(" running assign.optbin tests without extending upper bound") dmax <- max(data) data2 <- c(data, dmax+1:10) cnt <- table(assign.optbin(data2, binned)) stpt <- 1 for (b in 1:10) { expcnt <- binned$breaks[b] - stpt + 1 if (cnt[b] != expcnt) { catf(" %d values assigned to bin %d but range has %d", cnt[b], b, expcnt) passed <- FALSE } stpt <- binned$breaks[b] + 1 } if (passed) { catf(" tests passed") } catf(" running assign.optbin tests while extending upper bound") cnt <- table(assign.optbin(data2, binned, extend.upper=TRUE)) stpt <- 1 for (b in 1:9) { expcnt <- binned$breaks[b] - stpt + 1 if (cnt[b] != expcnt) { catf(" %d values assigned to bin %d but range has %d", cnt[b], b, expcnt) passed <- FALSE } stpt <- binned$breaks[b] + 1 } b <- 10 expcnt <- binned$breaks[b] - stpt + 1 if (cnt[b] != (expcnt + 10)) { catf(" %d values assigned to bin %d but expected %d", cnt[b], b, expcnt+10) passed <- FALSE } if (passed) { catf(" tests passed") } return(passed) } ### Break The Part tests # Various error checks on input data to optbin. Returns TRUE if all tests # pass, FALSE if any fail. BTP_optbin <- function() { passed <- TRUE catf(" running optbin BTP tests") b <- tryCatch(optbin(c(1, 2, 3, 4, 5, "blah", 6), -1), error=function(e) { NULL }) if (!is.null(b)) { catf(" negative number of bins did not raise error") passed <- FALSE } b <- tryCatch(optbin(c(1, 2, 3, 4, 5, "blah", 6), 1), error=function(e) { NULL }) if (!is.null(b)) { catf(" too few bins did not raise error") passed <- FALSE } b <- tryCatch(optbin(c(1, 2, 3, 4, 5, "blah", 6), 2), error=function(e) { NULL }) if (!is.null(b)) { catf(" non-numeric entry in data without na.rm did not raise error") passed <- FALSE } b <- tryCatch(b <- optbin(c(1, 2, 3, 4, 5, "blah", 6), 2, na.rm=TRUE), error=function(e) { NULL }) if (is.null(b)) { catf(" non-numeric entry in data with na.rm did raise error") passed <- FALSE } else if (!verify_binning(2, b, c(3,6), c(3.0, 6.0), 4, c(2.0, 5.0))) { passed <- FALSE } b <- tryCatch(optbin(c(1, 2, 3, 4, 5, 6), 10), error=function(e) { NULL }) if (!is.null(b)) { catf(" too few values for number of bins did not raise error") passed <- FALSE } # This is test1 data. data <- c(1.0, 2.0, 3.0, 4.0, NA, 5.0, 6.0, 7.0, NA, 8.0, 9.0, NA, 10.0) b <- tryCatch(optbin(data, 2), error=function(e) { NULL }) if (!is.null(b)) { catf(" NAs in data without removing did not raise error") passed <- FALSE } b <- tryCatch(optbin(data, 2, na.rm=TRUE), error=function(e) { NULL }) if (is.null(b)) { catf(" removed NAs still caused error") passed <- FALSE } else if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) { passed <- FALSE } b <- tryCatch(optbin(reverse(data), 2, is.sorted=TRUE), error=function(e) { NULL }) if (!is.null(b)) { catf(" unsorted data after setting is.sorted TRUE did not raise error") passed <- FALSE } b <- tryCatch(optbin(data, 2, is.sorted=T, na.rm=TRUE), error=function(e) { NULL }) if (is.null(b)) { catf(" pre-sorted data raised error") passed <- FALSE } else if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) { passed <- FALSE } if (passed) { catf(" tests passed") } return(passed) } # Error checks on the arguments to plot.optbin. Returns TRUE if all tests # pass, FALSE if any fail. BTP_plot <- function() { passed <- TRUE catf(" running plot.optbin BTP tests") data <- 1:10 b <- optbin(data, 2) # These generate an error because we use them in the basic plot. # plot doesn't return anything so our error function will generate an NA. res <- tryCatch(plot(b2, ann=TRUE), error=function(e) { NA }) if (!is.na(res)) { catf(" passing ann to plot.optbin did not raise error") passed <- FALSE } res <- tryCatch(plot(b2, xaxt='s'), error=function(e) { NA }) if (!is.na(res)) { catf(" passing xaxt to plot.optbin did not raise error") passed <- FALSE } if (passed) { catf(" tests passed") } return(passed) } ### Top Level allpass <- TRUE cat('\nStarting optbin verification\n') if (!optbin_test1()) { allpass <- FALSE } if (!optbin_test2()) { allpass <- FALSE } if (!optbin_test3()) { allpass <- FALSE } if (!optbin_test4()) { allpass <- FALSE } if (!optbin_caching()) { allpass <- FALSE } if (!suppressWarnings(BTP_optbin())) { allpass <- FALSE } if (!assign_test1()) { allpass <- FALSE } if (!BTP_plot()) { allpass <- FALSE } if (allpass) { cat('\nAll tests PASSED\n\n') } else { cat('\nSome test FAILED\n\n') # This should cause the build check to fail. error("test failure") }