context("consistency") test_that("consistency:",{ .get.performance.measures <- function(pred, measures) { ans <- list() for (measure in measures) { ## need to enclose y.values into a list to avoid flattening perf <- performance(pred, measure) .check.performance.object( perf ) ans <- c(ans, list(perf@y.values)) } names(ans) <- measures ans } .check.consistency <- function(measures) { ## check entries of contingency table for consistency for (measure in c("acc", "err", "fnr", "tpr", "fpr", "tnr", "pcfall", "prec", "npv", "pcmiss",'rpp','rnp')) { if (!measure %in% names(measures)) { stop(paste("Performance measure", measure, "not in argument list.")) } } for (i in 1:length(measures$acc)) { finite.bool <- is.finite(measures$acc[[i]]) & is.finite(measures$err[[i]]) expect_equal(measures$acc[[i]][finite.bool] + measures$err[[i]][finite.bool], rep(1,length(measures$acc[[i]]))[finite.bool]) finite.bool <- is.finite(measures$fnr[[i]]) & is.finite(measures$tpr[[i]]) expect_equal(measures$fnr[[i]][finite.bool] + measures$tpr[[i]][finite.bool], rep(1,length(measures$fnr[[i]]))[finite.bool]) finite.bool <- is.finite(measures$fpr[[i]]) & is.finite(measures$tnr[[i]]) expect_equal(measures$fpr[[i]][finite.bool] + measures$tnr[[i]][finite.bool], rep(1,length(measures$fpr[[i]]))[finite.bool]) finite.bool <- is.finite(measures$prec[[i]]) & is.finite(measures$pcfall[[i]]) expect_equal(measures$prec[[i]][finite.bool] + measures$pcfall[[i]][finite.bool], rep(1,length(measures$acc[[i]]))[finite.bool]) finite.bool <- is.finite(measures$npv[[i]]) & is.finite(measures$pcmiss[[i]]) expect_equal(measures$npv[[i]][finite.bool] + measures$pcmiss[[i]][finite.bool], rep(1,length(measures$acc[[i]]))[finite.bool]) expect_equal(measures$rpp[[i]] + measures$rnp[[i]], rep(1, length(measures$rpp[[i]]))) } } ############################################################ # test length of performance measures .check.performance.object <- function(perf) { ylen <- length(perf@y.values) xlen <- length(perf@x.values) alphalen <- length(perf@alpha.values) expect_equal( (xlen==0 || xlen==ylen) && (alphalen==0 || (alphalen==xlen && alphalen==ylen)), T ) if (xlen==ylen) { for (i in 1:ylen) expect_equal( length(perf@x.values[[i]]), length(perf@y.values[[i]]) ) } if (alphalen==ylen) { for (i in 1:ylen) expect_equal( length(perf@alpha.values[[i]]), length(perf@y.values[[i]]) ) } } .check.prediction.object <- function( pred) { # 1. all entries in prediction object must have equals number of cross-validation runs lenvec <- c(length(pred@predictions), length(pred@labels), length(pred@cutoffs), length(pred@fp), length(pred@tp), length(pred@fn), length(pred@tn), length(pred@n.pos), length(pred@n.neg), length(pred@n.pos.pred), length(pred@n.neg.pred)) expect_equal( length(unique(lenvec)), 1) # 2. inside: xval runs: for (i in 1:length(pred@predictions)) { expect_equal( length(pred@predictions[[i]]), length(pred@labels[[i]])) lenvec <- c(length(pred@cutoffs[[i]]), length(pred@fp[[i]]), length(pred@tp[[i]]), length(pred@fn[[i]]), length(pred@tn[[i]]), length(pred@n.pos.pred[[i]]), length(pred@n.neg.pred[[i]])) expect_equal( length(unique(lenvec)), 1) expect_equal( unique(lenvec), length(unique(pred@predictions[[i]]))+1 ) } # 3. cutoffs sorted in descending order? for (i in 1:length(pred@predictions)) { expect_equal( sort(pred@cutoffs[[i]], decreasing=TRUE ), pred@cutoffs[[i]] ) } # 4. 
  ############################################################
  # test length of performance measures
  .check.performance.object <- function(perf) {
    ylen <- length(perf@y.values)
    xlen <- length(perf@x.values)
    alphalen <- length(perf@alpha.values)
    expect_true((xlen == 0 || xlen == ylen) &&
                (alphalen == 0 || (alphalen == xlen && alphalen == ylen)))
    if (xlen == ylen) {
      for (i in 1:ylen)
        expect_equal(length(perf@x.values[[i]]), length(perf@y.values[[i]]))
    }
    if (alphalen == ylen) {
      for (i in 1:ylen)
        expect_equal(length(perf@alpha.values[[i]]), length(perf@y.values[[i]]))
    }
  }

  .check.prediction.object <- function(pred) {
    # 1. all entries of the prediction object must have an equal number
    #    of cross-validation runs
    lenvec <- c(length(pred@predictions), length(pred@labels),
                length(pred@cutoffs), length(pred@fp), length(pred@tp),
                length(pred@fn), length(pred@tn), length(pred@n.pos),
                length(pred@n.neg), length(pred@n.pos.pred),
                length(pred@n.neg.pred))
    expect_equal(length(unique(lenvec)), 1)
    # 2. within each cross-validation run:
    for (i in 1:length(pred@predictions)) {
      expect_equal(length(pred@predictions[[i]]), length(pred@labels[[i]]))
      lenvec <- c(length(pred@cutoffs[[i]]), length(pred@fp[[i]]),
                  length(pred@tp[[i]]), length(pred@fn[[i]]),
                  length(pred@tn[[i]]), length(pred@n.pos.pred[[i]]),
                  length(pred@n.neg.pred[[i]]))
      expect_equal(length(unique(lenvec)), 1)
      expect_equal(unique(lenvec), length(unique(pred@predictions[[i]])) + 1)
    }
    # 3. cutoffs sorted in descending order?
    for (i in 1:length(pred@predictions)) {
      expect_equal(sort(pred@cutoffs[[i]], decreasing = TRUE),
                   pred@cutoffs[[i]])
    }
    # 4. check the 2x2 table for consistency with its marginal sums
    for (i in 1:length(pred@predictions)) {
      expect_equal(pred@tp[[i]] + pred@fp[[i]], pred@n.pos.pred[[i]])
      expect_equal(pred@fn[[i]] + pred@tn[[i]], pred@n.neg.pred[[i]])
      expect_equal(pred@tp[[i]] + pred@fn[[i]],
                   rep(pred@n.pos[[i]], length(pred@tp[[i]])))
      expect_equal(pred@fp[[i]] + pred@tn[[i]],
                   rep(pred@n.neg[[i]], length(pred@tp[[i]])))
      expect_equal(pred@n.pos.pred[[i]] + pred@n.neg.pred[[i]],
                   rep(pred@n.pos[[i]] + pred@n.neg[[i]],
                       length(pred@n.pos.pred[[i]])))
      expect_equal(pred@n.pos[[i]] + pred@n.neg[[i]],
                   length(pred@labels[[i]]))
    }
  }

  .mock.prediction <- function(n.predictions, error.rate) {
    if (length(n.predictions) > 1 && length(error.rate) == 1) {
      error.rate <- rep(error.rate, length(n.predictions))
    }
    if (length(n.predictions) > 1) {
      predictions <- list()
      labels <- list()
    } else {
      predictions <- c()
      labels <- c()
    }
    for (i in 1:length(n.predictions)) {
      current.predictions <- runif(n.predictions[i])
      current.labels <- as.numeric(current.predictions >= 0.5)
      flip.indices <- sample(n.predictions[i],
                             round(error.rate[i] * n.predictions[i]))
      current.labels[flip.indices] <- !current.labels[flip.indices]
      if (length(n.predictions) > 1) {
        predictions <- c(predictions, list(current.predictions))
        labels <- c(labels, list(current.labels))
      }
    }
    if (length(n.predictions) == 1) {
      predictions <- list(current.predictions)
      labels <- list(current.labels)
    }
    ans <- list(predictions = predictions, labels = labels)
    # ensure that the random labels have exactly two levels; retry otherwise
    if (any(sapply(labels, function(run) length(unique(run))) !=
            rep(2, length(labels)))) {
      return(.mock.prediction(n.predictions, error.rate))
    } else {
      return(ans)
    }
  }
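  ## Usage sketch for .mock.prediction, assuming illustrative arguments
  ## (not values used by the suite):
  ##   pp <- .mock.prediction(c(50, 80), c(0.1, 0.2))
  ##   str(pp)  # $predictions and $labels hold one vector per mock fold
  ## Scores are uniform on [0, 1]; labels are the scores thresholded at
  ## 0.5, with round(error.rate * n) of them flipped.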
  ##############################################################################
  # consistency
  for (i in 1:100) {
    n.folds <- sample(1:10, 1)
    fold.sizes <- sample(10:100, n.folds, replace = TRUE)
    error.rates <- runif(n.folds)
    pp <- .mock.prediction(fold.sizes, error.rates)
    pred <- prediction(pp$predictions, pp$labels)
    .check.prediction.object(pred)
    a <- .get.performance.measures(pred,
                                   c('acc', 'err', 'fpr', 'tpr', 'fnr', 'tnr',
                                     'prec', 'pcfall', 'npv', 'pcmiss',
                                     'rpp', 'rnp'))
    .check.consistency(a)
  }

  ##############################################################################
  # test errors
  crashCases <- list(
    ## cases that are expected to crash: each has fewer or more than two
    ## classes -> "Number of classes is not equal to 2."
    list(pred = c(0), lab = c(0)),
    list(pred = c(1), lab = c(1)),
    list(pred = c(0.1, 0.2, 0.5), lab = c(1, 1, 1)),
    list(pred = c(0.1, 0.2, 0.5), lab = c(0, 0, 0)),
    list(pred = c(0.1, 0.2, 0.5), lab = c("a", "a", "a")),
    list(pred = c(0.1, 0.2, 0.5), lab = c(T, T, T)),
    list(pred = c(0.1, 0.2, 0.5), lab = c(F, F, F))
  )
  for (case in crashCases) {
    expect_error(pred <- prediction(case$pred, case$lab))
  }

  ##############################################################################
  ## use consistency checks to validate results on pathological input cases
  performance.measures <- c('tpr', 'fpr', 'acc', 'err', 'rec', 'sens', 'fnr',
                            'tnr', 'spec', 'ppv', 'prec', 'npv', 'fall',
                            'miss', 'pcfall', 'pcmiss', 'rpp', 'rnp', 'auc',
                            'prbe', 'rch', 'mxe', 'rmse', 'phi', 'mat', 'mi',
                            'chisq', 'odds', 'lift', 'f', 'sar', 'ecost',
                            'cost')
  # mxe needs 0/1 labels (warning otherwise); rmse needs numeric labels
  # (warning otherwise), and so does sar
  pred <- prediction(c(0.1, 0.2, 0.5), c("a", "a", "b"))
  .check.prediction.object(pred)
  measures.to.evaluate <-
    performance.measures[performance.measures != 'mxe' &
                         performance.measures != 'rmse' &
                         performance.measures != 'sar']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  pred <- prediction(c(0.1, 0.2, 0.5), c(F, F, T))
  .check.prediction.object(pred)
  measures.to.evaluate <-
    performance.measures[performance.measures != 'mxe' &
                         performance.measures != 'rmse' &
                         performance.measures != 'sar']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  pred <- prediction(c(0.1, 0.2, 0.5), c("1", "1", "0"))
  .check.prediction.object(pred)
  measures.to.evaluate <- performance.measures
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  pred <- prediction(c(0.1, 0.2, 0.5), c(T, F, F))
  .check.prediction.object(pred)
  measures.to.evaluate <-
    performance.measures[performance.measures != 'mxe' &
                         performance.measures != 'rmse' &
                         performance.measures != 'sar']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)
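  ## The repeated subsetting above could equivalently be written with
  ## setdiff; a sketch, behavior-identical for these duplicate-free
  ## character vectors:
  ##   measures.to.evaluate <- setdiff(performance.measures,
  ##                                   c('mxe', 'rmse', 'sar'))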
  # prbe cannot be computed, because only one precision/recall pair is
  # available
  pred <- prediction(c(0, 0, 0), c(0, 1, 1))
  .check.prediction.object(pred)
  measures.to.evaluate <- performance.measures[performance.measures != 'prbe']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  pred <- prediction(c(0, 0, 0), ordered(c(0, 0, 0), levels = c(0, 1)))
  .check.prediction.object(pred)
  measures.to.evaluate <-
    performance.measures[performance.measures != 'auc' &
                         performance.measures != 'prbe' &
                         performance.measures != 'rch' &
                         performance.measures != 'sar' &
                         performance.measures != 'ecost']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  pred <- prediction(c(-1, -0.2, -0.6), ordered(c(1, 0, 1), levels = c(0, 1)))
  .check.prediction.object(pred)
  measures.to.evaluate <- performance.measures[performance.measures != 'mxe']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  pred <- prediction(c(-1, -0.2, -0.6), c(-1, 1, -1))
  .check.prediction.object(pred)
  measures.to.evaluate <- performance.measures[performance.measures != 'mxe']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  pred <- prediction(c(-1, -0.2, -0.6), c(3, 2, 3))
  .check.prediction.object(pred)
  measures.to.evaluate <- performance.measures[performance.measures != 'mxe']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  pred <- prediction(c(1), ordered(c("a"), levels = c('a', 'b')))
  .check.prediction.object(pred)
  measures.to.evaluate <-
    performance.measures[performance.measures != 'auc' &
                         performance.measures != 'prbe' &
                         performance.measures != 'rch' &
                         performance.measures != 'mxe' &
                         performance.measures != 'rmse' &
                         performance.measures != 'sar' &
                         performance.measures != 'ecost']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)
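  ## prediction() treats the higher of the two label levels as the
  ## positive class by default; the ordered() factors above pin that
  ## ordering down explicitly. Its label.ordering argument can override
  ## the default, e.g. (illustrative values, not part of the suite):
  ##   prediction(c(0.1, 0.9), c("b", "a"), label.ordering = c("b", "a"))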
  ##############################################################################
  # test measures for consistency on supplied data sets
  data(ROCR.simple)
  pred <- prediction(ROCR.simple$predictions, ROCR.simple$labels)
  .check.prediction.object(pred)
  measures.to.evaluate <- performance.measures
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  data(ROCR.xval)
  pred <- prediction(ROCR.xval$predictions, ROCR.xval$labels)
  .check.prediction.object(pred)
  measures.to.evaluate <- performance.measures
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  data(ROCR.hiv)
  pred <- prediction(ROCR.hiv$hiv.nn$predictions, ROCR.hiv$hiv.nn$labels)
  .check.prediction.object(pred)
  measures.to.evaluate <-
    performance.measures[performance.measures != 'mxe' &
                         performance.measures != 'cal']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  pred <- prediction(ROCR.hiv$hiv.svm$predictions, ROCR.hiv$hiv.svm$labels)
  .check.prediction.object(pred)
  measures.to.evaluate <-
    performance.measures[performance.measures != 'mxe' &
                         performance.measures != 'cal']
  measures <- expect_warning(
    .get.performance.measures(pred, measures.to.evaluate),
    "Chi-squared approximation may be incorrect")
  .check.consistency(measures)

  skip_on_cran()
  skip_on_ci()

  ##############################################################################
  # Combining: performance(pred, measure2, measure1) parametrizes measure2
  # over measure1, keeping the cutoffs in @alpha.values
  measures <- c('tpr', 'fpr', 'acc', 'err', 'rec', 'sens', 'fnr', 'tnr',
                'spec', 'ppv', 'prec', 'npv', 'fall', 'miss', 'pcfall',
                'pcmiss', 'rpp', 'rnp', 'phi', 'mat', 'mi', 'chisq',
                'odds', 'lift')
  # excluded from combining: 'auc', 'prbe', 'rch', 'mxe', 'rmse', 'f',
  # 'sar', 'ecost', 'cost'
  for (measure1 in measures) {
    for (measure2 in measures) {
      n.folds <- sample(1:2, 1)
      fold.sizes <- sample(10:20, n.folds, replace = TRUE)
      error.rates <- runif(n.folds)
      pp <- .mock.prediction(fold.sizes, error.rates)
      pred <- prediction(pp$predictions, pp$labels)
      .check.prediction.object(pred)
      perf1 <- suppressWarnings(performance(pred, measure1))
      perf2 <- suppressWarnings(performance(pred, measure2))
      perf3 <- suppressWarnings(performance(pred, measure2, measure1))
      .check.performance.object(perf1)
      .check.performance.object(perf2)
      .check.performance.object(perf3)
      for (i in 1:n.folds) {
        # check elements
        expect_true(setequal(c(perf1@x.values[[i]], perf2@x.values[[i]]),
                             perf3@alpha.values[[i]]))
        expect_true(setequal(perf1@y.values[[i]], perf3@x.values[[i]]))
        expect_true(setequal(perf2@y.values[[i]], perf3@y.values[[i]]))
        # check order
        ind <- sapply(perf1@x.values[[i]],
                      function(x) min(which(x == perf3@alpha.values[[i]])))
        expect_equal(unname(perf1@y.values[[i]]), perf3@x.values[[i]][ind])
        expect_equal(unname(perf2@y.values[[i]]), perf3@y.values[[i]][ind])
      }
    }
  }

  ##############################################################################
  # test combining on the supplied data sets
  measures <- c('tpr', 'fpr', 'acc', 'err', 'rec', 'sens', 'fnr', 'tnr',
                'spec', 'ppv', 'prec', 'npv', 'fall', 'miss', 'pcfall',
                'pcmiss', 'rpp', 'rnp', 'phi', 'mat', 'mi', 'chisq',
                'odds', 'lift')
  # excluded from combining: 'auc', 'prbe', 'rch', 'mxe', 'rmse', 'f',
  # 'sar', 'ecost', 'cost'
  data(ROCR.simple)
  data(ROCR.xval)
  data(ROCR.hiv)
  all.pred <- list(
    prediction(ROCR.simple$predictions, ROCR.simple$labels),
    prediction(ROCR.xval$predictions, ROCR.xval$labels),
    prediction(ROCR.hiv$hiv.nn$predictions, ROCR.hiv$hiv.nn$labels),
    prediction(ROCR.hiv$hiv.svm$predictions, ROCR.hiv$hiv.svm$labels))
  lapply(all.pred, .check.prediction.object)
  for (pred in all.pred) {
    for (measure1 in measures) {
      for (measure2 in measures) {
        perf1 <- suppressWarnings(performance(pred, measure1))
        perf2 <- suppressWarnings(performance(pred, measure2))
        perf3 <- suppressWarnings(performance(pred, measure2, measure1))
        .check.performance.object(perf1)
        .check.performance.object(perf2)
        .check.performance.object(perf3)
        for (i in 1:length(pred@labels)) {
          # check elements
          expect_true(setequal(c(perf1@x.values[[i]], perf2@x.values[[i]]),
                               perf3@alpha.values[[i]]))
          expect_true(setequal(perf1@y.values[[i]], perf3@x.values[[i]]))
          expect_true(setequal(perf2@y.values[[i]], perf3@y.values[[i]]))
          # check order
          ind <- sapply(perf1@x.values[[i]],
                        function(x) min(which(x == perf3@alpha.values[[i]])))
          expect_equal(unname(perf1@y.values[[i]]), perf3@x.values[[i]][ind])
          expect_equal(unname(perf2@y.values[[i]]), perf3@y.values[[i]][ind])
        }
      }
    }
  }
})