run_all_measures = function(truth, response, prob) { tol = sqrt(.Machine$double.eps) for (m in as.list(measures)) { if (m$type != "classif") { next } f = match.fun(m$id) perf = f(truth = truth, response = response, prob = prob) if (m$aggregated) { expect_number(perf, na.ok = FALSE, lower = m$lower - tol, upper = m$upper + tol, label = m$id) } else { expect_numeric(perf, any.missing = FALSE, lower = m$lower - tol, upper = m$upper + tol, label = m$id) } if ("sample_weights" %in% names(formals(f))) { sample_weights = runif(length(truth)) perf = f(truth = truth, response = response, prob = prob) expect_number(perf, na.ok = FALSE, lower = m$lower - tol, upper = m$upper + tol, label = m$id) } } } test_that("trigger all", { k = 3 n = 10 truth = ssample(letters[1:k], n) response = ssample(letters[1:k], n) prob = matrix(runif(n * k, min = 1e-8, max = 1 - 1e-8), nrow = n) prob = t(apply(prob, 1, function(x) x / sum(x))) colnames(prob) = letters[1:k] run_all_measures(truth, response, prob) }) test_that("integer overflow", { N = 500000 truth = ssample(c("a", "b"), N) response = truth prob = matrix(runif(N * 2), ncol = 2) prob = t(apply(prob, 1, function(x) x / sum(x))) colnames(prob) = levels(truth) run_all_measures(truth, response, prob) response = ssample(c("a", "b"), N) run_all_measures(truth, response, prob) response = factor(ifelse(truth == "a", "b", "a"), levels = levels(truth)) run_all_measures(truth, response, prob) }) test_that("tests from Metrics", { as_fac = function(...) factor(ifelse(c(...) == 0, "b", "a"), levels = c("a", "b")) as_prob = function(...) { p = c(...) p = cbind(p, 1 - p) colnames(p) = c("a", "b") p } expect_equal(ce(as_fac(1, 1, 1, 0, 0, 0), as_fac(1, 1, 1, 0, 0, 0)), 0.0) expect_equal(ce(as_fac(1, 1, 1, 0, 0, 0), as_fac(1, 1, 1, 1, 0, 0)), 1 / 6) expect_equal(ce(factor(c(1, 2, 3, 4), levels = 1:4), factor(c(1, 2, 3, 3), levels = 1:4)), 1 / 4) lvls = c("cat", "dog", "bird", "fish") expect_equal(ce(factor(c("cat", "dog", "bird"), levels = lvls), factor(c("cat", "dog", "fish"), levels = lvls)), 1 / 3) expect_equal(logloss(as_fac(1, 1, 0, 0), as_prob(1, 1, 0, 0)), 0) expect_number(logloss(as_fac(1, 1, 0, 0), as_prob(0, 0, 1, 1)), lower = 10, upper = 50) expect_equal(logloss(as_fac(1, 1, 1, 0, 0, 0), as_prob(.5, .1, .01, .9, .75, .001)), 1.881797068998267) expect_equal(mcc(factor(1:4, levels = 1:4), factor(1:4, levels = 1:4)), 1) expect_equal(mcc(factor(1:4, levels = 1:4), factor(4:1, levels = 1:4)), - 1 /3) expect_equal(mcc(factor(c("cat", "dog", "bird"), levels = lvls), factor(c("cat", "dog", "fish"), levels = lvls)), 2 / 3) # rater.a <- c(1, 2, 1) # rater.b <- c(1, 2, 2) # kappa <- ScoreQuadraticWeightedKappa(rater.a, rater.b) # expect_equal(kappa, 0.4) # rater.a <- c(1, 2, 3, 1, 2, 3) # rater.b <- c(1, 2, 3, 1, 3, 2) # kappa <- ScoreQuadraticWeightedKappa(rater.a, rater.b) # expect_equal(kappa, 0.75) # rater.a <- c(1, 2, 3) # rater.b <- c(1, 2, 3) # kappa <- ScoreQuadraticWeightedKappa(rater.a, rater.b) # expect_equal(kappa, 1.0) # rater.a <- c(1, 3, 5) # rater.b <- c(2, 4, 6) # kappa <- ScoreQuadraticWeightedKappa(rater.a, rater.b) # expect_equal(kappa, 0.8421052631578947) # rater.a <- c(1, 3, 3, 5) # rater.b <- c(2, 4, 5, 6) # kappa <- ScoreQuadraticWeightedKappa(rater.a, rater.b, 1, 6) # expect_equal(kappa, 0.6956521739130435) # # # kappa <- MeanQuadraticWeightedKappa( c(1, 1) ) # expect_equal(kappa, 0.999) # kappa <- MeanQuadraticWeightedKappa( c(1, -1) ) # expect_equal(kappa, 0.0) # kappa <- MeanQuadraticWeightedKappa( c(.5, .8), c(1.0, .5) ) # expect_equal(kappa, 0.624536446425734) }) test_that("bacc", { truth = factor(c("a", "a", "b", "b"), levels = c("a", "b")) response = factor(c("a", "a", "b", "a"), levels = c("a", "b")) expect_equal(bacc(truth, response), 0.75) expect_equal(bacc(truth, response, sample_weights = c(0.25, 0.25, 0.25, 0.25)), 0.75) expect_equal(bacc(truth, response, sample_weights = c(0.25, 0.25, 0.25, 1)), 0.6) truth = factor(c("a", "a", "a", "a", "a", "b"), levels = c("a", "b")) response = factor(c("a", "a", "a", "a", "b", "b"), levels = c("a", "b")) expect_equal(bacc(truth, response), 0.9) expect_equal(bacc(truth, response, sample_weights = c(0, 0, 0, 0, 0, 1)), 1) expect_equal(bacc(truth, response, sample_weights = c(0, 0, 0, 0, 0.5, 0.5)), 0.5) truth = factor(c("c", "a", "a", "a", "a", "b"), levels = c("a", "b", "c")) response = factor(c("c", "a", "a", "a", "b", "b"), levels = c("a", "b", "c")) expect_equal(round(bacc(truth, response), 3), 0.917) }) # test_that("ber", { # truth = factor(c("a", "a", "b", "b", "c", "c"), levels = c("a", "b", "c")) # response = factor(c("a", "a", "b", "b", "c", "c"), levels = c("a", "b", "c")) # expect_equal(ber(truth, response), 0) # response = factor(c("a", "b", "b", "c", "c", "a"), levels = c("a", "b", "c")) # expect_equal(ber(truth, response), 0.5) # response = factor(rep("a", 6), levels = c("a", "b", "c")) # expect_equal(round(ber(truth, response), 2), 0.67) # }) test_that("multiclass auc", { truth = factor(c("a", "b", "c")) prob = diag(3) colnames(prob) = levels(truth) expect_equal(mauc_aunu(truth, prob), 1) expect_equal(mauc_aunp(truth, prob), 1) expect_equal(mauc_au1u(truth, prob), 1) expect_equal(mauc_au1p(truth, prob), 1) expect_equal(mauc_mu(truth, prob), 1) auc(truth = factor(c("a", "nota", "nota")), prob = c(1, 0, 0), positive = "a") truth = ssample(c("a", "b", "c"), 100) prob = matrix(runif(300), ncol = 3) colnames(prob) = levels(truth) # having the same number of 'a', 'b', and 'c' gives the same 'n' and 'u' measures equalizer_prob = matrix(runif(900), ncol = 3) colnames(equalizer_prob) = levels(truth) equalizer_truth = unlist(list( truth, factor(truth, levels = c("b", "c", "a"), labels = c("a", "b", "c")), factor(truth, levels = c("c", "a", "b"), labels = c("a", "b", "c")))) expect_equal(mauc_aunu(equalizer_truth, equalizer_prob), mauc_aunp(equalizer_truth, equalizer_prob)) expect_equal(mauc_au1u(equalizer_truth, equalizer_prob), mauc_au1p(equalizer_truth, equalizer_prob)) # having no information in prob gives measure 0.5 maxent_prob = rbind(prob, prob, prob) expect_equal(mauc_aunu(equalizer_truth, maxent_prob), 0.5) expect_equal(mauc_aunp(equalizer_truth, maxent_prob), 0.5) expect_equal(mauc_aunu(equalizer_truth, maxent_prob), 0.5) expect_equal(mauc_au1u(equalizer_truth, maxent_prob), 0.5) expect_equal(mauc_mu(equalizer_truth, maxent_prob), 0.5) # reversing prob gives 1 - auc expect_equal(mauc_aunu(truth, prob), 1 - mauc_aunu(truth, 1 - prob)) expect_equal(mauc_aunp(truth, prob), 1 - mauc_aunp(truth, 1 - prob)) expect_equal(mauc_au1u(truth, prob), 1 - mauc_au1u(truth, 1 - prob)) expect_equal(mauc_au1p(truth, prob), 1 - mauc_au1p(truth, 1 - prob)) expect_equal(mauc_mu(truth, prob), 1 - mauc_mu(truth, 1 - prob)) # manually calculate au1u, au1p compmat = sapply(levels(truth), function(t1) { sapply(levels(truth), function(t2) { if (t1 == t2) { return(0) } auc(factor(truth == t1)[truth %in% c(t1, t2)], prob[truth %in% c(t1, t2), t1], "TRUE") }) }) expect_equal(mauc_au1u(truth, prob), sum(compmat) / 6) expect_equal(mauc_au1p(truth, prob), sum(c(compmat + t(compmat)) * c(table(truth))) / 4 / length(truth)) # manually calculate aunu, aunp compvec = sapply(levels(truth), function(t1) { auc(factor(truth == t1), prob[, t1], "TRUE") }) expect_equal(mauc_aunu(truth, prob), mean(compvec)) expect_equal(mauc_aunp(truth, prob), sum(compvec * table(truth) / length(truth))) })