test_that("Two class", { # roc_curv <- pROC::roc( # two_class_example$truth, # two_class_example$Class1, # levels = rev(levels(two_class_example$truth)), # direction = "<" # ) # # lvls <- levels(two_class_example$truth) # roc_val <- as.numeric(roc_curv$auc) roc_val <- 0.939313857389967 expect_equal( roc_auc(two_class_example, truth, Class1)[[".estimate"]], roc_val ) expect_equal( roc_auc(two_class_example, truth = "truth", Class1)[[".estimate"]], roc_val ) }) test_that("`event_level = 'second'` works", { df <- two_class_example df_rev <- df df_rev$truth <- stats::relevel(df_rev$truth, "Class2") expect_equal( roc_auc_vec(df$truth, df$Class1), roc_auc_vec(df_rev$truth, df_rev$Class1, event_level = "second") ) }) # ------------------------------------------------------------------------------ # HandTill2001::auc(HandTill2001::multcap(hpc_cv2$obs, as.matrix(select(hpc_cv2, VF:L)))) test_that("Hand Till multiclass", { # HPC_CV takes too long hpc_cv2 <- dplyr::filter(hpc_cv, Resample %in% c("Fold06", "Fold07", "Fold08", "Fold09", "Fold10")) expect_equal( roc_auc(hpc_cv2, obs, VF:L)[[".estimate"]], 0.827387699597311 ) }) test_that("can calculate Hand Till when prob matrix column names are different from level values", { # HPC_CV takes too long hpc_cv2 <- dplyr::filter(hpc_cv, Resample %in% c("Fold06", "Fold07", "Fold08", "Fold09", "Fold10")) # Mimic how parsnip returns names colnames(hpc_cv2) <- c( "obs", "pred", ".pred_VF", ".pred_F", ".pred_M", ".pred_L", "Resample" ) expect_equal( roc_auc(hpc_cv2, obs, .pred_VF:.pred_L)[[".estimate"]], 0.827387699597311 ) }) test_that("`roc_auc()` hand-till method ignores levels with 0 observations with a warning (#123)", { # Generally we return `NA_real_` for macro/macro-weighted/micro multiclass, # but pROC and HandTill2001 both ignore levels with 0 observations, so we do # too for consistency truth <- factor(c("x", "x", "z"), levels = c("x", "y", "z")) estimate <- c( c(.8, .5, .6), c(.1, .1, .1), c(.1, .4, .3) ) estimate <- matrix(estimate, ncol = 3) colnames(estimate) <- c("x", "y", "z") # HandTill2001::auc(HandTill2001::multcap(truth, estimate)) expect_snapshot( expect_identical(roc_auc_vec(truth, estimate), 0.5) ) }) # ------------------------------------------------------------------------------ test_that("binary roc auc uses equivalent of pROC `direction = <`", { # In yardstick we do events (or cases) as the first level truth <- factor(c("control", "case", "case"), levels = c("case", "control")) # Make really bad predictions # This would force `direction = "auto"` to choose `>`, # which would be incorrect. We are required to force `direction = <` for # our purposes of having `estimate` match the event estimate <- c(.8, .2, .1) # # pROC() expects levels to be in the order of control, then event. # auc <- pROC::auc( # truth, # estimate, # levels = c("control", "case"), # direction = "<" # ) # auc <- as.numeric(auc) auc <- 0 expect_identical(roc_auc_vec(truth, estimate), auc) }) test_that("equivalent of `direction = <` is forced when individual binary AUCs are computed (#123)", { truth <- factor( c( "c", "c", "c", "d", "d", "a", "d", "c", "a", "a", "d", "b", "d", "a", "d", "a", "d", "d" ), levels = c("a", "b", "c", "d") ) estimate <- c( c(c(0.5, 0.4, 0.8, 0.5, 0.8, 1, 0.8, 0, 0.5, 2 / 3, 0, 1, 0.4, 1 / 6, 0.4, 0.8, 0.5, 0.6)), c(0, 0.2, 0, 0, 0, 0, 0, 0.6, 0, 1 / 3, 0, 0, 0, 5 / 6, 0.6, 0, 0, 0), c(0, 0.2, 0, 0, 0, 0, 0, 0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), c(0.5, 0.2, 0.2, 0.5, 0.2, 0, 0.2, 0.2, 0.5, 0, 1, 0, 0.6, 0, 0, 0.2, 0.5, 0.4) ) estimate <- matrix(estimate, ncol = 4, dimnames = list(NULL, c("a", "b", "c", "d"))) # HandTill2001::auc(HandTill2001::multcap(truth, estimate)) expect_equal( roc_auc_vec(truth, estimate), 0.5890625 ) }) # ------------------------------------------------------------------------------ test_that("Multiclass ROC AUC", { hpc_f1 <- data_hpc_fold1() expect_equal( roc_auc(hpc_f1, obs, VF:L, estimator = "macro")[[".estimate"]], hpc_fold1_macro_metric(roc_auc_binary) ) expect_equal( roc_auc(hpc_f1, obs, VF:L, estimator = "macro_weighted")[[".estimate"]], hpc_fold1_macro_weighted_metric(roc_auc_binary) ) }) # ------------------------------------------------------------------------------ test_that("warning is thrown when missing events", { no_event <- dplyr::filter(two_class_example, truth == "Class2") expect_snapshot(out <- roc_auc(no_event, truth, Class1)[[".estimate"]]) expect_identical(out, NA_real_) }) test_that("warning is thrown when missing controls", { no_control <- dplyr::filter(two_class_example, truth == "Class1") expect_snapshot(out <- roc_auc(no_control, truth, Class1)[[".estimate"]]) expect_identical(out, NA_real_) }) test_that("multiclass one-vs-all approach results in multiple warnings", { no_event <- dplyr::filter(two_class_example, truth == "Class2") expect_snapshot( out <- roc_auc(no_event, truth, Class1, Class2, estimator = "macro")[[".estimate"]] ) expect_identical(out, NA_real_) expect_snapshot( out <- roc_auc(no_event, truth, Class1, Class2, estimator = "macro")[[".estimate"]] ) expect_identical(out, NA_real_) expect_snapshot( out <- roc_auc(no_event, truth, Class1, Class2, estimator = "macro_weighted")[[".estimate"]] ) expect_identical(out, NA_real_) expect_snapshot( out <- roc_auc(no_event, truth, Class1, Class2, estimator = "macro_weighted")[[".estimate"]] ) expect_identical(out, NA_real_) }) test_that("hand till approach throws warning and returns `NaN` when only 1 level has observations", { x <- factor(c("x", "x", "x"), levels = c("x", "y")) estimate <- c( c(.8, .5, .6), c(.2, .5, .4) ) estimate <- matrix(estimate, ncol = 2) colnames(estimate) <- c("x", "y") # With two levels -> one expect_snapshot( out <- roc_auc_vec(x, estimate, estimator = "hand_till") ) expect_identical(out, NaN) x <- factor(c("x", "x", "x"), levels = c("x", "y", "z")) estimate <- c( c(.8, .5, .6), c(.1, .1, .1), c(.1, .4, .3) ) estimate <- matrix(estimate, ncol = 3) colnames(estimate) <- c("x", "y", "z") # With three levels -> one expect_snapshot( out <- roc_auc_vec(x, estimate, estimator = "hand_till") ) expect_identical(out, NaN) }) # ------------------------------------------------------------------------------ test_that("df method - presense of case weights affects default multiclass estimator", { hpc_cv$weight <- read_weights_hpc_cv() hpc_cv_estimate_matrix <- as.matrix(hpc_cv[c("VF", "F", "M", "L")]) # Generally hand-till out <- roc_auc(hpc_cv, obs, VF:L) expect_identical(out$.estimator, "hand_till") expect_identical( out$.estimate, roc_auc_vec(hpc_cv$obs, hpc_cv_estimate_matrix, estimator = "hand_till") ) # Unless case weights are supplied out <- roc_auc(hpc_cv, obs, VF:L, case_weights = weight) expect_identical(out$.estimator, "macro") expect_identical( out$.estimate, roc_auc_vec(hpc_cv$obs, hpc_cv_estimate_matrix, estimator = "macro", case_weights = hpc_cv$weight) ) }) test_that("vec method - presense of case weights affects default multiclass estimator", { hpc_cv$weight <- read_weights_hpc_cv() hpc_cv_estimate_matrix <- as.matrix(hpc_cv[c("VF", "F", "M", "L")]) # Generally hand-till expect_identical( roc_auc_vec(hpc_cv$obs, hpc_cv_estimate_matrix), roc_auc_vec(hpc_cv$obs, hpc_cv_estimate_matrix, estimator = "hand_till") ) # Unless case weights are supplied expect_identical( roc_auc_vec(hpc_cv$obs, hpc_cv_estimate_matrix, case_weights = hpc_cv$weight), roc_auc_vec(hpc_cv$obs, hpc_cv_estimate_matrix, estimator = "macro", case_weights = hpc_cv$weight) ) }) test_that("can't use case weights and hand-till method", { hpc_cv$weight <- read_weights_hpc_cv() expect_snapshot(error = TRUE, { roc_auc(hpc_cv, obs, VF:L, estimator = "hand_till", case_weights = weight) }) }) # ------------------------------------------------------------------------------ test_that("Two class ROC AUC matches sklearn", { sklearn <- read_pydata("py-roc-auc") expect_equal( roc_auc(two_class_example, truth, Class1)[[".estimate"]], sklearn$binary ) }) test_that("Two class weighted ROC AUC matches sklearn", { sklearn <- read_pydata("py-roc-auc") two_class_example$weight <- read_weights_two_class_example() expect_equal( roc_auc(two_class_example, truth, Class1, case_weights = weight)[[".estimate"]], sklearn$case_weight$binary ) }) test_that("Multiclass ROC AUC matches sklearn", { sklearn <- read_pydata("py-roc-auc") expect_equal( roc_auc(hpc_cv, obs, VF:L, estimator = "macro")[[".estimate"]], sklearn$macro ) expect_equal( roc_auc(hpc_cv, obs, VF:L, estimator = "macro_weighted")[[".estimate"]], sklearn$macro_weighted ) expect_equal( roc_auc(hpc_cv, obs, VF:L, estimator = "hand_till")[[".estimate"]], sklearn$hand_till ) }) test_that("Multiclass weighted ROC AUC matches sklearn", { sklearn <- read_pydata("py-roc-auc") hpc_cv$weight <- read_weights_hpc_cv() expect_equal( roc_auc(hpc_cv, obs, VF:L, estimator = "macro", case_weights = weight)[[".estimate"]], sklearn$case_weight$macro ) expect_equal( roc_auc(hpc_cv, obs, VF:L, estimator = "macro_weighted", case_weights = weight)[[".estimate"]], sklearn$case_weight$macro_weighted ) # No support for hand_till + case weights }) test_that("grouped multiclass (one-vs-all) weighted example matches expanded equivalent", { hpc_cv$weight <- rep(1, times = nrow(hpc_cv)) hpc_cv$weight[c(100, 200, 150, 2)] <- 5 hpc_cv <- dplyr::group_by(hpc_cv, Resample) hpc_cv_expanded <- hpc_cv[vec_rep_each(seq_len(nrow(hpc_cv)), times = hpc_cv$weight), ] expect_identical( roc_auc(hpc_cv, obs, VF:L, case_weights = weight, estimator = "macro"), roc_auc(hpc_cv_expanded, obs, VF:L, estimator = "macro") ) expect_identical( roc_auc(hpc_cv, obs, VF:L, case_weights = weight, estimator = "macro_weighted"), roc_auc(hpc_cv_expanded, obs, VF:L, estimator = "macro_weighted") ) }) # ------------------------------------------------------------------------------ test_that("roc_auc() - `options` is deprecated", { skip_if(getRversion() <= "3.5.3", "Base R used a different deprecated warning class.") rlang::local_options(lifecycle_verbosity = "warning") expect_snapshot({ out <- roc_auc(two_class_example, truth, Class1, options = 1) }) expect_identical( out, roc_auc(two_class_example, truth, Class1) ) expect_snapshot({ out <- roc_auc_vec( truth = two_class_example$truth, estimate = two_class_example$Class1, options = 1 ) }) expect_identical( out, roc_auc_vec( truth = two_class_example$truth, estimate = two_class_example$Class1 ) ) }) test_that("works with hardhat case weights", { df <- two_class_example imp_wgt <- hardhat::importance_weights(seq_len(nrow(df))) freq_wgt <- hardhat::frequency_weights(seq_len(nrow(df))) expect_no_error( roc_auc_vec(df$truth, df$Class1, case_weights = imp_wgt) ) expect_no_error( roc_auc_vec(df$truth, df$Class1, case_weights = freq_wgt) ) }) test_that("errors with class_pred input", { skip_if_not_installed("probably") cp_truth <- probably::as_class_pred(two_class_example$truth, which = 1) fct_truth <- two_class_example$truth fct_truth[1] <- NA estimate <- two_class_example$Class1 expect_snapshot( error = TRUE, roc_auc_vec(cp_truth, estimate) ) })