library(cvms) context("evaluate()") test_that("multinomial evaluations are correct in evaluate()", { xpectr::set_test_seed(1) random_probabilities <- multiclass_probability_tibble( num_classes = 5, num_observations = 20, apply_softmax = FALSE # Test with as well ) expect_equal(sum(random_probabilities), 51.78471, tolerance = 1e-5) data_ <- random_probabilities %>% dplyr::mutate( cl = as.factor(rep(1:5, each = 4)), cl_char = paste0("cl_", cl) ) expect_error( evaluate( data = data_, target_col = "cl", prediction_cols = paste0("class_", 1:5), type = "multinomial", apply_softmax = TRUE ), "Not all levels in 'target_col' was found in 'prediction_cols'.", fixed = T ) data_ <- data_ %>% dplyr::rename_at(dplyr::vars(paste0("class_", 1:5)), .funs = ~ paste0("cl_", 1:5)) mn_eval_1 <- evaluate( data = data_, target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), type = "multinomial", apply_softmax = TRUE, metrics = list("AUC" = TRUE) ) # TODO Add more tests expect_equal(mn_eval_1$`Overall Accuracy`, 0.2, tolerance = 1e-4) expect_equal(mn_eval_1$`Balanced Accuracy`, 0.5, tolerance = 1e-4) expect_equal( mn_eval_1$`Balanced Accuracy`, mean(mn_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`) ) expect_equal(mn_eval_1$F1, NaN) expect_equal(mn_eval_1$Sensitivity, 0.2, tolerance = 1e-4) expect_equal(mn_eval_1$Specificity, 0.8, tolerance = 1e-4) expect_equal(mn_eval_1$`Pos Pred Value`, 0.23, tolerance = 1e-4) expect_equal(mn_eval_1$`Neg Pred Value`, 0.7991667, tolerance = 1e-4) expect_equal(mn_eval_1$AUC, 0.49375) expect_equal(mn_eval_1$Kappa, 0.008653846, tolerance = 1e-4) expect_equal(mn_eval_1$MCC, 0.0, tolerance = 1e-4) expect_equal(mn_eval_1$`Detection Rate`, 0.04, tolerance = 1e-4) expect_equal(mn_eval_1$`Detection Prevalence`, 0.2, tolerance = 1e-4) expect_equal(mn_eval_1$Prevalence, 0.2, tolerance = 1e-4) expect_equal(as.numeric(mn_eval_1$ROC[[1]]$auc), 0.49375, tolerance = 1e-4 ) expect_equal( names(mn_eval_1$ROC[[1]]$rocs), c( "cl_1/cl_2", "cl_1/cl_3", "cl_1/cl_4", "cl_1/cl_5", "cl_2/cl_3", "cl_2/cl_4", "cl_2/cl_5", "cl_3/cl_4", "cl_3/cl_5", "cl_4/cl_5" ) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$sensitivities, c(1, 0.75, 0.75, 0.5, 0.25, 0.25, 0.25, 0, 0) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$specificities, c(0, 0, 0.25, 0.25, 0.25, 0.5, 0.75, 0.75, 1) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[2]]$sensitivities, c(1, 0.75, 0.5, 0.5, 0.5, 0.5, 0.25, 0, 0) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[2]]$specificities, c(0, 0, 0, 0.25, 0.5, 0.75, 0.75, 0.75, 1) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[1]]$sensitivities, c(1, 0.75, 0.75, 0.5, 0.25, 0.25, 0, 0, 0) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[1]]$specificities, c(0, 0, 0.25, 0.25, 0.25, 0.5, 0.5, 0.75, 1) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[2]]$sensitivities, c(1, 0.75, 0.5, 0.25, 0.25, 0, 0, 0, 0) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[2]]$specificities, c(0, 0, 0, 0, 0.25, 0.25, 0.5, 0.75, 1) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$Class, c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5") ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`, c(0.50000, 0.37500, 0.59375, 0.50000, 0.53125), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$F1, c(0.2222222, NaN, 0.3333333, 0.2222222, 0.2500000), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$Sensitivity, c(0.25, 0.0, 0.25, 0.25, 0.25), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level 
Results`[[1]]$Specificity, c(0.7500, 0.7500, 0.9375, 0.7500, 0.8125), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Pos Pred Value`, c(0.20, 0.00, 0.50, 0.20, 0.25), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Neg Pred Value`, c(0.80, 0.750, 0.8333333, 0.80, 0.81250), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$Kappa, c(-3.172066e-16, -2.500000e-01, 2.307692e-01, -3.172066e-16, 6.250000e-02), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Detection Rate`, c(0.05, 0.00, 0.05, 0.05, 0.05), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Detection Prevalence`, c(0.25, 0.20, 0.10, 0.25, 0.20), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$Prevalence, c(0.2, 0.2, 0.2, 0.2, 0.2), tolerance = 1e-4 ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$Support, c(4, 4, 4, 4, 4) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction, as.character(c(0, 1, 0, 1)) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target, as.character(c(0, 0, 1, 1)) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0, c("TP", "FN", "FP", "TN") ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1, c("TN", "FP", "FN", "TP") ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N, c(12, 4, 3, 1) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Prediction, as.character(c(0, 1, 0, 1)) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Target, as.character(c(0, 0, 1, 1)) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_0, c("TP", "FN", "FP", "TN") ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_1, c("TN", "FP", "FN", "TP") ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$N, c(12, 4, 4, 0) ) expect_equal( colnames(mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]), c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N") ) expect_equal( colnames(mn_eval_1$`Confusion Matrix`[[1]]), c("Prediction", "Target", "N") ) # Test Weighted metrics, and metrics == "all" xpectr::set_test_seed(1) mn_eval_2 <- evaluate( data = data_ %>% dplyr::sample_n(17), target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), type = "multinomial", apply_softmax = TRUE, metrics = "all" ) # Create manual weighted mean function manual_weighted_mean <- function(x, w) { sum(x * w) / sum(w) } # Test manual weighted mean function expect_equal(manual_weighted_mean(x = c(0.2, 0.8), w = c(1, 1)), 0.5, tolerance = 1e-4) expect_equal(manual_weighted_mean(x = c(0.3, 0.7), w = c(1, 1)), 0.5, tolerance = 1e-4) expect_equal(manual_weighted_mean(x = c(0.5, 0.5), w = c(1, 1)), 0.5, tolerance = 1e-4) expect_equal(manual_weighted_mean(x = c(0.2), w = c(4)), 0.2) expect_equal(manual_weighted_mean(x = c(0.2, 0.8), w = c(4, 1)), 0.32, tolerance = 1e-4) expect_equal(mn_eval_2$`Overall Accuracy`, 0.2352941, tolerance = 1e-4) expect_equal(mn_eval_2$`Balanced Accuracy`, 0.5380586, tolerance = 1e-4) expect_equal( mn_eval_2$`Balanced Accuracy`, mean(mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`) ) expect_equal(mn_eval_2$`Weighted Balanced Accuracy`, 0.5236264, tolerance = 1e-4) expect_equal( mn_eval_2$`Weighted Balanced Accuracy`, manual_weighted_mean( x = mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`, w = 
mn_eval_2$`Class Level Results`[[1]]$Support ) ) expect_equal(mn_eval_2$Accuracy, 0.6941176, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Accuracy`, 0.6851211, tolerance = 1e-4) expect_equal(mn_eval_2$F1, NaN) expect_equal(mn_eval_2$`Weighted F1`, NaN) expect_equal(mn_eval_2$Sensitivity, 0.2666667, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Sensitivity`, 0.2352941, tolerance = 1e-4) expect_equal(mn_eval_2$Specificity, 0.8094505, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Specificity`, 0.8119586, tolerance = 1e-4) expect_equal(mn_eval_2$`Pos Pred Value`, 0.2666667, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Pos Pred Value`, 0.2696078, tolerance = 1e-4) expect_equal(mn_eval_2$`Neg Pred Value`, 0.8094505, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Neg Pred Value`, 0.7939237, tolerance = 1e-4) expect_equal(mn_eval_2$AUC, 0.528125, tolerance = 1e-4) expect_equal(mn_eval_2$Kappa, 0.06428773, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Kappa`, 0.04481687, tolerance = 1e-4) expect_equal(mn_eval_2$MCC, 0.0526315789473684, tolerance = 1e-4) expect_equal(mn_eval_2$`Detection Rate`, 0.04705882, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Detection Rate`, 0.0449827, tolerance = 1e-4) expect_equal(mn_eval_2$`Detection Prevalence`, 0.2, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Detection Prevalence`, 0.1937716, tolerance = 1e-4) expect_equal(mn_eval_2$Prevalence, 0.2, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Prevalence`, 0.2110727, tolerance = 1e-4) expect_equal(as.numeric(mn_eval_2$ROC[[1]]$auc), 0.528125, tolerance = 1e-4) expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$direction, ">", tolerance = 1e-4) expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$sensitivities, c(1, 0.75, 0.75, 0.5, 0.25, 0.25, 0.25, 0, 0), tolerance = 1e-4 ) expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$specificities, c(0, 0, 0.25, 0.25, 0.25, 0.5, 0.75, 0.75, 1), tolerance = 1e-4 ) expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$direction, ">", tolerance = 1e-4) expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$sensitivities, c(1, 1, 1, 1, 1, 0.666666666666667, 0.333333333333333, 0), tolerance = 1e-4 ) expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$specificities, c(0, 0.25, 0.5, 0.75, 1, 1, 1, 1), tolerance = 1e-4 ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$Class, c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5") ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`, c(0.5480769, 0.3461538, 0.5865385, 0.5595238, 0.6500000), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$F1, c(0.2857143, NaN, 0.3333333, 0.2857143, 0.3333333), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$Sensitivity, c(0.250, 0.00, 0.250, 0.3333333, 0.500), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$Specificity, c(0.8461538, 0.6923077, 0.9230769, 0.7857143, 0.80), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Pos Pred Value`, c(0.3333333, 0.0, 0.50, 0.25, 0.25), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Neg Pred Value`, c(0.7857143, 0.6923077, 0.80, 0.8461538, 0.9230769), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$Kappa, c(0.1052632, -0.3076923, 0.2093023, 0.1052632, 0.2093023), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Detection Rate`, c(0.05882353, 0.00, 0.05882353, 0.05882353, 0.05882353), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level 
Results`[[1]]$`Detection Prevalence`, c(0.1764706, 0.2352941, 0.1176471, 0.2352941, 0.2352941), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$Prevalence, c(0.2352941, 0.2352941, 0.2352941, 0.1764706, 0.1176471), tolerance = 1e-4 ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$Support, c(4, 4, 4, 3, 2) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction, as.character(c(0, 1, 0, 1)) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target, as.character(c(0, 0, 1, 1)) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0, c("TP", "FN", "FP", "TN") ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1, c("TN", "FP", "FN", "TP") ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N, c(11, 2, 3, 1) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Prediction, as.character(c(0, 1, 0, 1)) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Target, as.character(c(0, 0, 1, 1)) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_0, c("TP", "FN", "FP", "TN") ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_1, c("TN", "FP", "FN", "TP") ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$N, c(9, 4, 4, 0) ) expect_equal( colnames(mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]), c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N") ) expect_equal( colnames(mn_eval_2$`Confusion Matrix`[[1]]), c("Prediction", "Target", "N") ) # Enabling and disabling a few metrics xpectr::set_test_seed(1) mn_eval_3 <- evaluate( data = data_ %>% dplyr::sample_n(17), target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), type = "multinomial", apply_softmax = TRUE, metrics = list("Accuracy" = TRUE, "Overall Accuracy" = FALSE, "F1" = FALSE) ) expect_true("Accuracy" %in% colnames(mn_eval_3)) expect_equal(mn_eval_3$Accuracy, 0.6941176, tolerance = 1e-4) expect_true("Accuracy" %in% colnames(mn_eval_3$`Class Level Results`[[1]])) expect_equal(mn_eval_3$`Class Level Results`[[1]]$Accuracy, c(0.7058824, 0.5294118, 0.7647059, 0.7058824, 0.7647059), tolerance = 1e-4 ) expect_true("Overall Accuracy" %ni% colnames(mn_eval_3)) expect_true("F1" %ni% colnames(mn_eval_3)) expect_true("F1" %ni% colnames(mn_eval_3$`Class Level Results`[[1]])) # TODO # ID level data_ <- data_ %>% dplyr::mutate(id = factor(rep(1:10, each = 2))) xpectr::set_test_seed(9) suppressWarnings( mn_id_eval_1 <- evaluate( data = data_ %>% dplyr::sample_n(13), target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), id_col = "id", id_method = "mean", type = "multinomial", apply_softmax = TRUE, metrics = "all" ) ) expect_equal(mn_id_eval_1$`Overall Accuracy`, 0.222222, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Balanced Accuracy`, 0.5535714, tolerance = 1e-4) expect_equal( mn_id_eval_1$`Balanced Accuracy`, mean(mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`) ) expect_equal(mn_id_eval_1$`Weighted Balanced Accuracy`, 0.5178571, tolerance = 1e-4) expect_equal( mn_id_eval_1$`Weighted Balanced Accuracy`, manual_weighted_mean( x = mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`, w = mn_id_eval_1$`Class Level Results`[[1]]$Support ) ) expect_equal(mn_id_eval_1$Accuracy, 0.688888889, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Accuracy`, 0.6790123, tolerance = 1e-4) expect_equal(mn_id_eval_1$F1, NaN) 
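# As with `Balanced Accuracy`, the overall F1 should be the average of the
# class-level F1 values, so the NaN/NA entries in the class-level results
# (checked further down) propagate to both `F1` and `Weighted F1`.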
expect_equal(mn_id_eval_1$`Weighted F1`, NaN) expect_equal(mn_id_eval_1$Sensitivity, 0.3, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Sensitivity`, 0.222222, tolerance = 1e-4) expect_equal(mn_id_eval_1$Specificity, 0.8071429, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Specificity`, 0.8134921, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Pos Pred Value`, NaN) expect_equal(mn_id_eval_1$`Weighted Pos Pred Value`, NaN) expect_equal(mn_id_eval_1$`Neg Pred Value`, 0.8126984, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Neg Pred Value`, 0.7918871, tolerance = 1e-4) expect_equal(mn_id_eval_1$AUC, 0.6, tolerance = 1e-4) expect_equal(mn_id_eval_1$Kappa, 0.03714286, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Kappa`, -0.003174603, tolerance = 1e-4) expect_equal(mn_id_eval_1$MCC, 0.0484122918275927, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Detection Rate`, 0.04444444, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Detection Rate`, 0.03703704, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Detection Prevalence`, 0.2, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Detection Prevalence`, 0.1851852, tolerance = 1e-4) expect_equal(mn_id_eval_1$Prevalence, 0.2, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Prevalence`, 0.2098765, tolerance = 1e-4) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$Class, c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5") ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`, c(0.6785714, 0.3571429, 0.3571429, 0.8750000, 0.5000000), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$F1, c(0.5, NaN, NaN, 0.5, NA), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Sensitivity, c(0.5, 0.0, 0.0, 1.0, 0.0), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Specificity, c(0.8571429, 0.7142857, 0.7142857, 0.7500000, 1.0000000), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Pos Pred Value`, c(0.5000000, 0.0000000, 0.0000000, 0.3333333, NaN), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Neg Pred Value`, c(0.8571429, 0.7142857, 0.7142857, 1.0000000, 0.7777778), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Kappa, c(0.3571429, -0.2857143, -0.2857143, 0.4000000, 0.0000000), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Detection Rate`, c(0.1111111, 0.0000000, 0.0000000, 0.1111111, 0.0000000), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Detection Prevalence`, c(0.2222222, 0.2222222, 0.2222222, 0.3333333, 0.0000000), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Prevalence, c(0.2222222, 0.2222222, 0.2222222, 0.1111111, 0.2222222), tolerance = 1e-4 ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$Support, c(2, 2, 2, 1, 2) ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction, c("0", "1", "0", "1") ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target, c("0", "0", "1", "1") ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0, c("TP", "FN", "FP", "TN") ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1, c("TN", "FP", "FN", "TP") ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N, c(6, 1, 1, 1) ) expect_equal( colnames(mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]), c("Class", "Prediction", "Target", 
"Pos_0", "Pos_1", "N") ) expect_equal( colnames(mn_id_eval_1$`Confusion Matrix`[[1]]), c("Prediction", "Target", "N") ) expect_equal( colnames(mn_id_eval_1$Predictions[[1]]), c("Target", "Prediction", "SD", "Predicted Class", "id", "id_method") ) preds <- tidyr::unnest( mn_id_eval_1$Predictions[[1]], cols = c("Prediction", "SD"), names_sep = c("_") ) ## Testing 'preds' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Testing class expect_equal( class(preds), c("tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( preds[["Target"]], c("cl_1", "cl_1", "cl_2", "cl_2", "cl_3", "cl_3", "cl_4", "cl_5", "cl_5"), fixed = TRUE) expect_equal( preds[["Prediction_cl_1"]], c(0.15567, 0.24832, 0.21023, 0.23344, 0.21542, 0.17221, 0.17573, 0.31819, 0.1845), tolerance = 1e-4) expect_equal( preds[["Prediction_cl_2"]], c(0.20081, 0.17465, 0.16815, 0.17671, 0.27402, 0.22698, 0.20845, 0.13146, 0.18244), tolerance = 1e-4) expect_equal( preds[["Prediction_cl_3"]], c(0.23579, 0.23094, 0.23456, 0.19429, 0.23884, 0.22597, 0.118, 0.19822, 0.1765), tolerance = 1e-4) expect_equal( preds[["Prediction_cl_4"]], c(0.20689, 0.1759, 0.19107, 0.25941, 0.12493, 0.19673, 0.26071, 0.17429, 0.24663), tolerance = 1e-4) expect_equal( preds[["Prediction_cl_5"]], c(0.20084, 0.17018, 0.19599, 0.13616, 0.14679, 0.17811, 0.23711, 0.17783, 0.20994), tolerance = 1e-4) expect_equal( preds[["SD_cl_1"]], c(0.07539, 0.23713, 0.49265, NA, NA, NA, NA, NA, 0.28101), tolerance = 1e-4) expect_equal( preds[["SD_cl_2"]], c(0.51093, 0.37202, 0.08407, NA, NA, NA, NA, NA, 0.22093), tolerance = 1e-4) expect_equal( preds[["SD_cl_3"]], c(0.12296, 0.16256, 0.18359, NA, NA, NA, NA, NA, 0.18044), tolerance = 1e-4) expect_equal( preds[["SD_cl_4"]], c(0.43789, 0.08957, 0.27779, NA, NA, NA, NA, NA, 0.12961), tolerance = 1e-4) expect_equal( preds[["SD_cl_5"]], c(0.19647, 0.05278, 0.39202, NA, NA, NA, NA, NA, 0.14562), tolerance = 1e-4) expect_equal( preds[["Predicted Class"]], c("cl_3", "cl_1", "cl_3", "cl_4", "cl_2", "cl_2", "cl_4", "cl_1", "cl_4"), fixed = TRUE) expect_equal( preds[["id"]], structure(c(1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 10L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "factor")) expect_equal( preds[["id_method"]], c("mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean"), fixed = TRUE) # Testing column names expect_equal( names(preds), c("Target", "Prediction_cl_1", "Prediction_cl_2", "Prediction_cl_3", "Prediction_cl_4", "Prediction_cl_5", "SD_cl_1", "SD_cl_2", "SD_cl_3", "SD_cl_4", "SD_cl_5", "Predicted Class", "id", "id_method"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(preds), c("character", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "character", "factor", "character"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(preds), c("character", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "character", "integer", "character"), fixed = TRUE) # Testing dimensions expect_equal( dim(preds), c(9L, 14L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(preds)), character(0), fixed = TRUE) ## Finished testing 'preds' #### # Test grouping vars data_2 <- data_ %>% dplyr::mutate(fold_ = 1) %>% dplyr::bind_rows(data_ %>% dplyr::mutate(fold_ = 2)) mn_id_eval_2 <- evaluate( data = data_2 %>% dplyr::group_by(fold_), target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), 
id_col = "id", id_method = "majority", type = "multinomial", apply_softmax = TRUE ) expect_equal(mn_id_eval_2$fold_, c(1, 2)) expect_equal(dplyr::bind_rows(mn_id_eval_2$`Class Level Results`)$fold_, rep(1:2, each = 5)) expect_equal( colnames(mn_id_eval_2), c( "fold_", "Overall Accuracy", "Balanced Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence", "Predictions", "Confusion Matrix", "Class Level Results", "Process" ) ) expect_equal( colnames(mn_id_eval_2$`Class Level Results`[[1]]), c( "fold_", "Class", "Balanced Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "Kappa", "Detection Rate", "Detection Prevalence", "Prevalence", "Support", "Confusion Matrix" ) ) expect_equal( mn_id_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]][, -1], # remove fold_ mn_id_eval_2$`Class Level Results`[[2]]$`Confusion Matrix`[[1]][, -1] ) expect_equal( colnames(mn_id_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]), c("fold_", "Class", "Prediction", "Target", "Pos_0", "Pos_1", "N") ) # What happens when a class is not in the targets but has a probability column? data_3 <- random_probabilities %>% dplyr::mutate( cl = as.factor(rep(1:4, each = 5)), cl_char = paste0("cl_", cl) ) %>% dplyr::rename_at(dplyr::vars(paste0("class_", 1:5)), .funs = ~ paste0("cl_", 1:5)) # Testing multinomial expect_warning(mb_eval <- evaluate( data = data_3, target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), apply_softmax = TRUE, type = "multinomial", metrics = list("AUC" = TRUE) ), "The following classes were not found in 'response': cl_5.", fixed = TRUE ) expect_equal(mb_eval$`Overall Accuracy`, 0.3) expect_equal(mb_eval$`Balanced Accuracy`, NaN) expect_equal(mb_eval$F1, NaN) expect_equal(mb_eval$Sensitivity, NaN) expect_equal(mb_eval$Specificity, 0.8266667, tolerance = 1e-4) expect_equal(mb_eval$`Pos Pred Value`, 0.31, tolerance = 1e-4) expect_equal(mb_eval$`Neg Pred Value`, 0.825555555555556, tolerance = 1e-4) expect_equal(mb_eval$AUC, 0.49, tolerance = 1e-4) expect_equal(mb_eval$Kappa, 0.1133333, tolerance = 1e-4) expect_equal(mb_eval$MCC, 0.130327042490215, tolerance = 1e-4) expect_equal(mb_eval$`Detection Rate`, 0.06, tolerance = 1e-4) expect_equal(mb_eval$`Detection Prevalence`, 0.2, tolerance = 1e-4) expect_equal(mb_eval$Prevalence, 0.2, tolerance = 1e-4) expect_true("cl_5" %ni% mb_eval$Predictions[[1]]$Target) expect_equal( mb_eval$`Confusion Matrix`[[1]]$Target, rep(paste0("cl_", 1:5), each = 5) ) expect_equal( mb_eval$`Confusion Matrix`[[1]]$Prediction, rep(paste0("cl_", 1:5), 5) ) expect_equal( mb_eval$`Confusion Matrix`[[1]]$N, c( 1L, 1L, 1L, 0L, 2L, 2L, 1L, 0L, 2L, 0L, 1L, 2L, 1L, 0L, 1L, 1L, 0L, 0L, 3L, 1L, 0L, 0L, 0L, 0L, 0L ) ) expect_equal( mb_eval$`Class Level Results`[[1]]$Class, c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5") ) expect_equal( mb_eval$`Class Level Results`[[1]]$`Balanced Accuracy`, c( 0.466666666666667, 0.5, 0.566666666666667, 0.733333333333333, NaN ) ) expect_equal( mb_eval$`Class Level Results`[[1]]$F1, c(0.2, 0.222222222222222, 0.285714285714286, 0.6, NaN) ) expect_equal( mb_eval$`Class Level Results`[[1]]$Sensitivity, c(0.2, 0.2, 0.2, 0.6, NA) ) expect_equal(mb_eval$`Class Level Results`[[1]]$Specificity, c( 0.733333333333333, 0.8, 0.933333333333333, 0.866666666666667, 0.8 ), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Pos Pred Value`, c(0.2, 0.25, 0.5, 0.6, 0), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level 
Results`[[1]]$`Neg Pred Value`, c( 0.733333333333333, 0.75, 0.777777777777778, 0.866666666666667, 1 ), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$Kappa, c( -0.0666666666666667, -3.17206578464331e-16, 0.166666666666666, 0.466666666666667, 0 ), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Rate`, c(0.05, 0.05, 0.05, 0.15, 0), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Prevalence`, c(0.25, 0.2, 0.1, 0.25, 0.2), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$Prevalence, c(0.25, 0.25, 0.25, 0.25, 0), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$Support, c(5L, 5L, 5L, 5L, NaN), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N, c(11, 4, 4, 1), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[5]]$N, c(16, 4, 0, 0), tolerance = 1e-4 ) # TODO test that group_by and evaluate work correctly together }) test_that("multinomial evaluations with one predicted class column is correctly unpacked in evaluate()", { xpectr::set_test_seed(1) random_probabilities <- multiclass_probability_tibble( num_classes = 5, num_observations = 20, apply_softmax = FALSE # Test with as well ) expect_equal(sum(random_probabilities), 51.78471, tolerance = 1e-5) random_classes <- argmax(random_probabilities) data_ <- random_probabilities %>% dplyr::mutate( cl = as.factor(rep(1:5, each = 4)), cl_char = paste0("cl_", cl), pred_cl = random_classes ) data_ <- data_ %>% dplyr::rename_at(dplyr::vars(paste0("class_", 1:5)), .funs = ~ paste0("cl_", 1:5)) %>% dplyr::mutate(pred_cl_char = paste0("cl_", pred_cl)) data_classes <- data_ %>% base_select(c("cl", "cl_char", "pred_cl", "pred_cl_char")) mn_eval_1 <- evaluate( data = data_classes, target_col = "cl_char", prediction_cols = "pred_cl_char", type = "multinomial", apply_softmax = FALSE, metrics = list("AUC" = TRUE) ) # TODO Add more tests expect_equal(mn_eval_1$`Overall Accuracy`, 0.2, tolerance = 1e-4) expect_equal(mn_eval_1$`Balanced Accuracy`, 0.5, tolerance = 1e-4) expect_equal( mn_eval_1$`Balanced Accuracy`, mean(mn_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`) ) expect_equal(mn_eval_1$F1, NaN) expect_equal(mn_eval_1$Sensitivity, 0.2, tolerance = 1e-4) expect_equal(mn_eval_1$Specificity, 0.8, tolerance = 1e-4) expect_equal(mn_eval_1$`Pos Pred Value`, 0.23, tolerance = 1e-4) expect_equal(mn_eval_1$`Neg Pred Value`, 0.7991667, tolerance = 1e-4) expect_equal(mn_eval_1$AUC, 0.5) expect_equal(mn_eval_1$Kappa, 0.008653846, tolerance = 1e-4) expect_equal(mn_eval_1$MCC, 0.0, tolerance = 1e-4) expect_equal(mn_eval_1$`Detection Rate`, 0.04, tolerance = 1e-4) expect_equal(mn_eval_1$`Detection Prevalence`, 0.2, tolerance = 1e-4) expect_equal(mn_eval_1$Prevalence, 0.2, tolerance = 1e-4) expect_equal(as.numeric(mn_eval_1$ROC[[1]]$auc), 0.5, tolerance = 1e-4 ) expect_equal( names(mn_eval_1$ROC[[1]]$rocs), c( "cl_1/cl_2", "cl_1/cl_3", "cl_1/cl_4", "cl_1/cl_5", "cl_2/cl_3", "cl_2/cl_4", "cl_2/cl_5", "cl_3/cl_4", "cl_3/cl_5", "cl_4/cl_5" ) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$sensitivities, c(1, 0.5, 0) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$specificities, c(0, 0.25, 1) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[2]]$sensitivities, c(1, 0.75, 0) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[2]]$specificities, c(0, 0, 1) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[1]]$sensitivities, c(1, 0.5, 0) ) expect_equal( 
mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[1]]$specificities, c(0, 0.25, 1) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[2]]$sensitivities, c(1, 0.75, 0) ) expect_equal( mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[2]]$specificities, c(0, 0.25, 1) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$Class, c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5") ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`, c(0.50000, 0.37500, 0.59375, 0.50000, 0.53125), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$F1, c(0.2222222, NaN, 0.3333333, 0.2222222, 0.2500000), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$Sensitivity, c(0.25, 0.0, 0.25, 0.25, 0.25), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$Specificity, c(0.7500, 0.7500, 0.9375, 0.7500, 0.8125), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Pos Pred Value`, c(0.20, 0.00, 0.50, 0.20, 0.25), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Neg Pred Value`, c(0.80, 0.750, 0.8333333, 0.80, 0.81250), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$Kappa, c(-3.172066e-16, -2.500000e-01, 2.307692e-01, -3.172066e-16, 6.250000e-02), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Detection Rate`, c(0.05, 0.00, 0.05, 0.05, 0.05), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Detection Prevalence`, c(0.25, 0.20, 0.10, 0.25, 0.20), tolerance = 1e-4 ) expect_equal(mn_eval_1$`Class Level Results`[[1]]$Prevalence, c(0.2, 0.2, 0.2, 0.2, 0.2), tolerance = 1e-4 ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$Support, c(4, 4, 4, 4, 4) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction, as.character(c(0, 1, 0, 1)) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target, as.character(c(0, 0, 1, 1)) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0, c("TP", "FN", "FP", "TN") ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1, c("TN", "FP", "FN", "TP") ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N, c(12, 4, 3, 1) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Prediction, as.character(c(0, 1, 0, 1)) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Target, as.character(c(0, 0, 1, 1)) ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_0, c("TP", "FN", "FP", "TN") ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_1, c("TN", "FP", "FN", "TP") ) expect_equal( mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$N, c(12, 4, 4, 0) ) expect_equal( colnames(mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]), c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N") ) expect_equal( colnames(mn_eval_1$`Confusion Matrix`[[1]]), c("Prediction", "Target", "N") ) # Test Weighted metrics, and metrics == "all" # Sampled data frame if (TRUE) { xpectr::set_test_seed(1) mn_eval_2 <- evaluate( data = data_classes %>% dplyr::sample_n(17), target_col = "cl_char", prediction_cols = "pred_cl_char", type = "multinomial", apply_softmax = FALSE, metrics = "all" ) # Create manual weighted mean function manual_weighted_mean <- function(x, w) { sum(x * w) / sum(w) } # Test manual weighted mean function expect_equal(manual_weighted_mean(x = c(0.2, 0.8), w = c(1, 1)), 0.5, tolerance = 1e-4) 
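# The `Weighted *` metrics are expected to be support-weighted averages of the
# class-level metrics, i.e. sum(metric * Support) / sum(Support), which is what
# manual_weighted_mean() reproduces and the `Weighted Balanced Accuracy`
# expectation below verifies.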
expect_equal(manual_weighted_mean(x = c(0.3, 0.7), w = c(1, 1)), 0.5, tolerance = 1e-4) expect_equal(manual_weighted_mean(x = c(0.5, 0.5), w = c(1, 1)), 0.5, tolerance = 1e-4) expect_equal(manual_weighted_mean(x = c(0.2), w = c(4)), 0.2) expect_equal(manual_weighted_mean(x = c(0.2, 0.8), w = c(4, 1)), 0.32, tolerance = 1e-4) expect_equal(mn_eval_2$`Overall Accuracy`, 0.2352941, tolerance = 1e-4) expect_equal(mn_eval_2$`Balanced Accuracy`, 0.5380586, tolerance = 1e-4) expect_equal( mn_eval_2$`Balanced Accuracy`, mean(mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`) ) expect_equal(mn_eval_2$`Weighted Balanced Accuracy`, 0.5236264, tolerance = 1e-4) expect_equal( mn_eval_2$`Weighted Balanced Accuracy`, manual_weighted_mean( x = mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`, w = mn_eval_2$`Class Level Results`[[1]]$Support ) ) expect_equal(mn_eval_2$Accuracy, 0.6941176, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Accuracy`, 0.6851211, tolerance = 1e-4) expect_equal(mn_eval_2$F1, NaN) expect_equal(mn_eval_2$`Weighted F1`, NaN) expect_equal(mn_eval_2$Sensitivity, 0.2666667, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Sensitivity`, 0.2352941, tolerance = 1e-4) expect_equal(mn_eval_2$Specificity, 0.8094505, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Specificity`, 0.8119586, tolerance = 1e-4) expect_equal(mn_eval_2$`Pos Pred Value`, 0.2666667, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Pos Pred Value`, 0.2696078, tolerance = 1e-4) expect_equal(mn_eval_2$`Neg Pred Value`, 0.8094505, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Neg Pred Value`, 0.7939237, tolerance = 1e-4) expect_equal(mn_eval_2$AUC, 0.5416667, tolerance = 1e-4) expect_equal(mn_eval_2$Kappa, 0.06428773, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Kappa`, 0.04481687, tolerance = 1e-4) expect_equal(mn_eval_2$MCC, 0.0526315789473684, tolerance = 1e-4) expect_equal(mn_eval_2$`Detection Rate`, 0.04705882, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Detection Rate`, 0.0449827, tolerance = 1e-4) expect_equal(mn_eval_2$`Detection Prevalence`, 0.2, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Detection Prevalence`, 0.1937716, tolerance = 1e-4) expect_equal(mn_eval_2$Prevalence, 0.2, tolerance = 1e-4) expect_equal(mn_eval_2$`Weighted Prevalence`, 0.2110727, tolerance = 1e-4) expect_equal(as.numeric(mn_eval_2$ROC[[1]]$auc), 0.5416667, tolerance = 1e-4) expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$direction, ">", tolerance = 1e-4) expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$sensitivities, c(1, 1, 0), tolerance = 1e-4 ) expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$specificities, c(0.00, 0.25, 1.00), tolerance = 1e-4 ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$Class, c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5") ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`, c(0.5480769, 0.3461538, 0.5865385, 0.5595238, 0.6500000), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$F1, c(0.2857143, NaN, 0.3333333, 0.2857143, 0.3333333), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$Sensitivity, c(0.250, 0.00, 0.250, 0.3333333, 0.500), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$Specificity, c(0.8461538, 0.6923077, 0.9230769, 0.7857143, 0.80), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Pos Pred Value`, c(0.3333333, 0.0, 0.50, 0.25, 0.25), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Neg Pred Value`, c(0.7857143, 
0.6923077, 0.80, 0.8461538, 0.9230769), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$Kappa, c(0.1052632, -0.3076923, 0.2093023, 0.1052632, 0.2093023), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Detection Rate`, c(0.05882353, 0.00, 0.05882353, 0.05882353, 0.05882353), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Detection Prevalence`, c(0.1764706, 0.2352941, 0.1176471, 0.2352941, 0.2352941), tolerance = 1e-4 ) expect_equal(mn_eval_2$`Class Level Results`[[1]]$Prevalence, c(0.2352941, 0.2352941, 0.2352941, 0.1764706, 0.1176471), tolerance = 1e-4 ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$Support, c(4, 4, 4, 3, 2) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction, as.character(c(0, 1, 0, 1)) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target, as.character(c(0, 0, 1, 1)) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0, c("TP", "FN", "FP", "TN") ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1, c("TN", "FP", "FN", "TP") ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N, c(11, 2, 3, 1) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Prediction, as.character(c(0, 1, 0, 1)) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Target, as.character(c(0, 0, 1, 1)) ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_0, c("TP", "FN", "FP", "TN") ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_1, c("TN", "FP", "FN", "TP") ) expect_equal( mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$N, c(9, 4, 4, 0) ) expect_equal( colnames(mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]), c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N") ) expect_equal( colnames(mn_eval_2$`Confusion Matrix`[[1]]), c("Prediction", "Target", "N") ) } # Enabling and disabling a few metrics # TODO # ID level if (TRUE) { # ID level data_classes <- data_classes %>% dplyr::mutate(id = factor(rep(1:10, each = 2))) xpectr::set_test_seed(9) suppressWarnings( mn_id_eval_1 <- evaluate( data = data_classes %>% dplyr::sample_n(13), target_col = "cl_char", prediction_cols = "pred_cl_char", id_col = "id", id_method = "mean", type = "multinomial", apply_softmax = FALSE, metrics = "all" ) ) expect_equal(mn_id_eval_1$`Overall Accuracy`, 0.222222, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Balanced Accuracy`, 0.5535714, tolerance = 1e-4) expect_equal( mn_id_eval_1$`Balanced Accuracy`, mean(mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`) ) expect_equal(mn_id_eval_1$`Weighted Balanced Accuracy`, 0.5178571, tolerance = 1e-4) expect_equal( mn_id_eval_1$`Weighted Balanced Accuracy`, manual_weighted_mean( x = mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`, w = mn_id_eval_1$`Class Level Results`[[1]]$Support ) ) expect_equal(mn_id_eval_1$Accuracy, 0.688888889, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Accuracy`, 0.6790123, tolerance = 1e-4) expect_equal(mn_id_eval_1$F1, NaN) expect_equal(mn_id_eval_1$`Weighted F1`, NaN) expect_equal(mn_id_eval_1$Sensitivity, 0.3, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Sensitivity`, 0.222222, tolerance = 1e-4) expect_equal(mn_id_eval_1$Specificity, 0.8071429, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Specificity`, 0.8134921, tolerance = 1e-4) 
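# `Pos Pred Value` is TP / (TP + FP); a class that is never predicted gives
# 0 / 0 = NaN, and that NaN carries through the macro and support-weighted
# averages checked next.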
expect_equal(mn_id_eval_1$`Pos Pred Value`, NaN) expect_equal(mn_id_eval_1$`Weighted Pos Pred Value`, NaN) expect_equal(mn_id_eval_1$`Neg Pred Value`, 0.81111, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Neg Pred Value`, 0.7901235, tolerance = 1e-4) expect_equal(mn_id_eval_1$AUC, 0.575, tolerance = 1e-4) expect_equal(mn_id_eval_1$Kappa, 0.043636, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Kappa`, 0.00404, tolerance = 1e-4) expect_equal(mn_id_eval_1$MCC, 0.0510310363079829, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Detection Rate`, 0.04444444, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Detection Rate`, 0.03703704, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Detection Prevalence`, 0.2, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Detection Prevalence`, 0.1851852, tolerance = 1e-4) expect_equal(mn_id_eval_1$Prevalence, 0.2, tolerance = 1e-4) expect_equal(mn_id_eval_1$`Weighted Prevalence`, 0.2098765, tolerance = 1e-4) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$Class, c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5") ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`, c(0.607142857142857, 0.285714285714286, 0.5, 0.875, 0.5), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$F1, c(0.4, NaN, NaN, 0.5, NA), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Sensitivity, c(0.5, 0.0, 0.0, 1.0, 0.0), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Specificity, c(0.714285714285714, 0.571428571428571, 1, 0.75, 1), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Pos Pred Value`, c(0.333333333333333, 0, NaN, 0.333333333333333, NaN), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Neg Pred Value`, c( 0.833333333333333, 0.666666666666667, 0.777777777777778, 1, 0.777777777777778 ), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Kappa, c(0.181818181818182, -0.363636363636363, 0, 0.4, 0), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Detection Rate`, c(0.1111111, 0.0000000, 0.0000000, 0.1111111, 0.0000000), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Detection Prevalence`, c(0.3333333, 0.3333333, 0, 0.3333333, 0), tolerance = 1e-4 ) expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Prevalence, c(0.2222222, 0.2222222, 0.2222222, 0.1111111, 0.2222222), tolerance = 1e-4 ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$Support, c(2, 2, 2, 1, 2) ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction, c("0", "1", "0", "1") ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target, c("0", "0", "1", "1") ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0, c("TP", "FN", "FP", "TN") ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1, c("TN", "FP", "FN", "TP") ) expect_equal( mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N, c(5, 2, 1, 1) ) expect_equal( colnames(mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]), c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N") ) expect_equal( colnames(mn_id_eval_1$`Confusion Matrix`[[1]]), c("Prediction", "Target", "N") ) preds <- tidyr::unnest(mn_id_eval_1$Predictions[[1]], cols = c(Prediction)) expect_equal( preds$Target, c( "cl_1", "cl_1", "cl_2", "cl_2", "cl_3", "cl_3", "cl_4", "cl_5", "cl_5" ) ) expect_equal( preds$`Predicted 
Class`, c( "cl_2", "cl_1", "cl_1", "cl_4", "cl_2", "cl_2", "cl_4", "cl_1", "cl_4" ) ) expect_equal( preds$id, structure( c(1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 10L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "factor" ) ) expect_equal(preds$id_method, rep("mean", 9)) expect_equal( preds$cl_1, c( 0.158794620741017, 0.261808068888474, 0.261808068888474, 0.148847581202078, 0.148847581202078, 0.148847581202078, 0.148847581202078, 0.40460967519169, 0.158794620741017 ) ) expect_equal( preds$cl_2, c( 0.261808068888474, 0.158794620741017, 0.158794620741017, 0.148847581202078, 0.40460967519169, 0.40460967519169, 0.148847581202078, 0.148847581202078, 0.158794620741017 ) ) expect_equal( preds$cl_3, c( 0.158794620741017, 0.261808068888474, 0.158794620741017, 0.148847581202078, 0.148847581202078, 0.148847581202078, 0.148847581202078, 0.148847581202078, 0.158794620741017 ) ) expect_equal( preds$cl_4, c( 0.158794620741017, 0.158794620741017, 0.158794620741017, 0.40460967519169, 0.148847581202078, 0.148847581202078, 0.40460967519169, 0.148847581202078, 0.261808068888474 ) ) expect_equal( preds$cl_5, c( 0.261808068888474, 0.158794620741017, 0.261808068888474, 0.148847581202078, 0.148847581202078, 0.148847581202078, 0.148847581202078, 0.148847581202078, 0.261808068888474 ) ) } # Test grouping vars data_2 <- data_classes %>% dplyr::mutate(fold_ = 1) %>% dplyr::bind_rows(data_classes %>% dplyr::mutate(fold_ = 2)) mn_id_eval_2 <- evaluate( data = data_2 %>% dplyr::group_by(fold_), target_col = "cl_char", prediction_cols = "pred_cl_char", id_col = "id", id_method = "majority", type = "multinomial", apply_softmax = FALSE ) expect_equal(mn_id_eval_2$fold_, c(1, 2)) expect_equal(dplyr::bind_rows(mn_id_eval_2$`Class Level Results`)$fold_, rep(1:2, each = 5)) expect_equal( colnames(mn_id_eval_2), c( "fold_", "Overall Accuracy", "Balanced Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence", "Predictions", "Confusion Matrix", "Class Level Results", "Process" ) ) expect_equal( colnames(mn_id_eval_2$`Class Level Results`[[1]]), c( "fold_", "Class", "Balanced Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "Kappa", "Detection Rate", "Detection Prevalence", "Prevalence", "Support", "Confusion Matrix" ) ) expect_equal( mn_id_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]][, -1], mn_id_eval_2$`Class Level Results`[[2]]$`Confusion Matrix`[[1]][, -1] ) # What happens when a class is not in the targets but has a probability column? 
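# In this variant the targets only contain cl_1 to cl_4 while the predicted
# class column can still contain "cl_5", so evaluate() should warn that cl_5
# was not found in the targets and return NaN/NA for several of that class'
# metrics below.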
data_3 <- data_classes %>% dplyr::mutate( cl = as.factor(rep(1:4, each = 5)), cl_char = paste0("cl_", cl) ) # Testing multinomial expect_warning(mb_eval <- evaluate( data = data_3, target_col = "cl_char", prediction_cols = "pred_cl_char", apply_softmax = FALSE, type = "multinomial", metrics = list("AUC" = TRUE) ), "The following classes were not found in 'response': cl_5.", fixed = TRUE ) expect_equal(mb_eval$`Overall Accuracy`, 0.3) expect_equal(mb_eval$`Balanced Accuracy`, NaN) expect_equal(mb_eval$F1, NaN) expect_equal(mb_eval$Sensitivity, NaN) expect_equal(mb_eval$Specificity, 0.8266667, tolerance = 1e-4) expect_equal(mb_eval$`Pos Pred Value`, 0.31, tolerance = 1e-4) expect_equal(mb_eval$`Neg Pred Value`, 0.825555555555556, tolerance = 1e-4) expect_equal(mb_eval$AUC, 0.5666667, tolerance = 1e-4) expect_equal(mb_eval$Kappa, 0.1133333, tolerance = 1e-4) expect_equal(mb_eval$MCC, 0.1303270, tolerance = 1e-4) expect_equal(mb_eval$`Detection Rate`, 0.06, tolerance = 1e-4) expect_equal(mb_eval$`Detection Prevalence`, 0.2, tolerance = 1e-4) expect_equal(mb_eval$Prevalence, 0.2, tolerance = 1e-4) expect_true("cl_5" %ni% mb_eval$Predictions[[1]]$Target) expect_equal( mb_eval$`Confusion Matrix`[[1]]$Target, rep(paste0("cl_", 1:5), each = 5) ) expect_equal( mb_eval$`Confusion Matrix`[[1]]$Prediction, rep(paste0("cl_", 1:5), 5) ) expect_equal( mb_eval$`Confusion Matrix`[[1]]$N, c( 1L, 1L, 1L, 0L, 2L, 2L, 1L, 0L, 2L, 0L, 1L, 2L, 1L, 0L, 1L, 1L, 0L, 0L, 3L, 1L, 0L, 0L, 0L, 0L, 0L ) ) expect_equal( mb_eval$`Class Level Results`[[1]]$Class, c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5") ) expect_equal( mb_eval$`Class Level Results`[[1]]$`Balanced Accuracy`, c( 0.466666666666667, 0.5, 0.566666666666667, 0.733333333333333, NaN ) ) expect_equal( mb_eval$`Class Level Results`[[1]]$F1, c(0.2, 0.222222222222222, 0.285714285714286, 0.6, NaN) ) expect_equal( mb_eval$`Class Level Results`[[1]]$Sensitivity, c(0.2, 0.2, 0.2, 0.6, NA) ) expect_equal(mb_eval$`Class Level Results`[[1]]$Specificity, c( 0.733333333333333, 0.8, 0.933333333333333, 0.866666666666667, 0.8 ), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Pos Pred Value`, c(0.2, 0.25, 0.5, 0.6, 0), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Neg Pred Value`, c( 0.733333333333333, 0.75, 0.777777777777778, 0.866666666666667, 1 ), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$Kappa, c( -0.0666666666666667, -3.17206578464331e-16, 0.166666666666666, 0.466666666666667, 0 ), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Rate`, c(0.05, 0.05, 0.05, 0.15, 0), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Prevalence`, c(0.25, 0.2, 0.1, 0.25, 0.2), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$Prevalence, c(0.25, 0.25, 0.25, 0.25, 0), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$Support, c(5L, 5L, 5L, 5L, NaN), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N, c(11, 4, 4, 1), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[5]]$N, c(16, 4, 0, 0), tolerance = 1e-4 ) # What if a target class is never predicted? 
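# Here the columns are swapped (targets = "pred_cl_char", predictions =
# "cl_char"), so "cl_5" occurs among the targets but is never predicted;
# its `Pos Pred Value` is then 0 / 0 = NaN, as checked below.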
mb_eval <- evaluate( data = data_3, target_col = "pred_cl_char", prediction_cols = "cl_char", apply_softmax = FALSE, type = "multinomial", metrics = list("AUC" = TRUE) ) expect_equal(mb_eval$`Overall Accuracy`, 0.3, tolerance = 1e-4) expect_equal(mb_eval$`Balanced Accuracy`, 0.5677778, tolerance = 1e-4) expect_equal(mb_eval$F1, NaN) expect_equal(mb_eval$Sensitivity, 0.31) expect_equal(mb_eval$Specificity, 0.8255556, tolerance = 1e-4) expect_equal(mb_eval$`Pos Pred Value`, NaN, tolerance = 1e-4) expect_equal(mb_eval$`Neg Pred Value`, 0.8266667, tolerance = 1e-4) expect_equal(mb_eval$AUC, 0.56875, tolerance = 1e-4) expect_equal(mb_eval$Kappa, 0.1133333, tolerance = 1e-4) expect_equal(mb_eval$MCC, 0.130327042490215, tolerance = 1e-4) expect_equal(mb_eval$`Detection Rate`, 0.06, tolerance = 1e-4) expect_equal(mb_eval$`Detection Prevalence`, 0.2, tolerance = 1e-4) expect_equal(mb_eval$Prevalence, 0.2, tolerance = 1e-4) expect_true("cl_5" %in% mb_eval$Predictions[[1]]$Target) expect_equal( mb_eval$`Confusion Matrix`[[1]]$Target, rep(paste0("cl_", 1:5), each = 5) ) expect_equal( mb_eval$`Confusion Matrix`[[1]]$Prediction, rep(paste0("cl_", 1:5), 5) ) expect_equal( mb_eval$`Confusion Matrix`[[1]]$N, c( 1L, 2L, 1L, 1L, 0L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 3L, 0L, 2L, 0L, 1L, 1L, 0L ) ) expect_equal( mb_eval$`Class Level Results`[[1]]$Class, c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5") ) expect_equal( mb_eval$`Class Level Results`[[1]]$`Balanced Accuracy`, c( 0.466666666666667, 0.5, 0.638888888888889, 0.733333333333333, 0.5 ) ) expect_equal( mb_eval$`Class Level Results`[[1]]$F1, c(0.2, 0.222222222222222, 0.285714285714286, 0.6, NaN) ) expect_equal( mb_eval$`Class Level Results`[[1]]$Sensitivity, c(0.2, 0.25, 0.5, 0.6, 0) ) expect_equal(mb_eval$`Class Level Results`[[1]]$Specificity, c( 0.733333333333333, 0.75, 0.777777777777778, 0.866666666666667, 1 ), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Pos Pred Value`, c(0.2, 0.2, 0.2, 0.6, NaN), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Neg Pred Value`, c( 0.733333333333333, 0.8, 0.933333333333333, 0.866666666666667, 0.8 ), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$Kappa, c( -0.0666666666666667, -3.17206578464331e-16, 0.166666666666666, 0.466666666666667, 0 ), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Rate`, c(0.05, 0.05, 0.05, 0.15, 0), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Prevalence`, c(0.25, 0.25, 0.25, 0.25, 0), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$Prevalence, c(0.25, 0.2, 0.1, 0.25, 0.2), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$Support, c(5L, 4L, 2L, 5L, 4), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N, c(11, 4, 4, 1), tolerance = 1e-4 ) expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[5]]$N, c(16, 0, 4, 0), tolerance = 1e-4 ) # # In grouped multinomial, when there are not the same classes in # the subsets, test that only the available classes are used # xpectr::set_test_seed(4) # group a: a,b,c,d # group b: a,b,c dd <- dplyr::bind_rows( data.frame("target" = sample(c("a", "b", "c", "d"), 20, replace=T), "prediction" = sample(c("a", "b", "c", "d"), 20, replace=T), "group" = "a", stringsAsFactors = FALSE), data.frame("target" = sample(c("a", "b", "c"), 20, replace=T), "prediction" = sample(c("a", "b", "c"), 20, replace=T), "group" = "b", stringsAsFactors = FALSE) 
) res <- dd %>% dplyr::group_by(.data$group) %>% evaluate(target_col = "target", prediction_cols = "prediction", type="multinomial") expect_equal( res$Process[[1]]$Classes, c("a", "b", "c", "d"), fixed = TRUE) expect_equal( res$Process[[2]]$Classes, c("a", "b", "c"), fixed = TRUE) expect_equal( res$`Class Level Results`[[1]]$Class, c("a", "b", "c", "d"), fixed = TRUE) expect_equal( res$`Class Level Results`[[2]]$Class, c("a", "b", "c"), fixed = TRUE) expect_equal( unique(res$`Confusion Matrix`[[1]]$Target), c("a", "b", "c", "d"), fixed = TRUE) expect_equal( unique(res$`Confusion Matrix`[[2]]$Target), c("a", "b", "c"), fixed = TRUE) ## Testing 'res' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Testing class expect_equal( class(res), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( res[["group"]], c("a", "b"), fixed = TRUE) expect_equal( res[["Overall Accuracy"]], c(0.25, 0.25), tolerance = 1e-4) expect_equal( res[["Balanced Accuracy"]], c(0.51335, 0.4472), tolerance = 1e-4) expect_equal( res[["F1"]], c(NaN, 0.2552), tolerance = 1e-4) expect_equal( res[["Sensitivity"]], c(0.28125, 0.27727), tolerance = 1e-4) expect_equal( res[["Specificity"]], c(0.74545, 0.61713), tolerance = 1e-4) expect_equal( res[["Pos Pred Value"]], c(0.2125, 0.26389), tolerance = 1e-4) expect_equal( res[["Neg Pred Value"]], c(0.75417, 0.63095), tolerance = 1e-4) expect_equal( res[["Kappa"]], c(-0.01188, -0.08712), tolerance = 1e-4) expect_equal( res[["MCC"]], c(0, -0.1117), tolerance = 1e-4) expect_equal( res[["Detection Rate"]], c(0.0625, 0.08333), tolerance = 1e-4) expect_equal( res[["Detection Prevalence"]], c(0.25, 0.33333), tolerance = 1e-4) expect_equal( res[["Prevalence"]], c(0.25, 0.33333), tolerance = 1e-4) # Testing column names expect_equal( names(res), c("group", "Overall Accuracy", "Balanced Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(res), c("character", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(res), c("character", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(res), c(2L, 17L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(res)), character(0), fixed = TRUE) ## Finished testing 'res' #### # TODO test that group_by and evaluate work correctly together }) test_that("nested tibbles are correctly added to grouped multinomial results in evaluate()", { xpectr::set_test_seed(1) random_probabilities <- multiclass_probability_tibble( num_classes = 3, num_observations = 60, apply_softmax = FALSE # Test with as well ) expect_equal(sum(random_probabilities), 92.56257, tolerance = 1e-5) random_classes <- argmax(random_probabilities) data_ <- random_probabilities %>% dplyr::mutate( cl = as.factor(rep(1:3, each = 20)), cl_char = paste0("cl_", cl), pred_cl = random_classes ) data_ <- data_ %>% dplyr::rename_at(dplyr::vars(paste0("class_", 1:3)), .funs = ~ paste0("cl_", 1:3)) %>% dplyr::mutate(pred_cl_char = paste0("cl_", pred_cl)) 
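# `argmax()` (a test helper) is assumed to return, for each row, the index of
# the column holding the largest probability, which is why
# `paste0("cl_", pred_cl)` lines up with the renamed probability columns.
# A base-R sketch under that assumption (not run):
#   pred_cl_sketch <- max.col(random_probabilities, ties.method = "first")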
# add grouping vars data_[["gk1_char"]] <- as.character(rep(1:3, each = 20)) data_[["gk2_int"]] <- rep(rep(1:5, each = 4), 3) data_ <- data_ %>% dplyr::bind_rows(data_ %>% dplyr::mutate( gk1_char = "10", gk2_int = 10 )) data_ <- data_ %>% dplyr::sample_frac() mn_eval_1 <- data_ %>% dplyr::group_by(.data$gk1_char, .data$gk2_int) %>% evaluate( target_col = "cl_char", prediction_cols = paste0("cl_", 1:3), type = "multinomial", apply_softmax = TRUE ) %>% .[, c("gk1_char", "gk2_int", "Predictions", "Confusion Matrix", "Class Level Results")] clr_grkeys <- plyr::ldply(mn_eval_1$`Class Level Results`, function(clr) { tibble::tibble( "gk1_char" = unique(clr[["gk1_char"]]), "gk2_int" = unique(clr[["gk2_int"]]) ) }) expect_equal(mn_eval_1$gk1_char, clr_grkeys$gk1_char) expect_equal(mn_eval_1$gk2_int, clr_grkeys$gk2_int) clr_cnfm_grkeys <- plyr::ldply(mn_eval_1$`Class Level Results`, function(clr) { bn_confmat <- dplyr::bind_rows(clr[["Confusion Matrix"]]) tibble::tibble( "gk1_char" = unique(bn_confmat[["gk1_char"]]), "gk2_int" = unique(bn_confmat[["gk2_int"]]) ) }) expect_equal(mn_eval_1$gk1_char, clr_cnfm_grkeys$gk1_char) expect_equal(mn_eval_1$gk2_int, clr_cnfm_grkeys$gk2_int) pred_grkeys <- plyr::ldply(mn_eval_1$Predictions, function(clr) { tibble::tibble( "gk1_char" = unique(clr[["gk1_char"]]), "gk2_int" = unique(clr[["gk2_int"]]) ) }) expect_equal(mn_eval_1$gk1_char, pred_grkeys$gk1_char) expect_equal(mn_eval_1$gk2_int, pred_grkeys$gk2_int) cnfm_grkeys <- plyr::ldply(mn_eval_1$`Confusion Matrix`, function(clr) { tibble::tibble( "gk1_char" = unique(clr[["gk1_char"]]), "gk2_int" = unique(clr[["gk2_int"]]) ) }) expect_equal(mn_eval_1$gk1_char, cnfm_grkeys$gk1_char) expect_equal(mn_eval_1$gk2_int, cnfm_grkeys$gk2_int) }) test_that("nested tibbles are correctly added to grouped binomial results in evaluate()", { xpectr::set_test_seed(1) random_probabilities <- multiclass_probability_tibble( num_classes = 1, num_observations = 60, apply_softmax = FALSE # Test with as well ) expect_equal(sum(random_probabilities), 30.72525, tolerance = 1e-5) random_classes <- ifelse(random_probabilities$class_1 > 0.5, 1, 0) data_ <- random_probabilities %>% dplyr::mutate( pred_cl = random_classes, cl = as.factor(rep(c(0, 1), each = 30)) ) data_ <- data_ %>% dplyr::sample_frac() # add grouping vars data_[["gk1_char"]] <- as.character(rep(1:3, each = 20)) data_[["gk2_int"]] <- rep(rep(1:2, each = 10), 3) data_ <- data_ %>% dplyr::bind_rows(data_ %>% dplyr::mutate( gk1_char = "10", gk2_int = 10 )) data_ <- data_ %>% dplyr::sample_frac() bn_eval_1 <- data_ %>% dplyr::group_by(.data$gk1_char, .data$gk2_int) %>% evaluate( target_col = "cl", prediction_cols = "class_1", type = "binomial", ) %>% .[, c("gk1_char", "gk2_int", "Predictions", "Confusion Matrix")] pred_grkeys <- plyr::ldply(bn_eval_1$Predictions, function(clr) { tibble::tibble( "gk1_char" = unique(clr[["gk1_char"]]), "gk2_int" = unique(clr[["gk2_int"]]) ) }) expect_equal(bn_eval_1$gk1_char, pred_grkeys$gk1_char) expect_equal(bn_eval_1$gk2_int, pred_grkeys$gk2_int) cnfm_grkeys <- plyr::ldply(bn_eval_1$`Confusion Matrix`, function(clr) { tibble::tibble( "gk1_char" = unique(clr[["gk1_char"]]), "gk2_int" = unique(clr[["gk2_int"]]) ) }) expect_equal(bn_eval_1$gk1_char, cnfm_grkeys$gk1_char) expect_equal(bn_eval_1$gk2_int, cnfm_grkeys$gk2_int) }) test_that("nested tibbles are correctly added to grouped gaussian results in evaluate()", { #### #### xpectr::set_test_seed(1) random_probabilities <- multiclass_probability_tibble( num_classes = 2, num_observations = 60, 
apply_softmax = FALSE # Test with as well ) expect_equal(sum(random_probabilities), 61.59519, tolerance = 1e-5) data_ <- random_probabilities # add grouping vars data_[["gk1_char"]] <- as.character(rep(1:3, each = 20)) data_[["gk2_int"]] <- rep(rep(1:2, each = 10), 3) data_ <- data_ %>% dplyr::bind_rows(data_ %>% dplyr::mutate( gk1_char = "10", gk2_int = 10 )) data_ <- data_ %>% dplyr::sample_frac() gs_eval_1 <- data_ %>% dplyr::group_by(.data$gk1_char, .data$gk2_int) %>% evaluate( target_col = "class_1", prediction_cols = "class_2", type = "gaussian", ) %>% .[, c("gk1_char", "gk2_int", "Predictions")] pred_grkeys <- plyr::ldply(gs_eval_1$Predictions, function(clr) { tibble::tibble( "gk1_char" = unique(clr[["gk1_char"]]), "gk2_int" = unique(clr[["gk2_int"]]) ) }) expect_equal(gs_eval_1$gk1_char, pred_grkeys$gk1_char) expect_equal(gs_eval_1$gk2_int, pred_grkeys$gk2_int) }) test_that("specific multinomial predictions yield correct results in evaluate()", { #### #### data <- tibble::tibble( "target" = c( "1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3" ), "1" = c( 0.362824302965311, 0.360903717826462, 0.285822146515266, 0.37477371815703, 0.381700937144022, 0.273724077239358, 0.341941761553692, 0.304958692149421, 0.296272111439159, 0.42404336282095, 0.293116571086624, 0.435758286831625, 0.28272698563188, 0.224804797891925, 0.198163481343272, 0.222945305755905, 0.300594211265473, 0.3278586388128, 0.414855759724269, 0.33840681107911, 0.407904832070239, 0.329062099428566, 0.347623656119297, 0.436145034077774, 0.335017655729982, 0.331364482540279, 0.283536882101457, 0.281143446843595, 0.425995399790343, 0.261537001684637, 0.237313955938272, 0.23261151589263, 0.385748271928449, 0.334411930525475, 0.401249217427923, 0.420235849517611, 0.425289710559998, 0.262465444718002, 0.336697104880243, 0.231858151646129, 0.210092588546093, 0.47741912516683, 0.244565586519055, 0.286947978160956, 0.417209987822813, 0.409276670773936, 0.354307298336307, 0.276564090289706, 0.330862775865476, 0.44695225847507, 0.317576338796576, 0.382847300413345, 0.224081474342672, 0.308365835600141, 0.302015399930661, 0.313332459936172, 0.251934064646274, 0.23915184388103, 0.277538091890923, 0.471891254622384, 0.40754972988395, 0.345372198209239, 0.33466402327811, 0.462461145964111, 0.266579885476856, 0.352899631920573, 0.421312945065338, 
0.200478726356666, 0.309794998292503, 0.380501567619241, 0.370761060364264, 0.286594888362904, 0.361067815387182, 0.245947918320429, 0.389260130880553, 0.321714471097802, 0.331089372813525, 0.256210536793289, 0.285625126260833, 0.27568485403864, 0.274822151212381, 0.389484563107126, 0.212979233666992, 0.296561920614094, 0.200225006332582, 0.242554965054235, 0.401234678799669, 0.330009524801572, 0.348663184860405, 0.411214822720279, 0.307151655834041, 0.204995757155556, 0.351402327921561, 0.374094844465352, 0.248711786514304, 0.336708510840726, 0.408292124109811, 0.357562258085327, 0.34181138284562, 0.323252856286123, 0.447544862966328, 0.429915527750429, 0.363601070683015, 0.333552425710284, 0.398225671426714, 0.221696833964976, 0.271265251552504, 0.481294916218702, 0.328149707350169, 0.450855293083027, 0.237104558776676, 0.374383211394644, 0.443948913175648, 0.494051191743219, 0.256273986260961, 0.489484795353784, 0.329464982964633, 0.220032486448317, 0.408557119092641, 0.319230850919545, 0.355237403525459, 0.23455484947749, 0.382020061725786, 0.267110221515588, 0.449602783872994, 0.202164855975727, 0.236445026776993, 0.319748604954957, 0.293320401651886, 0.287711207487059, 0.358419477510523, 0.346556723204159, 0.206074092549024, 0.22717751457188, 0.397646700390622, 0.446688182141175, 0.25620775941098, 0.501804532497241, 0.408063047760274, 0.443251289188255, 0.249444482951462, 0.383829461406706, 0.526313287884649, 0.452849242524878, 0.253013631607335, 0.334191437619488, 0.210413611589952, 0.23421023559276, 0.321507174883409, 0.509517153778936, 0.315671789411075, 0.255620649439559, 0.239331881508991, 0.322754669547894, 0.435042212296461, 0.251412865189562, 0.282387104005381, 0.245973141871465, 0.341257410449456, 0.277527088361948, 0.399220690874693, 0.439926753580167, 0.345819178673885, 0.305382501740128, 0.418275547531307, 0.357628745014896, 0.294197703135495, 0.438696916274104, 0.316517159370203, 0.357792733303972, 0.28831445694109, 0.327860691995961, 0.22281288859293, 0.314065020723387, 0.459321941470143, 0.387693577991577, 0.363268586267414, 0.303950826699894, 0.372655080723904, 0.339529039157154, 0.224621024989129, 0.296482850978123, 0.314332611351849, 0.234267899937577, 0.437859055024521, 0.269731707168111, 0.331690324355338, 0.280473038011755, 0.399824753263675, 0.383719422449624, 0.330850303521819, 0.225277208971775, 0.359553396121816, 0.328615974470287, 0.231945476475709, 0.214223736176299, 0.20582242433687, 0.375727645283227, 0.235524783117192, 0.286268463291008, 0.229673192858615, 0.283772599798294, 0.219611172339202, 0.295826542146669, 0.285141592656138, 0.192558055771125, 0.355721326100929, 0.409545410065343, 0.331835694337497, 0.230954644322117, 0.340582165139984, 0.294251623668547, 0.298768624646445, 0.291985965689547, 0.300172335564108, 0.318523203157749, 0.237566842343213, 0.37261470439223, 0.331390765035546, 0.364342409833174, 0.397916841690445, 0.217399738068907, 0.264193767365295, 0.208427577921687, 0.420006306243977, 0.292308232373888, 0.281443833338, 0.30947189334135, 0.257203806304276, 0.295150877610198, 0.439630183471502, 0.321730635029117, 0.383315062595601, 0.225583239939362, 0.309872283286329, 0.255887220229597, 0.316939046835727, 0.449868348167623, 0.304614105760487, 0.248742717566446, 0.305293159179038, 0.222558427746317, 0.244342581208601, 0.503534783305527, 0.286383868562386, 0.428804177911175, 0.295404759872436, 0.30687255200497, 0.445596166797302, 0.334706962656109 ), "2" = c( 0.385780580440307, 0.342604539124775, 0.423080439134247, 0.225175445397002, 
0.312989908016585, 0.475217543730522, 0.224916968457944, 0.30476258690942, 0.482054751251862, 0.214841434476723, 0.344321198446437, 0.333138708845406, 0.292804142075078, 0.31480368331023, 0.453026287138725, 0.4719140606185, 0.338078757628025, 0.296611945042962, 0.25365258932653, 0.305362051955673, 0.240571861737913, 0.333693522274202, 0.27392009695986, 0.28042156343828, 0.377909676645575, 0.279959262495121, 0.2122887561223, 0.470196696415608, 0.239862567097843, 0.370221172919109, 0.25443866213747, 0.477173210501813, 0.259642635838842, 0.322066433258374, 0.353453695410528, 0.1765751153703, 0.285504011914052, 0.424456315084771, 0.288460995364288, 0.35958455461334, 0.478761722406255, 0.250631249168976, 0.445446891169545, 0.369385615626211, 0.349831286201674, 0.234976115878086, 0.378701320138235, 0.431201840261207, 0.350278234492975, 0.262288382369503, 0.307058606700166, 0.320533642024935, 0.43236586844692, 0.292897655407014, 0.272621596850078, 0.270996516799443, 0.410085989322878, 0.444842557489891, 0.388276130257391, 0.235166674305724, 0.285112014266039, 0.437273128065374, 0.454306056457757, 0.193441477963145, 0.420457813840421, 0.20658635492773, 0.266523286854865, 0.398245797218399, 0.397335280211415, 0.214226033921459, 0.334049449261309, 0.465337959942035, 0.233723758733514, 0.31472699114585, 0.190099685465158, 0.336264164127875, 0.228217420461845, 0.478951976081042, 0.445589110407979, 0.429453925922651, 0.472930553187392, 0.332138049773757, 0.431357175765288, 0.227158863320801, 0.414490874557254, 0.492841238109576, 0.234359938505408, 0.446090237254051, 0.35319859961771, 0.345280017404155, 0.327026905651897, 0.315397998189469, 0.341892635859671, 0.363505947910766, 0.404023366558927, 0.276862535515932, 0.294742179813213, 0.294768056642531, 0.266684517736902, 0.311820705852887, 0.274265040914617, 0.245816171241433, 0.440341510685916, 0.421104352741149, 0.388918784994157, 0.421278084276545, 0.240879703622285, 0.289683431392311, 0.339677067961078, 0.208333263474704, 0.437464387830416, 0.367535015967519, 0.226639265821188, 0.311401754535334, 0.372083534671264, 0.250419612561257, 0.299632515128755, 0.32824246642957, 0.297400161614639, 0.313227906367457, 0.300459136297168, 0.303048382688999, 0.423041978939619, 0.21995462356869, 0.220422428036373, 0.405812939731575, 0.295769536850216, 0.276168343895395, 0.265750665287833, 0.253402549071951, 0.411972235353439, 0.351927605924981, 0.485986422454677, 0.321551112338582, 0.206970975070247, 0.200850312668311, 0.372875934652805, 0.234374628487236, 0.245897425556106, 0.253770902739585, 0.468416469360073, 0.410120970528605, 0.238800068178443, 0.280904468157008, 0.359143089366176, 0.407538360613741, 0.463482198596469, 0.346052750154407, 0.287160871624961, 0.227514884062728, 0.240774210253124, 0.468578579358505, 0.353317373812137, 0.349640083059674, 0.369950508544139, 0.380002971753307, 0.289234489357681, 0.395516529035609, 0.467797098876064, 0.338410210733615, 0.331006277488809, 0.266926876246406, 0.198909961692134, 0.477956034101944, 0.304153252664601, 0.269291217911081, 0.42201242766336, 0.236603843824688, 0.323467921547914, 0.352132946072107, 0.369703528563403, 0.329596532065509, 0.327226373739197, 0.462085224932779, 0.189039991684081, 0.314951443633383, 0.284435887439821, 0.366590341963145, 0.395601714171088, 0.335225687954128, 0.5311988845767, 0.346007208388757, 0.446522267443117, 0.343863071903928, 0.292892871402239, 0.382830131827548, 0.385357786983514, 0.214142145338179, 0.376626067248013, 0.389178684389126, 0.338344548396213, 0.403001958525644, 
0.290654812925342, 0.247101836432825, 0.370492901531161, 0.461281265139314, 0.375012631382266, 0.363470643270294, 0.226067381809261, 0.313047833074042, 0.479443190691509, 0.490484639496364, 0.391240933517263, 0.478047191943444, 0.356438854507902, 0.343453444225452, 0.216202187035667, 0.237739153185802, 0.301640124052444, 0.247512173039711, 0.309589249125308, 0.308972701838745, 0.358423819908425, 0.304335319964878, 0.448013834885817, 0.378099789829676, 0.541697788505534, 0.268360413749574, 0.331964563898707, 0.26728121083406, 0.225911392328278, 0.533006780442077, 0.342769074491857, 0.347630476342447, 0.345065351570056, 0.35598982747141, 0.453613405495365, 0.191197557332096, 0.401269074847496, 0.438308839883431, 0.239209391932476, 0.29631107556132, 0.178586958336824, 0.478858385133731, 0.475817691964852, 0.427658658905656, 0.298338105256703, 0.272983994354953, 0.308124320381545, 0.471773233423636, 0.373331904052012, 0.260774223314073, 0.326618966011962, 0.252950597054979, 0.399516649309411, 0.258469593402028, 0.328387994212459, 0.263157498352965, 0.180860493953652, 0.325672027376522 ), "3" = c( 0.251395116594382, 0.296491743048763, 0.291097414350488, 0.400050836445967, 0.305309154839393, 0.25105837903012, 0.433141269988364, 0.390278720941159, 0.221673137308979, 0.361115202702327, 0.362562230466938, 0.231103004322969, 0.424468872293042, 0.460391518797845, 0.348810231518003, 0.305140633625595, 0.361327031106502, 0.375529416144238, 0.331491650949201, 0.356231136965217, 0.351523306191848, 0.337244378297231, 0.378456246920842, 0.283433402483946, 0.287072667624444, 0.3886762549646, 0.504174361776243, 0.248659856740797, 0.334142033111814, 0.368241825396254, 0.508247381924258, 0.290215273605557, 0.354609092232709, 0.343521636216152, 0.245297087161549, 0.403189035112089, 0.28920627752595, 0.313078240197227, 0.374841899755469, 0.408557293740532, 0.311145689047652, 0.271949625664194, 0.3099875223114, 0.343666406212833, 0.232958725975514, 0.355747213347978, 0.266991381525458, 0.292234069449087, 0.318858989641549, 0.290759359155428, 0.375365054503257, 0.29661905756172, 0.343552657210408, 0.398736508992845, 0.425363003219261, 0.415671023264385, 0.337979946030848, 0.316005598629079, 0.334185777851686, 0.292942071071892, 0.30733825585001, 0.217354673725387, 0.211029920264134, 0.344097376072744, 0.312962300682723, 0.440514013151697, 0.312163768079797, 0.401275476424934, 0.292869721496081, 0.4052723984593, 0.295189490374428, 0.24806715169506, 0.405208425879304, 0.439325090533721, 0.420640183654289, 0.342021364774323, 0.440693206724629, 0.264837487125669, 0.268785763331188, 0.294861220038709, 0.252247295600227, 0.278377387119117, 0.35566359056772, 0.476279216065105, 0.385284119110164, 0.264603796836189, 0.364405382694923, 0.223900237944377, 0.298138215521885, 0.243505159875566, 0.365821438514061, 0.479606244654974, 0.306705036218769, 0.262399207623883, 0.347264846926769, 0.386428953643342, 0.296965696076976, 0.347669685272142, 0.391504099417478, 0.364926437860989, 0.278190096119055, 0.324268301008138, 0.196057418631069, 0.245343221548567, 0.212855543579129, 0.357025081758479, 0.487855044825211, 0.229021652388988, 0.332173224688753, 0.340811443442268, 0.325431053392908, 0.258081772637838, 0.329411821003164, 0.194547053721447, 0.371642479067775, 0.260095592084959, 0.370902501906612, 0.451725047122112, 0.29404271929272, 0.367541242712998, 0.344303460177374, 0.462396767833511, 0.194937959334594, 0.512935154915721, 0.329974788090632, 0.392022204292698, 0.467785436372791, 0.404083051149648, 0.440928933060281, 
0.45888624344099, 0.229608287136038, 0.30151567087086, 0.307939484996299, 0.451271373089538, 0.395382324539131, 0.352461505190514, 0.370916305936214, 0.263820839015524, 0.34603952668362, 0.30297780807216, 0.282139047688465, 0.206049568064689, 0.234886643936908, 0.266246289318114, 0.387843279026488, 0.258270201766771, 0.326104189813578, 0.419737014252832, 0.39133195349163, 0.262967962158336, 0.443554000335801, 0.275800771201936, 0.407350744678872, 0.327605247392432, 0.1950072791594, 0.36858416305713, 0.428378406636938, 0.358510329092926, 0.190945490674481, 0.384062700904437, 0.269773031636498, 0.293146370173427, 0.455270859633981, 0.216661464157928, 0.277571199804092, 0.373080037074023, 0.283789869201145, 0.324699239901208, 0.360014919081883, 0.290074320623921, 0.341982014495507, 0.34254277593853, 0.449960737667873, 0.223849754343834, 0.351638066845776, 0.29735497837504, 0.352295526292765, 0.329458831336961, 0.231743205105008, 0.325245272888717, 0.24418009043417, 0.35750994063312, 0.239145121205034, 0.421869028158495, 0.269248073573241, 0.347438161004341, 0.282951888661148, 0.505384816650066, 0.223549179488312, 0.22710189316125, 0.330805148081968, 0.371720832502581, 0.349791790952842, 0.424282189096888, 0.39756162199313, 0.324494998684386, 0.419164944280864, 0.260801711446479, 0.538407835073546, 0.400683703634949, 0.290883616449876, 0.225742760705341, 0.389147894143535, 0.226126265909887, 0.35841955283596, 0.463988500003423, 0.428076486863404, 0.352715436748856, 0.366524181610059, 0.521533182638172, 0.349828585734708, 0.396775674492708, 0.34280755544513, 0.403678714345574, 0.251813829550075, 0.303377007012575, 0.220735369151253, 0.359024881858196, 0.336644671065747, 0.368376379332766, 0.376171765981278, 0.249593481489016, 0.393037158142848, 0.443941945735865, 0.234928342185967, 0.351701940154702, 0.264942761166635, 0.499330549326554, 0.341527118848228, 0.266540282506371, 0.321160424596023, 0.381958289409563, 0.438097979067575, 0.295558374926908, 0.214310024748819, 0.316454120864747, 0.38472284790757, 0.277147657477424, 0.387261573857968, 0.279484049009918, 0.32137493676895, 0.51666734893961, 0.429038452779437, 0.243514619639494, 0.314099482128203, 0.312726228686797, 0.376207245915106, 0.429969949642065, 0.373543339249046, 0.339621009967369 ), ".groups" = rep(1:10, each = 25) ) evals <- evaluate( data = data %>% dplyr::group_by(.data$.groups), target_col = "target", prediction_cols = c("1", "2", "3"), metrics = list("AUC" = TRUE), type = "multinomial" ) expect_equal( colnames(evals), c( ".groups", "Overall Accuracy", "Balanced Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "AUC", "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence", "Predictions", "ROC", "Confusion Matrix", "Class Level Results", "Process" ) ) expect_equal( evals$`Overall Accuracy`, c(0.56, 0.36, 0.64, 0.28, 0.52, 0.24, 0.44, 0.4, 0.32, 0.44) ) expect_equal(evals$`Balanced Accuracy`, c( 0.665178571428571, 0.523974867724868, 0.73776455026455, 0.466104497354497, 0.636243386243386, 0.427744708994709, 0.580687830687831, 0.552910052910053, 0.498015873015873, 0.581845238095238 ), tolerance = 1e-5 ) expect_equal(evals$F1, c( 0.551190476190476, 0.347276688453159, 0.638304093567251, 0.276960784313726, 0.511111111111111, 0.237745098039216, 0.439814814814815, 0.401960784313726, 0.313186813186813, 0.423411469851098 ), tolerance = 1e-5 ) expect_equal(evals$Sensitivity, c( 0.55026455026455, 0.365079365079365, 0.656084656084656, 0.291005291005291, 0.513227513227513, 0.232804232804233, 
0.439153439153439, 0.402116402116402, 0.338624338624339, 0.439153439153439 ), tolerance = 1e-5 ) expect_equal(evals$Specificity, c( 0.780092592592593, 0.68287037037037, 0.819444444444444, 0.641203703703704, 0.759259259259259, 0.622685185185185, 0.722222222222222, 0.703703703703704, 0.657407407407407, 0.724537037037037 ), tolerance = 1e-5 ) expect_equal(evals$`Pos Pred Value`, c( 0.562770562770563, 0.340740740740741, 0.644444444444444, 0.272619047619048, 0.531746031746032, 0.24537037037037, 0.44973544973545, 0.41547619047619, 0.314814814814815, 0.433333333333333 ), tolerance = 1e-5 ) expect_equal(evals$`Neg Pred Value`, c( 0.78042328042328, 0.687426900584795, 0.824780701754386, 0.645315904139434, 0.761283550757235, 0.620098039215686, 0.719907407407407, 0.700871459694989, 0.656669719169719, 0.727777777777778 ), tolerance = 1e-5 ) expect_equal(evals$AUC, c( 0.666960611405056, 0.529100529100529, 0.766901822457378, 0.496766607877719, 0.563786008230453, 0.397119341563786, 0.6222810111699, 0.481187536743092, 0.468841857730747, 0.583774250440917 ), tolerance = 1e-5 ) expect_equal(evals$Kappa, c( 0.332217431748538, 0.0411382701447752, 0.462128481918942, -0.0736440767694057, 0.274254384977631, -0.1375345264999, 0.163202892673696, 0.108837675055412, -0.0101585873493696, 0.160109639592244 ), tolerance = 1e-5 ) expect_equal(evals$MCC, c(0.339040525810531, 0.0486630968728153, 0.469598884822668, -0.0726394381146181, 0.281551008833778, -0.137349796341982, 0.164251207729469, 0.108959157171927, -0.0175562702416747, 0.172015615514047), tolerance = 1e-5 ) expect_equal(evals$`Detection Rate`, c( 0.186666666666667, 0.12, 0.213333333333333, 0.0933333333333333, 0.173333333333333, 0.08, 0.146666666666667, 0.133333333333333, 0.106666666666667, 0.146666666666667 ), tolerance = 1e-5 ) expect_equal(evals$`Detection Prevalence`, c( 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333 ), tolerance = 1e-5 ) expect_equal(evals$Prevalence, c( 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333 ), tolerance = 1e-5 ) expect_equal( evals$.groups, 1:10 ) expect_equal(names(evals$ROC[[1]]$rocs), c("1/2", "1/3", "2/3")) expect_equal(evals$ROC[[1]]$rocs$`1/2`[[1]]$sensitivities, c( 1, 1, 1, 1, 0.857142857142857, 0.857142857142857, 0.857142857142857, 0.857142857142857, 0.714285714285714, 0.714285714285714, 0.571428571428571, 0.428571428571429, 0.428571428571429, 0.285714285714286, 0.142857142857143, 0.142857142857143, 0 ), tolerance = 1e-5 ) expect_equal(evals$ROC[[1]]$rocs$`1/2`[[2]]$sensitivities, c( 1, 1, 1, 1, 0.888888888888889, 0.777777777777778, 0.777777777777778, 0.666666666666667, 0.555555555555556, 0.444444444444444, 0.333333333333333, 0.333333333333333, 0.333333333333333, 0.222222222222222, 0.222222222222222, 0.111111111111111, 0 ), tolerance = 1e-5 ) expect_equal(evals$ROC[[1]]$rocs$`1/2`[[1]]$specificities, c( 0, 0.111111111111111, 0.222222222222222, 0.333333333333333, 0.333333333333333, 0.444444444444444, 0.555555555555556, 0.666666666666667, 0.666666666666667, 0.777777777777778, 0.777777777777778, 0.777777777777778, 0.888888888888889, 0.888888888888889, 0.888888888888889, 1, 1 ), tolerance = 1e-5 ) expect_equal(evals$ROC[[1]]$rocs$`1/2`[[2]]$specificities, c( 0, 0.142857142857143, 0.285714285714286, 0.428571428571429, 0.428571428571429, 
0.428571428571429, 0.571428571428571, 0.571428571428571, 0.571428571428571, 0.571428571428571, 0.571428571428571, 0.714285714285714, 0.857142857142857, 0.857142857142857, 1, 1, 1 ), tolerance = 1e-5 ) # class level expect_equal(evals$`Class Level Results`[[1]]$Class, as.character(c(1, 2, 3))) expect_equal(evals$`Class Level Results`[[1]]$`Balanced Accuracy`, c(0.715277777777778, 0.603174603174603, 0.677083333333333), tolerance = 1e-5 ) expect_equal(evals$`Class Level Results`[[1]]$F1, c(0.625, 0.428571428571429, 0.6), tolerance = 1e-5 ) expect_equal( evals$`Class Level Results`[[1]]$Support, c(9, 7, 9) ) expect_equal( evals$`Class Level Results`[[1]]$`Confusion Matrix`[[1]], structure(list( .groups = c(1L, 1L, 1L, 1L), Class = c("1", "1", "1", "1"), Prediction = c("0", "1", "0", "1"), Target = c("0", "0", "1", "1"), Pos_0 = c("TP", "FN", "FP", "TN"), Pos_1 = c("TN", "FP", "FN", "TP"), N = c(14L, 2L, 4L, 5L) ), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L) ) ) }) # TODO Add test that majority vote id_method works when not all classes are predicted most by one of the ids test_that("arguments throw proper errors and warnings in evaluate()", { #### #### xpectr::set_test_seed(1) random_probabilities <- multiclass_probability_tibble( num_classes = 5, num_observations = 20, apply_softmax = FALSE # Test with as well ) data_ <- random_probabilities %>% dplyr::mutate( cl = as.factor(rep(1:5, each = 4)), cl_char = paste0("cl_", cl) ) %>% dplyr::rename_at(dplyr::vars(paste0("class_", 1:5)), .funs = ~ paste0("cl_", 1:5)) # Testing 'metrics' expect_error( xpectr::strip_msg(evaluate( data = data_, target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), type = "multinomial", apply_softmax = TRUE, metrics = "none" )), xpectr::strip(paste0( "1 assertions failed:\n * Variable 'metrics': Must be of typ", "e 'list', not 'character'." )), fixed = TRUE ) expect_error( xpectr::strip_msg(evaluate( data = data_, target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), type = "multinomial", apply_softmax = TRUE, metrics = list(TRUE, FALSE) )), xpectr::strip("1 assertions failed:\n * Variable 'metrics': Must have names."), fixed = TRUE ) # TODO add more # Testing gaussian expect_error(evaluate( data = data_, target_col = "cl_1", prediction_cols = "cl_2", type = "gaussian", id_col = "cl", id_method = "mean" ), paste0( "The targets must be constant within the IDs with the current ID method. ", "These IDs had more than one unique value in the target column: 1, 2, 3, 4, 5." 
), fixed = TRUE ) # Only one class in target column for binomial data_3 <- data.frame( "target" = c(1, 1, 1, 1, 1), "prediction" = c(0.1, 0.2, 0.7, 0.8, 0.9) ) expect_error(evaluate( data = data_3, target_col = "target", prediction_cols = "prediction", cutoff = 0.5, type = "binomial" ), "found less than 2 levels in the target column.", fixed = TRUE ) # Test that pROC::roc returns the expected error # when there's only observations for one level in the target col ("response") expect_error( pROC::roc( data.frame( "target" = c(1, 1, 1), preds = c(0.01, 0.01, 1 - 0.02) ), response = "target", predictor = "preds", levels = c(0, 1) ), "No control observation.", fixed = TRUE ) }) test_that("binomial evaluation works in evaluate()", { #### #### xpectr::set_test_seed(1) random_probabilities <- multiclass_probability_tibble( num_classes = 1, num_observations = 20, apply_softmax = FALSE # Test with as well ) expect_equal(sum(random_probabilities), 11.10334, tolerance = 1e-5) data_ <- random_probabilities %>% dplyr::rename(prediction = class_1) %>% dplyr::mutate( cl = as.factor(rep(1:2, each = 10)), cl_char = paste0("cl_", cl), predicted_class = ifelse(prediction > 0.5, "cl_2", "cl_1" ), inv_prediction = 1 - prediction, inv_predicted_class = ifelse(inv_prediction > 0.5, "cl_2", "cl_1" ), ) bn_eval_1 <- evaluate( data = data_, target_col = "cl_char", prediction_cols = "prediction", type = "binomial", apply_softmax = TRUE, metrics = list("Accuracy" = TRUE) ) bn_eval_1_inv <- evaluate( data = data_, target_col = "cl_char", prediction_cols = "inv_prediction", type = "binomial", apply_softmax = TRUE, metrics = list("Accuracy" = TRUE) ) expect_equal( bn_eval_1$Accuracy, mean(data_$cl_char == data_$predicted_class) ) expect_equal( bn_eval_1$Accuracy, 0.45 ) expect_equal( bn_eval_1$`Balanced Accuracy`, 0.45 ) expect_equal( bn_eval_1_inv$Accuracy, mean(data_$cl_char == data_$inv_predicted_class) ) expect_equal( bn_eval_1_inv$Accuracy, 0.55 ) expect_equal( bn_eval_1_inv$`Balanced Accuracy`, 0.55 ) expect_equal(bn_eval_1$F1, 0.4761905, tolerance = 1e-4 ) expect_equal(bn_eval_1$Sensitivity, 0.5, tolerance = 1e-4 ) expect_equal(bn_eval_1$Specificity, 0.4, tolerance = 1e-4 ) expect_equal(bn_eval_1$`Pos Pred Value`, 0.4545455, tolerance = 1e-4 ) expect_equal(bn_eval_1$`Neg Pred Value`, 0.4444444, tolerance = 1e-4 ) expect_equal(bn_eval_1$AUC, 0.53, tolerance = 1e-4 ) expect_equal(bn_eval_1$AUC, bn_eval_1$ROC[[1]]$auc[[1]], tolerance = 1e-4 ) expect_equal(bn_eval_1$`Lower CI`, 0.2573215, tolerance = 1e-4 ) expect_equal(bn_eval_1$`Upper CI`, 0.8026785, tolerance = 1e-4 ) expect_equal(bn_eval_1$Kappa, -0.1, tolerance = 1e-4 ) expect_equal(bn_eval_1$MCC, -0.1005038, tolerance = 1e-4 ) expect_equal(bn_eval_1$`Detection Rate`, 0.25, tolerance = 1e-4 ) expect_equal(bn_eval_1$`Detection Prevalence`, 0.55, tolerance = 1e-4 ) expect_equal(bn_eval_1$Prevalence, 0.5, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$F1, 0.5263158, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$Sensitivity, 0.5, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$Specificity, 0.6, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$`Pos Pred Value`, 0.5555556, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$`Neg Pred Value`, 0.5454545, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$AUC, 0.47, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$AUC, bn_eval_1_inv$ROC[[1]]$auc[[1]], tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$`Lower CI`, 0.1973215, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$`Upper CI`, 0.7426785, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$Kappa, 
0.1, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$MCC, 0.1005038, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$`Detection Rate`, 0.25, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$`Detection Prevalence`, 0.45, tolerance = 1e-4 ) expect_equal(bn_eval_1_inv$Prevalence, 0.5, tolerance = 1e-4 ) expect_true(!identical(bn_eval_1$ROC[[1]], bn_eval_1_inv$ROC[[1]])) expect_equal( bn_eval_1$ROC[[1]]$sensitivities, c( 1, 1, 0.9, 0.9, 0.8, 0.8, 0.8, 0.7, 0.6, 0.5, 0.5, 0.5, 0.5, 0.4, 0.3, 0.2, 0.1, 0.1, 0.1, 0.1, 0 ) ) expect_equal( bn_eval_1$ROC[[1]]$specificities, c( 0, 0.1, 0.1, 0.2, 0.2, 0.3, 0.4, 0.4, 0.4, 0.4, 0.5, 0.6, 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.9, 1, 1 ) ) expect_equal( bn_eval_1_inv$ROC[[1]]$sensitivities, c( 1, 0.9, 0.9, 0.9, 0.9, 0.8, 0.7, 0.6, 0.5, 0.5, 0.5, 0.5, 0.4, 0.3, 0.2, 0.2, 0.2, 0.1, 0.1, 0, 0 ) ) expect_equal( bn_eval_1_inv$ROC[[1]]$specificities, c( 0, 0, 0.1, 0.2, 0.3, 0.3, 0.3, 0.3, 0.3, 0.4, 0.5, 0.6, 0.6, 0.6, 0.6, 0.7, 0.8, 0.8, 0.9, 0.9, 1 ) ) bn_eval_2 <- evaluate( data = data_, target_col = "cl_char", prediction_cols = "prediction", type = "binomial", apply_softmax = TRUE, positive = "cl_1", metrics = list("Accuracy" = TRUE) ) expect_equal( bn_eval_2$Accuracy, mean(data_$cl_char == data_$predicted_class) ) expect_equal( bn_eval_2$Accuracy, 0.45 ) expect_equal( bn_eval_2$`Balanced Accuracy`, 0.45 ) expect_equal(bn_eval_2$F1, 0.4210526, tolerance = 1e-4 ) expect_equal(bn_eval_2$Sensitivity, 0.4, tolerance = 1e-4 ) expect_equal(bn_eval_2$Specificity, 0.5, tolerance = 1e-4 ) expect_equal(bn_eval_2$`Pos Pred Value`, 0.4444444, tolerance = 1e-4 ) expect_equal(bn_eval_2$`Neg Pred Value`, 0.4545455, tolerance = 1e-4 ) expect_equal(bn_eval_2$AUC, 0.53, tolerance = 1e-4 ) expect_equal(bn_eval_2$`Lower CI`, 0.2573215, tolerance = 1e-4 ) expect_equal(bn_eval_2$`Upper CI`, 0.8026785, tolerance = 1e-4 ) expect_equal(bn_eval_2$Kappa, -0.1, tolerance = 1e-4 ) expect_equal(bn_eval_2$MCC, -0.1005038, tolerance = 1e-4 ) expect_equal(bn_eval_2$`Detection Rate`, 0.2, tolerance = 1e-4 ) expect_equal(bn_eval_2$`Detection Prevalence`, 0.45, tolerance = 1e-4 ) expect_equal(bn_eval_2$Prevalence, 0.5, tolerance = 1e-4 ) expect_equal(bn_eval_2$Process[[1]]$`Positive Class`, "cl_1") # not including predictions bn_eval_2_no_preds <- evaluate( data = data_, target_col = "cl_char", prediction_cols = "prediction", type = "binomial", apply_softmax = TRUE, positive = "cl_1", metrics = list("Accuracy" = TRUE), include_predictions = FALSE ) expect_equal(bn_eval_2_no_preds$Process[[1]]$`Positive Class`, "cl_1") expect_equal( colnames(bn_eval_2_no_preds), c( "Balanced Accuracy", "Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "AUC", "Lower CI", "Upper CI", "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence", "ROC", "Confusion Matrix", "Process" ) ) expect_identical(bn_eval_2_no_preds, bn_eval_2 %>% dplyr::select(-dplyr::one_of("Predictions"))) # TODO Create actual expected tests, where you curate a dataset, an aggregated version (all methods) # and make sure the results are identical in all settings. 
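# A minimal, self-contained sketch (the 'toy_id_preds' tibble is illustrative, not
# the test data_) of the kind of aggregation the ID-level evaluation below is
# expected to perform with id_method = "mean": probabilities are averaged within
# each ID and the averaged probability is then classified with the 0.5 cutoff.
toy_id_preds <- tibble::tibble(
  id = c("a", "a", "b", "b"),
  prob = c(0.2, 0.4, 0.7, 0.9)
)
toy_id_means <- toy_id_preds %>%
  dplyr::group_by(id) %>%
  dplyr::summarise(prob = mean(prob), .groups = "drop") %>%
  dplyr::mutate(predicted_class = ifelse(prob > 0.5, "cl_2", "cl_1"))
expect_equal(toy_id_means$prob, c(0.3, 0.8))
expect_equal(toy_id_means$predicted_class, c("cl_1", "cl_2"))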
# ID level data_ <- data_ %>% dplyr::mutate(id = factor(rep(1:10, each = 2))) bn_eval_3 <- evaluate( data = data_, target_col = "cl_char", prediction_cols = "prediction", id_col = "id", id_method = "mean", type = "binomial", apply_softmax = TRUE ) ## Testing 'bn_eval_3' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Testing class expect_equal( class(bn_eval_3), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( bn_eval_3[["Balanced Accuracy"]], 0.6, tolerance = 1e-4) expect_equal( bn_eval_3[["F1"]], 0.66667, tolerance = 1e-4) expect_equal( bn_eval_3[["Sensitivity"]], 0.8, tolerance = 1e-4) expect_equal( bn_eval_3[["Specificity"]], 0.4, tolerance = 1e-4) expect_equal( bn_eval_3[["Pos Pred Value"]], 0.57143, tolerance = 1e-4) expect_equal( bn_eval_3[["Neg Pred Value"]], 0.66667, tolerance = 1e-4) expect_equal( bn_eval_3[["AUC"]], 0.52, tolerance = 1e-4) expect_equal( bn_eval_3[["Lower CI"]], 0.10515, tolerance = 1e-4) expect_equal( bn_eval_3[["Upper CI"]], 0.93485, tolerance = 1e-4) expect_equal( bn_eval_3[["Kappa"]], 0.2, tolerance = 1e-4) expect_equal( bn_eval_3[["MCC"]], 0.21822, tolerance = 1e-4) expect_equal( bn_eval_3[["Detection Rate"]], 0.4, tolerance = 1e-4) expect_equal( bn_eval_3[["Detection Prevalence"]], 0.7, tolerance = 1e-4) expect_equal( bn_eval_3[["Prevalence"]], 0.5, tolerance = 1e-4) expect_equal( bn_eval_3[["Process"]][[1]][["Positive Class"]], "cl_2", fixed = TRUE) # Testing column names expect_equal( names(bn_eval_3), c("Balanced Accuracy", "Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "AUC", "Lower CI", "Upper CI", "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(bn_eval_3), c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(bn_eval_3), c("double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(bn_eval_3), c(1L, 19L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(bn_eval_3)), character(0), fixed = TRUE) ## Finished testing 'bn_eval_3' #### ## Testing 'bn_eval_3$Predictions[[1]]' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Testing class expect_equal( class(bn_eval_3$Predictions[[1]]), c("tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( bn_eval_3$Predictions[[1]][["Target"]], c("cl_1", "cl_1", "cl_1", "cl_1", "cl_1", "cl_2", "cl_2", "cl_2", "cl_2", "cl_2"), fixed = TRUE) expect_equal( bn_eval_3$Predictions[[1]][["Prediction"]], c(0.31882, 0.74053, 0.55004, 0.80274, 0.34545, 0.19127, 0.53556, 0.63377, 0.85476, 0.57874), tolerance = 1e-4) expect_equal( bn_eval_3$Predictions[[1]][["SD"]], c(0.07539, 0.23713, 0.49265, 0.20073, 0.40116, 0.0208, 0.2142, 0.19243, 0.19395, 0.28101), tolerance = 1e-4) expect_equal( bn_eval_3$Predictions[[1]][["Predicted Class"]], c("cl_1", "cl_2", "cl_2", "cl_2", "cl_1", "cl_1", "cl_2", "cl_2", "cl_2", "cl_2"), fixed = TRUE) expect_equal( bn_eval_3$Predictions[[1]][["id"]], structure(1:10, .Label = c("1", "2", "3", "4", "5", 
"6", "7", "8", "9", "10"), class = "factor")) expect_equal( bn_eval_3$Predictions[[1]][["id_method"]], c("mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean"), fixed = TRUE) # Testing column names expect_equal( names(bn_eval_3$Predictions[[1]]), c("Target", "Prediction", "SD", "Predicted Class", "id", "id_method"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(bn_eval_3$Predictions[[1]]), c("character", "numeric", "numeric", "character", "factor", "character"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(bn_eval_3$Predictions[[1]]), c("character", "double", "double", "character", "integer", "character"), fixed = TRUE) # Testing dimensions expect_equal( dim(bn_eval_3$Predictions[[1]]), c(10L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(bn_eval_3$Predictions[[1]])), character(0), fixed = TRUE) ## Finished testing 'bn_eval_3$Predictions[[1]]' #### ## Testing 'bn_eval_3$`Confusion Matrix`[[1]]' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Assigning output output_19148 <- bn_eval_3$`Confusion Matrix`[[1]] # Testing class expect_equal( class(output_19148), c("tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19148[["Prediction"]], c("cl_1", "cl_2", "cl_1", "cl_2"), fixed = TRUE) expect_equal( output_19148[["Target"]], c("cl_1", "cl_1", "cl_2", "cl_2"), fixed = TRUE) expect_equal( output_19148[["Pos_cl_1"]], c("TP", "FN", "FP", "TN"), fixed = TRUE) expect_equal( output_19148[["Pos_cl_2"]], c("TN", "FP", "FN", "TP"), fixed = TRUE) expect_equal( output_19148[["N"]], c(2, 3, 1, 4), tolerance = 1e-4) # Testing column names expect_equal( names(output_19148), c("Prediction", "Target", "Pos_cl_1", "Pos_cl_2", "N"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19148), c("character", "character", "character", "character", "integer"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19148), c("character", "character", "character", "character", "integer"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19148), 4:5) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19148)), character(0), fixed = TRUE) ## Finished testing 'bn_eval_3$`Confusion Matrix`[[1]]' #### bn_eval_3_no_preds <- evaluate( data = data_, target_col = "cl_char", prediction_cols = "prediction", id_col = "id", id_method = "mean", type = "binomial", apply_softmax = TRUE, include_predictions = FALSE ) expect_equal( colnames(bn_eval_3_no_preds), c( "Balanced Accuracy", "Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "AUC", "Lower CI", "Upper CI", "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence", "ROC", "Confusion Matrix", "Process" ) ) # TODO ADD TESTS HERE! 
# Majority vote bn_eval_4 <- evaluate( data = data_, target_col = "cl_char", prediction_cols = "prediction", id_col = "id", id_method = "majority", type = "binomial", apply_softmax = FALSE ) expect_equal( colnames(bn_eval_4), c( "Balanced Accuracy", "Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "AUC", "Lower CI", "Upper CI", "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence", "Predictions", "ROC", "Confusion Matrix", "Process" ) ) expect_equal(bn_eval_4$`Balanced Accuracy`, 0.5) expect_equal(bn_eval_4$F1, 0.6153846, tolerance = 1e-4) expect_equal(bn_eval_4$Sensitivity, 0.8, tolerance = 1e-4) expect_equal(bn_eval_4$Specificity, 0.2, tolerance = 1e-4) expect_equal(bn_eval_4$`Pos Pred Value`, 0.5, tolerance = 1e-4) expect_equal(bn_eval_4$`Neg Pred Value`, 0.5, tolerance = 1e-4) expect_equal(bn_eval_4$AUC, 0.42, tolerance = 1e-4) expect_equal(bn_eval_4$`Lower CI`, 0.05437346, tolerance = 1e-4) expect_equal(bn_eval_4$`Upper CI`, 0.7856265, tolerance = 1e-4) expect_equal(bn_eval_4$Kappa, 0, tolerance = 1e-4) expect_equal(bn_eval_4$MCC, 0, tolerance = 1e-4) expect_equal(bn_eval_4$`Detection Rate`, 0.4, tolerance = 1e-4) expect_equal(bn_eval_4$`Detection Prevalence`, 0.8, tolerance = 1e-4) expect_equal(bn_eval_4$Prevalence, 0.5, tolerance = 1e-4) expect_equal(bn_eval_4[["Process"]][[1]]$`Positive Class`, "cl_2") expect_equal(length(bn_eval_4$Predictions), 1, tolerance = 1e-4) expect_equal(bn_eval_4$Predictions[[1]]$Target, c( "cl_1", "cl_1", "cl_1", "cl_1", "cl_1", "cl_2", "cl_2", "cl_2", "cl_2", "cl_2" ), tolerance = 1e-4 ) expect_equal(bn_eval_4$Predictions[[1]]$Prediction, c( 1e-40, 1e+00, 5e-01, 1e+00, 5e-01, 1e-40, 5e-01, 5e-01, 1e+00, 5e-01 ), tolerance = 1e-4 ) expect_equal(bn_eval_4$Predictions[[1]]$`Predicted Class`, c( "cl_1", "cl_2", "cl_2", "cl_2", "cl_2", "cl_1", "cl_2", "cl_2", "cl_2", "cl_2" ), tolerance = 1e-4 ) expect_equal(bn_eval_4$Predictions[[1]]$id, factor(1:10), tolerance = 1e-4 ) expect_equal(bn_eval_4$Predictions[[1]]$id_method, rep("majority", 10), tolerance = 1e-4 ) data_2 <- data_ %>% dplyr::mutate(fold_ = 1) %>% dplyr::bind_rows(data_ %>% dplyr::mutate(fold_ = 2)) bn_eval_5 <- evaluate( data = data_2 %>% dplyr::group_by(fold_), target_col = "cl_char", prediction_cols = "prediction", id_col = "id", id_method = "majority", type = "binomial", apply_softmax = FALSE ) # TODO Add tests here that grouped dataframes work in binomial! 
# Errors expect_error(evaluate( data = data_ %>% dplyr::mutate( cl = ifelse(dplyr::row_number() == 5, 3, cl), cl_char = ifelse(dplyr::row_number() == 5, "cl_3", cl_char) ), target_col = "cl_char", prediction_cols = "prediction", type = "binomial", apply_softmax = TRUE, metrics = list("Accuracy" = TRUE) ), "The target column must maximally contain 2 levels.", fixed = TRUE ) }) test_that("softmax works in multiclass_probability_tibble()", { # Test softmax was applied correctly in multiclass_probability_tibble xpectr::set_test_seed(1) random_probabilities_1 <- multiclass_probability_tibble( num_classes = 3, num_observations = 20, apply_softmax = TRUE ) xpectr::set_test_seed(1) random_probabilities_2 <- multiclass_probability_tibble( num_classes = 3, num_observations = 20, apply_softmax = FALSE ) %>% softmax() expect_equal(sum(random_probabilities_1), sum(random_probabilities_2)) expect_equal(sum(random_probabilities_1), 20) # due to softmax, each row sums to 1 expect_equal(sum(softmax_row(c(1, 2, 3, 4))), 1) expect_equal(as.vector(t(softmax_row(c(1, 2, 3, 4)))), c(0.03205860, 0.08714432, 0.23688282, 0.64391426), tolerance = 1e-4 ) expect_equal(colnames(softmax_row(c(1, 2, 3, 4))), c("V1", "V2", "V3", "V4")) }) test_that("probability nesting works in multinomial evaluate", { xpectr::set_test_seed(1) random_probabilities_1 <- multiclass_probability_tibble( num_classes = 3, num_observations = 20, apply_softmax = TRUE ) system.time({ manually_nested_probs <- random_probabilities_1 %>% dplyr::mutate(ind = 1:20) %>% dplyr::group_by(ind) %>% legacy_nest(1:3) %>% dplyr::pull(.data$data) }) # Changed to basically do the same as above system.time({ package_nested_probs <- random_probabilities_1 %>% nest_rowwise() }) # group_nest adds an attribute attr(package_nested_probs, "ptype") <- NULL expect_true(identical(manually_nested_probs, as.list(package_nested_probs))) unnested <- package_nested_probs %>% dplyr::bind_rows() expect_true(identical(random_probabilities_1, unnested)) }) test_that("gaussian evaluations are correct in evaluate()", { xpectr::set_test_seed(1) score_model_1 <- lm("score ~ diagnosis", data = participant.scores) score_model_2 <- lm("score ~ diagnosis+age", data = participant.scores) # summary(score_model_1) score_predictions <- stats::predict(score_model_1, participant.scores, type = "response", allow.new.levels = TRUE ) eval_data <- participant.scores eval_data[["score_predictions"]] <- score_predictions expect_error(evaluate(eval_data, target_col = "score", prediction_cols = "score_predictions", models = list(score_model_1), type = "gaussian", metrics = "all" ), class = "lifecycle_error_deprecated" ) e1 <- evaluate(eval_data, target_col = "score", prediction_cols = "score_predictions", type = "gaussian", metrics = "all" ) expect_equal( colnames(e1), c("RMSE", "MAE", "NRMSE(RNG)","NRMSE(IQR)","NRMSE(STD)","NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE", "TSE", "Predictions", "Process" ) ) expect_equal(e1$RMSE, 16.16881, tolerance = 1e-4) expect_equal(e1$MAE, 13.47778, tolerance = 1e-4) expect_equal(e1$`NRMSE(RNG)`, 0.2277298, tolerance = 1e-4) expect_equal(e1$`NRMSE(IQR)`, 0.5774577, tolerance = 1e-4) expect_equal(e1$`NRMSE(STD)`, 0.8380279, tolerance = 1e-4) expect_equal(e1$`NRMSE(AVG)`, 0.417080334230652, tolerance = 1e-4) expect_equal(e1$RMSLE, 0.4677011, tolerance = 1e-4) expect_equal(e1$MALE, 0.3768815, tolerance = 1e-4) expect_equal(e1$RAE, 0.8666762, tolerance = 1e-4) expect_equal(e1$RSE, 0.7265077, tolerance = 1e-4) expect_equal(e1$RRSE, 0.8523542, 
tolerance = 1e-4) expect_equal(e1$MAPE, 0.4836546, tolerance = 1e-4) expect_equal(e1$MSE, 261.4306, tolerance = 1e-4) expect_equal(e1$TAE, 404.3333, tolerance = 1e-4) expect_equal(e1$TSE, 7842.917, tolerance = 1e-4) expect_equal(e1$Predictions[[1]][["Target"]], eval_data$score) expect_equal(e1$Predictions[[1]][["Prediction"]], eval_data$score_predictions) # Not passing a model # This should remove the metrics that depend on the models to be passed e2 <- evaluate(eval_data, target_col = "score", prediction_cols = "score_predictions", type = "gaussian", metrics = "all" ) expect_equal( colnames(e2), c("RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE", "TSE", "Predictions", "Process") ) expect_equal(e2$RMSE, 16.16881, tolerance = 1e-4) expect_equal(e2$MAE, 13.47778, tolerance = 1e-4) expect_equal(e2$`NRMSE(RNG)`, 0.227729778737206, tolerance = 1e-4) expect_equal(e2$`NRMSE(IQR)`, 0.577457653226487, tolerance = 1e-4) expect_equal(e2$`NRMSE(STD)`, 0.838027891023239, tolerance = 1e-4) expect_equal(e2$`NRMSE(AVG)`, 0.417080334230652, tolerance = 1e-4) expect_equal(e2$RMSLE, 0.4677011, tolerance = 1e-4) expect_equal(e2$MALE, 0.3768815, tolerance = 1e-4) expect_equal(e2$Predictions[[1]][["Target"]], eval_data$score) expect_equal(e2$Predictions[[1]][["Prediction"]], eval_data$score_predictions) # Grouped with multiple models eval_data_2 <- eval_data score_predictions_2 <- stats::predict(score_model_2, participant.scores, type = "response", allow.new.levels = TRUE ) eval_data_2[["score_predictions"]] <- score_predictions_2 e3 <- evaluate(eval_data_2, target_col = "score", prediction_cols = "score_predictions", type = "gaussian", metrics = "all" ) expect_equal( colnames(e3), c("RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE", "TSE", "Predictions", "Process") ) eval_data_3 <- dplyr::bind_rows( eval_data %>% dplyr::mutate(fold_ = 1), eval_data_2 %>% dplyr::mutate(fold_ = 2) ) %>% dplyr::group_by(fold_) # eval_data_3 %>% dplyr::group_keys() # eval_data_3 %>% dplyr::group_indices() e4 <- evaluate(eval_data_3, target_col = "score", prediction_cols = "score_predictions", type = "gaussian", metrics = "all" ) %>% dplyr::mutate(fold_ = as.factor(.data$fold_)) expect_equal( colnames(e4), c("fold_", "RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE", "TSE", "Predictions", "Process") ) e1_e3 <- dplyr::bind_rows(e1, e3) %>% dplyr::mutate(fold_ = factor(1:2)) %>% dplyr::select("fold_", dplyr::everything()) e1_e3$Predictions[[1]] <- e1_e3$Predictions[[1]] %>% dplyr::as_tibble() %>% tibble::add_column("fold_" = 1, .before = "Target") e1_e3$Predictions[[2]] <- e1_e3$Predictions[[2]] %>% dplyr::as_tibble() %>% tibble::add_column("fold_" = 2, .before = "Target") expect_true(length(setdiff(colnames(e4), colnames(e1_e3))) == 0) expect_identical(e4, e1_e3) expect_equal(e4$fold_, factor(c(1, 2))) expect_equal(e4$RMSE, c(16.16881, 16.12762), tolerance = 1e-4) expect_equal(e4$MAE, c(13.47778, 13.28942), tolerance = 1e-4) expect_equal(e4$`NRMSE(RNG)`, c(0.227729778737206, 0.227149512023389), tolerance = 1e-4) expect_equal(e4$`NRMSE(IQR)`, c(0.577457653226487, 0.575986262630736), tolerance = 1e-4) expect_equal(e4$`NRMSE(STD)`, c(0.838027891023239, 0.835892554603436), tolerance = 1e-4) expect_equal(e4$`NRMSE(AVG)`, c(0.417080334230652, 0.416017592957711), tolerance = 1e-4) 
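# For reference, the NRMSE variants above are the RMSE normalized by the range,
# IQR, standard deviation and mean of the targets (as the names suggest and the
# numbers confirm). A manual sketch against the ungrouped evaluation; 'manual_rmse'
# is an illustrative helper, not part of the package API:
manual_rmse <- sqrt(mean((eval_data$score - eval_data$score_predictions)^2))
expect_equal(e1$RMSE, manual_rmse, tolerance = 1e-4)
expect_equal(e1$`NRMSE(RNG)`, manual_rmse / diff(range(eval_data$score)), tolerance = 1e-4)
expect_equal(e1$`NRMSE(AVG)`, manual_rmse / mean(eval_data$score), tolerance = 1e-4)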
expect_equal(e4$RMSLE, c(0.4677011, 0.4666284), tolerance = 1e-4) expect_equal(e4$MALE, c(0.3768815, 0.3723774), tolerance = 1e-4) expect_equal(e4$RAE, c(0.866676193198057, 0.8545638890023), tolerance = 1e-4) expect_equal(e4$RSE, c(0.852354191878764, 0.850182351337433)^2, tolerance = 1e-4) expect_equal(e4$RRSE, c(0.852354191878764, 0.850182351337433), tolerance = 1e-4) expect_equal(e4$MAPE, c(0.483654620140199, 0.478764805777454), tolerance = 1e-4) expect_equal(e4$MSE, c(261.430555555556, 260.099976995629), tolerance = 1e-4) expect_equal(e4$TAE, c(404.333333333333, 398.68253968254), tolerance = 1e-4) expect_equal(e4$TSE, c(7842.91666666667, 7802.99930986888), tolerance = 1e-4) expect_equal(e4$Predictions[[1]]$Target, c( 10, 24, 45, 24, 40, 67, 15, 30, 40, 35, 50, 78, 24, 54, 62, 14, 25, 30, 11, 35, 41, 16, 32, 44, 33, 53, 66, 29, 55, 81 ), tolerance = 1e-4 ) expect_equal(e4$Predictions[[2]]$Target, c( 10, 24, 45, 24, 40, 67, 15, 30, 40, 35, 50, 78, 24, 54, 62, 14, 25, 30, 11, 35, 41, 16, 32, 44, 33, 53, 66, 29, 55, 81 ), tolerance = 1e-4 ) expect_equal(e4$Predictions[[1]]$Prediction, c( 30.66667, 30.66667, 30.66667, 50.91667, 50.91667, 50.91667, 30.66667, 30.66667, 30.66667, 50.91667, 50.91667, 50.91667, 30.66667, 30.66667, 30.66667, 30.66667, 30.66667, 30.66667, 30.66667, 30.66667, 30.66667, 30.66667, 30.66667, 30.66667, 50.91667, 50.91667, 50.91667, 50.91667, 50.91667, 50.91667 ), tolerance = 1e-4 ) expect_equal(e4$Predictions[[2]]$Prediction, c( 29.17288, 29.17288, 29.17288, 50.16977, 50.16977, 50.16977, 30.33471, 30.33471, 30.33471, 49.83782, 49.83782, 49.83782, 31.16460, 31.16460, 31.16460, 30.99862, 30.99862, 30.99862, 32.99034, 32.99034, 32.99034, 29.33885, 29.33885, 29.33885, 51.99551, 51.99551, 51.99551, 51.66356, 51.66356, 51.66356 ), tolerance = 1e-4 ) # ID evaluation age_model_1 <- lm("age ~ diagnosis", participant.scores) age_model_2 <- lm("age ~ diagnosis + score", participant.scores) age_predictions_1 <- stats::predict(age_model_1, participant.scores, type = "response", allow.new.levels = TRUE ) age_predictions_2 <- stats::predict(age_model_2, participant.scores, type = "response", allow.new.levels = TRUE ) id_eval_data_4 <- participant.scores %>% dplyr::mutate( fold_ = 1, predicted_age = age_predictions_1 ) %>% dplyr::bind_rows(participant.scores %>% dplyr::mutate( fold_ = 2, predicted_age = age_predictions_2 )) %>% dplyr::group_by(fold_) e5 <- evaluate(id_eval_data_4, target_col = "age", prediction_cols = "predicted_age", id_col = "participant", id_method = "mean", type = "gaussian", metrics = "all" ) ## Testing 'e5' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Testing class expect_equal( class(e5), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( e5[["fold_"]], c(1, 2), tolerance = 1e-4) expect_equal( e5[["RMSE"]], c(6.94982, 6.91723), tolerance = 1e-4) expect_equal( e5[["MAE"]], c(6, 5.9356), tolerance = 1e-4) expect_equal( e5[["NRMSE(RNG)"]], c(0.30217, 0.30075), tolerance = 1e-4) expect_equal( e5[["NRMSE(IQR)"]], c(0.66189, 0.65878), tolerance = 1e-4) expect_equal( e5[["NRMSE(STD)"]], c(0.94342, 0.939), tolerance = 1e-4) expect_equal( e5[["NRMSE(AVG)"]], c(0.24471, 0.24356), tolerance = 1e-4) expect_equal( e5[["RSE"]], c(0.98894, 0.97969), tolerance = 1e-4) expect_equal( e5[["RRSE"]], c(0.99446, 0.98979), tolerance = 1e-4) expect_equal( e5[["RAE"]], c(1, 0.98927), tolerance = 1e-4) expect_equal( e5[["RMSLE"]], c(0.23417, 0.23293), tolerance = 1e-4) expect_equal( e5[["MALE"]], c(0.20614, 0.20397), 
tolerance = 1e-4) expect_equal( e5[["MAPE"]], c(0.22259, 0.22016), tolerance = 1e-4) expect_equal( e5[["MSE"]], c(48.3, 47.8481), tolerance = 1e-4) expect_equal( e5[["TAE"]], c(60, 59.35604), tolerance = 1e-4) expect_equal( e5[["TSE"]], c(483, 478.48096), tolerance = 1e-4) # Testing column names expect_equal( names(e5), c("fold_", "RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE", "TSE", "Predictions", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(e5), c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(e5), c("double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(e5), c(2L, 18L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(e5)), character(0), fixed = TRUE) ## Finished testing 'e5' #### ## Testing 'e5$Predictions[[1]]' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Testing class expect_equal( class(e5$Predictions[[1]]), c("tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( e5$Predictions[[1]][["fold_"]], c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), tolerance = 1e-4) expect_equal( e5$Predictions[[1]][["Target"]], c(20, 23, 27, 21, 32, 31, 43, 21, 34, 32), tolerance = 1e-4) expect_equal( e5$Predictions[[1]][["Prediction"]], c(29, 27.5, 29, 27.5, 29, 29, 29, 29, 27.5, 27.5), tolerance = 1e-4) expect_equal( e5$Predictions[[1]][["SD"]], c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), tolerance = 1e-4) expect_equal( e5$Predictions[[1]][["participant"]], structure(1:10, .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "factor")) expect_equal( e5$Predictions[[1]][["id_method"]], c("mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean"), fixed = TRUE) # Testing column names expect_equal( names(e5$Predictions[[1]]), c("fold_", "Target", "Prediction", "SD", "participant", "id_method"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(e5$Predictions[[1]]), c("numeric", "numeric", "numeric", "numeric", "factor", "character"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(e5$Predictions[[1]]), c("double", "double", "double", "double", "integer", "character"), fixed = TRUE) # Testing dimensions expect_equal( dim(e5$Predictions[[1]]), c(10L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(e5$Predictions[[1]])), character(0), fixed = TRUE) ## Finished testing 'e5$Predictions[[1]]' #### ## Testing 'e5$Predictions[[2]]' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Testing class expect_equal( class(e5$Predictions[[2]]), c("tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( e5$Predictions[[2]][["fold_"]], c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2), tolerance = 1e-4) expect_equal( e5$Predictions[[2]][["Target"]], c(20, 23, 27, 21, 32, 31, 43, 21, 34, 32), tolerance = 1e-4) expect_equal( e5$Predictions[[2]][["Prediction"]], c(28.86712, 27.27768, 28.92845, 27.60477, 29.49063, 28.7649, 28.94889, 29, 27.49233, 27.62521), tolerance = 1e-4) expect_equal( e5$Predictions[[2]][["SD"]], c(0.5402, 0.66644, 0.38585, 
0.66926, 0.61431, 0.251, 0.48679, 0.43076, 0.50975, 0.79728), tolerance = 1e-4) expect_equal( e5$Predictions[[2]][["participant"]], structure(1:10, .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "factor")) expect_equal( e5$Predictions[[2]][["id_method"]], c("mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean"), fixed = TRUE) # Testing column names expect_equal( names(e5$Predictions[[2]]), c("fold_", "Target", "Prediction", "SD", "participant", "id_method"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(e5$Predictions[[2]]), c("numeric", "numeric", "numeric", "numeric", "factor", "character"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(e5$Predictions[[2]]), c("double", "double", "double", "double", "integer", "character"), fixed = TRUE) # Testing dimensions expect_equal( dim(e5$Predictions[[2]]), c(10L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(e5$Predictions[[2]])), character(0), fixed = TRUE) ## Finished testing 'e5$Predictions[[2]]' #### # Not including predictions in the output e6 <- evaluate(id_eval_data_4, target_col = "age", prediction_cols = "predicted_age", id_col = "participant", id_method = "mean", type = "gaussian", metrics = "all", include_predictions = FALSE ) expect_equal( colnames(e6), c("fold_", "RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE", "TSE", "Process") ) }) test_that("evaluate() treats dfs and tbls the same", { # Gaussian # Binomial # Multinomial xpectr::set_test_seed(1) random_probabilities <- multiclass_probability_tibble( num_classes = 5, num_observations = 20, apply_softmax = FALSE # Test with as well ) expect_equal(sum(random_probabilities), 51.78471, tolerance = 1e-5) data_ <- random_probabilities %>% dplyr::mutate( cl = as.factor(rep(1:5, each = 4)), cl_char = paste0("cl_", cl) ) %>% dplyr::rename_at(dplyr::vars(paste0("class_", 1:5)), .funs = ~ paste0("cl_", 1:5)) mn_eval_1_tbl <- evaluate( data = data_, target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), type = "multinomial", apply_softmax = TRUE ) mn_eval_1_df <- evaluate( data = as.data.frame(data_), target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), type = "multinomial", apply_softmax = TRUE ) mn_eval_1_dt <- evaluate( # TODO Need to test this for gaussian and binomial as well!!! data = as.data.table(data_), target_col = "cl_char", prediction_cols = paste0("cl_", 1:5), type = "multinomial", apply_softmax = TRUE ) expect_identical( mn_eval_1_df$`Class Level Results`[[1]]$`Confusion Matrix`, mn_eval_1_tbl$`Class Level Results`[[1]]$`Confusion Matrix` ) expect_identical( mn_eval_1_df$`Confusion Matrix`, mn_eval_1_tbl$`Confusion Matrix` ) expect_identical(mn_eval_1_tbl, mn_eval_1_df) # TODO Find out why the group_nest attribute is only added to DT ? # attr(mn_eval_1_dt$Predictions[[1]]$Prediction, "ptype") <- NULL # There is a "attr(*, ".internal.selfref")= " attribute added to the # predictions list with the data.table. 
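# A small illustration (toy objects; an assumption about why identical() is avoided
# just below): data.table attaches extra attributes such as ".internal.selfref",
# so value-equal objects can fail identical() even though their contents match.
toy_df <- data.frame(x = 1:3)
toy_dt <- data.table::as.data.table(toy_df)
expect_false(identical(attributes(toy_df), attributes(toy_dt)))
expect_equal(toy_df$x, toy_dt$x)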
expect_identical( as.list(mn_eval_1_tbl$Predictions[[1]]$Prediction), as.list(mn_eval_1_dt$Predictions[[1]]$Prediction) ) mn_eval_1_dt$Predictions <- NULL mn_eval_1_tbl$Predictions <- NULL expect_identical(mn_eval_1_dt, mn_eval_1_tbl) }) test_that("evaluate() works with wines dataset", { xpectr::set_test_seed(1) testthat::skip_on_cran() # Load wines dataset w <- wines varieties <- unique(as.character(w$Variety)) ## Create All_x one-vs-all evaluations to_evaluate <- plyr::llply(varieties, function(vary) { d <- w d[["label"]] <- ifelse(as.character(d$Variety) == vary, 1, 0) d[["current_variety"]] <- vary d[["probability"]] <- matrix(0, nrow = 1, ncol = length(varieties)) %>% as.data.frame() %>% setNames(varieties) %>% dplyr::mutate_at(.vars = vary, .funs = ~1) %>% list() d }) %>% dplyr::bind_rows() %>% dplyr::mutate(prediction = 1) evaluations <- to_evaluate %>% dplyr::group_by(current_variety) %>% evaluate( target_col = "label", prediction_cols = "prediction", type = "binomial", metrics = list("Accuracy" = TRUE) ) %>% dplyr::arrange(current_variety) expect_equal(sort(evaluations$current_variety), sort(varieties)) expect_equal(evaluations$`Balanced Accuracy`, rep(0.5, length(varieties))) expect_equal(evaluations$Accuracy, c( 0.0271739130434783, 0.0217391304347826, 0.0380434782608696, 0.0108695652173913, 0.00815217391304348, 0.0135869565217391, 0.513586956521739, 0.0570652173913043, 0.201086956521739, 0.108695652173913 ), tolerance = 1e-4 ) expect_equal(evaluations$F1, c( 0.0529100529100529, 0.0425531914893617, 0.0732984293193717, 0.021505376344086, 0.0161725067385445, 0.0268096514745308, 0.678635547576302, 0.107969151670951, 0.334841628959276, 0.196078431372549 ), tolerance = 1e-4 ) expect_equal(evaluations$Sensitivity, c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), tolerance = 1e-4) expect_equal(evaluations$Specificity, c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), tolerance = 1e-4) expect_equal(evaluations$`Pos Pred Value`, c( 0.0271739130434783, 0.0217391304347826, 0.0380434782608696, 0.0108695652173913, 0.00815217391304348, 0.0135869565217391, 0.513586956521739, 0.0570652173913043, 0.201086956521739, 0.108695652173913 ), tolerance = 1e-4 ) expect_equal(evaluations$`Neg Pred Value`, c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN), tolerance = 1e-4 ) expect_equal(evaluations$AUC, c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), tolerance = 1e-4 ) expect_equal(evaluations$`Lower CI`, c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), tolerance = 1e-4 ) expect_equal(evaluations$`Upper CI`, c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), tolerance = 1e-4 ) expect_equal(evaluations$Kappa, c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), tolerance = 1e-4 ) expect_equal(evaluations$MCC, c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), tolerance = 1e-4 ) expect_equal(evaluations$`Detection Rate`, c( 0.0271739130434783, 0.0217391304347826, 0.0380434782608696, 0.0108695652173913, 0.00815217391304348, 0.0135869565217391, 0.513586956521739, 0.0570652173913043, 0.201086956521739, 0.108695652173913 ), tolerance = 1e-4 ) expect_equal(evaluations$`Detection Prevalence`, c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), tolerance = 1e-4 ) expect_equal(evaluations$Prevalence, c( 0.0271739130434783, 0.0217391304347826, 0.0380434782608696, 0.0108695652173913, 0.00815217391304348, 0.0135869565217391, 0.513586956521739, 0.0570652173913043, 0.201086956521739, 0.108695652173913 ), tolerance = 1e-4 ) expect_equal(evaluations$`Confusion Matrix`[[1]]$N, c(0L, 358L, 0L, 10L), tolerance = 1e-4 ) ## Create All_x multinomial evaluations mn_evaluations <- to_evaluate %>% 
legacy_unnest(probability) %>% dplyr::group_by(current_variety) %>% evaluate( target_col = "Variety", prediction_cols = varieties, type = "multinomial", metrics = list("Accuracy" = TRUE) ) %>% dplyr::arrange(current_variety) expect_equal( mn_evaluations$`Class Level Results`[[1]]$Accuracy, c(0.0271739130434783, 0.978260869565217, 0.96195652173913, 0.989130434782609, 0.991847826086957, 0.986413043478261, 0.486413043478261, 0.942934782608696, 0.798913043478261, 0.891304347826087), tolerance = 1e-4 ) expect_equal( mn_evaluations$`Class Level Results`[[1]]$F1, c(0.0529100529100529, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN), tolerance = 1e-4 ) expect_equal( mn_evaluations$`Overall Accuracy`, c( 0.0271739130434783, 0.0217391304347826, 0.0380434782608696, 0.0108695652173913, 0.00815217391304348, 0.0135869565217391, 0.513586956521739, 0.0570652173913043, 0.201086956521739, 0.108695652173913 ), tolerance = 1e-4 ) expect_equal(mn_evaluations$`Balanced Accuracy`, c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), tolerance = 1e-4 ) expect_equal(mn_evaluations$Accuracy, c( 0.805434782608696, 0.804347826086957, 0.807608695652174, 0.802173913043478, 0.801630434782609, 0.802717391304348, 0.902717391304348, 0.811413043478261, 0.840217391304348, 0.821739130434783 ), tolerance = 1e-4 ) expect_equal(mn_evaluations$F1, c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN), tolerance = 1e-4 ) expect_equal(mn_evaluations$Sensitivity, c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), tolerance = 1e-4 ) expect_equal(mn_evaluations$Specificity, c(0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9), tolerance = 1e-4 ) expect_equal(mn_evaluations$`Pos Pred Value`, c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN), tolerance = 1e-4 ) expect_equal(mn_evaluations$`Neg Pred Value`, c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN), tolerance = 1e-4 ) expect_equal(mn_evaluations$Kappa, c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), tolerance = 1e-4 ) expect_equal(mn_evaluations$MCC, c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), tolerance = 1e-4 ) expect_equal(mn_evaluations$`Detection Rate`, c( 0.00271739130434783, 0.00217391304347826, 0.00380434782608696, 0.00108695652173913, 0.000815217391304348, 0.00135869565217391, 0.0513586956521739, 0.00570652173913043, 0.0201086956521739, 0.0108695652173913 ), tolerance = 1e-4 ) expect_equal(mn_evaluations$`Detection Prevalence`, c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), tolerance = 1e-4 ) expect_equal(mn_evaluations$Prevalence, c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), tolerance = 1e-4 ) expect_equal(mn_evaluations$`Confusion Matrix`[[1]]$N, c( 10L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 14L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 189L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 21L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 74L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 40L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L ), tolerance = 1e-4 ) # Check that these All_x evaluations are the same in baseline() xpectr::set_test_seed(1) suppressWarnings(bsl <- baseline( w, dependent_col = "Variety", n = 1, family = "multinomial", metrics = list("Accuracy" = TRUE) )) all_x_baseline_results <- bsl$summarized_metrics[bsl$summarized_metrics$Measure %in% paste0("All_", varieties), ] expect_equal(all_x_baseline_results$`Overall Accuracy`, evaluations$Accuracy, tolerance = 1e-4) expect_equal(mn_evaluations$`Overall Accuracy`, evaluations$Accuracy, tolerance = 1e-4) 
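# Note on why the 'Overall Accuracy' and binomial 'Accuracy' columns above match:
# an "always predict class X" classifier is correct exactly when the target is X,
# so its accuracy equals the prevalence of X. Tiny base-R illustration
# (toy vector, not the wines data):
toy_targets_ <- c("a", "a", "b", "c")
expect_equal(mean(toy_targets_ == "a"), 0.5) # accuracy of always predicting "a" == prevalence of "a"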
expect_equal(all_x_baseline_results$`Balanced Accuracy`, mn_evaluations$`Balanced Accuracy`, tolerance = 1e-4) expect_equal(all_x_baseline_results$Accuracy, mn_evaluations$Accuracy, tolerance = 1e-4) expect_equal(all_x_baseline_results$F1, mn_evaluations$F1, tolerance = 1e-4) expect_equal(all_x_baseline_results$Sensitivity, mn_evaluations$Sensitivity, tolerance = 1e-4) expect_equal(all_x_baseline_results$Specificity, mn_evaluations$Specificity, tolerance = 1e-4) expect_equal(all_x_baseline_results$`Pos Pred Value`, mn_evaluations$`Pos Pred Value`, tolerance = 1e-4) expect_equal(all_x_baseline_results$`Neg Pred Value`, mn_evaluations$`Neg Pred Value`, tolerance = 1e-4) expect_equal(all_x_baseline_results$Kappa, mn_evaluations$Kappa, tolerance = 1e-4) expect_equal(all_x_baseline_results$MCC, mn_evaluations$MCC, tolerance = 1e-4) expect_equal(all_x_baseline_results$`Detection Rate`, mn_evaluations$`Detection Rate`, tolerance = 1e-4) expect_equal(all_x_baseline_results$`Detection Prevalence`, mn_evaluations$`Detection Prevalence`, tolerance = 1e-4) expect_equal(all_x_baseline_results$Prevalence, mn_evaluations$Prevalence, tolerance = 1e-4) }) test_that("evaluate() is agnostic about the order of the input data", { dat <- data.frame( "target" = c(2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1), "prediction" = c( 0.7, 0.3, 0.1, 0.1, 0.9, 0.3, 0.8, 0.7, 0.9, 0.1, 0.9, 0.1, 0.2, 0.4, 0.2, 0.6, 0.1, 0.6, 0.1, 0.2, 0.1, 0.8, 0.3 ), ".group" = paste0("cl_", c(1, 3, 2, 2, 3, 1, 3, 1, 2, 3, 2, 1, 2, 1, 1, 3, 3, 2, 1, 3, 2, 1, 1)) ) eval_1 <- dat %>% dplyr::group_by(.data$.group) %>% evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial") eval_2 <- dat %>% dplyr::arrange(.data$.group) %>% dplyr::group_by(.data$.group) %>% evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial") eval_3 <- dat %>% dplyr::arrange(dplyr::desc(.data$.group)) %>% dplyr::group_by(.data$.group) %>% evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial") expect_identical(eval_1[, 1:15], eval_2[, 1:15]) expect_identical(eval_1[, 1:15], eval_3[, 1:15]) eval_group_1 <- dat %>% dplyr::filter(.data$.group == "cl_1") %>% evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial") eval_group_2 <- dat %>% dplyr::filter(.data$.group == "cl_2") %>% evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial") eval_group_3 <- dat %>% dplyr::filter(.data$.group == "cl_3") %>% evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial") eval_groups <- dplyr::bind_rows( eval_group_1, eval_group_2, eval_group_3 ) expect_equal(eval_1[, 2:15], eval_groups[, 1:14]) }) test_that("evaluate() and confusion_matrix() has same metric values", { # Binomial xpectr::set_test_seed(42) df_binom <- tibble::tibble( "target" = as.character(sample(0:1, 50, replace = TRUE)), "prediction" = as.character(sample(0:1, 50, replace = TRUE)) ) eval_binom <- evaluate( df_binom, target_col = "target", prediction_cols = "prediction", type = "binomial", positive = "0", metrics = "all") cfm_binom <- confusion_matrix(df_binom$target, df_binom$prediction, positive = "0", metrics = "all") # same order of metrics shared_cols <- intersect(colnames(eval_binom), colnames(cfm_binom)) eval_binom_shared <- eval_binom %>% base_deselect(cols = c( "Confusion Matrix", "Process", setdiff(colnames(eval_binom), shared_cols) )) cfm_binom_shared <- cfm_binom %>% base_deselect(cols = c( "Confusion Matrix", 
setdiff(colnames(cfm_binom), shared_cols) )) expect_equal(colnames(eval_binom_shared), colnames(cfm_binom_shared)) expect_equal(unlist(eval_binom_shared), unlist(cfm_binom_shared)) expect_equal(eval_binom$`Confusion Matrix`, cfm_binom$`Confusion Matrix`) expect_equal(eval_binom[["Process"]][[1]]$`Positive Class`, "0") # Multinomial xpectr::set_test_seed(42) df_multinom <- tibble::tibble( "target" = as.character(sample(0:3, 50, replace = TRUE)), "prediction" = as.character(sample(0:3, 50, replace = TRUE)) ) eval_multinom <- evaluate( df_multinom, target_col = "target", prediction_cols = "prediction", type = "multinomial", metrics = "all") cfm_multinom <- confusion_matrix( df_multinom$target, df_multinom$prediction, metrics = "all") # same order of metrics shared_cols <- intersect(colnames(eval_multinom), colnames(cfm_multinom)) eval_multinom_shared <- eval_multinom %>% base_deselect(cols = c( "Confusion Matrix", "Class Level Results", setdiff(colnames(eval_multinom), shared_cols) )) cfm_multinom_shared <- cfm_multinom %>% base_deselect(cols = c( "Confusion Matrix", "Class Level Results", setdiff(colnames(cfm_multinom), shared_cols) )) expect_equal(colnames(eval_multinom_shared), colnames(cfm_multinom_shared)) expect_equal(unlist(eval_multinom_shared), unlist(cfm_multinom_shared)) expect_equal(eval_multinom$`Confusion Matrix`, cfm_multinom$`Confusion Matrix`) # class level results clr_eval_multinom <- eval_multinom$`Class Level Results`[[1]] clr_cfm_multinom <- cfm_multinom$`Class Level Results`[[1]] clr_eval_multinom_cfm <- clr_eval_multinom$`Confusion Matrix` clr_eval_multinom$`Confusion Matrix` <- NULL clr_cfm_multinom_cfm <- clr_cfm_multinom$`Confusion Matrix` clr_cfm_multinom$`Confusion Matrix` <- NULL shared_cols <- intersect(colnames(clr_eval_multinom), colnames(clr_cfm_multinom)) expect_equal(clr_eval_multinom[, shared_cols], clr_cfm_multinom[, shared_cols]) clr_eval_multinom_cfm <- lapply(clr_eval_multinom_cfm, function(x){base_deselect(x, "Class")}) expect_equal(clr_eval_multinom_cfm, clr_cfm_multinom_cfm) }) test_that("evaluate() and evaluate_residuals() has same metric values", { xpectr::set_test_seed(42) df <- tibble::tibble( "target" = runif(50), "prediction" = runif(50) ) eval <- evaluate( df, target_col = "target", prediction_cols = "prediction", type = "gaussian", metrics = "all") resid_eval <- evaluate_residuals( df, prediction_col = "prediction", target_col = "target", metrics = "all") # same order of metrics shared_cols <- intersect(colnames(eval), colnames(resid_eval)) eval_shared <- eval %>% base_deselect(cols = c( setdiff(colnames(eval), shared_cols) )) resid_eval_shared <- resid_eval %>% base_deselect(cols = c( setdiff(colnames(resid_eval), shared_cols) )) expect_equal(colnames(eval_shared), colnames(resid_eval_shared)) expect_equal(unlist(eval_shared), unlist(resid_eval_shared)) }) # TODO Check that evaluate and (cross_)validate* all have the same metric value # for the same data test_that("the different prediction formats work properly in Gaussian evaluate()", { testthat::skip_on_cran() xpectr::set_test_seed(42) # Gaussian gauss <- tibble::tibble( "ID" = rep(c(1:5), each = 3), "Target_1" = runif(15), "Prediction_1" = runif(15), "Target_const" = rep(c(1,2,1,3,2), each = 3), "Prediction_2" = runif(15) * 10 / 3 ) %>% dplyr::mutate( ID_fct = factor(ID), ID_chr = as.character(ID), Target_1_fct = factor(Target_1), Target_1_chr = as.character(Target_1), Prediction_1_fct = factor(Prediction_1), Prediction_1_chr = as.character(Prediction_1) ) # Generate expectations for 
'evaluate' # Tip: comment out the gxs_function() call # so it is easy to regenerate the tests xpectr::set_test_seed(42) # xpectr::gxs_function( # fn = evaluate, # args_values = list( # "data" = list(gauss), # "target_col" = list("Target_1", "Target_1_chr", "Target_1_fct", "Target_const"), # "prediction_cols" = list("Prediction_1", "Prediction_1_chr", "Prediction_1_fct", "Prediction_2", # c("Prediction_1", "Prediction_2")), # "type" = list("gaussian"), # "id_col" = list(NULL, "ID", "ID_fct", "ID_chr", "Target_1"), # "id_method" = list("mean"), # "metrics" = list(list("all" = FALSE, "RMSE" = TRUE)) # ), # extra_combinations = list( # list("target_col" = "Target_const", "prediction_cols" = "Prediction_2", "id_col" = "ID_fct") # ), # indentation = 2 # ) ## Testing 'evaluate' #### ## Initially generated by xpectr # Testing different combinations of argument values # Testing evaluate(data = gauss, target_col = "Target_... xpectr::set_test_seed(42) # Assigning output output_19148 <- evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)) # Testing class expect_equal( class(output_19148), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19148[["RMSE"]], 0.3794, tolerance = 1e-4) # Testing column names expect_equal( names(output_19148), c("RMSE", "Predictions", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19148), c("numeric", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19148), c("double", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19148), c(1, 3)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19148)), character(0), fixed = TRUE) # Testing evaluate(data = NULL, target_col = "Target_1... # Changed from baseline: data = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19370 <- xpectr::capture_side_effects(evaluate(data = NULL, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19370[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'data': Must be of type 'data.frame', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19370[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: target_col = "Target_... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_12861 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1_chr", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_12861[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'data[[target_col]]': Must be of type 'numeric', not 'character'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_12861[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: target_col = "Target_... 
xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_18304 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1_fct", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_18304[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'data[[target_col]]': Must be of type 'numeric', not 'factor'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_18304[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: target_col = "Target_... xpectr::set_test_seed(42) # Assigning output output_16417 <- evaluate(data = gauss, target_col = "Target_const", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)) # Testing class expect_equal( class(output_16417), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_16417[["RMSE"]], 1.49652, tolerance = 1e-4) # Testing column names expect_equal( names(output_16417), c("RMSE", "Predictions", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_16417), c("numeric", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_16417), c("double", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_16417), c(1, 3)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_16417)), character(0), fixed = TRUE) # Testing evaluate(data = gauss, target_col = NULL, pr... # Changed from baseline: target_col = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_15190 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = NULL, prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_15190[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'target_col': Must be of type 'string', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_15190[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: target_col, predictio... 
xpectr::set_test_seed(42) # Assigning output output_17365 <- evaluate(data = gauss, target_col = "Target_const", prediction_cols = "Prediction_2", type = "gaussian", id_col = "ID_fct", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)) # Testing class expect_equal( class(output_17365), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_17365[["RMSE"]], 0.85334, tolerance = 1e-4) # Testing column names expect_equal( names(output_17365), c("RMSE", "Predictions", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_17365), c("numeric", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_17365), c("double", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_17365), c(1, 3)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_17365)), character(0), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: prediction_cols = "Pr... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_11346 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1_chr", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_11346[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'data[[prediction_col]]': Must be of type 'numeric', not 'character'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_11346[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: prediction_cols = "Pr... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_16569 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1_fct", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_16569[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'data[[prediction_col]]': Must be of type 'numeric', not 'factor'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_16569[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: prediction_cols = "Pr... 
xpectr::set_test_seed(42) # Assigning output output_17050 <- evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_2", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)) # Testing class expect_equal( class(output_17050), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_17050[["RMSE"]], 1.50669, tolerance = 1e-4) # Testing column names expect_equal( names(output_17050), c("RMSE", "Predictions", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_17050), c("numeric", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_17050), c("double", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_17050), c(1, 3)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_17050)), character(0), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: prediction_cols = c("... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_14577 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = c("Prediction_1", "Prediction_2"), type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_14577[['error']]), xpectr::strip("1 assertions failed:\n * When 'type' is 'gaussian', 'prediction_cols' must have length 1."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_14577[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: prediction_cols = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_17191 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = NULL, type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_17191[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'prediction_cols': Must be of type 'character', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_17191[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: type = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19346 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = NULL, id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19346[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'type': Must be a subset of {'gaussian','binomial','multinomial'}, not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19346[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... 
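# Note on the ID-aggregation error cases below: with an 'id_col' and
# id_method = "mean", predictions are averaged per ID, which is only meaningful
# when the target is constant within each ID. A hedged base-R sketch of the idea
# (toy vectors, not the 'gauss' data, and not cvms' internal implementation):
toy_ids_ <- c(1, 1, 2, 2)
toy_preds_ <- c(0.2, 0.4, 0.6, 0.8)
toy_targets_const_ <- c(5, 5, 7, 7) # constant within each ID
tapply(toy_preds_, toy_ids_, mean) # per-ID averaged predictions: 0.3 and 0.7
all(tapply(toy_targets_const_, toy_ids_, function(x) length(unique(x))) == 1) # TRUE -> aggregation well-defined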
# Changed from baseline: id_col = "ID_chr" xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_12554 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = "ID_chr", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_12554[['error']]), xpectr::strip("The targets must be constant within the IDs with the current ID method. These IDs had more than one unique value in the target column: 1, 2, 3, 4, 5."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_12554[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: id_col = "Target_1" xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_14622 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = "Target_1", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_14622[['error']]), xpectr::strip("1 assertions failed:\n * 'id_col' and 'target_col' cannot be identical."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_14622[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: id_col = "ID" xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19400 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = "ID", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19400[['error']]), xpectr::strip("The targets must be constant within the IDs with the current ID method. These IDs had more than one unique value in the target column: 1, 2, 3, 4, 5."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19400[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: id_col = "ID_fct" xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19782 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = "ID_fct", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19782[['error']]), xpectr::strip("The targets must be constant within the IDs with the current ID method. These IDs had more than one unique value in the target column: 1, 2, 3, 4, 5."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19782[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... 
# Changed from baseline: id_method = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_11174 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = NULL, metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_11174[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'id_method': Must be a subset of {'mean','majority'}, not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_11174[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = gauss, target_col = "Target_... # Changed from baseline: metrics = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_14749 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = NULL), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_14749[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'metrics': Must be of type 'list', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_14749[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) ## Finished testing 'evaluate' #### # xpectr::set_test_seed(42) # Assigning output output_17365 <- evaluate(data = gauss, target_col = "Target_const", prediction_cols = "Prediction_2", type = "gaussian", id_col = "ID_fct", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)) ## Testing 'output_17365$Predictions[[1]]' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Testing class expect_equal( class(output_17365$Predictions[[1]]), c("tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_17365$Predictions[[1]][["Target"]], c(1, 2, 1, 3, 2), tolerance = 1e-4) expect_equal( output_17365$Predictions[[1]][["Prediction"]], c(2.15195, 1.69115, 1.24622, 1.58568, 1.60302), tolerance = 1e-4) expect_equal( output_17365$Predictions[[1]][["SD"]], c(0.75329, 1.4739, 1.5737, 0.40383, 1.56662), tolerance = 1e-4) expect_equal( output_17365$Predictions[[1]][["ID_fct"]], structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor")) expect_equal( output_17365$Predictions[[1]][["id_method"]], c("mean", "mean", "mean", "mean", "mean"), fixed = TRUE) # Testing column names expect_equal( names(output_17365$Predictions[[1]]), c("Target", "Prediction", "SD", "ID_fct", "id_method"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_17365$Predictions[[1]]), c("numeric", "numeric", "numeric", "factor", "character"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_17365$Predictions[[1]]), c("double", "double", "double", "integer", "character"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_17365$Predictions[[1]]), c(5L, 5L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_17365$Predictions[[1]])), character(0), fixed = TRUE) ## Finished testing 'output_17365$Predictions[[1]]' #### }) test_that("the different prediction formats work properly in multinomial evaluate()", { testthat::skip_on_cran() xpectr::set_test_seed(42) multinom <- tibble::tibble( "ID" = rep(1:5, each = 3), "Target" = rep(c(1,2,1,3,3), each = 3), "PredictedClass" = sample(c(1,2,3), 15, replace = TRUE), "1" = runif(15), 
"2" = runif(15), "3" = runif(15), "4" = runif(15) ) %>% dplyr::mutate( PredictedClass_chr = as.character(PredictedClass), PredictedClass_fct = as.factor(PredictedClass), Target_chr = as.character(Target), Target_fct = as.factor(Target), `1_chr` = as.character(`1`), Target_with_1chr = ifelse(Target == 1, "1_chr", Target), ID_fct = as.factor(ID), ID_chr = as.character(ID) ) # Generate expectations for 'evaluate' # Tip: comment out the gxs_function() call # so it is easy to regenerate the tests xpectr::set_test_seed(42) # xpectr::gxs_function( # fn = evaluate, # args_values = list( # "data" = list(multinom), # "target_col" = list("Target_chr", "Target", "Target_fct", "4"), # "prediction_cols" = list(c("1","2","3"), c("1","2","3","4"), c("1","2"), # "PredictedClass", "PredictedClass_chr", "PredictedClass_fct"), # "type" = list("multinomial"), # "id_col" = list(NULL, "ID", "ID_fct", "ID_chr"), # "id_method" = list("mean"), # "apply_softmax" = list(TRUE, FALSE), # "include_predictions" = list(TRUE), # "metrics" = list(list("all" = FALSE, "Overall Accuracy" = TRUE, "Sensitivity" = TRUE)) # ), # extra_combinations = list( # list("target_col" = "Target_with_1chr", "prediction_cols" = c("1_chr","2","3")), # list("target_col" = "Target_with_1chr", "prediction_cols" = "PredictedClass_chr", "apply_softmax" = FALSE) # ), # indentation = 2 # ) ## Testing 'evaluate' #### ## Initially generated by xpectr # Testing different combinations of argument values # Testing evaluate(data = multinom, target_col = "Targ... xpectr::set_test_seed(42) # Assigning output output_19148 <- evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)) # Testing class expect_equal( class(output_19148), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19148[["Overall Accuracy"]], 0.33333, tolerance = 1e-4) expect_equal( output_19148[["Sensitivity"]], 0.27778, tolerance = 1e-4) # Testing column names expect_equal( names(output_19148), c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19148), c("numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19148), c("double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19148), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19148)), character(0), fixed = TRUE) # Testing evaluate(data = NULL, target_col = "Target_c... 
# Changed from baseline: data = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19370 <- xpectr::capture_side_effects(evaluate(data = NULL, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19370[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'data': Must be of type 'data.frame', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19370[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: target_col = "Target" xpectr::set_test_seed(42) # Assigning output output_12861 <- evaluate(data = multinom, target_col = "Target", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)) # Testing class expect_equal( class(output_12861), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_12861[["Overall Accuracy"]], 0.33333, tolerance = 1e-4) expect_equal( output_12861[["Sensitivity"]], 0.27778, tolerance = 1e-4) # Testing column names expect_equal( names(output_12861), c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_12861), c("numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_12861), c("double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_12861), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_12861)), character(0), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: target_col = "Target_... xpectr::set_test_seed(42) # Assigning output output_18304 <- evaluate(data = multinom, target_col = "Target_fct", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)) # Testing class expect_equal( class(output_18304), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_18304[["Overall Accuracy"]], 0.33333, tolerance = 1e-4) expect_equal( output_18304[["Sensitivity"]], 0.27778, tolerance = 1e-4) # Testing column names expect_equal( names(output_18304), c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_18304), c("numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_18304), c("double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_18304), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_18304)), character(0), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "4", ... 
# Changed from baseline: target_col = "4" xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_16417 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "4", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_16417[['error']]), xpectr::strip("Not all levels in 'target_col' was found in 'prediction_cols'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_16417[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = NULL,... # Changed from baseline: target_col = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_15190 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = NULL, prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_15190[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'target_col': Must be of type 'string', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_15190[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: target_col, predictio... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_17365 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_with_1chr", prediction_cols = c("1_chr", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_17365[['error']]), xpectr::strip("the prediction columns must be numeric."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_17365[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: target_col, predictio... 
xpectr::set_test_seed(42) # Assigning output output_11346 <- evaluate(data = multinom, target_col = "Target_with_1chr", prediction_cols = "PredictedClass_chr", type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = FALSE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)) # Testing class expect_equal( class(output_11346), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_11346[["Overall Accuracy"]], 0.33333, tolerance = 1e-4) expect_equal( output_11346[["Sensitivity"]], NaN, tolerance = 1e-4) # Testing column names expect_equal( names(output_11346), c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_11346), c("numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_11346), c("double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_11346), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_11346)), character(0), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: prediction_cols = c("... xpectr::set_test_seed(42) # Assigning output output_16569 <- evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3", "4"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)) # Testing class expect_equal( class(output_16569), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_16569[["Overall Accuracy"]], 0.26667, tolerance = 1e-4) expect_equal( output_16569[["Sensitivity"]], NaN, tolerance = 1e-4) # Testing column names expect_equal( names(output_16569), c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_16569), c("numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_16569), c("double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_16569), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_16569)), character(0), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: prediction_cols = c("... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_17050 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_17050[['error']]), xpectr::strip("Not all levels in 'target_col' was found in 'prediction_cols'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_17050[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... 
# Changed from baseline: prediction_cols = "Pr... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_14577 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = "PredictedClass", type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_14577[['error']]), xpectr::strip("2 assertions failed:\n * When 'type' is 'multinomial' and 'prediction_cols' has length 1, both 'data[[target_col]]' and 'data[[prediction_cols]]'\n * must have type character.\n * When passing 'prediction_cols' as single column with multiple classes, 'apply_softmax' should be 'FALSE'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_14577[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: prediction_cols = "Pr... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_17191 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = "PredictedClass_chr", type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_17191[['error']]), xpectr::strip("1 assertions failed:\n * When passing 'prediction_cols' as single column with multiple classes, 'apply_softmax' should be 'FALSE'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_17191[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: prediction_cols = "Pr... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19346 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = "PredictedClass_fct", type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19346[['error']]), xpectr::strip("2 assertions failed:\n * When 'type' is 'multinomial' and 'prediction_cols' has length 1, both 'data[[target_col]]' and 'data[[prediction_cols]]'\n * must have type character.\n * When passing 'prediction_cols' as single column with multiple classes, 'apply_softmax' should be 'FALSE'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19346[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... 
# Changed from baseline: prediction_cols = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_12554 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = NULL, type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_12554[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'prediction_cols': Must be of type 'character', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_12554[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: type = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_14622 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = NULL, id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_14622[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'type': Must be a subset of {'gaussian','binomial','multinomial'}, not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_14622[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: id_col = "ID_fct" xpectr::set_test_seed(42) # Assigning output output_19400 <- evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = "ID_fct", id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)) # Testing class expect_equal( class(output_19400), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19400[["Overall Accuracy"]], 0.4, tolerance = 1e-4) expect_equal( output_19400[["Sensitivity"]], 0.33333, tolerance = 1e-4) # Testing column names expect_equal( names(output_19400), c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19400), c("numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19400), c("double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19400), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19400)), character(0), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... 
# Changed from baseline: id_col = "ID_chr" xpectr::set_test_seed(42) # Assigning output output_19782 <- evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = "ID_chr", id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)) # Testing class expect_equal( class(output_19782), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19782[["Overall Accuracy"]], 0.4, tolerance = 1e-4) expect_equal( output_19782[["Sensitivity"]], 0.33333, tolerance = 1e-4) # Testing column names expect_equal( names(output_19782), c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19782), c("numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19782), c("double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19782), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19782)), character(0), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: id_col = "ID" xpectr::set_test_seed(42) # Assigning output output_11174 <- evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = "ID", id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)) # Testing class expect_equal( class(output_11174), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_11174[["Overall Accuracy"]], 0.4, tolerance = 1e-4) expect_equal( output_11174[["Sensitivity"]], 0.33333, tolerance = 1e-4) # Testing column names expect_equal( names(output_11174), c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_11174), c("numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_11174), c("double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_11174), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_11174)), character(0), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... 
# Changed from baseline: id_method = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_14749 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = NULL, apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_14749[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'id_method': Must be a subset of {'mean','majority'}, not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_14749[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: apply_softmax = FALSE xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_15603 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = FALSE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_15603[['error']]), xpectr::strip("'multinomial' evaluate(): Not all probabilities added up to 1 row-wise (tolerance of 5 decimals)."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_15603[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: apply_softmax = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19040 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = NULL, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19040[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'apply_softmax': Must be of type 'logical flag', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19040[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... # Changed from baseline: include_predictions =... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_11387 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = NULL, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_11387[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'include_predictions': Must be of type 'logical flag', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_11387[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = multinom, target_col = "Targ... 
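# Note on the apply_softmax = FALSE case above: the raw runif() prediction columns
# are not probabilities, so evaluate() errors because rows do not sum to 1.
# A hedged base-R sketch of what a row-wise softmax does (illustrative only;
# not necessarily cvms' exact implementation):
softmax_row_ <- function(x) exp(x) / sum(exp(x))
toy_scores_ <- matrix(c(0.1, 2.0, -1.0, 0.5, 1.0, 0.0), nrow = 2)
rowSums(t(apply(toy_scores_, 1, softmax_row_))) # each row now sums to 1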
# Changed from baseline: metrics = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19888 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = NULL), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19888[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'metrics': Must be of type 'list', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19888[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) ## Finished testing 'evaluate' #### # }) test_that("the different prediction formats work properly in binomial evaluate()", { testthat::skip_on_cran() xpectr::set_test_seed(42) binom <- tibble::tibble( "ID" = rep(1:5, each = 3), "Target" = rep(c(1,2,1,1,2), each = 3), "PredictedClass" = sample(c(1,2), 15, replace = TRUE), "Prediction" = runif(15), "1" = runif(15), "2" = runif(15) ) %>% dplyr::mutate( PredictedClass_chr = as.character(PredictedClass), PredictedClass_fct = as.factor(PredictedClass), Target_chr = as.character(Target), Target_fct = as.factor(Target), ID_fct = as.factor(ID), ID_chr = as.character(ID) ) # Generate expectations for 'evaluate' # Tip: comment out the gxs_function() call # so it is easy to regenerate the tests xpectr::set_test_seed(42) # xpectr::gxs_function( # fn = evaluate, # args_values = list( # "data" = list(binom), # "target_col" = list("Target", "Target_chr", "Target_fct"), # "prediction_cols" = list("Prediction", c("1", "2"), "PredictedClass", "PredictedClass_chr", "PredictedClass_fct"), # "type" = list("binomial", "multinomial"), # "id_col" = list(NULL, "ID", "ID_fct", "ID_chr", "Target_fct"), # "id_method" = list("mean"), # "cutoff" = list(0.5), # "positive" = list(2, 1, "2"), # "metrics" = list(list("all" = FALSE, "Balanced Accuracy" = TRUE, "Sensitivity" = TRUE)), # "include_predictions" = list(TRUE) # ), # extra_combinations = list( # list("prediction_cols" = "PredictedClass_chr", "target_col" = "Target_chr", "id_col" = "ID_chr") # ), # indentation = 2 # ) ## Testing 'evaluate' #### ## Initially generated by xpectr # Testing different combinations of argument values # Testing evaluate(data = binom, target_col = "Target"... 
xpectr::set_test_seed(42) # Assigning output output_19148 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_19148), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19148[["Balanced Accuracy"]], 0.55556, tolerance = 1e-4) expect_equal( output_19148[["Sensitivity"]], 0.66667, tolerance = 1e-4) expect_equal( output_19148[["ROC"]], NA) expect_equal( output_19148[["Process"]][[1]][["Positive Class"]], "2", fixed = TRUE) # Testing column names expect_equal( names(output_19148), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19148), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19148), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19148), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19148)), character(0), fixed = TRUE) # Testing evaluate(data = NULL, target_col = "Target",... # Changed from baseline: data = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19370 <- xpectr::capture_side_effects(evaluate(data = NULL, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19370[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'data': Must be of type 'data.frame', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19370[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target_... # Changed from baseline: target_col = "Target_... 
xpectr::set_test_seed(42) # Assigning output output_12861 <- evaluate(data = binom, target_col = "Target_chr", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_12861), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_12861[["Balanced Accuracy"]], 0.55556, tolerance = 1e-4) expect_equal( output_12861[["Sensitivity"]], 0.66667, tolerance = 1e-4) expect_equal( output_12861[["ROC"]], NA) expect_equal( output_12861[["Process"]][[1]][["Positive Class"]], "2", fixed = TRUE) # Testing column names expect_equal( names(output_12861), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_12861), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_12861), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_12861), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_12861)), character(0), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target_... # Changed from baseline: target_col = "Target_... xpectr::set_test_seed(42) # Assigning output output_18304 <- evaluate(data = binom, target_col = "Target_fct", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_18304), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_18304[["Balanced Accuracy"]], 0.55556, tolerance = 1e-4) expect_equal( output_18304[["Sensitivity"]], 0.66667, tolerance = 1e-4) expect_equal( output_18304[["ROC"]], NA) expect_equal( output_18304[["Process"]][[1]][["Positive Class"]], "2", fixed = TRUE) # Testing column names expect_equal( names(output_18304), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_18304), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_18304), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_18304), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_18304)), character(0), fixed = TRUE) # Testing evaluate(data = binom, target_col = NULL, pr... 
# Changed from baseline: target_col = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_16417 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = NULL, prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_16417[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'target_col': Must be of type 'string', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_16417[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target_... # Changed from baseline: target_col, predictio... xpectr::set_test_seed(42) # Assigning output output_15190 <- evaluate(data = binom, target_col = "Target_chr", prediction_cols = "PredictedClass_chr", type = "binomial", id_col = "ID_chr", id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_15190), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_15190[["Balanced Accuracy"]], 0.25, tolerance = 1e-4) expect_equal( output_15190[["Sensitivity"]], 0.5, tolerance = 1e-4) expect_equal( output_15190[["ROC"]], NA) expect_equal( output_15190[["Process"]][[1]][["Positive Class"]], "2", fixed = TRUE) # Testing column names expect_equal( names(output_15190), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_15190), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_15190), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_15190), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_15190)), character(0), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: prediction_cols = c("... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_17365 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = c("1", "2"), type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_17365[['error']]), xpectr::strip("1 assertions failed:\n * When 'type' is 'binomial', 'prediction_cols' must have length 1."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_17365[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: prediction_cols = "Pr... 
xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_11346 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "PredictedClass", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_11346[['error']]), xpectr::strip("1 assertions failed:\n * When 'type' is 'binomial' and 'data[[prediction_cols]]' is numeric, the values in 'data[[prediction_cols]]' must be\n * between 0 and 1."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_11346[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: prediction_cols = "Pr... xpectr::set_test_seed(42) # Assigning output output_16569 <- evaluate(data = binom, target_col = "Target", prediction_cols = "PredictedClass_chr", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_16569), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_16569[["Balanced Accuracy"]], 0.5, tolerance = 1e-4) expect_equal( output_16569[["Sensitivity"]], 0.66667, tolerance = 1e-4) expect_equal( output_16569[["ROC"]], NA) expect_equal( output_16569[["Process"]][[1]][["Positive Class"]], "2", fixed = TRUE) # Testing column names expect_equal( names(output_16569), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_16569), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_16569), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_16569), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_16569)), character(0), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: prediction_cols = "Pr... xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_17050 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "PredictedClass_fct", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_17050[['error']]), xpectr::strip("1 assertions failed:\n * When 'type' is 'binomial', 'data[[prediction_cols]]' must be either numeric or character."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_17050[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... 
# Changed from baseline: prediction_cols = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_14577 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = NULL, type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_14577[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'prediction_cols': Must be of type 'character', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_14577[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: type = "multinomial" xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_17191 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "multinomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_17191[['error']]), xpectr::strip("1 assertions failed:\n * When 'type' is 'multinomial' and 'prediction_cols' has length 1, both 'data[[target_col]]' and 'data[[prediction_cols]]'\n * must have type character."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_17191[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: type = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19346 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = NULL, id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19346[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'type': Must be a subset of {'gaussian','binomial','multinomial'}, not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19346[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... 
# Changed from baseline: id_col = "ID_chr" xpectr::set_test_seed(42) # Assigning output output_12554 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = "ID_chr", id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_12554), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_12554[["Balanced Accuracy"]], 0.66667, tolerance = 1e-4) expect_equal( output_12554[["Sensitivity"]], 1, tolerance = 1e-4) expect_equal( output_12554[["ROC"]], NA) expect_equal( output_12554[["Process"]][[1]][["Positive Class"]], "2", fixed = TRUE) # Testing column names expect_equal( names(output_12554), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_12554), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_12554), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_12554), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_12554)), character(0), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: id_col = "Target_fct" xpectr::set_test_seed(42) # Assigning output output_14622 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = "Target_fct", id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_14622), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_14622[["Balanced Accuracy"]], 0.5, tolerance = 1e-4) expect_equal( output_14622[["Sensitivity"]], 1, tolerance = 1e-4) expect_equal( output_14622[["ROC"]], NA) expect_equal( output_14622[["Process"]][[1]][["Positive Class"]], "2", fixed = TRUE) # Testing column names expect_equal( names(output_14622), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_14622), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_14622), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_14622), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_14622)), character(0), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... 
# Changed from baseline: id_col = "ID" xpectr::set_test_seed(42) # Assigning output output_19400 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = "ID", id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_19400), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19400[["Balanced Accuracy"]], 0.66667, tolerance = 1e-4) expect_equal( output_19400[["Sensitivity"]], 1, tolerance = 1e-4) expect_equal( output_19400[["ROC"]], NA) expect_equal( output_19400[["Process"]][[1]][["Positive Class"]], "2", fixed = TRUE) # Testing column names expect_equal( names(output_19400), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19400), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19400), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19400), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19400)), character(0), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: id_col = "ID_fct" xpectr::set_test_seed(42) # Assigning output output_19782 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = "ID_fct", id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_19782), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19782[["Balanced Accuracy"]], 0.66667, tolerance = 1e-4) expect_equal( output_19782[["Sensitivity"]], 1, tolerance = 1e-4) expect_equal( output_19782[["ROC"]], NA) expect_equal( output_19782[["Process"]][[1]][["Positive Class"]], "2", fixed = TRUE) # Testing column names expect_equal( names(output_19782), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19782), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19782), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19782), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19782)), character(0), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... 
# Changed from baseline: id_method = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_11174 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = NULL, cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_11174[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'id_method': Must be a subset of {'mean','majority'}, not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_11174[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: cutoff = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_14749 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = NULL, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_14749[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'cutoff': Must be of type 'number', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_14749[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: positive = 1 xpectr::set_test_seed(42) # Assigning output output_15603 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 1, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_15603), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_15603[["Balanced Accuracy"]], 0.55556, tolerance = 1e-4) expect_equal( output_15603[["Sensitivity"]], 0.44444, tolerance = 1e-4) expect_equal( output_15603[["ROC"]], NA) expect_equal( output_15603[["Process"]][[1]][["Positive Class"]], "1", fixed = TRUE) # Testing column names expect_equal( names(output_15603), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_15603), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_15603), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_15603), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_15603)), character(0), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... 
# Changed from baseline: positive = "2" xpectr::set_test_seed(42) # Assigning output output_19040 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = "2", metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE) # Testing class expect_equal( class(output_19040), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19040[["Balanced Accuracy"]], 0.55556, tolerance = 1e-4) expect_equal( output_19040[["Sensitivity"]], 0.66667, tolerance = 1e-4) expect_equal( output_19040[["ROC"]], NA) expect_equal( output_19040[["Process"]][[1]][["Positive Class"]], "2", fixed = TRUE) # Testing column names expect_equal( names(output_19040), c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19040), c("numeric", "numeric", "list", "logical", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19040), c("double", "double", "list", "logical", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19040), c(1L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19040)), character(0), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: positive = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_11387 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = NULL, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_11387[['error']]), xpectr::strip("Assertion failed. One of the following must apply:\n * checkmate::check_choice(positive): Must be a subset of {'1','2'}, not 'NULL'\n * checkmate::check_string(positive): Must be of type 'string', not 'NULL'"), fixed = TRUE) expect_equal( xpectr::strip(side_effects_11387[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: metrics = NULL xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19888 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = NULL, include_predictions = TRUE), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19888[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'metrics': Must be of type 'list', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19888[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) # Testing evaluate(data = binom, target_col = "Target"... # Changed from baseline: include_predictions =... 
xpectr::set_test_seed(42) # Testing side effects # Assigning side effects side_effects_19466 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = NULL), reset_seed = TRUE) expect_equal( xpectr::strip(side_effects_19466[['error']]), xpectr::strip("1 assertions failed:\n * Variable 'include_predictions': Must be of type 'logical flag', not 'NULL'."), fixed = TRUE) expect_equal( xpectr::strip(side_effects_19466[['error_class']]), xpectr::strip(c("simpleError", "error", "condition")), fixed = TRUE) ## Finished testing 'evaluate' #### # }) test_that("testing 2-class multinomial evaluation works", { xpectr::set_test_seed(42) data_ <- multiclass_probability_tibble( num_classes = 2, num_observations = 60, apply_softmax = FALSE # Done in evaluate ) %>% dplyr::mutate( cl = as.factor(rep(c("class_1", "class_2"), each = 30)) ) %>% dplyr::sample_frac() expect_equal(sum(data_[, c("class_1", "class_2")]), 63.33427, tolerance = 1e-5) mnm_eval_2class <- evaluate( data_, target_col = "cl", prediction_cols = c("class_1", "class_2"), type = "multinomial", apply_softmax = TRUE) ## Testing 'mnm_eval_2class' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Testing class expect_equal( class(mnm_eval_2class), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( mnm_eval_2class[["Overall Accuracy"]], 0.6, tolerance = 1e-4) expect_equal( mnm_eval_2class[["Balanced Accuracy"]], 0.6, tolerance = 1e-4) expect_equal( mnm_eval_2class[["F1"]], 0.59956, tolerance = 1e-4) expect_equal( mnm_eval_2class[["Sensitivity"]], 0.6, tolerance = 1e-4) expect_equal( mnm_eval_2class[["Specificity"]], 0.6, tolerance = 1e-4) expect_equal( mnm_eval_2class[["Pos Pred Value"]], 0.60045, tolerance = 1e-4) expect_equal( mnm_eval_2class[["Neg Pred Value"]], 0.60045, tolerance = 1e-4) expect_equal( mnm_eval_2class[["Kappa"]], 0.2, tolerance = 1e-4) expect_equal( mnm_eval_2class[["MCC"]], 0.20045, tolerance = 1e-4) expect_equal( mnm_eval_2class[["Detection Rate"]], 0.3, tolerance = 1e-4) expect_equal( mnm_eval_2class[["Detection Prevalence"]], 0.5, tolerance = 1e-4) expect_equal( mnm_eval_2class[["Prevalence"]], 0.5, tolerance = 1e-4) # Testing column names expect_equal( names(mnm_eval_2class), c("Overall Accuracy", "Balanced Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(mnm_eval_2class), c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(mnm_eval_2class), c("double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(mnm_eval_2class), c(1L, 16L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(mnm_eval_2class)), character(0), fixed = TRUE) ## Finished testing 'mnm_eval_2class' #### ## Testing 'mnm_eval_2class$`Class Level Results`[[1]]' #### ## Initially 
generated by xpectr xpectr::set_test_seed(42) # Assigning output output_19148 <- mnm_eval_2class$`Class Level Results`[[1]] # Testing class expect_equal( class(output_19148), c("tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19148[["Class"]], c("class_1", "class_2"), fixed = TRUE) expect_equal( output_19148[["Balanced Accuracy"]], c(0.6, 0.6), tolerance = 1e-4) expect_equal( output_19148[["F1"]], c(0.6129, 0.58621), tolerance = 1e-4) expect_equal( output_19148[["Sensitivity"]], c(0.63333, 0.56667), tolerance = 1e-4) expect_equal( output_19148[["Specificity"]], c(0.56667, 0.63333), tolerance = 1e-4) expect_equal( output_19148[["Pos Pred Value"]], c(0.59375, 0.60714), tolerance = 1e-4) expect_equal( output_19148[["Neg Pred Value"]], c(0.60714, 0.59375), tolerance = 1e-4) expect_equal( output_19148[["Kappa"]], c(0.2, 0.2), tolerance = 1e-4) expect_equal( output_19148[["Detection Rate"]], c(0.31667, 0.28333), tolerance = 1e-4) expect_equal( output_19148[["Detection Prevalence"]], c(0.53333, 0.46667), tolerance = 1e-4) expect_equal( output_19148[["Prevalence"]], c(0.5, 0.5), tolerance = 1e-4) expect_equal( output_19148[["Support"]], c(30, 30), tolerance = 1e-4) # Testing column names expect_equal( names(output_19148), c("Class", "Balanced Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value", "Kappa", "Detection Rate", "Detection Prevalence", "Prevalence", "Support", "Confusion Matrix"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19148), c("character", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "integer", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19148), c("character", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "integer", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19148), c(2L, 13L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19148)), character(0), fixed = TRUE) ## Finished testing 'mnm_eval_2class$`Class Level Results`[[1]]' #### ## Testing 'mnm_eval_2class$`Class Level Results`[[1]]$`...' 
#### ## Initially generated by xpectr xpectr::set_test_seed(42) # Assigning output output_19148 <- mnm_eval_2class$`Class Level Results`[[1]]$`Confusion Matrix`[[1]] # Testing class expect_equal( class(output_19148), c("tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19148[["Class"]], c("class_1", "class_1", "class_1", "class_1")) expect_equal( output_19148[["Prediction"]], c("0", "1", "0", "1"), fixed = TRUE) expect_equal( output_19148[["Target"]], c("0", "0", "1", "1"), fixed = TRUE) expect_equal( output_19148[["Pos_0"]], c("TP", "FN", "FP", "TN"), fixed = TRUE) expect_equal( output_19148[["Pos_1"]], c("TN", "FP", "FN", "TP"), fixed = TRUE) expect_equal( output_19148[["N"]], c(17, 13, 11, 19), tolerance = 1e-4) # Testing column names expect_equal( names(output_19148), c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19148), c("character", "character", "character", "character", "character", "integer"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19148), c("character", "character", "character", "character", "character", "integer"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19148), c(4L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19148)), character(0), fixed = TRUE) ## Finished testing 'mnm_eval_2class$`Class Level Results`[[1]]$`...' #### ## Testing 'mnm_eval_2class$`Confusion Matrix`[[1]]' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Assigning output output_19148 <- mnm_eval_2class$`Confusion Matrix`[[1]] # Testing class expect_equal( class(output_19148), c("tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( output_19148[["Prediction"]], c("class_1", "class_2", "class_1", "class_2"), fixed = TRUE) expect_equal( output_19148[["Target"]], c("class_1", "class_1", "class_2", "class_2"), fixed = TRUE) expect_equal( output_19148[["Pos_class_1"]], c("TP", "FN", "FP", "TN"), fixed = TRUE) expect_equal( output_19148[["Pos_class_2"]], c("TN", "FP", "FN", "TP"), fixed = TRUE) expect_equal( output_19148[["N"]], c(19, 11, 13, 17), tolerance = 1e-4) # Testing column names expect_equal( names(output_19148), c("Prediction", "Target", "Pos_class_1", "Pos_class_2", "N"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(output_19148), c("character", "character", "character", "character", "integer"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(output_19148), c("character", "character", "character", "character", "integer"), fixed = TRUE) # Testing dimensions expect_equal( dim(output_19148), 4:5) # Testing group keys expect_equal( colnames(dplyr::group_keys(output_19148)), character(0), fixed = TRUE) ## Finished testing 'mnm_eval_2class$`Confusion Matrix`[[1]]' #### }) test_that("testing different number of classes in grouped multinomial evaluate()", { xpectr::set_test_seed(42) df <- plyr::ldply(1:3, function(i){ multiclass_probability_tibble( num_classes = 2+i, num_observations = 15, apply_softmax = TRUE, add_predicted_classes = TRUE, add_targets = TRUE ) %>% dplyr::mutate(TheGroup = paste0("gr", i)) }) %>% dplyr::select(`Predicted Class`, "Target", "TheGroup", dplyr::everything()) # Different number of classes per group # Prediction col is the predicted class eval_1 <- df %>% dplyr::group_by(.data$TheGroup) %>% evaluate( target_col = "Target", prediction_cols = "Predicted Class", 
type = "multinomial", metrics = list("all" = FALSE, "Accuracy" = TRUE)) ## Testing 'eval_1' #### ## Initially generated by xpectr xpectr::set_test_seed(42) # Testing class expect_equal( class(eval_1), c("eval_results", "tbl_df", "tbl", "data.frame"), fixed = TRUE) # Testing column values expect_equal( eval_1[["TheGroup"]], c("gr1", "gr2", "gr3"), fixed = TRUE) expect_equal( eval_1[["Accuracy"]], c(0.55556, 0.63333, 0.70667), tolerance = 1e-4) # Testing column names expect_equal( names(eval_1), c("TheGroup", "Accuracy", "Predictions", "Confusion Matrix", "Class Level Results", "Process"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(eval_1), c("character", "numeric", "list", "list", "list", "list"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(eval_1), c("character", "double", "list", "list", "list", "list"), fixed = TRUE) # Testing dimensions expect_equal( dim(eval_1), c(3L, 6L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(eval_1)), character(0), fixed = TRUE) ## Finished testing 'eval_1' #### # Classes reported in 'Process' expect_equal( eval_1$Process[[1]]$Classes, c("class_1", "class_2", "class_3"), fixed = TRUE) expect_equal( eval_1$Process[[2]]$Classes, c("class_1", "class_2", "class_3", "class_4"), fixed = TRUE) expect_equal( eval_1$Process[[3]]$Classes, c("class_1", "class_2", "class_3", "class_4", "class_5"), fixed = TRUE) # Nested predictions expect_equal( unlist(eval_1$Predictions[[1]]$Prediction[[1]]), c(class_1 = 0, class_2 = 1, class_3 = 0), tolerance = 1e-4) expect_equal( unlist(eval_1$Predictions[[2]]$Prediction[[1]]), c(class_1 = 0, class_2 = 0, class_3 = 0, class_4 = 1), tolerance = 1e-4) expect_equal( unlist(eval_1$Predictions[[3]]$Prediction[[1]]), c(class_1 = 0, class_2 = 0, class_3 = 0, class_4 = 1, class_5 = 0), tolerance = 1e-4) # Confusion matrices expect_equal(nrow(eval_1$`Confusion Matrix`[[1]]), 3^2) expect_equal(nrow(eval_1$`Confusion Matrix`[[2]]), 4^2) expect_equal(nrow(eval_1$`Confusion Matrix`[[3]]), 5^2) # Class level results expect_equal(nrow(eval_1$`Class Level Results`[[1]]), 3) expect_equal(nrow(eval_1$`Class Level Results`[[2]]), 4) expect_equal(nrow(eval_1$`Class Level Results`[[3]]), 5) # Different number of classes per group # Prediction cols are the probabilities # First set NAs to 0 df[is.na(df)] <- 0.0 eval_2 <- df %>% dplyr::group_by(.data$TheGroup) %>% evaluate( target_col = "Target", prediction_cols = paste0("class_", 1:5), type = "multinomial", metrics = list("all" = FALSE, "Accuracy" = TRUE)) expect_equal( eval_1[["Accuracy"]], eval_2[["Accuracy"]] ) expect_equal( eval_1[["TheGroup"]], eval_2[["TheGroup"]] ) # Nested predictions expect_equal( unlist(eval_2$Predictions[[1]]$Prediction[[1]]), c(class_1 = 0.349267814, class_2 = 0.35818423, class_3 = 0.29254794), tolerance = 1e-4) expect_equal( unlist(eval_2$Predictions[[2]]$Prediction[[1]]), c(class_1 = 0.228885461, class_2 = 0.239121137, class_3 = 0.227020643, class_4 = 0.304972757), tolerance = 1e-4) expect_equal( unlist(eval_2$Predictions[[3]]$Prediction[[1]]), c(class_1 = 0.20777347, class_2 = 0.23889069, class_3 = 0.1392378, class_4 = 0.29139622, class_5 = 0.12270179), tolerance = 1e-4) # Confusion matrices expect_equal(nrow(eval_2$`Confusion Matrix`[[1]]), 3^2) expect_equal(nrow(eval_2$`Confusion Matrix`[[2]]), 4^2) expect_equal(nrow(eval_2$`Confusion Matrix`[[3]]), 5^2) # Class level results expect_equal(nrow(eval_2$`Class Level Results`[[1]]), 3) expect_equal(nrow(eval_2$`Class Level Results`[[2]]), 4) 
expect_equal(nrow(eval_2$`Class Level Results`[[3]]), 5) }) # Commented out: manual profiling (requires a local file with prepared predictions and hparams) #### # test_that("profiling",{ # # # Load file with prepared predictions and hparams # load(file="") # # evals <- predictions %>% # dplyr::group_by(results_folder, epoch) %>% # cvms:::evaluate(target_col = "target_string", # prediction_cols = current_hparams %>% # dplyr::arrange(class_indices_map_values) %>% # dplyr::pull(class_names) %>% # as.character(), # type = "multinomial", # apply_softmax = FALSE, # parallel = TRUE) # # # })
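# Illustrative sketch: throughout the tests above, `apply_softmax = TRUE` is
# assumed to normalise the prediction columns row-wise with a softmax, so each
# observation's class probabilities sum to 1 before evaluation. The helper
# `manual_softmax` and the matrix `m` below are hypothetical (not part of cvms);
# this is only a minimal, self-contained check of that normalisation.
test_that("manual row-wise softmax sums to 1 (illustrative sketch)", {
  xpectr::set_test_seed(42)
  # Standard softmax applied to each row of a numeric matrix
  manual_softmax <- function(m) {
    t(apply(m, 1, function(r) exp(r) / sum(exp(r))))
  }
  # Hypothetical 3 observations x 2 classes of unnormalised scores
  m <- matrix(runif(6), nrow = 3)
  probs <- manual_softmax(m)
  # Each row should sum to (approximately) 1
  expect_true(all(abs(rowSums(probs) - 1) < 1e-8))
  # And all normalised values should lie strictly between 0 and 1
  expect_true(all(probs > 0 & probs < 1))
})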