library(cvms)
context("metrics")

# Diagnosis by score

test_that("Metrics work for glm in validate()", {

  # skip_test_if_old_R_version()

  xpectr::set_test_seed(7)

  dat <- groupdata2::partition(
    participant.scores,
    p = 0.8,
    cat_col = "diagnosis",
    id_col = "participant",
    list_out = FALSE
  )

  validated <- validate(
    train_data = dat,
    formulas = "diagnosis~score",
    partitions_col = ".partitions",
    family = "binomial",
    positive = 1
  )

  same_model <- glm(diagnosis ~ score,
    data = dat[dat$.partitions == 1, ],
    family = "binomial"
  )

  train_data <- dat[dat$.partitions == 1, ]
  test_data <- dat[dat$.partitions == 2, ]

  prob <- predict(same_model, newdata = test_data, type = c("response"))
  test_data$prob <- prob
  test_data <- test_data %>%
    dplyr::mutate(pred = dplyr::if_else(prob > 0.5, 1, 0))

  # AUC
  g <- pROC::roc(diagnosis ~ prob, data = test_data, direction = "<", levels = c(0, 1))
  expect_equal(validated$AUC, as.numeric(g$auc))

  if (requireNamespace("AUC", quietly = TRUE)) {
    auc2 <- AUC::auc(AUC::roc(test_data$prob, factor(test_data$diagnosis)))
    expect_equal(validated$AUC, auc2)
  }

  # Confusion Matrix Metrics
  conf_mat <- confusion_matrix(
    targets = test_data$diagnosis,
    predictions = test_data$pred,
    positive = levels(as.factor(test_data$diagnosis))[1],
    c_levels = levels(as.factor(train_data$diagnosis))
  )

  # Sensitivity
  expect_equal(validated$Sensitivity, conf_mat$Sensitivity)

  # Specificity
  expect_equal(validated$Specificity, conf_mat$Specificity)

  # posPredValue
  expect_equal(validated$`Pos Pred Value`, conf_mat$`Pos Pred Value`)

  # negPredValue
  expect_equal(validated$`Neg Pred Value`, conf_mat$`Neg Pred Value`)
})

test_that("Metrics work for glmer in validate()", {

  # skip_test_if_old_R_version()

  xpectr::set_test_seed(7)

  dat <- groupdata2::partition(
    participant.scores,
    p = 0.8,
    cat_col = "diagnosis",
    id_col = "participant",
    list_out = FALSE
  )

  validated <- validate(
    train_data = dat,
    formulas = "diagnosis~score+(1|session)",
    partitions_col = ".partitions",
    family = "binomial",
    positive = 1
  )

  same_model <- lme4::glmer(diagnosis ~ score + (1 | session),
    data = dat[dat$.partitions == 1, ],
    family = "binomial"
  )

  train_data <- dat[dat$.partitions == 1, ]
  test_data <- dat[dat$.partitions == 2, ]

  prob <- predict(same_model, newdata = test_data, type = c("response"))
  test_data$prob <- prob
  test_data <- test_data %>%
    dplyr::mutate(pred = dplyr::if_else(prob > 0.5, 1, 0))

  # AUC
  auc1 <- pROC::roc(diagnosis ~ prob, data = test_data, levels = c(0, 1), direction = "<")
  expect_equal(validated$AUC, as.numeric(auc1$auc))

  if (requireNamespace("AUC", quietly = TRUE)) {
    auc2 <- AUC::auc(AUC::roc(test_data$prob, factor(test_data$diagnosis)))
    expect_equal(validated$AUC, auc2)
  }

  # Confusion Matrix metrics
  conf_mat <- confusion_matrix(
    targets = test_data$diagnosis,
    predictions = test_data$pred,
    positive = levels(as.factor(test_data$diagnosis))[1],
    c_levels = levels(as.factor(train_data$diagnosis))
  )

  # Sensitivity
  expect_equal(validated$Sensitivity, conf_mat$Sensitivity)

  # Specificity
  expect_equal(validated$Specificity, conf_mat$Specificity)

  # posPredValue
  expect_equal(validated$`Pos Pred Value`, conf_mat$`Pos Pred Value`)

  # negPredValue
  expect_equal(validated$`Neg Pred Value`, conf_mat$`Neg Pred Value`)
})


# Diagnosis by age

test_that("Metrics work for glm in validate()", {

  # skip_test_if_old_R_version()

  xpectr::set_test_seed(6)

  dat <- groupdata2::partition(
    participant.scores,
    p = 0.8,
    cat_col = "diagnosis",
    id_col = "participant",
    list_out = FALSE
  )

  validated <- validate(
    train_data = dat,
    formulas = "diagnosis~age",
    partitions_col = ".partitions",
    family = "binomial",
    positive = 1
  )

  same_model <- glm(diagnosis ~ age,
    data = dat[dat$.partitions == 1, ],
    family = "binomial"
  )

  train_data <- dat[dat$.partitions == 1, ]
  test_data <- dat[dat$.partitions == 2, ]

  prob <- predict(same_model, newdata = test_data, type = c("response"))
  test_data$prob <- prob
  test_data <- test_data %>%
    dplyr::mutate(pred = dplyr::if_else(prob > 0.5, 1, 0))

  # AUC
  g <- pROC::roc(diagnosis ~ prob,
    data = test_data,
    direction = "<",
    levels = c(0, 1)
  )
  expect_equal(validated$AUC, as.numeric(g$auc))

  if (requireNamespace("AUC", quietly = TRUE)) {
    roc_ <- AUC::roc(test_data$prob, factor(test_data$diagnosis))
    auc2 <- AUC::auc(roc_)
    expect_equal(validated$AUC, auc2)
    # TODO What is the actual underlying error here?
  }

  # Confusion matrix metrics
  conf_mat <- confusion_matrix(
    targets = test_data$diagnosis,
    predictions = test_data$pred,
    positive = levels(as.factor(test_data$diagnosis))[1],
    c_levels = levels(as.factor(train_data$diagnosis))
  )

  # Sensitivity
  expect_equal(validated$Sensitivity, conf_mat$Sensitivity)

  # Specificity
  expect_equal(validated$Specificity, conf_mat$Specificity)

  # posPredValue
  expect_equal(validated$`Pos Pred Value`, conf_mat$`Pos Pred Value`)

  # negPredValue
  expect_equal(validated$`Neg Pred Value`, conf_mat$`Neg Pred Value`)
})

test_that("Metrics work for glmer in validate()", {

  # skip_test_if_old_R_version()

  xpectr::set_test_seed(201)

  dat <- groupdata2::partition(
    participant.scores,
    p = 0.8,
    cat_col = "diagnosis",
    id_col = "participant",
    list_out = FALSE
  )

  validated <- validate(
    train_data = dat,
    formulas = "diagnosis~age+(1|session)",
    partitions_col = ".partitions",
    family = "binomial",
    positive = 1
  )

  same_model <- lme4::glmer(diagnosis ~ age + (1 | session),
    data = dat[dat$.partitions == 1, ],
    family = "binomial"
  )

  train_data <- dat[dat$.partitions == 1, ]
  test_data <- dat[dat$.partitions == 2, ]

  prob <- predict(same_model, newdata = test_data, type = c("response"))
  test_data$prob <- prob
  test_data <- test_data %>%
    dplyr::mutate(pred = dplyr::if_else(prob > 0.5, 1, 0))

  # AUC
  auc1 <- pROC::roc(diagnosis ~ prob, data = test_data, direction = "<", levels = c(0, 1))
  expect_equal(validated$AUC, as.numeric(auc1$auc))

  if (requireNamespace("AUC", quietly = TRUE)) {
    auc2 <- AUC::auc(AUC::roc(
      test_data$prob,
      factor(test_data$diagnosis, levels = levels(as.factor(train_data$diagnosis)))
    ))
    expect_equal(validated$AUC, auc2)
  }

  # Confusion matrix metrics
  conf_mat <- confusion_matrix(
    targets = test_data$diagnosis,
    predictions = test_data$pred,
    positive = levels(as.factor(test_data$diagnosis))[1],
    c_levels = levels(as.factor(train_data$diagnosis))
  )

  # Sensitivity
  expect_equal(validated$Sensitivity, conf_mat$Sensitivity)

  # Specificity
  expect_equal(validated$Specificity, conf_mat$Specificity)

  # posPredValue
  expect_equal(validated$`Pos Pred Value`, conf_mat$`Pos Pred Value`)

  # negPredValue
  expect_equal(validated$`Neg Pred Value`, conf_mat$`Neg Pred Value`)
})

test_that("Metrics work when 0 is positive class for glmer in validate()", {

  # skip_test_if_old_R_version()

  # The AUC approach was improved based on this answer:
  # https://stats.stackexchange.com/a/269577
  # Here I test that it works.
  # First we check what the behavior should be when changing positive to 0.
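
  # A minimal sketch (toy data, not part of the original test) of the identity
  # relied on below: flipping both the probabilities and the labels should
  # leave the AUC unchanged, i.e. AUC(p, y) == AUC(1 - p, 1 - y).
  toy_targets <- c(0, 0, 1, 1, 1)
  toy_probs <- c(0.2, 0.6, 0.4, 0.7, 0.9)
  toy_auc_pos1 <- as.numeric(pROC::roc(
    response = toy_targets,
    predictor = toy_probs,
    direction = "<",
    levels = c(0, 1)
  )$auc)
  toy_auc_pos0 <- as.numeric(pROC::roc(
    response = 1 - toy_targets,
    predictor = 1 - toy_probs,
    direction = ">",
    levels = c(1, 0)
  )$auc)
  expect_equal(toy_auc_pos1, toy_auc_pos0)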
  participant.scores$perfect_predicted_probability <- c(
    0.8, 0.9, 0.7, 0.3, 0.2, 0.1, 0.8, 0.7, 0.7, 0.1,
    0.4, 0.3, 0.8, 0.9, 0.7, 0.8, 0.7, 0.7, 0.7, 0.9,
    0.8, 0.8, 0.7, 0.95, 0.3, 0.2, 0.1, 0.4, 0.25, 0.2
  )

  participant.scores$few_false_negs_predicted_probability <- c(
    0.2, 0.3, 0.4, 0.3, 0.2, 0.1, 0.8, 0.7, 0.7, 0.1,
    0.4, 0.3, 0.8, 0.9, 0.7, 0.8, 0.7, 0.7, 0.7, 0.9,
    0.8, 0.8, 0.7, 0.95, 0.3, 0.2, 0.1, 0.4, 0.25, 0.2
  )

  participant.scores$few_false_pos_predicted_probability <- c(
    0.8, 0.9, 0.7, 0.7, 0.9, 0.6, 0.8, 0.7, 0.7, 0.1,
    0.4, 0.3, 0.8, 0.9, 0.7, 0.8, 0.7, 0.7, 0.7, 0.9,
    0.8, 0.8, 0.7, 0.95, 0.3, 0.2, 0.1, 0.4, 0.25, 0.2
  )

  participant.scores$worst_predicted_probability <- 1 - c(
    0.8, 0.9, 0.7, 0.3, 0.2, 0.1, 0.8, 0.7, 0.7, 0.1,
    0.4, 0.3, 0.8, 0.9, 0.7, 0.8, 0.7, 0.7, 0.7, 0.9,
    0.8, 0.8, 0.7, 0.95, 0.3, 0.2, 0.1, 0.4, 0.25, 0.2
  )

  # AUC (positive = 1 vs positive = 0)

  # PERFECT

  # With AUC::
  if (requireNamespace("AUC", quietly = TRUE)) {
    AUC_auc_perfect <- AUC::auc(AUC::roc(
      participant.scores$perfect_predicted_probability,
      factor(participant.scores$diagnosis)
    ))
    AUC_auc_perfect_pos0 <- AUC::auc(AUC::roc(
      1 - participant.scores$perfect_predicted_probability,
      factor(1 - participant.scores$diagnosis)
    ))
    expect_equal(AUC_auc_perfect, AUC_auc_perfect_pos0)

    # With pROC
    pROC_auc_perfect <- as.numeric(pROC::roc(
      response = participant.scores$diagnosis,
      predictor = participant.scores$perfect_predicted_probability,
      direction = "<",
      levels = c(0, 1)
    )$auc)
    pROC_auc_perfect_pos0 <- as.numeric(pROC::roc(
      response = 1 - participant.scores$diagnosis,
      predictor = 1 - participant.scores$perfect_predicted_probability,
      direction = ">",
      levels = c(1, 0)
    )$auc)
    expect_equal(pROC_auc_perfect, pROC_auc_perfect_pos0)
    expect_equal(pROC_auc_perfect, AUC_auc_perfect)
    expect_equal(AUC_auc_perfect_pos0, pROC_auc_perfect_pos0)
  }

  # FALSE NEGATIVES

  # With AUC
  if (requireNamespace("AUC", quietly = TRUE)) {
    AUC_auc_false_negs <- AUC::auc(AUC::roc(
      participant.scores$few_false_negs_predicted_probability,
      factor(participant.scores$diagnosis)
    ))
    AUC_auc_false_negs_pos0 <- AUC::auc(AUC::roc(
      1 - participant.scores$few_false_negs_predicted_probability,
      factor(1 - participant.scores$diagnosis)
    ))
    expect_equal(AUC_auc_false_negs, AUC_auc_false_negs_pos0)

    # With pROC
    pROC_auc_false_negs <- as.numeric(pROC::roc(
      response = participant.scores$diagnosis,
      predictor = participant.scores$few_false_negs_predicted_probability,
      direction = "<",
      levels = c(0, 1)
    )$auc)
    pROC_auc_false_negs_pos0 <- as.numeric(pROC::roc(
      response = 1 - participant.scores$diagnosis,
      predictor = 1 - participant.scores$few_false_negs_predicted_probability,
      direction = ">",
      levels = c(1, 0)
    )$auc)
    expect_equal(pROC_auc_false_negs, pROC_auc_false_negs_pos0)
    expect_equal(pROC_auc_false_negs, AUC_auc_false_negs)
    expect_equal(AUC_auc_false_negs_pos0, pROC_auc_false_negs_pos0)
  }

  # FALSE POSITIVES

  # With AUC
  if (requireNamespace("AUC", quietly = TRUE)) {
    AUC_auc_false_pos <- AUC::auc(AUC::roc(
      participant.scores$few_false_pos_predicted_probability,
      factor(participant.scores$diagnosis)
    ))
    AUC_auc_false_pos_pos0 <- AUC::auc(AUC::roc(
      1 - participant.scores$few_false_pos_predicted_probability,
      factor(1 - participant.scores$diagnosis)
    ))
    expect_equal(AUC_auc_false_pos, AUC_auc_false_pos_pos0)

    # With pROC
    pROC_auc_false_pos <- as.numeric(pROC::roc(
      response = participant.scores$diagnosis,
      predictor = participant.scores$few_false_pos_predicted_probability,
      direction = "<",
      levels = c(0, 1)
    )$auc)
    pROC_auc_false_pos_pos0 <- as.numeric(pROC::roc(
      response = 1 - participant.scores$diagnosis,
      predictor = 1 - participant.scores$few_false_pos_predicted_probability,
      direction = ">",
      levels = c(1, 0)
    )$auc)
    expect_equal(pROC_auc_false_pos, pROC_auc_false_pos_pos0)
    expect_equal(pROC_auc_false_pos, AUC_auc_false_pos)
    expect_equal(AUC_auc_false_pos_pos0, pROC_auc_false_pos_pos0)
  }

  # ALL WRONG

  # With AUC
  if (requireNamespace("AUC", quietly = TRUE)) {
    AUC_auc_worst <- AUC::auc(AUC::roc(
      participant.scores$worst_predicted_probability,
      factor(participant.scores$diagnosis)
    ))
    AUC_auc_worst_pos0 <- AUC::auc(AUC::roc(
      1 - participant.scores$worst_predicted_probability,
      factor(1 - participant.scores$diagnosis)
    ))
    expect_equal(AUC_auc_worst, AUC_auc_worst_pos0)

    # With pROC
    pROC_auc_worst <- as.numeric(pROC::roc(
      response = participant.scores$diagnosis,
      predictor = participant.scores$worst_predicted_probability,
      direction = "<",
      levels = c(0, 1)
    )$auc)
    pROC_auc_worst_pos0 <- as.numeric(pROC::roc(
      response = 1 - participant.scores$diagnosis,
      predictor = 1 - participant.scores$worst_predicted_probability,
      direction = ">",
      levels = c(1, 0)
    )$auc)
    expect_equal(pROC_auc_worst, pROC_auc_worst_pos0)
    expect_equal(pROC_auc_worst, AUC_auc_worst)
    expect_equal(AUC_auc_worst_pos0, pROC_auc_worst_pos0)
  }

  xpectr::set_test_seed(201)

  dat <- groupdata2::partition(
    participant.scores,
    p = 0.8,
    cat_col = "diagnosis",
    id_col = "participant",
    list_out = FALSE
  )

  validated_pos1 <- validate(
    train_data = dat,
    formulas = "diagnosis~score",
    partitions_col = ".partitions",
    family = "binomial",
    positive = 2
  )

  validated_pos0 <- validate(
    train_data = dat,
    formulas = "diagnosis~score",
    partitions_col = ".partitions",
    family = "binomial",
    positive = 1
  )

  expect_equal(validated_pos1$AUC, validated_pos0$AUC)

  validated_pos1 <- validate(
    train_data = dat,
    formulas = "diagnosis~age",
    partitions_col = ".partitions",
    family = "binomial",
    positive = 2
  )

  validated_pos0 <- validate(
    train_data = dat,
    formulas = "diagnosis~age",
    partitions_col = ".partitions",
    family = "binomial",
    positive = 1
  )

  expect_equal(validated_pos1$AUC, validated_pos0$AUC)

  # If the dependent variable is a factor with character labels
  dat$diagnosis_chr <- factor(ifelse(dat$diagnosis == 0, "a", "b"))

  validated_pos1_num <- validate(
    train_data = dat,
    formulas = "diagnosis_chr~age",
    partitions_col = ".partitions",
    family = "binomial",
    positive = 2
  )

  validated_pos1_chr <- validate(
    train_data = dat,
    formulas = "diagnosis_chr~age",
    partitions_col = ".partitions",
    family = "binomial",
    positive = "b"
  )

  expect_equal(validated_pos1_num$AUC, validated_pos1_chr$AUC)

  validated_pos0_num <- validate(
    train_data = dat,
    formulas = "diagnosis_chr~age",
    partitions_col = ".partitions",
    family = "binomial",
    positive = 1
  )

  validated_pos0_chr <- validate(
    train_data = dat,
    formulas = "diagnosis_chr~age",
    partitions_col = ".partitions",
    family = "binomial",
    positive = "a"
  )

  expect_equal(validated_pos0_num$AUC, validated_pos0_chr$AUC)
  expect_equal(validated_pos0_num$AUC, validated_pos1_num$AUC)
  expect_equal(validated_pos0_chr$AUC, validated_pos1_chr$AUC)
})

test_that("AUC works", {

  # skip_test_if_old_R_version()

  #
  # In this test, I printed the predictions within each training loop and
  # manually copied them here. This is to ensure that cross_validate gathers
  # the predictions correctly before calculating its metrics, which is
  # incredibly important. The metrics are calculated below and compared to
  # the metrics I got from cross_validate.
  #
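
  # A minimal, hedged cross-check on toy data (the helper below is
  # illustrative only, not part of cvms): AUC equals the normalised
  # Mann-Whitney U statistic, so a rank-based calculation should agree
  # with pROC on the same predictions.
  manual_auc_ <- function(targets, probs) {
    pos <- probs[targets == 1]
    neg <- probs[targets == 0]
    ranks <- rank(c(pos, neg))
    (sum(ranks[seq_along(pos)]) - length(pos) * (length(pos) + 1) / 2) /
      (length(pos) * length(neg))
  }
  toy_obs <- c(0, 1, 0, 1, 1)
  toy_prob <- c(0.1, 0.8, 0.4, 0.3, 0.9)
  expect_equal(
    manual_auc_(toy_obs, toy_prob),
    as.numeric(pROC::roc(
      response = toy_obs, predictor = toy_prob,
      direction = "<", levels = c(0, 1)
    )$auc)
  )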
  target <- c(
    0, 0, 0, 1, 1, 1, 0, 0, 0, 1,
    1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    1, 0, 0, 0, 1, 1, 1, 1, 1, 1
  )

  predictions_prob <- c(
    0.77379615, 0.36952324, 0.09125579, 0.89205819, 0.73620142,
    0.55282759, 0.8307928, 0.6042899, 0.1754574, 0.9317034,
    0.8307928, 0.5145979, 0.9269098, 0.6874739, 0.5867096,
    0.71867985, 0.26746773, 0.09346533, 0.85976827, 0.24884534,
    0.13205012, 0.6503171, 0.4541755, 0.1564246, 0.8445872,
    0.7085838, 0.5871876, 0.8514956, 0.7607141, 0.7085838
  )

  predictions <- dplyr::if_else(predictions_prob > 0.5, 1, 0)
  pred_df <- data.frame("obs" = target, "prob" = predictions_prob, "pred" = predictions)

  # AUC
  auc1 <- pROC::roc(obs ~ prob, data = pred_df, direction = "<", levels = c(0, 1))
  expect_equal(as.numeric(auc1$auc), 0.7615741, tolerance = 1e-3)

  if (requireNamespace("AUC", quietly = TRUE)) {
    auc2 <- AUC::auc(AUC::roc(pred_df$prob, factor(pred_df$obs)))
    expect_equal(auc2, 0.7615741, tolerance = 1e-3)
  }
})

test_that("Metrics work in confusion_matrix()", {

  # skip_test_if_old_R_version()

  #
  # In this test, I printed the predictions within each training loop and
  # manually copied them here. This is to ensure that cross_validate gathers
  # the predictions correctly before calculating its metrics, which is
  # incredibly important. The metrics are calculated below and compared to
  # the metrics I got from cross_validate.
  #

  target <- c(
    0, 0, 0, 1, 1, 1, 0, 0, 0, 1,
    1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    1, 0, 0, 0, 1, 1, 1, 1, 1, 1
  )

  predictions_prob <- c(
    0.77379615, 0.36952324, 0.09125579, 0.89205819, 0.73620142,
    0.55282759, 0.8307928, 0.6042899, 0.1754574, 0.9317034,
    0.8307928, 0.5145979, 0.9269098, 0.6874739, 0.5867096,
    0.71867985, 0.26746773, 0.09346533, 0.85976827, 0.24884534,
    0.13205012, 0.6503171, 0.4541755, 0.1564246, 0.8445872,
    0.7085838, 0.5871876, 0.8514956, 0.7607141, 0.7085838
  )

  predictions <- dplyr::if_else(predictions_prob > 0.5, 1, 0)
  pred_df <- data.frame("obs" = target, "prob" = predictions_prob, "pred" = predictions)

  # Confusion matrix metrics
  conf_mat <- confusion_matrix(
    targets = pred_df$obs,
    predictions = pred_df$pred,
    positive = levels(as.factor(pred_df$obs))[1],
    c_levels = levels(as.factor(pred_df$obs)),
    metrics = "all"
  )

  ## Testing 'conf_mat'                                                     ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(conf_mat),
    c("cfm_results", "cfm_binomial", "tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    conf_mat[["Positive Class"]],
    "0",
    fixed = TRUE)
  expect_equal(
    conf_mat[["Balanced Accuracy"]],
    0.73611,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["Accuracy"]],
    0.76667,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["F1"]],
    0.66667,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["Sensitivity"]],
    0.58333,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["Specificity"]],
    0.88889,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["Pos Pred Value"]],
    0.77778,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["Neg Pred Value"]],
    0.7619,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["Kappa"]],
    0.49275,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["MCC"]],
    0.50483,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["Detection Rate"]],
    0.23333,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["Detection Prevalence"]],
    0.3,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["Prevalence"]],
    0.4,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["False Neg Rate"]],
    0.41667,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["False Pos Rate"]],
    0.11111,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["False Discovery Rate"]],
    0.22222,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["False Omission Rate"]],
    0.2381,
    tolerance = 1e-4)
  expect_equal(
    conf_mat[["Threat Score"]],
    0.5,
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(conf_mat),
    c("Confusion Matrix", "Table", "Positive Class", "Balanced Accuracy",
      "Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value",
      "Neg Pred Value", "Kappa", "MCC", "Detection Rate", "Detection Prevalence",
      "Prevalence", "False Neg Rate", "False Pos Rate", "False Discovery Rate",
      "False Omission Rate", "Threat Score"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(conf_mat),
    c("list", "list", "character",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(conf_mat),
    c("list", "list", "character",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(conf_mat),
    c(1L, 20L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(conf_mat)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'conf_mat'                                            ####

  # F1
  F1 <- (2 * conf_mat$`Pos Pred Value` * conf_mat$Sensitivity) /
    (conf_mat$`Pos Pred Value` + conf_mat$Sensitivity)
  expect_equal(F1, 0.6666667, tolerance = 1e-5)
  expect_equal(conf_mat$F1, F1, tolerance = 1e-5)

  confMatTable <- conf_mat[["Table"]][[1]]

  # Confusion matrix
  TP <- confMatTable[1] # Dependent on positive = 0 ?
  FP <- confMatTable[3]
  FN <- confMatTable[2]
  TN <- confMatTable[4]

  ppv <- TP / (TP + FP)
  expect_equal(ppv, conf_mat$`Pos Pred Value`[[1]], tolerance = 1e-5)
  expect_equal(ppv, 0.7777778, tolerance = 1e-5)

  npv <- TN / (TN + FN)
  expect_equal(npv, conf_mat$`Neg Pred Value`[[1]], tolerance = 1e-5)
  expect_equal(npv, 0.7619048, tolerance = 1e-5)

  sensitivity <- TP / (TP + FN)
  expect_equal(sensitivity, conf_mat$Sensitivity[[1]], tolerance = 1e-5)
  expect_equal(sensitivity, 0.5833333, tolerance = 1e-5)

  specificity <- TN / (TN + FP)
  expect_equal(specificity, conf_mat$Specificity[[1]], tolerance = 1e-5)
  expect_equal(specificity, 0.8888889, tolerance = 1e-5)

  acc <- (TP + TN) / (TP + TN + FP + FN)
  expect_equal(acc, conf_mat$Accuracy[[1]], tolerance = 1e-5)
  expect_equal(acc, 0.7666667, tolerance = 1e-5)

  F1_2 <- 2 * ppv * sensitivity / (ppv + sensitivity)
  expect_equal(F1_2, conf_mat$F1[[1]], tolerance = 1e-5)
  expect_equal(F1_2, 0.6666667, tolerance = 1e-5)

  bal_acc <- (sensitivity + specificity) / 2
  expect_equal(bal_acc, conf_mat$`Balanced Accuracy`[[1]], tolerance = 1e-5)
  expect_equal(bal_acc, 0.7361111, tolerance = 1e-5)

  p_observed <- TP + TN
  p_expected <- (((TP + FP) * (TP + FN)) + ((FN + TN) * (FP + TN))) / (TP + TN + FP + FN)
  kappa <- (p_observed - p_expected) / ((TP + TN + FP + FN) - p_expected)
  expect_equal(kappa, conf_mat$Kappa[[1]], tolerance = 1e-5)
  expect_equal(kappa, 0.4927536, tolerance = 1e-5)

  mcc <- ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
  expect_equal(mcc, conf_mat$MCC[[1]], tolerance = 1e-5)
  expect_equal(mcc, 0.5048268, tolerance = 1e-5)

  detection_rate <- TP / (TP + TN + FP + FN)
  expect_equal(detection_rate, conf_mat$`Detection Rate`[[1]], tolerance = 1e-5)
  expect_equal(detection_rate, 0.2333333, tolerance = 1e-5)

  detection_prevalence <- (TP + FP) / (TP + TN + FP + FN)
  expect_equal(detection_prevalence, conf_mat$`Detection Prevalence`[[1]], tolerance = 1e-5)
  expect_equal(detection_prevalence, 0.3, tolerance = 1e-5)

  prevalence <- (TP + FN) / (TP + TN + FP + FN)
  expect_equal(prevalence, conf_mat$Prevalence[[1]], tolerance = 1e-5)
  expect_equal(prevalence, 0.4, tolerance = 1e-5)

  fnr <- FN / (FN + TP)
  expect_equal(fnr, conf_mat$`False Neg Rate`[[1]], tolerance = 1e-5)
  expect_equal(fnr, 0.4166667, tolerance = 1e-5)

  fpr <- FP / (TN + FP)
  expect_equal(fpr, conf_mat$`False Pos Rate`[[1]], tolerance = 1e-5)
  expect_equal(fpr, 0.1111111, tolerance = 1e-5)

  fdr <- FP / (FP + TP)
  expect_equal(fdr, conf_mat$`False Discovery Rate`[[1]], tolerance = 1e-5)
  expect_equal(fdr, 0.22222, tolerance = 1e-5)

  f_omission_rate <- FN / (FN + TN)
  expect_equal(f_omission_rate, conf_mat$`False Omission Rate`[[1]], tolerance = 1e-5)
  expect_equal(f_omission_rate, 0.2380952, tolerance = 1e-5)

  threat_score <- TP / (TP + FN + FP)
  expect_equal(threat_score, conf_mat$`Threat Score`[[1]], tolerance = 1e-5)
  expect_equal(threat_score, 0.5, tolerance = 1e-5)

  # Test that MCC does not care about which class is positive
  expect_equal(
    mcc(list("TP" = TP, "FP" = FP, "FN" = FN, "TN" = TN)),
    mcc(list("TP" = TN, "FP" = FN, "FN" = FP, "TN" = TP))
  )

  # Multiclass MCC
  xpectr::set_test_seed(1)
  mc_preds <- factor(sample(c(1, 2, 3, 4), 100, replace = TRUE))
  mc_targs_random <- factor(sample(c(1, 2, 3, 4), 100, replace = TRUE))
  mc_targs_good <- factor(ifelse(runif(100) < 0.7, mc_preds, mc_targs_random))

  mcc_cmf <- confusion_matrix(
    targets = mc_targs_good,
    predictions = mc_preds,
    do_one_vs_all = FALSE
  )
  # yardstick_mcc <- yardstick::mcc_vec(mc_targs_good, mc_preds)
  # 0.759631087897275
  expect_equal(mcc_cmf$MCC, 0.759631087897275, tolerance = 1e-8)

  mcc_cmf <- confusion_matrix(
    targets = mc_targs_random,
    predictions = mc_preds,
    do_one_vs_all = FALSE
  )
  # yardstick_mcc <- yardstick::mcc_vec(mc_targs_random, mc_preds)
  # 0.0153721822602552
  expect_equal(mcc_cmf$MCC, 0.0153721822602552, tolerance = 1e-8)
})

test_that("evaluate_residuals() metrics work", {

  # skip_test_if_old_R_version()

  # Normal distribution
  xpectr::set_test_seed(6)
  targets <- rnorm(100)
  preds <- rnorm(100)
  df <- data.frame(t = targets, p = preds)

  results <- evaluate_residuals(df,
    prediction_col = "p",
    target_col = "t",
    metrics = "all"
  )

  ## Testing 'results'                                                      ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(results),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    results[["RMSE"]],
    1.23924,
    tolerance = 1e-4)
  expect_equal(
    results[["MAE"]],
    0.98881,
    tolerance = 1e-4)
  expect_equal(
    results[["NRMSE(RNG)"]],
    0.27174,
    tolerance = 1e-4)
  expect_equal(
    results[["NRMSE(IQR)"]],
    0.96402,
    tolerance = 1e-4)
  expect_equal(
    results[["NRMSE(STD)"]],
    1.1991,
    tolerance = 1e-4)
  expect_equal(
    results[["NRMSE(AVG)"]],
    -121.41375,
    tolerance = 1e-4)
  expect_equal(
    results[["RMSLE"]],
    NaN,
    tolerance = 1e-4)
  expect_equal(
    results[["MALE"]],
    NaN,
    tolerance = 1e-4)
  expect_equal(
    results[["RAE"]],
    1.20507,
    tolerance = 1e-4)
  expect_equal(
    results[["RSE"]],
    1.45236,
    tolerance = 1e-4)
  expect_equal(
    results[["RRSE"]],
    1.20514,
    tolerance = 1e-4)
  expect_equal(
    results[["MAPE"]],
    4.7236,
    tolerance = 1e-4)
  expect_equal(
    results[["MSE"]],
    1.53572,
    tolerance = 1e-4)
  expect_equal(
    results[["TAE"]],
    98.88096,
    tolerance = 1e-4)
  expect_equal(
    results[["TSE"]],
    153.5717,
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(results),
    c("RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)",
"RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE", "TSE"), fixed = TRUE) # Testing column classes expect_equal( xpectr::element_classes(results), c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric"), fixed = TRUE) # Testing column types expect_equal( xpectr::element_types(results), c("double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double", "double"), fixed = TRUE) # Testing dimensions expect_equal( dim(results), c(1L, 15L)) # Testing group keys expect_equal( colnames(dplyr::group_keys(results)), character(0), fixed = TRUE) ## Finished testing 'results' #### # Manual calculation resids <- function(targets, preds, log=FALSE){ if (isTRUE(log)) xpectr::suppress_mw(err <- log(preds + 1) - log(targets + 1)) else err <- preds - targets err } centered_targets <- function(targets){ targets - mean(targets) } rmse_ <- function(targets, preds, log=FALSE) { err <- resids(targets, preds, log) sqrt(mean(err^2)) } mae_ <- function(targets, preds, log=FALSE) { err <- resids(targets, preds, log) mean(abs(err)) } nrmse_ <- function(targets, preds, by){ rms <- rmse_(targets, preds) if (by == "iqr") div <- IQR(targets) else if (by == "sd") div <- sd(targets) else if (by == "avg") div <- mean(targets) else if (by == "rng") div <- max(targets)-min(targets) rms / div } rae_ <- function(targets, preds){ err <- sum(abs(resids(targets, preds))) cent <- sum(abs(centered_targets(targets))) err/cent } rse_ <- function(targets, preds){ err <- sum(resids(targets, preds)^2) cent <- sum(centered_targets(targets)^2) err/cent } mape_ <- function(targets, preds){ err <- resids(targets, preds) mean(abs(err/targets)) } # RMSE expect_equal(results$RMSE, rmse_(targets, preds), tolerance = 1e-3) expect_equal(results$RMSE, 1.23924, tolerance = 1e-3) # RMSLE expect_equal(results$RMSLE, rmse_(targets, preds, log = TRUE), tolerance = 1e-3) expect_equal(results$RMSLE, NaN, tolerance = 1e-3) # MAE expect_equal(results$MAE, mae_(targets, preds), tolerance = 1e-3) expect_equal(results$MAE, 0.9888096, tolerance = 1e-3) # MALE expect_equal(results$MALE, mae_(targets, preds, log = TRUE), tolerance = 1e-3) expect_equal(results$MALE, NaN, tolerance = 1e-3) # NRMSE expect_equal(results$`NRMSE(RNG)`, nrmse_(targets, preds, by = "rng"), tolerance = 1e-3) expect_equal(results$`NRMSE(RNG)`, 0.271736645098678, tolerance = 1e-3) expect_equal(results$`NRMSE(IQR)`, nrmse_(targets, preds, by = "iqr"), tolerance = 1e-3) expect_equal(results$`NRMSE(IQR)`, 0.964022899327126, tolerance = 1e-3) expect_equal(results$`NRMSE(STD)`, nrmse_(targets, preds, by = "sd"), tolerance = 1e-3) expect_equal(results$`NRMSE(STD)`, 1.19909776380955, tolerance = 1e-3) expect_equal(results$`NRMSE(AVG)`, nrmse_(targets, preds, by = "avg"), tolerance = 1e-3) expect_equal(results$`NRMSE(AVG)`, -121.413747175841, tolerance = 1e-3) # RAE expect_equal(results$RAE, rae_(targets, preds), tolerance = 1e-3) expect_equal(results$RAE, 1.2050715889456, tolerance = 1e-3) # RSE expect_equal(results$RSE, rse_(targets, preds), tolerance = 1e-3) expect_equal(results$RSE, 1.45235903754855, tolerance = 1e-3) # RRSE expect_equal(results$RRSE, sqrt(rse_(targets, preds)), tolerance = 1e-3) expect_equal(results$RRSE, 1.20513859682136, tolerance = 1e-3) # MAPE expect_equal(results$MAPE, mape_(targets, preds), tolerance = 1e-3) expect_equal(results$MAPE, 4.72360030788065, tolerance = 1e-3) # MSE 
  expect_equal(results$MSE, rmse_(targets, preds)^2, tolerance = 1e-3)
  expect_equal(results$MSE, 1.53571701341794, tolerance = 1e-3)

  # TAE
  expect_equal(results$TAE, sum(abs(resids(targets, preds))), tolerance = 1e-3)
  expect_equal(results$TAE, 98.880955884436, tolerance = 1e-3)

  # TSE
  expect_equal(results$TSE, sum(resids(targets, preds)^2), tolerance = 1e-3)
  expect_equal(results$TSE, 153.571701341794, tolerance = 1e-3)

  # Uniform distribution
  xpectr::set_test_seed(9)
  targets <- runif(100, min = 45, max = 97)
  preds <- runif(100, min = 54, max = 120)
  df <- data.frame(t = targets, p = preds)

  results <- evaluate_residuals(df,
    prediction_col = "p",
    target_col = "t",
    metrics = "all"
  )

  # RMSE
  expect_equal(results$RMSE, rmse_(targets, preds), tolerance = 1e-3)
  expect_equal(results$RMSE, 30.2487016310356, tolerance = 1e-3)

  # RMSLE
  expect_equal(results$RMSLE, rmse_(targets, preds, log = TRUE), tolerance = 1e-3)
  expect_equal(results$RMSLE, 0.381933438597387, tolerance = 1e-3)

  # MAE
  expect_equal(results$MAE, mae_(targets, preds), tolerance = 1e-3)
  expect_equal(results$MAE, 24.3477034755331, tolerance = 1e-3)

  # MALE
  expect_equal(results$MALE, mae_(targets, preds, log = TRUE), tolerance = 1e-3)
  expect_equal(results$MALE, 0.309460458578487, tolerance = 1e-3)

  # NRMSE
  expect_equal(results$`NRMSE(RNG)`, nrmse_(targets, preds, by = "rng"), tolerance = 1e-3)
  expect_equal(results$`NRMSE(RNG)`, 0.585117778308063, tolerance = 1e-3)
  expect_equal(results$`NRMSE(IQR)`, nrmse_(targets, preds, by = "iqr"), tolerance = 1e-3)
  expect_equal(results$`NRMSE(IQR)`, 1.05622503346585, tolerance = 1e-3)
  expect_equal(results$`NRMSE(STD)`, nrmse_(targets, preds, by = "sd"), tolerance = 1e-3)
  expect_equal(results$`NRMSE(STD)`, 1.90845876022934, tolerance = 1e-3)
  expect_equal(results$`NRMSE(AVG)`, nrmse_(targets, preds, by = "avg"), tolerance = 1e-3)
  expect_equal(results$`NRMSE(AVG)`, 0.428886643444635, tolerance = 1e-3)

  # RAE
  expect_equal(results$RAE, rae_(targets, preds), tolerance = 1e-3)
  expect_equal(results$RAE, 1.76907453491764, tolerance = 1e-3)

  # RSE
  expect_equal(results$RSE, rse_(targets, preds), tolerance = 1e-3)
  expect_equal(results$RSE, 3.67900488837992, tolerance = 1e-3)

  # RRSE
  expect_equal(results$RRSE, sqrt(rse_(targets, preds)), tolerance = 1e-3)
  expect_equal(results$RRSE, 1.9180732228932, tolerance = 1e-3)

  # MAPE
  expect_equal(results$MAPE, mape_(targets, preds), tolerance = 1e-3)
  expect_equal(results$MAPE, 0.390727375837037, tolerance = 1e-3)

  # MSE
  expect_equal(results$MSE, rmse_(targets, preds)^2, tolerance = 1e-3)
  expect_equal(results$MSE, 914.983950363419, tolerance = 1e-3)

  # TAE
  expect_equal(results$TAE, sum(abs(resids(targets, preds))), tolerance = 1e-3)
  expect_equal(results$TAE, 2434.77034755331, tolerance = 1e-3)

  # TSE
  expect_equal(results$TSE, sum(resids(targets, preds)^2), tolerance = 1e-3)
  expect_equal(results$TSE, 91498.3950363419, tolerance = 1e-3)
})