test_that("Binary windowed functions work", { skip_if_not_installed("modeldata") x30 <- .cal_table_windowed( segment_logistic, truth = Class, estimate = .pred_good, step_size = 0.11, window_size = 0.10 ) x30_1 <- segment_logistic |> dplyr::mutate( x = dplyr::case_when( .pred_good <= 0.05 ~ 1, .pred_good >= 0.06 & .pred_good <= 0.16 ~ 2, .pred_good >= 0.17 & .pred_good <= 0.27 ~ 3, .pred_good >= 0.28 & .pred_good <= 0.38 ~ 4, .pred_good >= 0.39 & .pred_good <= 0.49 ~ 5, .pred_good >= 0.50 & .pred_good <= 0.60 ~ 6, .pred_good >= 0.61 & .pred_good <= 0.71 ~ 7, .pred_good >= 0.72 & .pred_good <= 0.82 ~ 8, .pred_good >= 0.83 & .pred_good <= 0.93 ~ 9, .pred_good >= 0.94 & .pred_good <= 1 ~ 10, ) ) |> dplyr::filter(!is.na(x)) |> dplyr::count(x) expect_equal( x30$total, x30_1$n ) x31 <- cal_plot_windowed(segment_logistic, Class, .pred_good) expect_s3_class(x31, "ggplot") expect_false(has_facet(x31)) x32 <- .cal_table_windowed( testthat_cal_binary(), step_size = 0.11, window_size = 0.10 ) x32_1 <- testthat_cal_binary() |> tune::collect_predictions(summarize = TRUE) |> dplyr::mutate( x = dplyr::case_when( .pred_class_1 <= 0.05 ~ 1, .pred_class_1 >= 0.06 & .pred_class_1 <= 0.16 ~ 2, .pred_class_1 >= 0.17 & .pred_class_1 <= 0.27 ~ 3, .pred_class_1 >= 0.28 & .pred_class_1 <= 0.38 ~ 4, .pred_class_1 >= 0.39 & .pred_class_1 <= 0.49 ~ 5, .pred_class_1 >= 0.50 & .pred_class_1 <= 0.60 ~ 6, .pred_class_1 >= 0.61 & .pred_class_1 <= 0.71 ~ 7, .pred_class_1 >= 0.72 & .pred_class_1 <= 0.82 ~ 8, .pred_class_1 >= 0.83 & .pred_class_1 <= 0.93 ~ 9, .pred_class_1 >= 0.94 & .pred_class_1 <= 1 ~ 10, ) ) |> dplyr::filter(!is.na(x)) |> dplyr::count(.config, x) expect_equal( x32$total, x32_1$n ) x33 <- cal_plot_windowed(testthat_cal_binary()) expect_s3_class(x33, "ggplot") expect_true(has_facet(x33)) win_configs <- bin_with_configs() |> cal_plot_windowed(truth = Class, estimate = .pred_good) expect_true(has_facet(win_configs)) # ------------------------------------------------------------------------------ # multinomial outcome, binary windowed plots multi_configs_from_tune <- testthat_cal_multiclass() |> cal_plot_windowed() expect_s3_class(multi_configs_from_tune, "ggplot") # should be faceted by .config and class expect_true(inherits(multi_configs_from_tune$facet, "FacetGrid")) multi_configs_from_df <- mnl_with_configs() |> cal_plot_windowed(truth = obs, estimate = c(VF:L)) expect_s3_class(multi_configs_from_df, "ggplot") # should be faceted by .config and class expect_true(inherits(multi_configs_from_df$facet, "FacetGrid")) }) test_that("windowed plot function errors - grouped_df", { expect_snapshot_error( cal_plot_windowed(dplyr::group_by(mtcars, vs)) ) }) test_that("don't facet if there is only one .config", { class_data <- testthat_cal_binary() class_data$.predictions <- lapply( class_data$.predictions, function(x) dplyr::filter(x, .config == "Preprocessor1_Model1") ) res_windowed <- cal_plot_windowed(class_data) expect_null(res_windowed$data[[".config"]]) expect_s3_class(res_windowed, "ggplot") }) test_that("Groupings that may not match work", { model <- glm(Class ~ .pred_good, segment_logistic, family = "binomial") preds <- 1 - predict(model, segment_logistic, type = "response") combined <- dplyr::bind_rows( dplyr::mutate(segment_logistic, source = "original"), dplyr::mutate(segment_logistic, .pred_good = preds, source = "glm") ) x51 <- combined |> dplyr::group_by(source) |> .cal_table_windowed( truth = Class, estimate = .pred_good, step_size = 0.11, window_size = 0.10 ) x51_1 <- combined |> dplyr::mutate( x = dplyr::case_when( .pred_good <= 0.05 ~ 1, .pred_good >= 0.06 & .pred_good <= 0.16 ~ 2, .pred_good >= 0.17 & .pred_good <= 0.27 ~ 3, .pred_good >= 0.28 & .pred_good <= 0.38 ~ 4, .pred_good >= 0.39 & .pred_good <= 0.49 ~ 5, .pred_good >= 0.50 & .pred_good <= 0.60 ~ 6, .pred_good >= 0.61 & .pred_good <= 0.71 ~ 7, .pred_good >= 0.72 & .pred_good <= 0.82 ~ 8, .pred_good >= 0.83 & .pred_good <= 0.93 ~ 9, .pred_good >= 0.94 & .pred_good <= 1 ~ 10, ) ) |> dplyr::filter(!is.na(x)) |> dplyr::count(source, x) expect_equal( x51$total, x51_1$n ) }) test_that("Groups are respected", { preds <- segment_logistic |> dplyr::mutate(source = "logistic") |> dplyr::bind_rows(segment_naive_bayes) |> dplyr::mutate(source = ifelse(is.na(source), "nb", source)) |> dplyr::group_by(source) x42 <- .cal_table_windowed(preds, Class, .pred_good) expect_equal(as.integer(table(x42$source)), c(21, 21)) expect_equal(unique(x42$source), c("logistic", "nb")) })