testthat::test_that("test against Karol original script", { # test takes too long for CRAN testthat::skip_on_cran() # ============================ Download data ===================== # download the correct version of freMTPL2freq dataset to complete the rec commit <- "2a718359896bee4edf852721364ac5eaae442fc1" # <- use this commit url <- paste0("https://github.com/dutangc/CASdatasets/raw/", commit, "/data/freMTPL2freq.rda") temp <- tempfile() download.file(url, temp) load(temp) freMTPL2freq <- freMTPL2freq |> dplyr::mutate( ClaimRate = ClaimNb / Exposure, ClaimRate = pmin(ClaimRate, quantile(ClaimRate, 0.999)) ) |> dplyr::select(-dplyr::all_of(c("IDpol", "Exposure", "ClaimNb"))) # ============================ Input data ===================== data <- freMTPL2freq |> split_into_train_validate_test(seed = 1) # changing factors to characters... this is necessary as bug in original script handles factors incorrectly # changing "ClaimRate" to use "ClaimNb"... this is necessary as "ClaimNb" hardcoded in KG script and easier to modify in package script # changing "ClaimNb" to round to integer values. This is to avoid warnings in the test environment. splits <- data |> purrr::modify(.f = function(x) dplyr::rename(x, "ClaimNb" = "ClaimRate")) |> purrr::modify(.f = function(x) dplyr::mutate(x, ClaimNb = round(ClaimNb))) # ============================ IBLM package process ===================== IBLM <- train_iblm_xgb( splits, response_var = "ClaimNb", family = "poisson", params = list(seed=0, tree_method = "auto") ) # `migrate_reference_to_bias = FALSE` for purposes of test as trying to reconile with KG original script explainer_nu <- explain_iblm(iblm_model = IBLM, data = splits$test, migrate_reference_to_bias = FALSE) # ============================ Karol (og) process ===================== # the following data objects are taken from Karol original script, using the same seed, input and settings # For audit, the inputs were constructed in the `https://github.com/IFoA-ADSWP/IBLM_testing` repo # The inputs are created in: # branch: testing_object_construction # script: construct_explain_test explainer_og <- list() explainer_og$shap_wide_colsums <- c( bias = -1708.6684105788445, VehPower = 26.01553420041986, VehAge = -256.571148711141, DrivAge = -50.06147676303107, BonusMalus = -18.42787296280043, Density = 22.072232441378393, VehBrandB1 = -273.386326685757, VehBrandB10 = 39.751871885357104, VehBrandB11 = -23.553031662544527, VehBrandB12 = 1089.8126656933327, VehBrandB13 = -48.09883122530846, VehBrandB14 = -5.5525740051489265, VehBrandB2 = -495.90280463883755, VehBrandB3 = -224.97964470623992, VehBrandB4 = 125.75223000903497, VehBrandB5 = -129.36582155104406, VehBrandB6 = -28.16889056122818, VehGasDiesel = 59.906505742664194, VehGasRegular = -56.34294857816951, AreaA = 24.2141983361862, AreaB = 6.580454605180421, AreaC = 9.029201175868138, AreaD = -60.96857544378054, AreaE = -66.20219113786334, AreaF = 35.95275043609945, RegionAlsace = 14.868020770605654, RegionAquitaine = 200.02629335300298, RegionAuvergne = -19.55482835287694, `RegionBasse-Normandie` = 10.147629502112977, RegionBourgogne = -28.502676848816918, RegionBretagne = -243.14726781326317, RegionCentre = -1012.1001042925345, `RegionChampagne-Ardenne` = 42.88122421933804, RegionCorse = -9.18966015346814, `RegionFranche-Comte` = 9.929858500952832, `RegionHaute-Normandie` = 42.67572305968497, `RegionIle-de-France` = 445.5563439260586, `RegionLanguedoc-Roussillon` = 207.8120885383687, RegionLimousin = -29.94007034938113, RegionLorraine = -28.336859274364542, `RegionMidi-Pyrenees` = 163.82334529876243, `RegionNord-Pas-de-Calais` = 154.3834550518077, `RegionPays-de-la-Loire` = -74.87333380343625, RegionPicardie = 3.6783490418456495, `RegionPoitou-Charentes` = -2.8664685743424343, `RegionProvence-Alpes-Cotes-D'Azur` = 350.3612418755074, `RegionRhone-Alpes` = -585.5019154318434 ) # c( # bias = -2067.634628662665, # VehPower = 77.15579762084639, # VehAge = -23.215015305266128, # DrivAge = -61.24309050987545, # BonusMalus = -22.445602931956973, # Density = 25.378615547426733, # VehBrandB1 = 0.4929784910491435, # VehBrandB10 = 20.502513993749744, # VehBrandB11 = 47.836586910823826, # VehBrandB12 = 586.8557514288768, # VehBrandB13 = -16.759563245454046, # VehBrandB14 = -10.66495745186694, # VehBrandB2 = -354.99009596340693, # VehBrandB3 = -169.61300241362187, # VehBrandB4 = -47.662840547491214, # VehBrandB5 = -57.07242563375621, # VehBrandB6 = -4.931066558579914, # VehGasDiesel = 262.6049230671415, # VehGasRegular = -260.8031225083723, # AreaA = -7.057203395199394, # AreaB = 8.401192414939942, # AreaC = 6.267889281658995, # AreaD = 7.026916613533103, # AreaE = 7.206054686117568, # AreaF = 0.7287656644657545, # RegionAlsace = 29.41269837069558, # RegionAquitaine = 219.19630470527773, # RegionAuvergne = 0.03033583141223062, # `RegionBasse-Normandie` = 9.1936749826491, # RegionBourgogne = -52.224121282157284, # RegionBretagne = -227.96657285522087, # RegionCentre = -1098.584975179394, # `RegionChampagne-Ardenne` = 32.18406923182192, # RegionCorse = 32.32750511944323, # `RegionFranche-Comte` = 7.144027646740142, # `RegionHaute-Normandie` = 47.25576676859055, # `RegionIle-de-France` = 304.6266946428623, # `RegionLanguedoc-Roussillon` = 222.1023564936586, # RegionLimousin = 23.28911739943578, # RegionLorraine = 58.126529967139504, # `RegionMidi-Pyrenees` = 91.25889614265907, # `RegionNord-Pas-de-Calais` = 89.35858378249577, # `RegionPays-de-la-Loire` = 83.542801530637, # RegionPicardie = 14.302402913701371, # `RegionPoitou-Charentes` = 19.301416881062323, # `RegionProvence-Alpes-Cotes-D'Azur` = 143.6244444957265, # `RegionRhone-Alpes` = -398.0069321255505 # ) explainer_og$raw_shap_colsums <- c( VehPower = 64.9431847836795, VehAge = -498.18680342239327, DrivAge = -836.9150417760684, BonusMalus = -674.3150080790183, VehBrand = 26.30884255161618, VehGas = 3.563557164494682, Area = -51.394162028309665, Density = 243.26412361955158, Region = -387.8696117562795, BIAS = -823.2791528105736 ) # c( # VehPower = 120.16935467716758, # VehAge = -550.591719013908, # DrivAge = -1125.4035902854048, # BonusMalus = -946.9910313908222, # VehBrand = -6.006120989677584, # VehGas = 1.8018005587691732, # Area = 22.57361526551597, # Density = 97.71078889113824, # Region = -350.50497453631397, # BIAS = -1023.7343763839453 # ) # ============================ comparisons ===================== # increase tolerance slightly for non-Windows # The absolute figures we are reconciling against are ran on windows and show tiny differences is_windows <- (Sys.info()["sysname"] == "windows") os_tolerance <- if (!is_windows) 1e-6 else testthat::testthat_tolerance() testthat::expect_equal( explainer_nu$beta_corrections |> colSums(), explainer_og$shap_wide_colsums, tolerance = os_tolerance ) testthat::expect_equal( explainer_nu$shap |> colSums(), explainer_og$raw_shap_colsums, tolerance = os_tolerance ) }) testthat::test_that("test explain completes when one categorical and one continuous", { vars <- c("VehBrand", "VehPower", "ClaimRate") splits <- freMTPLmini |> dplyr::select(dplyr::all_of(vars)) |> split_into_train_validate_test(seed = 1) IBLM <- train_iblm_xgb( splits, response_var = "ClaimRate", family = "poisson" ) testthat::expect_no_error( { ex <- explain_iblm(iblm_model = IBLM, data = splits$test) ex$beta_corrected_scatter(vars[1]) ex$beta_corrected_density(vars[1]) ex$overall_correction() ex$bias_density() } ) }) testthat::test_that("test explain completes when categorical only", { vars <- c("VehBrand", "VehGas", "Area", "ClaimRate") splits <- freMTPLmini |> dplyr::select(dplyr::all_of(vars)) |> split_into_train_validate_test(seed = 1) IBLM <- train_iblm_xgb( splits, response_var = "ClaimRate", family = "poisson" ) testthat::expect_no_error( { ex <- explain_iblm(iblm_model = IBLM, data = splits$test) ex$beta_corrected_scatter(vars[1]) ex$beta_corrected_density(vars[1]) ex$overall_correction() ex$bias_density() } ) }) testthat::test_that("test explain completes when continuous only", { vars <- c("VehPower", "VehAge", "DrivAge", "BonusMalus", "ClaimRate") splits <- freMTPLmini |> dplyr::select(dplyr::all_of(vars)) |> split_into_train_validate_test(seed = 1) IBLM <- train_iblm_xgb( splits, response_var = "ClaimRate", family = "poisson" ) testthat::expect_no_error( { ex <- explain_iblm(iblm_model = IBLM, data = splits$test) ex$beta_corrected_scatter(vars[1]) ex$beta_corrected_density(vars[1]) ex$overall_correction() ex$bias_density() } ) }) testthat::test_that("test explain completes when logical field", { vars <- names(freMTPLmini) splits <- freMTPLmini |> dplyr::select(dplyr::all_of(vars)) |> dplyr::mutate(dummy = sample(c(TRUE, FALSE), size = nrow(freMTPLmini), replace = TRUE)) |> split_into_train_validate_test(seed = 1) IBLM <- train_iblm_xgb( splits, response_var = "ClaimRate", family = "poisson" ) testthat::expect_no_error( { ex <- explain_iblm(iblm_model = IBLM, data = splits$test) ex$beta_corrected_scatter(vars[1]) ex$beta_corrected_density(vars[1]) ex$overall_correction() ex$bias_density() } ) }) testthat::test_that("test explain completes when no reference/zero levels", { vars <- c("VehPower", "VehAge", "DrivAge", "BonusMalus", "ClaimRate") splits <- freMTPLmini |> dplyr::select(dplyr::all_of(vars)) |> dplyr::mutate(dplyr::across(-dplyr::all_of("ClaimRate"), \(x) pmax(x, 1))) |> split_into_train_validate_test(seed = 1) IBLM <- train_iblm_xgb( splits, response_var = "ClaimRate", family = "poisson" ) testthat::expect_no_error( { ex <- explain_iblm(iblm_model = IBLM, data = splits$test) ex$beta_corrected_scatter(vars[1]) ex$beta_corrected_density(vars[1]) ex$overall_correction() suppressMessages({ex$bias_density()}) # expect message here to let user no there are no plots produced } ) }) testthat::test_that("test migrate-to-bias vs non-migrate-to-bias options", { # A note on this test... # This test compares the predictions with 'migrate_reference_to_bias' as TRUE or FALSE. # They should lead to the same predictions # ============================ Input data ===================== splits <- freMTPLmini |> split_into_train_validate_test(seed = 1) # ============================ IBLM package process ===================== IBLM <- train_iblm_xgb( splits, response_var = "ClaimRate", family = "poisson" ) explainer_w_migrate <- explain_iblm(iblm_model = IBLM, data = splits$test, migrate_reference_to_bias = TRUE) explainer_wout_migrate <- explain_iblm(iblm_model = IBLM, data = splits$test, migrate_reference_to_bias = FALSE) coeff_multiplier <- splits$test |> dplyr::select(-dplyr::all_of("ClaimRate")) |> dplyr::mutate( dplyr::across( dplyr::all_of(IBLM$predictor_vars$categorical), ~1 ) ) |> dplyr::mutate(bias = 1, .before = 1) predict_w_migrate <- rowSums(explainer_w_migrate$data_beta_coeff * coeff_multiplier) |> exp() |> unname() predict_wout_migrate <- rowSums(explainer_wout_migrate$data_beta_coeff * coeff_multiplier) |> exp() |> unname() prediction_max_difference <- max(abs(predict_w_migrate / predict_wout_migrate - 1)) testthat::expect_equal(prediction_max_difference, 0) }) testthat::test_that("test gaussian can run", { # note this is just a crude test that it will run. should probably expand with numerical reconciliations vars <- names(freMTPLmini) splits <- freMTPLmini |> split_into_train_validate_test(seed = 1) IBLM <- train_iblm_xgb( splits, response_var = "ClaimRate", family = "gaussian" ) testthat::expect_no_error( { ex <- explain_iblm(iblm_model = IBLM, data = splits$test) ex$beta_corrected_scatter(vars[1]) ex$beta_corrected_density(vars[2]) ex$overall_correction() ex$bias_density() } ) get_pinball_scores(splits$test, IBLM) }) testthat::test_that("test gamma can run", { # note this is just a crude test that it will run. should probably expand with numerical reconciliations vars <- names(freMTPLmini) splits <- freMTPLmini |> dplyr::mutate(ClaimRate = round(ClaimRate) |> pmax(0.1)) |> # set min. ClaimRate to 0.1 as pre-requisite for gamma dist is x>0 split_into_train_validate_test(seed = 1) IBLM <- train_iblm_xgb( splits, response_var = "ClaimRate", family = "gamma" ) testthat::expect_no_error( { ex <- explain_iblm(iblm_model = IBLM, data = splits$test) ex$beta_corrected_scatter(vars[1]) ex$beta_corrected_density(vars[2]) ex$overall_correction() ex$bias_density() get_pinball_scores(splits$test, IBLM) } ) }) testthat::test_that("test tweedie can run", { # note this is just a crude test that it will run. should probably expand with numerical reconciliations vars <- names(freMTPLmini) splits <- freMTPLmini |> split_into_train_validate_test(seed = 1) IBLM <- train_iblm_xgb( splits, response_var = "ClaimRate", family = "tweedie" ) testthat::expect_no_error( { ex <- explain_iblm(iblm_model = IBLM, data = splits$test) ex$beta_corrected_scatter(vars[1]) ex$beta_corrected_density(vars[2]) ex$overall_correction() ex$bias_density() get_pinball_scores(splits$test, IBLM) } ) }) testthat::test_that("test can change objective function", { # note this is just a crude test that it will run. should probably expand with numerical reconciliations vars <- names(freMTPLmini) splits <- freMTPLmini |> split_into_train_validate_test(seed = 1) testthat::expect_message( IBLM <- train_iblm_xgb( splits, response_var = "ClaimRate", family = "poisson", params = list(objective = "reg:squarederror") ) ) testthat::expect_no_error( { ex <- explain_iblm(iblm_model = IBLM, data = splits$test) ex$beta_corrected_scatter(vars[1]) ex$beta_corrected_density(vars[2]) ex$overall_correction() ex$bias_density() get_pinball_scores(splits$test, IBLM) } ) })