testthat::test_that("test against Karol original script", { # test takes too long for CRAN testthat::skip_on_cran() # ============================ Download data ===================== # download the correct version of freMTPL2freq dataset to complete the rec commit <- "2a718359896bee4edf852721364ac5eaae442fc1" # <- use this commit url <- paste0("https://github.com/dutangc/CASdatasets/raw/", commit, "/data/freMTPL2freq.rda") temp <- tempfile() download.file(url, temp) load(temp) freMTPL2freq <- freMTPL2freq |> dplyr::mutate( ClaimRate = ClaimNb / Exposure, ClaimRate = pmin(ClaimRate, quantile(ClaimRate, 0.999)) ) |> dplyr::select(-dplyr::all_of(c("IDpol", "Exposure", "ClaimNb"))) # ============================ Input data ===================== data <- freMTPL2freq |> split_into_train_validate_test(seed = 1) # changing factors to characters... this is necessary as bug in original script handles factors incorrectly # changing "ClaimRate" to use "ClaimNb"... this is necessary as "ClaimNb" hardcoded in KG script and easier to modify in package script # changing "ClaimNb" to round to integer values. This is to avoid warnings in the test environment. splits <- data |> purrr::modify(.f = function(x) dplyr::rename(x, "ClaimNb" = "ClaimRate")) |> purrr::modify(.f = function(x) dplyr::mutate(x, ClaimNb = round(ClaimNb))) # ============================ IBLM package process ===================== IBLM_nu <- train_iblm_xgb( splits, response_var = "ClaimNb", family = "poisson", verbose = 2, params = list(seed = 0, tree_method = "auto") ) # ============================ Karol (og) process ===================== # the following data objects are taken from Karol original script, using the same seed, input and settings # For audit, the inputs were constructed in the `https://github.com/IFoA-ADSWP/IBLM_testing` repo # The inputs are created in: # branch: testing_object_construction # script: construct_iblm_model_test IBLM_og <- list() IBLM_og$glm_model$coefficients <- c( `(Intercept)` = -4.242066337203175, VehPower = 0.020919022901048436, VehAge = -0.0015729129207102523, DrivAge = 0.003678738724085157, BonusMalus = 0.022271569195154908, VehBrandB10 = -0.022050779654071543, VehBrandB11 = 0.12258915754554618, VehBrandB12 = -0.33937589303248894, VehBrandB13 = -0.024222623095271373, VehBrandB14 = -0.15209238438395092, VehBrandB2 = 0.018380080148412886, VehBrandB3 = 0.06854296197332409, VehBrandB4 = 0.069628410141503, VehBrandB5 = 0.16393122240390984, VehBrandB6 = 0.07708115361227003, VehGasRegular = -0.11690985440726433, AreaB = 0.09498747735199077, AreaC = 0.187048079826102, AreaD = 0.3047512946817673, AreaE = 0.37852879400458916, AreaF = 0.7534667692151087, Density = -8.754411353089703e-06, RegionAquitaine = -0.05006972584714555, RegionAuvergne = -0.2685436498310038, `RegionBasse-Normandie` = 0.03663198601389714, RegionBourgogne = -0.05270900448597896, RegionBretagne = 0.007879177787769805, RegionCentre = 0.04112781448054476, `RegionChampagne-Ardenne` = -0.17751644960877921, RegionCorse = 0.004739475283265479, `RegionFranche-Comte` = 0.13791137553603086, `RegionHaute-Normandie` = -0.09172377067765793, `RegionIle-de-France` = -0.11793808470968409, `RegionLanguedoc-Roussillon` = 0.026517531294650443, RegionLimousin = 0.185707738525705, RegionLorraine = -0.1444734080684801, `RegionMidi-Pyrenees` = -0.3722734844103491, `RegionNord-Pas-de-Calais` = -0.14617441627015104, `RegionPays-de-la-Loire` = 0.07216540567490749, RegionPicardie = 0.16669065844932673, `RegionPoitou-Charentes` = 0.11191692361170916, `RegionProvence-Alpes-Cotes-D'Azur` = -0.028409322255906543, `RegionRhone-Alpes` = 0.2464175176544479 ) # commented out: # previous test result using xgboost v3.1.2.1 but not including data.matrix() fix # IBLM_og$booster_model$evaluation_log <- # data.frame( # iter = 1:51, # validation_poisson_nloglik = c( # 4.056412669623219, 4.049391971186827, 4.04593601499781, 4.040796560592584, # 4.038147681033262, 4.037023673948217, 4.035670705689747, 4.03558868150948, # 4.034461414394595, 4.0350597042646505, 4.033899296287862, 4.034118170740442, # 4.0336632400412, 4.033490046582557, 4.033011656338575, 4.033969343580477, # 4.033498235629004, 4.032802075205225, 4.032345918316999, 4.031624212365903, # 4.03118653271876, 4.030500840845004, 4.030124733061418, 4.030014329129201, # 4.028939285980656, 4.028043152320707, 4.029473892921388, 4.029125284354635, # 4.028950720739686, 4.028619772708841, 4.028666405145946, 4.029250004327979, # 4.02853665670761, 4.030008083076863, 4.029501758779282, 4.029577520962253, # 4.029540839021276, 4.029386050326882, 4.02949880839609, 4.0289731192691045, # 4.0294204893222005, 4.029389321784211, 4.029361198219802, 4.028683057475174, # 4.028373390836119, 4.0284907752229975, 4.02853633067712, 4.030251662234247, # 4.030247865018032, 4.0301320481799445, 4.029976138293727 # ) # ) IBLM_og$booster_model$evaluation_log <- data.frame( iter = 1:42, validation_poisson_nloglik = c( 4.055663071821976, 4.0511376224278965, 4.047429231854068, 4.043382042312346, 4.040398398015043, 4.038383840658861, 4.035323472723095, 4.033685188183917, 4.033753625098215, 4.033098768753775, 4.0323387971392535, 4.031543764158868, 4.031739708292138, 4.032990946799782, 4.030436560262982, 4.029935233654651, 4.029625360265585, 4.031376165789747, 4.032221250794373, 4.032896658696438, 4.03057581403106, 4.0309315416143505, 4.029629209625946, 4.0305141145064445, 4.030329241380248, 4.029850974831948, 4.030353241269721, 4.030224678768604, 4.030504354251887, 4.029639271228043, 4.030380707268085, 4.030557012210286, 4.031261938765065, 4.030918106713254, 4.030792515377258, 4.030857810165818, 4.031318963031328, 4.031009757468136, 4.0310946117681485, 4.031450972544412, 4.031814775847612, 4.03165021015748 ) ) # ============================ comparisons ===================== # was GLM fitted the same coefficients? testthat::expect_equal( IBLM_nu$glm_model$coefficients, IBLM_og$glm_model$coefficients ) # was XGBoost fitted with the same log? testthat::expect_equal( IBLM_nu$booster_model |> attr("evaluation_log") |> as.data.frame(), IBLM_og$booster_model$evaluation_log ) })