testthat::test_that("test against Karol original script", {

  # test takes too long for CRAN
  testthat::skip_on_cran()


  # ============================ Download data =====================

  # download the correct version of freMTPL2freq dataset to complete the rec

  commit <- "2a718359896bee4edf852721364ac5eaae442fc1"  # <- use this commit

  url <- paste0("https://github.com/dutangc/CASdatasets/raw/", commit, "/data/freMTPL2freq.rda")

  temp <- tempfile()

  download.file(url, temp)

  load(temp)


  freMTPL2freq <- freMTPL2freq |>
    dplyr::mutate(
      ClaimRate = ClaimNb / Exposure,
      ClaimRate = pmin(ClaimRate, quantile(ClaimRate, 0.999))
    ) |>
    dplyr::select(-dplyr::all_of(c("IDpol", "Exposure", "ClaimNb")))

  # ============================ Input data =====================

  data <- freMTPL2freq |> split_into_train_validate_test(seed = 1)


  # changing factors to characters... this is necessary as bug in original script handles factors incorrectly
  # changing "ClaimRate" to use "ClaimNb"... this is necessary as "ClaimNb" hardcoded in KG script and easier to modify in package script
  # changing "ClaimNb" to round to integer values. This is to avoid warnings in the test environment.
  splits <- data |>
    purrr::modify(.f = function(x) x |> dplyr::mutate(dplyr::across(dplyr::where(is.factor), function(field) as.character(field)))) |>
    purrr::modify(.f = function(x) dplyr::rename(x, "ClaimNb" = "ClaimRate")) |>
    purrr::modify(.f = function(x) dplyr::mutate(x, ClaimNb = round(ClaimNb)))

  # ============================ IBLM package process =====================

  IBLM <- train_iblm_xgb(
    splits,
    response_var = "ClaimNb",
    family = "poisson"
  )

  # `migrate_reference_to_bias = FALSE` for purposes of test as trying to reconile with KG original script
  explainer_nu <- explain_iblm(iblm_model = IBLM, data = splits$test, migrate_reference_to_bias = FALSE)


  # ============================ Karol (og) process =====================

  # the following data objects are taken from Karol original script, using the same seed, input and settings

  # For audit, the inputs were constructed in the `https://github.com/IFoA-ADSWP/IBLM_testing` repo
  # The inputs are created in:
  # branch: testing_object_construction
  # script: construct_explain_test

  explainer_og <- list()

  explainer_og$shap_wide_colsums <-
    c(
      bias = -2067.634628662665,
      VehPower = 77.15579762084639,
      VehAge = -23.215015305266128,
      DrivAge = -61.24309050987545,
      BonusMalus = -22.445602931956973,
      Density = 25.378615547426733,
      VehBrandB1 = 0.4929784910491435,
      VehBrandB10 = 20.502513993749744,
      VehBrandB11 = 47.836586910823826,
      VehBrandB12 = 586.8557514288768,
      VehBrandB13 = -16.759563245454046,
      VehBrandB14 = -10.66495745186694,
      VehBrandB2 = -354.99009596340693,
      VehBrandB3 = -169.61300241362187,
      VehBrandB4 = -47.662840547491214,
      VehBrandB5 = -57.07242563375621,
      VehBrandB6 = -4.931066558579914,
      VehGasDiesel = 262.6049230671415,
      VehGasRegular = -260.8031225083723,
      AreaA = -7.057203395199394,
      AreaB = 8.401192414939942,
      AreaC = 6.267889281658995,
      AreaD = 7.026916613533103,
      AreaE = 7.206054686117568,
      AreaF = 0.7287656644657545,
      RegionAlsace = 29.41269837069558,
      RegionAquitaine = 219.19630470527773,
      RegionAuvergne = 0.03033583141223062,
      `RegionBasse-Normandie` = 9.1936749826491,
      RegionBourgogne = -52.224121282157284,
      RegionBretagne = -227.96657285522087,
      RegionCentre = -1098.584975179394,
      `RegionChampagne-Ardenne` = 32.18406923182192,
      RegionCorse = 32.32750511944323,
      `RegionFranche-Comte` = 7.144027646740142,
      `RegionHaute-Normandie` = 47.25576676859055,
      `RegionIle-de-France` = 304.6266946428623,
      `RegionLanguedoc-Roussillon` = 222.1023564936586,
      RegionLimousin = 23.28911739943578,
      RegionLorraine = 58.126529967139504,
      `RegionMidi-Pyrenees` = 91.25889614265907,
      `RegionNord-Pas-de-Calais` = 89.35858378249577,
      `RegionPays-de-la-Loire` = 83.542801530637,
      RegionPicardie = 14.302402913701371,
      `RegionPoitou-Charentes` = 19.301416881062323,
      `RegionProvence-Alpes-Cotes-D'Azur` = 143.6244444957265,
      `RegionRhone-Alpes` = -398.0069321255505
    )

  explainer_og$raw_shap_colsums <-
    c(
      VehPower = 120.16935467716758,
      VehAge = -550.591719013908,
      DrivAge = -1125.4035902854048,
      BonusMalus = -946.9910313908222,
      VehBrand = -6.006120989677584,
      VehGas = 1.8018005587691732,
      Area = 22.57361526551597,
      Density = 97.71078889113824,
      Region = -350.50497453631397,
      BIAS = -1023.7343763839453
    )

  explainer_og$betas <-
    c(
      `(Intercept)` = -4.242066337203175,
      VehPower = 0.020919022901048436,
      VehAge = -0.0015729129207102523,
      DrivAge = 0.003678738724085157,
      BonusMalus = 0.022271569195154908,
      VehBrandB10 = -0.022050779654071543,
      VehBrandB11 = 0.12258915754554618,
      VehBrandB12 = -0.33937589303248894,
      VehBrandB13 = -0.024222623095271373,
      VehBrandB14 = -0.15209238438395092,
      VehBrandB2 = 0.018380080148412886,
      VehBrandB3 = 0.06854296197332409,
      VehBrandB4 = 0.069628410141503,
      VehBrandB5 = 0.16393122240390984,
      VehBrandB6 = 0.07708115361227003,
      VehGasRegular = -0.11690985440726433,
      AreaB = 0.09498747735199077,
      AreaC = 0.187048079826102,
      AreaD = 0.3047512946817673,
      AreaE = 0.37852879400458916,
      AreaF = 0.7534667692151087,
      Density = -8.754411353089703e-06,
      RegionAquitaine = -0.05006972584714555,
      RegionAuvergne = -0.2685436498310038,
      `RegionBasse-Normandie` = 0.03663198601389714,
      RegionBourgogne = -0.05270900448597896,
      RegionBretagne = 0.007879177787769805,
      RegionCentre = 0.04112781448054476,
      `RegionChampagne-Ardenne` = -0.17751644960877921,
      RegionCorse = 0.004739475283265479,
      `RegionFranche-Comte` = 0.13791137553603086,
      `RegionHaute-Normandie` = -0.09172377067765793,
      `RegionIle-de-France` = -0.11793808470968409,
      `RegionLanguedoc-Roussillon` = 0.026517531294650443,
      RegionLimousin = 0.185707738525705,
      RegionLorraine = -0.1444734080684801,
      `RegionMidi-Pyrenees` = -0.3722734844103491,
      `RegionNord-Pas-de-Calais` = -0.14617441627015104,
      `RegionPays-de-la-Loire` = 0.07216540567490749,
      RegionPicardie = 0.16669065844932673,
      `RegionPoitou-Charentes` = 0.11191692361170916,
      `RegionProvence-Alpes-Cotes-D'Azur` = -0.028409322255906543,
      `RegionRhone-Alpes` = 0.2464175176544479
    )

  # ============================ comparisons =====================

  # increase tolerance slightly for mac-os.
  # The absolute figures we are reconciling against are ran on windows and show tiny differences
  is_mac_arm <- Sys.info()["sysname"] == "Darwin" &&
    grepl("arm", Sys.info()["machine"], ignore.case = TRUE)

  os_tolerance <- if (is_mac_arm) 1e-6 else testthat::testthat_tolerance()

  testthat::expect_equal(
    explainer_nu$beta_corrections |> colSums(),
    explainer_og$shap_wide_colsums,
    tolerance = os_tolerance
  )

  testthat::expect_equal(
    explainer_nu$shap |> colSums(),
    explainer_og$raw_shap_colsums,
    tolerance = os_tolerance
  )
})



testthat::test_that("test explain completes when one categorical and one continuous", {
  vars <- c("VehBrand", "VehPower", "ClaimRate")

    splits <- freMTPLmini  |>
      dplyr::select(dplyr::all_of(vars)) |>
      split_into_train_validate_test(seed = 1)

  IBLM <- train_iblm_xgb(
    splits,
    response_var = "ClaimRate",
    family = "poisson"
  )

  testthat::expect_no_error(
    {
      ex <- explain_iblm(iblm_model = IBLM, data = splits$test)
      ex$beta_corrected_scatter(vars[1])
      ex$beta_corrected_density(vars[1])
      ex$overall_correction()
      ex$bias_density()
    }
  )
})

testthat::test_that("test explain completes when categorical only", {
  vars <- c("VehBrand", "VehGas", "Area", "ClaimRate")


    splits <- freMTPLmini  |>
      dplyr::select(dplyr::all_of(vars)) |>
      split_into_train_validate_test(seed = 1)

  IBLM <- train_iblm_xgb(
    splits,
    response_var = "ClaimRate",
    family = "poisson"
  )

  testthat::expect_no_error(
    {
      ex <- explain_iblm(iblm_model = IBLM, data = splits$test)
      ex$beta_corrected_scatter(vars[1])
      ex$beta_corrected_density(vars[1])
      ex$overall_correction()
      ex$bias_density()
    }
  )
})

testthat::test_that("test explain completes when continuous only", {

  vars <- c("VehPower", "VehAge", "DrivAge", "BonusMalus", "ClaimRate")

  splits <- freMTPLmini  |>
    dplyr::select(dplyr::all_of(vars)) |>
    split_into_train_validate_test(seed = 1)

  IBLM <- train_iblm_xgb(
    splits,
    response_var = "ClaimRate",
    family = "poisson"
  )

  testthat::expect_no_error(
    {
      ex <- explain_iblm(iblm_model = IBLM, data = splits$test)
      ex$beta_corrected_scatter(vars[1])
      ex$beta_corrected_density(vars[1])
      ex$overall_correction()
      ex$bias_density()
    }
  )
})


testthat::test_that("test explain completes when logical field", {

  vars <- names(freMTPLmini)


  splits <- freMTPLmini  |>
    dplyr::select(dplyr::all_of(vars)) |>
    dplyr::mutate(dummy = sample(c(TRUE, FALSE), size = nrow(freMTPLmini), replace = TRUE)) |>
    split_into_train_validate_test(seed = 1)

  IBLM <- train_iblm_xgb(
    splits,
    response_var = "ClaimRate",
    family = "poisson"
  )

  testthat::expect_no_error(
    {
      ex <- explain_iblm(iblm_model = IBLM, data = splits$test)
      ex$beta_corrected_scatter(vars[1])
      ex$beta_corrected_density(vars[1])
      ex$overall_correction()
      ex$bias_density()
    }
  )
})



testthat::test_that("test explain completes when no reference/zero levels", {

  vars <- c("VehPower", "VehAge", "DrivAge", "BonusMalus", "ClaimRate")

  splits <- freMTPLmini  |>
    dplyr::select(dplyr::all_of(vars)) |>
    dplyr::mutate(dplyr::across(-dplyr::all_of("ClaimRate"), \(x) pmax(x, 1))) |>
    split_into_train_validate_test(seed = 1)

  IBLM <- train_iblm_xgb(
    splits,
    response_var = "ClaimRate",
    family = "poisson"
  )

  testthat::expect_no_error(
    {
      ex <- explain_iblm(iblm_model = IBLM, data = splits$test)
      ex$beta_corrected_scatter(vars[1])
      ex$beta_corrected_density(vars[1])
      ex$overall_correction()
      suppressMessages({ex$bias_density()}) # expect message here to let user no there are no plots produced
      }
  )
})











testthat::test_that("test migrate-to-bias vs non-migrate-to-bias options", {

  # A note on this test...

  # This test compares the predictions with 'migrate_reference_to_bias' as TRUE or FALSE.
  # They should lead to the same predictions

  # ============================ Input data =====================

  splits <- freMTPLmini |>
    split_into_train_validate_test(seed = 1)

  # ============================ IBLM package process =====================

  IBLM <- train_iblm_xgb(
    splits,
    response_var = "ClaimRate",
    family = "poisson"
  )

  explainer_w_migrate <- explain_iblm(iblm_model = IBLM, data = splits$test, migrate_reference_to_bias = TRUE)

  explainer_wout_migrate <- explain_iblm(iblm_model = IBLM, data = splits$test, migrate_reference_to_bias = FALSE)

  coeff_multiplier <- splits$test |>
    dplyr::select(-dplyr::all_of("ClaimRate")) |>
    dplyr::mutate(
      dplyr::across(
        dplyr::all_of(IBLM$predictor_vars$categorical),
        ~1
      )
    ) |>
    dplyr::mutate(bias = 1, .before = 1)

  predict_w_migrate <- rowSums(explainer_w_migrate$data_beta_coeff * coeff_multiplier) |>
    exp() |>
    unname()

  predict_wout_migrate <- rowSums(explainer_wout_migrate$data_beta_coeff * coeff_multiplier) |>
    exp() |>
    unname()

  prediction_max_difference <- max(abs(predict_w_migrate / predict_wout_migrate - 1))

  testthat::expect_equal(prediction_max_difference, 0)

})










testthat::test_that("test gaussian can run", {

  # note this is just a crude test that it will run. should probably expand with numerical reconciliations

  vars <- names(freMTPLmini)

  splits <- freMTPLmini |>
    split_into_train_validate_test(seed = 1)

  IBLM <- train_iblm_xgb(
    splits,
    response_var = "ClaimRate",
    family = "gaussian"
  )

  testthat::expect_no_error(
    {
      ex <- explain_iblm(iblm_model = IBLM, data = splits$test)
      ex$beta_corrected_scatter(vars[1])
      ex$beta_corrected_density(vars[2])
      ex$overall_correction()
      ex$bias_density()
    }
  )

  get_pinball_scores(splits$test, IBLM)

})


testthat::test_that("test gamma can run", {

  # note this is just a crude test that it will run. should probably expand with numerical reconciliations

  vars <- names(freMTPLmini)

  splits <- freMTPLmini |>
    dplyr::mutate(ClaimRate = round(ClaimRate) |> pmax(0.1)) |> # set min. ClaimRate to 0.1 as pre-requisite for gamma dist is x>0
    split_into_train_validate_test(seed = 1)


  IBLM <- train_iblm_xgb(
    splits,
    response_var = "ClaimRate",
    family = "gamma"
  )

  testthat::expect_no_error(
    {
      ex <- explain_iblm(iblm_model = IBLM, data = splits$test)
      ex$beta_corrected_scatter(vars[1])
      ex$beta_corrected_density(vars[2])
      ex$overall_correction()
      ex$bias_density()
      get_pinball_scores(splits$test, IBLM)
    }
  )

})



testthat::test_that("test tweedie can run", {

  # note this is just a crude test that it will run. should probably expand with numerical reconciliations

  vars <- names(freMTPLmini)

  splits <- freMTPLmini |>
    split_into_train_validate_test(seed = 1)

  IBLM <- train_iblm_xgb(
    splits,
    response_var = "ClaimRate",
    family = "tweedie"
  )

  testthat::expect_no_error(
    {
      ex <- explain_iblm(iblm_model = IBLM, data = splits$test)
      ex$beta_corrected_scatter(vars[1])
      ex$beta_corrected_density(vars[2])
      ex$overall_correction()
      ex$bias_density()
      get_pinball_scores(splits$test, IBLM)
    }
  )

})