# Copyright 2025 Observational Health Data Sciences and Informatics
#
# This file is part of PatientLevelPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
test_that("LightGBM settings work", {
  skip_if_not_installed("lightgbm")
  skip_on_cran()
  seed <- sample(10000000, 1)
  # =====================================
  # checking Light GBM
  # =====================================
  lgbmSet <- setLightGBM(
    nthread = 5,
    earlyStopRound = 25,
    numIterations = 10,
    numLeaves = c(31, 20),
    maxDepth = 5,
    minDataInLeaf = 10,
    learningRate = 0.1,
    lambdaL1 = 0,
    lambdaL2 = 0,
    scalePosWeight = 1,
    isUnbalance = FALSE,
    seed = seed
  )

  expect_s3_class(lgbmSet, "modelSettings")
  expect_type(lgbmSet$param, "list")

  expect_equal(lgbmSet$settings$modelType, "binary")
  expect_equal(lgbmSet$settings$seed, seed)
  expect_equal(lgbmSet$settings$modelName, "lightGBM")

  expect_equal(lgbmSet$settings$threads, 5)
  expect_equal(lgbmSet$settings$variableImportance, "varImpLightGBM")
  expect_equal(lgbmSet$settings$train, "fitLightGBM")
  expect_equal(lgbmSet$settings$predict, "predictLightGBM")
})


test_that("LightGBM settings expected errors", {
  skip_if_not_installed("lightgbm")
  skip_on_cran()
  # =====================================
  # checking Gradient Boosting Machine
  # =====================================

  expect_error(setLightGBM(numIterations = -1))
  expect_error(setLightGBM(numLeaves = -1))
  expect_error(setLightGBM(numLeaves = 10000000))
  expect_error(setLightGBM(learningRate = -2))
  expect_error(setLightGBM(seed = "F"))
  expect_error(setLightGBM(lambdaL1 = -1))
  expect_error(setLightGBM(lambdaL2 = -1))
  expect_error(setLightGBM(scalePosWeight = -1))
  expect_error(setLightGBM(isUnbalance = TRUE, scalePosWeight = 0.5))
})


test_that("LightGBM working checks", {
  skip_if_not_installed("lightgbm")
  skip_on_cran()
  skip_if_offline()
  modelSettings <- setLightGBM(
    numIterations = 10, 
    maxDepth = 3, 
    learningRate = 0.1, 
    numLeaves = 31, 
    minDataInLeaf = 10, 
    lambdaL1 = 0, 
    lambdaL2 = 0
  )

  fitModel <- fitPlp(
    trainData = trainData,
    modelSettings = modelSettings,
    analysisId = "lgbmTest",
    analysisPath = tempdir()
  )

  expect_equal(nrow(fitModel$prediction), nrow(trainData$labels) * 2)
  expect_equal(length(unique(fitModel$prediction$evaluationType)), 2)

  # check prediction between 0 and 1
  expect_gte(min(fitModel$prediction$value), 0)
  expect_lte(max(fitModel$prediction$value), 1)

  expect_equal(class(fitModel$model), c("lgb.Booster", "R6"))

  expect_lte(
    nrow(fitModel$covariateImportance), 
    trainData$covariateData$covariateRef %>% dplyr::tally() %>% dplyr::pull()
  )

  expect_equal(fitModel$modelDesign$outcomeId, outcomeId)
  expect_equal(fitModel$modelDesign$targetId, 1)
  # TODO check other model design values?

  # test that at least some features have importances that are not zero
  expect_equal(sum(abs(fitModel$covariateImportance$covariateValue)) > 0, TRUE)

  # test save and load
  savePath <- tempfile("lgbmTest_")
  unlink(savePath, recursive = TRUE)
  savePlpModel(fitModel, savePath)

  # current format should be JSON:
  expect_true(file.exists(file.path(savePath, "model.json")))
  serialized <- ParallelLogger::loadSettingsFromJson(file.path(savePath, "model.json"))
  expect_equal(serialized$modelType, "lightGBM")
  expect_equal(serialized$modelFormat, "lightgbm_txt")
  expect_true(is.character(serialized$model))
  expect_length(serialized$model, 1)
  expect_match(serialized$model, "^tree")
  expect_match(serialized$model, "version=")
  expect_equal(serialized$model, fitModel$model$save_model_to_string(NULL))

  loadedBooster <- lightgbm::lgb.load(model_str = serialized$model)
  expect_equal(class(loadedBooster), c("lgb.Booster", "R6"))
  expect_equal(loadedBooster$num_trees(), fitModel$model$num_trees())

  loadModel <- loadPlpModel(savePath)

  expect_s3_class(loadModel, "plpModel")
  expect_equal(class(loadModel$model), c("lgb.Booster", "R6"))
  expect_equal(loadModel$model$num_trees(), fitModel$model$num_trees())

  predFit <- predictPlp(
    plpModel = fitModel,
    plpData = testData,
    population = testData$labels
  )
  predLoad <- predictPlp(
    plpModel = loadModel,
    plpData = testData,
    population = testData$labels
  )
  expect_equal(predLoad$value, predFit$value, tolerance = 0e-10)
  expect_true(all(predLoad$value >= 0))
  expect_true(all(predLoad$value <= 1))

  # backwards compatibility: old LightGBM serialization (raw text in model.json)
  lightgbm::lgb.save(fitModel$model, file.path(savePath, "model.json"))
  expect_error(ParallelLogger::loadSettingsFromJson(file.path(savePath, "model.json")))
  loadModelOld <- loadPlpModel(savePath)
  expect_s3_class(loadModelOld, "plpModel")
  expect_equal(class(loadModelOld$model), c("lgb.Booster", "R6"))
})