# Copyright 2025 Observational Health Data Sciences and Informatics
#
# This file is part of PatientLevelPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Tests for the scikit-learn classifier wrappers. All tests require the
# reticulate bridge to Python and are skipped on CRAN. Fixtures such as
# tinyTrainData, trainData, oneTrainData, testData, population and the
# expect_correct_fitPlp() helper are defined in the test setup files.

test_that("DecisionTree settings work checks", {
  skip_if_not_installed("reticulate")
  skip_on_cran()
  # Exercise the full hyperparameter grid interface of setDecisionTree and
  # confirm the resulting settings object points at the right sklearn class.
  dtset <- setDecisionTree(
    criterion = list("gini"),
    splitter = list("best"),
    maxDepth = list(4, 10, NULL),
    minSamplesSplit = list(2, 10),
    minSamplesLeaf = list(10, 50),
    minWeightFractionLeaf = list(0),
    maxFeatures = list(100, "sqrt", NULL),
    maxLeafNodes = list(NULL),
    minImpurityDecrease = list(10^-7),
    classWeight = list(NULL),
    seed = sample(1000000, 1)
  )

  expect_false(dtset$settings$requiresDenseMatrix)
  expect_equal(dtset$settings$modelName, "decisionTree")
  expect_equal(dtset$settings$modelType, "binary")
  expect_equal(dtset$settings$pythonModule, "sklearn.tree")
  expect_equal(dtset$settings$pythonClass, "DecisionTreeClassifier")
})

test_that("DecisionTree errors as expected", {
  skip_if_not_installed("reticulate")
  skip_on_cran()
  # Invalid hyperparameter values must be rejected at settings-creation time.
  expect_error(setDecisionTree(criterion = list("madeup")))
  expect_error(setDecisionTree(maxDepth = list(-1)))
  expect_error(setDecisionTree(minSamplesSplit = list(-1)))
  expect_error(setDecisionTree(minSamplesLeaf = list(-1)))
})

test_that("check fit of DecisionTree", {
  skip_if_not_installed("reticulate")
  skip_on_cran()
  modelSettings <- setDecisionTree(
    criterion = list("gini"),
    splitter = list("best"),
    maxDepth = list(as.integer(4)),
    minSamplesSplit = list(2),
    minSamplesLeaf = list(10),
    minWeightFractionLeaf = list(0),
    maxFeatures = list("sqrt"),
    maxLeafNodes = list(NULL),
    minImpurityDecrease = list(10^-7),
    classWeight = list(NULL),
    seed = sample(1000000, 1)
  )

  plpModel <- fitPlp(
    trainData = tinyTrainData,
    modelSettings = modelSettings,
    analysisId = "DecisionTree",
    analysisPath = tempdir()
  )

  # Predicting on the training data should reproduce the stored train
  # predictions (same data, same fitted model, deterministic scoring).
  predictions <- predictPlp(plpModel, tinyTrainData, tinyTrainData$labels)
  trainPredictions <- plpModel$prediction %>%
    dplyr::filter(.data$evaluationType == "Train") %>%
    dplyr::pull(.data$value)
  expect_equal(mean(predictions$value), mean(trainPredictions))

  expect_correct_fitPlp(plpModel, trainData, testLocation = FALSE)
  # add check for other model design settings

  # test save and load
  savePath <- tempfile("decisionTree_")
  unlink(savePath, recursive = TRUE)
  savePlpModel(plpModel, savePath)
  loadModel <- loadPlpModel(savePath)
  expect_s3_class(loadModel, "plpModel")

  # A round-tripped model must produce identical probabilities in [0, 1].
  predFit <- predictPlp(plpModel, testData, testData$labels)
  predLoad <- predictPlp(loadModel, testData, testData$labels)
  expect_equal(predLoad$value, predFit$value, tolerance = 1e-10)
  expect_true(all(predLoad$value >= 0))
  expect_true(all(predLoad$value <= 1))

  # test with one feature
  oneModel <- fitPlp(
    trainData = oneTrainData,
    modelSettings = modelSettings,
    analysisId = "DecisionTreeOne",
    analysisPath = tempdir()
  )
  onePredictions <- predictPlp(oneModel, oneTrainData, oneTrainData$labels)
  oneTrainPredictions <- oneModel$prediction %>%
    dplyr::filter(.data$evaluationType == "Train") %>%
    dplyr::pull(.data$value)
  expect_equal(mean(onePredictions$value), mean(oneTrainPredictions))
  expect_correct_fitPlp(oneModel, oneTrainData, testLocation = FALSE)
})

test_that("fitClassifier errors with wrong covariateData", {
  skip_if_not_installed("reticulate")
  skip_on_cran()
  # Corrupting the covariateData class must make fitClassifier fail fast.
  newTrainData <- copyTrainData(trainData)
  class(newTrainData$covariateData) <- "notCovariateData"
  modelSettings <- setAdaBoost()
  analysisId <- 42
  expect_error(fitClassifier(newTrainData, modelSettings,
    analysisId = analysisId
  ))
})

test_that("AdaBoost fit works", {
  skip_if_not_installed("reticulate")
  skip_on_cran()
  # NOTE: removed a trailing comma that passed an empty argument to
  # setAdaBoost(), which silently matched the next formal positionally.
  modelSettings <- setAdaBoost(
    nEstimators = list(10),
    learningRate = list(0.1)
  )

  plpModel <- fitPlp(
    trainData = tinyTrainData,
    modelSettings = modelSettings,
    analysisId = "Adaboost",
    analysisPath = tempdir()
  )
  expect_correct_fitPlp(plpModel, trainData, testLocation = FALSE)

  # Single-feature data should also fit and score consistently.
  oneModel <- fitPlp(
    trainData = oneTrainData,
    modelSettings = modelSettings,
    analysisId = "AdaBoostOne",
    analysisPath = tempdir()
  )
  onePredictions <- predictPlp(oneModel, oneTrainData, oneTrainData$labels)
  oneTrainPredictions <- oneModel$prediction %>%
    dplyr::filter(.data$evaluationType == "Train") %>%
    dplyr::pull(.data$value)
  expect_equal(mean(onePredictions$value), mean(oneTrainPredictions))
  expect_correct_fitPlp(oneModel, oneTrainData, testLocation = FALSE)
})

test_that("RandomForest fit works", {
  skip_if_not_installed("reticulate")
  skip_on_cran()
  modelSettings <- setRandomForest(
    ntrees = list(10),
    maxDepth = list(4),
    minSamplesSplit = list(2),
    minSamplesLeaf = list(10),
    mtries = list("sqrt"),
    maxSamples = list(0.9),
    classWeight = list(NULL)
  )

  plpModel <- fitPlp(
    trainData = tinyTrainData,
    modelSettings = modelSettings,
    analysisId = "RandomForest",
    analysisPath = tempdir()
  )
  expect_correct_fitPlp(plpModel, trainData, testLocation = FALSE)

  oneModel <- fitPlp(
    trainData = oneTrainData,
    modelSettings = modelSettings,
    analysisId = "RFOne",
    analysisPath = tempdir()
  )
  onePredictions <- predictPlp(oneModel, oneTrainData, oneTrainData$labels)
  oneTrainPredictions <- oneModel$prediction %>%
    dplyr::filter(.data$evaluationType == "Train") %>%
    dplyr::pull(.data$value)
  expect_equal(mean(onePredictions$value), mean(oneTrainPredictions))
  expect_correct_fitPlp(oneModel, oneTrainData, testLocation = FALSE)
})

test_that("MLP fit works", {
  skip_if_not_installed("reticulate")
  skip_on_cran()
  modelSettings <- setMLP(
    hiddenLayerSizes = list(c(20)),
    alpha = list(1e-6),
    maxIter = list(50),
    epsilon = list(1e-08),
    learningRateInit = list(0.01),
    tol = list(1e-2) # reduce tol so I don't get convergence warnings
  )

  plpModel <- fitPlp(
    trainData = tinyTrainData,
    modelSettings = modelSettings,
    analysisId = "MLP",
    analysisPath = tempdir()
  )
  expect_correct_fitPlp(plpModel, trainData, testLocation = FALSE)

  oneModel <- fitPlp(
    trainData = oneTrainData,
    modelSettings = modelSettings,
    analysisId = "MLPOne",
    analysisPath = tempdir()
  )
  onePredictions <- predictPlp(oneModel, oneTrainData, oneTrainData$labels)
  oneTrainPredictions <- oneModel$prediction %>%
    dplyr::filter(.data$evaluationType == "Train") %>%
    dplyr::pull(.data$value)
  expect_equal(mean(onePredictions$value), mean(oneTrainPredictions))
  expect_correct_fitPlp(oneModel, oneTrainData, testLocation = FALSE)
})

test_that("Naive bayes fit works", {
  skip_if_not_installed("reticulate")
  skip_on_cran()
  modelSettings <- setNaiveBayes()

  plpModel <- fitPlp(
    trainData = tinyTrainData,
    modelSettings = modelSettings,
    analysisId = "Naive bayes",
    analysisPath = tempdir()
  )
  expect_correct_fitPlp(plpModel, trainData, testLocation = FALSE)

  # test save and load
  savePath <- tempfile("naiveBayes_")
  unlink(savePath, recursive = TRUE)
  savePlpModel(plpModel, savePath)
  loadModel <- loadPlpModel(savePath)
  expect_s3_class(loadModel, "plpModel")

  # A round-tripped model must produce identical probabilities in [0, 1].
  predFit <- predictPlp(plpModel, testData, testData$labels)
  predLoad <- predictPlp(loadModel, testData, testData$labels)
  expect_equal(predLoad$value, predFit$value, tolerance = 1e-10)
  expect_true(all(predLoad$value >= 0))
  expect_true(all(predLoad$value <= 1))

  oneModel <- fitPlp(
    trainData = oneTrainData,
    modelSettings = modelSettings,
    analysisId = "NaiveBayesOne",
    analysisPath = tempdir()
  )
  onePredictions <- predictPlp(oneModel, oneTrainData, oneTrainData$labels)
  oneTrainPredictions <- oneModel$prediction %>%
    dplyr::filter(.data$evaluationType == "Train") %>%
    dplyr::pull(.data$value)
  expect_equal(mean(onePredictions$value), mean(oneTrainPredictions))
  expect_correct_fitPlp(oneModel, oneTrainData, testLocation = FALSE)
})

test_that("Support vector machine fit works", {
  skip_if_not_installed("reticulate")
  skip_on_cran()
  modelSettings <- setSVM(
    C = list(1),
    degree = list(1),
    gamma = list("scale"),
    classWeight = list(NULL)
  )

  plpModel <- fitPlp(
    trainData = tinyTrainData,
    modelSettings = modelSettings,
    analysisId = "SVM",
    analysisPath = tempdir()
  )
  expect_correct_fitPlp(plpModel, trainData, testLocation = FALSE)
})

test_that("Sklearn predict works", {
  skip_if_not_installed("reticulate")
  skip_on_cran()
  # NOTE: removed a trailing comma that passed an empty argument to
  # setAdaBoost(), which silently matched the next formal positionally.
  modelSettings <- setAdaBoost(
    nEstimators = list(10),
    learningRate = list(0.1)
  )
  plpModel <- fitPlp(
    trainData = tinyTrainData,
    modelSettings = modelSettings,
    analysisId = "Adaboost",
    analysisPath = tempdir()
  )
  predictions <- predictSklearn(
    plpModel,
    testData,
    population
  )
  expect_correct_predictions(predictions, testData)
})