#################### # Author: James Hickey # # Series of tests to check the helper functions # within the package # #################### context("Testing checks on GBM S3 objects") test_that("check_if_gbm_data throws an error if given an object not of class GBMData", { # Given an object not of class GBMData obj <- list() # When passed to check_if_gbm_data # Then an error is thrown expect_error(check_if_gbm_data(obj)) }) test_that("check_if_gbm_data does not throw an error if given an object of class GBMData", { # Given an object of class GBMData obj <- list() class(obj) <- "GBMData" # When passed to check_if_gbm_data # Then no error is thrown expect_error(check_if_gbm_data(obj), NA) }) test_that("check_if_gbm_dist throws an error if given an object not of class GBMDist", { # Given an object not of class GBMDist obj <- list() # When passed to check_if_gbm_dist # Then an error is thrown expect_error(check_if_gbm_dist(obj)) }) test_that("check_if_gbm_dist does not throw an error if given an object of class GBMDist", { # Given an object of class GBMDist obj <- list() class(obj) <- "AdaBoostGBMDist" # When passed to check_if_gbm_dist # Then no error is thrown expect_error(check_if_gbm_dist(obj), NA) }) test_that("check_if_gbm_train_params throws an error if given object not of class GBMTrainParams", { # Given an object not of class GBMTrainParams obj <- list() # When passed to check_if_gbm_train_params # Then an error is thrown expect_error(check_if_gbm_train_params(obj)) }) test_that("check_if_gbm_train_params does not throw an error if given object of class GBMTrainParams", { # Given an object of class GBMTrainParams obj <- list() class(obj) <- "GBMTrainParams" # When passed to check_if_gbm_train_params # Then no error is thrown expect_error(check_if_gbm_train_params(obj), NA) }) test_that("check_if_gbm_fit throws an error if given an object not of class GBMFit", { # Given an object not of class GBMFit obj <- list() # When passed to check_if_gbm_fit # Then an error is thrown expect_error(check_if_gbm_fit(obj)) }) test_that("check_if_gbm_fit does not throw an error if given an object of class GBMFit", { # Given an object of class GBMFit obj <- list() class(obj) <- "GBMFit" # When passed to check_if_gbm_fit # Then no error is thrown expect_error(check_if_gbm_fit(obj), NA) }) test_that("check_if_gbm_var_container throws an error if given an object not of class GBMVarCont", { # Given an object not of class GBMVarCont obj <- list() # When passed to check_if_gbm_var_container # Then an error is thrown expect_error(check_if_gbm_var_container(obj)) }) test_that("check_if_gbm_var_container does not throw an error if given an object of class GBMVarCont", { # Given an object of class GBMVarCont obj <- list() class(obj) <- "GBMVarCont" # When passed to check_if_gbm_var_container # Then no error is thrown expect_error(check_if_gbm_var_container(obj), NA) }) context("Testing checks on inputs for creation of S3 objects") test_that("check_weights returns a vector of 1s if passed an obj of length 0", { # Given an empty vector w <- c() n <- 100 # When check_weights is called w <- check_weights(w, n) # Then returns a vector of 1s expect_equal(w, rep(1, n)) }) test_that("check_weights throws an error if any not a double", { # Given a vector of weights n <- 100 w <- rep(1, 100) # When one weight is not a double w[1] <- NA # Then check_weights will throw an error expect_error(check_weights(w, n)) }) test_that("check_weights throws an error if any are < 0", { # Given a vector of weights w <- rep(1, 100) # When one weight is set < 0 w[1] <- -1 # Then check_weights will throw an error expect_error(check_weights(w, 100)) }) test_that("check_interaction_depth throws error if < 1 or > 49", { # Given interaction depths of 0 and 50 id1 <- 0 id2 <- 50 # When they're checked # Then error expect_error(check_interaction_depth(id1)) expect_error(check_interaction_depth(id2)) }) test_that("checkMissing throws error if NaN in predictors", { # Given predictors and response N <- 100 x <- runif(N) y <- runif(N) # When x has a NaN in it x[1] <- NaN # Then error thrown by checkMissing expect_error(checkMissing(x, y)) }) test_that("checkMissing throws error if missing value in response", { # Given predictors and response N <- 100 x <- runif(N) y <- runif(N) # When y has a missing value y[1] <- NA # Then error thrown by checkMissing expect_error(checkMissing(x, y)) }) test_that("warnNoVariation warns if a variable has no variation", { x <- c(1.2, 1.2) expect_warning(warnNoVariation(x, 1, 'test'), "variable 1: test has no variation", fixed=TRUE) }) test_that("warnNoVariation passes OK if variable does vary", { x <- c(0, 1) expect_warning(warnNoVariation(x, 1, 'test'), regexp=NA) }) test_that("warnNoVariation passes OK if variable does vary (with NA)", { x <- c(0, 1, NA) expect_warning(warnNoVariation(x, 1, 'test'), regexp=NA) }) test_that("get_var_names gets colnames if passed a matrix", { # Given a matrix with 3 columns and names x <- matrix(ncol=3) colnames(x) <- c("V1", "V2", "V3") # When passed to get_var_names # Then column names returned expect_equal(get_var_names(x), colnames(x)) }) test_that("get_var_names gets the names if passed a data.frame", { # Given a data.frame with 3 columns and names x <- data.frame(X1=NA, X2=NA, X3=NA) # When passed to get_var_names # Then names returned expect_equal(get_var_names(x), names(x)) }) test_that("check_sanity only throws an error if nrows(x) is not equal to the length of y", { # Given two sets of predictors one of length y, the other not N <- 100 x1 <- data.frame(runif(N)) x2 <- data.frame(runif(N-1)) y <- runif(N) # When check_sanity is called # Then error thrown for call where lengths are not equal expect_error(check_sanity(x1, y), NA) expect_error(check_sanity(x2, y)) }) test_that("check_if_natural_number throws error if not passed a whole number >=1" , { # When check_if_natural_number is not passed a whole number >= 1 # Then an error is thrown expect_error(check_if_natural_number(-1, "Arg Name")) expect_error(check_if_natural_number(c(1, 2), "Arg Name")) expect_error(check_if_natural_number(1.4, "Arg Name")) expect_error(check_if_natural_number(Inf, "Arg Name")) expect_error(check_if_natural_number(list(), "Arg Name")) expect_error(check_if_natural_number(NA, "Arg Name")) expect_error(check_if_natural_number(NaN, "Arg Name")) }) test_that("convertY changes factors with 2 levels to numeric", { # Given a vector of 2-level factors N <- 100 y <- as.factor(sample(c(0, 1), N, replace=TRUE)) # When convertY is called # Then converted to numeric expect_equal(convertY(y), as.numeric(y==levels(y)[2])) }) test_that("convertY does nothing to factors with levels != 2", { # Given of a vector of 3-level factors N <- 100 y <- as.factor(sample(c(0, 1, 2), N, replace=TRUE)) # When convertY is called # Then remains the same expect_equal(convertY(y), y) }) test_that("check_var_type throws error when passed excessive levels in x", { testExcess <- data.frame( y = sample(c(0,1), 1025, replace = TRUE) ,x1 = runif(1025) ,x2 = factor(1:1025) ) expect_error( check_var_type(x = testExcess[,c('x1', 'x2')] , y = testExcess$y) ,'gbm does not currently handle categorical variables with more than 1024 levels\\. Variable 2\\: x2 has 1025 levels\\.') }) test_that("check_var_type throws an error when passed Inacceptable classes", { testClasses <- data.frame( y = sample(c(0,1), 15, replace = TRUE) ,x1 = runif(15) ,x2 = seq(as.Date('2015-01-01'), as.Date('2015-01-15'), 'days') ) expect_error( check_var_type(x = testClasses[,c('x1', 'x2')] , y = testClasses$y) ,'variable 2\\: x2 is not of type - numeric, ordered or factor\\.') }) test_that("check_offset default returns a vector of 0s when offset set to NULL - irrespective of distribution", { # Given an offset=NULL and responses (and all distributions) N <- 100 y <- runif(N) offset <- NULL dist_1 <- gbm_dist("AdaBoost") dist_2 <- gbm_dist("Bernoulli") dist_3 <- gbm_dist("CoxPH") dist_4 <- gbm_dist("Gamma") dist_5 <- gbm_dist("Gaussian") dist_6 <- gbm_dist("Huberized") dist_7 <- gbm_dist("Laplace") dist_8 <- gbm_dist("Pairwise") dist_9 <- gbm_dist("Poisson") dist_10 <- gbm_dist("Quantile") dist_11 <- gbm_dist("TDist") dist_12 <- gbm_dist("Tweedie") # Then check_offset returns a vector of zeros # equal to length of response expect_equal(check_offset(offset, y, dist_1), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_2), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_3), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_4), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_5), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_6), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_7), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_8), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_9), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_10), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_11), rep(0, length(y))) expect_equal(check_offset(offset, y, dist_12), rep(0, length(y))) }) test_that("check_offset throws an error length of offset does not equal the length of the response - and not CoxPH", { # Given an offset and vector of responses # offset is different length to responses N <- 100 y <- runif(N) offset <- runif(N-2) dist_1 <- gbm_dist("AdaBoost") dist_2 <- gbm_dist("Bernoulli") dist_3 <- gbm_dist("CoxPH") dist_4 <- gbm_dist("Gamma") dist_5 <- gbm_dist("Gaussian") dist_6 <- gbm_dist("Huberized") dist_7 <- gbm_dist("Laplace") dist_8 <- gbm_dist("Pairwise") dist_9 <- gbm_dist("Poisson") dist_10 <- gbm_dist("Quantile") dist_11 <- gbm_dist("TDist") dist_12 <- gbm_dist("Tweedie") # Then check_offset throws an error - if not CoxPH expect_error(check_offset(offset, y, dist_1)) expect_error(check_offset(offset, y, dist_2)) expect_error(check_offset(offset, y, dist_3), NA) expect_error(check_offset(offset, y, dist_4)) expect_error(check_offset(offset, y, dist_5)) expect_error(check_offset(offset, y, dist_6)) expect_error(check_offset(offset, y, dist_7)) expect_error(check_offset(offset, y, dist_8)) expect_error(check_offset(offset, y, dist_9)) expect_error(check_offset(offset, y, dist_10)) expect_error(check_offset(offset, y, dist_11)) expect_error(check_offset(offset, y, dist_12)) }) test_that("check_offset throws an error if the offset contains a NA", { # Given an offset and vector of responses - irrespective of distribution N <- 100 y <- runif(N) offset <- runif(N) dist_1 <- gbm_dist("AdaBoost") dist_2 <- gbm_dist("Bernoulli") dist_3 <- gbm_dist("CoxPH") dist_4 <- gbm_dist("Gamma") dist_5 <- gbm_dist("Gaussian") dist_6 <- gbm_dist("Huberized") dist_7 <- gbm_dist("Laplace") dist_8 <- gbm_dist("Pairwise") dist_9 <- gbm_dist("Poisson") dist_10 <- gbm_dist("Quantile") dist_11 <- gbm_dist("TDist") dist_12 <- gbm_dist("Tweedie") # When an elemenet of offset is NA offset[1] <- NA # Then check_offset throws an error expect_error(check_offset(offset, y, dist_1)) expect_error(check_offset(offset, y, dist_2)) expect_error(check_offset(offset, y, dist_3)) expect_error(check_offset(offset, y, dist_4)) expect_error(check_offset(offset, y, dist_5)) expect_error(check_offset(offset, y, dist_6)) expect_error(check_offset(offset, y, dist_7)) expect_error(check_offset(offset, y, dist_8)) expect_error(check_offset(offset, y, dist_9)) expect_error(check_offset(offset, y, dist_10)) expect_error(check_offset(offset, y, dist_11)) expect_error(check_offset(offset, y, dist_12)) }) test_that("check_offset throws an error if the offset contains a non-numeric", { # Given an offset and vector of responses - irrespective of distribution N <- 100 y <- runif(N) offset <- runif(N) dist_1 <- gbm_dist("AdaBoost") dist_2 <- gbm_dist("Bernoulli") dist_3 <- gbm_dist("CoxPH") dist_4 <- gbm_dist("Gamma") dist_5 <- gbm_dist("Gaussian") dist_6 <- gbm_dist("Huberized") dist_7 <- gbm_dist("Laplace") dist_8 <- gbm_dist("Pairwise") dist_9 <- gbm_dist("Poisson") dist_10 <- gbm_dist("Quantile") dist_11 <- gbm_dist("TDist") dist_12 <- gbm_dist("Tweedie") # When an elemenet of offset is non-numeric offset[1] <- "string" # Then check_offset throws an error expect_error(check_offset(offset, y, dist_1)) expect_error(check_offset(offset, y, dist_2)) expect_error(check_offset(offset, y, dist_3)) expect_error(check_offset(offset, y, dist_4)) expect_error(check_offset(offset, y, dist_5)) expect_error(check_offset(offset, y, dist_6)) expect_error(check_offset(offset, y, dist_7)) expect_error(check_offset(offset, y, dist_8)) expect_error(check_offset(offset, y, dist_9)) expect_error(check_offset(offset, y, dist_10)) expect_error(check_offset(offset, y, dist_11)) expect_error(check_offset(offset, y, dist_12)) }) test_that("check_cv_parameters throws an error if cv_folds is not a natural number >= 1", { # When cv_folds is not a natural number >=1 # Then an error is thrown expect_error(check_cv_parameters(cv_folds=-1, cv_class_stratify=FALSE, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds=FALSE, cv_class_stratify=FALSE, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds="string", cv_class_stratify=FALSE, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds=c(1, 2), cv_class_stratify=FALSE, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds=2.1, cv_class_stratify=FALSE, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds=NA, cv_class_stratify=FALSE, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds=NaN, cv_class_stratify=FALSE, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds=Inf, cv_class_stratify=FALSE, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) }) test_that("check_cv_parameters throws an error if cv_class_stratify is not a logical", { # When cv_class_stratify is not a logical # Then check_cv_parameters throws an error expect_error(check_cv_parameters(cv_folds=5, cv_class_stratify="Help", fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds=5, cv_class_stratify=NULL, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds=5, cv_class_stratify=NaN, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds=5, cv_class_stratify=c(1, 2), fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) expect_error(check_cv_parameters(cv_folds=5, cv_class_stratify=Inf, fold_id=NULL, train_params=training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6))) }) test_that("check_cv_parameters throws an error if train_params not of class GBMTrainParams", { # Given training parameters params <- training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=1:1000, num_train=1000, num_features=6) # When class removed class(params) <- "" # Then check_cv_parameters throws an error expect_error(check_cv_parameters(cv_folds=5, cv_class_stratify=FALSE, fold_id=NULL, train_params=params)) }) test_that("check_cv_parameters throws an error if fold_id has observations across different folds", { # Given training parameters and obs id N <- 2000 obs_id <- c(rep(1, N/2), rep(2, N/2)) params <- training_params(num_trees=2000, interaction_depth=3, min_num_obs_in_node=10, shrinkage=0.005, bag_fraction=0.5, id=obs_id, num_train=N/2, num_features=6) # When fold_id not NULL and has observations across folds cv_folds <- 5 fold_id <- sample(seq_len(cv_folds), replace=TRUE) # Then check_cv_parameters throws an error expect_error(check_cv_parameters(cv_folds=cv_folds, cv_class_stratify=FALSE, fold_id=fold_id, train_params=params)) }) test_that("convert_strata does not alter a NA", { expect_true(is.na(convert_strata(NA))) }) test_that("convert_strata converts a vector of factors to integers", { # Given a vector of factors test_strata <- as.factor(c("a", "b", "b", "a")) # When converted converted <- convert_strata(test_strata) # Then is a vector of integers expect_equal(converted, as.integer(test_strata)) }) test_that("convert_strata throws an error when not passed a vector of integers or strata", { expect_error(convert_strata(data.frame(FALSE))) expect_error(convert_strata(data.frame(c(1, 2)))) expect_error(convert_strata(matrix(FALSE))) expect_error(convert_strata(matrix(c(1, 2)))) expect_error(convert_strata(Inf)) expect_error(convert_strata("Error")) }) test_that("guess_distribution makes correct guesses and displays message", { require(survival) # Given responses b_resp <- c(0, 1, 0, 1, 0, 1) c_resp <- Surv(c(1.2, 2.3, 4.5, 6.7), c(0, 0, 0, 1)) other_resp <- "Default" # When guessing # Then correct guess and message expect_equal(guess_distribution(b_resp)$name, "Bernoulli") expect_equal(guess_distribution(c_resp)$name, "CoxPH") expect_equal(guess_distribution(other_resp)$name, "Gaussian") expect_message(guess_distribution(b_resp), "Distribution not specified, assuming Bernoulli ...") expect_message(guess_distribution(c_resp), "Distribution not specified, assuming CoxPH ...") expect_message(guess_distribution(other_resp), "Distribution not specified, assuming Gaussian ...") })