test_that('model from dense design matrix has expected fields', { dataset <- make_mtcars_test() depth <- 2 trees <- 3 m_xrf <- xrf( am ~ mpg + as.factor(cyl) + disp, dataset, family = 'binomial', xgb_control = list(nrounds = trees, max_depth = depth), sparse = FALSE ) test_expected_fields(m_xrf, depth, trees) }) test_that('model from sparse design matrix has expected fields', { dataset <- make_mtcars_test() depth <- 2 trees <- 3 m_xrf <- xrf( am ~ mpg + cyl + disp, dataset, family = 'binomial', xgb_control = list(nrounds = 3, max_depth = 2) ) test_expected_fields(m_xrf, depth, trees) }) test_that('model predicts binary outcome', { dataset <- make_mtcars_test() m_xrf <- xrf( am ~ mpg + cyl + disp + hp + drat + wt + qsec, dataset, family = 'binomial', xgb_control = list(nrounds = 3, max_depth = 2), glm_control = list(type.measure = 'deviance', nfolds = 10) ) preds_response_dense <- predict( m_xrf, dataset, type = 'response', sparse = FALSE ) preds_response_sparse <- predict( m_xrf, dataset, type = 'response', sparse = TRUE ) preds_link <- predict(m_xrf, dataset, type = 'link') expect_equal(preds_response_dense, preds_response_sparse) expect_true( all(preds_response_dense < 1 & preds_response_dense > 0) ) expect_true( all(preds_response_sparse < 1 & preds_response_sparse > 0), ) # since we are using deviance on the LASSO fit, the model should be calibrated expect_equal(mean(preds_response_dense), mean(dataset$am == '1')) expect_true( any(preds_link < 0 | preds_link > 1) ) }) test_that('model predicts continuous outcome', { dataset <- make_mtcars_test() m_xrf <- xrf( mpg ~ ., dataset, family = 'gaussian', xgb_control = list(nrounds = 3, max_depth = 2), glm_control = list(type.measure = 'deviance', nfolds = 10) ) preds <- predict(m_xrf, dataset, type = 'response', sparse = FALSE) expect_equal(mean(preds), mean(dataset$mpg)) mae <- mean(abs(preds - dataset$mpg)) expect_lt(mae, 5) # since this should be highly parameterized / overfit }) test_that('model predicts without outcome column', { # issue #9 mod <- xrf( mpg ~ ., data = mtcars[-(1:5), ], xgb_control = list(nrounds = 5, max_depth = 2), family = "gaussian" ) expect_equal( predict(mod, mtcars[1:5, ])[, 1], predict(mod, mtcars[1:5, -1])[, 1] ) mod_nsp <- xrf( mpg ~ ., data = mtcars[-(1:5), ], xgb_control = list(nrounds = 5, max_depth = 2), family = "gaussian", sparse = FALSE ) expect_equal( predict(mod_nsp, mtcars[1:5, ], sparse = FALSE)[, 1], predict(mod_nsp, mtcars[1:5, -1], sparse = FALSE)[, 1] ) }) test_that('call scrubbed', { mod_nsp <- xrf( mpg ~ ., data = mtcars[-(1:5), ], xgb_control = list(nrounds = 5, max_depth = 2), family = "gaussian", sparse = FALSE ) # in previous version: # > object.size(mod_nsp$glm$model$call) # 211544 bytes expect_true(object.size(mod_nsp$glm$model$call) < 211544) }) test_that('single feature model', { set.seed(55414) x1 <- rbinom(100, 1, .7) y <- rnorm(100, 0, .5) + x1 dat <- data.frame(y, x1) mod <- xrf( y ~ x1, data = dat, xgb_control = list(nrounds = 5, max_depth = 2), family = "gaussian", sparse = FALSE ) expect_gt(nrow(mod$rules), 0) })