## Tests for random forests for survival analysis library(ranger) library(survival) context("ranger_surv") ## Initialize the random forest for survival analysis rg.surv <- ranger(Surv(time, status) ~ ., data = veteran, num.trees = 10) ## Basic tests (for all random forests equal) test_that("survival result is of class ranger with 18 elements", { expect_is(rg.surv, "ranger") expect_equal(length(rg.surv), 18) }) test_that("results have right number of trees", { expect_equal(rg.surv$num.trees, 10) }) test_that("results have right number of independent variables", { expect_equal(rg.surv$num.independent.variables, ncol(veteran) - 2) }) test_that("Alternative interface works for survival", { rf <- ranger(dependent.variable.name = "time", status.variable.name = "status", data = veteran, num.trees = 10) expect_equal(rf$treetype, "Survival") }) test_that("Alternative interface prediction works for survival", { rf <- ranger(dependent.variable.name = "time", status.variable.name = "status", data = veteran, num.trees = 10) expect_equal(predict(rf, veteran)$num.independent.variables, ncol(veteran) - 2) expect_equal(predict(rf, veteran[, setdiff(names(veteran), c("time", "status"))])$num.independent.variables, ncol(veteran) - 2) }) test_that("Matrix interface works for survival", { rf <- ranger(dependent.variable.name = "time", status.variable.name = "status", data = data.matrix(veteran), write.forest = TRUE, num.trees = 10) expect_equal(rf$treetype, "Survival") expect_equal(rf$forest$independent.variable.names, colnames(veteran)[c(1:2, 5:8)]) }) test_that("Matrix interface prediction works for survival", { dat <- data.matrix(veteran) rf <- ranger(dependent.variable.name = "time", status.variable.name = "status", data = dat, write.forest = TRUE, num.trees = 10) expect_silent(predict(rf, dat)) }) test_that("growing works for single observations, survival", { rf <- ranger(Surv(time, status) ~ ., veteran[1, ], write.forest = TRUE, num.trees = 10) expect_is(rf$survival, "matrix") }) test_that("predict works for single observations, survival", { rf <- ranger(Surv(time, status) ~ ., veteran, write.forest = TRUE, num.trees = 10) pred <- predict(rf, head(veteran, 1)) expect_equal(length(pred$survival), length(rf$unique.death.times)) }) ## Special tests for random forests for survival analysis test_that("unique death times in survival result is right", { expect_equal(rg.surv$unique.death.times, sort(unique(veteran$time[veteran$status > 0]))) }) test_that("C-index splitting works", { rf <- ranger(Surv(time, status) ~ ., data = veteran, splitrule = "C", num.trees = 10) expect_equal(rf$treetype, "Survival") }) test_that("C-index splitting not working on classification data", { expect_error(ranger(Species ~ ., iris, splitrule = "C", num.trees = 10)) }) test_that("Logrank splitting not working on classification data", { expect_error(ranger(Species ~ ., iris, splitrule = "logrank", num.trees = 10)) }) test_that("No error if survival tree without OOB observations", { dat <- data.frame(time = c(1,2), status = c(0,1), x = c(1,2)) expect_silent(ranger(Surv(time, status) ~ ., dat, num.trees = 1, num.threads = 1)) }) test_that("predict.all for survival returns 3d array of size samples x times x trees", { rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5) pred <- predict(rf, veteran, predict.all = TRUE) expect_is(pred$survival, "array") expect_equal(dim(pred$survival), c(nrow(veteran), length(pred$unique.death.times), rf$num.trees)) expect_is(pred$chf, "array") expect_equal(dim(pred$chf), c(nrow(veteran), length(pred$unique.death.times), rf$num.trees)) }) test_that("Mean of predict.all for survival is equal to forest prediction", { rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5) pred_forest <- predict(rf, veteran, predict.all = FALSE) pred_trees <- predict(rf, veteran, predict.all = TRUE) expect_equal(apply(pred_trees$chf, 1:2, mean), pred_forest$chf) }) test_that("timepoints() function returns timepoints", { rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5) expect_equal(timepoints(rf), rf$unique.death.times) pred <- predict(rf, veteran) expect_equal(timepoints(pred), rf$unique.death.times) }) test_that("timepoints() working on survival forest only", { rf <- ranger(Species ~ ., iris, num.trees = 5) expect_error(timepoints(rf), "No timepoints found. Object is no Survival forest.") pred <- predict(rf, iris) expect_error(timepoints(pred), "No timepoints found. Object is no Survival prediction object.") }) test_that("Survival error without covariates", { expect_error(ranger(Surv(time, status) ~ ., veteran[, c("time", "status")], num.trees = 5), "Error: No covariates found.") }) test_that("Survival error for competing risk data", { sobj <- Surv(veteran$time, factor(sample(1:3, nrow(veteran), replace = TRUE))) expect_error(ranger(y = sobj, x = veteran[, 1:2], num.trees = 5), "Error: Competing risks not supported yet\\. Use status=1 for events and status=0 for censoring\\.") }) test_that("Right unique time points without time.interest", { times <- sort(unique(veteran$time[veteran$status > 0])) rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5) expect_equal(timepoints(rf), times) rf <- ranger(y = Surv(veteran$time, veteran$status), x = veteran[, c(-3, -4)], num.trees = 5) expect_equal(timepoints(rf), times) }) test_that("time.interest results in the right number of time points", { rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5, time.interest = 20) expect_equal(length(timepoints(rf)), 20) rf <- ranger(y = Surv(veteran$time, veteran$status), x = veteran[, c(-3, -4)], num.trees = 5, time.interest = 20) expect_equal(length(timepoints(rf)), 20) rf <- ranger(y = cbind(veteran$time, veteran$status), x = veteran[, c(-3, -4)], num.trees = 5, time.interest = 20) expect_equal(length(timepoints(rf)), 20) rf <- ranger(dependent.variable.name = "time", status.variable.name = "status", data = veteran, num.trees = 5, time.interest = 20) expect_equal(length(timepoints(rf)), 20) }) test_that("time.interest results in the right time points", { times <- c(20, 100, 200, 1000) rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5, time.interest = times) expect_equal(timepoints(rf), times) rf <- ranger(y = Surv(veteran$time, veteran$status), x = veteran[, c(-3, -4)], num.trees = 5, time.interest = times) expect_equal(timepoints(rf), times) rf <- ranger(y = cbind(veteran$time, veteran$status), x = veteran[, c(-3, -4)], num.trees = 5, time.interest = times) expect_equal(timepoints(rf), times) rf <- ranger(dependent.variable.name = "time", status.variable.name = "status", data = veteran, num.trees = 5, time.interest = times) expect_equal(timepoints(rf), times) }) test_that("If more unique time points requested then observed, use observed times", { times <- sort(unique(veteran$time[veteran$status > 0])) rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5, time.interest = 200) expect_equal(timepoints(rf), times) })