library(testthat) library(data.table) test_that("resampling error if no group", { itask <- mlr3::TaskClassif$new("iris", iris, target="Species") same_other <- mlr3resampling::ResamplingSameOtherCV$new() expect_error({ same_other$instantiate(itask) }, 'task has no subset, but at least one subset variable is required', fixed=TRUE) }) test_that("resampling error if no strata", { iris.dt <- data.table(iris)[, g := rep(1:3, l=.N)] itask <- mlr3::TaskClassif$new("iris", iris.dt, target="Species") itask$col_roles$subset <- "g" same_other <- mlr3resampling::ResamplingSameOtherCV$new() expect_error({ same_other$instantiate(itask) }, 'task has no strata, but at least one stratum variable is required; at least assign the subset variable to a stratum', fixed=TRUE) }) test_that("instantiation creates instance", { iris.dt <- data.table(iris)[, g := rep(1:3, l=.N)] itask <- mlr3::TaskClassif$new("iris", iris.dt, target="Species") itask$col_roles$subset <- "g" itask$col_roles$stratum <- "g" same_other <- mlr3resampling::ResamplingSameOtherCV$new() expect_identical(same_other$instance, NULL) same_other$instantiate(itask) expect_identical(same_other$instance$id.dt$g, iris.dt$g) }) test_that("error for subset named subset", { iris.dt <- data.table(iris)[, subset := rep(1:3, l=.N)] itask <- mlr3::TaskClassif$new("iris", iris.dt, target="Species") itask$col_roles$subset <- "subset" itask$col_roles$stratum <- "subset" same_other <- mlr3resampling::ResamplingSameOtherCV$new() expect_identical(same_other$instance, NULL) expect_error({ same_other$instantiate(itask) }, "col with role subset must not be named subset; please fix by renaming subset col") }) test_that("error for group named row_id", { iris.dt <- data.table(iris)[, row_id := rep(1:3, l=.N)] itask <- mlr3::TaskClassif$new("iris", iris.dt, target="Species") itask$col_roles$subset <- "row_id" itask$col_roles$stratum <- "row_id" same_other <- mlr3resampling::ResamplingSameOtherCV$new() expect_identical(same_other$instance, NULL) expect_error({ same_other$instantiate(itask) }, "col with role subset must not be named row_id; please fix by renaming row_id col") }) test_that("error for group named fold", { iris.dt <- data.table(iris)[, fold := rep(1:3, l=.N)] itask <- mlr3::TaskClassif$new("iris", iris.dt, target="Species") itask$col_roles$subset <- "fold" itask$col_roles$stratum <- "fold" same_other <- mlr3resampling::ResamplingSameOtherCV$new() expect_identical(same_other$instance, NULL) expect_error({ same_other$instantiate(itask) }, "col with role subset must not be named fold; please fix by renaming fold col") }) test_that("error for group named display_row", { iris.dt <- data.table(iris)[, display_row := rep(1:3, l=.N)] itask <- mlr3::TaskClassif$new("iris", iris.dt, target="Species") itask$col_roles$subset <- "display_row" itask$col_roles$stratum <- "display_row" same_other <- mlr3resampling::ResamplingSameOtherCV$new() expect_identical(same_other$instance, NULL) expect_error({ same_other$instantiate(itask) }, "col with role subset must not be named display_row; please fix by renaming display_row col") }) test_that("error for group named test", { iris.dt <- data.table(iris)[, test := rep(1:3, l=.N)] itask <- mlr3::TaskClassif$new("iris", iris.dt, target="Species") itask$col_roles$subset <- "test" itask$col_roles$stratum <- "test" same_other <- mlr3resampling::ResamplingSameOtherCV$new() expect_identical(same_other$instance, NULL) expect_error({ same_other$instantiate(itask) }, "col with role subset must not be named test; please fix by renaming test col") }) test_that("errors and result for 10 train data in small stratum", { size_cv <- mlr3resampling::ResamplingVariableSizeTrainCV$new() size_cv$param_set$values$folds <- 2 i10.dt <- data.table(iris)[1:70] i10.task <- mlr3::TaskClassif$new( "i10", i10.dt, target="Species" )$set_col_roles("Species",c("target","stratum")) expect_error({ size_cv$instantiate(i10.task) }, "max_train_data=10 (in smallest stratum) but should be larger than min_train_data=10, please fix by decreasing min_train_data", fixed=TRUE) size_cv$param_set$values$min_train_data <- 9 expect_error({ size_cv$instantiate(i10.task) }, "train sizes not unique, please decrease train_sizes", fixed=TRUE) size_cv$param_set$values$train_sizes <- 2 size_cv$instantiate(i10.task) size.tab <- table(size_cv$instance$iteration.dt[["small_stratum_size"]]) expect_identical(names(size.tab), c("9","10")) }) test_that("strata respected in all sizes", { size_cv <- mlr3resampling::ResamplingVariableSizeTrainCV$new() size_cv$param_set$values$min_train_data <- 5 size_cv$param_set$values$folds <- 5 N <- 100 imbalance <- 4 strat.vec <- ifelse((1:imbalance)