library(testthat)
library(recipes)

# The biomass data set ships with modeldata; skip when it is unavailable.
skip_if_not_installed("modeldata")
data(biomass, package = "modeldata")

# Untrained base recipe shared by every variant built below.
rec <- recipe(
  HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
  data = biomass
)

# A recipe prepped one step at a time should be indistinguishable from the
# same recipe prepped in a single call: identical baked values and identical
# variable summaries.
test_that("training in stages", {
  first_rows <- head(biomass)

  # Reference: center -> drop sulfur -> scale, all trained in one prep() call.
  full_pipeline <- rec %>%
    step_center(carbon, hydrogen, oxygen, nitrogen, sulfur) %>%
    step_rm(sulfur) %>%
    step_scale(carbon, hydrogen, oxygen, nitrogen)
  single_pass <- full_pipeline %>% prep(training = biomass)

  # Staged variant: train the centering step by itself first ...
  centering_trained <- rec %>%
    step_center(carbon, hydrogen, oxygen, nitrogen, sulfur) %>%
    prep(training = biomass)

  # ... then append one step and prep again, twice over. The snapshots record
  # whatever prep() signals when handed a partially trained recipe.
  no_sulfur <- step_rm(centering_trained, sulfur)
  expect_snapshot(
    no_sulfur_trained <- prep(no_sulfur)
  )

  scale_last <- step_scale(
    no_sulfur_trained,
    carbon, hydrogen, oxygen, nitrogen
  )
  expect_snapshot(
    sequentially <- prep(scale_last)
  )

  # Alternative staging: append both remaining steps before prepping again.
  in_stages <- centering_trained %>%
    step_rm(sulfur) %>%
    step_scale(carbon, hydrogen, oxygen, nitrogen)
  expect_snapshot(
    in_stages_trained <- prep(in_stages)
  )

  # Same staged recipe, but prepped with fresh = TRUE and explicit training
  # data.
  staged_refit <- prep(in_stages, training = biomass, fresh = TRUE)

  # Baked values must agree with the single-pass reference.
  expected_baked <- bake(single_pass, new_data = first_rows)
  expect_equal(expected_baked, bake(sequentially, new_data = first_rows))
  expect_equal(expected_baked, bake(in_stages_trained, new_data = first_rows))
  expect_equal(expected_baked, bake(staged_refit, new_data = first_rows))

  # Variable summaries must agree with the single-pass reference as well.
  expected_vars <- summary(single_pass)
  expect_equal(expected_vars, summary(sequentially))
  expect_equal(expected_vars, summary(in_stages_trained))
  expect_equal(expected_vars, summary(staged_refit))

  # Passing training data again to a recipe that already holds a trained step
  # is captured as a snapshot.
  expect_snapshot(
    rec %>%
      step_center(carbon, hydrogen, oxygen, nitrogen, sulfur) %>%
      prep(training = biomass) %>%
      step_rm(sulfur) %>%
      prep(training = biomass)
  )
})