library(testthat) library(recipes) skip_if_not_installed("modeldata") data(biomass, package = "modeldata") biomass_tr <- biomass[1:10, ] biomass_te <- biomass[c(13:14, 19, 522), ] rec <- recipe(HHV ~ carbon + hydrogen, data = biomass_tr ) test_that("correct values", { standardized <- rec %>% step_range(carbon, hydrogen, min = -12, id = "") standardized_trained <- prep(standardized, training = biomass_tr, verbose = FALSE) obs_pred <- bake(standardized_trained, new_data = biomass_te, all_predictors()) obs_pred <- as.matrix(obs_pred) mins <- apply(biomass_tr[, c("carbon", "hydrogen")], 2, min) maxs <- apply(biomass_tr[, c("carbon", "hydrogen")], 2, max) new_min <- -12 new_max <- 1 new_range <- new_max - new_min carb <- ((new_range * (biomass_te$carbon - mins["carbon"])) / (maxs["carbon"] - mins["carbon"])) + new_min carb <- ifelse(carb > new_max, new_max, carb) carb <- ifelse(carb < new_min, new_min, carb) hydro <- ((new_range * (biomass_te$hydrogen - mins["hydrogen"])) / (maxs["hydrogen"] - mins["hydrogen"])) + new_min hydro <- ifelse(hydro > new_max, new_max, hydro) hydro <- ifelse(hydro < new_min, new_min, hydro) exp_pred <- cbind(carb, hydro) colnames(exp_pred) <- c("carbon", "hydrogen") expect_equal(exp_pred, obs_pred) rng_tibble_un <- tibble( terms = c("carbon", "hydrogen"), min = rep(NA_real_, 2), max = rep(NA_real_, 2), id = "" ) rng_tibble_tr <- tibble( terms = c("carbon", "hydrogen"), min = unname(mins), max = unname(maxs), id = "" ) expect_equal(tidy(standardized, 1), rng_tibble_un) expect_equal(tidy(standardized_trained, 1), rng_tibble_tr) }) test_that("defaults", { standardized <- rec %>% step_range(carbon, hydrogen) standardized_trained <- prep(standardized, training = biomass_tr, verbose = FALSE) obs_pred <- bake(standardized_trained, new_data = biomass_te, all_predictors()) obs_pred <- as.matrix(obs_pred) mins <- apply(biomass_tr[, c("carbon", "hydrogen")], 2, min) maxs <- apply(biomass_tr[, c("carbon", "hydrogen")], 2, max) new_min <- 0 new_max <- 1 new_range <- new_max - new_min carb <- ((new_range * (biomass_te$carbon - mins["carbon"])) / (maxs["carbon"] - mins["carbon"])) + new_min carb <- ifelse(carb > new_max, new_max, carb) carb <- ifelse(carb < new_min, new_min, carb) hydro <- ((new_range * (biomass_te$hydrogen - mins["hydrogen"])) / (maxs["hydrogen"] - mins["hydrogen"])) + new_min hydro <- ifelse(hydro > new_max, new_max, hydro) hydro <- ifelse(hydro < new_min, new_min, hydro) exp_pred <- cbind(carb, hydro) colnames(exp_pred) <- c("carbon", "hydrogen") expect_equal(exp_pred, obs_pred) }) test_that("one variable", { standardized <- rec %>% step_range(carbon) standardized_trained <- prep(standardized, training = biomass_tr, verbose = FALSE) obs_pred <- bake(standardized_trained, new_data = biomass_te) mins <- min(biomass_tr$carbon) maxs <- max(biomass_tr$carbon) new_min <- 0 new_max <- 1 new_range <- new_max - new_min carb <- ((new_range * (biomass_te$carbon - mins)) / (maxs - mins)) + new_min carb <- ifelse(carb > new_max, new_max, carb) carb <- ifelse(carb < new_min, new_min, carb) expect_equal(carb, obs_pred$carbon) }) test_that("correct values", { standardized <- rec %>% step_range(carbon, hydrogen, min = -12, id = "", clipping = FALSE) standardized_trained <- prep(standardized, training = biomass_tr, verbose = FALSE) obs_pred <- bake(standardized_trained, new_data = biomass_te, all_predictors()) obs_pred <- as.matrix(obs_pred) mins <- apply(biomass_tr[, c("carbon", "hydrogen")], 2, min) maxs <- apply(biomass_tr[, c("carbon", "hydrogen")], 2, max) new_min <- -12 new_max <- 1 new_range <- new_max - new_min carb <- ((new_range * (biomass_te$carbon - mins["carbon"])) / (maxs["carbon"] - mins["carbon"])) + new_min hydro <- ((new_range * (biomass_te$hydrogen - mins["hydrogen"])) / (maxs["hydrogen"] - mins["hydrogen"])) + new_min exp_pred <- cbind(carb, hydro) colnames(exp_pred) <- c("carbon", "hydrogen") expect_equal(exp_pred, obs_pred) }) test_that("backwards compatibility for before clipping <= 1.0.2 (#1090)", { standardized <- rec %>% step_range(carbon, hydrogen, min = -12, id = "", clipping = TRUE) standardized_trained <- prep(standardized, training = biomass_tr, verbose = FALSE) # simulates old recipe standardized_trained$steps[[1]]$clipping <- NULL obs_pred <- bake(standardized_trained, new_data = biomass_te, all_predictors()) obs_pred <- as.matrix(obs_pred) mins <- apply(biomass_tr[, c("carbon", "hydrogen")], 2, min) maxs <- apply(biomass_tr[, c("carbon", "hydrogen")], 2, max) new_min <- -12 new_max <- 1 new_range <- new_max - new_min carb <- ((new_range * (biomass_te$carbon - mins["carbon"])) / (maxs["carbon"] - mins["carbon"])) + new_min carb <- ifelse(carb > new_max, new_max, carb) carb <- ifelse(carb < new_min, new_min, carb) hydro <- ((new_range * (biomass_te$hydrogen - mins["hydrogen"])) / (maxs["hydrogen"] - mins["hydrogen"])) + new_min hydro <- ifelse(hydro > new_max, new_max, hydro) hydro <- ifelse(hydro < new_min, new_min, hydro) exp_pred <- cbind(carb, hydro) colnames(exp_pred) <- c("carbon", "hydrogen") expect_equal(exp_pred, obs_pred) }) # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { standardized <- rec %>% step_range(carbon, hydrogen, min = -12) %>% update_role(carbon, hydrogen, new_role = "potato") %>% update_role_requirements(role = "potato", bake = FALSE) standardized_trained <- prep(standardized, training = biomass_tr, verbose = FALSE) expect_error(bake(standardized_trained, new_data = biomass_te[, 1:3]), class = "new_data_missing_column") }) test_that("empty printing", { rec <- recipe(mpg ~ ., mtcars) rec <- step_range(rec) expect_snapshot(rec) rec <- prep(rec, mtcars) expect_snapshot(rec) }) test_that("empty selection prep/bake is a no-op", { rec1 <- recipe(mpg ~ ., mtcars) rec2 <- step_range(rec1) rec1 <- prep(rec1, mtcars) rec2 <- prep(rec2, mtcars) baked1 <- bake(rec1, mtcars) baked2 <- bake(rec2, mtcars) expect_identical(baked1, baked2) }) test_that("empty selection tidy method works", { rec <- recipe(mpg ~ ., mtcars) rec <- step_range(rec) expect <- tibble(terms = character(), min = double(), max = double(), id = character()) expect_identical(tidy(rec, number = 1), expect) rec <- prep(rec, mtcars) expect_identical(tidy(rec, number = 1), expect) }) test_that("printing", { rec <- recipe(mpg ~ ., data = mtcars) %>% step_range(disp, wt) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) })