skip_if_not_installed("recipes")
skip_if_not_installed("rsample")
# `library(modeldata)` below errors when the package is missing, so guard it
# the same way as the other attached packages.
skip_if_not_installed("modeldata")

# Load libraries
suppressPackageStartupMessages(library(recipes))
suppressPackageStartupMessages(library(rsample))
suppressPackageStartupMessages(library(modeldata))

# Data sets used for testing
data(biomass)
biomass_tr <- biomass[biomass$dataset == "Training", ]
biomass_te <- biomass[biomass$dataset == "Testing", ]
data(credit_data)
set.seed(55)
train_test_split <- initial_split(credit_data)
credit_tr <- training(train_test_split)

# Additional data sets used
data(covers)
data(Sacramento)

# Test helpers ----------------------------------------------------------------

# Expect that the first `terms` entry of step `step_number` in `axed` carries
# the base environment as its `.Environment` attribute.
#
# axed:        a recipe, typically the result of `axe_env()`.
# step_number: index into `axed$steps`.
terms_empty_env <- function(axed, step_number) {
  expect_identical(
    attr(axed$steps[[step_number]]$terms[[1]], ".Environment"),
    rlang::base_env()
  )
}

# Same check as `terms_empty_env()`, but for the first `impute_with` entry of
# step `step_number`.
impute_empty_env <- function(axed, step_number) {
  expect_identical(
    attr(axed$steps[[step_number]]$impute_with[[1]], ".Environment"),
    rlang::base_env()
  )
}

# Same check for the `input_number`-th `input` entry. Note that this always
# inspects the FIRST step of the recipe; `input_number` indexes the inputs of
# that step, not the steps.
inputs_empty_env <- function(axed, input_number) {
  expect_identical(
    attr(axed$steps[[1]]$input[[input_number]], ".Environment"),
    rlang::base_env()
  )
}

# Expected environment for the tests that inspect a step field directly.
test_en <- rlang::base_env()

# Tests -----------------------------------------------------------------------

test_that("recipe + axe_env() works", {
  rec <- recipe(
    HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
    data = biomass_tr
  ) |>
    step_center(all_predictors()) |>
    step_scale(all_predictors()) |>
    step_spatialsign(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
  terms_empty_env(x, 2)
  terms_empty_env(x, 3)
})

test_that("recipe + step_impute_knn + axe_env() works", {
  rec <- recipe(credit_tr) |>
    step_impute_knn(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
  impute_empty_env(x, 1)
})

test_that("recipe + step_impute_lower + axe_env() works", {
  rec <- recipe(credit_tr) |>
    step_impute_lower(Time, Expenses, threshold = c(40, 40))
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_impute_roll + axe_env() works", {
  rec <- recipe(credit_tr) |>
    step_impute_roll(Time, statistic = median, window = 3)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_BoxCox + axe_env() works", {
  # The piped recipe is already the first argument; only the selector belongs
  # in the call (a stray `rec` here would be captured as a selector term).
  rec <- recipe(~., data = as.data.frame(state.x77)) |>
    step_BoxCox(all_numeric())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_bs + axe_env() works", {
  rec <- recipe(
    HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
    data = biomass_tr
  ) |>
    step_bs(carbon, hydrogen)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_hyperbolic + axe_env() works", {
  skip_if_recipes_pre_0.2.1()
  rec <- recipe(~., data = as.data.frame(state.x77)) |>
    step_hyperbolic(Income, func = "cosh", inverse = FALSE)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_inverse + axe_env() works", {
  rec <- recipe(~., data = as.data.frame(state.x77)) |>
    step_inverse(Income)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_invlogit + axe_env() works", {
  rec <- recipe(
    HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
    data = biomass_tr
  ) |>
    step_center(carbon, hydrogen) |>
    step_scale(carbon, hydrogen) |>
    step_invlogit(carbon, hydrogen)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
  terms_empty_env(x, 2)
  terms_empty_env(x, 3)
})

test_that("recipe + step_log + axe_env() works", {
  rec <- recipe(~., data = as.data.frame(state.x77)) |>
    step_log(Income)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_logit + axe_env() works", {
  rec <- recipe(~., data = as.data.frame(state.x77)) |>
    step_logit(Income)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_mutate + axe_env() works", {
  rec <- recipe(~., data = iris) |>
    step_mutate(
      dbl_width = Sepal.Width * 2,
      half_length = Sepal.Length / 2
    )
  x <- axe_env(rec)
  inputs_empty_env(x, 1)
  inputs_empty_env(x, 2)
})

test_that("recipe + step_ns + axe_env() works", {
  rec <- recipe(
    HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
    data = biomass_tr
  ) |>
    step_ns(carbon, hydrogen)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_poly + axe_env() works", {
  rec <- recipe(
    HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
    data = biomass_tr
  ) |>
    step_poly(carbon, hydrogen)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_relu + axe_env() works", {
  rec <- recipe(
    HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
    data = biomass_tr
  ) |>
    step_relu(carbon, shift = 40)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_sqrt + axe_env() works", {
  rec <- recipe(~., data = as.data.frame(state.x77)) |>
    step_sqrt(all_numeric())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_YeoJohnson + axe_env() works", {
  rec <- recipe(~., data = as.data.frame(state.x77)) |>
    step_YeoJohnson(all_numeric())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_discretize + axe_env() works", {
  rec <- recipe(~., data = as.data.frame(state.x77)) |>
    step_discretize(Income)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_bin2factor + axe_env() works", {
  rec <- recipe(~description, covers) |>
    step_regex(description, pattern = "(rock|stony)", result = "rocks") |>
    step_regex(description, pattern = "(rock|stony)", result = "more_rocks") |>
    step_bin2factor(rocks)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
  terms_empty_env(x, 2)
  terms_empty_env(x, 3)
})

test_that("recipe + step_count + axe_env() works", {
  rec <- recipe(~description, covers) |>
    step_count(description, pattern = "(rock|stony)", result = "rocks")
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_date + axe_env() works", {
  examples <- data.frame(
    Dan = as.Date("2002-03-04") + 1:10,
    Stefan = as.Date("2006-01-13") + 1:10
  )
  rec <- recipe(~ Dan + Stefan, examples) |>
    step_date(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_dummy + axe_env() works", {
  rec <- recipe(~ city + sqft + price, data = Sacramento) |>
    step_dummy(city)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_string2factor + axe_env() works", {
  rec <- recipe(~ city + sqft + price, data = Sacramento) |>
    step_factor2string(city) |>
    step_string2factor(city)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
  terms_empty_env(x, 2)
})

test_that("recipe + step_factor2string + axe_env() works", {
  rec <- recipe(~ city + sqft + price, data = Sacramento) |>
    step_factor2string(city)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_holiday + axe_env() works", {
  examples <- data.frame(someday = Sys.Date() + 1:40)
  rec <- recipe(~someday, examples) |>
    step_holiday(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_integer + axe_env() works", {
  rec <- Sacramento |>
    dplyr::select(type, sqft, price, beds) |>
    recipe(type ~ .) |>
    step_integer(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_novel + axe_env() works", {
  # Only the training portion is needed: the recipe is specified, never baked.
  sacr_tr <- Sacramento[1:500, ] |>
    dplyr::mutate(city = as.character(city))
  rec <- recipe(type ~ ., data = sacr_tr) |>
    step_novel(city, zip)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_num2factor + axe_env() works", {
  iris2 <- iris
  iris2$Species <- as.numeric(iris2$Species)
  rec <- recipe(~., data = iris2) |>
    step_num2factor(
      Species,
      levels = c("setosa", "versicolor", "virginica")
    )
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_ordinalscore + axe_env() works", {
  fail_lvls <- c("meh", "annoying", "really_bad")
  ord_data <- data.frame(
    item = c("paperclip", "twitter", "airbag"),
    fail_severity = factor(fail_lvls, levels = fail_lvls, ordered = TRUE)
  )
  rec <- recipe(~ item + fail_severity, data = ord_data) |>
    step_dummy(item) |>
    step_ordinalscore(fail_severity)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
  terms_empty_env(x, 2)
})

test_that("recipe + step_other + axe_env() works", {
  rec <- recipe(~ city + zip, data = Sacramento) |>
    step_other(city, zip, threshold = .1, other = "other values")
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_unorder + axe_env() works", {
  lmh <- c("Low", "Med", "High")
  examples <- data.frame(
    X1 = factor(rep(letters[1:4], each = 3)),
    X2 = ordered(rep(lmh, each = 4), levels = lmh)
  )
  rec <- recipe(~ X1 + X2, data = examples) |>
    step_unorder(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_interact + axe_env() works", {
  rec <- recipe(
    HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
    data = biomass_tr
  ) |>
    step_interact(terms = ~ carbon:hydrogen)
  x <- axe_env(rec)
  # `terms` is inspected directly here rather than through `terms[[1]]`.
  expect_identical(attr(x$steps[[1]]$terms, ".Environment"), test_en)
})

test_that("recipe + step_range + axe_env() works", {
  rec <- recipe(
    HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
    data = biomass_tr
  ) |>
    step_range(carbon, hydrogen)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_geodist + axe_env() works", {
  data(Smithsonian)
  rec <- recipe(~., data = Smithsonian) |>
    step_geodist(
      lat = latitude,
      lon = longitude,
      log = FALSE,
      ref_lat = 38.8986312,
      ref_lon = -77.0062457
    )
  x <- axe_env(rec)
  expect_identical(attr(x$steps[[1]]$lon[[1]], ".Environment"), test_en)
  expect_identical(attr(x$steps[[1]]$lat[[1]], ".Environment"), test_en)
})

test_that("recipe + step_ratio + axe_env() works", {
  data(biomass)
  # rowSums() avoids the data-frame-to-matrix round trip of apply(..., 1, sum).
  biomass$total <- rowSums(biomass[, 3:7])
  biomass_tr <- biomass[biomass$dataset == "Training", ]
  rec <- recipe(
    HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur + total,
    data = biomass_tr
  ) |>
    step_ratio(all_predictors(), denom = denom_vars(total))
  x <- axe_env(rec)
  expect_identical(attr(x$steps[[1]]$terms[[1]], ".Environment"), test_en)
  expect_identical(attr(x$steps[[1]]$denom[[1]], ".Environment"), test_en)
})

test_that("recipe + step_arrange + axe_env() works", {
  sort_vars <- c("Sepal.Length", "Petal.Length")
  # Namespace-qualified so the test does not rely on `syms` being attached.
  rec <- recipe(~., data = iris) |>
    step_arrange(!!!rlang::syms(sort_vars))
  x <- axe_env(rec)
  inputs_empty_env(x, 1)
  inputs_empty_env(x, 2)
})

test_that("recipe + step_filter + axe_env() works", {
  rec <- recipe(~., data = iris) |>
    step_filter(Sepal.Length > 4.5, Species == "setosa")
  x <- axe_env(rec)
  inputs_empty_env(x, 1)
  inputs_empty_env(x, 2)
})

test_that("recipe + step_slice + axe_env() works", {
  rec <- recipe(~., data = iris) |>
    step_slice(1:3)
  x <- axe_env(rec)
  inputs_empty_env(x, 1)
})

test_that("recipe + step_zv + axe_env() works", {
  rec <- recipe(HHV ~ ., data = biomass_tr) |>
    step_zv(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_window + axe_env() works", {
  rec <- recipe(Species ~ ., data = iris) |>
    step_window(
      starts_with("Sepal"),
      size = 3,
      statistic = "median",
      names = paste0("med_3pt_", 1:2),
      role = "outcome"
    )
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

# Second step_unorder case, this time on the biomass training data.
test_that("recipe + step_unorder + axe_env() works", {
  rec <- recipe(HHV ~ ., data = biomass_tr) |>
    step_unorder(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_spatialsign + axe_env() works", {
  rec <- recipe(HHV ~ ., data = biomass_tr) |>
    step_spatialsign(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_shuffle + axe_env() works", {
  rec <- recipe(HHV ~ ., data = biomass_tr) |>
    step_shuffle(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_rm + axe_env() works", {
  rec <- recipe(Species ~ ., data = iris) |>
    step_rm(contains("Sepal"))
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_pls + axe_env() works", {
  # recipes `step_pls()` was changed in 0.1.13 to use mixOmics under the hood
  skip_if_not_installed("mixOmics")
  # The selector must be called: `all_predictors()`, not the bare function.
  rec <- recipe(HHV ~ ., data = biomass_tr) |>
    step_pls(all_predictors(), outcome = "HHV")
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_pca + axe_env() works", {
  rec <- recipe(HHV ~ ., data = biomass_tr) |>
    step_pca(all_numeric(), num_comp = 3)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_impute_bag + axe_env() works", {
  rec <- recipe(Price ~ ., data = credit_tr) |>
    step_impute_bag(Status, Home, Marital, Job, Income, Assets, Debt)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
  impute_empty_env(x, 1)
})

test_that("recipe + step_classdist + axe_env() works", {
  rec <- recipe(Species ~ ., data = iris) |>
    step_classdist(
      all_predictors(),
      class = "Species",
      pool = FALSE,
      mean_func = mean
    )
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_corr + axe_env() works", {
  rec <- recipe(HHV ~ ., data = biomass_tr) |>
    step_corr(all_numeric(), threshold = .5)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_depth + axe_env() works", {
  rec <- recipe(Species ~ ., data = iris) |>
    step_depth(all_predictors(), class = "Species")
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_isomap + axe_env() works", {
  rec <- recipe(HHV ~ ., data = biomass_tr) |>
    step_isomap(all_predictors(), neighbors = 5, num_terms = 2)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_kpca + axe_env() works", {
  rec <- recipe(HHV ~ ., data = biomass_tr) |>
    step_kpca(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_lag + axe_env() works", {
  df <- data.frame(x = runif(20), index = 1:20)
  rec <- recipe(~., data = df) |>
    step_lag(index, lag = 2:3)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_lincomb + axe_env() works", {
  rec <- recipe(HHV ~ ., data = biomass_tr) |>
    step_lincomb(all_predictors())
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_impute_mean + axe_env() works", {
  rec <- recipe(Price ~ ., data = credit_tr) |>
    step_impute_mean(Income, Assets, Debt)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_impute_median + axe_env() works", {
  rec <- recipe(Price ~ ., data = credit_tr) |>
    step_impute_median(Income, Assets, Debt)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})

test_that("recipe + step_impute_mode + axe_env() works", {
  rec <- recipe(Price ~ ., data = credit_tr) |>
    step_impute_mode(Income, Assets, Debt)
  x <- axe_env(rec)
  terms_empty_env(x, 1)
})
# step_naomit: the captured selector should carry the base environment after
# axe_env().
test_that("recipe + step_naomit + axe_env() works", {
  spec <- step_naomit(recipe(~., data = Sacramento), all_predictors())
  axed <- axe_env(spec)
  terms_empty_env(axed, 1)
})

# step_nzv: same environment check as above.
test_that("recipe + step_nzv + axe_env() works", {
  spec <- step_nzv(recipe(HHV ~ ., data = biomass_tr), all_predictors())
  axed <- axe_env(spec)
  terms_empty_env(axed, 1)
})

# axe_fitted(): the template is expected to equal a zero-row tibble of the
# training data.
test_that("recipe + axe_fitted() works", {
  spec <- step_nzv(recipe(HHV ~ ., data = biomass_tr), all_predictors())
  axed <- axe_fitted(spec)
  empty_template <- as_tibble(biomass_tr[integer(), ])
  expect_identical(axed$template, empty_template)
})

# butcher(): baking with the butchered recipe must match baking with the
# original prepped recipe.
test_that("recipe + bake() works", {
  prepped <- prep(
    step_nzv(recipe(HHV ~ ., data = biomass_tr), all_predictors())
  )
  butchered <- butcher(prepped)
  expect_identical(bake(butchered, biomass_te), bake(prepped, biomass_te))
})