test_that("can mold simple formulas", { sparse_bp <- default_formula_blueprint(composition = "dgCMatrix") matrix_bp <- default_formula_blueprint(composition = "matrix") x1 <- mold(fac_1 ~ num_1, example_train) x2 <- mold(fac_1 ~ num_1, example_train, blueprint = sparse_bp) x3 <- mold(fac_1 ~ num_1, example_train, blueprint = matrix_bp) expect_s3_class(x1$predictors, "tbl_df") expect_s4_class(x2$predictors, "dgCMatrix") expect_matrix(x3$predictors) expect_equal(colnames(x1$predictors), "num_1") expect_equal(colnames(x2$predictors), "num_1") expect_equal(colnames(x3$predictors), "num_1") expect_s3_class(x1$outcomes, "tbl_df") expect_s3_class(x2$outcomes, "tbl_df") expect_s3_class(x3$outcomes, "tbl_df") expect_equal(colnames(x1$outcomes), "fac_1") expect_equal(x1$outcomes, x2$outcomes) expect_equal(x1$outcomes, x3$outcomes) expect_s3_class(x1$blueprint, "default_formula_blueprint") }) test_that("can mold multivariate formulas", { sparse_bp <- default_formula_blueprint(composition = "dgCMatrix") matrix_bp <- default_formula_blueprint(composition = "matrix") x1 <- mold(num_1 + num_2 ~ num_3, example_train) x2 <- mold(num_1 + num_2 ~ num_3, example_train, blueprint = sparse_bp) x3 <- mold(num_1 + num_2 ~ num_3, example_train, blueprint = matrix_bp) expect_s3_class(x1$outcomes, "tbl_df") expect_equal(colnames(x1$outcomes), c("num_1", "num_2")) expect_equal(x1$outcomes, x2$outcomes) expect_equal(x1$outcomes, x3$outcomes) y1 <- mold(log(num_2) + poly(num_2, degree = 2) ~ fac_1, example_train) y2 <- mold(log(num_2) + poly(num_2, degree = 2) ~ fac_1, example_train, blueprint = sparse_bp) y3 <- mold(log(num_2) + poly(num_2, degree = 2) ~ fac_1, example_train, blueprint = matrix_bp) expect_equal( colnames(y1$outcomes), c( "log(num_2)", "poly(num_2, degree = 2).1", "poly(num_2, degree = 2).2" ) ) expect_equal(y1$outcomes, y2$outcomes) expect_equal(y1$outcomes, y3$outcomes) }) test_that("factor predictors with no intercept are fully expanded", { x1 <- mold( num_1 ~ fac_1, example_train, blueprint = default_formula_blueprint(intercept = TRUE) ) x2 <- mold( num_1 ~ fac_1, example_train, blueprint = default_formula_blueprint(intercept = TRUE, composition = "matrix") ) y1 <- mold( num_1 ~ fac_1, example_train, blueprint = default_formula_blueprint(intercept = FALSE, indicators = "one_hot") ) y2 <- mold( num_1 ~ fac_1, example_train, blueprint = default_formula_blueprint( intercept = FALSE, indicators = "one_hot", composition = "matrix" ) ) expect_equal( colnames(x1$predictors), c("(Intercept)", "fac_1b", "fac_1c") ) expect_equal( colnames(x2$predictors), c("(Intercept)", "fac_1b", "fac_1c") ) expect_equal( colnames(y1$predictors), c("fac_1a", "fac_1b", "fac_1c") ) expect_equal( colnames(y2$predictors), c("fac_1a", "fac_1b", "fac_1c") ) }) test_that("can mold and not expand dummies", { x <- mold( num_1 ~ fac_1, example_train, blueprint = default_formula_blueprint(indicators = "none") ) expect_equal(colnames(x$predictors), "fac_1") expect_s3_class(x$predictors$fac_1, "factor") expect_equal(x$blueprint$indicators, "none") }) test_that("errors are thrown if `indicator = 'none'` and factor interactions exist", { expect_error( mold(~fac_1, example_train, blueprint = default_formula_blueprint(indicators = "none")), NA ) expect_snapshot(error = TRUE, { mold( num_1 ~ fac_1:num_2, example_train, blueprint = default_formula_blueprint(indicators = "none") ) }) # Checking various types of generated interactions expect_snapshot(error = TRUE, { mold( num_1 ~ fac_1:num_2, example_train, blueprint = default_formula_blueprint(indicators = "none") ) }) expect_snapshot(error = TRUE, { mold( num_1 ~ fac_1 * num_2, example_train, blueprint = default_formula_blueprint(indicators = "none") ) }) expect_snapshot(error = TRUE, { mold( num_1 ~ (fac_1 + num_2)^2, example_train, blueprint = default_formula_blueprint(indicators = "none") ) }) expect_snapshot(error = TRUE, { mold( num_1 ~ fac_1 %in% num_2, example_train, blueprint = default_formula_blueprint(indicators = "none") ) }) example_train2 <- example_train example_train2$fac_12 <- example_train2$fac_1 expect_snapshot(error = TRUE, { mold( ~ fac_1:fac_12, example_train2, blueprint = default_formula_blueprint(indicators = "none") ) }) }) test_that("errors are thrown if `indicator = 'none'` and factors are used in inline functions", { blueprint_no_indicators <- default_formula_blueprint(indicators = "none") expect_snapshot(error = TRUE, { mold(~ paste0(fac_1), example_train, blueprint = blueprint_no_indicators) }) expect_snapshot(error = TRUE, { mold(~ paste0(fac_1), example_train, blueprint = blueprint_no_indicators) }) expect_snapshot(error = TRUE, { mold(~ fac_1 %>% paste0(), example_train, blueprint = blueprint_no_indicators) }) expect_snapshot(error = TRUE, { mold(~ paste0(fac_1 + fac_1), example_train, blueprint = blueprint_no_indicators) }) expect_snapshot(error = TRUE, { mold(~ (fac_1) & num_1, example_train, blueprint = blueprint_no_indicators) }) expect_snapshot(error = TRUE, { mold(~ (fac_1 & num_1), example_train, blueprint = blueprint_no_indicators) }) example_train2 <- example_train example_train2$fac_12 <- example_train2$fac_1 expect_snapshot(error = TRUE, { mold(~ paste0(fac_1) + paste0(fac_12), example_train2, blueprint = blueprint_no_indicators) }) }) test_that("`indicators = 'none'` doesn't error for allowed inline functions", { df <- tibble(y = 1:2, x = factor(c("a", "b")), x2 = c(2, 3)) blueprint_no_indicators <- default_formula_blueprint(indicators = "none") out <- mold(y ~ (x), df, blueprint = blueprint_no_indicators) expect_identical(out$predictors$x, df$x) out <- mold(y ~ (x) + x, df, blueprint = blueprint_no_indicators) expect_identical(out$predictors$x, df$x) out <- mold(y ~ (x) - x2, df, blueprint = blueprint_no_indicators) expect_identical(out$predictors, df["x"]) out <- mold(y ~ 1 + x2 + x, df, blueprint = blueprint_no_indicators) expect_identical(out$predictors$x, df$x) out <- mold(y ~ 1 - x2 + (x), df, blueprint = blueprint_no_indicators) expect_identical(out$predictors$x, df$x) }) test_that("`indicators = 'none'` doesn't error for names with spaces in them (#217)", { df <- vctrs::data_frame(y = 1:2, `foo bar` = factor(c("a", "b"))) blueprint_no_indicators <- default_formula_blueprint(indicators = "none") out <- mold(y ~ `foo bar`, df, blueprint = blueprint_no_indicators) expect_identical(out$predictors[["foo bar"]], df[["foo bar"]]) }) test_that("`indicators = 'none'` doesn't error if a non-factor name regex-matches a factor name (#182)", { df <- vctrs::data_frame(y = 1:2, x = factor(c("a", "b")), x2 = c(2, 3)) blueprint_no_indicators <- default_formula_blueprint(indicators = "none") out <- mold(y ~ x + x2, df, blueprint = blueprint_no_indicators) expect_identical(out$predictors$x, df$x) expect_identical(out$predictors$x2, df$x2) }) test_that("`indicators = 'none'` doesn't error if an inline function regex-matches a factor name (#182)", { df <- vctrs::data_frame(y = 1:2, identity = factor(c("a", "b")), x2 = c(2, 3)) blueprint_no_indicators <- default_formula_blueprint(indicators = "none") out <- mold(y ~ identity + identity(x2), df, blueprint = blueprint_no_indicators) expect_identical(out$predictors$`identity(x2)`, df$x2) expect_identical(out$predictors$identity, df$identity) }) test_that("`indicators = 'none'` works fine in strange formulas", { x <- mold( ~NULL, example_train, blueprint = default_formula_blueprint(indicators = "none", intercept = TRUE) ) expect_equal( colnames(x$predictors), "(Intercept)" ) }) test_that("formula intercepts can be added", { x1 <- mold( fac_1 ~ num_1, example_train, blueprint = default_formula_blueprint(intercept = TRUE) ) x2 <- mold( fac_1 ~ num_1, example_train, blueprint = default_formula_blueprint(intercept = TRUE, composition = "dgCMatrix") ) expect_true("(Intercept)" %in% colnames(x1$predictors)) expect_true("(Intercept)" %in% colnames(x2$predictors)) expect_equal(attr(x1$blueprint$terms$predictors, "intercept"), 1) expect_equal(attr(x2$blueprint$terms$predictors, "intercept"), 1) # Don't want intercept in original predictors expect_false("(Intercept)" %in% colnames(x1$blueprint$ptypes$predictors)) expect_false("(Intercept)" %in% colnames(x2$blueprint$ptypes$predictors)) }) test_that("can mold formulas with special terms", { bp <- default_formula_blueprint(composition = "dgCMatrix") x1 <- mold(fac_1 ~ num_1:num_2 + I(num_1^2), example_train) x2 <- mold(fac_1 ~ num_1:num_2 + I(num_1^2), example_train, blueprint = bp) y1 <- mold(fac_1 ~ poly(num_1, degree = 2), example_train) y2 <- mold(fac_1 ~ poly(num_1, degree = 2), example_train, blueprint = bp) expect_equal( colnames(x1$predictors), c("I(num_1^2)", "num_1:num_2") ) expect_equal( colnames(x2$predictors), c("I(num_1^2)", "num_1:num_2") ) expect_equal( colnames(x1$blueprint$ptypes$predictors), c("num_1", "num_2") ) expect_equal( colnames(x2$blueprint$ptypes$predictors), c("num_1", "num_2") ) }) test_that("formulas with non-existent columns are caught", { bp <- default_formula_blueprint(composition = "dgCMatrix") expect_error( mold(fac_1 ~ y + z, example_train), "predictors were not found in `data`: 'y', 'z'" ) expect_error( mold(fac_1 ~ y + z, example_train, blueprint = bp), "predictors were not found in `data`: 'y', 'z'" ) expect_error( mold(y + z ~ fac_1, example_train), "outcomes were not found in `data`: 'y', 'z'" ) expect_error( mold(y + z ~ fac_1, example_train, blueprint = bp), "outcomes were not found in `data`: 'y', 'z'" ) }) test_that("global environment variables cannot be used", { expect_error( { y <- 1 mold(fac_1 ~ y, example_train) }, "predictors were not found in `data`: 'y'" ) }) test_that("cannot manually remove intercept in the formula itself", { bp <- default_formula_blueprint(composition = "dgCMatrix") expect_error( mold(fac_1 ~ y + 0, example_train), "`formula` must not contain" ) expect_error( mold(fac_1 ~ y + 0, example_train, blueprint = bp), "`formula` must not contain" ) expect_error( mold(fac_1 ~ 0 + y, example_train), "`formula` must not contain" ) expect_error( mold(fac_1 ~ y - 1, example_train), "`formula` must not contain" ) }) test_that("RHS with _only_ intercept related terms are caught", { bp <- default_formula_blueprint(composition = "dgCMatrix") expect_snapshot(error = TRUE, { mold(~0, example_train) }) expect_snapshot(error = TRUE, { mold(~0, example_train, blueprint = bp) }) expect_snapshot(error = TRUE, { mold(~1, example_train) }) expect_snapshot(error = TRUE, { mold(~ -1, example_train) }) }) test_that("`NULL` can be used to represent empty RHS formulas", { bp <- default_formula_blueprint(composition = "dgCMatrix") expect_snapshot(error = TRUE, { mold(~0, example_train) }) expect_snapshot(error = TRUE, { mold(~0, example_train, blueprint = bp) }) expect_error( x1 <- mold(~NULL, example_train), NA ) expect_error( x2 <- mold(~NULL, example_train, blueprint = bp), NA ) expect_equal(nrow(x1$predictors), 12) expect_equal(nrow(x1$outcomes), 12) expect_equal(nrow(x2$predictors), 12) expect_equal(nrow(x2$outcomes), 12) expect_error( y <- mold(~NULL, example_train, blueprint = default_formula_blueprint(intercept = TRUE)), NA ) expect_equal(colnames(y$predictors), "(Intercept)") }) test_that("intercepts can still be added when not using indicators (i.e. model.matrix())", { x <- mold( num_2 ~ fac_1, example_train, blueprint = default_formula_blueprint(intercept = TRUE, indicators = "none") ) expect_true( "(Intercept)" %in% colnames(x$predictors) ) expect_s3_class( x$predictors$fac_1, "factor" ) }) test_that("`data` is validated", { bp <- default_formula_blueprint(composition = "dgCMatrix") expect_error( mold(fac_1 ~ num_2, 1), "`data` must be a data.frame or a matrix" ) expect_error( mold(fac_1 ~ num_2, 1, blueprint = bp), "`data` must be a data.frame or a matrix" ) }) test_that("full interaction syntax is supported", { expect_equal( mold(~ fac_1 * num_2, example_train)$predictors, mold(~ fac_1 + num_2 + fac_1:num_2, example_train)$predictors ) expect_equal( mold(~ fac_1 * num_2 - fac_1:num_2, example_train)$predictors, mold(~ fac_1 + num_2, example_train)$predictors ) expect_equal( mold(~ (num_2 + num_1 + num_3)^2, example_train)$predictors, mold( ~ num_2 + num_1 + num_3 + num_2:num_1 + num_2:num_3 + num_1:num_3, example_train )$predictors ) expect_equal( mold(~ num_2 + num_1 %in% num_2, example_train)$predictors, mold(~ num_2 + num_2:num_1, example_train)$predictors ) }) test_that("`indicators = 'none'` runs numeric interactions", { x <- mold(~ num_1:num_2, example_train, blueprint = default_formula_blueprint(indicators = "none") ) expect_equal( colnames(x$predictors), "num_1:num_2" ) }) test_that("LHS of the formula cannot contain interactions", { expect_snapshot(error = TRUE, { mold(num_1:num_2 ~ num_2, example_train) }) expect_snapshot(error = TRUE, { mold(num_1 * num_2 ~ num_2, example_train) }) expect_snapshot(error = TRUE, { mold(num_1 %in% num_2 ~ num_2, example_train) }) expect_snapshot(error = TRUE, { mold((num_1 + num_2)^2 ~ num_2, example_train) }) expect_snapshot(error = TRUE, { mold(num_1:num_2 + fac_1:num_1 ~ num_2, example_train) }) expect_snapshot(error = TRUE, { mold(num_1 / num_2 ~ num_2, example_train) }) }) test_that("LHS of the formula won't misinterpret `::` as an interaction (#174)", { out <- mold(base::cbind(num_1, num_2) ~ num_3, example_train) expect_identical(ncol(out$outcomes), 2L) }) test_that("original predictor and outcome classes are recorded", { bp <- default_formula_blueprint(composition = "dgCMatrix") x1 <- mold(log(num_1) ~ log(num_2), example_train) x2 <- mold(log(num_1) ~ log(num_2), example_train, blueprint = bp) expect_equal( get_data_classes(x1$blueprint$ptypes$predictors), list(num_2 = "numeric") ) expect_equal( get_data_classes(x2$blueprint$ptypes$predictors), list(num_2 = "numeric") ) expect_equal( get_data_classes(x1$blueprint$ptypes$outcomes), list(num_1 = "integer") ) expect_equal( get_data_classes(x2$blueprint$ptypes$outcomes), list(num_1 = "integer") ) }) test_that("`.` notation works as expected", { bp <- default_formula_blueprint(composition = "dgCMatrix") x1 <- mold(fac_1 ~ ., example_train) x2 <- mold(fac_1 ~ ., example_train, blueprint = bp) # no fac_1 columns in predictors expect_equal( colnames(x1$blueprint$ptypes$predictors), c("num_1", "num_2", "num_3", "fac_2") ) expect_equal( colnames(x2$blueprint$ptypes$predictors), c("num_1", "num_2", "num_3", "fac_2") ) # fac_1 is the outcome expect_equal( colnames(x1$blueprint$ptypes$outcomes), "fac_1" ) expect_equal( colnames(x2$blueprint$ptypes$outcomes), "fac_1" ) }) # `expand_formula_dot_notation()` does not expand LHS dots, and we check # for them in `get_all_outcomes()`. That calls `all.vars()`, which returns # the `"."` as a variable. test_that("`.` notation fails on the LHS", { bp <- default_formula_blueprint(composition = "dgCMatrix") expect_error( mold(. ~ fac_1, example_train), "The left hand side of the formula cannot contain `.`" ) expect_error( mold(. ~ fac_1, example_train, blueprint = bp), "The left hand side of the formula cannot contain `.`" ) }) test_that("`.` notation with variable as predictor and outcome", { bp <- default_formula_blueprint(composition = "dgCMatrix") x1 <- mold(num_2 ~ . + num_2, example_train) x2 <- mold(num_2 ~ . + num_2, example_train) # num_2 IS a predictor expect_true( "num_2" %in% colnames(x1$blueprint$ptypes$predictors) ) expect_true( "num_2" %in% colnames(x2$blueprint$ptypes$predictors) ) # num_2 IS the outcome expect_equal( colnames(x1$blueprint$ptypes$outcomes), "num_2" ) expect_equal( colnames(x2$blueprint$ptypes$outcomes), "num_2" ) y1 <- mold(num_2 ~ . + log(num_2), example_train) y2 <- mold(num_2 ~ . + log(num_2), example_train, blueprint = bp) # num_2 IS a predictor expect_true( "num_2" %in% colnames(y1$blueprint$ptypes$predictors) ) expect_true( "num_2" %in% colnames(y2$blueprint$ptypes$predictors) ) # num_2 IS the outcome expect_equal( colnames(y1$blueprint$ptypes$outcomes), "num_2" ) expect_equal( colnames(y2$blueprint$ptypes$outcomes), "num_2" ) }) test_that("`.` notation with no outcome works fine", { bp <- default_formula_blueprint(composition = "dgCMatrix") # Uses all columns of example_train x1 <- mold(~., example_train) x2 <- mold(~., example_train, blueprint = bp) expect_equal( ncol(x1$predictors), 7 ) expect_equal( ncol(x2$predictors), 7 ) expect_equal( colnames(x1$blueprint$ptypes$predictors), c("num_1", "num_2", "num_3", "fac_1", "fac_2") ) expect_equal( colnames(x2$blueprint$ptypes$predictors), c("num_1", "num_2", "num_3", "fac_1", "fac_2") ) }) test_that("`-var` still registers var as a predictor", { # This is expected, and is the same as base R x <- mold(num_2 ~ . - num_1, example_train) # num_1 IS a predictor expect_true( "num_1" %in% colnames(x$blueprint$ptypes$predictors) ) }) test_that("Missing y value returns a 0 column tibble for `outcomes`", { bp <- default_formula_blueprint(composition = "dgCMatrix") x1 <- mold(~num_2, example_train) x2 <- mold(NULL ~ num_2, example_train) x3 <- mold(~num_2, example_train, blueprint = bp) x4 <- mold(NULL ~ num_2, example_train, blueprint = bp) expect_equal(nrow(x1$outcomes), 12) expect_equal(ncol(x1$outcomes), 0) expect_equal(x1$outcomes, x2$outcomes) expect_equal(x1$outcomes, x3$outcomes) expect_equal(x1$outcomes, x4$outcomes) }) test_that("Missing y value returns a 0 column / 0 row tibble for `ptype`", { bp <- default_formula_blueprint(composition = "dgCMatrix") x1 <- mold(~num_2, example_train) x2 <- mold(~num_2, example_train, blueprint = bp) expect_equal(x1$blueprint$ptypes$outcomes, tibble()) expect_equal(x2$blueprint$ptypes$outcomes, tibble()) }) test_that("Missing y value still has outcome `terms` present", { x <- mold(~num_2, example_train) expect_equal( f_rhs(x$blueprint$terms$outcomes), expr(NULL + 0) ) }) test_that("`blueprint` is validated", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { mold(~x, df, blueprint = 1) }) }) # ------------------------------------------------------------------------------ # Character predictors test_that("character predictors are treated as factors when `indicators` is not 'none'", { df <- data.frame( y = 1:2, x = c("a", "b"), z = c("c", "d"), stringsAsFactors = FALSE ) bp1 <- default_formula_blueprint(indicators = "traditional") bp2 <- default_formula_blueprint(indicators = "one_hot") bp3 <- default_formula_blueprint(indicators = "traditional", composition = "matrix") bp4 <- default_formula_blueprint(indicators = "one_hot", composition = "matrix") x1 <- mold(y ~ x + z, df, blueprint = bp1) x2 <- mold(y ~ x + z, df, blueprint = bp2) x3 <- mold(y ~ x + z, df, blueprint = bp3) x4 <- mold(y ~ x + z, df, blueprint = bp4) expect_identical( colnames(x1$predictors), c("xa", "xb", "zd") ) expect_identical( colnames(x3$predictors), c("xa", "xb", "zd") ) expect_identical( colnames(x2$predictors), c("xa", "xb", "zc", "zd") ) expect_identical( colnames(x4$predictors), c("xa", "xb", "zc", "zd") ) }) test_that("character predictors are left as characters when `indicators` is 'none'", { df <- data.frame( y = 1:2, x = c("a", "b"), z = c("c", "d"), stringsAsFactors = FALSE ) bp <- default_formula_blueprint(indicators = "none") x <- mold(y ~ x + z, df, blueprint = bp) expect_identical( colnames(x$predictors), c("x", "z") ) expect_true(is.character(x$predictors$x)) expect_true(is.character(x$predictors$z)) expect_true(is.character(x$blueprint$ptypes$predictors$x)) expect_true(is.character(x$blueprint$ptypes$predictors$z)) }) test_that("character vectors with `indicators = traditional/one_hot` store levels in `levels` (#213)", { df <- tibble(x = c("a", "b", "c"), y = factor(c("d", "e", "e")), z = c("g", "f", "g")) bp <- default_formula_blueprint(indicators = "traditional") x <- mold(~x + y + z, df, blueprint = bp) # Only from character columns, and the levels get sorted # (like in base R's `model.matrix()` and `prep(strings_as_factors = TRUE)`) expect_identical( x$blueprint$levels, list( x = c("a", "b", "c"), z = c("f", "g") ) ) # We leave the `ptype` untouched, mirroring the original data expect_identical(x$blueprint$ptypes$predictors$x, character()) expect_identical(x$blueprint$ptypes$predictors$y, vec_ptype(df$y)) expect_identical(x$blueprint$ptypes$predictors$z, character()) bp <- default_formula_blueprint(indicators = "one_hot") x <- mold(~x + y + z, df, blueprint = bp) # Only from character columns, and the levels get sorted # (like in base R's `model.matrix()` and `prep(strings_as_factors = TRUE)`) expect_identical( x$blueprint$levels, list( x = c("a", "b", "c"), z = c("f", "g") ) ) }) test_that("character vectors with `indicators = none` don't use `levels` (#213)", { df <- tibble(x = c("a", "b", "c"), y = factor(c("d", "e", "e")), z = c("g", "f", "g")) bp <- default_formula_blueprint(indicators = "none") x <- mold(~x + y + z, df, blueprint = bp) expect_identical(x$blueprint$levels, list()) expect_identical(x$blueprint$ptypes$predictors$x, character()) expect_identical(x$blueprint$ptypes$predictors$y, vec_ptype(df$y)) expect_identical(x$blueprint$ptypes$predictors$z, character()) }) test_that("character vectors with `indicators = none` works with constant columns (#213)", { df <- tibble(x = "a", y = factor("d"), z = "g") bp <- default_formula_blueprint(indicators = "none") x <- mold(~x + y + z, df, blueprint = bp) expect_identical(x$blueprint$ptypes$predictors$x, character()) expect_identical(x$blueprint$ptypes$predictors$y, vec_ptype(df$y)) expect_identical(x$blueprint$ptypes$predictors$z, character()) expect_identical(x$predictors$x, df$x) expect_identical(x$predictors$y, df$y) expect_identical(x$predictors$z, df$z) }) # ------------------------------------------------------------------------------ # Factor encodings test_that("traditional encoding and no intercept", { df <- data.frame( x = 1:12, y = factor(rep(letters[1:3], each = 4)), z = factor(rep(LETTERS[1:2], 6)) ) bp1 <- default_formula_blueprint( intercept = FALSE, indicators = "traditional" ) bp2 <- default_formula_blueprint( intercept = FALSE, indicators = "traditional", composition = "matrix" ) x1 <- mold(x ~ y + z, df, blueprint = bp1) x2 <- mold(x ~ y + z, df, blueprint = bp2) expect_identical( colnames(x1$predictors), c("ya", "yb", "yc", "zB") ) expect_identical( colnames(x2$predictors), c("ya", "yb", "yc", "zB") ) expect_false(x1$blueprint$intercept) expect_false(x2$blueprint$intercept) }) test_that("traditional encoding and intercept", { df <- data.frame( x = 1:12, y = factor(rep(letters[1:3], each = 4)), z = factor(rep(LETTERS[1:2], 6)) ) bp1 <- default_formula_blueprint( intercept = TRUE, indicators = "traditional" ) bp2 <- default_formula_blueprint( intercept = TRUE, indicators = "traditional", composition = "matrix" ) x1 <- mold(x ~ y + z, df, blueprint = bp1) x2 <- mold(x ~ y + z, df, blueprint = bp2) expect_identical( colnames(x1$predictors), c("(Intercept)", "yb", "yc", "zB") ) expect_identical( colnames(x2$predictors), c("(Intercept)", "yb", "yc", "zB") ) expect_true(x1$blueprint$intercept) expect_true(x2$blueprint$intercept) }) test_that("one-hot encoding and no intercept", { df <- data.frame( x = 1:12, y = factor(rep(letters[1:3], each = 4)), z = factor(rep(LETTERS[1:2], 6)) ) bp1 <- default_formula_blueprint( intercept = FALSE, indicators = "one_hot" ) bp2 <- default_formula_blueprint( intercept = FALSE, indicators = "one_hot", composition = "matrix" ) x1 <- mold(x ~ y + z, df, blueprint = bp1) x2 <- mold(x ~ y + z, df, blueprint = bp2) expect_identical( colnames(x1$predictors), c("ya", "yb", "yc", "zA", "zB") ) expect_identical( colnames(x2$predictors), c("ya", "yb", "yc", "zA", "zB") ) expect_false(x1$blueprint$intercept) expect_false(x2$blueprint$intercept) }) test_that("one-hot encoding and intercept", { df <- data.frame( x = 1:12, y = factor(rep(letters[1:3], each = 4)), z = factor(rep(LETTERS[1:2], 6)) ) bp1 <- default_formula_blueprint( intercept = TRUE, indicators = "one_hot" ) bp2 <- default_formula_blueprint( intercept = TRUE, indicators = "one_hot", composition = "matrix" ) x1 <- mold(x ~ y + z, df, blueprint = bp1) x2 <- mold(x ~ y + z, df, blueprint = bp2) expect_identical( colnames(x1$predictors), c("(Intercept)", "ya", "yb", "yc", "zA", "zB") ) expect_identical( colnames(x2$predictors), c("(Intercept)", "ya", "yb", "yc", "zA", "zB") ) expect_true(x1$blueprint$intercept) expect_true(x2$blueprint$intercept) })