library(recipes)
library(testthat)


# Shared fixtures -------------------------------------------------------------

# Row count of the training fixture.
n <- 200

# Training data:
#   v, w - character columns drawn at random (seeded) from a/b/c and A/B
#   x    - unordered factor cycling through all twelve month abbreviations
#   y    - ordered factor of month names without January (default levels)
#   z    - same values as y, but with all twelve month names as levels
set.seed(8575)
tr_dat <- data.frame(
  v = sample(letters[1:3], size = n, replace = TRUE),
  w = sample(LETTERS[1:2], size = n, replace = TRUE),
  x = factor(rep(month.abb, length.out = n)),
  y = factor(rep(month.name[-1], length.out = n), ordered = TRUE),
  z = factor(
    rep(month.name[-1], length.out = n),
    levels = month.name,
    ordered = TRUE
  ),
  stringsAsFactors = FALSE
)

# Copy of the training data whose `x` already carries a level called "new".
tr_bad <- tr_dat
tr_bad$x <- factor(tr_bad$x, levels = c(levels(tr_dat$x), "new"))

# Five-row test data; v, w and y contain values absent from the training data.
first_five <- 1:5
te_dat <- data.frame(
  v = letters[first_five],
  w = LETTERS[first_five],
  x = factor(month.abb[first_five]),
  y = factor(month.name[first_five], ordered = TRUE),
  z = factor(month.name[first_five], levels = month.name, ordered = TRUE),
  stringsAsFactors = FALSE
)

# Test data with the first entry of both ordered factors set to missing.
te_miss <- te_dat
te_miss$y[1] <- NA
te_miss$z[1] <- NA

# Untrained base recipe over tr_dat, built from the one-sided formula `~.`
# (no outcome). Several tests below pipe this object into step_novel().
rec <- recipe(~., data = tr_dat)

# Checks the core contract of step_novel(): training data passes through
# unchanged, values unseen at prep() time are baked to "new", known values are
# kept, every baked column is a factor, and ordered factors stay ordered.
test_that("basic functionality", {
  ex_1 <- rec %>%
    step_novel(all_predictors()) %>%
    prep(tr_dat, strings_as_factors = FALSE)

  ex_1_tr <- bake(ex_1, new_data = tr_dat)
  ex_1_te <- bake(ex_1, new_data = te_dat)

  # Expected baked values: anything outside `known` is replaced by "new".
  with_novel <- function(values, known) {
    values <- as.character(values)
    values[!(values %in% known)] <- "new"
    values
  }

  # All columns come back as factors, including the character inputs v and w.
  expect_true(all(vapply(ex_1_tr, is.factor, logical(1))))
  expect_true(all(vapply(ex_1_te, is.factor, logical(1))))

  # Baking the training data must not alter any value.
  for (col in names(ex_1_tr)) {
    expect_equal(
      as.character(ex_1_tr[[col]]),
      as.character(tr_dat[[col]])
    )
  }

  # v, w: character columns; values not observed in training become "new".
  expect_equal(
    as.character(ex_1_te$v),
    with_novel(te_dat$v, unique(tr_dat$v))
  )
  expect_equal(
    as.character(ex_1_te$w),
    with_novel(te_dat$w, unique(tr_dat$w))
  )

  # x: every test value is an existing training level, so nothing changes.
  expect_equal(as.character(ex_1_te$x), as.character(te_dat$x))

  # y: "January" is not a training level, so the first entry becomes "new".
  expect_equal(as.character(ex_1_te$y[1]), "new")
  expect_equal(
    as.character(ex_1_te$y),
    with_novel(te_dat$y, levels(tr_dat$y))
  )

  # z: "January" is a declared (though unobserved) training level, so the
  # whole column, including the first entry, is left untouched.
  expect_equal(as.character(ex_1_te$z), as.character(te_dat$z))

  expect_true(is.ordered(ex_1_te$y))
  expect_true(is.ordered(ex_1_te$z))
})

# Snapshot tests for inputs on which prep() of a step_novel() recipe is
# expected to fail; the recorded error messages live in the snapshot file.
test_that("bad args", {
  # Selecting every column of iris; NOTE(review): presumably fails because
  # iris has non-factor/non-string columns -- confirm against the snapshot.
  expect_snapshot(error = TRUE,
    recipe(~., data = iris) %>%
      step_novel(all_predictors()) %>%
      prep(iris)
  )
  # tr_bad$x already has a level named "new" (added at the top of this file);
  # NOTE(review): presumably this collides with the level step_novel() adds.
  expect_snapshot(error = TRUE,
    recipe(~., data = tr_bad) %>%
      step_novel(all_predictors()) %>%
      prep(tr_bad)
  )
})

# Missing values in new data must stay missing (same positions) after baking.
test_that("missing values", {
  trained <- prep(step_novel(rec, all_predictors()), training = tr_dat)
  baked_miss <- bake(trained, new_data = te_miss)

  # te_miss has an NA in the first row of both ordered factors.
  for (col in c("y", "z")) {
    na_before <- which(is.na(te_miss[[col]]))
    na_after <- which(is.na(baked_miss[[col]]))
    expect_equal(na_before, na_after)
  }
})

# Infrastructure ---------------------------------------------------------------

# A column used by the step must be present at bake() time even when its
# (non-standard) role has been marked as not required for baking.
test_that("bake method errors when needed non-standard role columns are missing", {
  potato_rec <- step_novel(rec, x)
  potato_rec <- update_role(potato_rec, x, new_role = "potato")
  potato_rec <- update_role_requirements(
    potato_rec,
    role = "potato",
    bake = FALSE
  )
  trained <- prep(potato_rec, tr_dat, strings_as_factors = FALSE)

  # Column 3 of tr_dat is `x`, the only column the step operates on.
  without_x <- tr_dat[, -3]

  expect_error(
    bake(trained, new_data = without_x),
    class = "new_data_missing_column"
  )
})

# Snapshots the printed recipe when step_novel() is added without any
# selectors, once before and once after prep().
test_that("empty printing", {
  rec <- recipe(mpg ~ ., mtcars)
  # No selectors are supplied, so the step has nothing selected.
  rec <- step_novel(rec)

  # Untrained recipe.
  expect_snapshot(rec)

  rec <- prep(rec, mtcars)

  # Trained recipe.
  expect_snapshot(rec)
})

# A step_novel() with nothing selected must not change the baked output
# compared with the same recipe without the step.
test_that("empty selection prep/bake is a no-op", {
  plain_rec <- recipe(mpg ~ ., mtcars)
  novel_rec <- step_novel(plain_rec)

  baked_plain <- bake(prep(plain_rec, mtcars), mtcars)
  baked_novel <- bake(prep(novel_rec, mtcars), mtcars)

  expect_identical(baked_plain, baked_novel)
})

# tidy() on a step_novel() with nothing selected returns a zero-row tibble
# with the usual columns, both before and after prep().
test_that("empty selection tidy method works", {
  empty_tidy <- tibble(
    terms = character(),
    value = character(),
    id = character()
  )

  untrained <- step_novel(recipe(mpg ~ ., mtcars))
  expect_identical(tidy(untrained, number = 1), empty_tidy)

  trained <- prep(untrained, mtcars)
  expect_identical(tidy(trained, number = 1), empty_tidy)
})

# Snapshots the printed output of a step_novel() recipe over all columns of
# tr_dat, both untrained and after prep().
test_that("printing", {
  rec <- recipe(~., data = tr_dat) %>%
    step_novel(all_predictors())

  # Untrained recipe, printed explicitly.
  expect_snapshot(print(rec))
  # prep() is called without a training argument here.
  expect_snapshot(prep(rec))
})