library(dplyr)
library(testthat)

data(iris)
iris <- iris[iris$Species != "setosa", ]  # Binary classification
iris$Species <- factor(iris$Species)


test_that("'label' is not available in the data", {
  expect_error({
    # Train models with Bayesian optimization
    model <- fastml(
      data = iris,
      label = "Unknown",
      algorithms = c("rand_forest")
    )
  })
})


test_that("model fails if reponse variable is not of supported type", {
  tmp <- iris %>%
    mutate(
      Date = as.Date("2024-12-19")
    )

  expect_error({
    # Train models with Bayesian optimization
    model <- fastml(
      data = iris,
      label = "Date",
      algorithms = c("rand_forest")
    )
  })
})


test_that("stop if requested metric is not allowed.", {
  expect_error({
    # Train models with Bayesian optimization
    model <- fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      metric = "unknown"
    )
  })

  expect_error({
    # Example 2: Using the mtcars dataset for regression
    # Train models
    model <- fastml(
      data = mtcars,
      label = "mpg",
      algorithms = c("rand_forest"),
      metric = "unknown"
    )
  })
})

test_that("check for supported algorithms", {
  expect_warning({
    # Train models with Bayesian optimization
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest", "unknown")
    )
  })

  expect_error({
    # Train models with Bayesian optimization
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("unknown")
    )
  })
})

test_that("variables successfuly excluded", {
  expect_error({
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      exclude = "Species"
    )
  })

  expect_warning({
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      exclude = c("Sepal.Length", "unknown")
    )
  })
})

test_that("checks for impute_method", {
  expect_warning(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      impute_method = "medianImpute"
    ),
    "Missing values in numeric predictors are being imputed using the median."
  )

  expect_warning(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      impute_method = "knnImpute"
    ),
    "Missing values are being imputed using KNN"
  )

  expect_warning(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      impute_method = "bagImpute"
    ),
    "Missing values are being imputed using bagging"
  )

  expect_warning(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      impute_method = "remove"
    ),
    "Rows with missing values in predictors are being removed."
  )

  expect_error(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      impute_method = "unknown"
    ),
    "Invalid impute_method specified."
  )
})

test_that("stop if recipe is not correctly specified.", {
  expect_error({
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      recipe = "unknown"
    )
  })
})

test_that("evaluate_models works with a single workflow", {
  rec <- recipes::recipe(Species ~ ., data = iris)
  spec <- parsnip::logistic_reg() %>% parsnip::set_engine("glm")
  wf <- workflows::workflow() %>%
    workflows::add_model(spec) %>%
    workflows::add_recipe(rec)
  fitted_wf <- parsnip::fit(wf, data = iris)
  models <- list(log_reg = fitted_wf)
  eval_res <- evaluate_models(models, iris, iris,
                              label = "Species", task = "classification",
                              metric = "accuracy", event_class = "second")
  expect_true("log_reg" %in% names(eval_res$performance))
})

test_that("process_model works without global variables", {
  rec <- recipes::recipe(Species ~ ., data = iris)
  spec <- parsnip::logistic_reg() %>% parsnip::set_engine("glm")
  wf <- workflows::workflow() %>%
    workflows::add_model(spec) %>%
    workflows::add_recipe(rec)
  fitted_wf <- parsnip::fit(wf, data = iris)

  res <- process_model(fitted_wf, model_id = "log_reg", task = "classification",
                       test_data = iris, label = "Species",
                       event_class = "second", engine = "glm",
                       train_data = iris, metric = "accuracy")
  expect_s3_class(res$performance, "tbl_df")
  expect_equal(nrow(res$predictions), nrow(iris))
})

test_that("regression model successful.", {
  res <- fastml(
    data = iris[, -5],
    label = "Sepal.Length",
    algorithms = c("linear_reg")
  )
  expect_s3_class(res, "fastml")
})


test_that("multicore tasks successful.", {
  res <- fastml(
    data = iris[, -5],
    label = "Sepal.Length",
    algorithms = c("linear_reg"),
    n_cores = 2
  )
  expect_s3_class(res, "fastml")
})

test_that("stop if unsupported metric is selected.", {
  expect_error({
    fastml(
      data = iris[,-5],
      label = "Sepal.Length",
      algorithms = c("linear_reg"),
      n_cores = 2,
      metric = "unkown"
    )
  })

  expect_no_error({
    fastml(
      data = iris[,-5],
      label = "Sepal.Length",
      algorithms = c("linear_reg"),
      n_cores = 2,
      metric = "rmse"
    )
  })
})

test_that("invalid tuning_strategy triggers error", {
  expect_error(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      tuning_strategy = "invalid"
    ),
    "should be one of"
  )
})

test_that("grid tuning executes successfully", {
  res <- fastml(
    data = iris,
    label = "Species",
    algorithms = c("rand_forest"),
    use_default_tuning = TRUE,
    tuning_strategy = "grid",
    resampling_method = "cv",
    folds = 5
  )
  expect_s3_class(res, "fastml")
  expect_true(length(res$models) > 0)
})

test_that("Bayesian tuning executes successfully", {
  res <- fastml(
    data = iris,
    label = "Species",
    algorithms = c("rand_forest"),
    use_default_tuning = TRUE,
    tuning_strategy = "bayes",
    tuning_iterations = 2,
    resampling_method = "cv",
    folds = 5
  )
  expect_s3_class(res, "fastml")
  expect_true(length(res$models) > 0)
})

test_that("adaptive tuning executes successfully", {
  res <- fastml(
    data = iris,
    label = "Species",
    algorithms = c("rand_forest"),
    use_default_tuning = TRUE,
    tuning_strategy = "grid",
    adaptive = TRUE,
    resampling_method = "cv",
    folds = 5
  )
  expect_s3_class(res, "fastml")
})

test_that("early_stopping does not warn with grid tuning", {
  expect_warning(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      use_default_tuning = TRUE,
      tuning_strategy = "grid",
      early_stopping = TRUE,
      resampling_method = "cv",
      folds = 5
    ),
    regexp = NA
  )
})

test_that("early stopping with bayesian tuning works", {
  res <- fastml(
    data = iris,
    label = "Species",
    algorithms = c("rand_forest"),
    use_default_tuning = TRUE,
    tuning_strategy = "bayes",
    tuning_iterations = 2,
    early_stopping = TRUE,
    resampling_method = "cv",
    folds = 5
  )
  expect_s3_class(res, "fastml")
})

test_that("invalid tuning_iterations triggers error", {
  expect_error(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      tuning_strategy = "bayes",
      tuning_iterations = 0,
      resampling_method = "cv",
      folds = 5
    ),
    "tuning_iterations"
  )
})

test_that("tuning_iterations ignored for non-bayesian strategies", {
  expect_no_error(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      use_default_tuning = TRUE,
      tuning_strategy = "grid",
      tuning_iterations = 0,
      resampling_method = "cv",
      folds = 5
    )
  )
  expect_no_error(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      tuning_strategy = "none",
      tuning_iterations = -1,
      resampling_method = "none"
    )
  )
})

test_that("adaptive ignored with bayesian tuning", {
  expect_warning(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      use_default_tuning = TRUE,
      tuning_strategy = "bayes",
      adaptive = TRUE,
      tuning_iterations = 1,
      resampling_method = "cv",
      folds = 5
    ),
    "adaptive"
  )
})

test_that("warning when tune_params ignored with no tuning", {
  tune <- list(rand_forest = list(ranger = list(mtry = c(1, 2))))
  expect_warning(
    fastml(
      data = iris,
      label = "Species",
      algorithms = c("rand_forest"),
      tune_params = tune,
      tuning_strategy = "none",
      use_default_tuning = TRUE,
      resampling_method = "none"
    ),
    "tune_params"
  )
})