# Shared input table: one national row and two regional rows, where the
# national row has no region and the last mystery_count is missing

df <- data.frame(
  time_period = rep(2022, 3),
  time_identifier = rep("Calendar year", 3),
  geographic_level = c("National", "Regional", "Regional"),
  country_code = rep("E92000001", 3),
  country_name = rep("England", 3),
  region_code = c(NA, "E12000001", "E12000002"),
  region_name = c(NA, "North East", "North West"),
  mystery_count = c(42, 25, NA)
)

test_that("z_replace outputs are as expected", {
  # Builds the expected output for a given replacement symbol: the region
  # columns keep their NA, while mystery_count is character and carries the
  # replacement in place of its NA
  expected_for <- function(replacement) {
    data.frame(
      time_period = rep(2022, 3),
      time_identifier = rep("Calendar year", 3),
      geographic_level = c("National", "Regional", "Regional"),
      country_code = rep("E92000001", 3),
      country_name = rep("England", 3),
      region_code = c(NA, "E12000001", "E12000002"),
      region_name = c(NA, "North East", "North West"),
      mystery_count = c("42", "25", replacement)
    )
  }

  # testing standard functionality (default replacement)
  expect_equal(z_replace(df), expected_for("z"))

  # testing alternative replacements
  expect_equal(z_replace(df, replacement_alt = "x"), expected_for("x"))
  expect_equal(z_replace(df, replacement_alt = "c"), expected_for("c"))
})

# check error messages for non-empty data frames

test_that("Error messages are as expected in non-empty frames", {
  # The expected text is given as a plain string. Wrapping it in cat() would
  # print it and hand NULL to expect_error(), which then accepts any error.
  # fixed = TRUE makes the text match literally rather than as a regex.
  # NOTE(review): the fragments below are assumed to match the wording used
  # by z_replace() - confirm against the function's stop() messages.

  # testing error for non character strings in replacement_alt
  expect_error(
    z_replace(df, replacement_alt = 1),
    "You provided a numeric input for replacement_alt",
    fixed = TRUE
  )

  # testing error for multiple values in replacement_alt
  expect_error(
    z_replace(df, replacement_alt = c("a", "z", "x")),
    "You provided multiple values for replacement_alt",
    fixed = TRUE
  )
})
# Create a table to test exclude_columns

df <- data.frame(
  a = c("1", "2", "3", "z"),
  b = c("1", "2", "z", "4"),
  county_name = c("county1", "county2", NA_character_, "county3"),
  country_code = c("country1", NA_character_, "country2", "country3"),
  time_period = c(2008, 2023, 2024, NA_real_)
)

test_that("exclude_columns works", {
  # without including county_name in exclude_columns: the only expected
  # difference from the input is "z" in place of the NA in county_name
  expected_default <- df
  expected_default$county_name <- c("county1", "county2", "z", "county3")
  expect_equal(z_replace(df), expected_default)

  # including county_name in exclude_columns: the output is expected to be
  # the same as the input table
  expect_equal(z_replace(df, exclude_columns = "county_name"), df)
})


# Checking speed of the function

# make this reproducible
set.seed(123)

# create table with randomly generated numbers
# every column has its own name: data.frame() silently renames duplicates
df <- data.frame(
  a = sample(1:1000, 10000, replace = TRUE),
  b = sample(1:1000, 10000, replace = TRUE),
  c = sample(1:1000, 10000, replace = TRUE),
  d = sample(1:1000, 10000, replace = TRUE),
  e = sample(1:1000, 10000, replace = TRUE),
  f = sample(1:1000, 10000, replace = TRUE),
  g = sample(1:1000, 10000, replace = TRUE),
  h = sample(1:1000, 10000, replace = TRUE),
  i = sample(1:1000, 10000, replace = TRUE),
  j = sample(1:1000, 10000, replace = TRUE),
  school_urn = sample(1:1000, 10000, replace = TRUE)
)

# putting NAs in the table: every value below 300 becomes NA
df <- df %>%
  dplyr::mutate(dplyr::across(
    a:school_urn,
    ~ dplyr::if_else(. < 300, as.double(NA), .)
  ))

# testing that the speed is less than 0.25 second
test_that("Speed of the function", {
  # wall-clock timings depend on the machine running the tests, so this
  # check is not run on CRAN
  skip_on_cran()

  # timing happens inside the test so that an error raised by z_replace()
  # is reported as a failure of this test
  start_time <- Sys.time()
  z_replace(df)
  end_time <- Sys.time()

  # elapsed time as a plain number of seconds
  test_time <- as.numeric(difftime(end_time, start_time, units = "secs"))

  # expect_lt() reports the measured value when the threshold is exceeded
  expect_lt(test_time, 0.25)
})

# Check error message for empty data frame

# a table with no rows and no columns
df <- data.frame()

test_that("Error messages are as expected", {
  # the same message is expected with and without an alternative replacement
  empty_table_msg <- "Table is empty or contains no rows."

  expect_error(z_replace(df), regexp = empty_table_msg)
  expect_error(z_replace(df, replacement_alt = "x"), regexp = empty_table_msg)
})


# Check error messages for when tables contain geography
# and time columns from the EES screener but with different formatting

# GEOGRAPHIC_LEVEL is upper case here, while time_period is in snake_case
df <- data.frame(
  GEOGRAPHIC_LEVEL = c("level1", "level2", "level3", NA_character_),
  time_period = c(2008, 2023, 2024, as.double(NA))
)

test_that("Formatting of column names are checked", {
  # The expected text is given as a plain string. Wrapping it in cat() would
  # print it and hand NULL to expect_error(), which then accepts any error.
  # fixed = TRUE makes the text match literally rather than as a regex.
  # NOTE(review): "snake_case" is assumed to appear in the message raised by
  # z_replace() for badly formatted column names - confirm, and tighten the
  # expected text to the full message if desired.
  expect_error(
    z_replace(df),
    "snake_case",
    fixed = TRUE
  )
})