test_that("correct matches for test6 dataset", {
  skip_if_no_default_backbone()
  df <- get_testset(mutation = 0) |>
    wcvp_matching(prefilter_genus = TRUE)

  expect_false(all(df$direct_match))

  df <- get_testset(mutation = 1) |>
    wcvp_matching(prefilter_genus = TRUE)
  expect_false(any(df$direct_match))
  expect_true(all(df$genus_match))
  expect_false(all(df$suffix_match_species_within_genus | df$fuzzy_match_species_within_genus))
})

#test_that("test random characters", {
#  set.seed(111)
#
#  random <- tibble::tibble(Genus = sapply(vector("list", 10), FUN = function(x) paste(sample(letters, size = 6, replace = TRUE), collapse = '')),
#                        Species = sapply(vector("list", 10), FUN = function(x) paste(sample(letters, size = 8, replace = TRUE), collapse = '')))
#  random
#  matched_random <- random |>
#    dplyr::mutate(Genus = stringr::str_to_title(Genus)) |>
#    wcvp_matching(prefilter_genus = TRUE)
#
#  expect_false(any(matched_random$matched) | any(matched_random$direct_match) | any(matched_random$genus_match) | any(matched_random$fuzzy_match_genus))
#  expect_true(all(is.na(matched_random)[,c('Matched.Genus', 'Matched.Species', 'direct_match_species_within_genus', 'suffix_match_species_within_genus', 'fuzzy_match_species_within_genus', 'fuzzy_genus_dist', 'fuzzy_species_dist')]))
#})

# test_that("test empty dataframe Genus, Species", {
#   res <- get_db() |>
#     dplyr::sample_n(0) |>
#     dplyr::select(Genus, Species) |>
#     wcvp_matching(prefilter_genus = TRUE)
#   res
#   expect_true(nrow(res) == 0)
#   expect_true(all(c("Matched.Genus", 'Matched.Species') %in% colnames(res)))
#   res <- get_db() |>
#     dplyr::sample_n(0) |>
#     dplyr::select(Genus, Species) |>
#     dplyr::rename(Orig.Genus = Genus, Orig.Species = Species) |>
#     wcvp_matching(prefilter_genus = TRUE)
#   expect_true(nrow(res) == 0)
#   expect_true(all(c("Matched.Genus", 'Matched.Species') %in% colnames(res)))
# })

test_that("test inconsistent input types", {
  target_df <- tibble::tibble(
    genus = "Fagus",
    species = "sylvatica",
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_
  )
  run_match <- function(input) {
    wcvp_matching(input, prefilter_genus = TRUE, target_df = target_df)
  }

  ###
  # These cases should cause errors
  ###

  ## NA in input
  input <- tibble::tibble(Genus = c('Fagus'), Species = c(NA))
  expect_error(run_match(input))
  input <- tibble::tibble(Genus = c(NA), Species = c('sylvatica'))
  expect_error(run_match(input))
  ## duplicates
  input <- tibble::tibble(Genus = rep('Fagus', 2), Species = rep('sylvatica', 2))
  expect_error(run_match(input))
  ## Genus first letter upper case
  input <- tibble::tibble(Genus = c('fagus'), Species = c('sylvatica'))
  expect_error(run_match(input))
  ## Only one uppercase letter in Genus
  input <- tibble::tibble(Genus = c('Fagus-Pinus'), Species = c('sylvatica'))
  expect_error(run_match(input))
  input <- tibble::tibble(Genus = c('FAGUS'), Species = c('sylvatica'))
  expect_error(run_match(input))
  input <- tibble::tibble(Genus = c('FAgus'), Species = c('sylvatica'))
  expect_error(run_match(input))
  ## No uppercase letter in Species
  input <- tibble::tibble(Genus = c('Fagus'), Species = c('Sylvatica'))
  expect_error(run_match(input))
  input <- tibble::tibble(Genus = c('Fagus'), Species = c('sylvaTica'))
  expect_error(run_match(input))
  input <- tibble::tibble(Genus = c('Fagus'), Species = c('SYLVATICA'))
  expect_error(run_match(input))
  ## no hybrid characters in Genus name
  input <- tibble::tibble(Genus = c('Fagus\u00D7pinus'), Species = c('sylvatica'))
  expect_error(run_match(input))
  input <- tibble::tibble(Genus = c('\u00D7Fagus'), Species = c('sylvatica'))
  expect_error(run_match(input))
  input <- tibble::tibble(Genus = c('Fagus\u00D7'), Species = c('sylvatica'))
  expect_error(run_match(input))

  ###
  # These cases should trigger warnings
  ###
  ## trailing spaces
  input <- tibble::tibble(Genus = c('Fagus'), Species = c('sylvatica '))
  expect_warning(run_match(input))
  input <- tibble::tibble(Genus = c('Fagus '), Species = c('sylvatica'))
  expect_warning(run_match(input))
  ## leading spaces
  input <- tibble::tibble(Genus = c('Fagus'), Species = c(' sylvatica'))
  expect_warning(run_match(input))
  input <- tibble::tibble(Genus = c(' Fagus'), Species = c('sylvatica'))
  expect_warning(run_match(input))

  ###
  # These should trigger messages
  ###
  ## input is data.frame and not tibble::tibble
  input <- data.frame(Genus = c('Fagus'), Species = c('sylvatica'))
  expect_message(run_match(input))

})

test_that("check_df_format trims whitespace before consistency regex checks", {
  input <- tibble::tibble(Orig.Genus = "Fagus", Orig.Species = "sylvatica ", Rank = 2)

  expect_warning(input <- check_df_format(input))
  expect_no_error(check_df_consistency(input))
  expect_equal(input$Orig.Species[1], "sylvatica")
})

test_that("matching output always includes Input.Name", {
  target_df <- tibble::tibble(
    genus = "Fagus",
    species = "sylvatica",
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_
  )

  input_auto <- tibble::tibble(Genus = "Fagus",
                               Species = "sylvatica")

  out_auto <- wcvp_matching(input_auto,
                            target_df = target_df)

  expect_true("input_name" %in% names(out_auto))
  expect_equal(out_auto$input_name[1], "Fagus sylvatica")

  input_keep <- tibble::tibble(
    Genus = "Fagus",
    Species = "sylvatica",
    Input.Name = "Fagus sylvatica L."
  )
  out_keep <- wcvp_matching(input_keep, target_df = target_df)
  expect_equal(out_keep$input_name[1], "Fagus sylvatica L.")
})

test_that("matching with prefilter_genus keeps same result on small custom target", {
  target_df <- tibble::tibble(
    genus = c("Acer", "Quercus", "Abies"),
    species = c("rubrum", "robur", "alba"),
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_
  )

  input <- tibble::tibble(
    Genus = c("Acer", "Acr"),
    Species = c("rubrum", "rubrum")
  )

  out_base <- wcvp_matching(input, target_df = target_df, max_dist = 1, method = "osa")
  out_pref <- wcvp_matching(input, target_df = target_df, prefilter_genus = TRUE, max_dist = 1, method = "osa")

  expect_equal(out_pref$matched, out_base$matched)
  expect_equal(out_pref$matched_genus, out_base$matched_genus)
  expect_equal(out_pref$matched_species, out_base$matched_species)
})

test_that("matching can expand duplicate inputs with input_index when allow_duplicates is TRUE", {
  target_df <- tibble::tibble(
    genus = c("Opuntia", "Opuntia"),
    species = c("corotilla", "yanganucensis"),
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_
  )

  input <- tibble::tibble(
    Genus = c("Opuntia", "Opuntia", "Opuntia"),
    Species = c("yanganucensis", "yanganucensis", "corotilla"),
    Input.Name = c("Opuntia yanganucensis", "Opuntia yanganucensis", "Opuntia corotilla")
  )

  expect_no_error(
    out <- wcvp_matching(input, target_df = target_df, allow_duplicates = TRUE)
  )

  expect_true("input_index" %in% names(out))
  expect_equal(nrow(out), nrow(input))
  expect_equal(out$input_index, c(1, 2, 3))
  expect_true(all(out$matched))
})

test_that("matching duplicate error suggests allow_duplicates option", {
  target_df <- tibble::tibble(
    genus = "Opuntia",
    species = "yanganucensis",
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_
  )

  input <- tibble::tibble(
    Genus = c("Opuntia", "Opuntia"),
    Species = c("yanganucensis", "yanganucensis")
  )

  expect_error(
    wcvp_matching(input, target_df = target_df, allow_duplicates = FALSE),
    "allow_duplicates = TRUE"
  )
})

test_that("wcvp_matching propagates ambiguous genus fuzzy ties", {
  target_df <- tibble::tibble(
    genus = c("Aaa", "Aab"),
    species = c("beta", "beta"),
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_
  )

  input <- tibble::tibble(
    Genus = "Aac",
    Species = "beta"
  )

  expect_warning(
    out <- wcvp_matching(input, target_df = target_df, max_dist = 1, method = "osa"),
    "Multiple fuzzy matches"
  )

  amb <- attr(out, "ambiguous_genus")
  expect_false(is.null(amb))
  expect_true(nrow(amb) >= 2)
})

test_that("wcvp_matching propagates ambiguous species fuzzy ties", {
  target_df <- tibble::tibble(
    genus = c("Abc", "Abc"),
    species = c("alba", "alga"),
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_
  )

  input <- tibble::tibble(
    Genus = "Abc",
    Species = "alca"
  )

  expect_warning(
    out <- wcvp_matching(input, target_df = target_df, max_dist = 1, method = "osa"),
    "Multiple fuzzy matches"
  )

  amb <- attr(out, "ambiguous_species")
  expect_false(is.null(amb))
  expect_true(nrow(amb) >= 2)
})

test_that("matching returns matched and accepted author columns when available", {
  target_df <- tibble::tibble(
    genus = c("Veronica", "Veronica", "Veronica"),
    species = c("vulcanica", "vulcanica", "spathulata"),
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_,
    plant_name_id = c(10, 11, 200),
    taxon_name = c("Veronica vulcanica", "Veronica vulcanica", "Veronica spathulata"),
    taxon_authors = c("A.Author", "B.Author", "C.Author"),
    taxon_status = c("Synonym", "Synonym", "Accepted"),
    accepted_plant_name_id = c(200, 200, 200)
  )

  input <- tibble::tibble(
    Orig.Genus = "Veronica",
    Orig.Species = "vulcanica",
    Rank = 2
  )

  out <- wcvp_matching(input, target_df = target_df)

  expect_true(all(c("matched_taxon_authors", "accepted_taxon_authors") %in% names(out)))
  expect_true(out$matched_taxon_authors[1] %in% c("A.Author", "B.Author"))
  expect_equal(out$accepted_taxon_authors[1], "C.Author")
})

test_that("matching can standardize output names to snake_case", {
  target_df <- tibble::tibble(
    genus = "Fagus",
    species = "sylvatica",
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_
  )

  input <- tibble::tibble(
    Genus = "Fagus",
    Species = "sylvatica"
  )

  out <- wcvp_matching(input, target_df = target_df, output_name_style = "snake_case")

  expect_true(all(c("orig_genus", "orig_species", "matched_genus", "matched_species", "input_name") %in% names(out)))
  expect_false(any(c("Orig.Genus", "Orig.Species", "Matched.Genus", "Input.Name") %in% names(out)))
})

test_that("original combination statuses are normalized to a public accepted-or-synonym status", {
  target_df <- tibble::tibble(
    genus = c("Arvicola", "Microtus", "Neacomys", "Papio"),
    species = c("mexicanus", "mexicanus", "serranensis", "ursinus"),
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_,
    plant_name_id = c(10, 20, 30, 40),
    taxon_name = c(
      "Arvicola mexicanus",
      "Microtus mexicanus",
      "Neacomys serranensis",
      "Papio ursinus"
    ),
    taxon_status = c(
      "original_name_combination",
      "Accepted",
      "original_combination",
      "Accepted"
    ),
    accepted_plant_name_id = c(20, 20, 30, 40)
  )

  input <- tibble::tibble(
    Orig.Genus = c("Arvicola", "Neacomys"),
    Orig.Species = c("mexicanus", "serranensis"),
    Rank = 2
  )

  out <- wcvp_matching(input, target_df = target_df)

  expect_equal(out$taxon_status, c("synonym", "accepted"))
  expect_equal(out$is_accepted_name, c(FALSE, TRUE))
  expect_equal(out$accepted_taxon_name, c("Microtus mexicanus", "Neacomys serranensis"))
})

test_that("matching can add fast pairwise input-vs-matched name distance", {
  target_df <- tibble::tibble(
    genus = c("Fagus", "Quercus"),
    species = c("sylvatica", "robur"),
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_
  )

  input <- tibble::tibble(
    Genus = c("Fagus", "Qercus"),
    Species = c("sylvatica", "robur")
  )

  out <- wcvp_matching(
    input,
    target_df = target_df,
    add_name_distance = TRUE,
    name_distance_method = "osa"
  )

  expect_true("matched_dist" %in% names(out))
  expect_true(is.numeric(out$matched_dist))
  expect_true(all(!is.na(out$matched_dist[out$matched])))
})

test_that("matched TRUE is always coherent with matched_taxon_name", {
  target_df <- tibble::tibble(
    genus = c("Aniba", "Jaltomata"),
    species = c("heterotepala", "sagastegui"),
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_,
    plant_name_id = c(1, 2),
    taxon_name = c("Aniba heterotepala", "Jaltomata sagastegui"),
    taxon_status = c("Accepted", "Accepted"),
    accepted_plant_name_id = c(1, 2)
  )

  input <- classify_spnames(c(
    "Aniba heterotepala",
    "Jaltometa sagasteguii"
  ))

  out <- wcvp_matching(
    input,
    target_df = target_df,
    max_dist = 2,
    method = "osa",
    allow_duplicates = TRUE
  )

  expect_true(all(!out$matched | !is.na(out$matched_taxon_name)))
})

test_that("matching can attach per-stage timings for profiling", {
  target_df <- tibble::tibble(
    genus = c("Aniba", "Jaltomata"),
    species = c("heterotepala", "sagastegui"),
    infraspecific_rank = NA_character_,
    infraspecies = NA_character_,
    plant_name_id = c(1, 2),
    taxon_name = c("Aniba heterotepala", "Jaltomata sagastegui"),
    taxon_status = c("Accepted", "Accepted"),
    accepted_plant_name_id = c(1, 2)
  )

  input <- classify_spnames(c(
    "Aniba heterotepala",
    "Jaltometa sagasteguii"
  ))

  out <- wcvp_matching(
    input,
    target_df = target_df,
    max_dist = 2,
    method = "osa",
    allow_duplicates = TRUE,
    add_name_distance = TRUE,
    profile = TRUE
  )

  timings <- attr(out, "timings")
  expect_true(tibble::is_tibble(timings))
  expect_true(all(c("stage", "elapsed_seconds", "rows") %in% names(timings)))
  expect_true("total" %in% timings$stage)
  expect_true(all(is.finite(timings$elapsed_seconds)))
})