test_that("basic country standardization works", {
  # Each row points at the United States through a different mix of the two
  # target columns, including missing values.
  input <- tibble::tibble(
    entity = c(NA, "united.states", "us"),
    code = c("USA", NA, "US")
  )

  standardized <- standardize_entity(input, entity, code)

  # Every row should resolve to the same standardized name and id
  expect_equal(standardized$entity_name, rep("United States", 3))
  expect_equal(standardized$entity_id, rep("USA", 3))
})

test_that("unmatched entities are not filled from existing cols by default", {
  # Neither `entity` value is expected to match, and `code` is entirely NA
  input <- tibble::tibble(
    entity = c("EU", "NotACountry"),
    code = c(NA, NA)
  )

  standardized <- standardize_entity(input, entity, code)

  # Without a fill mapping, the output columns stay missing
  all_missing <- rep(NA_character_, 2)
  expect_equal(standardized$entity_name, all_missing)
  expect_equal(standardized$entity_id, all_missing)
})

# TODO: Test that unmatched entities are filled from existing cols when fill
# mapping is provided

test_that("column order prioritizes matches from earlier columns", {
  # The first row carries conflicting information: its name says
  # United States while its code says France.
  input <- tibble::tibble(
    name = c("United States", "France"),
    code = c("FRA", NA)
  )

  # With `code` listed first, the code match wins for the conflicting row
  code_first <- standardize_entity(input, code, name)
  expect_equal(code_first$entity_id, c("FRA", "FRA"))

  # With `name` listed first, the name match wins instead
  name_first <- standardize_entity(input, name, code)
  expect_equal(name_first$entity_id, c("USA", "FRA"))
})

test_that("standardization works with a single target column", {
  input <- tibble::tibble(
    country = c("United States", "France", "NotACountry")
  )

  standardized <- standardize_entity(input, country)

  # The last value is expected to stay unmatched, so its outputs are NA
  expected_names <- c("United States", "France", NA_character_)
  expected_ids <- c("USA", "FRA", NA_character_)
  expect_equal(standardized$entity_name, expected_names)
  expect_equal(standardized$entity_id, expected_ids)
})

test_that("standardization fails with invalid output columns", {
  input <- tibble::tibble(country = "United States")

  # A single unknown output column is rejected
  expect_error(
    standardize_entity(input, country, output_cols = "invalid_col"),
    "Output columns"
  )

  # Unknown columns are rejected even when a valid one is also requested
  expect_error(
    standardize_entity(
      input,
      country,
      output_cols = c("entity_name", "bad_col", "worse_col")
    ),
    "Output columns"
  )
})

test_that("match_entities_with_patterns performs case-insensitive matching", {
  # Spellings of France's name and three-letter code in assorted letter cases
  input <- tibble::tibble(
    country = c("FRANCE", "france", "FrAnCe", "fra", "FRA")
  )

  # Call the matcher directly; it should hand back a data frame that maps
  # each input value to the entity columns
  lookup <- match_entities_with_patterns(
    input,
    target_cols = "country",
    patterns = list_entity_patterns(),
    warn_ambiguous = FALSE
  )

  expect_s3_class(lookup, "data.frame")
  required_cols <- c("country", "entity_id", "entity_name", "entity_type")
  expect_true(all(required_cols %in% names(lookup)))

  # Each distinct spelling keeps its own row, and all resolve to France
  n_inputs <- 5
  expect_equal(nrow(lookup), n_inputs)
  expect_equal(lookup$entity_id, rep("FRA", n_inputs))
  expect_equal(lookup$entity_name, rep("France", n_inputs))
  expect_equal(lookup$entity_type, rep("economy", n_inputs))
})

test_that("match_entities_with_patterns handles multiple target columns", {
  input <- tibble::tibble(
    name = c("United States", NA, "Unknown"),
    code = c(NA, "FRA", "Unknown"),
    abbr = c("US", NA, "UNK")
  )
  expected_ids <- c("USA", "FRA", NA_character_)
  expected_names <- c("United States", "France", NA_character_)

  # Target columns are supplied in sequence: name, then code, then abbr
  lookup <- match_entities_with_patterns(
    input,
    target_cols = c("name", "code", "abbr"),
    patterns = list_entity_patterns(),
    warn_ambiguous = FALSE
  )

  # The result is a data frame carrying the targets and the entity columns,
  # with one row per input row
  expect_s3_class(lookup, "data.frame")
  required_cols <- c("name", "code", "abbr", "entity_id", "entity_name")
  expect_true(all(required_cols %in% names(lookup)))
  expect_equal(nrow(lookup), 3)

  expect_equal(lookup$entity_id, expected_ids)
  expect_equal(lookup$entity_name, expected_names)

  # Trying `abbr` first must yield the same mapping: the first row is
  # expected to resolve to USA whether it is reached via `abbr` or `name`
  lookup_abbr_first <- match_entities_with_patterns(
    input,
    target_cols = c("abbr", "name", "code"),
    patterns = list_entity_patterns(),
    warn_ambiguous = FALSE
  )

  expect_equal(lookup_abbr_first$entity_id, expected_ids)
  expect_equal(lookup_abbr_first$entity_name, expected_names)
})

test_that("match_entities_with_patterns handles output_cols parameter", {
  input <- tibble::tibble(
    country = c("United States", "France", "Germany")
  )

  # A subset of output columns cannot be requested from the matcher (it has
  # no output_cols argument), so this test inspects which columns come back
  # from a plain call rather than exercising the argument itself.
  lookup <- match_entities_with_patterns(
    input,
    target_cols = "country",
    patterns = list_entity_patterns(),
    warn_ambiguous = FALSE
  )
  returned_cols <- names(lookup)

  # The target column plus every entity column should be returned
  all_entity_cols <- c(
    "country",
    "entity_id",
    "entity_name",
    "entity_type",
    "iso3c",
    "iso2c"
  )
  expect_true(all(all_entity_cols %in% returned_cols))

  # The columns that should always be present
  core_cols <- c("country", "entity_id", "entity_name")
  expect_true(all(core_cols %in% returned_cols))

  # Values should line up row by row with the inputs
  iso3_codes <- c("USA", "FRA", "DEU")
  expect_equal(lookup$entity_id, iso3_codes)
  expect_equal(lookup$iso3c, iso3_codes)
})

test_that("match_entities_with_patterns handles ambiguous matches", {
  # Two pattern rows share one regex, so the input "us" matches both. The
  # columns are meant to mirror the structure of the real patterns table.
  ambiguous_patterns <- tibble::tibble(
    entity_id = c("USA", "USB"),
    entity_name = c("United States A", "United States B"),
    entity_type = rep("economy", 2),
    iso3c = c("USA", "USB"),
    iso2c = c("US", "UB"),
    entity_regex = rep("^us$", 2)
  )

  # Temporarily replace list_entity_patterns() with the mock for this test
  local_mocked_bindings(
    list_entity_patterns = function() ambiguous_patterns
  )

  input <- tibble::tibble(country = "us")

  # With warn_ambiguous = TRUE the ambiguity is reported as a warning, and
  # the result keeps one row per matching pattern (i.e. duplicates)
  expect_warning(
    lookup <- match_entities_with_patterns(
      input,
      target_cols = "country",
      patterns = ambiguous_patterns,
      warn_ambiguous = TRUE
    ),
    "Ambiguous match"
  )

  # Two rows come back for the single ambiguous input
  expect_s3_class(lookup, "data.frame")
  expect_equal(nrow(lookup), 2)

  # Both candidate entities are represented
  expect_true(all(c("USA", "USB") %in% lookup$entity_id))
  candidate_names <- c("United States A", "United States B")
  expect_true(all(candidate_names %in% lookup$entity_name))

  # Each row still carries the original input value
  expect_equal(lookup$country, rep("us", 2))
})

test_that("output_cols argument correctly filters columns", {
  input <- tibble::tibble(
    entity = c("United States", "France"),
    code = c("USA", "FRA")
  )
  all_output_cols <- c(
    "entity_name",
    "entity_type",
    "entity_id",
    "iso3c",
    "iso2c"
  )

  # Request only two of the valid output columns
  subset_result <- standardize_entity(
    input,
    entity,
    code,
    output_cols = c("entity_id", "iso3c")
  )
  subset_names <- names(subset_result)

  # The inputs and the requested outputs are present ...
  kept <- c("entity", "code", "entity_id", "iso3c")
  expect_true(all(kept %in% subset_names))
  # ... while unrequested outputs and the regex column are absent
  dropped <- c("entity_name", "entity_type", "iso2c", "entity_regex")
  expect_false(any(dropped %in% subset_names))

  # Request every valid output column
  full_result <- standardize_entity(
    input,
    entity,
    code,
    output_cols = all_output_cols
  )
  full_names <- names(full_result)

  # All valid outputs appear alongside the original columns
  expect_true(all(c("entity", "code", all_output_cols) %in% full_names))
  # The regex column is still left out
  expect_false("entity_regex" %in% full_names)
})

test_that("output columns are added in correct order", {
  input <- tibble::tibble(country = c("United States", "France"))

  # Three output columns in id / name / type order
  forward <- standardize_entity(
    input,
    country,
    output_cols = c("entity_id", "entity_name", "entity_type")
  )

  # By default the new columns land on the left of the data frame
  forward_expected <- c("entity_id", "entity_name", "entity_type", "country")
  expect_equal(names(forward), forward_expected)

  # The same columns requested in the opposite order
  backward <- standardize_entity(
    input,
    country,
    output_cols = c("entity_type", "entity_name", "entity_id")
  )

  # The requested order is respected, still on the left
  backward_expected <- c("entity_type", "entity_name", "entity_id", "country")
  expect_equal(names(backward), backward_expected)

  # A single output column is also placed on the left
  id_only <- standardize_entity(
    input,
    country,
    output_cols = "entity_id"
  )
  expect_equal(names(id_only), c("entity_id", "country"))
})

test_that("handles existing entity columns correctly", {
  # Input that already carries entity_id and entity_name columns
  existing <- data.frame(
    country = c("USA", "China"),
    entity_id = c("old_id1", "old_id2"),
    entity_name = c("Old Name 1", "Old Name 2")
  )
  overwrite_msg <- "Overwriting existing entity columns"

  # warn_overwrite = TRUE announces that existing columns get replaced
  expect_warning(
    standardize_entity(existing, country, warn_overwrite = TRUE),
    overwrite_msg
  )

  # warn_overwrite = FALSE stays silent
  expect_no_warning(
    standardize_entity(existing, country, warn_overwrite = FALSE)
  )

  # A call without warn_overwrite warns too, and the old values are replaced
  expect_warning(
    standardized <- standardize_entity(existing, country),
    overwrite_msg
  )
  expect_false(identical(existing$entity_id, standardized$entity_id))
  expect_false(identical(existing$entity_name, standardized$entity_name))
})

test_that("prefix parameter works correctly", {
  input <- tibble::tibble(
    country_name = c("USA", "Germany"),
    counterpart_name = c("France", "Italy")
  )

  # Standardize each column in turn, tagging its outputs with its own prefix
  with_country <- standardize_entity(
    input,
    country_name,
    prefix = "country"
  )
  with_both <- standardize_entity(
    with_country,
    counterpart_name,
    prefix = "counterpart"
  )

  # Both sets of prefixed output columns should coexist
  prefixed_cols <- c(
    "country_entity_id",
    "country_entity_name",
    "country_entity_type",
    "counterpart_entity_id",
    "counterpart_entity_name",
    "counterpart_entity_type"
  )
  expect_true(all(prefixed_cols %in% names(with_both)))

  # Each prefixed id column holds the ids for its own source column
  expect_equal(with_both$country_entity_id, c("USA", "DEU"))
  expect_equal(with_both$counterpart_entity_id, c("FRA", "ITA"))
})

test_that("default_entity_type parameter works correctly", {
  # One recognizable entity and one value expected to stay unmatched
  input <- tibble::tibble(entity = c("United States", "NotACountry"))

  # The unmatched row receives the supplied default type
  typed_other <- standardize_entity(
    input,
    entity,
    default_entity_type = "other"
  )
  expect_equal(typed_other$entity_type, c("economy", "other"))

  # A different default is applied the same way
  typed_org <- standardize_entity(
    input,
    entity,
    default_entity_type = "organization"
  )
  expect_equal(typed_org$entity_type, c("economy", "organization"))
})

test_that("column placement works without .before", {
  # Input whose column order is known, with the targets in the middle
  input <- tibble::tibble(
    id = 1:2,
    extra1 = c("a", "b"),
    name = c("United States", "France"),
    code = c("USA", "FRA"),
    extra2 = c("x", "y")
  )

  # Two target columns, no .before argument
  standardized <- standardize_entity(input, name, code)

  # Default placement: outputs on the far left, originals in their old order
  expect_equal(
    names(standardized),
    c(
      "entity_id",
      "entity_name",
      "entity_type",
      "id",
      "extra1",
      "name",
      "code",
      "extra2"
    )
  )
})

test_that(".before parameter works correctly", {
  # Input whose column order is known, with the targets in the middle
  input <- tibble::tibble(
    id = 1:2,
    extra1 = c("a", "b"),
    name = c("United States", "France"),
    code = c("USA", "FRA"),
    extra2 = c("x", "y")
  )
  output_cols <- c("entity_id", "entity_name", "entity_type")

  # Placing the outputs before the first column puts them on the far left
  before_first <- standardize_entity(
    input,
    name,
    code,
    .before = "id"
  )
  expect_equal(
    names(before_first),
    c(output_cols, "id", "extra1", "name", "code", "extra2")
  )

  # Placing the outputs before the last column slots them in just ahead of it
  before_last <- standardize_entity(
    input,
    name,
    code,
    .before = "extra2"
  )
  expect_equal(
    names(before_last),
    c("id", "extra1", "name", "code", output_cols, "extra2")
  )

  # Pointing .before at a column that is not in the data is an error
  expect_error(
    standardize_entity(
      input,
      name,
      code,
      .before = "not_a_column"
    ),
    "Can't select columns that don't exist"
  )
})

test_that("fill_mapping parameter works correctly", {
  # Row 1 should match via patterns; row 2 has no match and relies on
  # fill_mapping
  input <- tibble::tibble(
    entity = c("United States", "NotACountry"),
    code = c("USA", "ABC")
  )

  # Map both outputs back to source columns
  filled <- standardize_entity(
    input,
    entity,
    code,
    fill_mapping = c(entity_id = "code", entity_name = "entity")
  )

  # The matched row takes its standardized values
  expect_equal(filled$entity_id[1], "USA")
  expect_equal(filled$entity_name[1], "United States")

  # The unmatched row is filled from the mapped source columns
  expect_equal(filled$entity_id[2], "ABC")
  expect_equal(filled$entity_name[2], "NotACountry")

  # Without fill_mapping the unmatched row stays NA
  unfilled <- standardize_entity(input, entity, code)
  expect_equal(unfilled$entity_id[2], NA_character_)
  expect_equal(unfilled$entity_name[2], NA_character_)

  # Mapping only entity_id fills that column and leaves entity_name as NA
  id_filled <- standardize_entity(
    input,
    entity,
    code,
    fill_mapping = c(entity_id = "code")
  )
  expect_equal(id_filled$entity_id[2], "ABC")
  expect_equal(id_filled$entity_name[2], NA_character_)
})

test_that("fill_mapping works with prefix", {
  # Row 1 should match; row 2 should not
  input <- tibble::tibble(
    country_name = c("United States", "Unknown"),
    country_code = c("USA", "XYZ")
  )

  # fill_mapping is keyed by the unprefixed output names even though the
  # resulting columns carry the prefix
  filled <- standardize_entity(
    input,
    country_name,
    country_code,
    prefix = "country",
    fill_mapping = c(entity_id = "country_code", entity_name = "country_name")
  )

  # Matched row: standardized values in the prefixed columns
  expect_equal(filled$country_entity_id[1], "USA")
  expect_equal(filled$country_entity_name[1], "United States")

  # Unmatched row: values copied over from the mapped source columns
  expect_equal(filled$country_entity_id[2], "XYZ")
  expect_equal(filled$country_entity_name[2], "Unknown")
})

test_that("fill_mapping validation works", {
  input <- tibble::tibble(entity = "US", code = "USA")

  # Names of fill_mapping must be valid output columns
  expect_error(
    standardize_entity(
      input,
      entity,
      code,
      fill_mapping = c(invalid_col = "code")
    ),
    "fill_mapping names.*must be valid output column names"
  )

  # Values of fill_mapping must be columns present in the data
  expect_error(
    standardize_entity(
      input,
      entity,
      code,
      fill_mapping = c(entity_id = "missing_column")
    ),
    "fill_mapping values.*must be columns in the data frame"
  )

  # An unnamed character vector is rejected outright
  expect_error(
    standardize_entity(
      input,
      entity,
      code,
      fill_mapping = c("entity", "code")
    ),
    "fill_mapping must be a named character vector"
  )
})

test_that("fill_mapping handles empty and partial vectors correctly", {
  # Row 1 should match via patterns; row 2 has no match and relies on
  # fill_mapping
  input <- tibble::tibble(
    entity = c("United States", "NotACountry"),
    code = c("USA", "ABC")
  )

  # An empty fill_mapping behaves like NULL: nothing gets filled
  none_filled <- standardize_entity(
    input,
    entity,
    code,
    fill_mapping = c()
  )
  expect_equal(none_filled$entity_id[2], NA_character_)
  expect_equal(none_filled$entity_name[2], NA_character_)

  # Mapping only entity_id fills the id and leaves the name missing
  id_filled <- standardize_entity(
    input,
    entity,
    code,
    fill_mapping = c(entity_id = "code")
  )
  expect_equal(id_filled$entity_id[2], "ABC")
  expect_equal(id_filled$entity_name[2], NA_character_)

  # Mapping only entity_name fills the name and leaves the id missing
  name_filled <- standardize_entity(
    input,
    entity,
    code,
    fill_mapping = c(entity_name = "entity")
  )
  expect_equal(name_filled$entity_id[2], NA_character_)
  expect_equal(name_filled$entity_name[2], "NotACountry")
})

test_that("match_entities_with_patterns handles empty or all-NA data", {
  core_cols <- c("country", "entity_id", "entity_name")

  # Case 1: a data frame with zero rows
  zero_rows <- tibble::tibble(country = character(0))
  lookup_zero <- match_entities_with_patterns(
    zero_rows,
    target_cols = "country",
    patterns = list_entity_patterns(),
    warn_ambiguous = FALSE
  )

  # The result is an empty data frame that still has the expected columns
  expect_s3_class(lookup_zero, "data.frame")
  expect_equal(nrow(lookup_zero), 0)
  expect_true(all(core_cols %in% names(lookup_zero)))

  # Case 2: a target column that contains only missing values
  only_na <- tibble::tibble(country = rep(NA_character_, 2))
  lookup_na <- match_entities_with_patterns(
    only_na,
    target_cols = "country",
    patterns = list_entity_patterns(),
    warn_ambiguous = FALSE
  )

  # The two NA inputs collapse into one row whose entity columns are NA
  expect_s3_class(lookup_na, "data.frame")
  expect_equal(nrow(lookup_na), 1)
  expect_true(all(core_cols %in% names(lookup_na)))
  expect_true(is.na(lookup_na$entity_id[1]))
  expect_true(is.na(lookup_na$entity_name[1]))
})

test_that("match_entities_with_patterns keeps all unique target col combos", {
  # Rows 1-2 share a name-code pair but differ in year, row 3 has a
  # different code, and rows 4-5 are complete duplicates of each other.
  input <- tibble::tibble(
    name = c("France", "France", "France", "Germany", "Germany"),
    code = c("FRA", "FRA", "FR", "DEU", "DEU"),
    year = c(2020, 2021, 2020, 2020, 2020)
  )

  lookup <- match_entities_with_patterns(
    input,
    target_cols = c("name", "code"),
    patterns = list_entity_patterns(),
    warn_ambiguous = FALSE
  )

  # Only the three distinct name-code pairs remain
  expect_equal(nrow(lookup), 3)

  # Target and entity columns are all present
  required_cols <- c("name", "code", "entity_id", "entity_name")
  expect_true(all(required_cols %in% names(lookup)))

  # Sorted by name then code: both France pairs map to FRA, Germany to DEU
  ordered_lookup <- dplyr::arrange(lookup, name, code)
  expect_equal(ordered_lookup$entity_id, c("FRA", "FRA", "DEU"))
})

test_that("match_entities_with_patterns fails gracefully with invalid input", {
  input <- tibble::tibble(country = "United States")

  # Naming a target column that is absent from the data should raise an
  # error that mentions target_cols
  expect_error(
    match_entities_with_patterns(
      input,
      target_cols = "invalid_column",
      patterns = list_entity_patterns(),
      warn_ambiguous = FALSE
    ),
    "target_cols"
  )
})

test_that("match_entities_with_patterns handles multiple ambiguous matches", {
  # Two pairs of pattern rows, each pair sharing a single regex
  ambiguous_patterns <- tibble::tibble(
    entity_id = c("USA", "USB", "FRA", "FRB"),
    entity_name = c(
      "United States A",
      "United States B",
      "France A",
      "France B"
    ),
    entity_type = rep("economy", 4),
    iso3c = c("USA", "USB", "FRA", "FRB"),
    iso2c = c("US", "UB", "FR", "FB"),
    entity_regex = c("^us$", "^us$", "^fr$", "^fr$")
  )

  # Temporarily replace list_entity_patterns() with the mock for this test
  local_mocked_bindings(
    list_entity_patterns = function() ambiguous_patterns
  )

  # "us" and "fr" each hit two patterns; "de" hits none
  input <- tibble::tibble(country = c("us", "fr", "de"))

  # One warning per ambiguous input is expected. expect_warning() is nested
  # so that each call accounts for one of the two messages.
  expect_warning(
    expect_warning(
      lookup <- match_entities_with_patterns(
        input,
        target_cols = "country",
        patterns = ambiguous_patterns,
        warn_ambiguous = TRUE
      ),
      "Ambiguous match for fr"
    ),
    "Ambiguous match for us"
  )

  # 2 rows for "us" + 2 rows for "fr" + 1 row for "de"
  expect_s3_class(lookup, "data.frame")
  expect_equal(nrow(lookup), 5)

  # "us" keeps both of its candidates
  us_rows <- lookup[lookup$country == "us", ]
  expect_equal(nrow(us_rows), 2)
  expect_true(all(c("USA", "USB") %in% us_rows$entity_id))

  # "fr" keeps both of its candidates
  fr_rows <- lookup[lookup$country == "fr", ]
  expect_equal(nrow(fr_rows), 2)
  expect_true(all(c("FRA", "FRB") %in% fr_rows$entity_id))

  # "de" has a single row with no entity assigned
  de_rows <- lookup[lookup$country == "de", ]
  expect_equal(nrow(de_rows), 1)
  expect_true(is.na(de_rows$entity_id))
})

test_that("match_entities_with_patterns suppresses warnings per option", {
  # Two pattern rows share one regex, so the input "us" matches both
  ambiguous_patterns <- tibble::tibble(
    entity_id = c("USA", "USB"),
    entity_name = c("United States A", "United States B"),
    entity_type = rep("economy", 2),
    iso3c = c("USA", "USB"),
    iso2c = c("US", "UB"),
    entity_regex = rep("^us$", 2)
  )

  # Temporarily replace list_entity_patterns() with the mock for this test
  local_mocked_bindings(
    list_entity_patterns = function() ambiguous_patterns
  )

  input <- tibble::tibble(country = "us")

  # warn_ambiguous = FALSE must silence the ambiguity warning
  expect_no_warning(
    lookup <- match_entities_with_patterns(
      input,
      target_cols = "country",
      patterns = ambiguous_patterns,
      warn_ambiguous = FALSE
    )
  )

  # Silencing the warning does not drop any of the matches
  expect_s3_class(lookup, "data.frame")
  expect_equal(nrow(lookup), 2)
  expect_true(all(c("USA", "USB") %in% lookup$entity_id))
  candidate_names <- c("United States A", "United States B")
  expect_true(all(candidate_names %in% lookup$entity_name))
})

test_that("match_entities_with_patterns handles case insensitive matches", {
  # Single-entity mock pattern table. The alternation is wrapped in a group
  # so that `^` and `$` anchor every alternative; written without the group,
  # `^` would bind only to "united states", `$` only to "us", and "usa"
  # would match anywhere inside a string.
  mock_patterns <- tibble::tibble(
    entity_id = c("USA"),
    entity_name = c("United States"),
    entity_type = c("economy"),
    iso3c = c("USA"),
    iso2c = c("US"),
    entity_regex = c("^(united states|usa|us)$")
  )

  # Use local_mocked_bindings to temporarily mock the list_entity_patterns
  # function
  local_mocked_bindings(
    list_entity_patterns = function() {
      mock_patterns
    }
  )

  # Create a test dataframe with different case variations
  test_df <- tibble::tibble(
    country = c("us", "US", "Us", "uS")
  )

  # This should not warn about ambiguous matches as these are the same pattern
  # just with different cases
  expect_no_warning(
    {
      result <- match_entities_with_patterns(
        test_df,
        target_cols = "country",
        patterns = mock_patterns,
        warn_ambiguous = TRUE # Even with warnings enabled
      )
    }
  )

  # Should return a data frame with one row for each unique input
  expect_s3_class(result, "data.frame")
  expect_equal(nrow(result), 4) # One per case variation

  # All should be matched to USA
  expect_equal(unique(result$entity_id), "USA")
  expect_equal(unique(result$entity_name), "United States")

  # Each row should preserve its original case
  expect_equal(result$country, c("us", "US", "Us", "uS"))
})

test_that("match_entities_with_patterns performs multiple passes correctly", {
  # Row 1 should match on code, row 2 on name, row 3 on description, and
  # row 4 on nothing at all
  input <- tibble::tibble(
    id = c(1, 2, 3, 4),
    name = c(NA, "France", NA, "not a match"),
    code = c("USA", NA, NA, "XXX"),
    description = c(
      "First entry",
      "Second entry",
      "United States",
      "no match here"
    )
  )

  # A small fixed pattern table keeps the matching predictable
  two_country_patterns <- tibble::tibble(
    entity_id = c("USA", "FRA"),
    entity_name = c("United States", "France"),
    entity_type = rep("economy", 2),
    iso3c = c("USA", "FRA"),
    iso2c = c("US", "FR"),
    entity_regex = c("^(united states|usa|us)$", "^(france|fra|fr)$")
  )

  # Temporarily replace list_entity_patterns() with the mock for this test
  local_mocked_bindings(
    list_entity_patterns = function() two_country_patterns
  )

  lookup <- match_entities_with_patterns(
    input,
    target_cols = c("name", "code", "description"),
    patterns = two_country_patterns,
    warn_ambiguous = FALSE
  )

  # A data frame with every target column plus the entity columns
  expect_s3_class(lookup, "data.frame")
  required_cols <- c(
    "name",
    "code",
    "description",
    "entity_id",
    "entity_name",
    "iso3c"
  )
  expect_true(all(required_cols %in% names(lookup)))

  # One row per distinct combination of the target columns
  expect_equal(nrow(lookup), 4)

  # code = "USA" resolves to the United States
  via_code <- dplyr::filter(lookup, code == "USA")
  expect_equal(via_code$entity_id, "USA")
  expect_equal(via_code$entity_name, "United States")

  # name = "France" resolves to France
  via_name <- dplyr::filter(lookup, name == "France")
  expect_equal(via_name$entity_id, "FRA")
  expect_equal(via_name$entity_name, "France")

  # description = "United States" resolves to the United States
  via_description <- dplyr::filter(lookup, description == "United States")
  expect_equal(via_description$entity_id, "USA")
  expect_equal(via_description$entity_name, "United States")

  # The row that matches in no column has NA entity values
  unmatched <- dplyr::filter(lookup, name == "not a match")
  expect_true(is.na(unmatched$entity_id))
  expect_true(is.na(unmatched$entity_name))

  # Repeat with the target columns in a different priority order
  lookup_reordered <- match_entities_with_patterns(
    input,
    target_cols = c("description", "code", "name"),
    patterns = two_country_patterns,
    warn_ambiguous = FALSE
  )

  # description = "United States" resolves to USA under the new order
  via_description2 <- dplyr::filter(
    lookup_reordered,
    description == "United States"
  )
  expect_equal(via_description2$entity_id, "USA")

  # code = "USA" still resolves to USA under the new order
  via_code2 <- dplyr::filter(lookup_reordered, code == "USA")
  expect_equal(via_code2$entity_id, "USA")
})

test_that("fill_mapping validates uniqueness of entity_id values", {
  # Create test data with an entity that won't match any pattern
  test_df <- tibble::tribble(
    # nolint start
    ~entity       , ~code ,
    "NotACountry" , "USA" # Using "USA" which already exists in entity_patterns
  ) # nolint end

  # Use local_mocked_bindings to mock list_entity_patterns.
  # Each alternation is wrapped in a group so that `^` and `$` anchor every
  # alternative; without the group, `^` binds only to the first alternative
  # and `$` only to the last (e.g. "us$" would match any string ending in
  # "us").
  local_mocked_bindings(
    list_entity_patterns = function() {
      tibble::tibble(
        entity_id = c("USA", "FRA", "DEU"),
        entity_name = c("United States", "France", "Germany"),
        entity_type = c("economy", "economy", "economy"),
        iso3c = c("USA", "FRA", "DEU"),
        iso2c = c("US", "FR", "DE"),
        entity_regex = c(
          "^(united states|us)$",
          "^(france|fra)$",
          "^(germany|deu)$"
        )
      )
    }
  )

  # Should throw a warning when trying to fill with an existing entity_id
  expect_warning(
    result <- standardize_entity(
      test_df,
      entity,
      fill_mapping = c(entity_id = "code") # "code" contains "USA"
    ),
    "The entity_id value"
  )

  # The fill should still be performed despite the warning
  expect_equal(result$entity_id, "USA")

  # Filling with a different, non-conflicting ID should work as usual
  test_df2 <- tibble::tribble(
    # nolint start
    ~entity       , ~code ,
    "NotACountry" , "XYZ" # XYZ doesn't exist in entity_patterns
  ) # nolint end

  # This should work fine
  result <- standardize_entity(
    test_df2,
    entity,
    fill_mapping = c(entity_id = "code")
  )

  expect_equal(result$entity_id, "XYZ")
})

test_that("validate_entity_inputs catches invalid inputs", {
  # A plain list is not an acceptable `data` argument
  expect_error(
    standardize_entity(
      list(a = 1, b = 2),
      col1,
      output_cols = c("entity_id", "entity_name")
    ),
    "Input .* must be a data frame or tibble"
  )

  # A target column that is absent from the data is rejected
  input <- tibble::tibble(existing_col = "United States")
  expect_error(
    standardize_entity(
      input,
      non_existent_col,
      output_cols = c("entity_id", "entity_name")
    ),
    "Target column\\(s\\) .* must be found in data"
  )
})

test_that("prefix validation works correctly", {
  input <- tibble::tibble(country = "United States")
  prefix_msg <- "Prefix must be a single character string"

  # More than one string is rejected
  expect_error(
    standardize_entity(
      input,
      country,
      prefix = c("prefix1", "prefix2")
    ),
    prefix_msg
  )

  # A number is rejected
  expect_error(
    standardize_entity(
      input,
      country,
      prefix = 123
    ),
    prefix_msg
  )

  # A single string is accepted without error
  expect_no_error(
    standardize_entity(
      input,
      country,
      prefix = "test"
    )
  )
})

test_that("entity_id as target column gets replaced with standardized values", {
  # entity_id and entity_name already exist and are also the target columns
  existing <- data.frame(
    entity_name = c("United States", "China", "NotACountry"),
    entity_id = c("USA", "CHN", "ZZZ"),
    obs_value = c(1, 2, 3)
  )

  # Some warning is expected from this call; its message is not checked
  expect_warning(
    standardized <- standardize_entity(
      data = existing,
      entity_id,
      entity_name
    )
  )

  # Entity columns and the untouched value column are all present
  required_cols <- c("entity_id", "entity_name", "entity_type", "obs_value")
  expect_true(all(required_cols %in% names(standardized)))

  # Row count is unchanged (no duplication)
  expect_equal(nrow(standardized), 3)

  # The two recognizable rows get standardized ids
  expect_equal(standardized$entity_id[1], "USA")
  expect_equal(standardized$entity_id[2], "CHN")

  # "ZZZ" is not matched, so its id becomes NA
  expect_true(is.na(standardized$entity_id[3]))

  # Names are populated for matched rows and NA for the unmatched one
  expect_equal(standardized$entity_name[1], "United States")
  expect_equal(standardized$entity_name[2], "China")
  expect_true(is.na(standardized$entity_name[3]))

  # The non-entity column keeps its original values
  expect_equal(standardized$obs_value, c(1, 2, 3))
})