box::use(
  artma / data / smart_detection[
    detect_delimiter,
    detect_encoding,
    smart_read_csv,
    validate_df_structure
  ]
)

# Bind the testthat functions used by the tests below to local names by
# looking each one up in the testthat namespace with getFromNamespace().
# NOTE(review): presumably done because testthat is not attached here (no
# library(testthat) call is visible in this part of the file) -- confirm.
test_that <- getFromNamespace("test_that", "testthat")
expect_equal <- getFromNamespace("expect_equal", "testthat")
expect_true <- getFromNamespace("expect_true", "testthat")
expect_error <- getFromNamespace("expect_error", "testthat")
expect_no_error <- getFromNamespace("expect_no_error", "testthat")


# Write `content` to a fresh temporary ".csv" file and return the file path.
#
# content:   either a character object, which is handed to writeLines()
#            unchanged, or tabular data (a data frame or a non-character
#            matrix) whose rows are joined with `delimiter`. No header line
#            is generated for tabular input.
# delimiter: field separator; only used when `content` is tabular.
#
# Returns the path of the written file. The file is not removed here, so the
# caller is expected to unlink() it.
create_temp_csv <- function(content, delimiter = ",") {
  tmp_file <- tempfile(fileext = ".csv")
  lines <- if (is.character(content)) {
    content
  } else if (is.data.frame(content)) {
    # Paste the columns element-wise instead of using apply(): apply() goes
    # through as.matrix(), which format()s numeric columns of a mixed-type
    # data frame and pads them with spaces (5 -> " 5"), corrupting fields.
    # unname() stops a column called "sep" or "collapse" from being taken
    # as an argument of paste().
    do.call(paste, c(unname(as.list(content)), sep = delimiter))
  } else {
    paste(apply(content, 1, paste, collapse = delimiter), collapse = "\n")
  }
  writeLines(lines, tmp_file)
  tmp_file
}


test_that("detect_delimiter correctly identifies comma delimiter", {
  # Three-column fixture whose only separator is ",".
  path <- create_temp_csv(c(
    "name,age,city",
    "Alice,30,NYC",
    "Bob,25,LA"
  ))
  on.exit(unlink(path), add = TRUE)

  expect_equal(detect_delimiter(path), ",")
})


test_that("detect_delimiter correctly identifies semicolon delimiter", {
  # Three-column fixture whose only separator is ";".
  rows <- c("name;age;city", "Alice;30;NYC", "Bob;25;LA")
  path <- create_temp_csv(rows)
  on.exit(unlink(path), add = TRUE)

  expect_equal(detect_delimiter(path), ";")
})


test_that("detect_delimiter correctly identifies tab delimiter", {
  # Built through create_temp_csv() like the other delimiter tests rather
  # than a hand-rolled tempfile()/writeLines() pair; one element per line
  # produces the same file as the former single "\n"-joined string.
  csv_content <- c(
    "name\tage\tcity",
    "Alice\t30\tNYC",
    "Bob\t25\tLA"
  )
  tmp_file <- create_temp_csv(csv_content)
  # add = TRUE keeps any previously registered exit handler intact.
  on.exit(unlink(tmp_file), add = TRUE)

  delim <- detect_delimiter(tmp_file)
  expect_equal(delim, "\t")
})


test_that("detect_delimiter correctly identifies pipe delimiter", {
  # Three-column fixture whose only separator is "|".
  rows <- c("name|age|city", "Alice|30|NYC", "Bob|25|LA")
  path <- create_temp_csv(rows)
  on.exit(unlink(path), add = TRUE)

  expect_equal(detect_delimiter(path), "|")
})


test_that("detect_delimiter returns comma for empty file", {
  # A file written from zero lines; the expected result is the "," fallback.
  empty_path <- tempfile(fileext = ".csv")
  writeLines(character(0), empty_path)
  on.exit(unlink(empty_path), add = TRUE)

  expect_equal(detect_delimiter(empty_path), ",")
})


test_that("detect_delimiter handles inconsistent delimiters by choosing most consistent", {
  # Every row carries two semicolons; the lone comma on the last row is
  # stray noise that must not be picked as the delimiter.
  mixed_rows <- c(
    "name;age;city",
    "Alice;30;NYC",
    "Bob;25;LA,Extra"
  )
  path <- create_temp_csv(mixed_rows)
  on.exit(unlink(path), add = TRUE)

  expect_equal(detect_delimiter(path), ";")
})


test_that("detect_encoding returns valid encoding", {
  path <- tempfile(fileext = ".csv")
  writeLines("test,data", path)
  on.exit(unlink(path), add = TRUE)

  # The result only has to be one of these encoding names.
  accepted <- c("UTF-8", "latin1", "ISO-8859-1", "CP1252")
  expect_true(detect_encoding(path) %in% accepted)
})


test_that("smart_read_csv reads comma-delimited file correctly", {
  # Header line plus two data rows, comma separated.
  path <- create_temp_csv(c(
    "name,age,city",
    "Alice,30,NYC",
    "Bob,25,LA"
  ))
  on.exit(unlink(path), add = TRUE)

  parsed <- smart_read_csv(path)

  # Shape first, then header names, then the content of one column.
  expect_equal(nrow(parsed), 2)
  expect_equal(ncol(parsed), 3)
  expect_equal(names(parsed), c("name", "age", "city"))
  expect_equal(parsed$name, c("Alice", "Bob"))
})


test_that("smart_read_csv reads semicolon-delimited file correctly", {
  # Header line plus two data rows, semicolon separated; no delimiter is
  # passed to the reader.
  rows <- c("name;age;city", "Alice;30;NYC", "Bob;25;LA")
  path <- create_temp_csv(rows)
  on.exit(unlink(path), add = TRUE)

  parsed <- smart_read_csv(path)

  expect_equal(nrow(parsed), 2)
  expect_equal(ncol(parsed), 3)
  expect_equal(names(parsed), c("name", "age", "city"))
})


test_that("smart_read_csv handles NA values correctly", {
  rows_with_gaps <- c(
    "name,age,city",
    "Alice,30,NYC",
    "Bob,NA,LA", # literal "NA" in the age field
    "Charlie,," # age and city both left blank
  )
  path <- create_temp_csv(rows_with_gaps)
  on.exit(unlink(path), add = TRUE)

  parsed <- smart_read_csv(path)

  # Both the literal "NA" and the blank city must come back as missing.
  expect_true(is.na(parsed$age[2]))
  expect_true(is.na(parsed$city[3]))
})


test_that("smart_read_csv handles quoted fields", {
  # The description values contain commas inside double quotes.
  quoted_rows <- c(
    "name,description", # nolint
    '"Alice","Works in tech, enjoys coding"',
    '"Bob","Likes music, plays guitar"'
  )
  path <- create_temp_csv(quoted_rows)
  on.exit(unlink(path), add = TRUE)

  parsed <- smart_read_csv(path)

  expect_equal(nrow(parsed), 2)
  expect_true(grepl("tech", parsed$description[1]))
})


test_that("smart_read_csv can use explicit delimiter", {
  # Pipe-separated header and a single data row.
  path <- create_temp_csv(c("name|age|city", "Alice|30|NYC"))
  on.exit(unlink(path), add = TRUE)

  # The delimiter is supplied by the caller here.
  parsed <- smart_read_csv(path, delim = "|")

  expect_equal(ncol(parsed), 3)
  expect_equal(parsed$name, "Alice")
})


test_that("validate_df_structure removes empty columns", {
  # Two populated columns plus one column that holds nothing but NA.
  input <- data.frame(
    name = c("Alice", "Bob"),
    age = c(30, 25),
    empty = c(NA, NA)
  )

  result <- validate_df_structure(input, "test_path")

  expect_equal(ncol(result), 2)
  expect_true(!("empty" %in% names(result)))
})


test_that("validate_df_structure handles duplicate column names", {
  input <- data.frame(
    name = c("Alice", "Bob"),
    age = c(30, 25)
  )
  # Overwrite the header so both columns carry the same name.
  names(input) <- rep("name", 2)

  result <- validate_df_structure(input, "test_path")

  # Both columns are kept; the expected names are "name" and "name_1".
  expect_equal(ncol(result), 2)
  expect_true("name" %in% names(result))
  expect_true("name_1" %in% names(result))
})


test_that("validate_df_structure removes trailing empty rows", {
  # Two populated rows followed by two rows that are NA in every column.
  input <- data.frame(
    name = c("Alice", "Bob", NA, NA),
    age = c(30, 25, NA, NA)
  )

  result <- validate_df_structure(input, "test_path")

  expect_equal(nrow(result), 2)
})


test_that("validate_df_structure errors on empty data frame after cleaning", {
  # Every cell is NA, so the input holds no data at all.
  input <- data.frame(empty1 = c(NA, NA), empty2 = c(NA, NA))

  expect_error(validate_df_structure(input, "test_path"), "empty")
})


test_that("validate_df_structure errors on zero-row data frame", {
  # Two typed columns but no rows.
  input <- data.frame(name = character(0), age = numeric(0))

  expect_error(validate_df_structure(input, "test_path"), "empty.*0 rows")
})


test_that("validate_df_structure errors on zero-column data frame", {
  # Five row names and no columns.
  input <- data.frame(row.names = seq_len(5))

  expect_error(validate_df_structure(input, "test_path"), "no columns")
})


test_that("smart_read_csv with auto-detection works end-to-end", {
  # Semicolon-separated fixture with double-quoted strings and one blank
  # numeric field; no delimiter is passed to the reader.
  study_rows <- c(
    '"study_id";"effect";"se";"n_obs"',
    '"Study A";10.5;2.3;100',
    '"Study B";8.2;1.8;150',
    '"Study C";;0.9;200' # effect left blank
  )
  path <- create_temp_csv(study_rows)
  on.exit(unlink(path), add = TRUE)

  parsed <- smart_read_csv(path)

  expect_equal(nrow(parsed), 3)
  expect_equal(ncol(parsed), 4)
  # The blank effect must be missing and the quotes must be stripped.
  expect_true(is.na(parsed$effect[3]))
  expect_equal(parsed$study_id[1], "Study A")
})