# tests/testthat/test-file-utilities-more.R

testthat::test_that("import_gamet: ID normalization, NaN→NA, numeric-like conversion, zero-division guard", {
  # Fixture: a two-row GAMET-style table written to a temporary CSV.
  #   row 1 - drive-letter path, ordinary numeric-like strings
  #   row 2 - rooted path, "NaN" error_count and a word_count of 0
  gamet_input <- data.frame(
    filename      = c("C:/any/path/sub/file001.csv", "/whatever/dir/file002.csv"),
    error_count   = c("2", "NaN"),            # numeric-like + "NaN"
    word_count    = c(100, 0),                # second row 0 to hit NA guard
    grammar       = c("3", "5"),
    misspelling   = c("1", "2"),
    duplication   = c("0", "1"),
    typographical = c("0", "1"),
    whitespace    = c("1", "0"),
    stringsAsFactors = FALSE
  )
  # local_tempfile() removes the file automatically when the test exits.
  csv_path <- withr::local_tempfile(fileext = ".csv")
  utils::write.csv(gamet_input, csv_path, row.names = FALSE)

  out <- writeAlizer::import_gamet(csv_path)

  # Columns retained + derived. Comparing the set difference against an empty
  # vector makes a failure report exactly which columns are missing.
  required_cols <- c(
    "ID", "error_count", "word_count", "grammar", "misspelling",
    "duplication", "typographical", "whitespace", "per_gram", "per_spell"
  )
  expect_identical(setdiff(required_cols, names(out)), character(0))

  # ID is basename sans extension (character)
  expect_identical(out$ID, c("file001", "file002"))

  # "NaN" must end up as NA, not stay NaN. is.na() alone cannot tell the two
  # apart (is.na(NaN) is TRUE), so is.nan() is asserted as well.
  expect_true(is.na(out$error_count[2]))
  expect_false(is.nan(out$error_count[2]))

  # Numeric-like string columns are returned as numeric vectors.
  expect_true(is.numeric(out$error_count))
  expect_true(is.numeric(out$grammar))

  # per_* for row 1 equal grammar / word_count and misspelling / word_count;
  # row 2 has word_count == 0 and must yield NA (5 / 0 would otherwise be Inf,
  # for which is.na() is FALSE, so these checks do exercise the guard).
  expect_equal(out$per_gram[1], 3 / 100)
  expect_equal(out$per_spell[1], 1 / 100)
  expect_true(is.na(out$per_gram[2]))
  expect_true(is.na(out$per_spell[2]))
})

testthat::test_that("import_coh: TextID→ID, numeric-like coercion, stable sort", {
  # Fixture: two rows deliberately written in reverse ID order so the sort
  # performed by import_coh() is observable in the output.
  coh_input <- data.frame(
    TextID    = c("z2", "a1"),           # out-of-order to verify sorting
    SomeVar   = c("10", "NaN"),          # numeric-like + "NaN"
    OtherVar  = c("1.5e1", "2.0"),       # exponent form
    stringsAsFactors = FALSE
  )
  # local_tempfile() removes the file automatically when the test exits.
  csv_path <- withr::local_tempfile(fileext = ".csv")
  utils::write.csv(coh_input, csv_path, row.names = FALSE)

  out <- writeAlizer::import_coh(csv_path)

  # ID created and sorting applied (character order "a1","z2")
  expect_identical(out$ID, c("a1", "z2"))

  # Numeric-like columns are coerced to numeric.
  expect_true(is.numeric(out$SomeVar))

  # After sorting, the "NaN" row ("a1") comes first. It must be NA rather than
  # NaN; is.na(NaN) is TRUE, so is.nan() is asserted as well.
  expect_true(is.na(out$SomeVar[1]))
  expect_false(is.nan(out$SomeVar[1]))

  # Values must travel with their rows through the sort: "z2" keeps 10 / 15,
  # "a1" keeps 2.
  expect_equal(out$SomeVar[2], 10)
  expect_equal(out$OtherVar, c(2.0, 15.0))
})

testthat::test_that("import_merge_gamet_rb: merges on ID as character and preserves per_*", {
  # Replace writeAlizer's import_rb() for the duration of this test with a stub
  # returning a fixed two-row table. This keeps the test independent of the
  # ReaderBench CSV layout while still driving the merge step.
  testthat::local_mocked_bindings(
    import_rb = function(path) {
      data.frame(
        ID = c("1", "2"),
        F2 = c(0.1, 0.2),
        F3 = c(0.3, 0.4),
        stringsAsFactors = FALSE
      )
    },
    .package = "writeAlizer"
  )

  # GAMET fixture: file names "1.csv"/"2.csv" pair up with the stub's IDs
  # "1"/"2"; every row has a word_count of 10.
  gamet_csv <- withr::local_tempfile(fileext = ".csv")
  gamet_rows <- data.frame(
    filename      = c("1.csv", "2.csv"),
    error_count   = c(0, 1),
    word_count    = rep(10, 2),
    grammar       = c(1, 2),
    misspelling   = c(0, 1),
    duplication   = rep(0, 2),
    typographical = rep(0, 2),
    whitespace    = rep(0, 2),
    stringsAsFactors = FALSE
  )
  utils::write.csv(gamet_rows, gamet_csv, row.names = FALSE)

  # The RB path is only a placeholder: the stub above ignores its argument.
  rb_placeholder <- "dummy_rb.csv"
  merged <- writeAlizer::import_merge_gamet_rb(rb_placeholder, gamet_csv)

  # Columns from both sources are present after the merge.
  wanted_cols <- c("ID", "per_gram", "per_spell", "F2", "F3")
  expect_true(all(wanted_cols %in% names(merged)))

  # ID is still a character vector, in the original order.
  expect_true(is.character(merged$ID))
  expect_identical(merged$ID, c("1", "2"))

  # per_* values come through the merge unchanged.
  expect_equal(merged$per_gram, c(1, 2) / 10)
  expect_equal(merged$per_spell, c(0, 1) / 10)
})