test_that("btw_tool_files_read() works", {
  # Run inside a throwaway directory so the fixtures vanish after the test.
  scratch_dir <- withr::local_tempdir()
  withr::local_dir(scratch_dir)

  # Fixtures: a CSV file to read, plus an RDS file used by the error snapshots.
  csv_path <- "mtcars.csv"
  saveRDS(mtcars, "mtcars.rds")
  write.csv(mtcars, csv_path, row.names = FALSE)

  # Whole-file read: well-formed tool result that reports the requested path.
  whole_file <- btw_tool_files_read(csv_path)
  expect_btw_tool_result(whole_file, has_data = FALSE)
  expect_equal(whole_file@extra$path, csv_path, ignore_attr = TRUE)

  # The read tool's @value carries hashlines, while its display markdown is a
  # clean code block -- the same thing btw_this() returns for the file.
  expect_equal(whole_file@extra$display$markdown, btw_this("./mtcars.csv"))

  # Range reads: @value must open with "<line_start>:<3 hex chars>|".
  first_line <- btw_tool_files_read(csv_path, line_start = 1, line_end = 1)
  expect_match(first_line@value, "^1:[a-f0-9]{3}\\|")

  # line_end = 35 lies past the end of the file (header + 32 rows = 33 lines).
  tail_lines <- btw_tool_files_read(csv_path, line_start = 32, line_end = 35)
  expect_match(tail_lines@value, "^32:[a-f0-9]{3}\\|")

  skip_if_not_snapshot_env()

  # Both reads below must signal an error (error = TRUE). The calls are kept
  # literal because expect_snapshot() records the code it evaluates.
  expect_snapshot(
    btw_tool_files_read("mtcars.rds"),
    error = TRUE
  )

  expect_snapshot(
    btw_tool_files_read("../mtcars.rds"),
    error = TRUE
  )
})

# is_text_file() and CJK multi-byte UTF-8 boundary truncation
# https://github.com/posit-dev/btw/issues/170
describe("is_text_file()", {
  # UTF-8 encodes the CJK character used here as a 3-byte sequence:
  #   Byte 1 (lead):         0xE0-0xEF
  #   Byte 2 (continuation): 0x80-0xBF
  #   Byte 3 (continuation): 0x80-0xBF
  #
  # is_text_file() reads 8192 bytes, so the end of the buffer can fall at three
  # positions within a character:
  #   - After byte 3: complete sequence (no truncation)
  #   - After byte 1: lead byte only (2 continuation bytes missing)
  #   - After byte 2: lead + 1 continuation (1 continuation byte missing)
  #
  # Because 8192 = 3 * 2730 + 2, a file made up of nothing but 3-byte
  # characters is always cut after byte 2. A short ASCII prefix shifts the
  # alignment so the other cut positions can be reached as well.

  # Helper: write `prefix` followed by `n_chars` copies of a 3-byte CJK
  # character (测 = 0xe6 0xb5 0x8b) to `path`.
  #
  # path:    file to create or overwrite.
  # n_chars: number of copies of the character to write.
  # prefix:  raw vector written before the characters; defaults to nothing.
  write_cjk_file <- function(path, n_chars, prefix = raw()) {
    raw_char <- charToRaw("\u6d4b") # 测
    stopifnot(length(raw_char) == 3L, is.raw(prefix))
    writeBin(c(prefix, rep(raw_char, n_chars)), path)
  }

  it("accepts an empty file", {
    tmp <- withr::local_tempfile()
    file.create(tmp)
    expect_true(is_text_file(tmp))
  })

  it("rejects a binary file with NULL bytes", {
    tmp <- withr::local_tempfile()
    writeBin(as.raw(c(0x00, 0x01, 0x02, 0xff, 0x00)), tmp)
    expect_false(is_text_file(tmp))
  })

  it("accepts a small CJK file shorter than the 8192-byte buffer", {
    tmp <- withr::local_tempfile()
    # 100 chars * 3 bytes = 300 bytes, well under the 8192 buffer.
    write_cjk_file(tmp, 100)
    expect_equal(file.size(tmp), 300)
    expect_true(is_text_file(tmp))
  })

  describe("with CJK text at the 8192-byte buffer boundary", {
    it("accepts when the buffer ends on a complete character", {
      tmp <- withr::local_tempfile()
      # 2730 chars * 3 bytes = 8190 bytes; buffer reads all 8190 bytes.
      # Last byte in buffer is byte 3 of a complete sequence.
      write_cjk_file(tmp, 2730)
      expect_equal(file.size(tmp), 8190)
      expect_true(is_text_file(tmp))
    })

    it("accepts when the buffer truncates after lead + 1 continuation byte", {
      tmp <- withr::local_tempfile()
      # 2731 chars * 3 bytes = 8193 bytes; buffer reads 8192 bytes.
      # Byte 8191 = lead byte of char 2731 (0xe6)
      # Byte 8192 = first continuation byte (0xb5) -- last byte read
      # Byte 8193 = second continuation byte (0x8b) -- NOT read
      write_cjk_file(tmp, 2731)
      expect_equal(file.size(tmp), 8193)
      expect_true(is_text_file(tmp))
    })

    it("accepts when the buffer truncates after a lone lead byte", {
      tmp <- withr::local_tempfile()
      # 1 ASCII byte + 2731 chars * 3 bytes = 8194 bytes; buffer reads 8192.
      # With the 1-byte prefix, char k occupies bytes 3k - 1 .. 3k + 1, so:
      # Byte 8192 = lead byte of char 2731 (0xe6) -- last byte read
      # Bytes 8193-8194 = both continuation bytes -- NOT read
      write_cjk_file(tmp, 2731, prefix = charToRaw("a"))
      expect_equal(file.size(tmp), 8194)
      expect_true(is_text_file(tmp))
    })

    it("accepts a file that itself ends on a lone lead byte", {
      tmp <- withr::local_tempfile()
      # 2730 complete chars (8190 bytes) + 1 orphan lead byte = 8191 bytes.
      # The file is smaller than the buffer, so all 8191 bytes are read and
      # the last byte in the buffer is an incomplete lead byte.
      raw_char <- charToRaw("\u6d4b")
      bytes <- c(rep(raw_char, 2730), raw_char[1])
      writeBin(bytes, tmp)
      expect_equal(file.size(tmp), 8191)
      expect_true(is_text_file(tmp))
    })
  })
})

test_that("btw_tool_files_read() works with UTF-8 files containing non-ASCII characters", {
  withr::local_dir(withr::local_tempdir())

  # Create a file with Cyrillic characters that is guaranteed to be UTF-8 on
  # disk. writeLines() re-encodes to the native encoding unless
  # useBytes = TRUE, so in a non-UTF-8 locale the fixture would otherwise not
  # contain the UTF-8 bytes this test is about.
  writeLines(enc2utf8(c("# Тест", "1 + 1")), "test.R", useBytes = TRUE)

  # Verify the fixture: it holds multi-byte (non-ASCII) content and is valid
  # UTF-8. Passing the file name lets readBin() open and close the file itself.
  bytes <- readBin("test.R", what = "raw", n = file.size("test.R"))
  expect_true(any(as.integer(bytes) > 127L))
  expect_true(validUTF8(rawToChar(bytes)))

  # Should be able to read the file
  result <- btw_tool_files_read("test.R")
  expect_btw_tool_result(result, has_data = FALSE)
  expect_equal(result@extra$path, "test.R", ignore_attr = TRUE)

  # Check that the file content was read (the main point of this test is that
  # the file can be read at all - previously this errored on Windows)
  expect_match(result@value, "1 \\+ 1")
})

test_that("btw_tool_files_read() returns hashline-annotated output", {
  withr::local_dir(withr::local_tempdir())
  writeLines(c("hello", "world", "foo"), "test.txt")

  result <- btw_tool_files_read("test.txt")

  # Value has hashlines (model-facing): one "<n>:<3 hex chars>|<text>" entry
  # per line of the file.
  lines <- strsplit(result@value, "\n")[[1]]
  expect_length(lines, 3)
  expect_match(lines[1], "^1:[a-f0-9]{3}\\|hello$")
  expect_match(lines[2], "^2:[a-f0-9]{3}\\|world$")
  expect_match(lines[3], "^3:[a-f0-9]{3}\\|foo$")

  # Display markdown is clean code block (user-facing)
  display_md <- result@extra$display$markdown
  expect_match(display_md, "^```")
  # `^` only matches at the start of the whole string, which the assertion
  # above requires to be a code fence. Anchor on every line start instead, and
  # accept any line number, so a hashline anywhere in the block is caught.
  expect_no_match(display_md, "(^|\n)[0-9]+:[a-f0-9]{3}\\|")
})

test_that("btw_tool_files_read() hashlines use actual line numbers for ranges", {
  withr::local_dir(withr::local_tempdir())
  # Line i of the fixture contains the text "line<i>", so every hashline label
  # can be checked against the content it is attached to.
  writeLines(paste0("line", 1:50), "test.txt")

  result <- btw_tool_files_read("test.txt", line_start = 10, line_end = 12)
  lines <- strsplit(result@value, "\n")[[1]]

  # Check the label and the content together: a prefix-only match would still
  # pass if the labels were shifted relative to the lines they annotate.
  expect_match(lines[1], "^10:[a-f0-9]{3}\\|line10$")
  expect_match(lines[2], "^11:[a-f0-9]{3}\\|line11$")
  expect_match(lines[3], "^12:[a-f0-9]{3}\\|line12$")
})