test_that("translate_to_messages works with example turns", {
  skip_on_cran()

  # Messages translated from the example solver after its system prompt has
  # been set to NULL.
  solver <- example_ellmer_solver()$set_system_prompt(NULL)
  translated <- translate_to_messages(solver)

  # Messages stored for the first sample of the example log.
  logged <- example_inspect_log()[["samples"]][[1]][["messages"]]

  # Remove the `id` element so the comparison ignores it on both sides.
  drop_id <- function(msg) {
    msg[["id"]] <- NULL
    msg
  }

  expect_equal(
    purrr::map(translated, drop_id),
    purrr::map(logged, drop_id)
  )
})

test_that("translate_to_messages handles image inputs", {
  # The solver below runs a chat_claude() conversation with no recorded
  # cassette, so this test must not run on CRAN.
  skip_on_cran()
  # Return value is unused; called for its side effect only.
  # NOTE(review): presumably skips or errors when the key is unavailable --
  # confirm against the helper's definition.
  key_get("ANTHROPIC_API_KEY")

  # Write logs to a temporary directory that is cleaned up with the test.
  tmp_dir <- withr::local_tempdir()
  withr::local_envvar(list(VITALS_LOG_DIR = tmp_dir))
  # Install a no-op cli handler for the duration of the test.
  withr::local_options(cli.default_handler = function(...) {})
  # Make interactive() report FALSE while the test runs.
  local_mocked_bindings(interactive = function(...) FALSE)
  library(ellmer)

  dataset <- tibble::tibble(
    input = "What does this image show?",
    target = "The image shows a bike."
  )

  # Solver that sends the first input together with a local image file and
  # returns the text of the last turn plus the chat object used.
  image_solver <- function(
    inputs,
    solver_chat = chat_claude(model = "claude-sonnet-4-5-20250929")
  ) {
    image_file <- system.file("test/x.png", package = "vitals")
    # Work on a clone so the default chat object is not modified.
    ch <- solver_chat$clone()
    ch$chat(inputs[1], content_image_file(image_file), echo = FALSE)
    list(result = ch$last_turn()@text, solver_chat = list(ch))
  }

  tsk <- Task$new(
    dataset = dataset,
    solver = image_solver,
    scorer = model_graded_qa()
  )

  tsk$eval()

  # Look the log path up once and reuse it for both checks.
  log_path <- tsk$log()
  expect_valid_log(log_path)

  # The image should appear in the written log as a data URI.
  log_content <- readLines(log_path)
  expect_true(any(grepl("data:image", log_content, fixed = TRUE)))
})

test_that("translate_to_messages handles remote image URLs", {
  skip_on_cran()
  key_get("ANTHROPIC_API_KEY")

  # Logs go to a temporary directory; cli output is routed to a no-op handler
  # and interactive() is mocked to return FALSE for the duration of the test.
  log_dir <- withr::local_tempdir()
  withr::local_envvar(list(VITALS_LOG_DIR = log_dir))
  withr::local_options(cli.default_handler = function(...) {})
  local_mocked_bindings(interactive = function(...) FALSE)
  library(ellmer)

  logo_question <- tibble::tibble(
    input = "What does this image show?",
    target = "The R logo."
  )

  # Solver that sends the first input along with an image referenced by URL,
  # then returns the last turn's text and the chat object used.
  url_solver <- function(
    inputs,
    solver_chat = chat_claude(model = "claude-sonnet-4-5-20250929")
  ) {
    session <- solver_chat$clone()
    remote_image <- content_image_url("https://www.r-project.org/Rlogo.png")
    session$chat(inputs[1], remote_image, echo = FALSE)
    list(result = session$last_turn()@text, solver_chat = list(session))
  }

  task <- Task$new(
    dataset = logo_question,
    solver = url_solver,
    scorer = model_graded_qa()
  )
  task$eval()

  written_log <- task$log()
  expect_valid_log(written_log)

  # The log is expected to contain a data URI and no reference to the
  # original remote host.
  log_lines <- readLines(written_log)
  has_data_uri <- grepl("data:image", log_lines, fixed = TRUE)
  has_remote_url <- grepl("https://www.r-project.org", log_lines, fixed = TRUE)
  expect_true(any(has_data_uri))
  expect_false(any(has_remote_url))
})


test_that("logs including system prompts are compatible with inspect", {
  vcr::local_cassette("translate-messages-system-prompts")
  key_get("OPENAI_API_KEY")

  # Logs go to a temporary directory; cli output is routed to a no-op handler
  # and interactive() is mocked to return FALSE for the duration of the test.
  log_dir <- withr::local_tempdir()
  withr::local_envvar(list(VITALS_LOG_DIR = log_dir))
  withr::local_options(cli.default_handler = function(...) {})
  local_mocked_bindings(interactive = function(...) FALSE)

  library(ellmer)

  addition_problems <- tibble::tibble(
    input = c("What's 2+2?", "What's 2+3?"),
    target = c("4", "5")
  )

  # Chat object carrying the system prompt that should show up in the log.
  terse_chat <- chat_openai(
    system_prompt = "Be terse.",
    model = "gpt-4.1-nano"
  )

  task <- Task$new(
    dataset = addition_problems,
    solver = generate(terse_chat),
    scorer = model_graded_qa()
  )
  task$eval()

  expect_valid_log(task$log())

  # The system prompt text must appear somewhere in the written log.
  mentions_prompt <- grepl("Be terse", readLines(task$log()))
  expect_true(any(mentions_prompt))
})