#' LLM integration tests for agenticr
#'
#' These tests require a configured API key (via agentic_config or env vars).
#' They are skipped if no API key is available.
#'
#' @keywords internal

has_api_key <- function() {
  if (nchar(Sys.getenv("AGENTICR_API_KEY", unset = "")) > 0) return(TRUE)
  if (nchar(Sys.getenv("DEEPSEEK_API_KEY", unset = "")) > 0) return(TRUE)
  tryCatch({
    cfg <- get_api_config()
    if (identical(cfg$provider, "local")) {
      resp <- httr::GET("http://localhost:11434/api/tags", httr::timeout(5))
      return(httr::status_code(resp) == 200)
    }
    if (identical(cfg$provider, "custom") && nchar(cfg$base_url) > 0) {
      # custom provider: verify endpoint is reachable
      return(httr::status_code(
        httr::GET(paste0(cfg$base_url, "/models"), httr::timeout(2))) == 200)
    }
    # All other providers require a valid API key
    nchar(cfg$api_key) > 0
  }, error = function(e) FALSE)
}

skip_if_no_api <- function() {
  if (!has_api_key()) {
    testthat::skip("No API key configured")
  }
  key <- Sys.getenv("AGENTICR_API_KEY", unset = Sys.getenv("DEEPSEEK_API_KEY", unset = ""))
  if (nchar(key) > 0) {
    tryCatch(
      agentic_config(api_key = key, save = FALSE),
      error = function(e) NULL
    )
  }
}

# ============================================================================
# Simple NL queries — structural verification
# ============================================================================

test_that("LLM: simple calculation returns numeric result", {
  skip_if_no_api()
  messages <- list(list(role = "system", content = SYSTEM_PROMPT))
  messages <- c(messages, list(list(role = "user", content = "what is 2 + 2? just the number")))
  resp <- chat_completion(messages, tools = get_tool_definitions())
  content <- resp$choices[[1]]$message$content
  expect_true(!is.null(content) || !is.null(resp$choices[[1]]$message$tool_calls))
})

test_that("LLM: NL query about mtcars uses tools", {
  skip_if_no_api()
  messages <- list(list(role = "system", content = SYSTEM_PROMPT))
  messages <- c(messages, list(list(role = "user", content = "what is the mean of mpg in mtcars?")))
  resp <- chat_completion(messages, tools = get_tool_definitions())
  msg <- resp$choices[[1]]$message
  has_tools <- !is.null(msg$tool_calls) && length(msg$tool_calls) > 0
  has_content <- !is.null(msg$content) && nchar(msg$content) > 0
  expect_true(has_tools || has_content)
})

test_that("LLM: data inspection query triggers get_dataframe_info", {
  skip_if_no_api()
  messages <- list(list(role = "system", content = SYSTEM_PROMPT))
  messages <- c(messages, list(list(role = "user", content = "show me the structure of the mtcars dataset")))
  resp <- chat_completion(messages, tools = get_tool_definitions())
  msg <- resp$choices[[1]]$message
  tool_names <- character(0)
  if (!is.null(msg$tool_calls)) {
    for (tc in msg$tool_calls) {
      tool_names <- c(tool_names, tc$`function`$name)
    }
  }
  expect_true("get_dataframe_info" %in% tool_names || !is.null(msg$content))
})

test_that("LLM: plot request generates R code", {
  skip_if_no_api()
  messages <- list(list(role = "system", content = SYSTEM_PROMPT))
  messages <- c(messages, list(list(role = "user", content = "make a histogram of mpg from mtcars")))
  resp <- chat_completion(messages, tools = get_tool_definitions())
  msg <- resp$choices[[1]]$message
  tool_names <- character(0)
  if (!is.null(msg$tool_calls)) {
    for (tc in msg$tool_calls) {
      tool_names <- c(tool_names, tc$`function`$name)
    }
  }
  content_lower <- tolower(if (is.null(msg$content)) "" else msg$content)
  has_hist <- grepl("hist", content_lower) || "execute_r_code" %in% tool_names
  expect_true(has_hist)
})

# ============================================================================
# Tool execution verification
# ============================================================================

test_that("LLM: execute_r_code tool runs R code", {
  skip_if_no_api()
  messages <- list(list(role = "system", content = SYSTEM_PROMPT))
  messages <- c(messages, list(list(role = "user", content = "run: mean(c(1,2,3,4,5))")))
  resp <- chat_completion(messages, tools = get_tool_definitions())
  msg <- resp$choices[[1]]$message
  if (!is.null(msg$tool_calls) && length(msg$tool_calls) > 0) {
    tc <- msg$tool_calls[[1]]
    result <- execute_tool(tc$`function`$name,
      jsonlite::fromJSON(tc$`function`$arguments, simplifyVector = FALSE))
    expect_match(result, "3", all = FALSE)
  } else if (!is.null(msg$content)) {
    expect_match(msg$content, "3", all = FALSE)
  } else {
    fail("Response had no tool calls and no content")
  }
})

test_that("LLM: search_variables finds mtcars", {
  skip_if_no_api()
  result <- tool_search_variables("mtcars")
  expect_match(result, "mtcars")
})

test_that("LLM: get_dataframe_info works on mtcars", {
  skip_if_no_api()
  result <- tool_get_dataframe_info("mtcars")
  expect_match(result, "Data frame: mtcars")
  expect_match(result, "32 rows")
  expect_match(result, "mpg")
})

# ============================================================================
# Error repair — agent fixes broken R code
# ============================================================================

test_that("LLM: agent fixes missing function error", {
  skip_if_no_api()
  messages <- list(list(role = "system", content = SYSTEM_PROMPT))
  messages <- c(messages, list(list(role = "user",
    content = "the user typed: meen(mtcars$mpg)\nThe error: could not find function \"meen\"\nFix it")))
  resp <- chat_completion(messages, tools = get_tool_definitions())
  msg <- resp$choices[[1]]$message
  if (!is.null(msg$tool_calls) && length(msg$tool_calls) > 0) {
    tc <- msg$tool_calls[[1]]
    if (tc$`function`$name == "execute_r_code") {
      args <- jsonlite::fromJSON(tc$`function`$arguments, simplifyVector = FALSE)
      expect_match(args$code, "mean", ignore.case = TRUE)
    } else {
      fail(paste("Expected execute_r_code, got", tc$`function`$name))
    }
  } else if (!is.null(msg$content)) {
    expect_match(msg$content, "mean", ignore.case = TRUE)
  } else {
    fail("No tool calls and no content in response")
  }
})

test_that("LLM: agent shows first 6 rows of a dataset", {
  skip_if_no_api()
  agenticr_env$context_injected <- FALSE
  agenticr_env$stable_summary <- NULL
  agenticr_env$conversation <- list()
  agenticr_env$ask_permission <- function(prompt) FALSE

  expect_error(
    process_with_agent("show the first 6 rows of mtcars"),
    NA
  )
  conv <- agenticr_env$conversation
  expect_true(length(conv) > 0)
  # Result must contain actual mtcars data, not just a function name
  msgs <- Filter(function(m) m$role == "tool", conv)
  expect_true(length(msgs) > 0)
  combined <- paste(sapply(msgs, `[[`, "content"), collapse = " ")
  expect_true(grepl("Mazda|mpg|21\\.0|Hornet", combined))
})

# ============================================================================
# Statistical analysis
# ============================================================================

test_that("LLM: t-test request generates correct analysis", {
  skip_if_no_api()
  skip_on_cran()
  agenticr_env$context_injected <- FALSE
  agenticr_env$stable_summary <- NULL
  agenticr_env$conversation <- list()
  agenticr_env$ask_permission <- function(prompt) FALSE

  expect_error(
    process_with_agent("run a t-test comparing mpg between 4 and 8 cylinder cars in mtcars"),
    NA
  )
  conv <- agenticr_env$conversation
  expect_true(length(conv) > 0)
  combined <- paste(sapply(conv, function(m) paste(m$content %||% "", collapse = " ")), collapse = " ")
  expect_true(grepl("t\\.test|t-test|t test", combined, ignore.case = TRUE))
})

test_that("LLM: correlation request produces result", {
  skip_if_no_api()
  agenticr_env$context_injected <- FALSE
  agenticr_env$stable_summary <- NULL
  agenticr_env$conversation <- list()
  agenticr_env$ask_permission <- function(prompt) FALSE

  expect_error(
    process_with_agent("what is the correlation between mpg and hp in mtcars?"),
    NA
  )
  conv <- agenticr_env$conversation
  expect_true(length(conv) > 0)
  combined <- paste(sapply(conv, function(m) paste(m$content %||% "", collapse = " ")), collapse = " ")
  expect_true(grepl("cor|0\\.77|-0\\.78|lm\\(", combined, ignore.case = TRUE))
})

# ============================================================================
# Multi-turn conversation — context persistence
# ============================================================================

test_that("LLM: conversation context persists across turns", {
  skip_if_no_api()
  agenticr_env$context_injected <- FALSE
  agenticr_env$stable_summary <- NULL
  agenticr_env$conversation <- list()
  agenticr_env$ask_permission <- function(prompt) FALSE

  agenticr:::process_with_agent("what columns does mtcars have? just list column names briefly")
  conv1_len <- length(agenticr_env$conversation)
  expect_true(conv1_len > 0)

  agenticr:::process_with_agent("now show the mean of the first column you listed")
  conv2_len <- length(agenticr_env$conversation)
  expect_true(conv2_len >= conv1_len)
})

test_that("LLM: multi-step analysis across turns", {
  skip_if_no_api()
  agenticr_env$context_injected <- FALSE
  agenticr_env$stable_summary <- NULL
  agenticr_env$conversation <- list()
  agenticr_env$ask_permission <- function(prompt) FALSE

  expect_error(agenticr:::process_with_agent("look at the mtcars dataset structure"), NA)
  expect_error(agenticr:::process_with_agent("what is the average mpg for each cylinder group?"), NA)
  expect_error(agenticr:::process_with_agent("make a bar chart of those averages"), NA)
})

# ============================================================================
# Data transformation
# ============================================================================

test_that("LLM: group-by summarise produces result", {
  skip_if_no_api()
  agenticr_env$context_injected <- FALSE
  agenticr_env$stable_summary <- NULL
  agenticr_env$conversation <- list()
  agenticr_env$ask_permission <- function(prompt) FALSE

  expect_error(
    process_with_agent("calculate mean mpg grouped by cyl in mtcars"),
    NA
  )
  conv <- agenticr_env$conversation
  expect_true(length(conv) > 0)
  combined <- paste(sapply(conv, function(m) paste(m$content %||% "", collapse = " ")), collapse = " ")
  expect_true(grepl("26\\.|aggregate|group_by|tapply|by\\(", combined, ignore.case = TRUE))
})

test_that("LLM: filter request produces result", {
  skip_if_no_api()
  agenticr_env$context_injected <- FALSE
  agenticr_env$stable_summary <- NULL
  agenticr_env$conversation <- list()
  agenticr_env$ask_permission <- function(prompt) FALSE

  expect_error(
    process_with_agent("show me cars in mtcars with mpg greater than 20"),
    NA
  )
  conv <- agenticr_env$conversation
  expect_true(length(conv) > 0)
  combined <- paste(sapply(conv, function(m) paste(m$content %||% "", collapse = " ")), collapse = " ")
  expect_true(grepl("Mazda|Merc|Hornet|filter|subset|\\[.*mpg", combined, ignore.case = TRUE))
})

# ============================================================================
# Sanitization keeps message chains valid
# ============================================================================

test_that("LLM: tool_calls/tool pairing stays valid across turns", {
  skip_if_no_api()
  agenticr_env$context_injected <- FALSE
  agenticr_env$stable_summary <- NULL
  agenticr_env$conversation <- list()
  agenticr_env$ask_permission <- function(prompt) FALSE

  expect_error(
    agenticr:::process_with_agent("what is the mean of mpg by cylinder in mtcars?"),
    NA
  )
  expect_error(
    agenticr:::process_with_agent("now show the count of cars per cylinder"),
    NA
  )
})

# ============================================================================
# Error-loop detection — integration test
# ============================================================================

test_that("LLM: agent completes repo-analysis task without silent hang", {
  skip_if_no_api()
  skip_on_cran()
  agenticr_env$context_injected <- FALSE
  agenticr_env$stable_summary <- NULL
  agenticr_env$conversation <- list()
  agenticr_env$ask_permission <- function(prompt) FALSE

  expect_error(
    agenticr:::process_with_agent(
      "read the tests/testthat/test-llm.R file, summarize what it tests, and list 3 area of improvements. do not edit code."
    ),
    NA
  )
  conv <- agenticr_env$conversation
  expect_true(length(conv) > 0)
  # Verify the conversation has assistant content (not just errors)
  msgs <- Filter(function(m) m$role == "assistant" && nchar(m$content %||% "") > 20, conv)
  expect_true(length(msgs) > 0)
})