# Tests for DeepSeek Provider
library(testthat)
library(aisdk)

# Load helper functions (for environment variable handling)
helper_path <- file.path(test_path("helper-env.R"))
source(helper_path)

# Model id used by the generic tests; DEEPSEEK_MODEL overrides the fallback.
deepseek_model <- Sys.getenv("DEEPSEEK_MODEL", "deepseek-chat")

test_that("create_deepseek() creates a provider with correct defaults", {
  # Use safe provider creation
  provider <- safe_create_provider(create_deepseek)

  expect_s3_class(provider, "DeepSeekProvider")
  expect_equal(provider$specification_version, "v1")
})

test_that("DeepSeek provider creates language model correctly", {
  provider <- safe_create_provider(create_deepseek)
  model <- provider$language_model(deepseek_model)

  expect_s3_class(model, "DeepSeekLanguageModel")
  expect_equal(model$model_id, deepseek_model)
  expect_equal(model$provider, "deepseek")
  expect_equal(model$specification_version, "v1")
})

test_that("DeepSeek provider uses default model when none specified", {
  # Isolate from a user's local .Renviron override of DEEPSEEK_MODEL so the
  # test asserts the *built-in* default, not whatever the developer set.
  withr::with_envvar(c(DEEPSEEK_MODEL = NA), {
    provider <- safe_create_provider(create_deepseek)
    model <- provider$language_model()

    expect_s3_class(model, "DeepSeekLanguageModel")
    # Default is deepseek-chat
    expect_equal(model$model_id, "deepseek-chat")
  })
})

test_that("DeepSeek v4 models are marked as reasoning-capable", {
  provider <- safe_create_provider(create_deepseek)

  for (model_id in c("deepseek-v4", "deepseek-v4-flash", "deepseek-v4-pro")) {
    model <- provider$language_model(model_id)
    # `info` identifies which model id failed.
    expect_true(isTRUE(model$capabilities$is_reasoning_model), info = model_id)
    expect_true(isTRUE(model$capabilities$reasoning), info = model_id)
  }
})

test_that("DeepSeek provider forwards thinking-mode parameters", {
  provider <- suppressWarnings(create_deepseek(api_key = "test-key"))
  model <- provider$language_model("deepseek-v4")

  payload <- model$build_payload(list(
    messages = list(list(role = "user", content = "Hello")),
    thinking = TRUE,
    thinking_budget = 2048,
    reasoning_effort = "low",
    max_tokens = 1000,
    temperature = 0.2,
    top_p = 0.9,
    presence_penalty = 0.1,
    frequency_penalty = 0.2
  ))

  # logical TRUE is auto-converted to DeepSeek API format
  expect_equal(payload$body$thinking, list(type = "enabled"))
  expect_equal(payload$body$thinking_budget, 2048)
  # "low" is expected to be sent as "high"
  expect_equal(payload$body$reasoning_effort, "high")
  # max_tokens is expected under max_completion_tokens instead
  expect_equal(payload$body$max_completion_tokens, 1000)
  expect_null(payload$body$max_tokens)
  # Sampling parameters are expected to be absent from the thinking payload
  expect_null(payload$body$temperature)
  expect_null(payload$body$top_p)
  expect_null(payload$body$presence_penalty)
  expect_null(payload$body$frequency_penalty)
})

test_that("DeepSeek reasoning_effort maps to supported thinking-mode levels", {
  provider <- suppressWarnings(create_deepseek(api_key = "test-key"))
  model <- provider$language_model("deepseek-v4")

  medium_payload <- model$build_payload(list(
    messages = list(list(role = "user", content = "Hello")),
    reasoning_effort = "medium"
  ))
  max_payload <- model$build_payload(list(
    messages = list(list(role = "user", content = "Hello")),
    reasoning_effort = "xhigh"
  ))

  # "medium" is expected as "high", "xhigh" as "max"
  expect_equal(medium_payload$body$reasoning_effort, "high")
  expect_equal(max_payload$body$reasoning_effort, "max")
})

test_that("DeepSeek chat model keeps sampling params unless thinking is enabled", {
  provider <- suppressWarnings(create_deepseek(api_key = "test-key"))
  model <- provider$language_model("deepseek-chat")

  plain_payload <- model$build_payload(list(
    messages = list(list(role = "user", content = "Hello")),
    temperature = 0.2
  ))
  thinking_payload <- model$build_payload(list(
    messages = list(list(role = "user", content = "Hello")),
    thinking = TRUE,
    temperature = 0.2
  ))

  expect_equal(plain_payload$body$temperature, 0.2)
  expect_null(thinking_payload$body$temperature)
})

test_that("DeepSeek thinking parameter accepts native API format", {
  provider <- suppressWarnings(create_deepseek(api_key = "test-key"))
  model <- provider$language_model("deepseek-v4")

  payload <- model$build_payload(list(
    messages = list(list(role = "user", content = "Hello")),
    thinking = list(type = "enabled"),
    max_tokens = 1000
  ))

  # native list format is passed through as-is
  expect_equal(payload$body$thinking, list(type = "enabled"))
})

test_that("DeepSeek thinking_budget does not partially match thinking", {
  provider <- suppressWarnings(create_deepseek(api_key = "test-key"))
  model <- provider$language_model("deepseek-v4")

  payload <- model$build_payload(list(
    messages = list(list(role = "user", content = "Hello")),
    thinking_budget = 2048
  ))

  # `$` on a list partially matches names, so `payload$body$thinking` could
  # resolve to `thinking_budget`; the exact-match helper is used instead.
  expect_null(aisdk:::list_get_exact(payload$body, "thinking"))
  expect_equal(payload$body$thinking_budget, 2048)
})

test_that("DeepSeek stream payload forwards thinking-mode parameters", {
  provider <- suppressWarnings(create_deepseek(api_key = "test-key"))
  model <- provider$language_model("deepseek-v4")

  payload <- model$build_stream_payload(list(
    messages = list(list(role = "user", content = "Hello")),
    thinking = FALSE,
    thinking_budget = 512,
    reasoning_effort = "medium"
  ))

  # logical FALSE is auto-converted to DeepSeek API format
  expect_equal(payload$body$thinking, list(type = "disabled"))
  expect_equal(payload$body$thinking_budget, 512)
  expect_equal(payload$body$reasoning_effort, "high")
})

test_that("DeepSeek tool turns preserve reasoning_content when thinking is enabled", {
  provider <- suppressWarnings(create_deepseek(api_key = "test-key"))
  model <- provider$language_model("deepseek-v4-flash")

  test_tool <- Tool$new(
    name = "get_time",
    description = "Get the current time",
    parameters = z_object(.dummy = z_string("Unused")),
    execute = function(args) "12:00"
  )

  # Mock state: number of requests seen and the body of each request.
  calls <- 0
  captured_bodies <- list()

  testthat::local_mocked_bindings(
    post_to_api = function(url, headers, body, ...) {
      calls <<- calls + 1
      captured_bodies[[calls]] <<- body

      # First request: reply with a tool call plus reasoning_content.
      if (calls == 1) {
        return(list(
          choices = list(list(
            message = list(
              content = "",
              reasoning_content = "Need the time tool.",
              tool_calls = list(list(
                id = "call_1",
                type = "function",
                `function` = list(
                  name = "get_time",
                  arguments = "{\".dummy\":\"unused\"}"
                )
              ))
            ),
            finish_reason = "tool_calls"
          )),
          usage = list(prompt_tokens = 8, completion_tokens = 4, total_tokens = 12)
        ))
      }

      # Any later request: reply with the final answer.
      list(
        choices = list(list(
          message = list(
            content = "The current time is 12:00.",
            reasoning_content = NULL
          ),
          finish_reason = "stop"
        )),
        usage = list(prompt_tokens = 20, completion_tokens = 6, total_tokens = 26)
      )
    },
    .package = "aisdk"
  )

  result <- generate_text(
    model,
    "What time is it?",
    tools = list(test_tool),
    max_steps = 2,
    thinking = TRUE,
    max_tokens = 100
  )

  expect_equal(result$text, "The current time is 12:00.")
  expect_equal(calls, 2)

  # The second request must replay the assistant tool-call turn, including
  # its reasoning_content.
  assistant_message <- captured_bodies[[2]]$messages[[2]]
  expect_equal(assistant_message$role, "assistant")
  expect_equal(assistant_message$reasoning_content, "Need the time tool.")
  expect_length(assistant_message$tool_calls, 1)

  expect_equal(result$messages_added[[1]]$reasoning_content, "Need the time tool.")
  expect_equal(result$messages_added[[2]]$role, "tool")
  expect_equal(result$messages_added[[3]]$role, "assistant")
  expect_equal(result$messages_added[[3]]$content, "The current time is 12:00.")
})

test_that("DeepSeek ChatSession persists tool-turn reasoning_content for later replay", {
  provider <- suppressWarnings(create_deepseek(api_key = "test-key"))
  model <- provider$language_model("deepseek-v4-flash")

  test_tool <- Tool$new(
    name = "get_time",
    description = "Get the current time",
    parameters = z_object(.dummy = z_string("Unused")),
    execute = function(args) "12:00"
  )

  # Mock state: number of requests seen and the body of each request.
  calls <- 0
  captured_bodies <- list()

  testthat::local_mocked_bindings(
    post_to_api = function(url, headers, body, ...) {
      calls <<- calls + 1
      captured_bodies[[calls]] <<- body

      # Request 1: tool call plus reasoning_content.
      if (calls == 1) {
        return(list(
          choices = list(list(
            message = list(
              content = "",
              reasoning_content = "Need the time tool.",
              tool_calls = list(list(
                id = "call_1",
                type = "function",
                `function` = list(
                  name = "get_time",
                  arguments = "{\".dummy\":\"unused\"}"
                )
              ))
            ),
            finish_reason = "tool_calls"
          )),
          usage = list(prompt_tokens = 8, completion_tokens = 4, total_tokens = 12)
        ))
      }

      # Request 2: final answer for the first user turn.
      if (calls == 2) {
        return(list(
          choices = list(list(
            message = list(
              content = "The current time is 12:00.",
              reasoning_content = NULL
            ),
            finish_reason = "stop"
          )),
          usage = list(prompt_tokens = 20, completion_tokens = 6, total_tokens = 26)
        ))
      }

      # Request 3 onwards: answer for the follow-up user turn.
      list(
        choices = list(list(
          message = list(
            content = "Earlier tool context is intact.",
            reasoning_content = NULL
          ),
          finish_reason = "stop"
        )),
        usage = list(prompt_tokens = 30, completion_tokens = 5, total_tokens = 35)
      )
    },
    .package = "aisdk"
  )

  session <- create_chat_session(
    model = model,
    tools = list(test_tool),
    max_steps = 3
  )

  result <- session$send("What time is it?", thinking = TRUE, max_tokens = 100)
  expect_equal(result$text, "The current time is 12:00.")

  history <- session$get_history()
  expect_equal(
    vapply(history, `[[`, character(1), "role"),
    c("user", "assistant", "tool", "assistant")
  )
  expect_equal(history[[2]]$reasoning_content, "Need the time tool.")
  expect_length(history[[2]]$tool_calls, 1)

  # A second user turn must replay the stored assistant tool-call message,
  # reasoning_content included, in the third request body.
  session$send("Can you still answer?", thinking = TRUE, max_tokens = 100)
  replayed_assistant <- captured_bodies[[3]]$messages[[2]]
  expect_equal(replayed_assistant$role, "assistant")
  expect_equal(replayed_assistant$reasoning_content, "Need the time tool.")
  expect_length(replayed_assistant$tool_calls, 1)
})

test_that("create_deepseek() accepts custom base_url", {
  provider <- safe_create_provider(create_deepseek,
    base_url = "https://custom.deepseek.com"
  )
  model <- provider$language_model(deepseek_model)

  # Model should be created successfully
  expect_s3_class(model, "DeepSeekLanguageModel")
})

test_that("create_deepseek() warns when API key is missing", {
  # Blank the API key for this test only. local_envvar() restores the previous
  # state (including "unset") when the test exits.
  withr::local_envvar(c(DEEPSEEK_API_KEY = ""))

  expect_warning(
    create_deepseek(),
    "DeepSeek API key not set"
  )
})

test_that("DeepSeek provider forwards timeout_seconds to OpenAI-compatible requests", {
  provider <- suppressWarnings(create_deepseek(api_key = "test-key", timeout_seconds = 600))
  model <- provider$language_model("deepseek-chat")

  # Records the timeout_seconds value received by the mocked transport.
  captured_timeout <- NULL

  testthat::local_mocked_bindings(
    post_to_api = function(url, headers, body, timeout_seconds = NULL, ...) {
      captured_timeout <<- timeout_seconds
      list(
        choices = list(list(
          message = list(
            content = "ok",
            reasoning_content = NULL
          ),
          finish_reason = "stop"
        )),
        usage = list(prompt_tokens = 1, completion_tokens = 1, total_tokens = 2)
      )
    },
    .package = "aisdk"
  )

  result <- model$do_generate(list(
    messages = list(list(role = "user", content = "Hello"))
  ))

  expect_equal(result$text, "ok")
  expect_equal(captured_timeout, 600)
})

# Live API tests (only run when API key is available)
test_that("DeepSeek provider can make real API calls", {
  skip_if_no_api_key("DeepSeek")
  skip_on_cran()

  provider <- create_deepseek()
  model <- provider$language_model("deepseek-chat")

  # Make a simple API call
  result <- model$generate(
    messages = list(
      list(role = "user", content = "Say 'Hello, World!'")
    ),
    max_tokens = 10
  )

  # Check that we got a response
  expect_true(!is.null(result$text))
  expect_true(nchar(result$text) > 0)
})

test_that("DeepSeek reasoner model returns reasoning content", {
  skip_if_no_api_key("DeepSeek")
  skip_on_cran()

  provider <- create_deepseek()
  model <- provider$language_model("deepseek-reasoner")

  # Make a call that should trigger reasoning
  result <- model$generate(
    messages = list(
      list(role = "user", content = "What is 15 * 23? Think step by step.")
    ),
    max_tokens = 500
  )

  # Check that we got a response
  expect_true(!is.null(result$text))
  expect_true(nchar(result$text) > 0)

  # Reasoning content should be present for deepseek-reasoner
  # Note: This may be NULL if the model doesn't return reasoning for simple queries
  # So we just check that the field exists
  expect_true("reasoning" %in% names(result))
})

test_that("DeepSeek provider handles tool calls", {
  skip_if_no_api_key("DeepSeek")
  skip_on_cran()

  provider <- create_deepseek()
  model <- provider$language_model("deepseek-chat")

  # Create a simple test tool
  test_tool <- Tool$new(
    name = "get_time",
    description = "Get the current time",
    parameters = z_object(.dummy = z_string("Unused")),
    execute = function(args) {
      paste0("Current time: ", Sys.time())
    }
  )

  # Call model with tool
  result <- model$generate(
    messages = list(
      list(role = "user", content = "What time is it?")
    ),
    tools = list(test_tool),
    max_tokens = 50
  )

  # Check response: either text or a tool call is acceptable
  expect_true(!is.null(result$text) || !is.null(result$tool_calls))
})

# ============================================================================
# DeepSeek Anthropic API Tests
# ============================================================================

test_that("create_deepseek_anthropic() creates an Anthropic provider with DeepSeek config", {
  # Use safe provider creation
  provider <- safe_create_provider(create_deepseek_anthropic)

  expect_s3_class(provider, "AnthropicProvider")
  expect_equal(provider$specification_version, "v1")
})

test_that("DeepSeek Anthropic provider creates language model correctly", {
  provider <- safe_create_provider(create_deepseek_anthropic)
  model <- provider$language_model("deepseek-chat")

  expect_s3_class(model, "AnthropicLanguageModel")
  expect_equal(model$model_id, "deepseek-chat")
  expect_equal(model$provider, "deepseek")
})

test_that("create_deepseek_anthropic() warns when API key is missing", {
  # Blank both keys for this test only. local_envvar() restores the previous
  # state (including "unset") when the test exits.
  withr::local_envvar(c(DEEPSEEK_API_KEY = "", ANTHROPIC_API_KEY = ""))

  expect_warning(
    create_deepseek_anthropic(),
    "Anthropic API key not set"
  )
})