# Checks that build_gemini_batch_requests() returns a tibble with one request
# per pair and that the first request's user prompt carries both sample labels.
testthat::test_that("build_gemini_batch_requests builds valid requests", {
  data("example_writing_samples", package = "pairwiseLLM")
  trait <- trait_description("overall_quality")
  two_pairs <- make_pairs(example_writing_samples)[1:2, ]

  batch <- build_gemini_batch_requests(
    pairs = two_pairs,
    model = "gemini-3-pro-preview",
    trait_name = trait$name,
    trait_description = trait$description,
    prompt_template = set_prompt_template(),
    thinking_level = "low"
  )

  # Overall shape of the returned table
  testthat::expect_s3_class(batch, "tbl_df")
  testthat::expect_equal(nrow(batch), 2L)
  required_cols <- c("custom_id", "request")
  testthat::expect_true(all(required_cols %in% names(batch)))

  # Structure of the first request body
  first_request <- batch$request[[1]]
  testthat::expect_true(is.list(first_request$contents))
  testthat::expect_true(is.list(first_request$generationConfig))

  # The first content entry is the user message
  user_msg <- first_request$contents[[1]]
  testthat::expect_equal(user_msg$role, "user")
  testthat::expect_true(is.list(user_msg$parts))

  # The labels only need to appear somewhere in the prompt text; they are not
  # required to be wrapped in angle brackets.
  prompt_text <- user_msg$parts[[1]]$text
  for (label in c("SAMPLE_1", "SAMPLE_2")) {
    testthat::expect_true(grepl(label, prompt_text, fixed = TRUE))
  }
})
# Feeds parse_gemini_batch_output() a two-line JSONL file (one succeeded
# result, one errored result) and checks the parsed row for each line.
# The description is kept on a single line so the test name contains no
# embedded line break.
testthat::test_that(
  "parse_gemini_batch_output handles succeeded and errored results",
  {
    tmp <- tempfile(fileext = ".jsonl")
    on.exit(unlink(tmp), add = TRUE)

    # Succeeded result line, similar in spirit to live responses
    succ_resp <- list(
      model = "gemini-3-pro-preview",
      candidates = list(
        list(
          content = list(
            parts = list(
              list(text = "SAMPLE_2 Hello!")
            )
          )
        )
      ),
      usageMetadata = list(
        promptTokenCount = 10L,
        candidatesTokenCount = 5L,
        totalTokenCount = 15L
      )
    )
    line_ok <- list(
      custom_id = "GEM_S01_vs_S02",
      result = list(
        type = "succeeded",
        response = succ_resp
      )
    )

    # Errored result line
    line_err <- list(
      custom_id = "GEM_S03_vs_S04",
      result = list(
        type = "errored",
        error = list(
          code = 400L,
          message = "Validation error",
          status = "INVALID_ARGUMENT"
        )
      )
    )

    # One JSON document per line (JSONL)
    json_lines <- c(
      jsonlite::toJSON(line_ok, auto_unbox = TRUE),
      jsonlite::toJSON(line_err, auto_unbox = TRUE)
    )
    writeLines(json_lines, con = tmp, useBytes = TRUE)

    # parse_gemini_batch_output() expects a requests_tbl with
    # custom_id / ID1 / ID2 in the same order as the requests.
    requests_tbl <- tibble::tibble(
      custom_id = c("GEM_S01_vs_S02", "GEM_S03_vs_S04"),
      ID1 = c("S01", "S03"),
      ID2 = c("S02", "S04")
    )

    res <- parse_gemini_batch_output(
      results_path = tmp,
      requests_tbl = requests_tbl
    )
    testthat::expect_s3_class(res, "tbl_df")
    testthat::expect_equal(nrow(res), 2L)

    # First row: succeeded
    r1 <- res[1, ]
    testthat::expect_equal(r1$custom_id, "GEM_S01_vs_S02")
    testthat::expect_equal(r1$ID1, "S01")
    testthat::expect_equal(r1$ID2, "S02")
    testthat::expect_equal(r1$result_type, "succeeded")
    testthat::expect_equal(r1$status_code, 200L)
    testthat::expect_true(is.na(r1$error_message))
    testthat::expect_equal(r1$model, "gemini-3-pro-preview")
    testthat::expect_equal(r1$better_sample, "SAMPLE_2")
    testthat::expect_equal(r1$better_id, "S02")
    testthat::expect_equal(r1$prompt_tokens, 10)
    testthat::expect_equal(r1$completion_tokens, 5)
    testthat::expect_equal(r1$total_tokens, 15)

    # Second row: errored
    r2 <- res[2, ]
    testthat::expect_equal(r2$custom_id, "GEM_S03_vs_S04")
    testthat::expect_equal(r2$ID1, "S03")
    testthat::expect_equal(r2$ID2, "S04")
    testthat::expect_equal(r2$result_type, "errored")
    testthat::expect_true(is.na(r2$status_code))
    testthat::expect_match(r2$error_message, "Validation error")
    testthat::expect_true(is.na(r2$content))
    testthat::expect_true(is.na(r2$better_sample))
    testthat::expect_true(is.na(r2$better_id))
  }
)
# A results file whose only line is not valid JSON must still yield one row,
# with an NA custom_id and a parse-failure error message.
# The description is kept on a single line so the test name contains no
# embedded line break.
testthat::test_that(
  "parse_gemini_batch_output handles invalid JSON lines gracefully",
  {
    tmp <- tempfile(fileext = ".jsonl")
    on.exit(unlink(tmp), add = TRUE)
    writeLines("not-json", con = tmp, useBytes = TRUE)

    # A requests_tbl is still passed for invalid JSON; the IDs are dummies.
    requests_tbl <- tibble::tibble(
      custom_id = "GEM_S01_vs_S02",
      ID1 = "S01",
      ID2 = "S02"
    )

    res <- parse_gemini_batch_output(
      results_path = tmp,
      requests_tbl = requests_tbl
    )

    testthat::expect_equal(nrow(res), 1L)
    testthat::expect_true(is.na(res$custom_id))
    testthat::expect_match(res$error_message, "Failed to parse JSON line")
  }
)
# End-to-end check of run_gemini_batch_pipeline() with every Gemini helper
# replaced by a mock: the mocks record what they were called with, and the
# expectations verify those recordings plus the shape of the returned list.
# The description is kept on a single line so the test name contains no
# embedded line break.
testthat::test_that(
  "run_gemini_batch_pipeline works with polling and parsing (mocked)",
  {
    pairs <- tibble::tibble(
      ID1 = "S01",
      text1 = "Text 1",
      ID2 = "S02",
      text2 = "Text 2"
    )

    # requests_tbl shape: custom_id + ID1 + ID2 + request
    fake_req_tbl <- tibble::tibble(
      custom_id = "GEM_S01_vs_S02",
      ID1 = "S01",
      ID2 = "S02",
      request = list(list(dummy = TRUE))
    )

    # Batch object returned on creation vs. after polling
    fake_batch_initial <- list(
      name = "batches/123",
      metadata = list(state = "JOB_STATE_RUNNING")
    )
    fake_batch_final <- list(
      name = "batches/123",
      metadata = list(state = "JOB_STATE_SUCCEEDED")
    )

    fake_results <- tibble::tibble(
      custom_id = "GEM_S01_vs_S02",
      ID1 = "S01",
      ID2 = "S02",
      model = "gemini-3-pro-preview",
      object_type = "generateContent",
      status_code = 200L,
      result_type = "succeeded",
      error_message = NA_character_,
      content = "SAMPLE_1",
      better_sample = "SAMPLE_1",
      better_id = "S01",
      prompt_tokens = 10,
      completion_tokens = 5,
      total_tokens = 15
    )

    # Filled in by the mocks via `<<-` so the test can inspect the calls
    created_batch_name <- NULL
    polled_batch_name <- NULL
    download_batch_obj <- NULL
    parsed_path <- NULL

    td <- list(name = "Overall quality", description = "Quality")
    tmpl <- set_prompt_template()

    testthat::with_mocked_bindings(
      build_gemini_batch_requests = function(pairs, model, trait_name,
                                             trait_description,
                                             prompt_template, thinking_level,
                                             ...) {
        fake_req_tbl
      },
      gemini_create_batch = function(requests, model, api_key, api_version,
                                     display_name = NULL) {
        created_batch_name <<- "batches/123"
        fake_batch_initial
      },
      gemini_poll_batch_until_complete = function(batch_name, interval_seconds,
                                                  timeout_seconds, api_key,
                                                  api_version, verbose) {
        polled_batch_name <<- batch_name
        fake_batch_final
      },
      gemini_download_batch_results = function(batch, requests_tbl,
                                               output_path, api_key,
                                               api_version) {
        download_batch_obj <<- batch
        # Write a dummy .jsonl file so that parse_* can read it
        writeLines('{"dummy": true}', con = output_path)
        invisible(output_path)
      },
      # Signature: parse_gemini_batch_output(results_path, requests_tbl)
      parse_gemini_batch_output = function(results_path, requests_tbl) {
        parsed_path <<- results_path
        fake_results
      },
      {
        res <- run_gemini_batch_pipeline(
          pairs = pairs,
          model = "gemini-3-pro-preview",
          trait_name = td$name,
          trait_description = td$description,
          prompt_template = tmpl,
          thinking_level = "low",
          interval_seconds = 0,
          timeout_seconds = 10,
          verbose = FALSE
        )

        # Each mocked stage was reached with the expected input
        testthat::expect_equal(created_batch_name, "batches/123")
        testthat::expect_equal(polled_batch_name, "batches/123")
        testthat::expect_identical(download_batch_obj, fake_batch_final)
        testthat::expect_true(file.exists(res$batch_input_path))
        testthat::expect_true(file.exists(res$batch_output_path))
        testthat::expect_true(file.exists(parsed_path))

        # Return structure should mirror other batch pipelines
        testthat::expect_true(all(c(
          "batch_input_path", "batch_output_path", "file", "batch", "results"
        ) %in% names(res)))
        testthat::expect_null(res$file)
        testthat::expect_equal(res$batch$metadata$state, "JOB_STATE_SUCCEEDED")
        testthat::expect_equal(res$results$better_id, "S01")
      }
    )
  }
)
# With poll = FALSE, run_gemini_batch_pipeline() must stop after creating the
# batch: the poll / download / parse mocks flag themselves and raise an error
# if they are ever reached.
# The description is kept on a single line so the test name contains no
# embedded line break.
testthat::test_that(
  "run_gemini_batch_pipeline does not poll or parse when poll = FALSE",
  {
    pairs <- tibble::tibble(
      ID1 = "S01",
      text1 = "Text 1",
      ID2 = "S02",
      text2 = "Text 2"
    )
    fake_req_tbl <- tibble::tibble(
      custom_id = "GEM_S01_vs_S02",
      ID1 = "S01",
      ID2 = "S02",
      request = list(list(dummy = TRUE))
    )
    fake_batch_initial <- list(
      name = "batches/123",
      metadata = list(state = "JOB_STATE_RUNNING")
    )

    # Flipped to TRUE by the corresponding mock via `<<-` if it is called
    poll_called <- FALSE
    download_called <- FALSE
    parse_called <- FALSE

    td <- list(name = "Overall quality", description = "Quality")
    tmpl <- set_prompt_template()

    testthat::with_mocked_bindings(
      build_gemini_batch_requests = function(pairs, model, trait_name,
                                             trait_description,
                                             prompt_template, thinking_level,
                                             ...) {
        fake_req_tbl
      },
      gemini_create_batch = function(requests, model, api_key, api_version,
                                     display_name = NULL) {
        fake_batch_initial
      },
      gemini_poll_batch_until_complete = function(batch_name, interval_seconds,
                                                  timeout_seconds, api_key,
                                                  api_version, verbose) {
        poll_called <<- TRUE
        stop("Polling should not be called when poll = FALSE")
      },
      gemini_download_batch_results = function(batch, requests_tbl,
                                               output_path, api_key,
                                               api_version) {
        download_called <<- TRUE
        stop("Download should not be called when poll = FALSE")
      },
      # Signature: parse_gemini_batch_output(results_path, requests_tbl)
      parse_gemini_batch_output = function(results_path, requests_tbl) {
        parse_called <<- TRUE
        stop("Parse should not be called when poll = FALSE")
      },
      {
        res <- run_gemini_batch_pipeline(
          pairs = pairs,
          model = "gemini-3-pro-preview",
          trait_name = td$name,
          trait_description = td$description,
          prompt_template = tmpl,
          thinking_level = "low",
          poll = FALSE
        )

        testthat::expect_false(poll_called)
        testthat::expect_false(download_called)
        testthat::expect_false(parse_called)
        testthat::expect_true(file.exists(res$batch_input_path))
        testthat::expect_null(res$batch_output_path)
        testthat::expect_null(res$results)

        # Standardised return structure
        testthat::expect_true(all(c(
          "batch_input_path", "batch_output_path", "file", "batch", "results"
        ) %in% names(res)))
        testthat::expect_null(res$file)
        testthat::expect_equal(res$batch$metadata$state, "JOB_STATE_RUNNING")
      }
    )
  }
)
# tests/testthat/test-gemini-batch-api.R
# ------------------------------------------------------------------------------
# build_gemini_batch_requests
# ------------------------------------------------------------------------------
# Builds requests for the first two example pairs and inspects the resulting
# table and the first request body.
testthat::test_that("build_gemini_batch_requests builds valid requests", {
  data("example_writing_samples", package = "pairwiseLLM")
  all_pairs <- make_pairs(example_writing_samples)
  trait <- trait_description("overall_quality")
  template <- set_prompt_template()

  requests <- build_gemini_batch_requests(
    pairs = all_pairs[1:2, ],
    model = "gemini-3-pro-preview",
    trait_name = trait$name,
    trait_description = trait$description,
    prompt_template = template,
    thinking_level = "low"
  )

  # Table-level checks
  testthat::expect_s3_class(requests, "tbl_df")
  testthat::expect_equal(nrow(requests), 2L)
  has_required_cols <- all(c("custom_id", "request") %in% names(requests))
  testthat::expect_true(has_required_cols)

  # Request-level checks on the first row
  body <- requests$request[[1]]
  testthat::expect_true(is.list(body$contents))
  testthat::expect_true(is.list(body$generationConfig))

  # The first content entry is the user message and holds the prompt text
  message <- body$contents[[1]]
  testthat::expect_equal(message$role, "user")
  message_parts <- message$parts
  testthat::expect_true(is.list(message_parts))

  # Only the bare labels are required; angle brackets are not.
  prompt <- message_parts[[1]]$text
  mentions_first <- grepl("SAMPLE_1", prompt, fixed = TRUE)
  mentions_second <- grepl("SAMPLE_2", prompt, fixed = TRUE)
  testthat::expect_true(mentions_first)
  testthat::expect_true(mentions_second)
})
# ------------------------------------------------------------------------------
# parse_gemini_batch_output: normal + error-shaped lines
# ------------------------------------------------------------------------------
# Writes one succeeded and one errored result line to a JSONL file, parses it,
# and checks the columns of each resulting row.
testthat::test_that(
  "parse_gemini_batch_output handles succeeded and errored results",
  {
    results_file <- tempfile(fileext = ".jsonl")
    on.exit(unlink(results_file), add = TRUE)

    ok_id <- "GEM_S01_vs_S02"
    err_id <- "GEM_S03_vs_S04"

    # Succeeded result line, similar in spirit to live responses
    answer_part <- list(text = "SAMPLE_2 Hello!")
    ok_line <- list(
      custom_id = ok_id,
      result = list(
        type = "succeeded",
        response = list(
          model = "gemini-3-pro-preview",
          candidates = list(
            list(content = list(parts = list(answer_part)))
          ),
          usageMetadata = list(
            promptTokenCount = 10L,
            candidatesTokenCount = 5L,
            totalTokenCount = 15L
          )
        )
      )
    )

    # Errored result line
    err_line <- list(
      custom_id = err_id,
      result = list(
        type = "errored",
        error = list(
          code = 400L,
          message = "Validation error",
          status = "INVALID_ARGUMENT"
        )
      )
    )

    # Serialise each record to a single JSON string (one per output line)
    jsonl <- vapply(
      list(ok_line, err_line),
      function(record) {
        as.character(jsonlite::toJSON(record, auto_unbox = TRUE))
      },
      character(1)
    )
    writeLines(jsonl, con = results_file, useBytes = TRUE)

    # The parser is given a requests_tbl with custom_id / ID1 / ID2
    parsed <- parse_gemini_batch_output(
      results_path = results_file,
      requests_tbl = tibble::tibble(
        custom_id = c(ok_id, err_id),
        ID1 = c("S01", "S03"),
        ID2 = c("S02", "S04")
      )
    )
    testthat::expect_s3_class(parsed, "tbl_df")
    testthat::expect_equal(nrow(parsed), 2L)

    # Row 1: succeeded
    ok_row <- parsed[1, ]
    testthat::expect_equal(ok_row$custom_id, "GEM_S01_vs_S02")
    testthat::expect_equal(ok_row$ID1, "S01")
    testthat::expect_equal(ok_row$ID2, "S02")
    testthat::expect_equal(ok_row$result_type, "succeeded")
    testthat::expect_equal(ok_row$status_code, 200L)
    testthat::expect_true(is.na(ok_row$error_message))
    testthat::expect_equal(ok_row$model, "gemini-3-pro-preview")
    testthat::expect_equal(ok_row$better_sample, "SAMPLE_2")
    testthat::expect_equal(ok_row$better_id, "S02")
    testthat::expect_equal(ok_row$prompt_tokens, 10)
    testthat::expect_equal(ok_row$completion_tokens, 5)
    testthat::expect_equal(ok_row$total_tokens, 15)

    # Row 2: errored
    err_row <- parsed[2, ]
    testthat::expect_equal(err_row$custom_id, "GEM_S03_vs_S04")
    testthat::expect_equal(err_row$ID1, "S03")
    testthat::expect_equal(err_row$ID2, "S04")
    testthat::expect_equal(err_row$result_type, "errored")
    testthat::expect_true(is.na(err_row$status_code))
    testthat::expect_match(err_row$error_message, "Validation error")
    testthat::expect_true(is.na(err_row$content))
    testthat::expect_true(is.na(err_row$better_sample))
    testthat::expect_true(is.na(err_row$better_id))
  }
)
# Parses a file whose single line is not JSON and checks that one row comes
# back with an NA custom_id and a parse-failure message.
testthat::test_that(
  "parse_gemini_batch_output handles invalid JSON lines gracefully",
  {
    broken_file <- tempfile(fileext = ".jsonl")
    on.exit(unlink(broken_file), add = TRUE)
    writeLines("not-json", con = broken_file, useBytes = TRUE)

    # A requests_tbl is still supplied; its IDs are dummies.
    dummy_requests <- tibble::tibble(
      custom_id = "GEM_S01_vs_S02",
      ID1 = "S01",
      ID2 = "S02"
    )

    parsed <- parse_gemini_batch_output(
      results_path = broken_file,
      requests_tbl = dummy_requests
    )

    testthat::expect_equal(nrow(parsed), 1L)
    testthat::expect_true(is.na(parsed$custom_id))
    testthat::expect_match(
      parsed$error_message,
      "Failed to parse JSON line"
    )
  }
)
# ------------------------------------------------------------------------------
# gemini_download_batch_results: validations + mismatch warning + JSONL writing
# ------------------------------------------------------------------------------
# A requests table without a `custom_id` column must be rejected with an error
# mentioning both `requests_tbl` and `custom_id`.
testthat::test_that(
  "gemini_download_batch_results validates requests_tbl structure",
  {
    out_file <- tempfile(fileext = ".jsonl")
    on.exit(unlink(out_file), add = TRUE)

    # Only an `id` column: the required `custom_id` column is absent
    requests_without_id <- tibble::tibble(id = 1:2)
    batch_stub <- list(
      response = list(inlinedResponses = data.frame(x = 1))
    )

    testthat::expect_error(
      gemini_download_batch_results(
        batch = batch_stub,
        requests_tbl = requests_without_id,
        output_path = out_file,
        api_key = "TEST_KEY",
        api_version = "v1beta"
      ),
      "requests_tbl.*custom_id",
      fixed = FALSE
    )
  }
)
# A batch whose response$inlinedResponses is NULL must raise the
# "does not contain" error.
testthat::test_that(
  "gemini_download_batch_results errors when inline responses are missing",
  {
    out_file <- tempfile(fileext = ".jsonl")
    on.exit(unlink(out_file), add = TRUE)

    one_request <- tibble::tibble(custom_id = "id1")
    # inlinedResponses deliberately NULL to hit the error branch
    empty_batch <- list(response = list(inlinedResponses = NULL))

    expected_pattern <-
      "Batch does not contain response\\$inlinedResponses\\$inlinedResponses"

    testthat::expect_error(
      gemini_download_batch_results(
        batch = empty_batch,
        requests_tbl = one_request,
        output_path = out_file,
        api_key = "TEST_KEY",
        api_version = "v1beta"
      ),
      expected_pattern,
      fixed = FALSE
    )
  }
)
# With 3 requests but only 2 inlined responses, the download helper should
# warn about the mismatch and still write a 2-line JSONL file.
testthat::test_that(
  "gemini_download_batch_results writes JSONL and warns on count mismatch",
  {
    out_file <- tempfile(fileext = ".jsonl")
    on.exit(unlink(out_file), add = TRUE)

    # inlinedResponses supplied directly as a data.frame with two rows
    two_responses <- data.frame(score = c(1, 2), stringsAsFactors = FALSE)
    batch_stub <- list(response = list(inlinedResponses = two_responses))

    # Three requests -> one more than the number of responses
    three_requests <- tibble::tibble(custom_id = c("req1", "req2", "req3"))

    testthat::expect_warning(
      {
        written_path <- gemini_download_batch_results(
          batch = batch_stub,
          requests_tbl = three_requests,
          output_path = out_file,
          api_key = "TEST_KEY",
          api_version = "v1beta"
        )
        testthat::expect_true(file.exists(written_path))

        # Only min(3, 2) = 2 lines should be written
        written <- readLines(written_path, warn = FALSE, encoding = "UTF-8")
        testthat::expect_equal(length(written), 2L)
      },
      "Number of inlined responses",
      fixed = FALSE
    )
  }
)
# ------------------------------------------------------------------------------
# gemini_poll_batch_until_complete: validation + timeout branch
# ------------------------------------------------------------------------------
# An empty batch_name must be rejected with the exact validation message.
testthat::test_that(
  "gemini_poll_batch_until_complete validates batch_name",
  {
    expected_msg <- "`batch_name` must be a non-empty character scalar."

    testthat::expect_error(
      gemini_poll_batch_until_complete(
        batch_name = "",
        interval_seconds = 0,
        timeout_seconds = 1,
        api_key = "TEST_KEY",
        api_version = "v1beta",
        verbose = FALSE
      ),
      expected_msg,
      fixed = TRUE # literal match: the message contains regex metacharacters
    )
  }
)
# With gemini_get_batch() mocked to always report a running batch and a zero
# timeout, polling should warn about the timeout and hand back the last batch.
testthat::test_that(
  "gemini_poll_batch_until_complete respects timeout_seconds and returns last batch",
  {
    # Mock that always reports a non-terminal state so the timeout logic is hit
    always_running <- function(batch_name, api_key, api_version = "v1beta") {
      list(
        name = batch_name,
        metadata = list(state = "BATCH_STATE_RUNNING")
      )
    }

    testthat::with_mocked_bindings(
      gemini_get_batch = always_running,
      {
        testthat::expect_warning(
          {
            last_batch <- gemini_poll_batch_until_complete(
              batch_name = "batches/timeout",
              interval_seconds = 0,
              timeout_seconds = 0,
              api_key = "TEST_KEY",
              api_version = "v1beta",
              verbose = TRUE
            )
            testthat::expect_type(last_batch, "list")
            testthat::expect_equal(
              last_batch$metadata$state,
              "BATCH_STATE_RUNNING"
            )
          },
          "Timeout reached while waiting for Gemini batch to complete",
          fixed = FALSE
        )
      }
    )
  }
)
# ------------------------------------------------------------------------------
# run_gemini_batch_pipeline (mocked end-to-end)
# ------------------------------------------------------------------------------
# Runs run_gemini_batch_pipeline() with every Gemini helper mocked. Each mock
# records what it received in the `seen` environment; the expectations then
# check those records and the structure of the returned list.
testthat::test_that(
  "run_gemini_batch_pipeline works with polling and parsing (mocked)",
  {
    one_pair <- tibble::tibble(
      ID1 = "S01",
      text1 = "Text 1",
      ID2 = "S02",
      text2 = "Text 2"
    )

    # requests_tbl shape: custom_id + ID1 + ID2 + request
    stub_requests <- tibble::tibble(
      custom_id = "GEM_S01_vs_S02",
      ID1 = "S01",
      ID2 = "S02",
      request = list(list(dummy = TRUE))
    )

    # Batch objects handed out by the create and poll mocks respectively
    batch_running <- list(
      name = "batches/123",
      metadata = list(state = "JOB_STATE_RUNNING")
    )
    batch_done <- list(
      name = "batches/123",
      metadata = list(state = "JOB_STATE_SUCCEEDED")
    )

    stub_results <- tibble::tibble(
      custom_id = "GEM_S01_vs_S02",
      ID1 = "S01",
      ID2 = "S02",
      model = "gemini-3-pro-preview",
      object_type = "generateContent",
      status_code = 200L,
      result_type = "succeeded",
      error_message = NA_character_,
      content = "SAMPLE_1",
      better_sample = "SAMPLE_1",
      better_id = "S01",
      prompt_tokens = 10,
      completion_tokens = 5,
      total_tokens = 15
    )

    # Call recorder; unset entries read back as NULL
    seen <- new.env()

    mock_build <- function(pairs, model, trait_name,
                           trait_description,
                           prompt_template, thinking_level,
                           ...) {
      stub_requests
    }
    mock_create <- function(requests, model, api_key, api_version,
                            display_name = NULL) {
      seen$created_batch_name <- "batches/123"
      batch_running
    }
    mock_poll <- function(batch_name, interval_seconds,
                          timeout_seconds, api_key,
                          api_version, verbose) {
      seen$polled_batch_name <- batch_name
      batch_done
    }
    mock_download <- function(batch, requests_tbl, output_path,
                              api_key, api_version) {
      seen$download_batch <- batch
      # Write a dummy .jsonl file so that parse_* can read it
      writeLines('{"dummy": true}', con = output_path)
      invisible(output_path)
    }
    mock_parse <- function(results_path, requests_tbl) {
      seen$parsed_path <- results_path
      stub_results
    }

    trait <- list(name = "Overall quality", description = "Quality")
    template <- set_prompt_template()

    testthat::with_mocked_bindings(
      build_gemini_batch_requests = mock_build,
      gemini_create_batch = mock_create,
      gemini_poll_batch_until_complete = mock_poll,
      gemini_download_batch_results = mock_download,
      parse_gemini_batch_output = mock_parse,
      {
        out <- run_gemini_batch_pipeline(
          pairs = one_pair,
          model = "gemini-3-pro-preview",
          trait_name = trait$name,
          trait_description = trait$description,
          prompt_template = template,
          thinking_level = "low",
          interval_seconds = 0,
          timeout_seconds = 10,
          verbose = FALSE
        )

        # Every stage was reached with the expected input
        testthat::expect_equal(seen$created_batch_name, "batches/123")
        testthat::expect_equal(seen$polled_batch_name, "batches/123")
        testthat::expect_identical(seen$download_batch, batch_done)
        testthat::expect_true(file.exists(out$batch_input_path))
        testthat::expect_true(file.exists(out$batch_output_path))
        testthat::expect_true(file.exists(seen$parsed_path))

        # Standardised return structure
        expected_names <- c(
          "batch_input_path", "batch_output_path", "file", "batch", "results"
        )
        testthat::expect_true(all(expected_names %in% names(out)))
        testthat::expect_null(out$file)
        testthat::expect_equal(out$batch$metadata$state, "JOB_STATE_SUCCEEDED")
        testthat::expect_equal(out$results$better_id, "S01")
      }
    )
  }
)
# With poll = FALSE the pipeline must stop after batch creation. The poll,
# download and parse mocks set a flag in `called` and then raise an error, so
# reaching any of them fails the test.
testthat::test_that(
  "run_gemini_batch_pipeline does not poll or parse when poll = FALSE",
  {
    one_pair <- tibble::tibble(
      ID1 = "S01",
      text1 = "Text 1",
      ID2 = "S02",
      text2 = "Text 2"
    )
    stub_requests <- tibble::tibble(
      custom_id = "GEM_S01_vs_S02",
      ID1 = "S01",
      ID2 = "S02",
      request = list(list(dummy = TRUE))
    )
    batch_running <- list(
      name = "batches/123",
      metadata = list(state = "JOB_STATE_RUNNING")
    )

    # One flag per stage that must stay untouched
    called <- new.env()
    called$poll <- FALSE
    called$download <- FALSE
    called$parse <- FALSE

    mock_build <- function(pairs, model, trait_name,
                           trait_description,
                           prompt_template, thinking_level,
                           ...) {
      stub_requests
    }
    mock_create <- function(requests, model, api_key, api_version,
                            display_name = NULL) {
      batch_running
    }
    mock_poll <- function(batch_name, interval_seconds,
                          timeout_seconds, api_key,
                          api_version, verbose) {
      called$poll <- TRUE
      stop("Polling should not be called when poll = FALSE")
    }
    mock_download <- function(batch, requests_tbl, output_path,
                              api_key, api_version) {
      called$download <- TRUE
      stop("Download should not be called when poll = FALSE")
    }
    mock_parse <- function(results_path, requests_tbl) {
      called$parse <- TRUE
      stop("Parse should not be called when poll = FALSE")
    }

    trait <- list(name = "Overall quality", description = "Quality")
    template <- set_prompt_template()

    testthat::with_mocked_bindings(
      build_gemini_batch_requests = mock_build,
      gemini_create_batch = mock_create,
      gemini_poll_batch_until_complete = mock_poll,
      gemini_download_batch_results = mock_download,
      parse_gemini_batch_output = mock_parse,
      {
        out <- run_gemini_batch_pipeline(
          pairs = one_pair,
          model = "gemini-3-pro-preview",
          trait_name = trait$name,
          trait_description = trait$description,
          prompt_template = template,
          thinking_level = "low",
          poll = FALSE
        )

        testthat::expect_false(called$poll)
        testthat::expect_false(called$download)
        testthat::expect_false(called$parse)
        testthat::expect_true(file.exists(out$batch_input_path))
        testthat::expect_null(out$batch_output_path)
        testthat::expect_null(out$results)

        # Standardised return structure
        expected_names <- c(
          "batch_input_path", "batch_output_path", "file", "batch", "results"
        )
        testthat::expect_true(all(expected_names %in% names(out)))
        testthat::expect_null(out$file)
        testthat::expect_equal(out$batch$metadata$state, "JOB_STATE_RUNNING")
      }
    )
  }
)
# Covers input validation (missing columns, empty model), the warnings for
# `thinking_budget` and `thinking_level = "medium"`, and that temperature /
# top_p / include_thoughts end up in the request's generationConfig.
testthat::test_that(
  "build_gemini_batch_requests validates inputs and handles parameters",
  {
    td <- trait_description("overall_quality")

    # 1. Missing columns (no ID2 / text2)
    bad_pairs <- tibble::tibble(ID1 = "A", text1 = "txt")
    testthat::expect_error(
      build_gemini_batch_requests(
        bad_pairs, "gemini-model", td$name, td$description
      ),
      "must contain columns"
    )

    # 2. Invalid (empty) model
    pairs <- tibble::tibble(ID1 = "A", text1 = "t", ID2 = "B", text2 = "t")
    testthat::expect_error(
      build_gemini_batch_requests(pairs, "", td$name, td$description),
      "model.*must be a non-empty character"
    )

    # 3. Warnings for thinking_budget and medium level
    testthat::expect_warning(
      build_gemini_batch_requests(
        pairs, "gemini-model", td$name, td$description,
        thinking_budget = 1000 # Should trigger warning
      ),
      "thinking_budget.*is ignored"
    )
    testthat::expect_warning(
      build_gemini_batch_requests(
        pairs, "gemini-model", td$name, td$description,
        thinking_level = "medium" # Should trigger warning
      ),
      "thinking_level = \"medium\".*mapping to \"High\""
    )

    # 4. Check parameter passthrough (temperature, top_p, etc.)
    batch <- build_gemini_batch_requests(
      pairs, "gemini-model", td$name, td$description,
      temperature = 0.7,
      top_p = 0.9,
      include_thoughts = TRUE
    )
    config <- batch$request[[1]]$generationConfig
    testthat::expect_equal(config$temperature, 0.7)
    testthat::expect_equal(config$topP, 0.9)
    testthat::expect_true(config$thinkingConfig$includeThoughts)
  }
)
# gemini_create_batch() must reject an empty request list and an empty model.
testthat::test_that("gemini_create_batch validates inputs", {
  # Empty request list
  no_requests <- list()
  testthat::expect_error(
    gemini_create_batch(no_requests, "model"),
    "must be a non-empty list"
  )

  # Non-empty requests but an empty model string
  some_requests <- list(a = 1)
  testthat::expect_error(
    gemini_create_batch(some_requests, ""),
    "model.*must be a non-empty character"
  )
})
# Passes a batch *name* (string) rather than a batch object, with
# gemini_get_batch() mocked to return a fake batch, and checks that the
# mismatch warning is raised and the output file is written.
testthat::test_that(
  "gemini_download_batch_results handles batch name string and count mismatch",
  {
    # Build a data frame whose `response` column is itself a nested data
    # frame, so that inlined$response is a data.frame.
    resp_col <- data.frame(a = 1:2)
    inlined <- data.frame(row_id = 1:2)
    inlined$response <- resp_col
    mock_batch <- list(
      response = list(
        inlinedResponses = inlined
      )
    )

    # Mismatch: 3 requests, but only 2 responses in mock_batch
    req_tbl <- tibble::tibble(custom_id = c("1", "2", "3"))
    tmp <- tempfile()
    on.exit(unlink(tmp), add = TRUE)

    testthat::with_mocked_bindings(
      # Mock get_batch to return the fake batch object for any name
      gemini_get_batch = function(...) mock_batch,
      {
        # Should warn about mismatch. The API key and version are passed
        # explicitly (as in the other download tests) so the test does not
        # depend on whatever default the function would otherwise use.
        testthat::expect_warning(
          gemini_download_batch_results(
            batch = "batches/123",
            requests_tbl = req_tbl,
            output_path = tmp,
            api_key = "TEST_KEY",
            api_version = "v1beta"
          ),
          "Number of inlined responses.*does not match"
        )
        # The string "batches/123" was resolved via the mocked get_batch
        # and results were written to `tmp`
        testthat::expect_true(file.exists(tmp))
      }
    )
  }
)
# Exercises the internal .parse_gemini_pair_response() on an error-shaped
# response, a two-part response and a single-part response.
testthat::test_that(
  ".parse_gemini_pair_response logic extracts thoughts correctly",
  {
    # Builds a response with one candidate whose content has one text part
    # per argument, in order.
    response_with_parts <- function(...) {
      text_parts <- lapply(list(...), function(txt) list(text = txt))
      list(
        candidates = list(
          list(content = list(parts = text_parts))
        )
      )
    }

    # 1. Error response handling
    blocked <- list(error = list(message = "Blocked"))
    parsed_err <- .parse_gemini_pair_response("id", "A", "B", blocked)
    testthat::expect_equal(parsed_err$result_type, "errored")
    testthat::expect_equal(parsed_err$error_message, "Blocked")

    # 2. include_thoughts = TRUE with two parts:
    #    part 1 is the thought, part 2 is the answer
    parsed_two <- .parse_gemini_pair_response(
      "id", "A", "B",
      response_with_parts("Thinking...", "Answer"),
      include_thoughts = TRUE
    )
    testthat::expect_equal(parsed_two$thoughts, "Thinking...")
    testthat::expect_equal(parsed_two$content, "Answer")

    # 3. include_thoughts = TRUE but only one part:
    #    it is treated as content and thoughts is NA
    parsed_one <- .parse_gemini_pair_response(
      "id", "A", "B",
      response_with_parts("Just answer"),
      include_thoughts = TRUE
    )
    testthat::expect_true(is.na(parsed_one$thoughts))
    testthat::expect_equal(parsed_one$content, "Just answer")
  }
)
# Parses the same two-part result twice, once with a request whose
# thinkingConfig has includeThoughts = TRUE and once with FALSE, and checks
# how the parts are split between `thoughts` and `content`.
testthat::test_that(
  "parse_gemini_batch_output detects include_thoughts from request column",
  {
    results_file <- tempfile()
    on.exit(unlink(results_file), add = TRUE)

    # Result line with two parts (thought + content)
    two_part_line <- list(
      custom_id = "CID",
      result = list(
        type = "succeeded",
        response = list(
          candidates = list(
            list(
              content = list(
                parts = list(
                  list(text = "My thought process"),
                  list(text = "Final Answer")
                )
              )
            )
          )
        )
      )
    )
    writeLines(
      jsonlite::toJSON(two_part_line, auto_unbox = TRUE),
      results_file
    )

    # One-row requests table whose request sets includeThoughts to `flag`
    requests_with_flag <- function(flag) {
      tibble::tibble(
        custom_id = "CID", ID1 = "A", ID2 = "B",
        request = list(
          list(
            generationConfig = list(
              thinkingConfig = list(includeThoughts = flag)
            )
          )
        )
      )
    }

    # Case A: request column has includeThoughts = TRUE
    with_thoughts <- parse_gemini_batch_output(
      results_file, requests_with_flag(TRUE)
    )
    testthat::expect_equal(with_thoughts$thoughts, "My thought process")
    testthat::expect_equal(with_thoughts$content, "Final Answer")

    # Case B: request column has includeThoughts = FALSE;
    # everything is concatenated into content
    without_thoughts <- parse_gemini_batch_output(
      results_file, requests_with_flag(FALSE)
    )
    testthat::expect_true(is.na(without_thoughts$thoughts))
    testthat::expect_equal(
      without_thoughts$content,
      "My thought processFinal Answer"
    )
  }
)
# Checks the missing-column error and that thinking_level = "medium" both
# warns and is written to the request body as thinkingLevel "High".
testthat::test_that(
  "build_gemini_batch_requests validates inputs and warns on medium thinking",
  {
    td <- trait_description("overall_quality")
    pairs <- tibble::tibble(ID1 = "A", text1 = "t", ID2 = "B", text2 = "t")

    # Error on missing columns (no ID2 / text2)
    bad_pairs <- tibble::tibble(ID1 = "A", text1 = "t")
    testthat::expect_error(
      build_gemini_batch_requests(
        bad_pairs, "gemini-1.5-pro", td$name, td$description
      ),
      "must contain columns"
    )

    # Warning on thinking_level = "medium"; the assignment happens inside
    # expect_warning() so `req` is available for the check below.
    testthat::expect_warning(
      req <- build_gemini_batch_requests(
        pairs, "gemini-1.5-pro", td$name, td$description,
        thinking_level = "medium"
      ),
      "mapping to \"High\" internally"
    )

    # Verify the mapping occurred in the request body
    config <- req$request[[1]]$generationConfig
    testthat::expect_equal(config$thinkingConfig$thinkingLevel, "High")
  }
)
# With gemini_get_batch() mocked to return a batch holding 2 responses, a
# 3-row requests table must trigger the count-mismatch warning.
testthat::test_that(
  "gemini_download_batch_results warns if response count mismatches request count",
  {
    # Build a data frame whose `response` column is a nested data frame,
    # so that inlined$response is a data.frame with two rows.
    resp_df <- data.frame(
      candidates = I(list(
        list(content = list(parts = list(list(text = "A")))),
        list(content = list(parts = list(list(text = "B"))))
      ))
    )
    inlined <- data.frame(row_id = 1:2)
    inlined$response <- resp_df
    mock_batch <- list(
      response = list(
        inlinedResponses = inlined
      )
    )

    # Provide 3 requests (mismatch with 2 responses)
    req_tbl <- tibble::tibble(custom_id = c("1", "2", "3"))
    tmp <- tempfile()
    on.exit(unlink(tmp), add = TRUE)

    testthat::with_mocked_bindings(
      gemini_get_batch = function(...) mock_batch,
      {
        # The API key and version are passed explicitly (as in the other
        # download tests) so the test does not depend on whatever default
        # the function would otherwise use.
        testthat::expect_warning(
          gemini_download_batch_results(
            batch = "batch_123",
            requests_tbl = req_tbl,
            output_path = tmp,
            api_key = "TEST_KEY",
            api_version = "v1beta"
          ),
          "does not match number of requests"
        )
      }
    )
  }
)