# Tests for sensitivity_summary(), the print methods of the three sensitivity
# classes, and downstream_sensitivity(). Every fixture is a hand-built list
# carrying the relevant S3 class, so no live model calls are made here.

test_that("sensitivity_summary dispatches on prompt_sensitivity", {
  variant_ids <- c("v1", "v2")

  # Two prompts labelled under each of the two variants, all successful.
  run_tbl <- tibble::tibble(
    prompt = rep(c("a", "b"), 2),
    response = c("pos", "neg", "pos", "pos"),
    status = "success",
    cost = 0.01,
    variant = rep(variant_ids, each = 2)
  )
  agree_tbl <- tibble::tibble(
    model_1 = "v1",
    model_2 = "v2",
    pct_agree = 0.5
  )

  prompt_obj <- list(
    results = run_tbl,
    agreement = agree_tbl,
    n_texts = 2L,
    variants = variant_ids
  )
  class(prompt_obj) <- "prompt_sensitivity"

  summary_tbl <- sensitivity_summary(prompt_obj)

  # One summary row per variant, each with a success share of 1.
  expect_s3_class(summary_tbl, "tbl_df")
  expect_equal(nrow(summary_tbl), 2)
  expect_true("variant" %in% names(summary_tbl))
  expect_true(all(summary_tbl$pct_successful == 1))
})

test_that("sensitivity_summary dispatches on temperature_sensitivity", {
  temp_grid <- c(0, 0.5, 1)

  # Two prompts at each of three temperatures, a single replicate.
  run_tbl <- tibble::tibble(
    prompt = rep(c("a", "b"), 3),
    response = rep("pos", 6),
    status = "success",
    cost = 0.01,
    temperature = rep(temp_grid, each = 2),
    replicate = 1L
  )

  temp_obj <- list(
    results = run_tbl,
    temperatures = temp_grid,
    n_reps = 1L,
    n_texts = 2L
  )
  class(temp_obj) <- "temperature_sensitivity"

  summary_tbl <- sensitivity_summary(temp_obj)

  # One summary row per temperature.
  expect_equal(nrow(summary_tbl), 3)
  expect_true("temperature" %in% names(summary_tbl))
})

test_that("sensitivity_summary dispatches on model_sensitivity", {
  model_ids <- c("gpt4", "claude")

  # Two prompts labelled by each of the two models.
  run_tbl <- tibble::tibble(
    prompt = rep(c("a", "b"), 2),
    response = c("pos", "neg", "pos", "neg"),
    status = "success",
    cost = 0.01,
    model = rep(model_ids, each = 2)
  )
  agree_tbl <- tibble::tibble(
    model_1 = "gpt4",
    model_2 = "claude",
    pct_agree = 1
  )

  model_obj <- list(
    results = run_tbl,
    agreement = agree_tbl,
    n_texts = 2L,
    models = model_ids
  )
  class(model_obj) <- "model_sensitivity"

  summary_tbl <- sensitivity_summary(model_obj)

  # One summary row per model.
  expect_equal(nrow(summary_tbl), 2)
  expect_true("model" %in% names(summary_tbl))
})

test_that("print methods work without error", {
  # Prompt sensitivity object with an empty results table.
  prompt_obj <- list(
    results = tibble::tibble(),
    agreement = tibble::tibble(
      model_1 = "v1",
      model_2 = "v2",
      pct_agree = 0.8
    ),
    n_texts = 5L,
    variants = c("v1", "v2")
  )
  class(prompt_obj) <- "prompt_sensitivity"
  expect_output(print(prompt_obj), "Prompt Sensitivity")

  # Temperature sensitivity object; it carries no agreement table.
  temp_obj <- list(
    results = tibble::tibble(x = 1),
    temperatures = c(0, 1),
    n_reps = 2L,
    n_texts = 10L
  )
  class(temp_obj) <- "temperature_sensitivity"
  expect_output(print(temp_obj), "Temperature Sensitivity")

  # Model sensitivity object with an empty results table.
  model_obj <- list(
    results = tibble::tibble(),
    agreement = tibble::tibble(
      model_1 = "a",
      model_2 = "b",
      pct_agree = 0.9
    ),
    n_texts = 5L,
    models = c("a", "b")
  )
  class(model_obj) <- "model_sensitivity"
  expect_output(print(model_obj), "Model Sensitivity")
})

test_that("downstream_sensitivity handles no matching texts", {
  run_tbl <- tibble::tibble(
    prompt = c("x", "y"),
    response = c("pos", "neg"),
    status = "success",
    cost = 0.01,
    variant = "v1"
  )

  prompt_obj <- list(
    results = run_tbl,
    agreement = tibble::tibble(),
    n_texts = 2L,
    variants = "v1"
  )
  class(prompt_obj) <- "prompt_sensitivity"

  # The `text` values share nothing with the prompts "x" and "y" above.
  unmatched_df <- data.frame(
    text = c("no_match_1", "no_match_2"),
    outcome = c(1, 2)
  )

  # Warnings raised by the call are silenced; only the result is inspected.
  fit_tbl <- suppressWarnings(
    downstream_sensitivity(prompt_obj, unmatched_df, outcome ~ llm_label)
  )

  expect_true(all(is.na(fit_tbl$estimate)))
})

test_that("downstream_sensitivity returns coefficients when texts match", {
  text_ids <- c("a", "b", "c", "d", "e")

  run_tbl <- tibble::tibble(
    prompt = text_ids,
    response = c("pos", "neg", "pos", "neg", "pos"),
    status = "success",
    cost = 0.01,
    variant = "v1"
  )

  prompt_obj <- list(
    results = run_tbl,
    agreement = tibble::tibble(),
    n_texts = 5L,
    variants = "v1"
  )
  class(prompt_obj) <- "prompt_sensitivity"

  # The `text` values are exactly the prompts used in the results table.
  matched_df <- data.frame(
    text = text_ids,
    outcome = c(5, 2, 4, 1, 6),
    covar = c(1, 2, 3, 4, 5)
  )

  fit_tbl <- downstream_sensitivity(
    prompt_obj,
    matched_df,
    outcome ~ llm_label + covar
  )

  expect_s3_class(fit_tbl, "tbl_df")
  expect_true("estimate" %in% names(fit_tbl))
  expect_true("p_value" %in% names(fit_tbl))
  expect_true(all(fit_tbl$condition == "v1"))
})