# Tests for cross_design(), replicate_design(), and randomize_design().

test_that("cross_design creates correct factorial grid", {
  # Placeholder strings stand in for chat objects; the assertions below only
  # inspect the list names (surfaced as `chat_name`).
  chats <- list(gpt = "mock_chat", claude = "mock_chat")
  design <- cross_design(
    prompts = c("a", "b"),
    chats = chats,
    temperatures = c(0.0, 1.0)
  )

  expect_s3_class(design, "tbl_df")

  # 2 prompts x 2 chats x 2 temps = 8
  expect_equal(nrow(design), 8)

  # Extra columns are allowed; these four must be present.
  required_cols <- c("condition_id", "prompt", "chat_name", "temperature")
  expect_true(all(required_cols %in% names(design)))

  # Every level of every factor appears in the grid.
  expect_setequal(unique(design$prompt), c("a", "b"))
  expect_setequal(unique(design$chat_name), c("gpt", "claude"))
  expect_setequal(unique(design$temperature), c(0.0, 1.0))
})

test_that("cross_design validates inputs", {
  # Zero-length `prompts`.
  expect_error(cross_design(prompts = character(0), chats = list(a = 1)))
  # `chats` list without names.
  expect_error(cross_design(prompts = "x", chats = list(1)))
})

test_that("replicate_design expands correctly", {
  chats <- list(gpt = "mock")
  design <- cross_design(prompts = c("a", "b"), chats = chats)
  rep_design <- replicate_design(design, n = 3)

  expect_equal(nrow(rep_design), 6) # 2 conditions x 3 reps
  expect_true("replicate" %in% names(rep_design))
  expect_setequal(unique(rep_design$replicate), 1:3)
})

test_that("randomize_design is reproducible with seed", {
  chats <- list(a = "m")
  design <- cross_design(prompts = letters[1:5], chats = chats)

  # Two runs with the same seed must give the same row order.
  r1 <- randomize_design(design, seed = 123)
  r2 <- randomize_design(design, seed = 123)

  expect_equal(r1$condition_id, r2$condition_id)
})

test_that("randomize_design changes order", {
  chats <- list(a = "m")
  design <- cross_design(prompts = letters[1:20], chats = chats)
  randomized <- randomize_design(design, seed = 99)

  # Very unlikely to be in original order
  expect_false(all(randomized$condition_id == design$condition_id))
})