test_that("create_gold_standard creates valid object", {
  # Three documents, each carrying a distinct label.
  docs <- c("good", "bad", "ok")
  tags <- c("pos", "neg", "neu")
  gold <- create_gold_standard(texts = docs, labels = tags)

  # Class, size, and the set of label classes (order-insensitive).
  expect_s3_class(gold, "gold_standard")
  expect_equal(gold$n, 3)
  expect_setequal(gold$classes, c("neg", "neu", "pos"))

  # The hash is a string of 64 characters, the length of a SHA-256 hex digest.
  expect_type(gold$hash, "character")
  expect_equal(nchar(gold$hash), 64)
})

test_that("gold_standard hash is deterministic", {
  # Build two objects from the very same inputs; their hashes must agree.
  docs <- c("a", "b")
  tags <- c("x", "y")

  first <- create_gold_standard(docs, tags)
  second <- create_gold_standard(docs, tags)

  expect_equal(first$hash, second$hash)
})

test_that("gold_standard hash changes with different labels", {
  # Same texts, but the second label differs ("y" vs "z").
  docs <- c("a", "b")
  baseline <- create_gold_standard(docs, c("x", "y"))
  relabelled <- create_gold_standard(docs, c("x", "z"))

  hashes_match <- baseline$hash == relabelled$hash
  expect_false(hashes_match)
})

test_that("create_gold_standard validates input lengths", {
  # One text paired with two labels: the lengths disagree, so the
  # constructor is expected to signal an error.
  one_text <- c("a")
  two_labels <- c("x", "y")

  expect_error(create_gold_standard(one_text, two_labels))
})

test_that("sample_for_validation returns correct size", {
  # Draw 10 items out of a 100-document corpus.
  corpus <- paste("doc", 1:100)
  drawn <- sample_for_validation(corpus, n = 10, seed = 42)

  # Result is a tibble with exactly the requested number of rows.
  expect_s3_class(drawn, "tbl_df")
  expect_equal(nrow(drawn), 10)

  # Both expected columns are present.
  required_cols <- c("idx", "text")
  expect_true(all(required_cols %in% names(drawn)))

  # Every sampled index points inside the corpus.
  idx_in_range <- drawn$idx >= 1 & drawn$idx <= 100
  expect_true(all(idx_in_range))
})

test_that("sample_for_validation is reproducible", {
  corpus <- paste("doc", 1:50)

  # Local helper: repeat the identical call (same n, same seed).
  draw_once <- function() {
    sample_for_validation(corpus, n = 10, seed = 99)
  }

  first_draw <- draw_once()
  second_draw <- draw_once()

  # Same seed must select the same indices.
  expect_equal(first_draw$idx, second_draw$idx)
})

test_that("sample_for_validation rejects n > length(texts)", {
  # Asking for 5 samples from only 2 texts is expected to error.
  two_texts <- c("a", "b")

  expect_error(sample_for_validation(two_texts, n = 5))
})

test_that("estimate_cost returns reasonable structure", {
  texts <- c("This is a short text", "Another one", "A third text here")
  # NOTE(review): chat = "mock" presumably avoids a real model call --
  # confirm against estimate_cost().
  result <- estimate_cost(texts, chat = "mock", n_reps = 2)

  expect_type(result, "list")

  # The inputs are echoed back: number of texts and number of repetitions.
  expect_equal(result$n_texts, length(texts))
  expect_equal(result$n_reps, 2)

  # Use the dedicated comparison expectations so that a failure reports the
  # actual value instead of a bare "is not TRUE".
  expect_gte(result$est_cost_usd, 0)
  expect_gt(result$est_input_tokens, 0)
})