test_that("pc_config get/set works for known keys", {
  cfg0 <- pc_config()
  expect_true(is.list(cfg0))
  expect_true("rate_limit" %in% names(cfg0))

  cfg1 <- pc_config(rate_limit = 4)
  expect_equal(cfg1$rate_limit, 4)

  # restore default used in package initialization
  pc_config(rate_limit = 5)
})

test_that("pc_config validates finite numeric rate_limit and cache_ttl", {
  expect_error(
    pc_config(rate_limit = NA_real_),
    "rate_limit"
  )
  expect_error(
    pc_config(rate_limit = Inf),
    "rate_limit"
  )
  expect_error(
    pc_config(rate_limit = 0),
    "rate_limit"
  )
  expect_error(
    pc_config(cache_ttl = NA_real_),
    "cache_ttl"
  )
  expect_error(
    pc_config(cache_ttl = Inf),
    "cache_ttl"
  )
  expect_error(
    pc_config(cache_ttl = -1),
    "cache_ttl"
  )
})

test_that("pc_response creates PubChemResult from fault payload", {
  payload <- '{"Fault":{"Code":"PUGREST.BadRequest","Message":"Invalid request"}}'
  res <- pc_response(payload, request = list(url = "https://example.org"))

  expect_s3_class(res, "PubChemResult")
  expect_false(res$success)
  expect_s3_class(res$error, "PubChemError")
  expect_match(res$error$code, "PUGREST")
})

test_that("pc_response handles non-list Waiting payloads without error", {
  payload <- '{"Waiting":"queued"}'
  res <- pc_response(payload, request = list(url = "https://example.org"))

  expect_s3_class(res, "PubChemResult")
  expect_true(res$success)
  expect_false(isTRUE(res$pending))
  expect_null(res$listkey)
})

test_that("pc_batch chunks and summarizes execution", {
  dummy <- function(ids) {
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  b <- pc_batch(ids = 1:5, fn = dummy, chunk_size = 2, parallel = FALSE)
  expect_s3_class(b, "PubChemBatchResult")
  expect_equal(length(b$chunks), 3)
  expect_equal(length(b$results), 3)

  tbl <- as_tibble(b)
  expect_s3_class(tbl, "tbl_df")
  expect_equal(nrow(tbl), 3)
})

test_that("pc_batch validates integer chunk_size and workers", {
  dummy <- function(ids) {
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  expect_error(
    pc_batch(ids = 1:5, fn = dummy, chunk_size = 0.5),
    "chunk_size"
  )
  expect_error(
    pc_batch(ids = 1:5, fn = dummy, chunk_size = 2, workers = 0),
    "workers"
  )
  expect_error(
    pc_batch(ids = 1:5, fn = dummy, chunk_size = 2, workers = 1.5),
    "workers"
  )
  expect_error(
    pc_batch(ids = 1:5, fn = dummy, chunk_size = 2, workers = Inf),
    "workers"
  )
})

test_that("pc_benchmark returns scenario metrics", {
  dummy <- function(ids) {
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  bm <- pc_benchmark(
    ids = 1:10,
    fn = dummy,
    chunk_sizes = c(2, 5),
    parallel_options = c(FALSE)
  )

  expect_s3_class(bm, "tbl_df")
  expect_equal(nrow(bm), 2)
  expect_true(all(c("chunk_size", "elapsed_sec", "successful_chunks") %in% names(bm)))
})

test_that("pc_benchmark validates chunk_sizes and workers", {
  dummy <- function(ids) {
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  expect_error(
    pc_benchmark(
      ids = 1:10,
      fn = dummy,
      chunk_sizes = c(2, 2.5),
      parallel_options = FALSE
    ),
    "chunk_sizes"
  )
  expect_error(
    pc_benchmark(
      ids = 1:10,
      fn = dummy,
      chunk_sizes = 2,
      parallel_options = TRUE,
      workers = 0
    ),
    "workers"
  )
})

test_that("pc_benchmark_harness supports 10/1000/100000 scenarios", {
  dummy <- function(ids) {
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  out <- pc_benchmark_harness(
    fn = dummy,
    ids = 1:10,
    scenario_sizes = c(10L, 1000L, 100000L),
    chunk_sizes = 100000L,
    parallel_options = FALSE,
    thresholds = list(
      elapsed_sec = Inf,
      failed_chunk_ratio = 0
    )
  )

  expect_s3_class(out, "PubChemBenchmarkReport")
  expect_s3_class(out$details, "tbl_df")
  expect_s3_class(out$summary, "tbl_df")
  expect_equal(sort(unique(out$details$scenario_size)), c(10L, 1000L, 100000L))
  expect_equal(nrow(out$summary), 3)
  expect_true(all(out$summary$all_runs_pass))
})

test_that("pc_benchmark_harness enforces threshold gates", {
  dummy <- function(ids) {
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  elapsed_fail <- pc_benchmark_harness(
    fn = dummy,
    ids = 1:10,
    scenario_sizes = 10L,
    chunk_sizes = 10L,
    thresholds = list(
      elapsed_sec = -1,
      failed_chunk_ratio = 1
    )
  )

  expect_false(elapsed_fail$details$run_pass[[1]])
  expect_false(elapsed_fail$summary$all_runs_pass[[1]])

  flaky <- function(ids) {
    if (as.integer(ids[[1]]) == 6L) {
      stop("synthetic chunk failure")
    }
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  ratio_fail <- pc_benchmark_harness(
    fn = flaky,
    ids = 1:10,
    scenario_sizes = 10L,
    chunk_sizes = 5L,
    thresholds = list(
      elapsed_sec = Inf,
      failed_chunk_ratio = 0.4
    )
  )

  expect_equal(ratio_fail$details$failed_chunks[[1]], 1)
  expect_gt(ratio_fail$details$failed_chunk_ratio[[1]], 0.4)
  expect_false(ratio_fail$details$run_pass[[1]])
})

test_that("pc_benchmark_harness writes markdown, csv, and rds reports", {
  dummy <- function(ids) {
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  formats <- c(markdown = ".md", csv = ".csv", rds = ".rds")

  for (fmt in names(formats)) {
    path <- tempfile(fileext = formats[[fmt]])
    out <- pc_benchmark_harness(
      fn = dummy,
      ids = 1:10,
      scenario_sizes = 10L,
      chunk_sizes = 10L,
      thresholds = list(
        elapsed_sec = Inf,
        failed_chunk_ratio = 0
      ),
      report_path = path,
      report_format = fmt
    )

    expect_true(file.exists(path))

    if (fmt == "markdown") {
      txt <- readLines(path, warn = FALSE)
      expect_true(any(grepl("PubChem Benchmark Harness Report", txt, fixed = TRUE)))
    } else if (fmt == "csv") {
      csv <- utils::read.csv(path)
      expect_true("scenario_size" %in% names(csv))
    } else {
      obj <- readRDS(path)
      expect_s3_class(obj, "PubChemBenchmarkReport")
      expect_s3_class(out, "PubChemBenchmarkReport")
    }
  }
})

test_that("pc_benchmark_harness validates key inputs", {
  dummy <- function(ids) {
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  expect_error(
    pc_benchmark_harness(
      fn = dummy,
      ids = 1:10,
      scenario_sizes = 10L,
      chunk_sizes = 0
    ),
    "chunk_sizes"
  )

  expect_error(
    pc_benchmark_harness(
      fn = dummy,
      ids = 1:10,
      scenario_sizes = 10L,
      id_generator = 123
    ),
    "id_generator"
  )

  expect_error(
    pc_benchmark_harness(
      fn = dummy,
      ids = 1:10,
      scenario_sizes = 10L,
      thresholds = 1
    ),
    "thresholds"
  )

  expect_error(
    pc_benchmark_harness(
      fn = dummy,
      ids = 1:10,
      scenario_sizes = 10L,
      chunk_sizes = 2.5
    ),
    "chunk_sizes"
  )

  expect_error(
    pc_benchmark_harness(
      fn = dummy,
      ids = 1:10,
      scenario_sizes = 10.5
    ),
    "scenario_sizes"
  )

  expect_error(
    pc_benchmark_harness(
      fn = dummy,
      ids = 1:10,
      scenario_sizes = 10L,
      workers = 0
    ),
    "workers"
  )
})

test_that("pc_calibrate_benchmark_thresholds applies quantile-based calibration floors", {
  history <- data.frame(
    scenario_size = c(10, 10, 10, 1000, 1000, 1000),
    max_elapsed_sec = c(10, 12, 8, 200, 240, 260),
    max_failed_chunk_ratio = c(0, 0.02, 0, 0.01, 0.03, 0.02),
    stringsAsFactors = FALSE
  )

  calibrated <- pc_calibrate_benchmark_thresholds(
    history = history,
    baseline = list(
      elapsed_sec = c(`10` = 30, `1000` = 300),
      failed_chunk_ratio = c(`10` = 0, `1000` = 0.01)
    ),
    quantile_prob = 0.95,
    elapsed_buffer = 1.25,
    failed_ratio_buffer = 1.5,
    min_runs = 3
  )

  expect_true(is.list(calibrated))
  expect_true(all(c("elapsed_sec", "failed_chunk_ratio") %in% names(calibrated)))
  expect_true(calibrated$elapsed_sec[["10"]] >= 30)
  expect_true(calibrated$elapsed_sec[["1000"]] >= 300)
  expect_true(calibrated$failed_chunk_ratio[["10"]] >= 0)
  expect_true(calibrated$failed_chunk_ratio[["1000"]] >= 0.01)
})

test_that("pc_calibrate_benchmark_thresholds validates history schema", {
  bad <- data.frame(
    scenario_size = 10,
    max_elapsed_sec = 1,
    stringsAsFactors = FALSE
  )

  expect_error(
    pc_calibrate_benchmark_thresholds(history = bad),
    "history"
  )
})

test_that("pc_batch checkpoint manifest is created and resume can rerun failed chunks", {
  td <- tempfile("pc-batch-checkpoint-")
  dir.create(td, recursive = TRUE)

  e <- new.env(parent = emptyenv())
  e$calls <- 0L
  e$fail_once <- TRUE

  flaky <- function(ids) {
    e$calls <- e$calls + 1L
    if (as.integer(ids[[1]]) == 3L && isTRUE(e$fail_once)) {
      e$fail_once <- FALSE
      stop("transient chunk error")
    }
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  b1 <- pc_batch(
    ids = 1:5,
    fn = flaky,
    chunk_size = 2,
    checkpoint_dir = td,
    checkpoint_id = "resume_case"
  )

  expect_false(all(b1$success))
  expect_true(file.exists(file.path(td, "pc_batch_resume_case_manifest.rds")))
  expect_equal(e$calls, 3L)

  b2 <- pc_resume_batch(
    fn = flaky,
    checkpoint_dir = td,
    checkpoint_id = "resume_case"
  )

  expect_true(all(b2$success))
  expect_true(isTRUE(b2$checkpoint$resumed))
  expect_equal(e$calls, 4L)
})

test_that("pc_resume_batch does not rerun completed chunks", {
  td <- tempfile("pc-batch-checkpoint-")
  dir.create(td, recursive = TRUE)

  e <- new.env(parent = emptyenv())
  e$calls <- 0L

  dummy <- function(ids) {
    e$calls <- e$calls + 1L
    pc_response('{"IdentifierList":{"CID":[2244]}}', request = list(identifier = ids))
  }

  b1 <- pc_batch(
    ids = 1:4,
    fn = dummy,
    chunk_size = 2,
    checkpoint_dir = td,
    checkpoint_id = "no_rerun_case"
  )

  expect_true(all(b1$success))
  calls_after_first <- e$calls

  b2 <- pc_resume_batch(
    fn = dummy,
    checkpoint_dir = td,
    checkpoint_id = "no_rerun_case"
  )

  expect_true(all(b2$success))
  expect_equal(e$calls, calls_after_first)
})

test_that("pc_resume_batch errors when checkpoint manifest is missing", {
  td <- tempfile("pc-batch-checkpoint-")
  dir.create(td, recursive = TRUE)

  expect_error(
    pc_resume_batch(
      fn = function(ids) ids,
      checkpoint_dir = td,
      checkpoint_id = "missing_case"
    ),
    "No checkpoint manifest found"
  )
})

test_that("pc_request cache flag can mark cache hit", {
  skip_on_cran()
  skip_if_not_live_smoke()
  skip_if_offline()

  pc_cache_clear()

  a <- pc_request(
    domain = "compound",
    namespace = "cid",
    identifier = 2244,
    output = "JSON",
    cache = TRUE
  )
  expect_true(a$success)

  b <- pc_request(
    domain = "compound",
    namespace = "cid",
    identifier = 2244,
    output = "JSON",
    cache = TRUE
  )

  expect_true(b$success)
  expect_true(isTRUE(b$from_cache))
})

test_that("pc_request offline mode returns cache-miss error when absent", {
  pc_cache_clear()

  out <- pc_request(
    domain = "compound",
    namespace = "cid",
    identifier = 2244,
    output = "JSON",
    cache = TRUE,
    offline = TRUE
  )

  expect_s3_class(out, "PubChemResult")
  expect_false(out$success)
  expect_equal(out$error$code, "OfflineCacheMiss")
})

test_that("pc_request validates cache_ttl and rate_limit", {
  expect_error(
    pc_request(cache_ttl = NA_real_, offline = TRUE),
    "cache_ttl"
  )
  expect_error(
    pc_request(cache_ttl = Inf, offline = TRUE),
    "cache_ttl"
  )
  expect_error(
    pc_request(rate_limit = NA_real_, offline = TRUE),
    "rate_limit"
  )
  expect_error(
    pc_request(rate_limit = Inf, offline = TRUE),
    "rate_limit"
  )
  expect_error(
    pc_request(rate_limit = 0, offline = TRUE),
    "rate_limit"
  )
})

test_that("pc_cache_info returns a diagnostics tibble", {
  info <- pc_cache_info()
  expect_s3_class(info, "tbl_df")
  expect_equal(nrow(info), 1)
  expect_true(all(c("memory_entries", "disk_entries", "disk_size_bytes", "cache_dir") %in% names(info)))
})

test_that("pc_feature_table produces a tabular feature set", {
  skip_on_cran()
  skip_if_not_live_smoke()
  skip_if_offline()

  tbl <- pc_feature_table(
    identifier = c(2244, 3672),
    properties = c("MolecularWeight", "XLogP"),
    namespace = "cid",
    cache = TRUE
  )

  expect_s3_class(tbl, "tbl_df")
  expect_true(nrow(tbl) >= 1)
  expect_true(any(c("MolecularWeight", "XLogP") %in% names(tbl)))
})

test_that("pc_feature_table supports typed result failures via error_mode", {
  fail_res <- pc_make_result(
    success = FALSE,
    request = list(domain = "compound", identifier = "2244"),
    error = pc_make_error("TransportError", "synthetic failure"),
    status = 500
  )

  local_mocked_bindings(
    pc_property = function(...) fail_res,
    .package = "PubChemR"
  )

  out <- pc_feature_table(
    identifier = 2244,
    properties = c("MolecularWeight"),
    error_mode = "result"
  )

  expect_s3_class(out, "PubChemResult")
  expect_false(out$success)
  expect_equal(out$error$code, "TransportError")

  expect_error(
    pc_feature_table(
      identifier = 2244,
      properties = c("MolecularWeight"),
      error_mode = "stop"
    ),
    "Property retrieval failed"
  )
})