# Integration test: download one book and label its sections using
# Roman-numeral heading lines.
test_that("gutenberg_download and gutenberg_add_sections work with single book", {
  skip_if_not_integration()
  skip_on_cran()
  skip_if_offline()

  # Book 2 is the US Bill of Rights; the download is requested with
  # caching switched off.
  bill_text <- gutenberg_download(
    2,
    strip = TRUE,
    verbose = FALSE,
    use_cache = FALSE
  )
  tagged <- gutenberg_add_sections(bill_text, pattern = "^[IVX]+$")

  # Shape and types of the returned table.
  expect_s3_class(tagged, "tbl_df")
  expect_named(tagged, c("gutenberg_id", "text", "section"))
  expect_gt(nrow(tagged), 0)
  expect_equal(unique(tagged$gutenberg_id), 2)
  expect_type(tagged$text, "character")

  # The joined text should be longer than a trivial stub.
  combined <- paste(tagged$text, collapse = " ")
  expect_gt(nchar(combined), 100)

  # Ten distinct, non-missing section labels are expected.
  labelled <- tagged$section[!is.na(tagged$section)]
  expect_length(unique(labelled), 10)
})

# Integration test: download two books in one call and label sections in
# both with a single combined heading pattern.
test_that(
  "gutenberg_download and gutenberg_add_sections work with multiple books and auto-grouping",
  {
    skip_if_not_integration()
    skip_on_cran()
    skip_if_offline()

    # 84 = Frankenstein (Letters + Chapters), 2 = Bill of Rights (Articles).
    downloaded <- gutenberg_download(
      c(84, 2),
      strip = TRUE,
      verbose = FALSE,
      use_cache = FALSE
    )
    tagged <- gutenberg_add_sections(
      downloaded,
      pattern = "^(Letter \\d+|Chapter \\d+|[IVX]+)$"
    )

    expect_s3_class(tagged, "tbl_df")
    expect_named(tagged, c("gutenberg_id", "text", "section"))
    expect_gt(nrow(tagged), 0)
    expect_setequal(unique(tagged$gutenberg_id), c(84, 2))

    # Note: table() only lists ids that actually occur, so every count it
    # reports is already at least one.
    rows_per_book <- table(tagged$gutenberg_id)
    expect_true(all(rows_per_book > 0))

    # Distinct non-missing section labels belonging to one book.
    sections_of <- function(book_id) {
      tagged |>
        dplyr::filter(gutenberg_id == book_id, !is.na(section)) |>
        dplyr::pull(section) |>
        unique()
    }
    frankenstein_labels <- sections_of(84)
    bill_labels <- sections_of(2)

    # Frankenstein: 4 letters + 24 chapters = 28 sections.
    expect_length(frankenstein_labels, 28)

    # Bill of Rights: 10 articles.
    expect_length(bill_labels, 10)

    # Break the Frankenstein total down by heading type.
    n_letters <- length(grep("^Letter", frankenstein_labels))
    n_chapters <- length(grep("^Chapter", frankenstein_labels))
    expect_equal(n_letters, 4)
    expect_equal(n_chapters, 24)
  }
)

# Integration test: requesting meta_fields adds the corresponding metadata
# columns to the downloaded text.
test_that("gutenberg_download with meta_fields works with real API", {
  skip_if_not_integration()
  skip_on_cran()
  skip_if_offline()

  requested_meta <- c("title", "author")
  book <- gutenberg_download(
    1,
    meta_fields = requested_meta,
    strip = TRUE,
    verbose = FALSE,
    use_cache = FALSE
  )

  expect_s3_class(book, "tbl_df")

  # The id and text columns plus every requested metadata field must be
  # present; additional columns are tolerated.
  required_cols <- c("gutenberg_id", "text", requested_meta)
  expect_true(all(required_cols %in% names(book)))
  expect_gt(nrow(book), 0)

  # A single book carries exactly one title, and it must not be missing.
  distinct_titles <- unique(book$title)
  expect_length(distinct_titles, 1)
  expect_true(!is.na(distinct_titles))
})

# Integration test: the same book downloaded with strip = TRUE must have
# fewer rows than with strip = FALSE.
test_that("gutenberg_download strip parameter works with real API", {
  skip_if_not_integration()
  skip_on_cran()
  skip_if_offline()

  # Download book 1 uncached, varying only the strip flag.
  fetch_book_one <- function(strip) {
    gutenberg_download(
      1,
      strip = strip,
      verbose = FALSE,
      use_cache = FALSE
    )
  }

  stripped_rows <- nrow(fetch_book_one(TRUE))
  unstripped_rows <- nrow(fetch_book_one(FALSE))

  expect_gt(unstripped_rows, stripped_rows)
  expect_gt(stripped_rows, 0)
  expect_gt(unstripped_rows, 0)
})

# Integration test: a download with use_cache = TRUE leaves a "1.rds" entry
# in the cache listing, and a repeated download returns an identical result.
test_that("gutenberg_download caching works with real API", {
  skip_if_not_integration()
  skip_on_cran()
  skip_if_offline()

  # NOTE(review): with_gutenberg_cache() is a helper defined elsewhere;
  # presumably it points the cache at a temporary location - confirm.
  with_gutenberg_cache({
    # First download - should hit the API
    network_result <- gutenberg_download(
      1,
      strip = TRUE,
      verbose = FALSE,
      use_cache = TRUE
    )

    cache_files <- gutenberg_cache_list(verbose = FALSE)
    expect_gt(nrow(cache_files), 0)
    # Keep the scalar on the left of %in% so the result is a single logical.
    # With the operands reversed there is one value per cached file, and
    # expect_true() fails whenever the listing has more than one row.
    expect_true("1.rds" %in% cache_files$file)

    # Second download - should use cache
    # We can't directly test if it used cache, but we can verify results match
    cache_result <- gutenberg_download(
      1,
      strip = TRUE,
      verbose = FALSE,
      use_cache = TRUE
    )

    expect_identical(network_result, cache_result)
  })
})

# Integration test: gutenberg_download() accepts a data frame that has a
# gutenberg_id column in place of a bare vector of ids.
test_that("gutenberg_download works with data frame input", {
  skip_if_not_integration()
  skip_on_cran()
  skip_if_offline()

  wanted_ids <- c(1, 2)
  id_frame <- data.frame(gutenberg_id = wanted_ids)

  downloaded <- gutenberg_download(
    id_frame,
    strip = TRUE,
    verbose = FALSE,
    use_cache = FALSE
  )

  expect_s3_class(downloaded, "tbl_df")
  expect_gt(nrow(downloaded), 0)

  # Every requested id, and no other, appears in the result.
  expect_setequal(unique(downloaded$gutenberg_id), wanted_ids)
})