# The bundled openalex_works.csv is a small 30-row fixture (a subset of
# the full OpenAlex "Works" export). It is read-only here, so parse it
# ONCE at file scope and share `oa` across the read_openalex_csv()
# assertions instead of re-parsing per test_that(). The read_biblio
# blocks deliberately keep their own calls — they exercise a different
# code path (format auto-detection) and must not reuse `oa`.
# Locate the bundled fixture. `mustWork = TRUE` turns a missing file into an
# immediate, explicit error instead of passing "" on to the reader.
f  <- system.file("extdata", "openalex_works.csv", package = "bibnets",
                  mustWork = TRUE)
# Parsed once at file scope; the read_openalex_csv() tests below only read it.
oa <- read_openalex_csv(f)

test_that("read_openalex_csv returns standard columns", {
  # Compare the set of missing columns against character(0) so that a
  # failure names the absent columns instead of reporting a bare FALSE.
  expected_cols <- c("id", "title", "year", "journal", "doi",
                     "cited_by_count", "abstract", "type",
                     "authors", "references", "keywords",
                     "affiliations", "countries")
  missing_cols <- setdiff(expected_cols, names(oa))
  expect_identical(missing_cols, character(0))
})

test_that("read_openalex_csv returns 30 rows from bundled fixture", {
  # The parsed fixture must contain exactly 30 rows.
  n_rows <- nrow(oa)
  expect_equal(n_rows, 30L)
})

test_that("read_openalex_csv strips OpenAlex URL prefix from id", {
  ids <- oa$id

  # No id may still carry the OpenAlex URL prefix ...
  still_prefixed <- grepl("https://openalex.org/", ids)
  expect_false(any(still_prefixed))

  # ... and every id must be a bare "W<digits>" identifier.
  is_bare_id <- grepl("^W[0-9]+$", ids)
  expect_true(all(is_bare_id))
})

test_that("read_openalex_csv strips DOI URL prefix", {
  # Only non-missing DOIs are inspected for a leftover resolver prefix.
  doi <- oa$doi
  known_doi <- doi[!is.na(doi)]
  has_url_prefix <- grepl("^https://doi.org/", known_doi)
  expect_false(any(has_url_prefix))
})

test_that("read_openalex_csv produces list-columns for authors, references, keywords, affiliations, countries", {
  # One expectation per column so a failure names the offending column and
  # its actual type; expect_type() matches the type checks used for `year`
  # and `cited_by_count` elsewhere in this file.
  expect_type(oa$authors, "list")
  expect_type(oa$references, "list")
  expect_type(oa$keywords, "list")
  expect_type(oa$affiliations, "list")
  expect_type(oa$countries, "list")
})

test_that("read_openalex_csv pipe-splits authors correctly", {
  # At least one record must hold more than one author entry.
  n_authors <- lengths(oa$authors)
  expect_true(any(n_authors > 1L))
})

test_that("read_openalex_csv references column is always empty", {
  # Every element of the references list-column must have length zero.
  n_refs <- lengths(oa$references)
  expect_true(all(n_refs == 0L))
})

test_that("read_openalex_csv abstract column is all NA", {
  # No abstract value may be non-missing.
  has_abstract <- !is.na(oa$abstract)
  expect_false(any(has_abstract))
})

test_that("read_openalex_csv year is integer", {
  # The year column must be stored as integer, not double or character.
  year_col <- oa$year
  expect_type(year_col, "integer")
})

test_that("read_openalex_csv cited_by_count is integer with no NAs", {
  counts <- oa$cited_by_count
  # Stored as integer ...
  expect_type(counts, "integer")
  # ... with no missing values.
  expect_false(anyNA(counts))
})

test_that("read_biblio auto-detects openalex_csv format", {
  # `mustWork = TRUE` makes a missing fixture fail at the lookup instead of
  # handing "" to read_biblio().
  f <- system.file("extdata", "openalex_works.csv", package = "bibnets",
                   mustWork = TRUE)
  # No `format` argument is given, so read_biblio() must pick the reader.
  d <- read_biblio(f)
  expect_equal(nrow(d), 30L)
  expect_true(is.list(d$authors))
})

test_that("read_biblio with format='openalex_csv' works explicitly", {
  # `mustWork = TRUE` makes a missing fixture fail at the lookup instead of
  # handing "" to read_biblio().
  f <- system.file("extdata", "openalex_works.csv", package = "bibnets",
                   mustWork = TRUE)
  # The format is named explicitly rather than auto-detected.
  d <- read_biblio(f, format = "openalex_csv")
  expect_equal(nrow(d), 30L)
})

test_that("read_openalex_csv countries are pipe-split into character vectors", {
  country_lists <- oa$countries

  # Some record must list more than one country ...
  expect_true(any(lengths(country_lists) > 1L))

  # ... and every code, flattened across records, must be two characters.
  code_widths <- nchar(unlist(country_lists))
  expect_true(all(code_widths == 2L))
})

test_that("read_openalex_csv keywords are single-element lists from primary_topic", {
  n_keywords <- lengths(oa$keywords)
  # Each record carries at most one keyword ...
  expect_true(all(n_keywords <= 1L))
  # ... and at least one record carries exactly one.
  expect_true(any(n_keywords == 1L))
})

test_that("read_openalex_csv errors on non-existent file", {
  # A path that is not expected to exist must raise an error.
  missing_path <- "no_such_file.csv"
  expect_error(read_openalex_csv(missing_path))
})

test_that("read_biblio row-binds files with source-specific columns", {
  # Named *_path so the temp file does not shadow the file-scope `oa` data.
  oa_path <- tempfile(fileext = ".csv")
  bib_path <- tempfile(fileext = ".bib")
  # Remove both temp files when the test finishes, even if it errors.
  on.exit(unlink(c(oa_path, bib_path)), add = TRUE)

  # One-record OpenAlex CSV with pipe-separated authors, institutions and
  # countries.
  writeLines(c(
    "id,display_name,publication_year,primary_location.source.display_name,doi,cited_by_count,type,authorships.author.display_name,authorships.institutions.display_name,authorships.countries,primary_topic.display_name",
    "https://openalex.org/W1,OpenAlex paper,2024,Journal A,https://doi.org/10.1/oa,2,article,Alice|Bob,Uni A|Uni B,US|GB,Networks"
  ), oa_path)
  # One-record BibTeX file that has no country or affiliation fields.
  writeLines(c(
    "@article{key1,",
    "  title = {BibTeX paper},",
    "  author = {Smith, Jane},",
    "  year = {2023},",
    "  journal = {Journal B}",
    "}"
  ), bib_path)

  d <- read_biblio(c(oa_path, bib_path))

  # Both records are kept, and the OpenAlex-only columns are present as
  # list-columns in the combined result.
  expect_equal(nrow(d), 2L)
  expect_true(all(c("countries", "affiliations", "authors") %in% names(d)))
  expect_true(is.list(d$countries))
  expect_true(is.list(d$authors))
})

test_that("read_bibtex extracts non-standard cited references", {
  bib_path <- tempfile(fileext = ".bib")
  # Remove the temp file when the test finishes, even if it errors.
  on.exit(unlink(bib_path), add = TRUE)

  # Entry whose references live in a `cited-references` field, separated
  # by ";".
  writeLines(c(
    "@article{key1,",
    "  title = {BibTeX paper},",
    "  author = {Smith, Jane},",
    "  year = {2023},",
    "  cited-references = {Ref A, 2020; Ref B, 2021}",
    "}"
  ), bib_path)

  d <- read_bibtex(bib_path)

  # The expected values are one upper-cased string per reference.
  expect_equal(d$references[[1]], c("REF A, 2020", "REF B, 2021"))
})