## Tests for read_ris()
## testthat 3.0 — no network calls, all data via tempfile()

## Helper: write RIS text to a temp file and return the path.
##
## `text` is a character vector holding one RIS line per element
## (`character(0)` produces an empty file). The file gets a ".ris" extension
## and lives in the session temp directory. Returns the file path.
ris_tempfile <- function(text) {
  f <- tempfile(fileext = ".ris")
  ## Convert to UTF-8 and write the bytes untranslated, so non-ASCII fixtures
  ## are stored as UTF-8 regardless of the session's native encoding.
  writeLines(enc2utf8(text), f, useBytes = TRUE)
  f
}

## ── Standard columns ─────────────────────────────────────────────────────────

test_that("read_ris returns the 11 standard bibnets columns", {
  ## A fully populated record: every standard column must be present.
  ris_lines <- c(
    "TY  - JOUR",
    "AU  - Smith, J.",
    "TI  - A test title",
    "JO  - Test Journal",
    "PY  - 2021",
    "DO  - 10.1000/xyz",
    "AB  - An abstract.",
    "KW  - network",
    "ER  - "
  )
  parsed <- read_ris(ris_tempfile(ris_lines))
  wanted <- c(
    "id", "title", "year", "journal", "doi",
    "cited_by_count", "abstract", "type",
    "authors", "references", "keywords"
  )
  ## Extra columns are tolerated; only absent ones count as a failure.
  absent <- setdiff(wanted, names(parsed))
  expect_length(absent, 0L)
})

test_that("read_ris column order matches bibnets standard schema", {
  ## The leading columns must be the scalar fields, in schema order.
  ris_lines <- c(
    "TY  - JOUR",
    "AU  - Smith, J.",
    "TI  - Title",
    "JO  - Journal",
    "PY  - 2020",
    "ER  - "
  )
  parsed <- read_ris(ris_tempfile(ris_lines))
  leading <- c(
    "id", "title", "year", "journal", "doi",
    "cited_by_count", "abstract", "type"
  )
  observed <- names(parsed)[seq_len(length(leading))]
  expect_equal(observed, leading)
})

## ── Single record: values ────────────────────────────────────────────────────

test_that("read_ris single record: scalar fields parsed correctly", {
  ## One complete record; each scalar tag should land in its own column.
  ris_lines <- c(
    "TY  - JOUR",
    "AU  - Smith, Jane",
    "TI  - Bibliometric analysis",
    "JO  - Test Journal",
    "PY  - 2020",
    "DO  - 10.1000/test",
    "AB  - An abstract here.",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_equal(nrow(rec), 1L)
  expect_equal(rec[["title"]], "Bibliometric analysis")
  expect_equal(rec[["journal"]], "Test Journal")
  expect_equal(rec[["year"]], 2020L)
  expect_equal(rec[["doi"]], "10.1000/test")
  expect_equal(rec[["abstract"]], "An abstract here.")
  expect_equal(rec[["type"]], "JOUR")
})

test_that("read_ris year is integer type", {
  ## PY must be stored as an integer, not character or double.
  ris_lines <- c("TY  - JOUR", "TI  - Title", "PY  - 2019", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_type(rec[["year"]], "integer")
})

test_that("read_ris cited_by_count is always NA_integer_", {
  ## The record has no citation-count tag; the column must be an integer NA.
  ris_lines <- c("TY  - JOUR", "TI  - Title", "PY  - 2020", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  count <- rec[["cited_by_count"]]
  expect_true(is.na(count))
  expect_type(count, "integer")
})

## ── Multi-record ─────────────────────────────────────────────────────────────

test_that("read_ris multi-record: correct row count", {
  ## Three ER-terminated records of different types -> three rows.
  first  <- c("TY  - JOUR", "TI  - Paper One", "PY  - 2020", "ER  - ")
  second <- c("TY  - CONF", "TI  - Paper Two", "PY  - 2021", "ER  - ")
  third  <- c("TY  - BOOK", "TI  - Paper Three", "PY  - 2022", "ER  - ")
  recs <- read_ris(ris_tempfile(c(first, second, third)))
  expect_equal(nrow(recs), 3L)
})

test_that("read_ris multi-record: no field bleeding between records", {
  ## Two records with distinct values; each row must keep only its own fields.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - First Paper",
    "AU  - Alpha, A.",
    "PY  - 2000",
    "DO  - 10.1/first",
    "ER  - ",
    "TY  - JOUR",
    "TI  - Second Paper",
    "AU  - Beta, B.",
    "PY  - 2001",
    "DO  - 10.1/second",
    "ER  - "
  )
  recs <- read_ris(ris_tempfile(ris_lines))
  titles <- recs[["title"]]
  dois <- recs[["doi"]]
  years <- recs[["year"]]
  expect_equal(titles[1], "First Paper")
  expect_equal(titles[2], "Second Paper")
  expect_equal(dois[1], "10.1/first")
  expect_equal(dois[2], "10.1/second")
  expect_equal(years[1], 2000L)
  expect_equal(years[2], 2001L)
})

test_that("read_ris multi-record: each row has its own author list", {
  ## Row 1 has two AU tags, row 2 has one; the counts must not mix.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - Paper One",
    "AU  - Alpha, A.",
    "AU  - Beta, B.",
    "ER  - ",
    "TY  - JOUR",
    "TI  - Paper Two",
    "AU  - Gamma, G.",
    "ER  - "
  )
  recs <- read_ris(ris_tempfile(ris_lines))
  author_col <- recs[["authors"]]
  expect_length(author_col[[1]], 2L)
  expect_length(author_col[[2]], 1L)
})

## ── Authors list-column ──────────────────────────────────────────────────────

test_that("read_ris authors list-column has correct length for multi-author record", {
  ## Three AU tags in one record -> a three-element author vector.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - Collab paper",
    "AU  - Smith, John",
    "AU  - Jones, Kate",
    "AU  - Brown, Lee",
    "PY  - 2022",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_length(rec[["authors"]][[1]], 3L)
})

test_that("read_ris authors are uppercased (standardize_authors)", {
  ## A mixed-case AU value must come back fully uppercased.
  f <- ris_tempfile(c(
    "TY  - JOUR",
    "TI  - Title",
    "AU  - Smith, Jane",
    "ER  - "
  ))
  d <- read_ris(f)
  authors <- d$authors[[1]]
  ## Guard against a vacuous pass: toupper(character(0)) equals character(0),
  ## so the case check alone would succeed if the author had been dropped.
  expect_length(authors, 1L)
  expect_equal(authors, toupper(authors))
})

test_that("read_ris authors is always a list", {
  ## Even with a single author the column must be a list-column.
  ris_lines <- c("TY  - JOUR", "TI  - Title", "AU  - Smith, J.", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  author_col <- rec[["authors"]]
  expect_true(is.list(author_col))
})

## ── Keywords list-column ─────────────────────────────────────────────────────

test_that("read_ris keywords list-column has correct length for multi-keyword record", {
  ## Three KW tags in one record -> a three-element keyword vector.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - Paper",
    "KW  - networks",
    "KW  - bibliometrics",
    "KW  - co-citation",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_length(rec[["keywords"]][[1]], 3L)
})

test_that("read_ris keywords values are trimmed", {
  ## Leading and trailing whitespace around the KW value must be stripped.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - Paper",
    "KW  -   spaced keyword  ",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  kw <- rec[["keywords"]][[1]]
  expect_equal(kw, "spaced keyword")
})

test_that("read_ris keywords is always a list", {
  ## Even with a single keyword the column must be a list-column.
  ris_lines <- c("TY  - JOUR", "TI  - Title", "KW  - science", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  keyword_col <- rec[["keywords"]]
  expect_true(is.list(keyword_col))
})

## ── References column ────────────────────────────────────────────────────────

test_that("read_ris references column is a list", {
  ## Minimal record; the references column must still be a list-column.
  ris_lines <- c("TY  - JOUR", "TI  - Title", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  reference_col <- rec[["references"]]
  expect_true(is.list(reference_col))
})

test_that("read_ris references are empty for standard RIS (no references field)", {
  ## The record carries no reference tag, so every references entry is empty.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - Title",
    "AU  - Smith, J.",
    "KW  - networks",
    "PY  - 2020",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  reference_col <- rec[["references"]]
  expect_length(reference_col[[1]], 0L)
  per_row_counts <- lengths(reference_col)
  expect_true(all(per_row_counts == 0L))
})

## ── Missing optional tags → NA / empty list ──────────────────────────────────

test_that("read_ris missing DOI yields NA", {
  ## No DO tag in the record -> doi is NA.
  ris_lines <- c("TY  - JOUR", "TI  - No DOI paper", "PY  - 2018", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_true(is.na(rec[["doi"]]))
})

test_that("read_ris missing abstract yields NA", {
  ## No AB tag in the record -> abstract is NA.
  ris_lines <- c("TY  - JOUR", "TI  - No abstract", "PY  - 2018", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_true(is.na(rec[["abstract"]]))
})

test_that("read_ris missing keywords yields empty character vector in list", {
  ## No KW tag -> the list element is character(0), not NA or NULL.
  ris_lines <- c("TY  - JOUR", "TI  - No keywords", "PY  - 2019", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  kw <- rec[["keywords"]][[1]]
  expect_equal(kw, character(0))
})

test_that("read_ris missing authors yields empty character vector in list", {
  ## No AU tag -> the list element is character(0), not NA or NULL.
  ris_lines <- c("TY  - JOUR", "TI  - No authors", "PY  - 2020", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  au <- rec[["authors"]][[1]]
  expect_equal(au, character(0))
})

test_that("read_ris missing year yields NA_integer_", {
  ## No PY tag -> year is NA but keeps its integer type.
  ris_lines <- c("TY  - JOUR", "TI  - No year", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  yr <- rec[["year"]]
  expect_true(is.na(yr))
  expect_type(yr, "integer")
})

test_that("read_ris missing title yields NA", {
  ## No TI tag in the record -> title is NA.
  ris_lines <- c("TY  - JOUR", "PY  - 2020", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_true(is.na(rec[["title"]]))
})

## ── Alternative tags ─────────────────────────────────────────────────────────

test_that("read_ris accepts Y1 as year fallback", {
  ## Y1 holds a full date; the expected year is its leading four digits.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - Y1 year",
    "Y1  - 2015/03/01",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_equal(rec[["year"]], 2015L)
})

test_that("read_ris accepts T2 as journal fallback", {
  ## No JO tag; the journal must be taken from T2 instead.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - T2 journal",
    "T2  - Fallback Journal",
    "PY  - 2016",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_equal(rec[["journal"]], "Fallback Journal")
})

test_that("read_ris accepts T1 as title fallback", {
  ## No TI tag; the title must be taken from T1 instead.
  ris_lines <- c(
    "TY  - JOUR",
    "T1  - T1 title here",
    "PY  - 2016",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_equal(rec[["title"]], "T1 title here")
})

test_that("read_ris accepts N2 as abstract fallback", {
  ## No AB tag; the abstract must be taken from N2 instead.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - Paper",
    "N2  - Abstract from N2.",
    "PY  - 2018",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_equal(rec[["abstract"]], "Abstract from N2.")
})

test_that("read_ris accepts A1 as author fallback when AU absent", {
  ## No AU tag; the single A1 author must be picked up and uppercased.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - Paper",
    "A1  - Doe, John",
    "PY  - 2015",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  au <- rec[["authors"]][[1]]
  expect_length(au, 1L)
  expect_equal(au, "DOE, JOHN")
})

## ── ID assignment ────────────────────────────────────────────────────────────

test_that("read_ris uses DO as id when present", {
  ## With a DO tag present, the id column must equal the DOI.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - Has DOI",
    "DO  - 10.99/test",
    "ER  - "
  )
  rec <- read_ris(ris_tempfile(ris_lines))
  expect_equal(rec[["id"]], "10.99/test")
})

test_that("read_ris generates RIS-prefixed id when DO absent", {
  ## Without a DO tag, the generated id must start with "RIS".
  ris_lines <- c("TY  - JOUR", "TI  - No DOI", "PY  - 2020", "ER  - ")
  rec <- read_ris(ris_tempfile(ris_lines))
  generated_id <- rec[["id"]]
  expect_match(generated_id, "^RIS")
})

## ── Empty file / empty records ───────────────────────────────────────────────

test_that("read_ris empty file returns zero-row data frame with correct columns", {
  ## A zero-byte file gives no rows but still the full standard schema.
  empty_path <- ris_tempfile(character(0))
  parsed <- read_ris(empty_path)
  expect_equal(nrow(parsed), 0L)
  wanted <- c(
    "id", "title", "year", "journal", "doi",
    "cited_by_count", "abstract", "type",
    "authors", "references", "keywords"
  )
  absent <- setdiff(wanted, names(parsed))
  expect_length(absent, 0L)
})

test_that("read_ris file with only blank lines returns zero-row data frame", {
  ## Empty and whitespace-only lines contain no records.
  blank_lines <- c("", "  ", "")
  parsed <- read_ris(ris_tempfile(blank_lines))
  expect_equal(nrow(parsed), 0L)
})

## ── Error path ───────────────────────────────────────────────────────────────

test_that("read_ris errors informatively on non-existent file", {
  ## tempfile() only reserves a name that is not currently in use; nothing is
  ## created on disk. Unlike a hard-coded "/tmp/..." path this works on every
  ## platform and cannot collide with a file that happens to exist.
  missing_path <- tempfile(fileext = ".ris")
  expect_false(file.exists(missing_path))
  expect_error(read_ris(missing_path), "File not found")
})

## ── Non-ASCII / UTF-8 author names ───────────────────────────────────────────

test_that("read_ris UTF-8 author names round-trip cleanly", {
  ## Two authors with accented characters, read back with explicit UTF-8.
  ris_lines <- c(
    "TY  - JOUR",
    "TI  - UTF-8 test",
    "AU  - Échalas, Mélanie",
    "AU  - García, José",
    "PY  - 2023",
    "ER  - "
  )
  path <- ris_tempfile(ris_lines)
  rec <- read_ris(path, encoding = "UTF-8")
  au <- rec[["authors"]][[1]]
  expect_length(au, 2L)
  expect_true(is.character(au))
  ## Neither name may collapse to an empty string after uppercasing.
  name_widths <- nchar(au)
  expect_true(all(name_widths > 0L))
})

## ── read_biblio auto-detection ───────────────────────────────────────────────

test_that("read_biblio auto-detects RIS format and parses correctly", {
  ## read_biblio() gets only the path; it must recognise the content as RIS.
  ris_lines <- c(
    "TY  - JOUR",
    "AU  - Smith, J.",
    "TI  - Auto-detect test",
    "JO  - Some Journal",
    "PY  - 2020",
    "DO  - 10.1000/autodetect",
    "ER  - "
  )
  path <- ris_tempfile(ris_lines)
  rec <- read_biblio(path)
  expect_equal(nrow(rec), 1L)
  expect_equal(rec[["title"]], "Auto-detect test")
  expect_true(is.list(rec[["authors"]]))
})