# Custom `field` + `sep` support across all network builders.
# A custom CSV column (any name, any separator) must produce the identical
# network as the canonical list-column input.

## ── Synthetic fixtures ──────────────────────────────────────────────────────

# Reference fixture: one row per paper, with every multi-valued field stored
# as a list-column of character vectors.
canonical <- data.frame(id = paste0("P", 1:3), stringsAsFactors = FALSE)
canonical[["authors"]] <- list(
  c("Alice", "Bob"), c("Alice", "Carol"), c("Bob", "Carol")
)
canonical[["keywords"]] <- list(
  c("ml", "ai"), c("ml", "nlp"), c("ai", "nlp")
)
canonical[["references"]] <- list(
  c("R1", "R2"), c("R1", "R3"), c("R2", "R3")
)
canonical[["countries"]] <- list(
  c("FI", "SE"), c("FI", "DE"), c("SE", "DE")
)
canonical[["affiliations"]] <- list(
  c("UEF", "KTH"), c("UEF", "TUM"), c("KTH", "TUM")
)

# The same records as a custom CSV would deliver them: arbitrary column names
# holding ", "-joined strings (toString() collapses with ", "). A list-valued
# `references` column (wrapped in I()) is carried alongside the string columns.
custom <- data.frame(
  id = canonical$id,
  `Author Names` = vapply(canonical$authors, toString, character(1)),
  Tags = vapply(canonical$keywords, toString, character(1)),
  `Cited Refs` = vapply(canonical$references, toString, character(1)),
  Nations = vapply(canonical$countries, toString, character(1)),
  Orgs = vapply(canonical$affiliations, toString, character(1)),
  references = I(canonical$references),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Expectation helper: two networks are considered the same when their edge
# lists agree on from/to/weight/count once both are sorted by (from, to).
# Attributes (row names, classes, metadata) are ignored in the comparison.
expect_same_network <- function(a, b) {
  edge_cols <- c("from", "to", "weight", "count")
  normalise <- function(net) {
    edges <- as.data.frame(net)
    row_order <- order(edges$from, edges$to)
    edges[row_order, edge_cols]
  }
  expect_equal(normalise(a), normalise(b), ignore_attr = TRUE)
}

## ── Builders accept custom field + sep ──────────────────────────────────────

test_that("author_network works with a custom column and separator", {
  # Baseline from list-columns vs. the comma-joined "Author Names" column.
  from_lists <- author_network(canonical, "collaboration")
  from_strings <- author_network(
    custom, "collaboration",
    authors = "Author Names",
    sep = ","
  )
  expect_same_network(from_lists, from_strings)
})

test_that("author_network positional counting respects custom field/sep order", {
  # Harmonic counting is requested on both inputs; the two edge lists must agree.
  from_lists <- author_network(canonical, "collaboration",
                               counting = "harmonic")
  from_strings <- author_network(
    custom, "collaboration",
    authors = "Author Names",
    sep = ",",
    counting = "harmonic"
  )
  expect_same_network(from_lists, from_strings)
})

test_that("author_network attention works with custom field/sep", {
  # Same `attention = "lead"` request on the list-column and string inputs.
  from_lists <- author_network(canonical, attention = "lead")
  from_strings <- author_network(
    custom,
    authors = "Author Names",
    sep = ",",
    attention = "lead"
  )
  expect_same_network(from_lists, from_strings)
})

test_that("author_network coupling works with custom author field", {
  # `custom` carries a list-valued `references` column next to its string
  # author column, so only the author field differs between the two inputs.
  from_lists <- author_network(canonical, "coupling")
  from_strings <- author_network(
    custom, "coupling",
    authors = "Author Names",
    sep = ","
  )
  expect_same_network(from_lists, from_strings)
})

test_that("author_network splits ' and ' separated strings", {
  # A multi-character, word-based separator must split like any other.
  papers <- data.frame(
    id = 1:2,
    authors = c("Alice and Bob", "Alice and Carol"),
    stringsAsFactors = FALSE
  )
  net <- author_network(papers, sep = " and ")
  node_labels <- unique(c(net$from, net$to))
  expect_setequal(node_labels, c("ALICE", "BOB", "CAROL"))
})

test_that("keyword_network works with a custom column and separator", {
  # Keywords read from the comma-joined "Tags" column must match the baseline.
  from_lists <- keyword_network(canonical)
  from_strings <- keyword_network(
    custom,
    keywords = "Tags",
    sep = ","
  )
  expect_same_network(from_lists, from_strings)
})

test_that("reference_network works with a custom column and separator", {
  # References read from the comma-joined "Cited Refs" column.
  from_lists <- reference_network(canonical)
  from_strings <- reference_network(
    custom,
    references = "Cited Refs",
    sep = ","
  )
  expect_same_network(from_lists, from_strings)
})

test_that("document_network coupling works with a custom column", {
  # Coupling computed from "Cited Refs" strings vs. the list-column baseline.
  from_lists <- document_network(canonical, "coupling")
  from_strings <- document_network(
    custom, "coupling",
    references = "Cited Refs",
    sep = ","
  )
  expect_same_network(from_lists, from_strings)
})

test_that("document_network direct citation works with a custom column", {
  # A cites nothing, B cites A, C cites A and B: three citation links in total.
  papers <- data.frame(
    id = c("A", "B", "C"),
    cites = c("", "A", "A, B"),
    stringsAsFactors = FALSE
  )
  net <- document_network(papers, "citation", references = "cites", sep = ",")
  expect_equal(nrow(net), 3L)
  cited_by_c <- net$to[net$from == "C"]
  expect_setequal(cited_by_c, c("A", "B"))
})

test_that("country_network works with a custom column and separator", {
  # Countries read from the comma-joined "Nations" column.
  from_lists <- country_network(canonical, "collaboration")
  from_strings <- country_network(
    custom, "collaboration",
    countries = "Nations",
    sep = ","
  )
  expect_same_network(from_lists, from_strings)
})

test_that("institution_network works with a custom column and separator", {
  # Affiliations read from the comma-joined "Orgs" column.
  from_lists <- institution_network(canonical, "collaboration")
  from_strings <- institution_network(
    custom, "collaboration",
    affiliations = "Orgs",
    sep = ","
  )
  expect_same_network(from_lists, from_strings)
})

test_that("source_network works with a custom source column", {
  # Identical journal assignment under the default name and a custom name.
  journals <- c("J1", "J2", "J1")
  with_default_col <- canonical
  with_default_col$journal <- journals
  with_custom_col <- custom
  with_custom_col[["Source title"]] <- journals

  from_default <- source_network(with_default_col, "coupling")
  from_custom <- source_network(with_custom_col, "coupling",
                                journal = "Source title")
  expect_same_network(from_default, from_custom)
})

test_that("local_citations and historiograph work with a custom column", {
  # A is cited by B and C; B is cited by C -> A has 2 local citations and the
  # citation structure has 3 links.
  papers <- data.frame(
    id = c("A", "B", "C"),
    cites = c("", "A", "A, B"),
    year = c(2000L, 2005L, 2010L),
    stringsAsFactors = FALSE
  )

  counts <- local_citations(papers, references = "cites", sep = ",")
  lcs_of_a <- counts$lcs[counts$id == "A"]
  expect_equal(lcs_of_a, 2L)

  histo <- historiograph(papers, references = "cites", sep = ",")
  expect_equal(nrow(histo$edges), 3L)
})

## ── String columns in builders that previously assumed list-columns ─────────

test_that("positional counting on a string author column splits correctly", {
  # Regression: build_author_bipartite used to treat each string cell as a
  # single author instead of splitting it.
  papers <- data.frame(
    id = 1:2,
    authors = c("Alice; Bob; Carol", "Alice; Carol"),
    stringsAsFactors = FALSE
  )

  # "first" counting keeps only first authors -> Alice alone, hence no edges.
  first_only <- author_network(papers, counting = "first")
  first_nodes <- unique(c(first_only$from, first_only$to))
  expect_setequal(first_nodes, character(0))

  # Harmonic counting keeps everyone, so all three authors appear as nodes.
  harmonic <- author_network(papers, counting = "harmonic")
  harmonic_nodes <- unique(c(harmonic$from, harmonic$to))
  expect_setequal(harmonic_nodes, c("ALICE", "BOB", "CAROL"))
})

## ── Wrong-separator guard ───────────────────────────────────────────────────

test_that("a structural wrong separator triggers a warning", {
  # Pipe-delimited authors are read with the default separator, so nothing
  # splits; the expected warning must mention the pipe character.
  piped <- data.frame(
    id = 1:3,
    authors = c("Smith J| Doe A", "Smith J| Lee K", "Doe A| Lee K"),
    stringsAsFactors = FALSE
  )
  expect_warning(author_network(piped), "\\|")
})

test_that("the correct separator produces no warning", {
  # Semicolon-delimited authors read with the default settings.
  well_formed <- data.frame(
    id = 1:3,
    authors = c("Smith J; Doe A", "Smith J; Lee K", "Doe A; Lee K"),
    stringsAsFactors = FALSE
  )
  expect_no_warning(author_network(well_formed))
})

test_that("single-entity columns without alt separators stay silent", {
  # One keyword per row and no candidate delimiter characters anywhere.
  single_keywords <- data.frame(
    id = 1:3,
    keywords = c("ml", "ai", "nlp"),
    stringsAsFactors = FALSE
  )
  expect_no_warning(keyword_network(single_keywords))
})

test_that("valid 'Last, First' single-author data does not warn", {
  # The comma here belongs to the name itself; it is not a mistaken delimiter.
  last_first <- data.frame(
    id = 1:3,
    authors = c("Smith, John", "Doe, Jane", "Lee, Kim"),
    stringsAsFactors = FALSE
  )
  expect_no_warning(author_network(last_first))
})

test_that("one-reference-per-row strings with commas do not warn", {
  # Each row holds exactly one reference whose text happens to contain commas.
  one_ref_each <- data.frame(
    id = 1:3,
    references = c(
      "Smith J, 2020, Journal X",
      "Doe A, 2019, Journal Y",
      "Lee K, 2021, Journal Z"
    ),
    stringsAsFactors = FALSE
  )
  expect_no_warning(reference_network(one_ref_each))
})

test_that("' and '-joined organisation names do not warn", {
  # " and " inside a single label must not be mistaken for a separator.
  org_names <- data.frame(
    id = 1:3,
    keywords = c("Smith and Sons", "Black and Decker", "Marks and Spencer"),
    stringsAsFactors = FALSE
  )
  expect_no_warning(keyword_network(org_names))
})

## ── read_biblio generic reader ──────────────────────────────────────────────

test_that("read_generic warns about misspelled actor columns", {
  # Write a tiny pipe-delimited CSV, then request a list column ("Authorz")
  # that does not exist in it; the warning must name the offending column.
  tmp <- tempfile(fileext = ".csv")
  # add = TRUE: append the cleanup instead of replacing any exit handler that
  # is already registered on this frame.
  on.exit(unlink(tmp), add = TRUE)
  utils::write.csv(data.frame(id = 1:2,
                              Authors = c("A| B", "A| C"),
                              stringsAsFactors = FALSE),
                   tmp, row.names = FALSE)
  expect_warning(
    read_biblio(tmp, format = "generic", list_cols = "Authorz", sep = "|"),
    "Authorz"
  )
})

test_that("read_generic end-to-end: custom CSV to author network", {
  # Round trip: CSV on disk -> read_biblio() with a custom id column and a
  # pipe-separated list column -> author_network() on the resulting data.
  tmp <- tempfile(fileext = ".csv")
  # add = TRUE: append the cleanup instead of replacing any exit handler that
  # is already registered on this frame.
  on.exit(unlink(tmp), add = TRUE)
  utils::write.csv(data.frame(PaperID = c("P1", "P2"),
                              Authors = c("Alice| Bob", "Alice| Carol"),
                              stringsAsFactors = FALSE),
                   tmp, row.names = FALSE)
  d <- read_biblio(tmp, format = "generic", id = "PaperID",
                   list_cols = "Authors", sep = "|")
  out <- author_network(d, authors = "Authors")
  expect_setequal(unique(c(out$from, out$to)), c("ALICE", "BOB", "CAROL"))
})

## ── Regression: previously dropped arguments now honored ────────────────────

test_that("author_network co_citation honors self_loops", {
  # Two papers citing the same first authors; with self_loops = TRUE at least
  # one edge must connect a node to itself.
  papers <- data.frame(id = c("P1", "P2"), stringsAsFactors = FALSE)
  papers$authors <- list("X", "Y")
  papers$cited_first_authors <- list(c("A", "B"), c("A", "B"))

  net <- author_network(papers, "co_citation", self_loops = TRUE)
  is_self_loop <- net$from == net$to
  expect_true(any(is_self_loop))
})

test_that("author_network equivalence forwards deduplicate", {
  # P1 lists author "A" twice; the A-B weight is expected to differ depending
  # on whether duplicates are collapsed (default) or kept.
  d <- data.frame(id = c("P1", "P2"), stringsAsFactors = FALSE)
  d$authors <- list(c("A", "A", "B"), c("A", "B"))
  dedup <- author_network(d, "equivalence")
  raw <- author_network(d, "equivalence", deduplicate = FALSE)
  w_dedup <- dedup$weight[dedup$from == "A" & dedup$to == "B"]
  w_raw <- raw$weight[raw$from == "A" & raw$to == "B"]
  # Guard against a vacuous pass: all.equal() also reports a difference when
  # only one of the selections is empty, which would hide a missing A-B edge.
  expect_gt(length(w_dedup), 0)
  expect_gt(length(w_raw), 0)
  expect_false(isTRUE(all.equal(w_dedup, w_raw)))
})

test_that("keyword_network 'field' still works but warns deprecated", {
  # The legacy `field` argument must keep producing a network while emitting
  # a warning that mentions deprecation.
  tagged <- data.frame(
    id = 1:3,
    Tags = c("ml; ai", "ml; nlp", "ai; nlp"),
    stringsAsFactors = FALSE
  )
  expect_warning(
    net <- keyword_network(tagged, field = "Tags"),
    "deprecated"
  )
  node_labels <- unique(c(net$from, net$to))
  expect_setequal(node_labels, c("AI", "ML", "NLP"))
})

## ── references_sep: references column with a custom separator ────────────────

test_that("author_network coupling honors references_sep", {
  # Authors AND references are both comma-separated strings here. Paper by
  # paper, the data mirrors the `canonical` fixture (same authors, same
  # references), only encoded as strings.
  d <- data.frame(
    id = c("P1", "P2", "P3"),
    auth = c("Alice, Bob", "Alice, Carol", "Bob, Carol"),
    references = c("R1, R2", "R1, R3", "R2, R3"),
    stringsAsFactors = FALSE
  )
  out <- author_network(d, "coupling", authors = "auth", sep = ",",
                        references_sep = ",")
  expect_true(nrow(out) > 0)
  expect_true(all(out$count >= 1))
  expect_equal(attr(out, "network_type"), "author_coupling")
  # Non-emptiness alone cannot show that the references were split: unsplit
  # "R1, R2"-style strings may still be shared between co-authors. Splitting
  # on references_sep must instead reproduce the list-column coupling network.
  expect_same_network(author_network(canonical, "coupling"), out)
})

test_that("references_sep default ';' unchanged for standard data", {
  # List-column authors plus ';'-separated reference strings, with no
  # references_sep supplied: coupling must still produce edges.
  papers <- data.frame(id = c("P1", "P2", "P3"), stringsAsFactors = FALSE)
  papers$auth <- list(
    c("Alice", "Bob"), c("Alice", "Carol"), c("Bob", "Carol")
  )
  papers$references <- c("R1; R2", "R1; R3", "R2; R3")

  net <- author_network(papers, "coupling", authors = "auth")
  expect_true(nrow(net) > 0)
})

test_that("wrong references separator collapses refs (the bug we fixed)", {
  # Without references_sep, comma refs split on ';' -> one giant ref per paper
  # -> every paper shares the same single (distinct) ref with none other,
  # so coupling is empty. With references_sep=',' it is non-empty.
  d <- data.frame(
    id = c("P1", "P2"),
    auth = c("Alice, Bob", "Alice, Carol"),
    references = c("R1, R2", "R1, R3"),
    stringsAsFactors = FALSE
  )
  # Wrong sep: references split on ';' -> each paper is one giant ref
  # ("R1, R2" vs "R1, R3"), no shared ref -> empty coupling.
  suppressWarnings(
    wrong <- author_network(d, "coupling", authors = "auth", sep = ",")
  )
  # Correct sep: papers share R1 -> non-empty coupling.
  fixed <- author_network(d, "coupling", authors = "auth", sep = ",",
                          references_sep = ",")
  expect_true(nrow(fixed) > nrow(wrong))
})

## ── strip_quotes: quoted entities ───────────────────────────────────────────

test_that("strip_quotes removes surrounding quotes from split strings", {
  # Every author is wrapped in double quotes inside the ';'-separated string.
  quoted <- data.frame(
    id = 1:3,
    authors = c('"Alice"; "Bob"', '"Alice"; "Carol"', '"Bob"; "Carol"'),
    stringsAsFactors = FALSE
  )
  net <- author_network(quoted)
  node_labels <- unique(c(net$from, net$to))
  expect_setequal(node_labels, c("ALICE", "BOB", "CAROL"))
})

test_that("strip_quotes handles CSV doubled quotes \"\"", {
  # Keywords wrapped in doubled double-quotes, as CSV escaping can leave them.
  doubled <- data.frame(
    id = 1:2,
    keywords = c('""ml""; ""ai""', '""ml""; ""nlp""'),
    stringsAsFactors = FALSE
  )
  net <- keyword_network(doubled)
  node_labels <- unique(c(net$from, net$to))
  expect_setequal(node_labels, c("ML", "AI", "NLP"))
})

test_that("strip_quotes applies to provided list-columns too", {
  # Quoted labels arrive already split, as elements of a list-column.
  papers <- data.frame(id = 1:3, stringsAsFactors = FALSE)
  papers$authors <- list(
    c('"Alice"', '"Bob"'),
    c('"Alice"', '"Carol"'),
    c('"Bob"', '"Carol"')
  )
  net <- author_network(papers)
  node_labels <- unique(c(net$from, net$to))
  expect_setequal(node_labels, c("ALICE", "BOB", "CAROL"))
})

test_that("strip_quotes = FALSE keeps quotes as part of the label", {
  # With stripping switched off, at least one node label must still contain
  # a double-quote character.
  quoted <- data.frame(
    id = 1:2,
    keywords = c('"ml"; "ai"', '"ml"; "nlp"'),
    stringsAsFactors = FALSE
  )
  net <- keyword_network(quoted, strip_quotes = FALSE)
  node_labels <- unique(c(net$from, net$to))
  has_quote <- grepl('"', node_labels, fixed = TRUE)
  expect_true(any(has_quote))
})

test_that("strip_quotes leaves internal apostrophes intact", {
  # The apostrophe in O'Brien sits inside the label and must survive.
  papers <- data.frame(
    id = 1:2,
    authors = c("O'Brien; Smith", "O'Brien; Jones"),
    stringsAsFactors = FALSE
  )
  net <- author_network(papers)
  all_endpoints <- c(net$from, net$to)
  expect_true("O'BRIEN" %in% all_endpoints)
})

test_that("strip_quotes works on a scalar source/journal column", {
  # The journal column holds one quoted value per row (no separator involved).
  papers <- data.frame(
    id = c("P1", "P2", "P3"),
    journal = c('"J1"', '"J2"', '"J1"'),
    stringsAsFactors = FALSE
  )
  papers$references <- list(c("R1", "R2"), c("R1", "R3"), c("R2", "R3"))

  net <- source_network(papers, "coupling")
  node_labels <- unique(c(net$from, net$to))
  has_quote <- grepl('"', node_labels, fixed = TRUE)
  expect_false(any(has_quote))
  expect_true(all(node_labels %in% c("J1", "J2")))
})

test_that("historiograph forwards strip_quotes consistently to LCS", {
  # References are quoted ids. With the default stripping they should resolve
  # to the paper ids, so edges are found, and every edge target must be a
  # paper that local_citations() reports as cited.
  papers <- data.frame(
    id = c("A", "B", "C"),
    references = c("", '"A"', '"A"; "B"'),
    year = c(2000L, 2005L, 2010L),
    stringsAsFactors = FALSE
  )

  histo <- historiograph(papers, min_lcs = 1)
  expect_true(nrow(histo$edges) >= 2)

  counts <- local_citations(papers)
  cited_ids <- counts$id[counts$lcs > 0]
  expect_true(all(histo$edges$to %in% cited_ids))
})