## Extra tests for R/bipartite.R — targets build_bipartite_long() and
## edge-case branches of build_bipartite() to raise coverage from 53.57% → ≥85%.
##
## All tests run under devtools::test() which exposes internal functions.

# ── build_bipartite_long() — basic usage ────────────────────────────────────

test_that("build_bipartite_long returns a dgCMatrix with correct dims", {
  edges <- data.frame(
    source = c("W1", "W1", "W2", "W3"),
    target = c("R1", "R2", "R1", "R3"),
    stringsAsFactors = FALSE
  )
  B <- build_bipartite_long(edges)

  expect_true(is(B, "dgCMatrix"))
  expect_equal(nrow(B), 3L)   # W1, W2, W3
  expect_equal(ncol(B), 3L)   # R1, R2, R3
})

test_that("build_bipartite_long uppercases and trims row/col names", {
  edges <- data.frame(
    source = c("  w1 ", "w2"),
    target = c("  r1 ", "r2"),
    stringsAsFactors = FALSE
  )
  B <- build_bipartite_long(edges)

  expect_equal(rownames(B), c("W1", "W2"))
  expect_equal(colnames(B), c("R1", "R2"))
})

test_that("build_bipartite_long assigns 1s in correct cells", {
  edges <- data.frame(
    source = c("A", "A", "B"),
    target = c("X", "Y", "X"),
    stringsAsFactors = FALSE
  )
  B <- build_bipartite_long(edges)

  # rows sorted: A, B; cols sorted: X, Y
  expect_equal(as.numeric(B["A", ]), c(1, 1))
  expect_equal(as.numeric(B["B", ]), c(1, 0))
})

test_that("build_bipartite_long sorts row and column names lexicographically", {
  edges <- data.frame(
    source = c("W3", "W1", "W2"),
    target = c("RC", "RA", "RB"),
    stringsAsFactors = FALSE
  )
  B <- build_bipartite_long(edges)

  expect_equal(rownames(B), c("W1", "W2", "W3"))
  expect_equal(colnames(B), c("RA", "RB", "RC"))
})

# ── build_bipartite_long() — min_freq filtering ─────────────────────────────

test_that("build_bipartite_long filters targets by min_freq", {
  edges <- data.frame(
    source = c("W1", "W2", "W3", "W4"),
    target = c("R1", "R1", "R2", "R3"),
    stringsAsFactors = FALSE
  )
  # R1 appears 2×, R2 1×, R3 1×
  B2 <- build_bipartite_long(edges, min_freq = 2L)

  expect_equal(ncol(B2), 1L)
  expect_equal(colnames(B2), "R1")
})

test_that("build_bipartite_long min_freq=1 keeps all targets", {
  edges <- data.frame(
    source = c("W1", "W2", "W3"),
    target = c("R1", "R2", "R3"),
    stringsAsFactors = FALSE
  )
  B <- build_bipartite_long(edges, min_freq = 1L)
  expect_equal(ncol(B), 3L)
})

test_that("build_bipartite_long min_freq=3 keeps only targets appearing 3+ times", {
  edges <- data.frame(
    source = c("W1", "W2", "W3", "W4", "W5"),
    target = c("R1", "R1", "R1", "R2", "R2"),
    stringsAsFactors = FALSE
  )
  B3 <- build_bipartite_long(edges, min_freq = 3L)
  expect_equal(ncol(B3), 1L)
  expect_equal(colnames(B3), "R1")
})

# ── build_bipartite_long() — NA / empty handling ────────────────────────────

test_that("build_bipartite_long drops rows where source or target is NA", {
  edges <- data.frame(
    source = c("W1", NA_character_,  "W3"),
    target = c("R1", "R2",           NA_character_),
    stringsAsFactors = FALSE
  )
  B <- build_bipartite_long(edges)

  # Only W1→R1 survives
  expect_equal(nrow(B), 1L)
  expect_equal(ncol(B), 1L)
  expect_equal(rownames(B), "W1")
  expect_equal(colnames(B), "R1")
})

test_that("build_bipartite_long drops rows with empty-string source or target", {
  edges <- data.frame(
    source = c("W1", "",  "W3"),
    target = c("",   "R2", "R3"),
    stringsAsFactors = FALSE
  )
  B <- build_bipartite_long(edges)

  # W1→"" and ""→R2 are both dropped; only W3→R3 survives
  expect_equal(nrow(B), 1L)
  expect_equal(rownames(B), "W3")
  expect_equal(colnames(B), "R3")
})

# ── build_bipartite_long() — error paths ────────────────────────────────────

test_that("build_bipartite_long errors when 'source' column is missing", {
  bad <- data.frame(from = "W1", target = "R1", stringsAsFactors = FALSE)
  expect_error(build_bipartite_long(bad))
})

test_that("build_bipartite_long errors when 'target' column is missing", {
  bad <- data.frame(source = "W1", to = "R1", stringsAsFactors = FALSE)
  expect_error(build_bipartite_long(bad))
})

test_that("build_bipartite_long errors when input is not a data frame", {
  expect_error(build_bipartite_long(list(source = "W1", target = "R1")))
})

# ── build_bipartite_long() — single-row edge ────────────────────────────────

test_that("build_bipartite_long handles a single-row input", {
  edges <- data.frame(source = "W1", target = "R1", stringsAsFactors = FALSE)
  B <- build_bipartite_long(edges)

  expect_equal(dim(B), c(1L, 1L))
  expect_equal(B[1L, 1L], 1)
})

# ── build_bipartite_long() — many-to-one / one-to-many ──────────────────────

test_that("build_bipartite_long handles multiple sources sharing same target", {
  edges <- data.frame(
    source = c("W1", "W2", "W3"),
    target = c("R1", "R1", "R1"),
    stringsAsFactors = FALSE
  )
  B <- build_bipartite_long(edges)

  expect_equal(ncol(B), 1L)
  expect_equal(sum(B), 3)   # each source has one 1
})

test_that("build_bipartite_long handles one source pointing to many targets", {
  edges <- data.frame(
    source = c("W1", "W1", "W1"),
    target = c("R1", "R2", "R3"),
    stringsAsFactors = FALSE
  )
  B <- build_bipartite_long(edges)

  expect_equal(nrow(B), 1L)
  expect_equal(sum(B["W1", ]), 3)
})

# ── build_bipartite() — deduplicate = FALSE ──────────────────────────────────
# (the TRUE branch is already covered 135× by existing tests; FALSE is new)

test_that("build_bipartite deduplicate=FALSE keeps repeated (paper, entity) pairs", {
  d <- data.frame(id = "W1", stringsAsFactors = FALSE)
  d$kw <- list(c("ML", "ML", "DL"))  # ML appears twice

  B_dup   <- build_bipartite(d, "kw", deduplicate = FALSE)
  B_dedup <- build_bipartite(d, "kw", deduplicate = TRUE)

  # With dedup TRUE: ML=1, DL=1 (binary)
  expect_equal(as.numeric(B_dedup["W1", "ML"]), 1)
  # With dedup FALSE: ML=2, DL=1 (counts raw occurrences)
  expect_equal(as.numeric(B_dup["W1", "ML"]), 2)
  expect_equal(as.numeric(B_dup["W1", "DL"]), 1)
})

test_that("build_bipartite deduplicate=FALSE with no repeats matches deduplicate=TRUE", {
  d <- data.frame(id = c("W1", "W2"), stringsAsFactors = FALSE)
  d$kw <- list(c("A", "B"), c("B", "C"))

  B_t <- build_bipartite(d, "kw", deduplicate = TRUE)
  B_f <- build_bipartite(d, "kw", deduplicate = FALSE)

  expect_equal(as.matrix(B_t), as.matrix(B_f))
})

# ── build_bipartite() — ensure_list_column auto-split ───────────────────────

test_that("build_bipartite auto-splits semicolon-delimited character column", {
  d <- data.frame(
    id  = c("W1", "W2"),
    kw  = c("ML; DL", "NLP; CV; ML"),
    stringsAsFactors = FALSE
  )
  # kw is a plain character vector, not a list
  B <- build_bipartite(d, "kw")

  expect_true(is(B, "dgCMatrix"))
  # Uppercased entities: CV, DL, ML, NLP
  expect_true("ML" %in% colnames(B))
  expect_true("DL" %in% colnames(B))
  expect_equal(ncol(B), 4L)
  expect_equal(as.numeric(B["W1", "ML"]), 1)
  expect_equal(as.numeric(B["W2", "NLP"]), 1)
})

# ── build_bipartite() — edge cases ──────────────────────────────────────────

test_that("build_bipartite with min_freq=2 excludes singletons", {
  d <- data.frame(id = c("W1", "W2", "W3"), stringsAsFactors = FALSE)
  d$kw <- list(c("A", "B"), c("A", "C"), c("B", "D"))
  # A: 2, B: 2, C: 1, D: 1 -> keep A and B only

  B <- build_bipartite(d, "kw", min_freq = 2L)
  expect_equal(sort(colnames(B)), c("A", "B"))
})

test_that("build_bipartite with min_freq=3 keeps only universal entities", {
  d <- make_test_data()
  B <- build_bipartite(d, "references", min_freq = 3L)

  expect_equal(colnames(B), "R2")   # R2 is in all 3 papers
  expect_equal(nrow(B), 3L)
})

test_that("build_bipartite entity labels are always uppercased", {
  d <- data.frame(id = "W1", stringsAsFactors = FALSE)
  d$auth <- list(c("alice", "Bob", " CAROL "))

  B <- build_bipartite(d, "auth")
  expect_equal(sort(colnames(B)), c("ALICE", "BOB", "CAROL"))
})

test_that("build_bipartite errors when required column is missing", {
  d <- data.frame(id = "W1", stringsAsFactors = FALSE)
  expect_error(
    build_bipartite(d, "authors"),
    regexp = "authors"
  )
})

test_that("build_bipartite errors when 'id' column is missing", {
  d <- data.frame(authors = I(list(c("Alice"))), stringsAsFactors = FALSE)
  expect_error(
    build_bipartite(d, "authors"),
    regexp = "id"
  )
})

test_that("build_bipartite errors when data is not a data frame", {
  expect_error(
    build_bipartite(list(id = "W1", authors = list("Alice")), "authors"),
    regexp = "data frame"
  )
})

test_that("build_bipartite handles all-empty list-column entries gracefully", {
  d <- data.frame(id = c("W1", "W2"), stringsAsFactors = FALSE)
  d$kw <- list(character(0), character(0))

  B <- build_bipartite(d, "kw")
  expect_equal(ncol(B), 0L)
  expect_equal(nrow(B), 2L)
})

test_that("build_bipartite handles single-record input", {
  d <- data.frame(id = "W1", stringsAsFactors = FALSE)
  d$kw <- list(c("A", "B", "C"))

  B <- build_bipartite(d, "kw")
  expect_equal(nrow(B), 1L)
  expect_equal(ncol(B), 3L)
  expect_equal(sum(B), 3)
})

test_that("build_bipartite handles single entity per record", {
  d <- data.frame(id = c("W1", "W2"), stringsAsFactors = FALSE)
  d$kw <- list("X", "Y")

  B <- build_bipartite(d, "kw")
  expect_equal(nrow(B), 2L)
  expect_equal(ncol(B), 2L)
  expect_equal(sum(diag(as.matrix(B))), 2)
})