# ---- cooccurrence() tests ----

.test_list <- list(c("A", "B", "C"), c("B", "C"), c("A", "C"))

# ========================================
# 1. Returns tidy data.frame
# ========================================

test_that("cooccurrence returns a tidy data.frame", {
  res <- cooccurrence(.test_list)
  expect_s3_class(res, "cooccurrence")
  expect_s3_class(res, "data.frame")
  expect_true(all(c("from", "to", "weight", "count") %in% names(res)))
  expect_true(nrow(res) > 0)
})

test_that("co() alias works", {
  res <- co(.test_list)
  expect_s3_class(res, "cooccurrence")
})

test_that("edges are sorted by weight descending", {
  res <- cooccurrence(.test_list)
  expect_true(all(diff(res$weight) <= 0))
})

# ========================================
# 2. Input formats
# ========================================

test_that("delimited input works", {
  df <- data.frame(items = c("A;B;C", "B;C", "A;C"), stringsAsFactors = FALSE)
  res <- cooccurrence(df, field = "items", sep = ";")
  expect_equal(nrow(res), 3L)  # 3 pairs: A-B, A-C, B-C
})

test_that("multi-column delimited works", {
  df <- data.frame(c1 = c("A;B", "B", "A"), c2 = c("C", "C", "C"),
                   stringsAsFactors = FALSE)
  res <- cooccurrence(df, field = c("c1", "c2"), sep = ";")
  expect_equal(nrow(res), 3L)
})

test_that("long/bipartite works", {
  df <- data.frame(doc = c(1,1,1,2,2,3,3),
                   item = c("A","B","C","B","C","A","C"),
                   stringsAsFactors = FALSE)
  res <- cooccurrence(df, field = "item", by = "doc")
  expect_equal(nrow(res), 3L)
})

test_that("binary matrix works", {
  bin <- matrix(c(1,1,1, 0,1,1, 1,0,1), nrow = 3, byrow = TRUE,
                dimnames = list(NULL, c("A","B","C")))
  res <- cooccurrence(bin)
  expect_equal(nrow(res), 3L)
})

test_that("wide sequence works with field = 'all'", {
  df <- data.frame(V1 = c("A","B","A"), V2 = c("B","C","C"),
                   V3 = c("C", NA, NA), stringsAsFactors = FALSE)
  res <- cooccurrence(df, field = "all")
  expect_equal(nrow(res), 3L)
})

test_that("non-binary data frame without field = 'all' errors", {
  df <- data.frame(V1 = c("A","B","A"), V2 = c("B","C","C"),
                   stringsAsFactors = FALSE)
  expect_error(cooccurrence(df), "field = \"all\"")
})

test_that("list input works", {
  res <- cooccurrence(.test_list)
  expect_equal(nrow(res), 3L)
})

test_that("all formats give same counts", {
  res_list <- cooccurrence(.test_list)

  df_del <- data.frame(items = c("A;B;C", "B;C", "A;C"),
                        stringsAsFactors = FALSE)
  res_del <- cooccurrence(df_del, field = "items", sep = ";")

  # Sort both by from+to for comparison
  key <- function(r) paste(pmin(r$from, r$to), pmax(r$from, r$to))
  r1 <- res_list[order(key(res_list)), ]
  r2 <- res_del[order(key(res_del)), ]

  expect_equal(r1$count, r2$count)
  expect_equal(r1$weight, r2$weight)
})

# ========================================
# 3. Similarity measures
# ========================================

test_that("similarity = 'none' gives raw counts", {
  res <- cooccurrence(.test_list, similarity = "none")
  # A-C co-occur 2 times, so weight = 2
  ac <- res[res$from == "A" & res$to == "C", ]
  expect_equal(ac$weight, 2)
  expect_equal(ac$count, 2L)
})

test_that("similarity = 'jaccard' is correct", {
  res <- cooccurrence(.test_list, similarity = "jaccard")
  ab <- res[res$from == "A" & res$to == "B", ]
  # Jaccard(A,B) = 1 / (2+2-1) = 1/3
  expect_equal(ab$weight, 1 / 3)
})

test_that("similarity = 'cosine' is correct", {
  res <- cooccurrence(.test_list, similarity = "cosine")
  ab <- res[res$from == "A" & res$to == "B", ]
  expect_equal(ab$weight, 0.5)
})

test_that("similarity = 'association' is correct", {
  res <- cooccurrence(.test_list, similarity = "association")
  ab <- res[res$from == "A" & res$to == "B", ]
  expect_equal(ab$weight, 0.25)
})

test_that("similarity = 'equivalence' is correct", {
  res <- cooccurrence(.test_list, similarity = "equivalence")
  ab <- res[res$from == "A" & res$to == "B", ]
  expect_equal(ab$weight, 0.25)
})

test_that("similarity = 'dice' is correct", {
  res <- cooccurrence(.test_list, similarity = "dice")
  ab <- res[res$from == "A" & res$to == "B", ]
  expect_equal(ab$weight, 0.5)
})

test_that("similarity = 'inclusion' is correct", {
  res <- cooccurrence(.test_list, similarity = "inclusion")
  ac <- res[res$from == "A" & res$to == "C", ]
  expect_equal(ac$weight, 1)
})

test_that("similarity = 'relative' row-normalizes", {
  res <- cooccurrence(.test_list, similarity = "relative")
  mat <- as_matrix(res)
  expect_true(all(abs(rowSums(mat) - 1) < 1e-10 | rowSums(mat) == 0))
})

# ========================================
# 4. Scaling
# ========================================

test_that("scale = 'minmax' scales to [0,1]", {
  res <- cooccurrence(.test_list, scale = "minmax")
  expect_true(max(res$weight) <= 1)
  expect_true(min(res$weight) >= 0)
})

test_that("scale = 'log' applies log(1+w)", {
  res_raw <- cooccurrence(.test_list)
  res_log <- cooccurrence(.test_list, scale = "log")
  expect_equal(res_log$weight, log(1 + res_raw$weight))
})

test_that("scale = 'binary' gives 0/1 weights", {
  res <- cooccurrence(.test_list, scale = "binary")
  expect_true(all(res$weight %in% c(0, 1)))
})

test_that("scale = 'sqrt' applies square root", {
  res_raw <- cooccurrence(.test_list)
  res_sqrt <- cooccurrence(.test_list, scale = "sqrt")
  expect_equal(res_sqrt$weight, sqrt(res_raw$weight))
})

test_that("scale = 'zscore' standardizes", {
  res <- cooccurrence(.test_list, scale = "zscore")
  vals <- res$weight[res$weight != 0]
  if (length(vals) > 1) {
    expect_equal(mean(vals), 0, tolerance = 1e-10)
  }
})

test_that("scale = 'proportion' normalizes by total weight", {
  res <- cooccurrence(.test_list, scale = "proportion")
  # The full symmetric matrix sums to 1 (upper + lower triangle)
  mat <- as_matrix(res)
  expect_equal(sum(mat), 1, tolerance = 1e-10)
})

# ========================================
# 5. Parameters
# ========================================

test_that("threshold filters edges", {
  res <- cooccurrence(.test_list, threshold = 2)
  expect_true(all(res$weight >= 2))
  expect_equal(nrow(res), 2L)  # Only A-C and B-C have count >= 2
})

test_that("min_occur drops rare entities", {
  trans <- list(c("A","B"), c("A","B"), c("A","D"))
  res <- cooccurrence(trans, min_occur = 2L)
  expect_false("D" %in% c(res$from, res$to))
})

test_that("top_n limits edges", {
  res <- cooccurrence(.test_list, top_n = 1L)
  expect_equal(nrow(res), 1L)
})

# ========================================
# 6. Attributes
# ========================================

test_that("attributes are stored", {
  res <- cooccurrence(.test_list, similarity = "jaccard", scale = "log")
  expect_equal(attr(res, "similarity"), "jaccard")
  expect_equal(attr(res, "scale"), "log")
  expect_equal(attr(res, "n_transactions"), 3L)
  expect_equal(attr(res, "n_items"), 3L)
  expect_true(!is.null(attr(res, "matrix")))
  expect_true(!is.null(attr(res, "items")))
  expect_true(!is.null(attr(res, "frequencies")))
})

# ========================================
# 7. Converters
# ========================================

test_that("as_matrix returns correct matrix", {
  res <- cooccurrence(.test_list)
  mat <- as_matrix(res)
  expect_true(is.matrix(mat))
  expect_equal(nrow(mat), 3L)
  expect_equal(ncol(mat), 3L)
  expect_true(isSymmetric(mat))
})

test_that("as_matrix type='raw' gives counts", {
  res <- cooccurrence(.test_list, similarity = "jaccard")
  raw <- as_matrix(res, type = "raw")
  expect_equal(raw["A", "B"], 1)
  expect_equal(raw["A", "C"], 2)
})

test_that("as_igraph works", {
  skip_if_not_installed("igraph")
  res <- cooccurrence(.test_list)
  g <- as_igraph(res)
  expect_true(igraph::is_igraph(g))
  expect_equal(igraph::vcount(g), 3L)
  expect_false(igraph::is_directed(g))
})

test_that("as_cograph works", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list)
  net <- as_cograph(res)
  expect_true(inherits(net, "cograph_network"))
  expect_equal(net$n_nodes, 3L)
})

test_that("as_netobject works", {
  skip_if_not_installed("Nestimate")
  res <- cooccurrence(.test_list)
  net <- as_netobject(res)
  expect_true(inherits(net, "netobject"))
  expect_true(inherits(net, "cograph_network"))
  expect_false(net$directed)
})

test_that("as_tidygraph works", {
  skip_if_not_installed("tidygraph")
  skip_if_not_installed("igraph")
  res <- cooccurrence(.test_list)
  tg <- as_tidygraph(res)
  expect_true(inherits(tg, "tbl_graph"))
})

# ========================================
# 8. Edge cases
# ========================================

test_that("error on empty input", {
  expect_error(cooccurrence(list()), "No non-empty transactions")
})

test_that("single-item transactions give no edges", {
  res <- cooccurrence(list(c("A"), c("B"), c("C")))
  expect_equal(nrow(res), 0L)
})

test_that("count is always the raw co-occurrence", {
  res <- cooccurrence(.test_list, similarity = "jaccard", scale = "log")
  ab <- res[res$from == "A" & res$to == "B", ]
  expect_equal(ab$count, 1L)
})

# ========================================
# 9. split_by
# ========================================

test_that("split_by produces a group column", {
  df <- data.frame(
    year = c(2020, 2020, 2020, 2021, 2021, 2021),
    kw = c("A; B; C", "B; C", "A; C", "B; C; D", "C; D", "B; D"),
    stringsAsFactors = FALSE
  )
  res <- cooccurrence(df, field = "kw", sep = ";", split_by = "year")
  expect_s3_class(res, "cooccurrence")
  expect_true("group" %in% names(res))
  expect_equal(sort(unique(res$group)), c("2020", "2021"))
})

test_that("split_by computes separate networks per group", {
  df <- data.frame(
    grp = c("X", "X", "Y", "Y"),
    kw = c("A; B", "A; B", "C; D", "C; D"),
    stringsAsFactors = FALSE
  )
  res <- cooccurrence(df, field = "kw", sep = ";", split_by = "grp")
  # Group X has only A-B; group Y has only C-D
  res_x <- res[res$group == "X", ]
  res_y <- res[res$group == "Y", ]
  expect_equal(nrow(res_x), 1L)
  expect_equal(nrow(res_y), 1L)
  expect_true(all(c(res_x$from, res_x$to) %in% c("A", "B")))
  expect_true(all(c(res_y$from, res_y$to) %in% c("C", "D")))
})

test_that("split_by + similarity works", {
  df <- data.frame(
    grp = c("X", "X", "X", "Y", "Y", "Y"),
    kw = c("A; B; C", "B; C", "A; C", "A; B; C", "B; C", "A; C"),
    stringsAsFactors = FALSE
  )
  res <- cooccurrence(df, field = "kw", sep = ";",
                      split_by = "grp", similarity = "jaccard")
  # Both groups have the same data → same weights
  res_x <- res[res$group == "X", ]
  res_y <- res[res$group == "Y", ]
  expect_equal(sort(res_x$weight), sort(res_y$weight))
})

test_that("split_by + top_n applies per group", {
  df <- data.frame(
    grp = c("X", "X", "X", "Y", "Y", "Y"),
    kw = c("A; B; C", "B; C", "A; C", "D; E; F", "E; F", "D; F"),
    stringsAsFactors = FALSE
  )
  res <- cooccurrence(df, field = "kw", sep = ";",
                      split_by = "grp", top_n = 1L)
  # 1 edge per group → 2 total
  expect_equal(nrow(res), 2L)
  expect_equal(length(unique(res$group)), 2L)
})

test_that("split_by skips groups with no edges", {
  df <- data.frame(
    grp = c("X", "X", "Y"),
    kw = c("A; B", "A; B", "C"),
    stringsAsFactors = FALSE
  )
  res <- cooccurrence(df, field = "kw", sep = ";", split_by = "grp")
  # Y has only one item → no edges → should be absent
  expect_equal(unique(res$group), "X")
})

test_that("split_by stores attributes", {
  df <- data.frame(
    year = c(2020, 2021),
    kw = c("A; B", "A; B"),
    stringsAsFactors = FALSE
  )
  res <- cooccurrence(df, field = "kw", sep = ";", split_by = "year")
  expect_equal(attr(res, "split_by"), "year")
  expect_equal(sort(attr(res, "groups")), c("2020", "2021"))
})

# ========================================
# 10. Output formats
# ========================================

test_that("output = 'gephi' renames columns", {
  res <- cooccurrence(.test_list, output = "gephi")
  expect_true(all(c("Source", "Target", "Weight", "Type", "Count") %in% names(res)))
  expect_false("from" %in% names(res))
  expect_equal(unique(res$Type), "Undirected")
})

test_that("output = 'gephi' with split_by includes group", {
  df <- data.frame(grp = c("X", "X", "Y", "Y"),
                   kw = c("A; B", "A; B", "C; D", "C; D"),
                   stringsAsFactors = FALSE)
  res <- cooccurrence(df, field = "kw", sep = ";",
                      split_by = "grp", output = "gephi")
  expect_true("group" %in% names(res))
  expect_true("Source" %in% names(res))
})

test_that("output = 'matrix' returns a matrix", {
  res <- cooccurrence(.test_list, output = "matrix")
  expect_true(is.matrix(res))
  expect_equal(nrow(res), 3L)
})

test_that("output = 'igraph' returns igraph object", {
  skip_if_not_installed("igraph")
  res <- cooccurrence(.test_list, output = "igraph")
  expect_true(igraph::is_igraph(res))
})

test_that("output = 'cograph' returns cograph_network", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list, output = "cograph")
  expect_true(inherits(res, "cograph_network"))
})

# ========================================
# 11. Counting methods
# ========================================

# Transactions: {A,B,C} (n=3), {B,C} (n=2), {A,C} (n=2)

test_that("counting = 'full' gives raw counts (default)", {
  res <- cooccurrence(.test_list, counting = "full")
  ab <- res[res$from == "A" & res$to == "B", ]
  expect_equal(ab$weight, 1)
  expect_equal(ab$count, 1L)
})

test_that("counting = 'fractional' weights by 1/(n-1)", {
  # {A,B,C}: n=3, each pair gets 1/(3-1) = 0.5
  # {B,C}: n=2, each pair gets 1/(2-1) = 1
  # {A,C}: n=2, each pair gets 1
  # A-B: only in {A,B,C} → 0.5
  # B-C: in {A,B,C}(0.5) + {B,C}(1.0) = 1.5
  # A-C: in {A,B,C}(0.5) + {A,C}(1.0) = 1.5
  res <- cooccurrence(.test_list, counting = "fractional")
  ab <- res[res$from == "A" & res$to == "B", ]
  bc <- res[res$from == "B" & res$to == "C", ]
  ac <- res[res$from == "A" & res$to == "C", ]
  expect_equal(ab$weight, 0.5, tolerance = 1e-10)
  expect_equal(bc$weight, 1.5, tolerance = 1e-10)
  expect_equal(ac$weight, 1.5, tolerance = 1e-10)
  # count is always raw
  expect_equal(ab$count, 1L)
  expect_equal(bc$count, 2L)
})

test_that("counting = 'fractional' + similarity works", {
  res <- cooccurrence(.test_list, counting = "fractional", similarity = "jaccard")
  expect_true(nrow(res) > 0)
  # Weights should differ from full counting
  res_full <- cooccurrence(.test_list, counting = "full", similarity = "jaccard")
  expect_false(isTRUE(all.equal(res$weight, res_full$weight)))
})

test_that("counting with long/bipartite format", {
  df <- data.frame(
    doc = c(1, 1, 1, 2, 2, 3, 3),
    item = c("A", "B", "C", "B", "C", "A", "C"),
    stringsAsFactors = FALSE
  )
  res <- cooccurrence(df, field = "item", by = "doc", counting = "fractional")
  ab <- res[res$from == "A" & res$to == "B", ]
  expect_equal(ab$weight, 0.5, tolerance = 1e-10)
})

# ========================================
# 11b. field without sep (field_only format)
# ========================================

test_that("field without sep treats each column value as one item", {
  df <- data.frame(g1 = c("A", "B", "A"), g2 = c("B", "C", "C"),
                   stringsAsFactors = FALSE)
  expect_warning(res <- cooccurrence(df, field = c("g1", "g2")),
                 "without `sep`")
  expect_s3_class(res, "cooccurrence")
  expect_equal(nrow(res), 3L)
  expect_true(all(c("A", "B", "C") %in% c(res$from, res$to)))
})

test_that("field without sep ignores other columns", {
  df <- data.frame(id = 1:3, x = c("A", "B", "A"), y = c("B", "C", "C"),
                   stringsAsFactors = FALSE)
  expect_warning(res <- cooccurrence(df, field = c("x", "y")),
                 "without `sep`")
  items <- unique(c(res$from, res$to))
  expect_false(any(items %in% c("1", "2", "3")))
})

test_that("single field without sep gives one-item transactions (no edges)", {
  df <- data.frame(genre = c("Action", "Comedy", "Action"),
                   stringsAsFactors = FALSE)
  expect_warning(res <- cooccurrence(df, field = "genre"), "without `sep`")
  expect_equal(nrow(res), 0L)
})

test_that("field without sep matches multi_delimited with no delimiters", {
  df <- data.frame(g1 = c("A", "B", "A"), g2 = c("B", "C", "C"),
                   stringsAsFactors = FALSE)
  expect_warning(res_field <- cooccurrence(df, field = c("g1", "g2")),
                 "without `sep`")
  res_multi <- cooccurrence(df, field = c("g1", "g2"), sep = ";")
  key <- function(r) paste(pmin(r$from, r$to), pmax(r$from, r$to))
  r1 <- res_field[order(key(res_field)), ]
  r2 <- res_multi[order(key(res_multi)), ]
  expect_equal(r1$count, r2$count)
  expect_equal(r1$weight, r2$weight)
})

test_that("field without sep works with similarity", {
  df <- data.frame(g1 = c("A", "B", "A"), g2 = c("B", "C", "C"),
                   stringsAsFactors = FALSE)
  expect_warning(
    res <- cooccurrence(df, field = c("g1", "g2"), similarity = "jaccard"),
    "without `sep`"
  )
  expect_true(nrow(res) > 0)
  expect_true(all(res$weight >= 0 & res$weight <= 1))
})

test_that("field without sep handles NAs", {
  df <- data.frame(g1 = c("A", "B", NA), g2 = c("B", NA, "C"),
                   stringsAsFactors = FALSE)
  expect_warning(res <- cooccurrence(df, field = c("g1", "g2")),
                 "without `sep`")
  expect_s3_class(res, "cooccurrence")
})

test_that("field without sep suggests separator when found", {
  df <- data.frame(items = c("A;B;C", "B;C", "A;C"),
                   stringsAsFactors = FALSE)
  expect_warning(cooccurrence(df, field = "items"),
                 'sep = ";"')
})

test_that("field without sep suggests comma separator", {
  df <- data.frame(items = c("A,B,C", "B,C", "A,C"),
                   stringsAsFactors = FALSE)
  expect_warning(cooccurrence(df, field = "items"),
                 'sep = ","')
})

test_that("field without sep gives generic warning when no separator found", {
  df <- data.frame(g1 = c("A", "B"), g2 = c("C", "D"),
                   stringsAsFactors = FALSE)
  expect_warning(cooccurrence(df, field = c("g1", "g2")),
                 "without `sep`")
})

# ========================================
# 12. print.cooccurrence
# ========================================

test_that("print.cooccurrence shows header and edges", {
  res <- cooccurrence(.test_list)
  out <- capture.output(print(res))
  expect_true(any(grepl("cooccurrence:", out)))
  expect_true(any(grepl("nodes", out)))
  expect_true(any(grepl("edges", out)))
  expect_true(any(grepl("transactions", out)))
})

test_that("print.cooccurrence returns x invisibly", {
  res <- cooccurrence(.test_list)
  out <- withVisible(capture.output(ret <- print(res)))
  expect_identical(ret, res)
})

test_that("print.cooccurrence shows similarity and scale", {
  res <- cooccurrence(.test_list, similarity = "jaccard", scale = "log")
  out <- capture.output(print(res))
  expect_true(any(grepl("similarity: jaccard", out)))
  expect_true(any(grepl("scale: log", out)))
})

test_that("print.cooccurrence with split_by shows group info", {
  df <- data.frame(
    grp = c("X", "X", "Y", "Y"),
    kw = c("A; B", "A; B", "C; D", "C; D"),
    stringsAsFactors = FALSE
  )
  res <- cooccurrence(df, field = "kw", sep = ";", split_by = "grp")
  out <- capture.output(print(res))
  expect_true(any(grepl("split_by: grp", out)))
  expect_true(any(grepl("2 groups", out)))
})

# ========================================
# 13. weight_by (weighted long format)
# ========================================

.theta <- data.frame(
  doc   = c("d1","d1","d1","d2","d2","d3","d3"),
  topic = c("T1","T2","T3","T1","T3","T2","T3"),
  prob  = c(0.6, 0.3, 0.1, 0.4, 0.6, 0.5, 0.5),
  stringsAsFactors = FALSE
)

test_that("weight_by returns cooccurrence object with correct columns", {
  res <- cooccurrence(.theta, field = "topic", by = "doc", weight_by = "prob")
  expect_s3_class(res, "cooccurrence")
  expect_true(all(c("from", "to", "weight", "count") %in% names(res)))
})

test_that("weight_by computes correct weighted co-occurrence values", {
  res <- cooccurrence(.theta, field = "topic", by = "doc", weight_by = "prob")
  get_w <- function(a, b) {
    row <- res[res$from == a & res$to == b | res$from == b & res$to == a, ]
    row$weight
  }
  expect_equal(get_w("T1", "T3"), 0.6*0.1 + 0.4*0.6, tolerance = 1e-10)
  expect_equal(get_w("T2", "T3"), 0.3*0.1 + 0.5*0.5, tolerance = 1e-10)
  expect_equal(get_w("T1", "T2"), 0.6*0.3,            tolerance = 1e-10)
})

test_that("weight_by count column reflects binary co-occurrence", {
  res <- cooccurrence(.theta, field = "topic", by = "doc", weight_by = "prob")
  # T1-T2 appear together only in d1 -> count = 1
  row <- res[res$from == "T1" & res$to == "T2" |
             res$from == "T2" & res$to == "T1", ]
  expect_equal(row$count, 1L)
  # T1-T3 appear in d1 and d2 -> count = 2
  row <- res[res$from == "T1" & res$to == "T3" |
             res$from == "T3" & res$to == "T1", ]
  expect_equal(row$count, 2L)
})

test_that("weight_by works with similarity normalization", {
  res <- cooccurrence(.theta, field = "topic", by = "doc",
                      weight_by = "prob", similarity = "cosine")
  expect_s3_class(res, "cooccurrence")
  expect_true(all(res$weight >= 0 & res$weight <= 1))
})

test_that("weight_by respects min_occur", {
  res <- cooccurrence(.theta, field = "topic", by = "doc",
                      weight_by = "prob", min_occur = 2)
  # T1-T2 pair: T2 appears in d1 and d3 (2 docs), T1 in d1 and d2 (2 docs)
  # All topics appear in >= 2 docs, so all 3 edges remain
  expect_equal(nrow(res), 3L)
})

test_that("weight_by errors on non-long format", {
  df <- data.frame(items = c("A;B", "B;C"), w = c(1, 2),
                   stringsAsFactors = FALSE)
  expect_error(
    cooccurrence(df, field = "items", sep = ";", weight_by = "w"),
    "long format"
  )
})

test_that("weight_by works with split_by", {
  df <- data.frame(
    doc   = c("d1","d1","d2","d2","d3","d3"),
    topic = c("T1","T2","T1","T2","T1","T2"),
    prob  = c(0.6, 0.4, 0.3, 0.7, 0.5, 0.5),
    year  = c(2020, 2020, 2020, 2020, 2021, 2021),
    stringsAsFactors = FALSE
  )
  res <- cooccurrence(df, field = "topic", by = "doc",
                      weight_by = "prob", split_by = "year")
  expect_true("group" %in% names(res))
  expect_equal(sort(unique(res$group)), c("2020", "2021"))
})

test_that("print.cooccurrence respects n parameter", {
  res <- cooccurrence(.test_list)
  out <- capture.output(print(res, n = 1L))
  expect_true(any(grepl("more edges", out)))
})

test_that("print.cooccurrence handles zero edges", {
  res <- cooccurrence(list(c("A"), c("B"), c("C")))
  out <- capture.output(print(res))
  expect_true(any(grepl("no edges", out)))
})

test_that("print.cooccurrence omits similarity when 'none'", {
  res <- cooccurrence(.test_list, similarity = "none")
  out <- capture.output(print(res))
  expect_false(any(grepl("similarity:", out)))
})

# ========================================
# 13. summary.cooccurrence
# ========================================

test_that("summary.cooccurrence shows network stats", {
  res <- cooccurrence(.test_list, similarity = "jaccard")
  out <- capture.output(summary(res))
  expect_true(any(grepl("cooccurrence network", out)))
  expect_true(any(grepl("Nodes", out)))
  expect_true(any(grepl("Edges", out)))
  expect_true(any(grepl("Density", out)))
  expect_true(any(grepl("Transactions", out)))
  expect_true(any(grepl("Similarity.*jaccard", out)))
  expect_true(any(grepl("Weight range", out)))
  expect_true(any(grepl("Count range", out)))
  expect_true(any(grepl("Top nodes", out)))
})

test_that("summary.cooccurrence returns object invisibly", {
  res <- cooccurrence(.test_list)
  capture.output(ret <- summary(res))
  expect_identical(ret, res)
})

test_that("summary.cooccurrence shows scale when not 'none'", {
  res <- cooccurrence(.test_list, scale = "minmax")
  out <- capture.output(summary(res))
  expect_true(any(grepl("Scale.*minmax", out)))
})

test_that("summary.cooccurrence omits scale when 'none'", {
  res <- cooccurrence(.test_list)
  out <- capture.output(summary(res))
  expect_false(any(grepl("Scale", out)))
})

test_that("summary.cooccurrence handles zero edges", {
  res <- cooccurrence(list(c("A"), c("B"), c("C")))
  out <- capture.output(summary(res))
  expect_true(any(grepl("Edges.*: 0", out)))
  # Should NOT print weight/count/top nodes sections
  expect_false(any(grepl("Weight range", out)))
})

# ========================================
# 14. plot.cooccurrence
# ========================================

test_that("plot.cooccurrence heatmap works", {
  pdf(NULL); on.exit(dev.off(), add = TRUE)
  res <- cooccurrence(.test_list)
  expect_invisible(plot(res, type = "heatmap"))
})

test_that("plot.cooccurrence heatmap works when matrix attr is NULL", {
  pdf(NULL); on.exit(dev.off(), add = TRUE)
  res <- cooccurrence(.test_list)
  attr(res, "matrix") <- NULL
  expect_invisible(plot(res, type = "heatmap"))
})

test_that("plot.cooccurrence network requires igraph", {
  skip_if_not_installed("igraph")
  pdf(NULL); on.exit(dev.off(), add = TRUE)
  res <- cooccurrence(.test_list)
  expect_invisible(plot(res, type = "network"))
})

test_that("plot.cooccurrence returns x invisibly", {
  pdf(NULL); on.exit(dev.off(), add = TRUE)
  res <- cooccurrence(.test_list)
  ret <- plot(res, type = "heatmap")
  expect_identical(ret, res)
})

# ========================================
# 15. Converter edge cases
# ========================================

test_that("as_matrix rebuilds from edges when attribute is lost", {
  res <- cooccurrence(.test_list, similarity = "jaccard")
  attr(res, "matrix") <- NULL
  mat <- as_matrix(res)
  expect_true(is.matrix(mat))
  expect_equal(nrow(mat), 3L)
  expect_true(isSymmetric(mat))
  # Should match the weight values
  ab <- res[res$from == "A" & res$to == "B", ]
  expect_equal(mat["A", "B"], ab$weight)
})

test_that("as_matrix rebuilds raw from edges when attribute is lost", {
  res <- cooccurrence(.test_list, similarity = "jaccard")
  attr(res, "raw_matrix") <- NULL
  raw <- as_matrix(res, type = "raw")
  expect_true(is.matrix(raw))
  expect_equal(raw["A", "B"], 1)
  expect_equal(raw["A", "C"], 2)
})

test_that("as_igraph works when items attribute is NULL", {
  skip_if_not_installed("igraph")
  res <- cooccurrence(.test_list)
  attr(res, "items") <- NULL
  g <- as_igraph(res)
  expect_true(igraph::is_igraph(g))
  expect_equal(igraph::vcount(g), 3L)
})

test_that("as_cograph structure is complete", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list)
  net <- as_cograph(res)
  expect_false(net$directed)
  expect_equal(net$n_edges, 3L)
  expect_true(is.matrix(net$weights))
  expect_true(all(c("id", "label", "name", "x", "y") %in% names(net$nodes)))
  expect_true(all(c("from", "to", "weight") %in% names(net$edges)))
  expect_equal(net$meta$source, "cooccure")
})

test_that("as_netobject structure is complete", {
  skip_if_not_installed("Nestimate")
  res <- cooccurrence(.test_list, similarity = "jaccard", threshold = 0)
  net <- as_netobject(res)
  expect_equal(net$method, "cooccurrence")
  expect_equal(net$params$similarity, "jaccard")
  expect_true(is.matrix(net$weights))
  expect_true(all(c("id", "label", "name") %in% names(net$nodes)))
  expect_true(all(c("from", "to", "weight") %in% names(net$edges)))
  expect_equal(net$meta$tna$method, "cooccurrence")
})

# ========================================
# 16. Converter error paths (missing packages)
# ========================================

test_that("as_tidygraph errors when tidygraph is not available", {
  skip_if(requireNamespace("tidygraph", quietly = TRUE),
          "tidygraph is installed; cannot test missing-package path")
  res <- cooccurrence(.test_list)
  expect_error(as_tidygraph(res), "tidygraph")
})

test_that("as_netobject errors when Nestimate is not available", {
  skip_if(requireNamespace("Nestimate", quietly = TRUE),
          "Nestimate is installed; cannot test missing-package path")
  res <- cooccurrence(.test_list)
  expect_error(as_netobject(res), "Nestimate")
})

# ========================================
# 17. as_cograph / as_netobject with zero edges
# ========================================

test_that("as_cograph handles zero edges", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(list(c("A"), c("B")))
  net <- as_cograph(res)
  expect_equal(net$n_edges, 0L)
  expect_equal(nrow(net$edges), 0L)
  expect_true(all(c("from", "to", "weight") %in% names(net$edges)))
})

test_that("as_netobject handles zero edges", {
  skip_if_not_installed("Nestimate")
  res <- cooccurrence(list(c("A"), c("B")))
  net <- as_netobject(res)
  expect_equal(net$n_edges, 0L)
  expect_equal(nrow(net$edges), 0L)
})

# ========================================
# 18. as_tidygraph full path
# ========================================

test_that("as_tidygraph produces correct tbl_graph", {
  skip_if_not_installed("tidygraph")
  skip_if_not_installed("igraph")
  res <- cooccurrence(.test_list)
  tg <- as_tidygraph(res)
  expect_true(inherits(tg, "tbl_graph"))
  expect_equal(igraph::vcount(tg), 3L)
  expect_equal(igraph::ecount(tg), 3L)
})

# ========================================
# 19. as_igraph edge attributes & vertex details
# ========================================

test_that("as_igraph preserves edge weights and counts", {
  skip_if_not_installed("igraph")
  res <- cooccurrence(.test_list, similarity = "jaccard")
  g <- as_igraph(res)
  expect_true("weight" %in% igraph::edge_attr_names(g))
  expect_true("count" %in% igraph::edge_attr_names(g))
  expect_equal(igraph::ecount(g), nrow(res))
})

test_that("as_igraph includes isolated nodes from items attr", {
  skip_if_not_installed("igraph")
  # min_occur filters D from edges, but items attr includes all original items
  trans <- list(c("A", "B"), c("A", "B"), c("A", "D"))
  res <- cooccurrence(trans, min_occur = 2L)
  # Items attr still has D if it appeared in transactions
  g <- as_igraph(res)
  expect_true(igraph::is_igraph(g))
})

# ========================================
# 20. as_cograph detailed checks
# ========================================

test_that("as_cograph nodes have sequential ids", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list)
  net <- as_cograph(res)
  expect_equal(net$nodes$id, seq_len(net$n_nodes))
})

test_that("as_cograph edges reference valid node ids", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list)
  net <- as_cograph(res)
  all_ids <- net$nodes$id
  expect_true(all(net$edges$from %in% all_ids))
  expect_true(all(net$edges$to %in% all_ids))
})

test_that("as_cograph weights matrix matches normalized matrix", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list, similarity = "cosine")
  net <- as_cograph(res)
  mat <- as_matrix(res, type = "normalized")
  expect_equal(net$weights, mat)
})

# ========================================
# 21. as_matrix thorough
# ========================================

test_that("as_matrix diagonal is zero (no self-loops)", {
  res <- cooccurrence(.test_list)
  mat <- as_matrix(res)
  expect_true(all(diag(mat) == 0))
  raw <- as_matrix(res, type = "raw")
  expect_true(all(diag(raw) == 0))
})

test_that("as_matrix dimnames are sorted unique items", {
  res <- cooccurrence(.test_list)
  mat <- as_matrix(res)
  items <- sort(unique(c(res$from, res$to)))
  expect_equal(rownames(mat), items)
  expect_equal(colnames(mat), items)
})

test_that("as_matrix rebuilt normalized matches stored matrix", {
  res <- cooccurrence(.test_list, similarity = "jaccard")
  ## Stored attribute is a sparse Matrix in the current engine; densify for
  ## comparison against the dense fallback rebuilt from the edge list.
  stored <- as.matrix(attr(res, "matrix"))
  attr(res, "matrix") <- NULL
  rebuilt <- as_matrix(res)
  # Diagonal is lost when rebuilding from edges (no self-edges), so compare off-diagonal
  diag(stored) <- 0
  expect_equal(rebuilt, stored)
})

test_that("as_matrix rebuilt raw is symmetric", {
  res <- cooccurrence(.test_list, similarity = "cosine")
  attr(res, "raw_matrix") <- NULL
  raw <- as_matrix(res, type = "raw")
  expect_true(isSymmetric(raw))
})

test_that("as_matrix with scaled result returns scaled values", {
  res <- cooccurrence(.test_list, similarity = "jaccard", scale = "minmax")
  mat <- as_matrix(res)
  expect_true(all(mat >= 0))
  expect_true(all(mat <= 1))
})

test_that("as_matrix on zero-edge result returns zero off-diagonal", {
  res <- cooccurrence(list(c("A"), c("B"), c("C")))
  mat <- as_matrix(res, type = "raw")
  diag(mat) <- 0
  expect_true(all(mat == 0))
})

test_that("as_matrix on split_by falls back to rebuild from edges", {
  df <- data.frame(
    grp = c("X", "X", "Y", "Y"),
    kw = c("A; B", "A; B", "C; D", "C; D"),
    stringsAsFactors = FALSE
  )
  res <- cooccurrence(df, field = "kw", sep = ";", split_by = "grp")
  # split_by does not store per-group matrices in attr, but the combined
  # result may not have a matrix attr → rebuild path
  mat <- as_matrix(res)
  expect_true(is.matrix(mat))
  expect_true(isSymmetric(mat))
})

# ========================================
# 22. Generic dispatch errors
# ========================================

test_that("as_matrix errors on non-cooccurrence input", {
  expect_error(as_matrix(42))
})

test_that("as_igraph errors on non-cooccurrence input", {
  expect_error(as_igraph(42))
})

test_that("as_cograph errors on non-cooccurrence input", {
  expect_error(as_cograph(42))
})

test_that("as_tidygraph errors on non-cooccurrence input", {
  expect_error(as_tidygraph(42))
})

test_that("as_netobject errors on non-cooccurrence input", {
  expect_error(as_netobject(42))
})

# ========================================
# 23. as_igraph thorough
# ========================================

test_that("as_igraph vertex names match items attribute", {
  skip_if_not_installed("igraph")
  res <- cooccurrence(.test_list)
  g <- as_igraph(res)
  expect_setequal(igraph::V(g)$name, attr(res, "items"))
})

test_that("as_igraph edge weights match cooccurrence weights", {
  skip_if_not_installed("igraph")
  res <- cooccurrence(.test_list, similarity = "jaccard")
  g <- as_igraph(res)
  el <- igraph::as_data_frame(g, what = "edges")
  # Sort both for comparison
  el <- el[order(el$from, el$to), ]
  co_sorted <- res[order(res$from, res$to), ]
  expect_equal(el$weight, co_sorted$weight)
  expect_equal(el$count, co_sorted$count)
})

test_that("as_igraph with zero edges gives edgeless graph", {
  skip_if_not_installed("igraph")
  res <- cooccurrence(list(c("A"), c("B"), c("C")))
  g <- as_igraph(res)
  expect_equal(igraph::ecount(g), 0L)
  # Vertices still present if items attr exists
  items <- attr(res, "items")
  if (!is.null(items)) {
    expect_true(igraph::vcount(g) >= length(items))
  }
})

test_that("as_igraph with similarity + scale produces valid graph", {
  skip_if_not_installed("igraph")
  res <- cooccurrence(.test_list, similarity = "dice", scale = "sqrt")
  g <- as_igraph(res)
  expect_true(igraph::is_igraph(g))
  expect_true(all(igraph::E(g)$weight >= 0))
})

test_that("as_igraph roundtrip preserves edge count", {
  skip_if_not_installed("igraph")
  res <- cooccurrence(.test_list)
  g <- as_igraph(res)
  el <- igraph::as_data_frame(g, what = "edges")
  expect_equal(nrow(el), nrow(res))
})

# ========================================
# 24. as_cograph thorough
# ========================================

test_that("as_cograph node labels match sorted items", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list)
  net <- as_cograph(res)
  mat <- as_matrix(res, type = "normalized")
  expect_equal(net$nodes$label, colnames(mat))
  expect_equal(net$nodes$name, colnames(mat))
})

test_that("as_cograph edge count matches non-zero upper triangle", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list, similarity = "jaccard")
  net <- as_cograph(res)
  mat <- as_matrix(res, type = "normalized")
  expected_edges <- sum(upper.tri(mat) & mat != 0)
  expect_equal(net$n_edges, expected_edges)
  expect_equal(nrow(net$edges), expected_edges)
})

test_that("as_cograph meta$layout is NULL", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list)
  net <- as_cograph(res)
  expect_null(net$meta$layout)
})

test_that("as_cograph with single pair", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(list(c("X", "Y"), c("X", "Y")))
  net <- as_cograph(res)
  expect_equal(net$n_nodes, 2L)
  expect_equal(net$n_edges, 1L)
  expect_equal(nrow(net$nodes), 2L)
  expect_equal(nrow(net$edges), 1L)
})

test_that("as_cograph with different similarity measures", {
  skip_if_not_installed("cograph")
  for (sim in c("none", "jaccard", "cosine", "dice")) {
    res <- cooccurrence(.test_list, similarity = sim)
    net <- as_cograph(res)
    expect_true(inherits(net, "cograph_network"))
    expect_equal(net$n_nodes, 3L)
  }
})

test_that("as_cograph node x and y are NA", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list)
  net <- as_cograph(res)
  expect_true(all(is.na(net$nodes$x)))
  expect_true(all(is.na(net$nodes$y)))
})

test_that("as_cograph edges$from and edges$to are integer", {
  skip_if_not_installed("cograph")
  res <- cooccurrence(.test_list)
  net <- as_cograph(res)
  expect_type(net$edges$from, "integer")
  expect_type(net$edges$to, "integer")
})

# ========================================
# 14. Large-dataset scalability
# ========================================

test_that("cooccurrence scales to wide sparse inputs without densifying", {
  ## Regression guard: the engine must stay in sparse representation.
  ## The dense predecessor allocated n * k logicals plus a k * k weight matrix;
  ## at these sizes that would be ~60 GB and fail with `vector memory limit`.
  skip_on_cran()
  n_docs  <- 20000L
  k_items <- 20000L
  items_per_doc <- 20L  # typical citation count

  set.seed(42)
  transactions <- lapply(seq_len(n_docs), function(i) {
    paste0("R", sample.int(k_items, items_per_doc))
  })

  ## Run both counting methods; just assert we return an edge list without
  ## blowing up memory or time.
  t0 <- Sys.time()
  res_full <- cooccurrence(transactions, counting = "full")
  res_frac <- cooccurrence(transactions, counting = "fractional")
  elapsed <- as.numeric(difftime(Sys.time(), t0, units = "secs"))

  expect_s3_class(res_full, "cooccurrence")
  expect_s3_class(res_frac, "cooccurrence")
  expect_gt(nrow(res_full), 0L)
  expect_gt(nrow(res_frac), 0L)

  ## The stored matrix attribute must be sparse, not a dense k x k base matrix.
  expect_true(inherits(attr(res_full, "matrix"), "Matrix"))
  expect_true(inherits(attr(res_frac, "matrix"), "Matrix"))

  ## Sanity ceiling on wall time — catches accidental dense regressions that
  ## would take minutes instead of seconds.
  expect_lt(elapsed, 120)
})