# ===========================================================================
# Section 1: Internal — .mogen_count_kgrams
# ===========================================================================

test_that(".mogen_count_kgrams counts 1-grams correctly", {
  trajs <- list(c("A", "B", "C"), c("A", "B", "D"))
  kg <- .mogen_count_kgrams(trajs, 1L)

  expect_true("A" %in% kg$nodes)
  expect_true("B" %in% kg$nodes)
  expect_true("C" %in% kg$nodes)
  expect_true("D" %in% kg$nodes)

  # Transitions: A->B(x2), B->C(x1), B->D(x1), C->D(0) etc
  ab <- kg$edges[kg$edges$from == "A" & kg$edges$to == "B", "weight"]
  expect_equal(ab, 2L)
  bc <- kg$edges[kg$edges$from == "B" & kg$edges$to == "C", "weight"]
  expect_equal(bc, 1L)
})

test_that(".mogen_count_kgrams counts 2-grams correctly", {
  trajs <- list(c("A", "B", "C", "D"), c("A", "B", "D", "C"))
  kg <- .mogen_count_kgrams(trajs, 2L)

  sep <- .HON_SEP
  # 2-grams from traj 1: A.B, B.C, C.D
  # 2-grams from traj 2: A.B, B.D, D.C
  ab <- paste("A", "B", sep = sep)
  bc <- paste("B", "C", sep = sep)
  bd <- paste("B", "D", sep = sep)

  expect_true(ab %in% kg$nodes)
  expect_true(bc %in% kg$nodes)
  expect_true(bd %in% kg$nodes)

  # A.B appears in both trajectories
  expect_equal(unname(kg$node_counts[ab]), 2L)

  # Transitions: A.B -> B.C (x1), A.B -> B.D (x1)
  ab_bc <- kg$edges[kg$edges$from == ab & kg$edges$to == bc, "weight"]
  expect_equal(ab_bc, 1L)
  ab_bd <- kg$edges[kg$edges$from == ab & kg$edges$to == bd, "weight"]
  expect_equal(ab_bd, 1L)
})

test_that(".mogen_count_kgrams handles short trajectories", {
  trajs <- list(c("A", "B"), c("A"))  # second has length 1 < k=2
  kg <- .mogen_count_kgrams(trajs, 2L)

  # Only first trajectory contributes
  expect_equal(length(kg$nodes), 1L)  # just A.B
  expect_equal(nrow(kg$edges), 0L)    # no transitions
})

# ===========================================================================
# Section 2: Internal — .mogen_transition_matrix
# ===========================================================================

test_that(".mogen_transition_matrix builds row-stochastic matrix", {
  nodes <- c("A", "B", "C")
  edges <- data.frame(
    from = c("A", "A", "B"), to = c("B", "C", "C"),
    weight = c(3L, 1L, 2L), stringsAsFactors = FALSE
  )
  tm <- .mogen_transition_matrix(nodes, edges)

  expect_equal(dim(tm), c(3L, 3L))
  expect_equal(rownames(tm), nodes)

  # Row A: 3 to B, 1 to C => 0.75, 0.25
  expect_equal(tm["A", "B"], 0.75)
  expect_equal(tm["A", "C"], 0.25)
  # Row B: 2 to C => 1.0
  expect_equal(tm["B", "C"], 1.0)
  # Row C: no outgoing => all zeros
  expect_equal(sum(tm["C", ]), 0)
})

# ===========================================================================
# Section 3: Internal — .mogen_marginal
# ===========================================================================

test_that(".mogen_marginal computes correct probabilities", {
  trajs <- list(c("A", "B", "A"), c("B", "B"))
  m <- .mogen_marginal(trajs)

  # Total: A=2, B=3 => P(A)=2/5, P(B)=3/5
  expect_equal(unname(m["A"]), 2 / 5)
  expect_equal(unname(m["B"]), 3 / 5)
})

# ===========================================================================
# Section 4: Internal — .mogen_log_likelihood
# ===========================================================================

test_that(".mogen_log_likelihood computes correct value for order 1", {
  trajs <- list(c("A", "B", "C"))
  marginal <- c(A = 1 / 3, B = 1 / 3, C = 1 / 3)

  # Order-1 transition matrix: deterministic
  tm1 <- matrix(c(0, 1, 0, 0, 0, 1, 0, 0, 0), 3, 3, byrow = TRUE,
                dimnames = list(c("A", "B", "C"), c("A", "B", "C")))

  trans_mats <- list(marginal, tm1)

  ll <- .mogen_log_likelihood(trajs, 1L, trans_mats)

  # Expected: log(1/3) + log(T[A,B]) + log(T[B,C]) = log(1/3) + 0 + 0
  expect_equal(ll, log(1 / 3))
})

test_that(".mogen_log_likelihood uses hierarchy for order 2", {
  trajs <- list(c("A", "B", "C", "D"))
  marginal <- c(A = 0.25, B = 0.25, C = 0.25, D = 0.25)

  # Order-1: A->B(1), B->C(1), C->D(1)
  tm1 <- matrix(0, 4, 4, dimnames = list(LETTERS[1:4], LETTERS[1:4]))
  tm1["A", "B"] <- 1
  tm1["B", "C"] <- 1
  tm1["C", "D"] <- 1

  # Order-2
  sep <- .HON_SEP
  nodes2 <- c(paste("A", "B", sep = sep), paste("B", "C", sep = sep),
              paste("C", "D", sep = sep))
  tm2 <- matrix(0, 3, 3, dimnames = list(nodes2, nodes2))
  tm2[1, 2] <- 1  # A.B -> B.C
  tm2[2, 3] <- 1  # B.C -> C.D

  trans_mats <- list(marginal, tm1, tm2)

  ll <- .mogen_log_likelihood(trajs, 2L, trans_mats)

  # Step 1: log P(A) = log(0.25)
  # Step 2: order min(1,2)=1, log T^1[A,B] = log(1) = 0
  # Step 3: order min(2,2)=2, log T^2[A.B, B.C] = log(1) = 0
  # Step 4: order min(3,2)=2, log T^2[B.C, C.D] = log(1) = 0
  expect_equal(ll, log(0.25))
})

# ===========================================================================
# Section 5: Internal — .mogen_layer_dof
# ===========================================================================

test_that(".mogen_layer_dof computes correct DOF", {
  # 3x3 matrix with 2 non-zero per row
  tm <- matrix(c(0.5, 0.5, 0, 0, 0.3, 0.7, 0, 0, 0), 3, 3, byrow = TRUE)
  expect_equal(.mogen_layer_dof(tm), 2L)  # (2-1) + (2-1) + (0) = 2

  # Identity-like: 1 non-zero per row
  tm2 <- diag(3)
  expect_equal(.mogen_layer_dof(tm2), 0L)  # (1-1)*3 = 0

  # Full 2x2
  tm3 <- matrix(c(0.6, 0.4, 0.3, 0.7), 2, 2, byrow = TRUE)
  expect_equal(.mogen_layer_dof(tm3), 2L)  # (2-1)*2 = 2
})

# ===========================================================================
# Section 6: build_mogen end-to-end
# ===========================================================================

test_that("build_mogen returns net_mogen class", {
  trajs <- list(c("A", "B", "C", "D"), c("A", "B", "D", "C"),
                c("B", "C", "D", "A"), c("C", "D", "A", "B"))
  m <- build_mogen(trajs, max_order = 3L)

  expect_s3_class(m, "net_mogen")
  expect_true(m$optimal_order %in% 0L:3L)
  expect_equal(length(m$aic), 4L)  # orders 0-3
  expect_equal(length(m$transition_matrices), 4L)
  expect_equal(m$n_paths, 4L)
})

test_that("build_mogen selects order 1 for first-order Markov data", {
  # Generate data from a known order-1 model
  set.seed(42)
  states <- c("A", "B", "C")
  tm <- matrix(c(0.1, 0.7, 0.2,
                 0.3, 0.1, 0.6,
                 0.5, 0.3, 0.2), 3, 3, byrow = TRUE,
               dimnames = list(states, states))

  # Generate 200 paths of length 20
  trajs <- lapply(seq_len(200L), function(i) {
    path <- character(20L)
    path[1L] <- sample(states, 1)
    vapply(2L:20L, function(t) {
      path[t] <<- sample(states, 1, prob = tm[path[t - 1L], ])
      ""
    }, character(1L))
    path
  })

  m <- build_mogen(trajs, max_order = 3L, criterion = "bic")

  # BIC should select order 1 (data is first-order Markov)
  expect_equal(m$optimal_order, 1L)
})

test_that("build_mogen selects higher order for second-order data", {
  # Data with strong second-order dependency
  set.seed(123)
  trajs <- lapply(seq_len(300L), function(i) {
    path <- character(15L)
    path[1L] <- sample(c("A", "B", "C"), 1)
    path[2L] <- sample(c("A", "B", "C"), 1)
    vapply(3L:15L, function(t) {
      # After A->B: always go to C
      # After C->B: always go to A
      # Otherwise: uniform
      prev2 <- path[t - 2L]
      prev1 <- path[t - 1L]
      if (prev2 == "A" && prev1 == "B") {
        path[t] <<- "C"
      } else if (prev2 == "C" && prev1 == "B") {
        path[t] <<- "A"
      } else {
        path[t] <<- sample(c("A", "B", "C"), 1)
      }
      ""
    }, character(1L))
    path
  })

  m <- build_mogen(trajs, max_order = 4L, criterion = "aic")

  # Should detect order >= 2 due to second-order dependency

  expect_true(m$optimal_order >= 2L)
})

test_that("build_mogen handles data.frame input", {
  df <- data.frame(T1 = c("A", "B"), T2 = c("B", "C"),
                   T3 = c("C", "A"), T4 = c("D", "B"))
  m <- build_mogen(df, max_order = 2L)
  expect_s3_class(m, "net_mogen")
})

test_that("build_mogen caps max_order at path length", {
  trajs <- list(c("A", "B", "C"))  # length 3
  expect_message(build_mogen(trajs, max_order = 10L), "capped")
})

test_that("build_mogen LRT criterion works", {
  trajs <- list(c("A", "B", "C", "D"), c("A", "B", "D", "C"),
                c("B", "C", "D", "A"), c("C", "D", "A", "B"))
  m <- build_mogen(trajs, max_order = 2L, criterion = "lrt")

  expect_s3_class(m, "net_mogen")
  expect_true(m$optimal_order %in% 0L:2L)
  expect_equal(m$criterion, "lrt")
})

test_that("build_mogen log_likelihoods increase with order", {
  set.seed(42)
  trajs <- lapply(seq_len(50L), function(i) {
    sample(LETTERS[1:5], 10, replace = TRUE)
  })
  m <- build_mogen(trajs, max_order = 3L)

  # Log-likelihood should generally increase with order
  # (more parameters = better fit)
  ll <- m$log_likelihood
  expect_true(ll[2] >= ll[1])  # order 1 >= order 0
})

# ===========================================================================
# Section 7: S3 methods
# ===========================================================================

test_that("print.net_mogen works", {
  trajs <- list(c("A", "B", "C"), c("B", "C", "A"))
  m <- build_mogen(trajs, max_order = 2L)
  out <- capture.output(print(m))
  expect_true(any(grepl("MOGen", out)))
})

test_that("summary.net_mogen works", {
  trajs <- list(c("A", "B", "C"), c("B", "C", "A"))
  m <- build_mogen(trajs, max_order = 2L)
  out <- capture.output(summary(m))
  expect_true(any(grepl("order", out)))
})

test_that("plot.net_mogen works", {
  trajs <- list(c("A", "B", "C", "D"), c("B", "C", "D", "A"))
  m <- build_mogen(trajs, max_order = 2L)
  expect_no_error(plot(m))
  expect_no_error(plot(m, type = "likelihood"))
})

# ===========================================================================
# Section 8: Input validation
# ===========================================================================

test_that("build_mogen rejects invalid input", {
  expect_error(build_mogen(42), "data.frame or list")
  expect_error(build_mogen(list(c("A", "B")), max_order = 0L), "max_order")
})

test_that("build_mogen AIC decreases then increases", {
  # With enough data, AIC should show U-shape: decrease then increase
  set.seed(42)
  states <- c("A", "B", "C", "D")
  trajs <- lapply(seq_len(100L), function(i) {
    sample(states, 8, replace = TRUE)
  })
  m <- build_mogen(trajs, max_order = 5L)

  # DOF should increase with order
  expect_true(all(diff(m$dof) >= 0))
})

# ===========================================================================
# Section 9: Coverage for previously uncovered paths
# ===========================================================================

# --- .mogen_log_likelihood: empty trajectory returns 0 ---
test_that(".mogen_log_likelihood handles empty trajectory", {
  trajs <- list(character(0L))
  marginal <- c(A = 0.5, B = 0.5)
  tm1 <- matrix(c(0, 1, 1, 0), 2, 2, byrow = TRUE,
                dimnames = list(c("A", "B"), c("A", "B")))
  trans_mats <- list(marginal, tm1)
  ll <- .mogen_log_likelihood(trajs, 1L, trans_mats)
  expect_equal(ll, 0)
})

# --- .mogen_log_likelihood: single-state trajectory returns just log(p0) ---
test_that(".mogen_log_likelihood handles single-state trajectory", {
  trajs <- list(c("A"))
  marginal <- c(A = 0.6, B = 0.4)
  tm1 <- matrix(c(0, 1, 1, 0), 2, 2, byrow = TRUE,
                dimnames = list(c("A", "B"), c("A", "B")))
  trans_mats <- list(marginal, tm1)
  ll <- .mogen_log_likelihood(trajs, 1L, trans_mats)
  expect_equal(ll, log(0.6))
})

# --- .mogen_log_likelihood: order_used == 0 branch (k=0, step >= 2) ---
test_that(".mogen_log_likelihood uses marginal at order 0 for all steps", {
  trajs <- list(c("A", "B", "A"))
  marginal <- c(A = 0.6, B = 0.4)
  trans_mats <- list(marginal)  # only order 0
  ll <- .mogen_log_likelihood(trajs, 0L, trans_mats)
  expected <- log(0.6) + log(0.4) + log(0.6)
  expect_equal(ll, expected, tolerance = 1e-10)
})

# --- .mogen_log_likelihood: key not in trans_mat uses log_eps ---
test_that(".mogen_log_likelihood uses log_eps for missing key", {
  trajs <- list(c("A", "B", "C"))
  marginal <- c(A = 1 / 3, B = 1 / 3, C = 1 / 3)
  # Empty tm1: no transitions recorded
  tm1 <- matrix(0, 3, 3, dimnames = list(c("A", "B", "C"), c("A", "B", "C")))
  trans_mats <- list(marginal, tm1)
  ll <- .mogen_log_likelihood(trajs, 1L, trans_mats)
  log_eps <- log(.Machine$double.eps)
  # Step 1: log(1/3), steps 2-3: log_eps each (missing keys → zero probs)
  expected <- log(1 / 3) + log_eps + log_eps
  expect_equal(ll, expected, tolerance = 1e-10)
})

# --- build_mogen: no valid trajectories ---
test_that("build_mogen stops when no valid trajectories", {
  df <- data.frame(T1 = c("A", "B"), stringsAsFactors = FALSE)
  expect_error(build_mogen(df, max_order = 2L), "No valid trajectories")
})

# --- mogen_transitions: default order (NULL) uses optimal_order ---
test_that("mogen_transitions defaults to optimal_order when order=NULL", {
  trajs <- list(c("A", "B", "C", "D"), c("A", "B", "D", "C"),
                c("B", "C", "D", "A"), c("C", "D", "A", "B"))
  m <- build_mogen(trajs, max_order = 2L)
  tr_default <- mogen_transitions(m)
  tr_explicit <- mogen_transitions(m, order = m$optimal_order)
  expect_equal(nrow(tr_default), nrow(tr_explicit))
})

# --- mogen_transitions: empty return when min_count filters all ---
test_that("mogen_transitions returns empty data.frame when all filtered", {
  trajs <- list(c("A", "B", "C"), c("B", "C", "A"))
  m <- build_mogen(trajs, max_order = 1L)
  tr <- mogen_transitions(m, order = 1L, min_count = 9999L)
  expect_true(is.data.frame(tr))
  expect_equal(nrow(tr), 0L)
  expect_true(all(c("path", "count", "probability", "from", "to") %in%
                    names(tr)))
})

# --- path_counts: data.frame input branch ---
test_that("path_counts works with data.frame input", {
  df <- data.frame(T1 = c("A", "B"), T2 = c("B", "C"),
                   T3 = c("C", "A"), stringsAsFactors = FALSE)
  result <- path_counts(df, k = 2L)
  expect_true(is.data.frame(result))
  expect_true(all(c("path", "count", "proportion") %in% names(result)))
  expect_true(nrow(result) > 0L)
})

# --- path_counts: list input branch ---
test_that("path_counts works with list input", {
  trajs <- list(c("A", "B", "C"), c("A", "B", "D"))
  result <- path_counts(trajs, k = 2L)
  expect_true(is.data.frame(result))
  expect_true(nrow(result) > 0L)
})

# --- path_counts: trajectories shorter than k contribute nothing ---
test_that("path_counts handles trajectories shorter than k", {
  trajs <- list(c("A"), c("B"), c("A", "B", "C"))
  result <- path_counts(trajs, k = 3L)
  # Only the third trajectory contributes 1 path: A -> B -> C
  expect_equal(nrow(result), 1L)
  expect_equal(result$count[1L], 1L)
})

# --- path_counts: top parameter limits output ---
test_that("path_counts top parameter limits rows returned", {
  trajs <- list(c("A", "B", "C", "D"), c("A", "B", "D", "C"))
  result_all <- path_counts(trajs, k = 2L)
  result_top <- path_counts(trajs, k = 2L, top = 1L)
  expect_equal(nrow(result_top), 1L)
  expect_true(nrow(result_all) >= nrow(result_top))
})

# --- path_counts: k must be >= 2 ---
test_that("path_counts rejects k < 2", {
  trajs <- list(c("A", "B", "C"))
  expect_error(path_counts(trajs, k = 1L), "k.*must be >= 2")
})

# --- state_frequencies: data.frame input branch ---
test_that("state_frequencies works with data.frame input", {
  df <- data.frame(T1 = c("A", "B"), T2 = c("B", "A"),
                   stringsAsFactors = FALSE)
  result <- state_frequencies(df)
  expect_true(is.data.frame(result))
  expect_true(all(c("state", "count", "proportion") %in% names(result)))
  expect_equal(sum(result$proportion), 1.0, tolerance = 1e-10)
})

# --- state_frequencies: list input branch ---
test_that("state_frequencies works with list input", {
  trajs <- list(c("A", "B", "A"), c("B", "C"))
  result <- state_frequencies(trajs)
  expect_true(is.data.frame(result))
  expect_equal(sum(result$proportion), 1.0, tolerance = 1e-10)
  expect_equal(result$count[result$state == "A"], 2L)
})

# --- print.net_mogen: LRT criterion branch uses AIC label ---
test_that("print.net_mogen displays AIC when criterion is 'lrt'", {
  trajs <- list(c("A", "B", "C", "D"), c("A", "B", "D", "C"),
                c("B", "C", "D", "A"), c("C", "D", "A", "B"))
  m <- build_mogen(trajs, max_order = 2L, criterion = "lrt")
  out <- capture.output(print(m))
  expect_true(any(grepl("AIC|lrt", out)))
})


# ===========================================================================
# Section 10: pathways() tests for MOGen
# ===========================================================================

test_that("pathways.net_mogen returns character vector", {
  set.seed(42)
  trajs <- lapply(seq_len(30L), function(i) sample(LETTERS[1:4], 6, replace = TRUE))
  m <- build_mogen(trajs, max_order = 2L)
  pw <- pathways(m)
  expect_true(is.character(pw))
})

test_that("pathways.net_mogen order=0 returns empty character", {
  trajs <- list(c("A", "B", "C"), c("B", "C", "A"))
  m <- build_mogen(trajs, max_order = 1L)
  pw <- pathways(m, order = 0L)
  expect_equal(pw, character(0))
})

test_that("pathways.net_mogen min_count filters paths", {
  set.seed(42)
  trajs <- lapply(seq_len(30L), function(i) sample(LETTERS[1:4], 6, replace = TRUE))
  m <- build_mogen(trajs, max_order = 2L)
  pw_all  <- pathways(m, min_count = 1L)
  pw_filt <- pathways(m, min_count = 9999L)
  expect_equal(length(pw_filt), 0L)
  expect_true(length(pw_all) >= length(pw_filt))
})

test_that("pathways.net_mogen top limits output", {
  set.seed(42)
  trajs <- lapply(seq_len(30L), function(i) sample(LETTERS[1:4], 6, replace = TRUE))
  m <- build_mogen(trajs, max_order = 2L)
  pw_top <- pathways(m, top = 2L)
  expect_true(length(pw_top) <= 2L)
})

test_that("pathways.net_mogen min_prob filters by probability", {
  set.seed(42)
  trajs <- lapply(seq_len(30L), function(i) sample(LETTERS[1:4], 6, replace = TRUE))
  m <- build_mogen(trajs, max_order = 2L)
  pw_all  <- pathways(m, min_prob = 0)
  pw_filt <- pathways(m, min_prob = 0.99)
  expect_true(length(pw_all) >= length(pw_filt))
})