make_split_spec_graph <- function(time_source = c("index", "precedence")) {
  time_source <- match.arg(time_source)

  meta <- data.frame(
    sample_id = c("S1", "S2", "S3", "S4", "S5"),
    subject_id = c("P1", "P1", "P2", "P3", "P4"),
    batch_id = c("B1", "B2", "B1", "B3", NA),
    study_id = c("ST1", "ST1", "ST2", "ST2", "ST3"),
    timepoint_id = c("T0", "T1", "T0", "T2", NA),
    stringsAsFactors = FALSE
  )

  sample_nodes <- create_nodes(meta, type = "Sample", id_col = "sample_id")
  subject_nodes <- create_nodes(meta, type = "Subject", id_col = "subject_id")
  batch_nodes <- create_nodes(meta, type = "Batch", id_col = "batch_id")
  study_nodes <- create_nodes(meta, type = "Study", id_col = "study_id")

  time_data <- unique(meta[!is.na(meta$timepoint_id), "timepoint_id", drop = FALSE])
  time_nodes <- if (identical(time_source, "index")) {
    create_nodes(
      transform(time_data, time_index = c(0L, 1L, 2L)),
      type = "Timepoint",
      id_col = "timepoint_id",
      attr_cols = c("time_index")
    )
  } else {
    create_nodes(time_data, type = "Timepoint", id_col = "timepoint_id")
  }

  edge_sets <- list(
    create_edges(meta, "sample_id", "subject_id", "Sample", "Subject", "sample_belongs_to_subject"),
    create_edges(meta, "sample_id", "batch_id", "Sample", "Batch", "sample_processed_in_batch", allow_missing = TRUE),
    create_edges(meta, "sample_id", "study_id", "Sample", "Study", "sample_from_study"),
    create_edges(meta, "sample_id", "timepoint_id", "Sample", "Timepoint", "sample_collected_at_timepoint", allow_missing = TRUE)
  )

  if (identical(time_source, "precedence")) {
    edge_sets <- c(edge_sets, list(
      create_edges(
        data.frame(
          from_timepoint = c("T0", "T1"),
          to_timepoint = c("T1", "T2"),
          stringsAsFactors = FALSE
        ),
        "from_timepoint", "to_timepoint", "Timepoint", "Timepoint", "timepoint_precedes"
      )
    ))
  }

  build_dependency_graph(
    nodes = list(sample_nodes, subject_nodes, batch_nodes, study_nodes, time_nodes),
    edges = edge_sets,
    graph_name = paste0("split_spec_graph_", time_source),
    dataset_name = "SplitSpecDemo"
  )
}

test_that("as_split_spec translates constraints into canonical sample data", {
  graph <- make_split_spec_graph("index")
  constraint <- derive_split_constraints(graph, mode = "batch")
  spec <- as_split_spec(constraint, graph = graph)
  validation <- validate_split_spec(spec)

  expect_s3_class(spec, "split_spec")
  expect_true(all(c(
    "sample_id", "sample_node_id", "group_id", "primary_group", "batch_group",
    "study_group", "timepoint_id", "time_index", "order_rank"
  ) %in% names(spec$sample_data)))
  expect_equal(spec$group_var, "group_id")
  expect_true("batch_group" %in% spec$block_vars)
  expect_equal(spec$recommended_resampling, "blocked_cv")
  expect_equal(spec$sample_data$batch_group[spec$sample_data$sample_id == "S1"], "B1")
  expect_equal(spec$sample_data$study_group[spec$sample_data$sample_id == "S1"], "ST1")
  expect_equal(spec$sample_data$order_rank[spec$sample_data$sample_id == "S4"], 3L)
  expect_equal(spec$metadata$source_mode, "batch")
  expect_equal(spec$metadata$source_strategy, "batch")
  expect_true(validation$valid)
})

test_that("as_split_spec preserves time-aware ordering semantics", {
  graph <- make_split_spec_graph("precedence")
  constraint <- derive_split_constraints(graph, mode = "time")
  spec <- as_split_spec(constraint, graph = graph)
  validation <- validate_split_spec(spec)

  expect_s3_class(spec, "split_spec")
  expect_identical(spec$time_var, "order_rank")
  expect_false(isTRUE(spec$ordering_required))
  expect_equal(spec$sample_data$order_rank[spec$sample_data$sample_id == "S2"], 2L)
  expect_equal(spec$recommended_resampling, "ordered_split")
  expect_true(validation$valid)
})

test_that("partial time coverage does not force invalid ordering requirements in generated split specs", {
  graph <- make_split_spec_graph("index")
  constraint <- derive_split_constraints(graph, mode = "time")
  spec <- as_split_spec(constraint, graph = graph)
  validation <- validate_split_spec(spec)

  expect_false(isTRUE(spec$ordering_required))
  expect_identical(spec$time_var, "order_rank")
  expect_true(any(is.na(spec$sample_data$order_rank)))
  expect_true(validation$valid)
  expect_false(any(validation$issues$code == "missing_required_ordering"))
})

test_that("validate_split_spec detects missing ordering and duplicate samples", {
  graph <- make_split_spec_graph("index")
  constraint <- derive_split_constraints(graph, mode = "time")
  spec <- as_split_spec(constraint, graph = graph)
  broken <- spec
  broken$ordering_required <- TRUE
  broken$sample_data$order_rank[] <- NA_integer_
  broken$sample_data <- rbind(broken$sample_data, broken$sample_data[1, , drop = FALSE])

  validation <- validate_split_spec(broken)

  expect_s3_class(validation, "split_spec_validation")
  expect_false(validation$valid)
  expect_true(any(validation$issues$code == "duplicate_sample_id"))
  expect_true(any(validation$issues$code == "missing_required_ordering"))
  expect_true(length(capture.output(print(validation))) > 0L)
})

test_that("validate_split_spec is deterministic across repeated runs", {
  graph <- make_split_spec_graph("index")
  constraint <- derive_split_constraints(graph, mode = "time")
  spec <- as_split_spec(constraint, graph = graph)
  broken <- spec
  broken$sample_data$order_rank[] <- NA_integer_
  broken$sample_data <- rbind(broken$sample_data, broken$sample_data[1, , drop = FALSE])

  validation_a <- validate_split_spec(broken)
  validation_b <- validate_split_spec(broken)

  expect_identical(validation_a$issues$code, validation_b$issues$code)
  expect_identical(validation_a$issues$message, validation_b$issues$message)
  expect_identical(validation_a$issues$n_affected, validation_b$issues$n_affected)
  expect_identical(validation_a$issues$issue_id, validation_b$issues$issue_id)
})

test_that("summarize_leakage_risks combines graph, constraint, and split-spec diagnostics", {
  graph <- make_split_spec_graph("index")
  constraint <- derive_split_constraints(graph, mode = "composite", strategy = "strict", via = c("Subject", "Batch"))
  spec <- as_split_spec(constraint, graph = graph)
  summary_obj <- summarize_leakage_risks(graph, constraint = constraint, split_spec = spec)

  expect_s3_class(summary_obj, "leakage_risk_summary")
  expect_true(is.character(summary_obj$overview))
  expect_true(all(c("severity", "category", "message", "source", "n_affected") %in% names(summary_obj$diagnostics)))
  expect_true(any(summary_obj$diagnostics$source == "validation"))
  expect_true(any(summary_obj$diagnostics$source == "constraint"))
  expect_true(any(summary_obj$diagnostics$source == "split_spec"))
  expect_true(any(summary_obj$diagnostics$category == "blocking_available"))
  expect_true(any(summary_obj$diagnostics$category == "ordering_available"))
  expect_true("block_vars" %in% names(summary_obj$split_spec_summary))
  expect_true("ordering_required" %in% names(summary_obj$split_spec_summary))
  expect_true(length(capture.output(print(summary_obj))) > 0L)
})