# test-pipeline.R
# Integration tests for the full cpam pipeline.
# These test the PUBLIC API contract — they should NOT need updating
# during internal refactors unless the user-facing behavior changes.


# --- Helper: run the full pipeline on example data ---
run_pipeline <- function(subset_genes = paste0("g", sprintf("%03d", 1:30)),
                         q_threshold = 0.05) {
  load(system.file("extdata", "count_matrix_example.rda", package = "cpam"))
  load(system.file("extdata", "exp_design_example.rda", package = "cpam"))

  cpo <- prepare_cpam(
    exp_design = exp_design_example,
    count_matrix = count_matrix_example[subset_genes, ],
    gene_level = TRUE,
    num_cores = 1
  )

  cpo <- compute_p_values(cpo)

  sig <- cpo$p_table$target_id[cpo$p_table$q_val_target < q_threshold]
  if (length(sig) > 0) {
    set.seed(42)
    cpo <- estimate_changepoint(cpo, subset = sig)
    cpo <- select_shape(cpo)
  }

  cpo
}


# ====================================================================
# 1. Structural validation of the pre-computed example object
# ====================================================================

test_that("cpo_example has expected structure", {
  load(system.file("extdata", "cpo_example.rda", package = "cpam"))

  expect_s3_class(cpo_example, "cpam")

  # Core data slots exist
  expect_true(!is.null(cpo_example$exp_design))
  expect_true(!is.null(cpo_example$count_matrix_raw))
  expect_true(!is.null(cpo_example$data_long))
  expect_true(!is.null(cpo_example$target_to_keep))

  # Pipeline result slots exist
  expect_true(!is.null(cpo_example$p_table))
  expect_true(!is.null(cpo_example$changepoints))
  expect_true(!is.null(cpo_example$shapes))
})

test_that("cpo_example results() returns valid output", {
  load(system.file("extdata", "cpo_example.rda", package = "cpam"))
  res <- results(cpo_example)

  expect_s3_class(res, "tbl_df")
  expect_true(nrow(res) > 0)
  expect_true(all(c("target_id", "p", "cp", "shape") %in% names(res)))

  # p-values sorted ascending
  expect_equal(res$p, sort(res$p))

  # Values in valid ranges
  expect_true(all(res$p >= 0 & res$p <= 1))
  expect_true(all(res$cp %in% cpo_example$times))
})


# ====================================================================
# 2. Full pipeline from raw data — structural invariants
# ====================================================================

test_that("prepare_cpam returns valid structure", {
  load(system.file("extdata", "count_matrix_example.rda", package = "cpam"))
  load(system.file("extdata", "exp_design_example.rda", package = "cpam"))

  subset_genes <- paste0("g", sprintf("%03d", 1:30))
  cpo <- prepare_cpam(
    exp_design = exp_design_example,
    count_matrix = count_matrix_example[subset_genes, ],
    gene_level = TRUE,
    num_cores = 1
  )

  expect_s3_class(cpo, "cpam")
  expect_equal(cpo$model_type, "case-only")
  expect_true(cpo$regularize)
  expect_true(length(cpo$target_to_keep) > 0)
  expect_true(length(cpo$target_to_keep) <= 30)

  # data_long has required columns
  expect_true(all(c("target_id", "sample", "counts", "time", "norm_factor")
                  %in% names(cpo$data_long)))

  # Count matrix dimensions consistent with design
  expect_equal(ncol(cpo$count_matrix_raw), nrow(cpo$exp_design))
})

test_that("compute_p_values returns valid p-values", {
  cpo <- run_pipeline()

  expect_true(!is.null(cpo$p_table))
  expect_true(all(c("target_id", "p_val_target", "q_val_target")
                  %in% names(cpo$p_table)))

  # All kept targets have p-values
  expect_equal(nrow(cpo$p_table), length(cpo$target_to_keep))

  # p-values in valid range
  pvals <- cpo$p_table$p_val_target
  expect_true(all(pvals >= 0 & pvals <= 1, na.rm = TRUE))

  # q-values >= p-values (BH adjustment inflates)
  qvals <- cpo$p_table$q_val_target
  non_na <- !is.na(pvals) & !is.na(qvals)
  expect_true(all(qvals[non_na] >= pvals[non_na] - 1e-15))
})

test_that("estimate_changepoint returns valid changepoints", {
  cpo <- run_pipeline()

  if (is.null(cpo$changepoints)) skip("No significant targets for changepoint estimation")

  expect_s3_class(cpo$changepoints, "tbl_df")
  expect_true(all(c("target_id", "cp_min", "cp_1se") %in% names(cpo$changepoints)))

  # Changepoints within observed time range
  time_range <- range(cpo$exp_design$time)
  expect_true(all(cpo$changepoints$cp_min >= time_range[1] &
                    cpo$changepoints$cp_min <= time_range[2]))
  expect_true(all(cpo$changepoints$cp_1se >= time_range[1] &
                    cpo$changepoints$cp_1se <= time_range[2]))
})

test_that("select_shape returns valid shapes", {
  cpo <- run_pipeline()

  if (is.null(cpo$shapes)) skip("No significant targets for shape selection")

  expect_s3_class(cpo$shapes, "tbl_df")
  expect_true(all(c("target_id", "cp", "shape1", "shape2") %in% names(cpo$shapes)))

  valid_shapes <- c("micv", "mdcx", "cv", "cx", "lin", "ilin", "dlin", "tp", "null")
  expect_true(all(cpo$shapes$shape1 %in% valid_shapes))
  expect_true(all(cpo$shapes$shape2 %in% valid_shapes))
})

test_that("results returns valid output from fresh pipeline", {
  cpo <- run_pipeline()

  if (is.null(cpo$shapes)) skip("No significant targets for results")

  res <- results(cpo)
  expect_s3_class(res, "tbl_df")
  expect_true(nrow(res) > 0)
  expect_true(all(c("target_id", "p", "cp", "shape") %in% names(res)))

  # p-values sorted ascending
  expect_equal(res$p, sort(res$p))
})


# ====================================================================
# 3. Cross-step consistency
# ====================================================================

test_that("targets flow consistently through pipeline steps", {
  cpo <- run_pipeline()

  if (is.null(cpo$shapes)) skip("No significant targets")

  # Every target in shapes must be in changepoints
  expect_true(all(cpo$shapes$target_id %in% cpo$changepoints$target_id))

  # Every target in changepoints must be in p_table
  expect_true(all(cpo$changepoints$target_id %in% cpo$p_table$target_id))

  # Changepoints used in shapes must match
  for (tid in cpo$shapes$target_id) {
    shape_cp <- cpo$shapes$cp[cpo$shapes$target_id == tid]
    cp_1se <- cpo$changepoints$cp_1se[cpo$changepoints$target_id == tid]
    expect_equal(shape_cp, cp_1se)
  }
})


# ====================================================================
# 4. Numerical reference values (snapshot)
# ====================================================================

test_that("pipeline p-values match reference", {
  cpo <- run_pipeline()

  expect_snapshot_value(
    round(cpo$p_table$p_val_target, digits = 8),
    style = "serialize"
  )
})

test_that("pipeline changepoints match reference", {
  cpo <- run_pipeline()

  if (is.null(cpo$changepoints)) skip("No significant targets")

  expect_snapshot_value(
    cpo$changepoints[, c("target_id", "cp_min", "cp_1se")],
    style = "serialize"
  )
})

test_that("pipeline shapes match reference", {
  cpo <- run_pipeline()

  if (is.null(cpo$shapes)) skip("No significant targets")

  expect_snapshot_value(
    cpo$shapes[, c("target_id", "cp", "shape1", "shape2")],
    style = "serialize"
  )
})

test_that("pipeline final results match reference", {
  cpo <- run_pipeline()

  if (is.null(cpo$shapes)) skip("No significant targets")

  res <- results(cpo)

  expect_snapshot_value(
    data.frame(
      target_id = res$target_id,
      p = round(res$p, 8),
      cp = res$cp,
      shape = res$shape
    ),
    style = "serialize"
  )
})