# Tests for data.R - pepdiff_data class and import functions

# =============================================================================
# Test pepdiff_data constructor and validator
# =============================================================================

test_that("new_pepdiff_data creates object with correct structure", {
  # Long-format input: two peptides of one gene, each with a ctrl and a trt
  # measurement from a single biological replicate.
  long_data <- tibble::tibble(
    peptide = rep(c("P1", "P2"), each = 2),
    gene_id = rep("G1", 4),
    treatment = rep(c("ctrl", "trt"), times = 2),
    bio_rep = rep("1", 4),
    value = c(100, 200, 150, 300)
  )

  # Per-condition design summary handed to the constructor.
  design_tbl <- tibble::tibble(
    treatment = c("ctrl", "trt"),
    n_reps = c(1, 1),
    n_peptides = c(2, 2),
    n_observations = c(2, 2)
  )

  # Per-peptide missingness summary handed to the constructor.
  missingness_tbl <- tibble::tibble(
    peptide = c("P1", "P2"),
    na_rate = c(0, 0),
    mnar_score = c(0, 0),
    mean_abundance = c(150, 225)
  )

  obj <- new_pepdiff_data(
    data = long_data,
    factors = "treatment",
    design = design_tbl,
    missingness = missingness_tbl,
    peptides = c("P1", "P2"),
    call = quote(test())
  )

  # The constructor must tag the class and keep the supplied components.
  expect_s3_class(obj, "pepdiff_data")
  expect_equal(obj$factors, "treatment")
  expect_length(obj$peptides, 2)
})


test_that("validate_pepdiff_data catches missing components", {
  # Hand-build an object carrying only `data` and `factors`; every other
  # component is left out on purpose.
  incomplete <- list(data = tibble::tibble(), factors = "a")
  class(incomplete) <- "pepdiff_data"

  expect_error(validate_pepdiff_data(incomplete), "missing required components")
})


test_that("validate_pepdiff_data catches missing columns in data", {
  # All components are present, but `data` holds nothing except `peptide`.
  components <- list(
    data = tibble::tibble(peptide = "P1"),
    factors = "treatment",
    design = tibble::tibble(),
    missingness = tibble::tibble(),
    peptides = "P1",
    call = quote(test())
  )
  class(components) <- "pepdiff_data"

  expect_error(validate_pepdiff_data(components), "missing required columns")
})


# =============================================================================
# Test read_pepdiff
# =============================================================================

test_that("read_pepdiff imports CSV correctly", {
  # Write the minimal fixture to disk and remove the file when the test ends.
  csv_path <- write_test_csv(make_minimal_test_data())
  on.exit(unlink(csv_path))

  imported <- read_pepdiff(
    file = csv_path, id = "peptide", gene = "gene_id", value = "value",
    factors = "treatment", replicate = "bio_rep"
  )

  expect_s3_class(imported, "pepdiff_data")
  expect_equal(imported$factors, "treatment")
  # The test expects five distinct peptides from the minimal fixture.
  expect_length(imported$peptides, 5)
})


test_that("read_pepdiff handles multiple factors", {
  csv_path <- write_test_csv(make_factorial_test_data())
  on.exit(unlink(csv_path))

  factor_cols <- c("treatment", "timepoint")
  imported <- read_pepdiff(
    file = csv_path, id = "peptide", gene = "gene_id", value = "value",
    factors = factor_cols, replicate = "bio_rep"
  )

  # Factor names are kept in the order given; the design table is expected to
  # have one row per cell of the 2x2 factorial.
  expect_equal(imported$factors, c("treatment", "timepoint"))
  expect_equal(nrow(imported$design), 4)
})


test_that("read_pepdiff errors on missing file", {
  # Nothing is written to disk here: the path is expected not to exist.
  missing_path <- "nonexistent.csv"

  expect_error(
    read_pepdiff(
      file = missing_path, id = "peptide", gene = "gene_id", value = "value",
      factors = "treatment", replicate = "bio_rep"
    ),
    "File not found"
  )
})


test_that("read_pepdiff errors on missing columns", {
  csv_path <- write_test_csv(make_minimal_test_data())
  on.exit(unlink(csv_path))

  # Ask for an id column name that the test assumes is absent from the file.
  expect_error(
    read_pepdiff(
      file = csv_path, id = "nonexistent_col", gene = "gene_id",
      value = "value", factors = "treatment", replicate = "bio_rep"
    ),
    "not found in file"
  )
})


test_that("read_pepdiff preserves tech_rep column when specified", {
  csv_path <- write_test_csv(make_tech_rep_test_data())
  on.exit(unlink(csv_path))

  imported <- read_pepdiff(
    file = csv_path, id = "peptide", gene = "gene_id", value = "value",
    factors = "treatment", replicate = "bio_rep", tech_rep = "tech_rep"
  )

  # Naming the technical-replicate column must keep it in the imported data.
  expect_true("tech_rep" %in% colnames(imported$data))
})


test_that("read_pepdiff removes duplicate rows", {
  # Record the size of the clean fixture up front so the expectation is tied
  # to the exact data written to disk, not to a second fixture call.
  test_data <- make_minimal_test_data()
  n_unique <- nrow(test_data)

  # Append exact copies of the first two rows to create duplicates.
  duplicated_data <- rbind(test_data, test_data[1:2, ])
  test_file <- write_test_csv(duplicated_data)
  on.exit(unlink(test_file), add = TRUE)

  result <- read_pepdiff(
    file = test_file,
    id = "peptide",
    gene = "gene_id",
    value = "value",
    factors = "treatment",
    replicate = "bio_rep"
  )

  # The appended copies should have been dropped on import.
  expect_equal(nrow(result$data), n_unique)
})


# =============================================================================
# Test combine_tech_reps
# =============================================================================

test_that("combine_tech_reps.pepdiff_data combines technical replicates", {
  csv_path <- write_test_csv(make_tech_rep_test_data())
  on.exit(unlink(csv_path))

  with_tech_reps <- read_pepdiff(
    file = csv_path, id = "peptide", gene = "gene_id", value = "value",
    factors = "treatment", replicate = "bio_rep", tech_rep = "tech_rep"
  )
  rows_before <- nrow(with_tech_reps$data)

  collapsed <- combine_tech_reps(with_tech_reps)

  # Combining must shrink the table and drop the tech_rep column.
  expect_gt(rows_before, nrow(collapsed$data))
  expect_false("tech_rep" %in% colnames(collapsed$data))
})


test_that("combine_tech_reps.pepdiff_data warns when no tech_rep column", {
  csv_path <- write_test_csv(make_minimal_test_data())
  on.exit(unlink(csv_path))

  # Import without naming a tech_rep column, so there is nothing to combine.
  no_tech_reps <- read_pepdiff(
    file = csv_path, id = "peptide", gene = "gene_id", value = "value",
    factors = "treatment", replicate = "bio_rep"
  )

  expect_warning(combine_tech_reps(no_tech_reps), "No 'tech_rep' column found")
})


test_that("combine_tech_reps uses custom function", {
  test_data <- make_tech_rep_test_data()
  test_file <- write_test_csv(test_data)
  on.exit(unlink(test_file), add = TRUE)

  imported <- read_pepdiff(
    file = test_file,
    id = "peptide",
    gene = "gene_id",
    value = "value",
    factors = "treatment",
    replicate = "bio_rep",
    tech_rep = "tech_rep"
  )

  combined_mean <- combine_tech_reps(imported, fun = mean)
  combined_median <- combine_tech_reps(imported, fun = median)

  # Both summaries must still yield valid pepdiff_data objects.
  expect_s3_class(combined_mean, "pepdiff_data")
  expect_s3_class(combined_median, "pepdiff_data")

  # Whatever summary function is supplied, technical replicates must actually
  # be collapsed: the tech_rep column disappears and rows are merged.
  expect_false("tech_rep" %in% names(combined_mean$data))
  expect_false("tech_rep" %in% names(combined_median$data))
  expect_lt(nrow(combined_mean$data), nrow(imported$data))

  # The summary function only changes the combined values, not the grouping,
  # so both results are expected to have the same number of rows.
  # NOTE(review): the values themselves are not compared because the fixture
  # contents are not visible here; mean and median may coincide.
  expect_equal(nrow(combined_mean$data), nrow(combined_median$data))
})


# =============================================================================
# Test print method
# =============================================================================

test_that("print.pepdiff_data produces output", {
  csv_path <- write_test_csv(make_minimal_test_data())
  on.exit(unlink(csv_path))

  imported <- read_pepdiff(
    file = csv_path, id = "peptide", gene = "gene_id", value = "value",
    factors = "treatment", replicate = "bio_rep"
  )

  printed <- capture.output(print(imported))

  # Each of these labels has to show up on at least one printed line.
  for (label in c("pepdiff_data", "Peptides:", "Factors:")) {
    expect_true(any(grepl(label, printed)), info = label)
  }
})


# =============================================================================
# Test summary method
# =============================================================================

test_that("summary.pepdiff_data produces detailed output", {
  csv_path <- write_test_csv(make_factorial_test_data())
  on.exit(unlink(csv_path))

  imported <- read_pepdiff(
    file = csv_path, id = "peptide", gene = "gene_id", value = "value",
    factors = c("treatment", "timepoint"), replicate = "bio_rep"
  )

  # Keep both the printed text and the value returned by summary().
  printed <- capture.output(summary_value <- summary(imported))

  # Printed side: headline and factor section must appear somewhere.
  for (label in c("Summary", "Experimental factors")) {
    expect_true(any(grepl(label, printed)), info = label)
  }

  # Returned side: a list that includes an `n_peptides` entry.
  expect_type(summary_value, "list")
  expect_true("n_peptides" %in% names(summary_value))
})


# =============================================================================
# Test subset method
# =============================================================================

test_that("subset.pepdiff_data filters by peptide IDs", {
  csv_path <- write_test_csv(make_minimal_test_data())
  on.exit(unlink(csv_path))

  imported <- read_pepdiff(
    file = csv_path, id = "peptide", gene = "gene_id", value = "value",
    factors = "treatment", replicate = "bio_rep"
  )

  # Keep only the first two peptide IDs known to the imported object.
  wanted <- imported$peptides[seq_len(2)]
  reduced <- subset(imported, peptides = wanted)

  # Exactly two peptides remain, and no data row refers to any other peptide.
  expect_length(reduced$peptides, 2)
  expect_length(setdiff(reduced$data$peptide, wanted), 0)
})


# =============================================================================
# Test with missing values
# =============================================================================

test_that("read_pepdiff handles missing values correctly", {
  # NOTE(review): this relies on make_test_data(na_rate = 0.1) producing at
  # least one missing value; confirm the helper is deterministic/seeded.
  csv_path <- write_test_csv(make_test_data(na_rate = 0.1))
  on.exit(unlink(csv_path))

  imported <- read_pepdiff(
    file = csv_path, id = "peptide", gene = "gene_id", value = "value",
    factors = "treatment", replicate = "bio_rep"
  )

  expect_s3_class(imported, "pepdiff_data")

  # At least one peptide must report a non-zero missingness rate.
  na_rates <- imported$missingness$na_rate
  expect_true(any(na_rates > 0))
})