# Phase C: Missing Data Handling Tests
# Tests for missingness computation, MNAR detection, and simulation with missingness

# --- C1: compute_missingness ---

test_that("compute_missingness returns expected structure", {
  values <- c(1, 2, NA, 4, NA, 6)
  result <- compute_missingness(values)

  expect_type(result, "list")
  expect_named(result, c("n_total", "n_missing", "na_rate"))
})

test_that("compute_missingness does not produce wilcox.test warnings", {
  # Values with ties
  values <- c(1, 1, 2, 2, NA, NA)
  expect_no_warning(compute_missingness(values))
})

test_that("compute_missingness calculates correct NA rate", {
  # 2 out of 6 values are NA = 33.3%
  values <- c(1, 2, NA, 4, NA, 6)
  result <- compute_missingness(values)

  expect_equal(result$n_total, 6)
  expect_equal(result$n_missing, 2)
  expect_equal(result$na_rate, 2/6, tolerance = 0.001)
})

test_that("compute_missingness handles no missing values", {
  values <- c(1, 2, 3, 4, 5)
  result <- compute_missingness(values)

  expect_equal(result$n_missing, 0)
  expect_equal(result$na_rate, 0)
})

test_that("compute_missingness handles all missing values", {
  values <- c(NA, NA, NA)
  result <- compute_missingness(values)

  expect_equal(result$na_rate, 1)
})


# --- C2: fit_distributions with missingness slot ---

test_that("fit_distributions includes missingness slot", {
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:3), each = 20),
    condition = rep("control", 60),
    abundance = c(
      c(rgamma(15, shape = 2, rate = 0.1), rep(NA, 5)),
      rnorm(20, mean = 50, sd = 10),
      c(rlnorm(18, meanlog = 3, sdlog = 0.5), rep(NA, 2))
    )
  )

  fits <- fit_distributions(
    test_data,
    id = "peptide",
    group = "condition",
    value = "abundance"
  )

  expect_true("missingness" %in% names(fits))
  expect_s3_class(fits$missingness, "tbl_df")
  expect_true("na_rate" %in% names(fits$missingness))
})

test_that("missingness slot has correct dimensions", {
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:5), each = 20),
    condition = rep("control", 100),
    abundance = rgamma(100, shape = 2, rate = 0.1)
  )

  fits <- fit_distributions(
    test_data,
    id = "peptide",
    group = "condition",
    value = "abundance"
  )

  expect_equal(nrow(fits$missingness), 5)
})

# --- C3: print/summary with missingness ---

test_that("print.peppwr_fits shows missingness summary", {
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:3), each = 20),
    condition = rep("control", 60),
    abundance = c(
      c(rgamma(15, shape = 2, rate = 0.1), rep(NA, 5)),
      rnorm(20, mean = 50, sd = 10),
      rlnorm(20, meanlog = 3, sdlog = 0.5)
    )
  )

  fits <- fit_distributions(
    test_data,
    id = "peptide",
    group = "condition",
    value = "abundance"
  )

  output <- capture.output(print(fits))
  # Should mention missingness or NA somewhere
  expect_true(any(grepl("missing|NA|missingness", output, ignore.case = TRUE)))
})

test_that("summary.peppwr_fits includes missingness statistics", {
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:3), each = 20),
    condition = rep("control", 60),
    abundance = c(
      c(rgamma(15, shape = 2, rate = 0.1), rep(NA, 5)),
      rnorm(20, mean = 50, sd = 10),
      rlnorm(20, meanlog = 3, sdlog = 0.5)
    )
  )

  fits <- fit_distributions(
    test_data,
    id = "peptide",
    group = "condition",
    value = "abundance"
  )

  summ <- summary(fits)
  expect_true("missingness" %in% names(summ) || "mean_na_rate" %in% names(summ))
})

# --- C4: simulate_with_missingness ---

test_that("simulate_with_missingness returns samples with NAs", {
  set.seed(42)
  result <- simulate_with_missingness(
    distribution = "norm",
    params = list(mean = 100, sd = 10),
    n_per_group = 20,
    effect_size = 2,
    na_rate = 0.2
  )

  expect_type(result, "list")
  expect_named(result, c("control", "treatment"))
  # Should have some NAs (roughly 20%)
  expect_true(sum(is.na(result$control)) > 0 || sum(is.na(result$treatment)) > 0)
})

test_that("simulate_with_missingness respects na_rate", {
  set.seed(42)
  # Run multiple simulations to check average NA rate
  na_rates <- replicate(100, {
    result <- simulate_with_missingness(
      distribution = "norm",
      params = list(mean = 100, sd = 10),
      n_per_group = 50,
      effect_size = 1,
      na_rate = 0.3
    )
    mean(is.na(c(result$control, result$treatment)))
  })

  # Average NA rate should be close to 0.3
  expect_equal(mean(na_rates), 0.3, tolerance = 0.05)
})

test_that("simulate_with_missingness applies MNAR pattern", {
  set.seed(42)
  result <- simulate_with_missingness(
    distribution = "lnorm",
    params = list(meanlog = 3, sdlog = 1),
    n_per_group = 100,
    effect_size = 1,
    na_rate = 0.3,
    mnar_score = 2  # Strong MNAR - low values more likely missing
  )

  control_non_na <- result$control[!is.na(result$control)]
  control_na_positions <- which(is.na(result$control))

  # If MNAR is applied, observed values should be biased higher
  # (This is a probabilistic test, may occasionally fail)
  if (length(control_non_na) > 10) {
    # Mean of observed should be higher than overall mean of distribution
    expect_gt(mean(control_non_na), exp(3))  # exp(3) is median of lnorm
  }
})

# --- C5: run_power_sim_with_missingness ---

test_that("run_power_sim_with_missingness returns power between 0 and 1", {
  set.seed(42)
  power <- run_power_sim_with_missingness(
    distribution = "norm",
    params = list(mean = 100, sd = 10),
    n_per_group = 10,
    effect_size = 2,
    na_rate = 0.1,
    n_sim = 100
  )

  expect_type(power, "double")
  expect_true(power >= 0 && power <= 1)
})

test_that("run_power_sim_with_missingness gives lower power with more missingness", {
  set.seed(42)
  power_low_na <- run_power_sim_with_missingness(
    distribution = "norm",
    params = list(mean = 100, sd = 10),
    n_per_group = 10,
    effect_size = 2,
    na_rate = 0.1,
    n_sim = 200
  )

  power_high_na <- run_power_sim_with_missingness(
    distribution = "norm",
    params = list(mean = 100, sd = 10),
    n_per_group = 10,
    effect_size = 2,
    na_rate = 0.5,
    n_sim = 200
  )

  # Higher missingness should generally reduce power
  expect_lte(power_high_na, power_low_na + 0.2)  # Allow some variance
})

# --- C6: power_analysis with include_missingness ---

test_that("power_analysis.peppwr_fits accepts include_missingness parameter", {
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:3), each = 20),
    condition = rep("control", 60),
    abundance = c(
      c(rgamma(15, shape = 2, rate = 0.1), rep(NA, 5)),
      rnorm(20, mean = 50, sd = 10),
      rlnorm(20, meanlog = 3, sdlog = 0.5)
    )
  )

  fits <- fit_distributions(
    test_data,
    id = "peptide",
    group = "condition",
    value = "abundance"
  )

  # Should work with include_missingness = TRUE
  result <- power_analysis(
    fits,
    effect_size = 2,
    n_per_group = 6,
    find = "power",
    include_missingness = TRUE,
    n_sim = 50
  )

  expect_s3_class(result, "peppwr_power")
})

test_that("include_missingness affects power estimates", {
  set.seed(42)
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:5), each = 30),
    condition = rep("control", 150),
    abundance = c(
      c(rgamma(20, shape = 2, rate = 0.1), rep(NA, 10)),
      c(rnorm(25, mean = 50, sd = 10), rep(NA, 5)),
      c(rlnorm(22, meanlog = 3, sdlog = 0.5), rep(NA, 8)),
      c(rgamma(28, shape = 3, rate = 0.2), rep(NA, 2)),
      c(rnorm(18, mean = 80, sd = 15), rep(NA, 12))
    )
  )

  fits <- fit_distributions(
    test_data,
    id = "peptide",
    group = "condition",
    value = "abundance"
  )

  result_with <- power_analysis(
    fits,
    effect_size = 2,
    n_per_group = 6,
    find = "power",
    include_missingness = TRUE,
    n_sim = 50
  )

  result_without <- power_analysis(
    fits,
    effect_size = 2,
    n_per_group = 6,
    find = "power",
    include_missingness = FALSE,
    n_sim = 50
  )

  # Results should be different (missingness reduces power)
  # But both should be valid power values
  expect_true(result_with$answer >= 0 && result_with$answer <= 1)
  expect_true(result_without$answer >= 0 && result_without$answer <= 1)
})

# --- C7: plot_missingness ---

test_that("plot_missingness returns ggplot", {
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:10), each = 20),
    condition = rep("control", 200),
    abundance = c(
      c(rgamma(15, shape = 2, rate = 0.1), rep(NA, 5)),
      rnorm(20, mean = 50, sd = 10),
      c(rlnorm(18, meanlog = 3, sdlog = 0.5), rep(NA, 2)),
      rgamma(20, shape = 3, rate = 0.2),
      c(rnorm(17, mean = 80, sd = 15), rep(NA, 3)),
      rgamma(20, shape = 1.5, rate = 0.15),
      c(rnorm(19, mean = 60, sd = 12), rep(NA, 1)),
      rlnorm(20, meanlog = 2.5, sdlog = 0.4),
      c(rgamma(16, shape = 2.5, rate = 0.18), rep(NA, 4)),
      rnorm(20, mean = 70, sd = 10)
    )
  )

  fits <- fit_distributions(
    test_data,
    id = "peptide",
    group = "condition",
    value = "abundance"
  )

  p <- plot_missingness(fits)
  expect_s3_class(p, "ggplot")
})

test_that("plot_missingness shows NA rate distribution", {
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:5), each = 20),
    condition = rep("control", 100),
    abundance = c(
      c(rgamma(15, shape = 2, rate = 0.1), rep(NA, 5)),
      rnorm(20, mean = 50, sd = 10),
      c(rlnorm(18, meanlog = 3, sdlog = 0.5), rep(NA, 2)),
      rgamma(20, shape = 3, rate = 0.2),
      c(rnorm(17, mean = 80, sd = 15), rep(NA, 3))
    )
  )

  fits <- fit_distributions(
    test_data,
    id = "peptide",
    group = "condition",
    value = "abundance"
  )

  p <- plot_missingness(fits)

  # Should return a ggplot or grid arrangement (from cowplot)
  expect_true(inherits(p, "ggplot") || inherits(p, "gtable") ||
                inherits(p, "grob") || inherits(p, "gTree"))
})

test_that("plot_missingness handles data with no missingness", {
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:3), each = 20),
    condition = rep("control", 60),
    abundance = rgamma(60, shape = 2, rate = 0.1)
  )

  fits <- fit_distributions(
    test_data,
    id = "peptide",
    group = "condition",
    value = "abundance"
  )

  # Should still work
  p <- plot_missingness(fits)
  expect_s3_class(p, "ggplot")
})

test_that("plot_missingness errors on invalid input", {
  expect_error(plot_missingness("not a fits object"))
})

# --- v2.1: MNAR Visualization Enhancements ---

test_that("missingness slot includes mean_abundance", {
  test_data <- tibble::tibble(
    peptide = rep("pep1", 8),
    condition = rep("ctrl", 8),
    abundance = c(10, 20, 30, 40, NA, NA, 70, 80)
  )

  fits <- fit_distributions(test_data, "peptide", "condition", "abundance")

  expect_true("mean_abundance" %in% names(fits$missingness))
  # Mean of observed: (10+20+30+40+70+80)/6 = 41.67
  expect_equal(fits$missingness$mean_abundance[1], mean(c(10, 20, 30, 40, 70, 80)), tolerance = 0.01)
})

test_that("mean_abundance handles all NA values", {
  test_data <- tibble::tibble(
    peptide = rep(c("pep1", "pep2"), each = 4),
    condition = rep("ctrl", 8),
    abundance = c(rep(NA, 4), 10, 20, 30, 40)
  )

  fits <- fit_distributions(test_data, "peptide", "condition", "abundance")

  # pep1 should have NaN for mean_abundance (all NA)
  expect_true(is.nan(fits$missingness$mean_abundance[1]) || is.na(fits$missingness$mean_abundance[1]))
  # pep2 should have mean of 25
  expect_equal(fits$missingness$mean_abundance[2], 25, tolerance = 0.01)
})


test_that("plot_missingness includes abundance vs NA rate panel", {
  set.seed(42)
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:10), each = 10),
    condition = rep("ctrl", 100),
    abundance = rlnorm(100, meanlog = 3, sdlog = 1)
  )
  # Introduce missingness
  test_data$abundance <- ifelse(
    runif(100) < 0.2,
    NA, test_data$abundance
  )

  fits <- fit_distributions(test_data, "peptide", "condition", "abundance")
  p <- plot_missingness(fits)

  # Should return a gtable (from cowplot::plot_grid) for 2-panel layout
  expect_true(inherits(p, "gtable") || inherits(p, "ggplot") || inherits(p, "grob"))
})

test_that("print.peppwr_fits shows MNAR pattern count", {
  set.seed(42)
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:10), each = 10),
    condition = rep("ctrl", 100),
    abundance = rlnorm(100, meanlog = 3, sdlog = 1)
  )
  # Introduce MNAR pattern - low values missing
  threshold <- quantile(test_data$abundance, 0.25)
  test_data$abundance <- ifelse(
    test_data$abundance < threshold & runif(100) < 0.5,
    NA, test_data$abundance
  )

  fits <- fit_distributions(test_data, "peptide", "condition", "abundance")
  output <- capture.output(print(fits))

  # Should mention MNAR pattern in output
  expect_true(any(grepl("MNAR", output, ignore.case = TRUE)))
})

# --- Dataset-level MNAR tests ---

test_that("compute_dataset_mnar returns expected structure", {
  missingness <- tibble::tibble(
    na_rate = c(0.5, 0.3, 0.1, 0.2, 0.4, 0.0),
    mean_abundance = c(10, 20, 50, 30, 15, 100)
  )

  result <- compute_dataset_mnar(missingness)

  expect_type(result, "list")
  expect_named(result, c("correlation", "p_value", "n_peptides", "interpretation"))
  expect_true(is.numeric(result$correlation) || is.na(result$correlation))
  expect_true(is.numeric(result$p_value) || is.na(result$p_value))
  expect_equal(result$n_peptides, 5)  # Excludes peptide with na_rate = 0
  expect_type(result$interpretation, "character")
})

test_that("compute_dataset_mnar detects strong MNAR pattern", {
  # Strong negative correlation: low abundance -> high NA rate
  set.seed(42)
  missingness <- tibble::tibble(
    mean_abundance = c(1, 2, 5, 10, 20, 50, 100, 200, 500, 1000),
    na_rate = c(0.9, 0.8, 0.7, 0.5, 0.4, 0.3, 0.2, 0.1, 0.05, 0.02)
  )

  result <- compute_dataset_mnar(missingness)

  # Should detect negative correlation
  expect_lt(result$correlation, -0.5)
  expect_lt(result$p_value, 0.05)
  expect_true(grepl("MNAR", result$interpretation))
  expect_true(grepl("low-abundance", result$interpretation) ||
              grepl("evidence", result$interpretation, ignore.case = TRUE))
})

test_that("compute_dataset_mnar handles insufficient data", {
  # Only 3 peptides with missing data
  missingness <- tibble::tibble(
    na_rate = c(0.5, 0.3, 0.1, 0.0, 0.0),
    mean_abundance = c(10, 20, 50, 100, 200)
  )

  result <- compute_dataset_mnar(missingness)

  expect_equal(result$n_peptides, 3)
  expect_true(is.na(result$correlation))
  expect_true(is.na(result$p_value))
  expect_true(grepl("Insufficient", result$interpretation))
})

test_that("compute_dataset_mnar handles no peptides with missing data", {
  missingness <- tibble::tibble(
    na_rate = c(0, 0, 0, 0, 0),
    mean_abundance = c(10, 20, 50, 100, 200)
  )

  result <- compute_dataset_mnar(missingness)

  expect_equal(result$n_peptides, 0)
  expect_true(is.na(result$correlation))
  expect_true(grepl("Insufficient", result$interpretation))
})

test_that("compute_dataset_mnar handles NA/NaN in mean_abundance", {
  missingness <- tibble::tibble(
    na_rate = c(0.5, 0.3, 0.1, 0.2, 0.4, 0.5),
    mean_abundance = c(10, NA, 50, NaN, 15, 0)  # NA, NaN, and 0 should be excluded
  )

  result <- compute_dataset_mnar(missingness)

  # Only 3 valid peptides (10, 50, 15) - excludes NA, NaN, and 0
  expect_equal(result$n_peptides, 3)
  expect_true(is.na(result$correlation))  # < 5 peptides
})

test_that("fit_distributions includes dataset_mnar slot", {
  set.seed(42)
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:10), each = 10),
    condition = rep("ctrl", 100),
    abundance = rlnorm(100, meanlog = 3, sdlog = 1)
  )
  # Introduce MNAR pattern
  threshold <- quantile(test_data$abundance, 0.3)
  test_data$abundance <- ifelse(
    test_data$abundance < threshold & runif(100) < 0.5,
    NA, test_data$abundance
  )

  fits <- fit_distributions(test_data, "peptide", "condition", "abundance")

  expect_true("dataset_mnar" %in% names(fits))
  expect_type(fits$dataset_mnar, "list")
  expect_true(all(c("correlation", "p_value", "n_peptides", "interpretation") %in%
                    names(fits$dataset_mnar)))
})

test_that("print.peppwr_fits shows dataset-level MNAR correlation", {
  set.seed(42)
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:20), each = 10),
    condition = rep("ctrl", 200),
    abundance = rlnorm(200, meanlog = 3, sdlog = 1)
  )
  # Strong MNAR pattern
  threshold <- quantile(test_data$abundance, 0.4)
  test_data$abundance <- ifelse(
    test_data$abundance < threshold & runif(200) < 0.6,
    NA, test_data$abundance
  )

  fits <- fit_distributions(test_data, "peptide", "condition", "abundance")
  output <- capture.output(print(fits))

  # Should mention MNAR detection with correlation
  expect_true(any(grepl("MNAR detection", output)))
  expect_true(any(grepl("Correlation|r =", output)))
})


test_that("summary.peppwr_fits includes dataset_mnar metrics", {
  set.seed(42)
  test_data <- tibble::tibble(
    peptide = rep(paste0("pep", 1:15), each = 10),
    condition = rep("ctrl", 150),
    abundance = rlnorm(150, meanlog = 3, sdlog = 1)
  )
  # Introduce MNAR pattern
  threshold <- quantile(test_data$abundance, 0.3)
  test_data$abundance <- ifelse(
    test_data$abundance < threshold & runif(150) < 0.5,
    NA, test_data$abundance
  )

  fits <- fit_distributions(test_data, "peptide", "condition", "abundance")
  summ <- summary(fits)

  expect_true("missingness" %in% names(summ))
  expect_true("dataset_mnar_correlation" %in% names(summ$missingness))
  expect_true("dataset_mnar_pvalue" %in% names(summ$missingness))
  expect_true("dataset_mnar_interpretation" %in% names(summ$missingness))
})

test_that("interpret_dataset_mnar provides correct interpretations", {
  # Strong negative correlation (strong MNAR)
  interp1 <- interpret_dataset_mnar(-0.6, 0.001, 20)
  expect_true(grepl("Strong evidence", interp1))
  expect_true(grepl("low-abundance", interp1))

  # Weak negative correlation
  interp2 <- interpret_dataset_mnar(-0.2, 0.1, 20)
  expect_true(grepl("Weak evidence", interp2))

  # No correlation
  interp3 <- interpret_dataset_mnar(0.05, 0.8, 20)
  expect_true(grepl("No evidence", interp3))

  # Positive correlation (unusual)
  interp4 <- interpret_dataset_mnar(0.4, 0.05, 20)
  expect_true(grepl("unusual", interp4, ignore.case = TRUE))
})