# =============================================================================
# test-batch_analysis.R
# Tests for the batch_analysis() function
# =============================================================================

# --- Common test data ---
# Single site data (no Site column)
df_single <- data.frame(
  Species   = c("sp1", "sp2", "sp3", "sp4"),
  Genus     = c("G1", "G1", "G2", "G2"),
  Family    = c("F1", "F1", "F1", "F2"),
  Order     = c("O1", "O1", "O1", "O1"),
  Abundance = c(10, 20, 15, 5),
  stringsAsFactors = FALSE
)

# Multi-site data (with Site column)
df_multi <- data.frame(
  Site      = c("A", "A", "A", "A", "B", "B", "B", "B"),
  Species   = c("sp1", "sp2", "sp3", "sp4", "sp1", "sp2", "sp3", "sp4"),
  Genus     = c("G1", "G1", "G2", "G2", "G1", "G1", "G2", "G2"),
  Family    = c("F1", "F1", "F1", "F2", "F1", "F1", "F1", "F2"),
  Order     = c("O1", "O1", "O1", "O1", "O1", "O1", "O1", "O1"),
  Abundance = c(10, 20, 15, 5, 5, 5, 5, 5),
  stringsAsFactors = FALSE
)

# Taxonomic tree (compatible with compare_indices tests)
tax <- data.frame(
  Species = c("sp1", "sp2", "sp3", "sp4"),
  Genus   = c("G1", "G1", "G2", "G2"),
  Family  = c("F1", "F1", "F1", "F2"),
  Order   = c("O1", "O1", "O1", "O1"),
  stringsAsFactors = FALSE
)


# ---- Test 1: Single site — without Site column ----
test_that("single site data returns correct results", {
  result <- batch_analysis(df_single)

  # Is it a data frame?
  expect_true(is.data.frame(result))

  # Should have 1 row (single site)
  expect_equal(nrow(result), 1)

  # 16 columns: Site + N_Species + 14 indices (6 classic + 4 PTO + 4 PTO max)
  expect_equal(ncol(result), 16)

  # Site column should be "All" (automatic)
  expect_equal(result$Site, "All")

  # Should have N_Species
  expect_equal(result$N_Species, 4L)

  # All index columns should be numeric
  index_cols <- setdiff(names(result), "Site")
  for (col in index_cols) {
    expect_true(is.numeric(result[[col]]),
                info = paste(col, "should be numeric"))
  }
})


# ---- Test 2: Multiple sites — with Site column ----
test_that("multi-site data returns correct results", {
  result <- batch_analysis(df_multi)

  # Should have 2 rows
  expect_equal(nrow(result), 2)

  # Site names should be preserved
  expect_equal(result$Site, c("A", "B"))

  # Each row should have 16 columns
  expect_equal(ncol(result), 16)

  # N_Species for each site
  expect_equal(result$N_Species, c(4L, 4L))
})


# ---- Test 3: Consistency with compare_indices ----
# batch_analysis results should match compare_indices results for the same data
test_that("single site results match compare_indices", {
  batch_result <- batch_analysis(df_single)

  # Run compare_indices with the same data
  comm <- c(sp1 = 10, sp2 = 20, sp3 = 15, sp4 = 5)
  ci_result <- compare_indices(comm, tax)

  # Does Shannon match?
  expect_equal(batch_result$Shannon, ci_result$Shannon)

  # Does Simpson match?
  expect_equal(batch_result$Simpson, ci_result$Simpson)

  # Does Delta match?
  expect_equal(batch_result$Delta, ci_result$Delta)

  # Does Delta_star match?
  expect_equal(batch_result$Delta_star, ci_result$Delta_star)

  # Does AvTD match?
  expect_equal(batch_result$AvTD, ci_result$AvTD)

  # Does VarTD match?
  expect_equal(batch_result$VarTD, ci_result$VarTD)

  # pTO components
  expect_equal(batch_result$uTO, ci_result$uTO)
  expect_equal(batch_result$TO, ci_result$TO)
  expect_equal(batch_result$uTO_plus, ci_result$uTO_plus)
  expect_equal(batch_result$TO_plus, ci_result$TO_plus)

  # pTO max components
  expect_equal(batch_result$uTO_max, ci_result$uTO_max)
  expect_equal(batch_result$TO_max, ci_result$TO_max)
  expect_equal(batch_result$uTO_plus_max, ci_result$uTO_plus_max)
  expect_equal(batch_result$TO_plus_max, ci_result$TO_plus_max)
})


# ---- Test 4: Multiple sites — match compare_indices for each site ----
test_that("multi-site results match compare_indices", {
  batch_result <- batch_analysis(df_multi)

  # Site A: comm_A = c(sp1=10, sp2=20, sp3=15, sp4=5)
  comm_A <- c(sp1 = 10, sp2 = 20, sp3 = 15, sp4 = 5)
  ci_A <- compare_indices(comm_A, tax)
  expect_equal(batch_result$Shannon[1], ci_A$Shannon)
  expect_equal(batch_result$Simpson[1], ci_A$Simpson)

  # Site B: comm_B = c(sp1=5, sp2=5, sp3=5, sp4=5)
  comm_B <- c(sp1 = 5, sp2 = 5, sp3 = 5, sp4 = 5)
  ci_B <- compare_indices(comm_B, tax)
  expect_equal(batch_result$Shannon[2], ci_B$Shannon)
  expect_equal(batch_result$Simpson[2], ci_B$Simpson)
})


# ---- Test 5: Automatic Site column detection ----
# "Site", "Alan", "Plot" names should be automatically detected
test_that("Site column is automatically detected", {
  # With the name "Site"
  result_site <- batch_analysis(df_multi)
  expect_equal(nrow(result_site), 2)

  # With the name "Alan"
  df_alan <- df_multi
  names(df_alan)[1] <- "Alan"
  result_alan <- batch_analysis(df_alan)
  expect_equal(nrow(result_alan), 2)

  # With the name "Plot"
  df_plot <- df_multi
  names(df_plot)[1] <- "Plot"
  result_plot <- batch_analysis(df_plot)
  expect_equal(nrow(result_plot), 2)
})


# ---- Test 6: With site_column parameter ----
test_that("specifying site_column parameter works", {
  df_custom <- df_multi
  names(df_custom)[1] <- "Lokasyon"
  result <- batch_analysis(df_custom, site_column = "Lokasyon")
  expect_equal(nrow(result), 2)
  expect_equal(result$Site, c("A", "B"))
})


# ---- Test 7: Empty Site column — should work as single site ----
test_that("empty Site column works as single site", {
  df_empty_site <- df_single
  df_empty_site$Site <- ""
  result <- batch_analysis(df_empty_site)
  expect_equal(nrow(result), 1)
  expect_equal(result$Site, "All")
})


# ---- Test 8: NA Site column — should work as single site ----
test_that("NA Site column works as single site", {
  df_na_site <- df_single
  df_na_site$Site <- NA
  result <- batch_analysis(df_na_site)
  expect_equal(nrow(result), 1)
  expect_equal(result$Site, "All")
})


# ---- Test 9: Case-insensitive abundance column ----
test_that("abundance column matches case-insensitively", {
  df_lower <- df_single
  names(df_lower)[names(df_lower) == "Abundance"] <- "abundance"
  result <- batch_analysis(df_lower)
  expect_equal(nrow(result), 1)
})


# ---- Test 10: Invalid input checks ----
test_that("throws error on invalid input", {
  # Not a data.frame
  expect_error(batch_analysis("not_a_df"), "data.*must be a data frame")

  # Empty data.frame
  expect_error(batch_analysis(data.frame()), "no rows")

  # No Abundance column
  df_no_abd <- df_single[, -5]
  expect_error(batch_analysis(df_no_abd), "Abundance.*not found")

  # Insufficient taxonomic columns
  df_no_tax <- data.frame(
    Species   = c("sp1", "sp2"),
    Abundance = c(10, 20),
    stringsAsFactors = FALSE
  )
  expect_error(batch_analysis(df_no_tax), "auto-detect")

  # Specified site_column not found
  expect_error(batch_analysis(df_single, site_column = "Nonexistent"),
               "not found")
})


# ---- Test 11: With tax_columns parameter ----
test_that("works with tax_columns parameter", {
  df_custom_names <- data.frame(
    Tur     = c("sp1", "sp2", "sp3", "sp4"),
    Cins    = c("G1", "G1", "G2", "G2"),
    Familya = c("F1", "F1", "F1", "F2"),
    Takim   = c("O1", "O1", "O1", "O1"),
    Bolluk  = c(10, 20, 15, 5),
    stringsAsFactors = FALSE
  )
  result <- batch_analysis(df_custom_names,
                           tax_columns = c("Tur", "Cins", "Familya", "Takim"),
                           abundance_column = "Bolluk")
  expect_equal(nrow(result), 1)
  expect_true(is.numeric(result$Shannon))
})


# ---- Test 12: Diversity is higher with even distribution ----
test_that("diversity is higher with even distribution", {
  result <- batch_analysis(df_multi)

  # Site B (even distribution) should have higher Shannon
  expect_gt(result$Shannon[result$Site == "B"],
            result$Shannon[result$Site == "A"])
})


# ---- Test 13: Automatic detection with Turkish site name ----
test_that("automatic detection works with site name", {
  df_alan <- df_multi
  names(df_alan)[1] <- "alan"  # lowercase
  result <- batch_analysis(df_alan)
  expect_equal(nrow(result), 2)
})


# ---- Test 14: 3 or more sites ----
test_that("works with 3 sites", {
  df_three <- rbind(
    data.frame(Site = "X", df_single, stringsAsFactors = FALSE),
    data.frame(Site = "Y", df_single, stringsAsFactors = FALSE),
    data.frame(Site = "Z", df_single, stringsAsFactors = FALSE)
  )
  # Different abundance at site Y
  df_three$Abundance[df_three$Site == "Y"] <- c(5, 5, 5, 5)
  df_three$Abundance[df_three$Site == "Z"] <- c(50, 1, 1, 1)

  result <- batch_analysis(df_three)
  expect_equal(nrow(result), 3)
  expect_equal(result$Site, c("X", "Y", "Z"))
})


# ---- Test 15: Zero abundance species should be filtered ----
test_that("zero abundance species are filtered", {
  df_zeros <- data.frame(
    Species   = c("sp1", "sp2", "sp3", "sp4", "sp5"),
    Genus     = c("G1", "G1", "G2", "G2", "G3"),
    Family    = c("F1", "F1", "F1", "F2", "F2"),
    Order     = c("O1", "O1", "O1", "O1", "O1"),
    Abundance = c(10, 20, 15, 5, 0),
    stringsAsFactors = FALSE
  )
  # Should not throw an error — sp5 (abundance = 0) should be skipped
  result <- batch_analysis(df_zeros)
  expect_equal(nrow(result), 1)
  expect_true(is.numeric(result$Shannon))
})