# =============================================================================
# Transformation & Pipeline Tests (TRANS, META, PIPE families)
#
# Tests clean_unicef_data(), filter_unicef_data(), and post-production
# transformations using deterministic fixtures. No network access required.
#
# Fixtures: tests/fixtures/deterministic/ (shared across Python/R/Stata)
# Uses helper-fixtures.R for path resolution (auto-loaded by testthat)
# =============================================================================

# Resolve the shared fixtures directory once for every test in this file.
# get_fixtures_dir() is not defined here; per the file header it is expected
# to come from helper-fixtures.R -- TODO confirm.
FIXTURES_DIR <- get_fixtures_dir()

# ===========================================================================
# PIPE-01: clean_unicef_data column renaming
# ===========================================================================

test_that("PIPE-01: clean_unicef_data renames SDMX columns", {
  skip_if_not_installed("unicefData")

  # Run the single-country pinning fixture through the cleaner and keep only
  # the resulting column names, which is all this test inspects.
  fixture_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_USA_2020_pinning.csv")
  raw <- read.csv(fixture_path, stringsAsFactors = FALSE)
  out_names <- names(unicefData:::clean_unicef_data(raw))

  # Expected cleaned name -> failure hint naming the rename it stands for.
  expected_renames <- c(
    iso3 = "REF_AREA -> iso3",
    period = "TIME_PERIOD -> period",
    value = "OBS_VALUE -> value",
    indicator = "INDICATOR -> indicator",
    sex = "SEX -> sex"
  )

  # One expectation per column so a failure pinpoints the missing rename.
  for (new_name in names(expected_renames)) {
    expect_true(new_name %in% out_names, info = expected_renames[[new_name]])
  }
})

# ===========================================================================
# PIPE-02: Period conversion
# ===========================================================================

test_that("PIPE-02: period is numeric after cleaning", {
  skip_if_not_installed("unicefData")

  # Multi-year fixture; the file name indicates it covers 2015 through 2023.
  fixture_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_USA_2015_2023.csv")
  raw <- read.csv(fixture_path, stringsAsFactors = FALSE)
  cleaned <- unicefData:::clean_unicef_data(raw)
  periods <- cleaned$period

  # The cleaned period must be numeric and span exactly the fixture's years.
  expect_true(is.numeric(periods))
  expect_equal(min(periods), 2015)
  expect_equal(max(periods), 2023)
})

# ===========================================================================
# PIPE-03: Value conversion
# ===========================================================================

test_that("PIPE-03: value is numeric after cleaning", {
  skip_if_not_installed("unicefData")

  fixture_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_USA_2020_pinning.csv")
  raw <- read.csv(fixture_path, stringsAsFactors = FALSE)
  cleaned <- unicefData:::clean_unicef_data(raw)
  values <- cleaned$value

  expect_true(is.numeric(values))

  # Pin the total ("_T") observation: USA U5MR should be ~6.47.
  is_total <- cleaned$sex == "_T"
  total_val <- values[is_total]
  expect_equal(total_val, 6.4688, tolerance = 0.01)
})

# ===========================================================================
# PIPE-04: filter_unicef_data sex filter
# ===========================================================================

test_that("PIPE-04: filter by sex=_T keeps only totals", {
  skip_if_not_installed("unicefData")

  fixture_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_USA_2020_pinning.csv")
  raw <- read.csv(fixture_path, stringsAsFactors = FALSE)

  # The filter is applied to the raw frame, so the column checked below is
  # still the SDMX-style SEX rather than the cleaned sex.
  totals <- unicefData:::filter_unicef_data(raw, sex = "_T", verbose = FALSE)
  only_totals <- all(totals$SEX == "_T")

  expect_true(only_totals,
              info = "Only _T rows should remain after sex=_T filter")
  expect_equal(nrow(totals), 1)
})

test_that("PIPE-04: filter by sex=M keeps only male", {
  skip_if_not_installed("unicefData")

  fixture_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_USA_2020_pinning.csv")
  raw <- read.csv(fixture_path, stringsAsFactors = FALSE)

  # Filter the raw (uncleaned) frame down to the male rows.
  males <- unicefData:::filter_unicef_data(raw, sex = "M", verbose = FALSE)
  only_male <- all(males$SEX == "M")

  # Exactly one row, and it must be coded "M".
  expect_true(only_male)
  expect_equal(nrow(males), 1)
})

# ===========================================================================
# PIPE-05: geo_type assignment
# ===========================================================================

test_that("PIPE-05: geo_type = 0 for country codes", {
  skip_if_not_installed("unicefData")

  df <- read.csv(file.path(FIXTURES_DIR, "CME_MRY0T4_USA_2020_pinning.csv"),
                 stringsAsFactors = FALSE)
  cleaned <- unicefData:::clean_unicef_data(df)

  # Previously the expectation sat inside a bare `if`, so a missing geo_type
  # column left the test with no assertions and no explanation. Skip with an
  # explicit reason instead, so the gap is visible in the test report.
  skip_if_not("geo_type" %in% names(cleaned),
              "clean_unicef_data() output has no geo_type column")

  expect_true(all(cleaned$geo_type == 0),
              info = "USA should have geo_type=0 (country, not region)")
})

# ===========================================================================
# PIPE-06: Standard core columns present (column order is not asserted)
# ===========================================================================

test_that("PIPE-06: standard column order after cleaning", {
  skip_if_not_installed("unicefData")

  fixture_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_USA_BRA_2020.csv")
  raw <- read.csv(fixture_path, stringsAsFactors = FALSE)
  out_names <- names(unicefData:::clean_unicef_data(raw))

  # Only presence is checked: the order may vary due to the country join.
  required <- c("indicator", "indicator_name", "iso3", "country",
                "geo_type", "period", "value")
  is_present <- required %in% out_names

  # One expectation per column so the failure names the missing one.
  for (i in seq_along(required)) {
    expect_true(is_present[i],
                info = paste("Missing core column:", required[i]))
  }
})

# ===========================================================================
# PIPE-07: Multi-country cleaning preserves all countries
# ===========================================================================

test_that("PIPE-07: multi-country cleaning preserves all", {
  skip_if_not_installed("unicefData")

  fixture_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_multi_2018_2023.csv")
  raw <- read.csv(fixture_path, stringsAsFactors = FALSE)
  cleaned <- unicefData:::clean_unicef_data(raw)

  # Distinct country codes that survive cleaning; five are expected.
  countries <- unique(cleaned$iso3)

  expect_equal(length(countries), 5)
  expect_true("USA" %in% countries)
  expect_true("BRA" %in% countries)
})

# ===========================================================================
# PIPE-08: Empty DataFrame handling
# ===========================================================================

test_that("PIPE-08: empty DataFrame returns 0 rows", {
  skip_if_not_installed("unicefData")

  fixture_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_USA_2020_pinning.csv")
  raw <- read.csv(fixture_path, stringsAsFactors = FALSE)

  # Build a zero-row frame that keeps the fixture's columns by matching on a
  # REF_AREA code that is not expected to exist, and confirm it is empty.
  matches_nothing <- raw$REF_AREA == "NONEXISTENT"
  no_rows <- raw[matches_nothing, ]
  expect_equal(nrow(no_rows), 0)

  # Cleaning an empty frame must yield an empty frame.
  cleaned <- unicefData:::clean_unicef_data(no_rows)
  expect_equal(nrow(cleaned), 0)
})

# ===========================================================================
# DL-08: Wealth quintile data structure
# ===========================================================================

test_that("DL-08: wealth quintile values in BRA fixture", {
  # Fixture-only check: reads the raw CSV and does not call unicefData.
  fixture_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_BRA_sex_2020.csv")
  raw <- read.csv(fixture_path, stringsAsFactors = FALSE)

  quintiles <- unique(raw$WEALTH_QUINTILE)

  expect_true("_T" %in% quintiles,
              info = "Total wealth quintile should be present")

  # At least one code of the form Q1..Q5 must appear in the BRA fixture.
  has_numbered_quintile <- any(grepl("^Q[1-5]$", quintiles))
  expect_true(has_numbered_quintile,
              info = "Should have Q1-Q5 wealth quintile values")
})

test_that("DL-08: Q1 (poorest) > Q5 (richest) mortality", {
  # Fixture-only check: reads the raw CSV and does not call unicefData.
  df <- read.csv(file.path(FIXTURES_DIR, "CME_MRY0T4_BRA_sex_2020.csv"),
                 stringsAsFactors = FALSE)

  # %in% never yields NA, so rows with a missing WEALTH_QUINTILE are dropped
  # rather than leaking NA entries into q1/q5 (which `==` subsetting does).
  q1 <- df$OBS_VALUE[df$WEALTH_QUINTILE %in% "Q1"]
  q5 <- df$OBS_VALUE[df$WEALTH_QUINTILE %in% "Q5"]

  # Previously a bare `if` left the test with no assertions when either
  # quintile was absent; skip with an explicit reason instead.
  skip_if(length(q1) == 0 || length(q5) == 0,
          "BRA fixture has no Q1 and/or Q5 wealth quintile rows")

  # Compare the first observation of each quintile, as before.
  expect_gt(q1[1], q5[1],
            label = "Poorest quintile should have higher U5MR than richest")
})

# ===========================================================================
# EDGE-02: Single-observation stability
# ===========================================================================

test_that("EDGE-02: single-row cleaning works", {
  skip_if_not_installed("unicefData")

  fixture_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_USA_2020_pinning.csv")
  raw <- read.csv(fixture_path, stringsAsFactors = FALSE)

  # Reduce the fixture to its single total ("_T") row before cleaning.
  is_total <- raw$SEX == "_T"
  one_row <- raw[is_total, ]
  expect_equal(nrow(one_row), 1)

  # A one-row input must come back as one row with renamed columns.
  cleaned <- unicefData:::clean_unicef_data(one_row)
  expect_equal(nrow(cleaned), 1)
  expect_true("iso3" %in% names(cleaned))
})

# ===========================================================================
# EDGE-03: Special characters
# ===========================================================================

test_that("EDGE-03: unit of measure with comma parses correctly", {
  # Fixture-only check: reads the raw CSV and does not call unicefData.
  df <- read.csv(file.path(FIXTURES_DIR, "CME_MRY0T4_USA_2020_pinning.csv"),
                 stringsAsFactors = FALSE)

  # Previously a bare `if` left the test with no assertions when the column
  # was absent; skip with an explicit reason so the gap shows in the report.
  # NOTE(review): the dotted name presumably comes from read.csv()'s default
  # check.names = TRUE applied to a "Unit of measure" header -- confirm.
  skip_if_not("Unit.of.measure" %in% names(df),
              "fixture has no 'Unit.of.measure' column")

  # The label contains a comma ("1,000"); it must survive CSV parsing.
  unit <- df$Unit.of.measure[1]
  expect_true(grepl("1,000|1000", unit))
})

# ===========================================================================
# Cross-dataflow column schema difference
# ===========================================================================

test_that("CME vs IMMUNISATION have different column schemas", {
  # Fixture-only check: compares the raw column names of two dataflows.
  cme_path <- file.path(FIXTURES_DIR, "CME_MRY0T4_USA_2020_pinning.csv")
  imm_path <- file.path(FIXTURES_DIR, "IM_MCV1_USA_BRA_2015_2023.csv")
  cme_cols <- names(read.csv(cme_path, stringsAsFactors = FALSE))
  imm_cols <- names(read.csv(imm_path, stringsAsFactors = FALSE))

  # Columns unique to each side. The original note says CME has
  # WEALTH_QUINTILE and IMMUNISATION has VACCINE; neither name is asserted.
  n_cme_only <- length(setdiff(cme_cols, imm_cols))
  n_imm_only <- length(setdiff(imm_cols, cme_cols))

  # Any column on one side only means the schemas are not identical.
  expect_gt(n_cme_only + n_imm_only, 0,
            label = "CME and IMMUNISATION should have different columns")
})