# Test construct_admm_inputs -------------------------------------------------------

test_that("construct_admm_inputs handles basic case with single exact term", {
  # Setup test data
  data <- data.frame(
    x = factor(c("a", "b", "a", "b", "a")),
    y = factor(c("1", "1", "2", "2", "1"))
  )

  # Create formula spec (simulating output from parse_raking_formula)
  formula_spec <- list(
    formula = ~x,
    terms = list(
      list(
        type = "exact",
        variables = "x",
        interaction = NULL
      )
    )
  )

  # Create target values
  target_values <- list(
    targets = list(
      exact_x = c(a = 0.6, b = 0.4)
    )
  )

  # Run function
  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Check structure
  expect_type(result, "list")
  expect_true("design_matrix" %in% names(result))
  expect_true("losses" %in% names(result))

  # Check design matrix
  expect_true(inherits(result$design_matrix, "Matrix"))
  expect_equal(dim(result$design_matrix), c(2, 5)) # 2 levels, 5 samples
  expect_equal(as.vector(result$design_matrix[1, ]), c(1, 0, 1, 0, 1)) # a's
  expect_equal(as.vector(result$design_matrix[2, ]), c(0, 1, 0, 1, 0)) # b's

  # Check losses
  expect_length(result$losses, 1)
  expect_true(all(c("fn", "target", "prox") %in% names(result$losses[[1]])))
  expect_equal(result$losses[[1]]$fn, equality_loss)
  expect_equal(result$losses[[1]]$prox, prox_equality)
  expect_equal(result$losses[[1]]$target, c(a = 0.6, b = 0.4))
})

test_that("construct_admm_inputs handles l2 terms", {
  # Setup test data
  data <- data.frame(
    x = factor(c("a", "b", "a", "b", "a"))
  )

  formula_spec <- list(
    formula = ~x,
    terms = list(
      list(
        type = "l2",
        variables = "x",
        interaction = NULL
      )
    )
  )

  target_values <- list(
    targets = list(
      l2_x = c(a = 0.6, b = 0.4)
    )
  )

  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Check loss function type
  expect_equal(result$losses[[1]]$fn, least_squares_loss)
  expect_equal(result$losses[[1]]$prox, prox_least_squares)
})

test_that("construct_admm_inputs handles interactions", {
  # Setup test data
  data <- data.frame(
    x = factor(c("a", "b", "a", "b", "a")),
    y = factor(c("1", "1", "2", "2", "1"))
  )

  formula_spec <- list(
    formula = ~ x:y,
    terms = list(
      list(
        type = "exact",
        variables = c("x", "y"),
        interaction = TRUE
      )
    )
  )

  target_values <- list(
    targets = list(
      "exact_x:y" = c("a:1" = 0.3, "a:2" = 0.3, "b:1" = 0.2, "b:2" = 0.2)
    )
  )

  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Check design matrix dimensions (4 interaction levels, 5 samples)
  expect_equal(dim(result$design_matrix), c(4, 5))

  # Verify interaction encoding - must use Matrix::colSums explicitly for sparse matrices
  # to ensure correct method dispatch (base::colSums may not work as expected)
  expect_equal(Matrix::colSums(result$design_matrix), rep(1, 5)) # Each sample in exactly one category
})

test_that("construct_admm_inputs handles multiple terms", {
  # Setup test data
  data <- data.frame(
    x = factor(c("a", "b", "a", "b", "a")),
    y = factor(c("1", "1", "2", "2", "1"))
  )

  formula_spec <- list(
    formula = ~ x + y,
    terms = list(
      list(
        type = "exact",
        variables = "x",
        interaction = NULL
      ),
      list(
        type = "l2",
        variables = "y",
        interaction = NULL
      )
    )
  )

  target_values <- list(
    targets = list(
      exact_x = c(a = 0.6, b = 0.4),
      l2_y = c("1" = 0.6, "2" = 0.4)
    )
  )

  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Check structure
  expect_length(result$losses, 2)
  expect_equal(result$losses[[1]]$fn, equality_loss)
  expect_equal(result$losses[[2]]$fn, least_squares_loss)

  # Check design matrix dimensions (2 levels for x + 2 levels for y = 4 rows)
  expect_equal(dim(result$design_matrix), c(4, 5))
})

test_that("construct_admm_inputs errors on missing target values", {
  data <- data.frame(x = factor(c("a", "b")))

  formula_spec <- list(
    formula = ~x,
    terms = list(
      list(
        type = "exact",
        variables = "x",
        interaction = NULL
      )
    )
  )

  target_values <- list(targets = list()) # Empty targets

  expect_error(
    construct_admm_inputs(data, formula_spec, target_values),
    "Missing target values for term: x"
  )
})

test_that("construct_admm_inputs handles character variables", {
  # Setup test data with character columns
  data <- data.frame(
    x = c("a", "b", "a", "b", "a"),
    y = c("1", "1", "2", "2", "1"),
    stringsAsFactors = FALSE
  )

  formula_spec <- list(
    formula = ~x,
    terms = list(
      list(
        type = "exact",
        variables = "x",
        interaction = NULL
      )
    )
  )

  target_values <- list(
    targets = list(
      exact_x = c(a = 0.6, b = 0.4)
    )
  )

  # Should convert characters to factors internally
  expect_error(construct_admm_inputs(data, formula_spec, target_values), NA)
})

test_that("construct_admm_inputs errors on unknown term type", {
  data <- data.frame(x = factor(c("a", "b")))

  formula_spec <- list(
    formula = ~x,
    terms = list(
      list(
        type = "unknown",
        variables = "x",
        interaction = NULL
      )
    )
  )

  target_values <- list(
    targets = list(
      unknown_x = c(a = 0.6, b = 0.4)
    )
  )

  expect_error(
    construct_admm_inputs(data, formula_spec, target_values),
    "Unknown term type: unknown"
  )
})

test_that("construct_admm_inputs handles mixed exact and l2 loss types", {
  # This test covers the bug where ~ rr_exact(sex) + rr_l2(age) would fail
  # because model.frame with the original formula created columns named
  # "rr_exact(sex)" and "rr_l2(age)" instead of "sex" and "age"
  data <- data.frame(
    sex = factor(c("M", "F", "M", "F", "M")),
    age = factor(c("young", "old", "young", "old", "young"))
  )

  # Simulate what parse_raking_formula returns for ~ rr_exact(sex) + rr_l2(age)
  formula_spec <- list(
    formula = ~ rr_exact(sex) + rr_l2(age), # The wrapped formula
    terms = list(
      list(
        type = "exact",
        variables = "sex", # Variables are unwrapped
        interaction = NULL
      ),
      list(
        type = "l2",
        variables = "age", # Variables are unwrapped
        interaction = NULL
      )
    )
  )

  target_values <- list(
    targets = list(
      exact_sex = c(M = 0.5, F = 0.5),
      l2_age = c(young = 0.6, old = 0.4)
    )
  )

  # Should not error - this was the original bug
  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Verify structure
  expect_type(result, "list")
  expect_true("design_matrix" %in% names(result))
  expect_true("losses" %in% names(result))

  # Verify loss functions are correct for each term
  expect_equal(result$losses[[1]]$fn, equality_loss)
  expect_equal(result$losses[[2]]$fn, least_squares_loss)

  # Verify design matrix dimensions (2 sex levels + 2 age levels = 4 rows, 5 samples)
  expect_equal(dim(result$design_matrix), c(4, 5))
})

test_that("construct_admm_inputs handles continuous variables", {
  # Test that numeric variables get actual values in design matrix, not indicators
  data <- data.frame(
    sex = factor(c("M", "F", "M", "F", "M")),
    income = c(50000, 75000, 45000, 80000, 55000) # Continuous
  )

  formula_spec <- list(
    formula = ~ sex + income,
    terms = list(
      list(
        type = "exact",
        variables = "sex",
        interaction = NULL
      ),
      list(
        type = "exact",
        variables = "income",
        interaction = NULL
      )
    )
  )

  target_values <- list(
    targets = list(
      exact_sex = c(M = 0.5, F = 0.5),
      exact_income = c(mean = 60000) # Target mean income
    )
  )

  # Use normalize = FALSE to test raw values in design matrix
  result <- suppressWarnings(construct_admm_inputs(
    data,
    formula_spec,
    target_values,
    normalize = FALSE
  ))

  # Verify structure
  expect_type(result, "list")
  expect_true("design_matrix" %in% names(result))
  expect_true("losses" %in% names(result))

  # Verify design matrix dimensions (2 sex levels + 1 income row = 3 rows, 5 samples)
  expect_equal(dim(result$design_matrix), c(3, 5))

  # The income row should contain actual values, not 0/1 indicators
  # Row 3 (income) should be [50000, 75000, 45000, 80000, 55000]
  income_row <- as.vector(result$design_matrix[3, ])
  expect_equal(income_row, c(50000, 75000, 45000, 80000, 55000))

  # The sex rows should still be indicators
  expect_equal(as.vector(result$design_matrix[1, ]), c(0, 1, 0, 1, 0)) # F
  expect_equal(as.vector(result$design_matrix[2, ]), c(1, 0, 1, 0, 1)) # M
})

test_that("construct_admm_inputs handles continuous-only formula", {
  # Test case with only continuous variables
  data <- data.frame(
    age = c(25, 35, 45, 30, 40),
    income = c(50000, 75000, 45000, 80000, 55000)
  )

  formula_spec <- list(
    formula = ~ age + income,
    terms = list(
      list(
        type = "exact",
        variables = "age",
        interaction = NULL
      ),
      list(
        type = "exact",
        variables = "income",
        interaction = NULL
      )
    )
  )

  target_values <- list(
    targets = list(
      exact_age = c(mean = 35),
      exact_income = c(mean = 60000)
    )
  )

  # Use normalize = FALSE to test raw values
  result <- construct_admm_inputs(
    data,
    formula_spec,
    target_values,
    normalize = FALSE
  )

  # Design matrix should have 2 rows (one per continuous variable)
  expect_equal(dim(result$design_matrix), c(2, 5))

  # Row 1 (age) should contain actual ages
  expect_equal(as.vector(result$design_matrix[1, ]), c(25, 35, 45, 30, 40))

  # Row 2 (income) should contain actual incomes
  expect_equal(
    as.vector(result$design_matrix[2, ]),
    c(50000, 75000, 45000, 80000, 55000)
  )
})

test_that("construct_admm_inputs normalizes continuous variables by default", {
  # Test that normalize = TRUE (default) scales continuous values by target
  data <- data.frame(
    income = c(50000, 75000, 45000, 80000, 55000)
  )

  formula_spec <- list(
    formula = ~income,
    terms = list(
      list(
        type = "exact",
        variables = "income",
        interaction = NULL
      )
    )
  )

  target_values <- list(
    targets = list(
      exact_income = c(mean = 60000)
    )
  )

  # With normalization (default)
  result_norm <- construct_admm_inputs(data, formula_spec, target_values)

  # Values should be scaled by target (60000)
  expected_normalized <- c(50000, 75000, 45000, 80000, 55000) / 60000
  expect_equal(as.vector(result_norm$design_matrix[1, ]), expected_normalized)

  # Target should be 1.0 (normalized)
  expect_equal(unname(result_norm$losses[[1]]$target), 1.0)
  expect_equal(names(result_norm$losses[[1]]$target), "mean")

  # Original target should be preserved
  expect_equal(unname(result_norm$losses[[1]]$original_target), 60000)

  # Scale factors should be tracked
  expect_true(!is.null(result_norm$scale_factors))
  expect_equal(unname(result_norm$scale_factors[[1]]$scale), 60000)
  expect_equal(result_norm$scale_factors[[1]]$index, 1)
})

test_that("construct_admm_inputs errors on continuous in interaction", {
  data <- data.frame(
    sex = factor(c("M", "F", "M")),
    age = c(25, 35, 45) # Continuous
  )

  formula_spec <- list(
    formula = ~ sex:age,
    terms = list(list(
      type = "exact",
      variables = c("sex", "age"),
      interaction = TRUE
    ))
  )

  target_values <- list(
    targets = list("exact_sex:age" = c(0.25, 0.25, 0.25, 0.25))
  )

  expect_error(
    construct_admm_inputs(data, formula_spec, target_values),
    "Interactions with continuous variables are not supported"
  )
})

test_that("construct_admm_inputs handles variance constraint", {
  data <- data.frame(age = c(20, 30, 40, 50, 60))
  # Mean = 40, Variance = sum((x - 40)^2) / 5 = (400+100+0+100+400)/5 = 200

  formula_spec <- list(
    formula = ~age,
    terms = list(list(
      type = "var",
      variables = "age",
      interaction = NULL
    ))
  )

  # Target variance of 250
  target_values <- list(targets = list(var_age = c(var = 250)))

  # Without normalization to check raw values
  result <- suppressWarnings(construct_admm_inputs(
    data,
    formula_spec,
    target_values,
    normalize = FALSE
  ))

  # Verify structure
  expect_type(result, "list")
  expect_true("design_matrix" %in% names(result))
  expect_true("losses" %in% names(result))

  # Design matrix row should be (x - mean(x))^2
  mean_age <- mean(data$age) # 40
  expected <- (data$age - mean_age)^2 # c(400, 100, 0, 100, 400)
  expect_equal(as.vector(result$design_matrix[1, ]), expected)

  # Loss function should use equality (exact match on variance)
  expect_equal(result$losses[[1]]$fn, equality_loss)
  expect_equal(result$losses[[1]]$prox, prox_equality)
  expect_equal(unname(result$losses[[1]]$target), 250)
})

test_that("construct_admm_inputs centers rr_var on target mean when rr_mean exists", {
  data <- data.frame(age = c(20, 30, 40, 50, 60))

  formula_spec <- list(
    formula = ~age,
    terms = list(
      list(type = "exact", variables = "age", interaction = NULL),
      list(type = "var", variables = "age", interaction = NULL)
    )
  )

  target_values <- list(targets = list(
    exact_age = c(mean = 35),
    var_age = c(var = 250)
  ))

  result <- suppressWarnings(construct_admm_inputs(
    data,
    formula_spec,
    target_values,
    normalize = FALSE
  ))

  # Row 1 is mean constraint values, row 2 is variance values centered at target mean 35.
  expect_equal(as.vector(result$design_matrix[2, ]), c(225, 25, 25, 225, 625))
})

test_that("construct_admm_inputs warns and uses sample mean when rr_var has no rr_mean", {
  data <- data.frame(age = c(20, 30, 40, 50, 60))

  formula_spec <- list(
    formula = ~age,
    terms = list(list(
      type = "var",
      variables = "age",
      interaction = NULL
    ))
  )

  target_values <- list(targets = list(var_age = c(var = 250)))

  expect_warning(
    result <- construct_admm_inputs(
      data,
      formula_spec,
      target_values,
      normalize = FALSE
    ),
    "Centering on the sample mean"
  )

  # Centered at sample mean 40.
  expect_equal(as.vector(result$design_matrix[1, ]), c(400, 100, 0, 100, 400))
})

test_that("construct_admm_inputs normalizes variance constraint", {
  data <- data.frame(age = c(20, 30, 40, 50, 60))
  # Mean = 40, (x - mean)^2 = c(400, 100, 0, 100, 400)

  formula_spec <- list(
    formula = ~age,
    terms = list(list(
      type = "var",
      variables = "age",
      interaction = NULL
    ))
  )

  target_values <- list(targets = list(var_age = c(var = 200)))

  # With normalization (default)
  result <- suppressWarnings(construct_admm_inputs(data, formula_spec, target_values))

  # Values should be scaled by target
  mean_age <- mean(data$age)
  raw_values <- (data$age - mean_age)^2
  expected_normalized <- raw_values / 200
  expect_equal(as.vector(result$design_matrix[1, ]), expected_normalized)

  # Target should be 1.0 (normalized)
  expect_equal(unname(result$losses[[1]]$target), 1.0)
  expect_true(tolower(names(result$losses[[1]]$target)) %in% c("var", "variance"))

  # Original target should be preserved
  expect_equal(unname(result$losses[[1]]$original_target), 200)
})

test_that("construct_admm_inputs handles quantile constraint", {
  data <- data.frame(income = c(30000, 40000, 50000, 60000, 70000))
  # Target: 50% of weighted data should be <= 45000

  formula_spec <- list(
    formula = ~income,
    terms = list(list(
      type = "quantile",
      variables = "income",
      interaction = NULL,
      params = list(p = 0.5)
    ))
  )

  # Target quantile value is 45000, probability is 0.5
  target_values <- list(targets = list(quantile_income = c(q50 = 45000)))

  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Verify structure
  expect_type(result, "list")
  expect_true("design_matrix" %in% names(result))
  expect_true("losses" %in% names(result))

  # Design matrix row should be indicators I(x <= 45000)
  # income values: 30000, 40000, 50000, 60000, 70000
  # indicators:    1,     1,     0,     0,     0  (30000 <= 45000, 40000 <= 45000)
  expected <- c(1, 1, 0, 0, 0)
  expect_equal(as.vector(result$design_matrix[1, ]), expected)

  # Loss function should use equality
  expect_equal(result$losses[[1]]$fn, equality_loss)
  expect_equal(result$losses[[1]]$prox, prox_equality)

  # Target should be the probability p, not the quantile value
  expect_equal(unname(result$losses[[1]]$target), 0.5)
  expect_equal(unname(result$losses[[1]]$original_target), 0.5)
  expect_equal(tolower(names(result$losses[[1]]$target)), "q50")
  expect_equal(tolower(names(result$losses[[1]]$original_target)), "q50")
})

test_that("construct_admm_inputs errors on multiple quantile targets", {
  data <- data.frame(income = c(30000, 40000, 50000))

  formula_spec <- list(
    formula = ~income,
    terms = list(list(
      type = "quantile",
      variables = "income",
      interaction = NULL,
      params = list(p = 0.5)
    ))
  )

  # Two targets for same variable - should error
  target_values <- list(
    targets = list(
      quantile_income = c(q25 = 35000, q50 = 45000)
    )
  )

  expect_error(
    construct_admm_inputs(data, formula_spec, target_values),
    "Quantile constraint requires exactly one target value"
  )
})

# =============================================================================
# Tests for target ordering (bug fix verification)
# =============================================================================
# These tests verify that targets are correctly matched to design matrix rows
# regardless of the order in which levels appear in population_data.
# The design matrix rows follow R's alphabetical factor ordering.

test_that("targets are correctly ordered when pop_data order differs from alphabetical", {

  # Data with levels that would be ordered M, F alphabetically as F, M
  data <- data.frame(
    sex = factor(c("M", "F", "M", "F", "M"))
  )

  formula_spec <- list(
    formula = ~sex,
    terms = list(list(
      type = "exact",
      variables = "sex",
      interaction = NULL
    ))
  )

  # Pop_data style: M first, F second (non-alphabetical)
  target_values <- list(
    targets = list(
      exact_sex = c(M = 0.49, F = 0.51)
    )
  )

  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Design matrix rows should be in alphabetical order: F, M
  # Row 1 = F (samples 2, 4), Row 2 = M (samples 1, 3, 5)
  expect_equal(as.vector(result$design_matrix[1, ]), c(0, 1, 0, 1, 0)) # F
  expect_equal(as.vector(result$design_matrix[2, ]), c(1, 0, 1, 0, 1)) # M

  # Targets should be reordered to match: F=0.51, M=0.49
  expect_equal(unname(result$losses[[1]]$target), c(0.51, 0.49))
  expect_equal(names(result$losses[[1]]$target), c("F", "M"))
})

test_that("targets are correctly ordered for multi-level factors", {
  data <- data.frame(
    age = factor(c("55+", "18-34", "35-54", "18-34", "55+"))
  )

  formula_spec <- list(
    formula = ~age,
    terms = list(list(
      type = "exact",
      variables = "age",
      interaction = NULL
    ))
  )

  # Pop_data order: 18-34, 35-54, 55+ (happens to match alphabetical for these)
  # But let's use a different order to test
  target_values <- list(
    targets = list(
      exact_age = c("55+" = 0.37, "18-34" = 0.28, "35-54" = 0.35)
    )
  )

  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Alphabetical order: 18-34, 35-54, 55+
  expect_equal(names(result$losses[[1]]$target), c("18-34", "35-54", "55+"))
  expect_equal(unname(result$losses[[1]]$target), c(0.28, 0.35, 0.37))
})

test_that("interaction targets are correctly ordered", {
  data <- data.frame(
    sex = factor(c("M", "F", "M", "F")),
    region = factor(c("S", "N", "N", "S"))
  )

  formula_spec <- list(
    formula = ~ sex:region,
    terms = list(list(
      type = "exact",
      variables = c("sex", "region"),
      interaction = TRUE
    ))
  )

  # Pop_data order: M:N, M:S, F:N, F:S (non-alphabetical)
  target_values <- list(
    targets = list(
      "exact_sex:region" = c("M:N" = 0.20, "M:S" = 0.30, "F:N" = 0.25, "F:S" = 0.25)
    )
  )

  result <- construct_admm_inputs(data, formula_spec, target_values)

  # R orders interactions by iterating through second var levels, then first var levels
  # So: region N (sex F, M), region S (sex F, M) → F:N, M:N, F:S, M:S
  expect_equal(names(result$losses[[1]]$target), c("F:N", "M:N", "F:S", "M:S"))
  expect_equal(unname(result$losses[[1]]$target), c(0.25, 0.20, 0.25, 0.30))
})

test_that("end-to-end raking applies targets to correct levels", {
  # This is the key test: verify that the achieved proportions match

  # the INTENDED targets, not swapped targets
  set.seed(123)
  n <- 500

  # Create sample with known imbalance
  sample_data <- data.frame(
    sex = sample(c("M", "F"), n, replace = TRUE, prob = c(0.7, 0.3))
  )

  # Target: M should be 40%, F should be 60% (opposite of sample)
  pop_data <- data.frame(
    variable = c("sex", "sex"),
    level = c("M", "F"),  # M listed first
    proportion = c(0.40, 0.60)
  )

  result <- regrake(
    data = sample_data,
    formula = ~ rr_exact(sex),
    population_data = pop_data,
    pop_type = "proportions"
  )

  # Verify achieved proportions match INTENDED targets
  w <- result$weights
  achieved_M <- sum(w[sample_data$sex == "M"]) / sum(w)
  achieved_F <- sum(w[sample_data$sex == "F"]) / sum(w)

  # M should be ~40% (down from ~70%), F should be ~60% (up from ~30%)
  expect_equal(achieved_M, 0.40, tolerance = 1e-4)
  expect_equal(achieved_F, 0.60, tolerance = 1e-4)

  # Also verify via balance table
  balance_M <- result$balance$achieved[result$balance$level == "M"]
  balance_F <- result$balance$achieved[result$balance$level == "F"]
  expect_equal(balance_M, 0.40, tolerance = 1e-4)
  expect_equal(balance_F, 0.60, tolerance = 1e-4)
})

test_that("end-to-end raking with interactions applies targets correctly", {
  set.seed(456)
  n <- 500

  sample_data <- data.frame(
    sex = sample(c("M", "F"), n, replace = TRUE, prob = c(0.6, 0.4)),
    region = sample(c("N", "S"), n, replace = TRUE, prob = c(0.7, 0.3))
  )

  # Targets with specific joint distribution
  pop_data <- data.frame(
    variable = c("sex", "sex", "region", "region", rep("sex:region", 4)),
    level = c("M", "F", "N", "S", "M:N", "M:S", "F:N", "F:S"),
    target = c(0.5, 0.5, 0.5, 0.5, 0.20, 0.30, 0.30, 0.20)
  )

  suppressWarnings({
    result <- regrake(
      data = sample_data,
      formula = ~ sex + region + sex:region,
      population_data = pop_data,
      pop_type = "proportions"
    )
  })

  w <- result$weights

  # Verify joint distribution targets are hit correctly
  achieved_MN <- sum(w[sample_data$sex == "M" & sample_data$region == "N"]) / sum(w)
  achieved_MS <- sum(w[sample_data$sex == "M" & sample_data$region == "S"]) / sum(w)
  achieved_FN <- sum(w[sample_data$sex == "F" & sample_data$region == "N"]) / sum(w)
  achieved_FS <- sum(w[sample_data$sex == "F" & sample_data$region == "S"]) / sum(w)

  expect_equal(achieved_MN, 0.20, tolerance = 1e-3)
  expect_equal(achieved_MS, 0.30, tolerance = 1e-3)
  expect_equal(achieved_FN, 0.30, tolerance = 1e-3)
  expect_equal(achieved_FS, 0.20, tolerance = 1e-3)
})

# =============================================================================
# Tests for NA handling
# =============================================================================

test_that("construct_admm_inputs errors when data contains NAs", {
  data <- data.frame(
    sex = factor(c("M", "F", NA, "M")),
    age = c(25, 35, 45, 30)
  )

  formula_spec <- list(
    formula = ~ sex,
    terms = list(list(
      type = "exact",
      variables = "sex",
      interaction = NULL
    ))
  )

  target_values <- list(
    targets = list(exact_sex = c(M = 0.5, F = 0.5))
  )

  expect_error(
    construct_admm_inputs(data, formula_spec, target_values),
    "row.*contain missing values"
  )
})

test_that("construct_admm_inputs error message identifies NA variables", {
  data <- data.frame(
    sex = factor(c("M", "F", "M")),
    age = c(25, NA, 45)
  )

  formula_spec <- list(
    formula = ~ sex + age,
    terms = list(
      list(type = "exact", variables = "sex", interaction = NULL),
      list(type = "exact", variables = "age", interaction = NULL)
    )
  )

  target_values <- list(
    targets = list(
      exact_sex = c(M = 0.6, F = 0.4),
      exact_age = c(mean = 35)
    )
  )

  expect_error(
    construct_admm_inputs(data, formula_spec, target_values),
    "age"
  )
})

test_that("construct_admm_inputs works with complete data", {
  data <- data.frame(
    sex = factor(c("M", "F", "M")),
    age = c(25, 35, 45)
  )

  formula_spec <- list(
    formula = ~ sex,
    terms = list(list(
      type = "exact",
      variables = "sex",
      interaction = NULL
    ))
  )

  target_values <- list(
    targets = list(exact_sex = c(M = 0.6, F = 0.4))
  )

  # Should not error
  result <- construct_admm_inputs(data, formula_spec, target_values)
  expect_true("design_matrix" %in% names(result))
})

# =============================================================================
# Tests for data/target level validation
# =============================================================================

test_that("construct_admm_inputs errors when data level has no target", {
  data <- data.frame(
    region = factor(c("N", "S", "E", "W")) # E and W exist in data
  )

  formula_spec <- list(
    formula = ~ region,
    terms = list(list(
      type = "exact",
      variables = "region",
      interaction = NULL
    ))
  )

  # Only N and S have targets
  target_values <- list(
    targets = list(exact_region = c(N = 0.5, S = 0.5))
  )

  expect_error(
    construct_admm_inputs(data, formula_spec, target_values),
    "Data contains level.*that have no targets.*E.*W"
  )
})

test_that("construct_admm_inputs errors when hard-constraint target level not in data", {
  data <- data.frame(
    region = factor(c("N", "N", "S", "S")) # Only N and S
  )

  formula_spec <- list(
    formula = ~ region,
    terms = list(list(
      type = "exact",
      variables = "region",
      interaction = NULL
    ))
  )

  # Targets include E which doesn't exist in data
  target_values <- list(
    targets = list(exact_region = c(N = 0.4, S = 0.4, E = 0.2))
  )

  expect_error(
    {
      construct_admm_inputs(data, formula_spec, target_values)
    },
    "Infeasible hard constraint.*not present in data.*E"
  )
})

test_that("construct_admm_inputs still warns for soft-constraint unused targets", {
  data <- data.frame(
    region = factor(c("N", "N", "S", "S")) # Only N and S
  )

  formula_spec <- list(
    formula = ~ region,
    terms = list(list(
      type = "l2",
      variables = "region",
      interaction = NULL
    ))
  )

  target_values <- list(
    targets = list(l2_region = c(N = 0.4, S = 0.4, E = 0.2))
  )

  expect_warning(
    construct_admm_inputs(data, formula_spec, target_values),
    "levels not present in data.*E"
  )
})

test_that("construct_admm_inputs validates interaction levels", {
  data <- data.frame(
    sex = factor(c("M", "F")),
    region = factor(c("N", "S"))
  )

  formula_spec <- list(
    formula = ~ sex:region,
    terms = list(list(
      type = "exact",
      variables = c("sex", "region"),
      interaction = TRUE
    ))
  )

  # Missing M:S and F:N combinations in targets
  target_values <- list(
    targets = list("exact_sex:region" = c("M:N" = 0.5, "F:S" = 0.5))
  )

  expect_error(
    construct_admm_inputs(data, formula_spec, target_values),
    "that have no targets"
  )
})

# =============================================================================
# Tests for range (inequality) constraints
# =============================================================================

test_that("construct_admm_inputs handles range constraint for continuous variable (margin mode)", {
  data <- data.frame(income = c(50000, 75000, 45000, 80000, 55000))

  formula_spec <- list(
    formula = ~income,
    terms = list(list(
      type = "range",
      variables = "income",
      interaction = NULL,
      params = list(mode = "margin", margin = 5000)
    ))
  )

  target_values <- list(targets = list(range_income = c(mean = 60000)))

  # With normalization (default)
  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Verify loss uses inequality prox
  expect_equal(result$losses[[1]]$fn, inequality_loss)
  expect_equal(result$losses[[1]]$prox, prox_inequality)

  # Verify lower/upper bounds are set (scaled by target)
  # Original: lower = -5000, upper = 5000
  # Normalized: lower = -5000/60000, upper = 5000/60000
  expect_equal(unname(result$losses[[1]]$lower), -5000 / 60000, tolerance = 1e-10)
  expect_equal(unname(result$losses[[1]]$upper), 5000 / 60000, tolerance = 1e-10)

  # Target should be normalized to 1.0
  expect_equal(unname(result$losses[[1]]$target), 1.0)
})

test_that("construct_admm_inputs errors on infeasible hard continuous exact target", {
  data <- data.frame(income = c(50000, 60000, 70000))

  formula_spec <- list(
    formula = ~income,
    terms = list(list(
      type = "exact",
      variables = "income",
      interaction = NULL
    ))
  )

  target_values <- list(targets = list(exact_income = c(mean = 100000)))

  expect_error(
    {
      construct_admm_inputs(data, formula_spec, target_values)
    },
    "outside achievable range"
  )
})

test_that("construct_admm_inputs errors on infeasible hard continuous range target", {
  data <- data.frame(income = c(50000, 60000, 70000))

  formula_spec <- list(
    formula = ~income,
    terms = list(list(
      type = "range",
      variables = "income",
      interaction = NULL,
      params = list(mode = "margin", margin = 1000)
    ))
  )

  target_values <- list(targets = list(range_income = c(mean = 100000)))

  expect_error(
    {
      construct_admm_inputs(data, formula_spec, target_values)
    },
    "does not overlap achievable range"
  )
})

test_that("construct_admm_inputs errors on infeasible quantile target threshold", {
  data <- data.frame(income = c(50000, 60000, 70000))

  formula_spec <- list(
    formula = ~income,
    terms = list(list(
      type = "quantile",
      variables = "income",
      interaction = NULL,
      params = list(p = 0.5)
    ))
  )

  # Threshold below min(income) => indicator is always 0, so p=0.5 is infeasible.
  target_values <- list(targets = list(quantile_income = c(q50 = 10000)))

  expect_error(
    {
      construct_admm_inputs(data, formula_spec, target_values)
    },
    "outside achievable range"
  )
})

test_that("construct_admm_inputs handles range constraint for continuous variable (bounds mode)", {
  data <- data.frame(age = c(30, 35, 40, 45, 50))

  formula_spec <- list(
    formula = ~age,
    terms = list(list(
      type = "range",
      variables = "age",
      interaction = NULL,
      params = list(mode = "bounds", lower = 35, upper = 45)
    ))
  )

  # Target from population data
  target_values <- list(targets = list(range_age = c(mean = 40)))

  result <- construct_admm_inputs(data, formula_spec, target_values, normalize = FALSE)

  # Verify bounds are computed as offsets from target
  # lower = 35 - 40 = -5, upper = 45 - 40 = 5
  expect_equal(unname(result$losses[[1]]$lower), -5)
  expect_equal(unname(result$losses[[1]]$upper), 5)
  expect_equal(unname(result$losses[[1]]$target), 40)
})

test_that("construct_admm_inputs handles range constraint for categorical variable (margin mode)", {
  data <- data.frame(sex = factor(c("M", "F", "M", "F", "M")))

  formula_spec <- list(
    formula = ~sex,
    terms = list(list(
      type = "range",
      variables = "sex",
      interaction = NULL,
      params = list(mode = "margin", margin = 0.05)
    ))
  )

  target_values <- list(targets = list(range_sex = c(F = 0.51, M = 0.49)))

  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Verify loss uses inequality prox
  expect_equal(result$losses[[1]]$fn, inequality_loss)
  expect_equal(result$losses[[1]]$prox, prox_inequality)

  # Verify lower/upper bounds (same for all levels with single margin)
  # Targets are reordered to F, M (alphabetical)
  expect_equal(result$losses[[1]]$lower, c(-0.05, -0.05))
  expect_equal(result$losses[[1]]$upper, c(0.05, 0.05))
  expect_equal(unname(result$losses[[1]]$target), c(0.51, 0.49))
})

test_that("construct_admm_inputs handles range constraint with named vector margin", {
  data <- data.frame(sex = factor(c("M", "F", "M", "F", "M")))

  formula_spec <- list(
    formula = ~sex,
    terms = list(list(
      type = "range",
      variables = "sex",
      interaction = NULL,
      params = list(mode = "margin", margin = c(F = 0.02, M = 0.03))
    ))
  )

  target_values <- list(targets = list(range_sex = c(F = 0.51, M = 0.49)))

  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Verify level-specific bounds (reordered to F, M)
  expect_equal(unname(result$losses[[1]]$lower), c(-0.02, -0.03))
  expect_equal(unname(result$losses[[1]]$upper), c(0.02, 0.03))
})

test_that("construct_admm_inputs handles range constraint for interaction", {
  data <- data.frame(
    sex = factor(c("M", "F", "M", "F")),
    region = factor(c("N", "S", "S", "N"))
  )

  formula_spec <- list(
    formula = ~ sex:region,
    terms = list(list(
      type = "range",
      variables = c("sex", "region"),
      interaction = TRUE,
      params = list(mode = "margin", margin = 0.02)
    ))
  )

  target_values <- list(
    targets = list("range_sex:region" = c(
      "F:N" = 0.25, "F:S" = 0.25,
      "M:N" = 0.25, "M:S" = 0.25
    ))
  )

  result <- construct_admm_inputs(data, formula_spec, target_values)

  # Verify loss uses inequality prox
  expect_equal(result$losses[[1]]$fn, inequality_loss)
  expect_equal(result$losses[[1]]$prox, prox_inequality)

  # Verify bounds are set for all 4 interaction levels
  expect_length(result$losses[[1]]$lower, 4)
  expect_length(result$losses[[1]]$upper, 4)
  expect_true(all(result$losses[[1]]$lower == -0.02))
  expect_true(all(result$losses[[1]]$upper == 0.02))
})

test_that("construct_admm_inputs errors on named vector margin with missing levels", {
  data <- data.frame(region = factor(c("N", "S", "E")))

  formula_spec <- list(
    formula = ~region,
    terms = list(list(
      type = "range",
      variables = "region",
      interaction = NULL,
      params = list(mode = "margin", margin = c(N = 0.02, S = 0.02))
    ))
  )

  target_values <- list(targets = list(range_region = c(N = 0.4, S = 0.4, E = 0.2)))

  expect_error(
    construct_admm_inputs(data, formula_spec, target_values),
    "Named margin vector missing levels"
  )
})