# Tests for the empirical distribution class

test_that("empirical_dist constructor creates valid univariate object from vector", {
  # Given: A numeric vector
  data <- c(1, 2, 3, 4, 5)

  # When: Creating an empirical distribution
  e <- empirical_dist(data)

  # Then: Object has correct class hierarchy
  expect_s3_class(e, "empirical_dist")
  expect_s3_class(e, "univariate_dist")
  expect_s3_class(e, "discrete_dist")
  expect_s3_class(e, "dist")
})

test_that("empirical_dist constructor creates valid multivariate object from matrix", {
  # Given: A numeric matrix
  data <- matrix(1:6, nrow = 3, ncol = 2)

  # When: Creating an empirical distribution
  e <- empirical_dist(data)

  # Then: Object has correct class hierarchy
  expect_s3_class(e, "empirical_dist")
  expect_s3_class(e, "multivariate_dist")
  expect_s3_class(e, "discrete_dist")
  expect_s3_class(e, "dist")
})

test_that("empirical_dist constructor validates non-empty data", {
  expect_error(empirical_dist(c()))
  expect_error(empirical_dist(numeric(0)))
})

test_that("is_empirical_dist identifies empirical_dist objects correctly", {
  e <- empirical_dist(1:5)

  expect_true(is_empirical_dist(e))
  expect_false(is_empirical_dist(list(data = 1:5)))
  expect_false(is_empirical_dist(normal()))
})

test_that("dim.empirical_dist returns correct dimensionality", {
  # Univariate
  e1 <- empirical_dist(1:5)
  expect_equal(dim(e1), 1)

  # Multivariate
  e2 <- empirical_dist(matrix(1:10, nrow = 5, ncol = 2))
  expect_equal(dim(e2), 2)
})

test_that("nobs.empirical_dist returns correct number of observations", {
  e <- empirical_dist(1:10)
  expect_equal(nobs(e), 10)

  e2 <- empirical_dist(matrix(1:20, nrow = 5, ncol = 4))
  expect_equal(nobs(e2), 5)
})

test_that("obs.empirical_dist returns the underlying data", {
  data <- c(1, 2, 3, 4, 5)
  e <- empirical_dist(data)

  result <- obs(e)
  expect_equal(as.vector(result), data)
})

test_that("mean.empirical_dist returns correct mean for univariate", {
  data <- c(1, 2, 3, 4, 5)
  e <- empirical_dist(data)

  expect_equal(mean(e), 3)
})

test_that("mean.empirical_dist returns correct column means for multivariate", {
  data <- matrix(c(1, 2, 3, 10, 20, 30), nrow = 3, ncol = 2)
  e <- empirical_dist(data)

  expect_equal(mean(e), c(2, 20))
})

test_that("vcov.empirical_dist returns correct variance for univariate", {
  data <- c(1, 2, 3, 4, 5)
  e <- empirical_dist(data)

  # vcov returns a matrix even for univariate data (cov() behavior)
  expect_equal(as.numeric(vcov(e)), var(data), tolerance = 1e-10)
})

test_that("vcov.empirical_dist returns correct covariance matrix for multivariate", {
  data <- matrix(c(1, 2, 3, 10, 20, 30), nrow = 3, ncol = 2)
  e <- empirical_dist(data)

  expect_equal(vcov(e), cov(data), tolerance = 1e-10)
})

test_that("params.empirical_dist returns NULL", {
  e <- empirical_dist(1:5)

  expect_null(params(e))
})

test_that("nparams.empirical_dist returns 0", {
  e <- empirical_dist(1:5)

  expect_equal(nparams(e), 0)
})

test_that("sampler.empirical_dist returns a function that resamples from data", {
  data <- c(1, 2, 3, 4, 5)
  e <- empirical_dist(data)
  samp_fn <- sampler(e)

  # The sampler should return a function
  expect_type(samp_fn, "closure")

  # Samples should be from the original data
  set.seed(42)
  samples <- samp_fn(100)
  expect_true(all(samples %in% data))
})

test_that("sampler.empirical_dist generates correct number of samples", {
  e <- empirical_dist(1:10)
  samp_fn <- sampler(e)

  samples <- samp_fn(50)
  expect_length(samples, 50)
})

test_that("sampler.empirical_dist works for multivariate data", {
  data <- matrix(1:10, nrow = 5, ncol = 2)
  e <- empirical_dist(data)
  samp_fn <- sampler(e)

  samples <- samp_fn(100)
  expect_true(is.matrix(samples))
  expect_equal(nrow(samples), 100)
  expect_equal(ncol(samples), 2)
})

test_that("density.empirical_dist returns correct probabilities", {
  data <- c(1, 1, 2, 3, 3, 3)
  e <- empirical_dist(data)
  pdf <- density(e)

  # 1 appears 2 times out of 6
  expect_equal(pdf(1), 2 / 6)
  # 2 appears 1 time out of 6
  expect_equal(pdf(2), 1 / 6)
  # 3 appears 3 times out of 6
  expect_equal(pdf(3), 3 / 6)
  # 4 never appears
  expect_equal(pdf(4), 0)
})

test_that("density.empirical_dist handles log argument correctly", {
  data <- c(1, 1, 2)
  e <- empirical_dist(data)
  pdf <- density(e)

  expect_equal(pdf(1, log = TRUE), log(2 / 3), tolerance = 1e-10)
})

test_that("cdf.empirical_dist returns empirical CDF function", {
  data <- c(1, 2, 3, 4, 5)
  e <- empirical_dist(data)
  cdf_fn <- cdf(e)

  # Check CDF values at data points
  expect_equal(cdf_fn(1), 0.2)
  expect_equal(cdf_fn(3), 0.6)
  expect_equal(cdf_fn(5), 1.0)
  expect_equal(cdf_fn(0), 0)
})

test_that("cdf.empirical_dist errors for multivariate distribution", {
  data <- matrix(1:6, nrow = 3, ncol = 2)
  e <- empirical_dist(data)

  expect_error(cdf(e))
})

test_that("sup.empirical_dist returns finite_set of observed values", {
  data <- c(1, 2, 3)
  e <- empirical_dist(data)
  s <- sup(e)

  expect_s3_class(s, "finite_set")
  expect_true(s$has(1))
  expect_true(s$has(2))
  expect_true(s$has(3))
  expect_false(s$has(4))
})

test_that("marginal.empirical_dist returns correct marginal distribution", {
  data <- matrix(c(1, 2, 3, 10, 20, 30), nrow = 3, ncol = 2)
  e <- empirical_dist(data)

  # Marginal of first column
  marg <- marginal(e, 1)

  expect_s3_class(marg, "empirical_dist")
  expect_equal(dim(marg), 1)
  expect_equal(as.vector(obs(marg)), c(1, 2, 3))
})

test_that("marginal.empirical_dist validates indices", {
  e <- empirical_dist(matrix(1:6, nrow = 3, ncol = 2))

  expect_error(marginal(e, 0))
  expect_error(marginal(e, 3))
})

test_that("conditional.empirical_dist filters data by predicate", {
  data <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
  e <- empirical_dist(data)

  # Condition on values > 5
  cond_e <- conditional(e, function(x) x > 5)

  expect_s3_class(cond_e, "empirical_dist")
  expect_equal(nobs(cond_e), 5)
  expect_true(all(obs(cond_e) > 5))
})

test_that("conditional.empirical_dist works with multivariate data", {
  data <- matrix(c(1, 2, 3, 4, 10, 20, 30, 40), nrow = 4, ncol = 2)
  e <- empirical_dist(data)

  # Condition on first column > 2
  cond_e <- conditional(e, function(x) x[1] > 2)

  expect_equal(nobs(cond_e), 2)
})

test_that("rmap.empirical_dist applies function to observations", {
  data <- c(1, 2, 3, 4, 5)
  e <- empirical_dist(data)

  # Square each observation
  mapped <- rmap(e, function(x) x^2)

  expect_s3_class(mapped, "empirical_dist")
  expect_equal(as.vector(obs(mapped)), c(1, 4, 9, 16, 25))
})

test_that("rmap.empirical_dist handles dimension changes", {
  data <- matrix(c(1, 2, 3, 10, 20, 30), nrow = 3, ncol = 2)
  e <- empirical_dist(data)

  # Sum across columns (reduces to univariate)
  mapped <- rmap(e, function(x) sum(x))

  expect_s3_class(mapped, "empirical_dist")
  expect_equal(dim(mapped), 1)
  expect_equal(as.vector(obs(mapped)), c(11, 22, 33))
})

test_that("expectation.empirical_dist computes correct expectation", {
  data <- c(1, 2, 3, 4, 5)
  e <- empirical_dist(data)

  # E[X] = mean
  expect_equal(expectation(e), mean(data))

  # E[X^2]
  expect_equal(expectation(e, function(x) x^2), mean(data^2))
})

test_that("expectation.empirical_dist computes stats when requested", {
  data <- 1:100
  e <- empirical_dist(data)

  result <- expectation(e, control = list(compute_stats = TRUE))

  expect_type(result, "list")
  expect_true("value" %in% names(result))
  expect_true("ci" %in% names(result))
})