# test-lpr-hetero.R - Unit tests for heteroscedastic regression (mean + variance)
#
# Author: Ying Wang, Min Li
# Copyright (c) 2020-2025 fastLPR Development Team
# License: GNU General Public License v3.0
#
# Each test uses a fixed seed (42) and a log-spaced bandwidth grid so that
# results line up with the MATLAB/Python ports of the same suite.

context("Heteroscedastic Regression (Mean + Variance)")

test_that("cv_fastlpr estimates mean and variance", {
  # Parity with MATLAB: n = 500, h in [0.01, 0.5], 10 bandwidths.
  set.seed(42)
  n <- 500
  x <- matrix(sort(runif(n)), ncol = 1)
  signal <- sin(2 * pi * x)
  noise_var <- 0.1 + 0.3 * x^2
  y <- matrix(signal + sqrt(noise_var) * rnorm(n), ncol = 1)

  log_grid <- seq(log10(0.01), log10(0.5), length.out = 10)
  hlist <- matrix(10^log_grid, ncol = 1)

  fit <- cv_fastlpr(x, y, hlist,
                    list(order = 1, calc_dof = TRUE, y_type_out = "mean"))

  expect_false(is.null(fit$yhat))
  expect_equal(length(fit$yhat), n)
})

test_that("mean prediction is accurate", {
  # Parity with MATLAB/Python: n = 500, h in [0.01, 0.3], 15 bandwidths,
  # heteroscedastic noise whose variance grows with x.
  set.seed(42)
  n <- 500
  x <- matrix(sort(runif(n)), ncol = 1)
  signal <- sin(2 * pi * x)
  noise_var <- 0.1 + 0.3 * x^2
  y <- matrix(signal + sqrt(noise_var) * rnorm(n), ncol = 1)

  hlist <- matrix(10^seq(log10(0.01), log10(0.3), length.out = 15), ncol = 1)
  fit <- cv_fastlpr(x, y, hlist, list(order = 1, y_type_out = "mean"))

  # MSE against the noiseless signal must stay small.
  expect_true(mean((fit$yhat - signal)^2) < 0.05)
})

test_that("variance estimates are non-negative", {
  # Parity with MATLAB: n = 500, h in [0.02, 0.4], 10 bandwidths.
  # Two-step workflow: fit the mean, square the residuals, fit the variance.
  set.seed(42)
  n <- 500
  x <- matrix(sort(runif(n)), ncol = 1)
  noise_var <- 0.1 + 0.5 * x
  y <- matrix(x^2 + sqrt(noise_var) * rnorm(n), ncol = 1)

  hlist <- matrix(10^seq(log10(0.02), log10(0.4), length.out = 10), ncol = 1)

  # Step 1: mean fit.
  mean_fit <- cv_fastlpr(x, y, hlist, list(order = 1, y_type_out = "mean"))
  # Step 2: squared residuals become the response for the variance fit.
  sq_resid <- (y - mean_fit$yhat)^2
  # Step 3: variance fit.
  var_fit <- cv_fastlpr(x, sq_resid, hlist,
                        list(order = 1, y_type_out = "variance"))

  expect_true(all(var_fit$yhat >= 0))
})

test_that("2D heteroscedastic variance estimation works", {
  # Parity with MATLAB: n = 400, h in [0.05, 0.5], 5 x 5 bandwidth grid.
  set.seed(42)
  n <- 400
  x <- matrix(runif(n * 2), ncol = 2)
  signal <- sin(2 * pi * x[, 1]) * cos(2 * pi * x[, 2])
  noise_var <- 0.1 + 0.2 * x[, 1]^2 + 0.2 * x[, 2]^2
  y <- matrix(signal + sqrt(noise_var) * rnorm(n), ncol = 1)

  hlist <- get_hlist(c(5, 5), list(c(0.05, 0.5), c(0.05, 0.5)))

  # Same two-step workflow as the 1D case: mean, residuals, variance.
  mean_fit <- cv_fastlpr(x, y, hlist, list(order = 1, y_type_out = "mean"))
  sq_resid <- (y - mean_fit$yhat)^2
  var_fit <- cv_fastlpr(x, sq_resid, hlist,
                        list(order = 1, y_type_out = "variance"))

  expect_false(is.null(var_fit$yhat))
  expect_equal(length(var_fit$yhat), n)
  expect_true(all(var_fit$yhat >= 0))
})

test_that("heteroscedastic regression is reproducible", {
  # Parity with MATLAB: n = 200, h in [0.1, 0.5], 5 bandwidths.
  set.seed(42)
  n <- 200
  x <- matrix(sort(runif(n)), ncol = 1)
  noise_var <- 0.1 + 0.3 * x^2
  y <- matrix(sin(2 * pi * x) + sqrt(noise_var) * rnorm(n), ncol = 1)

  hlist <- matrix(10^seq(log10(0.1), log10(0.5), length.out = 5), ncol = 1)

  # Mean fit, run twice on identical inputs: results must match exactly.
  opt_mean <- list(order = 1, y_type_out = "mean")
  mean_fit_a <- cv_fastlpr(x, y, hlist, opt_mean)
  mean_fit_b <- cv_fastlpr(x, y, hlist, opt_mean)
  expect_equal(mean_fit_a$yhat, mean_fit_b$yhat)

  # Variance fit on squared residuals, run twice: also deterministic.
  sq_resid <- (y - mean_fit_a$yhat)^2
  opt_var <- list(order = 1, y_type_out = "variance")
  var_fit_a <- cv_fastlpr(x, sq_resid, hlist, opt_var)
  var_fit_b <- cv_fastlpr(x, sq_resid, hlist, opt_var)
  expect_equal(var_fit_a$yhat, var_fit_b$yhat)
})