# Test Phase 3: Kernels, Smoothing, Clustering, and Utilities

# =============================================================================
# Kernel Function Tests
# =============================================================================

test_that("Ker.norm is a proper kernel", {
  u <- seq(-3, 3, length.out = 100)
  k <- Ker.norm(u)

  # Non-negative
  expect_true(all(k >= 0))

  # Symmetric
  expect_equal(k[1:50], rev(k[51:100]), tolerance = 1e-10)

  # Integrates to approximately 1 (for standard normal)
  integral <- sum(k * diff(c(u[1], u))) # simple approximation
  expect_lt(abs(integral - 1), 0.1)
})

test_that("Ker.epa has correct support", {
  u_inside <- c(-0.5, 0, 0.5)
  u_outside <- c(-1.5, 1.5)

  expect_true(all(Ker.epa(u_inside) > 0))
  expect_equal(Ker.epa(u_outside), c(0, 0))
})

test_that("Ker.tri has correct support", {
  u_inside <- c(-0.5, 0, 0.5)
  u_outside <- c(-1.5, 1.5)

  expect_true(all(Ker.tri(u_inside) > 0))
  expect_equal(Ker.tri(u_outside), c(0, 0))
})

test_that("Ker.quar has correct support", {
  u_inside <- c(-0.5, 0, 0.5)
  u_outside <- c(-1.5, 1.5)

  expect_true(all(Ker.quar(u_inside) > 0))
  expect_equal(Ker.quar(u_outside), c(0, 0))
})

test_that("Ker.cos has correct support", {
  u_inside <- c(-0.5, 0, 0.5)
  u_outside <- c(-1.5, 1.5)

  expect_true(all(Ker.cos(u_inside) > 0))
  expect_equal(Ker.cos(u_outside), c(0, 0))
})

test_that("Ker.unif has correct support", {
  u_inside <- c(-0.5, 0, 0.5)
  u_outside <- c(-1.5, 1.5)

  expect_equal(Ker.unif(u_inside), c(0.5, 0.5, 0.5))
  expect_equal(Ker.unif(u_outside), c(0, 0))
})

test_that("Asymmetric kernels are one-sided", {
  u_positive <- c(0, 0.5, 0.9)
  u_negative <- c(-0.5, -0.1)

  expect_true(all(AKer.norm(u_positive) > 0))
  expect_equal(AKer.norm(u_negative), c(0, 0))

  expect_true(all(AKer.epa(u_positive) > 0))
  expect_equal(AKer.epa(u_negative), c(0, 0))
})

test_that("Integrated kernels range from 0 to 1", {
  u <- seq(-1.5, 1.5, length.out = 100)

  for (ker in c("norm", "epa", "tri", "quar", "cos", "unif")) {
    ik <- Kernel.integrate(u, ker)
    # All values should be between 0 and 1
    expect_true(all(ik >= 0 & ik <= 1))
    # Should be monotonically increasing
    expect_true(all(diff(ik) >= -1e-10))
  }

  # Test boundary values specifically at -1 and 1
  u_bounds <- c(-1, 0, 1)
  for (ker in c("epa", "tri", "quar", "cos", "unif")) {
    ik <- Kernel.integrate(u_bounds, ker)
    expect_equal(ik[1], 0, tolerance = 0.01)  # At -1
    expect_equal(ik[3], 1, tolerance = 0.01)  # At 1
  }
})

test_that("Kernel wrapper works for all types", {
  u <- c(-0.5, 0, 0.5)

  expect_equal(Kernel(u, "norm"), Ker.norm(u))
  expect_equal(Kernel(u, "epa"), Ker.epa(u))
  expect_equal(Kernel(u, "tri"), Ker.tri(u))
  expect_equal(Kernel(u, "quar"), Ker.quar(u))
  expect_equal(Kernel(u, "cos"), Ker.cos(u))
  expect_equal(Kernel(u, "unif"), Ker.unif(u))
})

# =============================================================================
# Smoothing Function Tests
# =============================================================================

test_that("S.NW produces valid smoother matrix", {
  tt <- seq(0, 1, length.out = 20)
  h <- 0.1
  S <- S.NW(tt, h)

  # Square matrix
  expect_equal(dim(S), c(20, 20))

  # Rows sum to 1
  row_sums <- rowSums(S)
  expect_equal(row_sums, rep(1, 20), tolerance = 1e-10)

  # Non-negative
  expect_true(all(S >= 0))
})

test_that("S.LLR produces valid smoother matrix", {
  tt <- seq(0, 1, length.out = 20)
  h <- 0.15
  S <- S.LLR(tt, h)

  # Square matrix
  expect_equal(dim(S), c(20, 20))

  # Rows sum to 1 (linear regression preserves means)
  row_sums <- rowSums(S)
  expect_equal(row_sums, rep(1, 20), tolerance = 1e-8)
})

test_that("S.LPR with p=0 equals S.NW", {
  tt <- seq(0, 1, length.out = 20)
  h <- 0.15

  S_nw <- S.NW(tt, h, Ker = "norm")
  S_lpr0 <- S.LPR(tt, h, p = 0, Ker = "norm")

  expect_equal(S_nw, S_lpr0, tolerance = 1e-10)
})

test_that("S.LPR with p=1 equals S.LLR", {
  tt <- seq(0, 1, length.out = 20)
  h <- 0.15

  S_llr <- S.LLR(tt, h, Ker = "norm")
  S_lpr1 <- S.LPR(tt, h, p = 1, Ker = "norm")

  expect_equal(S_llr, S_lpr1, tolerance = 1e-10)
})

test_that("S.LCR calls S.LPR with p=3", {
  tt <- seq(0, 1, length.out = 20)
  h <- 0.2

  S_lcr <- S.LCR(tt, h)
  S_lpr3 <- S.LPR(tt, h, p = 3)

  expect_equal(S_lcr, S_lpr3)
})

test_that("S.KNN produces valid smoother matrix", {
  tt <- seq(0, 1, length.out = 20)
  S <- S.KNN(tt, knn = 5)

  expect_equal(dim(S), c(20, 20))
  row_sums <- rowSums(S)
  expect_equal(row_sums, rep(1, 20), tolerance = 1e-10)
})

test_that("CV smoother has zero diagonal", {
  tt <- seq(0, 1, length.out = 20)
  h <- 0.15
  S_cv <- S.NW(tt, h, cv = TRUE)

  expect_equal(diag(S_cv), rep(0, 20))
})

test_that("h.default returns reasonable bandwidth", {
  tt <- seq(0, 1, length.out = 50)
  h <- h.default(tt)

  expect_true(h > 0)
  expect_true(h < 1)  # Should be smaller than the domain range
})

test_that("CV.S computes cross-validation score", {
  tt <- seq(0, 1, length.out = 50)
  y <- sin(2 * pi * tt) + rnorm(50, sd = 0.1)

  cv <- CV.S(S.NW, tt, h = 0.1, y = y)
  expect_true(is.numeric(cv))
  expect_true(cv > 0)
})

test_that("GCV.S computes GCV score", {
  tt <- seq(0, 1, length.out = 50)
  y <- sin(2 * pi * tt) + rnorm(50, sd = 0.1)

  gcv <- GCV.S(S.NW, tt, h = 0.1, y = y)
  expect_true(is.numeric(gcv))
  expect_true(gcv > 0)
})

# =============================================================================
# Clustering Tests
# =============================================================================

test_that("cluster.kmeans clusters correctly", {
  set.seed(123)
  t <- seq(0, 1, length.out = 50)
  n <- 30

  # Create two distinct groups
  X <- matrix(0, n, 50)
  true_cluster <- rep(1:2, each = 15)
  for (i in 1:n) {
    if (true_cluster[i] == 1) {
      X[i, ] <- sin(2*pi*t) + rnorm(50, sd = 0.1)
    } else {
      X[i, ] <- cos(2*pi*t) + rnorm(50, sd = 0.1)
    }
  }
  fd <- fdata(X, argvals = t)

  result <- cluster.kmeans(fd, ncl = 2, seed = 42)

  # Check structure
  expect_s3_class(result, "cluster.kmeans")
  expect_length(result$cluster, n)
  expect_equal(nrow(result$centers$data), 2)
  expect_length(result$size, 2)
  expect_equal(sum(result$size), n)

  # Clusters should be 1 or 2
  expect_true(all(result$cluster %in% c(1, 2)))

  # Should separate the groups reasonably well
  # (allowing for some misclassification due to randomness)
  tab <- table(result$cluster, true_cluster)
  accuracy <- max(sum(diag(tab)), sum(diag(tab[2:1, ]))) / n
  expect_gt(accuracy, 0.7)
})

test_that("cluster.kmeans handles single cluster", {
  t <- seq(0, 1, length.out = 50)
  X <- matrix(rnorm(20 * 50), 20, 50)
  fd <- fdata(X, argvals = t)

  result <- cluster.kmeans(fd, ncl = 1, seed = 123)

  expect_equal(unique(result$cluster), 1)
  expect_equal(result$size, 20)
})

test_that("cluster.init returns correct number of centers", {
  t <- seq(0, 1, length.out = 50)
  X <- matrix(rnorm(20 * 50), 20, 50)
  fd <- fdata(X, argvals = t)

  centers <- cluster.init(fd, ncl = 3, seed = 123)

  expect_equal(nrow(centers$data), 3)
})

# =============================================================================
# Utility Function Tests
# =============================================================================

test_that("int.simpson integrates correctly", {
  t <- seq(0, 2*pi, length.out = 100)

  # Integral of sin from 0 to 2*pi is 0
  X1 <- matrix(sin(t), nrow = 1)
  fd1 <- fdata(X1, argvals = t)
  expect_equal(int.simpson(fd1)[1], 0, tolerance = 0.01)

  # Integral of cos^2 from 0 to 2*pi is pi
  X2 <- matrix(cos(t)^2, nrow = 1)
  fd2 <- fdata(X2, argvals = t)
  expect_equal(int.simpson(fd2)[1], pi, tolerance = 0.05)
})

test_that("inprod.fdata computes inner products", {
  t <- seq(0, 2*pi, length.out = 100)

  # sin and cos are orthogonal on [0, 2*pi]
  X1 <- matrix(sin(t), nrow = 1)
  X2 <- matrix(cos(t), nrow = 1)
  fd1 <- fdata(X1, argvals = t)
  fd2 <- fdata(X2, argvals = t)

  ip <- inprod.fdata(fd1, fd2)
  expect_equal(ip[1, 1], 0, tolerance = 0.1)

  # sin with itself: integral of sin^2 from 0 to 2*pi is pi
  ip_self <- inprod.fdata(fd1, fd1)
  expect_equal(ip_self[1, 1], pi, tolerance = 0.05)
})

test_that("inprod.fdata handles multiple curves", {
  t <- seq(0, 1, length.out = 50)
  X1 <- matrix(rnorm(10 * 50), 10, 50)
  X2 <- matrix(rnorm(5 * 50), 5, 50)
  fd1 <- fdata(X1, argvals = t)
  fd2 <- fdata(X2, argvals = t)

  ip <- inprod.fdata(fd1, fd2)
  expect_equal(dim(ip), c(10, 5))
})

test_that("r.ou generates valid functional data", {
  t <- seq(0, 1, length.out = 50)
  fd <- r.ou(n = 10, t = t, seed = 123)

  expect_s3_class(fd, "fdata")
  expect_equal(nrow(fd$data), 10)
  expect_equal(ncol(fd$data), 50)
})

test_that("r.brownian generates valid functional data", {
  t <- seq(0, 1, length.out = 50)
  fd <- r.brownian(n = 10, t = t, seed = 123)

  expect_s3_class(fd, "fdata")
  expect_equal(fd$data[, 1], rep(0, 10))  # Starts at 0
})

test_that("r.bridge returns to zero at end", {
  t <- seq(0, 1, length.out = 50)
  fd <- r.bridge(n = 10, t = t, seed = 123)

  expect_s3_class(fd, "fdata")
  # Brownian bridge should end near 0
  expect_true(all(abs(fd$data[, 50]) < 0.5))
})

test_that("Prediction metrics work correctly", {
  y_true <- c(1, 2, 3, 4, 5)
  y_pred <- c(1, 2, 3, 4, 5)  # Perfect prediction

  expect_equal(pred.MAE(y_true, y_pred), 0)
  expect_equal(pred.MSE(y_true, y_pred), 0)
  expect_equal(pred.RMSE(y_true, y_pred), 0)
  expect_equal(pred.R2(y_true, y_pred), 1)
})

test_that("Prediction metrics handle imperfect predictions", {
  y_true <- c(1, 2, 3, 4, 5)
  y_pred <- c(1.1, 2.2, 2.9, 4.1, 4.8)

  expect_gt(pred.MAE(y_true, y_pred), 0)
  expect_gt(pred.MSE(y_true, y_pred), 0)
  expect_gt(pred.RMSE(y_true, y_pred), 0)
  expect_lt(pred.R2(y_true, y_pred), 1)
  expect_gt(pred.R2(y_true, y_pred), 0.9)  # Still a good fit
})

# =============================================================================
# Integration Tests - Smoothing with fdata
# =============================================================================

test_that("Smoothing can be applied to fdata", {
  set.seed(123)
  t <- seq(0, 1, length.out = 50)
  y <- sin(2 * pi * t) + rnorm(50, sd = 0.2)
  fd <- fdata(matrix(y, nrow = 1), argvals = t)

  # Compute smoother and apply
  S <- S.NW(t, h = 0.1)
  y_smooth <- as.vector(S %*% y)

  # Smoothed curve should be closer to true curve
  true_y <- sin(2 * pi * t)
  error_original <- mean((y - true_y)^2)
  error_smooth <- mean((y_smooth - true_y)^2)

  expect_lt(error_smooth, error_original)
})

test_that("optim.np finds reasonable bandwidth", {
  set.seed(123)
  t <- seq(0, 1, length.out = 50)
  y <- sin(2 * pi * t) + rnorm(50, sd = 0.1)
  fd <- fdata(matrix(y, nrow = 1), argvals = t)

  result <- optim.np(fd, S.NW)

  expect_true(result$h.opt > 0)
  expect_true(result$h.opt < 1)
})

# =============================================================================
# Additional Smoothing Edge Cases
# =============================================================================

test_that("S.NW warns on custom kernel function", {
  tt <- seq(0, 1, length.out = 20)
  expect_warning(S.NW(tt, h = 0.1, Ker = function(u) dnorm(u)), "Custom kernel")
})

test_that("S.NW with explicit weights", {
  tt <- seq(0, 1, length.out = 20)
  w <- rep(1, 20)
  w[1:5] <- 2  # higher weight for first 5 points
  S <- S.NW(tt, h = 0.1, w = w)

  expect_equal(dim(S), c(20, 20))
  expect_true(all(S >= 0))
})

test_that("S.NW with different kernel types", {
  tt <- seq(0, 1, length.out = 20)
  for (ker in c("norm", "epa", "tri", "quar", "cos", "unif")) {
    S <- S.NW(tt, h = 0.15, Ker = ker)
    expect_equal(dim(S), c(20, 20))
    row_sums <- rowSums(S)
    expect_equal(row_sums, rep(1, 20), tolerance = 1e-8)
  }
})

test_that("S.LPR with p=2 produces valid smoother", {
  tt <- seq(0, 1, length.out = 20)
  S <- S.LPR(tt, h = 0.2, p = 2)

  expect_equal(dim(S), c(20, 20))
  row_sums <- rowSums(S)
  expect_equal(row_sums, rep(1, 20), tolerance = 1e-6)
})

test_that("S.KNN with varying knn values", {
  tt <- seq(0, 1, length.out = 30)

  S5 <- S.KNN(tt, knn = 5)
  S15 <- S.KNN(tt, knn = 15)

  expect_equal(dim(S5), c(30, 30))
  expect_equal(dim(S15), c(30, 30))

  # Larger knn should produce smoother matrix (more non-zero entries per row)
  expect_true(sum(S15 > 1e-10) >= sum(S5 > 1e-10))
})

test_that("h.default works with fdata input", {
  t <- seq(0, 1, length.out = 50)
  X <- matrix(rnorm(50), 1, 50)
  fd <- fdata(X, argvals = t)

  h <- h.default(fd)
  expect_true(h > 0)
  expect_true(h < 1)
})

test_that("optim.np with criterion='CV'", {
  set.seed(123)
  t <- seq(0, 1, length.out = 50)
  y <- sin(2 * pi * t) + rnorm(50, sd = 0.1)
  fd <- fdata(matrix(y, nrow = 1), argvals = t)

  result <- optim.np(fd, S.NW, criterion = "CV")
  expect_true(result$h.opt > 0)
  expect_equal(result$criterion, "CV")
})

test_that("optim.np validates input", {
  expect_error(optim.np(matrix(1:10, 2, 5), S.NW), "fdata")
})

test_that("GCV.S returns Inf for degenerate case", {
  # Very small h relative to grid spacing should make trace(S) ~ n
  tt <- seq(0, 1, length.out = 10)
  y <- rnorm(10)
  gcv <- GCV.S(S.NW, tt, h = 1e-8, y = y)
  expect_true(is.infinite(gcv) || gcv > 1e10)
})

# =============================================================================
# Additional Kernel Edge Cases
# =============================================================================

test_that("Kernel.integrate matches individual IKer functions", {
  u <- c(-1.5, -0.5, 0, 0.5, 1.5)

  expect_equal(Kernel.integrate(u, "norm"), IKer.norm(u))
  expect_equal(Kernel.integrate(u, "epa"), IKer.epa(u))
  expect_equal(Kernel.integrate(u, "tri"), IKer.tri(u))
  expect_equal(Kernel.integrate(u, "quar"), IKer.quar(u))
  expect_equal(Kernel.integrate(u, "cos"), IKer.cos(u))
  expect_equal(Kernel.integrate(u, "unif"), IKer.unif(u))
})

test_that("Kernel rejects invalid type", {
  expect_error(Kernel(0, type.Ker = "invalid"))
})

test_that("Kernel.asymmetric rejects invalid type", {
  expect_error(Kernel.asymmetric(0, type.Ker = "invalid"))
})

test_that("Kernel.integrate rejects invalid type", {
  expect_error(Kernel.integrate(0, Ker = "invalid"))
})

test_that("Asymmetric kernels integrate to approximately 1 on [0,1]", {
  u <- seq(0, 1, length.out = 1000)
  du <- 1 / (length(u) - 1)

  for (ker_fn in list(AKer.epa, AKer.tri, AKer.quar, AKer.cos, AKer.unif)) {
    integral <- sum(ker_fn(u)) * du
    expect_true(abs(integral - 1) < 0.05)
  }
})

test_that("Symmetric kernels are symmetric: K(u) == K(-u)", {
  u <- c(0.1, 0.3, 0.5, 0.7, 0.9)

  for (ker_fn in list(Ker.norm, Ker.epa, Ker.tri, Ker.quar, Ker.cos, Ker.unif)) {
    expect_equal(ker_fn(u), ker_fn(-u))
  }
})

# =============================================================================
# Additional Utility Edge Cases
# =============================================================================

test_that("inprod.fdata self inner product with NULL fdata2", {
  t <- seq(0, 2 * pi, length.out = 100)
  X <- matrix(sin(t), nrow = 1)
  fd1 <- fdata(X, argvals = t)

  ip <- inprod.fdata(fd1, NULL)
  expect_equal(dim(ip), c(1, 1))
  expect_equal(ip[1, 1], pi, tolerance = 0.1)
})

test_that("inprod.fdata errors on non-fdata", {
  expect_error(inprod.fdata(matrix(1:10, 2, 5)), "fdata")
})

test_that("inprod.fdata errors on incompatible argvals", {
  fd1 <- fdata(matrix(1:10, 1, 10), argvals = 1:10)
  fd2 <- fdata(matrix(1:5, 1, 5), argvals = 1:5)
  expect_error(inprod.fdata(fd1, fd2), "same number")
})

test_that("int.simpson errors on 2D fdata", {
  X <- array(rnorm(2 * 3 * 4), dim = c(2, 3, 4))
  fd2d <- fdata(X, argvals = list(1:3, 1:4), fdata2d = TRUE)
  expect_error(int.simpson(fd2d), "2D")
})

test_that("r.ou is reproducible with seed", {
  t <- seq(0, 1, length.out = 50)
  fd1 <- r.ou(n = 5, t = t, seed = 42)
  fd2 <- r.ou(n = 5, t = t, seed = 42)
  expect_equal(fd1$data, fd2$data)
})

test_that("r.ou with custom parameters", {
  t <- seq(0, 1, length.out = 50)
  fd <- r.ou(n = 10, t = t, mu = 5, theta = 3, sigma = 0.5, x0 = 1, seed = 42)

  expect_s3_class(fd, "fdata")
  expect_equal(fd$data[, 1], rep(1, 10))  # starts at x0
})

test_that("r.brownian starts at custom x0", {
  t <- seq(0, 1, length.out = 50)
  fd <- r.brownian(n = 5, t = t, x0 = 3, seed = 42)

  expect_equal(fd$data[, 1], rep(3, 5))
})

test_that("r.bridge is reproducible with seed", {
  t <- seq(0, 1, length.out = 50)
  fd1 <- r.bridge(n = 5, t = t, seed = 42)
  fd2 <- r.bridge(n = 5, t = t, seed = 42)
  expect_equal(fd1$data, fd2$data)
})

test_that("pred.RMSE equals sqrt(pred.MSE)", {
  y_true <- c(1, 2, 3, 4, 5)
  y_pred <- c(1.1, 2.3, 2.8, 4.2, 5.1)

  expect_equal(pred.RMSE(y_true, y_pred), sqrt(pred.MSE(y_true, y_pred)))
})