# Validation tests for clustering functions
# Tests for cluster.kmeans and cluster.optim

# =============================================================================
# Test Data Setup
# =============================================================================

create_clustered_data <- function(n_per_cluster = 10, m = 50, seed = 42) {
  set.seed(seed)
  t_grid <- seq(0, 1, length.out = m)

  # Cluster 1: sine-like curves
  data1 <- matrix(0, nrow = n_per_cluster, ncol = m)
  for (i in 1:n_per_cluster) {
    data1[i, ] <- sin(2 * pi * t_grid) + rnorm(m, 0, 0.1)
  }

  # Cluster 2: cosine-like curves
  data2 <- matrix(0, nrow = n_per_cluster, ncol = m)
  for (i in 1:n_per_cluster) {
    data2[i, ] <- cos(2 * pi * t_grid) + rnorm(m, 0, 0.1)
  }

  # Cluster 3: linear curves
  data3 <- matrix(0, nrow = n_per_cluster, ncol = m)
  for (i in 1:n_per_cluster) {
    data3[i, ] <- t_grid + rnorm(m, 0, 0.1)
  }

  data_mat <- rbind(data1, data2, data3)
  true_clusters <- rep(1:3, each = n_per_cluster)

  list(
    fd = fdars::fdata(data_mat, argvals = t_grid),
    X = data_mat,
    t_grid = t_grid,
    true_clusters = true_clusters,
    n = 3 * n_per_cluster,
    m = m
  )
}

# =============================================================================
# cluster.kmeans Tests
# =============================================================================

test_that("cluster.kmeans returns correct structure", {
  data <- create_clustered_data()
  result <- fdars::cluster.kmeans(data$fd, ncl = 3, seed = 123)

  expect_s3_class(result, "cluster.kmeans")
  expect_true("cluster" %in% names(result))
  expect_true("centers" %in% names(result))
  expect_true("size" %in% names(result))
  expect_true("withinss" %in% names(result))
  expect_true("tot.withinss" %in% names(result))
  expect_true("fdataobj" %in% names(result))

  # Check dimensions
  expect_length(result$cluster, data$n)
  expect_s3_class(result$centers, "fdata")
  expect_equal(nrow(result$centers$data), 3)
  expect_equal(ncol(result$centers$data), data$m)
})

test_that("cluster.kmeans cluster assignments are valid", {
  data <- create_clustered_data()
  result <- fdars::cluster.kmeans(data$fd, ncl = 3, seed = 123)

  # Cluster labels should be 1, 2, or 3
  expect_true(all(result$cluster %in% 1:3))

  # All clusters should have at least one member
  expect_equal(length(unique(result$cluster)), 3)
})

test_that("cluster.kmeans is reproducible with seed", {
  data <- create_clustered_data()
  result1 <- fdars::cluster.kmeans(data$fd, ncl = 3, seed = 456)
  result2 <- fdars::cluster.kmeans(data$fd, ncl = 3, seed = 456)

  expect_equal(result1$cluster, result2$cluster)
  expect_equal(result1$centers$data, result2$centers$data)
})

test_that("cluster.kmeans finds reasonable clusters for well-separated data", {
  data <- create_clustered_data()
  result <- fdars::cluster.kmeans(data$fd, ncl = 3, seed = 123)

  # Most curves should be correctly clustered
  # Allow for some misclassification due to random initialization
  # Check that each true cluster maps mostly to one predicted cluster
  for (true_cl in 1:3) {
    idx <- data$true_clusters == true_cl
    pred_clusters <- result$cluster[idx]
    most_common <- as.integer(names(which.max(table(pred_clusters))))
    accuracy <- mean(pred_clusters == most_common)
    expect_gt(accuracy, 0.7)  # At least 70% should be in same cluster
  }
})

test_that("cluster.kmeans with different metrics", {
  data <- create_clustered_data(n_per_cluster = 8)

  # Test L2 metric (default)
  result_l2 <- fdars::cluster.kmeans(data$fd, ncl = 3, metric = "L2", seed = 123)
  expect_s3_class(result_l2, "cluster.kmeans")

  # Test L1 metric
  result_l1 <- fdars::cluster.kmeans(data$fd, ncl = 3, metric = "L1", seed = 123)
  expect_s3_class(result_l1, "cluster.kmeans")

  # Test Linf metric
  result_linf <- fdars::cluster.kmeans(data$fd, ncl = 3, metric = "Linf", seed = 123)
  expect_s3_class(result_linf, "cluster.kmeans")
})

test_that("cluster.kmeans with custom metric function", {
  data <- create_clustered_data(n_per_cluster = 8)

  # Use metric.lp as function
  result <- fdars::cluster.kmeans(data$fd, ncl = 3, metric = fdars::metric.lp, seed = 123)
  expect_s3_class(result, "cluster.kmeans")
  expect_length(result$cluster, data$n)
})

test_that("cluster.kmeans nstart improves results", {
  data <- create_clustered_data()

  # Multiple starts should give more stable results
  result1 <- fdars::cluster.kmeans(data$fd, ncl = 3, nstart = 1, seed = 789)
  result10 <- fdars::cluster.kmeans(data$fd, ncl = 3, nstart = 10, seed = 789)

  # Both should return valid results
  expect_s3_class(result1, "cluster.kmeans")
  expect_s3_class(result10, "cluster.kmeans")
})

# =============================================================================
# cluster.optim Tests
# =============================================================================

test_that("cluster.optim returns correct structure", {
  data <- create_clustered_data()
  result <- fdars::cluster.optim(data$fd, ncl.range = 2:5,
                                  criterion = "silhouette", seed = 123)

  expect_s3_class(result, "cluster.optim")
  expect_true("optimal.k" %in% names(result))
  expect_true("criterion" %in% names(result))
  expect_true("scores" %in% names(result))
  expect_true("models" %in% names(result))
  expect_true("best.model" %in% names(result))

  # Check dimensions
  expect_length(result$scores, 4)  # k = 2, 3, 4, 5
  expect_length(result$models, 4)
  expect_s3_class(result$best.model, "cluster.kmeans")
})

test_that("cluster.optim silhouette finds correct k", {
  data <- create_clustered_data()
  result <- fdars::cluster.optim(data$fd, ncl.range = 2:6,
                                  criterion = "silhouette", seed = 123)

  # Should find k=3 for well-separated 3-cluster data
  expect_equal(result$optimal.k, 3)

  # Silhouette scores should be between -1 and 1
  expect_true(all(result$scores >= -1 & result$scores <= 1))
})

test_that("cluster.optim Calinski-Harabasz finds correct k", {
  data <- create_clustered_data()
  result <- fdars::cluster.optim(data$fd, ncl.range = 2:6,
                                  criterion = "CH", seed = 123)

  # Should find k=3 for well-separated 3-cluster data
  expect_equal(result$optimal.k, 3)

  # CH index should be positive
  expect_true(all(result$scores > 0))
})

test_that("cluster.optim elbow finds reasonable k", {
  data <- create_clustered_data()
  result <- fdars::cluster.optim(data$fd, ncl.range = 2:6,
                                  criterion = "elbow", seed = 123)

  # Should find k around 3 for well-separated 3-cluster data
  expect_true(result$optimal.k >= 2 && result$optimal.k <= 4)

  # WCSS should be positive and decreasing
  expect_true(all(result$scores > 0))
  expect_true(all(diff(result$scores) <= 0))
})

test_that("cluster.optim is reproducible with seed", {
  data <- create_clustered_data()
  result1 <- fdars::cluster.optim(data$fd, ncl.range = 2:5,
                                   criterion = "silhouette", seed = 999)
  result2 <- fdars::cluster.optim(data$fd, ncl.range = 2:5,
                                   criterion = "silhouette", seed = 999)

  expect_equal(result1$optimal.k, result2$optimal.k)
  expect_equal(result1$scores, result2$scores)
})

test_that("cluster.optim print method works", {
  data <- create_clustered_data()
  result <- fdars::cluster.optim(data$fd, ncl.range = 2:4,
                                  criterion = "silhouette", seed = 123)

  # Print should not error
  expect_output(print(result), "Optimal K-Means Clustering")
  expect_output(print(result), "silhouette")
  expect_output(print(result), "Optimal k:")
})

test_that("cluster.optim plot method works", {
  data <- create_clustered_data()
  result <- fdars::cluster.optim(data$fd, ncl.range = 2:4,
                                  criterion = "silhouette", seed = 123)

  # Plot should not error
  expect_no_error(plot(result))
})

test_that("cluster.optim best.model matches optimal.k", {
  data <- create_clustered_data()
  result <- fdars::cluster.optim(data$fd, ncl.range = 2:5,
                                  criterion = "silhouette", seed = 123)

  # best.model should have optimal.k clusters
  n_clusters <- length(unique(result$best.model$cluster))
  expect_equal(n_clusters, result$optimal.k)
})

test_that("cluster.optim with different metrics", {
  data <- create_clustered_data(n_per_cluster = 8)

  # Test with L1 metric
  result_l1 <- fdars::cluster.optim(data$fd, ncl.range = 2:4,
                                     criterion = "silhouette",
                                     metric = "L1", seed = 123)
  expect_s3_class(result_l1, "cluster.optim")

  # Test with Linf metric
  result_linf <- fdars::cluster.optim(data$fd, ncl.range = 2:4,
                                       criterion = "silhouette",
                                       metric = "Linf", seed = 123)
  expect_s3_class(result_linf, "cluster.optim")
})

# =============================================================================
# Input Validation Tests
# =============================================================================

test_that("cluster.kmeans rejects non-fdata input", {
  X <- matrix(rnorm(100), 10, 10)
  expect_error(fdars::cluster.kmeans(X, ncl = 2))
})

test_that("cluster.optim rejects non-fdata input", {
  X <- matrix(rnorm(100), 10, 10)
  expect_error(fdars::cluster.optim(X, ncl.range = 2:4))
})

test_that("cluster.optim handles invalid ncl.range gracefully", {
  data <- create_clustered_data()

  # k=1 is filtered out, so 2:3 is used
  result1 <- fdars::cluster.optim(data$fd, ncl.range = 1:3, seed = 123)
  expect_true(min(result1$ncl.range) >= 2)

  # k > n is filtered out
  result2 <- fdars::cluster.optim(data$fd, ncl.range = 2:100, seed = 123)
  expect_true(max(result2$ncl.range) < data$n)
})

test_that("cluster.optim errors when all ncl.range is invalid", {
  # Create tiny dataset
  set.seed(42)
  fd <- fdars::fdata(matrix(rnorm(20), 2, 10))

  # All values in 1:1 are invalid (k=1 not allowed)
  expect_error(fdars::cluster.optim(fd, ncl.range = 1))
})

test_that("cluster.optim rejects invalid criterion", {
  data <- create_clustered_data()
  expect_error(fdars::cluster.optim(data$fd, ncl.range = 2:4,
                                     criterion = "invalid"))
})

# =============================================================================
# Edge Cases
# =============================================================================

test_that("cluster.kmeans with k=2", {
  data <- create_clustered_data()
  result <- fdars::cluster.kmeans(data$fd, ncl = 2, seed = 123)

  expect_s3_class(result, "cluster.kmeans")
  expect_equal(length(unique(result$cluster)), 2)
})

test_that("cluster.kmeans with small dataset", {
  set.seed(42)
  n <- 6
  m <- 20
  t_grid <- seq(0, 1, length.out = m)
  X <- matrix(rnorm(n * m), n, m)
  fd <- fdars::fdata(X, argvals = t_grid)

  result <- fdars::cluster.kmeans(fd, ncl = 2, seed = 123)
  expect_s3_class(result, "cluster.kmeans")
  expect_length(result$cluster, n)
})

test_that("cluster.optim with narrow range", {
  data <- create_clustered_data()
  result <- fdars::cluster.optim(data$fd, ncl.range = 3:4,
                                  criterion = "silhouette", seed = 123)

  expect_s3_class(result, "cluster.optim")
  expect_true(result$optimal.k %in% 3:4)
})

# =============================================================================
# cluster.kmeans Print and Plot Tests
# =============================================================================

test_that("print.cluster.kmeans works", {
  data <- create_clustered_data()
  result <- fdars::cluster.kmeans(data$fd, ncl = 3, seed = 123)

  expect_output(print(result), "K-Means Clustering")
  expect_output(print(result), "clusters")
})

test_that("plot.cluster.kmeans works", {
  data <- create_clustered_data()
  result <- fdars::cluster.kmeans(data$fd, ncl = 3, seed = 123)

  expect_no_error(plot(result))
})

# =============================================================================
# cluster.fcm Tests (Fuzzy C-Means)
# =============================================================================

test_that("cluster.fcm returns correct structure", {
  data <- create_clustered_data()
  result <- fdars::cluster.fcm(data$fd, ncl = 3, seed = 123)

  expect_s3_class(result, "cluster.fcm")
  expect_true("cluster" %in% names(result))
  expect_true("membership" %in% names(result))
  expect_true("centers" %in% names(result))
  expect_true("fdataobj" %in% names(result))

  # Check dimensions
  expect_length(result$cluster, data$n)
  expect_equal(nrow(result$membership), data$n)
  expect_equal(ncol(result$membership), 3)
})

test_that("cluster.fcm membership is valid", {
  data <- create_clustered_data()
  result <- fdars::cluster.fcm(data$fd, ncl = 3, seed = 123)

  # Membership values should be in [0, 1]
  expect_true(all(result$membership >= 0 & result$membership <= 1))

  # Rows should sum to 1
  row_sums <- rowSums(result$membership)
  expect_equal(row_sums, rep(1, data$n), tolerance = 1e-6)
})

test_that("cluster.fcm cluster assignments match max membership", {
  data <- create_clustered_data()
  result <- fdars::cluster.fcm(data$fd, ncl = 3, seed = 123)

  # Each cluster assignment should be the column with max membership
  expected_clusters <- apply(result$membership, 1, which.max)
  expect_equal(result$cluster, expected_clusters)
})

test_that("cluster.fcm is reproducible with seed", {
  data <- create_clustered_data()
  result1 <- fdars::cluster.fcm(data$fd, ncl = 3, seed = 456)
  result2 <- fdars::cluster.fcm(data$fd, ncl = 3, seed = 456)

  expect_equal(result1$cluster, result2$cluster)
  expect_equal(result1$membership, result2$membership)
})

test_that("cluster.fcm fuzziness parameter works", {
  data <- create_clustered_data()

  # Low fuzziness (closer to hard clustering)
  result_low <- fdars::cluster.fcm(data$fd, ncl = 3, m = 1.1, seed = 123)

  # High fuzziness (more fuzzy membership)
  result_high <- fdars::cluster.fcm(data$fd, ncl = 3, m = 3.0, seed = 123)

  expect_s3_class(result_low, "cluster.fcm")
  expect_s3_class(result_high, "cluster.fcm")

  # High fuzziness should have more even membership distribution
  max_membership_low <- max(apply(result_low$membership, 1, max))
  max_membership_high <- max(apply(result_high$membership, 1, max))

  # Generally, higher m should lead to more evenly distributed membership
  # (but this is data-dependent)
})

test_that("cluster.fcm with different fuzziness", {
  data <- create_clustered_data(n_per_cluster = 8)

  result_m2 <- fdars::cluster.fcm(data$fd, ncl = 3, m = 2, seed = 123)
  expect_s3_class(result_m2, "cluster.fcm")

  result_m3 <- fdars::cluster.fcm(data$fd, ncl = 3, m = 3, seed = 123)
  expect_s3_class(result_m3, "cluster.fcm")
})

test_that("print.cluster.fcm works", {
  data <- create_clustered_data()
  result <- fdars::cluster.fcm(data$fd, ncl = 3, seed = 123)

  expect_output(print(result), "Fuzzy C-Means")
  expect_output(print(result), "clusters")
})

test_that("plot.cluster.fcm works", {
  data <- create_clustered_data()
  result <- fdars::cluster.fcm(data$fd, ncl = 3, seed = 123)

  expect_no_error(plot(result))
})

test_that("cluster.fcm with k=2", {
  data <- create_clustered_data()
  result <- fdars::cluster.fcm(data$fd, ncl = 2, seed = 123)

  expect_s3_class(result, "cluster.fcm")
  expect_equal(ncol(result$membership), 2)
})

test_that("cluster.fcm with tolerance parameter", {
  data <- create_clustered_data(n_per_cluster = 8)

  result <- fdars::cluster.fcm(data$fd, ncl = 3, tol = 1e-8, seed = 123)
  expect_s3_class(result, "cluster.fcm")
})

test_that("cluster.fcm rejects non-fdata input", {
  X <- matrix(rnorm(100), 10, 10)
  expect_error(fdars::cluster.fcm(X, ncl = 2))
})

# =============================================================================
# cluster.init Tests
# =============================================================================

test_that("cluster.init returns valid initial centers", {
  data <- create_clustered_data()
  centers <- fdars::cluster.init(data$fd, ncl = 3, seed = 123)

  expect_s3_class(centers, "fdata")
  expect_equal(nrow(centers$data), 3)
  expect_equal(ncol(centers$data), data$m)
})

test_that("cluster.init with L1 metric works", {
  data <- create_clustered_data()
  centers <- fdars::cluster.init(data$fd, ncl = 3, metric = "L1", seed = 123)

  expect_s3_class(centers, "fdata")
  expect_equal(nrow(centers$data), 3)
})

test_that("cluster.init is reproducible with seed", {
  data <- create_clustered_data()
  centers1 <- fdars::cluster.init(data$fd, ncl = 3, seed = 789)
  centers2 <- fdars::cluster.init(data$fd, ncl = 3, seed = 789)

  expect_equal(centers1$data, centers2$data)
})