# Tests for Normalization Functions

# Builds the forward graph ending at `node`, computes it in `ctx`, and returns
# the node's values as read back by ggml_get_f32().
run_forward <- function(ctx, node) {
  graph <- ggml_build_forward_expand(ctx, node)
  ggml_graph_compute(ctx, graph)
  ggml_get_f32(node)
}

# ============================================================================
# Layer Normalization
# ============================================================================

test_that("ggml_norm computes layer normalization", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  input <- c(1, 2, 3, 4)
  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
  ggml_set_f32(a, input)

  result <- ggml_norm(ctx, a, eps = 1e-5)
  output <- run_forward(ctx, result)

  # Layer norm: (x - mean) / sqrt(var + eps), with the population variance
  # (divide by n), hence mean(centered^2) rather than var().
  centered <- input - mean(input)
  expected <- centered / sqrt(mean(centered^2) + 1e-5)
  expect_equal(output, expected, tolerance = 1e-4)
})

test_that("ggml_norm_inplace works", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
  ggml_set_f32(a, c(1, 2, 3, 4))

  result <- ggml_norm_inplace(ctx, a, eps = 1e-5)
  output <- run_forward(ctx, result)

  # Result should be normalized (mean ~ 0, std ~ 1)
  expect_equal(mean(output), 0, tolerance = 1e-4)
  # sd() divides by n - 1; rescale by sqrt((n - 1) / n) = sqrt(3 / 4) to get
  # the population standard deviation.
  expect_equal(sd(output) * sqrt(3 / 4), 1, tolerance = 1e-4)
})

test_that("ggml_norm on 2D tensor normalizes along first dimension", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  # 4x2 tensor
  a <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 2)
  ggml_set_f32(a, c(1, 2, 3, 4, 5, 6, 7, 8))

  result <- ggml_norm(ctx, a, eps = 1e-5)
  output <- run_forward(ctx, result)

  expect_length(output, 8)
  expect_false(anyNA(output))

  # Each run of 4 values along the first dimension is normalized on its own.
  # 5:8 is 1:4 shifted by a constant, so both runs share the same result.
  centered <- c(1, 2, 3, 4) - mean(c(1, 2, 3, 4))
  expected_run <- centered / sqrt(mean(centered^2) + 1e-5)
  expect_equal(output, rep(expected_run, times = 2), tolerance = 1e-4)
})

# ============================================================================
# RMS Normalization
# ============================================================================

test_that("ggml_rms_norm computes RMS normalization", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  input <- c(1, 2, 3, 4)
  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
  ggml_set_f32(a, input)

  result <- ggml_rms_norm(ctx, a, eps = 1e-5)
  output <- run_forward(ctx, result)

  # RMS norm: x / sqrt(mean(x^2) + eps)
  expected <- input / sqrt(mean(input^2) + 1e-5)
  expect_equal(output, expected, tolerance = 1e-4)
})

test_that("ggml_rms_norm_inplace works", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
  ggml_set_f32(a, c(1, 2, 3, 4))

  result <- ggml_rms_norm_inplace(ctx, a, eps = 1e-5)
  output <- run_forward(ctx, result)

  # After RMS norm, sqrt(mean(output^2)) should be ~1
  expect_equal(sqrt(mean(output^2)), 1, tolerance = 1e-4)
})

test_that("ggml_rms_norm with different epsilon values", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  input <- c(0.001, 0.002, 0.003, 0.004) # Small values
  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
  ggml_set_f32(a, input)

  # With larger epsilon, normalization is more stable
  result <- ggml_rms_norm(ctx, a, eps = 1e-3)
  output <- run_forward(ctx, result)

  expect_false(anyNA(output))
  expect_false(any(is.infinite(output)))

  # Here eps (1e-3) dominates mean(x^2) (7.5e-6), so matching the closed form
  # shows that the eps argument actually reaches the computation.
  expected <- input / sqrt(mean(input^2) + 1e-3)
  expect_equal(output, expected, tolerance = 1e-4)
})

# ============================================================================
# Group Normalization
# ============================================================================

test_that("ggml_group_norm computes group normalization", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  # For group norm, tensor should be at least 2D
  # NOTE(review): an earlier comment here said shape [4, 8] with 2 groups
  # gives 4 channels per group -- confirm against the axis ggml_group_norm
  # actually groups over.
  a <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 8)
  # Deterministic, non-constant input: no RNG state involved, so any failure
  # is exactly reproducible.
  ggml_set_f32(a, sin(seq_len(32)))

  result <- ggml_group_norm(ctx, a, n_groups = 2, eps = 1e-5)
  output <- run_forward(ctx, result)

  expect_length(output, 32)
  expect_false(anyNA(output))
  expect_false(any(is.infinite(output)))
})

test_that("ggml_group_norm_inplace works", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  a <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 8)
  # Deterministic, non-constant input so failures are reproducible.
  ggml_set_f32(a, sin(seq_len(32)))

  result <- ggml_group_norm_inplace(ctx, a, n_groups = 2, eps = 1e-5)
  output <- run_forward(ctx, result)

  expect_length(output, 32)
  expect_false(anyNA(output))
})

# ============================================================================
# L2 Normalization
# ============================================================================

test_that("ggml_l2_norm normalizes to unit length", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
  ggml_set_f32(a, c(3, 0, 4, 0)) # L2 norm = 5

  result <- ggml_l2_norm(ctx, a, eps = 1e-5)
  output <- run_forward(ctx, result)

  # L2 norm of output should be 1
  expect_equal(sqrt(sum(output^2)), 1, tolerance = 1e-4)

  # Check values
  expect_equal(output, c(0.6, 0, 0.8, 0), tolerance = 1e-4)
})

test_that("ggml_l2_norm_inplace works", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
  ggml_set_f32(a, c(1, 2, 2, 4)) # L2 norm = sqrt(1 + 4 + 4 + 16) = 5

  result <- ggml_l2_norm_inplace(ctx, a, eps = 1e-5)
  output <- run_forward(ctx, result)

  # L2 norm of output should be 1
  expect_equal(sqrt(sum(output^2)), 1, tolerance = 1e-4)
})

test_that("ggml_l2_norm handles near-zero vectors with epsilon", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
  ggml_set_f32(a, c(0.0001, 0.0001, 0.0001, 0.0001)) # Very small

  result <- ggml_l2_norm(ctx, a, eps = 1e-5)
  output <- run_forward(ctx, result)

  expect_false(anyNA(output))
  expect_false(any(is.infinite(output)))
})

# ============================================================================
# RMS Norm Backward (for training)
# ============================================================================

test_that("ggml_rms_norm_back computes gradient", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  # Forward input
  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
  ggml_set_f32(a, c(1, 2, 3, 4))

  # Upstream gradient
  b <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
  ggml_set_f32(b, c(1, 1, 1, 1)) # All ones for simplicity

  result <- ggml_rms_norm_back(ctx, a, b, eps = 1e-5)
  output <- run_forward(ctx, result)

  expect_length(output, 4)
  expect_false(anyNA(output))
  expect_false(any(is.infinite(output)))
})

# ============================================================================
# Edge Cases
# ============================================================================

test_that("normalization handles single element", {
  ctx <- ggml_init(16 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1)
  ggml_set_f32(a, 5)

  # RMS norm of single element
  result <- ggml_rms_norm(ctx, a, eps = 1e-5)
  output <- run_forward(ctx, result)

  expect_false(is.na(output))
  # x / sqrt(x^2 + eps) for a single value
  expect_equal(output, 5 / sqrt(5^2 + 1e-5), tolerance = 1e-4)
})

test_that("normalization handles large tensors", {
  ctx <- ggml_init(64 * 1024 * 1024)
  on.exit(ggml_free(ctx), add = TRUE)

  n <- 4096 # Typical hidden size
  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n)
  # Deterministic, non-constant input so failures are reproducible.
  ggml_set_f32(a, sin(seq_len(n)))

  result <- ggml_rms_norm(ctx, a, eps = 1e-5)
  output <- run_forward(ctx, result)

  expect_equal(sqrt(mean(output^2)), 1, tolerance = 1e-3)
})