# Tests for low-level quantization functions

library(ggmlR)

# ============================================================================
# Basic quantization (q4_0, q4_1, q5_0, q5_1, q8_0)
# ============================================================================

test_that("basic quants roundtrip works", {
  n <- 64
  original <- runif(n, -1, 1)

  # Q4_0
  q <- quantize_row_q4_0_ref(original, n)
  expect_type(q, "raw")
  d <- dequantize_row_q4_0(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.9)

  # Q4_1
  q <- quantize_row_q4_1_ref(original, n)
  d <- dequantize_row_q4_1(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.9)

  # Q5_0
  q <- quantize_row_q5_0_ref(original, n)
  d <- dequantize_row_q5_0(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.9)

  # Q5_1
  q <- quantize_row_q5_1_ref(original, n)
  d <- dequantize_row_q5_1(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.9)

  # Q8_0 (higher precision)
  q <- quantize_row_q8_0_ref(original, n)
  d <- dequantize_row_q8_0(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.99)

  # Q8_1 (only the quantize direction is exercised here)
  q <- quantize_row_q8_1_ref(original, n)
  expect_type(q, "raw")
})

test_that("quantize with imatrix works", {
  n <- 64
  original <- runif(n, -1, 1)
  imatrix <- rep(1.0, n)

  # With imatrix
  q <- quantize_q4_0(original, 1, n, imatrix)
  expect_type(q, "raw")
  d <- dequantize_row_q4_0(q, n)
  expect_length(d, n)

  # Without imatrix
  q <- quantize_q4_0(original, 1, n, NULL)
  expect_type(q, "raw")

  # Multiple rows
  q <- quantize_q4_0(rep(original, 2), 2, n, NULL)
  expect_type(q, "raw")
})

test_that("ggml_quant_block_info works", {
  info <- ggml_quant_block_info(GGML_TYPE_Q4_0)
  expect_equal(info$type_name, "q4_0")
  expect_true(info$is_quantized)
  expect_equal(info$block_size, 32L)

  info <- ggml_quant_block_info(GGML_TYPE_F32)
  expect_equal(info$type_name, "f32")
  expect_false(info$is_quantized)
})

# ============================================================================
# K-quants (q2_K through q8_K) - use 256-element blocks
# ============================================================================

test_that("K-quants roundtrip works", {
  n <- 256
  original <- runif(n, -1, 1)

  # The correlation thresholds below rise from Q2_K to Q6_K.

  # Q2_K
  q <- quantize_q2_K(original, 1, n, NULL)
  expect_type(q, "raw")
  d <- dequantize_row_q2_K(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.7)

  # Q3_K
  q <- quantize_q3_K(original, 1, n, NULL)
  d <- dequantize_row_q3_K(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.8)

  # Q4_K
  q <- quantize_q4_K(original, 1, n, NULL)
  d <- dequantize_row_q4_K(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.9)

  # Q5_K
  q <- quantize_q5_K(original, 1, n, NULL)
  d <- dequantize_row_q5_K(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.95)

  # Q6_K
  q <- quantize_q6_K(original, 1, n, NULL)
  d <- dequantize_row_q6_K(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.98)
})

test_that("K-quants row_ref works", {
  n <- 256
  original <- runif(n, -1, 1)

  expect_type(quantize_row_q2_K_ref(original, n), "raw")
  expect_type(quantize_row_q3_K_ref(original, n), "raw")
  expect_type(quantize_row_q4_K_ref(original, n), "raw")
  expect_type(quantize_row_q5_K_ref(original, n), "raw")
  expect_type(quantize_row_q6_K_ref(original, n), "raw")

  # Q8_K dequantize
  q <- quantize_row_q8_K_ref(original, n)
  d <- dequantize_row_q8_K(q, n)
  expect_length(d, n)
  expect_gt(cor(original, d), 0.99)
})

# Roundtrip check for quant types that are bracketed by ggml_quantize_init().
#
# Calls ggml_quantize_init(type), quantizes `x` with `quantize_ref`, checks the
# result is a raw vector, dequantizes it with `dequantize`, and checks the
# output length equals length(x). ggml_quantize_free() is registered with
# on.exit() so it still runs when a quantize/dequantize call raises an R error;
# written as a plain trailing statement it would be skipped in that case.
#
# type         Integer type id passed to ggml_quantize_init().
# quantize_ref Function called as quantize_ref(x, n).
# dequantize   Function called as dequantize(q, n).
# x            Numeric vector to quantize.
expect_init_roundtrip <- function(type, quantize_ref, dequantize, x) {
  n <- length(x)

  ggml_quantize_init(type)
  on.exit(ggml_quantize_free(), add = TRUE)

  q <- quantize_ref(x, n)
  expect_type(q, "raw")
  d <- dequantize(q, n)
  expect_length(d, n)
}

# ============================================================================
# Ternary quants (TQ1_0, TQ2_0)
# ============================================================================

test_that("ternary quants work", {
  skip_if(
    Sys.getenv("GGMLR_SLOW_TESTS") != "1",
    "Slow test - set GGMLR_SLOW_TESTS=1 to run"
  )

  n <- 256
  original <- runif(n, -1, 1)

  # TQ1_0 (type 34)
  expect_init_roundtrip(
    34L, quantize_row_tq1_0_ref, dequantize_row_tq1_0, original
  )

  # TQ2_0 (type 35)
  expect_init_roundtrip(
    35L, quantize_row_tq2_0_ref, dequantize_row_tq2_0, original
  )
})

# ============================================================================
# IQ quants (IQ3, IQ4) - require ggml_quantize_init
# ============================================================================

test_that("IQ3/IQ4 quants work", {
  skip_if(
    Sys.getenv("GGMLR_SLOW_TESTS") != "1",
    "Slow test - set GGMLR_SLOW_TESTS=1 to run"
  )

  n <- 256
  original <- runif(n, -1, 1)

  # IQ3_XXS (type 18)
  expect_init_roundtrip(
    18L, quantize_row_iq3_xxs_ref, dequantize_row_iq3_xxs, original
  )

  # IQ3_S (type 21)
  expect_init_roundtrip(
    21L, quantize_row_iq3_s_ref, dequantize_row_iq3_s, original
  )

  # IQ4_NL (type 20)
  expect_init_roundtrip(
    20L, quantize_row_iq4_nl_ref, dequantize_row_iq4_nl, original
  )

  # IQ4_XS (type 23)
  expect_init_roundtrip(
    23L, quantize_row_iq4_xs_ref, dequantize_row_iq4_xs, original
  )

  # IQ2_S (type 22)
  expect_init_roundtrip(
    22L, quantize_row_iq2_s_ref, dequantize_row_iq2_s, original
  )
})

# ============================================================================
# IQ2/IQ1 quants - require iq2xs_init_impl + importance matrix
# ============================================================================

test_that("IQ2_XXS quant with imatrix works", {
  skip_if(
    Sys.getenv("GGMLR_SLOW_TESTS") != "1",
    "Slow test - set GGMLR_SLOW_TESTS=1 to run"
  )

  n <- 256
  original <- runif(n, -1, 1)
  imatrix <- rep(1.0, n)

  # Wrapped in a function so on.exit() can release both sets of tables even
  # when a quantize/dequantize call errors. `after = FALSE` makes
  # ggml_quantize_free() run before iq2xs_free_impl(), i.e. the same order as
  # the explicit cleanup calls this replaces.
  run_roundtrip <- function() {
    iq2xs_init_impl(16L)
    on.exit(iq2xs_free_impl(16L), add = TRUE)
    ggml_quantize_init(16L)
    on.exit(ggml_quantize_free(), add = TRUE, after = FALSE)

    q <- quantize_iq2_xxs(original, 1, n, imatrix)
    expect_type(q, "raw")
    d <- dequantize_row_iq2_xxs(q, n)
    expect_length(d, n)
  }
  run_roundtrip()
})
# Roundtrip check for IQ quant types that need both iq2xs_init_impl() and
# ggml_quantize_init() plus an importance matrix.
#
# Draws `n` uniform values in [-1, 1], builds an all-ones importance matrix of
# the same length, quantizes a single row with `quantize`, checks the result is
# a raw vector, dequantizes it with `dequantize`, and checks the output length.
# Both frees are registered with on.exit() so they still run when a
# quantize/dequantize call raises an R error. `after = FALSE` makes
# ggml_quantize_free() run before iq2xs_free_impl(), the same order the
# explicit cleanup calls used.
#
# type       Integer type id passed to the init/free functions.
# quantize   Function called as quantize(x, 1, n, imatrix).
# dequantize Function called as dequantize(q, n).
# n          Number of elements in the generated row (default 256).
expect_imatrix_iq_roundtrip <- function(type, quantize, dequantize, n = 256) {
  original <- runif(n, -1, 1)
  imatrix <- rep(1.0, n)

  iq2xs_init_impl(type)
  on.exit(iq2xs_free_impl(type), add = TRUE)
  ggml_quantize_init(type)
  on.exit(ggml_quantize_free(), add = TRUE, after = FALSE)

  q <- quantize(original, 1, n, imatrix)
  expect_type(q, "raw")
  d <- dequantize(q, n)
  expect_length(d, n)
}

test_that("IQ2_XS quant with imatrix works", {
  skip_if(
    Sys.getenv("GGMLR_SLOW_TESTS") != "1",
    "Slow test - set GGMLR_SLOW_TESTS=1 to run"
  )

  expect_imatrix_iq_roundtrip(17L, quantize_iq2_xs, dequantize_row_iq2_xs)
})

test_that("IQ1_S quant with imatrix works", {
  skip_if(
    Sys.getenv("GGMLR_SLOW_TESTS") != "1",
    "Slow test - set GGMLR_SLOW_TESTS=1 to run"
  )

  expect_imatrix_iq_roundtrip(19L, quantize_iq1_s, dequantize_row_iq1_s)
})

test_that("IQ1_M quant with imatrix works", {
  skip_if(
    Sys.getenv("GGMLR_SLOW_TESTS") != "1",
    "Slow test - set GGMLR_SLOW_TESTS=1 to run"
  )

  expect_imatrix_iq_roundtrip(29L, quantize_iq1_m, dequantize_row_iq1_m)
})

# ============================================================================
# MXFP4
# ============================================================================

test_that("MXFP4 quantize/dequantize works", {
  n <- 64
  original <- runif(n, -1, 1)

  q <- quantize_row_mxfp4_ref(original, n)
  expect_type(q, "raw")
  d <- dequantize_row_mxfp4(q, n)
  expect_length(d, n)
})

# ============================================================================
# Additional quantize functions
# ============================================================================

test_that("quantize functions with imatrix work", {
  n <- 64
  original <- runif(n, -1, 1)

  # NOTE(review): despite the test name, every call below passes NULL as the
  # importance-matrix argument; only the return type is checked.
  expect_type(quantize_q4_1(original, 1, n, NULL), "raw")
  expect_type(quantize_q5_0(original, 1, n, NULL), "raw")
  expect_type(quantize_q5_1(original, 1, n, NULL), "raw")
  expect_type(quantize_q8_0(original, 1, n, NULL), "raw")
})

test_that("different quant sizes comparison", {
  n <- 256
  original <- runif(n, -1, 1)

  q4 <- quantize_q4_0(original, 1, n, NULL)
  q8 <- quantize_q8_0(original, 1, n, NULL)

  # Q8 should be larger than Q4
  expect_gt(length(q8), length(q4))
})