# Chain tests: Quantized ops # DequantizeLinear, QuantizeLinear, QLinearConv, QLinearAdd, # QLinearMatMul, QLinearSigmoid, QLinearConcat # # Pattern: quantized inference pipeline (INT8 simulated as F32) run_onnx <- function(path, inputs, device = "cpu") { m <- onnx_load(path, device = device) res <- onnx_run(m, inputs) res[[1]] } # Helper: make scalar F32 tensor initializer make_scalar <- function(name, val) { raw <- .float_bytes(val) t <- .onnx_tensor(name, c(1L), 1L, raw) vi <- .onnx_value_info(name, 1L, c(1L)) list(t = t, vi = vi) } # ── Minimal (2 ops): DequantizeLinear → Relu ──────────────── test_that("chain quant: DequantizeLinear→Relu (minimal)", { # Dequant: y = (x - zp) * scale, then Relu inp <- .onnx_value_info("X", 1L, c(4L)) outp <- .onnx_value_info("Y", 1L, c(4L)) sc <- make_scalar("sc", 0.1) zp <- make_scalar("zp", 128.0) dq_node <- .onnx_node("DequantizeLinear", c("X", "sc", "zp"), "dq") relu_node <- .onnx_node("Relu", "dq", "Y") graph <- .onnx_graph("test", list(dq_node, relu_node), list(inp, sc$vi, zp$vi), list(outp), list(sc$t, zp$t)) path <- tempfile(fileext = ".onnx") writeBin(.onnx_model(graph), path) # Simulated uint8 values as floats: 200, 100, 128, 255 x <- c(200, 100, 128, 255) result <- run_onnx(path, list(X = x)) r <- as.numeric(result) # (x - 128) * 0.1 = 7.2, -2.8, 0.0, 12.7 # After Relu: 7.2, 0, 0, 12.7 expected <- pmax((x - 128) * 0.1, 0) expect_equal(r, expected, tolerance = 1e-3) }) # ── Minimal (2 ops): QuantizeLinear → DequantizeLinear ────── test_that("chain quant: QuantizeLinear→DequantizeLinear (round-trip)", { inp <- .onnx_value_info("X", 1L, c(4L)) outp <- .onnx_value_info("Y", 1L, c(4L)) sc <- make_scalar("sc", 0.5) zp <- make_scalar("zp", 10.0) # QuantizeLinear: y = x / scale + zp q_node <- .onnx_node("QuantizeLinear", c("X", "sc", "zp"), "q") # DequantizeLinear: y = (x - zp) * scale dq_node <- .onnx_node("DequantizeLinear", c("q", "sc", "zp"), "Y") graph <- .onnx_graph("test", list(q_node, dq_node), list(inp, sc$vi, zp$vi), list(outp), list(sc$t, zp$t)) path <- tempfile(fileext = ".onnx") writeBin(.onnx_model(graph), path) x <- c(1.0, 2.5, -1.0, 0.0) result <- run_onnx(path, list(X = x)) r <- as.numeric(result) # Round-trip: (x/0.5 + 10 - 10) * 0.5 = x (no rounding in our impl) expect_equal(r, x, tolerance = 1e-3) }) # ── QLinearMatMul: quantized matmul ───────────────────────── test_that("chain quant: QLinearMatMul (quantized matmul)", { # A[2,3] @ B[3,2] → Y[2,2], all with scale/zp inp <- .onnx_value_info("X", 1L, c(2L, 3L)) outp <- .onnx_value_info("Y", 1L, c(2L, 2L)) a_sc <- make_scalar("a_sc", 0.1) a_zp <- make_scalar("a_zp", 0.0) # B weight: [3, 2] stored as simulated quant values b_data <- c(10, 20, 30, 10, 20, 30) b_raw <- unlist(lapply(b_data, .float_bytes)) b_t <- .onnx_tensor("B", c(3L, 2L), 1L, b_raw) b_vi <- .onnx_value_info("B", 1L, c(3L, 2L)) b_sc <- make_scalar("b_sc", 0.1) b_zp <- make_scalar("b_zp", 0.0) y_sc <- make_scalar("y_sc", 1.0) y_zp <- make_scalar("y_zp", 0.0) qmm_node <- .onnx_node("QLinearMatMul", c("X", "a_sc", "a_zp", "B", "b_sc", "b_zp", "y_sc", "y_zp"), "Y") graph <- .onnx_graph("test", list(qmm_node), list(inp, a_sc$vi, a_zp$vi, b_vi, b_sc$vi, b_zp$vi, y_sc$vi, y_zp$vi), list(outp), list(a_sc$t, a_zp$t, b_t, b_sc$t, b_zp$t, y_sc$t, y_zp$t)) path <- tempfile(fileext = ".onnx") writeBin(.onnx_model(graph), path) # A (simulated quant): scale=0.1, zp=0 → dequant = A*0.1 # B: scale=0.1, zp=0 → dequant = B*0.1 # Y = (A*0.1) @ (B*0.1) / 1.0 + 0 x <- c(10, 20, 30, 40, 50, 60) result <- run_onnx(path, list(X = x)) r <- as.numeric(result) expect_equal(length(r), 4) # dA = [[1,2,3],[4,5,6]], dB = [[1,2],[3,1],[2,3]] # dA @ dB = [[1+9+6, 2+3+9],[4+15+12, 8+5+18]] = [[16,14],[31,31]] # requant: /1+0 = same # Note: ggml matmul layout may differ, just check finite & reasonable expect_true(all(is.finite(r))) expect_true(all(abs(r) < 100)) }) # ── QLinearAdd: quantized residual ────────────────────────── test_that("chain quant: QLinearAdd (quantized residual)", { inp_a <- .onnx_value_info("A", 1L, c(4L)) inp_b <- .onnx_value_info("B", 1L, c(4L)) outp <- .onnx_value_info("Y", 1L, c(4L)) a_sc <- make_scalar("a_sc", 0.1) a_zp <- make_scalar("a_zp", 128.0) b_sc <- make_scalar("b_sc", 0.2) b_zp <- make_scalar("b_zp", 128.0) y_sc <- make_scalar("y_sc", 0.1) y_zp <- make_scalar("y_zp", 128.0) qadd_node <- .onnx_node("QLinearAdd", c("A", "a_sc", "a_zp", "B", "b_sc", "b_zp", "y_sc", "y_zp"), "Y") graph <- .onnx_graph("test", list(qadd_node), list(inp_a, a_sc$vi, a_zp$vi, inp_b, b_sc$vi, b_zp$vi, y_sc$vi, y_zp$vi), list(outp), list(a_sc$t, a_zp$t, b_sc$t, b_zp$t, y_sc$t, y_zp$t)) path <- tempfile(fileext = ".onnx") writeBin(.onnx_model(graph), path) a <- c(138, 148, 118, 128) # dequant: (a-128)*0.1 = 1.0, 2.0, -1.0, 0.0 b <- c(133, 128, 138, 123) # dequant: (b-128)*0.2 = 1.0, 0.0, 2.0, -1.0 # sum: 2.0, 2.0, 1.0, -1.0 # requant: sum/0.1 + 128 = 148, 148, 138, 118 result <- run_onnx(path, list(A = a, B = b)) r <- as.numeric(result) expected <- c(148, 148, 138, 118) expect_equal(r, expected, tolerance = 0.5) }) # ── QLinearSigmoid: quantized activation ──────────────────── test_that("chain quant: QLinearSigmoid (quantized sigmoid)", { inp <- .onnx_value_info("X", 1L, c(4L)) outp <- .onnx_value_info("Y", 1L, c(4L)) x_sc <- make_scalar("x_sc", 0.1) x_zp <- make_scalar("x_zp", 128.0) y_sc <- make_scalar("y_sc", 1.0) y_zp <- make_scalar("y_zp", 0.0) qs_node <- .onnx_node("QLinearSigmoid", c("X", "x_sc", "x_zp", "y_sc", "y_zp"), "Y") graph <- .onnx_graph("test", list(qs_node), list(inp, x_sc$vi, x_zp$vi, y_sc$vi, y_zp$vi), list(outp), list(x_sc$t, x_zp$t, y_sc$t, y_zp$t)) path <- tempfile(fileext = ".onnx") writeBin(.onnx_model(graph), path) # Input quant values: 128=0.0, 138=1.0, 118=-1.0, 178=5.0 x <- c(128, 138, 118, 178) result <- run_onnx(path, list(X = x)) r <- as.numeric(result) # dequant: (x-128)*0.1 = 0, 1, -1, 5 # sigmoid: 0.5, 0.731, 0.269, 0.993 # requant: /1+0 = same expected <- 1 / (1 + exp(-c(0, 1, -1, 5))) expect_equal(r, expected, tolerance = 0.01) }) # ── QLinearConcat: quantized concat ───────────────────────── test_that("chain quant: QLinearConcat (quantized concat)", { # Concat two [4] vectors → [8] inp_a <- .onnx_value_info("A", 1L, c(4L)) inp_b <- .onnx_value_info("B", 1L, c(4L)) outp <- .onnx_value_info("Y", 1L, c(8L)) y_sc <- make_scalar("y_sc", 0.1) y_zp <- make_scalar("y_zp", 0.0) a_sc <- make_scalar("a_sc", 0.1) a_zp <- make_scalar("a_zp", 0.0) b_sc <- make_scalar("b_sc", 0.2) b_zp <- make_scalar("b_zp", 0.0) # QLinearConcat inputs: y_scale, y_zp, x1, x1_scale, x1_zp, x2, x2_scale, x2_zp qcat_node <- .onnx_node("QLinearConcat", c("y_sc", "y_zp", "A", "a_sc", "a_zp", "B", "b_sc", "b_zp"), "Y", attrs = list(.onnx_attr_int("axis", 0L))) graph <- .onnx_graph("test", list(qcat_node), list(inp_a, inp_b, y_sc$vi, y_zp$vi, a_sc$vi, a_zp$vi, b_sc$vi, b_zp$vi), list(outp), list(y_sc$t, y_zp$t, a_sc$t, a_zp$t, b_sc$t, b_zp$t)) path <- tempfile(fileext = ".onnx") writeBin(.onnx_model(graph), path) a <- c(10, 20, 30, 40) # dequant: a*0.1 = 1,2,3,4 b <- c(5, 10, 15, 20) # dequant: b*0.2 = 1,2,3,4 result <- run_onnx(path, list(A = a, B = b)) r <- as.numeric(result) expect_equal(length(r), 8) # dequant a=[1,2,3,4], dequant b=[1,2,3,4] # concat: [1,2,3,4,1,2,3,4] # requant: /0.1 + 0 = [10,20,30,40,10,20,30,40] expect_equal(r, c(10, 20, 30, 40, 10, 20, 30, 40), tolerance = 0.5) }) # ── Real (4 ops): DequantizeLinear → Conv → Sigmoid → QuantizeLinear ── test_that("chain quant: Dequant→Conv→Sigmoid→Quant (INT8 pipeline)", { # Simulates quantized inference: dequant input → conv → sigmoid → requant # Input: [1, 2, 4, 4] (need 2+ channels for ggml 4D weight detection) inp <- .onnx_value_info("X", 1L, c(1L, 2L, 4L, 4L)) outp <- .onnx_value_info("Y", 1L, c(1L, 2L, 4L, 4L)) # DequantizeLinear params dq_sc <- make_scalar("dq_sc", 0.1) dq_zp <- make_scalar("dq_zp", 128.0) # Conv 2→2, 1x1 kernel (identity-like) w_data <- c(1, 0, 0, 1) # 2x2 identity across channels w_raw <- unlist(lapply(w_data, .float_bytes)) w_t <- .onnx_tensor("W", c(2L, 2L, 1L, 1L), 1L, w_raw) w_vi <- .onnx_value_info("W", 1L, c(2L, 2L, 1L, 1L)) # QuantizeLinear params q_sc <- make_scalar("q_sc", 0.01) q_zp <- make_scalar("q_zp", 128.0) dq_node <- .onnx_node("DequantizeLinear", c("X", "dq_sc", "dq_zp"), "dq") conv_node <- .onnx_node("Conv", c("dq", "W"), "conv", attrs = list(.onnx_attr_ints("kernel_shape", c(1L, 1L)))) sig_node <- .onnx_node("Sigmoid", "conv", "sig") q_node <- .onnx_node("QuantizeLinear", c("sig", "q_sc", "q_zp"), "Y") graph <- .onnx_graph("test", list(dq_node, conv_node, sig_node, q_node), list(inp, dq_sc$vi, dq_zp$vi, w_vi, q_sc$vi, q_zp$vi), list(outp), list(dq_sc$t, dq_zp$t, w_t, q_sc$t, q_zp$t)) path <- tempfile(fileext = ".onnx") writeBin(.onnx_model(graph), path) set.seed(42) x <- runif(32, 100, 156) # simulated uint8 around 128 result <- run_onnx(path, list(X = x)) r <- as.numeric(result) expect_equal(length(r), 32) expect_true(all(is.finite(r))) # After sigmoid→quantize, values should be around 128 + sigmoid/0.01 expect_true(all(r > 50 & r < 250)) }) # ── Boundary: zero-point = 0, scale = 1 (passthrough) ────── test_that("chain quant: scale=1 zp=0 passthrough (boundary)", { inp <- .onnx_value_info("X", 1L, c(4L)) outp <- .onnx_value_info("Y", 1L, c(4L)) sc <- make_scalar("sc", 1.0) zp <- make_scalar("zp", 0.0) # QuantizeLinear then DequantizeLinear with scale=1, zp=0 → identity q_node <- .onnx_node("QuantizeLinear", c("X", "sc", "zp"), "q") dq_node <- .onnx_node("DequantizeLinear", c("q", "sc", "zp"), "Y") graph <- .onnx_graph("test", list(q_node, dq_node), list(inp, sc$vi, zp$vi), list(outp), list(sc$t, zp$t)) path <- tempfile(fileext = ".onnx") writeBin(.onnx_model(graph), path) x <- c(-3.14, 0, 2.71, 100) result <- run_onnx(path, list(X = x)) r <- as.numeric(result) expect_equal(r, x, tolerance = 1e-3) })