library(ggmlR)

skip_if_no_vulkan <- function() {
  skip_if(!ggml_vulkan_available(), "Vulkan not available")
  skip_if(ggml_vulkan_device_count() == 0L, "No Vulkan devices")
}

# ---- caps fields -------------------------------------------------------------

test_that("ggml_vulkan_device_caps returns supports_256_push_constants and max_push_constants_size", {
  skip_if_no_vulkan()
  caps <- ggml_vulkan_device_caps(0L)
  expect_true("supports_256_push_constants" %in% names(caps))
  expect_true("max_push_constants_size" %in% names(caps))
  expect_type(caps$supports_256_push_constants, "logical")
  expect_type(caps$max_push_constants_size, "integer")
})

test_that("max_push_constants_size meets Vulkan spec minimum of 128 bytes", {
  skip_if_no_vulkan()
  caps <- ggml_vulkan_device_caps(0L)
  expect_gte(caps$max_push_constants_size, 128L)
})

test_that("supports_256_push_constants is consistent with max_push_constants_size", {
  skip_if_no_vulkan()
  caps <- ggml_vulkan_device_caps(0L)
  if (caps$max_push_constants_size >= 256L) {
    expect_true(caps$supports_256_push_constants)
  } else {
    expect_false(caps$supports_256_push_constants)
  }
})

test_that("supports_256_push_constants is TRUE (ggml_vulkan_init would have aborted otherwise)", {
  skip_if_no_vulkan()
  # ggml_vk_init() calls r_ggml_error() if maxPushConstantsSize < 256,
  # so if we reach this point the Vulkan backend is already initialised and
  # the capability must be TRUE.
  caps <- ggml_vulkan_device_caps(0L)
  expect_true(caps$supports_256_push_constants)
})
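# The caps tests above probe only device 0. A minimal sketch extending the
# spec-minimum check to every enumerated device, assuming
# ggml_vulkan_device_caps() accepts any index below
# ggml_vulkan_device_count() (this file itself only demonstrates index 0L):

test_that("every Vulkan device meets the 128-byte push-constant spec minimum", {
  skip_if_no_vulkan()
  for (i in seq_len(ggml_vulkan_device_count()) - 1L) {
    caps <- ggml_vulkan_device_caps(i)
    expect_gte(caps$max_push_constants_size, 128L)
  }
})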
# ---- 5D ops require 256-byte push constants ---------------------------------

# Run a 5D element-wise add on the requested backend ("cpu" or "vulkan")
# and return the flattened result.
run_5d_add <- function(device, a_vals, b_vals) {
  ne <- c(4L, 3L, 2L, 5L, 2L)
  n <- prod(ne)
  env <- new.env(parent = emptyenv())
  env$a <- NULL; env$b <- NULL
  build <- function(ctx) {
    env$a <- ggml_new_tensor(ctx, GGML_TYPE_F32, 5L, ne)
    env$b <- ggml_new_tensor(ctx, GGML_TYPE_F32, 5L, ne)
    ggml_add(ctx, env$a, env$b)
  }
  ctx <- ggml_init(mem_size = 16L * 1024L * 1024L, no_alloc = TRUE)
  out <- build(ctx)
  backend <- if (device == "cpu") {
    b <- ggml_backend_cpu_init(); ggml_backend_cpu_set_n_threads(b, 2L); b
  } else {
    ggml_vulkan_init(0L)
  }
  buf <- ggml_backend_alloc_ctx_tensors(ctx, backend)
  ggml_backend_tensor_set_data(env$a, a_vals)
  ggml_backend_tensor_set_data(env$b, b_vals)
  graph <- ggml_build_forward_expand(ctx, out)
  ggml_backend_graph_compute(backend, graph)
  result <- ggml_backend_tensor_get_data(out, n_elements = n)
  ggml_backend_buffer_free(buf)
  ggml_backend_free(backend)
  ggml_free(ctx)
  result
}

test_that("5D add CPU matches R reference", {
  ne <- c(4L, 3L, 2L, 5L, 2L); n <- prod(ne)
  set.seed(1L); a <- runif(n); b <- runif(n)
  r <- run_5d_add("cpu", a, b)
  expect_length(r, n)
  expect_true(all(is.finite(r)))
  expect_lt(max(abs(r - (a + b))), 1e-4, label = "5D add CPU vs R reference")
})

test_that("5D add Vulkan matches R reference", {
  skip_if_no_vulkan()
  ne <- c(4L, 3L, 2L, 5L, 2L); n <- prod(ne)
  set.seed(1L); a <- runif(n); b <- runif(n)
  gpu <- run_5d_add("vulkan", a, b)
  expect_length(gpu, n)
  expect_true(all(is.finite(gpu)), label = "GPU output contains NaN/Inf")
  expect_lt(max(abs(gpu - (a + b))), 1e-4, label = "5D add GPU vs R reference")
})

# Run a 5D concat along dim 4 on the requested backend and return the
# flattened result; the output doubles the outermost dimension.
run_5d_concat <- function(device, a_vals, b_vals) {
  ne <- c(4L, 3L, 2L, 5L, 2L)
  n_out <- prod(c(4L, 3L, 2L, 5L, 4L))
  env <- new.env(parent = emptyenv())
  ctx <- ggml_init(mem_size = 16L * 1024L * 1024L, no_alloc = TRUE)
  env$a <- ggml_new_tensor(ctx, GGML_TYPE_F32, 5L, ne)
  env$b <- ggml_new_tensor(ctx, GGML_TYPE_F32, 5L, ne)
  out <- ggml_concat(ctx, env$a, env$b, dim = 4L)
  backend <- if (device == "cpu") {
    b <- ggml_backend_cpu_init(); ggml_backend_cpu_set_n_threads(b, 2L); b
  } else {
    ggml_vulkan_init(0L)
  }
  buf <- ggml_backend_alloc_ctx_tensors(ctx, backend)
  ggml_backend_tensor_set_data(env$a, a_vals)
  ggml_backend_tensor_set_data(env$b, b_vals)
  graph <- ggml_build_forward_expand(ctx, out)
  ggml_backend_graph_compute(backend, graph)
  result <- ggml_backend_tensor_get_data(out, n_elements = n_out)
  ggml_backend_buffer_free(buf); ggml_backend_free(backend); ggml_free(ctx)
  result
}

test_that("5D concat axis=4 CPU produces finite output", {
  ne <- c(4L, 3L, 2L, 5L, 2L); n <- prod(ne)
  n_out <- prod(c(4L, 3L, 2L, 5L, 4L))
  set.seed(2L); a <- runif(n); b <- runif(n)
  r <- run_5d_concat("cpu", a, b)
  expect_length(r, n_out)
  expect_true(all(is.finite(r)))
})

test_that("5D concat axis=4 Vulkan matches CPU", {
  skip_if_no_vulkan()
  ne <- c(4L, 3L, 2L, 5L, 2L); n <- prod(ne)
  n_out <- prod(c(4L, 3L, 2L, 5L, 4L))
  set.seed(2L); a <- runif(n); b <- runif(n)
  cpu <- run_5d_concat("cpu", a, b)
  gpu <- run_5d_concat("vulkan", a, b)
  expect_length(gpu, n_out)
  expect_true(all(is.finite(gpu)), label = "GPU concat axis=4 output contains NaN/Inf")
  expect_lt(max(abs(cpu - gpu)), 1e-4,
            label = sprintf("5D concat axis=4 CPU vs GPU max diff = %.2e",
                            max(abs(cpu - gpu))))
})
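# Because the concat above runs along the outermost dimension, a flat R
# reference is possible. A minimal sketch, assuming ggml's contiguous F32
# layout (dim 0 varies fastest, dim 4 slowest) and that
# ggml_backend_tensor_get_data() returns data in that order, so concatenation
# along dim 4 reduces to c(a, b):

test_that("5D concat axis=4 CPU matches R reference c(a, b)", {
  ne <- c(4L, 3L, 2L, 5L, 2L); n <- prod(ne)
  n_out <- prod(c(4L, 3L, 2L, 5L, 4L))
  set.seed(2L); a <- runif(n); b <- runif(n)
  r <- run_5d_concat("cpu", a, b)
  expect_length(r, n_out)
  expect_lt(max(abs(r - c(a, b))), 1e-4,
            label = "5D concat axis=4 CPU vs R reference")
})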