test_that("ggml_vulkan_available returns logical", { result <- ggml_vulkan_available() expect_type(result, "logical") expect_length(result, 1) }) test_that("ggml_vulkan_device_count returns non-negative integer", { count <- ggml_vulkan_device_count() expect_type(count, "integer") expect_gte(count, 0) }) test_that("ggml_vulkan_status runs without error", { expect_no_error(ggml_vulkan_status()) }) # Conditional tests that only run if Vulkan is available if (ggml_vulkan_available() && ggml_vulkan_device_count() > 0) { test_that("ggml_vulkan_list_devices returns list", { devices <- ggml_vulkan_list_devices() expect_type(devices, "list") expect_gt(length(devices), 0) # Check first device structure dev <- devices[[1]] expect_named(dev, c("index", "name", "free_memory", "total_memory")) expect_type(dev$index, "integer") expect_type(dev$name, "character") expect_type(dev$free_memory, "double") expect_type(dev$total_memory, "double") }) test_that("ggml_vulkan_device_description returns string", { desc <- ggml_vulkan_device_description(0) expect_type(desc, "character") expect_gt(nchar(desc), 0) }) test_that("ggml_vulkan_device_memory returns memory info", { mem <- ggml_vulkan_device_memory(0) expect_type(mem, "list") expect_named(mem, c("free", "total")) expect_type(mem$free, "double") expect_type(mem$total, "double") expect_gte(mem$free, 0) expect_gte(mem$total, 0) expect_lte(mem$free, mem$total) }) test_that("ggml_vulkan_init and free work", { backend <- ggml_vulkan_init(0) expect_type(backend, "externalptr") # Test backend name name <- ggml_vulkan_backend_name(backend) expect_type(name, "character") expect_gt(nchar(name), 0) # Test is_backend check is_vk <- ggml_vulkan_is_backend(backend) expect_type(is_vk, "logical") expect_true(is_vk) # Free backend expect_no_error(ggml_vulkan_free(backend)) }) test_that("ggml_vulkan_device_description errors on invalid index", { count <- ggml_vulkan_device_count() expect_error( ggml_vulkan_device_description(count + 100), "Invalid device index" ) expect_error( ggml_vulkan_device_description(-1), "Invalid device index" ) }) test_that("ggml_vulkan_device_memory errors on invalid index", { count <- ggml_vulkan_device_count() expect_error( ggml_vulkan_device_memory(count + 100), "Invalid device index" ) expect_error( ggml_vulkan_device_memory(-1), "Invalid device index" ) }) test_that("ggml_vulkan_init errors on invalid index", { count <- ggml_vulkan_device_count() expect_error( ggml_vulkan_init(count + 100), "Invalid device index" ) expect_error( ggml_vulkan_init(-1), "Invalid device index" ) }) # ======================================================================== # Computational tests for LLM operations # ======================================================================== test_that("Vulkan: swiglu activation (LLaMA/Mistral)", { ctx <- ggml_init(mem_size = 16 * 1024 * 1024) ggml_set_no_alloc(ctx, TRUE) # Create input tensor for swiglu (will be split internally) # swiglu expects input of size 2*hidden_dim and splits it x <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 512) # Will split to 2x256 result <- ggml_swiglu(ctx, x) # Setup Vulkan backend backend_vk <- ggml_vulkan_init(0) buffer_vk <- ggml_backend_alloc_ctx_tensors(ctx, backend_vk) # Set test data (concatenated x and y) x_data <- seq(-2, 2, length.out = 512) ggml_backend_tensor_set_data(x, x_data) # Compute graph <- ggml_build_forward_expand(ctx, result) ggml_backend_graph_compute(backend_vk, graph) # Get result result_data <- ggml_backend_tensor_get_data(result) # Basic checks - swiglu computation 
    expect_length(result_data, 256)
    expect_false(any(is.na(result_data)))
    expect_false(any(is.infinite(result_data)))

    # Result should be in reasonable range
    expect_true(max(abs(result_data)) < 10)

    # SwiGLU produces non-zero output for non-zero input
    expect_true(sum(abs(result_data)) > 0.1)

    # Cleanup
    ggml_backend_buffer_free(buffer_vk)
    ggml_vulkan_free(backend_vk)
    ggml_free(ctx)
  })

  test_that("Vulkan: geglu activation", {
    ctx <- ggml_init(mem_size = 16 * 1024 * 1024)
    ggml_set_no_alloc(ctx, TRUE)

    # Create input tensor (will be split internally like swiglu)
    x <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 256)  # Will split to 2x128
    result <- ggml_geglu(ctx, x)

    # Setup Vulkan backend
    backend_vk <- ggml_vulkan_init(0)
    buffer_vk <- ggml_backend_alloc_ctx_tensors(ctx, backend_vk)

    # Set test data
    x_data <- seq(-1, 1, length.out = 256)
    ggml_backend_tensor_set_data(x, x_data)

    # Compute
    graph <- ggml_build_forward_expand(ctx, result)
    ggml_backend_graph_compute(backend_vk, graph)

    # Get result
    result_data <- ggml_backend_tensor_get_data(result)

    # Basic checks - geglu computation works
    expect_length(result_data, 128)
    expect_false(any(is.na(result_data)))
    expect_false(any(is.infinite(result_data)))

    # Result should be in reasonable range
    expect_true(max(abs(result_data)) < 10)

    # GeGLU produces non-zero output for non-zero input
    expect_true(sum(abs(result_data)) > 0.1)

    # Cleanup
    ggml_backend_buffer_free(buffer_vk)
    ggml_vulkan_free(backend_vk)
    ggml_free(ctx)
  })

  test_that("Vulkan: RoPE (Rotary Position Embedding)", {
    skip("RoPE requires position tensor - tested through higher-level models")
    # Note: RoPE operations require proper position input tensors and are
    # typically tested through complete transformer model inference.
    # The Vulkan backend supports rope_norm, rope_neox, rope_vision shaders.
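
    # Rough, untested sketch of what such a test could look like once a RoPE
    # binding is wired up. Assumes (not verified) that the package exposes a
    # ggml_rope(ctx, x, pos, n_dims, mode) wrapper mirroring ggml's C API and
    # a GGML_TYPE_I32 constant for the position tensor; kept commented out
    # because skip() above already short-circuits this test.
    #
    #   ctx <- ggml_init(mem_size = 16 * 1024 * 1024)
    #   ggml_set_no_alloc(ctx, TRUE)
    #   x   <- ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 4, 8)  # [head_dim, n_head, seq_len]
    #   pos <- ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 8)         # one position id per token
    #   out <- ggml_rope(ctx, x, pos, 64, 0)                     # mode 0 = "normal" RoPE (rope_norm shader)
    #   backend_vk <- ggml_vulkan_init(0)
    #   buffer_vk  <- ggml_backend_alloc_ctx_tensors(ctx, backend_vk)
    #   ggml_backend_tensor_set_data(x, rnorm(64 * 4 * 8))
    #   ggml_backend_tensor_set_data(pos, 0:7)
    #   graph <- ggml_build_forward_expand(ctx, out)
    #   ggml_backend_graph_compute(backend_vk, graph)
    #   out_data <- ggml_backend_tensor_get_data(out)
    #   expect_false(any(is.na(out_data)))                       # rotation preserves finiteness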
    expect_true(TRUE)
  })

  test_that("Vulkan: Flash Attention", {
    ctx <- ggml_init(mem_size = 64 * 1024 * 1024)
    ggml_set_no_alloc(ctx, TRUE)

    # Parameters
    n_head <- 4
    n_embd <- 128
    seq_len <- 32
    head_dim <- n_embd / n_head

    # Create Q, K, V tensors
    # Shape: [head_dim, n_head, seq_len, batch]
    q <- ggml_new_tensor_4d(ctx, GGML_TYPE_F32, head_dim, n_head, seq_len, 1)
    k <- ggml_new_tensor_4d(ctx, GGML_TYPE_F32, head_dim, n_head, seq_len, 1)
    v <- ggml_new_tensor_4d(ctx, GGML_TYPE_F32, head_dim, n_head, seq_len, 1)

    # Flash attention
    scale <- 1.0 / sqrt(head_dim)
    max_bias <- 0.0
    logit_softcap <- 0.0
    result <- ggml_flash_attn_ext(ctx, q, k, v, NULL, scale, max_bias, logit_softcap)

    # Setup Vulkan backend
    backend_vk <- ggml_vulkan_init(0)
    buffer_vk <- ggml_backend_alloc_ctx_tensors(ctx, backend_vk)

    # Set test data (normalized random)
    q_data <- rnorm(head_dim * n_head * seq_len)
    k_data <- rnorm(head_dim * n_head * seq_len)
    v_data <- rnorm(head_dim * n_head * seq_len)
    ggml_backend_tensor_set_data(q, q_data)
    ggml_backend_tensor_set_data(k, k_data)
    ggml_backend_tensor_set_data(v, v_data)

    # Compute
    graph <- ggml_build_forward_expand(ctx, result)
    ggml_backend_graph_compute(backend_vk, graph)

    # Get result
    result_data <- ggml_backend_tensor_get_data(result)

    # Basic checks
    expect_length(result_data, head_dim * n_head * seq_len)
    expect_false(any(is.na(result_data)))
    expect_false(any(is.infinite(result_data)))

    # Output should be weighted combination of V, so magnitude similar
    result_norm <- sqrt(mean(result_data^2))
    v_norm <- sqrt(mean(v_data^2))
    expect_true(abs(result_norm / v_norm - 1) < 0.5)

    # Cleanup
    ggml_backend_buffer_free(buffer_vk)
    ggml_vulkan_free(backend_vk)
    ggml_free(ctx)
  })

  test_that("Vulkan: Quantized tensor support Q4_0", {
    skip("Quantization tests require special setup - tested in quantized matmul")
    # Note: Direct quantization roundtrip testing is complex because:
    # 1. ggml_cpy between types needs proper tensor setup
    # 2. Quantization happens at backend level, not in compute graph
    # 3. Better to test through actual operations (mul_mat) that use quantized tensors
    expect_true(TRUE)
  })

  test_that("Vulkan: Quantized matrix multiplication Q4_0", {
    skip("Quantized matmul requires proper quantization setup - verified through benchmarks")
    # Note: Q4_0 and other quantized formats require:
    # 1. Proper quantization at tensor creation
    # 2. Backend-specific buffer management
    # 3. Dequantization shaders (dequant_q4_0.comp, mul_mat_vec_q4_0.comp)
    #
    # The Vulkan backend supports all quantization formats (Q4_0, Q8_0, etc.)
    # and is tested through real model inference and benchmarks.
    expect_true(TRUE)
  })

} else {

  test_that("Vulkan functions handle unavailable state", {
    skip("Vulkan not available or no devices found")
  })

}