# Tests for ag_* GPU device support (Phase 1)
#
# All tests that require a real GPU backend start with skip_if_no_backend(),
# which wraps:
#   skip_if(ggml_backend_dev_count() < 1, "No ggml backend device available")
#
# GPU tests switch the default device through with_gpu(), which restores the
# CPU device (and the f32 dtype, when it was changed) even if the test body
# throws, so a failing GPU test cannot leak state into later tests.
#
# CPU path tests run unconditionally and verify that the new device parameter
# does not break existing behaviour.

# ============================================================================
# Helpers
# ============================================================================

# Reset the default device to CPU.
reset_to_cpu <- function() {
  ag_device("cpu")
}

# Skip the calling test when no ggml backend device is reported.
skip_if_no_backend <- function() {
  skip_if(ggml_backend_dev_count() < 1, "No ggml backend device available")
}

# Evaluate `code` with the default device set to "gpu".
#
# @param code  Expression to evaluate; it is a promise, so it runs in the
#              caller's environment and variables it assigns stay visible
#              to the calling test.
# @param dtype Optional default dtype (e.g. "bf16", "f16") to select while
#              `code` runs; NULL leaves the dtype untouched.
# @return The value of `code`.
#
# The restore handlers are registered *before* the switch so they run on any
# exit path, including an error raised by the switch itself or by `code`.
with_gpu <- function(code, dtype = NULL) {
  on.exit(reset_to_cpu(), add = TRUE)
  ag_device("gpu")
  if (!is.null(dtype)) {
    # after = FALSE: restore the dtype first, then the device.
    on.exit(ag_dtype("f32"), add = TRUE, after = FALSE)
    ag_dtype(dtype)
  }
  force(code)
}

# ============================================================================
# CPU-path smoke tests (always run)
# ============================================================================

test_that("ag_tensor device field defaults to cpu", {
  x <- ag_tensor(matrix(1:4, 2, 2))
  expect_equal(x$device, "cpu")
  expect_equal(x$data, matrix(1:4, 2, 2))
})

test_that("ag_param device field defaults to cpu", {
  p <- ag_param(matrix(1:4, 2, 2))
  expect_equal(p$device, "cpu")
  expect_true(p$requires_grad)
})

test_that("ag_default_device returns cpu by default", {
  reset_to_cpu()
  expect_equal(ag_default_device(), "cpu")
})

test_that("ag_device('cpu') returns previous device invisibly", {
  reset_to_cpu()
  prev <- ag_device("cpu")
  expect_equal(prev, "cpu")
})

test_that("ag_matmul CPU path unchanged after refactor", {
  # A is the 2x2 identity, so A %*% B must reproduce B.
  A <- ag_param(matrix(c(1, 0, 0, 1), 2, 2))
  B <- ag_tensor(matrix(c(1, 2, 3, 4), 2, 2))
  out <- ag_matmul(A, B)
  expect_equal(ggmlR:::.ag_data(out), ggmlR:::.ag_data(B))
})

test_that("ag_relu CPU path unchanged after refactor", {
  x <- ag_param(matrix(c(-2, -1, 0, 1, 2, 3), 2, 3))
  with_grad_tape({
    out <- ag_relu(x)
    loss <- ag_mse_loss(out, matrix(0, 2, 3))
  })
  grads <- backward(loss)
  g <- get0(as.character(x$id), envir = grads)
  expect_equal(g[1, 1], 0)    # -2 -> grad 0
  expect_equal(g[2, 1], 0)    # -1 -> grad 0
  expect_gt(abs(g[2, 3]), 0)  #  3 -> grad nonzero
})

test_that("full training loop (CPU) still reduces loss", {
  set.seed(42)
  n <- 32L
  x_mat <- matrix(sample(c(0, 1), 2 * n, replace = TRUE), 2, n)
  y_mat <- matrix(as.numeric(xor(x_mat[1, ], x_mat[2, ])), 1, n)

  l1 <- ag_linear(2L, 4L, activation = "relu")
  l2 <- ag_linear(4L, 1L, activation = "sigmoid")
  opt <- optimizer_adam(c(l1$params(), l2$params()), lr = 0.05)

  losses <- numeric(30L)
  for (i in seq_len(30L)) {
    x <- ag_tensor(x_mat)
    y <- ag_tensor(y_mat)
    with_grad_tape({
      h <- l1$forward(x)
      out <- l2$forward(h)
      loss <- ag_mse_loss(out, y)
    })
    grads <- backward(loss)
    opt$step(grads)
    opt$zero_grad()
    losses[i] <- ggmlR:::.ag_data(loss)[1]
  }

  # Compare averaged windows rather than single steps to tolerate noise.
  expect_lt(mean(losses[21:30]), mean(losses[1:10]))
})

test_that("ag_to_device returns same tensor if already on target device", {
  x <- ag_tensor(matrix(1:4, 2, 2))
  y <- ag_to_device(x, "cpu")
  expect_equal(x$id, y$id)
})

# ============================================================================
# GPU tests (skip if no backend available)
# ============================================================================

test_that("ag_device('gpu') does not error when backend available", {
  skip_if_no_backend()
  # The switch itself is the behaviour under test, so it is done explicitly
  # here rather than through with_gpu().
  expect_silent(ag_device("gpu"))
  reset_to_cpu()
})

test_that("ag_tensor(x, device='gpu') has device='gpu'", {
  skip_if_no_backend()
  with_gpu({
    x <- ag_tensor(matrix(1:4, 2, 2), device = "gpu")
    expect_equal(x$device, "gpu")
    expect_false(is.null(x$data))  # data always kept
  })
})

test_that("ag_param(x, device='gpu') keeps $data as source-of-truth", {
  skip_if_no_backend()
  with_gpu({
    d <- matrix(1:4, 2, 2)
    p <- ag_param(d, device = "gpu")
    expect_equal(p$device, "gpu")
    expect_equal(p$data, d)
    expect_true(p$requires_grad)
  })
})

test_that("ag_matmul GPU forward equals CPU forward (tol=1e-4)", {
  skip_if_no_backend()
  set.seed(7)
  a_mat <- matrix(runif(6), 2, 3)
  b_mat <- matrix(runif(6), 3, 2)
  expected <- a_mat %*% b_mat

  with_gpu({
    A <- ag_param(a_mat, device = "gpu")
    B <- ag_tensor(b_mat, device = "gpu")
    with_grad_tape({
      out <- ag_matmul(A, B)
    })
    result <- ggmlR:::.ag_data(out)
    expect_equal(result, expected, tolerance = 1e-4)
  })
})

test_that("ag_relu GPU forward equals CPU forward", {
  skip_if_no_backend()
  set.seed(11)
  x_mat <- matrix(runif(12, -1, 1), 3, 4)
  expected <- pmax(x_mat, 0)

  with_gpu({
    x <- ag_param(x_mat, device = "gpu")
    with_grad_tape({
      out <- ag_relu(x)
    })
    result <- ggmlR:::.ag_data(out)
    expect_equal(result, expected, tolerance = 1e-6)
  })
})

test_that("backward on GPU tensors matches CPU backward (tol=1e-4)", {
  skip_if_no_backend()
  set.seed(99)
  w_mat <- matrix(runif(6, -1, 1), 2, 3)
  x_mat <- matrix(runif(3), 3, 1)
  y_mat <- matrix(runif(2), 2, 1)

  # CPU reference
  W_cpu <- ag_param(w_mat)
  with_grad_tape({
    out_cpu <- ag_matmul(W_cpu, ag_tensor(x_mat))
    loss_cpu <- ag_mse_loss(out_cpu, y_mat)
  })
  grads_cpu <- backward(loss_cpu)
  g_cpu <- get0(as.character(W_cpu$id), envir = grads_cpu)

  # GPU
  with_gpu({
    W_gpu <- ag_param(w_mat, device = "gpu")
    with_grad_tape({
      out_gpu <- ag_matmul(W_gpu, ag_tensor(x_mat, device = "gpu"))
      loss_gpu <- ag_mse_loss(out_gpu, ag_tensor(y_mat, device = "gpu"))
    })
    grads_gpu <- backward(loss_gpu)
    g_gpu <- get0(as.character(W_gpu$id), envir = grads_gpu)

    expect_equal(g_gpu, g_cpu, tolerance = 1e-4)
  })
})

test_that("ag_gradcheck passes for GPU tensors (matmul + relu)", {
  skip_if_no_backend()
  set.seed(55)
  with_gpu({
    W <- ag_param(matrix(runif(6, -0.5, 0.5), 2, 3), device = "gpu")
    x <- ag_tensor(matrix(runif(3), 3, 1), device = "gpu")
    result <- ag_gradcheck(
      fn = function(ins) {
        ag_mse_loss(ag_relu(ag_matmul(ins$W, ins$x)), matrix(0, 2, 1))
      },
      inputs = list(W = W, x = x),
      atol = 1e-3,
      quiet = TRUE
    )
    expect_true(result)
  })
})

test_that("training loop on GPU reduces loss over 10 epochs", {
  skip_if_no_backend()
  set.seed(77)
  with_gpu({
    n <- 32L
    x_mat <- matrix(runif(4 * n), 4, n)
    y_mat <- matrix(runif(2 * n), 2, n)

    W <- ag_param(matrix(runif(8, -0.5, 0.5), 2, 4), device = "gpu")
    b <- ag_param(matrix(0, 2, 1), device = "gpu")
    opt <- optimizer_adam(list(W = W, b = b), lr = 0.01)

    losses <- numeric(10L)
    for (i in seq_len(10L)) {
      x <- ag_tensor(x_mat, device = "gpu")
      y <- ag_tensor(y_mat, device = "gpu")
      with_grad_tape({
        h <- ag_relu(ag_add(ag_matmul(W, x), b))
        loss <- ag_mse_loss(h, y)
      })
      grads <- backward(loss)
      opt$step(grads)
      opt$zero_grad()
      losses[i] <- as.numeric(ggmlR:::.ag_data(loss))
    }

    expect_lt(losses[10L], losses[1L])
  })
})

test_that("ag_to_device(tensor, 'cpu') correctly copies GPU data", {
  skip_if_no_backend()
  set.seed(13)
  d <- matrix(runif(6), 2, 3)

  with_gpu({
    gpu_t <- ag_tensor(d, device = "gpu")
    cpu_t <- ag_to_device(gpu_t, "cpu")
    expect_equal(cpu_t$device, "cpu")
    expect_equal(cpu_t$data, d, tolerance = 1e-6)
  })
})

test_that("ag_softmax GPU forward equals CPU forward", {
  skip_if_no_backend()
  set.seed(21)
  x_mat <- matrix(runif(12, -2, 2), 3, 4)

  # CPU reference: column-wise softmax (column max subtracted before exp)
  mx <- apply(x_mat, 2, max)
  mx <- matrix(mx, 3, 4, byrow = TRUE)
  e <- exp(x_mat - mx)
  expected <- e / matrix(colSums(e), 3, 4, byrow = TRUE)

  with_gpu({
    x <- ag_tensor(x_mat, device = "gpu")
    with_grad_tape({
      out <- ag_softmax(x)
    })
    result <- ggmlR:::.ag_data(out)
    expect_equal(result, expected, tolerance = 1e-5)
  })
})

test_that("ag_add GPU with [m,1] broadcast equals CPU", {
  skip_if_no_backend()
  set.seed(22)
  a_mat <- matrix(runif(12), 3, 4)
  b_mat <- matrix(runif(3), 3, 1)
  # Column-major recycling adds b_mat[i] to every element of row i.
  expected <- a_mat + as.vector(b_mat)

  with_gpu({
    A <- ag_tensor(a_mat, device = "gpu")
    B <- ag_param(b_mat, device = "gpu")
    with_grad_tape({
      out <- ag_add(A, B)
    })
    result <- ggmlR:::.ag_data(out)
    expect_equal(result, expected, tolerance = 1e-5)
  })
})

test_that("ag_dtype('bf16') + ag_matmul GPU result close to f32", {
  skip_if_no_backend()
  set.seed(51)
  a_mat <- matrix(runif(6), 2, 3)
  b_mat <- matrix(runif(6), 3, 2)
  expected <- a_mat %*% b_mat

  with_gpu(dtype = "bf16", {
    A <- ag_param(a_mat, device = "gpu")
    B <- ag_tensor(b_mat, device = "gpu")
    expect_equal(A$dtype, "bf16")
    with_grad_tape({
      out <- ag_matmul(A, B)
    })
    result <- ggmlR:::.ag_data(out)
    # bf16 has ~3 decimal digits of precision
    expect_equal(result, expected, tolerance = 1e-2)
    expect_equal(out$dtype, "bf16")
  })
})

test_that("ag_dtype('f16') + ag_relu GPU result close to f32", {
  skip_if_no_backend()
  set.seed(52)
  x_mat <- matrix(runif(8, -1, 1), 2, 4)
  expected <- pmax(x_mat, 0)

  with_gpu(dtype = "f16", {
    x <- ag_param(x_mat, device = "gpu")
    with_grad_tape({
      out <- ag_relu(x)
    })
    result <- ggmlR:::.ag_data(out)
    expect_equal(result, expected, tolerance = 1e-2)
  })
})

test_that("ag_default_dtype returns f32 by default", {
  ag_dtype("f32")
  expect_equal(ag_default_dtype(), "f32")
})

test_that("ag_dtype switches and returns previous", {
  ag_dtype("f32")
  prev <- ag_dtype("bf16")
  expect_equal(prev, "f32")
  expect_equal(ag_default_dtype(), "bf16")
  ag_dtype("f32")
})

test_that("ag_sum GPU dim=1 (rowSums) equals CPU", {
  skip_if_no_backend()
  set.seed(31)
  x_mat <- matrix(runif(12), 3, 4)

  with_gpu({
    x <- ag_tensor(x_mat, device = "gpu")
    with_grad_tape({
      out <- ag_sum(x, dim = 1L)
    })
    expect_equal(
      ggmlR:::.ag_data(out),
      matrix(rowSums(x_mat), 3, 1),
      tolerance = 1e-5
    )
  })
})

test_that("ag_sum GPU dim=2 (colSums) equals CPU", {
  skip_if_no_backend()
  set.seed(32)
  x_mat <- matrix(runif(12), 3, 4)

  with_gpu({
    x <- ag_tensor(x_mat, device = "gpu")
    with_grad_tape({
      out <- ag_sum(x, dim = 2L)
    })
    expect_equal(
      ggmlR:::.ag_data(out),
      matrix(colSums(x_mat), 1, 4),
      tolerance = 1e-5
    )
  })
})

test_that("ag_mean GPU dim=1 (rowMeans) equals CPU", {
  skip_if_no_backend()
  set.seed(33)
  x_mat <- matrix(runif(12), 3, 4)

  with_gpu({
    x <- ag_tensor(x_mat, device = "gpu")
    with_grad_tape({
      out <- ag_mean(x, dim = 1L)
    })
    expect_equal(
      ggmlR:::.ag_data(out),
      matrix(rowMeans(x_mat), 3, 1),
      tolerance = 1e-5
    )
  })
})

test_that("ag_mean GPU dim=2 (colMeans) equals CPU", {
  skip_if_no_backend()
  set.seed(34)
  x_mat <- matrix(runif(12), 3, 4)

  with_gpu({
    x <- ag_tensor(x_mat, device = "gpu")
    with_grad_tape({
      out <- ag_mean(x, dim = 2L)
    })
    expect_equal(
      ggmlR:::.ag_data(out),
      matrix(colMeans(x_mat), 1, 4),
      tolerance = 1e-5
    )
  })
})

test_that("ag_pow GPU p=2 (sqr) equals CPU", {
  skip_if_no_backend()
  set.seed(41)
  x_mat <- matrix(runif(6, 0.1, 2), 2, 3)

  with_gpu({
    x <- ag_tensor(x_mat, device = "gpu")
    with_grad_tape({
      out <- ag_pow(x, 2)
    })
    expect_equal(ggmlR:::.ag_data(out), x_mat^2, tolerance = 1e-5)
  })
})

test_that("ag_pow GPU p=0.5 (sqrt) equals CPU", {
  skip_if_no_backend()
  set.seed(42)
  x_mat <- matrix(runif(6, 0.1, 2), 2, 3)

  with_gpu({
    x <- ag_tensor(x_mat, device = "gpu")
    with_grad_tape({
      out <- ag_pow(x, 0.5)
    })
    expect_equal(ggmlR:::.ag_data(out), x_mat^0.5, tolerance = 1e-5)
  })
})

test_that("ag_pow GPU p=3 (general) equals CPU", {
  skip_if_no_backend()
  set.seed(43)
  x_mat <- matrix(runif(6, 0.1, 2), 2, 3)

  with_gpu({
    x <- ag_tensor(x_mat, device = "gpu")
    with_grad_tape({
      out <- ag_pow(x, 3)
    })
    expect_equal(ggmlR:::.ag_data(out), x_mat^3, tolerance = 1e-4)
  })
})

test_that("ag_add GPU with [1,n] broadcast equals CPU", {
  skip_if_no_backend()
  set.seed(23)
  a_mat <- matrix(runif(12), 3, 4)
  b_mat <- matrix(runif(4), 1, 4)
  # rep(each = 3) lays b_mat[j] under every row of column j (column-major).
  expected <- a_mat + rep(b_mat, each = 3)

  with_gpu({
    A <- ag_tensor(a_mat, device = "gpu")
    B <- ag_param(b_mat, device = "gpu")
    with_grad_tape({
      out <- ag_add(A, B)
    })
    result <- ggmlR:::.ag_data(out)
    expect_equal(result, expected, tolerance = 1e-5)
  })
})