# Tests for new ops and gradcheck # Every op is tested: (a) forward value, (b) gradcheck passes source_autograd <- function() { # loaded by devtools in package context; source directly in standalone tests if (!exists("ag_tensor", mode = "function")) source("R/autograd.R") } source_autograd() # ---- helpers ---------------------------------------------------------------- # Tiny wrapper: reset tape counter between tests so IDs are stable reset_tape <- function() { .ag_tape$nodes <- list() .ag_tape$enabled <- FALSE } # ---- ag_sum ----------------------------------------------------------------- test_that("ag_sum(all): forward correct", { x <- ag_tensor(matrix(1:6, 2, 3)) expect_equal(as.numeric(ag_sum(x)$data), 21) }) test_that("ag_sum(dim=1, keepdim=FALSE): shape and value", { x <- ag_tensor(matrix(1:6, 2, 3)) # [[1,3,5],[2,4,6]] s <- ag_sum(x, dim = 1L) expect_equal(dim(s$data), c(2L, 1L)) expect_equal(as.numeric(s$data), c(9, 12)) }) test_that("ag_sum(dim=2): shape and value", { x <- ag_tensor(matrix(1:6, 2, 3)) s <- ag_sum(x, dim = 2L) expect_equal(dim(s$data), c(1L, 3L)) expect_equal(as.numeric(s$data), c(3, 7, 11)) }) test_that("ag_sum(dim=1, keepdim=TRUE): shape preserved", { x <- ag_tensor(matrix(1:6, 2, 3)) s <- ag_sum(x, dim = 1L, keepdim = TRUE) expect_equal(dim(s$data), c(2L, 1L)) }) test_that("ag_sum(dim=2, keepdim=TRUE): shape preserved", { x <- ag_tensor(matrix(1:6, 2, 3)) s <- ag_sum(x, dim = 2L, keepdim = TRUE) expect_equal(dim(s$data), c(1L, 3L)) }) test_that("gradcheck: ag_sum(dim=2, keepdim=TRUE)", { set.seed(3) W <- ag_param(matrix(runif(6, 0.5, 1.5), 2, 3)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_sum(ins$W, dim = 2L, keepdim = TRUE)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("gradcheck: ag_sum(all)", { set.seed(1) W <- ag_param(matrix(runif(6, -1, 1), 2, 3)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_relu(ins$W)), inputs = list(W = W), atol = 1e-4, verbose = FALSE, quiet = TRUE ) expect_true(ok) }) test_that("gradcheck: ag_sum(dim=1)", { set.seed(2) W <- ag_param(matrix(runif(6, 0.5, 1.5), 2, 3)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_sum(ins$W, dim = 1L)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_mean ---------------------------------------------------------------- test_that("ag_mean(all): forward", { x <- ag_tensor(matrix(1:4, 2, 2)) expect_equal(as.numeric(ag_mean(x)$data), 2.5) }) test_that("gradcheck: ag_mean", { set.seed(3) W <- ag_param(matrix(runif(4, -1, 1), 2, 2)) ok <- ag_gradcheck( fn = function(ins) ag_mean(ag_sigmoid(ins$W)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_log ----------------------------------------------------------------- test_that("ag_log forward", { x <- ag_tensor(matrix(c(1, exp(1), exp(2)), 1, 3)) out <- ag_log(x) expect_equal(as.numeric(out$data), c(0, 1, 2), tolerance = 1e-6) }) test_that("gradcheck: ag_log", { set.seed(4) W <- ag_param(matrix(runif(4, 0.5, 2), 2, 2)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_log(ins$W)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_exp ----------------------------------------------------------------- test_that("ag_exp forward", { x <- ag_tensor(matrix(c(0, 1, 2), 1, 3)) out <- ag_exp(x) expect_equal(as.numeric(out$data), exp(c(0, 1, 2)), tolerance = 1e-6) }) test_that("gradcheck: ag_exp", { set.seed(5) W <- ag_param(matrix(runif(4, -0.5, 0.5), 2, 2)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_exp(ins$W)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_reshape ------------------------------------------------------------- test_that("ag_reshape: data unchanged, shape changes", { x <- ag_tensor(matrix(1:6, 2, 3)) out <- ag_reshape(x, 3L, 2L) expect_equal(dim(out$data), c(3L, 2L)) expect_equal(as.numeric(out$data), as.numeric(x$data)) }) test_that("gradcheck: ag_reshape", { set.seed(6) W <- ag_param(matrix(runif(6, -1, 1), 2, 3)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_reshape(ins$W, 3L, 2L)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_transpose ----------------------------------------------------------- test_that("ag_transpose forward", { x <- ag_tensor(matrix(1:6, 2, 3)) out <- ag_transpose(x) expect_equal(dim(out$data), c(3L, 2L)) expect_equal(out$data, t(x$data)) }) test_that("gradcheck: ag_transpose", { set.seed(7) W <- ag_param(matrix(runif(4, -1, 1), 2, 2)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_relu(ag_transpose(ins$W))), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_clamp --------------------------------------------------------------- test_that("ag_clamp forward", { x <- ag_param(matrix(c(-2, 0, 3), 1, 3)) out <- ag_clamp(x, lo = -1, hi = 2) expect_equal(as.numeric(out$data), c(-1, 0, 2)) }) test_that("gradcheck: ag_clamp (interior points only)", { set.seed(8) # Use values safely inside (lo, hi) so grad = 1 everywhere W <- ag_param(matrix(runif(4, -0.5, 0.5), 2, 2)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_clamp(ins$W, lo = -1, hi = 1)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_pow ----------------------------------------------------------------- test_that("ag_pow forward", { x <- ag_tensor(matrix(c(2, 3, 4), 1, 3)) out <- ag_pow(x, 2) expect_equal(as.numeric(out$data), c(4, 9, 16)) }) test_that("gradcheck: ag_pow (p=2)", { set.seed(9) W <- ag_param(matrix(runif(4, 0.5, 2), 2, 2)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_pow(ins$W, 2)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("gradcheck: ag_pow (p=0.5, sqrt)", { set.seed(10) W <- ag_param(matrix(runif(4, 0.5, 2), 2, 2)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_pow(ins$W, 0.5)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- gradcheck: composite chain --------------------------------------------- test_that("gradcheck: matmul -> relu -> sum", { set.seed(11) W <- ag_param(matrix(runif(6, -1, 1), 2, 3)) x <- ag_tensor(matrix(runif(3, -1, 1), 3, 1)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_relu(ag_matmul(ins$W, x))), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("gradcheck: add with bias broadcast", { set.seed(12) W <- ag_param(matrix(runif(8, -1, 1), 4, 2)) b <- ag_param(matrix(runif(4, -0.5, 0.5), 4, 1)) x <- ag_tensor(matrix(runif(2, -1, 1), 2, 1)) ok <- ag_gradcheck( fn = function(ins) ag_mse_loss(ag_add(ag_matmul(ins$W, x), ins$b), matrix(0, 4, 1)), inputs = list(W = W, b = b), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("gradcheck: sigmoid + generic cross_entropy", { # ag_cross_entropy_loss has exact grad -t/p/n, works with any pred (not just softmax) set.seed(13) W <- ag_param(matrix(runif(6, -0.5, 0.5), 3, 2)) x <- ag_tensor(matrix(runif(2, -1, 1), 2, 1)) y <- matrix(c(1, 0, 0), 3, 1) ok <- ag_gradcheck( fn = function(ins) ag_cross_entropy_loss(ag_sigmoid(ag_matmul(ins$W, x)), y), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("gradcheck: fused softmax_cross_entropy_loss", { set.seed(131) W <- ag_param(matrix(runif(6, -0.5, 0.5), 3, 2)) x <- ag_tensor(matrix(runif(2, -1, 1), 2, 1)) y <- matrix(c(1, 0, 0), 3, 1) ok <- ag_gradcheck( fn = function(ins) ag_softmax_cross_entropy_loss(ag_matmul(ins$W, x), y), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("gradcheck: log(exp(x)) ~ identity gradient", { set.seed(14) W <- ag_param(matrix(runif(4, -0.5, 0.5), 2, 2)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_log(ag_exp(ins$W))), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("gradcheck: pow(x,2) vs mul(x,x)", { set.seed(15) W <- ag_param(matrix(runif(4, 0.5, 2), 2, 2)) W2 <- ag_param(W$data + 0) # same values, separate param ok_pow <- ag_gradcheck( fn = function(ins) ag_sum(ag_pow(ins$W, 2)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) ok_mul <- ag_gradcheck( fn = function(ins) ag_sum(ag_mul(ins$W2, ins$W2)), inputs = list(W2 = W2), atol = 1e-4, quiet = TRUE ) expect_true(ok_pow) expect_true(ok_mul) }) # ---- ag_sub ----------------------------------------------------------------- test_that("ag_sub: forward value correct", { A <- ag_tensor(matrix(c(5, 3, 2, 1), 2, 2)) B <- ag_tensor(matrix(c(1, 2, 1, 0), 2, 2)) C <- ag_sub(A, B) expect_equal(C$data, matrix(c(4, 1, 1, 1), 2, 2)) }) test_that("gradcheck: ag_sub", { set.seed(20) A <- ag_param(matrix(runif(4, -1, 1), 2, 2)) B <- ag_param(matrix(runif(4, -1, 1), 2, 2)) ok <- ag_gradcheck( fn = function(ins) ag_mse_loss(ag_sub(ins$A, ins$B), matrix(0, 2, 2)), inputs = list(A = A, B = B), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("ag_sub broadcast [d,s] - [1,s]: forward correct", { A <- ag_tensor(matrix(c(1,2,3,4,5,6), 3, 2)) # [3,2] B <- ag_tensor(matrix(c(1, 2), 1, 2)) # [1,2] C <- ag_sub(A, B) expect_equal(dim(C$data), c(3L, 2L)) expect_equal(C$data, matrix(c(0,1,2, 2,3,4), 3, 2)) }) test_that("gradcheck: ag_sub broadcast [d,s] - [1,s]", { set.seed(21) A <- ag_param(matrix(rnorm(12), 4, 3)) B <- ag_param(matrix(rnorm(3), 1, 3)) ok <- ag_gradcheck( fn = function(ins) ag_mse_loss(ag_sub(ins$A, ins$B), matrix(0, 4, 3)), inputs = list(A = A, B = B), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("ag_mean(dim=1, keepdim=TRUE): shape preserved", { x <- ag_tensor(matrix(1:6, 2, 3)) m <- ag_mean(x, dim = 1L, keepdim = TRUE) expect_equal(dim(m$data), c(2L, 1L)) }) test_that("ag_mean(dim=2, keepdim=TRUE): shape preserved", { x <- ag_tensor(matrix(1:6, 2, 3)) m <- ag_mean(x, dim = 2L, keepdim = TRUE) expect_equal(dim(m$data), c(1L, 3L)) }) test_that("gradcheck: ag_mean(dim=1, keepdim=TRUE)", { set.seed(22) W <- ag_param(matrix(rnorm(12), 4, 3)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_mean(ins$W, dim = 1L, keepdim = TRUE)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("gradcheck: ag_mean(dim=2, keepdim=TRUE)", { set.seed(23) W <- ag_param(matrix(rnorm(12), 4, 3)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_mean(ins$W, dim = 2L, keepdim = TRUE)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_scale --------------------------------------------------------------- test_that("ag_scale: forward value correct", { x <- ag_tensor(matrix(c(2, 4, 6, 8), 2, 2)) s <- ag_scale(x, 0.5) expect_equal(s$data, matrix(c(1, 2, 3, 4), 2, 2)) }) test_that("gradcheck: ag_scale", { set.seed(21) W <- ag_param(matrix(runif(4, -1, 1), 2, 2)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_scale(ins$W, 3.0)), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_softmax forward ----------------------------------------------------- test_that("ag_softmax: output sums to 1 per column", { set.seed(22) x <- ag_tensor(matrix(runif(3 * 8, -2, 2), 3, 8)) out <- ag_softmax(x) col_sums <- colSums(out$data) expect_equal(col_sums, rep(1.0, 8), tolerance = 1e-7) }) test_that("ag_softmax: all outputs positive", { x <- ag_tensor(matrix(c(-10, 0, 10), 3, 1)) out <- ag_softmax(x) expect_true(all(out$data > 0)) }) test_that("gradcheck: ag_softmax", { set.seed(23) W <- ag_param(matrix(runif(6, -1, 1), 3, 2)) x <- ag_tensor(matrix(runif(2, -1, 1), 2, 1)) ok <- ag_gradcheck( fn = function(ins) ag_sum(ag_softmax(ag_matmul(ins$W, x))), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_softmax_cross_entropy_loss forward ---------------------------------- test_that("ag_softmax_cross_entropy_loss: value is positive scalar", { set.seed(24) logits <- ag_param(matrix(runif(3 * 4, -2, 2), 3, 4)) y <- matrix(c(1,0,0, 0,1,0, 0,0,1, 1,0,0), 3, 4) with_grad_tape({ loss <- ag_softmax_cross_entropy_loss(logits, y) }) expect_equal(dim(loss$data), c(1L, 1L)) expect_gt(as.numeric(loss$data), 0) }) test_that("ag_softmax_cross_entropy_loss: perfect logits -> low loss", { # Large positive logits on correct class -> near-zero loss logits <- ag_tensor(matrix(c(10, -10, -10, -10, 10, -10), 3, 2)) y <- matrix(c(1, 0, 0, 0, 1, 0), 3, 2) loss <- ag_softmax_cross_entropy_loss(logits, y) expect_lt(as.numeric(loss$data), 0.01) }) # ---- print methods ---------------------------------------------------------- test_that("print.ag_tensor works without error", { x <- ag_param(matrix(c(1, 2, 3, 4), 2, 2)) expect_output(print(x), "ag_tensor") expect_output(print(x), "requires_grad") }) test_that("print.ag_optimizer_sgd works without error", { w <- ag_param(matrix(runif(4), 2, 2)) opt <- optimizer_sgd(list(w = w), lr = 0.05, momentum = 0.9) expect_output(print(opt), "SGD") expect_output(print(opt), "0.05") }) test_that("print.ag_optimizer_adam works without error", { w <- ag_param(matrix(runif(4), 2, 2)) opt <- optimizer_adam(list(w = w), lr = 1e-3) expect_output(print(opt), "Adam") }) test_that("print.ag_sequential works without error", { if (!exists("ag_sequential", mode = "function")) source("R/ag_layers.R") model <- ag_sequential( ag_linear(4L, 8L), ag_linear(8L, 2L) ) expect_output(print(model), "ag_sequential") expect_output(print(model), "2 layers") }) test_that("print.ag_dataloader works without error", { if (!exists("ag_dataloader", mode = "function")) source("R/ag_training.R") dl <- ag_dataloader(matrix(runif(4 * 32), 4, 32), batch_size = 8L) expect_output(print(dl), "ag_dataloader") expect_output(print(dl), "n=32") }) test_that("print.lr_scheduler_step works without error", { if (!exists("lr_scheduler_step", mode = "function")) source("R/ag_training.R") w <- ag_param(matrix(runif(4), 2, 2)) opt <- optimizer_adam(list(w = w), lr = 0.1) sch <- lr_scheduler_step(opt, step_size = 5L) expect_output(print(sch), "lr_scheduler_step") }) test_that("print.lr_scheduler_cosine works without error", { if (!exists("lr_scheduler_cosine", mode = "function")) source("R/ag_training.R") w <- ag_param(matrix(runif(4), 2, 2)) opt <- optimizer_adam(list(w = w), lr = 0.1) sch <- lr_scheduler_cosine(opt, T_max = 10L) expect_output(print(sch), "lr_scheduler_cosine") }) # ---- ag_mul broadcast ------------------------------------------------------- test_that("ag_mul broadcast [d,s] * [1,s]: forward correct", { A <- ag_tensor(matrix(c(1,2,3,4,5,6), 3, 2)) # [3,2] B <- ag_tensor(matrix(c(2, 3), 1, 2)) # [1,2] C <- ag_mul(A, B) expect_equal(dim(C$data), c(3L, 2L)) expect_equal(C$data, matrix(c(2,4,6, 12,15,18), 3, 2)) }) test_that("ag_mul broadcast [d,s] * [d,1]: forward correct", { A <- ag_tensor(matrix(c(1,2,3,4,5,6), 3, 2)) # [3,2] B <- ag_tensor(matrix(c(10, 20, 30), 3, 1)) # [3,1] C <- ag_mul(A, B) expect_equal(dim(C$data), c(3L, 2L)) expect_equal(C$data, matrix(c(10,40,90, 40,100,180), 3, 2)) }) test_that("gradcheck: ag_mul broadcast [d,s] * [1,s]", { set.seed(50) A <- ag_param(matrix(rnorm(12), 4, 3)) B <- ag_param(matrix(rnorm(3), 1, 3)) ok <- ag_gradcheck( fn = function(ins) ag_mse_loss(ag_mul(ins$A, ins$B), matrix(0, 4, 3)), inputs = list(A = A, B = B), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) test_that("gradcheck: ag_mul broadcast [d,s] * [d,1]", { set.seed(51) A <- ag_param(matrix(rnorm(12), 4, 3)) B <- ag_param(matrix(rnorm(4), 4, 1)) ok <- ag_gradcheck( fn = function(ins) ag_mse_loss(ag_mul(ins$A, ins$B), matrix(0, 4, 3)), inputs = list(A = A, B = B), atol = 1e-4, quiet = TRUE ) expect_true(ok) }) # ---- ag_softmax_cross_entropy_loss: integer targets ------------------------- test_that("ag_softmax_cross_entropy_loss: integer targets == one-hot", { set.seed(60) logits <- matrix(rnorm(16 * 8), 16, 8) idx <- c(2L, 0L, 5L, 1L, 3L, 7L, 4L, 6L) # 0-based oh <- matrix(0.0, 16, 8) for (i in seq_along(idx)) oh[idx[i] + 1L, i] <- 1.0 lt1 <- ag_param(logits); lt2 <- ag_param(logits) with_grad_tape({ l1 <- ag_softmax_cross_entropy_loss(lt1, idx) }) with_grad_tape({ l2 <- ag_softmax_cross_entropy_loss(lt2, oh) }) expect_equal(as.numeric(l1$data), as.numeric(l2$data), tolerance = 1e-6) }) test_that("ag_softmax_cross_entropy_loss: integer targets give sensible loss", { # Uniform logits -> loss ~ log(vocab_size) set.seed(61) logits <- ag_param(matrix(0.0, 8, 4)) targets <- c(0L, 1L, 2L, 3L) with_grad_tape({ loss <- ag_softmax_cross_entropy_loss(logits, targets) }) expect_equal(as.numeric(loss$data), log(8), tolerance = 1e-5) }) test_that("gradcheck: ag_softmax_cross_entropy_loss with integer targets", { set.seed(62) W <- ag_param(matrix(runif(24, -0.5, 0.5), 8, 3)) x <- ag_tensor(matrix(runif(3), 3, 1)) idx <- 2L # single position ok <- ag_gradcheck( fn = function(ins) ag_softmax_cross_entropy_loss(ag_matmul(ins$W, x), idx), inputs = list(W = W), atol = 1e-4, quiet = TRUE ) expect_true(ok) })