# Helpers for TabICL primitive parity tests. # # Fixtures under tests/testthat/fixtures/tabicl/ are generated from the Python # reference implementation by dev/tabicl/dump_primitives.py: each holds a # module's weights, the inputs, and the golden output. The tests build the R # module, copy the weights in, run a forward pass, and compare to the golden # output. See dev/tabicl/README.md. # Skip a test unless torch + safetensors are usable and the fixtures exist. skip_if_no_tabicl_fixtures <- function(name) { skip_on_cran() skip_if_not_installed("torch") skip_if_not_installed("safetensors") skip_if_not_installed("jsonlite") if (!torch::torch_is_installed()) { skip("libtorch not installed") } if (!file.exists(tabicl_fixture_path(name, "safetensors.gz"))) { skip(paste0("fixture not found: ", name)) } } tabicl_fixture_path <- function(name, ext) { testthat::test_path("fixtures", "tabicl", paste0(name, ".", ext)) } # Fixtures are stored gzip-compressed (see dev/tabicl/dump_primitives.py) so the # raw safetensors header never trips R CMD check's executable-magic heuristic. # safe_load_file needs a real path, so decompress to a temp file first. tabicl_load_fixture <- function(name) { gz <- tabicl_fixture_path(name, "safetensors.gz") con <- gzfile(gz, "rb") on.exit(close(con)) chunks <- list() repeat { chunk <- readBin(con, "raw", n = 1024L^2) if (length(chunk) == 0L) { break } chunks[[length(chunks) + 1L]] <- chunk } tmp <- tempfile(fileext = ".safetensors") writeBin(do.call(c, chunks), tmp) safetensors::safe_load_file(tmp, framework = "torch") } tabicl_fixture_meta <- function(name) { jsonlite::fromJSON(tabicl_fixture_path(name, "json")) } # Largest absolute elementwise difference between two tensors. tabicl_max_abs_diff <- function(a, b) { as.numeric(torch::torch_max(torch::torch_abs(a - b))) } # Relative max error: max abs diff scaled by the reference's max magnitude. Used # for stage outputs that legitimately contain large values (e.g. the column # embedder's CLS slots keep the -100 skip value through residual connections), # where an absolute float32 tolerance would be misleading. tabicl_rel_max_diff <- function(actual, expected) { scale <- max(as.numeric(torch::torch_max(torch::torch_abs(expected))), 1e-8) tabicl_max_abs_diff(actual, expected) / scale } # Copy a qassmax-mlp-elementwise SSMax layer's weights from a fixture. Python # nn.Sequential indices 0/2 (Linear, GELU, Linear) map to R 1-based [[1]]/[[3]]. tabicl_copy_ssmax <- function(layer, f, prefix = "ssmax.") { torch::with_no_grad({ layer$base_mlp[[1]]$weight$copy_(f[[paste0(prefix, "base_mlp.0.weight")]]) layer$base_mlp[[1]]$bias$copy_(f[[paste0(prefix, "base_mlp.0.bias")]]) layer$base_mlp[[3]]$weight$copy_(f[[paste0(prefix, "base_mlp.2.weight")]]) layer$base_mlp[[3]]$bias$copy_(f[[paste0(prefix, "base_mlp.2.bias")]]) layer$query_mlp[[1]]$weight$copy_(f[[paste0(prefix, "query_mlp.0.weight")]]) layer$query_mlp[[1]]$bias$copy_(f[[paste0(prefix, "query_mlp.0.bias")]]) layer$query_mlp[[3]]$weight$copy_(f[[paste0(prefix, "query_mlp.2.weight")]]) layer$query_mlp[[3]]$bias$copy_(f[[paste0(prefix, "query_mlp.2.bias")]]) }) invisible(layer) } # Copy the packed projection + output projection of a tabicl_mha from a fixture. tabicl_copy_mha <- function(mha, f) { torch::with_no_grad({ mha$in_proj_weight$copy_(f$in_proj_weight) mha$in_proj_bias$copy_(f$in_proj_bias) mha$out_proj$weight$copy_(f[["out_proj.weight"]]) mha$out_proj$bias$copy_(f[["out_proj.bias"]]) }) if (!is.null(mha$ssmax_layer)) { tabicl_copy_ssmax(mha$ssmax_layer, f) } invisible(mha) } # Copy a tabicl_mha_block's parameters from fixture tensors keyed by `prefix` # (e.g. "ri.tf_row.blocks.0."). tabicl_copy_mha_block <- function(block, f, prefix) { torch::with_no_grad({ block$norm1$weight$copy_(f[[paste0(prefix, "norm1.weight")]]) block$norm2$weight$copy_(f[[paste0(prefix, "norm2.weight")]]) if (!is.null(block$norm1$bias)) { block$norm1$bias$copy_(f[[paste0(prefix, "norm1.bias")]]) block$norm2$bias$copy_(f[[paste0(prefix, "norm2.bias")]]) } block$linear1$weight$copy_(f[[paste0(prefix, "linear1.weight")]]) block$linear1$bias$copy_(f[[paste0(prefix, "linear1.bias")]]) block$linear2$weight$copy_(f[[paste0(prefix, "linear2.weight")]]) block$linear2$bias$copy_(f[[paste0(prefix, "linear2.bias")]]) block$attn$in_proj_weight$copy_(f[[paste0(prefix, "attn.in_proj_weight")]]) block$attn$in_proj_bias$copy_(f[[paste0(prefix, "attn.in_proj_bias")]]) block$attn$out_proj$weight$copy_(f[[paste0( prefix, "attn.out_proj.weight" )]]) block$attn$out_proj$bias$copy_(f[[paste0(prefix, "attn.out_proj.bias")]]) }) if (!is.null(block$attn$ssmax_layer)) { tabicl_copy_ssmax( block$attn$ssmax_layer, f, prefix = paste0(prefix, "attn.ssmax_layer.") ) } invisible(block) } # Copy an induced-self-attention block (ISAB) from fixture tensors keyed by # `prefix` (e.g. "ce.tf_col.blocks.0."). tabicl_copy_isab <- function(isab, f, prefix) { torch::with_no_grad( isab$ind_vectors$copy_(f[[paste0(prefix, "ind_vectors")]]) ) tabicl_copy_mha_block( isab$multihead_attn1, f, paste0(prefix, "multihead_attn1.") ) tabicl_copy_mha_block( isab$multihead_attn2, f, paste0(prefix, "multihead_attn2.") ) invisible(isab) } # Copy a tabicl_col_embedding from fixture tensors keyed under `prefix` # (standalone fixtures use "ce."; the full-model fixture uses "col_embedder."). tabicl_copy_col_embedding <- function(ce, f, prefix = "ce.") { torch::with_no_grad({ ce$in_linear$weight$copy_(f[[paste0(prefix, "in_linear.weight")]]) ce$in_linear$bias$copy_(f[[paste0(prefix, "in_linear.bias")]]) ce$y_encoder$weight$copy_(f[[paste0(prefix, "y_encoder.weight")]]) ce$y_encoder$bias$copy_(f[[paste0(prefix, "y_encoder.bias")]]) }) for (i in seq_along(ce$tf_col$blocks)) { tabicl_copy_isab( ce$tf_col$blocks[[i]], f, prefix = sprintf("%stf_col.blocks.%d.", prefix, i - 1L) ) } invisible(ce) } # Copy a tabicl_row_interaction from fixture tensors keyed under `prefix` # ("ri." standalone; "row_interactor." in the full-model fixture). tabicl_copy_row_interaction <- function(ri, f, prefix = "ri.") { torch::with_no_grad({ ri$cls_tokens$copy_(f[[paste0(prefix, "cls_tokens")]]) ri$out_ln$weight$copy_(f[[paste0(prefix, "out_ln.weight")]]) if (!is.null(ri$out_ln$bias)) { ri$out_ln$bias$copy_(f[[paste0(prefix, "out_ln.bias")]]) } ri$tf_row$rope$freqs$copy_(f[[paste0(prefix, "tf_row.rope.freqs")]]) }) for (i in seq_along(ri$tf_row$blocks)) { tabicl_copy_mha_block( ri$tf_row$blocks[[i]], f, prefix = sprintf("%stf_row.blocks.%d.", prefix, i - 1L) ) } invisible(ri) } # Copy a tabicl_icl_learning from fixture tensors keyed under `prefix` # ("icl." standalone; "icl_predictor." in the full-model fixture). tabicl_copy_icl_learning <- function(icl, f, prefix = "icl.") { torch::with_no_grad({ if (!is.null(icl$ln)) { icl$ln$weight$copy_(f[[paste0(prefix, "ln.weight")]]) if (!is.null(icl$ln$bias)) { icl$ln$bias$copy_(f[[paste0(prefix, "ln.bias")]]) } } icl$y_encoder$weight$copy_(f[[paste0(prefix, "y_encoder.weight")]]) icl$y_encoder$bias$copy_(f[[paste0(prefix, "y_encoder.bias")]]) icl$decoder[[1]]$weight$copy_(f[[paste0(prefix, "decoder.0.weight")]]) icl$decoder[[1]]$bias$copy_(f[[paste0(prefix, "decoder.0.bias")]]) icl$decoder[[3]]$weight$copy_(f[[paste0(prefix, "decoder.2.weight")]]) icl$decoder[[3]]$bias$copy_(f[[paste0(prefix, "decoder.2.bias")]]) }) for (i in seq_along(icl$tf_icl$blocks)) { tabicl_copy_mha_block( icl$tf_icl$blocks[[i]], f, prefix = sprintf("%stf_icl.blocks.%d.", prefix, i - 1L) ) } invisible(icl) } # Write a converted-checkpoint directory from a full-model / engine fixture, for # testing the loader and the user-facing API. The two files use the task-prefixed # names brulee reads (classification.* / regression.*), derived from the config. tabicl_write_model_dir <- function(f, meta) { dir <- withr::local_tempdir(.local_envir = parent.frame()) if (meta$config$max_classes > 0) { task <- "classification" } else { task <- "regression" } files <- brulee:::tabicl_checkpoint_files(task) weight_keys <- grep( "^(col_embedder|row_interactor|icl_predictor)\\.", names(f), value = TRUE ) safetensors::safe_save_file( f[weight_keys], file.path(dir, files$weights) ) jsonlite::write_json( meta$config, file.path(dir, files$config), auto_unbox = TRUE ) dir } # Build a temporary brulee weight cache from a fixture and point the # `brulee.tabicl_cache_dir` option at it for the calling test. Mirrors the # ~/.cache/TabICL//// layout, holding only the two # task-prefixed files brulee reads. tabicl_local_cache <- function(f, meta) { root <- withr::local_tempdir(.local_envir = parent.frame()) if (meta$config$max_classes > 0) { task <- "classification" } else { task <- "regression" } if (task == "classification") { label <- "Classification" } else { label <- "Regression" } files <- brulee:::tabicl_checkpoint_files(task) dir <- file.path(root, "v0", "2020-01-01", label) dir.create(dir, recursive = TRUE) weight_keys <- grep( "^(col_embedder|row_interactor|icl_predictor)\\.", names(f), value = TRUE ) safetensors::safe_save_file(f[weight_keys], file.path(dir, files$weights)) jsonlite::write_json( meta$config, file.path(dir, files$config), auto_unbox = TRUE ) withr::local_options( list(brulee.tabicl_cache_dir = root), .local_envir = parent.frame() ) invisible(root) } # Copy a full tabicl_model from a full-model fixture (top-level module prefixes). tabicl_copy_model <- function(model, f) { tabicl_copy_col_embedding(model$col_embedder, f, prefix = "col_embedder.") tabicl_copy_row_interaction( model$row_interactor, f, prefix = "row_interactor." ) tabicl_copy_icl_learning(model$icl_predictor, f, prefix = "icl_predictor.") invisible(model) }