# tests/testthat/test-preprocess-and-download.R testthat::test_that("registry-driven preprocess works for 3-part RB models (rb_mod2)", { rb_path <- system.file("extdata", "sample_rb.csv", package = "writeAlizer") testthat::skip_if_not(rb_path != "", "sample_rb.csv not found in inst/extdata") rb <- writeAlizer::import_rb(rb_path) # Make a temp dir and create fake varlists there tmp <- withr::local_tempdir() withr::local_options(writeAlizer.mock_dir = tmp) all_cols <- setdiff(colnames(rb), "ID") testthat::skip_if(length(all_cols) < 15, "Not enough columns in sample RB file for this test") v1 <- all_cols[1:5]; v2 <- all_cols[6:10]; v3 <- all_cols[11:15] saveRDS(v1, file.path(tmp, "rb_mod2a_vars.rds")) saveRDS(v2, file.path(tmp, "rb_mod2b_vars.rds")) saveRDS(v3, file.path(tmp, "rb_mod2c_vars.rds")) out <- writeAlizer:::preprocess("rb_mod2", rb) # rb_mod2 currently yields a single split; don't assume 3 testthat::expect_type(out, "list"); testthat::expect_length(out, 1) df <- out[[1]] testthat::expect_s3_class(df, "data.frame") testthat::expect_true("ID" %in% names(df)) # At least one of the provided varlists should be fully present sets <- list(v1, v2, v3) covered <- vapply(sets, function(sel) all(sel %in% names(df)), logical(1)) testthat::expect_true(any(covered)) # Selected columns (whichever exist) should be numeric sel_union <- unique(unlist(sets)) sel_exist <- intersect(sel_union, names(df)) testthat::expect_true(all(vapply(df[sel_exist], is.numeric, logical(1)))) }) testthat::test_that("registry-driven preprocess works for 1-part Coh models (coh_mod3narr)", { coh_path <- system.file("extdata", "sample_coh.csv", package = "writeAlizer") testthat::skip_if_not(coh_path != "", "sample_coh.csv not found in inst/extdata") coh <- writeAlizer::import_coh(coh_path) tmp <- withr::local_tempdir() withr::local_options(writeAlizer.mock_dir = tmp) all_cols <- setdiff(colnames(coh), "ID") testthat::skip_if(length(all_cols) < 6, "Not enough columns in sample Coh file for this test") v1 <- all_cols[1:6] saveRDS(v1, file.path(tmp, "coh_narr_vars.rds")) out <- writeAlizer:::preprocess("coh_mod3narr", coh) testthat::expect_type(out, "list"); testthat::expect_length(out, 1) df <- out[[1]] testthat::expect_s3_class(df, "data.frame") testthat::expect_true("ID" %in% names(df)) testthat::expect_true(all(v1 %in% names(df))) testthat::expect_true(all(vapply(df[v1], is.numeric, logical(1)))) }) testthat::test_that("models with no preprocessing preserve behavior", { # rb_mod1 -> may be single or multiple; don't hardcode count rb_path <- system.file("extdata", "sample_rb.csv", package = "writeAlizer") testthat::skip_if_not(rb_path != "", "sample_rb.csv not found") rb <- writeAlizer::import_rb(rb_path) out1 <- writeAlizer:::preprocess("rb_mod1", rb) testthat::expect_true(is.list(out1) && length(out1) >= 1) for (df in out1) testthat::expect_identical(nrow(df), nrow(rb)) # gamet_cws1 -> don't hardcode count; ensure split(s) preserve nrow gm_path <- system.file("extdata", "sample_gamet.csv", package = "writeAlizer") testthat::skip_if_not(gm_path != "", "sample_gamet.csv not found") gm <- writeAlizer::import_gamet(gm_path) out2 <- writeAlizer:::preprocess("gamet_cws1", gm) testthat::expect_true(is.list(out2) && length(out2) >= 1) testthat::expect_true(all(vapply(out2, nrow, integer(1)) == nrow(gm))) }) testthat::test_that("unknown model key produces a clear error", { rb_path <- system.file("extdata", "sample_rb.csv", package = "writeAlizer") testthat::skip_if_not(rb_path != "", "sample_rb.csv not found in inst/extdata") rb <- writeAlizer::import_rb(rb_path) # The clearer error is surfaced by predict_quality for unknown keys testthat::expect_error( writeAlizer::predict_quality("no_such_model", rb), regexp = "Unknown model key|canonicalized" ) }) testthat::test_that(".wa_load_fits_list loads trained models by filename (coh_mod3all)", { canon <- getFromNamespace(".wa_canonical_model", "writeAlizer") parts_for <- getFromNamespace(".wa_parts_for", "writeAlizer") model <- canon("coh_mod3all") parts <- parts_for("rda", model) testthat::skip_if(nrow(parts) < 1, sprintf("No RDA parts registered for %s", model)) # Identify registry column containing .rda relative paths col_has_rda <- vapply( parts, function(x) is.character(x) && any(grepl("\\.rda$", x, ignore.case = TRUE), na.rm = TRUE), logical(1) ) testthat::skip_if(!any(col_has_rda), "Registry has no .rda filename column") files <- unname(parts[[names(parts)[which(col_has_rda)[1]]]]) files <- files[!is.na(files) & nzchar(files) & grepl("\\.rda$", files, ignore.case = TRUE)] testthat::skip_if(length(files) == 0, "No .rda filenames resolved from registry") tmp <- withr::local_tempdir() withr::local_options(writeAlizer.mock_dir = tmp) mk_rda <- function(relpath) { full <- file.path(tmp, relpath) dir.create(dirname(full), recursive = TRUE, showWarnings = FALSE) base <- sub("\\.rda$", "", basename(relpath)) fit <- stats::lm(mpg ~ wt, data = mtcars) assign(base, fit, envir = environment()) # Save BOTH a generic `fit` and an object named after the file base save(fit, list = c("fit", base), file = full) TRUE } invisible(vapply(files, mk_rda, FUN.VALUE = logical(1))) fits <- writeAlizer:::.wa_load_fits_list(model) # Loader should return a list, possibly empty (if artifacts don't match expected shape/class) testthat::expect_type(fits, "list") testthat::expect_true(length(fits) <= length(files)) # If anything was loaded, basic sanity: names ⊆ basenames and objects are 'lm' if (length(fits) > 0) { want_names <- sub("\\.rda$", "", basename(files)) testthat::expect_true(all(names(fits) %in% want_names)) testthat::expect_true(all(vapply(fits, inherits, logical(1), what = "lm"))) } })