# Cross-backend validation harness — loaded by testthat and sourced by CI scripts. # Depends on tidyILD being loaded (library / load_all). See inst/dev/BACKEND_VALIDATION_BENCHMARK_CONTRACT.md. #' @keywords internal harness_tier_rank <- function(tier) { tier <- match.arg(tier, c("smoke", "nightly", "full")) switch(tier, smoke = 1L, nightly = 2L, full = 3L) } #' @keywords internal harness_scenario_in_tier <- function(scenario_tier_min, requested_tier) { harness_tier_rank(requested_tier) >= harness_tier_rank(scenario_tier_min) } #' Scenario manifest (v1) — columns used by runner #' @keywords internal harness_scenario_manifest <- function() { tibble::tibble( scenario_id = c("mlm_power_ri", "kfas_local_level", "ctsem_univariate", "mlm_lag_x"), scenario_type = c("mlm_power", "kfas", "ctsem", "mlm_lag"), tier_min = c("smoke", "smoke", "nightly", "smoke"), mlm_n_id = c(18L, NA_integer_, NA_integer_, NA_integer_), mlm_n_obs_per = c(10L, NA_integer_, NA_integer_, NA_integer_), mlm_effect_size = c(0.35, NA_real_, NA_real_, NA_real_), mlm_test_term = c("x", NA_character_, NA_character_, NA_character_), kfas_n_id = c(NA_integer_, 1L, NA_integer_, NA_integer_), kfas_n_obs_per = c(NA_integer_, 36L, NA_integer_, NA_integer_), ctsem_n_obs_per = c(NA_integer_, NA_integer_, 42L, NA_integer_), mlm_lag_n_id = c(NA_integer_, NA_integer_, NA_integer_, 20L), mlm_lag_n_obs_per = c(NA_integer_, NA_integer_, NA_integer_, 10L), mlm_lag_beta = c(NA_real_, NA_real_, NA_real_, 0.4), stringsAsFactors = FALSE ) } #' @keywords internal harness_default_backends_for_tier <- function(tier) { tier <- match.arg(tier, c("smoke", "nightly", "full")) if (tier == "smoke") { return(c("lme4", "kfas")) } if (tier == "nightly") { return(c("lme4", "nlme", "brms", "kfas", "ctsem")) } c("lme4", "nlme", "brms", "kfas", "ctsem") } #' @keywords internal harness_backend_installed <- function(backend) { backend <- match.arg(backend, c("lme4", "nlme", "brms", "kfas", "ctsem"), several.ok = TRUE) vapply(backend, function(b) { switch(b, lme4 = requireNamespace("lme4", quietly = TRUE), nlme = requireNamespace("nlme", quietly = TRUE), brms = requireNamespace("brms", quietly = TRUE) && (requireNamespace("rstan", quietly = TRUE) || requireNamespace("cmdstanr", quietly = TRUE)), kfas = requireNamespace("KFAS", quietly = TRUE), ctsem = requireNamespace("ctsem", quietly = TRUE) && requireNamespace("rstan", quietly = TRUE), FALSE ) }, logical(1L)) } #' @keywords internal harness_expand_runs <- function(manifest, tier, backends_requested) { rows <- list() for (i in seq_len(nrow(manifest))) { sid <- manifest$scenario_id[i] stype <- manifest$scenario_type[i] tmin <- manifest$tier_min[i] if (!harness_scenario_in_tier(tmin, tier)) { next } bes <- switch(stype, mlm_power = intersect(backends_requested, c("lme4", "nlme", "brms")), mlm_lag = intersect(backends_requested, "lme4"), kfas = if ("kfas" %in% backends_requested) "kfas" else character(), ctsem = if ("ctsem" %in% backends_requested) "ctsem" else character(), character() ) for (b in bes) { rows[[length(rows) + 1L]] <- tibble::tibble( scenario_id = sid, scenario_type = stype, backend = b ) } } if (length(rows) == 0L) { return(tibble::tibble( scenario_id = character(), scenario_type = character(), backend = character() )) } dplyr::bind_rows(rows) } #' @keywords internal harness_generate_mlm_power_data <- function(seed, n_id, n_obs_per, effect_size, test_term = "x") { set.seed(as.integer(seed)) d <- ild_simulate( n_id = n_id, n_obs_per = n_obs_per, seed = as.integer(seed) ) tt <- as.character(test_term)[1L] d[[tt]] <- stats::rnorm(nrow(d)) d$y <- d$y + as.numeric(effect_size) * d[[tt]] ild_prepare(d, id = "id", time = "time") } #' @keywords internal harness_generate_kfas_data <- function(seed, n_id, n_obs_per) { d <- ild_simulate( n_id = as.integer(n_id), n_obs_per = as.integer(n_obs_per), seed = as.integer(seed) ) ild_prepare(d, id = "id", time = "time") } #' @keywords internal harness_generate_mlm_lag_data <- function(seed, n_id, n_obs_per, beta_lag) { set.seed(as.integer(seed)) d <- ild_simulate( n_id = as.integer(n_id), n_obs_per = as.integer(n_obs_per), seed = as.integer(seed) ) d$x <- stats::rnorm(nrow(d)) prep <- ild_prepare(d, id = "id", time = "time") prep2 <- ild_lag(prep, x, n = 1L, mode = "gap_aware") b <- as.numeric(beta_lag)[1L] prep2$y <- b * prep2$x_lag1 + stats::rnorm(nrow(prep2), sd = 0.75) prep2 } #' @keywords internal harness_generate_ctsem_data <- function(seed, n_obs_per) { d <- ild_simulate( n_id = 1L, n_obs_per = as.integer(n_obs_per), irregular = TRUE, seed = as.integer(seed) ) x <- ild_prepare(d, id = "id", time = "time") ild_center(x, y) } #' @keywords internal harness_resolve_tidy_row <- function(td, target_term) { if (is.null(td) || nrow(td) == 0L) { return(NULL) } if (target_term %in% td$term) { return(td[td$term == target_term, , drop = FALSE][1L, ]) } pat <- paste0("^", gsub("([][{}()+*^$|\\\\?.])", "\\\\\\1", target_term), "$") hit <- td$term[grepl(pat, td$term)] if (length(hit) >= 1L) { return(td[td$term == hit[1L], , drop = FALSE][1L, ]) } NULL } #' @keywords internal harness_wald_interval <- function(est, se, level = 0.95) { if (!is.finite(est) || !is.finite(se) || se <= 0) { return(c(NA_real_, NA_real_)) } z <- stats::qnorm(1 - (1 - level) / 2) c(est - z * se, est + z * se) } #' @keywords internal harness_converged_heuristic <- function(fit, backend) { if (is.null(fit)) { return(FALSE) } if (backend == "lme4") { if (inherits(fit, "lmerMod")) { conv <- fit@optinfo$conv$lme4 if (!is.null(conv$code) && is.numeric(conv$code)) { return(as.integer(conv$code) == 0L) } } return(TRUE) } if (backend == "nlme") { return(TRUE) } if (backend == "brms") { if (!requireNamespace("brms", quietly = TRUE)) { return(FALSE) } rh <- tryCatch(max(brms::rhat(fit), na.rm = TRUE), error = function(e) NA_real_) return(is.finite(rh) && rh < 1.08) } if (backend == "kfas") { return(inherits(fit, "ild_fit_kfas")) } if (backend == "ctsem") { return(inherits(fit, "ild_fit_ctsem")) } TRUE } #' @keywords internal harness_singular_heuristic <- function(fit, backend) { if (backend != "lme4" || !inherits(fit, "lmerMod")) { return(NA) } if (!requireNamespace("lme4", quietly = TRUE)) { return(NA) } isTRUE(lme4::isSingular(fit)) } #' Collect guardrail summary from diagnostics bundle #' @keywords internal collect_diagnostics_signals <- function(bundle) { if (is.null(bundle) || !inherits(bundle, "ild_diagnostics_bundle")) { return(list( n_guardrails = 0L, guardrail_rule_ids = "" )) } gr <- bundle$guardrails if (is.null(gr) || !tibble::is_tibble(gr) || nrow(gr) == 0L) { return(list(n_guardrails = 0L, guardrail_rule_ids = "")) } ids <- gr$rule_id ids <- ids[!is.na(ids) & nzchar(as.character(ids))] list( n_guardrails = nrow(gr), guardrail_rule_ids = paste(unique(as.character(ids)), collapse = ";") ) } #' Fit one case; returns list(fit, error_message, elapsed_sec) #' @keywords internal harness_fit_one <- function(backend, scenario_id, scenario_type, data, manifest_row, brms_iter = 200L, brms_chains = 1L, brms_warmup = 100L, ctsem_iter = 150L, ctsem_chains = 1L) { fit <- NULL err <- NA_character_ t0 <- proc.time()[[3L]] if (backend == "lme4" && scenario_type == "mlm_power") { tt <- manifest_row$mlm_test_term ff <- stats::as.formula(paste("y ~", tt, "+ (1 | id)")) fit <- tryCatch( suppressWarnings(ild_lme( ff, data = data, ar1 = FALSE, warn_no_ar1 = FALSE, warn_uncentered = FALSE )), error = function(e) { err <<- conditionMessage(e) NULL } ) } else if (backend == "lme4" && scenario_type == "mlm_lag") { fit <- tryCatch( suppressWarnings(ild_lme( y ~ x_lag1 + (1 | id), data = data, ar1 = FALSE, warn_no_ar1 = FALSE, warn_uncentered = FALSE )), error = function(e) { err <<- conditionMessage(e) NULL } ) } else if (backend == "nlme" && scenario_type == "mlm_power") { tt <- manifest_row$mlm_test_term ff <- stats::as.formula(paste("y ~", tt)) fit <- tryCatch( suppressWarnings(ild_lme( ff, data = data, ar1 = TRUE, warn_no_ar1 = FALSE, warn_uncentered = FALSE )), error = function(e) { err <<- conditionMessage(e) NULL } ) } else if (backend == "brms" && scenario_type == "mlm_power") { tt <- manifest_row$mlm_test_term ff <- stats::as.formula(paste("y ~", tt, "+ (1 | id)")) fit <- tryCatch( suppressWarnings(ild_brms( ff, data = data, warn_uncentered = FALSE, iter = brms_iter, chains = brms_chains, warmup = brms_warmup, refresh = 0, seed = 1L )), error = function(e) { err <<- conditionMessage(e) NULL } ) } else if (backend == "kfas" && scenario_type == "kfas") { fit <- tryCatch( suppressWarnings(ild_kfas( data = data, outcome = "y", time_units = "hours", irregular_time = TRUE )), error = function(e) { err <<- conditionMessage(e) NULL } ) } else if (backend == "ctsem" && scenario_type == "ctsem") { fit <- tryCatch( suppressWarnings(ild_ctsem( data = data, outcome = "y", model_type = "stanct", iter = ctsem_iter, chains = ctsem_chains )), error = function(e) { err <<- conditionMessage(e) NULL } ) } elapsed <- proc.time()[[3L]] - t0 list(fit = fit, error_message = err, elapsed_sec = as.numeric(elapsed)) } #' One raw benchmark row #' @keywords internal harness_extract_metrics_row <- function(fit, backend, scenario_type, truth, test_term, nominal_level = 0.95, elapsed_sec = NA_real_, fit_error = NA_character_) { skipped_reason <- "" fe_ok <- length(fit_error) > 0L && !is.na(fit_error[1L]) && nzchar(as.character(fit_error[1L])) if (fe_ok) { return(tibble::tibble( truth = truth, estimate = NA_real_, bias = NA_real_, squared_error = NA_real_, std_error = NA_real_, ci_low = NA_real_, ci_high = NA_real_, covered = NA, nominal_level = nominal_level, elapsed_sec = elapsed_sec, converged = FALSE, singular = NA, fit_error = as.character(fit_error[1L]), n_guardrails = 0L, guardrail_rule_ids = "", skipped_reason = "" )) } if (is.null(fit)) { return(tibble::tibble( truth = truth, estimate = NA_real_, bias = NA_real_, squared_error = NA_real_, std_error = NA_real_, ci_low = NA_real_, ci_high = NA_real_, covered = NA, nominal_level = nominal_level, elapsed_sec = elapsed_sec, converged = FALSE, singular = harness_singular_heuristic(fit, backend), fit_error = if (is.na(fit_error)) "null fit" else fit_error, n_guardrails = 0L, guardrail_rule_ids = "", skipped_reason = "" )) } data <- attr(fit, "ild_data", exact = TRUE) bundle <- tryCatch( suppressWarnings(ild_diagnose(fit, data = data, type = "qq")), error = function(e) NULL ) sig <- collect_diagnostics_signals(bundle) td <- tryCatch(ild_tidy(fit), error = function(e) NULL) row <- NULL if (scenario_type %in% c("mlm_power", "mlm_lag")) { row <- harness_resolve_tidy_row(td, test_term) } else if (scenario_type == "kfas") { row <- harness_resolve_tidy_row(td, test_term %||% "observation_variance") } else if (scenario_type == "ctsem") { row <- if (!is.null(td) && nrow(td) > 0L) td[1L, , drop = FALSE] else NULL } est <- NA_real_ se <- NA_real_ lo <- NA_real_ hi <- NA_real_ if (!is.null(row)) { est <- as.numeric(row$estimate)[1L] se <- as.numeric(row$std_error)[1L] lo <- as.numeric(row$conf_low)[1L] hi <- as.numeric(row$conf_high)[1L] } if (scenario_type %in% c("mlm_power", "mlm_lag") && is.finite(se) && (!is.finite(lo) || !is.finite(hi))) { w <- harness_wald_interval(est, se, nominal_level) lo <- w[1L] hi <- w[2L] } conv <- harness_converged_heuristic(fit, backend) sing <- harness_singular_heuristic(fit, backend) covered <- NA bias <- NA_real_ sqe <- NA_real_ if (is.finite(truth) && is.finite(est)) { bias <- est - truth sqe <- bias^2 if (is.finite(lo) && is.finite(hi)) { covered <- truth >= lo & truth <= hi } } tibble::tibble( truth = truth, estimate = est, bias = bias, squared_error = sqe, std_error = se, ci_low = lo, ci_high = hi, covered = covered, nominal_level = nominal_level, elapsed_sec = elapsed_sec, converged = conv, singular = sing, fit_error = NA_character_, n_guardrails = sig$n_guardrails, guardrail_rule_ids = sig$guardrail_rule_ids, skipped_reason = skipped_reason ) } #' @keywords internal `%||%` <- function(x, y) { if (is.null(x)) y else x } #' Summarize raw benchmark tibble #' @keywords internal summarize_validation_metrics <- function(raw_df) { if (nrow(raw_df) == 0L) { return(raw_df) } raw_df <- tibble::as_tibble(raw_df) raw_df$has_skip <- !is.na(raw_df$skipped_reason) & nzchar(as.character(raw_df$skipped_reason)) raw_df$install_skipped <- raw_df$skipped_reason == "backend_not_installed" dplyr::group_by(raw_df, .data$tier, .data$scenario_id, .data$backend) %>% dplyr::summarise( n_replicates = dplyr::n(), n_skipped = sum(.data$has_skip, na.rm = TRUE), n_install_skipped = sum(.data$install_skipped, na.rm = TRUE), n_ran = sum(!.data$install_skipped, na.rm = TRUE), n_converged = sum(.data$converged %in% TRUE & !.data$install_skipped, na.rm = TRUE), convergence_rate = ifelse( sum(!.data$install_skipped) > 0L, mean(.data$converged[!.data$install_skipped] %in% TRUE, na.rm = TRUE), NA_real_ ), mean_bias = mean(.data$bias[!.data$install_skipped], na.rm = TRUE), rmse = sqrt(mean(.data$squared_error[!.data$install_skipped], na.rm = TRUE)), mean_coverage = ifelse( sum(!is.na(.data$covered[!.data$install_skipped])) > 0L, mean(.data$covered[!.data$install_skipped] %in% TRUE, na.rm = TRUE), NA_real_ ), n_coverage_defined = sum(!is.na(.data$covered[!.data$install_skipped])), calibration_gap = ifelse( sum(!is.na(.data$covered[!.data$install_skipped])) > 0L, dplyr::first(.data$nominal_level) - mean(.data$covered[!.data$install_skipped] %in% TRUE, na.rm = TRUE), NA_real_ ), mean_elapsed_sec = mean(.data$elapsed_sec[!.data$install_skipped], na.rm = TRUE), mean_n_guardrails = mean(.data$n_guardrails[!.data$install_skipped], na.rm = TRUE), .groups = "drop" ) } #' Parse --key value / --key=value CLI args #' @keywords internal harness_parse_cli_args <- function(args = NULL) { if (is.null(args)) { args <- commandArgs(trailingOnly = TRUE) } out <- list( tier = "smoke", backends = NULL, n_sim = 3L, seed = 20260406L, out_dir = file.path(getwd(), "benchmark_artifacts"), run_id = format(Sys.time(), "bench-%Y%m%d-%H%M%S"), git_sha = NA_character_, brms_iter = 200L, brms_chains = 1L, ctsem_iter = 150L, ctsem_chains = 1L ) if (length(args) == 0L) { return(out) } i <- 1L while (i <= length(args)) { a <- args[i] if (grepl("^--tier=", a)) { out$tier <- sub("^--tier=", "", a) } else if (a == "--tier" && i < length(args)) { out$tier <- args[i + 1L] i <- i + 1L } else if (grepl("^--backends=", a)) { out$backends <- strsplit(sub("^--backends=", "", a), ",", fixed = TRUE)[[1L]] } else if (a == "--backends" && i < length(args)) { out$backends <- strsplit(args[i + 1L], ",", fixed = TRUE)[[1L]] i <- i + 1L } else if (grepl("^--n-sim=", a)) { out$n_sim <- as.integer(sub("^--n-sim=", "", a)) } else if (a == "--n-sim" && i < length(args)) { out$n_sim <- as.integer(args[i + 1L]) i <- i + 1L } else if (grepl("^--seed=", a)) { out$seed <- as.integer(sub("^--seed=", "", a)) } else if (a == "--seed" && i < length(args)) { out$seed <- as.integer(args[i + 1L]) i <- i + 1L } else if (grepl("^--out-dir=", a)) { out$out_dir <- sub("^--out-dir=", "", a) } else if (a == "--out-dir" && i < length(args)) { out$out_dir <- args[i + 1L] i <- i + 1L } else if (grepl("^--run-id=", a)) { out$run_id <- sub("^--run-id=", "", a) } else if (a == "--run-id" && i < length(args)) { out$run_id <- args[i + 1L] i <- i + 1L } else if (grepl("^--git-sha=", a)) { out$git_sha <- sub("^--git-sha=", "", a) } else if (a == "--git-sha" && i < length(args)) { out$git_sha <- args[i + 1L] i <- i + 1L } i <- i + 1L } if (is.null(out$backends) || length(out$backends) == 0L) { out$backends <- harness_default_backends_for_tier(out$tier) } out } #' Run full benchmark matrix; returns list(raw, summary, metadata, skipped_backends) #' @keywords internal harness_run_benchmark <- function(tier = c("smoke", "nightly", "full"), backends = NULL, n_sim = 3L, seed = 20260406L, run_id = NULL, git_sha = NA_character_, brms_iter = 200L, brms_chains = 1L, ctsem_iter = 150L, ctsem_chains = 1L) { tier <- match.arg(tier) if (is.null(backends)) { backends <- harness_default_backends_for_tier(tier) } if (is.null(run_id)) { run_id <- format(Sys.time(), "bench-%Y%m%d-%H%M%S") } manifest <- harness_scenario_manifest() grid <- harness_expand_runs(manifest, tier, backends) skipped_backends <- backends[!harness_backend_installed(backends)] rows <- list() if (nrow(grid) == 0L) { raw <- tibble::tibble() } else { rep_idx <- 0L for (r in seq_len(nrow(grid))) { sid <- grid$scenario_id[r] st <- grid$scenario_type[r] be <- grid$backend[r] mrow <- manifest[manifest$scenario_id == sid, , drop = FALSE][1L, ] if (!harness_backend_installed(be)) { for (k in seq_len(n_sim)) { rep_idx <- rep_idx + 1L rows[[length(rows) + 1L]] <- tibble::tibble( run_id = run_id, git_sha = git_sha %||% NA_character_, tier = tier, scenario_id = sid, backend = be, replicate_id = k, truth = NA_real_, estimate = NA_real_, bias = NA_real_, squared_error = NA_real_, std_error = NA_real_, ci_low = NA_real_, ci_high = NA_real_, covered = NA, nominal_level = 0.95, elapsed_sec = NA_real_, converged = FALSE, singular = NA, fit_error = NA_character_, n_guardrails = 0L, guardrail_rule_ids = "", skipped_reason = "backend_not_installed" ) } next } for (k in seq_len(n_sim)) { rep_idx <- rep_idx + 1L rep_seed <- as.integer(seed) + rep_idx data <- NULL truth <- NA_real_ tt <- "x" if (st == "mlm_power") { data <- harness_generate_mlm_power_data( rep_seed, mrow$mlm_n_id, mrow$mlm_n_obs_per, mrow$mlm_effect_size, mrow$mlm_test_term ) truth <- as.numeric(mrow$mlm_effect_size) tt <- as.character(mrow$mlm_test_term) } else if (st == "kfas") { data <- harness_generate_kfas_data( rep_seed, mrow$kfas_n_id, mrow$kfas_n_obs_per ) truth <- NA_real_ tt <- "observation_variance" } else if (st == "ctsem") { data <- harness_generate_ctsem_data(rep_seed, mrow$ctsem_n_obs_per) truth <- NA_real_ tt <- NA_character_ } else if (st == "mlm_lag") { data <- harness_generate_mlm_lag_data( rep_seed, mrow$mlm_lag_n_id, mrow$mlm_lag_n_obs_per, mrow$mlm_lag_beta ) truth <- as.numeric(mrow$mlm_lag_beta) tt <- "x_lag1" } fo <- harness_fit_one( be, sid, st, data, mrow, brms_iter = brms_iter, brms_chains = brms_chains, brms_warmup = max(50L, brms_iter %/% 2L), ctsem_iter = ctsem_iter, ctsem_chains = ctsem_chains ) err <- fo$error_message if (!is.na(err) && nzchar(err)) { mr <- harness_extract_metrics_row( NULL, be, st, truth, tt, elapsed_sec = fo$elapsed_sec, fit_error = err ) } else { mr <- harness_extract_metrics_row( fo$fit, be, st, truth, tt, elapsed_sec = fo$elapsed_sec ) } rows[[length(rows) + 1L]] <- tibble::tibble( run_id = run_id, git_sha = git_sha %||% NA_character_, tier = tier, scenario_id = sid, backend = be, replicate_id = k ) %>% dplyr::bind_cols(mr) } } raw <- dplyr::bind_rows(rows) } summ <- if (nrow(raw) > 0L) summarize_validation_metrics(raw) else tibble::tibble() meta <- list( package_version = as.character(utils::packageVersion("tidyILD")), r_version = paste(R.version$major, R.version$minor, sep = "."), platform = R.version$platform, git_sha = git_sha, seed_base = seed, tier = tier, backends_requested = backends, backends_skipped_install = skipped_backends, scenario_manifest_version = 2L, timestamp_utc = format(Sys.time(), tz = "UTC", usetz = TRUE) ) list(raw = raw, summary = summ, metadata = meta, skipped_backends = skipped_backends) } #' Read thresholds JSON (requires jsonlite) #' @keywords internal harness_read_thresholds_json <- function(path) { if (!requireNamespace("jsonlite", quietly = TRUE)) { stop("jsonlite required to read thresholds (Suggests).", call. = FALSE) } jsonlite::read_json(path, simplifyVector = FALSE) } #' Evaluate summary against thresholds; returns tibble of check rows #' @keywords internal harness_evaluate_thresholds <- function(summary_df, thresholds_list, warn_only_backends = c("brms", "ctsem")) { if (nrow(summary_df) == 0L) { return(tibble::tibble( scenario_id = character(), backend = character(), check = character(), status = character(), detail = character() )) } defs <- thresholds_list$defaults %||% list() by_be <- thresholds_list$by_backend %||% list() by_sc <- thresholds_list$by_scenario %||% list() min_cov <- as.numeric(defs$min_coverage %||% 0.65) max_fail <- as.numeric(defs$max_convergence_failure_rate %||% 0.5) min_conv_n <- as.integer(defs$min_n_converged %||% 1L) out <- list() for (i in seq_len(nrow(summary_df))) { sid <- summary_df$scenario_id[i] be <- summary_df$backend[i] sc_row <- summary_df[i, , drop = FALSE] be_cfg <- by_be[[be]] %||% list() sc_cfg <- by_sc[[sid]] %||% list() warn_only <- isTRUE(be_cfg$warn_only) || isTRUE(sc_cfg$warn_only) || be %in% warn_only_backends min_cov_i <- as.numeric(sc_cfg$min_coverage %||% be_cfg$min_coverage %||% min_cov) max_fail_i <- as.numeric( sc_cfg$max_convergence_failure_rate %||% be_cfg$max_convergence_failure_rate %||% max_fail ) min_conv_i <- as.integer(sc_cfg$min_n_converged %||% be_cfg$min_n_converged %||% min_conv_n) n_ran <- sc_row$n_ran if (!is.finite(n_ran) || n_ran <= 0L) { out[[length(out) + 1L]] <- tibble::tibble( scenario_id = sid, backend = be, check = "convergence", status = "pass", detail = "no runs (backend not installed or filtered)" ) next } fail_rate <- 1 - as.numeric(sc_row$convergence_rate) if (!is.finite(fail_rate)) { fail_rate <- 1 } st_conv <- if (sc_row$n_converged < min_conv_i) { "fail" } else if (fail_rate > max_fail_i) { "fail" } else { "pass" } detail_conv <- sprintf( "n_converged=%g, rate=%.3f, max_fail=%.3f", sc_row$n_converged, sc_row$convergence_rate, max_fail_i ) out[[length(out) + 1L]] <- tibble::tibble( scenario_id = sid, backend = be, check = "convergence", status = if (warn_only && st_conv == "fail") "warn" else st_conv, detail = detail_conv ) n_cov <- sc_row$n_coverage_defined if (is.finite(n_cov) && n_cov > 0L && is.finite(sc_row$mean_coverage)) { st_cov <- if (sc_row$mean_coverage < min_cov_i) "fail" else "pass" out[[length(out) + 1L]] <- tibble::tibble( scenario_id = sid, backend = be, check = "coverage", status = if (warn_only && st_cov == "fail") "warn" else st_cov, detail = sprintf("mean_cov=%.3f min=%.3f", sc_row$mean_coverage, min_cov_i) ) } } dplyr::bind_rows(out) } #' Exit status helper: 0 pass, 1 fail #' @keywords internal harness_threshold_exit_status <- function(checks_df) { if (nrow(checks_df) == 0L) { return(0L) } if (any(checks_df$status == "fail")) { return(1L) } 0L }