library(aisdk) # --------------------------------------------------------------------------- # Factory & registration # --------------------------------------------------------------------------- test_that("create_r_introspect_tools returns the two diagnostic tools", { tools <- create_r_introspect_tools() expect_length(tools, 2) names <- vapply(tools, function(t) t$name, character(1)) expect_setequal(names, c("r_eval", "r_session_state")) expect_true(all(vapply(tools, function(t) inherits(t, "Tool"), logical(1)))) }) test_that("console minimal profile includes the new introspect tools", { tools <- create_console_tools(profile = "minimal") names <- vapply(tools, function(t) t$name, character(1)) expect_true("r_eval" %in% names) expect_true("r_session_state" %in% names) # original minimal tools still present expect_true("bash" %in% names) expect_true("read_file" %in% names) }) # --------------------------------------------------------------------------- # r_eval # --------------------------------------------------------------------------- skip_if_no_callr <- function() { testthat::skip_if_not_installed("callr") } test_that("r_eval captures a simple value and reports OK", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list(code = "1 + 1")) expect_match(out, "status: OK", fixed = TRUE) expect_match(out, "[value_repr_begin]", fixed = TRUE) expect_match(out, "2", fixed = TRUE) }) test_that("r_eval captures R-level errors with the error block populated", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list(code = "stop('boom')")) expect_match(out, "status: R_ERROR", fixed = TRUE) expect_match(out, "boom", fixed = TRUE) expect_match(out, "error_phase: eval", fixed = TRUE) }) test_that("r_eval captures stdout and warnings separately", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list(code = "cat('hi from stdout\\n'); warning('careful'); 42")) expect_match(out, "hi from stdout", fixed = TRUE) expect_match(out, "careful", fixed = TRUE) expect_match(out, "[warnings_begin]", fixed = TRUE) # the visible value of the final 42 should appear expect_match(out, "42", fixed = TRUE) }) test_that("r_eval accepts R calls with missing arguments", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list( code = "df <- data.frame(a = 1:3, b = 4:6); df[1, ]" )) expect_match(out, "status: OK", fixed = TRUE) expect_match(out, "a b", fixed = TRUE) }) test_that("r_eval captures subprocess stderr (critical for install-failure-style debugging)", { skip_if_no_callr() testthat::skip_on_os("windows") # /bin/sh availability tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") # Two representative paths: direct stderr write, and system() spawning a # grandchild that writes to stderr. The real-world install.packages / # processx / system() callers all use these paths. out <- tool$run(list( code = paste( "writeLines('from-direct-stderr', con = stderr());", "system('echo from-grandchild-stderr 1>&2')", sep = " " ) )) expect_match(out, "from-direct-stderr", fixed = TRUE) expect_match(out, "from-grandchild-stderr", fixed = TRUE) }) test_that("r_eval rejects empty code", { tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") expect_match(tool$run(list(code = "")), "non-empty", fixed = TRUE) }) test_that("r_eval tool schema rejects missing and empty code before execution", { tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") expect_true(isTRUE(tool$meta$validate_arguments)) missing <- aisdk:::execute_tool_calls( list(list(id = "call_1", name = "r_eval", arguments = list())), list(tool) ) empty <- aisdk:::execute_tool_calls( list(list(id = "call_2", name = "r_eval", arguments = list(code = ""))), list(tool) ) expect_true(missing[[1]]$is_validation_error) expect_match(missing[[1]]$result, "Missing required argument `code`", fixed = TRUE) expect_true(empty[[1]]$is_validation_error) expect_match(empty[[1]]$result, "at least 1 character", fixed = TRUE) }) test_that("r_eval times out on long-running code without hanging", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list(code = "Sys.sleep(10)", timeout_secs = 1L)) expect_match(out, "status: TIMEOUT", fixed = TRUE) }) test_that("r_eval caps the requested timeout at 120 seconds (issue #26)", { # Requesting a 10-minute timeout must not actually wait that long if the code # hangs. The tool reports the capped value in the result envelope so the LLM # can see what was applied. skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list(code = "1 + 1", timeout_secs = 600L)) expect_match(out, "timeout_secs: 120", fixed = TRUE) }) # --------------------------------------------------------------------------- # Issue #26 layers A/B/C/D: structured rejection, marker, credential scrub, # output truncation. # --------------------------------------------------------------------------- test_that("[A] r_eval rejects bare console_chat() at parse time (no timeout wait)", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") t0 <- Sys.time() out <- tool$run(list(code = "library(aisdk); console_chat()", timeout_secs = 30L)) elapsed <- as.numeric(difftime(Sys.time(), t0, units = "secs")) expect_lt(elapsed, 2) # must not wait the 120s timeout expect_match(out, "status: REJECTED", fixed = TRUE) expect_match(out, "rejection_kind: repl_launcher", fixed = TRUE) expect_match(out, "rejected_call: console_chat", fixed = TRUE) expect_match(out, "what_to_do_instead", fixed = TRUE) }) test_that("[A] r_eval rejects namespace-qualified aisdk::console_chat()", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list(code = "aisdk::console_chat()", timeout_secs = 30L)) expect_match(out, "status: REJECTED", fixed = TRUE) expect_match(out, "rejected_call: console_chat", fixed = TRUE) }) test_that("[A] r_eval rejects blind interactive prompts (readline, menu)", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list(code = "x <- readline('> ')")) expect_match(out, "status: REJECTED", fixed = TRUE) expect_match(out, "rejection_kind: blind_prompt", fixed = TRUE) expect_match(out, "rejected_call: readline", fixed = TRUE) out <- tool$run(list(code = "menu(c('a','b'))")) expect_match(out, "rejected_call: menu", fixed = TRUE) }) test_that("[A] readLines on a file is NOT rejected (only readLines on stdin)", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") tf <- tempfile() writeLines("hello", tf) on.exit(unlink(tf), add = TRUE) out <- tool$run(list(code = sprintf("readLines(%s)", deparse(tf)))) expect_match(out, "status: OK", fixed = TRUE) expect_match(out, "hello", fixed = TRUE) out <- tool$run(list(code = "readLines(stdin())")) expect_match(out, "status: REJECTED", fixed = TRUE) }) test_that("[B] dynamic-dispatch console_chat() still aborts via subprocess marker", { # If the agent tries do.call("console_chat", ...) or get("console_chat")() # to sneak past layer A, the in-package self-check on AISDK_INSIDE_R_EVAL # must still abort instead of entering the REPL. skip_if_no_callr() skip_if(system.file(package = "aisdk") == "", "aisdk not installed") tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") t0 <- Sys.time() out <- tool$run(list( code = "library(aisdk); do.call(\"console_chat\", list())", timeout_secs = 10L )) elapsed <- as.numeric(difftime(Sys.time(), t0, units = "secs")) expect_lt(elapsed, 5) expect_match(out, "refused to start inside an r_eval subprocess", fixed = TRUE) }) test_that("[B] subprocess sees AISDK_INSIDE_R_EVAL=1", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list(code = "Sys.getenv('AISDK_INSIDE_R_EVAL', unset = '')")) expect_match(out, "[1] \"1\"", fixed = TRUE) }) test_that("[C] API keys are scrubbed from the subprocess by default", { skip_if_no_callr() withr::with_envvar( c(OPENAI_API_KEY = "sk-real-key-1234", FAKE_TOKEN = "tk-test-9999"), { tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list( code = "list(openai = Sys.getenv('OPENAI_API_KEY', unset = ''), tok = Sys.getenv('FAKE_TOKEN', unset = ''))" )) # The secret value must not appear anywhere in the captured output. # The subprocess sees the env var as set-but-empty (we use empty-string # override + --no-environ because callr's env arg is additive), so # nzchar() in the subprocess returns FALSE -- equivalent to "unset" # for any code that checks for a credential before using it. expect_false(grepl("sk-real-key-1234", out, fixed = TRUE)) expect_false(grepl("tk-test-9999", out, fixed = TRUE)) } ) }) test_that("[C] share_credentials = TRUE forwards API keys when explicitly opted in", { skip_if_no_callr() # Use a token-like name that is unlikely to live in the user's real .Renviron # (.Renviron values would otherwise override our withr-scoped value via R's # startup sequence even with share_credentials = TRUE). withr::with_envvar( c(AISDK_TEST_FAKE_TOKEN = "tok-real-1234"), { tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") # default: scrubbed scrubbed <- tool$run(list( code = "Sys.getenv('AISDK_TEST_FAKE_TOKEN', unset = '')" )) expect_false(grepl("tok-real-1234", scrubbed, fixed = TRUE)) # opt-in: passes through shared <- tool$run(list( code = "Sys.getenv('AISDK_TEST_FAKE_TOKEN', unset = '')", share_credentials = TRUE )) expect_match(shared, "tok-real-1234", fixed = TRUE) } ) }) test_that("[C] non-sensitive env vars still pass through by default", { skip_if_no_callr() withr::with_envvar( c(MY_DEBUG_FLAG = "yes"), { tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list(code = "Sys.getenv('MY_DEBUG_FLAG', unset = '')")) expect_match(out, "\"yes\"", fixed = TRUE) } ) }) test_that("[D] huge stdout is truncated with a footer marker", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list( code = "for (i in 1:5000) cat(paste0(strrep('x', 200), '\\n'))", timeout_secs = 15L )) expect_match(out, "stdout_truncated:", fixed = TRUE) expect_match(out, "original_bytes=", fixed = TRUE) expect_match(out, "of output truncated", fixed = TRUE) # formatted output should stay well under 1MB even with the truncation footer expect_lt(nchar(out), 600 * 1024) }) test_that("[D] small output is not truncated", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") out <- tool$run(list(code = "cat('hi\\n'); 1")) expect_false(grepl("stdout_truncated:", out, fixed = TRUE)) }) test_that("[D2] truncated stdout persists the full original to disk for grep", { # When output is too large for the inline envelope, the full original is # written to a temp file so the agent can grep / read_file it instead of # losing the middle. We exercise both the result envelope (file path # reachable) and the rendered output (path mentioned with a grep hint). skip_if_no_callr() captured <- aisdk:::r_eval_subprocess( code = paste( "for (i in 1:5000)", "cat(paste0(strrep('x', 200), ' line', i, '\\n'))", sep = " " ), timeout_secs = 15L ) path <- captured$stdout_log_path expect_true(!is.null(path) && file.exists(path)) # full original is on disk -- ~1MB, with all 5000 lines expect_gt(file.info(path)$size, 900 * 1024) expect_equal(length(readLines(path, warn = FALSE)), 5000L) # rendered envelope mentions the path AND tells the agent to grep instead of # re-running r_eval rendered <- aisdk:::format_r_eval_result(captured, "x", 15L) expect_match(rendered, "Full original output is saved at:", fixed = TRUE) expect_match(rendered, path, fixed = TRUE) expect_match(rendered, "grep -n", fixed = TRUE) expect_match(rendered, "Do NOT retry r_eval", fixed = TRUE) unlink(path) }) test_that("[D2] non-truncated runs do NOT leave a log file behind", { skip_if_no_callr() captured <- aisdk:::r_eval_subprocess("cat('hi\\n'); 1", timeout_secs = 5L) expect_null(captured$stdout_log_path) expect_null(captured$stderr_log_path) }) test_that("r_eval kills the whole process tree on timeout (no orphaned grandchildren)", { skip_if_no_callr() testthat::skip_on_os("windows") # uses /bin/sh + pgrep skip_if(Sys.which("pgrep") == "", "pgrep not available") # Process-tree teardown semantics belong to processx/callr (kill_tree) # and to the kernel's reparent-to-init behavior, not to our code. On # GitHub Actions Linux runners, sh+sleep grandchildren sporadically get # adopted by PID 1 before kill_tree can reach them, even with multi-second # polling. Skip in CI so we are not asserting on an upstream guarantee # we do not own. The TIMEOUT status path -- which IS our responsibility # and the actual user-visible behavior of issue #26 -- is covered by the # "r_eval times out on long-running code without hanging" test above. testthat::skip_on_ci() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") # Use a unique marker that does not appear in any common command string. marker <- paste0("aisdk_orphan_marker_", as.integer(Sys.time()), "_", sample.int(1e6, 1)) cmd <- sprintf("sleep 30 # %s", marker) out <- tool$run(list( code = sprintf("system(\"%s\")", cmd), timeout_secs = 1L )) expect_match(out, "status: TIMEOUT", fixed = TRUE) # Reaping the orphan tree is asynchronous: poll up to ~8s on developer # machines (macOS reaps in milliseconds; a loaded host can take seconds). no_match <- FALSE for (i in seq_len(80)) { Sys.sleep(0.1) pgrep_out <- suppressWarnings(system2( "pgrep", c("-f", marker), stdout = TRUE, stderr = FALSE )) if (length(pgrep_out) == 0) { no_match <- TRUE break } } expect_true(no_match, info = "process tree was not fully reaped within 8s") }) test_that("r_eval does not mutate the parent session", { skip_if_no_callr() tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_eval") marker_name <- paste0(".aisdk_should_not_appear_", as.integer(Sys.time())) code <- sprintf("assign('%s', TRUE, envir = globalenv()); TRUE", marker_name) tool$run(list(code = code)) expect_false(exists(marker_name, envir = globalenv(), inherits = FALSE)) }) # --------------------------------------------------------------------------- # r_session_state # --------------------------------------------------------------------------- test_that("r_session_state returns the expected sections", { tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_session_state") out <- tool$run(list()) expect_match(out, "[r_session_state_begin]", fixed = TRUE) expect_match(out, "[platform]", fixed = TRUE) expect_match(out, "[libpaths]", fixed = TRUE) expect_match(out, "[repos]", fixed = TRUE) expect_match(out, "[envvars]", fixed = TRUE) expect_match(out, "[search_path]", fixed = TRUE) expect_match(out, "r_version", fixed = TRUE) }) test_that("r_session_state masks token-like env vars", { withr::with_envvar(c(GITHUB_PAT = "ghp_super_secret_value_1234"), { state <- aisdk:::collect_r_session_state(include = "envvars") expect_false(grepl("super_secret", state$envvars$GITHUB_PAT %||% "", fixed = TRUE)) expect_match(state$envvars$GITHUB_PAT %||% "", "\\*\\*\\*") }) }) test_that("r_session_state include filter works", { tool <- aisdk:::find_tool(create_r_introspect_tools(), "r_session_state") out <- tool$run(list(include = c("platform", "libpaths"))) expect_match(out, "[platform]", fixed = TRUE) expect_match(out, "[libpaths]", fixed = TRUE) expect_false(grepl("[envvars]", out, fixed = TRUE)) expect_false(grepl("[repos]", out, fixed = TRUE)) }) # --------------------------------------------------------------------------- # r-debug skill discovery # --------------------------------------------------------------------------- test_that("r-debug skill ships with the package and exposes references", { skill_dir <- system.file("skills", "r-debug", package = "aisdk") skip_if(skill_dir == "", "r-debug skill not installed yet (run devtools::install)") expect_true(file.exists(file.path(skill_dir, "SKILL.md"))) refs <- list.files(file.path(skill_dir, "references"), pattern = "\\.md$") expect_true(length(refs) >= 5) expect_true("install-failures.md" %in% refs) }) test_that("auto skill registry picks up r-debug by name", { skill_dir <- system.file("skills", "r-debug", package = "aisdk") skip_if(skill_dir == "", "r-debug skill not installed yet (run devtools::install)") registry <- aisdk:::create_auto_skill_registry() expect_true(registry$has_skill("r-debug")) })