# Test Orchestration: AgentRegistry and Flow
#
# Covers AgentRegistry (registration, lookup, prompt section and delegation
# tool generation) and Flow (delegation depth tracking, the depth limit, and
# the context handed to a delegated agent). A scripted mock model and mock
# agents stand in for real ones.

# --- Mock Model Helper ----

# A simple mock model that behaves predictably for testing.
#
# Replies are held in a queue (`responses`) and handed out front to back by
# `do_generate()`. A queued entry is either a reply list or a function of
# `params` that produces the reply.
MockModel <- R6::R6Class("MockModel",
  inherit = LanguageModelV1,
  public = list(
    provider = "mock",
    model_id = "mock-model",
    responses = list(),

    # responses: optional initial queue of replies (lists or functions).
    initialize = function(responses = list()) {
      self$responses <- responses
    },

    # Returns the next queued reply, removing it from the queue. When the
    # queue is empty a fixed fallback reply is returned instead.
    do_generate = function(params) {
      if (length(self$responses) == 0) {
        # Fallback carries the same fields as replies queued through
        # add_response(), so callers can read them uniformly.
        return(list(
          text = "Mock response",
          tool_calls = NULL,
          finish_reason = "stop",
          usage = list(total_tokens = 10)
        ))
      }

      # Pop the first response
      resp <- self$responses[[1]]
      self$responses <- self$responses[-1]

      # Allow response to be a function of params
      if (is.function(resp)) {
        return(resp(params))
      }
      resp
    },

    # Appends one reply to the end of the queue.
    # text: reply text, or NULL for a reply that only carries tool calls.
    # tool_calls: list of tool-call lists, or NULL.
    add_response = function(text = NULL, tool_calls = NULL) {
      reply <- list(
        text = text,
        tool_calls = tool_calls,
        finish_reason = "stop",
        usage = list(total_tokens = 10)
      )
      self$responses <- c(self$responses, list(reply))
    },

    # Required for ReAct loop: wraps a tool result as a "tool" role message.
    format_tool_result = function(tool_call_id, tool_name, result) {
      list(
        role = "tool",
        tool_call_id = tool_call_id,
        name = tool_name,
        content = result
      )
    }
  )
)

# --- Mock Agent Helper ----

# An Agent whose run() can be replaced by a test-supplied function.
#
# When `mock_run_func` is set, run() forwards `task`, `session`, `context`
# and `...` to it (`model` and `max_steps` are accepted but not forwarded).
# Otherwise run() returns list(text = "<name> Done").
MockAgent <- R6::R6Class("MockAgent",
  inherit = Agent,
  public = list(
    mock_run_func = NULL,

    initialize = function(name, description, run_func = NULL) {
      super$initialize(name, description)
      self$mock_run_func <- run_func
    },

    run = function(task, session = NULL, context = NULL, model = NULL,
                   max_steps = 10, ...) {
      if (!is.null(self$mock_run_func)) {
        return(self$mock_run_func(task, session, context, ...))
      }
      list(text = paste0(self$name, " Done"))
    }
  )
)

# --- Tests ----

test_that("AgentRegistry manages agents correctly", {
  registry <- AgentRegistry$new()
  agent1 <- Agent$new("A1", "Desc1")
  agent2 <- Agent$new("A2", "Desc2")

  registry$register(agent1)
  registry$register(agent2)

  expect_true(registry$has("A1"))
  expect_true(registry$has("A2"))
  expect_false(registry$has("A3"))

  expect_equal(registry$get("A1")$name, "A1")
  expect_length(registry$list_agents(), 2)

  # The generated prompt section should mention names and descriptions
  prompt <- registry$generate_prompt_section()
  expect_match(prompt, "A1", fixed = TRUE)
  expect_match(prompt, "Desc2", fixed = TRUE)
})

test_that("AgentRegistry creates delegation tools", {
  registry <- AgentRegistry$new()
  agent1 <- Agent$new("Worker", "Does work")
  registry$register(agent1)

  tools <- registry$generate_delegate_tools()

  expect_length(tools, 1)
  expect_equal(tools[[1]]$name, "delegate_to_Worker")
  expect_s3_class(tools[[1]], "Tool")
})

# --- Flow Tests: Using Registry properly ----

test_that("Flow manages stack depth with registry", {
  mock_model <- MockModel$new()
  session <- ChatSession$new(model = mock_model)

  # Create agents
  manager <- MockAgent$new("Manager", "The primary agent that delegates work")
  worker <- MockAgent$new("Worker", "Does the actual work")

  # Create registry and register the worker (not the manager)
  registry <- AgentRegistry$new()
  registry$register(worker)

  # Create flow WITH REGISTRY
  flow <- Flow$new(
    session = session,
    model = mock_model,
    registry = registry,
    max_depth = 3
  )

  expect_equal(flow$depth(), 0)

  # Program mock model to:
  # 1. Return a tool call for "delegate_task"
  # 2. Then return a final text response
  mock_model$add_response(
    text = NULL,
    tool_calls = list(
      list(
        id = "call_1",
        name = "delegate_task",
        arguments = list(
          agent_name = "Worker",
          task = "Do the work",
          context = ""
        )
      )
    )
  )
  mock_model$add_response(
    text = "Manager received Worker result. Final answer."
  )

  # Hook Worker to observe depth while it is running
  observed_depth <- -1
  worker$mock_run_func <- function(task, session, context, ...) {
    observed_depth <<- flow$depth()
    list(text = "Worker completed the task")
  }

  # Run flow
  result <- flow$run(manager, "Root task for manager")

  # Assertions
  expect_equal(result$text, "Manager received Worker result. Final answer.")
  # Stack should have manager pushed when worker runs
  expect_equal(observed_depth, 1)
  # Stack should be empty after run
  expect_equal(flow$depth(), 0)
})

test_that("Flow enforces max depth with registry", {
  mock_model <- MockModel$new()
  session <- ChatSession$new(model = mock_model)

  # Create agents
  manager <- MockAgent$new("Manager", "Primary agent")
  level1 <- MockAgent$new("Level1", "First level agent")
  level2 <- MockAgent$new("Level2", "Second level agent")

  # Create registry
  registry <- AgentRegistry$new()
  registry$register(level1)
  registry$register(level2)

  # Create flow with max_depth = 1 (only one level of delegation allowed)
  flow <- Flow$new(
    session = session,
    model = mock_model,
    registry = registry,
    max_depth = 1
  )

  # level1 tries to delegate to level2 (should fail because depth limit)
  level1$mock_run_func <- function(task, session, context, ...) {
    # This should fail because we are already at depth 1
    result <- flow$delegate(level2, "Deep task")
    list(text = result)
  }

  # level2 would just complete, but must never be reached; record any call
  level2_called <- FALSE
  level2$mock_run_func <- function(...) {
    level2_called <<- TRUE
    list(text = "Level2 should not run")
  }

  # Program mock model to delegate to level1
  mock_model$add_response(
    tool_calls = list(
      list(
        id = "c1",
        name = "delegate_task",
        arguments = list(agent_name = "Level1", task = "Task")
      )
    )
  )
  mock_model$add_response(text = "Final")

  # Run
  result <- flow$run(manager, "Root")

  # level1 returns the delegation error as its text; that text goes back to
  # the mock model as the tool result, and the model then answers "Final".
  expect_equal(result$text, "Final")

  # More direct test: call delegate directly on a flow that allows no depth
  flow2 <- Flow$new(
    session = session,
    model = mock_model,
    registry = registry,
    max_depth = 0
  )
  direct_result <- flow2$delegate(level1, "Any task")
  expect_match(direct_result, "Maximum delegation depth", fixed = TRUE)

  # The depth limit must have kept level2 from running in either scenario
  expect_false(level2_called)
})

test_that("Flow constructs recursive context", {
  mock_model <- MockModel$new()
  session <- ChatSession$new(model = mock_model)

  manager <- MockAgent$new("Manager", "Primary agent")
  worker <- MockAgent$new("Worker", "Does work")

  registry <- AgentRegistry$new()
  registry$register(worker)

  flow <- Flow$new(session = session, model = mock_model, registry = registry)

  # Program mock model to delegate
  mock_model$add_response(
    tool_calls = list(
      list(
        id = "1",
        name = "delegate_task",
        arguments = list(agent_name = "Worker", task = "Work on it")
      )
    )
  )
  mock_model$add_response(text = "Final")

  # Capture context passed to Worker
  captured_context <- NULL
  worker$mock_run_func <- function(task, session, context, ...) {
    captured_context <<- context
    list(text = "Worker Done")
  }

  flow$run(manager, "Do everything")

  # Verify context was passed and names the delegator and both tasks
  expect_false(is.null(captured_context))
  expect_match(captured_context, "Manager", fixed = TRUE)
  expect_match(captured_context, "Do everything", fixed = TRUE)
  expect_match(captured_context, "Work on it", fixed = TRUE)
})

test_that("Flow delegate method works standalone", {
  mock_model <- MockModel$new()
  session <- ChatSession$new(model = mock_model)

  worker <- MockAgent$new("Worker", "Does work")

  flow <- Flow$new(session = session, model = mock_model, max_depth = 5)

  # Direct delegate call (not through run)
  worker$mock_run_func <- function(task, session, context, ...) {
    list(text = paste0("Completed: ", task))
  }

  result <- flow$delegate(worker, "Clean the data")

  expect_equal(result, "Completed: Clean the data")
  expect_equal(flow$depth(), 0)
})