test_that("no leakage: target never uses nuisance trained on its evaluation fold (trace-based)", {

  # We make folds explicit in the data, and we provide a deterministic fold_split().
  K <- 5L
  n_per_fold <- 3L
  dat <- data.frame(
    fold = rep(seq_len(K), each = n_per_fold),
    x = 0, y = 0
  )

  fold_split_det <- function(data, K) as.integer(data$fold)

  # Helper: extract the list of training folds from the model trace string
  extract_train_folds <- function(model_trace) {
    # model_trace looks like: "nuis_y trained_on [1,2,3]"
    inside <- sub(".*trained_on \\[([^\\]]+)\\].*", "\\1", model_trace)
    as.integer(strsplit(inside, ",", fixed = TRUE)[[1]])
  }

  # For counting target evaluations (one per panel)
  log_env <- new.env(parent = emptyenv())
  log_env$n_calls <- 0L
  log_env$msgs <- character(0)

  # Nuisance: fit() returns a string describing training folds.
  # predict() returns strings describing which model predicted on which eval fold.
  nuis_y <- create_nuisance(
    train_fold = 1L, # simplest: train on K-1 folds in standard cross-fitting
    fit = function(data, ...) {
      tr <- paste(sort(unique(data$fold)), collapse = ",")
      paste0("nuis_y trained_on [", tr, "]")
    },
    predict = function(model, data, ...) {
      ev <- paste(sort(unique(data$fold)), collapse = ",")
      rep(paste0(model, " predict_on [", ev, "]"), nrow(data))
    }
  )

  # Aggregators that work for character outputs (do not call mean())
  pick_first <- function(xs) xs[[1]]

  # Target: returns a trace string; ALSO asserts no leakage.
  target_trace <- function(data, nuis_y, ...) {
    log_env$n_calls <- log_env$n_calls + 1L

    eval_fold <- unique(data$fold)
    # With eval_fold = 1, target should see exactly one fold
    if (length(eval_fold) != 1L) {
      stop("Test expects eval window to be one fold; got: ", paste(eval_fold, collapse = ","))
    }

    # nuis_y is a vector of strings; take one representative and parse training folds
    one_pred <- nuis_y[[1]]
    # one_pred starts with the model string that contains trained_on [...]
    # e.g. "nuis_y trained_on [1,2,3,4] predict_on [5]"
    model_part <- sub("(nuis_y trained_on \\[[^\\]]+\\]).*", "\\1", one_pred)
    train_folds <- extract_train_folds(model_part)

    if (eval_fold %in% train_folds) {
      stop("Leakage detected: eval fold ", eval_fold,
           " appears in train folds [", paste(train_folds, collapse = ","), "]")
    }

    msg <- paste0(
      "target evaluated on fold [", eval_fold, "]",
      " with nuisance prediction: ", one_pred
    )
    log_env$msgs <- c(log_env$msgs, msg)
    msg
  }

  # Run the test for each allocation scheme (all must respect no leakage)
  for (alloc in c("independence", "overlap", "disjoint")) {

    log_env$n_calls <- 0L
    log_env$msgs <- character(0)

    method <- create_method(
      target = target_trace,
      list_nuisance = list(nuis_y = nuis_y),
      folds = K,
      repeats = 1L,
      eval_fold = 1L,
      mode = "estimate",
      fold_allocation = alloc,
      aggregate_panels  = pick_first,
      aggregate_repeats = pick_first
    )

    res <- crossfit_multi(
      data = dat,
      methods = list(trace = method),
      fold_split = fold_split_det,
      seed = 123,
      aggregate_panels  = pick_first,
      aggregate_repeats = pick_first
    )

    # If leakage happened, the target would have stopped with an error.
    expect_true(is.list(res))
    expect_true(is.character(res$estimates$trace))

    # target called once per panel
    expect_equal(log_env$n_calls, K)

    # Basic sanity: each msg should mention "target evaluated on fold"
    expect_true(all(grepl("target evaluated on fold", log_env$msgs, fixed = TRUE)))
  }
})