test_that("run() executes scripts in topological order", {
  # Work in a dedicated, freshly created directory instead of the shared
  # session tempdir(): run() writes a .bakepipe.state file, and stray files in
  # a shared directory would leak between tests.
  temp_dir <- file.path(tempdir(), "test_topological_order")
  unlink(temp_dir, recursive = TRUE)
  dir.create(temp_dir, showWarnings = FALSE)
  old_wd <- getwd()
  setwd(temp_dir)

  # Registered before any fixture is written so the working directory is
  # restored and every generated file (including .bakepipe.state) is removed
  # even when setup or an expectation fails.
  on.exit({
    setwd(old_wd)
    unlink(temp_dir, recursive = TRUE)
  }, add = TRUE)

  writeLines("# Bakepipe root marker", "_bakepipe.R")

  writeLines("data,value\nA,1\nB,2", "input.csv")

  # Step 1: input.csv -> intermediate.csv (adds processed = value * 2).
  script1_content <- '
library(bakepipe)
data <- read.csv(file_in("input.csv"))
data$processed <- data$value * 2
write.csv(data, file_out("intermediate.csv"), row.names = FALSE)
'
  writeLines(script1_content, "01_process.R")

  # Step 2: intermediate.csv -> final.csv (sum of the processed column).
  script2_content <- '
library(bakepipe)
data <- read.csv(file_in("intermediate.csv"))
summary_data <- data.frame(total = sum(data$processed))
write.csv(summary_data, file_out("final.csv"), row.names = FALSE)
'
  writeLines(script2_content, "02_summarize.R")

  # capture.output() swallows console output; the assignment is evaluated in
  # this frame, so `result` holds the value returned by run().
  capture.output(result <- run())

  expect_true(file.exists("intermediate.csv"))
  expect_true(file.exists("final.csv"))

  # (1 + 2) * 2 = 6 can only be produced if step 1 ran before step 2.
  final_data <- read.csv("final.csv")
  expect_equal(final_data$total, 6)

  expect_type(result, "character")
  expect_true("intermediate.csv" %in% result)
  expect_true("final.csv" %in% result)
})

test_that("run() returns empty vector when no scripts exist", {
  # "No scripts" can only be guaranteed in a directory this test owns; the
  # shared session tempdir() may hold .R files or a stale .bakepipe.state left
  # behind by other tests. Start from an empty, dedicated directory.
  temp_dir <- file.path(tempdir(), "test_no_scripts")
  unlink(temp_dir, recursive = TRUE)
  dir.create(temp_dir, showWarnings = FALSE)
  old_wd <- getwd()
  setwd(temp_dir)

  # Registered before any fixture is written so cleanup always happens.
  on.exit({
    setwd(old_wd)
    unlink(temp_dir, recursive = TRUE)
  }, add = TRUE)

  # The root marker is the only file in the project.
  writeLines("# Bakepipe root marker", "_bakepipe.R")

  # capture.output() swallows console output; the assignment is evaluated in
  # this frame, so `result` holds the value returned by run().
  capture.output(result <- run())

  expect_type(result, "character")
  expect_length(result, 0)
})

test_that("run() handles scripts with no outputs", {
  # Use a dedicated, freshly created directory instead of the shared session
  # tempdir() so that files and .bakepipe.state from other tests cannot
  # influence this pipeline, and nothing this test creates leaks out.
  temp_dir <- file.path(tempdir(), "test_no_outputs")
  unlink(temp_dir, recursive = TRUE)
  dir.create(temp_dir, showWarnings = FALSE)
  old_wd <- getwd()
  setwd(temp_dir)

  # Registered before any fixture is written so cleanup always happens.
  on.exit({
    setwd(old_wd)
    unlink(temp_dir, recursive = TRUE)
  }, add = TRUE)

  writeLines("# Bakepipe root marker", "_bakepipe.R")

  writeLines("data,value\nA,1\nB,2", "input.csv")

  # The script declares an input via file_in() but never calls file_out().
  script_content <- '
library(bakepipe)
data <- read.csv(file_in("input.csv"))
cat("Processing", nrow(data), "rows\n")
'
  writeLines(script_content, "process.R")

  # capture.output() swallows console output; the assignment is evaluated in
  # this frame, so `result` holds the value returned by run().
  capture.output(result <- run())

  expect_type(result, "character")
  expect_length(result, 0)
})

test_that("run() stops on script execution error", {
  sandbox <- file.path(tempdir(), "test_script_error")
  dir.create(sandbox, showWarnings = FALSE)
  # setwd() returns the previous working directory, kept for restoration.
  original_wd <- setwd(sandbox)

  writeLines("# Bakepipe root marker", "_bakepipe.R")

  # A pipeline consisting of a single script that always fails.
  writeLines('
library(bakepipe)
stop("Script error for testing")
', "error_script.R")

  on.exit({
    setwd(original_wd)
    unlink(file.path(
      sandbox,
      c("_bakepipe.R", "error_script.R", ".bakepipe.state")
    ))
  })

  # Only a loose pattern is matched; the exact message is not pinned because
  # callr may wrap the underlying error differently.
  expect_error(
    capture.output(run()),
    "Error executing script.*error_script.R"
  )
})

test_that("run() respects dependency order", {
  sandbox <- file.path(tempdir(), "test_dependency_order")
  dir.create(sandbox, showWarnings = FALSE)
  # setwd() returns the previous working directory, kept for restoration.
  original_wd <- setwd(sandbox)

  writeLines("# Bakepipe root marker", "_bakepipe.R")

  writeLines("1,2,3", "data.csv")

  # First stage: data.csv -> step1.txt, prefixing each line with "step1:".
  writeLines('
library(bakepipe)
data <- readLines(file_in("data.csv"))
writeLines(paste("step1:", data), file_out("step1.txt"))
', "01_first.R")

  # Second stage: step1.txt -> step2.txt, prefixing each line with "step2:".
  writeLines('
library(bakepipe)
data <- readLines(file_in("step1.txt"))
writeLines(paste("step2:", data), file_out("step2.txt"))
', "02_second.R")

  on.exit({
    setwd(original_wd)
    unlink(file.path(
      sandbox,
      c(
        "_bakepipe.R", "data.csv", "01_first.R", "02_second.R",
        "step1.txt", "step2.txt", ".bakepipe.state"
      )
    ))
  })

  # Console output is swallowed; the assignment runs in this frame.
  capture.output(produced <- run())

  expect_true(file.exists("step1.txt"))
  expect_true(file.exists("step2.txt"))

  # The nested prefixes show that stage two consumed stage one's output.
  second_stage_lines <- readLines("step2.txt")
  expect_true(grepl("step2: step1: 1,2,3", second_stage_lines))

  expect_true("step1.txt" %in% produced)
  expect_true("step2.txt" %in% produced)
})

test_that("run() performs incremental execution based on state", {
  sandbox <- file.path(tempdir(), "test_incremental_execution")
  dir.create(sandbox, showWarnings = FALSE)
  # setwd() returns the previous working directory, kept for restoration.
  original_wd <- setwd(sandbox)

  writeLines("# Bakepipe root marker", "_bakepipe.R")
  writeLines("data,value\nA,1\nB,2", "input.csv")

  # Stage 1: input.csv -> intermediate.csv.
  writeLines('
library(bakepipe)
data <- read.csv(file_in("input.csv"))
data$processed <- data$value * 2
write.csv(data, file_out("intermediate.csv"), row.names = FALSE)
cat("Script 1 executed\n")
', "01_process.R")

  # Stage 2: intermediate.csv -> final.csv.
  writeLines('
library(bakepipe)
data <- read.csv(file_in("intermediate.csv"))
summary_data <- data.frame(total = sum(data$processed))
write.csv(summary_data, file_out("final.csv"), row.names = FALSE)
cat("Script 2 executed\n")
', "02_summarize.R")

  on.exit({
    setwd(original_wd)
    unlink(file.path(
      sandbox,
      c(
        "_bakepipe.R", "input.csv", "01_process.R", "02_summarize.R",
        "intermediate.csv", "final.csv", ".bakepipe.state"
      )
    ))
  })

  # Initial run: both outputs and the state file are expected to appear.
  capture.output(first_outputs <- run())
  expect_true(file.exists(".bakepipe.state"))
  expect_true(file.exists("intermediate.csv"))
  expect_true(file.exists("final.csv"))

  # Console output is not inspected; untouched modification times are used
  # as the evidence that a second run did not rewrite the outputs.
  intermediate_mtime_before <- file.mtime("intermediate.csv")
  final_mtime_before <- file.mtime("final.csv")

  # Wait so that a rewrite would produce a visibly later mtime.
  Sys.sleep(1)

  # Repeat run with nothing changed.
  capture.output(second_outputs <- run())

  expect_equal(file.mtime("intermediate.csv"), intermediate_mtime_before)
  expect_equal(file.mtime("final.csv"), final_mtime_before)

  # Nothing was produced, so nothing is reported.
  expect_length(second_outputs, 0)
})

test_that("run() detects changes and re-runs affected scripts", {
  sandbox <- file.path(tempdir(), "test_change_detection")
  dir.create(sandbox, showWarnings = FALSE)
  # setwd() returns the previous working directory, kept for restoration.
  original_wd <- setwd(sandbox)

  writeLines("# Bakepipe root marker", "_bakepipe.R")
  writeLines("data,value\nA,1\nB,2", "input.csv")

  # Stage 1: input.csv -> intermediate.csv (adds processed = value * 2).
  writeLines('
library(bakepipe)
data <- read.csv(file_in("input.csv"))
data$processed <- data$value * 2
write.csv(data, file_out("intermediate.csv"), row.names = FALSE)
', "01_process.R")

  # Stage 2: intermediate.csv -> final.csv (sum of the processed column).
  writeLines('
library(bakepipe)
data <- read.csv(file_in("intermediate.csv"))
summary_data <- data.frame(total = sum(data$processed))
write.csv(summary_data, file_out("final.csv"), row.names = FALSE)
', "02_summarize.R")

  on.exit({
    setwd(original_wd)
    unlink(file.path(
      sandbox,
      c(
        "_bakepipe.R", "input.csv", "01_process.R", "02_summarize.R",
        "intermediate.csv", "final.csv", ".bakepipe.state"
      )
    ))
  })

  # Baseline run to build the outputs and the state.
  capture.output(run())

  # Change the root input by adding a third row, after a pause so the
  # change is separated in time from the baseline run.
  Sys.sleep(1)
  writeLines("data,value\nA,1\nB,2\nC,3", "input.csv")

  # Remember when the outputs were last written.
  intermediate_mtime_before <- file.mtime("intermediate.csv")
  final_mtime_before <- file.mtime("final.csv")

  Sys.sleep(1)

  # The changed input is expected to trigger both stages again.
  capture.output(rerun_outputs <- run())

  # Both outputs must carry a later modification time.
  expect_gt(file.mtime("intermediate.csv"), intermediate_mtime_before)
  expect_gt(file.mtime("final.csv"), final_mtime_before)

  # Both outputs must be reported by run().
  expect_true("intermediate.csv" %in% rerun_outputs)
  expect_true("final.csv" %in% rerun_outputs)

  # The recomputed total reflects the extra row: (1 + 2 + 3) * 2 = 12.
  recomputed <- read.csv("final.csv")
  expect_equal(recomputed$total, 12)
})

test_that("run() updates state file after execution", {
  sandbox <- file.path(tempdir(), "test_state_update")
  dir.create(sandbox, showWarnings = FALSE)
  # setwd() returns the previous working directory, kept for restoration.
  original_wd <- setwd(sandbox)

  writeLines("# Bakepipe root marker", "_bakepipe.R")
  writeLines("data,value\nA,1", "input.csv")

  # Single-stage pipeline: input.csv -> output.csv.
  writeLines('
library(bakepipe)
data <- read.csv(file_in("input.csv"))
write.csv(data, file_out("output.csv"), row.names = FALSE)
', "process.R")

  on.exit({
    setwd(original_wd)
    unlink(file.path(
      sandbox,
      c(
        "_bakepipe.R", "input.csv", "process.R", "output.csv",
        ".bakepipe.state"
      )
    ))
  })

  capture.output(run())

  # The state file must exist after the run.
  expect_true(file.exists(".bakepipe.state"))

  # The state file is read as CSV with file, status and checksum columns.
  state <- read.csv(".bakepipe.state", stringsAsFactors = FALSE)
  expect_true("process.R" %in% state$file)
  expect_true("input.csv" %in% state$file)
  expect_true("output.csv" %in% state$file)

  # Every tracked entry must be marked fresh after a successful run.
  expect_true(all(state$status == "fresh"))

  # The script, its input and its output must each have a non-empty checksum.
  pipeline_files <- c("process.R", "input.csv", "output.csv")
  tracked_checksums <- state$checksum[state$file %in% pipeline_files]
  expect_true(all(nchar(tracked_checksums) > 0))
})

test_that("run() executes scripts in isolated environments", {
  sandbox <- file.path(tempdir(), "test_isolated_execution")
  dir.create(sandbox, showWarnings = FALSE)
  # setwd() returns the previous working directory, kept for restoration.
  original_wd <- setwd(sandbox)

  writeLines("# Bakepipe root marker", "_bakepipe.R")
  writeLines("data,value\nA,1\nB,2", "input.csv")

  # Stage 1 defines `secret_variable` in addition to writing its output.
  writeLines('
library(bakepipe)
data <- read.csv(file_in("input.csv"))
secret_variable <- "should_not_be_accessible"
data$processed <- data$value * 2
write.csv(data, file_out("intermediate.csv"), row.names = FALSE)
', "01_process.R")

  # Stage 2 only needs the file written by stage 1, and deliberately fails
  # if `secret_variable` from stage 1 is visible to it.
  writeLines('
library(bakepipe)
data <- read.csv(file_in("intermediate.csv"))
# secret_variable should not be available from previous script
if (exists("secret_variable")) {
  stop("Script environments are not isolated - secret_variable is accessible")
}
summary_data <- data.frame(total = sum(data$processed))
write.csv(summary_data, file_out("final.csv"), row.names = FALSE)
', "02_summarize.R")

  on.exit({
    setwd(original_wd)
    unlink(file.path(
      sandbox,
      c(
        "_bakepipe.R", "input.csv", "01_process.R", "02_summarize.R",
        "intermediate.csv", "final.csv", ".bakepipe.state"
      )
    ))
  })

  # With isolated scripts, stage 2's guard never fires and the run completes.
  capture.output(pipeline_outputs <- run())

  expect_true(file.exists("intermediate.csv"))
  expect_true(file.exists("final.csv"))

  # (1 + 2) * 2 = 6.
  summary_row <- read.csv("final.csv")
  expect_equal(summary_row$total, 6)
})

test_that("run() scripts cannot pollute global environment", {
  sandbox <- file.path(tempdir(), "test_no_global_pollution")
  dir.create(sandbox, showWarnings = FALSE)
  # setwd() returns the previous working directory, kept for restoration.
  original_wd <- setwd(sandbox)

  writeLines("# Bakepipe root marker", "_bakepipe.R")
  writeLines("test_value", "input.txt")

  # The script assigns `global_pollution_test` at its own top level.
  writeLines('
library(bakepipe)
content <- readLines(file_in("input.txt"))
global_pollution_test <- "this_should_not_appear_globally"
writeLines(paste("Processed:", content), file_out("output.txt"))
', "process.R")

  on.exit({
    setwd(original_wd)
    unlink(file.path(
      sandbox,
      c(
        "_bakepipe.R", "input.txt", "process.R", "output.txt",
        ".bakepipe.state"
      )
    ))
  })

  # Precondition: the name is not already defined globally.
  expect_false(exists("global_pollution_test", envir = .GlobalEnv))

  capture.output(run())

  # Postcondition: the script's assignment did not reach the global
  # environment, yet the script did run and wrote its output.
  expect_false(exists("global_pollution_test", envir = .GlobalEnv))
  expect_true(file.exists("output.txt"))
})