#' Helper functions for downloading BIDS example datasets
#' 
#' These functions download datasets from the official BIDS examples repository
#' at https://github.com/bids-standard/bids-examples for use in tests.

library(httr)
library(fs)
library(jsonlite)

# Version pinning for reproducibility
BIDS_EXAMPLES_REPO <- "https://github.com/bids-standard/bids-examples"
BIDS_EXAMPLES_VERSION <- "master"  # Could pin to specific tag like "v1.9.0"

# Session-level cache for test performance
.bids_cache <- new.env()

#' Check if we have internet connectivity
#' @return logical
has_internet <- function() {
  tryCatch({
    httr::GET("https://github.com", httr::timeout(5))
    TRUE
  }, error = function(e) {
    FALSE
  })
}

#' Skip test if offline
skip_if_offline <- function() {
  if (!has_internet()) {
    testthat::skip("Internet connection required for BIDS examples")
  }
}

#' Get the base cache directory for BIDS examples
#' @return character path to cache directory
get_bids_cache_dir <- function() {
  cache_dir <- file.path(tempdir(), "bidser_bids_examples", Sys.getpid())
  if (!dir.exists(cache_dir)) {
    dir.create(cache_dir, recursive = TRUE)
  }
  return(cache_dir)
}

#' Download and extract BIDS examples repository
#' @param cache_dir Directory to store the downloaded repo
#' @param force_refresh Force re-download even if cached
#' @return Path to the extracted repository
download_bids_repo <- function(cache_dir = NULL, force_refresh = FALSE) {
  if (is.null(cache_dir)) {
    cache_dir <- get_bids_cache_dir()
  }
  
  repo_path <- file.path(cache_dir, "bids-examples-master")
  
  # Check if already cached
  if (dir.exists(repo_path) && !force_refresh) {
    return(repo_path)
  }
  
  # Skip if offline
  skip_if_offline()
  
  message("Downloading BIDS examples repository...")
  
  # Download the repository as a zip file
  zip_url <- paste0(BIDS_EXAMPLES_REPO, "/archive/refs/heads/", BIDS_EXAMPLES_VERSION, ".zip")
  zip_path <- file.path(cache_dir, "bids-examples.zip")
  
  # Create cache directory
  if (!dir.exists(cache_dir)) {
    dir.create(cache_dir, recursive = TRUE)
  }
  
  # Download
  tryCatch({
    response <- httr::GET(zip_url, httr::write_disk(zip_path, overwrite = TRUE))
    httr::stop_for_status(response)
  }, error = function(e) {
    stop("Failed to download BIDS examples: ", e$message)
  })
  
  # Extract
  tryCatch({
    utils::unzip(zip_path, exdir = cache_dir)
    file.remove(zip_path)  # Clean up zip file
  }, error = function(e) {
    stop("Failed to extract BIDS examples: ", e$message)
  })
  
  if (!dir.exists(repo_path)) {
    stop("Expected directory not found after extraction: ", repo_path)
  }
  
  message("BIDS examples downloaded successfully")
  return(repo_path)
}

#' Create minimal participants.tsv file if missing
#' @param dataset_path Path to the dataset
#' @return logical indicating if file was created
create_participants_tsv_if_missing <- function(dataset_path) {
  participants_file <- file.path(dataset_path, "participants.tsv")
  
  # Check if file already exists
  if (file.exists(participants_file)) {
    return(FALSE)
  }
  
  # Find subject directories
  all_dirs <- list.dirs(dataset_path, full.names = FALSE, recursive = FALSE)
  subject_dirs <- all_dirs[grepl("^sub-", all_dirs)]
  
  if (length(subject_dirs) == 0) {
    return(FALSE)
  }
  
  # Extract subject IDs
  subject_ids <- gsub("^sub-", "", subject_dirs)
  
  # Create minimal participants.tsv
  participants_df <- data.frame(
    participant_id = paste0("sub-", subject_ids),
    stringsAsFactors = FALSE
  )
  
  # Write the file
  utils::write.table(participants_df, participants_file, 
                     sep = "\t", row.names = FALSE, quote = FALSE)
  
  message("Created minimal participants.tsv with ", length(subject_ids), " subjects")
  return(TRUE)
}

#' Restructure derivatives-only dataset to expected format
#' @param dataset_path Path to the dataset
#' @param pipeline_name Name of the pipeline (e.g., "fmriprep")
#' @return logical indicating if restructuring was done
restructure_derivatives_dataset <- function(dataset_path, pipeline_name = "fmriprep") {
  # Check if derivatives directory already exists
  derivatives_dir <- file.path(dataset_path, "derivatives", pipeline_name)
  if (dir.exists(derivatives_dir)) {
    return(FALSE)  # Already structured correctly
  }
  
  # Find subject directories in root
  all_dirs <- list.dirs(dataset_path, full.names = FALSE, recursive = FALSE)
  subject_dirs <- all_dirs[grepl("^sub-", all_dirs)]
  
  if (length(subject_dirs) == 0) {
    return(FALSE)  # No subjects to move
  }
  
  # Create derivatives directory structure
  dir.create(derivatives_dir, recursive = TRUE)
  
  # Move subject directories
  for (sub_dir in subject_dirs) {
    old_path <- file.path(dataset_path, sub_dir)
    new_path <- file.path(derivatives_dir, sub_dir)
    
    if (dir.exists(old_path)) {
      file.rename(old_path, new_path)
    }
  }
  
  # Create pipeline dataset_description.json
  pipeline_desc <- list(
    Name = pipeline_name,
    BIDSVersion = "1.4.0",
    PipelineDescription = list(
      Name = pipeline_name,
      Version = "unknown"
    )
  )
  
  desc_file <- file.path(derivatives_dir, "dataset_description.json")
  jsonlite::write_json(pipeline_desc, desc_file, pretty = TRUE, auto_unbox = TRUE)
  
  message("Restructured derivatives dataset for ", pipeline_name)
  return(TRUE)
}

#' Create a mock derivatives dataset compatible with preproc_scans
#' @param base_dataset_name A base dataset to use for raw data structure
#' @param cache_dir Directory to cache the result
#' @return Path to the mock derivatives dataset
create_mock_derivatives_dataset <- function(base_dataset_name = "ds001", cache_dir = NULL) {
  if (is.null(cache_dir)) {
    cache_dir <- get_bids_cache_dir()
  }
  
  mock_path <- file.path(cache_dir, "mock_derivatives")
  
  # Check if already exists
  if (dir.exists(mock_path)) {
    return(mock_path)
  }
  
  # Download base dataset
  base_path <- download_bids_example(base_dataset_name, cache_dir, force_refresh = FALSE)
  
  # Create mock derivatives structure
  dir.create(mock_path, recursive = TRUE)
  
  # Copy basic structure from base dataset
  file.copy(file.path(base_path, "participants.tsv"), mock_path, overwrite = TRUE)
  if (file.exists(file.path(base_path, "dataset_description.json"))) {
    file.copy(file.path(base_path, "dataset_description.json"), mock_path, overwrite = TRUE)
  }
  
  # Create derivatives/fmriprep structure
  fmriprep_dir <- file.path(mock_path, "derivatives", "fmriprep")
  dir.create(fmriprep_dir, recursive = TRUE)
  
  # Create fmriprep dataset_description.json
  fmriprep_desc <- list(
    Name = "fMRIPrep",
    BIDSVersion = "1.4.0",
    PipelineDescription = list(
      Name = "fMRIPrep",
      Version = "20.2.3",
      Description = "Mock fMRIPrep derivatives for testing"
    )
  )
  
  jsonlite::write_json(fmriprep_desc, file.path(fmriprep_dir, "dataset_description.json"), 
                       pretty = TRUE, auto_unbox = TRUE)
  
  # Find subjects in base dataset
  base_subjects <- list.dirs(base_path, full.names = FALSE, recursive = FALSE)
  subjects <- base_subjects[grepl("^sub-", base_subjects)]
  
  # Create mock preprocessed files for each subject
  for (subject in subjects[1:min(3, length(subjects))]) {  # Limit to first 3 subjects
    sub_fmriprep_dir <- file.path(fmriprep_dir, subject)
    func_dir <- file.path(sub_fmriprep_dir, "func")
    dir.create(func_dir, recursive = TRUE)
    
    # Create mock preproc files using proper BIDS naming convention
    # Pattern: sub-XX_task-YY_run-ZZ_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz
    tasks <- c("rest", "task1")
    runs <- c("01", "02")
    
    for (task in tasks) {
      for (run in runs) {
        # Create dummy preproc file with proper BIDS naming
        preproc_filename <- paste0(subject, "_task-", task, "_run-", run, 
                                 "_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz")
        preproc_path <- file.path(func_dir, preproc_filename)
        
        # Create an empty file (just for testing)
        file.create(preproc_path)
        
        # Also create confounds file
        confounds_filename <- paste0(subject, "_task-", task, "_run-", run, "_desc-confounds_timeseries.tsv")
        confounds_path <- file.path(func_dir, confounds_filename)
        file.create(confounds_path)
        
        # Create brain mask file for this run
        mask_filename <- paste0(subject, "_task-", task, "_run-", run, 
                               "_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz")
        mask_path <- file.path(func_dir, mask_filename)
        file.create(mask_path)
      }
    }
    
    # Create anatomical derivatives too
    anat_dir <- file.path(sub_fmriprep_dir, "anat")
    dir.create(anat_dir, recursive = TRUE)
    
    # Create T1w preproc file with proper naming
    t1w_filename <- paste0(subject, "_space-MNI152NLin2009cAsym_desc-preproc_T1w.nii.gz")
    t1w_path <- file.path(anat_dir, t1w_filename)
    file.create(t1w_path)
    
    # Create brain mask for anatomical
    brain_mask_filename <- paste0(subject, "_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz")
    brain_mask_path <- file.path(anat_dir, brain_mask_filename)
    file.create(brain_mask_path)
  }
  
  message("Created mock derivatives dataset with proper BIDS naming conventions")
  return(mock_path)
}

#' Download a specific BIDS example dataset
#' @param dataset_name Name of the dataset (e.g., "ds001", "7t_trt")
#' @param cache_dir Directory to cache downloads (default: tempdir)
#' @param force_refresh Force re-download even if cached
#' @return Path to the specific dataset directory
download_bids_example <- function(dataset_name, cache_dir = NULL, force_refresh = FALSE) {
  # Special case for mock derivatives
  if (dataset_name == "mock_derivatives") {
    return(create_mock_derivatives_dataset(cache_dir = cache_dir))
  }
  
  # Check session cache first
  cache_key <- paste0(dataset_name, "_", force_refresh)
  if (exists(cache_key, envir = .bids_cache) && !force_refresh) {
    dataset_path <- .bids_cache[[cache_key]]
    if (dir.exists(dataset_path)) {
      return(dataset_path)
    }
  }
  
  # Download the full repo
  repo_path <- download_bids_repo(cache_dir, force_refresh)
  
  # Check if the specific dataset exists
  dataset_path <- file.path(repo_path, dataset_name)
  if (!dir.exists(dataset_path)) {
    available_datasets <- list.dirs(repo_path, full.names = FALSE, recursive = FALSE)
    available_datasets <- available_datasets[available_datasets != ""]
    stop("Dataset '", dataset_name, "' not found. Available datasets: ", 
         paste(head(available_datasets, 20), collapse = ", "),
         if (length(available_datasets) > 20) "..." else "")
  }
  
  # Create participants.tsv if missing (common for derivatives datasets)
  create_participants_tsv_if_missing(dataset_path)
  
  # Restructure if it's a derivatives-only dataset
  if (grepl("fmriprep", dataset_name, ignore.case = TRUE)) {
    restructure_derivatives_dataset(dataset_path, "fmriprep")
  }
  
  # Cache the result
  .bids_cache[[cache_key]] <- dataset_path
  
  return(dataset_path)
}

#' Setup a test dataset with proper cleanup
#' @param dataset_name Name of the dataset to download
#' @param cleanup Whether to cleanup after test (default: TRUE)
#' @return Path to the dataset
setup_test_dataset <- function(dataset_name, cleanup = TRUE) {
  dataset_path <- download_bids_example(dataset_name)
  
  # Register cleanup if requested
  if (cleanup) {
    # Note: We don't actually delete here since we're using session cache
    # The temp directory will be cleaned up when R session ends
    # But we could add more aggressive cleanup if needed
  }
  
  return(dataset_path)
}

#' List available BIDS example datasets
#' @param cache_dir Directory to check for cached repo
#' @return Character vector of available dataset names
list_bids_examples <- function(cache_dir = NULL) {
  tryCatch({
    repo_path <- download_bids_repo(cache_dir, force_refresh = FALSE)
    available <- list.dirs(repo_path, full.names = FALSE, recursive = FALSE)
    available <- available[available != ""]
    
    # Filter out non-dataset directories
    datasets <- available[!available %in% c(".github", "docs", ".git")]
    
    # Add our mock dataset
    datasets <- c(datasets, "mock_derivatives")
    
    return(sort(datasets))
  }, error = function(e) {
    message("Could not list BIDS examples: ", e$message)
    return(character(0))
  })
}

#' Cleanup BIDS examples cache
#' @param cache_dir Directory to clean (default: all temp cache)
cleanup_bids_cache <- function(cache_dir = NULL) {
  if (is.null(cache_dir)) {
    cache_dir <- file.path(tempdir(), "bidser_bids_examples")
  }
  
  if (dir.exists(cache_dir)) {
    unlink(cache_dir, recursive = TRUE)
    message("BIDS examples cache cleaned")
  }
  
  # Clear session cache
  rm(list = ls(envir = .bids_cache), envir = .bids_cache)
}