library(testthat)

# Copy generate_test_data from test_colocboost.R
generate_test_data <- function(n = 100, p = 20, L = 2, seed = 42) {
  set.seed(seed)
  
  # Generate X with LD structure
  sigma <- matrix(0, p, p)
  for (i in 1:p) {
    for (j in 1:p) {
      sigma[i, j] <- 0.9^abs(i - j)
    }
  }
  X <- MASS::mvrnorm(n, rep(0, p), sigma)
  colnames(X) <- paste0("SNP", 1:p)
  
  # Generate true effects - create a shared causal variant
  true_beta <- matrix(0, p, L)
  true_beta[5, 1] <- 0.5  # SNP5 affects trait 1
  true_beta[5, 2] <- 0.4  # SNP5 also affects trait 2 (colocalized)
  true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2
  
  # Generate Y with some noise
  Y <- matrix(0, n, L)
  for (l in 1:L) {
    Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1)
  }
  
  # Prepare LD matrix
  LD <- cor(X)
  
  # Return test objects
  list(
    X = X,
    Y = Y, 
    LD = LD,
    true_beta = true_beta
  )
}

# Use the existing test data generation function
# But extend it to create summary statistics directly
generate_sumstat_test_data <- function(n = 100, p = 20, L = 2, seed = 42) {
  # Get individual level data first
  test_data <- generate_test_data(n, p, L, seed)
  
  # Generate summary statistics from individual data
  X <- test_data$X
  Y <- test_data$Y
  LD <- test_data$LD
  
  # Calculate beta, se, and z-scores
  beta <- matrix(0, ncol(X), ncol(Y))
  se <- matrix(0, ncol(X), ncol(Y))
  z <- matrix(0, ncol(X), ncol(Y))
  
  for (i in 1:ncol(Y)) {
    output = matrix(0,ncol(X),2)
    for (j in 1:ncol(X)) {
      fit = summary(lm(Y[,i] ~ X[,j]))$coef
      if (nrow(fit) == 2)
        output[j,] = as.vector(fit[2,1:2])
      else
        output[j,] = c(0,0)
    }
    beta[,i] <- output[,1]
    se[,i] <- output[,2]
    z[,i] <- beta[,i]/se[,i]
  }
  
  # Create summary statistics data frames
  sumstat_list <- list()
  for (i in 1:ncol(Y)) {
    sumstat_list[[i]] <- data.frame(
      beta = beta[,i],
      sebeta = se[,i],
      z = z[,i],
      n = nrow(X),
      variant = colnames(X)
    )
  }
  
  # Add effect matrices for HyPrColoc format
  rownames(beta) <- rownames(se) <- colnames(X)
  colnames(beta) <- colnames(se) <- paste0("Y", 1:ncol(Y))
  
  # Return all formats
  list(
    X = X,
    Y = Y,
    LD = LD,
    sumstat = sumstat_list,
    effect_est = beta,
    effect_se = se,
    effect_n = rep(n, ncol(Y))
  )
}

# Create summary statistics test data
test_sumstat_data <- generate_sumstat_test_data()

# Test 1: Basic summary statistics input
test_that("colocboost runs with basic summary statistics format", {
  # Run colocboost with sumstat and single LD matrix
  warnings <- capture_warnings({
    result <- colocboost(
        sumstat = test_sumstat_data$sumstat,
        LD = test_sumstat_data$LD,
        M = 10,  # Small number of iterations for testing
        output_level = 2  # More detailed output for testing
    )
  })

  # Check if any of the expected warning patterns are present
  expect_true(
    any(grepl("did not coverage", warnings))
  )
  
  # Test that we get a colocboost object
  expect_s3_class(result, "colocboost")
  
  # Check structure of results
  expect_type(result$data_info, "list")
  expect_type(result$model_info, "list")
  
  # Check dimensions
  expect_equal(length(result$data_info$variables), ncol(test_sumstat_data$X))
  expect_equal(result$data_info$n_outcomes, 2)
})

# Test 2: HyPrColoc compatible format
test_that("colocboost runs with HyPrColoc compatible format", {
  # Run colocboost with effect matrices
  warnings <- capture_warnings({
    result <- colocboost(
        effect_est = test_sumstat_data$effect_est,
        effect_se = test_sumstat_data$effect_se,
        effect_n = test_sumstat_data$effect_n,
        LD = test_sumstat_data$LD,
        M = 10,  # Small number of iterations for testing
        output_level = 2  # More detailed output for testing
    )
  })

  # Check if any of the expected warning patterns are present
  expect_true(
    any(grepl("did not coverage", warnings))
  )
  
  # Test that we get a colocboost object
  expect_s3_class(result, "colocboost")
  
  # Check dimensions
  expect_equal(length(result$data_info$variables), ncol(test_sumstat_data$X))
  expect_equal(result$data_info$n_outcomes, 2)
})

# Test 3: Multiple LD matrices with summary statistics
test_that("colocboost runs with matched LD matrices", {
  # Create a list of identical LD matrices for demonstration
  LD_list <- list(test_sumstat_data$LD, test_sumstat_data$LD)
  
  # Run colocboost with summary statistics and multiple LD matrices
  warnings <- capture_warnings({
    result <- colocboost(
        sumstat = test_sumstat_data$sumstat,
        LD = LD_list,
        M = 10,  # Small number of iterations for testing
        output_level = 2  # More detailed output for testing
    )
  })

  # Check if any of the expected warning patterns are present
  expect_true(
    any(grepl("did not coverage", warnings))
  )
  
  # Test that we get a colocboost object
  expect_s3_class(result, "colocboost")
  
  # Check dimensions
  expect_equal(length(result$data_info$variables), ncol(test_sumstat_data$X))
  expect_equal(result$data_info$n_outcomes, 2)
})

# Test 4: Arbitrary LD matrices with dictionary mapping
test_that("colocboost runs with dictionary-mapped LD matrices", {
  # Create a list of identical LD matrices for demonstration
  LD_list <- list(test_sumstat_data$LD, test_sumstat_data$LD)
  
  # Create a dictionary mapping sumstat indices to LD indices
  # First sumstat uses first LD, second sumstat uses second LD
  dict_sumstatLD <- matrix(c(1:2, 1:2), ncol = 2)
  
  # Run colocboost with dictionary mapping
  warnings <- capture_warnings({
    result <- colocboost(
        sumstat = test_sumstat_data$sumstat,
        LD = LD_list,
        dict_sumstatLD = dict_sumstatLD,
        M = 10,  # Small number of iterations for testing
        output_level = 2  # More detailed output for testing
    )
  })

  # Check if any of the expected warning patterns are present
  expect_true(
    any(grepl("did not coverage", warnings))
  )
  
  # Test that we get a colocboost object
  expect_s3_class(result, "colocboost")
  
  # Check dimensions
  expect_equal(length(result$data_info$variables), ncol(test_sumstat_data$X))
  expect_equal(result$data_info$n_outcomes, 2)
})

# Test 5: Missing LD matrix (LD-free approach)
test_that("colocboost runs with missing LD matrix", {
  # Run colocboost without LD matrix (should default to diagonal)
  warnings <- capture_warnings({
    result <- colocboost(
        sumstat = test_sumstat_data$sumstat,
        M = 1,  # Only one iteration expected in LD-free case
        output_level = 2  # More detailed output for testing
    )
  })

  # Check if any of the expected warning patterns are present
  expect_true(
    any(grepl("The smallest number of variables across outcomes is 20 < 100", warnings)) 
  )
  
  # Test that we get a colocboost object
  expect_s3_class(result, "colocboost")
  
  # Check dimensions
  expect_equal(length(result$data_info$variables), ncol(test_sumstat_data$X))
  expect_equal(result$data_info$n_outcomes, 2)
})

# Test 6: Error handling for mismatched inputs
test_that("colocboost handles mismatched inputs correctly", {
  # Create mismatched effect matrices
  bad_effect_est <- test_sumstat_data$effect_est[1:(nrow(test_sumstat_data$effect_est)-1), ]
  
  # Expect error with mismatched dimensions
  expect_warning(
    colocboost(
      effect_est = bad_effect_est,
      effect_se = test_sumstat_data$effect_se
    ),
    "Error: effect_est and effect_se should be the same dimension!"
  )
})

# Test 7: Summary statistics without sample size
test_that("colocboost handles missing sample size correctly", {
  # Create summary stats without sample size
  sumstat_no_n <- lapply(test_sumstat_data$sumstat, function(df) {
    df$n <- NULL
    return(df)
  })
  
  # Should run but give a warning about missing sample size
  warnings <- capture_warnings({
    result <- colocboost(
      sumstat = sumstat_no_n,
      LD = test_sumstat_data$LD,
      M = 10,
      output_level = 2
    )
  })

  # Check if any of the expected warning patterns are present
  expect_true(
    any(grepl("Providing the sample size", warnings)) || 
    any(grepl("did not coverage", warnings))
  )
  
  # Still should get a valid result
  expect_s3_class(result, "colocboost")
})