#' @importFrom stats mahalanobis 
#'
#' @importFrom stats cov
#'
#'
#' @importFrom stats mahalanobis
#' @importFrom stats cov
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 theme_minimal
#' @importFrom reshape2 melt 
#'
#'
#'
#' @importFrom reshape2 melt  # Added reshape2 import
#'
#' @name cmahalanobis
#' @title Calculate the Mahalanobis distance for each species
#'
#' This function takes a list of data frames as input, where each data frame contains the observations of a species, and returns a matrix with the Mahalanobis distances between each pair of species.
#'
#' @param dataset A list of data frames, where each data frame contains the observations of a species.
#' 
#' 
#' @param plot Logical, if TRUE, a plot of the Mahalanobis distances is displayed.
#' @param p.value Logical, if TRUE, a matrix of p-values for the distances is returned.
#' @param plot_title The title to be used for the plot if plot is TRUE.
#' @return A list containing a matrix with the Mahalanobis distances between each pair of groups, and optionally a matrix of p-values and the plot.
#' 
#' 
#' 
#' @examples
#' # Example with the iris dataset
#' library(stats)
#' # Split the data into 3 parts for each species
#' setosa <- subset(iris, Species == "setosa")
#' setosa <- setosa[,-5]
#' versicolor <- subset(iris, Species == "versicolor")
#' versicolor <- versicolor[,-5]
#' virginica <- subset(iris, Species == "virginica")
#' virginica <- virginica[,-5]
#'
#' # Create a list with the three groups of flowers
#' groups <- list(setosa, versicolor, virginica)
#'
#' # Calculate the Mahalanobis distance with the cmahalanobis function
#' cmahalanobis(groups, plot = TRUE, p.value = FALSE, plot_title = "Mahalanobis Distance Between Groups")
#' 
#' ceuclide(groups, plot = TRUE, p.value = TRUE, num.permutations = 1000, plot_title = "Euclidean Distance Between Groups")
#' cmanhattan(groups, plot = TRUE, p.value = TRUE, num.permutations = 1000, plot_title = "Manhattan Distance Between Groups")
#' ccebyshev(groups, plot = TRUE, p.value = TRUE, num.permutations = 1000, plot_title = "Manhattan Distance Between Groups")
#'
#'
#' @export
cmahalanobis <- function(dataset, plot = TRUE, p.value = FALSE, plot_title = "Mahalanobis Distance Between Groups") {
  # Verify that the input is a list of data-frames
  if (!is.list(dataset)) {
    stop("The input must be a list of dataframes")
  }
  
  # Verify that each element of the list is a data frame
  for (i in seq_along(dataset)) {
    if (!is.data.frame(dataset[[i]])) {
      stop("Each element of the list must be a dataframe")
    }
  }
  
  # Remove variables not numerical
  dataset <- lapply(dataset, function(df) {
    num_vars <- sapply(df, is.numeric)
    df <- df[, num_vars]
    return(df)
  })
  
  # Replace missing values with arithmetic mean in each dataframe into the list 
  dataset <- lapply(dataset, impute_with_mean)
  
  # Obtain the number of groups
  n <- length(dataset)
  
  # Pre-calculate means and covariances
  means <- lapply(dataset, colMeans)
  covariances <- lapply(dataset, cov)
  
  # Create a empty matrix to store distances
  distances <- matrix(0, nrow = n, ncol = n)
  
  # Calculate Mahalanobis distance between each couple of groups
  for (i in 1:n) {
    mean_i <- means[[i]]
    cov_i <- covariances[[i]]
    for (j in 1:n) {
      if (i != j) {
        distances[i, j] <- mean(mahalanobis(x = dataset[[j]], center = mean_i, cov = cov_i))
      }
    }
  }
  
  # Initialize p-value matrix as NULL
  p_values <- NULL
  
  # Calculate p-values only if it is requested by users
  if (p.value) {
    p_values <- matrix(NA, nrow = n, ncol = n)
    for (i in 1:n) {
      df <- ncol(dataset[[i]]) # Degrees of freedom
      for (j in 1:n) {
        if (i != j) {
          # Calculate p-values using chi-squared distribution
          p_values[i, j] <- pchisq(distances[i, j], df, lower.tail = FALSE)
        }
      }
    }
  }
  
  # If plot is TRUE, call the "plot_mahalanobis_distances" function and print the plot
  if (plot) {
    print(plot_mahalanobis_distances(distances, plot_title))
  }
  
  # Return a list containing distances and optionally p-values
  return(list(distances = distances, p_values = p_values))
}

# Auxiliary function to impute missing values with the mean
impute_with_mean <- function(df) {
  for (i in seq_along(df)) {
    if (any(is.na(df[[i]]) | is.nan(df[[i]]))) {
      df[[i]][is.na(df[[i]]) | is.nan(df[[i]])] <- mean(df[[i]], na.rm = TRUE)
    }
  }
  return(df)
}

# Auxiliary function to print the Mahalanobis distances plot
plot_mahalanobis_distances <- function(distances, plot_title) {
  requireNamespace("ggplot2")
  requireNamespace("reshape2")
  output_df <- as.data.frame(distances)
  output_df$Species <- rownames(output_df)
  
  output_df_long <- melt(output_df, id.vars = "Species")
  colnames(output_df_long) <- c("Species", "Comparison", "Distance")
  
  ggplot2::ggplot(output_df_long, ggplot2::aes(x = Species, y = Distance, fill = Comparison)) +
    ggplot2::geom_bar(stat = "identity", position = "dodge") +
    ggplot2::labs(title = plot_title, x = "Species", y = "Distance", fill = "Comparison") +
    ggplot2::theme_minimal()
}

#' Calculate Euclidean distance
#'
#' @param dataset A list of dataframes
#' @param plot If TRUE, shows a plot of distances
#' @param p.value If TRUE, calculates p-values for distances
#' @param num.permutations Number of permutations to calculate p-values
#' @param plot_title The title of plot
#' @return A list containing distances and, optionally, p-values
#' @export
ceuclide <- function(dataset, plot = FALSE, p.value = FALSE, num.permutations = 1000, plot_title = "Euclidean Distance Between Groups") {
  # Verify that input is a list of dataframes
  if (!is.list(dataset)) {
    stop("The input must be a list of dataframes")
  }
  
  # Control that each element in the list is a dataframe with at least a numerical variable
  for (df in dataset) {
    if (!is.data.frame(df) || !any(sapply(df, is.numeric))) {
      stop("Ogni elemento della lista deve essere un data frame con almeno una variabile numerica")
    }
  }
  
  # Remove non-numerical variable (Na and NaN) and replace it with arithmetic mean of the variable where they appear 
  dataset <- lapply(dataset, function(df) {
    num_vars <- sapply(df, is.numeric)
    df <- df[, num_vars]
    df <- impute_with_mean(df)
    return(df)
  })
  
  # Pre-calculate mean to each group
  means <- lapply(dataset, colMeans)
  
  # Obtain the number of groups
  n <- length(dataset)
  
  # Create an empty matrix to save distances
  distances <- matrix(0, nrow = n, ncol = n)
  
  # Calculate euclidean distance between each pair of groups
  for (i in 1:n) {
    mean_i <- means[[i]] # Pre-calculated mean of group i
    for (j in 1:n) {
      if (i != j) {
        distances[i, j] <- sqrt(sum((mean_i - means[[j]])^2))
      }
    }
  }
  
  # Calculate p-values only if it is requested by users
  if (p.value) {
    p_values <- matrix(NA, nrow = n, ncol = n)
    for (i in 1:n) {
      mean_i <- means[[i]]  # Pre-calculated mean in the group i
      for (j in 1:n) {
        if (i != j) {
          observed_distance <- distances[i, j]
          permutation_distances <- numeric(num.permutations)
          
          for (k in 1:num.permutations) {
            permuted_data <- sample(unlist(dataset[[j]]))
            permuted_means <- colMeans(matrix(permuted_data, nrow = length(permuted_data)/length(mean_i), byrow = TRUE))
            permutation_distances[k] <- max(abs(mean_i - permuted_means))
          }
          
          p_values[i, j] <- mean(permutation_distances >= observed_distance)
        }
      }
    }
  }
  
  # If plot is TRUE, create a plot of distances
  if (plot) {
    print(plot_euclidean_distances(distances, plot_title))
  }
  
  # Return a list containing distances and optionally p-values
  return(list(distances = distances, p_values = p_values))
}

# Auxiliary function to replace Na and NaN values with the mean
impute_with_mean <- function(df) {
  for (i in seq_along(df)) {
    if (is.numeric(df[[i]])) {
      na_index <- is.na(df[[i]]) | is.nan(df[[i]])
      df[[i]][na_index] <- mean(df[[i]], na.rm = TRUE)
    }
  }
  return(df)
}

# Auxiliary function to create a plot of euclidean distances
plot_euclidean_distances <- function(distances, plot_title) {
  requireNamespace("ggplot2")
  requireNamespace("reshape2")
  output_df <- as.data.frame(distances)
  output_df$Species <- rownames(output_df)
  
  output_df_long <- melt(output_df, id.vars = "Species")
  colnames(output_df_long) <- c("Species", "Comparison", "Distance")
  
  ggplot2::ggplot(output_df_long, ggplot2::aes(x = Species, y = Distance, fill = Comparison)) +
    ggplot2::geom_bar(stat = "identity", position = "dodge") +
    ggplot2::labs(title = plot_title, x = "Species", y = "Distance", fill = "Comparison") +
    ggplot2::theme_minimal()
}


#' Calculate Manhattan distance
#'
#' @param dataset A list of dataframes
#' @param plot If TRUE, show a plot of distances
#' @param p.value If TRUE, calculates p-values to distances
#' @param num.permutations Number of permutations to calculate p-values
#' @param plot_title The title of plot
#' @return A list containing distances and, optionally, p-values
#' @export
# Auxiliary function to create a Manhattan distances plot
cmanhattan <- function(dataset, plot = FALSE, p.value = FALSE, num.permutations = 1000, plot_title = "Manhattan Distance Between Groups") {
  # Verify that input is a list of dataframes
  if (!is.list(dataset)) {
    stop("The input must be a list of dataframes")
  }
  
  # Control that each element in the list is a dataframe with at least a numerical variable
  for (df in dataset) {
    if (!is.data.frame(df) || !any(sapply(df, is.numeric))) {
      stop("Ogni elemento della lista deve essere un data frame con almeno una variabile numerica")
    }
  }
  
  # Remove not numerical variables and replace Na and NaN values with the mean
  dataset <- lapply(dataset, function(df) {
    num_vars <- sapply(df, is.numeric)
    df <- df[, num_vars]
    df <- impute_with_mean(df)
    return(df)
  })
  
  # Pre-calculate the mean to each group
  means <- lapply(dataset, colMeans)
  
  # Obtain the number of groups
  n <- length(dataset)
  
  # Create an empty matrix to store distances
  distances <- matrix(0, nrow = n, ncol = n)
  
  # Calculate Manhattan distances between each pair of groups
  for (i in 1:n) {
    mean_i <- means[[i]] # Pre-calculated mean in the group i
    for (j in 1:n) {
      if (i != j) {
        distances[i, j] <- sum(abs(mean_i - means[[j]]))
      }
    }
  }
  
  # Calculate p-values only if it is requested by users
  if (p.value) {
    p_values <- matrix(NA, nrow = n, ncol = n)
    for (i in 1:n) {
      mean_i <- means[[i]]  # Pre-calculated mean in the group i
      for (j in 1:n) {
        if (i != j) {
          observed_distance <- distances[i, j]
          permutation_distances <- numeric(num.permutations)
          
          for (k in 1:num.permutations) {
            permuted_data <- sample(unlist(dataset[[j]]))
            permuted_means <- colMeans(matrix(permuted_data, nrow = length(permuted_data)/length(mean_i), byrow = TRUE))
            permutation_distances[k] <- max(abs(mean_i - permuted_means))
          }
          
          p_values[i, j] <- mean(permutation_distances >= observed_distance)
        }
      }
    }
  }
  
  # If plot is TRUE, create a plot of distances
  if (plot) {
    print(plot_manhattan_distances(distances, plot_title))
  }
  
  # Return a list containing distances and optionally p-values
  return(list(distances = distances, p_values = p_values))
}

# Auxiliary function to replace Na and NaN values with the mean
impute_with_mean <- function(df) {
  for (i in seq_along(df)) {
    if (is.numeric(df[[i]])) {
      na_index <- is.na(df[[i]]) | is.nan(df[[i]])
      df[[i]][na_index] <- mean(df[[i]], na.rm = TRUE)
    }
  }
  return(df)
}


plot_manhattan_distances <- function(distances, plot_title) {
  requireNamespace("ggplot2")
  requireNamespace("reshape2")
  output_df <- as.data.frame(distances)
  output_df$Species <- rownames(output_df)
  
  output_df_long <- melt(output_df, id.vars = "Species")
  colnames(output_df_long) <- c("Species", "Comparison", "Distance")
  
  ggplot2::ggplot(output_df_long, ggplot2::aes(x = Species, y = Distance, fill = Comparison)) +
    ggplot2::geom_bar(stat = "identity", position = "dodge") +
    ggplot2::labs(title = plot_title, x = "Species", y = "Distance", fill = "Comparison") +
    ggplot2::theme_minimal()
}



#' Calculate Chebyshev distance
#'
#' @param dataset A list of dataframes
#' @param plot If TRUE, displays a plot of distances
#' @param p.value If TRUE, calculates p-values of distances
#' @param num.permutations Number of permutations to calculate p-values 
#' @param plot_title The title of plot
#' @return A list containing distances and, optionally, p-values
#' @export
cchebyshev <- function(dataset, plot = FALSE, p.value = FALSE, num.permutations = 1000, plot_title = "Chebyshev Distance Between Groups") {
  # Verify that input is a list o dataframes
  if (!is.list(dataset)) {
    stop("The input must be a list of dataframes")
  }
  
  # Control that each element of the list is a dataframes with at least a numerical variable
  for (df in dataset) {
    if (!is.data.frame(df) || !any(sapply(df, is.numeric))) {
      stop("Each element of the list must be dataframes with at least a numerical variable")
    }
  }
  
  # Remove not numerical variables and replace Na and NaN values with the arithmetic mean
  dataset <- lapply(dataset, function(df) {
    num_vars <- sapply(df, is.numeric)
    df <- df[, num_vars]
    df <- impute_with_mean(df)
    return(df)
  })
  
  # Pre-calculate the mean to each group
  means <- lapply(dataset, colMeans)
  
  # Obtain the number of groups
  n <- length(dataset)
  
  # Create an empty matrix to store distances
  distances <- matrix(0, nrow = n, ncol = n)
  
  # Calculate Chebyshev distance between each pair of groups
  for (i in 1:n) {
    mean_i <- means[[i]]  # Pre-calculated mean in the group i
    for (j in 1:n) {
      if (i != j) {
        distances[i, j] <- max(abs(mean_i - means[[j]]))
      }
    }
  }
  
  # Calculate p-values only if it is requested by users
  if (p.value) {
    p_values <- matrix(NA, nrow = n, ncol = n)
    for (i in 1:n) {
      mean_i <- means[[i]]  # Pre-calculated mean in the group i
      for (j in 1:n) {
        if (i != j) {
          observed_distance <- distances[i, j]
          permutation_distances <- numeric(num.permutations)
          
          for (k in 1:num.permutations) {
            permuted_data <- sample(unlist(dataset[[j]]))
            permuted_means <- colMeans(matrix(permuted_data, nrow = length(permuted_data)/length(mean_i), byrow = TRUE))
            permutation_distances[k] <- max(abs(mean_i - permuted_means))
          }
          
          p_values[i, j] <- mean(permutation_distances >= observed_distance)
        }
      }
    }
  }
  
  # If plot is TRUE, create a plot of distances
  if (plot) {
    print(plot_chebyshev_distances(distances, plot_title))
  }
  
  # Return a list containing distances and optionally p-values
  return(list(distances = distances, p_values = p_values))
}

# Auxiliary function to replace Na and NaN values with the arithmetic mean
impute_with_mean <- function(df) {
  for (i in seq_along(df)) {
    if (is.numeric(df[[i]])) {
      na_index <- is.na(df[[i]]) | is.nan(df[[i]])
      df[[i]][na_index] <- mean(df[[i]], na.rm = TRUE)
    }
  }
  return(df)
}

# Auxiliary function to create a plot of Chebyshev distances
plot_chebyshev_distances <- function(distances, plot_title) {
  requireNamespace("ggplot2")
  requireNamespace("reshape2")
  output_df <- as.data.frame(distances)
  output_df$Species <- rownames(output_df)
  
  output_df_long <- melt(output_df, id.vars = "Species")
  colnames(output_df_long) <- c("Species", "Comparison", "Distance")
  
  ggplot2::ggplot(output_df_long, ggplot2::aes(x = Species, y = Distance, fill = Comparison)) +
    ggplot2::geom_bar(stat = "identity", position = "dodge") +
    ggplot2::labs(title = plot_title, x = "Species", y = "Distance", fill = "Comparison") +
    ggplot2::theme_minimal()
}