set.seed(1) glm_gaussian_data <- mock_glm_gaussian_data() cat_init_glm_gaussian <- mock_cat_glm_gaussian_initialization(glm_gaussian_data) glm_binomial_data <- mock_glm_binomial_data() cat_init_glm_binomial <- mock_cat_glm_binomial_initialization(glm_binomial_data) cox_data <- mock_cox_data() cat_init_cox <- mock_cat_cox_initialization(cox_data) lmm_data <- mock_lmm_data() cat_init_lmm <- mock_cat_lmm_initialization(lmm_data) # ----- GLMs ----- test_that("get_glm_sample_data works correctly", { # Test data generation for binomial family binomial_data <- get_glm_sample_data( family_string = "binomial", n = 20, mean = 0.3 ) expect_equal(length(binomial_data), 20) expect_true(all(binomial_data %in% c(0, 1))) # Test data generation for Gaussian family gaussian_data <- get_glm_sample_data( family_string = "gaussian", n = 20, mean = 5, sd = 2 ) expect_equal(length(gaussian_data), 20) expect_true(all(is.numeric(gaussian_data))) # Check for error with an invalid family expect_null(get_glm_sample_data( family_string = "invalid_family", n = 20, mean = 5, sd = 2 )) }) test_that("get_hmc_mcmc_result works correctly", { # Define a simple quadratic potential function and its gradient neg_log_den_func <- function(pos) { 0.5 * sum(pos^2) } neg_log_den_grad_func <- function(pos) { pos } # Set initial position, iteration count, scale, and steps coefs_0 <- c(1, 1) iter <- 10 hmc_scale <- 0.01 hmc_steps <- 5 # Run the function result <- get_hmc_mcmc_result( neg_log_den_func, neg_log_den_grad_func, coefs_0, iter = iter, hmc_scale = hmc_scale, hmc_steps = hmc_steps ) # Check the output type and length expect_type(result, "double") expect_equal(length(result), length(coefs_0)) result_2 <- get_hmc_mcmc_result( neg_log_den_func, neg_log_den_grad_func, coefs_0, iter = iter, hmc_scale = hmc_scale, hmc_steps = hmc_steps ) # The results should not match exactly expect_false(all(result == result_2)) }) test_that("hmc_neal_2010 works correctly", { # Define a simple quadratic potential function and its gradient neg_log_den_func <- function(pos) { 0.5 * sum(pos^2) } neg_log_den_grad_func <- function(pos) { pos } # Set parameters for the HMC function leapfrog_stepsize <- 0.1 leapfrog_step <- 10 current_pos <- c(1, 1) # Call the HMC function result <- hmc_neal_2010( neg_log_den_func, neg_log_den_grad_func, leapfrog_stepsize, leapfrog_step, current_pos ) # Test that the output is a list expect_type(result, "list") # Test that the list has named elements position, potential_energy, and accepted expect_named(result, c("position", "potential_energy", "accepted")) # Test that position is a numeric vector of the same length as current_pos expect_type(result$position, "double") expect_equal(length(result$position), length(current_pos)) # Test that potential_energy is a numeric scalar expect_type(result$potential_energy, "double") expect_length(result$potential_energy, 1) # Test that accepted is a logical value expect_type(result$accepted, "logical") expect_length(result$accepted, 1) # Run multiple iterations to see if the function accepts/rejects states correctly accept_count <- 0 total_runs <- 100 for (i in seq_len(total_runs)) { result <- hmc_neal_2010(neg_log_den_func, neg_log_den_grad_func, leapfrog_stepsize = 0.1, leapfrog_step = 10, current_pos = c(1, 1) ) if (result$accepted) accept_count <- accept_count + 1 } # The function should accept some of the proposals expect_gt(accept_count, 0) # The function should also reject some proposals in the set expect_lt(accept_count, total_runs) # Test for a small step size where energy 
  # should be conserved well
  leapfrog_stepsize <- 0.01
  leapfrog_step <- 20
  current_pos <- c(1, 1)
  result_small_stepsize <- hmc_neal_2010(
    neg_log_den_func, neg_log_den_grad_func,
    leapfrog_stepsize, leapfrog_step, current_pos
  )
})

test_that("get_glm_log_density_grad works correctly", {
  X <- matrix(rnorm(20), ncol = 5)
  Y_binomial <- c(1, 0, 1, 0)
  Y_gaussian <- rnorm(4)
  coefs <- rnorm(6)

  # Retrieve GLM models from glm_data
  model_binomial <- stats::glm(Y_binomial ~ X, binomial)
  model_gaussian <- stats::glm(Y_gaussian ~ X, gaussian)

  # Compute gradients
  grad_binomial <- get_glm_log_density_grad("binomial", X, Y_binomial, coefs)
  grad_gaussian <- get_glm_log_density_grad("gaussian", X, Y_gaussian, coefs)

  # Manually compute expected gradients for comparison
  mean_binomial <- get_glm_mean("binomial", X, coefs)
  grad_binomial_expected <- c(t(model_binomial$prior.weights * (Y_binomial - mean_binomial)) %*% cbind(1, X))
  mean_gaussian <- get_glm_mean("gaussian", X, coefs)
  grad_gaussian_expected <- c(t(model_gaussian$prior.weights * (Y_gaussian - mean_gaussian)) %*% cbind(1, X))

  # Test that computed values match expected values
  expect_equal(grad_binomial, grad_binomial_expected)
  expect_equal(grad_gaussian, grad_gaussian_expected)
})

test_that("get_glm_diag_approx_cov works correctly", {
  X <- matrix(rnorm(20), ncol = 5)
  y <- c(1, 0, 1, 0)

  # Fit a GLM model
  model <- stats::glm(y ~ X, family = binomial)

  # Compute the diagonal covariance matrix
  diag_cov <- get_glm_diag_approx_cov(X, model)

  # Manually compute the expected diagonal covariance for comparison
  hessian <- (t(cbind(1, X))) %*% diag(model$fitted.values * (1 - model$fitted.values) * model$prior.weights) %*% cbind(1, X)
  expected_diag_cov <- tryCatch(
    diag(solve(hessian)),
    error = function(.) {
      # Regularization for dealing with computationally singular issue
      diag(solve(hessian + diag(ncol(hessian)) * 1e-5))
    }
  )

  # Test that the computed and expected values match
  expect_equal(diag_cov, expected_diag_cov)

  # Check for correct error handling with mismatched dimensions (if applicable)
  expect_error(get_glm_diag_approx_cov(matrix(1:4, ncol = 2), model))
})

test_that("get_glm_custom_var works correctly", {
  # Call the function and check the result
  custom_var <- get_glm_custom_var(Y ~ ., cat_init_glm_gaussian, 1)

  # Expected custom variance calculation manually
  tau_model <- do.call(
    stats::glm,
    list(
      formula = Y ~ .,
      family = cat_init_glm_gaussian$family,
      data = cat_init_glm_gaussian$data,
      weights = c(
        rep(1, cat_init_glm_gaussian$obs_size),
        rep(1 / cat_init_glm_gaussian$syn_size, cat_init_glm_gaussian$syn_size)
      )
    )
  )
  expected_custom_var <- sum(stats::residuals(tau_model)^2) / stats::df.residual(tau_model) +
    sum(stats::residuals(tau_model)[1:cat_init_glm_gaussian$obs_size]^2) / cat_init_glm_gaussian$obs_size

  expect_equal(custom_var, expected_custom_var)
})

test_that("get_glm_lambda works correctly", {
  X <- matrix(rnorm(20), ncol = 5)
  coefs <- rnorm(6)

  # Expected outputs based on the given inputs
  expected_square_error <- 2 * get_glm_mean("gaussian", X, coefs)
  expected_classification_error <- 2 * get_glm_mean("binomial", X, coefs)
  expected_logistic_deviance <- get_linear_predictor(X, coefs)

  # Test for "square_error"
  expect_equal(get_glm_lambda("mean_square_error", X, coefs), expected_square_error)
  # Test for "classification_error"
  expect_equal(get_glm_lambda("mean_classification_error", X, coefs), expected_classification_error)
  # Test for "logistic_deviance"
  expect_equal(get_glm_lambda("logistic_deviance", X, coefs), expected_logistic_deviance)

  # Test that an error is thrown for an invalid discrepancy method
  expect_error(get_glm_lambda("invalid_method", X, coefs))
})

test_that("get_glm_log_density works correctly", {
  X <- matrix(rnorm(20), ncol = 5)
  coefs <- rnorm(6)
  Y_binomial <- c(0, 1, 0, 1)
  Y_gaussian <- rnorm(4)
  weights <- 0.5

  # Test for binomial family
  log_density_binomial <- get_glm_log_density("binomial", X, Y_binomial, coefs, weights)
  lp_binomial <- get_linear_predictor(X, coefs)
  expected_log_density_binomial <- weights * sum((Y_binomial * lp_binomial - log(1 + exp(lp_binomial))))
  expect_equal(log_density_binomial, expected_log_density_binomial, tolerance = 1e-6)

  # Test for gaussian family
  log_density_gaussian <- get_glm_log_density("gaussian", X, Y_gaussian, coefs, weights)
  lp_gaussian <- get_linear_predictor(X, coefs)
  expected_log_density_gaussian <- weights * -0.5 * sum((log(2 * pi * max(mean((Y_gaussian - lp_gaussian)^2), 1e-5)) + 1))
  expect_equal(log_density_gaussian, expected_log_density_gaussian, tolerance = 1e-6)

  # Test for invalid family
  expect_null(get_glm_log_density("invalid_family", X, Y_binomial, coefs))
})

test_that("get_glm_mean works correctly", {
  X <- matrix(rnorm(20), ncol = 5)
  coefs <- rnorm(6)

  # Test for binomial family
  mean_binomial <- get_glm_mean("binomial", X, coefs)
  expected_binomial <- 1 / (1 + exp(-get_linear_predictor(X, coefs)))
  expect_equal(mean_binomial, expected_binomial, tolerance = 1e-6)

  # Test for gaussian family
  mean_gaussian <- get_glm_mean("gaussian", X, coefs)
  expected_gaussian <- get_linear_predictor(X, coefs)
  expect_equal(mean_gaussian, expected_gaussian, tolerance = 1e-6)

  # Test for invalid family
  expect_null(get_glm_mean("invalid_family", X, coefs))
})

test_that("get_glm_family_string works correctly", {
  # Test retrieving the family name
  gaussian_family_name <- get_glm_family_string("gaussian")
  expect_equal(gaussian_family_name, "gaussian")

  # Test retrieving the family name with the link function
  poisson_family_name_with_link <- get_glm_family_string("poisson", with_link = TRUE)
  expect_equal(poisson_family_name_with_link, "poisson [log]")

  # Test handling invalid family input
  expect_error(get_glm_family_string("invalid_family"), "'family' must be a valid GLM family.")
  expect_error(get_glm_family_string(function() "invalid"), "'family' must be a valid GLM family.")
})

test_that("validate_glm_input works correctly", {
  # Mock the menu function to simulate user input
  mock_menu <- function(...)
  {
    return(2) # Simulating user input "NO" to terminate
  }

  # Assign the mock_menu to replace the original menu function
  assign("menu", mock_menu, envir = globalenv())

  # Test for valid inputs
  expect_silent(
    validate_glm_input(
      formula = Y ~ X1 + X2,
      cat_init = cat_init_glm_binomial,
      tau = 0.1,
      tau_seq = seq(0.1, 1, by = 0.1),
      tau_0 = 0.1,
      parametric_bootstrap_iteration_times = 100,
      cross_validation_fold_num = 5,
      risk_estimate_method = "parametric_bootstrap",
      discrepancy_method = "logistic_deviance",
      binomial_tau_lower = 0.01,
      tau_alpha = 0.5,
      tau_gamma = 0.5,
      gibbs_iter = 1000,
      gibbs_warmup = 500,
      coefs_iter = 5,
      gaussian_variance_alpha = NULL,
      gaussian_variance_beta = NULL
    )
  )
  expect_silent(validate_glm_input(
    formula = Y ~ X1 + X2,
    cat_init = cat_init_glm_gaussian,
    tau = 0.1,
    tau_seq = seq(0.1, 1, by = 0.1),
    tau_0 = 0.1,
    parametric_bootstrap_iteration_times = 100,
    cross_validation_fold_num = 5,
    risk_estimate_method = "parametric_bootstrap",
    discrepancy_method = "mean_square_error",
    binomial_tau_lower = 0.01,
    tau_alpha = 0.5,
    tau_gamma = 0.5
  ))

  # Test for invalid formula with a wrong response variable
  expect_error(validate_glm_input(formula = Y_wrong ~ ., cat_init = cat_init_glm_gaussian))

  # Test for invalid cat_init object
  expect_error(validate_glm_input(formula = Y ~ ., cat_init = cat_init_cox))

  # Test for invalid tau (negative value)
  expect_error(validate_glm_input(formula = Y ~ ., cat_init = cat_init_glm_gaussian, tau = -0.1))

  # Test for invalid tau (vector)
  expect_error(validate_glm_input(formula = Y ~ ., cat_init = cat_init_glm_gaussian, tau = c(1, 2)))

  # Test for invalid tau_seq (negative value)
  expect_error(validate_glm_input(formula = Y ~ ., cat_init = cat_init_glm_gaussian, tau_seq = c(-1, 0, 1)))

  # Test for invalid tau_seq (single value)
  expect_error(validate_glm_input(formula = Y ~ ., cat_init = cat_init_glm_gaussian, tau_seq = 1))

  # Test for invalid tau_0 (zero value)
  expect_error(validate_glm_input(formula = Y ~ ., cat_init = cat_init_glm_gaussian, tau_0 = 0))

  # Test for invalid tau_0 (vector)
  expect_error(validate_glm_input(formula = Y ~ ., cat_init = cat_init_glm_gaussian, tau_0 = c(1, 2)))

  # Test for invalid parametric_bootstrap_iteration_times (zero value)
  expect_error(validate_glm_input(formula = Y ~ ., cat_init = cat_init_glm_gaussian, parametric_bootstrap_iteration_times = 0))

  # Test for invalid cross_validation_fold_num (zero value)
  expect_error(validate_glm_input(formula = Y ~ ., cat_init = cat_init_glm_gaussian, cross_validation_fold_num = 0))

  # Test for invalid cross_validation_fold_num (greater than number of rows)
  expect_error(validate_glm_input(
    formula = Y ~ .,
    cat_init = cat_init_glm_gaussian,
    risk_estimate_method = "cross_validation",
    discrepancy_method = "mean_square_error",
    cross_validation_fold_num = 100
  ))

  # Test for family == "gaussian" & risk_estimate_method == "steinian_estimate"
  expect_error(validate_glm_input(
    formula = Y ~ .,
    cat_init = cat_init_glm_gaussian,
    risk_estimate_method = "steinian_estimate",
    discrepancy_method = "mean_square_error"
  ))

  # Test for family == "binomial" & risk_estimate_method == "mallowian_estimate"
  expect_error(validate_glm_input(
    formula = Y ~ .,
    cat_init = cat_init_glm_binomial,
    risk_estimate_method = "mallowian_estimate",
    discrepancy_method = "logistic_deviance"
  ))

  # Test for family == "gaussian" & discrepancy_method != "mean_square_error"
  expect_error(validate_glm_input(
    formula = Y ~ .,
    cat_init = cat_init_glm_gaussian,
    risk_estimate_method = "mallowian_estimate",
    discrepancy_method = "logistic_deviance"
)) cat_init_glm_gaussian_large_size <- cat_init_glm_gaussian cat_init_glm_gaussian_large_size$obs_size <- 2000 # Test for large data size when using parametric_bootstrap expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_gaussian_large_size, risk_estimate_method = "parametric_bootstrap", discrepancy_method = "mean_square_error" )) cat_init_glm_binomial_large_size <- cat_init_glm_binomial cat_init_glm_binomial_large_size$obs_size <- 2000 # Test for large data size when using steinian_estimate expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial_large_size, risk_estimate_method = "steinian_estimate", discrepancy_method = "logistic_deviance" )) # Warn user when family == "binomial" & risk_estimate_method == "steinian_estimate" & discrepancy_method != "logistic_deviance" expect_warning(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial, risk_estimate_method = "steinian_estimate", discrepancy_method = "mean_classification_error" )) # Test for family == "binomial" & discrepancy_method == "mean_square_error" expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial, risk_estimate_method = "steinian_estimate", discrepancy_method = "mean_square_error" )) # Test for binomial using gibbs function expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_gaussian, gibbs_iter = 1000, gibbs_warmup = 200, coefs_iter = 5 )) # Test for gibbs_warmup > gibbs_iter expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial, gibbs_iter = 100, gibbs_warmup = 200, coefs_iter = 5 )) # Test for zero gibbs_iter expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial, gibbs_iter = 0, gibbs_warmup = 200, coefs_iter = 5 )) # Test for zero gibbs_warmup expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial, gibbs_iter = 100, gibbs_warmup = 0, coefs_iter = 5 )) # Test for zero coefs_iter expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial, gibbs_iter = 1000, gibbs_warmup = 200, coefs_iter = 0 )) # Test for large coefs_iter expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial, gibbs_iter = 1000, gibbs_warmup = 200, coefs_iter = 50 )) # Test for zero gaussian_variance_alpha expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_gaussian, gaussian_variance_alpha = 0 )) # Test for zero gaussian_variance_beta expect_error(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_gaussian, gaussian_variance_beta = 0 )) # Test for ignore gaussian_variance_alpha when gaussian_known_variance = TRUE cat_init_glm_gaussian_known <- mock_cat_glm_gaussian_initialization(glm_gaussian_data, gaussian_known_variance = TRUE) expect_warning(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_gaussian_known, gaussian_variance_alpha = 1 )) # Test for ignore gaussian_variance_alpha when family is binomial expect_warning(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial, gaussian_variance_alpha = 1 )) # Test for ignore gaussian_variance_beta when gaussian_known_variance = TRUE expect_warning(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_gaussian_known, gaussian_variance_beta = 1 )) # Test for ignore gaussian_variance_beta when family is binomial expect_warning(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial, gaussian_variance_beta = 1 )) # Test for binomial features when family is gaussian 
expect_warning(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_gaussian, binomial_joint_theta = TRUE )) # Test for binomial features when family is gaussian expect_warning(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_gaussian, binomial_joint_alpha = TRUE )) # Test for binomial features when theta is not activated with alpha expect_warning(validate_glm_input( formula = Y ~ ., cat_init = cat_init_glm_binomial, binomial_joint_theta = FALSE, binomial_joint_alpha = TRUE )) }) test_that("validate_glm_initialization_input checks parameters correctly", { # Test for valid inputs expect_silent(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = glm_gaussian_data, syn_size = NULL, custom_variance = NULL, gaussian_known_variance = FALSE, x_degree = NULL )) # Test for complex formula expect_error(validate_glm_initialization_input( formula = Y ~ X1 + X2 + X3, family = gaussian, data = glm_gaussian_data, syn_size = NULL, custom_variance = NULL, gaussian_known_variance = FALSE, x_degree = NULL )) # Test for wrong data format expect_error(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = as.matrix(glm_gaussian_data), syn_size = NULL, custom_variance = NULL, gaussian_known_variance = FALSE, x_degree = NULL )) # Test for empty data expect_error(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = data.frame(), syn_size = NULL, custom_variance = NULL, gaussian_known_variance = FALSE, x_degree = NULL )) # Test for lmm formula expect_error(validate_glm_initialization_input( formula = Y ~ 1 + (1 | X1), family = gaussian, data = glm_gaussian_data, syn_size = NULL, custom_variance = NULL, gaussian_known_variance = FALSE, x_degree = NULL )) # Test for cox formula expect_error(validate_glm_initialization_input( formula = survival::Surv(Y, X1) ~ 1, family = gaussian, data = glm_gaussian_data, syn_size = NULL, custom_variance = NULL, gaussian_known_variance = FALSE, x_degree = NULL )) # Test for zero synthetic data size expect_error(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = glm_gaussian_data, syn_size = 0, custom_variance = NULL, gaussian_known_variance = FALSE, x_degree = NULL )) # Test for positive synthetic data size expect_silent(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = glm_gaussian_data, syn_size = 100, custom_variance = NULL, gaussian_known_variance = FALSE, x_degree = NULL )) # Test for custom_variance equal to zero expect_error(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = glm_gaussian_data, syn_size = NULL, custom_variance = 0, gaussian_known_variance = TRUE, x_degree = NULL )) # Test for positive custom_variance expect_silent(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = glm_gaussian_data, syn_size = NULL, custom_variance = 1, gaussian_known_variance = TRUE, x_degree = NULL )) # Test for positive custom_variance but FALSE gaussian_known_variance expect_warning(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = glm_gaussian_data, syn_size = NULL, custom_variance = 1, gaussian_known_variance = FALSE, x_degree = NULL )) # Test for positive custom_variance, TRUE gaussian_known_variance but binomial distribution expect_warning(validate_glm_initialization_input( formula = Y ~ 1, family = binomial, data = glm_binomial_data, syn_size = NULL, custom_variance = 1, gaussian_known_variance = TRUE, x_degree = NULL )) # Test for x_degree being zero 
expect_error(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = glm_gaussian_data, syn_size = NULL, custom_variance = 1, gaussian_known_variance = TRUE, x_degree = rep(0, 3) )) # Test for x_degree being positive expect_silent(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = glm_gaussian_data, syn_size = NULL, custom_variance = 1, gaussian_known_variance = TRUE, x_degree = rep(1, 3) )) # Test for x_degree being a single value expect_error(validate_glm_initialization_input( formula = Y ~ 1, family = gaussian, data = glm_gaussian_data, syn_size = NULL, custom_variance = 1, gaussian_known_variance = TRUE, x_degree = 1 )) }) # ----- COX ----- test_that("get_cox_risk_and_failure_sets works correctly", { time_vector <- cox_data$time status_vector <- cox_data$status indicator_vector <- sort(unique(c(time_vector, time_vector + rexp(length(time_vector))))) # Calculate risk and failure sets sets <- get_cox_risk_and_failure_sets(time_vector, status_vector, indicator_vector) # Test that the result is a list with two elements expect_true(is.list(sets)) expect_equal(length(sets), 2) # Test that the risk_set and failure_set are matrices expect_true(is.matrix(sets$risk_set)) expect_true(is.matrix(sets$failure_set)) # Test that the dimensions of risk_set and failure_set match the input expect_equal(dim(sets$risk_set), c(length(time_vector), length(indicator_vector))) expect_equal(dim(sets$failure_set), c(length(time_vector), length(indicator_vector))) # Test that failure_set has only one 1 per row where status = 1 expect_true(all(rowSums(sets$failure_set[status_vector == 1, ]) == 1)) # Test that failure_set is all zeros where status = 0 expect_true(all(sets$failure_set[status_vector == 0, ] == 0)) # Test that risk_set is all zeros or ones expect_true(all(sets$risk_set %in% c(0, 1))) }) test_that("get_cox_kappa works correctly", { kappa <- get_cox_kappa( X = matrix(rnorm(100), ncol = 5), time = rexp(20), status = rbinom(20, 1, 0.5), hazard_constant = 0.02 ) # Test that kappa is a numeric value expect_true(is.numeric(kappa)) # Test that kappa is a scalar expect_equal(length(kappa), 1) }) test_that("get_cox_partial_likelihood works correctly", { partial_likelihood <- get_cox_partial_likelihood( X = matrix(rnorm(100), ncol = 5), time = rexp(20), status = rbinom(20, 1, 0.5), coefs = rnorm(5), entry_points = rep(0, 20) ) # Test that the partial likelihood is a numeric scalar expect_true(is.numeric(partial_likelihood)) expect_length(partial_likelihood, 1) }) test_that("get_cox_qr_solve works correctly", { # Define valid input hessian_matrix <- matrix(c(2, 1, 1, 2), ncol = 2) gradient_vector <- c(5, 6) # Test that the computed solution matches the expected solution expect_equal( get_cox_qr_solve( hessian_matrix = hessian_matrix, gradient_vector = gradient_vector ), solve(hessian_matrix, gradient_vector) ) # Test with matrix that might produce NA values expect_equal( get_cox_qr_solve( hessian_matrix = matrix(c(1, 1, 1, 1), ncol = 2), gradient_vector = c(1, 1) ), c(0.0001, 0.0001) ) # Test with another case that produces errors expect_equal( get_cox_qr_solve( hessian_matrix = matrix(c(0, 0, 0, 0), ncol = 2), gradient_vector = c(1, 1) ), c(0.0001, 0.0001) ) }) test_that("get_cox_gradient works correctly", { X <- matrix(rnorm(15), ncol = 3) time <- rexp(5, rate = 0.1) status <- rbinom(5, 1, 0.5) coefs <- rnorm(3) entry_points <- rep(0, 5) gradient <- get_cox_gradient( X = X, time = time, status = status, coefs = coefs, entry_points = entry_points ) 
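  # Note: get_cox_gradient() presumably returns the score vector of the Cox
  # partial likelihood with respect to `coefs`, so it should have one entry per
  # column of X; the assertions below only check type and length, not the
  # analytic values themselves.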
expect_true(is.numeric(gradient)) expect_equal(length(gradient), ncol(X)) # Test with no events expect_equal(get_cox_gradient( X = X, time = time, status = rep(0, length(status)), coefs = coefs, entry_points = entry_points ), c(0, 0, 0)) }) test_that("get_cox_syn_gradient works correctly", { X <- matrix(rnorm(15), ncol = 3) time <- rexp(5, rate = 0.1) coefs <- rnorm(3) gradient <- get_cox_syn_gradient( X = X, time = time, coefs = coefs, hazard_constant = 0.5 ) # Test that the gradient is a numeric vector expect_true(is.numeric(gradient)) # Test that the length of the gradient matches the number of columns in X expect_equal(length(gradient), ncol(X)) }) test_that("get_cox_risk_set_idx works correctly", { time_of_interest <- 5 entry_vector <- c(0, 0, 0, 0, 0) time_vector <- c(3, 5, 8, 9, 10) status_vector <- c(1, 1, 0, 1, 0) risk_set_idx <- get_cox_risk_set_idx( time_of_interest = time_of_interest, entry_vector = entry_vector, time_vector = time_vector, status_vector = status_vector ) # Test that the output is a vector expect_true(is.vector(risk_set_idx)) # Test that all indices are within the range of the time_vector length expect_true(all(risk_set_idx >= 1 & risk_set_idx <= length(time_vector))) # Test that all subjects in the risk set entered before or at the time of interest expect_true(all(time_of_interest >= entry_vector[risk_set_idx])) # Test with a time_of_interest that is not in time_vector risk_set_idx_new <- get_cox_risk_set_idx( time_of_interest = max(time_vector) + 1, entry_vector = entry_vector, time_vector = time_vector, status_vector = status_vector ) expect_true(length(risk_set_idx_new) == 0) }) test_that("get_cox_hessian works correctly", { X <- matrix(rnorm(15), ncol = 3) time <- rexp(5, rate = 0.1) status <- rbinom(5, 1, 0.5) coefs <- rnorm(3) entry_points <- rep(0, 5) hessian_matrix <- get_cox_hessian( X = X, time = time, status = status, coefs = coefs, entry_points = entry_points ) # Test if the computed Hessian is a matrix expect_true(is.matrix(hessian_matrix)) # Test the dimensions of the Hessian matrix expect_equal(dim(hessian_matrix), c(ncol(X), ncol(X))) # Test that the Hessian is symmetric (should be if correctly calculated) expect_true(all.equal(hessian_matrix, t(hessian_matrix), tolerance = 1e-8)) # Test with no events hessian_matrix_no_event <- get_cox_hessian( X = X, time = time, status = c(0, 0, 0, 0, 0), coefs = coefs, entry_points = entry_points ) expect_equal(hessian_matrix_no_event, matrix(0, ncol = ncol(X), nrow = ncol(X))) }) test_that("get_cox_syn_hessian works correctly", { X <- matrix(rnorm(15), ncol = 3) time <- rexp(5, rate = 0.1) coefs <- rnorm(3) hazard_constant <- 0.7 hessian_matrix <- get_cox_syn_hessian( X = X, time = time, coefs = coefs, hazard_constant = hazard_constant ) # Test if the computed Hessian is a matrix expect_true(is.matrix(hessian_matrix)) # Test the dimensions of the Hessian matrix expect_equal(dim(hessian_matrix), c(ncol(X), ncol(X))) # Test that the Hessian is symmetric (should be if correctly calculated) expect_true(all.equal(hessian_matrix, t(hessian_matrix), tolerance = 1e-8)) # Test with no variation in time (all times the same) hessian_matrix_no_var <- get_cox_syn_hessian( X = X, time = rep(1, nrow(X)), coefs = coefs, hazard_constant = hazard_constant ) # Test that the Hessian is still a valid numeric matrix expect_true(is.matrix(hessian_matrix_no_var)) expect_true(is.numeric(hessian_matrix_no_var)) expect_equal(dim(hessian_matrix_no_var), c(ncol(X), ncol(X))) }) test_that("validate_cox_input checks parameters 
correctly", { # Test for valid inputs cat_init <- get_adjusted_cat_init( cat_init = cat_init_cox, formula_rhs = ~ X1 + X2 ) expect_silent(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, tau = 1, tau_seq = c(1, 2), init_coefficients = rep(0, ncol(cat_init$adj_x)), tol = 1e-3, max_iter = 30, cross_validation_fold_num = 2, hazard_beta = 2, tau_alpha = 2, tau_gamma = 1 )) # Test for cat_init not generated from `cat_cox_initialization` cat_init_invalid <- cat_init_lmm cat_init_invalid$function_name <- "wrong_initialization" expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init_invalid )) # Test for fomrula contain wrong time/status column name expect_error(validate_cox_input( formula = survival::Surv(time_wrong, status_wrong) ~ X1 + X2, cat_init = cat_init )) # Test for tau being negative expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, tau = -1 )) # Test for tau being a vector expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, tau = c(1, 2) )) # Test for tau_seq being negative expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, tau_seq = c(-1, 1) )) # Test for tau_seq being a single value expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, tau_seq = 1 )) # Test for init_coefficients being different length expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, init_coefficients = rep(0, ncol(cat_init$adj_x) + 1) )) # Test for tol being 0 expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, tol = 0 )) # Test for max_iter being 0 expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, max_iter = 0 )) # Test for cross_validation_fold_num being 0 expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, cross_validation_fold_num = 0 )) # Test for cross_validation_fold_num being large expect_warning(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, cross_validation_fold_num = 3 )) # Test for cross_validation_fold_num being too large expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, cross_validation_fold_num = 100 )) # Test for hazard_beta being 0 expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, hazard_beta = 0 )) # Test for tau_alpha being 0 expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, tau_alpha = 0 )) # Test for tau_gamma being 0 expect_error(validate_cox_input( formula = ~ X1 + X2, cat_init = cat_init, tau_gamma = 0 )) }) test_that("validate_cox_initialization_input checks parameters correctly", { # Test for valid inputs expect_silent(validate_cox_initialization_input( formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = NULL, hazard_constant = NULL, entry_points = NULL, x_degree = NULL )) # Test for wrong data format expect_error(validate_cox_initialization_input( formula = survival::Surv(time, status) ~ 1, data = as.matrix(cox_data), syn_size = NULL, hazard_constant = NULL, entry_points = NULL, x_degree = NULL )) # Test for complex formula expect_error(validate_cox_initialization_input( formula = survival::Surv(time, status) ~ X1 + X2 + X3, data = cox_data, syn_size = NULL, hazard_constant = NULL, entry_points = NULL, x_degree = NULL )) # Test for empty data expect_error(validate_cox_initialization_input( formula = survival::Surv(time, status) ~ 1, data = data.frame(), syn_size = NULL, hazard_constant = NULL, entry_points = NULL, x_degree = NULL )) # Test for lmm formula 
  expect_error(validate_cox_initialization_input(
    formula = time ~ 1 + (1 | status), data = cox_data, syn_size = NULL,
    hazard_constant = NULL, entry_points = NULL, x_degree = NULL
  ))

  # Test for glm formula
  expect_error(validate_cox_initialization_input(
    formula = time ~ 1, data = cox_data, syn_size = NULL,
    hazard_constant = NULL, entry_points = NULL, x_degree = NULL
  ))

  # Test for zero synthetic data size
  expect_error(validate_cox_initialization_input(
    formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = 0,
    hazard_constant = NULL, entry_points = NULL, x_degree = NULL
  ))

  # Test for positive synthetic data size
  expect_silent(validate_cox_initialization_input(
    formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = 100,
    hazard_constant = NULL, entry_points = NULL, x_degree = NULL
  ))

  # Test for hazard constant equal to zero
  expect_error(validate_cox_initialization_input(
    formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = NULL,
    hazard_constant = 0, entry_points = NULL, x_degree = NULL
  ))

  # Test for positive hazard constant
  expect_silent(validate_cox_initialization_input(
    formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = NULL,
    hazard_constant = 5, entry_points = NULL, x_degree = NULL
  ))

  # Test for entry points not aligned with the data
  expect_error(validate_cox_initialization_input(
    formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = NULL,
    hazard_constant = NULL, entry_points = c(0, 0, 0, 0, 0), x_degree = NULL
  ))

  # Test for entry points of the right length
  expect_silent(validate_cox_initialization_input(
    formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = NULL,
    hazard_constant = NULL, entry_points = rep(0, nrow(cox_data)), x_degree = NULL
  ))

  # Test for negative entry points
  expect_error(validate_cox_initialization_input(
    formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = NULL,
    hazard_constant = NULL, entry_points = rep(-1, nrow(cox_data)), x_degree = NULL
  ))

  # Test for x_degree being zero
  expect_error(validate_cox_initialization_input(
    formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = NULL,
    hazard_constant = NULL, entry_points = NULL, x_degree = rep(0, 3)
  ))

  # Test for x_degree being positive
  expect_silent(validate_cox_initialization_input(
    formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = NULL,
    hazard_constant = NULL, entry_points = NULL, x_degree = rep(1, 3)
  ))

  # Test for x_degree being a single value
  expect_error(validate_cox_initialization_input(
    formula = survival::Surv(time, status) ~ 1, data = cox_data, syn_size = NULL,
    hazard_constant = NULL, entry_points = NULL, x_degree = 1
  ))
})

# ----- LMM -----
test_that("update_lmm_variance produces expected results for edge cases", {
  # Edge case with very small variances
  result_small_variance <- update_lmm_variance(
    residual_variance = 1e-10,
    random_effect_variance = 1e-10,
    obs_z_eigenvalues = c(1, 2, 3),
    syn_z_eigenvalues = c(1, 2, 3),
    obs_adjusted_residuals = c(1, 2, 3),
    syn_adjusted_residuals = c(1, 2, 3),
    tau = 1
  )
  expect_true(is.finite(result_small_variance))

  # Check monotonicity with varying tau
  result_low_tau <- update_lmm_variance(1, 1, c(1, 1.5, 2), c(1, 1.5, 2), c(1, 2, 3), c(1, 2, 3), 1)
  result_high_tau <- update_lmm_variance(1, 1, c(1, 1.5, 2), c(1, 1.5, 2), c(1, 2, 3), c(1, 2, 3), 10)
  expect_true(result_low_tau < result_high_tau) # Higher tau should increase likelihood

  # Symmetry check with swapped eigenvalues, with tau equal to the length of the eigenvalues
  result_obs_syn_swapped <- update_lmm_variance(0.5, 1.2, c(1, 2, 2.5), c(1, 2, 3), c(1, 2, 3), c(1, 2, 3), 3)
  result_syn_obs_swapped <- update_lmm_variance(0.5, 1.2, c(1, 2, 3), c(1, 2, 2.5), c(1, 2, 3), c(1, 2, 3), 3)
  expect_equal(result_obs_syn_swapped, result_syn_obs_swapped)
})

test_that("validate_lmm_input checks parameters correctly", {
  # Test for valid inputs
  expect_silent(validate_lmm_input(
    cat_init = cat_init_lmm,
    tau = 1,
    residual_variance_0 = 1,
    random_effect_variance_0 = 1,
    coefs_0 = rep(1, ncol(cat_init_lmm$obs_x)),
    optimize_domain = c(0, 10),
    max_iter = 100,
    tol = 0.001,
    tau_seq = c(0, 1),
    cross_validation_fold_num = 5
  ))

  # Test for cat_init not generated from `cat_lmm_initialization`
  cat_init_invalid <- cat_init_lmm
  cat_init_invalid$function_name <- "wrong_initialization"
  expect_error(validate_lmm_input(cat_init = cat_init_invalid))

  # Test for tau being negative
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, tau = -1))

  # Test for tau being a vector
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, tau = c(1, 2)))

  # Test for residual_variance_0 being zero
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, residual_variance_0 = 0))

  # Test for random_effect_variance_0 being zero
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, random_effect_variance_0 = 0))

  # Test for coefs_0 being too short for observation x
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, coefs_0 = c(1)))

  # Test for coefs_0 being too long for observation x
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, coefs_0 = rep(1, 5)))

  # Test for optimize_domain being negative
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, optimize_domain = c(-1, 0)))

  # Test for optimize_domain being a single value
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, optimize_domain = 1))

  # Test for optimize_domain being too long
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, optimize_domain = c(1, 2, 3)))

  # Test for max_iter being zero
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, max_iter = 0))

  # Test for max_iter being a vector
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, max_iter = c(1, 2)))

  # Test for tol being zero
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, tol = 0))

  # Test for tol being a vector
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, tol = c(1, 2)))

  # Test for tau_seq being negative
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, tau_seq = c(-1, 1)))

  # Test for tau_seq being a single value
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, tau_seq = 1))

  # Test for cross_validation_fold_num being less than 2
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, cross_validation_fold_num = 1))

  # Test for cross_validation_fold_num being larger than observation data size
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, cross_validation_fold_num = 100))

  # Test for cross_validation_fold_num being a vector
  expect_error(validate_lmm_input(cat_init = cat_init_lmm, cross_validation_fold_num = c(1, 2)))
})

test_that("validate_lmm_initialization_input checks parameters correctly", {
  # Test for valid inputs
  expect_silent(validate_lmm_initialization_input(
    formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y",
    z_cols = c("Z1", "Z2", "Z3"), group_col = "group", syn_size = NULL
  ))

  # Test for wrong data format
  expect_error(validate_lmm_initialization_input(
    formula = Y ~ X1, data = as.matrix(lmm_data), x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols =
c("Z1", "Z2", "Z3"), group_col = "group", syn_size = NULL )) # Test for empty data expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = data.frame(), x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3"), group_col = "group", syn_size = NULL )) # Test for lmm formula expect_silent(validate_lmm_initialization_input( formula = Y ~ X1 + (1 | group), data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3"), group_col = "group", syn_size = NULL )) # Test for surv formula expect_error(validate_lmm_initialization_input( formula = survival::Surv(time, status) ~ 1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3"), group_col = "group", syn_size = NULL )) # Test for lack of x_cols expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3"), group_col = "group", syn_size = NULL )) # Test for lack of z_cols expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2"), group_col = "group", syn_size = NULL )) # Test for wrong x_cols expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3", "X4"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3"), group_col = "group", syn_size = NULL )) # Test for wrong z_cols expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3", "Z4"), group_col = "group", syn_size = NULL )) # Test for y_col being vector expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2"), y_col = c("Y", "X3"), z_cols = c("Z1", "Z2", "Z3"), group_col = "group", syn_size = NULL )) # Test for y_col being wrong expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y1", z_cols = c("Z1", "Z2", "Z3"), group_col = "group", syn_size = NULL )) # Test for group_col being a vector expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2"), group_col = c("group", "Z3"), syn_size = NULL )) # Test for group_col being wrong expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3"), group_col = c("group1"), syn_size = NULL )) # Test for syn_size is 0 expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3"), group_col = c("group"), syn_size = 0 )) # Test for syn_size is positive and large enough expect_silent(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3"), group_col = c("group"), syn_size = 100 )) # Test for syn_size is zero expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3"), group_col = c("group"), syn_size = 0 )) # Test for syn_size is negative expect_error(validate_lmm_initialization_input( formula = Y ~ X1, data = lmm_data, x_cols = c("X1", "X2", "X3"), y_col = "Y", z_cols = c("Z1", "Z2", "Z3"), group_col = c("group"), syn_size = -1 )) }) # ----- General ----- test_that("validate_positive function 
works as expected", { # Test for a positive scalar input, should not raise an error expect_silent(validate_positive("param1", 5)) # Test for a zero scalar input, incl_0 = TRUE, should not raise an error expect_silent(validate_positive("param1", 0, incl_0 = TRUE)) # Test for a positive vector input, is_vector = TRUE, should not raise an error expect_silent(validate_positive("param1", c(2, 3, 4), is_vector = TRUE)) # Test for a vector with zero values, incl_0 = TRUE, should not raise an error expect_silent(validate_positive("param1", c(0, 2, 3), incl_0 = TRUE, is_vector = TRUE)) # Test for a negative scalar input, expect an error expect_error( validate_positive("param1", -5), "The provided `param1` should be a positive number." ) # Test for a zero scalar input without incl_0 = TRUE, expect an error expect_error( validate_positive("param1", 0), "The provided `param1` should be a positive number." ) # Test for a negative value in vector input, is_vector = TRUE, expect an error expect_error( validate_positive("param1", c(-1, 2, 3), is_vector = TRUE), "The provided `param1` should be a vector of positive numbers." ) # Test for a non-vector input when is_vector = TRUE, expect an error expect_error( validate_positive("param1", 5, is_vector = TRUE), "The provided `param1` should be a vector of positive numbers." ) # Test for a single element when is_vector = TRUE and incl_0 = TRUE, expect an error if length is 1 expect_error( validate_positive("param1", 0, incl_0 = TRUE, is_vector = TRUE), "The provided `param1` should be a vector of non-negative numbers." ) }) test_that("get_stan_model loads the correct Stan model file", { # Test for default glm type with gaussian family with no additional options stan_model <- get_stan_model(type = "glm", glm_family_string = "gaussian") expect_s4_class(stan_model, "stanmodel") # Check that it returns a Stan model expect_equal(attr(stan_model@model_code, "model_name2"), "glm_gaussian") # Check filename pattern # Test for Cox model type stan_model <- get_stan_model(type = "cox") expect_s4_class(stan_model, "stanmodel") expect_equal(attr(stan_model@model_code, "model_name2"), "cox") # Test fwith `joint_tau = TRUE` or Cox model type stan_model <- get_stan_model(type = "cox", joint_tau = TRUE) expect_s4_class(stan_model, "stanmodel") expect_equal(attr(stan_model@model_code, "model_name2"), "cox_joint") # Test with `glm_family_string` = "binomial" stan_model <- get_stan_model(type = "glm", glm_family_string = "binomial") expect_s4_class(stan_model, "stanmodel") expect_equal(attr(stan_model@model_code, "model_name2"), "glm_binomial") # Test with `joint_tau = TRUE` (and binomial family to test combined settings) stan_model <- get_stan_model(type = "glm", glm_family_string = "binomial", joint_tau = TRUE) expect_s4_class(stan_model, "stanmodel") expect_equal(attr(stan_model@model_code, "model_name2"), "glm_binomial_joint") # Test for `glm_gaussian_known_variance = TRUE` with gaussian family stan_model <- get_stan_model(type = "glm", glm_family_string = "gaussian", glm_gaussian_known_variance = TRUE) expect_s4_class(stan_model, "stanmodel") expect_equal(attr(stan_model@model_code, "model_name2"), "glm_gaussian_known_variance") # Test for `glm_gaussian_known_variance = TRUE` and `joint_tau = TRUE` with gaussian family stan_model <- get_stan_model( type = "glm", glm_family_string = "gaussian", joint_tau = TRUE, glm_gaussian_known_variance = TRUE ) expect_s4_class(stan_model, "stanmodel") expect_equal(attr(stan_model@model_code, "model_name2"), 
"glm_gaussian_joint_known_variance") # Test for joint_tau = TRUE` with gaussian family stan_model <- get_stan_model( type = "glm", glm_family_string = "gaussian", joint_tau = TRUE ) expect_s4_class(stan_model, "stanmodel") expect_equal(attr(stan_model@model_code, "model_name2"), "glm_gaussian_joint") # Test with multiple options set: `glm_binomial_joint_theta` and `glm_binomial_joint_alpha` stan_model <- get_stan_model( type = "glm", glm_family_string = "binomial", glm_binomial_joint_theta = TRUE, glm_binomial_joint_alpha = TRUE ) expect_s4_class(stan_model, "stanmodel") expect_equal(attr(stan_model@model_code, "model_name2"), "glm_binomial_joint_theta_alpha") # Test with `glm_binomial_joint_theta` stan_model <- get_stan_model( type = "glm", glm_family_string = "binomial", glm_binomial_joint_theta = TRUE ) expect_s4_class(stan_model, "stanmodel") expect_equal(attr(stan_model@model_code, "model_name2"), "glm_binomial_joint_theta") # Test with `glm_binomial_joint_alpha` stan_model <- get_stan_model( type = "glm", glm_family_string = "binomial", glm_binomial_joint_alpha = TRUE ) expect_s4_class(stan_model, "stanmodel") expect_equal(attr(stan_model@model_code, "model_name2"), "glm_binomial_joint") # Edge case: invalid `glm_family_string` expect_error(get_stan_model(type = "glm", glm_family_string = "invalid_family")) }) test_that("get_linear_predictor works correctly", { # Test with no intercept X <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2) coefs <- c(0.5, 1.5) expect_equal( get_linear_predictor(X, coefs), X %*% coefs ) # Test with intercept coefs_with_intercept <- c(1, 0.5, 1.5) expect_equal( get_linear_predictor(X, coefs_with_intercept), coefs_with_intercept[1] + X %*% coefs_with_intercept[-1] ) # Test with dimension mismatch expect_error(get_linear_predictor(X, c(0.5))) # Test with minimal input X_small <- matrix(1, ncol = 1) coefs_small <- c(1) expect_equal( get_linear_predictor(X_small, coefs_small), X_small %*% coefs_small ) # Test with intercept and minimal input coefs_small_with_intercept <- c(1, 2) expect_equal( get_linear_predictor(X_small, coefs_small_with_intercept), coefs_small_with_intercept[1] + X_small %*% coefs_small_with_intercept[2] ) }) test_that("get_discrepancy computes in consistence", { X <- matrix(rnorm(15), ncol = 3) coefs <- rnorm(4) binomial_Y <- rbinom(5, 1, 0.5) gaussian_Y <- rnorm(5) lp <- coefs[1] + c(X %*% coefs[-1]) binomial_est_Y <- 1 / (1 + exp(-lp)) gaussian_est_Y <- lp # Test for logarithmic error expect_equal( get_discrepancy( discrepancy_method = "mean_logarithmic_error", family_string = "binomial", X = X, Y = binomial_Y, coefs = coefs ), get_discrepancy( discrepancy_method = "mean_logarithmic_error", family_string = "binomial", X = X, Y = binomial_Y, est_Y = binomial_est_Y ) ) # Test for mean square error expect_equal( get_discrepancy( discrepancy_method = "mean_square_error", family_string = "gaussian", X = X, Y = gaussian_Y, coefs = coefs ), get_discrepancy( discrepancy_method = "mean_square_error", family_string = "gaussian", X = X, Y = gaussian_Y, est_Y = gaussian_est_Y ) ) # Test for mean classification error expect_equal( get_discrepancy( discrepancy_method = "mean_classification_error", family_string = "binomial", X = X, Y = binomial_Y, coefs = coefs ), get_discrepancy( discrepancy_method = "mean_classification_error", family_string = "binomial", X = X, Y = binomial_Y, est_Y = binomial_est_Y ) ) # Test for logistic deviance with no coefs input expect_error( get_discrepancy( discrepancy_method = "logistic_deviance", family_string = "binomial", X 
= X, Y = binomial_Y, est_Y = binomial_est_Y ) ) # Test for logistic deviance with coefs input expect_silent( get_discrepancy( discrepancy_method = "logistic_deviance", family_string = "binomial", X = X, Y = binomial_Y, coefs = coefs ) ) # Test for logistic deviance with different coefs input expect_false( get_discrepancy( discrepancy_method = "logistic_deviance", family_string = "binomial", X = X, Y = binomial_Y, coefs = coefs ) == get_discrepancy( discrepancy_method = "logistic_deviance", family_string = "binomial", X = X, Y = binomial_Y, coefs = rnorm(4) ) ) }) test_that("get_adjusted_cat_init adjusts data correctly", { formula_rhs_0 <- ~1 formula_rhs_1 <- ~X1 formula_rhs_2 <- ~ X1 + X2 formula_rhs_3 <- ~. # Adjust the data adjusted_cat_init_0 <- get_adjusted_cat_init(cat_init_glm_gaussian, formula_rhs_0) adjusted_cat_init_1 <- get_adjusted_cat_init(cat_init_glm_gaussian, formula_rhs_1) adjusted_cat_init_2 <- get_adjusted_cat_init(cat_init_glm_binomial, formula_rhs_2) adjusted_cat_init_3 <- get_adjusted_cat_init(cat_init_cox, formula_rhs_3) # Check that the adjusted data frame has the expected structure expect_s3_class(adjusted_cat_init_0$adj_x, "data.frame") expect_s3_class(adjusted_cat_init_1$adj_x, "data.frame") expect_s3_class(adjusted_cat_init_2$adj_x, "data.frame") expect_s3_class(adjusted_cat_init_3$adj_x, "data.frame") # Validate sizes and contents of adjusted observed and synthetic parts expect_equal(nrow(adjusted_cat_init_0$adj_obs_x), adjusted_cat_init_0$obs_size) expect_equal(nrow(adjusted_cat_init_0$adj_syn_x), adjusted_cat_init_0$size - adjusted_cat_init_0$obs_size) expect_equal(nrow(adjusted_cat_init_1$adj_obs_x), adjusted_cat_init_1$obs_size) expect_equal(nrow(adjusted_cat_init_1$adj_syn_x), adjusted_cat_init_1$size - adjusted_cat_init_1$obs_size) expect_equal(nrow(adjusted_cat_init_2$adj_obs_x), adjusted_cat_init_2$obs_size) expect_equal(nrow(adjusted_cat_init_2$adj_syn_x), adjusted_cat_init_2$size - adjusted_cat_init_2$obs_size) expect_equal(nrow(adjusted_cat_init_3$adj_obs_x), adjusted_cat_init_3$obs_size) expect_equal(nrow(adjusted_cat_init_3$adj_syn_x), adjusted_cat_init_3$size - adjusted_cat_init_3$obs_size) # Verify that the adjusted data contains the expected columns based on the formulas expect_equal(ncol(adjusted_cat_init_0$adj_x), 0) expect_true(all(c("X1") %in% colnames(adjusted_cat_init_1$adj_x))) expect_true(all(c("X1", "X2") %in% colnames(adjusted_cat_init_2$adj_x))) expect_true(all(c("X1", "X2", "X3") %in% colnames(adjusted_cat_init_3$adj_x))) expect_equal(ncol(adjusted_cat_init_0$adj_obs_x), 0) expect_true(all(c("X1") %in% colnames(adjusted_cat_init_1$adj_obs_x))) expect_true(all(c("X1", "X2") %in% colnames(adjusted_cat_init_2$adj_obs_x))) expect_true(all(c("X1", "X2", "X3") %in% colnames(adjusted_cat_init_3$adj_obs_x))) expect_equal(ncol(adjusted_cat_init_0$adj_syn_x), 0) expect_true(all(c("X1") %in% colnames(adjusted_cat_init_1$adj_syn_x))) expect_true(all(c("X1", "X2") %in% colnames(adjusted_cat_init_2$adj_syn_x))) expect_true(all(c("X1", "X2", "X3") %in% colnames(adjusted_cat_init_3$adj_syn_x))) }) test_that("get_resampled_df works correctly for various cases", { mock_data <- data.frame( X1 = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1000), # Continuous variable (Test for Deskewing) X2 = c(0, 1, 1, 1, 1, 1, 1, 1, 1, 1), # Categorical variable (Test for Flattening) X3 = c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4), # Another continuous variable (Test for Smoothing) X4 = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5) # Another continuous variable (Test for no action) ) data_degree <- c(1, 1, 
3, 1) # Test basic resampling functionality resample_size <- 50 result <- get_resampled_df( data = mock_data, resample_size = resample_size, data_degree = data_degree ) expect_false(setequal(unique(result$resampled_df[, 1]), unique(mock_data[, 1]))) expect_true(setequal(unique(result$resampled_df[, 2]), unique(mock_data[, 2]))) expect_false(setequal(unique(result$resampled_df[, 3]), unique(mock_data[, 3]))) expect_true(setequal(unique(result$resampled_df[, 4]), unique(mock_data[, 4]))) # Validate that each column are identified with correct type expect_equal(result$resampled_df_log$Type[1], "Continuous") expect_equal(result$resampled_df_log$Type[2], "Category") expect_equal(result$resampled_df_log$Type[3], "Continuous") expect_equal(result$resampled_df_log$Type[4], "Continuous") # Validate that each column is processed according to its type expect_true(grepl("Deskewing", result$resampled_df_log$Process[1])) expect_false(grepl("Flattening", result$resampled_df_log$Process[1])) expect_false(grepl("Smoothing", result$resampled_df_log$Process[1])) expect_false(grepl("Deskewing", result$resampled_df_log$Process[2])) expect_true(grepl("Flattening", result$resampled_df_log$Process[2])) expect_false(grepl("Smoothing", result$resampled_df_log$Process[2])) expect_false(grepl("Deskewing", result$resampled_df_log$Process[3])) expect_false(grepl("Flattening", result$resampled_df_log$Process[3])) expect_true(grepl("Smoothing", result$resampled_df_log$Process[3])) expect_false(grepl("Deskewing", result$resampled_df_log$Process[4])) expect_false(grepl("Flattening", result$resampled_df_log$Process[4])) expect_false(grepl("Smoothing", result$resampled_df_log$Process[4])) # Check that the resampled data frame has the correct dimensions expect_equal(nrow(result$resampled_df), resample_size) expect_equal(ncol(result$resampled_df), ncol(mock_data)) # Check that the resampled log is created correctly expect_true(!is.null(result$resampled_df_log)) expect_equal(nrow(result$resampled_df_log), ncol(mock_data)) # Validate that resampling processes are logged correctly expect_true(all(c("Covariate", "Type", "Process") %in% colnames(result$resampled_df_log))) # Test with resample_only = TRUE result_resample_only <- get_resampled_df( data = mock_data, resample_size = resample_size, resample_only = TRUE ) expect_true(setequal(unique(result_resample_only$resampled_df[, 1]), unique(mock_data[, 1]))) expect_true(setequal(unique(result_resample_only$resampled_df[, 2]), unique(mock_data[, 2]))) expect_true(setequal(unique(result_resample_only$resampled_df[, 3]), unique(mock_data[, 3]))) expect_true(setequal(unique(result_resample_only$resampled_df[, 4]), unique(mock_data[, 4]))) expect_null(result_resample_only$resampled_df_log) # Test without data_degree parameter result_without_degree <- get_resampled_df( data = mock_data, resample_size = resample_size, data_degree = NULL ) # Check if processes are logged with degree expect_false(is.null(result_without_degree$resampled_df_log)) # Validate that each column is processed according to its type expect_true(grepl("Deskewing", result_without_degree$resampled_df_log$Process[1])) expect_false(grepl("Flattening", result_without_degree$resampled_df_log$Process[1])) expect_false(grepl("Smoothing", result_without_degree$resampled_df_log$Process[1])) expect_false(grepl("Deskewing", result_without_degree$resampled_df_log$Process[2])) expect_true(grepl("Flattening", result_without_degree$resampled_df_log$Process[2])) expect_false(grepl("Smoothing", 
result_without_degree$resampled_df_log$Process[2])) expect_false(grepl("Deskewing", result_without_degree$resampled_df_log$Process[3])) expect_false(grepl("Flattening", result_without_degree$resampled_df_log$Process[3])) expect_false(grepl("Smoothing", result_without_degree$resampled_df_log$Process[3])) expect_false(grepl("Deskewing", result_without_degree$resampled_df_log$Process[4])) expect_false(grepl("Flattening", result_without_degree$resampled_df_log$Process[4])) expect_false(grepl("Smoothing", result_without_degree$resampled_df_log$Process[4])) }) test_that("get_formula_string works correctly", { # Test with a simple formula formula <- y ~ x1 + x2 expected_string <- "y ~ x1 + x2" expect_equal(get_formula_string(formula), expected_string) # Test with an interaction term formula_interaction <- y ~ x1 * x2 expected_string_interaction <- "y ~ x1 * x2" expect_equal(get_formula_string(formula_interaction), expected_string_interaction) # Test with a more complex formula formula_complex <- y ~ x1 + x2 + I(x1^2) + log(x2) expected_string_complex <- "y ~ x1 + x2 + I(x1^2) + log(x2)" expect_equal(get_formula_string(formula_complex), expected_string_complex) }) test_that("get_formula_lhs works correctly", { # Test with a simple formula formula <- y ~ x1 + x2 expected_lhs <- "y" expect_equal(get_formula_lhs(formula), expected_lhs) # Test with an interaction term formula_interaction <- y ~ x1 * x2 expected_lhs_interaction <- "y" expect_equal(get_formula_lhs(formula_interaction), expected_lhs_interaction) # Test with a more complex formula formula_complex <- y ~ x1 + x2 + I(x1^2) + log(x2) expected_lhs_complex <- "y" expect_equal(get_formula_lhs(formula_complex), expected_lhs_complex) }) test_that("get_formula_rhs works correctly", { # Test with a simple formula formula <- y ~ x1 + x2 expected_rhs <- "x1 + x2" expect_equal(get_formula_rhs(formula), expected_rhs) # Test with tilde included expected_rhs_with_tilde <- "~ x1 + x2" expect_equal(get_formula_rhs(formula, with_tilde = TRUE), expected_rhs_with_tilde) # Test with interaction term formula_interaction <- y ~ x1 * x2 expected_rhs_interaction <- "x1 * x2" expect_equal(get_formula_rhs(formula_interaction), expected_rhs_interaction) # Test with a more complex formula formula_complex <- y ~ x1 + x2 + I(x1^2) + log(x2) expected_rhs_complex <- "x1 + x2 + I(x1^2) + log(x2)" expect_equal(get_formula_rhs(formula_complex), expected_rhs_complex) # Test with tilde for complex formula expected_rhs_complex_with_tilde <- "~ x1 + x2 + I(x1^2) + log(x2)" expect_equal(get_formula_rhs(formula_complex, with_tilde = TRUE), expected_rhs_complex_with_tilde) }) test_that("is.continuous works correctly", { # Test with continuous numeric vector continuous_vector <- c(1, 2, 3, 4, 5) expect_true(is.continuous(continuous_vector)) # Test with categorical vector categorical_vector <- c("A", "B", "A", "B", "C") expect_false(is.continuous(categorical_vector)) # Test with binary numeric vector binary_vector <- c(0, 1, 1, 0, 1) expect_false(is.continuous(binary_vector)) # Test with a numeric vector with only two unique values two_unique_vector <- c(1, 1, 2, 2, 1) expect_false(is.continuous(two_unique_vector)) # Test with numeric vector with more than two unique values mixed_numeric_vector <- c(1, 2, 3, 4, 5, 6) expect_true(is.continuous(mixed_numeric_vector)) }) test_that("get_standardized_data works correctly", { # Test with numeric and factor columns data <- data.frame( numeric_col = c(2, 3, NA, 4), factor_col = factor(c("A", "B", "A", NA)), stringsAsFactors = FALSE ) # Test 
  # with numeric NA replacement
  standardized_data <- get_standardized_data(data, na_replace = 0)
  expect_equal(standardized_data$numeric_col, c(2, 3, 0, 4))
  expect_equal(standardized_data$factor_col, factor(c("A", "B", "A", "A")))

  # Test with mean replacement for numeric columns
  standardized_data <- get_standardized_data(data, na_replace = mean)
  expect_equal(standardized_data$numeric_col, c(2, 3, 3, 4))

  # Test with NA omission
  data_with_na_omit <- get_standardized_data(data, na_replace = stats::na.omit)
  expect_equal(nrow(data_with_na_omit), 2)
  expect_equal(ncol(data_with_na_omit), 2)

  # Test with no NA values
  data_no_na <- data.frame(
    numeric_col = c(1, 2, 4),
    factor_col = factor(c("A", "B", "A")),
    stringsAsFactors = FALSE
  )
  standardized_data_no_na <- get_standardized_data(data_no_na)
  expect_equal(standardized_data_no_na, data_no_na)
})
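
# Illustration only (not a formal test): the checks above assume that
# get_standardized_data() fills numeric NAs with `na_replace` (a value or a
# function such as `mean`), replaces factor NAs with the column mode, and drops
# incomplete rows when `na_replace = stats::na.omit`. A minimal sketch:
#   get_standardized_data(
#     data.frame(num = c(1, NA), fac = factor(c("A", NA))),
#     na_replace = 0
#   )
#   # expected: num = c(1, 0), fac = factor(c("A", "A"))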