test_that("`impute_knn_brute` and impute_knn_mlpack calculates the missing location correctly", {
  set.seed(1234)
  to_test <- t(sim_mat(n = 50, m = 20, perc_NA = 0.5, perc_col_NA = 1)$input)
  missing <- unname(which(is.na(to_test), arr.ind = TRUE))
  miss <- matrix(is.na(to_test), nrow = nrow(to_test), ncol = ncol(to_test))
  storage.mode(miss) <- "integer"
  n_col_miss <- colSums(is.na(to_test))

  # For brute
  imputed_index_brute <- impute_knn_brute(
    obj = to_test,
    miss = miss,
    k = 5,
    n_col_miss = n_col_miss,
    method = 0,
    dist_pow = 1,
    cores = 1
  )

  imputed_index_mlpack <- impute_knn_mlpack(
    obj = mean_imp_col(to_test),
    miss = miss,
    k = 5,
    n_col_miss = n_col_miss,
    method = 0,
    tree = "kd",
    dist_pow = 1,
    cores = 1
  )

  imputed_index_brute[is.nan(imputed_index_brute)] <- NA
  imputed_index_mlpack[is.nan(imputed_index_mlpack)] <- NA
  expect_equal(imputed_index_brute[, c(1, 2)], missing)
  expect_equal(imputed_index_mlpack[, c(1, 2)], missing)
})

test_that("`knn_imp` works", {
  data("khanmiss1")
  expect_no_error(knn_imp(
    t(khanmiss1),
    k = 3,
    method = "euclidean"
  ))

  expect_no_error(knn_imp(
    t(khanmiss1),
    k = 3,
    method = "manhattan",
    tree = "kd"
  ))
})

test_that("Exactly replicate `impute::impute.knn`", {
  # impute is on bioconductor
  # Skip this test on CRAN to avoid dependency issues
  skip("Uncomment and test manually")
  # data("khanmiss1")
  #
  # # Check if the 'impute' package is installed
  #
  # # Perform imputation using knn_imp with method "impute.knn" on transposed data
  # r1 <- knn_imp(t(khanmiss1), k = 3, method = "euclidean")
  #
  # # Perform imputation using the original impute::impute.knn function
  # # Transpose the result to match the orientation
  # r2 <- t(
  #   impute::impute.knn(
  #     khanmiss1,
  #     k = 3,
  #     maxp = nrow(khanmiss1)
  #   )$data
  # )
  #
  # # Verify that the results from knn_imp match exactly with impute::impute.knn
  # expect_equal(r1[, ], r2[, ])
  #
  # # Test to see if the post_imp strategy would replicate the results completely
  # # Set seed for reproducibility in simulation
  # set.seed(1234)
  #
  # # Generate a simulated matrix with missing values (500 rows, 30 columns, 50% NA, 80% columns with NA)
  # to_test <- t(sim_mat(n = 500, m = 30, perc_NA = 0.5, perc_col_NA = 0.8)$input)
  #
  # # Pre-compute row means before imputation (ignoring NAs)
  # pre_impute <- rowMeans(to_test, na.rm = TRUE)
  #
  # # Impute using knn_imp without post-imputation step; expect some NAs to remain
  # r1.1 <- knn_imp(to_test, k = 5, method = "euclidean", post_imp = FALSE)
  # expect_true(anyNA(r1.1))
  #
  # # impute::impute.knn uses the pre-imputation row means to impute the data.
  # # After knn_imp, we row impute the data with pre-calculated row_means
  # # Identify indices of remaining NAs
  # indices <- which(is.na(r1.1), arr.ind = TRUE)
  #
  # # Fill remaining NAs with pre-computed row means
  # r1.1[indices] <- pre_impute[indices[, 1]]
  #
  # # Verify no NAs remain after manual post-imputation
  # expect_true(!anyNA(r1.1))
  #
  # # Perform imputation using impute::impute.knn on the transposed simulated data
  # r2.1 <- t(
  #   impute::impute.knn(
  #     t(to_test),
  #     k = 5,
  #     maxp = ncol(to_test)
  #   )$data
  # )
  #
  # # Verify that the manually post-imputed knn_imp matches impute::impute.knn
  # expect_equal(r1.1[, ], r2.1[, ])
  #
  # # Test subset. strategy is to use subset, then impute.knn on the same data
  # # and pull out the same subset then compare the two matrices
  # # Set seed for reproducibility in subset selection
  # set.seed(2345)
  #
  # # Generate another simulated matrix (100 rows, 200 columns, 10% NA, all columns with NA)
  # to_test_subset <- t(sim_mat(n = 100, m = 200, perc_NA = 0.1, perc_col_NA = 1)$input)
  #
  # # Randomly select 10 subset columns
  # subset_cols <- sample(colnames(to_test_subset), size = 10)
  #
  # # Verify that the subset has NAs before imputation
  # expect_true(anyNA(to_test_subset[, subset_cols]))
  #
  # # Impute only the subset columns using knn_imp without post_imp
  # r1_subset <- knn_imp(
  #   to_test_subset,
  #   k = 10,
  #   method = "euclidean",
  #   post_imp = FALSE,
  #   subset = subset_cols
  # )[, subset_cols]
  #
  # # Verify no NAs remain in the imputed subset
  # expect_true(!anyNA(r1_subset))
  #
  # # Perform full imputation using impute::impute.knn and extract the subset
  # r2_subset <- t(
  #   impute::impute.knn(
  #     t(to_test_subset),
  #     k = 10,
  #     maxp = ncol(to_test_subset)
  #   )$data
  # )[, subset_cols]
  #
  # # Verify no NAs in the extracted subset from full imputation
  # expect_true(!anyNA(r2_subset))
  #
  # # Verify that the subset imputation matches the extracted subset from full imputation
  # expect_equal(r1_subset, r2_subset)
})

test_that("`subset` feature of `knn_imp` works with post_imp = FALSE/TRUE", {
  set.seed(1234)
  to_test <- t(sim_mat(m = 20, n = 50, perc_NA = 0.2, perc_col_NA = 1)$input)
  # Impute just 3 columns
  ## Check subset using numeric index
  r1 <- knn_imp(to_test, k = 3, post_imp = FALSE, subset = c(1, 3, 5))
  expect_true(!anyNA(r1[, c(1, 3, 5)]))
  expect_equal(is.na(r1[, -c(1, 3, 5)]), is.na(to_test[, -c(1, 3, 5)]))
  ## Check subset using character vector
  r2 <- knn_imp(
    to_test,
    k = 3,
    post_imp = FALSE,
    subset = paste0("feat", c(1, 3, 5))
  )
  expect_equal(r1, r2)

  # Test with post_imp = TRUE and a column requiring post imputation
  to_test_post <- to_test
  # Column 5 will be colMeans if post_imp is TRUE
  to_test_post[2:nrow(to_test_post), 5] <- NA
  r3 <- knn_imp(to_test_post, k = 3, post_imp = TRUE, subset = c(1, 3, 5))
  expect_true(!anyNA(r3[, c(1, 3, 5)]))
  # Expect that only the subset columns are imputed. The rests are untouched
  expect_equal(is.na(r3[, -c(1, 3, 5)]), is.na(to_test_post[, -c(1, 3, 5)]))
  # Verify post_imp on column 5
  col5_mean <- mean(to_test_post[, 5], na.rm = TRUE)
  expect_equal(unname(r3[, 5]), rep(col5_mean, nrow(to_test_post)))
  r4 <- knn_imp(
    to_test_post,
    k = 3,
    post_imp = TRUE,
    subset = paste0("feat", c(1, 3, 5))
  )
  expect_equal(r3, r4)
})

test_that("Behavior with extreme missing columns and rows", {
  set.seed(1234)
  to_test <- t(sim_mat(m = 20, n = 20, perc_NA = 0.2, perc_col_NA = 1)$input)
  # row 1 is all NA
  to_test[1, ] <- NA
  expect_no_error(knn_imp(to_test, k = 3, post_imp = FALSE))
  expect_true(!anyNA(knn_imp(to_test, k = 3, post_imp = TRUE)))

  to_test[, 1] <- NA
  expect_error(knn_imp(to_test, k = 3, post_imp = FALSE))
})