test_that("grouped result is correct with aux columns, knn", {
  set.seed(1234)
  to_test <- sim_mat(m = 20, n = 50, perc_NA = 0.3, perc_col_NA = 1)

  group_1 <- subset(to_test$group_feature, group == "chr1")$feature_id
  group_2 <- subset(to_test$group_feature, group == "chr2")$feature_id

  # impute only first 3 values of group 1, the rest are aux. Group 2 do 4 features.
  group_df <- tibble::tibble(
    features = list(group_1[1:3], group_2[1:4]),
    aux = list(group_1, group_2)
  )

  # run grouped imputation
  obj <- t(to_test$input)
  grouped_results <- group_imp(obj, group = group_df, k = 5)

  # manual imputation for comparison
  sub1_cols <- unique(c(group_df$features[[1]], group_df$aux[[1]]))
  sub1 <- knn_imp(obj[, sub1_cols], k = 5, subset = group_df$features[[1]])
  sub2_cols <- unique(c(group_df$features[[2]], group_df$aux[[2]]))
  sub2 <- knn_imp(obj[, sub2_cols], k = 5, subset = group_df$features[[2]])

  expected_results <- cbind(sub1, sub2)[, colnames(obj)]
  # Compare results
  expect_identical(grouped_results[, ], expected_results)
})

test_that("grouped result is correct with aux columns, pca", {
  set.seed(1234)
  to_test <- sim_mat(m = 20, n = 50, perc_NA = 0.3, perc_col_NA = 1)

  group_1 <- subset(to_test$group_feature, group == "chr1")$feature_id
  group_2 <- subset(to_test$group_feature, group == "chr2")$feature_id

  # impute only first 3 values of group 1, the rest are aux. Group 2 do 4 features.
  group_df <- tibble::tibble(
    features = list(group_1[1:3], group_2[1:4]),
    aux = list(group_1, group_2)
  )

  # run grouped imputation
  obj <- t(to_test$input)
  grouped_results <- group_imp(obj, group = group_df, ncp = 2, seed = 1234)

  # manual imputation for comparison
  sub1_cols <- unique(c(group_df$features[[1]], group_df$aux[[1]]))
  sub1 <- pca_imp(obj[, sub1_cols], ncp = 2, seed = 1234)[, group_df$features[[1]]]
  sub2_cols <- unique(c(group_df$features[[2]], group_df$aux[[2]]))
  sub2 <- pca_imp(obj[, sub2_cols], ncp = 2, seed = 1234)[, group_df$features[[2]]]

  expected_results <- cbind(sub1, sub2)
  expect_equal(grouped_results[, colnames(expected_results)], expected_results)
})

test_that("group-specific parameters work correctly", {
  set.seed(1234)
  to_test <- sim_mat(m = 20, n = 50, perc_NA = 0.3, perc_col_NA = 1)
  group_1 <- subset(to_test$group_feature, group == "chr1")$feature_id
  group_2 <- subset(to_test$group_feature, group == "chr2")$feature_id

  # Different k values for each group
  group_df <- tibble::tibble(
    features = list(group_1[1:3], group_2[1:4]),
    aux = list(group_1, group_2),
    parameters = list(
      list(k = 3, dist_pow = 0),
      list(k = 7, dist_pow = 1)
    )
  )

  obj <- t(to_test$input)
  grouped_results <- group_imp(obj, group = group_df)

  # Manual verification with different parameters
  sub1 <- knn_imp(obj[, group_1], k = 3, subset = group_1[1:3], dist_pow = 0)
  sub2 <- knn_imp(obj[, group_2], k = 7, subset = group_2[1:4], dist_pow = 1)
  expected_results <- cbind(sub1, sub2)[, colnames(obj)]

  expect_identical(grouped_results[, ], expected_results)
})

test_that("duplicate features across groups throws error", {
  set.seed(1234)
  to_test <- sim_mat(m = 20, n = 50, perc_NA = 0.3, perc_col_NA = 1)
  group_1 <- subset(to_test$group_feature, group == "chr1")$feature_id
  group_2 <- subset(to_test$group_feature, group == "chr2")$feature_id

  group_df <- tibble::tibble(
    features = list(group_1[1:5], c(group_1[5], group_2[1:3])), # group_1[5] in both
    aux = list(group_1, group_2)
  )

  obj <- t(to_test$input)
  expect_error(
    group_imp(obj, group = group_df, k = 5),
    "Same features can't be in more than 1 groups"
  )
})

test_that("grouped imputation works without aux columns, knn", {
  set.seed(1234)
  to_test <- sim_mat(m = 20, n = 50, perc_NA = 0.3, perc_col_NA = 1)
  group_1 <- subset(to_test$group_feature, group == "chr1")$feature_id
  group_2 <- subset(to_test$group_feature, group == "chr2")$feature_id

  # no aux columns, only features
  group_df <- tibble::tibble(
    features = list(group_1[1:20], group_2[1:10])
  )

  obj <- t(to_test$input)
  expect_no_error(grouped_results <- group_imp(obj, group = group_df, k = 5))

  # Build expected results: start with original and update only imputed columns
  sub1 <- knn_imp(obj[, group_1[1:20]], k = 5)
  sub2 <- knn_imp(obj[, group_2[1:10]], k = 5)

  expected_results <- obj
  expected_results[, group_1[1:20]] <- sub1
  expected_results[, group_2[1:10]] <- sub2

  expect_identical(grouped_results[, ], expected_results)
})

test_that("group-specific parameters work correctly, pca", {
  set.seed(1234)
  to_test <- sim_mat(m = 20, n = 50, perc_NA = 0.3, perc_col_NA = 1)
  group_1 <- subset(to_test$group_feature, group == "chr1")$feature_id
  group_2 <- subset(to_test$group_feature, group == "chr2")$feature_id
  # Different ncp and coeff.ridge values for each group
  group_df <- tibble::tibble(
    features = list(group_1[1:3], group_2[1:4]),
    aux = list(group_1, group_2),
    parameters = list(
      list(ncp = 2, coeff.ridge = 1, seed = 1234),
      list(ncp = 3, coeff.ridge = 2, seed = 1234)
    )
  )
  obj <- t(to_test$input)
  grouped_results <- group_imp(obj, group = group_df)

  # Manual verification with different parameters
  sub1_full <- pca_imp(obj[, group_1], ncp = 2, coeff.ridge = 1, seed = 1234)
  sub1 <- obj[, group_1]
  sub1[, group_1[1:3]] <- sub1_full[, group_1[1:3]]
  sub2_full <- pca_imp(obj[, group_2], ncp = 3, coeff.ridge = 2, seed = 1234)
  sub2 <- obj[, group_2]
  sub2[, group_2[1:4]] <- sub2_full[, group_2[1:4]]
  expected_results <- cbind(sub1, sub2)[, colnames(obj)]
  expect_equal(grouped_results[, ], expected_results)
})

test_that("grouped imputation works without aux columns, pca", {
  set.seed(1234)
  to_test <- sim_mat(m = 20, n = 50, perc_NA = 0.3, perc_col_NA = 1)
  group_1 <- subset(to_test$group_feature, group == "chr1")$feature_id
  group_2 <- subset(to_test$group_feature, group == "chr2")$feature_id

  # no aux columns, only features
  group_df <- tibble::tibble(
    features = list(group_1[1:20], group_2[1:10])
  )

  obj <- t(to_test$input)
  expect_no_error(grouped_results <- group_imp(obj, group = group_df, ncp = 2, seed = 1234))

  # Build expected results: start with original and update only imputed columns
  sub1 <- pca_imp(obj[, group_1[1:20]], ncp = 2, seed = 1234)
  sub2 <- pca_imp(obj[, group_2[1:10]], ncp = 2, seed = 1234)

  expected_results <- obj
  expected_results[, group_1[1:20]] <- sub1
  expected_results[, group_2[1:10]] <- sub2

  expect_identical(grouped_results[, ], expected_results)
})

test_that("group-specific parameters work correctly in parallel, pca", {
  skip_on_cran()
  skip_on_ci()

  set.seed(1234)
  to_test <- sim_mat(m = 20, n = 50, perc_NA = 0.3, perc_col_NA = 1)
  group_1 <- subset(to_test$group_feature, group == "chr1")$feature_id
  group_2 <- subset(to_test$group_feature, group == "chr2")$feature_id
  # Different ncp and coeff.ridge values for each group
  group_df <- tibble::tibble(
    features = list(group_1[1:3], group_2[1:4]),
    aux = list(group_1, group_2),
    parameters = list(
      list(ncp = 2, coeff.ridge = 1),
      list(ncp = 3, coeff.ridge = 2)
    )
  )
  obj <- t(to_test$input)
  mirai::daemons(2, seed = 1234)
  grouped_results <- group_imp(obj, group = group_df, cores = 2, seed = 1234, nb.init = 10)
  mirai::daemons(0)
  # Manual verification with different parameters
  sub1_full <- pca_imp(obj[, group_1], ncp = 2, coeff.ridge = 1, seed = 1234, nb.init = 10)
  sub1 <- obj[, group_1]
  sub1[, group_1[1:3]] <- sub1_full[, group_1[1:3]]
  sub2_full <- pca_imp(obj[, group_2], ncp = 3, coeff.ridge = 2, seed = 1234, nb.init = 10)
  sub2 <- obj[, group_2]
  sub2[, group_2[1:4]] <- sub2_full[, group_2[1:4]]
  expected_results <- cbind(sub1, sub2)[, colnames(obj)]

  imputed_cols <- c(group_1[1:3], group_2[1:4])
  obj_orig <- obj[, imputed_cols]
  grouped_values <- grouped_results[, imputed_cols][is.na(obj_orig)]
  expected_values <- expected_results[, imputed_cols][is.na(obj_orig)]
  # seeding in parallel is hard to reproduce correctly
  expect_true(
    cor(grouped_values, expected_values) > 0.999
  )
})