test_that("Nystrom method approximates the true kernel on a dataframe.", {
  check_nystrom_approximation <- function(fit, df, max_thresh, mean_thresh) {
    fm <- build_fm(fit, df)
    true_kernel <- rbf_kernel_matrix(fit$kernel_params$sigma, as.matrix(df), as.matrix(df))
    approximate_kernel <- fm %*% t(fm)

    expect_lt(max(abs(true_kernel - approximate_kernel)), max_thresh)
    expect_lt(mean(abs(true_kernel - approximate_kernel)), mean_thresh)
  }

  set.seed(8)
  df <- data.frame(
    X1 = c(2,   3,   4,   5,   6, 7, 8),
    X2 = c(1, 1.2, 1.3, 1.4, 1.1, 7, 1),
    X3 = rnorm(7)
  )

  set.seed(8)
  ## RBF kernel, full feature map
  fit <- kfm_nystrom(df, m = 7, r = 7, kernel = "radial", sigma = 0.05)
  check_nystrom_approximation(fit, df, 1e-14, 1e-15)
  fit <- kfm_nystrom(df, m = 7, r = 7, kernel = "radial", sigma = 0.5)
  check_nystrom_approximation(fit, df, 1e-14, 1e-15)

  ## RBF kernel, smaller feature map
  fit <- kfm_nystrom(df, m = 7, r = 6, kernel = "radial", sigma = 0.05)
  check_nystrom_approximation(fit, df, 1, 1e-3)
  fit <- kfm_nystrom(df, m = 7, r = 6, kernel = "radial", sigma = 0.5)
  check_nystrom_approximation(fit, df, 1, 0.05)

})

test_that("Nystrom method approximates the true kernel on a `mild_df` object", {
  check_nystrom_approximation <- function(fit, df, max_thresh, mean_thresh) {
    X <- subset(df, select = -c(bag_label, bag_name, instance_name)) %>%
      suppressWarnings()
    fm <- build_fm(fit, df)
    fm <- as.matrix(subset(fm, select = -c(bag_label, bag_name, instance_name)))

    true_kernel <- rbf_kernel_matrix(fit$kernel_params$sigma, as.matrix(X), as.matrix(X))
    approximate_kernel <- fm %*% t(fm)

    expect_lt(max(abs(true_kernel - approximate_kernel)), max_thresh)
    expect_lt(mean(abs(true_kernel - approximate_kernel)), mean_thresh)
  }

  set.seed(8)
  df <- mildsvm::generate_mild_df(ncov = 5,
                                  nbag = 7,
                                  nsample = 7,
                                  positive_prob = 0.15)

  set.seed(8)
  ## RBF kernel, full feature map
  fit <- kfm_nystrom(df, m = 196, r = 196, kernel = "radial", sigma = 0.05)
  check_nystrom_approximation(fit, df, 1e-13, 1e-14)
  fit <- kfm_nystrom(df, m = 196, r = 196, kernel = "radial", sigma = 0.5)
  check_nystrom_approximation(fit, df, 1e-13, 1e-14)

  ## RBF kernel, smaller feature map
  fit <- kfm_nystrom(df, m = 7, r = 6, kernel = "radial", sigma = 0.05)
  check_nystrom_approximation(fit, df, 1, 0.05)
  fit <- kfm_nystrom(df, m = 7, r = 6, kernel = "radial", sigma = 0.5)
  check_nystrom_approximation(fit, df, 1, 0.06)

})

test_that("Nystrom methods have correct output dimensions", {

  ## test Nystrom on data frame
  set.seed(8)
  df <- data.frame(
    X1 = c(2,   3,   4,   5,   6, 7, 8),
    X2 = c(1, 1.2, 1.3, 1.4, 1.1, 7, 1),
    X3 = rnorm(7)
  )

  fit <- kfm_nystrom(df, m = 7, r = 7, kernel = "radial", sigma = 0.05)
  fm <- build_fm(fit, df)
  expect_equal(dim(fm), c(7,7))

  fit <- kfm_nystrom(df, m = 7, r = 6, kernel = "radial", sigma = 0.05)
  fm <- build_fm(fit, df)
  expect_equal(dim(fm), c(7,6))
  fm <- build_fm(fit, df[1:3, ])
  expect_equal(dim(fm), c(3,6))

  fit <- kfm_nystrom(df, m = 5, r = 3, kernel = "radial", sigma = 0.05)
  fm <- build_fm(fit, df)
  expect_equal(dim(fm), c(7,3))

  ## test Nystrom on MilData
  mil_data <- mildsvm::generate_mild_df(ncov = 5,
                                        nbag = 7,
                                        nsample = 7,
                                        positive_prob = 0.15)

  fit <- kfm_nystrom(mil_data, m = nrow(mil_data), r = nrow(mil_data), kernel = "radial", sigma = 0.05)
  fm <- build_fm(fit, mil_data)
  expect_equal(dim(fm), c(nrow(mil_data), nrow(mil_data)+3))
  fm <- build_fm(fit, mil_data[1:13, ])
  expect_equal(dim(fm), c(13, nrow(mil_data)+3))

  fit <- kfm_nystrom(mil_data, m = 7, r = 7, kernel = "radial", sigma = 0.05)
  fm <- build_fm(fit, mil_data)
  expect_equal(dim(fm), c(nrow(mil_data), 7+3))

  fit <- kfm_nystrom(mil_data, m = 7, r = 3, kernel = "radial", sigma = 0.05)
  fm <- build_fm(fit, mil_data)
  expect_equal(dim(fm), c(nrow(mil_data), 3+3))

})

test_that("Nystrom method works with various sampling parameters", {

  ## test Nystrom on data frame
  set.seed(8)
  df <- data.frame(
    X1 = c(2,   3,   4,   5,   6, 7, 8),
    X2 = c(1, 1.2, 1.3, 1.4, 1.1, 7, 1),
    X3 = rnorm(7)
  )

  fit <- kfm_nystrom(df, m = 7, r = 7, kernel = "radial", sampling = 1:7, sigma = 0.05)
  expect_equal(fit$df_sub, as.matrix(df[1:7, ]), ignore_attr = TRUE)

  expect_warning({
    fit <- kfm_nystrom(df, m = 7, r = 7, kernel = "radial", sampling = 1:6, sigma = 0.05)
  })

  ## test Nystrom on MilData
  mil_data <- mildsvm::generate_mild_df(ncov = 5,
                                        nbag = 7,
                                        nsample = 7,
                                        positive_prob = 0.15)


  fit <- kfm_nystrom(mil_data, m = 50, r = 50,
                     kernel = "radial", sampling = 'stratified', sigma = 0.05)

  rows <- as.numeric(rownames(fit$df_sub))
  expect(length(unique(table(mil_data$bag_name[rows]))) %in% c(1,2),
         "Expect counts for each bag to be the same (+/- 1)")
  expect(length(unique(table(mil_data$instance_name[rows]))) %in% c(1,2),
         "Expect counts for each instance to be the same (+/- 1)")
  expect(all(unique(rows) == rows), "Rows are sampled at most once")

  expect_warning({
    fit <- kfm_nystrom(mil_data, m = 50, r = 50, kernel = "radial", sampling = 1:10, sigma = 0.05)
  })

  fit <- kfm_nystrom(mil_data, m = 10, r = 10, kernel = "radial", sampling = 1:10, sigma = 0.05)
  expect_equal(fit$df_sub, as.matrix(mil_data[1:10, -c(1:3)]), ignore_attr = TRUE) %>%
    expect_warning()
  fit <- kfm_nystrom(mil_data, m = 10, r = 10, kernel = "radial", sampling = "random", sigma = 0.05)

})

test_that("Stratified sampling works with bag structure", {
  set.seed(8)
  df <- mildsvm::generate_mild_df(ncov = 5,
                                  nbag = 7,
                                  nsample = 7,
                                  positive_prob = 0.15)


  rows <- bag_instance_sampling(df, size = 10)
  expect(length(unique(table(df$bag_name[rows]))) %in% c(1,2),
         "Expect counts for each bag to be the same (+/- 1)")
  expect(length(unique(table(df$instance_name[rows]))) %in% c(1,2),
         "Expect counts for each instance to be the same (+/- 1)")
  expect(all(unique(rows) == rows), "Rows are sampled at most once")

  rows <- bag_instance_sampling(df, size = 50)
  expect(length(unique(table(df$bag_name[rows]))) %in% c(1,2),
         "Expect counts for each bag to be the same (+/- 1)")
  expect(length(unique(table(df$instance_name[rows]))) %in% c(1,2),
         "Expect counts for each instance to be the same (+/- 1)")
  expect(all(unique(rows) == rows), "Rows are sampled at most once")


  rows <- bag_instance_sampling(df, size = 196)
  expect(length(unique(table(df$bag_name[rows]))) %in% c(1,2),
         "Expect counts for each bag to be the same (+/- 1)")
  expect(length(unique(table(df$instance_name[rows]))) %in% c(1,2),
         "Expect counts for each instance to be the same (+/- 1)")
  expect(all(unique(rows) == rows), "Rows are sampled at most once")


  df <- mildsvm::generate_mild_df(positive_dist = "mvnormal",
                                  negative_dist = "mvnormal",
                                  remainder_dist = "mvnormal",
                                  nbag = 2,
                                  ninst = 2,
                                  nsample = 4)
  df <- df[-c(1:3), ]

  rows <- bag_instance_sampling(df, size = 13)
  expect(length(unique(table(df$bag_name[rows]))) %in% c(1,2),
         "Expect counts for each bag to be the same (+/- 1)")
  expect(length(unique(table(df$instance_name[rows]))) %in% c(1,2),
         "Expect counts for each instance to be the same (+/- 1)")
  # in this case, not all rows will be unique, as row 1 gets sampled multiple times


  # how to do stratified sampling

  # how to incorporate into kfm_nystrom function
  # - add parameter 'random_rows' to override variable in kfm_nystrom.default
  # - add parameter 'sampling' which indicates which sampling method to use
  #   - sampling = 'random' or sampling = 'stratified'

})

test_that("Nystrom sampling works with duplicated data", {
  set.seed(8)
  df <- data.frame(
    X1 = c(2,   3,   4,   5,   6, 7, 8),
    X2 = c(1, 1.2, 1.3, 1.4, 1.1, 7, 1),
    X3 = rnorm(7)
  )
  df <- rbind(df, df[1, ])

  expect_warning({
    fit <- kfm_nystrom(df, m = 8, r = 8, kernel = "radial", sampling = 1:8, sigma = 0.05)
  })
  fm <- build_fm(fit, df)
  expect_equal(dim(fm), c(8,7))

})

test_that("Nystrom feature map prints correctly", {
  df <- data.frame(
    X1 = c(2,   3,   4,   5,   6, 7, 8),
    X2 = c(1, 1.2, 1.3, 1.4, 1.1, 7, 1),
    X3 = rnorm(7)
  )
  set.seed(8)
  df2 <- mildsvm::generate_mild_df(ncov = 5,
                                  nbag = 7,
                                  nsample = 7,
                                  positive_prob = 0.15)

  expect_snapshot({
    kfms <- list(
      "default" = kfm_nystrom(df),
      "supplied_sample" = kfm_nystrom(df, sampling = 1:7),
      "stratified_sample" = kfm_nystrom(df2, sampling = "stratified"),
      "low m" = kfm_nystrom(df, m = 5),
      "low r" = kfm_nystrom(df, r = 5),
      "sigma" = kfm_nystrom(df, sigma = 0.05)
    ) %>%
      suppressWarnings() %>%
      suppressMessages()

    print(kfms)
  })
  expect_true(TRUE)
})