# Skip on CRAN as this test takes about a minute.
testthat::skip_on_cran()

# Create random number generator stream for reproducibility.
r <- familiar:::.start_random_number_stream(seed=1863)


##### Subsampling ##############################################################
for(outcome_type in c("binomial", "multinomial", "continuous", "count", "survival")){
  
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type=outcome_type, rare_outcome=TRUE, rstream_object=r)
  
  n_rep <- 3L
  
  if(outcome_type %in% c("binomial", "multinomial", "survival")){
    available_stratify_options <- c(FALSE, TRUE)
    
  } else {
    available_stratify_options <- FALSE
  }
  
  for(stratify in available_stratify_options){
    testthat::test_that(
      paste0("Subsampling ", ifelse(stratify, "(stratified) ", ""),
             "for ", outcome_type, " functions correctly."), {
        
        # Create subsample.
        subsample_data <- familiar:::.create_subsample(
          data=data@data, n_iter=20, size=20, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r)
        
        # Check that none of the in-bag datasets are the same.
        for(ii in 1:(length(subsample_data$train_list) - 1)){
          for(jj in (ii+1):length(subsample_data$train_list)){
            testthat::expect_equal(
              data.table::fsetequal(subsample_data$train_list[[ii]],
                                    subsample_data$train_list[[jj]]),
              FALSE)
          }
        }
        
        for(ii in seq_along(subsample_data$train_list)){
          # Check that there is no overlap between in-bag and out-of-bag data.
          testthat::expect_equal(
            nrow(data.table::fintersect(unique(subsample_data$train_list[[ii]]),
                                        unique(subsample_data$valid_list[[ii]]))),
            0L)
          
          # Check that the combination of in-bag and out-of-bag data is the
          # same as the input dataset.
          testthat::expect_equal(
            data.table::fsetequal(
              unique(rbind(subsample_data$train_list[[ii]],
                           subsample_data$valid_list[[ii]])),
              unique(data@data[, mget(familiar:::get_id_columns(id_depth="series"))])),
            TRUE)
          
          # Check that sampling creates a dataset identical to the development
          # dataset.
          train_data <- familiar:::select_data_from_samples(
            data=data, samples=subsample_data$train_list[[ii]])
          
          # Test that the samples and series are selected.
          testthat::expect_equal(
            data.table::fsetequal(
              train_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
              subsample_data$train_list[[ii]]),
            TRUE)
          
          # Test that repetitions are likewise selected.
          testthat::expect_equal(nrow(train_data@data),
                                 n_rep * nrow(subsample_data$train_list[[ii]]))
          
          # Test that the subsample has the required size.
          testthat::expect_equal(
            data.table::uniqueN(train_data@data, by=c("batch_id", "sample_id")),
            20)
          
          # Check that sampling creates a dataset identical to the validation
          # dataset.
          validation_data <- familiar:::select_data_from_samples(
            data=data, samples=subsample_data$valid_list[[ii]])
          
          # Test that the samples and series are selected.
          testthat::expect_equal(
            data.table::fsetequal(
              validation_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
              subsample_data$valid_list[[ii]]),
            TRUE)
          
          # Test that repetitions are likewise selected.
          testthat::expect_equal(nrow(validation_data@data),
                                 n_rep * nrow(subsample_data$valid_list[[ii]]))
          
          # If stratified, check that the occurrence of events or categories is
          # similar between the discovery data and the entire dataset.
          if(stratify & outcome_type %in% c("binomial", "multinomial")){
            # Determine the frequency of outcome classes in the original
            # dataset.
            input_frequency <- data@data[, list("frequency_original"=.N / nrow(data@data)), by="outcome"]
            train_frequency <- train_data@data[, list("frequency_bootstrap"=.N / nrow(train_data@data)), by="outcome"]
            
            # Update the frequency table.
            frequency_table <- merge(x=input_frequency, y=train_frequency, by="outcome")
            
            # Check that the data are correctly stratified.
            frequency_table[, "similar":=data.table::between(frequency_bootstrap,
                                                             lower=frequency_original-0.05,
                                                             upper=frequency_original+0.05)]
            
            testthat::expect_equal(all(frequency_table$similar), TRUE)
            
          } else if(stratify & outcome_type %in% c("survival")){
            # Determine the frequency of censored data points and events in
            # the original dataset.
            input_frequency <- data@data[, list("frequency_original"=.N / nrow(data@data)), by="outcome_event"]
            train_frequency <- train_data@data[, list("frequency_bootstrap"=.N / nrow(train_data@data)), by="outcome_event"]
            
            # Update the frequency table.
            frequency_table <- merge(x=input_frequency, y=train_frequency, by="outcome_event")
            
            # Check that the data are correctly stratified.
            frequency_table[, "similar":=data.table::between(frequency_bootstrap,
                                                             lower=frequency_original-0.05,
                                                             upper=frequency_original+0.05)]
            
            testthat::expect_equal(all(frequency_table$similar), TRUE)
          }
          
          # Check that the rare outcome is found in the training data. This
          # prevents issues with model training.
          if(outcome_type == "multinomial"){
            testthat::expect_equal(nrow(train_data@data[outcome == "3"]) > 0, TRUE)
          }
          
          # Assert that all outcome levels in the validation folds also appear
          # in the training folds.
          if(outcome_type %in% c("binomial", "multinomial", "survival")){
            testthat::expect_equal(
              length(setdiff(unique(validation_data@data$outcome),
                             unique(train_data@data$outcome))),
              0)
          }
        }
      })
  }
}


for(outcome_type in c("binomial", "multinomial", "continuous", "count", "survival")){
  # Create synthetic datasets with odd properties.
  if(outcome_type %in% c("binomial", "multinomial", "survival")){
    available_stratify_options <- c(FALSE, TRUE)
    
  } else {
    available_stratify_options <- FALSE
  }
  
  for(stratify in available_stratify_options){
    testthat::test_that(
      paste0("Subsampling ", ifelse(stratify, "(stratified) ", ""),
             "for ", outcome_type, " with odd data functions correctly."), {
        
        # One-outcome data.
        data <- familiar:::test_create_synthetic_series_one_outcome(
          outcome_type=outcome_type, rstream_object=r)
        
        # Create subsample.
        subsample_data <- familiar:::.create_subsample(
          data=data@data, n_iter=20, size=20, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r)
        
        # Expect a list. This is sort of a placeholder because subsampling
        # should work even when the outcome value is singular.
        testthat::expect_type(subsample_data, "list")
        
        # One-sample data.
        data <- familiar:::test_create_synthetic_series_one_sample_data(
          outcome_type=outcome_type, rstream_object=r)
        
        # Create subsample. Unlike cross-validation, subsampling does not
        # require more than one sample, so we do not expect an error.
        subsample_data <- familiar:::.create_subsample(
          data=data@data, n_iter=20, size=20, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r)
        
        # Expect a list.
        testthat::expect_type(subsample_data, "list")
      })
  }
}
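
# The subsampling checks above rest on two properties: each iteration draws a
# fixed number of samples without replacement, so the in-bag and out-of-bag
# sets partition the data, and stratified draws keep in-bag outcome frequencies
# within 0.05 of the full-data frequencies. The guarded block below is a
# minimal, self-contained sketch of stratified subsampling using data.table
# only; it is illustrative, not part of the familiar API, and does not run as
# part of the test suite.
if(FALSE){
  library(data.table)
  
  # Toy dataset: 100 samples with imbalanced outcome classes.
  toy <- data.table(sample_id=1:100,
                    outcome=factor(rep(c("a", "b"), times=c(70, 30))))
  subsample_size <- 20L
  
  # Stratified draw: sample within each class, proportional to class size.
  in_bag <- toy[, .SD[sample(.N, size=round(subsample_size * .N / nrow(toy)))],
                by="outcome"]
  out_of_bag <- toy[!sample_id %in% in_bag$sample_id]
  
  # In-bag and out-of-bag sets partition the toy data.
  stopifnot(nrow(in_bag) + nrow(out_of_bag) == nrow(toy))
  
  # In-bag class frequencies lie within 0.05 of the original frequencies.
  frequency_original <- toy[, .N / nrow(toy), by="outcome"]$V1
  frequency_in_bag <- in_bag[, .N / nrow(in_bag), by="outcome"]$V1
  stopifnot(all(abs(frequency_in_bag - frequency_original) <= 0.05))
}
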
##### Full undersampling ######################################################
for(outcome_type in c("binomial", "multinomial")){
  # Create synthetic dataset.
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type=outcome_type, rare_outcome=FALSE, rstream_object=r)
  
  n_rep <- 3L
  
  testthat::test_that(
    paste0("Full undersampling for correcting outcome imbalances for ",
           outcome_type, " functions correctly."), {
      
      # Create subsample.
      subsample_data <- suppressWarnings(familiar:::.create_balanced_partitions(
        data=data@data, outcome_type=outcome_type,
        imbalance_method="full_undersampling", rstream_object=r))
      
      # Check that none of the training folds are the same.
      if(length(subsample_data) > 1){
        for(ii in 1:(length(subsample_data) - 1)){
          for(jj in (ii+1):length(subsample_data)){
            testthat::expect_equal(
              data.table::fsetequal(subsample_data[[ii]], subsample_data[[jj]]),
              FALSE)
          }
        }
      }
      
      for(ii in seq_along(subsample_data)){
        # Assert that all samples in the subsample are unique (not duplicated).
        testthat::expect_equal(anyDuplicated(subsample_data[[ii]]), 0)
        
        # Check that sampling creates a dataset identical to the development
        # subsample.
        train_data <- familiar:::select_data_from_samples(
          data=data, samples=subsample_data[[ii]])
        
        # Test that the samples and series are selected.
        testthat::expect_equal(
          data.table::fsetequal(
            train_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
            subsample_data[[ii]]),
          TRUE)
        
        # Test that repetitions are likewise selected.
        testthat::expect_equal(nrow(train_data@data),
                               n_rep * nrow(subsample_data[[ii]]))
        
        # Determine the minority class, and assert that it is now selected at
        # least as often as the other classes.
        original_table <- unique(data@data, by=familiar:::get_id_columns(id_depth="series"))[, list("n"=.N), by="outcome"][order(n)]
        minority_class_n <- min(original_table$n)
        minority_class <- original_table[n == minority_class_n]$outcome[1]
        
        frequency_table <- unique(train_data@data, by=familiar:::get_id_columns(id_depth="series"))[, list("partition_occurrence"=.N), by="outcome"]
        
        # Assert that all instances of the minority class are selected.
        testthat::expect_equal(
          frequency_table[outcome == minority_class]$partition_occurrence,
          minority_class_n)
        
        # Assert that no class appears more often than the minority class.
        testthat::expect_equal(
          all(frequency_table$partition_occurrence <= minority_class_n),
          TRUE)
      }
    })
}


for(outcome_type in c("binomial", "multinomial")){
  # Create synthetic dataset without multiple series.
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type=outcome_type, n_series=1L, n_samples=30,
    rare_outcome=FALSE, rstream_object=r)
  
  n_rep <- 3L
  
  testthat::test_that(
    paste0("Full undersampling for correcting outcome imbalances for ",
           outcome_type, " without multiple series functions correctly."), {
      
      # Create subsample.
      subsample_data <- suppressWarnings(familiar:::.create_balanced_partitions(
        data=data@data, outcome_type=outcome_type,
        imbalance_method="full_undersampling", rstream_object=r))
      
      # Check that none of the training folds are the same.
      if(length(subsample_data) > 1){
        for(ii in 1:(length(subsample_data) - 1)){
          for(jj in (ii+1):length(subsample_data)){
            testthat::expect_equal(
              data.table::fsetequal(subsample_data[[ii]], subsample_data[[jj]]),
              FALSE)
          }
        }
      }
      
      # The union of the partitions is the original dataset.
      testthat::expect_equal(
        data.table::fsetequal(
          unique(data.table::rbindlist(subsample_data)),
          unique(data@data[, mget(familiar:::get_id_columns(id_depth="series"))])),
        TRUE)
      
      for(ii in seq_along(subsample_data)){
        # Assert that all samples in the subsample are unique (not duplicated).
        testthat::expect_equal(anyDuplicated(subsample_data[[ii]]), 0)
        
        # Check that sampling creates a dataset identical to the development
        # subsample.
        train_data <- familiar:::select_data_from_samples(
          data=data, samples=subsample_data[[ii]])
        
        # Test that the samples and series are selected.
        testthat::expect_equal(
          data.table::fsetequal(
            train_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
            subsample_data[[ii]]),
          TRUE)
        
        # Test that repetitions are likewise selected.
        testthat::expect_equal(nrow(train_data@data),
                               n_rep * nrow(subsample_data[[ii]]))
        
        # Determine the minority class, and assert that it is now selected at
        # least as often as the other classes.
        original_table <- unique(data@data, by=familiar:::get_id_columns(id_depth="series"))[, list("n"=.N), by="outcome"][order(n)]
        minority_class_n <- min(original_table$n)
        minority_class <- original_table[n == minority_class_n]$outcome[1]
        
        frequency_table <- unique(train_data@data, by=familiar:::get_id_columns(id_depth="series"))[, list("partition_occurrence"=.N), by="outcome"]
        
        # Assert that all instances of the minority class are selected.
        testthat::expect_equal(
          frequency_table[outcome == minority_class]$partition_occurrence,
          minority_class_n)
        
        # Assert that every class appears exactly as often as the minority
        # class.
        testthat::expect_equal(
          all(frequency_table$partition_occurrence == minority_class_n),
          TRUE)
      }
    })
}


for(outcome_type in c("binomial", "multinomial")){
  # Create synthetic datasets with odd properties.
  testthat::test_that(
    paste0("Full undersampling for correcting outcome imbalances for ",
           outcome_type, " with odd data functions correctly."), {
      
      # One-outcome data.
      data <- familiar:::test_create_synthetic_series_one_outcome(
        outcome_type=outcome_type, rstream_object=r)
      
      # Create subsample.
      subsample_data <- suppressWarnings(familiar:::.create_balanced_partitions(
        data=data@data, outcome_type=outcome_type,
        imbalance_method="full_undersampling", rstream_object=r))
      
      # Expect a list. This is sort of a placeholder because the partitioning
      # should work.
      testthat::expect_type(subsample_data, "list")
      
      # One-sample data.
      data <- familiar:::test_create_synthetic_series_one_sample_data(
        outcome_type=outcome_type, rstream_object=r)
      
      # Create subsample.
      subsample_data <- suppressWarnings(familiar:::.create_balanced_partitions(
        data=data@data, outcome_type=outcome_type,
        imbalance_method="full_undersampling", rstream_object=r))
      
      # Expect a list. This is sort of a placeholder because the partitioning
      # should work.
      testthat::expect_type(subsample_data, "list")
    })
}
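
# Full undersampling enumerates balanced partitions: every partition contains
# all samples of the minority class plus a disjoint chunk of majority-class
# samples of (at most) the same size, so the union of the partitions recovers
# the original dataset. The guarded block below is a minimal base-R sketch of
# this idea; `cls` is a toy class vector, not a familiar structure, and the
# block does not run as part of the test suite.
if(FALSE){
  set.seed(1)
  cls <- rep(c("maj", "min"), times=c(25, 10))
  
  minority_ids <- which(cls == "min")
  majority_ids <- sample(which(cls == "maj"))
  
  # Cut the shuffled majority ids into chunks no larger than the minority
  # class.
  chunks <- split(majority_ids,
                  ceiling(seq_along(majority_ids) / length(minority_ids)))
  
  # Each partition: all minority samples plus one majority chunk.
  partitions <- lapply(chunks, function(chunk) sort(c(minority_ids, chunk)))
  
  # The union of the partitions is the full dataset, as tested above.
  stopifnot(setequal(unlist(partitions), seq_along(cls)))
  
  # No class outnumbers the minority class within a partition.
  stopifnot(all(vapply(partitions,
                       function(p) max(table(cls[p])) <= length(minority_ids),
                       logical(1L))))
}
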
##### Random undersampling ####################################################
for(outcome_type in c("binomial", "multinomial")){
  # Create synthetic dataset.
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type=outcome_type, rare_outcome=FALSE, rstream_object=r)
  
  n_rep <- 3L
  
  testthat::test_that(
    paste0("Random undersampling for correcting outcome imbalances for ",
           outcome_type, " functions correctly."), {
      
      # Create subsample.
      subsample_data <- suppressWarnings(familiar:::.create_balanced_partitions(
        data=data@data, outcome_type=outcome_type, imbalance_n_partitions=3L,
        imbalance_method="random_undersampling", rstream_object=r))
      
      # Check that none of the training folds are the same.
      if(length(subsample_data) > 1){
        for(ii in 1:(length(subsample_data) - 1)){
          for(jj in (ii+1):length(subsample_data)){
            testthat::expect_equal(
              data.table::fsetequal(subsample_data[[ii]], subsample_data[[jj]]),
              FALSE)
          }
        }
      }
      
      # Check that at most 3 (the number specified) partitions are created.
      testthat::expect_lte(length(subsample_data), 3L)
      
      for(ii in seq_along(subsample_data)){
        # Assert that all samples in the subsample are unique (not duplicated).
        testthat::expect_equal(anyDuplicated(subsample_data[[ii]]), 0)
        
        # Check that sampling creates a dataset identical to the development
        # subsample.
        train_data <- familiar:::select_data_from_samples(
          data=data, samples=subsample_data[[ii]])
        
        # Test that the samples and series are selected.
        testthat::expect_equal(
          data.table::fsetequal(
            train_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
            subsample_data[[ii]]),
          TRUE)
        
        # Test that repetitions are likewise selected.
        testthat::expect_equal(nrow(train_data@data),
                               n_rep * nrow(subsample_data[[ii]]))
        
        # Determine the minority class, and assert that it is now selected at
        # least as often as the other classes.
        original_table <- unique(data@data, by=familiar:::get_id_columns(id_depth="series"))[, list("n"=.N), by="outcome"][order(n)]
        minority_class_n <- min(original_table$n)
        minority_class <- original_table[n == minority_class_n]$outcome[1]
        
        frequency_table <- unique(train_data@data, by=familiar:::get_id_columns(id_depth="series"))[, list("partition_occurrence"=.N), by="outcome"]
        
        # Assert that all instances of the minority class are selected.
        testthat::expect_equal(
          frequency_table[outcome == minority_class]$partition_occurrence,
          minority_class_n)
        
        # Assert that no class appears more often than the minority class.
        testthat::expect_equal(
          all(frequency_table$partition_occurrence <= minority_class_n),
          TRUE)
      }
    })
}


for(outcome_type in c("binomial", "multinomial")){
  # Create synthetic dataset without multiple series.
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type=outcome_type, n_series=1L, n_samples=30,
    rare_outcome=FALSE, rstream_object=r)
  
  n_rep <- 3L
  
  testthat::test_that(
    paste0("Random undersampling for correcting outcome imbalances for ",
           outcome_type, " without multiple series functions correctly."), {
      
      # Create subsample.
      subsample_data <- suppressWarnings(familiar:::.create_balanced_partitions(
        data=data@data, outcome_type=outcome_type, imbalance_n_partitions=3L,
        imbalance_method="random_undersampling", rstream_object=r))
      
      # Check that none of the training folds are the same.
      if(length(subsample_data) > 1){
        for(ii in 1:(length(subsample_data) - 1)){
          for(jj in (ii+1):length(subsample_data)){
            testthat::expect_equal(
              data.table::fsetequal(subsample_data[[ii]], subsample_data[[jj]]),
              FALSE)
          }
        }
      }
      
      for(ii in seq_along(subsample_data)){
        # Assert that all samples in the subsample are unique (not duplicated).
        testthat::expect_equal(anyDuplicated(subsample_data[[ii]]), 0)
        
        # Check that sampling creates a dataset identical to the development
        # subsample.
        train_data <- familiar:::select_data_from_samples(
          data=data, samples=subsample_data[[ii]])
        
        # Test that the samples and series are selected.
        testthat::expect_equal(
          data.table::fsetequal(
            train_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
            subsample_data[[ii]]),
          TRUE)
        
        # Test that repetitions are likewise selected.
        testthat::expect_equal(nrow(train_data@data),
                               n_rep * nrow(subsample_data[[ii]]))
        
        # Determine the minority class, and assert that it is now selected at
        # least as often as the other classes.
        original_table <- unique(data@data, by=familiar:::get_id_columns(id_depth="series"))[, list("n"=.N), by="outcome"][order(n)]
        minority_class_n <- min(original_table$n)
        minority_class <- original_table[n == minority_class_n]$outcome[1]
        
        frequency_table <- unique(train_data@data, by=familiar:::get_id_columns(id_depth="series"))[, list("partition_occurrence"=.N), by="outcome"]
        
        # Assert that all instances of the minority class are selected.
        testthat::expect_equal(
          frequency_table[outcome == minority_class]$partition_occurrence,
          minority_class_n)
        
        # Assert that every class appears exactly as often as the minority
        # class.
        testthat::expect_equal(
          all(frequency_table$partition_occurrence == minority_class_n),
          TRUE)
      }
    })
}


for(outcome_type in c("binomial", "multinomial")){
  # Create synthetic datasets with odd properties.
  testthat::test_that(
    paste0("Random undersampling for correcting outcome imbalances for ",
           outcome_type, " with odd data functions correctly."), {
      
      # One-outcome data.
      data <- familiar:::test_create_synthetic_series_one_outcome(
        outcome_type=outcome_type, rstream_object=r)
      
      # Create subsample.
      subsample_data <- suppressWarnings(familiar:::.create_balanced_partitions(
        data=data@data, outcome_type=outcome_type, imbalance_n_partitions=3L,
        imbalance_method="random_undersampling", rstream_object=r))
      
      # Expect a list. This is sort of a placeholder because the partitioning
      # should work.
      testthat::expect_type(subsample_data, "list")
      
      # One-sample data.
      data <- familiar:::test_create_synthetic_series_one_sample_data(
        outcome_type=outcome_type, rstream_object=r)
      
      # Create subsample.
      subsample_data <- suppressWarnings(familiar:::.create_balanced_partitions(
        data=data@data, outcome_type=outcome_type, imbalance_n_partitions=3L,
        imbalance_method="random_undersampling", rstream_object=r))
      
      # Expect a list. This is sort of a placeholder because the partitioning
      # should work.
      testthat::expect_type(subsample_data, "list")
    })
}
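
# Random undersampling instead draws, for each requested partition, a random
# subset of each class equal in size to the minority class; unlike full
# undersampling, a partition may leave some majority samples unused. The
# guarded block below is a minimal base-R sketch of this idea (illustrative
# only; not run as part of the test suite).
if(FALSE){
  set.seed(1)
  cls <- rep(c("maj", "min"), times=c(25, 10))
  n_partitions <- 3L
  minority_n <- min(table(cls))
  
  partitions <- lapply(seq_len(n_partitions), function(ii){
    # Draw minority_n samples per class, without replacement within a class.
    sort(unlist(lapply(split(seq_along(cls), cls),
                       function(ids) sample(ids, size=minority_n))))
  })
  
  # Every class occurs exactly minority_n times in each partition.
  stopifnot(all(vapply(partitions,
                       function(p) all(table(cls[p]) == minority_n),
                       logical(1L))))
}
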
##### Cross-validation ########################################################
for(outcome_type in c("binomial", "multinomial", "continuous", "count", "survival")){
  
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type=outcome_type, rare_outcome=TRUE, rstream_object=r)
  
  n_rep <- 3L
  n_folds <- 3L
  
  if(outcome_type %in% c("binomial", "multinomial", "survival")){
    available_stratify_options <- c(FALSE, TRUE)
    
  } else {
    available_stratify_options <- FALSE
  }
  
  for(stratify in available_stratify_options){
    testthat::test_that(
      paste0("Cross-validation ", ifelse(stratify, "(stratified) ", ""),
             "for ", outcome_type, " functions correctly."), {
        
        # Create subsample.
        subsample_data <- familiar:::.create_cv(
          data=data@data, n_folds=n_folds, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r)
        
        # Check that none of the training folds are the same.
        for(ii in 1:(length(subsample_data$train_list) - 1)){
          for(jj in (ii+1):length(subsample_data$train_list)){
            testthat::expect_equal(
              data.table::fsetequal(subsample_data$train_list[[ii]],
                                    subsample_data$train_list[[jj]]),
              FALSE)
          }
        }
        
        for(ii in seq_along(subsample_data$train_list)){
          # Check that there is no overlap between training folds and the
          # validation fold.
          testthat::expect_equal(
            nrow(data.table::fintersect(unique(subsample_data$train_list[[ii]]),
                                        unique(subsample_data$valid_list[[ii]]))),
            0L)
          
          # Check that the union of training and validation folds is the input
          # dataset.
          testthat::expect_equal(
            data.table::fsetequal(
              unique(rbind(subsample_data$train_list[[ii]],
                           subsample_data$valid_list[[ii]])),
              unique(data@data[, mget(familiar:::get_id_columns(id_depth="series"))])),
            TRUE)
          
          # Assert that all samples in the training and validation folds are
          # unique (not duplicated).
          testthat::expect_equal(anyDuplicated(subsample_data$train_list[[ii]]), 0)
          testthat::expect_equal(anyDuplicated(subsample_data$valid_list[[ii]]), 0)
          
          # Check that sampling creates a dataset identical to the development
          # subsample.
          train_data <- familiar:::select_data_from_samples(
            data=data, samples=subsample_data$train_list[[ii]])
          
          # Test that the samples and series are selected.
          testthat::expect_equal(
            data.table::fsetequal(
              train_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
              subsample_data$train_list[[ii]]),
            TRUE)
          
          # Test that repetitions are likewise selected.
          testthat::expect_equal(nrow(train_data@data),
                                 n_rep * nrow(subsample_data$train_list[[ii]]))
          
          # Test that the size of the training folds is about
          # (n_folds - 1) / n_folds of the complete set.
          testthat::expect_gte(nrow(train_data@data),
                               nrow(data@data) * (n_folds - 1) / n_folds - 9)
          testthat::expect_lte(nrow(train_data@data),
                               nrow(data@data) * (n_folds - 1) / n_folds + 9)
          
          # Check that sampling creates a dataset identical to the validation
          # subsample.
          validation_data <- familiar:::select_data_from_samples(
            data=data, samples=subsample_data$valid_list[[ii]])
          
          # Test that the samples and series are selected.
          testthat::expect_equal(
            data.table::fsetequal(
              validation_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
              subsample_data$valid_list[[ii]]),
            TRUE)
          
          # Test that repetitions are likewise selected.
          testthat::expect_equal(nrow(validation_data@data),
                                 n_rep * nrow(subsample_data$valid_list[[ii]]))
          
          # Test that the size of the validation fold is about 1 / n_folds of
          # the complete set.
          testthat::expect_gte(nrow(validation_data@data),
                               nrow(data@data) * 1 / n_folds - 9)
          testthat::expect_lte(nrow(validation_data@data),
                               nrow(data@data) * 1 / n_folds + 9)
          
          # Assert that data are correctly stratified.
          if(stratify & outcome_type %in% c("binomial", "multinomial")){
            # Determine the frequency of outcome classes in the original
            # dataset.
            input_frequency <- data@data[, list("frequency_original"=.N / nrow(data@data)), by="outcome"]
            train_frequency <- train_data@data[, list("frequency_bootstrap"=.N / nrow(train_data@data)), by="outcome"]
            
            # Update the frequency table.
            frequency_table <- merge(x=input_frequency, y=train_frequency, by="outcome")
            
            # Check that the data are correctly stratified.
            frequency_table[, "similar":=data.table::between(frequency_bootstrap,
                                                             lower=frequency_original-0.05,
                                                             upper=frequency_original+0.05)]
            
            testthat::expect_equal(all(frequency_table$similar), TRUE)
            
          } else if(stratify & outcome_type %in% c("survival")){
            # Determine the frequency of censored data points and events in
            # the original dataset.
            input_frequency <- data@data[, list("frequency_original"=.N / nrow(data@data)), by="outcome_event"]
            train_frequency <- train_data@data[, list("frequency_bootstrap"=.N / nrow(train_data@data)), by="outcome_event"]
            
            # Update the frequency table.
            frequency_table <- merge(x=input_frequency, y=train_frequency, by="outcome_event")
            
            # Check that the data are correctly stratified.
            frequency_table[, "similar":=data.table::between(frequency_bootstrap,
                                                             lower=frequency_original-0.05,
                                                             upper=frequency_original+0.05)]
            
            testthat::expect_equal(all(frequency_table$similar), TRUE)
          }
          
          # Check that the rare outcome is found in the training data. This
          # prevents issues with model training.
          if(outcome_type == "multinomial"){
            testthat::expect_equal(nrow(train_data@data[outcome == "3"]) > 0, TRUE)
          }
          
          # Assert that all outcome levels in the validation folds also appear
          # in the training folds.
          if(outcome_type %in% c("binomial", "multinomial", "survival")){
            testthat::expect_equal(
              length(setdiff(unique(validation_data@data$outcome),
                             unique(train_data@data$outcome))),
              0)
          }
        }
      })
  }
}


for(outcome_type in c("binomial", "multinomial", "continuous", "count", "survival")){
  # Create synthetic datasets with odd properties.
  if(outcome_type %in% c("binomial", "multinomial", "survival")){
    available_stratify_options <- c(FALSE, TRUE)
    
  } else {
    available_stratify_options <- FALSE
  }
  
  n_folds <- 3L
  
  for(stratify in available_stratify_options){
    testthat::test_that(
      paste0("Cross-validation ", ifelse(stratify, "(stratified) ", ""),
             "for ", outcome_type, " with odd data functions correctly."), {
        
        # One-outcome data.
        data <- familiar:::test_create_synthetic_series_one_outcome(
          outcome_type=outcome_type, rstream_object=r)
        
        # Create subsample.
        subsample_data <- suppressWarnings(familiar:::.create_cv(
          data=data@data, n_folds=n_folds, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r))
        
        # Expect a list. This is sort of a placeholder because cross-validation
        # should work even when the outcome value is singular.
        testthat::expect_type(subsample_data, "list")
        
        # One-sample data.
        data <- familiar:::test_create_synthetic_series_one_sample_data(
          outcome_type=outcome_type, rstream_object=r)
        
        # Create subsample. We expect an error because you can't do
        # cross-validation with a single sample.
        subsample_data <- testthat::expect_error(suppressWarnings(familiar:::.create_cv(
          data=data@data, n_folds=n_folds, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r)))
      })
  }
}
##### Repeated cross-validation ###############################################
for(outcome_type in c("binomial", "multinomial", "continuous", "count", "survival")){
  
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type=outcome_type, rare_outcome=TRUE, rstream_object=r)
  
  n_rep <- 3L
  n_folds <- 3L
  
  if(outcome_type %in% c("binomial", "multinomial", "survival")){
    available_stratify_options <- c(FALSE, TRUE)
    
  } else {
    available_stratify_options <- FALSE
  }
  
  for(stratify in available_stratify_options){
    testthat::test_that(
      paste0("Repeated cross-validation ", ifelse(stratify, "(stratified) ", ""),
             "for ", outcome_type, " functions correctly."), {
        
        # Create subsample.
        subsample_data <- familiar:::.create_repeated_cv(
          data=data@data, n_rep=3L, n_folds=n_folds, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r)
        
        # Check that none of the training folds are the same.
        for(ii in 1:(length(subsample_data$train_list) - 1)){
          for(jj in (ii+1):length(subsample_data$train_list)){
            testthat::expect_equal(
              data.table::fsetequal(subsample_data$train_list[[ii]],
                                    subsample_data$train_list[[jj]]),
              FALSE)
          }
        }
        
        for(ii in seq_along(subsample_data$train_list)){
          # Check that there is no overlap between training folds and the
          # validation fold.
          testthat::expect_equal(
            nrow(data.table::fintersect(unique(subsample_data$train_list[[ii]]),
                                        unique(subsample_data$valid_list[[ii]]))),
            0L)
          
          # Check that the union of training and validation folds is the input
          # dataset.
          testthat::expect_equal(
            data.table::fsetequal(
              unique(rbind(subsample_data$train_list[[ii]],
                           subsample_data$valid_list[[ii]])),
              unique(data@data[, mget(familiar:::get_id_columns(id_depth="series"))])),
            TRUE)
          
          # Assert that all samples in the training and validation folds are
          # unique (not duplicated).
          testthat::expect_equal(anyDuplicated(subsample_data$train_list[[ii]]), 0)
          testthat::expect_equal(anyDuplicated(subsample_data$valid_list[[ii]]), 0)
          
          # Check that sampling creates a dataset identical to the development
          # subsample.
          train_data <- familiar:::select_data_from_samples(
            data=data, samples=subsample_data$train_list[[ii]])
          
          # Test that the samples and series are selected.
          testthat::expect_equal(
            data.table::fsetequal(
              train_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
              subsample_data$train_list[[ii]]),
            TRUE)
          
          # Test that repetitions are likewise selected.
          testthat::expect_equal(nrow(train_data@data),
                                 n_rep * nrow(subsample_data$train_list[[ii]]))
          
          # Test that the size of the training folds is about
          # (n_folds - 1) / n_folds of the complete set.
          testthat::expect_gte(nrow(train_data@data),
                               nrow(data@data) * (n_folds - 1) / n_folds - 9)
          testthat::expect_lte(nrow(train_data@data),
                               nrow(data@data) * (n_folds - 1) / n_folds + 9)
          
          # Check that sampling creates a dataset identical to the validation
          # subsample.
          validation_data <- familiar:::select_data_from_samples(
            data=data, samples=subsample_data$valid_list[[ii]])
          
          # Test that the samples and series are selected.
          testthat::expect_equal(
            data.table::fsetequal(
              validation_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
              subsample_data$valid_list[[ii]]),
            TRUE)
          
          # Test that repetitions are likewise selected.
          testthat::expect_equal(nrow(validation_data@data),
                                 n_rep * nrow(subsample_data$valid_list[[ii]]))
          
          # Test that the size of the validation fold is about 1 / n_folds of
          # the complete set.
          testthat::expect_gte(nrow(validation_data@data),
                               nrow(data@data) * 1 / n_folds - 9)
          testthat::expect_lte(nrow(validation_data@data),
                               nrow(data@data) * 1 / n_folds + 9)
          
          # Assert that data are correctly stratified.
          if(stratify & outcome_type %in% c("binomial", "multinomial")){
            # Determine the frequency of outcome classes in the original
            # dataset.
            input_frequency <- data@data[, list("frequency_original"=.N / nrow(data@data)), by="outcome"]
            train_frequency <- train_data@data[, list("frequency_bootstrap"=.N / nrow(train_data@data)), by="outcome"]
            
            # Update the frequency table.
            frequency_table <- merge(x=input_frequency, y=train_frequency, by="outcome")
            
            # Check that the data are correctly stratified.
            frequency_table[, "similar":=data.table::between(frequency_bootstrap,
                                                             lower=frequency_original-0.05,
                                                             upper=frequency_original+0.05)]
            
            testthat::expect_equal(all(frequency_table$similar), TRUE)
            
          } else if(stratify & outcome_type %in% c("survival")){
            # Determine the frequency of censored data points and events in
            # the original dataset.
            input_frequency <- data@data[, list("frequency_original"=.N / nrow(data@data)), by="outcome_event"]
            train_frequency <- train_data@data[, list("frequency_bootstrap"=.N / nrow(train_data@data)), by="outcome_event"]
            
            # Update the frequency table.
            frequency_table <- merge(x=input_frequency, y=train_frequency, by="outcome_event")
            
            # Check that the data are correctly stratified.
            frequency_table[, "similar":=data.table::between(frequency_bootstrap,
                                                             lower=frequency_original-0.05,
                                                             upper=frequency_original+0.05)]
            
            testthat::expect_equal(all(frequency_table$similar), TRUE)
          }
          
          # Check that the rare outcome is found in the training data. This
          # prevents issues with model training.
          if(outcome_type == "multinomial"){
            testthat::expect_equal(nrow(train_data@data[outcome == "3"]) > 0, TRUE)
          }
          
          # Assert that all outcome levels in the validation folds also appear
          # in the training folds.
          if(outcome_type %in% c("binomial", "multinomial", "survival")){
            testthat::expect_equal(
              length(setdiff(unique(validation_data@data$outcome),
                             unique(train_data@data$outcome))),
              0)
          }
        }
      })
  }
}


for(outcome_type in c("binomial", "multinomial", "continuous", "count", "survival")){
  # Create synthetic datasets with odd properties.
  if(outcome_type %in% c("binomial", "multinomial", "survival")){
    available_stratify_options <- c(FALSE, TRUE)
    
  } else {
    available_stratify_options <- FALSE
  }
  
  n_folds <- 3L
  
  for(stratify in available_stratify_options){
    testthat::test_that(
      paste0("Repeated cross-validation ", ifelse(stratify, "(stratified) ", ""),
             "for ", outcome_type, " with odd data functions correctly."), {
        
        # One-outcome data.
        data <- familiar:::test_create_synthetic_series_one_outcome(
          outcome_type=outcome_type, rstream_object=r)
        
        # Create subsample.
        subsample_data <- suppressWarnings(familiar:::.create_repeated_cv(
          data=data@data, n_rep=3L, n_folds=n_folds, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r))
        
        # Expect a list. This is sort of a placeholder because cross-validation
        # should work even when the outcome value is singular.
        testthat::expect_type(subsample_data, "list")
        
        # One-sample data.
        data <- familiar:::test_create_synthetic_series_one_sample_data(
          outcome_type=outcome_type, rstream_object=r)
        
        # Create subsample. We expect an error because you can't do
        # cross-validation with a single sample.
        subsample_data <- testthat::expect_error(suppressWarnings(familiar:::.create_repeated_cv(
          data=data@data, n_rep=3L, n_folds=n_folds, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r)))
      })
  }
}
##### Leave-one-out cross-validation ##########################################
for(outcome_type in c("binomial", "multinomial", "continuous", "count", "survival")){
  
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type=outcome_type, rare_outcome=TRUE, rstream_object=r)
  
  n_rep <- 3L
  
  testthat::test_that(
    paste0("Leave-one-out cross-validation for ", outcome_type,
           " functions correctly."), {
      
      # Create subsample.
      subsample_data <- familiar:::.create_loocv(
        data=data@data, outcome_type=outcome_type, rstream_object=r)
      
      # Check that none of the training folds are the same.
      for(ii in 1:(length(subsample_data$train_list) - 1)){
        for(jj in (ii+1):length(subsample_data$train_list)){
          testthat::expect_equal(
            data.table::fsetequal(subsample_data$train_list[[ii]],
                                  subsample_data$train_list[[jj]]),
            FALSE)
        }
      }
      
      # Assert (for LOOCV) that the number of run pairs is the number of
      # samples, or the number of samples - 1 for multinomial (due to
      # pre-assignment of a sample with the rare outcome level).
      if(outcome_type == "multinomial"){
        testthat::expect_equal(
          length(subsample_data$train_list),
          data.table::uniqueN(data@data, by=familiar:::get_id_columns(id_depth="sample")) - 1L)
        
      } else {
        testthat::expect_equal(
          length(subsample_data$train_list),
          data.table::uniqueN(data@data, by=familiar:::get_id_columns(id_depth="sample")))
      }
      
      # Iterate over the subsamples.
      for(ii in seq_along(subsample_data$train_list)){
        # Check that there is no overlap between training folds and the
        # validation fold.
        testthat::expect_equal(
          nrow(data.table::fintersect(unique(subsample_data$train_list[[ii]]),
                                      unique(subsample_data$valid_list[[ii]]))),
          0L)
        
        # Check that the union of training and validation folds is the input
        # dataset.
        testthat::expect_equal(
          data.table::fsetequal(
            unique(rbind(subsample_data$train_list[[ii]],
                         subsample_data$valid_list[[ii]])),
            unique(data@data[, mget(familiar:::get_id_columns(id_depth="series"))])),
          TRUE)
        
        # Assert that all samples in the training fold are unique (not
        # duplicated).
        testthat::expect_equal(anyDuplicated(subsample_data$train_list[[ii]]), 0)
        
        # Assert that there is only one sample in the validation fold.
        testthat::expect_equal(
          data.table::uniqueN(subsample_data$valid_list[[ii]],
                              by=familiar:::get_id_columns(id_depth="sample")),
          1)
        
        # Check that sampling creates a dataset identical to the development
        # subsample.
        train_data <- familiar:::select_data_from_samples(
          data=data, samples=subsample_data$train_list[[ii]])
        
        # Test that the samples and series are selected.
        testthat::expect_equal(
          data.table::fsetequal(
            train_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
            subsample_data$train_list[[ii]]),
          TRUE)
        
        # Test that repetitions are likewise selected.
        testthat::expect_equal(nrow(train_data@data),
                               n_rep * nrow(subsample_data$train_list[[ii]]))
        
        # Assert that the size of the training set is equal to the number of
        # samples - 1.
        testthat::expect_equal(
          data.table::uniqueN(train_data@data, by=familiar:::get_id_columns(id_depth="sample")),
          data.table::uniqueN(data@data, by=familiar:::get_id_columns(id_depth="sample")) - 1L)
        
        # Check that sampling creates a dataset identical to the validation
        # subsample.
        validation_data <- familiar:::select_data_from_samples(
          data=data, samples=subsample_data$valid_list[[ii]])
        
        # Test that the samples and series are selected.
        testthat::expect_equal(
          data.table::fsetequal(
            validation_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
            subsample_data$valid_list[[ii]]),
          TRUE)
        
        # Test that repetitions are likewise selected.
        testthat::expect_equal(nrow(validation_data@data),
                               n_rep * nrow(subsample_data$valid_list[[ii]]))
        
        # Test that the number of samples in the validation fold is 1.
        testthat::expect_equal(
          data.table::uniqueN(validation_data@data, by=familiar:::get_id_columns(id_depth="sample")),
          1L)
        
        # Check that the rare outcome is found in the training data. This
        # prevents issues with model training.
        if(outcome_type == "multinomial"){
          testthat::expect_equal(nrow(train_data@data[outcome == "3"]) > 0, TRUE)
        }
        
        # Assert that all outcome levels in the validation folds also appear
        # in the training folds.
        if(outcome_type %in% c("binomial", "multinomial", "survival")){
          testthat::expect_equal(
            length(setdiff(unique(validation_data@data$outcome),
                           unique(train_data@data$outcome))),
            0)
        }
      }
    })
}


for(outcome_type in c("binomial", "multinomial", "continuous", "count", "survival")){
  testthat::test_that(
    paste0("Leave-one-out cross-validation for ", outcome_type,
           " with odd data functions correctly."), {
      
      # One-outcome data.
      data <- familiar:::test_create_synthetic_series_one_outcome(
        outcome_type=outcome_type, rstream_object=r)
      
      # Create subsample.
      subsample_data <- suppressWarnings(familiar:::.create_loocv(
        data=data@data, outcome_type=outcome_type, rstream_object=r))
      
      # Expect a list. This is sort of a placeholder because cross-validation
      # should work even when the outcome value is singular.
      testthat::expect_type(subsample_data, "list")
      
      # One-sample data.
      data <- familiar:::test_create_synthetic_series_one_sample_data(
        outcome_type=outcome_type, rstream_object=r)
      
      # Create subsample. We expect an error because you can't do
      # cross-validation with a single sample.
      subsample_data <- testthat::expect_error(suppressWarnings(familiar:::.create_loocv(
        data=data@data, outcome_type=outcome_type, rstream_object=r)))
    })
}
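
# In leave-one-out cross-validation every sample serves exactly once as the
# validation set, so there are as many train/validation pairs as samples (one
# fewer for multinomial outcomes above, where a rare-outcome sample is
# pre-assigned to training). The guarded block below is a minimal base-R
# sketch (illustrative only; not run as part of the test suite).
if(FALSE){
  sample_ids <- 1:10
  
  folds <- lapply(sample_ids, function(id){
    list(train=setdiff(sample_ids, id), valid=id)
  })
  
  # One pair per sample; each training set holds n - 1 samples.
  stopifnot(length(folds) == length(sample_ids))
  stopifnot(all(vapply(folds, function(f) length(f$train), integer(1L)) ==
                  length(sample_ids) - 1L))
}
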
##### Bootstraps ###############################################################
for(outcome_type in c("binomial", "multinomial", "continuous", "count", "survival")){
  
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type=outcome_type, rare_outcome=TRUE, rstream_object=r)
  
  n_rep <- 3L
  
  if(outcome_type %in% c("binomial", "multinomial", "survival")){
    available_stratify_options <- c(FALSE, TRUE)
    
  } else {
    available_stratify_options <- FALSE
  }
  
  for(stratify in available_stratify_options){
    testthat::test_that(
      paste0("Bootstrap resampling ", ifelse(stratify, "(stratified) ", ""),
             "for ", outcome_type, " functions correctly."), {
        
        # Create subsample.
        subsample_data <- familiar:::.create_bootstraps(
          data=data@data, n_iter=20, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r)
        
        # Check that none of the in-bag datasets are the same.
        for(ii in 1:(length(subsample_data$train_list) - 1)){
          for(jj in (ii+1):length(subsample_data$train_list)){
            testthat::expect_equal(
              data.table::fsetequal(subsample_data$train_list[[ii]],
                                    subsample_data$train_list[[jj]]),
              FALSE)
          }
        }
        
        for(ii in seq_along(subsample_data$train_list)){
          # Check that there is no overlap between in-bag and out-of-bag data.
          testthat::expect_equal(
            nrow(data.table::fintersect(unique(subsample_data$train_list[[ii]]),
                                        unique(subsample_data$valid_list[[ii]]))),
            0L)
          
          # Check that the combination of in-bag and out-of-bag data is the
          # same as the input dataset.
          testthat::expect_equal(
            data.table::fsetequal(
              unique(rbind(subsample_data$train_list[[ii]],
                           subsample_data$valid_list[[ii]])),
              unique(data@data[, mget(familiar:::get_id_columns(id_depth="series"))])),
            TRUE)
          
          # Check that sampling creates a dataset identical to the development
          # dataset.
          train_data <- familiar:::select_data_from_samples(
            data=data, samples=subsample_data$train_list[[ii]])
          
          # Test that the samples and series are selected.
          testthat::expect_equal(
            data.table::fsetequal(
              train_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
              subsample_data$train_list[[ii]]),
            TRUE)
          
          # Test that repetitions are likewise selected.
          testthat::expect_equal(nrow(train_data@data),
                                 n_rep * nrow(subsample_data$train_list[[ii]]))
          
          # Test that the subsample has approximately the same size as the
          # original dataset. It can be slightly larger because samples with
          # rare outcomes are added to the in-bag data.
          testthat::expect_gte(nrow(train_data@data), nrow(data@data) - 30)
          testthat::expect_lte(nrow(train_data@data), nrow(data@data) + 9)
          
          # Check that sampling creates a dataset identical to the validation
          # dataset.
          validation_data <- familiar:::select_data_from_samples(
            data=data, samples=subsample_data$valid_list[[ii]])
          
          # Test that the samples and series are selected.
          testthat::expect_equal(
            data.table::fsetequal(
              validation_data@data[repetition_id==1L, mget(familiar:::get_id_columns(id_depth="series"))],
              subsample_data$valid_list[[ii]]),
            TRUE)
          
          # Test that repetitions are likewise selected.
          testthat::expect_equal(nrow(validation_data@data),
                                 n_rep * nrow(subsample_data$valid_list[[ii]]))
          
          # If stratified, check that the occurrence of events or categories is
          # similar between the discovery data and the entire dataset.
          if(stratify & outcome_type %in% c("binomial", "multinomial")){
            # Determine the frequency of outcome classes in the original
            # dataset.
            input_frequency <- data@data[, list("frequency_original"=.N / nrow(data@data)), by="outcome"]
            train_frequency <- train_data@data[, list("frequency_bootstrap"=.N / nrow(train_data@data)), by="outcome"]
            
            # Update the frequency table.
            frequency_table <- merge(x=input_frequency, y=train_frequency, by="outcome")
            
            # Check that the data are correctly stratified.
            frequency_table[, "similar":=data.table::between(frequency_bootstrap,
                                                             lower=frequency_original-0.05,
                                                             upper=frequency_original+0.05)]
            
            testthat::expect_equal(all(frequency_table$similar), TRUE)
            
          } else if(stratify & outcome_type %in% c("survival")){
            # Determine the frequency of censored data points and events in
            # the original dataset.
            input_frequency <- data@data[, list("frequency_original"=.N / nrow(data@data)), by="outcome_event"]
            train_frequency <- train_data@data[, list("frequency_bootstrap"=.N / nrow(train_data@data)), by="outcome_event"]
            
            # Update the frequency table.
            frequency_table <- merge(x=input_frequency, y=train_frequency, by="outcome_event")
            
            # Check that the data are correctly stratified.
            frequency_table[, "similar":=data.table::between(frequency_bootstrap,
                                                             lower=frequency_original-0.05,
                                                             upper=frequency_original+0.05)]
            
            testthat::expect_equal(all(frequency_table$similar), TRUE)
          }
          
          # Check that the rare outcome is found in the training data. This
          # prevents issues with model training.
          if(outcome_type == "multinomial"){
            testthat::expect_equal(nrow(train_data@data[outcome == "3"]) > 0, TRUE)
          }
          
          # Assert that all outcome levels in the validation folds also appear
          # in the training folds.
          if(outcome_type %in% c("binomial", "multinomial", "survival")){
            testthat::expect_equal(
              length(setdiff(unique(validation_data@data$outcome),
                             unique(train_data@data$outcome))),
              0)
          }
        }
      })
  }
}


for(outcome_type in c("binomial", "multinomial", "continuous", "count", "survival")){
  # Create synthetic datasets with odd properties.
  if(outcome_type %in% c("binomial", "multinomial", "survival")){
    available_stratify_options <- c(FALSE, TRUE)
    
  } else {
    available_stratify_options <- FALSE
  }
  
  for(stratify in available_stratify_options){
    testthat::test_that(
      paste0("Bootstrap ", ifelse(stratify, "(stratified) ", ""),
             "for ", outcome_type, " with odd data functions correctly."), {
        
        # One-outcome data.
        data <- familiar:::test_create_synthetic_series_one_outcome(
          outcome_type=outcome_type, rstream_object=r)
        
        # Create subsample.
        subsample_data <- familiar:::.create_bootstraps(
          data=data@data, n_iter=20, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r)
        
        # Expect a list. This is sort of a placeholder because bootstrapping
        # should work even when the outcome value is singular.
        testthat::expect_type(subsample_data, "list")
        
        # One-sample data.
        data <- familiar:::test_create_synthetic_series_one_sample_data(
          outcome_type=outcome_type, rstream_object=r)
        
        # Create subsample. We expect an error because bootstraps cannot be
        # created from a single sample.
        subsample_data <- testthat::expect_error(familiar:::.create_bootstraps(
          data=data@data, n_iter=20, stratify=stratify,
          outcome_type=outcome_type, rstream_object=r))
      })
  }
}
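
# Bootstrap resampling draws n samples with replacement for the in-bag set;
# the out-of-bag set consists of the samples never drawn, which on average is
# a fraction (1 - 1/n)^n ~ exp(-1) ~ 0.368 of the data. The guarded block
# below is a minimal base-R sketch (illustrative only; not run as part of the
# test suite).
if(FALSE){
  set.seed(1)
  sample_ids <- 1:1000
  
  in_bag <- sample(sample_ids, size=length(sample_ids), replace=TRUE)
  out_of_bag <- setdiff(sample_ids, in_bag)
  
  # In-bag and out-of-bag do not overlap, mirroring the fintersect check above.
  stopifnot(length(intersect(unique(in_bag), out_of_bag)) == 0L)
  
  # Roughly a third of samples end up out-of-bag.
  print(length(out_of_bag) / length(sample_ids))  # close to exp(-1) ~ 0.368
}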