# Tests for feature imputation. Every section below trains imputers with each
# method returned by familiar:::.get_available_imputation_methods() on a
# synthetic dataset, imputes the dataset, and checks the result per feature.

# Skip on CRAN as this test takes some time.
testthat::skip_on_cran()
testthat::skip_on_ci()

outcome_type <- "survival"

# NOTE(review): the two values below are overwritten by the loops further down
# before they are read; presumably they are defaults for stepping through the
# code interactively -- confirm before removing.
n_numeric_features <- 1
imputation_method <- "lasso"


# Helpers ----------------------------------------------------------------------

# Build the test description shared by all sections. `suffix` is appended
# verbatim after "... numeric features".
.imputation_test_name <- function(
    imputation_method,
    n_numeric_features,
    suffix = ".") {
  paste0(
    "Imputation is correctly performed using the ", imputation_method,
    " method and ", n_numeric_features, " numeric features", suffix
  )
}

# Create the list of featureInfo objects for the features in `data`.
.get_feature_info_list <- function(data, outcome_type) {
  familiar:::.get_feature_info_data(
    data = data@data,
    file_paths = NULL,
    project_id = character(),
    outcome_type = outcome_type
  )[[1]]
}

# Create an imputation skeleton on `feature_info_list` and add the imputer
# parameters using `data`. Returns the updated feature info list.
.train_imputers <- function(feature_info_list, data, imputation_method) {
  feature_info_list <- familiar:::create_imputation_parameter_skeleton(
    feature_info_list = feature_info_list,
    imputation_method = imputation_method
  )

  familiar:::add_imputation_info(
    feature_info_list = feature_info_list,
    data = data
  )
}

# Mark `data` as pre-processed up to batch normalisation and impute it using
# the imputers in `feature_info_list`. Returns the imputed data object.
.impute_data <- function(data, feature_info_list) {
  # Assume that the data is pre-processed.
  data@preprocessing_level <- "batch_normalisation"

  familiar:::impute_features(
    data = data,
    feature_info_list = feature_info_list
  )
}

# Run the full pipeline on a copy of `data`: create feature info, create the
# imputation skeleton, optionally transform the data with `prepare_data`, add
# imputer parameters, and impute.
#
# `prepare_data`, when not NULL, is a function that receives the copied data
# object and returns the object that is used for training and imputation. It is
# applied after the skeleton is created and before imputer parameters are
# added.
#
# Returns a list with elements `data` (imputed data object) and
# `feature_info_list`.
.run_imputation <- function(
    data,
    imputation_method,
    outcome_type,
    prepare_data = NULL) {
  # Make a copy of the data, so that the caller's object is left untouched.
  data_copy <- data.table::copy(data)

  feature_info_list <- .get_feature_info_list(
    data = data_copy,
    outcome_type = outcome_type
  )

  feature_info_list <- familiar:::create_imputation_parameter_skeleton(
    feature_info_list = feature_info_list,
    imputation_method = imputation_method
  )

  if (!is.null(prepare_data)) {
    data_copy <- prepare_data(data_copy)
  }

  feature_info_list <- familiar:::add_imputation_info(
    feature_info_list = feature_info_list,
    data = data_copy
  )

  data_copy <- .impute_data(
    data = data_copy,
    feature_info_list = feature_info_list
  )

  list(data = data_copy, feature_info_list = feature_info_list)
}

# Check every feature column of `data` after imputation.
#
# For all features, imputer parameters should be complete. Features named in
# `all_na_features` are expected to contain no valid data at all and to have a
# "featureInfoParametersImputationNone" imputer; all other features are
# expected to contain only valid data.
.expect_imputed <- function(
    data,
    feature_info_list,
    all_na_features = character()) {
  for (feature in familiar:::get_feature_columns(data)) {
    imputer <- feature_info_list[[feature]]@imputation_parameters
    is_valid <- familiar:::is_valid_data(data@data[[feature]])

    # Check that imputer parameters were set.
    testthat::expect_equal(familiar:::feature_info_complete(imputer), TRUE)

    if (feature %in% all_na_features) {
      # The feature is completely missing data.
      testthat::expect_equal(any(is_valid), FALSE)

      # Check that a None-class imputer is trained.
      testthat::expect_s4_class(
        imputer,
        "featureInfoParametersImputationNone"
      )
    } else {
      # Check that all feature data are valid.
      testthat::expect_equal(all(is_valid), TRUE)
    }
  }
}


# Generic test -----------------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      .imputation_test_name(imputation_method, n_numeric_features, "."),
      {
        imputed <- .run_imputation(data, imputation_method, outcome_type)

        # Check that the data is not altered.
        testthat::expect_equal(
          data.table::fsetequal(imputed$data@data, data@data),
          TRUE
        )

        .expect_imputed(imputed$data, imputed$feature_info_list)
      }
    )
  }
}


# Rare categories test ---------------------------------------------------------
for (n_numeric_features in c(3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      .imputation_test_name(
        imputation_method,
        n_numeric_features,
        " for a dataset with rare feature levels."
      ),
      {
        # Rare levels are replaced in the data after the imputation skeleton
        # was created, but before imputer parameters are added.
        imputed <- .run_imputation(
          data,
          imputation_method,
          outcome_type,
          prepare_data = function(x) {
            familiar:::test_data_drop_rare_feature_levels(data = x)
          }
        )

        .expect_imputed(imputed$data, imputed$feature_info_list)
      }
    )
  }
}


# NA-instance test -------------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_na_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      .imputation_test_name(
        imputation_method,
        n_numeric_features,
        " for a dataset with some NA instances"
      ),
      {
        imputed <- .run_imputation(data, imputation_method, outcome_type)
        .expect_imputed(imputed$data, imputed$feature_info_list)
      }
    )
  }
}


# NA random-value test ---------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_random_na_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      .imputation_test_name(
        imputation_method,
        n_numeric_features,
        " for a dataset with some NA datapoints."
      ),
      {
        imputed <- .run_imputation(data, imputation_method, outcome_type)
        .expect_imputed(imputed$data, imputed$feature_info_list)
      }
    )
  }
}


# One feature NA test ----------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_one_feature_all_na_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      .imputation_test_name(
        imputation_method,
        n_numeric_features,
        " for a dataset with one feature entirely NA."
      ),
      {
        imputed <- .run_imputation(data, imputation_method, outcome_type)

        # Feature 2 is completely missing data and cannot be imputed.
        .expect_imputed(
          imputed$data,
          imputed$feature_info_list,
          all_na_features = "feature_2"
        )
      }
    )
  }
}


# Invariant feature test -------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_invariant_feature_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      .imputation_test_name(
        imputation_method,
        n_numeric_features,
        " for a dataset with invariant features."
      ),
      {
        imputed <- .run_imputation(data, imputation_method, outcome_type)
        .expect_imputed(imputed$data, imputed$feature_info_list)
      }
    )
  }
}


# One feature invariant test ---------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_one_feature_invariant_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      .imputation_test_name(
        imputation_method,
        n_numeric_features,
        " for a dataset with one invariant feature."
      ),
      {
        imputed <- .run_imputation(data, imputation_method, outcome_type)
        .expect_imputed(imputed$data, imputed$feature_info_list)
      }
    )
  }
}


# One-sample test --------------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_one_sample_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      .imputation_test_name(
        imputation_method,
        n_numeric_features,
        " for a dataset with a single sample."
      ),
      {
        imputed <- .run_imputation(data, imputation_method, outcome_type)
        .expect_imputed(imputed$data, imputed$feature_info_list)
      }
    )
  }
}


# One-feature test -------------------------------------------------------------
for (n_numeric_features in c(1, 0)) {
  data <- familiar:::test_create_synthetic_series_random_na_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  # Select only the first feature.
  data@data <- data@data[
    ,
    mget(c(familiar:::get_non_feature_columns(data), "feature_1"))
  ]

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      .imputation_test_name(
        imputation_method,
        n_numeric_features,
        " for a dataset with a single feature and some NA datapoints."
      ),
      {
        imputed <- .run_imputation(data, imputation_method, outcome_type)
        .expect_imputed(imputed$data, imputed$feature_info_list)
      }
    )
  }
}


# Collection test --------------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_random_na_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      .imputation_test_name(
        imputation_method,
        n_numeric_features,
        paste0(
          " for a dataset with some NA datapoints and a collection of",
          " imputers."
        )
      ),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects. This list has no imputation
        # parameters yet; these are filled in from the collection below.
        feature_info_list <- .get_feature_info_list(
          data = data_copy,
          outcome_type = outcome_type
        )

        # Train three sets of imputers, one after the other, each starting
        # from the same untrained feature info list.
        trained_lists <- lapply(
          seq_len(3L),
          function(ii) {
            .train_imputers(
              feature_info_list = feature_info_list,
              data = data_copy,
              imputation_method = imputation_method
            )
          }
        )

        # Collect features: aggregate the three imputers of each feature.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          aggregated <- familiar:::..collect_and_aggregate_imputation_info(
            feature_info_list = lapply(
              trained_lists,
              function(x) x[[feature]]
            ),
            feature_type = feature_info_list[[feature]]@feature_type,
            feature_name = feature_info_list[[feature]]@name
          )

          feature_info_list[[feature]]@imputation_parameters <-
            aggregated$parameters
        }

        # Attempt to impute the data using the aggregated imputers.
        data_copy <- .impute_data(
          data = data_copy,
          feature_info_list = feature_info_list
        )

        .expect_imputed(data_copy, feature_info_list)
      }
    )
  }
}