testthat::test_that("call_repeats", { config <- load_config() suppressWarnings( test_fragments <- genemapper_table_to_fragments(example_data, dye_channel = "B", min_size_bp = 300 ) ) find_alleles( fragments_list = test_fragments, config ) suppressWarnings( suppressMessages( call_repeats( fragments_list = test_fragments, config, assay_size_without_repeat = 87, repeat_size = 3 ) ) ) test_repeats_class <- vector("numeric", length(test_fragments)) for (i in seq_along(test_fragments)) { test_repeats_class[i] <- class(test_fragments[[i]])[1] } testthat::expect_true(all(unique(test_repeats_class) == "fragments")) # force_whole_repeat_units suppressWarnings( suppressMessages( call_repeats( fragments_list = test_fragments, config, force_whole_repeat_units = TRUE, assay_size_without_repeat = 87, repeat_size = 3 ) ) ) #TOODO come up with test for whole repeat units here }) testthat::test_that("repeat period", { fsa_list <- lapply(cell_line_fsa_list[1], function(x) x$clone()) config <- load_config() suppressWarnings( find_ladders( fsa_list, config, ladder_sizes = c(35, 50, 75, 100, 139, 150, 160, 200, 250, 300, 340, 350, 400, 450, 490, 500), max_combinations = 2500000, ladder_selection_window = 5, show_progress_bar = FALSE ) ) # plot_ladders(test_ladders[1:9], n_facet_col = 3, # xlim = c(1000, 4800), # ylim = c(0, 15000)) # # Start a PDF device # pdf(file = "C:/Users/zlm2/Downloads/ladder.pdf", width = 12, height = 6) # Set width and height as desired # # # Loop through the list of plots # for (i in seq_along(test_ladders)) { # test_ladders[[i]]$plot_ladder(xlim = c(1400, 4500)) # } # # # Close the PDF device # dev.off() find_fragments(fsa_list, config, minimum_peak_signal = 20, min_bp_size = 300, show_progress_bar = FALSE ) find_alleles( fragments_list = fsa_list, config ) test_repeats_size_period <- lapply(fsa_list, function(x) x$clone()) suppressMessages( suppressWarnings( call_repeats( fragments_list = test_repeats_size_period, config, force_repeat_pattern = TRUE, force_repeat_pattern_size_window = 0.5 ) ) ) test_repeats_size_none <- lapply(fsa_list, function(x) x$clone()) suppressMessages( suppressWarnings( call_repeats( fragments_list = test_repeats_size_none, config, force_repeat_pattern = FALSE ) ) ) testthat::expect_true( identical( test_repeats_size_period[[1]]$repeat_table_df[which(test_repeats_size_period[[1]]$repeat_table_df$repeats > 118 & test_repeats_size_period[[1]]$repeat_table_df$repeats <140),"repeats"], test_repeats_size_none[[1]]$repeat_table_df[which(test_repeats_size_none[[1]]$repeat_table_df$repeats > 118 & test_repeats_size_none[[1]]$repeat_table_df$repeats <140),"repeats"] ) ) }) testthat::test_that("full pipeline repeat size algo", { fsa_list <- lapply(cell_line_fsa_list, function(x) x$clone()) config <- load_config() suppressWarnings( find_ladders(fsa_list, config, ladder_sizes = c(35, 50, 75, 100, 139, 150, 160, 200, 250, 300, 340, 350, 400, 450, 490, 500), max_combinations = 2500000, ladder_selection_window = 5, show_progress_bar = FALSE ) ) # plot_ladders(test_ladders[1:9], n_facet_col = 3, # xlim = c(1000, 4800), # ylim = c(0, 15000)) # # Start a PDF device # pdf(file = "C:/Users/zlm2/Downloads/ladder.pdf", width = 12, height = 6) # Set width and height as desired # # # Loop through the list of plots # for (i in seq_along(test_ladders)) { # test_ladders[[i]]$plot_ladder(xlim = c(1400, 4500)) # } # # # Close the PDF device # dev.off() find_fragments(fsa_list, config, minimum_peak_signal = 20, min_bp_size = 300, show_progress_bar = FALSE ) add_metadata( fsa_list, metadata_data.frame = metadata ) find_alleles(fsa_list, config) suppressMessages( suppressWarnings( call_repeats( fsa_list, config, force_repeat_pattern = TRUE ) ) ) # plot_traces(test_repeats[1:9], n_facet_col = 3, # xlim = c(100, 200), # ylim = c(0,2000)) # plot_fragments(test_repeats[1:4]) suppressMessages( suppressWarnings( assign_index_peaks( fsa_list, config, grouped = TRUE ) ) ) suppressMessages( suppressWarnings( test_metrics_grouped <- calculate_instability_metrics( fragments_list = fsa_list, peak_threshold = 0.05, window_around_index_peak = c(-40, 40) ) ) ) # Left join plot_data <- merge(test_metrics_grouped, metadata, by = "unique_id", all.x = TRUE) # Filter plot_data <- plot_data[plot_data$day > 0 & plot_data$modal_peak_signal > 500, ] # Group by plot_data <- split(plot_data, plot_data$metrics_group_id) # Mutate for (i in seq_along(plot_data)) { plot_data[[i]]$rel_gain <- plot_data[[i]]$average_repeat_change / median(plot_data[[i]]$average_repeat_change[which(plot_data[[i]]$treatment == 0)]) } plot_data <- do.call(rbind, plot_data) # Revise genotype levels plot_data$genotype <- factor(plot_data$genotype, levels = c("non-edited", "edited")) # ggplot2::ggplot(plot_data, # ggplot2::aes(as.factor(treatment), rel_gain, # colour = as.factor(treatment))) + # ggplot2::geom_boxplot(outlier.shape = NA) + # ggplot2::geom_jitter() + # ggplot2::facet_wrap(ggplot2::vars(genotype)) + # ggplot2::labs(y = "Average repeat gain\n(relative to DMSO)", # x = "Branaplam (nM)") + # ggplot2::theme(legend.position = "none") medians <- aggregate(rel_gain ~ treatment + genotype, plot_data, median, na.rm = TRUE) testthat::expect_true(all(round(medians$rel_gain, 5) == c(1.00000, 0.86158, 0.73262, 0.55721))) }) testthat::test_that("batch correction", { fsa_list <- lapply(cell_line_fsa_list[16:19], function(x) x$clone()) config <- load_config() find_ladders(fsa_list, config, show_progress_bar = FALSE) find_fragments(fsa_list, config, min_bp_size = 300, show_progress_bar = FALSE) add_metadata(fsa_list, metadata[16:19, ]) find_alleles(fsa_list, config) suppressMessages( suppressWarnings( call_repeats(fsa_list, config, correction = "batch") ) ) testthat::expect_true(all.equal( c(rep(0.72867 , 2), rep(-0.72867 , 2)), round(as.numeric(sapply(fsa_list, function(x) x$.__enclos_env__$private$batch_correction_factor)), 5) )) # plot_batch_correction_samples(fsa_list, selected_sample = 1, xlim = c(100, 115)) }) testthat::test_that("batch correction with no data in one batch", { config <- load_config() different_batch <- vector("list", 1) names(different_batch) <- "test1" different_batch[[1]] <- cell_line_fsa_list[[1]]$clone() different_batch[[1]]$unique_id <- "test1" different_batch_metadata <- metadata[which(metadata$unique_id == names(cell_line_fsa_list[1])), ] different_batch_metadata$unique_id <- "test1" different_batch_metadata$batch_run_id <- NA_character_ different_batch_metadata$batch_sample_id <- NA_character_ metadata_modification_df <- rbind(metadata[16:19, ], different_batch_metadata) fsa_list <- c(lapply(cell_line_fsa_list[16:19], function(x) x$clone()), different_batch) find_ladders(fsa_list, config, show_progress_bar = FALSE) find_fragments(fsa_list, config, min_bp_size = 300, show_progress_bar = FALSE) add_metadata(fsa_list, metadata_modification_df) find_alleles(fsa_list, config) suppressMessages( suppressMessages( tryCatch({ call_repeats(fsa_list, config, correction = "batch") }, warning = function(w){ assignment_warning <<- w } ) ) ) testthat::expect_true(class(assignment_warning)[1] == "simpleWarning") testthat::expect_true(grepl("'batch_run_id' 'NA' had no batch correction carried out", assignment_warning)) }) testthat::test_that("batch correction with a single sample id", { config <- load_config() fsa_list <- lapply(cell_line_fsa_list[16:19], function(x) x$clone()) find_ladders(fsa_list, config, show_progress_bar = FALSE) find_fragments(fsa_list, config, min_bp_size = 300, show_progress_bar = FALSE) add_metadata(fsa_list, metadata[16:19, ]) fsa_list <- lapply(fsa_list, function(x){ if(x$batch_sample_id %in% "S-21-211"){ x$batch_sample_id <- NA_character_ } return(x) }) find_alleles(fsa_list, config) suppressMessages( suppressWarnings(call_repeats(fsa_list, config, correction = "batch")) ) # plot_batch_correction_samples(fsa_list, selected_sample = 1, xlim = c(100, 120)) testthat::expect_true(all.equal( c(rep(0.8113, 2), rep(-0.8113, 2)), round(as.numeric(sapply(fsa_list, function(x) x$.__enclos_env__$private$batch_correction_factor)), 5) )) }) testthat::test_that("repeat correction", { config <- load_config() fsa_list <- lapply(cell_line_fsa_list[16:19], function(x) x$clone()) find_ladders(fsa_list, config, show_progress_bar = FALSE) find_fragments(fsa_list, config, min_bp_size = 300, show_progress_bar = FALSE) add_metadata(fsa_list, metadata[16:19, ]) find_alleles(fsa_list, config) suppressMessages( suppressWarnings( call_repeats( fsa_list, config, correction = "repeat" ) ) ) # plot_batch_correction_samples(fsa_list, 1, c(100,130)) # plot_repeat_correction_model(fsa_list) testthat::expect_true(all.equal( c(10.5161, 10.5844, 11.0936 , 11.1783), round(as.numeric(sapply(fsa_list, function(x) x$.__enclos_env__$private$repeat_correction_factor)), 4) )) # extract model summary correction_summary <- extract_repeat_correction_summary(fsa_list) testthat::expect_true(is.data.frame(correction_summary)) testthat::expect_true(all.equal(round(correction_summary$abs_avg_residual, 5), c(0.01585, 0.01585, 0.01777, 0.01523))) }) testthat::test_that("repeat correction with one sample off warning", { config <- load_config() fsa_list <- lapply(cell_line_fsa_list[16:19], function(x) x$clone()) find_ladders(fsa_list, config, show_progress_bar = FALSE) # make peak before just bigger fsa_list[[1]]$trace_bp_df[which(round(fsa_list[[1]]$trace_bp_df$size, 2) == 418.25), "signal"] <- 4000 find_fragments(fsa_list, config, min_bp_size = 300, show_progress_bar = FALSE) add_metadata(fsa_list, metadata[16:19, ]) find_alleles(fsa_list, config) suppressMessages( tryCatch({ call_repeats( fsa_list, config, correction = "repeat" ) }, warning = function(w){ assignment_warning <<- w } ) ) # plot_batch_correction_samples(fsa_list, 1, c(100,115)) # plot_repeat_correction_model(fsa_list) testthat::expect_true(class(assignment_warning)[1] == "simpleWarning") testthat::expect_true(grepl("had no batch correction carried out", assignment_warning)) }) testthat::test_that("repeat correction one run missing", { config <- load_config() fsa_list <- lapply(cell_line_fsa_list[16:19], function(x) x$clone()) find_ladders(fsa_list, config, show_progress_bar = FALSE) find_fragments(fsa_list, config, min_bp_size = 300, show_progress_bar = FALSE) metadata_2 <- metadata[16:19, ] metadata_2$batch_sample_modal_repeat <- ifelse(metadata_2$batch_run_id == "20230414", NA_real_, metadata_2$batch_sample_modal_repeat) add_metadata(fsa_list, metadata_2) find_alleles(fsa_list,config) call_repeats_output <- call_repeats( fsa_list, config, correction = "repeat" ) testthat::expect_true(class(call_repeats_output)[1] == "trace_output") testthat::expect_true(grepl("no samples with 'batch_sample_modal_repeat'", call_repeats_output$error_message)) })