# Regression tests for denoise() on four hand-built sequences:
#   1. a single deleted base pair (placeholder inserted, neighbours censored),
#   2. a short sequence with a single deletion and a wide censor window,
#   3. a single inserted base pair (removed, neighbours censored),
#   4. an error-free sequence supplied as its reverse complement.
# Every call uses ambig_char = "N", aa_check = FALSE and to_file = FALSE; only
# the `outseq` and `adjustment_count` fields of the result are inspected.
test_that("Artifical errors are denoised properly.", {
  # ---- Case 1: PACBB5941-16 -------------------------------------------------
  # Author's note: a base pair was deleted at position 60. The expected output
  # carries a run of seven "N" characters around the corrected site and is one
  # character longer than the input.
  toy_example1 <- "ACAATGTATTTCATCCTAGGAATATGATCAGGAATATTAGGAATAATATTAAGAATATTATTCGAATTGAACTAGCACAACCAGGACCATTAATAAGAAATGACCAAATTTATAATGTAATTGTTACATCTCATGCATTCATTATAATTTTTATGGTAATACCAATTATAATCGGAGGATTCGGAAATTGATTAGTACCATTAATAATTGGAGCACCAGATATAGCATTCCCACGAATAAATAATATAAGATTTTGACTTCTTCCACCCTCACTTACATTGTTAATTTCAAGATCAATAGTAGAAATAGGACCAGGAACAGGATGAACACTATATCCACCGCTATCGTCAAATATTGCACATTCGGGAGGAAGTGTAGACTTAAGAATTTTTTCATTACACTTAGCCGGTGTATCATCAATCCTAGGAGCAATTAACTTTATTACAACAATTCTAAATATACGAACACTAGGAATATCATTAGACCGAACCCCCTTATTCGTATGATCAGTAATAATTACTGCAATTTTACTACTTCTATCTCTACCAGTACTAGCTGGAGCAATCACGATA"
  toy1_expected <- "ACAATGTATTTCATCCTAGGAATATGATCAGGAATATTAGGAATAATATTAAGAATANNNNNNNGAATTGAACTAGCACAACCAGGACCATTAATAAGAAATGACCAAATTTATAATGTAATTGTTACATCTCATGCATTCATTATAATTTTTATGGTAATACCAATTATAATCGGAGGATTCGGAAATTGATTAGTACCATTAATAATTGGAGCACCAGATATAGCATTCCCACGAATAAATAATATAAGATTTTGACTTCTTCCACCCTCACTTACATTGTTAATTTCAAGATCAATAGTAGAAATAGGACCAGGAACAGGATGAACACTATATCCACCGCTATCGTCAAATATTGCACATTCGGGAGGAAGTGTAGACTTAAGAATTTTTTCATTACACTTAGCCGGTGTATCATCAATCCTAGGAGCAATTAACTTTATTACAACAATTCTAAATATACGAACACTAGGAATATCATTAGACCGAACCCCCTTATTCGTATGATCAGTAATAATTACTGCAATTTTACTACTTCTATCTCTACCAGTACTAGCTGGAGCAATCACGATA"

  test_toy1_out <- denoise(
    toy_example1,
    name = "example1",
    ambig_char = "N",
    censor_length = 3,
    aa_check = FALSE,
    to_file = FALSE
  )

  expect_equal(test_toy1_out$outseq, toy1_expected)
  expect_equal(test_toy1_out$adjustment_count, 1)
  expect_equal(nchar(test_toy1_out$outseq), nchar(toy_example1) + 1)

  # ---- Case 2: PACBB858-16 --------------------------------------------------
  # Author's note: a 567-length sequence whose starting point seemed to be
  # causing an alignment issue.
  toy_example2 <- "TTTTTGGTGCATGATCTAGAATAGTAGGAACTTCCTTAAGAATATTAATTCGTGCAGAATTAGGAACCCCTAATGCATTAATTGGAGATGATCAAATTTATAATGTAATTGTAACAGCCCATGCATTCATTATAATTTTCTTTATAGTAATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTACCACTGATATTAAGAGCCCCTGATATAGCTTTCCACGATTAAATAATATAAGATTTTGACTTTTACCTCCATCTTTAACATTATTGTTAACTAGAAGTTTAGTAGAAAGAGGAACCGGTACAGGATGAACAGTTTACCCCCCACTGTCATCTACATTAAGGCATTCCGGAGCATCTGTAGATTTATCTATTTTTTCTTTGCATTTAGCAGGGATTTCCTCTATTCTAGGAGCAGTAAATTTTATTTCAACAATTATTAATATACGGGCCCCAGGAATAACTTTTGATAAAATACCTTTATTTGTGTGATCAGTATTAATTACTGCAGTATTACTATTATTATCTTTACCAGTTCTAGCTGGAGCAATTACTATA"

  # Author's note: the first two bases are dropped prior to adjustment but
  # reattached because keep_edges = TRUE.
  # NOTE(review): keep_edges is not passed below, so this presumably relies on
  # denoise()'s default -- confirm against the function signature.
  # Single deletion at position 221: a placeholder is inserted after base 221
  # (lengthening the sequence by 1), then positions 212-232 are censored
  # (placeholder at 222 plus censor_length = 10 on each side).
  toy2_chars <- strsplit(toy_example2, "")[[1]]
  toy2_chars <- append(toy2_chars, "N", after = 221)
  toy2_chars[212:232] <- "N"
  toy2_expected <- paste(toy2_chars, collapse = "")

  test_toy2_out <- denoise(
    x = toy_example2,
    name = "toy_example2-short_and_messy",
    ambig_char = "N",
    censor_length = 10,
    aa_check = FALSE,
    to_file = FALSE
  )

  expect_equal(test_toy2_out$outseq, toy2_expected)
  expect_equal(nchar(test_toy2_out$outseq), nchar(toy_example2) + 1)

  # ---- Case 3: inserted base ------------------------------------------------
  # Author's note: character 70 was added to this string; it should be removed
  # and a censor of 1 bp in either direction applied.
  toy_example3 <- "ACACTTTACTTTATTTTTGGTATTTGAGCAGGTATATTAGGGACTTCATTAAGTTTATTAATTCGAGCAGAAATTAGGTAATCCAGGTTCTTTAATTGGAGATGATCAAATTTATAATACTATTGTTACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGTTTTGGTAATTGACTAGTTCCTTTAATACTAGGAGCCCCTGATATAGCTTTCCCCCGAATAAATAATATAAGTTTCTGACTTTTACCTCCCTCTCTTACCTTATTAATTTCAAGAAGAATTGTAGAAAATGGTGCAGGAACTGGATGAACAGTTTACCCTCCTCTTTCTTCTAATATCGCTCATAGAGGAAGATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCTGGTATTTCATCAATTTTAGGGGCTATTAATTTTATTACTACAATTATTAACATACGATTAAATAGACTAATATTTGATCAAATACCTTTATTCGTATGAGCTGTAGGGATTACTGCTTTTCTTTTA"

  # Drop the inserted character, then mask the two bases that now flank the
  # removal site (indices 69 and 70 of the shortened sequence).
  toy3_chars <- strsplit(toy_example3, "")[[1]]
  toy3_chars <- toy3_chars[-70]
  toy3_chars[69:70] <- "N"
  toy3_expected <- paste(toy3_chars, collapse = "")

  test_toy3_out <- denoise(
    x = toy_example3,
    name = "toy_example3-long_and_messy",
    ambig_char = "N",
    censor_length = 1,
    aa_check = FALSE,
    to_file = FALSE
  )

  expect_equal(test_toy3_out$outseq, toy3_expected)
  expect_equal(nchar(test_toy3_out$outseq), nchar(toy_example3) - 1)

  # ---- Case 4: reverse complement -------------------------------------------
  # No errors, but the input is a reverse complement: the expected output is
  # the upper-cased reverse complement of the input, with no censoring.
  toy_example_4 <- "TAAGTGTTGATAGAGAATTGGGTCCCCTCCCCCTGCGGGGTCAAAGAATGTAGTGTTGAGGTTGCGGTCTGTTAATAATATTGTAATTCCGGCTGCAAGAACAGGGAGAGCAAGGAGTAGAAGAACAGTGGTGATAAGAATAGATCAAACAAATAATGGTGTTTGATATTGAGAAATAGTTGGGGGTTTTATATTAATAATAGTTGTAATAAAATTAATAGAGGCTAAAATTGATGAGATACCGGCCAAATGAAGGGAAAAGATGGCTAAATCAACAGAGGGGCCAGCATGTGCTAAATTACCTGCTAAAGGAGGATATACAGTTCATCCGGTTCCTGCTCCAGCTTCTACTCCTGCGGAGGCCAGGAGAAGTAAGAACGAAGGAGGAAGAAGCCAGAAACTTATATTATTTATTCGAGGGAATGCTATATCTGGTGCACCAATTATTAAAGGAACAAGTCAATTGCCAAAGCCACCAATTATTACAGGTATAACCATAAAAAAGATTATTACAAAGGCATGGGCGGTTACGATCACATTATAAATCTGATCATCCCCTAAAAGAGATCCGGGTTGACCTAGTTCAGCGCGAATTAGTAAACTTAAAGCTATTCCAACTATTCCTGCTCATGCACCAAAAATCAAGTAGAG"

  test_toy4_out <- denoise(
    x = toy_example_4,
    name = "toy_example4-reverse_complement",
    ambig_char = "N",
    censor_length = 10,
    aa_check = FALSE,
    to_file = FALSE
  )

  expect_equal(test_toy4_out$outseq, toupper(rev_comp(toy_example_4)))
})