delox.R.html 104 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814
  1. <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
  2. "http://www.w3.org/TR/html4/strict.dtd">
  3. <html>
  4. <head>
  5. <title></title>
  6. <meta http-equiv="content-type" content="text/html; charset=utf-8">
  7. <style type="text/css">
  8. td.linenos { background-color: #f0f0f0; padding-right: 10px; }
  9. span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
  10. pre { line-height: 125%; }
  11. body .hll { background-color: #ffffcc }
  12. body { background: #f8f8f8; }
  13. body .c { color: #408080; font-style: italic } /* Comment */
  14. body .err { border: 1px solid #FF0000 } /* Error */
  15. body .k { color: #008000; font-weight: bold } /* Keyword */
  16. body .o { color: #666666 } /* Operator */
  17. body .ch { color: #408080; font-style: italic } /* Comment.Hashbang */
  18. body .cm { color: #408080; font-style: italic } /* Comment.Multiline */
  19. body .cp { color: #BC7A00 } /* Comment.Preproc */
  20. body .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */
  21. body .c1 { color: #408080; font-style: italic } /* Comment.Single */
  22. body .cs { color: #408080; font-style: italic } /* Comment.Special */
  23. body .gd { color: #A00000 } /* Generic.Deleted */
  24. body .ge { font-style: italic } /* Generic.Emph */
  25. body .gr { color: #FF0000 } /* Generic.Error */
  26. body .gh { color: #000080; font-weight: bold } /* Generic.Heading */
  27. body .gi { color: #00A000 } /* Generic.Inserted */
  28. body .go { color: #888888 } /* Generic.Output */
  29. body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
  30. body .gs { font-weight: bold } /* Generic.Strong */
  31. body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
  32. body .gt { color: #0044DD } /* Generic.Traceback */
  33. body .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
  34. body .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
  35. body .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
  36. body .kp { color: #008000 } /* Keyword.Pseudo */
  37. body .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
  38. body .kt { color: #B00040 } /* Keyword.Type */
  39. body .m { color: #666666 } /* Literal.Number */
  40. body .s { color: #BA2121 } /* Literal.String */
  41. body .na { color: #7D9029 } /* Name.Attribute */
  42. body .nb { color: #008000 } /* Name.Builtin */
  43. body .nc { color: #0000FF; font-weight: bold } /* Name.Class */
  44. body .no { color: #880000 } /* Name.Constant */
  45. body .nd { color: #AA22FF } /* Name.Decorator */
  46. body .ni { color: #999999; font-weight: bold } /* Name.Entity */
  47. body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
  48. body .nf { color: #0000FF } /* Name.Function */
  49. body .nl { color: #A0A000 } /* Name.Label */
  50. body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
  51. body .nt { color: #008000; font-weight: bold } /* Name.Tag */
  52. body .nv { color: #19177C } /* Name.Variable */
  53. body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
  54. body .w { color: #bbbbbb } /* Text.Whitespace */
  55. body .mb { color: #666666 } /* Literal.Number.Bin */
  56. body .mf { color: #666666 } /* Literal.Number.Float */
  57. body .mh { color: #666666 } /* Literal.Number.Hex */
  58. body .mi { color: #666666 } /* Literal.Number.Integer */
  59. body .mo { color: #666666 } /* Literal.Number.Oct */
  60. body .sa { color: #BA2121 } /* Literal.String.Affix */
  61. body .sb { color: #BA2121 } /* Literal.String.Backtick */
  62. body .sc { color: #BA2121 } /* Literal.String.Char */
  63. body .dl { color: #BA2121 } /* Literal.String.Delimiter */
  64. body .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
  65. body .s2 { color: #BA2121 } /* Literal.String.Double */
  66. body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
  67. body .sh { color: #BA2121 } /* Literal.String.Heredoc */
  68. body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
  69. body .sx { color: #008000 } /* Literal.String.Other */
  70. body .sr { color: #BB6688 } /* Literal.String.Regex */
  71. body .s1 { color: #BA2121 } /* Literal.String.Single */
  72. body .ss { color: #19177C } /* Literal.String.Symbol */
  73. body .bp { color: #008000 } /* Name.Builtin.Pseudo */
  74. body .fm { color: #0000FF } /* Name.Function.Magic */
  75. body .vc { color: #19177C } /* Name.Variable.Class */
  76. body .vg { color: #19177C } /* Name.Variable.Global */
  77. body .vi { color: #19177C } /* Name.Variable.Instance */
  78. body .vm { color: #19177C } /* Name.Variable.Magic */
  79. body .il { color: #666666 } /* Literal.Number.Integer.Long */
  80. </style>
  81. </head>
  82. <body>
  83. <h2></h2>
  84. <div class="highlight"><pre><span></span><span class="c1">#!/usr/bin/env Rscript</span>
  85. default.align.opts <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">(</span>match<span class="o">=</span><span class="m">1</span><span class="p">,</span> mismatch<span class="o">=</span><span class="m">3</span><span class="p">,</span>
  86. gapOpening<span class="o">=</span><span class="m">5</span><span class="p">,</span> gapExtension<span class="o">=</span><span class="m">2</span><span class="p">)</span>
  87. parse_arguments <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">()</span> <span class="p">{</span>
  88. <span class="kp">suppressMessages</span><span class="p">({</span>
  89. <span class="kn">library</span><span class="p">(</span>optparse<span class="p">)</span>
  90. <span class="kn">library</span><span class="p">(</span>parallel<span class="p">)</span>
  91. <span class="p">})</span>
  92. option_list <span class="o">&lt;-</span>
  93. <span class="kt">list</span><span class="p">(</span>make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-c&quot;</span><span class="p">,</span> <span class="s">&quot;--min-call&quot;</span><span class="p">),</span> type<span class="o">=</span><span class="s">&quot;integer&quot;</span><span class="p">,</span> default<span class="o">=</span><span class="m">10</span><span class="p">,</span> metavar<span class="o">=</span><span class="s">&quot;10&quot;</span><span class="p">,</span>
  94. help<span class="o">=</span><span class="s">&quot;Minimum perfect overlap required to call the presence of the subject (paired only). Imperfect overlap will need to be longer, based on specified mismatch and gap penalties.&quot;</span><span class="p">),</span>
  95. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-l&quot;</span><span class="p">,</span> <span class="s">&quot;--min-length&quot;</span><span class="p">),</span> type<span class="o">=</span><span class="s">&quot;integer&quot;</span><span class="p">,</span> default<span class="o">=</span><span class="m">36</span><span class="p">,</span> metavar<span class="o">=</span><span class="s">&quot;36&quot;</span><span class="p">,</span>
  96. help<span class="o">=</span><span class="s">&quot;Minimum length allowed after trimming a read. Any reads shorter than this after trimming will be discarded.&quot;</span><span class="p">),</span>
  97. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-i&quot;</span><span class="p">,</span> <span class="s">&quot;--interleaved&quot;</span><span class="p">),</span> action<span class="o">=</span><span class="s">&quot;store_true&quot;</span><span class="p">,</span> default<span class="o">=</span><span class="kc">FALSE</span><span class="p">,</span>
  98. help<span class="o">=</span><span class="s">&quot;Specify this option if you have paired-end sequences interleaved in a single FASTQ file. The default is to read paired-end sequences from a matched pair of files, and this option is ignored if two fastq files are provided. When you use this option, skip the \&quot;READ2.fastq\&quot; argument.&quot;</span><span class="p">),</span>
  99. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-o&quot;</span><span class="p">,</span> <span class="s">&quot;--read1-orientation&quot;</span><span class="p">),</span> type<span class="o">=</span><span class="s">&quot;character&quot;</span><span class="p">,</span> default<span class="o">=</span><span class="s">&quot;in&quot;</span><span class="p">,</span> metavar<span class="o">=</span><span class="s">&quot;in/out&quot;</span><span class="p">,</span>
  100. help<span class="o">=</span><span class="s">&quot;Orientation of read1. Can be either \&quot;in\&quot; or \&quot;out\&quot; (paired only). Note that Illumina reads are \&quot;in\&quot;.&quot;</span><span class="p">),</span>
  101. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-q&quot;</span><span class="p">,</span> <span class="s">&quot;--read2-orientation&quot;</span><span class="p">),</span> type<span class="o">=</span><span class="s">&quot;character&quot;</span><span class="p">,</span> default<span class="o">=</span><span class="s">&quot;in&quot;</span><span class="p">,</span> metavar<span class="o">=</span><span class="s">&quot;in/out&quot;</span><span class="p">,</span>
  102. help<span class="o">=</span><span class="s">&quot;Orientation of read2. Can be either \&quot;in\&quot; or \&quot;out\&quot; (paired only)&quot;</span><span class="p">),</span>
  103. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-j&quot;</span><span class="p">,</span> <span class="s">&quot;--jobs&quot;</span><span class="p">),</span> type<span class="o">=</span><span class="s">&quot;integer&quot;</span><span class="p">,</span>
  104. default<span class="o">=</span>parallel<span class="o">:::</span>detectCores<span class="p">(),</span>
  105. metavar<span class="o">=</span><span class="kp">as.character</span><span class="p">(</span>parallel<span class="o">:::</span>detectCores<span class="p">()),</span>
  106. help<span class="o">=</span><span class="s">&quot;Number of jobs to run in parallel for alignment. This should be autodetected by default.&quot;</span><span class="p">),</span>
  107. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-y&quot;</span><span class="p">,</span> <span class="s">&quot;--yield-size&quot;</span><span class="p">),</span> type<span class="o">=</span><span class="s">&quot;integer&quot;</span><span class="p">,</span>
  108. default<span class="o">=</span><span class="m">100000</span><span class="p">,</span>
  109. metavar<span class="o">=</span><span class="s">&quot;100000&quot;</span><span class="p">,</span>
  110. help<span class="o">=</span><span class="s">&quot;Number of reads to process at a time. Setting this higher will read more data into memory at once and result in faster runtime. Setting this lower will require less memory.&quot;</span><span class="p">),</span>
  111. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-m&quot;</span><span class="p">,</span> <span class="s">&quot;--match-bonus&quot;</span><span class="p">),</span> type<span class="o">=</span><span class="s">&quot;double&quot;</span><span class="p">,</span>
  112. default<span class="o">=</span>default.align.opts<span class="o">$</span><span class="kp">match</span><span class="p">,</span>
  113. metavar<span class="o">=</span><span class="kp">as.character</span><span class="p">(</span>default.align.opts<span class="o">$</span><span class="kp">match</span><span class="p">),</span>
  114. help<span class="o">=</span><span class="s">&quot;Score bonus for a matching nucleotide&quot;</span><span class="p">),</span>
  115. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-p&quot;</span><span class="p">,</span> <span class="s">&quot;--mismatch-penalty&quot;</span><span class="p">),</span> type<span class="o">=</span><span class="s">&quot;double&quot;</span><span class="p">,</span>
  116. default<span class="o">=</span>default.align.opts<span class="o">$</span>mismatch<span class="p">,</span>
  117. metavar<span class="o">=</span><span class="kp">as.character</span><span class="p">(</span>default.align.opts<span class="o">$</span>mismatch<span class="p">),</span>
  118. help<span class="o">=</span><span class="s">&quot;Score penalty for a mismatched nucleotide (specify as a positive number)&quot;</span><span class="p">),</span>
  119. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-g&quot;</span><span class="p">,</span> <span class="s">&quot;--gap-open-penalty&quot;</span><span class="p">),</span> type<span class="o">=</span><span class="s">&quot;double&quot;</span><span class="p">,</span>
  120. default<span class="o">=</span>default.align.opts<span class="o">$</span>gapOpening<span class="p">,</span>
  121. metavar<span class="o">=</span><span class="kp">as.character</span><span class="p">(</span>default.align.opts<span class="o">$</span>gapOpening<span class="p">),</span>
  122. help<span class="o">=</span><span class="s">&quot;Score penalty for opening a gap in the alignment (specifiy as a positive number)&quot;</span><span class="p">),</span>
  123. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-e&quot;</span><span class="p">,</span> <span class="s">&quot;--gap-extension-penalty&quot;</span><span class="p">),</span> type<span class="o">=</span><span class="s">&quot;double&quot;</span><span class="p">,</span>
  124. default<span class="o">=</span>default.align.opts<span class="o">$</span><span class="kp">match</span><span class="p">,</span>
  125. metavar<span class="o">=</span><span class="kp">as.character</span><span class="p">(</span>default.align.opts<span class="o">$</span>gapExtension<span class="p">),</span>
  126. help<span class="o">=</span><span class="s">&quot;Score penalty for extending an alignment gap by two nucleotides (specify as a positive number)&quot;</span><span class="p">),</span>
  127. make_option<span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-s&quot;</span><span class="p">,</span> <span class="s">&quot;--single-read-mode&quot;</span><span class="p">),</span> action<span class="o">=</span><span class="s">&quot;store_true&quot;</span><span class="p">,</span> default<span class="o">=</span><span class="kc">FALSE</span><span class="p">,</span>
  128. help<span class="o">=</span><span class="s">&quot;Tell DeLoxer to run in single-end mode instead of paired-end mode. In this mode, the only a single input fastq file is provided, and only a single output file is created. No classification is performed, only trimming. When you use this option, skip the \&quot;READ2.fastq\&quot; argument, and specify the full file name for OUTPUT_NAME instead of just the base name.&quot;</span><span class="p">))</span>
  129. option_parser <span class="o">&lt;-</span> OptionParser<span class="p">(</span>option_list<span class="o">=</span>option_list<span class="p">,</span>
  130. usage<span class="o">=</span><span class="s">&quot;%prog [options] adapter.fasta READ1.fastq READ2.fastq OUTPUT_NAME&quot;</span><span class="p">)</span>
  131. opt <span class="o">&lt;-</span> parse_args<span class="p">(</span>option_parser<span class="p">,</span> positional_arguments<span class="o">=</span><span class="kc">TRUE</span><span class="p">)</span>
  132. <span class="kr">return</span><span class="p">(</span>opt<span class="p">)</span>
  133. <span class="p">}</span>
  134. <span class="c1">## Call this here to handle --help quickly, before we waste 10 seconds</span>
  135. <span class="c1">## loading all the libraries</span>
  136. <span class="kp">invisible</span><span class="p">(</span>parse_arguments<span class="p">())</span>
  137. print.option.list <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>opt<span class="o">=</span>parse_arguments<span class="p">())</span> <span class="p">{</span>
  138. args <span class="o">&lt;-</span> opt<span class="o">$</span><span class="kp">args</span>
  139. opts <span class="o">&lt;-</span> opt<span class="o">$</span><span class="kp">options</span>
  140. <span class="kp">message</span><span class="p">(</span><span class="s">&quot;Options:&quot;</span><span class="p">)</span>
  141. foreach <span class="p">(</span>o<span class="o">=</span>opts<span class="p">,</span> n<span class="o">=</span><span class="kp">names</span><span class="p">(</span>opts<span class="p">))</span> <span class="o">%do%</span> <span class="p">{</span>
  142. <span class="kr">if</span> <span class="p">(</span>n <span class="o">!=</span> <span class="s">&quot;help&quot;</span><span class="p">)</span>
  143. <span class="kp">message</span><span class="p">(</span><span class="s">&quot; &quot;</span><span class="p">,</span> n<span class="p">,</span> <span class="s">&quot;: &quot;</span><span class="p">,</span> o<span class="p">)</span>
  144. <span class="p">}</span>
  145. <span class="kp">message</span><span class="p">(</span><span class="s">&quot;Args: &quot;</span><span class="p">,</span> <span class="kp">paste</span><span class="p">(</span><span class="s">&quot;\&quot;&quot;</span><span class="p">,</span> <span class="kp">args</span><span class="p">,</span> <span class="s">&quot;\&quot;&quot;</span><span class="p">,</span> sep<span class="o">=</span><span class="s">&quot;&quot;</span><span class="p">,</span> collapse<span class="o">=</span><span class="s">&quot;, &quot;</span><span class="p">))</span>
  146. <span class="p">}</span>
  147. unimplemented <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">()</span> <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;UNIMPLEMENTED&quot;</span><span class="p">)</span>
  148. <span class="c1">## Timestampped message</span>
  149. tsmsg <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span><span class="kc">...</span><span class="p">)</span> <span class="p">{</span>
  150. <span class="kp">message</span><span class="p">(</span><span class="s">&quot;# &quot;</span><span class="p">,</span> <span class="kp">date</span><span class="p">(),</span> <span class="s">&quot;: &quot;</span><span class="p">,</span> <span class="kc">...</span><span class="p">)</span>
  151. <span class="p">}</span>
  152. tsmsg<span class="p">(</span><span class="s">&quot;Starting deloxer and loading required packages&quot;</span><span class="p">)</span>
  153. <span class="kp">suppressMessages</span><span class="p">({</span>
  154. <span class="kn">library</span><span class="p">(</span>ShortRead<span class="p">)</span>
  155. <span class="kn">library</span><span class="p">(</span>optparse<span class="p">)</span>
  156. <span class="kn">library</span><span class="p">(</span>foreach<span class="p">)</span>
  157. <span class="kn">library</span><span class="p">(</span>iterators<span class="p">)</span>
  158. <span class="kn">library</span><span class="p">(</span>itertools<span class="p">)</span>
  159. <span class="kn">library</span><span class="p">(</span>doMC<span class="p">)</span>
  160. registerDoMC<span class="p">()</span>
  161. mcoptions <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">(</span>preschedule<span class="o">=</span><span class="kc">TRUE</span><span class="p">,</span> set.seed<span class="o">=</span><span class="kc">FALSE</span><span class="p">)</span>
  162. <span class="p">})</span>
  163. <span class="c1">## Merge l1 and l2 by names</span>
  164. merge.lists <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>l1<span class="p">,</span> l2<span class="p">)</span> <span class="p">{</span>
  165. new.names <span class="o">&lt;-</span> <span class="kp">setdiff</span><span class="p">(</span><span class="kp">names</span><span class="p">(</span>l2<span class="p">),</span> <span class="kp">names</span><span class="p">(</span>l1<span class="p">))</span>
  166. l1<span class="p">[</span>new.names<span class="p">]</span> <span class="o">&lt;-</span> l2<span class="p">[</span>new.names<span class="p">]</span>
  167. l1
  168. <span class="p">}</span>
  169. <span class="c1">## Return an object sans names</span>
  170. strip.names <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> <span class="p">{</span>
  171. <span class="kp">names</span><span class="p">(</span>x<span class="p">)</span> <span class="o">&lt;-</span> <span class="kc">NULL</span>
  172. x
  173. <span class="p">}</span>
  174. <span class="c1">## Define some missing type coercions</span>
  175. setAs<span class="p">(</span>from<span class="o">=</span><span class="s">&quot;ShortRead&quot;</span><span class="p">,</span> to<span class="o">=</span><span class="s">&quot;DNAStringSet&quot;</span><span class="p">,</span> def<span class="o">=</span><span class="kr">function</span><span class="p">(</span>from<span class="p">)</span> sread<span class="p">(</span>from<span class="p">))</span>
  176. setAs<span class="p">(</span>from<span class="o">=</span><span class="s">&quot;PhredQuality&quot;</span><span class="p">,</span> to<span class="o">=</span><span class="s">&quot;FastqQuality&quot;</span><span class="p">,</span> def<span class="o">=</span><span class="kr">function</span><span class="p">(</span>from<span class="p">)</span> FastqQuality<span class="p">(</span>BStringSet<span class="p">(</span>from<span class="p">)))</span>
  177. setAs<span class="p">(</span>from<span class="o">=</span><span class="s">&quot;SolexaQuality&quot;</span><span class="p">,</span> to<span class="o">=</span><span class="s">&quot;SFastqQuality&quot;</span><span class="p">,</span> def<span class="o">=</span><span class="kr">function</span><span class="p">(</span>from<span class="p">)</span> SFastqQuality<span class="p">(</span>BStringSet<span class="p">(</span>from<span class="p">)))</span>
  178. setAs<span class="p">(</span>from<span class="o">=</span><span class="s">&quot;QualityScaledXStringSet&quot;</span><span class="p">,</span> to<span class="o">=</span><span class="s">&quot;ShortReadQ&quot;</span><span class="p">,</span> def<span class="o">=</span><span class="kr">function</span><span class="p">(</span>from<span class="p">)</span> <span class="p">{</span>
  179. q <span class="o">&lt;-</span> quality<span class="p">(</span>from<span class="p">)</span>
  180. new.quality.class <span class="o">&lt;-</span> <span class="kr">switch</span><span class="p">(</span><span class="kp">class</span><span class="p">(</span><span class="kp">q</span><span class="p">),</span>
  181. SolexaQuality<span class="o">=</span><span class="s">&quot;SFastqQuality&quot;</span><span class="p">,</span>
  182. PhredQuality<span class="o">=</span><span class="s">&quot;FastqQuality&quot;</span><span class="p">,</span>
  183. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;Unknown quality type: &quot;</span><span class="p">,</span> <span class="kp">class</span><span class="p">(</span><span class="kp">q</span><span class="p">)))</span>
  184. q <span class="o">&lt;-</span> as<span class="p">(</span><span class="kp">q</span><span class="p">,</span> new.quality.class<span class="p">)</span>
  185. ShortReadQ<span class="p">(</span>sread<span class="o">=</span>as<span class="p">(</span>from<span class="p">,</span> <span class="s">&quot;DNAStringSet&quot;</span><span class="p">),</span>
  186. quality<span class="o">=</span><span class="kp">q</span><span class="p">,</span>
  187. id<span class="o">=</span>BStringSet<span class="p">(</span><span class="kp">names</span><span class="p">(</span>from<span class="p">)))</span>
  188. <span class="p">})</span>
  189. <span class="c1">## Override the provided method to keep the sequence names</span>
  190. setAs<span class="p">(</span>from<span class="o">=</span><span class="s">&quot;ShortReadQ&quot;</span><span class="p">,</span> to<span class="o">=</span><span class="s">&quot;QualityScaledDNAStringSet&quot;</span><span class="p">,</span>
  191. def<span class="o">=</span><span class="kr">function</span> <span class="p">(</span>from<span class="p">,</span> to <span class="o">=</span> <span class="s">&quot;QualityScaledDNAStringSet&quot;</span><span class="p">,</span> strict <span class="o">=</span> <span class="kc">TRUE</span><span class="p">)</span> <span class="p">{</span>
  192. q <span class="o">&lt;-</span> quality<span class="p">(</span>from<span class="p">)</span>
  193. new.quality.class <span class="o">&lt;-</span> <span class="kr">switch</span><span class="p">(</span><span class="kp">class</span><span class="p">(</span><span class="kp">q</span><span class="p">),</span>
  194. SFastqQuality<span class="o">=</span><span class="s">&quot;SolexaQuality&quot;</span><span class="p">,</span>
  195. FastqQuality<span class="o">=</span><span class="s">&quot;PhredQuality&quot;</span><span class="p">,</span>
  196. <span class="s">&quot;XStringQuality&quot;</span><span class="p">)</span>
  197. q <span class="o">&lt;-</span> as<span class="p">(</span><span class="kp">q</span><span class="p">,</span> new.quality.class<span class="p">)</span>
  198. x <span class="o">&lt;-</span> QualityScaledDNAStringSet<span class="p">(</span>sread<span class="p">(</span>from<span class="p">),</span> <span class="kp">q</span><span class="p">)</span>
  199. <span class="kp">names</span><span class="p">(</span>x<span class="p">)</span> <span class="o">&lt;-</span> <span class="kp">as.character</span><span class="p">(</span>id<span class="p">(</span>from<span class="p">))</span>
  200. x
  201. <span class="p">})</span>
  202. <span class="c1">## Define functions for reading fastq into standard Biostrings object</span>
  203. <span class="c1">## and writing it back out. The standard functions readFastq and</span>
  204. <span class="c1">## writeFastq operate on ShortRead objects. These simply wrap them in</span>
  205. <span class="c1">## conversion to/from QualityScaledDNAStringSet.</span>
  206. read.QualityScaledDNAStringSet <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>filepath<span class="p">,</span> format <span class="o">=</span> <span class="s">&quot;fastq&quot;</span><span class="p">,</span> <span class="kc">...</span><span class="p">)</span> <span class="p">{</span>
  207. <span class="kr">switch</span><span class="p">(</span><span class="kp">format</span><span class="p">,</span>
  208. fastq<span class="o">=</span>as<span class="p">(</span>readFastq<span class="p">(</span>filepath<span class="p">,</span> withIds<span class="o">=</span><span class="kc">TRUE</span><span class="p">,</span> <span class="kc">...</span><span class="p">),</span> <span class="s">&quot;QualityScaledDNAStringSet&quot;</span><span class="p">),</span>
  209. <span class="c1">## Default</span>
  210. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;Unknown quality-scaled sequence format: &quot;</span><span class="p">,</span> <span class="kp">format</span><span class="p">))</span>
  211. <span class="p">}</span>
  212. write.QualityScaledDNAStringSet <span class="o">&lt;-</span> <span class="kr">function</span> <span class="p">(</span>x<span class="p">,</span> filepath<span class="p">,</span> append <span class="o">=</span> <span class="kc">FALSE</span><span class="p">,</span> format <span class="o">=</span> <span class="s">&quot;fastq&quot;</span><span class="p">)</span> <span class="p">{</span>
  213. <span class="kr">if</span><span class="p">(</span><span class="kp">length</span><span class="p">(</span>x<span class="p">)</span> <span class="o">&gt;</span> <span class="m">0</span><span class="p">)</span> <span class="p">{</span>
  214. sr <span class="o">&lt;-</span> as<span class="p">(</span>x<span class="p">,</span> <span class="s">&quot;ShortReadQ&quot;</span><span class="p">)</span>
  215. <span class="kr">switch</span><span class="p">(</span><span class="kp">format</span><span class="p">,</span>
  216. fastq<span class="o">=</span><span class="p">{</span>
  217. <span class="kr">if</span> <span class="p">(</span><span class="o">!</span><span class="kp">append</span><span class="p">)</span>
  218. <span class="kp">unlink</span><span class="p">(</span>filepath<span class="p">);</span>
  219. writeFastq<span class="p">(</span>object<span class="o">=</span>sr<span class="p">,</span>
  220. file<span class="o">=</span>filepath<span class="p">,</span> mode<span class="o">=</span><span class="kp">ifelse</span><span class="p">(</span><span class="kp">append</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;w&quot;</span><span class="p">))</span>
  221. <span class="p">},</span>
  222. <span class="c1">## Default</span>
  223. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;Unknown quality-scaled sequence format: &quot;</span><span class="p">,</span> <span class="kp">format</span><span class="p">))</span>
  224. <span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
  225. <span class="c1">## Zero-length sequence; just truncate/touch the file</span>
  226. <span class="kp">sink</span><span class="p">(</span>file<span class="o">=</span>filepath<span class="p">,</span> append<span class="o">=</span><span class="kp">append</span><span class="p">)</span>
  227. <span class="kp">sink</span><span class="p">()</span>
  228. <span class="p">}</span>
  229. <span class="p">}</span>
  230. discard.short.reads <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>reads<span class="p">,</span> min.length<span class="o">=</span><span class="m">1</span><span class="p">)</span> <span class="p">{</span>
  231. kept.reads <span class="o">&lt;-</span> reads<span class="p">[</span>width<span class="p">(</span>reads<span class="p">)</span> <span class="o">&gt;=</span> min.length<span class="p">]</span>
  232. <span class="kr">return</span><span class="p">(</span>kept.reads<span class="p">)</span>
  233. <span class="p">}</span>
  234. <span class="c1">## Takes a set of interleaved reads (or anything else) and</span>
  235. <span class="c1">## de-interleaves them</span>
  236. deinterleave.pairs <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>reads<span class="p">)</span> <span class="p">{</span>
  237. <span class="kp">stopifnot</span><span class="p">(</span><span class="kp">length</span><span class="p">(</span>reads<span class="p">)</span> <span class="o">%%</span> <span class="m">2</span> <span class="o">==</span> <span class="m">0</span><span class="p">)</span>
  238. mask <span class="o">&lt;-</span> <span class="kp">seq</span><span class="p">(</span>from<span class="o">=</span><span class="m">1</span><span class="p">,</span> to<span class="o">=</span><span class="kp">length</span><span class="p">(</span>reads<span class="p">),</span> by<span class="o">=</span><span class="m">2</span><span class="p">)</span>
  239. <span class="kr">return</span><span class="p">(</span><span class="kt">list</span><span class="p">(</span>read1<span class="o">=</span>reads<span class="p">[</span>mask<span class="p">],</span> read2<span class="o">=</span>reads<span class="p">[</span><span class="o">-</span>mask<span class="p">]))</span>
  240. <span class="p">}</span>
  241. <span class="m">.</span>delox.trimmed.ranges <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>subj<span class="p">,</span> reads<span class="p">,</span> min.length<span class="o">=</span><span class="m">36</span><span class="p">,</span>
  242. include.scores<span class="o">=</span><span class="kc">TRUE</span><span class="p">,</span>
  243. include.deleted.ranges<span class="o">=</span><span class="kc">TRUE</span><span class="p">,</span>
  244. align.opts<span class="o">=</span><span class="kt">list</span><span class="p">())</span> <span class="p">{</span>
  245. align.opts <span class="o">&lt;-</span> merge.lists<span class="p">(</span>align.opts<span class="p">,</span> default.align.opts<span class="p">)</span>
  246. aln <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">(</span>forward<span class="o">=</span>pairwiseAlignment<span class="p">(</span>pattern<span class="o">=</span>reads<span class="p">,</span>
  247. subject<span class="o">=</span>subj<span class="p">,</span>
  248. type<span class="o">=</span><span class="s">&quot;overlap&quot;</span><span class="p">,</span>
  249. substitutionMatrix<span class="o">=</span>nucleotideSubstitutionMatrix<span class="p">(</span>match <span class="o">=</span> align.opts<span class="o">$</span><span class="kp">match</span><span class="p">,</span> mismatch <span class="o">=</span> <span class="o">-</span>align.opts<span class="o">$</span>mismatch<span class="p">),</span>
  250. gapOpening<span class="o">=-</span>align.opts<span class="o">$</span>gapOpening<span class="p">,</span> gapExtension<span class="o">=-</span>align.opts<span class="o">$</span>gapExtension<span class="p">),</span>
  251. revcomp<span class="o">=</span>pairwiseAlignment<span class="p">(</span>pattern<span class="o">=</span>reads<span class="p">,</span>
  252. subject<span class="o">=</span>reverseComplement<span class="p">(</span>DNAString<span class="p">(</span>subj<span class="p">)),</span>
  253. type<span class="o">=</span><span class="s">&quot;overlap&quot;</span><span class="p">,</span>
  254. substitutionMatrix<span class="o">=</span>nucleotideSubstitutionMatrix<span class="p">(</span>match <span class="o">=</span> align.opts<span class="o">$</span><span class="kp">match</span><span class="p">,</span> mismatch <span class="o">=</span> <span class="o">-</span>align.opts<span class="o">$</span>mismatch<span class="p">),</span>
  255. gapOpening<span class="o">=-</span>align.opts<span class="o">$</span>gapOpening<span class="p">,</span> gapExtension<span class="o">=-</span>align.opts<span class="o">$</span>gapExtension<span class="p">))</span>
  256. aln.scores <span class="o">&lt;-</span> <span class="kp">Map</span><span class="p">(</span>score<span class="p">,</span> aln<span class="p">)</span>
  257. aln.pat <span class="o">&lt;-</span> <span class="kp">Map</span><span class="p">(</span>pattern<span class="p">,</span> aln<span class="p">)</span>
  258. aln.ranges <span class="o">&lt;-</span> <span class="kp">Map</span><span class="p">(</span><span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> IRanges<span class="p">(</span>start<span class="o">=</span>start<span class="p">(</span>x<span class="p">),</span> end<span class="o">=</span>end<span class="p">(</span>x<span class="p">)),</span> aln.pat<span class="p">)</span>
  259. aln.threebands <span class="o">&lt;-</span> <span class="kp">Map</span><span class="p">(</span><span class="kr">function</span> <span class="p">(</span>x<span class="p">)</span> threebands<span class="p">(</span>IRanges<span class="p">(</span>start<span class="o">=</span><span class="m">1</span><span class="p">,</span> end<span class="o">=</span>width<span class="p">(</span>reads<span class="p">)),</span>
  260. start<span class="o">=</span>start<span class="p">(</span>x<span class="p">),</span> end<span class="o">=</span>end<span class="p">(</span>x<span class="p">)),</span>
  261. aln.ranges<span class="p">)</span>
  262. <span class="c1">## For each read, decide whether the forward or reverse alignment</span>
  263. <span class="c1">## was better.</span>
  264. revcomp.better <span class="o">&lt;-</span> aln.scores<span class="o">$</span>forward <span class="o">&lt;</span> aln.scores<span class="o">$</span>revcomp
  265. <span class="c1">## For each read, take the threebands for the better alignment.</span>
  266. best.threebands <span class="o">&lt;-</span> aln.threebands<span class="o">$</span>forward
  267. <span class="kr">for</span> <span class="p">(</span>band <span class="kr">in</span> <span class="kp">names</span><span class="p">(</span>best.threebands<span class="p">))</span> <span class="p">{</span>
  268. best.threebands<span class="p">[[</span>band<span class="p">]][</span>revcomp.better<span class="p">]</span> <span class="o">&lt;-</span> aln.threebands<span class="o">$</span>revcomp<span class="p">[[</span>band<span class="p">]][</span>revcomp.better<span class="p">]</span>
  269. <span class="p">}</span>
  270. <span class="c1">## Use the left band if it is longer than either min.length or</span>
  271. <span class="c1">## length of right band.</span>
  272. use.right.band <span class="o">&lt;-</span> width<span class="p">(</span>best.threebands<span class="o">$</span>left<span class="p">)</span> <span class="o">&lt;</span> <span class="kp">pmin</span><span class="p">(</span>min.length<span class="p">,</span> width<span class="p">(</span>best.threebands<span class="o">$</span>right<span class="p">))</span>
  273. ranges <span class="o">&lt;-</span> best.threebands<span class="o">$</span>left
  274. ranges<span class="p">[</span>use.right.band<span class="p">]</span> <span class="o">&lt;-</span> best.threebands<span class="o">$</span>right<span class="p">[</span>use.right.band<span class="p">]</span>
  275. <span class="c1">## Record which ranges are shorter than min.length</span>
  276. too.short <span class="o">&lt;-</span> width<span class="p">(</span>ranges<span class="p">)</span> <span class="o">&lt;</span> min.length
  277. <span class="c1">## ranges[too.short] &lt;- IRanges(start=1,end=0)</span>
  278. <span class="c1">## Record what was trimmed off of each read (NOT what was kept!)</span>
  279. trim <span class="o">&lt;-</span> <span class="kp">factor</span><span class="p">(</span><span class="kp">ifelse</span><span class="p">(</span>use.right.band<span class="p">,</span> <span class="s">&quot;left&quot;</span><span class="p">,</span> <span class="s">&quot;right&quot;</span><span class="p">),</span> levels<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;right&quot;</span><span class="p">,</span> <span class="s">&quot;left&quot;</span><span class="p">,</span> <span class="s">&quot;all&quot;</span><span class="p">,</span> <span class="s">&quot;none&quot;</span><span class="p">))</span>
  280. <span class="c1">## If it&#39;s too short, then we trim &quot;all&quot;, i.e. discard the whole</span>
  281. <span class="c1">## read.</span>
  282. trim<span class="p">[</span>too.short<span class="p">]</span> <span class="o">&lt;-</span> <span class="s">&quot;all&quot;</span>
  283. <span class="c1">## If the read is not shorter after trimming, then nothing was</span>
  284. <span class="c1">## actually trimmed.</span>
  285. trim<span class="p">[</span>width<span class="p">(</span>ranges<span class="p">)</span> <span class="o">==</span> width<span class="p">(</span>reads<span class="p">)]</span> <span class="o">&lt;-</span> <span class="s">&quot;none&quot;</span>
  286. emeta <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">()</span>
  287. emeta<span class="o">$</span>trim <span class="o">&lt;-</span> trim
  288. <span class="kr">if</span> <span class="p">(</span>include.deleted.ranges<span class="p">)</span> <span class="p">{</span>
  289. deleted.start <span class="o">&lt;-</span> <span class="kp">ifelse</span><span class="p">(</span>too.short<span class="p">,</span> <span class="m">1</span><span class="p">,</span>
  290. <span class="kp">ifelse</span><span class="p">(</span>use.right.band<span class="p">,</span>
  291. start<span class="p">(</span>best.threebands<span class="o">$</span>left<span class="p">),</span>
  292. start<span class="p">(</span>best.threebands<span class="o">$</span>middle<span class="p">)))</span>
  293. deleted.end <span class="o">&lt;-</span> <span class="kp">ifelse</span><span class="p">(</span>too.short<span class="p">,</span> width<span class="p">(</span>reads<span class="p">),</span>
  294. <span class="kp">ifelse</span><span class="p">(</span>use.right.band<span class="p">,</span>
  295. end<span class="p">(</span>best.threebands<span class="o">$</span>middle<span class="p">),</span>
  296. end<span class="p">(</span>best.threebands<span class="o">$</span>right<span class="p">)))</span>
  297. emeta<span class="o">$</span>deleted.range <span class="o">&lt;-</span> IRanges<span class="p">(</span>deleted.start<span class="p">,</span> deleted.end<span class="p">)</span>
  298. <span class="p">}</span>
  299. <span class="kr">if</span> <span class="p">(</span>include.scores<span class="p">)</span> <span class="p">{</span>
  300. <span class="c1">## If requested, take the best score out of each pair of forward</span>
  301. <span class="c1">## and reverse scores.</span>
  302. scores <span class="o">&lt;-</span> <span class="kp">ifelse</span><span class="p">(</span>revcomp.better<span class="p">,</span> aln.scores<span class="o">$</span>revcomp<span class="p">,</span> aln.scores<span class="o">$</span>forward<span class="p">)</span>
  303. emeta<span class="o">$</span>score <span class="o">&lt;-</span> scores
  304. <span class="p">}</span>
  305. mcols<span class="p">(</span>ranges<span class="p">)</span> <span class="o">&lt;-</span> DataFrame<span class="p">(</span>emeta<span class="p">)</span>
  306. <span class="kr">return</span><span class="p">(</span>ranges<span class="p">)</span>
  307. <span class="p">}</span>
  308. <span class="c1">## Always call delox on the underlying DNAStringSet object when called</span>
  309. <span class="c1">## on something more complicated.</span>
  310. <span class="kp">suppressMessages</span><span class="p">({</span>
  311. <span class="kp">invisible</span><span class="p">(</span>setMethod<span class="p">(</span><span class="s">&quot;.delox.trimmed.ranges&quot;</span><span class="p">,</span> signature<span class="o">=</span><span class="kt">c</span><span class="p">(</span>reads<span class="o">=</span><span class="s">&quot;ShortRead&quot;</span><span class="p">),</span>
  312. <span class="kr">function</span> <span class="p">(</span>subj<span class="p">,</span> reads<span class="p">,</span> min.length<span class="p">,</span> include.scores<span class="p">,</span> include.deleted.ranges<span class="p">,</span> align.opts<span class="p">)</span> <span class="p">{</span>
  313. callGeneric<span class="p">(</span>subj<span class="p">,</span> as<span class="p">(</span>reads<span class="p">,</span> <span class="s">&quot;DNAStringSet&quot;</span><span class="p">),</span> min.length<span class="p">,</span> include.scores<span class="p">,</span> include.deleted.ranges<span class="p">,</span> align.opts<span class="p">)</span>
  314. <span class="p">}))</span>
  315. <span class="kp">invisible</span><span class="p">(</span>setMethod<span class="p">(</span><span class="s">&quot;.delox.trimmed.ranges&quot;</span><span class="p">,</span> signature<span class="o">=</span><span class="kt">c</span><span class="p">(</span>reads<span class="o">=</span><span class="s">&quot;QualityScaledDNAStringSet&quot;</span><span class="p">),</span>
  316. <span class="kr">function</span> <span class="p">(</span>subj<span class="p">,</span> reads<span class="p">,</span> min.length<span class="p">,</span> include.scores<span class="p">,</span> include.deleted.ranges<span class="p">,</span> align.opts<span class="p">)</span> <span class="p">{</span>
  317. callGeneric<span class="p">(</span>subj<span class="p">,</span> as<span class="p">(</span>reads<span class="p">,</span> <span class="s">&quot;DNAStringSet&quot;</span><span class="p">),</span> min.length<span class="p">,</span> include.scores<span class="p">,</span> include.deleted.ranges<span class="p">,</span> align.opts<span class="p">)</span>
  318. <span class="p">}))</span>
  319. <span class="kp">invisible</span><span class="p">(</span>setMethod<span class="p">(</span><span class="s">&quot;.delox.trimmed.ranges&quot;</span><span class="p">,</span> signature<span class="o">=</span><span class="kt">c</span><span class="p">(</span>reads<span class="o">=</span><span class="s">&quot;QualityScaledXStringSet&quot;</span><span class="p">),</span>
  320. <span class="kr">function</span> <span class="p">(</span>subj<span class="p">,</span> reads<span class="p">,</span> min.length<span class="p">,</span> include.scores<span class="p">,</span> include.deleted.ranges<span class="p">,</span> align.opts<span class="p">)</span> <span class="p">{</span>
  321. callGeneric<span class="p">(</span>subj<span class="p">,</span> as<span class="p">(</span>reads<span class="p">,</span> <span class="s">&quot;XStringSet&quot;</span><span class="p">),</span> min.length<span class="p">,</span> include.scores<span class="p">,</span> include.deleted.ranges<span class="p">,</span> align.opts<span class="p">)</span>
  322. <span class="p">}))</span>
  323. <span class="p">})</span>
  324. delox.single <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>subj<span class="p">,</span> reads <span class="p">,</span> min.length<span class="o">=</span><span class="m">36</span><span class="p">,</span>
  325. include.scores<span class="o">=</span><span class="kc">TRUE</span><span class="p">,</span> align.opts<span class="o">=</span><span class="kt">list</span><span class="p">())</span> <span class="p">{</span>
  326. tsmsg<span class="p">(</span><span class="s">&quot;Saving read names&quot;</span><span class="p">)</span>
  327. saved.names <span class="o">&lt;-</span> BStringSet<span class="p">(</span><span class="kp">names</span><span class="p">(</span>reads<span class="p">))</span>
  328. reads <span class="o">&lt;-</span> strip.names<span class="p">(</span>reads<span class="p">)</span>
  329. <span class="kp">invisible</span><span class="p">(</span><span class="kp">gc</span><span class="p">())</span>
  330. tsmsg<span class="p">(</span><span class="s">&quot;Doing alignments&quot;</span><span class="p">)</span>
  331. nchunks <span class="o">&lt;-</span> <span class="kp">min</span><span class="p">(</span>getDoParWorkers<span class="p">(),</span> <span class="kp">ceiling</span><span class="p">(</span><span class="kp">length</span><span class="p">(</span>reads<span class="p">)</span><span class="o">/</span><span class="m">1000</span><span class="p">))</span>
  332. deloxed.ranges <span class="o">&lt;-</span> foreach<span class="p">(</span>reads<span class="o">=</span>isplitVector<span class="p">(</span>reads<span class="p">,</span> chunks<span class="o">=</span>nchunks<span class="p">),</span> <span class="m">.</span>combine<span class="o">=</span><span class="kt">c</span><span class="p">)</span> <span class="o">%dopar%</span> <span class="p">{</span>
  333. <span class="m">.</span>delox.trimmed.ranges<span class="p">(</span>reads<span class="o">=</span>reads<span class="p">,</span> subj<span class="o">=</span>subj<span class="p">,</span> min.length<span class="o">=</span>min.length<span class="p">,</span>
  334. include.scores<span class="o">=</span>include.scores<span class="p">,</span>
  335. include.deleted.ranges<span class="o">=</span><span class="kc">FALSE</span><span class="p">,</span>
  336. align.opts<span class="o">=</span>align.opts<span class="p">)</span>
  337. <span class="p">}</span>
  338. <span class="c1">## maybe.chunkapply(.delox.trimmed.ranges,</span>
  339. <span class="c1">## VECTOR.ARGS=list(reads=reads),</span>
  340. <span class="c1">## SCALAR.ARGS=list(subj=subj, min.length=min.length,</span>
  341. <span class="c1">## include.scores=include.scores,</span>
  342. <span class="c1">## include.deleted.ranges=FALSE,</span>
  343. <span class="c1">## align.opts=align.opts),</span>
  344. <span class="c1">## min.chunk.size=1000,</span>
  345. <span class="c1">## MERGE=c)</span>
  346. tsmsg<span class="p">(</span><span class="s">&quot;Trimming reads&quot;</span><span class="p">)</span>
  347. trimmed.reads <span class="o">&lt;-</span> narrow<span class="p">(</span>reads<span class="p">,</span> start<span class="p">(</span>deloxed.ranges<span class="p">),</span> end<span class="p">(</span>deloxed.ranges<span class="p">))</span>
  348. tsmsg<span class="p">(</span><span class="s">&quot;Restoring read names&quot;</span><span class="p">)</span>
  349. <span class="kp">names</span><span class="p">(</span>trimmed.reads<span class="p">)</span> <span class="o">&lt;-</span> <span class="kp">as.character</span><span class="p">(</span>saved.names<span class="p">)</span>
  350. tsmsg<span class="p">(</span><span class="s">&quot;Adding metadata&quot;</span><span class="p">)</span>
  351. emeta <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">()</span>
  352. <span class="kr">if</span> <span class="p">(</span>include.scores<span class="p">)</span> <span class="p">{</span>
  353. emeta<span class="o">$</span>score <span class="o">&lt;-</span> mcols<span class="p">(</span>deloxed.ranges<span class="p">)</span><span class="o">$</span>score
  354. <span class="p">}</span>
  355. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span>emeta<span class="p">)</span> <span class="o">&gt;</span> <span class="m">0</span><span class="p">)</span> <span class="p">{</span>
  356. mcols<span class="p">(</span>trimmed.reads<span class="p">)</span> <span class="o">&lt;-</span> DataFrame<span class="p">(</span>emeta<span class="p">)</span>
  357. <span class="p">}</span>
  358. <span class="kr">return</span><span class="p">(</span>discard.short.reads<span class="p">(</span>trimmed.reads<span class="p">,</span> min.length<span class="p">))</span>
  359. <span class="p">}</span>
  360. delox.paired <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>subj<span class="p">,</span> read1<span class="p">,</span> read2<span class="p">,</span>
  361. min.call<span class="o">=</span><span class="m">10</span><span class="p">,</span> min.length<span class="o">=</span><span class="m">36</span><span class="p">,</span>
  362. include.scores<span class="o">=</span><span class="kc">TRUE</span><span class="p">,</span> align.opts<span class="o">=</span><span class="kt">list</span><span class="p">())</span> <span class="p">{</span>
  363. align.opts <span class="o">&lt;-</span> merge.lists<span class="p">(</span>align.opts<span class="p">,</span> default.align.opts<span class="p">)</span>
  364. tsmsg<span class="p">(</span><span class="s">&quot;Checking read counts&quot;</span><span class="p">)</span>
  365. <span class="kp">stopifnot</span><span class="p">(</span><span class="kp">length</span><span class="p">(</span>read1<span class="p">)</span> <span class="o">==</span> <span class="kp">length</span><span class="p">(</span>read2<span class="p">))</span>
  366. tsmsg<span class="p">(</span><span class="s">&quot;Listing reads&quot;</span><span class="p">)</span>
  367. original.reads <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">(</span>read1<span class="o">=</span>read1<span class="p">,</span>
  368. read2<span class="o">=</span>read2<span class="p">)</span>
  369. <span class="kp">rm</span><span class="p">(</span>read1<span class="p">,</span> read2<span class="p">)</span>
  370. tsmsg<span class="p">(</span><span class="s">&quot;Saving read names&quot;</span><span class="p">)</span>
  371. read.names <span class="o">&lt;-</span> foreach<span class="p">(</span>r<span class="o">=</span>original.reads<span class="p">)</span> <span class="o">%do%</span> BStringSet<span class="p">(</span><span class="kp">names</span><span class="p">(</span>r<span class="p">))</span>
  372. <span class="kp">names</span><span class="p">(</span>read.names<span class="p">)</span> <span class="o">&lt;-</span> <span class="kp">names</span><span class="p">(</span>original.reads<span class="p">)</span>
  373. original.reads <span class="o">&lt;-</span> <span class="kp">Map</span><span class="p">(</span>strip.names<span class="p">,</span> original.reads<span class="p">)</span>
  374. <span class="kp">invisible</span><span class="p">(</span><span class="kp">gc</span><span class="p">())</span>
  375. tsmsg<span class="p">(</span><span class="s">&quot;Doing alignments&quot;</span><span class="p">)</span>
  376. deloxed.ranges <span class="o">&lt;-</span> <span class="kp">lapply</span><span class="p">(</span>original.reads<span class="p">,</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> <span class="p">{</span>
  377. nchunks <span class="o">&lt;-</span> <span class="kp">min</span><span class="p">(</span>getDoParWorkers<span class="p">(),</span> <span class="kp">ceiling</span><span class="p">(</span><span class="kp">length</span><span class="p">(</span>x<span class="p">)</span><span class="o">/</span><span class="m">1000</span><span class="p">))</span>
  378. foreach<span class="p">(</span>reads<span class="o">=</span>isplitVector<span class="p">(</span>x<span class="p">,</span> chunks<span class="o">=</span>nchunks<span class="p">),</span> <span class="m">.</span>combine<span class="o">=</span><span class="kt">c</span><span class="p">)</span> <span class="o">%dopar%</span> <span class="p">{</span>
  379. <span class="m">.</span>delox.trimmed.ranges<span class="p">(</span>reads<span class="o">=</span>reads<span class="p">,</span> subj<span class="o">=</span>subj<span class="p">,</span> min.length<span class="o">=</span>min.length<span class="p">,</span>
  380. include.scores<span class="o">=</span>include.scores<span class="p">,</span>
  381. include.deleted.ranges<span class="o">=</span><span class="kc">FALSE</span><span class="p">,</span>
  382. align.opts<span class="o">=</span>align.opts<span class="p">)</span>
  383. <span class="p">}</span>
  384. <span class="c1">## maybe.chunkapply(.delox.trimmed.ranges,</span>
  385. <span class="c1">## VECTOR.ARGS=list(reads=strip.names(x)),</span>
  386. <span class="c1">## SCALAR.ARGS=list(subj=subj,</span>
  387. <span class="c1">## min.length=min.length,</span>
  388. <span class="c1">## include.scores=TRUE,</span>
  389. <span class="c1">## include.deleted.ranges=TRUE,</span>
  390. <span class="c1">## align.opts=align.opts),</span>
  391. <span class="c1">## MERGE=c,</span>
  392. <span class="c1">## min.chunk.size=1000)</span>
  393. <span class="p">})</span>
  394. tsmsg<span class="p">(</span><span class="s">&quot;Extracting metadata&quot;</span><span class="p">)</span>
  395. delox.meta <span class="o">&lt;-</span> <span class="kp">lapply</span><span class="p">(</span>deloxed.ranges<span class="p">,</span> mcols<span class="p">)</span>
  396. <span class="c1">## Decide whether enough was trimmed on the inside (right end) of</span>
  397. <span class="c1">## either read to call it a mate-pair.</span>
  398. tsmsg<span class="p">(</span><span class="s">&quot;Calculating inside trim score&quot;</span><span class="p">)</span>
  399. inside.trim.score <span class="o">&lt;-</span> <span class="kp">Reduce</span><span class="p">(</span><span class="kp">pmax</span><span class="p">,</span>
  400. <span class="kp">lapply</span><span class="p">(</span>delox.meta<span class="p">,</span>
  401. <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> <span class="kp">ifelse</span><span class="p">(</span>x<span class="o">$</span>trim <span class="o">==</span> <span class="s">&quot;right&quot;</span><span class="p">,</span> x<span class="o">$</span>score<span class="p">,</span> <span class="m">0</span><span class="p">)))</span>
  402. <span class="c1">## Decide whether enough was trimmed on the outside (left end) of</span>
  403. <span class="c1">## either read to call it a non-mate-pair.</span>
  404. tsmsg<span class="p">(</span><span class="s">&quot;Calculating outside trim score&quot;</span><span class="p">)</span>
  405. outside.trim.score <span class="o">&lt;-</span> <span class="kp">Reduce</span><span class="p">(</span><span class="kp">pmax</span><span class="p">,</span>
  406. <span class="kp">lapply</span><span class="p">(</span>delox.meta<span class="p">,</span>
  407. <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> <span class="kp">ifelse</span><span class="p">(</span>x<span class="o">$</span>trim <span class="o">==</span> <span class="s">&quot;left&quot;</span><span class="p">,</span> x<span class="o">$</span>score<span class="p">,</span> <span class="m">0</span><span class="p">)))</span>
  408. tsmsg<span class="p">(</span><span class="s">&quot;Calling presence of subject&quot;</span><span class="p">)</span>
  409. calls <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">(</span>inside<span class="o">=</span>inside.trim.score <span class="o">&gt;=</span> min.call <span class="o">*</span> align.opts<span class="o">$</span><span class="kp">match</span><span class="p">,</span>
  410. outside<span class="o">=</span>outside.trim.score <span class="o">&gt;=</span> min.call <span class="o">*</span> align.opts<span class="o">$</span><span class="kp">match</span><span class="p">)</span>
  411. tsmsg<span class="p">(</span><span class="s">&quot;Categorizing reads&quot;</span><span class="p">)</span>
  412. <span class="kt">category</span> <span class="o">&lt;-</span> <span class="kp">factor</span><span class="p">(</span><span class="kp">rep</span><span class="p">(</span><span class="kc">NA</span><span class="p">,</span> <span class="kp">length</span><span class="p">(</span>original.reads<span class="o">$</span>read1<span class="p">)),</span> levels<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;mate&quot;</span><span class="p">,</span> <span class="s">&quot;non-mate&quot;</span><span class="p">,</span> <span class="s">&quot;negative&quot;</span><span class="p">,</span> <span class="s">&quot;unpaired&quot;</span><span class="p">,</span> <span class="s">&quot;discard&quot;</span><span class="p">))</span>
  413. <span class="kp">category</span><span class="p">[</span>calls<span class="o">$</span>inside<span class="p">]</span> <span class="o">&lt;-</span> <span class="s">&quot;mate&quot;</span>
  414. <span class="kp">category</span><span class="p">[</span>calls<span class="o">$</span>outside<span class="p">]</span> <span class="o">&lt;-</span> <span class="s">&quot;non-mate&quot;</span>
  415. <span class="c1">## If they&#39;re either both true or both false, then it&#39;s ambiguous</span>
  416. <span class="kp">category</span><span class="p">[</span>calls<span class="o">$</span>inside <span class="o">==</span> calls<span class="o">$</span>outside<span class="p">]</span> <span class="o">&lt;-</span> <span class="s">&quot;negative&quot;</span>
  417. <span class="c1">## All categories should be filled in now</span>
  418. <span class="kp">stopifnot</span><span class="p">(</span><span class="kp">all</span><span class="p">(</span><span class="o">!</span><span class="kp">is.na</span><span class="p">(</span><span class="kp">category</span><span class="p">)))</span>
  419. too.short <span class="o">&lt;-</span> <span class="kp">lapply</span><span class="p">(</span>deloxed.ranges<span class="p">,</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> width<span class="p">(</span>x<span class="p">)</span> <span class="o">&lt;</span> min.length<span class="p">)</span>
  420. <span class="c1">## If either read in a pair is too short, then its partner is no</span>
  421. <span class="c1">## longer paired at all.</span>
  422. one.too.short <span class="o">&lt;-</span> <span class="kp">Reduce</span><span class="p">(</span><span class="sb">`|`</span><span class="p">,</span> too.short<span class="p">)</span>
  423. <span class="kp">category</span><span class="p">[</span>one.too.short<span class="p">]</span> <span class="o">&lt;-</span> <span class="s">&quot;unpaired&quot;</span>
  424. <span class="c1">## If both reads in a pair are too short, then the entire pair is</span>
  425. <span class="c1">## discarded. This is highly unlikely, since Cre-Lox should not</span>
  426. <span class="c1">## appear in the middle of both sequences.</span>
  427. both.too.short <span class="o">&lt;-</span> <span class="kp">Reduce</span><span class="p">(</span><span class="sb">`&amp;`</span><span class="p">,</span> too.short<span class="p">)</span>
  428. <span class="kp">category</span><span class="p">[</span>both.too.short<span class="p">]</span> <span class="o">&lt;-</span> <span class="s">&quot;discard&quot;</span>
  429. tsmsg<span class="p">(</span><span class="s">&quot;Trimming reads and restoring read names&quot;</span><span class="p">)</span>
  430. trimmed.reads <span class="o">&lt;-</span> <span class="kp">lapply</span><span class="p">(</span><span class="kp">names</span><span class="p">(</span>original.reads<span class="p">),</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> <span class="p">{</span>
  431. trimmed <span class="o">&lt;-</span> narrow<span class="p">(</span>original.reads<span class="p">[[</span>x<span class="p">]],</span>
  432. start<span class="o">=</span>start<span class="p">(</span>deloxed.ranges<span class="p">[[</span>x<span class="p">]]),</span>
  433. end<span class="o">=</span>end<span class="p">(</span>deloxed.ranges<span class="p">[[</span>x<span class="p">]]))</span>
  434. <span class="kp">names</span><span class="p">(</span>trimmed<span class="p">)</span> <span class="o">&lt;-</span> <span class="kp">as.character</span><span class="p">(</span>read.names<span class="p">[[</span>x<span class="p">]])</span>
  435. trimmed
  436. <span class="p">})</span>
  437. <span class="kp">names</span><span class="p">(</span>trimmed.reads<span class="p">)</span> <span class="o">&lt;-</span> <span class="kp">names</span><span class="p">(</span>original.reads<span class="p">)</span>
  438. tsmsg<span class="p">(</span><span class="s">&quot;Assembling metadata&quot;</span><span class="p">)</span>
  439. foreach <span class="p">(</span>r<span class="o">=</span><span class="kp">names</span><span class="p">(</span>trimmed.reads<span class="p">))</span> <span class="o">%do%</span> <span class="p">{</span>
  440. emeta <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">()</span>
  441. emeta<span class="o">$</span><span class="kt">category</span> <span class="o">&lt;-</span> <span class="kp">category</span>
  442. emeta<span class="o">$</span><span class="kp">category</span><span class="p">[</span>too.short<span class="p">[[</span>r<span class="p">]]]</span> <span class="o">&lt;-</span> <span class="s">&quot;discard&quot;</span>
  443. <span class="kr">if</span> <span class="p">(</span>include.scores<span class="p">)</span> <span class="p">{</span>
  444. emeta<span class="o">$</span>score <span class="o">&lt;-</span> delox.meta<span class="p">[[</span>r<span class="p">]]</span><span class="o">$</span>score
  445. <span class="p">}</span>
  446. mcols<span class="p">(</span>trimmed.reads<span class="p">[[</span>r<span class="p">]])</span> <span class="o">&lt;-</span> DataFrame<span class="p">(</span>emeta<span class="p">)</span>
  447. <span class="p">}</span>
  448. <span class="kr">return</span><span class="p">(</span>trimmed.reads<span class="p">)</span>
  449. <span class="p">}</span>
  450. <span class="c1">## Wrapper for both single and paired as appropriate</span>
  451. delox <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>subj<span class="p">,</span> read1<span class="p">,</span> read2<span class="o">=</span><span class="kc">NULL</span><span class="p">,</span>
  452. min.call<span class="o">=</span><span class="m">10</span><span class="p">,</span> min.length<span class="o">=</span><span class="m">36</span><span class="p">,</span>
  453. interleaved<span class="o">=</span><span class="kc">FALSE</span><span class="p">,</span>
  454. read1.orientation<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;in&quot;</span><span class="p">,</span> <span class="s">&quot;out&quot;</span><span class="p">)[</span><span class="m">1</span><span class="p">],</span>
  455. read2.orientation<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;in&quot;</span><span class="p">,</span> <span class="s">&quot;out&quot;</span><span class="p">)[</span><span class="m">1</span><span class="p">],</span>
  456. align.opts<span class="o">=</span><span class="kt">list</span><span class="p">())</span> <span class="p">{</span>
  457. <span class="kr">if</span> <span class="p">(</span><span class="kp">is.null</span><span class="p">(</span>read2<span class="p">))</span> <span class="p">{</span>
  458. <span class="kr">if</span> <span class="p">(</span>interleaved<span class="p">)</span> <span class="p">{</span>
  459. x <span class="o">&lt;-</span> deinterleave.pairs<span class="p">(</span>read1<span class="p">)</span>
  460. read1 <span class="o">&lt;-</span> x<span class="o">$</span>read1
  461. read2 <span class="o">&lt;-</span> x<span class="o">$</span>read2
  462. <span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
  463. tsmsg<span class="p">(</span><span class="s">&quot;Doing single-read delox&quot;</span><span class="p">)</span>
  464. <span class="kr">return</span><span class="p">(</span>delox.single<span class="p">(</span>subj<span class="o">=</span>subj<span class="p">,</span> reads<span class="o">=</span>read1<span class="p">,</span> min.length<span class="o">=</span>min.length<span class="p">,</span> align.opts<span class="o">=</span>align.opts<span class="p">))</span>
  465. <span class="p">}</span>
  466. <span class="p">}</span>
  467. <span class="c1">## Make sure both reads are oriented &quot;in&quot; before calling</span>
  468. tsmsg<span class="p">(</span><span class="s">&quot;Ensuring correct read orientation&quot;</span><span class="p">)</span>
  469. <span class="kr">if</span> <span class="p">(</span><span class="kp">tolower</span><span class="p">(</span>read1.orientation<span class="p">)</span> <span class="o">==</span> <span class="s">&quot;out&quot;</span><span class="p">)</span> <span class="p">{</span>
  470. read1 <span class="o">&lt;-</span> reverseComplement<span class="p">(</span>read1<span class="p">)</span>
  471. <span class="p">}</span>
  472. <span class="kr">if</span> <span class="p">(</span><span class="o">!</span><span class="kp">is.null</span><span class="p">(</span>read2<span class="p">)</span> <span class="o">&amp;&amp;</span> <span class="kp">tolower</span><span class="p">(</span>read2.orientation<span class="p">)</span> <span class="o">==</span> <span class="s">&quot;out&quot;</span><span class="p">)</span> <span class="p">{</span>
  473. read2 <span class="o">&lt;-</span> reverseComplement<span class="p">(</span>read2<span class="p">)</span>
  474. <span class="p">}</span>
  475. tsmsg<span class="p">(</span><span class="s">&quot;Doing paired-end delox&quot;</span><span class="p">)</span>
  476. deloxed.reads <span class="o">&lt;-</span> delox.paired<span class="p">(</span>subj<span class="p">,</span> read1<span class="p">,</span> read2<span class="p">,</span>
  477. min.call<span class="o">=</span>min.call<span class="p">,</span> min.length<span class="o">=</span>min.length<span class="p">,</span>
  478. align.opts<span class="o">=</span>align.opts<span class="p">)</span>
  479. <span class="c1">## If reads started &quot;out&quot;, put them back that way before returning</span>
  480. tsmsg<span class="p">(</span><span class="s">&quot;Restoring original read orientation&quot;</span><span class="p">)</span>
  481. <span class="kr">if</span> <span class="p">(</span><span class="kp">tolower</span><span class="p">(</span>read1.orientation<span class="p">)</span> <span class="o">==</span> <span class="s">&quot;out&quot;</span><span class="p">)</span> <span class="p">{</span>
  482. deloxed.reads<span class="o">$</span>read1 <span class="o">&lt;-</span> reverseComplement<span class="p">(</span>deloxed.reads<span class="o">$</span>read1<span class="p">)</span>
  483. <span class="p">}</span>
  484. <span class="kr">if</span> <span class="p">(</span><span class="kp">tolower</span><span class="p">(</span>read2.orientation<span class="p">)</span> <span class="o">==</span> <span class="s">&quot;out&quot;</span><span class="p">)</span> <span class="p">{</span>
  485. deloxed.reads<span class="o">$</span>read2 <span class="o">&lt;-</span> reverseComplement<span class="p">(</span>deloxed.reads<span class="o">$</span>read2<span class="p">)</span>
  486. <span class="p">}</span>
  487. <span class="kr">return</span><span class="p">(</span>deloxed.reads<span class="p">)</span>
  488. <span class="p">}</span>
  489. <span class="c1">## ## Hack to work around a bug in BioConductor that prevents subsetting</span>
  490. <span class="c1">## ## of named XStringSet objects. Apparently, since DeLoxer was first</span>
  491. <span class="c1">## ## published, the BioConductor devs broke the XStringSet subsetting</span>
  492. <span class="c1">## ## code so that it can no longer handle XStringSets with names. The</span>
  493. <span class="c1">## ## code below strips the names from the XStringSet, then calls the old</span>
  494. <span class="c1">## ## code to subset the nameless object while subsetting the names</span>
  495. <span class="c1">## ## separately, then finally puts the names back on and returns the</span>
  496. <span class="c1">## ## result.</span>
  497. <span class="c1">## old.XStringSet.subset.method &lt;- selectMethod(&quot;[&quot;, &quot;XStringSet&quot;)</span>
  498. <span class="c1">## invisible(setMethod(&quot;[&quot;, signature=&quot;XStringSet&quot;, definition=function(x, i, j, ..., drop=TRUE) {</span>
  499. <span class="c1">## ## Save the names into a seaprate variable</span>
  500. <span class="c1">## xnames &lt;- names(x)</span>
  501. <span class="c1">## ## Do the old behavior, which works on unnamed objects</span>
  502. <span class="c1">## x &lt;- old.XStringSet.subset.method(unname(x), i, j, ..., drop=drop)</span>
  503. <span class="c1">## ## Put the names back on and return</span>
  504. <span class="c1">## setNames(x, xnames[i])</span>
  505. <span class="c1">## }))</span>
  506. save.deloxed.pairs.as.fastq <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>read1<span class="p">,</span> read2<span class="p">,</span> output.base<span class="p">,</span>
  507. mate.ext<span class="o">=</span><span class="s">&quot;matepaired&quot;</span><span class="p">,</span>
  508. nonmate.ext<span class="o">=</span><span class="s">&quot;paired&quot;</span><span class="p">,</span>
  509. negative.ext<span class="o">=</span><span class="s">&quot;negative&quot;</span><span class="p">,</span>
  510. unpaired.ext<span class="o">=</span><span class="s">&quot;unpaired&quot;</span><span class="p">,</span>
  511. append<span class="o">=</span><span class="kc">FALSE</span><span class="p">)</span> <span class="p">{</span>
  512. extension <span class="o">&lt;-</span> <span class="kt">c</span><span class="p">(</span>mate<span class="o">=</span>mate.ext<span class="p">,</span>
  513. <span class="sb">`non-mate`</span><span class="o">=</span>nonmate.ext<span class="p">,</span>
  514. negative<span class="o">=</span>negative.ext<span class="p">,</span>
  515. unpaired<span class="o">=</span>unpaired.ext<span class="p">)</span>
  516. <span class="c1">## ## Make sure that read1 and read2 are a match for each other</span>
  517. <span class="c1">## stopifnot(identical(as.character(mcols(read1)$category),</span>
  518. <span class="c1">## as.character(mcols(read2)$category)))</span>
  519. <span class="c1">## ## Discard the shorter read on &quot;unpaired&quot;</span>
  520. <span class="c1">## read1.shorter &lt;- width(read1) &lt; width(read2)</span>
  521. <span class="c1">## mcols(read1)$category[mcols(read1)$category == &quot;unpaired&quot; &amp; read1.shorter] &lt;- NA</span>
  522. <span class="c1">## mcols(read2)$category[mcols(read2)$category == &quot;unpaired&quot; &amp; !read1.shorter] &lt;- NA</span>
  523. filename.template <span class="o">&lt;-</span> <span class="s">&quot;%s_read%s.%s.fastq&quot;</span>
  524. <span class="kr">for</span> <span class="p">(</span>cat <span class="kr">in</span> <span class="kp">names</span><span class="p">(</span>extension<span class="p">))</span> <span class="p">{</span>
  525. read1.for.category <span class="o">&lt;-</span> read1<span class="p">[</span>mcols<span class="p">(</span>read1<span class="p">)</span><span class="o">$</span><span class="kt">category</span> <span class="o">==</span> <span class="kp">cat</span><span class="p">]</span>
  526. read1.file.for.category <span class="o">&lt;-</span> <span class="kp">sprintf</span><span class="p">(</span>filename.template<span class="p">,</span> output.base<span class="p">,</span> <span class="m">1</span><span class="p">,</span> extension<span class="p">[[</span><span class="kp">cat</span><span class="p">]])</span>
  527. tsmsg<span class="p">(</span><span class="s">&quot;Writing &quot;</span><span class="p">,</span> read1.file.for.category<span class="p">)</span>
  528. write.QualityScaledDNAStringSet<span class="p">(</span>read1.for.category<span class="p">,</span>
  529. file<span class="o">=</span>read1.file.for.category<span class="p">,</span>
  530. append<span class="o">=</span><span class="kp">append</span><span class="p">)</span>
  531. read2.for.category <span class="o">&lt;-</span> read2<span class="p">[</span>mcols<span class="p">(</span>read2<span class="p">)</span><span class="o">$</span><span class="kt">category</span> <span class="o">==</span> <span class="kp">cat</span><span class="p">]</span>
  532. read2.file.for.category <span class="o">&lt;-</span> <span class="kp">sprintf</span><span class="p">(</span>filename.template<span class="p">,</span> output.base<span class="p">,</span> <span class="m">2</span><span class="p">,</span> extension<span class="p">[[</span><span class="kp">cat</span><span class="p">]])</span>
  533. tsmsg<span class="p">(</span><span class="s">&quot;Writing &quot;</span><span class="p">,</span> read2.file.for.category<span class="p">)</span>
  534. write.QualityScaledDNAStringSet<span class="p">(</span>read2.for.category<span class="p">,</span>
  535. file<span class="o">=</span>read2.file.for.category<span class="p">,</span>
  536. append<span class="o">=</span><span class="kp">append</span><span class="p">)</span>
  537. <span class="p">}</span>
  538. <span class="kr">return</span><span class="p">(</span><span class="kc">TRUE</span><span class="p">)</span>
  539. <span class="p">}</span>
  540. get.category.counts <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>deloxed.pairs<span class="p">)</span> <span class="p">{</span>
  541. r1cat <span class="o">&lt;-</span> mcols<span class="p">(</span>deloxed.pairs<span class="o">$</span>read1<span class="p">)</span><span class="o">$</span><span class="kp">category</span>
  542. r2cat <span class="o">&lt;-</span> mcols<span class="p">(</span>deloxed.pairs<span class="o">$</span>read2<span class="p">)</span><span class="o">$</span><span class="kp">category</span>
  543. x <span class="o">&lt;-</span> <span class="kp">table</span><span class="p">(</span>r1cat<span class="p">)[</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;mate&quot;</span><span class="p">,</span> <span class="s">&quot;non-mate&quot;</span><span class="p">,</span> <span class="s">&quot;negative&quot;</span><span class="p">)]</span>
  544. x<span class="p">[</span><span class="s">&quot;r1.single&quot;</span><span class="p">]</span> <span class="o">&lt;-</span> <span class="kp">sum</span><span class="p">(</span>r1cat <span class="o">==</span> <span class="s">&quot;unpaired&quot;</span><span class="p">)</span>
  545. x<span class="p">[</span><span class="s">&quot;r2.single&quot;</span><span class="p">]</span> <span class="o">&lt;-</span> <span class="kp">sum</span><span class="p">(</span>r2cat <span class="o">==</span> <span class="s">&quot;unpaired&quot;</span><span class="p">)</span>
  546. x<span class="p">[</span><span class="s">&quot;discard&quot;</span><span class="p">]</span> <span class="o">&lt;-</span> <span class="kp">length</span><span class="p">(</span>r1cat<span class="p">)</span> <span class="o">-</span> <span class="kp">sum</span><span class="p">(</span>x<span class="p">)</span>
  547. x
  548. <span class="p">}</span>
  549. mcparallel.quiet <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>expr<span class="p">,</span> <span class="kc">...</span><span class="p">)</span> <span class="p">{</span>
  550. parallel<span class="o">:::</span>mcparallel<span class="p">(</span><span class="kp">suppressMessages</span><span class="p">(</span>expr<span class="p">),</span> <span class="kc">...</span><span class="p">)</span>
  551. <span class="p">}</span>
  552. print.stats <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>category.counts<span class="p">)</span> <span class="p">{</span>
  553. category.pct <span class="o">&lt;-</span> setNames<span class="p">(</span><span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;%.3g%%&quot;</span><span class="p">,</span> category.counts <span class="o">/</span> <span class="kp">sum</span><span class="p">(</span>category.counts<span class="p">)</span> <span class="o">*</span> <span class="m">100</span><span class="p">),</span>
  554. <span class="kp">names</span><span class="p">(</span>category.counts<span class="p">))</span>
  555. x <span class="o">&lt;-</span> <span class="kp">rbind</span><span class="p">(</span>Counts<span class="o">=</span>category.counts<span class="p">,</span> Fractions<span class="o">=</span>category.pct<span class="p">)</span>
  556. <span class="kp">names</span><span class="p">(</span><span class="kp">dimnames</span><span class="p">(</span>x<span class="p">))</span> <span class="o">&lt;-</span> <span class="kt">c</span><span class="p">(</span><span class="s">&quot;Stat&quot;</span><span class="p">,</span> <span class="s">&quot;Category&quot;</span><span class="p">)</span>
  557. <span class="kp">print</span><span class="p">(</span>x<span class="p">,</span> quote<span class="o">=</span><span class="kc">FALSE</span><span class="p">,</span> justify<span class="o">=</span><span class="s">&quot;right&quot;</span><span class="p">)</span>
  558. <span class="p">}</span>
  559. main <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">()</span> <span class="p">{</span>
  560. opt <span class="o">&lt;-</span> parse_arguments<span class="p">()</span>
  561. print.option.list<span class="p">(</span>opt<span class="p">)</span>
  562. args <span class="o">&lt;-</span> opt<span class="o">$</span><span class="kp">args</span>
  563. opts <span class="o">&lt;-</span> opt<span class="o">$</span><span class="kp">options</span>
  564. <span class="kr">if</span> <span class="p">(</span><span class="o">!</span><span class="p">(</span><span class="kp">tolower</span><span class="p">(</span>opts<span class="p">[[</span><span class="s">&quot;read1-orientation&quot;</span><span class="p">]])</span> <span class="o">%in%</span> <span class="kt">c</span><span class="p">(</span><span class="s">&quot;in&quot;</span><span class="p">,</span> <span class="s">&quot;out&quot;</span><span class="p">)</span> <span class="o">&amp;&amp;</span>
  565. <span class="kp">tolower</span><span class="p">(</span>opts<span class="p">[[</span><span class="s">&quot;read2-orientation&quot;</span><span class="p">]])</span> <span class="o">%in%</span> <span class="kt">c</span><span class="p">(</span><span class="s">&quot;in&quot;</span><span class="p">,</span> <span class="s">&quot;out&quot;</span><span class="p">)</span> <span class="p">))</span> <span class="p">{</span>
  566. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;Valid orientations are \&quot;in\&quot; and \&quot;out\&quot;&quot;</span><span class="p">)</span>
  567. <span class="p">}</span>
  568. align.opts <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">(</span>match <span class="o">=</span> opts<span class="p">[[</span><span class="s">&quot;match-bonus&quot;</span><span class="p">]],</span>
  569. mismatch <span class="o">=</span> opts<span class="p">[[</span><span class="s">&quot;mismatch-penalty&quot;</span><span class="p">]],</span>
  570. gapOpening <span class="o">=</span> opts<span class="p">[[</span><span class="s">&quot;gap-open-penalty&quot;</span><span class="p">]],</span>
  571. gapExtension <span class="o">=</span> opts<span class="p">[[</span><span class="s">&quot;gap-extension-penalty&quot;</span><span class="p">]])</span>
  572. <span class="kp">stopifnot</span><span class="p">(</span>opts<span class="o">$</span><span class="sb">`min-call`</span> <span class="o">&gt;=</span> <span class="m">1</span> <span class="o">&amp;&amp;</span>
  573. opts<span class="o">$</span><span class="sb">`min-length`</span> <span class="o">&gt;=</span> <span class="m">0</span> <span class="o">&amp;&amp;</span>
  574. opts<span class="o">$</span><span class="sb">`jobs`</span> <span class="o">&gt;=</span> <span class="m">0</span><span class="p">)</span>
  575. <span class="c1">## Set jobs if requested</span>
  576. <span class="kr">if</span> <span class="p">(</span>opts<span class="o">$</span>jobs <span class="o">&gt;</span> <span class="m">0</span><span class="p">)</span> <span class="p">{</span>
  577. <span class="kp">options</span><span class="p">(</span>cores<span class="o">=</span>opts<span class="o">$</span>jobs<span class="p">)</span>
  578. <span class="p">}</span>
  579. tsmsg<span class="p">(</span><span class="s">&quot;Using &quot;</span><span class="p">,</span> getDoParWorkers<span class="p">(),</span> <span class="s">&quot; cores.&quot;</span><span class="p">)</span>
  580. paired <span class="o">&lt;-</span> <span class="o">!</span>opts<span class="p">[[</span><span class="s">&quot;single-read-mode&quot;</span><span class="p">]]</span>
  581. interleaved <span class="o">&lt;-</span> opts<span class="p">[[</span><span class="s">&quot;interleaved&quot;</span><span class="p">]]</span>
  582. <span class="kr">if</span> <span class="p">(</span><span class="o">!</span>paired <span class="o">&amp;&amp;</span> interleaved<span class="p">)</span> <span class="p">{</span>
  583. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;ERROR: You cannot specify both --interleaved and --single-read-mode&quot;</span><span class="p">)</span>
  584. <span class="p">}</span> <span class="kr">else</span> <span class="kr">if</span> <span class="p">(</span><span class="o">!</span>paired<span class="p">)</span> <span class="p">{</span>
  585. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span><span class="kp">args</span><span class="p">)</span> <span class="o">!=</span> <span class="m">3</span><span class="p">)</span> <span class="p">{</span>
  586. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;DeLoxer in single-read mode requires exactly 3 arguments&quot;</span><span class="p">)</span>
  587. <span class="p">}</span>
  588. subject.file <span class="o">&lt;-</span> <span class="kp">args</span><span class="p">[[</span><span class="m">1</span><span class="p">]]</span>
  589. read1.file <span class="o">&lt;-</span> <span class="kp">args</span><span class="p">[[</span><span class="m">2</span><span class="p">]]</span>
  590. read2.file <span class="o">&lt;-</span> <span class="kc">NULL</span>
  591. output.file <span class="o">&lt;-</span> <span class="kp">args</span><span class="p">[[</span><span class="m">3</span><span class="p">]]</span>
  592. <span class="p">}</span> <span class="kr">else</span> <span class="kr">if</span> <span class="p">(</span>interleaved<span class="p">)</span> <span class="p">{</span>
  593. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span><span class="kp">args</span><span class="p">)</span> <span class="o">!=</span> <span class="m">3</span><span class="p">)</span> <span class="p">{</span>
  594. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;DeLoxer interleaved input mode requires exactly 3 arguments&quot;</span><span class="p">)</span>
  595. <span class="p">}</span>
  596. subject.file <span class="o">&lt;-</span> <span class="kp">args</span><span class="p">[[</span><span class="m">1</span><span class="p">]]</span>
  597. read1.file <span class="o">&lt;-</span> <span class="kp">args</span><span class="p">[[</span><span class="m">2</span><span class="p">]]</span>
  598. read2.file <span class="o">&lt;-</span> <span class="kc">NULL</span>
  599. output.basename <span class="o">&lt;-</span> <span class="kp">args</span><span class="p">[[</span><span class="m">3</span><span class="p">]]</span>
  600. <span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
  601. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span><span class="kp">args</span><span class="p">)</span> <span class="o">!=</span> <span class="m">4</span><span class="p">)</span> <span class="p">{</span>
  602. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;DeLoxer requires exactly 4 arguments&quot;</span><span class="p">)</span>
  603. <span class="p">}</span>
  604. subject.file <span class="o">&lt;-</span> <span class="kp">args</span><span class="p">[[</span><span class="m">1</span><span class="p">]]</span>
  605. read1.file <span class="o">&lt;-</span> <span class="kp">args</span><span class="p">[[</span><span class="m">2</span><span class="p">]]</span>
  606. read2.file <span class="o">&lt;-</span> <span class="kp">args</span><span class="p">[[</span><span class="m">3</span><span class="p">]]</span>
  607. output.basename <span class="o">&lt;-</span> <span class="kp">args</span><span class="p">[[</span><span class="m">4</span><span class="p">]]</span>
  608. <span class="p">}</span>
  609. subj <span class="o">&lt;-</span> readDNAStringSet<span class="p">(</span>subject.file<span class="p">,</span> format<span class="o">=</span><span class="s">&quot;fasta&quot;</span><span class="p">,</span> nrec<span class="o">=</span><span class="m">1</span><span class="p">)[[</span><span class="m">1</span><span class="p">]]</span>
  610. yieldSize <span class="o">&lt;-</span> opts<span class="p">[[</span><span class="s">&quot;yield-size&quot;</span><span class="p">]]</span>
  611. <span class="kr">if</span> <span class="p">(</span>paired<span class="p">)</span> <span class="p">{</span>
  612. tsmsg<span class="p">(</span><span class="s">&quot;Deloxing and classifying paired sequences&quot;</span><span class="p">)</span>
  613. read1.stream <span class="o">&lt;-</span> FastqStreamer<span class="p">(</span>read1.file<span class="p">,</span> n<span class="o">=</span>yieldSize<span class="p">)</span>
  614. read2.stream <span class="o">&lt;-</span> <span class="kr">if</span> <span class="p">(</span><span class="o">!</span>interleaved<span class="p">)</span> FastqStreamer<span class="p">(</span>read2.file<span class="p">,</span> n<span class="o">=</span>yieldSize<span class="p">)</span>
  615. process.chunk <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>fq1<span class="p">,</span> fq2<span class="p">,</span> <span class="kp">append</span><span class="p">)</span> <span class="p">{</span>
  616. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span>fq1<span class="p">)</span> <span class="o">&lt;</span> <span class="m">1</span><span class="p">)</span>
  617. <span class="kr">return</span><span class="p">(</span><span class="kc">TRUE</span><span class="p">)</span>
  618. <span class="kr">if</span> <span class="p">(</span>interleaved<span class="p">)</span> <span class="p">{</span>
  619. <span class="kp">stopifnot</span><span class="p">(</span><span class="kp">is.null</span><span class="p">(</span>fq2<span class="p">))</span>
  620. deint <span class="o">&lt;-</span> deinterleave.pairs<span class="p">(</span>fq1<span class="p">)</span>
  621. fq1 <span class="o">&lt;-</span> deint<span class="p">[[</span><span class="m">1</span><span class="p">]]</span>
  622. fq2 <span class="o">&lt;-</span> deint<span class="p">[[</span><span class="m">2</span><span class="p">]]</span>
  623. <span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
  624. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span>fq1<span class="p">)</span> <span class="o">!=</span> <span class="kp">length</span><span class="p">(</span>fq2<span class="p">))</span>
  625. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;Both input files must have equal numbers of reads&quot;</span><span class="p">)</span>
  626. <span class="p">}</span>
  627. read1 <span class="o">&lt;-</span> as<span class="p">(</span>fq1<span class="p">,</span> <span class="s">&quot;QualityScaledDNAStringSet&quot;</span><span class="p">)</span>
  628. read2 <span class="o">&lt;-</span> as<span class="p">(</span>fq2<span class="p">,</span> <span class="s">&quot;QualityScaledDNAStringSet&quot;</span><span class="p">)</span>
  629. deloxed.pairs <span class="o">&lt;-</span>
  630. delox<span class="p">(</span>subj<span class="p">,</span> read1<span class="p">,</span> read2<span class="p">,</span>
  631. min.call<span class="o">=</span>opts<span class="p">[[</span><span class="s">&quot;min-call&quot;</span><span class="p">]],</span>
  632. interleaved<span class="o">=</span>interleaved<span class="p">,</span>
  633. read1.orientation<span class="o">=</span>opts<span class="p">[[</span><span class="s">&quot;read1-orientation&quot;</span><span class="p">]],</span>
  634. read2.orientation<span class="o">=</span>opts<span class="p">[[</span><span class="s">&quot;read2-orientation&quot;</span><span class="p">]],</span>
  635. align.opts<span class="o">=</span>align.opts<span class="p">)</span>
  636. save.deloxed.pairs.as.fastq<span class="p">(</span>deloxed.pairs<span class="o">$</span>read1<span class="p">,</span> deloxed.pairs<span class="o">$</span>read2<span class="p">,</span> output.basename<span class="p">,</span> append<span class="o">=</span><span class="kp">append</span><span class="p">)</span>
  637. ret <span class="o">&lt;-</span> get.category.counts<span class="p">(</span>deloxed.pairs<span class="p">)</span>
  638. <span class="kr">return</span><span class="p">(</span>ret<span class="p">)</span>
  639. <span class="p">}</span>
  640. fq1 <span class="o">&lt;-</span> yield<span class="p">(</span>read1.stream<span class="p">)</span>
  641. fq2 <span class="o">&lt;-</span> <span class="kr">if</span> <span class="p">(</span><span class="o">!</span>interleaved<span class="p">)</span> yield<span class="p">(</span>read2.stream<span class="p">)</span>
  642. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span>fq1<span class="p">)</span> <span class="o">==</span> <span class="m">0</span><span class="p">)</span>
  643. <span class="kp">warning</span><span class="p">(</span><span class="s">&quot;No reads were read from the input file.&quot;</span><span class="p">)</span>
  644. proc <span class="o">&lt;-</span> mcparallel.quiet<span class="p">(</span>process.chunk<span class="p">(</span>fq1<span class="p">,</span> fq2<span class="p">,</span> append<span class="o">=</span><span class="kc">FALSE</span><span class="p">))</span>
  645. reads.processed <span class="o">&lt;-</span> <span class="kp">length</span><span class="p">(</span>fq1<span class="p">)</span> <span class="o">/</span> <span class="kp">ifelse</span><span class="p">(</span>interleaved<span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">1</span><span class="p">)</span>
  646. category.stats <span class="o">&lt;-</span>
  647. category.counts <span class="o">&lt;-</span> <span class="kc">NULL</span>
  648. <span class="kr">while</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span>fq1 <span class="o">&lt;-</span> yield<span class="p">(</span>read1.stream<span class="p">)))</span> <span class="p">{</span>
  649. <span class="kr">if</span> <span class="p">(</span><span class="o">!</span>interleaved<span class="p">)</span>
  650. fq2 <span class="o">&lt;-</span> yield<span class="p">(</span>read2.stream<span class="p">)</span>
  651. prev.result <span class="o">&lt;-</span> mccollect<span class="p">(</span>proc<span class="p">)[[</span><span class="m">1</span><span class="p">]]</span>
  652. <span class="kr">if</span> <span class="p">(</span>is<span class="p">(</span>prev.result<span class="p">,</span> <span class="s">&quot;try-error&quot;</span><span class="p">))</span> <span class="p">{</span>
  653. tsmsg<span class="p">(</span><span class="s">&quot;Encountered error in deloxing subprocess:&quot;</span><span class="p">)</span>
  654. <span class="kp">stop</span><span class="p">(</span><span class="kp">attr</span><span class="p">(</span>prev.result<span class="p">,</span> <span class="s">&quot;condition&quot;</span><span class="p">))</span>
  655. <span class="p">}</span>
  656. <span class="kr">if</span> <span class="p">(</span><span class="kp">is.null</span><span class="p">(</span>category.counts<span class="p">))</span> <span class="p">{</span>
  657. category.counts <span class="o">&lt;-</span> prev.result
  658. <span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
  659. category.counts <span class="o">&lt;-</span> category.counts <span class="o">+</span> prev.result
  660. <span class="p">}</span>
  661. tsmsg<span class="p">(</span><span class="s">&quot;Category stats after processing &quot;</span><span class="p">,</span> reads.processed<span class="p">,</span> <span class="s">&quot; reads:&quot;</span><span class="p">)</span>
  662. <span class="c1">## category.pct &lt;- setNames(sprintf(&quot;%.3g%%&quot;, category.counts / sum(category.counts) * 100),</span>
  663. <span class="c1">## names(category.counts))</span>
  664. print.stats<span class="p">(</span>category.counts<span class="p">)</span>
  665. proc <span class="o">&lt;-</span> mcparallel.quiet<span class="p">(</span>process.chunk<span class="p">(</span>fq1<span class="p">,</span> fq2<span class="p">,</span> append<span class="o">=</span><span class="kc">TRUE</span><span class="p">))</span>
  666. reads.processed <span class="o">&lt;-</span> reads.processed <span class="o">+</span> <span class="kp">length</span><span class="p">(</span>fq1<span class="p">)</span> <span class="o">/</span> <span class="kp">ifelse</span><span class="p">(</span>interleaved<span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">1</span><span class="p">)</span>
  667. <span class="p">}</span>
  668. <span class="kp">close</span><span class="p">(</span>read1.stream<span class="p">)</span>
  669. <span class="kr">if</span> <span class="p">(</span><span class="o">!</span>interleaved<span class="p">)</span> <span class="kp">close</span><span class="p">(</span>read2.stream<span class="p">)</span>
  670. prev.result <span class="o">&lt;-</span> mccollect<span class="p">(</span>proc<span class="p">)[[</span><span class="m">1</span><span class="p">]]</span>
  671. <span class="kr">if</span> <span class="p">(</span><span class="kp">is.null</span><span class="p">(</span>category.counts<span class="p">))</span> <span class="p">{</span>
  672. category.counts <span class="o">&lt;-</span> prev.result
  673. <span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
  674. category.counts <span class="o">&lt;-</span> category.counts <span class="o">+</span> prev.result
  675. <span class="p">}</span>
  676. <span class="kr">if</span> <span class="p">(</span>is<span class="p">(</span>prev.result<span class="p">,</span> <span class="s">&quot;try-error&quot;</span><span class="p">))</span> <span class="p">{</span>
  677. tsmsg<span class="p">(</span><span class="s">&quot;Encountered error in deloxing subprocess:&quot;</span><span class="p">)</span>
  678. <span class="kp">stop</span><span class="p">(</span><span class="kp">attr</span><span class="p">(</span>prev.result<span class="p">,</span> <span class="s">&quot;condition&quot;</span><span class="p">))</span>
  679. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;Encountered error in deloxing&quot;</span><span class="p">)</span>
  680. <span class="p">}</span>
  681. tsmsg<span class="p">(</span><span class="s">&quot;Final category stats after processing &quot;</span><span class="p">,</span> reads.processed<span class="p">,</span> <span class="s">&quot; reads:&quot;</span><span class="p">)</span>
  682. print.stats<span class="p">(</span>category.counts<span class="p">)</span>
  683. <span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
  684. tsmsg<span class="p">(</span><span class="s">&quot;Deloxing single sequences&quot;</span><span class="p">)</span>
  685. read1.stream <span class="o">&lt;-</span> FastqStreamer<span class="p">(</span>read1.file<span class="p">,</span> n<span class="o">=</span>yieldSize<span class="p">)</span>
  686. process.chunk <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>fq<span class="p">,</span> <span class="kp">append</span><span class="p">)</span> <span class="p">{</span>
  687. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span>fq<span class="p">)</span> <span class="o">&lt;</span> <span class="m">1</span><span class="p">)</span>
  688. <span class="kr">return</span><span class="p">(</span><span class="kc">TRUE</span><span class="p">)</span>
  689. reads <span class="o">&lt;-</span> as<span class="p">(</span>fq<span class="p">,</span> <span class="s">&quot;QualityScaledDNAStringSet&quot;</span><span class="p">)</span>
  690. deloxed.reads <span class="o">&lt;-</span>
  691. delox<span class="p">(</span>subj<span class="p">,</span> reads<span class="p">,</span> <span class="kc">NULL</span><span class="p">,</span>
  692. min.call<span class="o">=</span>opts<span class="p">[[</span><span class="s">&quot;min-call&quot;</span><span class="p">]],</span>
  693. interleaved<span class="o">=</span>interleaved<span class="p">,</span>
  694. read1.orientation<span class="o">=</span>opts<span class="p">[[</span><span class="s">&quot;read1-orientation&quot;</span><span class="p">]],</span>
  695. read2.orientation<span class="o">=</span>opts<span class="p">[[</span><span class="s">&quot;read2-orientation&quot;</span><span class="p">]],</span>
  696. align.opts<span class="o">=</span>align.opts<span class="p">)</span>
  697. write.QualityScaledDNAStringSet<span class="p">(</span>deloxed.reads<span class="p">,</span> output.file<span class="p">,</span> append<span class="o">=</span><span class="kp">append</span><span class="p">)</span>
  698. <span class="kr">return</span><span class="p">(</span><span class="kc">TRUE</span><span class="p">)</span>
  699. <span class="p">}</span>
  700. <span class="c1">## First chunk is processed with append=FALSE to start the file</span>
  701. fq <span class="o">&lt;-</span> yield<span class="p">(</span>read1.stream<span class="p">)</span>
  702. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span>fq<span class="p">)</span> <span class="o">==</span> <span class="m">0</span><span class="p">)</span>
  703. <span class="kp">warning</span><span class="p">(</span><span class="s">&quot;No reads were read from the input file.&quot;</span><span class="p">)</span>
  704. proc <span class="o">&lt;-</span> mcparallel.quiet<span class="p">(</span><span class="kp">suppressMessages</span><span class="p">(</span>process.chunk<span class="p">(</span>fq<span class="p">,</span> append<span class="o">=</span><span class="kc">FALSE</span><span class="p">)))</span>
  705. reads.processed <span class="o">&lt;-</span> <span class="kp">length</span><span class="p">(</span>fq<span class="p">)</span>
  706. <span class="kr">while</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span>fq <span class="o">&lt;-</span> yield<span class="p">(</span>read1.stream<span class="p">)))</span> <span class="p">{</span>
  707. prev.result <span class="o">&lt;-</span> mccollect<span class="p">(</span>proc<span class="p">)[[</span><span class="m">1</span><span class="p">]]</span>
  708. <span class="kr">if</span> <span class="p">(</span>is<span class="p">(</span>prev.result<span class="p">,</span> <span class="s">&quot;try-error&quot;</span><span class="p">))</span> <span class="p">{</span>
  709. tsmsg<span class="p">(</span><span class="s">&quot;Encountered error in deloxing subprocess:&quot;</span><span class="p">)</span>
  710. <span class="kp">stop</span><span class="p">(</span><span class="kp">attr</span><span class="p">(</span>prev.result<span class="p">,</span> <span class="s">&quot;condition&quot;</span><span class="p">))</span>
  711. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;Encountered error in deloxing&quot;</span><span class="p">)</span>
  712. <span class="p">}</span>
  713. tsmsg<span class="p">(</span><span class="s">&quot;Processed &quot;</span><span class="p">,</span> reads.processed<span class="p">,</span> <span class="s">&quot; reads&quot;</span><span class="p">)</span>
  714. proc <span class="o">&lt;-</span> mcparallel.quiet<span class="p">(</span><span class="kp">suppressMessages</span><span class="p">(</span>process.chunk<span class="p">(</span>fq<span class="p">,</span> append<span class="o">=</span><span class="kc">TRUE</span><span class="p">)))</span>
  715. reads.processed <span class="o">&lt;-</span> reads.processed <span class="o">+</span> <span class="kp">length</span><span class="p">(</span>fq<span class="p">)</span>
  716. <span class="p">}</span>
  717. <span class="kp">close</span><span class="p">(</span>read1.stream<span class="p">)</span>
  718. prev.result <span class="o">&lt;-</span> mccollect<span class="p">(</span>proc<span class="p">)[[</span><span class="m">1</span><span class="p">]]</span>
  719. <span class="kr">if</span> <span class="p">(</span>is<span class="p">(</span>prev.result<span class="p">,</span> <span class="s">&quot;try-error&quot;</span><span class="p">))</span> <span class="p">{</span>
  720. tsmsg<span class="p">(</span><span class="s">&quot;Encountered error in deloxing subprocess:&quot;</span><span class="p">)</span>
  721. <span class="kp">stop</span><span class="p">(</span><span class="kp">attr</span><span class="p">(</span>prev.result<span class="p">,</span> <span class="s">&quot;condition&quot;</span><span class="p">))</span>
  722. <span class="kp">stop</span><span class="p">(</span><span class="s">&quot;Encountered error in deloxing&quot;</span><span class="p">)</span>
  723. <span class="p">}</span>
  724. tsmsg<span class="p">(</span><span class="s">&quot;Processed &quot;</span><span class="p">,</span> reads.processed<span class="p">,</span> <span class="s">&quot; reads&quot;</span><span class="p">)</span>
  725. <span class="p">}</span>
  726. tsmsg<span class="p">(</span><span class="s">&quot;Finished successful run&quot;</span><span class="p">)</span>
  727. <span class="p">}</span>
  728. main<span class="p">()</span>
  729. </pre></div>
  730. </body>
  731. </html>