blockbuster-pipeline.R.html 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
  2. "http://www.w3.org/TR/html4/strict.dtd">
  3. <html>
  4. <head>
  5. <title></title>
  6. <meta http-equiv="content-type" content="text/html; charset=utf-8">
  7. <style type="text/css">
  8. td.linenos { background-color: #f0f0f0; padding-right: 10px; }
  9. span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
  10. pre { line-height: 125%; }
  11. body .hll { background-color: #ffffcc }
  12. body { background: #f8f8f8; }
  13. body .c { color: #408080; font-style: italic } /* Comment */
  14. body .err { border: 1px solid #FF0000 } /* Error */
  15. body .k { color: #008000; font-weight: bold } /* Keyword */
  16. body .o { color: #666666 } /* Operator */
  17. body .ch { color: #408080; font-style: italic } /* Comment.Hashbang */
  18. body .cm { color: #408080; font-style: italic } /* Comment.Multiline */
  19. body .cp { color: #BC7A00 } /* Comment.Preproc */
  20. body .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */
  21. body .c1 { color: #408080; font-style: italic } /* Comment.Single */
  22. body .cs { color: #408080; font-style: italic } /* Comment.Special */
  23. body .gd { color: #A00000 } /* Generic.Deleted */
  24. body .ge { font-style: italic } /* Generic.Emph */
  25. body .gr { color: #FF0000 } /* Generic.Error */
  26. body .gh { color: #000080; font-weight: bold } /* Generic.Heading */
  27. body .gi { color: #00A000 } /* Generic.Inserted */
  28. body .go { color: #888888 } /* Generic.Output */
  29. body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
  30. body .gs { font-weight: bold } /* Generic.Strong */
  31. body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
  32. body .gt { color: #0044DD } /* Generic.Traceback */
  33. body .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
  34. body .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
  35. body .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
  36. body .kp { color: #008000 } /* Keyword.Pseudo */
  37. body .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
  38. body .kt { color: #B00040 } /* Keyword.Type */
  39. body .m { color: #666666 } /* Literal.Number */
  40. body .s { color: #BA2121 } /* Literal.String */
  41. body .na { color: #7D9029 } /* Name.Attribute */
  42. body .nb { color: #008000 } /* Name.Builtin */
  43. body .nc { color: #0000FF; font-weight: bold } /* Name.Class */
  44. body .no { color: #880000 } /* Name.Constant */
  45. body .nd { color: #AA22FF } /* Name.Decorator */
  46. body .ni { color: #999999; font-weight: bold } /* Name.Entity */
  47. body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
  48. body .nf { color: #0000FF } /* Name.Function */
  49. body .nl { color: #A0A000 } /* Name.Label */
  50. body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
  51. body .nt { color: #008000; font-weight: bold } /* Name.Tag */
  52. body .nv { color: #19177C } /* Name.Variable */
  53. body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
  54. body .w { color: #bbbbbb } /* Text.Whitespace */
  55. body .mb { color: #666666 } /* Literal.Number.Bin */
  56. body .mf { color: #666666 } /* Literal.Number.Float */
  57. body .mh { color: #666666 } /* Literal.Number.Hex */
  58. body .mi { color: #666666 } /* Literal.Number.Integer */
  59. body .mo { color: #666666 } /* Literal.Number.Oct */
  60. body .sa { color: #BA2121 } /* Literal.String.Affix */
  61. body .sb { color: #BA2121 } /* Literal.String.Backtick */
  62. body .sc { color: #BA2121 } /* Literal.String.Char */
  63. body .dl { color: #BA2121 } /* Literal.String.Delimiter */
  64. body .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
  65. body .s2 { color: #BA2121 } /* Literal.String.Double */
  66. body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
  67. body .sh { color: #BA2121 } /* Literal.String.Heredoc */
  68. body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
  69. body .sx { color: #008000 } /* Literal.String.Other */
  70. body .sr { color: #BB6688 } /* Literal.String.Regex */
  71. body .s1 { color: #BA2121 } /* Literal.String.Single */
  72. body .ss { color: #19177C } /* Literal.String.Symbol */
  73. body .bp { color: #008000 } /* Name.Builtin.Pseudo */
  74. body .fm { color: #0000FF } /* Name.Function.Magic */
  75. body .vc { color: #19177C } /* Name.Variable.Class */
  76. body .vg { color: #19177C } /* Name.Variable.Global */
  77. body .vi { color: #19177C } /* Name.Variable.Instance */
  78. body .vm { color: #19177C } /* Name.Variable.Magic */
  79. body .il { color: #666666 } /* Literal.Number.Integer.Long */
  80. </style>
  81. </head>
  82. <body>
  83. <h2></h2>
  84. <div class="highlight"><pre><span></span><span class="c1">#!/usr/bin/env Rscript</span>
  85. <span class="kn">source</span><span class="p">(</span><span class="s">&quot;common.R&quot;</span><span class="p">)</span>
  86. <span class="kn">library</span><span class="p">(</span>BSgenome.Hsapiens.UCSC.hg19<span class="p">)</span>
  87. textConnectionFromLines <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>lines<span class="p">,</span> linesep<span class="o">=</span><span class="s">&quot;\n&quot;</span><span class="p">)</span> <span class="p">{</span>
  88. <span class="kp">textConnection</span><span class="p">(</span>str_c<span class="p">(</span><span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;%s%s&quot;</span><span class="p">,</span> lines<span class="p">,</span> linesep<span class="p">),</span> collapse<span class="o">=</span><span class="s">&quot;&quot;</span><span class="p">))</span>
  89. <span class="p">}</span>
  90. <span class="c1">## Takes a GRangesList</span>
  91. calculate.block.entropy <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>grl<span class="p">,</span> expr.column<span class="o">=</span><span class="s">&quot;tagExpression&quot;</span><span class="p">)</span> <span class="p">{</span>
  92. groupfac <span class="o">&lt;-</span> <span class="kp">factor</span><span class="p">(</span><span class="kp">rep</span><span class="p">(</span><span class="kp">names</span><span class="p">(</span>grl<span class="p">),</span> elementLengths<span class="p">(</span>grl<span class="p">)))</span>
  93. exprs <span class="o">&lt;-</span> <span class="kp">as.vector</span><span class="p">(</span>elementMetadata<span class="p">(</span><span class="kp">unlist</span><span class="p">(</span>grl<span class="p">))[[</span>expr.column<span class="p">]])</span>
  94. total.exprs <span class="o">&lt;-</span> aggregate<span class="p">(</span>exprs<span class="p">,</span> by<span class="o">=</span><span class="kt">list</span><span class="p">(</span>groupfac<span class="p">),</span> FUN<span class="o">=</span><span class="kp">sum</span><span class="p">)</span><span class="o">$</span>x
  95. qi <span class="o">&lt;-</span> exprs <span class="o">/</span> <span class="kp">rep</span><span class="p">(</span>total.exprs<span class="p">,</span> elementLengths<span class="p">(</span>grl<span class="p">))</span>
  96. qi.times.log <span class="o">&lt;-</span> qi <span class="o">*</span> <span class="kp">log2</span><span class="p">(</span>qi<span class="p">)</span>
  97. results <span class="o">&lt;-</span> <span class="o">-</span>aggregate<span class="p">(</span>qi.times.log<span class="p">,</span> by<span class="o">=</span><span class="kt">list</span><span class="p">(</span>groupfac<span class="p">),</span> FUN<span class="o">=</span><span class="kp">sum</span><span class="p">)</span><span class="o">$</span>x
  98. <span class="kp">names</span><span class="p">(</span>results<span class="p">)</span> <span class="o">&lt;-</span> <span class="kp">names</span><span class="p">(</span>grl<span class="p">)</span>
  99. results
  100. <span class="p">}</span>
  101. <span class="c1">## http://hoffmann.bioinf.uni-leipzig.de/LIFE/blockbuster.html</span>
  102. read.blockbuster.tag.output <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>bbout<span class="p">)</span> <span class="p">{</span>
  103. x <span class="o">&lt;-</span> <span class="kp">readLines</span><span class="p">(</span>bbout<span class="p">)</span>
  104. cluster.line.numbers <span class="o">&lt;-</span> <span class="kp">which</span><span class="p">(</span>str_sub<span class="p">(</span>x<span class="p">,</span> <span class="m">1</span><span class="p">,</span><span class="m">1</span><span class="p">)</span> <span class="o">==</span> <span class="s">&quot;&gt;&quot;</span><span class="p">)</span>
  105. cluster.lines <span class="o">&lt;-</span> str_sub<span class="p">(</span>x<span class="p">[</span>cluster.line.numbers<span class="p">],</span> <span class="m">2</span><span class="p">)</span>
  106. cluster.table <span class="o">&lt;-</span> read.table<span class="p">(</span>textConnectionFromLines<span class="p">(</span>cluster.lines<span class="p">),</span>
  107. col.names<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;clusterID&quot;</span><span class="p">,</span> <span class="s">&quot;chrom&quot;</span><span class="p">,</span> <span class="s">&quot;clusterStart&quot;</span><span class="p">,</span> <span class="s">&quot;clusterEnd&quot;</span><span class="p">,</span> <span class="s">&quot;strand&quot;</span><span class="p">,</span> <span class="s">&quot;ClusterExpression&quot;</span><span class="p">,</span> <span class="s">&quot;tagCount&quot;</span><span class="p">,</span> <span class="s">&quot;blockCount&quot;</span><span class="p">),</span>
  108. colClasses<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;character&quot;</span><span class="p">,</span> <span class="s">&quot;factor&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;factor&quot;</span><span class="p">,</span> <span class="s">&quot;numeric&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">))</span>
  109. tag.lines <span class="o">&lt;-</span> x<span class="p">[</span><span class="o">-</span>cluster.line.numbers<span class="p">]</span>
  110. cluster.tag.line.counts <span class="o">&lt;-</span> cluster.line.numbers<span class="p">[</span><span class="m">-1</span><span class="p">]</span> <span class="o">-</span> cluster.line.numbers<span class="p">[</span><span class="o">-</span><span class="kp">length</span><span class="p">(</span>cluster.line.numbers<span class="p">)]</span> <span class="o">-</span> <span class="m">1</span>
  111. cluster.tag.line.counts <span class="o">&lt;-</span> <span class="kt">c</span><span class="p">(</span>cluster.tag.line.counts<span class="p">,</span> <span class="kp">length</span><span class="p">(</span>tag.lines<span class="p">)</span> <span class="o">-</span> <span class="kp">sum</span><span class="p">(</span>cluster.tag.line.counts<span class="p">))</span>
  112. tag.table <span class="o">&lt;-</span> read.table<span class="p">(</span>textConnectionFromLines<span class="p">(</span>tag.lines<span class="p">),</span>
  113. col.names<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;tagChrom&quot;</span><span class="p">,</span> <span class="s">&quot;tagStart&quot;</span><span class="p">,</span> <span class="s">&quot;tagEnd&quot;</span><span class="p">,</span> <span class="s">&quot;tagID&quot;</span><span class="p">,</span> <span class="s">&quot;tagExpression&quot;</span><span class="p">,</span> <span class="s">&quot;tagStrand&quot;</span><span class="p">,</span> <span class="s">&quot;blockNb&quot;</span><span class="p">),</span>
  114. colClasses<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;factor&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;character&quot;</span><span class="p">,</span> <span class="s">&quot;numeric&quot;</span><span class="p">,</span> <span class="s">&quot;factor&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">))</span>
  115. tag.table<span class="o">$</span>clusterID <span class="o">&lt;-</span> <span class="kp">rep</span><span class="p">(</span>cluster.table<span class="o">$</span>clusterID<span class="p">,</span> cluster.tag.line.counts<span class="p">)</span>
  116. tag.table<span class="o">$</span>blockID <span class="o">&lt;-</span> <span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;%s/B%s&quot;</span><span class="p">,</span> tag.table<span class="o">$</span>clusterID<span class="p">,</span> tag.table<span class="o">$</span>blockNb<span class="p">)</span>
  117. tags <span class="o">&lt;-</span> table.to.granges<span class="p">(</span>tag.table<span class="p">,</span> seqnames.column<span class="o">=</span><span class="s">&quot;tagChrom&quot;</span><span class="p">,</span> start.column<span class="o">=</span><span class="s">&quot;tagStart&quot;</span><span class="p">,</span> end.column<span class="o">=</span><span class="s">&quot;tagEnd&quot;</span><span class="p">,</span> strand.column<span class="o">=</span><span class="s">&quot;tagStrand&quot;</span><span class="p">,</span> seqlengths<span class="o">=</span><span class="s">&quot;hg19&quot;</span><span class="p">)</span>
  118. tags <span class="o">&lt;-</span> <span class="kp">split</span><span class="p">(</span>tags<span class="p">,</span> <span class="kp">as.vector</span><span class="p">(</span>elementMetadata<span class="p">(</span>tags<span class="p">)</span><span class="o">$</span>blockID<span class="p">))</span>
  119. <span class="kr">return</span><span class="p">(</span>tags<span class="p">)</span>
  120. <span class="p">}</span>
  121. read.blockbuster.output <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>bbout<span class="p">,</span> bbout.tags<span class="o">=</span><span class="kc">NULL</span><span class="p">)</span> <span class="p">{</span>
  122. x <span class="o">&lt;-</span> <span class="kp">readLines</span><span class="p">(</span>bbout<span class="p">)</span>
  123. cluster.line.numbers <span class="o">&lt;-</span> <span class="kp">which</span><span class="p">(</span>str_sub<span class="p">(</span>x<span class="p">,</span> <span class="m">1</span><span class="p">,</span><span class="m">1</span><span class="p">)</span> <span class="o">==</span> <span class="s">&quot;&gt;&quot;</span><span class="p">)</span>
  124. cluster.lines <span class="o">&lt;-</span> str_sub<span class="p">(</span>x<span class="p">[</span>cluster.line.numbers<span class="p">],</span> <span class="m">2</span><span class="p">)</span>
  125. block.lines <span class="o">&lt;-</span> x<span class="p">[</span><span class="o">-</span>cluster.line.numbers<span class="p">]</span>
  126. cluster.table <span class="o">&lt;-</span> read.table<span class="p">(</span>textConnectionFromLines<span class="p">(</span>cluster.lines<span class="p">),</span>
  127. col.names<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;clusterID&quot;</span><span class="p">,</span> <span class="s">&quot;chrom&quot;</span><span class="p">,</span> <span class="s">&quot;clusterStart&quot;</span><span class="p">,</span> <span class="s">&quot;clusterEnd&quot;</span><span class="p">,</span> <span class="s">&quot;strand&quot;</span><span class="p">,</span> <span class="s">&quot;ClusterExpression&quot;</span><span class="p">,</span> <span class="s">&quot;tagCount&quot;</span><span class="p">,</span> <span class="s">&quot;blockCount&quot;</span><span class="p">),</span>
  128. colClasses<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;character&quot;</span><span class="p">,</span> <span class="s">&quot;factor&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;factor&quot;</span><span class="p">,</span> <span class="s">&quot;numeric&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">))</span>
  129. clusters <span class="o">&lt;-</span> table.to.granges<span class="p">(</span>cluster.table<span class="p">,</span> seqnames.column<span class="o">=</span><span class="s">&quot;chrom&quot;</span><span class="p">,</span> start.column<span class="o">=</span><span class="s">&quot;clusterStart&quot;</span><span class="p">,</span> end.column<span class="o">=</span><span class="s">&quot;clusterEnd&quot;</span><span class="p">,</span> seqlengths<span class="o">=</span><span class="s">&quot;hg19&quot;</span><span class="p">)</span>
  130. block.table <span class="o">&lt;-</span> read.table<span class="p">(</span>textConnectionFromLines<span class="p">(</span>block.lines<span class="p">),</span>
  131. col.names<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;blockNb&quot;</span><span class="p">,</span> <span class="s">&quot;blockChrom&quot;</span><span class="p">,</span> <span class="s">&quot;blockStart&quot;</span><span class="p">,</span> <span class="s">&quot;blockEnd&quot;</span><span class="p">,</span> <span class="s">&quot;blockStrand&quot;</span><span class="p">,</span> <span class="s">&quot;blockExpression&quot;</span><span class="p">,</span> <span class="s">&quot;readCount&quot;</span><span class="p">),</span>
  132. colClasses<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;factor&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">,</span> <span class="s">&quot;factor&quot;</span><span class="p">,</span> <span class="s">&quot;numeric&quot;</span><span class="p">,</span> <span class="s">&quot;integer&quot;</span><span class="p">))</span>
  133. block.table<span class="o">$</span>clusterID <span class="o">&lt;-</span> <span class="kp">rep</span><span class="p">(</span>cluster.table<span class="o">$</span>clusterID<span class="p">,</span> cluster.table<span class="o">$</span>blockCount<span class="p">)</span>
  134. block.table<span class="o">$</span>blockID <span class="o">&lt;-</span> <span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;%s/B%s&quot;</span><span class="p">,</span> block.table<span class="o">$</span>clusterID<span class="p">,</span> block.table<span class="o">$</span>blockNb<span class="p">)</span>
  135. blocks. <span class="o">&lt;-</span> table.to.granges<span class="p">(</span>block.table<span class="p">,</span> seqnames.column<span class="o">=</span><span class="s">&quot;blockChrom&quot;</span><span class="p">,</span> start.column<span class="o">=</span><span class="s">&quot;blockStart&quot;</span><span class="p">,</span> end.column<span class="o">=</span><span class="s">&quot;blockEnd&quot;</span><span class="p">,</span> strand.column<span class="o">=</span><span class="s">&quot;blockStrand&quot;</span><span class="p">,</span> seqlengths<span class="o">=</span><span class="s">&quot;hg19&quot;</span><span class="p">)</span>
  136. <span class="c1">## blocks. &lt;- split(blocks., as.vector(elementMetadata(blocks.)$clusterID))</span>
  137. retval <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">(</span>clusters<span class="o">=</span>clusters<span class="p">,</span> blocks<span class="o">=</span>blocks.<span class="p">)</span>
  138. <span class="kr">if</span> <span class="p">(</span><span class="o">!</span><span class="kp">is.null</span><span class="p">(</span>bbout.tags<span class="p">))</span> <span class="p">{</span>
  139. retval<span class="o">$</span>tags <span class="o">&lt;-</span> read.blockbuster.tag.output<span class="p">(</span>bbout.tags<span class="p">)</span>
  140. <span class="p">}</span>
  141. <span class="kr">return</span><span class="p">(</span>retval<span class="p">)</span>
  142. <span class="p">}</span>
  143. blockbuster.path <span class="o">&lt;-</span> <span class="s">&quot;/home/ryan/bin/blockbuster&quot;</span>
  144. run.blockbuster <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>gr<span class="p">,</span> <span class="kc">...</span><span class="p">)</span> <span class="p">{</span>
  145. temp.bed.file <span class="o">&lt;-</span> <span class="kp">tempfile</span><span class="p">(</span>fileext<span class="o">=</span><span class="s">&quot;.bed&quot;</span><span class="p">)</span>
  146. temp.bbout.file <span class="o">&lt;-</span> <span class="kp">tempfile</span><span class="p">(</span>fileext<span class="o">=</span><span class="s">&quot;.bbout&quot;</span><span class="p">)</span>
  147. temp.bbout.tag.file <span class="o">&lt;-</span> <span class="kp">tempfile</span><span class="p">(</span>fileext<span class="o">=</span><span class="s">&quot;.tag.bbout&quot;</span><span class="p">)</span>
  148. <span class="kp">tryCatch</span><span class="p">({</span>
  149. export<span class="p">(</span><span class="kp">sort</span><span class="p">(</span>gr<span class="p">),</span> temp.bed.file<span class="p">,</span> format<span class="o">=</span><span class="s">&quot;BED&quot;</span><span class="p">)</span>
  150. extra.args <span class="o">&lt;-</span> <span class="kt">list</span><span class="p">(</span><span class="kc">...</span><span class="p">)</span>
  151. bbargs <span class="o">&lt;-</span> <span class="kt">c</span><span class="p">(</span><span class="kp">rbind</span><span class="p">(</span><span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;-%s&quot;</span><span class="p">,</span> <span class="kp">names</span><span class="p">(</span>extra.args<span class="p">)),</span> extra.args<span class="p">),</span> <span class="s">&quot;-print&quot;</span><span class="p">,</span> <span class="s">&quot;1&quot;</span><span class="p">,</span> temp.bed.file<span class="p">)</span>
  152. bbargs.tags <span class="o">&lt;-</span> <span class="kt">c</span><span class="p">(</span><span class="kp">rbind</span><span class="p">(</span><span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;-%s&quot;</span><span class="p">,</span> <span class="kp">names</span><span class="p">(</span>extra.args<span class="p">)),</span> extra.args<span class="p">),</span> <span class="s">&quot;-print&quot;</span><span class="p">,</span> <span class="s">&quot;2&quot;</span><span class="p">,</span> temp.bed.file<span class="p">)</span>
  153. <span class="kp">system2</span><span class="p">(</span>blockbuster.path<span class="p">,</span> args<span class="o">=</span>bbargs<span class="p">,</span>
  154. stdout<span class="o">=</span>temp.bbout.file<span class="p">)</span>
  155. <span class="kp">system2</span><span class="p">(</span>blockbuster.path<span class="p">,</span> args<span class="o">=</span>bbargs.tags<span class="p">,</span>
  156. stdout<span class="o">=</span>temp.bbout.tag.file<span class="p">)</span>
  157. x <span class="o">&lt;-</span> read.blockbuster.output<span class="p">(</span>temp.bbout.file<span class="p">,</span> temp.bbout.tag.file<span class="p">)</span>
  158. x
  159. <span class="p">},</span> finally<span class="o">=</span><span class="p">{</span>
  160. <span class="kp">unlink</span><span class="p">(</span><span class="kt">c</span><span class="p">(</span>temp.bed.file<span class="p">,</span> temp.bbout.file<span class="p">,</span> temp.bbout.tag.file<span class="p">))</span>
  161. <span class="p">})</span>
  162. <span class="p">}</span>
  163. <span class="c1">## Load the reads from the bam file</span>
  164. infile <span class="o">&lt;-</span> <span class="s">&quot;./all-results.bam&quot;</span>
  165. read.ranges <span class="o">&lt;-</span> <span class="p">{</span>
  166. x <span class="o">&lt;-</span> readAlignedRanges<span class="p">(</span>infile<span class="p">,</span> include.reads<span class="o">=</span><span class="kc">TRUE</span><span class="p">)</span>
  167. <span class="c1">## Throw away unneeded columns</span>
  168. elementMetadata<span class="p">(</span>x<span class="p">)</span> <span class="o">&lt;-</span> <span class="kp">subset</span><span class="p">(</span>elementMetadata<span class="p">(</span>x<span class="p">),</span> select<span class="o">=-</span><span class="kt">c</span><span class="p">(</span>id<span class="p">,</span>qual<span class="p">,</span>flag<span class="p">))</span>
  169. read.multi.map.counts <span class="o">&lt;-</span> <span class="kp">table</span><span class="p">(</span><span class="kp">as.vector</span><span class="p">(</span>elementMetadata<span class="p">(</span>x<span class="p">)</span><span class="o">$</span><span class="kp">seq</span><span class="p">))</span>
  170. elementMetadata<span class="p">(</span>x<span class="p">)</span><span class="o">$</span>multimap <span class="o">&lt;-</span> Rle<span class="p">(</span><span class="kp">as.vector</span><span class="p">(</span>read.multi.map.counts<span class="p">[</span><span class="kp">as.vector</span><span class="p">(</span>elementMetadata<span class="p">(</span>x<span class="p">)</span><span class="o">$</span><span class="kp">seq</span><span class="p">)]))</span>
  171. x
  172. <span class="p">}</span>
  173. <span class="c1">## Get needed annotations</span>
  174. <span class="c1">## rRNA &amp; tRNA (from the repeats table)</span>
  175. repeat.table <span class="o">&lt;-</span> get.ucsc.table<span class="p">(</span><span class="s">&quot;rmsk&quot;</span><span class="p">,</span> <span class="s">&quot;rmsk&quot;</span><span class="p">,</span> genome<span class="o">=</span><span class="s">&quot;hg19&quot;</span><span class="p">)</span>
  176. repeat.ranges <span class="o">&lt;-</span> table.to.granges<span class="p">(</span>repeat.table<span class="p">,</span> seqnames.column<span class="o">=</span><span class="s">&quot;genoName&quot;</span><span class="p">,</span> start.column<span class="o">=</span><span class="s">&quot;genoStart&quot;</span><span class="p">,</span> end.column<span class="o">=</span><span class="s">&quot;genoEnd&quot;</span><span class="p">,</span> seqlengths<span class="o">=</span><span class="s">&quot;hg19&quot;</span><span class="p">)</span>
  177. trna.ranges <span class="o">&lt;-</span> repeat.ranges<span class="p">[</span>elementMetadata<span class="p">(</span>repeat.ranges<span class="p">)</span><span class="o">$</span>repClass <span class="o">==</span> <span class="s">&quot;tRNA&quot;</span><span class="p">]</span>
  178. rrna.ranges <span class="o">&lt;-</span> repeat.ranges<span class="p">[</span>elementMetadata<span class="p">(</span>repeat.ranges<span class="p">)</span><span class="o">$</span>repClass <span class="o">==</span> <span class="s">&quot;rRNA&quot;</span><span class="p">]</span>
  179. <span class="c1">## miRNA &amp; snoRNA</span>
  180. small.rna.table <span class="o">&lt;-</span> <span class="kp">subset</span><span class="p">(</span>get.ucsc.table<span class="p">(</span><span class="s">&quot;wgRna&quot;</span><span class="p">,</span> <span class="s">&quot;wgRna&quot;</span><span class="p">,</span> genome<span class="o">=</span><span class="s">&quot;hg19&quot;</span><span class="p">),</span> select<span class="o">=-</span><span class="kt">c</span><span class="p">(</span>thickStart<span class="p">,</span>thickEnd<span class="p">,</span>bin<span class="p">))</span>
  181. small.rna.ranges <span class="o">&lt;-</span> table.to.granges<span class="p">(</span>small.rna.table<span class="p">,</span> seqnames.column<span class="o">=</span><span class="s">&quot;chrom&quot;</span><span class="p">,</span> start.column<span class="o">=</span><span class="s">&quot;chromStart&quot;</span><span class="p">,</span> end.column<span class="o">=</span><span class="s">&quot;chromEnd&quot;</span><span class="p">,</span> seqlengths<span class="o">=</span><span class="s">&quot;hg19&quot;</span><span class="p">)</span>
  182. miRNA.types <span class="o">&lt;-</span> <span class="s">&quot;miRNA&quot;</span>
  183. snoRNA.types <span class="o">&lt;-</span> <span class="kt">c</span><span class="p">(</span><span class="s">&quot;CDBox&quot;</span><span class="p">,</span> <span class="s">&quot;HAcaBox&quot;</span><span class="p">)</span>
  184. miRNA.ranges <span class="o">&lt;-</span> small.rna.ranges<span class="p">[</span> elementMetadata<span class="p">(</span>small.rna.ranges<span class="p">)</span><span class="o">$</span>type <span class="o">%in%</span> miRNA.types <span class="p">]</span>
  185. snoRNA.ranges <span class="o">&lt;-</span> small.rna.ranges<span class="p">[</span> elementMetadata<span class="p">(</span>small.rna.ranges<span class="p">)</span><span class="o">$</span>type <span class="o">%in%</span> snoRNA.types <span class="p">]</span>
  186. <span class="c1">## chrM</span>
  187. chrM.range <span class="o">&lt;-</span> GRanges<span class="p">(</span>seqnames<span class="o">=</span><span class="s">&quot;chrM&quot;</span><span class="p">,</span> IRanges<span class="p">(</span>start<span class="o">=</span><span class="m">1</span><span class="p">,</span>end<span class="o">=</span>seqlengths<span class="p">(</span>read.ranges<span class="p">)[[</span><span class="s">&quot;chrM&quot;</span><span class="p">]]),</span> seqlengths<span class="o">=</span>seqlengths<span class="p">(</span>read.ranges<span class="p">))</span>
  188. <span class="c1">## For each sequence that maps once to an anotated rRNA, tRNA, miRNA,</span>
  189. <span class="c1">## or chrM, remove *all* mappings for that sequence.</span>
  190. forbidden.ranges <span class="o">&lt;-</span>
  191. <span class="kp">Reduce</span><span class="p">(</span><span class="kp">append</span><span class="p">,</span>
  192. llply<span class="p">(</span><span class="kt">list</span><span class="p">(</span>rrna.ranges<span class="p">,</span>
  193. trna.ranges<span class="p">,</span>
  194. <span class="c1">## miRNA.ranges,</span>
  195. chrM.range<span class="p">),</span>
  196. <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> <span class="p">{</span> elementMetadata<span class="p">(</span>x<span class="p">)</span> <span class="o">&lt;-</span> <span class="kc">NULL</span><span class="p">;</span> x <span class="p">}))</span>
  197. generate.null.ranges <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>y<span class="p">)</span> <span class="p">{</span>
  198. x <span class="o">&lt;-</span> GRanges<span class="p">(</span>seqnames<span class="o">=</span><span class="kp">names</span><span class="p">(</span>seqlengths<span class="p">(</span>y<span class="p">)),</span> ranges<span class="o">=</span>IRanges<span class="p">(</span><span class="m">1</span><span class="p">,</span><span class="m">0</span><span class="p">),</span> strand<span class="o">=</span><span class="s">&quot;*&quot;</span><span class="p">,</span> seqlengths<span class="o">=</span>seqlengths<span class="p">(</span>y<span class="p">))</span>
  199. <span class="kr">for</span> <span class="p">(</span>i <span class="kr">in</span> <span class="kp">names</span><span class="p">(</span>elementMetadata<span class="p">(</span>y<span class="p">)))</span> <span class="p">{</span>
  200. elementMetadata<span class="p">(</span>x<span class="p">)[[</span>i<span class="p">]]</span> <span class="o">&lt;-</span> as<span class="p">(</span><span class="kc">NA</span><span class="p">,</span> <span class="kp">class</span><span class="p">(</span><span class="kp">as.vector</span><span class="p">(</span>elementMetadata<span class="p">(</span>y<span class="p">)[[</span>i<span class="p">]])))</span>
  201. <span class="p">}</span>
  202. x
  203. <span class="p">}</span>
  204. select.nearest <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">,</span> y<span class="p">)</span> <span class="p">{</span>
  205. y <span class="o">&lt;-</span> <span class="kp">append</span><span class="p">(</span>y<span class="p">,</span> generate.null.ranges<span class="p">(</span>y<span class="p">))</span>
  206. y<span class="p">[</span>nearest<span class="p">(</span>x<span class="p">,</span>y<span class="p">)]</span>
  207. <span class="p">}</span>
  208. annotate.by.granges <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>peaks<span class="p">,</span> gr<span class="p">,</span> annot.columns<span class="p">)</span> <span class="p">{</span>
  209. <span class="kr">for</span> <span class="p">(</span>i <span class="kr">in</span> <span class="kp">names</span><span class="p">(</span>elementMetadata<span class="p">(</span>gr<span class="p">)))</span> <span class="p">{</span>
  210. elementMetadata<span class="p">(</span>gr<span class="p">)[[</span>i<span class="p">]]</span> <span class="o">&lt;-</span> <span class="kp">as.vector</span><span class="p">(</span>elementMetadata<span class="p">(</span>gr<span class="p">)[[</span>i<span class="p">]])</span>
  211. <span class="p">}</span>
  212. nearest.ranges <span class="o">&lt;-</span> select.nearest<span class="p">(</span>peaks<span class="p">,</span> gr<span class="p">)</span>
  213. nearest.distance <span class="o">&lt;-</span> distance<span class="p">(</span>peaks<span class="p">,</span> nearest.ranges<span class="p">,</span> ignore.strand<span class="o">=</span><span class="kc">TRUE</span><span class="p">)</span>
  214. in.ranges <span class="o">&lt;-</span> Rle<span class="p">(</span>nearest.distance <span class="o">==</span> <span class="m">0</span><span class="p">)</span>
  215. annot.data <span class="o">&lt;-</span> elementMetadata<span class="p">(</span>nearest.ranges<span class="p">)[</span>annot.columns<span class="p">]</span>
  216. <span class="kr">if</span> <span class="p">(</span><span class="o">!</span><span class="kp">is.null</span><span class="p">(</span><span class="kp">names</span><span class="p">(</span>annot.columns<span class="p">)))</span> <span class="p">{</span>
  217. <span class="kp">names</span><span class="p">(</span>annot.data<span class="p">)</span> <span class="o">&lt;-</span> <span class="kp">names</span><span class="p">(</span>annot.columns<span class="p">)</span>
  218. <span class="p">}</span>
  219. DataFrame<span class="p">(</span>overlap<span class="o">=</span>in.ranges<span class="p">,</span> distance<span class="o">=</span>nearest.distance<span class="p">,</span> annot.data<span class="p">)</span>
  220. <span class="p">}</span>
  221. forbidden.seqs <span class="o">&lt;-</span> <span class="kp">unique</span><span class="p">(</span><span class="kp">as.vector</span><span class="p">(</span>elementMetadata<span class="p">(</span>read.ranges<span class="p">)</span><span class="o">$</span><span class="kp">seq</span><span class="p">[</span>read.ranges <span class="o">%in%</span> forbidden.ranges<span class="p">]))</span>
  222. forbidden.indices <span class="o">&lt;-</span> <span class="o">!</span><span class="kp">is.na</span><span class="p">(</span>findOverlaps<span class="p">(</span>read.ranges<span class="p">,</span> forbidden.ranges<span class="p">,</span> select<span class="o">=</span><span class="s">&quot;first&quot;</span><span class="p">,</span> ignore.strand<span class="o">=</span><span class="kc">TRUE</span><span class="p">))</span>
  223. forbidden.read.ranges <span class="o">&lt;-</span> read.ranges<span class="p">[</span>forbidden.indices<span class="p">]</span>
  224. read.ranges <span class="o">&lt;-</span> read.ranges<span class="p">[</span><span class="o">!</span>forbidden.indices<span class="p">]</span>
  225. <span class="c1">## Read the counts table</span>
  226. read.counts <span class="o">&lt;-</span> <span class="p">{</span>
  227. x <span class="o">&lt;-</span> read.csv<span class="p">(</span><span class="s">&quot;./data/R21_R82_read_expr_matrix_no_blanks&quot;</span><span class="p">,</span>
  228. stringsAsFactors<span class="o">=</span><span class="kc">FALSE</span><span class="p">,</span> sep<span class="o">=</span><span class="s">&quot;\t&quot;</span><span class="p">,</span> header<span class="o">=</span><span class="kc">TRUE</span><span class="p">,</span> row.names<span class="o">=</span><span class="m">1</span><span class="p">)</span>
  229. <span class="kp">row.names</span><span class="p">(</span>x<span class="p">)</span> <span class="o">&lt;-</span> str_trim<span class="p">(</span><span class="kp">row.names</span><span class="p">(</span>x<span class="p">))</span>
  230. <span class="kp">names</span><span class="p">(</span>x<span class="p">)</span> <span class="o">&lt;-</span> str_replace<span class="p">(</span><span class="kp">names</span><span class="p">(</span>x<span class="p">),</span> <span class="s">&quot;_collapsed_sorted_with_zeros$&quot;</span><span class="p">,</span> <span class="s">&quot;&quot;</span><span class="p">)</span>
  231. x <span class="o">&lt;-</span> x<span class="p">[</span><span class="kp">row.names</span><span class="p">(</span>x<span class="p">)</span> <span class="o">%in%</span> elementMetadata<span class="p">(</span>read.ranges<span class="p">)</span><span class="o">$</span><span class="kp">seq</span><span class="p">,]</span>
  232. x <span class="o">&lt;-</span> x<span class="p">[</span><span class="kp">order</span><span class="p">(</span><span class="kp">names</span><span class="p">(</span>x<span class="p">))]</span>
  233. x
  234. <span class="p">}</span>
  235. <span class="c1">## Create per-sample bed files with score = count / multimap</span>
  236. sample.read.ranges <span class="o">&lt;-</span>
  237. llply<span class="p">(</span><span class="kp">names</span><span class="p">(</span>read.counts<span class="p">),</span>
  238. <span class="kr">function</span> <span class="p">(</span><span class="kp">sample</span><span class="p">)</span> <span class="p">{</span>
  239. x <span class="o">&lt;-</span> read.ranges
  240. <span class="c1">## Score = sample count / multimap</span>
  241. elementMetadata<span class="p">(</span>x<span class="p">)</span><span class="o">$</span>score <span class="o">&lt;-</span>
  242. <span class="kp">as.vector</span><span class="p">(</span>read.counts<span class="p">[</span><span class="kp">as.vector</span><span class="p">(</span>elementMetadata<span class="p">(</span>x<span class="p">)</span><span class="o">$</span><span class="kp">seq</span><span class="p">),</span> <span class="kp">sample</span><span class="p">]</span> <span class="o">/</span> elementMetadata<span class="p">(</span>x<span class="p">)</span><span class="o">$</span>multimap<span class="p">)</span>
  243. <span class="c1">## Eliminate reads with zero score</span>
  244. x <span class="o">&lt;-</span> x<span class="p">[</span>elementMetadata<span class="p">(</span>x<span class="p">)</span><span class="o">$</span>score <span class="o">&gt;</span> <span class="m">0</span><span class="p">]</span>
  245. x
  246. <span class="p">},</span> <span class="m">.</span>parallel<span class="o">=</span><span class="kc">TRUE</span><span class="p">)</span>
  247. <span class="kp">names</span><span class="p">(</span>sample.read.ranges<span class="p">)</span> <span class="o">&lt;-</span> <span class="kp">names</span><span class="p">(</span>read.counts<span class="p">)</span>
  248. <span class="c1">## Run blockbuster on each sample</span>
  249. <span class="c1">## x &lt;- sample.read.ranges[[1]][1:500]</span>
  250. <span class="c1">## y &lt;- run.blockbuster(x)</span>
  251. <span class="c1">## z &lt;- calculate.block.entropy(y$tags)</span>
  252. blockbuster.results <span class="o">&lt;-</span> llply<span class="p">(</span>sample.read.ranges<span class="p">,</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> run.blockbuster<span class="p">(</span>unstranded<span class="p">(</span>x<span class="p">),</span> minBlockHeight<span class="o">=</span><span class="m">5</span><span class="p">),</span> <span class="m">.</span>parallel<span class="o">=</span><span class="kc">TRUE</span><span class="p">)</span>
  253. <span class="c1">## calculate.block.entropy(blockbuster.results[[1]]$tags[1:5])</span>
  254. <span class="c1">## Calculate entropy of blocks</span>
  255. block.entropy <span class="o">&lt;-</span> llply<span class="p">(</span>blockbuster.results<span class="p">,</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> calculate.block.entropy<span class="p">(</span>x<span class="o">$</span>tags<span class="p">),</span> <span class="m">.</span>parallel<span class="o">=</span><span class="kc">TRUE</span><span class="p">)</span>
  256. <span class="kr">for</span> <span class="p">(</span>i <span class="kr">in</span> <span class="kp">names</span><span class="p">(</span>blockbuster.results<span class="p">))</span> <span class="p">{</span>
  257. elementMetadata<span class="p">(</span>blockbuster.results<span class="p">[[</span>i<span class="p">]]</span><span class="o">$</span>blocks<span class="p">)</span><span class="o">$</span>entropy <span class="o">&lt;-</span> block.entropy<span class="p">[[</span>i<span class="p">]][</span><span class="kp">as.character</span><span class="p">(</span>elementMetadata<span class="p">(</span>blockbuster.results<span class="p">[[</span>i<span class="p">]]</span><span class="o">$</span>blocks<span class="p">)</span><span class="o">$</span>blockID<span class="p">)]</span>
  258. <span class="p">}</span>
  259. <span class="c1">## Plot entropy vs block length</span>
  260. x <span class="o">&lt;-</span> blockbuster.results<span class="p">[[</span><span class="m">1</span><span class="p">]]</span><span class="o">$</span>blocks
  261. y <span class="o">&lt;-</span> <span class="kp">cbind</span><span class="p">(</span><span class="kp">as.data.frame</span><span class="p">(</span>elementMetadata<span class="p">(</span>x<span class="p">)),</span> width<span class="o">=</span>width<span class="p">(</span>x<span class="p">))</span>
  262. ggplot<span class="p">(</span>y<span class="p">,</span> aes<span class="p">(</span>x<span class="o">=</span>width<span class="p">,</span> y<span class="o">=</span>entropy<span class="p">,</span> color<span class="o">=</span><span class="m">..</span>density..<span class="p">))</span> <span class="o">+</span> stat_density2d<span class="p">(</span>geom<span class="o">=</span><span class="s">&quot;tile&quot;</span><span class="p">,</span> aes<span class="p">(</span>fill<span class="o">=</span><span class="m">..</span>density..<span class="p">),</span> contour<span class="o">=</span><span class="kc">FALSE</span><span class="p">)</span> <span class="o">+</span> scale_fill_gradient<span class="p">(</span>low<span class="o">=</span><span class="s">&quot;blue&quot;</span><span class="p">,</span> high<span class="o">=</span><span class="s">&quot;yellow&quot;</span><span class="p">)</span> <span class="o">+</span> scale_color_gradient<span class="p">(</span>low<span class="o">=</span><span class="s">&quot;blue&quot;</span><span class="p">,</span> high<span class="o">=</span><span class="s">&quot;yellow&quot;</span><span class="p">)</span>
  263. <span class="c1">## Compute nearest miRNA/snoRNA to each block and add annotation</span>
  264. block.annot <span class="o">&lt;-</span> llply<span class="p">(</span>blockbuster.results<span class="p">,</span> <span class="kr">function</span><span class="p">(</span>br<span class="p">)</span> <span class="p">{</span>
  265. annotate.by.granges<span class="p">(</span>br<span class="o">$</span>blocks<span class="p">,</span> small.rna.ranges<span class="p">,</span> <span class="kt">c</span><span class="p">(</span>nearest.ncRNA<span class="o">=</span><span class="s">&quot;name&quot;</span><span class="p">,</span> ncRNA.type<span class="o">=</span><span class="s">&quot;type&quot;</span><span class="p">))</span>
  266. <span class="p">},</span> <span class="m">.</span>parallel<span class="o">=</span><span class="kc">TRUE</span><span class="p">)</span>
  267. <span class="kr">for</span> <span class="p">(</span>i <span class="kr">in</span> <span class="kp">names</span><span class="p">(</span>blockbuster.results<span class="p">))</span> <span class="p">{</span>
  268. elementMetadata<span class="p">(</span>blockbuster.results<span class="p">[[</span>i<span class="p">]]</span><span class="o">$</span>blocks<span class="p">)[</span><span class="kp">names</span><span class="p">(</span>block.annot<span class="p">[[</span>i<span class="p">]])]</span> <span class="o">&lt;-</span> block.annot<span class="p">[[</span>i<span class="p">]]</span>
  269. <span class="p">}</span>
  270. <span class="c1">## write output</span>
  271. <span class="kp">saveRDS</span><span class="p">(</span>blockbuster.results<span class="p">,</span> <span class="s">&quot;blockbuster_results.RDS&quot;</span><span class="p">)</span>
  272. block.tables <span class="o">&lt;-</span> llply<span class="p">(</span>blockbuster.results<span class="p">,</span> <span class="kr">function</span><span class="p">(</span>br<span class="p">)</span> as<span class="p">(</span>granges.to.dataframe<span class="p">(</span>br<span class="o">$</span>blocks<span class="p">,</span> ignore.strand<span class="o">=</span><span class="kc">TRUE</span><span class="p">,</span> include.width<span class="o">=</span><span class="kc">TRUE</span><span class="p">),</span> <span class="s">&quot;data.frame&quot;</span><span class="p">))</span>
  273. write.xlsx.multisheet<span class="p">(</span>block.tables<span class="p">,</span> <span class="s">&quot;blockbuster_results.xlsx&quot;</span><span class="p">,</span> row.names<span class="o">=</span><span class="kc">FALSE</span><span class="p">)</span>
  274. </pre></div>
  275. </body>
  276. </html>