edger-pipeline.R.html 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
  2. "http://www.w3.org/TR/html4/strict.dtd">
  3. <html>
  4. <head>
  5. <title></title>
  6. <meta http-equiv="content-type" content="text/html; charset=utf-8">
  7. <style type="text/css">
  8. td.linenos { background-color: #f0f0f0; padding-right: 10px; }
  9. span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
  10. pre { line-height: 125%; }
  11. body .hll { background-color: #ffffcc }
  12. body { background: #f8f8f8; }
  13. body .c { color: #408080; font-style: italic } /* Comment */
  14. body .err { border: 1px solid #FF0000 } /* Error */
  15. body .k { color: #008000; font-weight: bold } /* Keyword */
  16. body .o { color: #666666 } /* Operator */
  17. body .ch { color: #408080; font-style: italic } /* Comment.Hashbang */
  18. body .cm { color: #408080; font-style: italic } /* Comment.Multiline */
  19. body .cp { color: #BC7A00 } /* Comment.Preproc */
  20. body .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */
  21. body .c1 { color: #408080; font-style: italic } /* Comment.Single */
  22. body .cs { color: #408080; font-style: italic } /* Comment.Special */
  23. body .gd { color: #A00000 } /* Generic.Deleted */
  24. body .ge { font-style: italic } /* Generic.Emph */
  25. body .gr { color: #FF0000 } /* Generic.Error */
  26. body .gh { color: #000080; font-weight: bold } /* Generic.Heading */
  27. body .gi { color: #00A000 } /* Generic.Inserted */
  28. body .go { color: #888888 } /* Generic.Output */
  29. body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
  30. body .gs { font-weight: bold } /* Generic.Strong */
  31. body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
  32. body .gt { color: #0044DD } /* Generic.Traceback */
  33. body .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
  34. body .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
  35. body .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
  36. body .kp { color: #008000 } /* Keyword.Pseudo */
  37. body .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
  38. body .kt { color: #B00040 } /* Keyword.Type */
  39. body .m { color: #666666 } /* Literal.Number */
  40. body .s { color: #BA2121 } /* Literal.String */
  41. body .na { color: #7D9029 } /* Name.Attribute */
  42. body .nb { color: #008000 } /* Name.Builtin */
  43. body .nc { color: #0000FF; font-weight: bold } /* Name.Class */
  44. body .no { color: #880000 } /* Name.Constant */
  45. body .nd { color: #AA22FF } /* Name.Decorator */
  46. body .ni { color: #999999; font-weight: bold } /* Name.Entity */
  47. body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
  48. body .nf { color: #0000FF } /* Name.Function */
  49. body .nl { color: #A0A000 } /* Name.Label */
  50. body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
  51. body .nt { color: #008000; font-weight: bold } /* Name.Tag */
  52. body .nv { color: #19177C } /* Name.Variable */
  53. body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
  54. body .w { color: #bbbbbb } /* Text.Whitespace */
  55. body .mb { color: #666666 } /* Literal.Number.Bin */
  56. body .mf { color: #666666 } /* Literal.Number.Float */
  57. body .mh { color: #666666 } /* Literal.Number.Hex */
  58. body .mi { color: #666666 } /* Literal.Number.Integer */
  59. body .mo { color: #666666 } /* Literal.Number.Oct */
  60. body .sa { color: #BA2121 } /* Literal.String.Affix */
  61. body .sb { color: #BA2121 } /* Literal.String.Backtick */
  62. body .sc { color: #BA2121 } /* Literal.String.Char */
  63. body .dl { color: #BA2121 } /* Literal.String.Delimiter */
  64. body .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
  65. body .s2 { color: #BA2121 } /* Literal.String.Double */
  66. body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
  67. body .sh { color: #BA2121 } /* Literal.String.Heredoc */
  68. body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
  69. body .sx { color: #008000 } /* Literal.String.Other */
  70. body .sr { color: #BB6688 } /* Literal.String.Regex */
  71. body .s1 { color: #BA2121 } /* Literal.String.Single */
  72. body .ss { color: #19177C } /* Literal.String.Symbol */
  73. body .bp { color: #008000 } /* Name.Builtin.Pseudo */
  74. body .fm { color: #0000FF } /* Name.Function.Magic */
  75. body .vc { color: #19177C } /* Name.Variable.Class */
  76. body .vg { color: #19177C } /* Name.Variable.Global */
  77. body .vi { color: #19177C } /* Name.Variable.Instance */
  78. body .vm { color: #19177C } /* Name.Variable.Magic */
  79. body .il { color: #666666 } /* Literal.Number.Integer.Long */
  80. </style>
  81. </head>
  82. <body>
  83. <h2></h2>
  84. <div class="highlight"><pre><span></span><span class="c1">#!/usr/bin/Rscript</span>
  85. <span class="kn">source</span><span class="p">(</span><span class="s">&quot;common.R&quot;</span><span class="p">)</span>
  86. <span class="kn">library</span><span class="p">(</span>rtracklayer<span class="p">)</span>
  87. <span class="kn">library</span><span class="p">(</span>DiffBind<span class="p">)</span>
  88. <span class="kn">library</span><span class="p">(</span>plyr<span class="p">)</span>
  89. <span class="kn">library</span><span class="p">(</span>doMC<span class="p">)</span>
  90. registerDoMC<span class="p">()</span>
  91. <span class="kp">options</span><span class="p">(</span>cores<span class="o">=</span>multicore<span class="o">:::</span>detectCores<span class="p">())</span>
  92. <span class="kn">library</span><span class="p">(</span>snow<span class="p">)</span>
  93. <span class="kn">library</span><span class="p">(</span>stringr<span class="p">)</span>
  94. <span class="kn">library</span><span class="p">(</span>RColorBrewer<span class="p">)</span>
  95. <span class="kn">library</span><span class="p">(</span>xlsx<span class="p">)</span>
  96. <span class="kn">library</span><span class="p">(</span>edgeR<span class="p">)</span>
  97. tsmsg <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span><span class="kc">...</span><span class="p">)</span> <span class="p">{</span>
  98. <span class="kp">message</span><span class="p">(</span><span class="kp">date</span><span class="p">(),</span> <span class="s">&quot;: &quot;</span><span class="p">,</span> <span class="kc">...</span><span class="p">)</span>
  99. <span class="p">}</span>
  100. tsmsgf <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span><span class="kc">...</span><span class="p">)</span> <span class="p">{</span>
  101. tsmsg<span class="p">(</span><span class="kp">sprintf</span><span class="p">(</span><span class="kc">...</span><span class="p">))</span>
  102. <span class="p">}</span>
  103. <span class="c1">## Additional args are passed to every call of addDataFrame</span>
  104. write.xlsx.multisheet <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>data.frames<span class="p">,</span> <span class="kp">file</span><span class="p">,</span> sheetNames<span class="o">=</span><span class="kp">names</span><span class="p">(</span>data.frames<span class="p">),</span> <span class="kc">...</span><span class="p">)</span> <span class="p">{</span>
  105. <span class="kr">if</span> <span class="p">(</span><span class="kp">is.null</span><span class="p">(</span>sheetNames<span class="p">))</span> <span class="p">{</span>
  106. sheetNames <span class="o">&lt;-</span> str_c<span class="p">(</span><span class="s">&quot;Sheet&quot;</span><span class="p">,</span> <span class="kp">seq_along</span><span class="p">(</span>data.frames<span class="p">))</span>
  107. <span class="p">}</span>
  108. <span class="c1">## Ensure correct number of sheetNames</span>
  109. <span class="kp">stopifnot</span><span class="p">(</span><span class="kp">length</span><span class="p">(</span>sheetNames<span class="p">)</span> <span class="o">==</span> <span class="kp">length</span><span class="p">(</span>data.frames<span class="p">))</span>
  110. <span class="c1">## Fill in missing names if needed</span>
  111. sheetNames<span class="p">[</span><span class="kp">is.na</span><span class="p">(</span>sheetNames<span class="p">)]</span> <span class="o">&lt;-</span> str_c<span class="p">(</span><span class="s">&quot;Sheet&quot;</span><span class="p">,</span> <span class="kp">seq_along</span><span class="p">(</span>data.frames<span class="p">))[</span><span class="kp">is.na</span><span class="p">(</span>sheetNames<span class="p">)]</span>
  112. wb <span class="o">&lt;-</span> createWorkbook<span class="p">()</span>
  113. sheets <span class="o">&lt;-</span> llply<span class="p">(</span>sheetNames<span class="p">,</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> createSheet<span class="p">(</span>wb<span class="p">,</span> sheetName<span class="o">=</span>x<span class="p">))</span>
  114. mlply<span class="p">(</span><span class="kp">cbind</span><span class="p">(</span>sheet<span class="o">=</span>sheets<span class="p">,</span> x<span class="o">=</span>data.frames<span class="p">),</span> <span class="m">.</span>fun<span class="o">=</span>addDataFrame<span class="p">,</span> <span class="m">.</span>parallel<span class="o">=</span><span class="kc">FALSE</span><span class="p">,</span> <span class="kc">...</span><span class="p">)</span>
  115. saveWorkbook<span class="p">(</span>wb<span class="p">,</span> <span class="kp">file</span><span class="p">)</span>
  116. <span class="p">}</span>
  117. renice.self <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>niceness<span class="o">=</span><span class="m">19</span><span class="p">)</span> <span class="p">{</span>
  118. <span class="kp">system2</span><span class="p">(</span><span class="s">&quot;renice&quot;</span><span class="p">,</span> args<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;-n&quot;</span><span class="p">,</span> <span class="kp">as.character</span><span class="p">(</span>niceness<span class="p">),</span> <span class="kp">Sys.getpid</span><span class="p">()))</span>
  119. <span class="p">}</span>
  120. makeNiceCluster <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span><span class="kc">...</span><span class="p">)</span> <span class="p">{</span>
  121. cl <span class="o">&lt;-</span> makeCluster<span class="p">(</span><span class="kc">...</span><span class="p">)</span>
  122. clusterCall<span class="p">(</span>cl<span class="p">,</span> renice.self<span class="p">)</span>
  123. cl
  124. <span class="p">}</span>
  125. select.nearest <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">,</span> y<span class="p">)</span> <span class="p">{</span>
  126. y<span class="p">[</span>nearest<span class="p">(</span>x<span class="p">,</span>y<span class="p">)]</span>
  127. <span class="p">}</span>
  128. kgxref <span class="o">&lt;-</span> get.ucsc.table<span class="p">(</span><span class="s">&quot;knownGene&quot;</span><span class="p">,</span><span class="s">&quot;kgXref&quot;</span><span class="p">)</span>
  129. get.kgxref <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>kgID<span class="p">)</span> <span class="p">{</span>
  130. x <span class="o">&lt;-</span> <span class="kp">merge</span><span class="p">(</span>x<span class="o">=</span>DataFrame<span class="p">(</span>kgID<span class="o">=</span>kgID<span class="p">),</span> y<span class="o">=</span>kgxref<span class="p">,</span>
  131. all.x<span class="o">=</span><span class="kc">TRUE</span><span class="p">,</span> all.y<span class="o">=</span><span class="kc">FALSE</span><span class="p">)</span>
  132. x<span class="p">[</span><span class="kp">names</span><span class="p">(</span>x<span class="p">)</span> <span class="o">!=</span> <span class="s">&quot;kgID&quot;</span><span class="p">]</span>
  133. <span class="p">}</span>
  134. read.htseq.counts <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>f<span class="p">)</span> <span class="p">{</span>
  135. x <span class="o">&lt;-</span> read.table<span class="p">(</span>f<span class="p">,</span> header<span class="o">=</span><span class="kc">FALSE</span><span class="p">,</span> sep<span class="o">=</span><span class="s">&quot;\t&quot;</span><span class="p">)</span>
  136. <span class="kp">names</span><span class="p">(</span>x<span class="p">)</span> <span class="o">&lt;-</span> <span class="kt">c</span><span class="p">(</span><span class="s">&quot;ID&quot;</span><span class="p">,</span> <span class="s">&quot;count&quot;</span><span class="p">)</span>
  137. <span class="c1">## Discard the last 5 lines</span>
  138. exception.rows <span class="o">&lt;-</span> <span class="p">(</span><span class="kp">nrow</span><span class="p">(</span>x<span class="p">)</span><span class="m">-4</span><span class="p">)</span><span class="o">:</span><span class="kp">nrow</span><span class="p">(</span>x<span class="p">)</span>
  139. <span class="kp">attr</span><span class="p">(</span>x<span class="p">,</span> <span class="s">&quot;exception_counts&quot;</span><span class="p">)</span> <span class="o">&lt;-</span> x<span class="p">[</span>exception.rows<span class="p">,]</span>
  140. x <span class="o">&lt;-</span> x<span class="p">[</span><span class="o">-</span>exception.rows<span class="p">,]</span>
  141. x
  142. <span class="p">}</span>
  143. get.transcript.lengths <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>transcript.ids<span class="p">,</span> exon.lengths<span class="p">)</span> <span class="p">{</span>
  144. exons.by.transcript <span class="o">&lt;-</span> <span class="kp">split</span><span class="p">(</span>exon.lengths<span class="p">,</span> transcript.ids<span class="p">)</span>
  145. <span class="kp">sapply</span><span class="p">(</span>exons.by.transcript<span class="p">,</span> <span class="kp">sum</span><span class="p">)</span>
  146. <span class="p">}</span>
  147. get.gene.lengths <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>gene.ids<span class="p">,</span> transcript.lengths<span class="p">,</span> method<span class="o">=</span><span class="s">&quot;max&quot;</span><span class="p">)</span> <span class="p">{</span>
  148. <span class="kr">if</span> <span class="p">(</span><span class="kp">is.character</span><span class="p">(</span>method<span class="p">))</span> <span class="p">{</span>
  149. method <span class="o">&lt;-</span> <span class="kp">get</span><span class="p">(</span>method<span class="p">)</span>
  150. <span class="p">}</span>
  151. transcripts.by.genes <span class="o">&lt;-</span> <span class="kp">split</span><span class="p">(</span>transcript.lengths<span class="p">,</span> gene.ids<span class="p">)</span>
  152. <span class="kp">sapply</span><span class="p">(</span>transcripts.by.genes<span class="p">,</span> method<span class="p">)</span>
  153. <span class="p">}</span>
  154. str_matches <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>string<span class="p">,</span> pattern<span class="p">)</span> <span class="p">{</span>
  155. <span class="o">!</span><span class="kp">is.na</span><span class="p">(</span>str_match<span class="p">(</span>string<span class="p">,</span> pattern<span class="p">)[,</span><span class="m">1</span><span class="p">])</span>
  156. <span class="p">}</span>
  157. annotate.ensp.in.hsap.or.mmul <span class="o">&lt;-</span> <span class="kr">function</span><span class="p">(</span>ensp.ids<span class="p">)</span> <span class="p">{</span>
  158. ensembl<span class="o">=</span>useMart<span class="p">(</span><span class="s">&quot;ensembl&quot;</span><span class="p">)</span>
  159. datasets <span class="o">&lt;-</span> <span class="kt">c</span><span class="p">(</span><span class="s">&quot;mmulatta_gene_ensembl&quot;</span><span class="p">,</span> <span class="s">&quot;hsapiens_gene_ensembl&quot;</span><span class="p">)</span>
  160. x <span class="o">&lt;-</span> <span class="kp">Reduce</span><span class="p">(</span><span class="kp">rbind</span><span class="p">,</span> llply<span class="p">(</span>datasets<span class="p">,</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> <span class="p">{</span>
  161. ensembl <span class="o">&lt;-</span> useDataset<span class="p">(</span>x<span class="p">,</span> mart<span class="o">=</span>ensembl<span class="p">)</span>
  162. getBM<span class="p">(</span>filters<span class="o">=</span><span class="s">&quot;ensembl_peptide_id&quot;</span><span class="p">,</span>
  163. values<span class="o">=</span><span class="kp">unique</span><span class="p">(</span>ensp.ids<span class="p">),</span>
  164. attributes<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;hgnc_symbol&quot;</span><span class="p">,</span> <span class="s">&quot;wikigene_name&quot;</span><span class="p">,</span> <span class="s">&quot;ensembl_gene_id&quot;</span><span class="p">,</span> <span class="s">&quot;ensembl_transcript_id&quot;</span><span class="p">,</span> <span class="s">&quot;ensembl_peptide_id&quot;</span><span class="p">,</span> <span class="s">&quot;description&quot;</span><span class="p">,</span> <span class="s">&quot;wikigene_description&quot;</span><span class="p">),</span>
  165. mart<span class="o">=</span>ensembl<span class="p">,</span>
  166. uniqueRows<span class="o">=</span><span class="kc">TRUE</span><span class="p">)</span>
  167. <span class="p">}))</span>
  168. <span class="c1">## Convert empty string to NA in all columns</span>
  169. x <span class="o">&lt;-</span> <span class="kt">data.frame</span><span class="p">(</span>llply<span class="p">(</span>x<span class="p">,</span> <span class="kr">function</span><span class="p">(</span>column<span class="p">)</span> <span class="kp">ifelse</span><span class="p">(</span>column <span class="o">==</span> <span class="s">&quot;&quot;</span><span class="p">,</span> <span class="kc">NA</span><span class="p">,</span> column<span class="p">)))</span>
  170. <span class="c1">## Unify symbols &amp; descriptions</span>
  171. unified.symbol <span class="o">&lt;-</span> <span class="kp">ifelse</span><span class="p">(</span><span class="kp">is.na</span><span class="p">(</span>x<span class="o">$</span>hgnc_symbol<span class="p">),</span> <span class="kp">as.character</span><span class="p">(</span>x<span class="o">$</span>wikigene_name<span class="p">),</span> <span class="kp">as.character</span><span class="p">(</span>x<span class="o">$</span>hgnc_symbol<span class="p">))</span>
  172. unified.desc <span class="o">&lt;-</span> <span class="kp">ifelse</span><span class="p">(</span><span class="kp">is.na</span><span class="p">(</span>x<span class="o">$</span>description<span class="p">),</span> <span class="kp">as.character</span><span class="p">(</span>x<span class="o">$</span>wikigene_description<span class="p">),</span> <span class="kp">as.character</span><span class="p">(</span>x<span class="o">$</span>description<span class="p">))</span>
  173. x <span class="o">&lt;-</span> <span class="kt">data.frame</span><span class="p">(</span>symbol<span class="o">=</span>unified.symbol<span class="p">,</span> x<span class="p">[</span><span class="o">!</span> <span class="kp">names</span><span class="p">(</span>x<span class="p">)</span> <span class="o">%in%</span> <span class="kt">c</span><span class="p">(</span><span class="s">&quot;hgnc_symbol&quot;</span><span class="p">,</span> <span class="s">&quot;wikigene_name&quot;</span><span class="p">,</span> <span class="s">&quot;description&quot;</span><span class="p">,</span> <span class="s">&quot;wikigene_description&quot;</span><span class="p">)],</span> description<span class="o">=</span>unified.desc<span class="p">)</span>
  174. <span class="c1">## Reorder rows so that more-preferred rows for the same input are on top</span>
  175. x <span class="o">&lt;-</span> x<span class="p">[</span><span class="kp">order</span><span class="p">(</span><span class="kp">is.na</span><span class="p">(</span>x<span class="o">$</span>symbol<span class="p">),</span> str_matches<span class="p">(</span>x<span class="o">$</span>symbol<span class="p">,</span> <span class="s">&quot;^LOC\\d+$&quot;</span><span class="p">),</span> <span class="kp">is.na</span><span class="p">(</span>x<span class="o">$</span>description<span class="p">)),]</span>
  176. <span class="c1">## Keep only the first row for each input</span>
  177. x <span class="o">&lt;-</span> x<span class="p">[</span><span class="o">!</span><span class="kp">duplicated</span><span class="p">(</span>x<span class="o">$</span>ensembl_peptide_id<span class="p">),]</span>
  178. <span class="kt">data.frame</span><span class="p">(</span>x<span class="p">,</span> row.names<span class="o">=</span>x<span class="o">$</span>ensembl_peptide_id<span class="p">)</span>
  179. <span class="p">}</span>
  180. <span class="c1">## TODO: Replace by biomart</span>
  181. CE.name.annot <span class="o">&lt;-</span> <span class="p">{</span>
  182. x <span class="o">&lt;-</span> setNames<span class="p">(</span>nm<span class="o">=</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;CE_ensembl_peptide_id&quot;</span><span class="p">,</span> <span class="s">&quot;symbol&quot;</span><span class="p">,</span> <span class="s">&quot;description&quot;</span><span class="p">),</span>
  183. read.table<span class="p">(</span><span class="s">&quot;annotation/CE.name&quot;</span><span class="p">,</span> sep<span class="o">=</span><span class="s">&quot;\t&quot;</span><span class="p">,</span> quote<span class="o">=</span><span class="s">&quot;&quot;</span><span class="p">))</span>
  184. x<span class="o">$</span>ensembl_peptide_id <span class="o">&lt;-</span> str_replace<span class="p">(</span>x<span class="o">$</span>CE_ensembl_peptide_id<span class="p">,</span> <span class="s">&quot;^CE_&quot;</span><span class="p">,</span> <span class="s">&quot;&quot;</span><span class="p">)</span>
  185. <span class="kp">row.names</span><span class="p">(</span>x<span class="p">)</span> <span class="o">&lt;-</span> x<span class="o">$</span>ensembl_peptide_id
  186. x
  187. <span class="p">}</span>
  188. annotation.gff <span class="o">&lt;-</span> import<span class="p">(</span><span class="s">&quot;cuffmerge_results/merged.gff&quot;</span><span class="p">)</span>
  189. transcripts <span class="o">&lt;-</span> annotation.gff<span class="p">[</span>annotation.gff<span class="o">$</span>type <span class="o">==</span> <span class="s">&quot;transcript&quot;</span><span class="p">,</span> <span class="p">]</span>
  190. exons <span class="o">&lt;-</span> annotation.gff<span class="p">[</span>annotation.gff<span class="o">$</span>type <span class="o">==</span> <span class="s">&quot;exon&quot;</span><span class="p">,</span> <span class="p">]</span>
  191. transcript.lengths <span class="o">&lt;-</span> get.transcript.lengths<span class="p">(</span><span class="kp">unlist</span><span class="p">(</span>exons<span class="o">$</span>Parent<span class="p">),</span> width<span class="p">(</span>exons<span class="p">))</span>
  192. gene.lengths <span class="o">&lt;-</span> get.gene.lengths<span class="p">(</span>transcripts<span class="o">$</span>geneID<span class="p">,</span> transcript.lengths<span class="p">[</span>transcripts<span class="o">$</span>ID<span class="p">])</span>
  193. gene.ref.lists <span class="o">&lt;-</span> <span class="kp">sapply</span><span class="p">(</span><span class="kp">split</span><span class="p">(</span>transcripts<span class="o">$</span>nearest_ref<span class="p">,</span> transcripts<span class="o">$</span>geneID<span class="p">),</span>
  194. <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> <span class="kp">unique</span><span class="p">(</span>x<span class="p">[</span><span class="o">!</span><span class="kp">is.na</span><span class="p">(</span>x<span class="p">)]))</span>
  195. <span class="c1">## For genes with multiple ref IDs, join them with commas</span>
  196. gene.refs <span class="o">&lt;-</span> <span class="kp">sapply</span><span class="p">(</span>gene.ref.lists<span class="p">,</span> <span class="kr">function</span> <span class="p">(</span>x<span class="p">)</span> <span class="p">{</span>
  197. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span>x<span class="p">)</span> <span class="o">==</span> <span class="m">0</span><span class="p">)</span> <span class="p">{</span>
  198. <span class="kc">NA</span>
  199. <span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
  200. str_c<span class="p">(</span>x<span class="p">,</span> collapse<span class="o">=</span><span class="s">&quot;,&quot;</span><span class="p">)</span>
  201. <span class="p">}</span>
  202. <span class="p">})</span>
  203. <span class="c1">## For genes with multiple ref IDs, just pick the first one</span>
  204. gene.first.refs <span class="o">&lt;-</span> <span class="kp">sapply</span><span class="p">(</span>gene.ref.lists<span class="p">,</span> <span class="kr">function</span><span class="p">(</span>x<span class="p">)</span> <span class="p">{</span>
  205. <span class="kr">if</span> <span class="p">(</span><span class="kp">length</span><span class="p">(</span>x<span class="p">)</span> <span class="o">==</span> <span class="m">0</span><span class="p">)</span> <span class="p">{</span>
  206. <span class="kc">NA</span>
  207. <span class="p">}</span> <span class="kr">else</span> <span class="p">{</span>
  208. x<span class="p">[[</span><span class="m">1</span><span class="p">]]</span>
  209. <span class="p">}</span>
  210. <span class="p">})</span>
  211. <span class="c1">## Use delayed assignment so that the cluster won&#39;t be created until it is actually needed</span>
  212. <span class="kp">delayedAssign</span><span class="p">(</span><span class="s">&quot;cl&quot;</span><span class="p">,</span> makeNiceCluster<span class="p">(</span><span class="kp">rep</span><span class="p">(</span><span class="kt">c</span><span class="p">(</span><span class="s">&quot;salomon14&quot;</span><span class="p">,</span> <span class="s">&quot;salomon18&quot;</span><span class="p">,</span> <span class="s">&quot;salomon19&quot;</span><span class="p">),</span> <span class="m">8</span><span class="p">)))</span>
  213. mart.raw.annot <span class="o">&lt;-</span> annotate.ensp.in.hsap.or.mmul<span class="p">(</span>na.omit<span class="p">(</span>gene.first.refs<span class="p">))</span>
  214. gene.annot <span class="o">&lt;-</span> <span class="kt">data.frame</span><span class="p">(</span>mart.raw.annot<span class="p">[</span>gene.first.refs<span class="p">,],</span> row.names<span class="o">=</span><span class="kp">names</span><span class="p">(</span>gene.first.refs<span class="p">))</span>
  215. <span class="c1">## Fill in missing values that are available from the CE.name file</span>
  216. <span class="c1">## provided by the cyno genome paper authors</span>
  217. suppl.gene.annot <span class="o">&lt;-</span> <span class="kt">data.frame</span><span class="p">(</span>CE.name.annot<span class="p">[</span>gene.first.refs<span class="p">,],</span> row.names<span class="o">=</span><span class="kp">names</span><span class="p">(</span>gene.first.refs<span class="p">))</span>
  218. suppl.ensp <span class="o">&lt;-</span> <span class="kp">is.na</span><span class="p">(</span>gene.annot<span class="o">$</span>ensembl_peptide_id<span class="p">)</span> <span class="o">&amp;</span> <span class="o">!</span><span class="kp">is.na</span><span class="p">(</span>suppl.gene.annot<span class="o">$</span>ensembl_peptide_id<span class="p">)</span>
  219. suppl.symbols <span class="o">&lt;-</span> <span class="kp">is.na</span><span class="p">(</span>gene.annot<span class="o">$</span>symbol<span class="p">)</span> <span class="o">&amp;</span> <span class="o">!</span><span class="kp">is.na</span><span class="p">(</span>suppl.gene.annot<span class="o">$</span>symbol<span class="p">)</span>
  220. gene.annot<span class="o">$</span>ensembl_peptide_id <span class="o">&lt;-</span> <span class="kp">ifelse</span><span class="p">(</span>suppl.ensp<span class="p">,</span>
  221. <span class="kp">as.character</span><span class="p">(</span>suppl.gene.annot<span class="o">$</span>ensembl_peptide_id<span class="p">),</span>
  222. <span class="kp">as.character</span><span class="p">(</span>gene.annot<span class="o">$</span>ensembl_peptide_id<span class="p">))</span>
  223. gene.annot<span class="o">$</span>symbol <span class="o">&lt;-</span> <span class="kp">ifelse</span><span class="p">(</span>suppl.symbols<span class="p">,</span>
  224. <span class="kp">as.character</span><span class="p">(</span>suppl.gene.annot<span class="o">$</span>symbol<span class="p">),</span>
  225. <span class="kp">as.character</span><span class="p">(</span>gene.annot<span class="o">$</span>symbol<span class="p">))</span>
  226. <span class="c1">## Replace description whenever we replace the symbol to keep things consistent</span>
  227. gene.annot<span class="o">$</span>description <span class="o">&lt;-</span> <span class="kp">ifelse</span><span class="p">(</span>suppl.symbols<span class="p">,</span>
  228. <span class="kp">as.character</span><span class="p">(</span>suppl.gene.annot<span class="o">$</span>description<span class="p">),</span>
  229. <span class="kp">as.character</span><span class="p">(</span>gene.annot<span class="o">$</span>description<span class="p">))</span>
  230. gene.annot<span class="o">$</span>symbol<span class="p">[</span><span class="kp">is.na</span><span class="p">(</span>gene.annot<span class="o">$</span>ensembl_peptide_id<span class="p">)]</span> <span class="o">&lt;-</span> <span class="s">&quot;[No annotation]&quot;</span>
  231. gene.annot<span class="o">$</span>symbol<span class="p">[</span><span class="kp">is.na</span><span class="p">(</span>gene.annot<span class="o">$</span>symbol<span class="p">)]</span> <span class="o">&lt;-</span> <span class="s">&quot;[No symbol for ENSP]&quot;</span>
  232. expdata <span class="o">&lt;-</span> <span class="p">{</span>
  233. x <span class="o">&lt;-</span> read.xlsx<span class="p">(</span><span class="s">&quot;kenyon-cyno-expdata.xls&quot;</span><span class="p">,</span> <span class="m">1</span><span class="p">)</span>
  234. x<span class="o">$</span>Animal.ID <span class="o">&lt;-</span> str_trim<span class="p">(</span>x<span class="o">$</span>Animal.ID<span class="p">)</span>
  235. x<span class="o">$</span>Condition <span class="o">&lt;-</span> str_replace_all<span class="p">(</span>x<span class="o">$</span>Condition<span class="p">,</span> <span class="s">&quot;γ&quot;</span><span class="p">,</span> <span class="s">&quot;g&quot;</span><span class="p">)</span>
  236. x<span class="o">$</span>Sample.ID <span class="o">&lt;-</span>
  237. <span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;%s_%s_%s&quot;</span><span class="p">,</span>
  238. str_replace_all<span class="p">(</span>x<span class="o">$</span>Animal.ID<span class="p">,</span> <span class="s">&quot;[ -]&quot;</span><span class="p">,</span> <span class="s">&quot;_&quot;</span><span class="p">),</span>
  239. <span class="kp">ifelse</span><span class="p">(</span>x<span class="o">$</span>Condition <span class="o">==</span> <span class="s">&quot;Control&quot;</span><span class="p">,</span> <span class="s">&quot;CTRL&quot;</span><span class="p">,</span> <span class="s">&quot;IFNg&quot;</span><span class="p">),</span>
  240. x<span class="o">$</span>Passage<span class="p">)</span>
  241. x<span class="o">$</span>Sample.Name <span class="o">&lt;-</span>
  242. <span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;%s_%s_%s_L%03i&quot;</span><span class="p">,</span>
  243. str_replace_all<span class="p">(</span>x<span class="o">$</span>Animal.ID<span class="p">,</span> <span class="s">&quot;[ -]&quot;</span><span class="p">,</span> <span class="s">&quot;_&quot;</span><span class="p">),</span>
  244. <span class="kp">ifelse</span><span class="p">(</span>x<span class="o">$</span>Condition <span class="o">==</span> <span class="s">&quot;Control&quot;</span><span class="p">,</span> <span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">),</span>
  245. x<span class="o">$</span>Index.Seq<span class="p">,</span>
  246. x<span class="o">$</span>Lane<span class="p">)</span>
  247. x<span class="o">$</span>FASTQ.Read1.File <span class="o">&lt;-</span> <span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;seqprep_results/%s/%s_R1_001.fastq&quot;</span><span class="p">,</span> x<span class="o">$</span>Sample.Name<span class="p">,</span> x<span class="o">$</span>Sample.Name<span class="p">)</span>
  248. x<span class="o">$</span>FASTQ.Read2.File <span class="o">&lt;-</span> <span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;seqprep_results/%s/%s_R2_001.fastq&quot;</span><span class="p">,</span> x<span class="o">$</span>Sample.Name<span class="p">,</span> x<span class="o">$</span>Sample.Name<span class="p">)</span>
  249. x<span class="o">$</span>BAM.File <span class="o">&lt;-</span> <span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;tophat_results/%s/accepted_hits.bam&quot;</span><span class="p">,</span> x<span class="o">$</span>Sample.Name<span class="p">)</span>
  250. x<span class="o">$</span>GFF.File <span class="o">&lt;-</span> <span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;cufflinks_quantification/%s/transcripts.gff&quot;</span><span class="p">,</span> x<span class="o">$</span>Sample.Name<span class="p">)</span>
  251. x<span class="o">$</span>Counts.File <span class="o">&lt;-</span> <span class="kp">sprintf</span><span class="p">(</span><span class="s">&quot;htseq_counts/%s/counts.txt&quot;</span><span class="p">,</span> x<span class="o">$</span>Sample.Name<span class="p">)</span>
  252. mcparallel<span class="p">(</span>write.xlsx<span class="p">(</span>x<span class="p">,</span> <span class="s">&quot;expdata.xlsx&quot;</span><span class="p">))</span>
  253. <span class="kp">rownames</span><span class="p">(</span>x<span class="p">)</span> <span class="o">&lt;-</span> x<span class="o">$</span>Sample.ID
  254. x
  255. <span class="p">}</span>
  256. counts.vectors <span class="o">&lt;-</span> setNames<span class="p">(</span>llply<span class="p">(</span>expdata<span class="o">$</span>Counts.File<span class="p">,</span> <span class="m">.</span>parallel<span class="o">=</span><span class="kc">TRUE</span><span class="p">,</span> <span class="kr">function</span><span class="p">(</span>f<span class="p">)</span> <span class="p">{</span>
  257. x <span class="o">&lt;-</span> read.htseq.counts<span class="p">(</span>f<span class="p">)</span>
  258. setNames<span class="p">(</span>x<span class="o">$</span>count<span class="p">,</span> x<span class="o">$</span>ID<span class="p">)</span>
  259. <span class="p">}),</span> expdata<span class="o">$</span>Sample.ID<span class="p">)</span>
  260. <span class="c1">## Put the data into a matrix, making sure we account for the</span>
  261. <span class="c1">## possibility that not all genes are listed in all samples.</span>
  262. all.geneIDs <span class="o">&lt;-</span> <span class="kp">sort</span><span class="p">(</span><span class="kp">unique</span><span class="p">(</span><span class="kp">unlist</span><span class="p">(</span>llply<span class="p">(</span>counts.vectors<span class="p">,</span> <span class="kp">names</span><span class="p">))))</span>
  263. counts <span class="o">&lt;-</span> <span class="p">{</span>
  264. x <span class="o">&lt;-</span> <span class="kt">matrix</span><span class="p">(</span>data<span class="o">=</span><span class="m">0</span><span class="p">,</span> ncol<span class="o">=</span><span class="kp">length</span><span class="p">(</span>counts.vectors<span class="p">),</span> nrow<span class="o">=</span><span class="kp">length</span><span class="p">(</span>all.geneIDs<span class="p">),</span>
  265. dimnames<span class="o">=</span><span class="kt">list</span><span class="p">(</span><span class="sb">`geneID`</span><span class="o">=</span>all.geneIDs<span class="p">,</span> <span class="sb">`sample`</span><span class="o">=</span><span class="kp">names</span><span class="p">(</span>counts.vectors<span class="p">)))</span>
  266. <span class="kr">for</span> <span class="p">(</span>i <span class="kr">in</span> expdata<span class="o">$</span>Sample.ID<span class="p">)</span> <span class="p">{</span>
  267. cvec <span class="o">&lt;-</span> counts.vectors<span class="p">[[</span>i<span class="p">]]</span>
  268. x<span class="p">[</span><span class="kp">names</span><span class="p">(</span>cvec<span class="p">),</span>i<span class="p">]</span> <span class="o">&lt;-</span> <span class="kp">cbind</span><span class="p">(</span>cvec<span class="p">)</span>
  269. <span class="p">}</span>
  270. x
  271. <span class="p">}</span>
  272. blocked.design <span class="o">&lt;-</span> model.matrix<span class="p">(</span><span class="o">~</span>Animal.ID<span class="o">+</span>Passage<span class="o">+</span>Condition<span class="p">,</span> data<span class="o">=</span>expdata<span class="p">)</span>
  273. unblocked.design <span class="o">&lt;-</span> model.matrix<span class="p">(</span><span class="o">~</span>Condition<span class="p">,</span> data<span class="o">=</span>expdata<span class="p">)</span>
  274. dge <span class="o">&lt;-</span> DGEList<span class="p">(</span>counts<span class="o">=</span>counts<span class="p">,</span>
  275. group<span class="o">=</span>expdata<span class="o">$</span>Condition<span class="p">,</span>
  276. genes<span class="o">=</span>gene.annot<span class="p">[</span><span class="kp">rownames</span><span class="p">(</span>counts<span class="p">),])</span>
  277. dge <span class="o">&lt;-</span> calcNormFactors<span class="p">(</span>dge<span class="p">)</span>
  278. blocked.dge <span class="o">&lt;-</span> estimateGLMCommonDisp<span class="p">(</span>dge<span class="p">,</span> blocked.design<span class="p">,</span> verbose<span class="o">=</span><span class="kc">TRUE</span><span class="p">)</span>
  279. blocked.dge <span class="o">&lt;-</span> estimateGLMTrendedDisp<span class="p">(</span>blocked.dge<span class="p">,</span> blocked.design<span class="p">)</span>
  280. blocked.dge <span class="o">&lt;-</span> estimateGLMTagwiseDisp<span class="p">(</span>blocked.dge<span class="p">,</span> blocked.design<span class="p">)</span>
  281. blocked.fit <span class="o">&lt;-</span> glmFit<span class="p">(</span>blocked.dge<span class="p">,</span> blocked.design<span class="p">)</span>
  282. blocked.lrt <span class="o">&lt;-</span> glmLRT<span class="p">(</span>blocked.dge<span class="p">,</span> blocked.fit<span class="p">,</span> coef<span class="o">=</span><span class="s">&quot;ConditionIFNg activated&quot;</span><span class="p">)</span>
  283. blocked.tt <span class="o">&lt;-</span> topTags<span class="p">(</span>blocked.lrt<span class="p">,</span> n<span class="o">=</span><span class="kp">nrow</span><span class="p">(</span>counts<span class="p">))</span>
  284. unblocked.dge <span class="o">&lt;-</span> estimateGLMCommonDisp<span class="p">(</span>dge<span class="p">,</span> unblocked.design<span class="p">,</span> verbose<span class="o">=</span><span class="kc">TRUE</span><span class="p">)</span>
  285. unblocked.dge <span class="o">&lt;-</span> estimateGLMTrendedDisp<span class="p">(</span>unblocked.dge<span class="p">,</span> unblocked.design<span class="p">)</span>
  286. unblocked.dge <span class="o">&lt;-</span> estimateGLMTagwiseDisp<span class="p">(</span>unblocked.dge<span class="p">,</span> unblocked.design<span class="p">)</span>
  287. unblocked.fit <span class="o">&lt;-</span> glmFit<span class="p">(</span>unblocked.dge<span class="p">,</span> unblocked.design<span class="p">)</span>
  288. unblocked.lrt <span class="o">&lt;-</span> glmLRT<span class="p">(</span>unblocked.dge<span class="p">,</span> unblocked.fit<span class="p">,</span> coef<span class="o">=</span><span class="s">&quot;ConditionIFNg activated&quot;</span><span class="p">)</span>
  289. unblocked.tt <span class="o">&lt;-</span> topTags<span class="p">(</span>unblocked.lrt<span class="p">,</span> n<span class="o">=</span><span class="kp">nrow</span><span class="p">(</span>counts<span class="p">))</span>
  290. write.csv<span class="p">(</span>blocked.tt<span class="o">$</span><span class="kp">table</span><span class="p">,</span> <span class="s">&quot;edgeR-genes-prelim&quot;</span><span class="p">)</span>
  291. write.xlsx.multisheet<span class="p">(</span><span class="kt">list</span><span class="p">(</span>blocked<span class="o">=</span>blocked.tt<span class="o">$</span><span class="kp">table</span><span class="p">,</span>
  292. unblocked<span class="o">=</span>unblocked.tt<span class="o">$</span><span class="kp">table</span><span class="p">),</span>
  293. <span class="s">&quot;edgeR-genes.xlsx&quot;</span><span class="p">)</span>
  294. </pre></div>
  295. </body>
  296. </html>