|
@@ -183,7 +183,7 @@ in partial fulfillment of the requirements for the degree of
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Date
|
|
|
-May 2019
|
|
|
+October 2019
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Standard
|
|
@@ -549,41 +549,11 @@ Methods
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Standard
|
|
|
-\begin_inset Float figure
|
|
|
-wide false
|
|
|
-sideways true
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\align center
|
|
|
-\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
|
|
|
- lyxscale 50
|
|
|
- width 100theight%
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-
|
|
|
-\end_layout
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-\begin_inset Caption Standard
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-\begin_inset CommandInset label
|
|
|
-LatexCommand label
|
|
|
-name "fig:rulegraph"
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-
|
|
|
-\series bold
|
|
|
-Dependency graph of steps in reproducible workflow
|
|
|
-\end_layout
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-
|
|
|
+Move figures that are only justifying methods into this section
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -734,6 +704,12 @@ Focus on what hypotheses were tested, then select figures that show how
|
|
|
those hypotheses were tested, even if the result is a negative.
|
|
|
\end_layout
|
|
|
|
|
|
+\begin_layout Plain Layout
|
|
|
+Not every interesting result needs to be in here.
|
|
|
+ Chapter should tell a story.
|
|
|
+
|
|
|
+\end_layout
|
|
|
+
|
|
|
\end_inset
|
|
|
|
|
|
|
|
@@ -754,23 +730,35 @@ Maybe reorder these sections to do RNA-seq, then ChIP-seq, then combined
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Subsection
|
|
|
-H3K4 and H3K27 methylation occur in broad regions and are enriched near
|
|
|
- promoters
|
|
|
+RNA-seq align+quant method comparison
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Standard
|
|
|
-\begin_inset Float figure
|
|
|
-wide false
|
|
|
-sideways false
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\begin_inset Flex TODO Note (inline)
|
|
|
+Maybe fix up the excessive axis ranges for these plots?
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Float figure
|
|
|
+wide false
|
|
|
+sideways false
|
|
|
status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-Re-generate IDR rank consistency plots for SICER and MACS side-by-side
|
|
|
-\end_layout
|
|
|
+\align center
|
|
|
+\begin_inset Graphics
|
|
|
+ filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star-CROP.png
|
|
|
+ lyxscale 25
|
|
|
+ width 100col%
|
|
|
+ groupId colwidth-raster
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -781,19 +769,15 @@ Re-generate IDR rank consistency plots for SICER and MACS side-by-side
|
|
|
\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-
|
|
|
-\series bold
|
|
|
-\begin_inset CommandInset label
|
|
|
-LatexCommand label
|
|
|
-name "fig:IDR-RC-H3K4me2"
|
|
|
+Comparison of STAR quantification between Ensembl and Entrez gene identifiers
|
|
|
+\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
|
|
|
-Irreproducible Discovery Rate consistency plots for H3K4me2
|
|
|
-\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
+\end_layout
|
|
|
|
|
|
+\begin_layout Plain Layout
|
|
|
|
|
|
\end_layout
|
|
|
|
|
@@ -809,12 +793,12 @@ sideways false
|
|
|
status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\begin_inset Flex TODO Note (inline)
|
|
|
-status open
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-Re-generate IDR rank consistency plots for SICER and MACS side-by-side
|
|
|
-\end_layout
|
|
|
+\align center
|
|
|
+\begin_inset Graphics
|
|
|
+ filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal-CROP.png
|
|
|
+ lyxscale 25
|
|
|
+ width 100col%
|
|
|
+ groupId colwidth-raster
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -825,15 +809,8 @@ Re-generate IDR rank consistency plots for SICER and MACS side-by-side
|
|
|
\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-
|
|
|
-\series bold
|
|
|
-\begin_inset CommandInset label
|
|
|
-LatexCommand label
|
|
|
-name "fig:IDR-RC-H3K4me3"
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-Irreproducible Discovery Rate consistency plots for H3K4me3
|
|
|
+Comparison of Salmon+Shoal quantification between Ensembl and Entrez gene
|
|
|
+ identifiers
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -853,12 +830,12 @@ sideways false
|
|
|
status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\begin_inset Flex TODO Note (inline)
|
|
|
-status open
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-Re-generate IDR rank consistency plots for SICER and MACS side-by-side
|
|
|
-\end_layout
|
|
|
+\align center
|
|
|
+\begin_inset Graphics
|
|
|
+ filename graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2-CROP.png
|
|
|
+ lyxscale 25
|
|
|
+ width 100col%
|
|
|
+ groupId colwidth-raster
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -869,15 +846,7 @@ Re-generate IDR rank consistency plots for SICER and MACS side-by-side
|
|
|
\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-
|
|
|
-\series bold
|
|
|
-\begin_inset CommandInset label
|
|
|
-LatexCommand label
|
|
|
-name "fig:IDR-RC-H3K27me3"
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-Irreproducible Discovery Rate consistency plots for H3K27me3
|
|
|
+Comparison of quantification between STAR and HISAT2 for identical annotation
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -891,23 +860,18 @@ Irreproducible Discovery Rate consistency plots for H3K27me3
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Standard
|
|
|
-\begin_inset Float table
|
|
|
+\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
|
-\begin_inset Flex TODO Note (inline)
|
|
|
-status open
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-Need
|
|
|
-\emph on
|
|
|
-median
|
|
|
-\emph default
|
|
|
- peak width, not mean
|
|
|
-\end_layout
|
|
|
+\begin_inset Graphics
|
|
|
+ filename graphics/CD4-csaw/rnaseq-compare/star-vs-salmon-CROP.png
|
|
|
+ lyxscale 25
|
|
|
+ width 100col%
|
|
|
+ groupId colwidth-raster
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -915,204 +879,135 @@ median
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\align center
|
|
|
-\begin_inset Tabular
|
|
|
-<lyxtabular version="3" rows="4" columns="5">
|
|
|
-<features tabularvalignment="middle">
|
|
|
-<column alignment="center" valignment="top">
|
|
|
-<column alignment="center" valignment="top">
|
|
|
-<column alignment="center" valignment="top">
|
|
|
-<column alignment="center" valignment="top">
|
|
|
-<column alignment="center" valignment="top">
|
|
|
-<row>
|
|
|
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
+\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-Histone Mark
|
|
|
+Comparison of quantification between STAR and Salmon for identical annotation
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-# Peaks
|
|
|
+
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-Mean peak width
|
|
|
+
|
|
|
\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Float figure
|
|
|
+wide false
|
|
|
+sideways false
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-genome coverage
|
|
|
-\end_layout
|
|
|
+\align center
|
|
|
+\begin_inset Graphics
|
|
|
+ filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto-CROP.png
|
|
|
+ lyxscale 25
|
|
|
+ width 100col%
|
|
|
+ groupId colwidth-raster
|
|
|
|
|
|
\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-read coverage
|
|
|
+
|
|
|
\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-</row>
|
|
|
-<row>
|
|
|
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
+\begin_layout Plain Layout
|
|
|
+\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-H3K4me2
|
|
|
+Comparison of quantification between Salmon and Kallisto for identical annotatio
|
|
|
+n
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-14965
|
|
|
+
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-3970
|
|
|
+
|
|
|
\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Float figure
|
|
|
+wide false
|
|
|
+sideways false
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-1.92%
|
|
|
-\end_layout
|
|
|
+\align center
|
|
|
+\begin_inset Graphics
|
|
|
+ filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal-CROP.png
|
|
|
+ lyxscale 25
|
|
|
+ width 100col%
|
|
|
+ groupId colwidth-raster
|
|
|
|
|
|
\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-14.2%
|
|
|
+
|
|
|
\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-</row>
|
|
|
-<row>
|
|
|
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
+\begin_layout Plain Layout
|
|
|
+\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-H3K4me3
|
|
|
+Comparison of quantification between Salmon with and without Shoal for identical
|
|
|
+ annotation
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-6163
|
|
|
+
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-2946
|
|
|
-\end_layout
|
|
|
-
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-0.588%
|
|
|
\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-6.57%
|
|
|
+\begin_layout Itemize
|
|
|
+Ultimately selected Shoal as quantification, Ensembl as annotation.
|
|
|
+ Why? Running downstream analyses with all quant methods and both annotations
|
|
|
+ showed very little practical difference, so choice was not terribly important.
|
|
|
+ Prefer Shoal due to theoretical advantages.
|
|
|
+ To note in discussion: reproducible workflow made it easy to do this, enabling
|
|
|
+ an informed decision.
|
|
|
\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-</row>
|
|
|
-<row>
|
|
|
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-H3K27me3
|
|
|
+\begin_layout Subsection
|
|
|
+RNA-seq has a large confounding batch effect
|
|
|
\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Float figure
|
|
|
+wide false
|
|
|
+sideways false
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-18139
|
|
|
-\end_layout
|
|
|
-
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-18967
|
|
|
+Just take the top row
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-11.1%
|
|
|
-\end_layout
|
|
|
-
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
|
|
|
-\begin_inset Text
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-22.5%
|
|
|
\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
-</cell>
|
|
|
-</row>
|
|
|
-</lyxtabular>
|
|
|
+\begin_layout Plain Layout
|
|
|
+\align center
|
|
|
+\begin_inset Graphics
|
|
|
+ filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-CROP.png
|
|
|
+ lyxscale 25
|
|
|
+ width 100col%
|
|
|
+ groupId colwidth-raster
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -1127,11 +1022,11 @@ H3K27me3
|
|
|
\series bold
|
|
|
\begin_inset CommandInset label
|
|
|
LatexCommand label
|
|
|
-name "tab:peak-calling-summary"
|
|
|
+name "fig:RNA-seq-weights-vs-covars"
|
|
|
|
|
|
\end_inset
|
|
|
|
|
|
-SICER+IDR peak-calling summary
|
|
|
+RNA-seq sample weights, grouped by experimental and technical covariates
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1139,62 +1034,13 @@ SICER+IDR peak-calling summary
|
|
|
|
|
|
\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
-
|
|
|
+\begin_layout Plain Layout
|
|
|
|
|
|
\end_layout
|
|
|
|
|
|
-\begin_layout Standard
|
|
|
-Figures
|
|
|
-\begin_inset CommandInset ref
|
|
|
-LatexCommand ref
|
|
|
-reference "fig:IDR-RC-H3K4me2"
|
|
|
-plural "false"
|
|
|
-caps "false"
|
|
|
-noprefix "false"
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-,
|
|
|
-\begin_inset CommandInset ref
|
|
|
-LatexCommand ref
|
|
|
-reference "fig:IDR-RC-H3K4me3"
|
|
|
-plural "false"
|
|
|
-caps "false"
|
|
|
-noprefix "false"
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-, and
|
|
|
-\begin_inset CommandInset ref
|
|
|
-LatexCommand ref
|
|
|
-reference "fig:IDR-RC-H3K27me3"
|
|
|
-plural "false"
|
|
|
-caps "false"
|
|
|
-noprefix "false"
|
|
|
-
|
|
|
\end_inset
|
|
|
|
|
|
- show the IDR rank-consistency plots for peaks called in an arbitrarily-chosen
|
|
|
- pair of donors.
|
|
|
- For all 3 histone marks, when the peaks for each donor are ranked according
|
|
|
- to their scores, SICER produces much more reproducible results between
|
|
|
- donors.
|
|
|
- This is consistent with SICER's stated goal of identifying broad peaks,
|
|
|
- in contrast to MACS, which is designed for identifying sharp peaks.
|
|
|
- Based on this observation, the SICER peak calls were used for all downstream
|
|
|
- analyses that involved ChIP-seq peaks.
|
|
|
- Table
|
|
|
-\begin_inset CommandInset ref
|
|
|
-LatexCommand ref
|
|
|
-reference "tab:peak-calling-summary"
|
|
|
-plural "false"
|
|
|
-caps "false"
|
|
|
-noprefix "false"
|
|
|
-
|
|
|
-\end_inset
|
|
|
|
|
|
- gives a summary of the peak calling statistics for each histone mark.
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Standard
|
|
@@ -1206,10 +1052,10 @@ status open
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
|
\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/Promoter Peak Distance Profile-PAGE1-CROP.pdf
|
|
|
- lyxscale 50
|
|
|
+ filename graphics/CD4-csaw/RNA-seq/PCA-no-batchsub-CROP.png
|
|
|
+ lyxscale 25
|
|
|
width 100col%
|
|
|
- groupId colwidth
|
|
|
+ groupId colwidth-raster
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -1224,20 +1070,11 @@ status open
|
|
|
\series bold
|
|
|
\begin_inset CommandInset label
|
|
|
LatexCommand label
|
|
|
-name "fig:effective-promoter-radius"
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-Enrichment of peaks in promoter neighborhoods.
|
|
|
-\end_layout
|
|
|
+name "fig:RNA-PCA-no-batchsub"
|
|
|
|
|
|
\end_inset
|
|
|
|
|
|
-
|
|
|
-\end_layout
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-
|
|
|
+RNA-seq PCoA plot showing clear batch effect
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1245,40 +1082,23 @@ Enrichment of peaks in promoter neighborhoods.
|
|
|
|
|
|
\end_layout
|
|
|
|
|
|
-\begin_layout Itemize
|
|
|
-Each histone mark is enriched within a certain radius of gene TSS positions,
|
|
|
- but that radius is different for each mark (figure
|
|
|
-\begin_inset CommandInset ref
|
|
|
-LatexCommand ref
|
|
|
-reference "fig:effective-promoter-radius"
|
|
|
-plural "false"
|
|
|
-caps "false"
|
|
|
-noprefix "false"
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-, previously in
|
|
|
-\begin_inset CommandInset citation
|
|
|
-LatexCommand cite
|
|
|
-key "LaMere2016"
|
|
|
-literal "false"
|
|
|
-
|
|
|
\end_inset
|
|
|
|
|
|
- Fig.
|
|
|
- S2)
|
|
|
-\end_layout
|
|
|
|
|
|
-\begin_layout Subsection
|
|
|
-RNA-seq align+quant method comparison
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Standard
|
|
|
+\begin_inset Float figure
|
|
|
+wide false
|
|
|
+sideways false
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
\begin_inset Flex TODO Note (inline)
|
|
|
status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-Maybe fix up the excessive axis ranges for these plots?
|
|
|
+Probably don't need this
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1286,16 +1106,10 @@ Maybe fix up the excessive axis ranges for these plots?
|
|
|
|
|
|
\end_layout
|
|
|
|
|
|
-\begin_layout Standard
|
|
|
-\begin_inset Float figure
|
|
|
-wide false
|
|
|
-sideways false
|
|
|
-status collapsed
|
|
|
-
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
|
\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star-CROP.png
|
|
|
+ filename graphics/CD4-csaw/RNA-seq/PCA-naive-batchsub-CROP.png
|
|
|
lyxscale 25
|
|
|
width 100col%
|
|
|
groupId colwidth-raster
|
|
@@ -1309,15 +1123,19 @@ status collapsed
|
|
|
\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-Comparison of STAR quantification between Ensembl and Entrez gene identifiers
|
|
|
-\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
+\series bold
|
|
|
+\begin_inset CommandInset label
|
|
|
+LatexCommand label
|
|
|
+name "fig:RNA-PCA-limma-batchsub"
|
|
|
|
|
|
+\end_inset
|
|
|
|
|
|
+RNA-seq PCoA plot after limma batch subtraction
|
|
|
\end_layout
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
+\end_inset
|
|
|
+
|
|
|
|
|
|
\end_layout
|
|
|
|
|
@@ -1330,12 +1148,12 @@ Comparison of STAR quantification between Ensembl and Entrez gene identifiers
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
|
\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal-CROP.png
|
|
|
+ filename graphics/CD4-csaw/RNA-seq/PCA-combat-batchsub-CROP.png
|
|
|
lyxscale 25
|
|
|
width 100col%
|
|
|
groupId colwidth-raster
|
|
@@ -1349,8 +1167,15 @@ status collapsed
|
|
|
\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-Comparison of Salmon+Shoal quantification between Ensembl and Entrez gene
|
|
|
- identifiers
|
|
|
+
|
|
|
+\series bold
|
|
|
+\begin_inset CommandInset label
|
|
|
+LatexCommand label
|
|
|
+name "fig:RNA-PCA-ComBat-batchsub"
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+RNA-seq PCoA plot after ComBat batch subtraction
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1363,35 +1188,38 @@ Comparison of Salmon+Shoal quantification between Ensembl and Entrez gene
|
|
|
|
|
|
\end_layout
|
|
|
|
|
|
+\begin_layout Itemize
|
|
|
+RNA-seq batch effect can be partially corrected, but still induces uncorrectable
|
|
|
+ biases in downstream analysis
|
|
|
+\end_layout
|
|
|
+
|
|
|
\begin_layout Standard
|
|
|
-\begin_inset Float figure
|
|
|
-wide false
|
|
|
-sideways false
|
|
|
-status collapsed
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\align center
|
|
|
-\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2-CROP.png
|
|
|
- lyxscale 25
|
|
|
- width 100col%
|
|
|
- groupId colwidth-raster
|
|
|
+Figures showing p-value histograms for within-batch and cross-batch contrasts,
|
|
|
+ showing that cross-batch contrasts have attenuated signal, as do comparisons
|
|
|
+ within the bad batch
|
|
|
+\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
|
|
|
|
|
|
\end_layout
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-\begin_inset Caption Standard
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-Comparison of quantification between STAR and HISAT2 for identical annotation
|
|
|
+\begin_layout Subsection
|
|
|
+H3K4 and H3K27 methylation occur in broad regions and are enriched near
|
|
|
+ promoters
|
|
|
\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
-
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
|
|
|
+\begin_layout Plain Layout
|
|
|
+Replace these figures with a single table of # of peaks called at chosen
|
|
|
+ IDR threshold, showing that SICER calls more peaks than MACS
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1403,15 +1231,15 @@ Comparison of quantification between STAR and HISAT2 for identical annotation
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\align center
|
|
|
-\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/rnaseq-compare/star-vs-salmon-CROP.png
|
|
|
- lyxscale 25
|
|
|
- width 100col%
|
|
|
- groupId colwidth-raster
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+Re-generate IDR rank consistency plots for SICER and MACS side-by-side
|
|
|
+\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -1422,7 +1250,15 @@ status collapsed
|
|
|
\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-Comparison of quantification between STAR and Salmon for identical annotation
|
|
|
+
|
|
|
+\series bold
|
|
|
+\begin_inset CommandInset label
|
|
|
+LatexCommand label
|
|
|
+name "fig:IDR-RC-H3K4me2"
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+Irreproducible Discovery Rate consistency plots for H3K4me2
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1439,15 +1275,15 @@ Comparison of quantification between STAR and Salmon for identical annotation
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\align center
|
|
|
-\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto-CROP.png
|
|
|
- lyxscale 25
|
|
|
- width 100col%
|
|
|
- groupId colwidth-raster
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+Re-generate IDR rank consistency plots for SICER and MACS side-by-side
|
|
|
+\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -1458,8 +1294,15 @@ status collapsed
|
|
|
\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-Comparison of quantification between Salmon and Kallisto for identical annotatio
|
|
|
-n
|
|
|
+
|
|
|
+\series bold
|
|
|
+\begin_inset CommandInset label
|
|
|
+LatexCommand label
|
|
|
+name "fig:IDR-RC-H3K4me3"
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+Irreproducible Discovery Rate consistency plots for H3K4me3
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1476,15 +1319,15 @@ n
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\align center
|
|
|
-\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal-CROP.png
|
|
|
- lyxscale 25
|
|
|
- width 100col%
|
|
|
- groupId colwidth-raster
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+Re-generate IDR rank consistency plots for SICER and MACS side-by-side
|
|
|
+\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -1495,13 +1338,15 @@ status collapsed
|
|
|
\begin_inset Caption Standard
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-Comparison of quantification between Salmon with and without Shoal for identical
|
|
|
- annotation
|
|
|
-\end_layout
|
|
|
|
|
|
-\end_inset
|
|
|
+\series bold
|
|
|
+\begin_inset CommandInset label
|
|
|
+LatexCommand label
|
|
|
+name "fig:IDR-RC-H3K27me3"
|
|
|
|
|
|
+\end_inset
|
|
|
|
|
|
+Irreproducible Discovery Rate consistency plots for H3K27me3
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1509,32 +1354,29 @@ Comparison of quantification between Salmon with and without Shoal for identical
|
|
|
|
|
|
\end_layout
|
|
|
|
|
|
-\begin_layout Itemize
|
|
|
-Ultimately selected shoal as quantification, Ensembl as annotation.
|
|
|
- Why? Running downstream analyses with all quant methods and both annotations
|
|
|
- showed very little practical difference, so choice was not terribly important.
|
|
|
- Prefer shoal due to theoretical advantages.
|
|
|
- To note in discussion: reproducible workflow made it easy to do this, enabling
|
|
|
- an informed decision.
|
|
|
-\end_layout
|
|
|
+\end_inset
|
|
|
+
|
|
|
|
|
|
-\begin_layout Subsection
|
|
|
-RNA-seq has a large confounding batch effect
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Standard
|
|
|
-\begin_inset Float figure
|
|
|
+\begin_inset Float table
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
|
-\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-CROP.png
|
|
|
- lyxscale 25
|
|
|
- width 100col%
|
|
|
- groupId colwidth-raster
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+Need
|
|
|
+\emph on
|
|
|
+median
|
|
|
+\emph default
|
|
|
+ peak width, not mean
|
|
|
+\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -1542,91 +1384,204 @@ status collapsed
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\begin_inset Caption Standard
|
|
|
+\align center
|
|
|
+\begin_inset Tabular
|
|
|
+<lyxtabular version="3" rows="4" columns="5">
|
|
|
+<features tabularvalignment="middle">
|
|
|
+<column alignment="center" valignment="top">
|
|
|
+<column alignment="center" valignment="top">
|
|
|
+<column alignment="center" valignment="top">
|
|
|
+<column alignment="center" valignment="top">
|
|
|
+<column alignment="center" valignment="top">
|
|
|
+<row>
|
|
|
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-
|
|
|
-\series bold
|
|
|
-\begin_inset CommandInset label
|
|
|
-LatexCommand label
|
|
|
-name "fig:RNA-seq-weights-vs-covars"
|
|
|
+Histone Mark
|
|
|
+\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
-RNA-seq sample weights, grouped by experimental and technical covariates
|
|
|
+\begin_layout Plain Layout
|
|
|
+# Peaks
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
+\begin_layout Plain Layout
|
|
|
+Mean peak width
|
|
|
+\end_layout
|
|
|
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+genome coverage
|
|
|
\end_layout
|
|
|
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
+
|
|
|
\begin_layout Plain Layout
|
|
|
+read coverage
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+</row>
|
|
|
+<row>
|
|
|
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
+\begin_layout Plain Layout
|
|
|
+H3K4me2
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
+\begin_layout Plain Layout
|
|
|
+14965
|
|
|
+\end_layout
|
|
|
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+3970
|
|
|
\end_layout
|
|
|
|
|
|
-\begin_layout Standard
|
|
|
-\begin_inset Float figure
|
|
|
-wide false
|
|
|
-sideways false
|
|
|
-status open
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\align center
|
|
|
-\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/RNA-seq/PCA-no-batchsub-CROP.png
|
|
|
- lyxscale 25
|
|
|
- width 100col%
|
|
|
- groupId colwidth-raster
|
|
|
+1.92%
|
|
|
+\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
+\begin_layout Plain Layout
|
|
|
+14.2%
|
|
|
+\end_layout
|
|
|
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+</row>
|
|
|
+<row>
|
|
|
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+H3K4me3
|
|
|
\end_layout
|
|
|
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
+
|
|
|
\begin_layout Plain Layout
|
|
|
-\begin_inset Caption Standard
|
|
|
+6163
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
+2946
|
|
|
+\end_layout
|
|
|
|
|
|
-\series bold
|
|
|
-\begin_inset CommandInset label
|
|
|
-LatexCommand label
|
|
|
-name "fig:RNA-PCA-no-batchsub"
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+0.588%
|
|
|
+\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
-RNA-seq PCoA plot showing clear batch effect
|
|
|
+\begin_layout Plain Layout
|
|
|
+6.57%
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
+</cell>
|
|
|
+</row>
|
|
|
+<row>
|
|
|
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
+\begin_layout Plain Layout
|
|
|
+H3K27me3
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
+\begin_layout Plain Layout
|
|
|
+18139
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
+\begin_layout Plain Layout
|
|
|
+18967
|
|
|
+\end_layout
|
|
|
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+11.1%
|
|
|
\end_layout
|
|
|
|
|
|
-\begin_layout Standard
|
|
|
-\begin_inset Float figure
|
|
|
-wide false
|
|
|
-sideways false
|
|
|
-status open
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
|
|
|
+\begin_inset Text
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-\align center
|
|
|
-\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/RNA-seq/PCA-naive-batchsub-CROP.png
|
|
|
- lyxscale 25
|
|
|
- width 100col%
|
|
|
- groupId colwidth-raster
|
|
|
+22.5%
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+</cell>
|
|
|
+</row>
|
|
|
+</lyxtabular>
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -1641,11 +1596,11 @@ status open
|
|
|
\series bold
|
|
|
\begin_inset CommandInset label
|
|
|
LatexCommand label
|
|
|
-name "fig:RNA-PCA-limma-batchsub"
|
|
|
+name "tab:peak-calling-summary"
|
|
|
|
|
|
\end_inset
|
|
|
|
|
|
-RNA-seq PCoA plot showing clear batch effect
|
|
|
+SICER+IDR peak-calling summary
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1655,7 +1610,60 @@ RNA-seq PCoA plot showing clear batch effect
|
|
|
|
|
|
\end_inset
|
|
|
|
|
|
-
|
|
|
+
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\begin_layout Standard
|
|
|
+Figures
|
|
|
+\begin_inset CommandInset ref
|
|
|
+LatexCommand ref
|
|
|
+reference "fig:IDR-RC-H3K4me2"
|
|
|
+plural "false"
|
|
|
+caps "false"
|
|
|
+noprefix "false"
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+,
|
|
|
+\begin_inset CommandInset ref
|
|
|
+LatexCommand ref
|
|
|
+reference "fig:IDR-RC-H3K4me3"
|
|
|
+plural "false"
|
|
|
+caps "false"
|
|
|
+noprefix "false"
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+, and
|
|
|
+\begin_inset CommandInset ref
|
|
|
+LatexCommand ref
|
|
|
+reference "fig:IDR-RC-H3K27me3"
|
|
|
+plural "false"
|
|
|
+caps "false"
|
|
|
+noprefix "false"
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+ show the IDR rank-consistency plots for peaks called in an arbitrarily chosen
|
|
|
+ pair of donors.
|
|
|
+ For all 3 histone marks, when the peaks for each donor are ranked according
|
|
|
+ to their scores, SICER produces much more reproducible results between
|
|
|
+ donors than MACS.
|
|
|
+ This is consistent with SICER's stated goal of identifying broad peaks,
|
|
|
+ in contrast to MACS, which is designed for identifying sharp peaks.
|
|
|
+ Based on this observation, the SICER peak calls were used for all downstream
|
|
|
+ analyses that involved ChIP-seq peaks.
|
|
|
+ Table
|
|
|
+\begin_inset CommandInset ref
|
|
|
+LatexCommand ref
|
|
|
+reference "tab:peak-calling-summary"
|
|
|
+plural "false"
|
|
|
+caps "false"
|
|
|
+noprefix "false"
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+ gives a summary of the peak calling statistics for each histone mark.
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Standard
|
|
@@ -1667,10 +1675,10 @@ status open
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
|
\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/RNA-seq/PCA-combat-batchsub-CROP.png
|
|
|
- lyxscale 25
|
|
|
+ filename graphics/CD4-csaw/Promoter Peak Distance Profile-PAGE1-CROP.pdf
|
|
|
+ lyxscale 50
|
|
|
width 100col%
|
|
|
- groupId colwidth-raster
|
|
|
+ groupId colwidth
|
|
|
|
|
|
\end_inset
|
|
|
|
|
@@ -1685,16 +1693,20 @@ status open
|
|
|
\series bold
|
|
|
\begin_inset CommandInset label
|
|
|
LatexCommand label
|
|
|
-name "fig:RNA-PCA-ComBat-batchsub"
|
|
|
+name "fig:effective-promoter-radius"
|
|
|
|
|
|
\end_inset
|
|
|
|
|
|
-RNA-seq PCoA plot showing clear batch effect
|
|
|
+Enrichment of peaks in promoter neighborhoods.
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
|
|
|
|
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1703,23 +1715,27 @@ RNA-seq PCoA plot showing clear batch effect
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Itemize
|
|
|
-RNA-seq batch effect can be partially corrected, but still induces uncorrectable
|
|
|
- biases in downstream analysis
|
|
|
-\end_layout
|
|
|
+Each histone mark is enriched within a certain radius of gene TSS positions,
|
|
|
+ but that radius is different for each mark (figure
|
|
|
+\begin_inset CommandInset ref
|
|
|
+LatexCommand ref
|
|
|
+reference "fig:effective-promoter-radius"
|
|
|
+plural "false"
|
|
|
+caps "false"
|
|
|
+noprefix "false"
|
|
|
|
|
|
-\begin_layout Standard
|
|
|
-\begin_inset Flex TODO Note (inline)
|
|
|
-status open
|
|
|
+\end_inset
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-Figures showing p-value histograms for within-batch and cross-batch contrasts,
|
|
|
- showing that cross-batch contrasts have attenuated signal, as do comparisons
|
|
|
- within the bad batch
|
|
|
-\end_layout
|
|
|
+, previously in
|
|
|
+\begin_inset CommandInset citation
|
|
|
+LatexCommand cite
|
|
|
+key "LaMere2016"
|
|
|
+literal "false"
|
|
|
|
|
|
\end_inset
|
|
|
|
|
|
-
|
|
|
+ Fig.
|
|
|
+ S2)
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Subsection
|
|
@@ -1730,7 +1746,7 @@ ChIP-seq blacklisting is important
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
@@ -1766,7 +1782,7 @@ Cross-correlation plots with blacklisted reads removed
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
@@ -1798,49 +1814,16 @@ Cross-correlation plots without removing blacklisted reads
|
|
|
|
|
|
\end_layout
|
|
|
|
|
|
-\begin_layout Standard
|
|
|
-\begin_inset Float figure
|
|
|
-wide false
|
|
|
-sideways false
|
|
|
-status collapsed
|
|
|
+\begin_layout Subsection
|
|
|
+ChIP-seq normalization
|
|
|
+\end_layout
|
|
|
|
|
|
-\begin_layout Plain Layout
|
|
|
-\align center
|
|
|
+\begin_layout Standard
|
|
|
\begin_inset Flex TODO Note (inline)
|
|
|
status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
-Un-break the figure legend
|
|
|
-\end_layout
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-
|
|
|
-\end_layout
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-\align center
|
|
|
-\begin_inset Graphics
|
|
|
- filename graphics/CD4-csaw/csaw/CCF-max-plot-CROP.pdf
|
|
|
- lyxscale 50
|
|
|
- width 100col%
|
|
|
- groupId colwidth
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-
|
|
|
-\end_layout
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-\begin_inset Caption Standard
|
|
|
-
|
|
|
-\begin_layout Plain Layout
|
|
|
-Estimated fragment size in samples before and after blacklisting
|
|
|
-\end_layout
|
|
|
-
|
|
|
-\end_inset
|
|
|
-
|
|
|
-
|
|
|
+Maybe just one of these figures and then say the other 2 were similar
|
|
|
\end_layout
|
|
|
|
|
|
\end_inset
|
|
@@ -1848,15 +1831,11 @@ Estimated fragment size in samples before and after blacklisting
|
|
|
|
|
|
\end_layout
|
|
|
|
|
|
-\begin_layout Subsection
|
|
|
-ChIP-seq normalization
|
|
|
-\end_layout
|
|
|
-
|
|
|
\begin_layout Standard
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
@@ -1894,7 +1873,7 @@ MA plot of H3K4me2 read counts in 10kb bins for two arbitrary samples
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
@@ -1932,7 +1911,7 @@ MA plot of H3K4me3 read counts in 10kb bins for two arbitrary samples
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
@@ -1970,6 +1949,19 @@ MA plot of H3K27me3 read counts in 10kb bins for two arbitrary samples
|
|
|
ChIP-seq must be corrected for hidden confounding factors
|
|
|
\end_layout
|
|
|
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+Consolidate these into a single 2x3 grid
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
+\end_layout
|
|
|
+
|
|
|
\begin_layout Standard
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
@@ -2257,6 +2249,19 @@ H3K4 and H3K27 promoter methylation has broadly the expected correlation
|
|
|
with gene expression
|
|
|
\end_layout
|
|
|
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+This section can easily be cut, especially if I can't find those plots.
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
+\end_layout
|
|
|
+
|
|
|
\begin_layout Itemize
|
|
|
H3K4 is correlated with higher expression, and H3K27 is correlated with
|
|
|
lower expression genome-wide
|
|
@@ -2295,7 +2300,7 @@ MOFA recovers biologically relevant variation from blind analysis by correlating
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
@@ -2353,7 +2358,20 @@ noprefix "false"
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+Maybe drop this one
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
+\end_layout
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
@@ -2397,7 +2415,20 @@ Sample distribution for each latent factor estimated by MOFA.
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+Talk about how this supports the convergence hypothesis
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
+\end_layout
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
@@ -2470,7 +2501,7 @@ LF2 is clearly the RNA-seq batch effect
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status collapsed
|
|
|
+status open
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
@@ -2931,15 +2962,55 @@ literal "false"
|
|
|
\end_inset
|
|
|
|
|
|
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+Show the figures where the negative result ended this line of inquiry
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Section
|
|
|
Discussion
|
|
|
\end_layout
|
|
|
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+Try to boil it down to 3 main messages to get across
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
+\end_layout
|
|
|
+
|
|
|
\begin_layout Itemize
|
|
|
"Promoter radius" is not constant and must be defined empirically for a
|
|
|
- given data set
|
|
|
+ given data set.
|
|
|
+ Coverage within promoter radius has an expression correlation as well
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Flex TODO Note (inline)
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+MOFA should be a footnote to something else, not its own point
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
\end_layout
|
|
|
|
|
|
\begin_layout Itemize
|
|
@@ -2967,17 +3038,11 @@ MOFA confirmed that the already-implemented batch correction in the RNA-seq
|
|
|
\end_layout
|
|
|
|
|
|
\end_deeper
|
|
|
-\begin_layout Itemize
|
|
|
-Naive-to-memory convergence implies that naive cells are differentiating
|
|
|
- into memory cells, and that gene expression and H3K4 methylation are involved
|
|
|
- in this differentiation while H3K27me3 is less involved
|
|
|
-\end_layout
|
|
|
-
|
|
|
\begin_layout Standard
|
|
|
\begin_inset Float figure
|
|
|
wide false
|
|
|
sideways false
|
|
|
-status open
|
|
|
+status collapsed
|
|
|
|
|
|
\begin_layout Plain Layout
|
|
|
\align center
|
|
@@ -3011,6 +3076,13 @@ LaMere 2016 Figure 8, reproduced with permission.
|
|
|
|
|
|
\end_layout
|
|
|
|
|
|
+\begin_layout Itemize
|
|
|
+Naive-to-memory convergence implies that naive cells are differentiating
|
|
|
+ into memory cells, and that gene expression and H3K4 methylation are involved
|
|
|
+ in this differentiation while H3K27me3 is less involved
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\begin_deeper
|
|
|
\begin_layout Itemize
|
|
|
Convergence is consistent with Lamere2016 fig 8
|
|
|
\begin_inset CommandInset citation
|
|
@@ -3028,10 +3100,65 @@ H3K27me3, canonically regarded as a deactivating mark, seems to have a more
|
|
|
complex effect
|
|
|
\end_layout
|
|
|
|
|
|
+\end_deeper
|
|
|
+\begin_layout Itemize
|
|
|
+TSS positional coverage
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\begin_layout Standard
|
|
|
+\begin_inset Float figure
|
|
|
+wide false
|
|
|
+sideways true
|
|
|
+status open
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+\align center
|
|
|
+\begin_inset Graphics
|
|
|
+ filename graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
|
|
|
+ lyxscale 50
|
|
|
+ width 100theight%
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+\begin_inset Caption Standard
|
|
|
+
|
|
|
+\begin_layout Plain Layout
|
|
|
+\begin_inset CommandInset label
|
|
|
+LatexCommand label
|
|
|
+name "fig:rulegraph"
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
+\series bold
|
|
|
+Dependency graph of steps in reproducible workflow
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_inset
|
|
|
+
|
|
|
+
|
|
|
+\end_layout
|
|
|
+
|
|
|
\begin_layout Itemize
|
|
|
Discuss advantages of developing using a reproducible workflow
|
|
|
\end_layout
|
|
|
|
|
|
+\begin_deeper
|
|
|
+\begin_layout Itemize
|
|
|
+Decision-making based on trying every option and running the workflow downstream
|
|
|
+ to see the effects
|
|
|
+\end_layout
|
|
|
+
|
|
|
+\end_deeper
|
|
|
\begin_layout Chapter
|
|
|
Improving array-based analyses of transplant rejection by optimizing data
|
|
|
preprocessing
|