Просмотр исходного кода

Add a bunch of figures to chapter 2

Ryan C. Thompson 5 лет назад
Родитель
Сommit
52d9524b51

+ 1 - 0
.gitignore

@@ -5,6 +5,7 @@
 *RASTER.png
 *RASTER.png
 *PAGE*.pdf
 *PAGE*.pdf
 *CROP.pdf
 *CROP.pdf
+*CROP.png
 
 
 # Data doesn't go in git
 # Data doesn't go in git
 /data
 /data

BIN
graphics/CD4-csaw/Promoter Peak Distance Profile.pdf


BIN
graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal.png


BIN
graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star.png


BIN
graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto.png


BIN
graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal.png


BIN
graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2.png


BIN
graphics/CD4-csaw/rnaseq-compare/star-vs-salmon.png


+ 743 - 14
thesis.lyx

@@ -735,29 +735,758 @@ H3K4 and H3K27 methylation occur in broad regions and are enriched near
  promoters
  promoters
 \end_layout
 \end_layout
 
 
-\begin_layout Itemize
-Figures comparing MACS (non-broad peak caller) to SICER/epic (broad peak
- caller)
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Re-generate IDR rank consistency plots for SICER and MACS side-by-side
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+
+\series bold
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:IDR-RC-H3K4me2"
+
+\end_inset
+
+Irreproducible Discovery Rate consistency plots for H3K4me2
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Re-generate IDR rank consistency plots for SICER and MACS side-by-side
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+
+\series bold
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:IDR-RC-H3K4me3"
+
+\end_inset
+
+Irreproducible Discovery Rate consistency plots for H3K4me3
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Re-generate IDR rank consistency plots for SICER and MACS side-by-side
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+
+\series bold
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:IDR-RC-H3K27me3"
+
+\end_inset
+
+Irreproducible Discovery Rate consistency plots for H3K27me3
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float table
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Need 
+\emph on
+median
+\emph default
+ peak width, not mean
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Tabular
+<lyxtabular version="3" rows="4" columns="5">
+<features tabularvalignment="middle">
+<column alignment="center" valignment="top">
+<column alignment="center" valignment="top">
+<column alignment="center" valignment="top">
+<column alignment="center" valignment="top">
+<column alignment="center" valignment="top">
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Histone Mark
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+# Peaks
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Mean peak width
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+genome coverage
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+read coverage
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+H3K4me2
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+14965
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+3970
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+1.92%
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+14.2%
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+H3K4me3
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+6163
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2946
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+0.588%
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+6.57%
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+H3K27me3
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+18139
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+18967
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+11.1%
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+22.5%
+\end_layout
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+
+\series bold
+\begin_inset CommandInset label
+LatexCommand label
+name "tab:peak-calling-summary"
+
+\end_inset
+
+SICER+IDR peak-calling summary
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Figures 
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:IDR-RC-H3K4me2"
+plural "false"
+caps "false"
+noprefix "false"
+
+\end_inset
+
+, 
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:IDR-RC-H3K4me3"
+plural "false"
+caps "false"
+noprefix "false"
+
+\end_inset
+
+, and 
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:IDR-RC-H3K27me3"
+plural "false"
+caps "false"
+noprefix "false"
+
+\end_inset
+
+ show the IDR rank-consistency plots for peaks called in an arbitrarily-chosen
+ pair of donors.
+ For all 3 histone marks, when the peaks for each donor are ranked according
+ to their scores, SICER produces much more reproducible results between
+ donors.
+ This is consistent with SICER's stated goal of identifying broad peaks,
+ in contrast to MACS, which is designed for identifying sharp peaks.
+ Based on this observation, the SICER peak calls were used for all downstream
+ analyses that involved ChIP-seq peaks.
+ Table 
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "tab:peak-calling-summary"
+plural "false"
+caps "false"
+noprefix "false"
+
+\end_inset
+
+ gives a summary of the peak calling statistics for each histone mark.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/Promoter Peak Distance Profile-PAGE1-CROP.pdf
+	width 100col%
+	groupId colwidth
+
+\end_inset
+
+
 \end_layout
 \end_layout
 
 
-\begin_deeper
-\begin_layout Itemize
-Compare peak sizes and number of called peaks
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+
+\series bold
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:effective-promoter-radius"
+
+\end_inset
+
+Enrichment of peaks in promoter neighborhoods.
 \end_layout
 \end_layout
 
 
-\begin_layout Itemize
-Show representative IDR consistency plots for both
+\end_inset
+
+
 \end_layout
 \end_layout
 
 
-\end_deeper
-\begin_layout Itemize
-IDR analysis shows that SICER-called peaks are much more reproducible between
- biological replicates
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
 \end_layout
 \end_layout
 
 
 \begin_layout Itemize
 \begin_layout Itemize
 Each histone mark is enriched within a certain radius of gene TSS positions,
 Each histone mark is enriched within a certain radius of gene TSS positions,
- but that radius is different for each mark (figure)
+ but that radius is different for each mark (figure 
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:effective-promoter-radius"
+plural "false"
+caps "false"
+noprefix "false"
+
+\end_inset
+
+, previously in 
+\begin_inset CommandInset citation
+LatexCommand cite
+key "LaMere2016"
+literal "false"
+
+\end_inset
+
+ Fig.
+ S2)
+\end_layout
+
+\begin_layout Subsection
+RNA-seq align+quant method selection
+\end_layout
+
+\begin_layout Standard
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Maybe fix up the axis ranges for these plots?
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status collapsed
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+Comparison of STAR quantification between Ensembl and Entrez gene identifiers
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status collapsed
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+Comparison of Salmon+Shoal quantification between Ensembl and Entrez gene
+ identifiers
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status collapsed
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+Comparison of quantification between STAR and HISAT2 for identical annotation
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status collapsed
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/star-vs-salmon-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+Comparison of quantification between STAR and Salmon for identical annotation
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+Comparison of quantification between Salmon and Kallisto for identical annotatio
+n
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+Comparison of quantification between Salmon with and without Shoal for identical
+ annotation
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+
 \end_layout
 \end_layout
 
 
 \begin_layout Subsection
 \begin_layout Subsection
@@ -996,7 +1725,7 @@ This section could probably use some citations
 \end_layout
 \end_layout
 
 
 \begin_layout Standard
 \begin_layout Standard
-Microarrays, bead ararys, and similar assays produce raw data in the form
+Microarrays, bead arrays, and similar assays produce raw data in the form
 of fluorescence intensity measurements, with each intensity measurement
 of fluorescence intensity measurements, with each intensity measurement
  proportional to the abundance of some fluorescently-labelled target DNA
  proportional to the abundance of some fluorescently-labelled target DNA
  or RNA sequence that base pairs to a specific probe sequence.
  or RNA sequence that base pairs to a specific probe sequence.