Pārlūkot izejas kodu

Revisions from Friday meeting with Andrew

Ryan C. Thompson 5 gadi atpakaļ
vecāks
revīzija
769f1f4b82
1 mainītis faili ar 630 papildinājumiem un 503 dzēšanām
  1. 630 503
      thesis.lyx

+ 630 - 503
thesis.lyx

@@ -183,7 +183,7 @@ in partial fulfillment of the requirements for the degree of
 \end_layout
 
 \begin_layout Date
-May 2019
+October 2019
 \end_layout
 
 \begin_layout Standard
@@ -549,41 +549,11 @@ Methods
 \end_layout
 
 \begin_layout Standard
-\begin_inset Float figure
-wide false
-sideways true
+\begin_inset Flex TODO Note (inline)
 status open
 
 \begin_layout Plain Layout
-\align center
-\begin_inset Graphics
-	filename graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
-	lyxscale 50
-	width 100theight%
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Plain Layout
-\begin_inset Caption Standard
-
-\begin_layout Plain Layout
-\begin_inset CommandInset label
-LatexCommand label
-name "fig:rulegraph"
-
-\end_inset
-
-
-\series bold
-Dependency graph of steps in reproducible workflow
-\end_layout
-
-\end_inset
-
-
+Move figures that are only justifying methods into this section
 \end_layout
 
 \end_inset
@@ -734,6 +704,12 @@ Focus on what hypotheses were tested, then select figures that show how
  those hypotheses were tested, even if the result is a negative.
 \end_layout
 
+\begin_layout Plain Layout
+Not every interesting result needs to be in here.
+ The chapter should tell a story.
+ 
+\end_layout
+
 \end_inset
 
 
@@ -754,23 +730,35 @@ Maybe reorder these sections to do RNA-seq, then ChIP-seq, then combined
 \end_layout
 
 \begin_layout Subsection
-H3K4 and H3K27 methylation occur in broad regions and are enriched near
- promoters
+RNA-seq align+quant method comparison
 \end_layout
 
 \begin_layout Standard
-\begin_inset Float figure
-wide false
-sideways false
+\begin_inset Flex TODO Note (inline)
 status open
 
 \begin_layout Plain Layout
-\begin_inset Flex TODO Note (inline)
+Maybe fix up the excessive axis ranges for these plots?
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
 status open
 
 \begin_layout Plain Layout
-Re-generate IDR rank consistency plots for SICER and MACS side-by-side
-\end_layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
 
 \end_inset
 
@@ -781,19 +769,15 @@ Re-generate IDR rank consistency plots for SICER and MACS side-by-side
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
-
-\series bold
-\begin_inset CommandInset label
-LatexCommand label
-name "fig:IDR-RC-H3K4me2"
+Comparison of STAR quantification between Ensembl and Entrez gene identifiers
+\end_layout
 
 \end_inset
 
-Irreproducible Discovery Rate consistency plots for H3K4me2
-\end_layout
 
-\end_inset
+\end_layout
 
+\begin_layout Plain Layout
 
 \end_layout
 
@@ -809,12 +793,12 @@ sideways false
 status open
 
 \begin_layout Plain Layout
-\begin_inset Flex TODO Note (inline)
-status open
-
-\begin_layout Plain Layout
-Re-generate IDR rank consistency plots for SICER and MACS side-by-side
-\end_layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
 
 \end_inset
 
@@ -825,15 +809,8 @@ Re-generate IDR rank consistency plots for SICER and MACS side-by-side
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
-
-\series bold
-\begin_inset CommandInset label
-LatexCommand label
-name "fig:IDR-RC-H3K4me3"
-
-\end_inset
-
-Irreproducible Discovery Rate consistency plots for H3K4me3
+Comparison of Salmon+Shoal quantification between Ensembl and Entrez gene
+ identifiers
 \end_layout
 
 \end_inset
@@ -853,12 +830,12 @@ sideways false
 status open
 
 \begin_layout Plain Layout
-\begin_inset Flex TODO Note (inline)
-status open
-
-\begin_layout Plain Layout
-Re-generate IDR rank consistency plots for SICER and MACS side-by-side
-\end_layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
 
 \end_inset
 
@@ -869,15 +846,7 @@ Re-generate IDR rank consistency plots for SICER and MACS side-by-side
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
-
-\series bold
-\begin_inset CommandInset label
-LatexCommand label
-name "fig:IDR-RC-H3K27me3"
-
-\end_inset
-
-Irreproducible Discovery Rate consistency plots for H3K27me3
+Comparison of quantification between STAR and HISAT2 for identical annotation
 \end_layout
 
 \end_inset
@@ -891,23 +860,18 @@ Irreproducible Discovery Rate consistency plots for H3K27me3
 \end_layout
 
 \begin_layout Standard
-\begin_inset Float table
+\begin_inset Float figure
 wide false
 sideways false
 status open
 
 \begin_layout Plain Layout
 \align center
-\begin_inset Flex TODO Note (inline)
-status open
-
-\begin_layout Plain Layout
-Need 
-\emph on
-median
-\emph default
- peak width, not mean
-\end_layout
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/star-vs-salmon-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
 
 \end_inset
 
@@ -915,204 +879,135 @@ median
 \end_layout
 
 \begin_layout Plain Layout
-\align center
-\begin_inset Tabular
-<lyxtabular version="3" rows="4" columns="5">
-<features tabularvalignment="middle">
-<column alignment="center" valignment="top">
-<column alignment="center" valignment="top">
-<column alignment="center" valignment="top">
-<column alignment="center" valignment="top">
-<column alignment="center" valignment="top">
-<row>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
+\begin_inset Caption Standard
 
 \begin_layout Plain Layout
-Histone Mark
+Comparison of quantification between STAR and Salmon for identical annotation
 \end_layout
 
 \end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-# Peaks
+
 \end_layout
 
 \end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-Mean peak width
+
 \end_layout
 
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
 
 \begin_layout Plain Layout
-genome coverage
-\end_layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
 
 \end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-read coverage
+
 \end_layout
 
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
+\begin_layout Plain Layout
+\begin_inset Caption Standard
 
 \begin_layout Plain Layout
-H3K4me2
+Comparison of quantification between Salmon and Kallisto for identical annotatio
+n
 \end_layout
 
 \end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-14965
+
 \end_layout
 
 \end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-3970
+
 \end_layout
 
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
 
 \begin_layout Plain Layout
-1.92%
-\end_layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
 
 \end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-14.2%
+
 \end_layout
 
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
+\begin_layout Plain Layout
+\begin_inset Caption Standard
 
 \begin_layout Plain Layout
-H3K4me3
+Comparison of quantification between Salmon with and without Shoal for identical
+ annotation
 \end_layout
 
 \end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-6163
+
 \end_layout
 
 \end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-2946
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-0.588%
 \end_layout
 
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-6.57%
+\begin_layout Itemize
+Ultimately selected Shoal for quantification and Ensembl for annotation.
+ Why? Running downstream analyses with all quant methods and both annotations
+ showed very little practical difference, so choice was not terribly important.
+ Prefer Shoal due to theoretical advantages.
+ To note in discussion: reproducible workflow made it easy to do this, enabling
+ an informed decision.
 \end_layout
 
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-H3K27me3
+\begin_layout Subsection
+RNA-seq has a large confounding batch effect
 \end_layout
 
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
 
 \begin_layout Plain Layout
-18139
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
+\begin_inset Flex TODO Note (inline)
+status open
 
 \begin_layout Plain Layout
-18967
+Just take the top row
 \end_layout
 
 \end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-11.1%
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
 
-\begin_layout Plain Layout
-22.5%
 \end_layout
 
-\end_inset
-</cell>
-</row>
-</lyxtabular>
+\begin_layout Plain Layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-CROP.png
+	lyxscale 25
+	width 100col%
+	groupId colwidth-raster
 
 \end_inset
 
@@ -1127,11 +1022,11 @@ H3K27me3
 \series bold
 \begin_inset CommandInset label
 LatexCommand label
-name "tab:peak-calling-summary"
+name "fig:RNA-seq-weights-vs-covars"
 
 \end_inset
 
-SICER+IDR peak-calling summary
+RNA-seq sample weights, grouped by experimental and technical covariates
 \end_layout
 
 \end_inset
@@ -1139,62 +1034,13 @@ SICER+IDR peak-calling summary
 
 \end_layout
 
-\end_inset
-
+\begin_layout Plain Layout
 
 \end_layout
 
-\begin_layout Standard
-Figures 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "fig:IDR-RC-H3K4me2"
-plural "false"
-caps "false"
-noprefix "false"
-
-\end_inset
-
-, 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "fig:IDR-RC-H3K4me3"
-plural "false"
-caps "false"
-noprefix "false"
-
-\end_inset
-
-, and 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "fig:IDR-RC-H3K27me3"
-plural "false"
-caps "false"
-noprefix "false"
-
 \end_inset
 
- show the IDR rank-consistency plots for peaks called in an arbitrarily-chosen
- pair of donors.
- For all 3 histone marks, when the peaks for each donor are ranked according
- to their scores, SICER produces much more reproducible results between
- donors.
- This is consistent with SICER's stated goal of identifying broad peaks,
- in contrast to MACS, which is designed for identifying sharp peaks.
- Based on this observation, the SICER peak calls were used for all downstream
- analyses that involved ChIP-seq peaks.
- Table 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "tab:peak-calling-summary"
-plural "false"
-caps "false"
-noprefix "false"
-
-\end_inset
 
- gives a summary of the peak calling statistics for each histone mark.
 \end_layout
 
 \begin_layout Standard
@@ -1206,10 +1052,10 @@ status open
 \begin_layout Plain Layout
 \align center
 \begin_inset Graphics
-	filename graphics/CD4-csaw/Promoter Peak Distance Profile-PAGE1-CROP.pdf
-	lyxscale 50
+	filename graphics/CD4-csaw/RNA-seq/PCA-no-batchsub-CROP.png
+	lyxscale 25
 	width 100col%
-	groupId colwidth
+	groupId colwidth-raster
 
 \end_inset
 
@@ -1224,20 +1070,11 @@ status open
 \series bold
 \begin_inset CommandInset label
 LatexCommand label
-name "fig:effective-promoter-radius"
-
-\end_inset
-
-Enrichment of peaks in promoter neighborhoods.
-\end_layout
+name "fig:RNA-PCA-no-batchsub"
 
 \end_inset
 
-
-\end_layout
-
-\begin_layout Plain Layout
-
+RNA-seq PCoA plot showing clear batch effect
 \end_layout
 
 \end_inset
@@ -1245,40 +1082,23 @@ Enrichment of peaks in promoter neighborhoods.
 
 \end_layout
 
-\begin_layout Itemize
-Each histone mark is enriched within a certain radius of gene TSS positions,
- but that radius is different for each mark (figure 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "fig:effective-promoter-radius"
-plural "false"
-caps "false"
-noprefix "false"
-
-\end_inset
-
-, previously in 
-\begin_inset CommandInset citation
-LatexCommand cite
-key "LaMere2016"
-literal "false"
-
 \end_inset
 
- Fig.
- S2)
-\end_layout
 
-\begin_layout Subsection
-RNA-seq align+quant method comparison
 \end_layout
 
 \begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
 \begin_inset Flex TODO Note (inline)
 status open
 
 \begin_layout Plain Layout
-Maybe fix up the excessive axis ranges for these plots?
+Probably don't need this
 \end_layout
 
 \end_inset
@@ -1286,16 +1106,10 @@ Maybe fix up the excessive axis ranges for these plots?
 
 \end_layout
 
-\begin_layout Standard
-\begin_inset Float figure
-wide false
-sideways false
-status collapsed
-
 \begin_layout Plain Layout
 \align center
 \begin_inset Graphics
-	filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star-CROP.png
+	filename graphics/CD4-csaw/RNA-seq/PCA-naive-batchsub-CROP.png
 	lyxscale 25
 	width 100col%
 	groupId colwidth-raster
@@ -1309,15 +1123,19 @@ status collapsed
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
-Comparison of STAR quantification between Ensembl and Entrez gene identifiers
-\end_layout
 
-\end_inset
+\series bold
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:RNA-PCA-limma-batchsub"
 
+\end_inset
 
+RNA-seq PCoA plot after naive (limma) batch subtraction
 \end_layout
 
-\begin_layout Plain Layout
+\end_inset
+
 
 \end_layout
 
@@ -1330,12 +1148,12 @@ Comparison of STAR quantification between Ensembl and Entrez gene identifiers
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
 \align center
 \begin_inset Graphics
-	filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal-CROP.png
+	filename graphics/CD4-csaw/RNA-seq/PCA-combat-batchsub-CROP.png
 	lyxscale 25
 	width 100col%
 	groupId colwidth-raster
@@ -1349,8 +1167,15 @@ status collapsed
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
-Comparison of Salmon+Shoal quantification between Ensembl and Entrez gene
- identifiers
+
+\series bold
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:RNA-PCA-ComBat-batchsub"
+
+\end_inset
+
+RNA-seq PCoA plot after ComBat batch correction
 \end_layout
 
 \end_inset
@@ -1363,35 +1188,38 @@ Comparison of Salmon+Shoal quantification between Ensembl and Entrez gene
 
 \end_layout
 
+\begin_layout Itemize
+RNA-seq batch effect can be partially corrected, but still induces uncorrectable
+ biases in downstream analysis
+\end_layout
+
 \begin_layout Standard
-\begin_inset Float figure
-wide false
-sideways false
-status collapsed
+\begin_inset Flex TODO Note (inline)
+status open
 
 \begin_layout Plain Layout
-\align center
-\begin_inset Graphics
-	filename graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2-CROP.png
-	lyxscale 25
-	width 100col%
-	groupId colwidth-raster
+Figures showing p-value histograms for within-batch and cross-batch contrasts,
+ showing that cross-batch contrasts have attenuated signal, as do comparisons
+ within the bad batch
+\end_layout
 
 \end_inset
 
 
 \end_layout
 
-\begin_layout Plain Layout
-\begin_inset Caption Standard
-
-\begin_layout Plain Layout
-Comparison of quantification between STAR and HISAT2 for identical annotation
+\begin_layout Subsection
+H3K4 and H3K27 methylation occur in broad regions and are enriched near
+ promoters
 \end_layout
 
-\end_inset
-
+\begin_layout Standard
+\begin_inset Flex TODO Note (inline)
+status open
 
+\begin_layout Plain Layout
+Replace these figures with a single table of the number of peaks called at the
+ chosen IDR threshold, showing that SICER calls more peaks than MACS
 \end_layout
 
 \end_inset
@@ -1403,15 +1231,15 @@ Comparison of quantification between STAR and HISAT2 for identical annotation
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
-\align center
-\begin_inset Graphics
-	filename graphics/CD4-csaw/rnaseq-compare/star-vs-salmon-CROP.png
-	lyxscale 25
-	width 100col%
-	groupId colwidth-raster
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Re-generate IDR rank consistency plots for SICER and MACS side-by-side
+\end_layout
 
 \end_inset
 
@@ -1422,7 +1250,15 @@ status collapsed
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
-Comparison of quantification between STAR and Salmon for identical annotation
+
+\series bold
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:IDR-RC-H3K4me2"
+
+\end_inset
+
+Irreproducible Discovery Rate consistency plots for H3K4me2
 \end_layout
 
 \end_inset
@@ -1439,15 +1275,15 @@ Comparison of quantification between STAR and Salmon for identical annotation
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
-\align center
-\begin_inset Graphics
-	filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto-CROP.png
-	lyxscale 25
-	width 100col%
-	groupId colwidth-raster
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Re-generate IDR rank consistency plots for SICER and MACS side-by-side
+\end_layout
 
 \end_inset
 
@@ -1458,8 +1294,15 @@ status collapsed
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
-Comparison of quantification between Salmon and Kallisto for identical annotatio
-n
+
+\series bold
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:IDR-RC-H3K4me3"
+
+\end_inset
+
+Irreproducible Discovery Rate consistency plots for H3K4me3
 \end_layout
 
 \end_inset
@@ -1476,15 +1319,15 @@ n
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
-\align center
-\begin_inset Graphics
-	filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal-CROP.png
-	lyxscale 25
-	width 100col%
-	groupId colwidth-raster
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Re-generate IDR rank consistency plots for SICER and MACS side-by-side
+\end_layout
 
 \end_inset
 
@@ -1495,13 +1338,15 @@ status collapsed
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
-Comparison of quantification between Salmon with and without Shoal for identical
- annotation
-\end_layout
 
-\end_inset
+\series bold
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:IDR-RC-H3K27me3"
 
+\end_inset
 
+Irreproducible Discovery Rate consistency plots for H3K27me3
 \end_layout
 
 \end_inset
@@ -1509,32 +1354,29 @@ Comparison of quantification between Salmon with and without Shoal for identical
 
 \end_layout
 
-\begin_layout Itemize
-Ultimately selected shoal as quantification, Ensembl as annotation.
- Why? Running downstream analyses with all quant methods and both annotations
- showed very little practical difference, so choice was not terribly important.
- Prefer shoal due to theoretical advantages.
- To note in discussion: reproducible workflow made it easy to do this, enabling
- an informed decision.
-\end_layout
+\end_inset
+
 
-\begin_layout Subsection
-RNA-seq has a large confounding batch effect
 \end_layout
 
 \begin_layout Standard
-\begin_inset Float figure
+\begin_inset Float table
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
 \align center
-\begin_inset Graphics
-	filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-CROP.png
-	lyxscale 25
-	width 100col%
-	groupId colwidth-raster
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Need 
+\emph on
+median
+\emph default
+ peak width, not mean
+\end_layout
 
 \end_inset
 
@@ -1542,91 +1384,204 @@ status collapsed
 \end_layout
 
 \begin_layout Plain Layout
-\begin_inset Caption Standard
+\align center
+\begin_inset Tabular
+<lyxtabular version="3" rows="4" columns="5">
+<features tabularvalignment="middle">
+<column alignment="center" valignment="top">
+<column alignment="center" valignment="top">
+<column alignment="center" valignment="top">
+<column alignment="center" valignment="top">
+<column alignment="center" valignment="top">
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
 
 \begin_layout Plain Layout
-
-\series bold
-\begin_inset CommandInset label
-LatexCommand label
-name "fig:RNA-seq-weights-vs-covars"
+Histone Mark
+\end_layout
 
 \end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
 
-RNA-seq sample weights, grouped by experimental and technical covariates
+\begin_layout Plain Layout
+# Peaks
 \end_layout
 
 \end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
 
+\begin_layout Plain Layout
+Mean peak width
+\end_layout
 
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+genome coverage
 \end_layout
 
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
 \begin_layout Plain Layout
+read coverage
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
 
+\begin_layout Plain Layout
+H3K4me2
 \end_layout
 
 \end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
 
+\begin_layout Plain Layout
+14965
+\end_layout
 
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+3970
 \end_layout
 
-\begin_layout Standard
-\begin_inset Float figure
-wide false
-sideways false
-status open
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
 
 \begin_layout Plain Layout
-\align center
-\begin_inset Graphics
-	filename graphics/CD4-csaw/RNA-seq/PCA-no-batchsub-CROP.png
-	lyxscale 25
-	width 100col%
-	groupId colwidth-raster
+1.92%
+\end_layout
 
 \end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
 
+\begin_layout Plain Layout
+14.2%
+\end_layout
 
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+H3K4me3
 \end_layout
 
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
 \begin_layout Plain Layout
-\begin_inset Caption Standard
+6163
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
 
 \begin_layout Plain Layout
+2946
+\end_layout
 
-\series bold
-\begin_inset CommandInset label
-LatexCommand label
-name "fig:RNA-PCA-no-batchsub"
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+0.588%
+\end_layout
 
 \end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
 
-RNA-seq PCoA plot showing clear batch effect
+\begin_layout Plain Layout
+6.57%
 \end_layout
 
 \end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
 
+\begin_layout Plain Layout
+H3K27me3
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
 
+\begin_layout Plain Layout
+18139
 \end_layout
 
 \end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
 
+\begin_layout Plain Layout
+18967
+\end_layout
 
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+11.1%
 \end_layout
 
-\begin_layout Standard
-\begin_inset Float figure
-wide false
-sideways false
-status open
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
 
 \begin_layout Plain Layout
-\align center
-\begin_inset Graphics
-	filename graphics/CD4-csaw/RNA-seq/PCA-naive-batchsub-CROP.png
-	lyxscale 25
-	width 100col%
-	groupId colwidth-raster
+22.5%
+\end_layout
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
 
 \end_inset
 
@@ -1641,11 +1596,11 @@ status open
 \series bold
 \begin_inset CommandInset label
 LatexCommand label
-name "fig:RNA-PCA-limma-batchsub"
+name "tab:peak-calling-summary"
 
 \end_inset
 
-RNA-seq PCoA plot showing clear batch effect
+SICER+IDR peak-calling summary
 \end_layout
 
 \end_inset
@@ -1655,7 +1610,60 @@ RNA-seq PCoA plot showing clear batch effect
 
 \end_inset
 
-
+
+\end_layout
+
+\begin_layout Standard
+Figures 
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:IDR-RC-H3K4me2"
+plural "false"
+caps "false"
+noprefix "false"
+
+\end_inset
+
+, 
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:IDR-RC-H3K4me3"
+plural "false"
+caps "false"
+noprefix "false"
+
+\end_inset
+
+, and 
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:IDR-RC-H3K27me3"
+plural "false"
+caps "false"
+noprefix "false"
+
+\end_inset
+
+ show the IDR rank-consistency plots for peaks called in an arbitrarily-chosen
+ pair of donors.
+ For all 3 histone marks, when the peaks for each donor are ranked according
+ to their scores, SICER produces much more reproducible results between
+ donors.
+ This is consistent with SICER's stated goal of identifying broad peaks,
+ in contrast to MACS, which is designed for identifying sharp peaks.
+ Based on this observation, the SICER peak calls were used for all downstream
+ analyses that involved ChIP-seq peaks.
+ Table 
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "tab:peak-calling-summary"
+plural "false"
+caps "false"
+noprefix "false"
+
+\end_inset
+
+ gives a summary of the peak calling statistics for each histone mark.
 \end_layout
 
 \begin_layout Standard
@@ -1667,10 +1675,10 @@ status open
 \begin_layout Plain Layout
 \align center
 \begin_inset Graphics
-	filename graphics/CD4-csaw/RNA-seq/PCA-combat-batchsub-CROP.png
-	lyxscale 25
+	filename graphics/CD4-csaw/Promoter Peak Distance Profile-PAGE1-CROP.pdf
+	lyxscale 50
 	width 100col%
-	groupId colwidth-raster
+	groupId colwidth
 
 \end_inset
 
@@ -1685,16 +1693,20 @@ status open
 \series bold
 \begin_inset CommandInset label
 LatexCommand label
-name "fig:RNA-PCA-ComBat-batchsub"
+name "fig:effective-promoter-radius"
 
 \end_inset
 
-RNA-seq PCoA plot showing clear batch effect
+Enrichment of peaks in promoter neighborhoods.
 \end_layout
 
 \end_inset
 
 
+\end_layout
+
+\begin_layout Plain Layout
+
 \end_layout
 
 \end_inset
@@ -1703,23 +1715,27 @@ RNA-seq PCoA plot showing clear batch effect
 \end_layout
 
 \begin_layout Itemize
-RNA-seq batch effect can be partially corrected, but still induces uncorrectable
- biases in downstream analysis
-\end_layout
+Each histone mark is enriched within a certain radius of gene TSS positions,
+ but that radius is different for each mark (figure 
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:effective-promoter-radius"
+plural "false"
+caps "false"
+noprefix "false"
 
-\begin_layout Standard
-\begin_inset Flex TODO Note (inline)
-status open
+\end_inset
 
-\begin_layout Plain Layout
-Figures showing p-value histograms for within-batch and cross-batch contrasts,
- showing that cross-batch contrasts have attenuated signal, as do comparisons
- within the bad batch
-\end_layout
+, previously in 
+\begin_inset CommandInset citation
+LatexCommand cite
+key "LaMere2016"
+literal "false"
 
 \end_inset
 
-
+ Fig.
+ S2)
 \end_layout
 
 \begin_layout Subsection
@@ -1730,7 +1746,7 @@ ChIP-seq blacklisting is important
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
 \align center
@@ -1766,7 +1782,7 @@ Cross-correlation plots with blacklisted reads removed
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
 \align center
@@ -1798,49 +1814,16 @@ Cross-correlation plots without removing blacklisted reads
 
 \end_layout
 
-\begin_layout Standard
-\begin_inset Float figure
-wide false
-sideways false
-status collapsed
+\begin_layout Subsection
+ChIP-seq normalization
+\end_layout
 
-\begin_layout Plain Layout
-\align center
+\begin_layout Standard
 \begin_inset Flex TODO Note (inline)
 status open
 
 \begin_layout Plain Layout
-Un-break the figure legend
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Plain Layout
-\align center
-\begin_inset Graphics
-	filename graphics/CD4-csaw/csaw/CCF-max-plot-CROP.pdf
-	lyxscale 50
-	width 100col%
-	groupId colwidth
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Plain Layout
-\begin_inset Caption Standard
-
-\begin_layout Plain Layout
-Estimated fragment size in samples before and after blacklisting
-\end_layout
-
-\end_inset
-
-
+Maybe show just one of these figures and then say the other 2 were similar
 \end_layout
 
 \end_inset
@@ -1848,15 +1831,11 @@ Estimated fragment size in samples before and after blacklisting
 
 \end_layout
 
-\begin_layout Subsection
-ChIP-seq normalization
-\end_layout
-
 \begin_layout Standard
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
 \align center
@@ -1894,7 +1873,7 @@ MA plot of H3K4me2 read counts in 10kb bins for two arbitrary samples
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
 \align center
@@ -1932,7 +1911,7 @@ MA plot of H3K4me3 read counts in 10kb bins for two arbitrary samples
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
 \align center
@@ -1970,6 +1949,19 @@ MA plot of H3K27me3 read counts in 10kb bins for two arbitrary samples
 ChIP-seq must be corrected for hidden confounding factors
 \end_layout
 
+\begin_layout Standard
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Consolidate these into a single 2x3 grid
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Standard
 \begin_inset Float figure
 wide false
@@ -2257,6 +2249,19 @@ H3K4 and H3K27 promoter methylation has broadly the expected correlation
  with gene expression
 \end_layout
 
+\begin_layout Standard
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+This section can easily be cut, especially if I can't find those plots.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Itemize
 H3K4 is correlated with higher expression, and H3K27 is correlated with
  lower expression genome-wide
@@ -2295,7 +2300,7 @@ MOFA recovers biologically relevant variation from blind analysis by correlating
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
 \align center
@@ -2353,7 +2358,20 @@ noprefix "false"
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
+
+\begin_layout Plain Layout
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Maybe drop this one
+\end_layout
+
+\end_inset
+
+
+\end_layout
 
 \begin_layout Plain Layout
 \align center
@@ -2397,7 +2415,20 @@ Sample distribution for each latent factor estimated by MOFA.
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
+
+\begin_layout Plain Layout
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Talk about how this supports the convergence hypothesis
+\end_layout
+
+\end_inset
+
+
+\end_layout
 
 \begin_layout Plain Layout
 \align center
@@ -2470,7 +2501,7 @@ LF2 is clearly the RNA-seq batch effect
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
 \align center
@@ -2931,15 +2962,55 @@ literal "false"
 \end_inset
 
 
+\end_layout
+
+\begin_layout Standard
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Show the figures where the negative result ended this line of inquiry
+\end_layout
+
+\end_inset
+
+
 \end_layout
 
 \begin_layout Section
 Discussion
 \end_layout
 
+\begin_layout Standard
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+Try to boil it down to 3 main messages to get across
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Itemize
 "Promoter radius" is not constant and must be defined empirically for a
- given data set
+ given data set.
+ Coverage within the promoter radius is also correlated with expression
+\end_layout
+
+\begin_layout Standard
+\begin_inset Flex TODO Note (inline)
+status open
+
+\begin_layout Plain Layout
+MOFA should be a footnote to something else, not its own point
+\end_layout
+
+\end_inset
+
+
 \end_layout
 
 \begin_layout Itemize
@@ -2967,17 +3038,11 @@ MOFA confirmed that the already-implemented batch correction in the RNA-seq
 \end_layout
 
 \end_deeper
-\begin_layout Itemize
-Naive-to-memory convergence implies that naive cells are differentiating
- into memory cells, and that gene expression and H3K4 methylation are involved
- in this differentiation while H3K27me3 is less involved
-\end_layout
-
 \begin_layout Standard
 \begin_inset Float figure
 wide false
 sideways false
-status open
+status collapsed
 
 \begin_layout Plain Layout
 \align center
@@ -3011,6 +3076,13 @@ LaMere 2016 Figure 8, reproduced with permission.
 
 \end_layout
 
+\begin_layout Itemize
+Naive-to-memory convergence implies that naive cells are differentiating
+ into memory cells, and that gene expression and H3K4 methylation are involved
+ in this differentiation while H3K27me3 is less involved
+\end_layout
+
+\begin_deeper
 \begin_layout Itemize
 Convergence is consistent with Lamere2016 fig 8 
 \begin_inset CommandInset citation
@@ -3028,10 +3100,65 @@ H3K27me3, canonically regarded as a deactivating mark, seems to have a more
  complex effect
 \end_layout
 
+\end_deeper
+\begin_layout Itemize
+TSS positional coverage 
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways true
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Graphics
+	filename graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
+	lyxscale 50
+	width 100theight%
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption Standard
+
+\begin_layout Plain Layout
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:rulegraph"
+
+\end_inset
+
+
+\series bold
+Dependency graph of steps in reproducible workflow
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Itemize
 Discuss advantages of developing using a reproducible workflow
 \end_layout
 
+\begin_deeper
+\begin_layout Itemize
+Decision-making based on trying every option and running the workflow downstream
+ to see the effects
+\end_layout
+
+\end_deeper
 \begin_layout Chapter
 Improving array-based analyses of transplant rejection by optimizing data
  preprocessing