ソースを参照

Remove nomencl usage, add figure short captions, add TOC entries

Ryan C. Thompson 5 年 前
コミット
80cc1557f1
4 ファイル変更338 行追加475 行削除
  1. 2 6
      Snakefile
  2. 14 10
      abbrevs.tex
  3. BIN
      graphics/CD4-csaw/RNA-seq/weights-vs-covars-nobcv.png
  4. 322 459
      thesis.lyx

+ 2 - 6
Snakefile

@@ -132,12 +132,8 @@ def lyx_input_deps(lyxfile):
     '''Return an iterator over all tex files included by a Lyx file.'''
     with open(lyxfile) as f:
         lyx_text = f.read()
-    tex_names = regex.search('\\\\input{(.*?[.]tex)}', lyx_text).group(1).split(',')
-    # Unfortunately LyX doesn't indicate which bib names refer to
-    # files in the current directory and which don't. Currently that's
-    # not a problem for me since all my refs are in bib files in the
-    # current directory.
-    yield from tex_names
+    for m in regex.finditer('\\\\(?:input|loadglsentries){(.*?[.]tex)}', lyx_text):
+        yield m.group(1)
 
 def lyx_bib_deps(lyxfile):
     '''Return an iterator over all bib files referenced by a Lyx file.

+ 14 - 10
abbrevs.tex

@@ -1,6 +1,11 @@
-%% Methods
+%% Wet-lab methods
 \newabbreviation{RNA-seq}{RNA-seq}{high-throughput RNA sequencing}
 \newabbreviation{ChIP-seq}{ChIP-seq}{chromatin immunoprecipitation followed by high-throughput DNA sequencing}
+\newabbreviation{oligo}{oligo}{oligonucleotide}
+\newabbreviation{GB}{GB}{globin blocking}
+\newabbreviation{PCR}{PCR}{polymerase chain reaction}
+
+%% Computational methods
 \newabbreviation{GLM}{GLM}{generalized linear model}
 \newabbreviation{NB}{NB}{negative binomial}
 \newabbreviation{BCV}{BCV}{biological coefficient of variation}
@@ -10,13 +15,13 @@
 \newabbreviation{SVA}{SVA}{surrogate variable analysis}
 \newabbreviation{PCA}{PCA}{principal component analysis}
 \newabbreviation{PC}{PC}{principal component}
-\newabbreviation{PCoA}{PCoA}{principal coordinate analysis} % AKA MDS?
-\newabbreviation{MOFA}{MOFA}{Multi-Omics Factor Analysis}
 \newabbreviation{LF}{LF}{latent factor}
 %% Note: Can't start with math or else capitalization fails
 \newabbreviation{logCPM}{logCPM}{log$_2$ counts per million}
 \newabbreviation{CPM}{CPM}{counts per million}
 \newabbreviation{logFC}{logFC}{log$_2$ fold change}
+\newabbreviation{FPKM}{FPKM}{fragments per kilobase per million fragments}
+
 \newabbreviation{RMA}{RMA}{Robust Multichip Average}
 \newabbreviation{fRMA}{fRMA}{frozen Robust Multichip Average}
 \newabbreviation{GRSN}{GRSN}{Global Rank-invariant Set Normalization}
@@ -24,15 +29,13 @@
 \newabbreviation{MACS}{MACS}{Model-based Analysis of ChIP-seq}
 \newabbreviation{SICER}{SICER}{Spatial Clustering for Identification of ChIP-Enriched Regions}
 \newabbreviation{TMM}{TMM}{trimmed mean of M-values}
-\newabbreviation{FPKM}{FPKM}{fragments per kilobase per million fragments}
-\newabbreviation{CpGi}{CpGi}{CpG island}
-\newabbreviation{ROC}{ROC}{receiver operating characteristic}
-\newabbreviation{AUC}{AUC}{area under ROC curve}
-\newabbreviation{PCR}{PCR}{polymerase chain reaction}
+\newabbreviation{PCoA}{PCoA}{principal coordinate analysis} % AKA MDS?
+\newabbreviation{MOFA}{MOFA}{Multi-Omics Factor Analysis}
 \newabbreviation{SWAN}{SWAN}{subset-quantile within array normalization}
 \newabbreviation{BH}{BH}{Benjamini-Hochberg}
-\newabbreviation{oligo}{oligo}{oligonucleotide}
-\newabbreviation{GB}{GB}{globin blocking}
+
+\newabbreviation{ROC}{ROC}{receiver operating characteristic}
+\newabbreviation{AUC}{AUC}{area under ROC curve}
 
 %% Data sources
 \newabbreviation{GEO}{GEO}{Gene Expression Omnibus}
@@ -41,6 +44,7 @@
 
 %% Biology
 \newabbreviation{TSS}{TSS}{transcription start site}
+\newabbreviation{CpGi}{CpGi}{CpG island}
 \newabbreviation{TX}{TX}{healthy transplant}
 \newabbreviation{AR}{AR}{acute rejection}
 \newabbreviation{ADNR}{ADNR}{acute dysfunction with no rejection}

BIN
graphics/CD4-csaw/RNA-seq/weights-vs-covars-nobcv.png


+ 322 - 459
thesis.lyx

@@ -9,6 +9,9 @@
 % List all used files in log output
 \listfiles
 
+%% Add TOC, List of Figures, etc. to TOC
+\usepackage{tocbibind}
+
 % Add a DRAFT watermark
 \usepackage{draftwatermark}
 \usepackage{accsupp}
@@ -41,16 +44,9 @@
 % This one breaks subfigs so it's disabled
 % https://tex.stackexchange.com/questions/65680/automatically-bold-first-sentence-of-a-floats-caption
 
-% Bold all nomenclature entries
-\renewcommand{\nomlabel}[1]{\textsf{\textbf{#1}}}
-
-% https://tex.stackexchange.com/a/31083/5654
-%\let\nomenclOrig\nomenclature
-%\renewcommand*{\nomenclature}[3][]{#2\nomenclOrig[#1]{#2}{#3}}
-
-\usepackage[nohypertypes={abbreviation}]{glossaries-extra}
+\usepackage[automake,nonumberlist,nohypertypes={abbreviation}]{glossaries-extra}
 \setabbreviationstyle{long-short}
-\input{abbrevs.tex}
+\loadglsentries{abbrevs.tex}
 \makeglossaries
 \end_preamble
 \use_default_options true
@@ -307,20 +303,84 @@ final
 \end_inset
 
 
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+addcontentsline{toc}{chapter}{Copyright notice}
+\end_layout
+
+\end_inset
+
+
 \end_layout
 
 \begin_layout Standard
 [Copyright notice]
 \end_layout
 
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+addcontentsline{toc}{chapter}{Thesis acceptance form}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Standard
 [Thesis acceptance form]
 \end_layout
 
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+addcontentsline{toc}{chapter}{Dedication}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Standard
 [Dedication]
 \end_layout
 
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+addcontentsline{toc}{chapter}{Acknowledgements}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Standard
 [Acknowledgements]
 \end_layout
@@ -355,7 +415,7 @@ LatexCommand tableofcontents
 status open
 
 \begin_layout Plain Layout
-To create a new nomenclature entry:
+To create a new abbreviation:
 \end_layout
 
 \begin_layout Enumerate
@@ -363,17 +423,9 @@ Add an entry to abbrevs.tex
 \end_layout
 
 \begin_layout Enumerate
-Find the first instance of the term, and wrap it in Insert -> Custom Insets
- -> Glossary Term (use Capital if starting a sentence)
-\end_layout
-
-\begin_layout Enumerate
-Add a nomenclature entry after the first instance
-\end_layout
-
-\begin_layout Enumerate
-Replace every relevant instance throughout the document with the Glossary
- Term wrapped version, using Edit -> Find & Replace (Advanced).
+Wrap every occurrence of the term in Insert -> Custom Insets -> Glossary
+ Term (use appropriate variants for capital, plural, etc.), using Edit ->
+ Find & Replace (Advanced).
  Skip section headers and floats.
 \end_layout
 
@@ -386,12 +438,9 @@ literal "false"
 \end_inset
 
 
-\end_layout
-
-\begin_layout Plain Layout
 \begin_inset CommandInset href
 LatexCommand href
-target "https://wiki.lyx.org/Tips/Nomenclature"
+target "https://ctan.org/pkg/glossaries-extra"
 literal "false"
 
 \end_inset
@@ -405,66 +454,32 @@ literal "false"
 \end_layout
 
 \begin_layout Standard
-\begin_inset CommandInset nomencl_print
-LatexCommand printnomenclature
-set_width "auto"
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout List of TODOs
-
-\end_layout
-
-\begin_layout Standard
-\begin_inset Flex TODO Note (inline)
+\align center
+\begin_inset ERT
 status open
 
 \begin_layout Plain Layout
-Check all figures to make sure they fit on the page with their legends.
-\end_layout
-
-\end_inset
 
 
+\backslash
+renewcommand*{
+\backslash
+glossaryname}{List of Abbreviations}%
 \end_layout
 
-\begin_layout Standard
-\begin_inset Flex TODO Note (inline)
-status open
-
 \begin_layout Plain Layout
-Make all descriptions consistent in terms of 
-\begin_inset Quotes eld
-\end_inset
 
-we did X
-\begin_inset Quotes erd
-\end_inset
 
- vs 
-\begin_inset Quotes eld
-\end_inset
+\backslash
+printglossaries
+\end_layout
 
-I did X
-\begin_inset Quotes erd
 \end_inset
 
- vs 
-\begin_inset Quotes eld
-\end_inset
 
-X was done
-\begin_inset Quotes erd
-\end_inset
-
-.
 \end_layout
 
-\end_inset
-
+\begin_layout List of TODOs
 
 \end_layout
 
@@ -1016,15 +1031,6 @@ MSC
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "MSC"
-description "mesenchymal stem cell"
-literal "true"
-
-\end_inset
-
 .
 \end_layout
 
@@ -1188,15 +1194,6 @@ status open
 RNA-seq
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "RNA-seq"
-description "High-throughput RNA sequencing"
-literal "false"
-
 \end_inset
 
  experiment, the dependent variables may be the count of 
@@ -1480,15 +1477,6 @@ ChIP-seq
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "ChIP-seq"
-description "Chromatin immunoprecipitation followed by high-throughput DNA sequencing"
-literal "false"
-
-\end_inset
-
 , which tend to be much smaller and therefore violate the assumption of
  a normal distribution more severely.
  For all count-based data, the 
@@ -1519,15 +1507,6 @@ status open
 GLM
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "GLM"
-description "generalized linear model"
-literal "false"
-
 \end_inset
 
  instead of a linear model.
@@ -1571,15 +1550,6 @@ status open
 NB
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "NB"
-description "negative binomial"
-literal "false"
-
 \end_inset
 
  distribution rather than modeling the normalized log counts using a normal
@@ -1795,15 +1765,6 @@ status open
 MACS
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "MACS"
-description "Model-based Analysis of ChIP-seq"
-literal "false"
-
 \end_inset
 
  exploit this pattern to identify specific loci at which such 
@@ -1868,15 +1829,6 @@ status open
 SICER
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "SICER"
-description "Spatial Clustering for Identification of ChIP-Enriched Regions"
-literal "false"
-
 \end_inset
 
  assume that peaks are represented in the 
@@ -1924,15 +1876,6 @@ status open
 ENCODE
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "ENCODE"
-description "Encyclopedia Of DNA Elements"
-literal "false"
-
 \end_inset
 
  project has developed a method called 
@@ -1943,15 +1886,6 @@ status open
 IDR
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "IDR"
-description "irreproducible discovery rate"
-literal "false"
-
 \end_inset
 
  for this purpose 
@@ -2100,15 +2034,6 @@ RMA
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "RMA"
-description "robust multichip average"
-literal "false"
-
-\end_inset
-
  
 \begin_inset CommandInset citation
 LatexCommand cite
@@ -2195,15 +2120,6 @@ GRSN
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "GRSN"
-description "global rank-invariant set normalization"
-literal "false"
-
-\end_inset
-
 , and 
 \begin_inset Flex Glossary Term
 status open
@@ -2214,15 +2130,6 @@ SCAN
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "SCAN"
-description "Single-Channel Array Normalization"
-literal "false"
-
-\end_inset
-
  
 \begin_inset CommandInset citation
 LatexCommand cite
@@ -2283,15 +2190,6 @@ CPM
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "CPM"
-description "counts per million"
-literal "false"
-
-\end_inset
-
 .
  Furthermore, if the abundance of a single gene increases, then in order
  for its fraction of the total reads to increase, all other genes' fractions
@@ -2406,15 +2304,6 @@ status open
 logFC
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "logFC"
-description "$\\log_2$ fold change"
-literal "true"
-
 \end_inset
 
  is zero across all abundance levels.
@@ -2496,15 +2385,6 @@ status open
 SVD
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "SVD"
-description "singular value decomposition"
-literal "false"
-
 \end_inset
 
  on the matrix of linear model residuals (which contain all the un-modeled
@@ -2520,15 +2400,6 @@ status open
 SVA
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "SVA"
-description "surrogate variable analysis"
-literal "false"
-
 \end_inset
 
  starts with this approach, but takes some additional steps to identify
@@ -2819,15 +2690,6 @@ status open
 TSS
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "TSS"
-description "transcription start site"
-literal "false"
-
 \end_inset
 
  is an important factor, as opposed to simple proximity.
@@ -3193,15 +3055,6 @@ SRA
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "SRA"
-description "Sequence Read Archive"
-literal "false"
-
-\end_inset
-
  
 \begin_inset CommandInset citation
 LatexCommand cite
@@ -3350,6 +3203,16 @@ After batch correction with ComBat
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status open
+
+\begin_layout Plain Layout
+PCoA plots of RNA-seq data showing effect of batch correction.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:RNA-PCA"
@@ -3421,23 +3284,10 @@ wide false
 sideways false
 status collapsed
 
-\begin_layout Plain Layout
-\begin_inset Flex TODO Note (inline)
-status open
-
-\begin_layout Plain Layout
-Just take the top row
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
 \begin_layout Plain Layout
 \align center
 \begin_inset Graphics
-	filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-CROP.png
+	filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-nobcv-CROP.png
 	lyxscale 25
 	width 100col%
 	groupId colwidth-raster
@@ -3453,6 +3303,16 @@ Just take the top row
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+RNA-seq sample weights, grouped by experimental and technical covariates.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:RNA-seq-weights-vs-covars"
@@ -3523,15 +3383,6 @@ TMM
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "TMM"
-description "trimmed mean of M-values"
-literal "false"
-
-\end_inset
-
  
 \begin_inset CommandInset citation
 LatexCommand cite
@@ -3548,15 +3399,6 @@ status open
 logCPM
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "logCPM"
-description "$\\log_2$ counts per million"
-literal "true"
-
 \end_inset
 
  with quality weights using 
@@ -3620,15 +3462,6 @@ status open
 BH
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "BH"
-description "Benjamini-Hochberg"
-literal "false"
-
 \end_inset
 
  procedure for 
@@ -3784,6 +3617,16 @@ bp.
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+Strand cross-correlation plots for ChIP-seq data, before and after blacklisting.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:CCF-master"
@@ -4435,6 +4278,17 @@ H3K27me3, SVs subtracted
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+PCoA plots of ChIP-seq sliding window data, before and after subtracting
+ surrogate variables (SVs).
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:PCoA-ChIP"
@@ -4768,13 +4622,23 @@ Scatter plots of specific pairs of MOFA latent factors.
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status open
+
+\begin_layout Plain Layout
+MOFA latent factors identify shared patterns of variation.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:MOFA-master"
 
 \end_inset
 
-MOFA latent factors separate technical confounders from 
+MOFA latent factors identify shared patterns of variation.
 \end_layout
 
 \end_inset
@@ -4816,15 +4680,6 @@ status open
 MOFA
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "MOFA"
-description "Multi-Omics Factor Analysis"
-literal "false"
-
 \end_inset
 
  was run on all the 
@@ -4877,15 +4732,6 @@ status open
 LF
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "LF"
-description "latent factor"
-literal "false"
-
 \end_inset
 
  1, 4, and 5 were determined to explain the most variation consistently
@@ -5583,6 +5429,16 @@ status collapsed
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+PCoA plot of RNA-seq samples after ComBat batch correction.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:rna-pca-final"
@@ -6112,9 +5968,19 @@ literal "false"
 \begin_layout Plain Layout
 \begin_inset Caption Standard
 
-\begin_layout Plain Layout
+\begin_layout Plain Layout
+
+\series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+Enrichment of peaks in promoter neighborhoods.
+\end_layout
+
+\end_inset
+
 
-\series bold
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:near-promoter-peak-enrich"
@@ -6429,6 +6295,16 @@ This figure is generated from the old analysis.
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+Expression distributions of genes with and without promoter peaks.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:fpkm-by-peak"
@@ -6506,15 +6382,6 @@ status open
 FPKM
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "FPKM"
-description "fragments per kilobase per million fragments"
-literal "false"
-
 \end_inset
 
  values when a peak overlaps the promoter is about 
@@ -7061,15 +6928,6 @@ PCoA
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "PCoA"
-description "principal coordinate analysis"
-literal "false"
-
-\end_inset
-
 .
  All 3 marks show a noticeable convergence between the naïve and memory
  samples at day 14, visible as an overlapping of the day 14 groups on each
@@ -7354,6 +7212,16 @@ RNA-seq PCoA showing principal coordinates 2 and 3.
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+PCoA plots for promoter ChIP-seq and expression RNA-seq data.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:PCoA-promoters"
@@ -7592,6 +7460,17 @@ Gene expression grouped by promoter coverage clusters.
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+K-means clustering of promoter H3K4me2 relative coverage depth in naïve
+ day 0 samples.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:H3K4me2-neighborhood"
@@ -7798,15 +7677,6 @@ status open
 PCA
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "PCA"
-description "principal component analysis"
-literal "false"
-
 \end_inset
 
  plot based on the same relative bin abundance data, and colored based on
@@ -8203,6 +8073,17 @@ Gene expression grouped by promoter coverage clusters.
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+K-means clustering of promoter H3K4me3 relative coverage depth in naïve
+ day 0 samples.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:H3K4me3-neighborhood"
@@ -8213,7 +8094,7 @@ K-means clustering of promoter H3K4me3 relative coverage depth in naïve
  day 0 samples.
  
 \series default
-H3K4me2 ChIP-seq reads were binned into 500-bp windows tiled across each
+H3K4me3 ChIP-seq reads were binned into 500-bp windows tiled across each
  promoter from 5
 \begin_inset space ~
 \end_inset
@@ -8507,6 +8388,17 @@ Repeated figure legends are kind of an issue here.
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+K-means clustering of promoter H3K27me3 relative coverage depth in naïve
+ day 0 samples.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:H3K27me3-neighborhood"
@@ -9148,7 +9040,7 @@ LF
 \begin_inset Float figure
 wide false
 sideways false
-status collapsed
+status open
 
 \begin_layout Plain Layout
 \align center
@@ -9169,6 +9061,21 @@ status collapsed
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+Lamere 2016 Figure 8 “Model for the role of H3K4 methylation during CD4
+ T-cell activation.
+\begin_inset Quotes erd
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:Lamere2016-Fig8"
@@ -9862,15 +9769,6 @@ status open
 CpGi
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "CpGi"
-description "CpG island"
-literal "false"
-
 \end_inset
 
  in the promoter was correlated with increases or decreases in gene expression
@@ -10465,15 +10363,6 @@ status open
 glsdisp*{TX}{healthy transplants (TX)}
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "TX"
-description "healthy transplant"
-literal "false"
-
 \end_inset
 
  from transplants undergoing 
@@ -10484,15 +10373,6 @@ status open
 AR
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "AR"
-description "acute rejection"
-literal "false"
-
 \end_inset
 
  or 
@@ -10505,15 +10385,6 @@ ADNR
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "ADNR"
-description "acute dysfunction with no rejection"
-literal "false"
-
-\end_inset
-
 .
  However, the standard normalization algorithm used for microarray data,
  
@@ -10631,15 +10502,6 @@ glsdisp*{GEO}{the Gene Expression Omnibus (GEO)}
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "GEO"
-description "Gene Expression Omnibus"
-literal "false"
-
-\end_inset
-
 .
  Each array's probe intensity distribution is normalized against these pre-gener
 ated quantiles.
@@ -11007,15 +10869,6 @@ status open
 ROC
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "ROC"
-description "receiver operating characteristic"
-literal "false"
-
 \end_inset
 
  curves and 
@@ -11026,15 +10879,6 @@ status open
 AUC
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "AUC"
-description "area under ROC curve"
-literal "false"
-
 \end_inset
 
  values were generated 
@@ -11553,15 +11397,6 @@ CAN
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "CAN"
-description "chronic allograft nephropathy"
-literal "false"
-
-\end_inset
-
 .
  The data consisted of 33 TX, 9 AR, 8 ADNR, and 28 CAN samples.
  The uneven group sizes are a result of taking the biopsy samples before
@@ -11576,15 +11411,6 @@ status open
 T1D
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "T1D"
-description "Type 1 diabetes"
-literal "false"
-
 \end_inset
 
  or 
@@ -11597,15 +11423,6 @@ T2D
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "T2D"
-description "Type 2 diabetes"
-literal "false"
-
-\end_inset
-
 ).
  
 \end_layout
@@ -11621,15 +11438,6 @@ SWAN
 
 \end_inset
 
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "SWAN"
-description "subset-quantile within array normalization"
-literal "false"
-
-\end_inset
-
  
 \begin_inset CommandInset citation
 LatexCommand cite
@@ -12172,6 +11980,18 @@ status open
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+Classifier probabilities on validation samples when normalized with RMA
+ together vs.
+ separately.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:Classifier-probabilities-RMA"
@@ -12377,6 +12197,16 @@ ROC curves for PAM on external validation data
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+ROC curves for PAM using different normalization strategies.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:ROC-PAM-main"
@@ -13443,6 +13273,17 @@ Number of samples usable in fRMA probe weight learning as a function of
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+Effect of batch size selection on number of batches and number of samples
+ included in fRMA probe weight learning.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:frmatools-batch-size"
@@ -13645,6 +13486,16 @@ Violin plot of inter-normalization log ratios for blood samples.
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+Violin plot of log ratios between normalizations for 20 biopsy samples.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:frma-violin"
@@ -13950,6 +13801,16 @@ fRMA vs fRMA for blood samples.
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+Representative MA plots comparing RMA and custom fRMA normalizations.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:Representative-MA-plots"
@@ -14314,7 +14175,16 @@ Mean-variance trend after voom modeling in analysis C.
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
 Mean-variance trend modeling in methylation array data.
+\end_layout
+
+\end_inset
+
+ Mean-variance trend modeling in methylation array data.
  
 \series default
 The estimated 
@@ -14737,6 +14607,16 @@ Redo the sample weight boxplot with notches, and remove fill colors
 \begin_inset Caption Standard
 
 \begin_layout Plain Layout
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+Box-and-whiskers plot of sample quality weights grouped by diabetes diagnosis.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:diabetes-sample-weights"
@@ -15760,6 +15640,16 @@ CAN vs.
 \begin_layout Plain Layout
 
 \series bold
+\begin_inset Argument 1
+status collapsed
+
+\begin_layout Plain Layout
+Probe p-value histograms for each contrast in each analysis.
+\end_layout
+
+\end_inset
+
+
 \begin_inset CommandInset label
 LatexCommand label
 name "fig:meth-p-value-histograms"
@@ -16872,15 +16762,6 @@ status open
 GB
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "GB"
-description "globin blocking"
-literal "false"
-
 \end_inset
 
  protocol approximately doubles the yield of informative (non-globin) reads
@@ -17026,15 +16907,6 @@ status open
 mRNA
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "mRNA"
-description "messenger RNA"
-literal "false"
-
 \end_inset
 
  are naturally present in mammalian peripheral blood samples (up to 70%
@@ -17372,15 +17244,6 @@ status open
 PCR
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "PCR"
-description "polymerase chain reaction"
-literal "false"
-
 \end_inset
 
  tube.
@@ -17516,15 +17379,6 @@ status open
 ncRNA
 \end_layout
 
-\end_inset
-
-
-\begin_inset CommandInset nomenclature
-LatexCommand nomenclature
-symbol "ncRNA"
-description "non-coding RNA"
-literal "false"
-
 \end_inset
 
  gene, resulting in significant undercounting of globin reads.
@@ -20634,11 +20488,15 @@ GB
  method in place, the way is now clear for this experiment to proceed.
 \end_layout
 
-\begin_layout Chapter
+\begin_layout Standard
+\begin_inset Note Note
+status open
+
+\begin_layout Chapter*
 Future Directions
 \end_layout
 
-\begin_layout Standard
+\begin_layout Plain Layout
 \begin_inset Flex TODO Note (inline)
 status open
 
@@ -20650,6 +20508,11 @@ If there are any chapter-independent future directions, put them here.
 \end_inset
 
 
+\end_layout
+
+\end_inset
+
+
 \end_layout
 
 \begin_layout Chapter