Browse Source

More progress and lots of citations in Ch 2 Methods

Ryan C. Thompson 6 years ago
parent
commit
9256979ff8
4 changed files with 173 additions and 5 deletions
  1. 62 0
      code-refs.bib
  2. BIN
      graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
  3. 31 0
      refs.bib
  4. 80 5
      thesis.lyx

+ 62 - 0
code-refs.bib

@@ -0,0 +1,62 @@
+%% This BibTeX bibliography file was created using BibDesk.
+%% http://bibdesk.sourceforge.net/
+
+%% Created for Ryan C. Thompson at 2019-08-01 02:17:26 -0700 
+
+
+%% Saved with string encoding Unicode (UTF-8) 
+
+
+
+@misc{gh-cd4-csaw,
+	Abstract = {epic is a software package for finding medium to diffusely enriched domains in chip-seq data. It is a fast, parallel and memory-efficient implementation of the incredibly popular SICER algorithm. By running epic on a set of data ("ChIP") files and control ("Input") files, epic is able to quickly differentially enriched regions.
+
+epic is an improvement over the original SICER by being faster, more memory efficient, multicore, and significantly much easier to install and use.},
+	Author = {Ryan C. Thompson},
+	Date-Added = {2019-08-01 02:15:39 -0700},
+	Date-Modified = {2019-08-01 02:15:39 -0700},
+	Howpublished = {\url{https://github.com/DarwinAwardWinner/CD4-csaw}},
+	Keywords = {chipseq, rnaseq},
+	Month = {nov},
+	Publisher = {GitHub, Inc.},
+	Title = {Reproducible reanalysis of a combined ChIP-Seq \& RNA-Seq data set},
+	Year = {2018},
+	Bdsk-Url-1 = {https://github.com/DarwinAwardWinner/CD4-csaw}}
+
+@manual{greylistchip,
+	Author = {Gord Brown},
+	Date-Added = {2019-08-01 02:00:09 -0700},
+	Date-Modified = {2019-08-01 02:03:29 -0700},
+	Edition = {R package version 1.16.0.},
+	Organization = {Bioconductor},
+	Title = {GreyListChIP: Grey Lists -- Mask Artefact Regions Based on ChIP Inputs},
+	Year = {2019}}
+
+@misc{gh-epic,
+	Abstract = {epic is a software package for finding medium to diffusely enriched domains in chip-seq data. It is a fast, parallel and memory-efficient implementation of the incredibly popular SICER algorithm. By running epic on a set of data ("ChIP") files and control ("Input") files, epic is able to quickly find differentially enriched regions.
+
+epic is an improvement over the original SICER by being faster, more memory efficient, multicore, and significantly much easier to install and use.},
+	Author = {Endre Bakken Stovner},
+	Date-Added = {2019-08-01 01:47:19 -0700},
+	Date-Modified = {2019-08-01 01:47:19 -0700},
+	Howpublished = {\url{https://github.com/biocore-ntnu/epic}},
+	Keywords = {chipseq},
+	Month = {nov},
+	Publisher = {GitHub, Inc.},
+	Title = {epic: diffuse domain ChIP-Seq caller based on SICER},
+	Year = {2018},
+	Bdsk-Url-1 = {https://doi.org/10.5281/zenodo.806811}}
+
+@misc{gh-hg38-ref,
+	Abstract = {epic is a software package for finding medium to diffusely enriched domains in chip-seq data. It is a fast, parallel and memory-efficient implementation of the incredibly popular SICER algorithm. By running epic on a set of data ("ChIP") files and control ("Input") files, epic is able to quickly differentially enriched regions.
+
+epic is an improvement over the original SICER by being faster, more memory efficient, multicore, and significantly much easier to install and use.},
+	Author = {Ryan C. Thompson},
+	Date-Added = {2019-08-01 01:44:09 -0700},
+	Date-Modified = {2019-08-01 02:17:22 -0700},
+	Howpublished = {\url{https://github.com/DarwinAwardWinner/hg38-ref}},
+	Month = {dec},
+	Publisher = {GitHub, Inc.},
+	Title = {Workflow to download/generate various mapping indices for the human hg38 genome},
+	Year = {2016},
+	Bdsk-Url-1 = {https://github.com/DarwinAwardWinner/hg38-ref}}

BIN
graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf


File diff suppressed because it is too large
+ 31 - 0
refs.bib


+ 80 - 5
thesis.lyx

@@ -588,8 +588,16 @@ Dependency graph of steps in reproducible workflow
 \end_layout
 
 \begin_layout Standard
-A reproducible workflow was written to analyze the raw ChIP-seq and RNA-seq
- data from previous studies 
+A reproducible workflow 
+\begin_inset CommandInset citation
+LatexCommand cite
+key "gh-cd4-csaw"
+literal "false"
+
+\end_inset
+
+ was written to analyze the raw ChIP-seq and RNA-seq data from previous
+ studies 
 \begin_inset CommandInset citation
 LatexCommand cite
 key "LaMere2016,LaMere2017"
@@ -598,8 +606,21 @@ literal "true"
 \end_inset
 
 .
- The workflow starts by retrieving the sequence reads from the Sequence
- Read Archive (SRA) 
+ Briefly, this data consists of RNA-seq and ChIP-seq from CD4 T-cells cultured
+ from 4 donors.
+ From each donor, naive and memory CD4 T-cells were isolated separately.
+ Then cultures of both cell types were activated [how?], and samples were taken
+ at 4 time points: Day 0 (pre-activation), Day 1 (early activation), Day
+ 5 (peak activation), and Day 14 (post-activation).
+ For each combination of cell type and time point, RNA was isolated, and
+ ChIP-seq was performed for each of 3 histone marks: H3K4me2, H3K4me3, and
+ H3K27me3.
+ The ChIP-seq input was also sequenced for each sample.
+ The result was 32 samples for each assay.
+\end_layout
+
+\begin_layout Standard
+Sequence reads were retrieved from the Sequence Read Archive (SRA) 
 \begin_inset CommandInset citation
 LatexCommand cite
 key "Leinonen2011"
@@ -608,6 +629,60 @@ literal "false"
 \end_inset
 
 .
+ ChIP-seq (and input) reads were aligned to the GRCh38 genome assembly using
+ Bowtie 2 
+\begin_inset CommandInset citation
+LatexCommand cite
+key "Langmead2012,Schneider2017,gh-hg38-ref"
+literal "false"
+
+\end_inset
+
+.
+ Artifact regions were annotated using a custom implementation of the GreyListCh
+IP algorithm, and these 
+\begin_inset Quotes eld
+\end_inset
+
+greylists
+\begin_inset Quotes erd
+\end_inset
+
+ were merged with the ENCODE blacklist 
+\begin_inset CommandInset citation
+LatexCommand cite
+key "greylistchip,Amemiya2019,Dunham2012"
+literal "false"
+
+\end_inset
+
+.
+ Any read or peak overlapping one of these regions was regarded as artifactual
+ and excluded from downstream analyses.
+ 
+\end_layout
+
+\begin_layout Standard
+Peaks were called using epic, an implementation of the SICER algorithm 
+\begin_inset CommandInset citation
+LatexCommand cite
+key "Zang2009,gh-epic"
+literal "false"
+
+\end_inset
+
+.
+ Peaks were also called separately using MACS, but MACS was determined to
+ be a poor fit for the data, and these peak calls were not used further 
+\begin_inset CommandInset citation
+LatexCommand cite
+key "Zhang2008"
+literal "false"
+
+\end_inset
+
+.
+ 
 \end_layout
 
 \begin_layout Itemize
@@ -6412,7 +6487,7 @@ Check bib entry formatting & sort order
 \begin_inset CommandInset bibtex
 LatexCommand bibtex
 btprint "btPrintCited"
-bibfiles "refs"
+bibfiles "refs,code-refs"
 options "bibtotoc,unsrt"
 
 \end_inset

Some files were not shown because too many files changed in this diff