thesis.lyx 75 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230
  1. #LyX 2.3 created this file. For more info see http://www.lyx.org/
  2. \lyxformat 544
  3. \begin_document
  4. \begin_header
  5. \save_transient_properties true
  6. \origin unavailable
  7. \textclass extbook
  8. \begin_preamble
  9. % List all used files in log output
  10. \listfiles
  11. % Add a DRAFT watermark
  12. \usepackage{draftwatermark}
  13. \SetWatermarkLightness{0.97}
  14. \SetWatermarkScale{1}
  15. % Set up required header format
  16. \usepackage{fancyhdr}
  17. \pagestyle{fancy}
  18. \renewcommand{\headrulewidth}{0pt}
  19. \rhead{}
  20. \lhead{}
  21. \rfoot{}
  22. \lfoot{}
  23. \cfoot{\thepage} % Page number bottom center
  24. % https://tex.stackexchange.com/questions/65680/automatically-bold-first-sentence-of-a-floats-caption
  25. \usepackage{xstring}
  26. \usepackage{etoolbox}
  27. \usepackage{caption}
  28. \captionsetup{labelfont=bf,tableposition=top}
  29. \makeatletter
  30. \newcommand\formatlabel[1]{%
  31. \noexpandarg
  32. \IfSubStr{#1}{.}{%
  33. \StrBefore{#1}{.}[\firstcaption]%
  34. \StrBehind{#1}{.}[\secondcaption]%
  35. \textbf{\firstcaption.} \secondcaption}{%
  36. #1}%
  37. }
  38. \patchcmd{\@caption}{#3}{\formatlabel{#3}}
  39. \makeatother
  40. \end_preamble
  41. \use_default_options true
  42. \begin_modules
  43. todonotes
  44. \end_modules
  45. \maintain_unincluded_children false
  46. \language english
  47. \language_package default
  48. \inputencoding utf8
  49. \fontencoding global
  50. \font_roman "default" "default"
  51. \font_sans "default" "default"
  52. \font_typewriter "default" "default"
  53. \font_math "auto" "auto"
  54. \font_default_family default
  55. \use_non_tex_fonts false
  56. \font_sc false
  57. \font_osf false
  58. \font_sf_scale 100 100
  59. \font_tt_scale 100 100
  60. \use_microtype false
  61. \use_dash_ligatures true
  62. \graphics default
  63. \default_output_format pdf4
  64. \output_sync 0
  65. \bibtex_command default
  66. \index_command default
  67. \paperfontsize 12
  68. \spacing double
  69. \use_hyperref true
  70. \pdf_bookmarks true
  71. \pdf_bookmarksnumbered false
  72. \pdf_bookmarksopen false
  73. \pdf_bookmarksopenlevel 1
  74. \pdf_breaklinks false
  75. \pdf_pdfborder false
  76. \pdf_colorlinks false
  77. \pdf_backref false
  78. \pdf_pdfusetitle true
  79. \papersize letterpaper
  80. \use_geometry true
  81. \use_package amsmath 1
  82. \use_package amssymb 1
  83. \use_package cancel 1
  84. \use_package esint 1
  85. \use_package mathdots 1
  86. \use_package mathtools 1
  87. \use_package mhchem 1
  88. \use_package stackrel 1
  89. \use_package stmaryrd 1
  90. \use_package undertilde 1
  91. \cite_engine basic
  92. \cite_engine_type default
  93. \biblio_style plain
  94. \use_bibtopic false
  95. \use_indices false
  96. \paperorientation portrait
  97. \suppress_date false
  98. \justification true
  99. \use_refstyle 1
  100. \use_minted 0
  101. \index Index
  102. \shortcut idx
  103. \color #008000
  104. \end_index
  105. \leftmargin 1.5in
  106. \topmargin 1in
  107. \rightmargin 1in
  108. \bottommargin 1in
  109. \secnumdepth 3
  110. \tocdepth 3
  111. \paragraph_separation indent
  112. \paragraph_indentation default
  113. \is_math_indent 0
  114. \math_numbering_side default
  115. \quotes_style english
  116. \dynamic_quotes 0
  117. \papercolumns 1
  118. \papersides 2
  119. \paperpagestyle default
  120. \tracking_changes false
  121. \output_changes false
  122. \html_math_output 0
  123. \html_css_as_file 0
  124. \html_be_strict false
  125. \end_header
  126. \begin_body
  127. \begin_layout Title
  128. Bioinformatic analysis of complex, high-throughput genomic and epigenomic
  129. data in the context of immunology and transplant rejection
  130. \end_layout
  131. \begin_layout Author
  132. A thesis presented
  133. \begin_inset Newline newline
  134. \end_inset
  135. by
  136. \begin_inset Newline newline
  137. \end_inset
  138. Ryan C.
  139. Thompson
  140. \begin_inset Newline newline
  141. \end_inset
  142. to
  143. \begin_inset Newline newline
  144. \end_inset
  145. The Scripps Research Institute Graduate Program
  146. \begin_inset Newline newline
  147. \end_inset
  148. in partial fulfillment of the requirements for the degree of
  149. \begin_inset Newline newline
  150. \end_inset
  151. Doctor of Philosophy in the subject of Biology
  152. \begin_inset Newline newline
  153. \end_inset
  154. for
  155. \begin_inset Newline newline
  156. \end_inset
  157. The Scripps Research Institute
  158. \begin_inset Newline newline
  159. \end_inset
  160. La Jolla, California
  161. \end_layout
  162. \begin_layout Date
  163. May 2019
  164. \end_layout
  165. \begin_layout Standard
  166. [Copyright notice]
  167. \end_layout
  168. \begin_layout Standard
  169. [Thesis acceptance form]
  170. \end_layout
  171. \begin_layout Standard
  172. [Dedication]
  173. \end_layout
  174. \begin_layout Standard
  175. [Acknowledgements]
  176. \end_layout
  177. \begin_layout Standard
  178. \begin_inset CommandInset toc
  179. LatexCommand tableofcontents
  180. \end_inset
  181. \end_layout
  182. \begin_layout Standard
  183. \begin_inset FloatList table
  184. \end_inset
  185. \end_layout
  186. \begin_layout Standard
  187. \begin_inset FloatList figure
  188. \end_inset
  189. \end_layout
  190. \begin_layout Standard
  191. [List of Abbreviations]
  192. \end_layout
  193. \begin_layout Standard
  194. \begin_inset Flex TODO Note (inline)
  195. status open
  196. \begin_layout Plain Layout
  197. Look into auto-generated nomenclature list: https://wiki.lyx.org/Tips/Nomenclature
  198. \end_layout
  199. \end_inset
  200. \end_layout
  201. \begin_layout List of TODOs
  202. \end_layout
  203. \begin_layout Standard
  204. [Abstract]
  205. \end_layout
  206. \begin_layout Chapter*
  207. Abstract
  208. \end_layout
  209. \begin_layout Chapter
  210. Introduction
  211. \end_layout
  212. \begin_layout Section
  213. Background & Significance
  214. \end_layout
  215. \begin_layout Subsection
  216. Biological motivation
  217. \end_layout
  218. \begin_layout Itemize
  219. Rejection is the major long-term threat to organ and tissue grafts
  220. \end_layout
  221. \begin_deeper
  222. \begin_layout Itemize
  223. Common mechanisms of rejection
  224. \end_layout
  225. \begin_layout Itemize
  226. Effective immune suppression requires monitoring for rejection and tuning
  227. \end_layout
  228. \begin_layout Itemize
  229. Current tests for rejection (tissue biopsy) are invasive and biased
  230. \end_layout
  231. \begin_layout Itemize
  232. A blood test based on microarrays would be less biased and invasive
  233. \end_layout
  234. \end_deeper
  235. \begin_layout Itemize
  236. Memory cells are resistant to immune suppression
  237. \end_layout
  238. \begin_deeper
  239. \begin_layout Itemize
  240. Mechanisms of resistance in memory cells are poorly understood
  241. \end_layout
  242. \begin_layout Itemize
  243. A better understanding of immune memory formation is needed
  244. \end_layout
  245. \end_deeper
  246. \begin_layout Itemize
  247. Mesenchymal stem cell infusion is a promising new treatment to prevent/delay
  248. rejection
  249. \end_layout
  250. \begin_deeper
  251. \begin_layout Itemize
  252. Demonstrated in mice, but not yet in primates
  253. \end_layout
  254. \begin_layout Itemize
  255. Mechanism currently unknown, but MSC are known to be immune modulatory
  256. \end_layout
  257. \end_deeper
  258. \begin_layout Subsection
  259. Overview of bioinformatic analysis methods
  260. \end_layout
  261. \begin_layout Standard
  262. An overview of all the methods used, including what problem they solve,
  263. what assumptions they make, and a basic description of how they work.
  264. \end_layout
  265. \begin_layout Itemize
  266. ChIP-seq Peak calling
  267. \end_layout
  268. \begin_deeper
  269. \begin_layout Itemize
  270. Cross-correlation analysis to determine fragment size
  271. \end_layout
  272. \begin_layout Itemize
  273. Broad vs narrow peaks
  274. \end_layout
  275. \begin_layout Itemize
  276. SICER for broad peaks
  277. \end_layout
  278. \begin_layout Itemize
  279. IDR for biologically reproducible peaks
  280. \end_layout
  281. \begin_layout Itemize
  282. csaw peak filtering guidelines for unbiased downstream analysis
  283. \end_layout
  284. \end_deeper
  285. \begin_layout Itemize
  286. Normalization is non-trivial and application-dependent
  287. \end_layout
  288. \begin_deeper
  289. \begin_layout Itemize
  290. Expression arrays: RMA & fRMA; why fRMA is needed
  291. \end_layout
  292. \begin_layout Itemize
  293. Methylation arrays: M-value transformation approximates normal data but
  294. induces heteroskedasticity
  295. \end_layout
  296. \begin_layout Itemize
  297. RNA-seq: normalize based on assumption that the average gene is not changing
  298. \end_layout
  299. \begin_layout Itemize
  300. ChIP-seq: complex with many considerations, dependent on experimental methods,
  301. biological system, and analysis goals
  302. \end_layout
  303. \end_deeper
  304. \begin_layout Itemize
  305. Limma: The standard linear modeling framework for genomics
  306. \end_layout
  307. \begin_deeper
  308. \begin_layout Itemize
  309. empirical Bayes variance modeling: limma's core feature
  310. \end_layout
  311. \begin_layout Itemize
  312. edgeR & DESeq2: Extend with negative binomial GLM for RNA-seq and other
  313. count data
  314. \end_layout
  315. \begin_layout Itemize
  316. voom: Extend with precision weights to model mean-variance trend
  317. \end_layout
  318. \begin_layout Itemize
  319. arrayWeights and duplicateCorrelation to handle complex variance structures
  320. \end_layout
  321. \end_deeper
  322. \begin_layout Itemize
  323. sva and ComBat for batch correction
  324. \end_layout
  325. \begin_layout Itemize
  326. Factor analysis: PCA, MDS, MOFA
  327. \end_layout
  328. \begin_deeper
  329. \begin_layout Itemize
  330. Batch-corrected PCA is informative, but careful application is required
  331. to avoid bias
  332. \end_layout
  333. \end_deeper
  334. \begin_layout Itemize
  335. Gene set analysis: camera and SPIA
  336. \end_layout
  337. \begin_layout Section
  338. Innovation
  339. \end_layout
  340. \begin_layout Itemize
  341. MSC infusion to improve transplant outcomes (prevent/delay rejection)
  342. \end_layout
  343. \begin_deeper
  344. \begin_layout Itemize
  345. Characterize MSC response to interferon gamma
  346. \end_layout
  347. \begin_layout Itemize
  348. IFN-g is thought to stimulate their function
  349. \end_layout
  350. \begin_layout Itemize
  351. Test IFN-g treated MSC infusion as a therapy to delay graft rejection in
  352. cynomolgus monkeys
  353. \end_layout
  354. \begin_layout Itemize
  355. Monitor animals post-transplant using blood RNA-seq at serial time points
  356. \end_layout
  357. \end_deeper
  358. \begin_layout Itemize
  359. Investigate dynamics of histone marks in CD4 T-cell activation and memory
  360. \end_layout
  361. \begin_deeper
  362. \begin_layout Itemize
  363. Previous studies have looked at single snapshots of histone marks
  364. \end_layout
  365. \begin_layout Itemize
  366. Instead, look at changes in histone marks across activation and memory
  367. \end_layout
  368. \end_deeper
  369. \begin_layout Itemize
  370. High-throughput sequencing and microarray technologies
  371. \end_layout
  372. \begin_deeper
  373. \begin_layout Itemize
  374. Powerful methods for assaying gene expression and epigenetics across entire
  375. genomes
  376. \end_layout
  377. \begin_layout Itemize
  378. Proper analysis requires finding and exploiting systematic genome-wide trends
  379. \end_layout
  380. \end_deeper
  381. \begin_layout Chapter
  382. Reproducible genome-wide epigenetic analysis of H3K4 and H3K27 methylation
  383. in naive and memory CD4 T-cell activation
  384. \end_layout
  385. \begin_layout Standard
  386. \begin_inset Flex TODO Note (inline)
  387. status open
  388. \begin_layout Plain Layout
  389. Author list: Me, Sarah, Dan
  390. \end_layout
  391. \end_inset
  392. \end_layout
  393. \begin_layout Section
  394. Approach
  395. \end_layout
  396. \begin_layout Itemize
  397. CD4 T-cells are central to all adaptive immune responses and memory
  398. \end_layout
  399. \begin_layout Itemize
  400. H3K4 and H3K27 methylation are major epigenetic regulators of gene expression
  401. \end_layout
  402. \begin_layout Itemize
  403. Canonically, H3K4 is activating and H3K27 is inhibitory, but the reality
  404. is complex
  405. \end_layout
  406. \begin_layout Itemize
  407. Looking at these marks during CD4 activation and memory should reveal new
  408. mechanistic details
  409. \end_layout
  410. \begin_layout Itemize
  411. Test
  412. \begin_inset Quotes eld
  413. \end_inset
  414. poised promoter
  415. \begin_inset Quotes erd
  416. \end_inset
  417. hypothesis in which H3K4 and H3K27 are both methylated
  418. \end_layout
  419. \begin_layout Itemize
  420. Expand scope of analysis beyond simple promoter counts
  421. \end_layout
  422. \begin_deeper
  423. \begin_layout Itemize
  424. Analyze peaks genome-wide, including in intergenic regions
  425. \end_layout
  426. \begin_layout Itemize
  427. Analysis of coverage distribution shape within promoters, e.g.
  428. upstream vs downstream coverage
  429. \end_layout
  430. \end_deeper
  431. \begin_layout Section
  432. Methods
  433. \end_layout
  434. \begin_layout Itemize
  435. Re-analyze previously published CD4 ChIP-seq & RNA-seq data
  436. \begin_inset CommandInset citation
  437. LatexCommand cite
  438. key "LaMere2016,Lamere2017"
  439. literal "true"
  440. \end_inset
  441. \end_layout
  442. \begin_deeper
  443. \begin_layout Itemize
  444. Completely reimplement analysis from scratch as a reproducible workflow
  445. \end_layout
  446. \begin_layout Itemize
  447. Use newly published methods & algorithms not available during the original
  448. analysis: SICER, csaw, MOFA, ComBat, sva, GREAT, and more
  449. \end_layout
  450. \end_deeper
  451. \begin_layout Itemize
  452. SICER, IDR, csaw, & GREAT to call ChIP-seq peaks genome-wide, perform differenti
  453. al abundance analysis, and relate those peaks to gene expression
  454. \end_layout
  455. \begin_layout Itemize
  456. Promoter counts in sliding windows around each gene's highest-expressed
  457. TSS to investigate coverage distribution within promoters
  458. \end_layout
  459. \begin_layout Section
  460. Results
  461. \end_layout
  462. \begin_layout Itemize
  463. Different histone marks have different effective promoter radii
  464. \end_layout
  465. \begin_layout Itemize
  466. H3K4 and RNA-seq data show clear evidence of naive convergence with memory
  467. between days 1 and 5
  468. \end_layout
  469. \begin_layout Itemize
  470. Promoter coverage distribution affects gene expression independent of total
  471. promoter count
  472. \end_layout
  473. \begin_layout Itemize
  474. Remaining analyses to complete:
  475. \end_layout
  476. \begin_deeper
  477. \begin_layout Itemize
  478. Look for naive-to-memory convergence in H3K27 data
  479. \end_layout
  480. \begin_layout Itemize
  481. Look at enriched pathways for day 0 to day 1 (activation) compared to day
  482. 1 to day 5 (putative naive-to-memory differentiation)
  483. \end_layout
  484. \begin_layout Itemize
  485. Find genes with different expression patterns in naive vs.
  486. memory and try to explain the difference with the Day 0 histone mark data
  487. \end_layout
  488. \begin_deeper
  489. \begin_layout Itemize
  490. Determine whether co-occurrence of H3K4me3 and H3K27me3 (proposed
  491. \begin_inset Quotes eld
  492. \end_inset
  493. poised
  494. \begin_inset Quotes erd
  495. \end_inset
  496. state) has effects on post-activation expression dynamics
  497. \end_layout
  498. \begin_layout Itemize
  499. Promoter coverage distribution dynamics throughout activation for interesting
  500. subsets of genes
  501. \end_layout
  502. \end_deeper
  503. \begin_layout Itemize
  504. (Backup) Compare and contrast behavior of promoter peaks vs intergenic (putative
  505. enhancer) peaks (GREAT analysis)
  506. \end_layout
  507. \begin_deeper
  508. \begin_layout Itemize
  509. Put results in context of important T-cell pathways & gene expression data
  510. \end_layout
  511. \end_deeper
  512. \end_deeper
  513. \begin_layout Section
  514. Discussion
  515. \end_layout
  516. \begin_layout Itemize
  517. "Promoter radius" is not constant and must be defined empirically for a
  518. given data set
  519. \end_layout
  520. \begin_layout Itemize
  521. Evaluate evidence for poised promoters and enhancer effects on gene expression
  522. dynamics of naive-to-memory differentiation
  523. \end_layout
  524. \begin_layout Itemize
  525. Compare to published work on other epigenetic marks (e.g.
  526. chromatin accessibility)
  527. \end_layout
  528. \begin_layout Chapter
  529. Improving array-based analyses of transplant rejection by optimizing data
  530. preprocessing
  531. \end_layout
  532. \begin_layout Standard
  533. \begin_inset Note Note
  534. status open
  535. \begin_layout Plain Layout
  536. Author list: Me, Sunil, Padma, Dan
  537. \end_layout
  538. \end_inset
  539. \end_layout
  540. \begin_layout Section
  541. Approach
  542. \end_layout
  543. \begin_layout Itemize
  544. Machine-learning applications demand a "single-channel" normalization method
  545. \end_layout
  546. \begin_layout Itemize
  547. frozen RMA is a good solution, but not trivial to apply
  548. \end_layout
  549. \begin_layout Itemize
  550. Methylation array data preprocessing induces heteroskedasticity
  551. \end_layout
  552. \begin_layout Itemize
  553. Need to account for this mean-variance dependency in analysis
  554. \end_layout
  555. \begin_layout Section
  556. Methods
  557. \end_layout
  558. \begin_layout Itemize
  559. Expression array normalization for detecting acute rejection
  560. \end_layout
  561. \begin_layout Itemize
  562. Use frozen RMA, a single-channel variant of RMA
  563. \end_layout
  564. \begin_layout Itemize
  565. Generate custom fRMA normalization vectors for each tissue (biopsy, blood)
  566. \end_layout
  567. \begin_layout Itemize
  568. Methylation arrays for differential methylation in rejection vs.
  569. healthy transplant
  570. \end_layout
  571. \begin_layout Itemize
  572. Adapt voom method originally designed for RNA-seq to model mean-variance
  573. dependence
  574. \end_layout
  575. \begin_layout Itemize
  576. Use sample precision weighting and sva to adjust for other confounding factors
  577. \end_layout
  578. \begin_layout Section
  579. Results
  580. \end_layout
  581. \begin_layout Itemize
  582. custom fRMA normalization improved cross-validated classifier performance
  583. \begin_inset CommandInset citation
  584. LatexCommand cite
  585. key "Kurian2014"
  586. literal "true"
  587. \end_inset
  588. \end_layout
  589. \begin_layout Itemize
  590. voom, precision weights, and sva improved model fit
  591. \end_layout
  592. \begin_deeper
  593. \begin_layout Itemize
  594. Also increased sensitivity for detecting differential methylation
  595. \end_layout
  596. \end_deeper
  597. \begin_layout Section
  598. Discussion
  599. \end_layout
  600. \begin_layout Itemize
  601. fRMA enables classifying new samples without re-normalizing the entire data
  602. set
  603. \end_layout
  604. \begin_deeper
  605. \begin_layout Itemize
  606. Critical for translating a classifier into clinical practice
  607. \end_layout
  608. \end_deeper
  609. \begin_layout Itemize
  610. Methods like voom designed for RNA-seq can also help with array analysis
  611. \end_layout
  612. \begin_layout Itemize
  613. Extracting and modeling confounders common to many features improves model
  614. correspondence to known biology
  615. \end_layout
  616. \begin_layout Chapter
  617. Globin-blocking for more effective blood RNA-seq analysis in primate animal
  618. model
  619. \end_layout
  620. \begin_layout Standard
  621. \begin_inset Flex TODO Note (inline)
  622. status open
  623. \begin_layout Plain Layout
  624. Choose between above and the paper title: Optimizing yield of deep RNA sequencin
  625. g for gene expression profiling by globin reduction of peripheral blood
  626. samples from cynomolgus monkeys (Macaca fascicularis).
  627. \end_layout
  628. \end_inset
  629. \end_layout
  630. \begin_layout Standard
  631. \begin_inset Flex TODO Note (inline)
  632. status open
  633. \begin_layout Plain Layout
  634. Chapter author list: https://tex.stackexchange.com/questions/156862/displaying-aut
  635. hor-for-each-chapter-in-book Every chapter gets an author list, which may
  636. or may not be part of a citation to a published/preprinted paper.
  637. \end_layout
  638. \end_inset
  639. \end_layout
  640. \begin_layout Standard
  641. \begin_inset Flex TODO Note (inline)
  642. status open
  643. \begin_layout Plain Layout
  644. Preprint then cite the paper
  645. \end_layout
  646. \end_inset
  647. \end_layout
  648. \begin_layout Section*
  649. Abstract
  650. \end_layout
  651. \begin_layout Paragraph
  652. Background
  653. \end_layout
  654. \begin_layout Standard
  655. Primate blood contains high concentrations of globin messenger RNA.
  656. Globin reduction is a standard technique used to improve the expression
  657. results obtained by DNA microarrays on RNA from blood samples.
  658. However, with whole transcriptome RNA-sequencing (RNA-seq) quickly replacing
  659. microarrays for many applications, the impact of globin reduction for RNA-seq
  660. has not been previously studied.
  661. Moreover, no off-the-shelf kits are available for globin reduction in nonhuman
  662. primates.
  663. \end_layout
  664. \begin_layout Paragraph
  665. Results
  666. \end_layout
  667. \begin_layout Standard
  668. Here we report a protocol for RNA-seq in primate blood samples that uses
  669. complementary oligonucleotides to block reverse transcription of the alpha
  670. and beta globin genes.
  671. In test samples from cynomolgus monkeys (Macaca fascicularis), this globin
  672. blocking protocol approximately doubles the yield of informative (non-globin)
  673. reads by greatly reducing the fraction of globin reads, while also improving
  674. the consistency in sequencing depth between samples.
  675. The increased yield enables detection of about 2000 more genes, significantly
  676. increases the correlation in measured gene expression levels between samples,
  677. and increases the sensitivity of differential gene expression tests.
  678. \end_layout
  679. \begin_layout Paragraph
  680. Conclusions
  681. \end_layout
  682. \begin_layout Standard
  683. These results show that globin blocking significantly improves the cost-effectiv
  684. eness of mRNA sequencing in primate blood samples by doubling the yield
  685. of useful reads, allowing detection of more genes, and improving the precision
  686. of gene expression measurements.
  687. Based on these results, a globin reducing or blocking protocol is recommended
  688. for all RNA-seq studies of primate blood samples.
  689. \end_layout
  690. \begin_layout Section
  691. Approach
  692. \end_layout
  693. \begin_layout Itemize
  694. Cynomolgus monkeys as a model organism
  695. \end_layout
  696. \begin_deeper
  697. \begin_layout Itemize
  698. Highly related to humans
  699. \end_layout
  700. \begin_layout Itemize
  701. Small size and short life cycle - good research animal
  702. \end_layout
  703. \begin_layout Itemize
  704. Genomics resources still in development
  705. \end_layout
  706. \end_deeper
  707. \begin_layout Itemize
  708. Inadequacy of existing blood RNA-seq protocols
  709. \end_layout
  710. \begin_deeper
  711. \begin_layout Itemize
  712. Existing protocols use a separate globin pulldown step, slowing down processing
  713. \end_layout
  714. \end_deeper
  715. \begin_layout Standard
  716. Increasingly, researchers are turning to high-throughput mRNA sequencing
  717. technologies (RNA-seq) in preference to expression microarrays for analysis
  718. of gene expression
  719. \begin_inset CommandInset citation
  720. LatexCommand cite
  721. key "Mutz2012"
  722. literal "false"
  723. \end_inset
  724. .
  725. The advantages are even greater for study of model organisms with no well-estab
  726. lished array platforms available, such as the cynomolgus monkey (Macaca
  727. fascicularis).
  728. High fractions of globin mRNA are naturally present in mammalian peripheral
  729. blood samples (up to 70% of total mRNA) and these are known to interfere
  730. with the results of array-based expression profiling
  731. \begin_inset CommandInset citation
  732. LatexCommand cite
  733. key "Winn2010"
  734. literal "false"
  735. \end_inset
  736. .
  737. The importance of globin reduction for RNA-seq of blood has only been evaluated
  738. for a deepSAGE protocol on human samples
  739. \begin_inset CommandInset citation
  740. LatexCommand cite
  741. key "Mastrokolias2012"
  742. literal "false"
  743. \end_inset
  744. .
  745. In the present report, we evaluated globin reduction using custom blocking
  746. oligonucleotides for deep RNA-seq of peripheral blood samples from a nonhuman
  747. primate, cynomolgus monkey, using the Illumina technology platform.
  748. We demonstrate that globin reduction significantly improves the cost-effectiven
  749. ess of RNA-seq in blood samples.
  750. Thus, our protocol offers a significant advantage to any investigator planning
  751. to use RNA-seq for gene expression profiling of nonhuman primate blood
  752. samples.
  753. Our method can be generally applied to any species by designing complementary
  754. oligonucleotide blocking probes to the globin gene sequences of that species.
  755. Indeed, any highly expressed but biologically uninformative transcripts
  756. can also be blocked to further increase sequencing efficiency and value
  757. \begin_inset CommandInset citation
  758. LatexCommand cite
  759. key "Arnaud2016"
  760. literal "false"
  761. \end_inset
  762. .
  763. \end_layout
  764. \begin_layout Section
  765. Methods
  766. \end_layout
  767. \begin_layout Subsection*
  768. Sample collection
  769. \end_layout
  770. \begin_layout Standard
  771. All research reported here was done under IACUC-approved protocols at the
  772. University of Miami and complied with all applicable federal and state
  773. regulations and ethical principles for nonhuman primate research.
  774. Blood draws occurred between 16 April 2012 and 18 June 2015.
  775. The experimental system involved intrahepatic pancreatic islet transplantation
  776. into Cynomolgus monkeys with induced diabetes mellitus with or without
  777. concomitant infusion of mesenchymal stem cells.
  778. Blood was collected at serial time points before and after transplantation
  779. into PAXgene Blood RNA tubes (PreAnalytiX/Qiagen, Valencia, CA) at the
  780. precise volume:volume ratio of 2.5 ml whole blood into 6.9 ml of PAXgene
  781. additive.
  782. \end_layout
  783. \begin_layout Subsection*
  784. Globin Blocking
  785. \end_layout
  786. \begin_layout Standard
  787. Four oligonucleotides were designed to hybridize to the 3’ end of the transcript
  788. s for Cynomolgus HBA1, HBA2 and HBB, with two hybridization sites for HBB
  789. and two sites for HBA (the chosen sites were identical in both HBA genes).
  790. All oligos were purchased from Sigma and were entirely composed of 2’O-Me
  791. bases with a C3 spacer positioned at the 3’ ends to prevent any polymerase
  792. mediated primer extension.
  793. \end_layout
  794. \begin_layout Quote
  795. HBA1/2 site 1: GCCCACUCAGACUUUAUUCAAAG-C3spacer
  796. \end_layout
  797. \begin_layout Quote
  798. HBA1/2 site 2: GGUGCAAGGAGGGGAGGAG-C3spacer
  799. \end_layout
  800. \begin_layout Quote
  801. HBB site 1: AAUGAAAAUAAAUGUUUUUUAUUAG-C3spacer
  802. \end_layout
  803. \begin_layout Quote
  804. HBB site 2: CUCAAGGCCCUUCAUAAUAUCCC-C3spacer
  805. \end_layout
  806. \begin_layout Subsection*
  807. RNA-seq Library Preparation
  808. \end_layout
  809. \begin_layout Standard
  810. Sequencing libraries were prepared with 200 ng total RNA from each sample.
  811. Polyadenylated mRNA was selected from 200 ng aliquots of cynomolgus blood-deri
  812. ved total RNA using Ambion Dynabeads Oligo(dT)25 beads (Invitrogen) following
  813. manufacturer’s recommended protocol.
  814. PolyA selected RNA was then combined with 8 pmol of HBA1/2 (site 1), 8
  815. pmol of HBA1/2 (site 2), 12 pmol of HBB (site 1) and 12 pmol of HBB (site
  816. 2) oligonucleotides.
  817. In addition, 20 pmol of RT primer containing a portion of the Illumina
  818. adapter sequence (B-oligo-dTV: GAGTTCCTTGGCACCCGAGAATTCCATTTTTTTTTTTTTTTTTTTV)
  819. and 4 µL of 5X First Strand buffer (250 mM Tris-HCl pH 8.3, 375 mM KCl,
  820. 15mM MgCl2) were added in a total volume of 15 µL.
  821. The RNA was fragmented by heating this cocktail for 3 minutes at 95°C and
  822. then placed on ice.
  823. This was followed by the addition of 2 µL 0.1 M DTT, 1 µL RNaseOUT, 1 µL
  824. 10mM dNTPs 10% biotin-16 aminoallyl-2’- dUTP and 10% biotin-16 aminoallyl-2’-
  825. dCTP (TriLink Biotech, San Diego, CA), 1 µL Superscript II (200U/ µL, Thermo-Fi
  826. sher).
  827. A second “unblocked” library was prepared in the same way for each sample
  828. but replacing the blocking oligos with an equivalent volume of water.
  829. The reaction was carried out at 25°C for 15 minutes and 42°C for 40 minutes,
  830. followed by incubation at 75°C for 10 minutes to inactivate the reverse
  831. transcriptase.
  832. \end_layout
  833. \begin_layout Standard
  834. The cDNA/RNA hybrid molecules were purified using 1.8X Ampure XP beads (Agencourt
  835. ) following supplier’s recommended protocol.
  836. The cDNA/RNA hybrid was eluted in 25 µL of 10 mM Tris-HCl pH 8.0, and then
  837. bound to 25 µL of M280 Magnetic Streptavidin beads washed per recommended
  838. protocol (Thermo-Fisher).
  839. After 30 minutes of binding, beads were washed one time in 100 µL 0.1N NaOH
  840. to denature and remove the bound RNA, followed by two 100 µL washes with
  841. 1X TE buffer.
  842. \end_layout
  843. \begin_layout Standard
  844. Subsequent attachment of the 5-prime Illumina A adapter was performed by
  845. on-bead random primer extension of the following sequence (A-N8 primer:
  846. TTCAGAGTTCTACAGTCCGACGATCNNNNNNNN).
  847. Briefly, beads were resuspended in a 20 µL reaction containing 5 µM A-N8
  848. primer, 40mM Tris-HCl pH 7.5, 20mM MgCl2, 50mM NaCl, 0.325U/µL Sequenase
  849. 2.0 (Affymetrix, Santa Clara, CA), 0.0025U/µL inorganic pyrophosphatase (Affymetr
  850. ix) and 300 µM each dNTP.
  851. Reaction was incubated at 22°C for 30 minutes, then beads were washed 2
  852. times with 1X TE buffer (200µL).
  853. \end_layout
  854. \begin_layout Standard
  855. The magnetic streptavidin beads were resuspended in 34 µL nuclease-free
  856. water and added directly to a PCR tube.
  857. The two Illumina protocol-specified PCR primers were added at 0.53 µM (Illumina
  858. TruSeq Universal Primer 1 and Illumina TruSeq barcoded PCR primer 2), along
  859. with 40 µL 2X KAPA HiFi Hotstart ReadyMix (KAPA, Wilmington, MA) and thermocycl
  860. ed as follows: starting with 98°C (2 min-hold); 15 cycles of 98°C, 20sec;
  861. 60°C, 30sec; 72°C, 30sec; and finished with a 72°C (2 min-hold).
  862. \end_layout
  863. \begin_layout Standard
  864. PCR products were purified with 1X Ampure Beads following manufacturer’s
  865. recommended protocol.
  866. Libraries were then analyzed using the Agilent TapeStation and quantitation
  867. of desired size range was performed by “smear analysis”.
  868. Samples were pooled in equimolar batches of 16 samples.
  869. Pooled libraries were size selected on 2% agarose gels (E-Gel EX Agarose
  870. Gels; Thermo-Fisher).
  871. Products were cut between 250 and 350 bp (corresponding to insert sizes
  872. of 130 to 230 bps).
  873. Finished library pools were then sequenced on the Illumina NextSeq500 instrumen
  874. t with 75 base read lengths.
  875. \end_layout
  876. \begin_layout Subsection*
  877. Read alignment and counting
  878. \end_layout
  879. \begin_layout Standard
  880. Reads were aligned to the cynomolgus genome using STAR
  881. \begin_inset CommandInset citation
  882. LatexCommand cite
  883. key "Dobin2013,Wilson2013"
  884. literal "false"
  885. \end_inset
  886. .
  887. Counts of uniquely mapped reads were obtained for every gene in each sample
  888. with the “featureCounts” function from the Rsubread package, using each
  889. of the three possibilities for the “strandSpecific” option: sense, antisense,
  890. and unstranded
  891. \begin_inset CommandInset citation
  892. LatexCommand cite
  893. key "Liao2014"
  894. literal "false"
  895. \end_inset
  896. .
  897. A few artifacts in the cynomolgus genome annotation complicated read counting.
  898. First, no ortholog is annotated for alpha globin in the cynomolgus genome,
  899. presumably because the human genome has two alpha globin genes with nearly
  900. identical sequences, making the orthology relationship ambiguous.
  901. However, two loci in the cynomolgus genome are annotated as “hemoglobin subunit alpha-lik
  902. e” (LOC102136192 and LOC102136846).
  903. LOC102136192 is annotated as a pseudogene while LOC102136846 is annotated
  904. as protein-coding.
  905. Our globin reduction protocol was designed to include blocking of these
  906. two genes.
  907. Indeed, these two genes have almost the same read counts in each library
  908. as the properly-annotated HBB gene and much larger counts than any other
  909. gene in the unblocked libraries, giving confidence that reads derived from
  910. the real alpha globin are mapping to both genes.
  911. Thus, reads from both of these loci were counted as alpha globin reads
  912. in all further analyses.
  913. The second artifact is a small, uncharacterized non-coding RNA gene (LOC1021365
  914. 91), which overlaps the HBA-like gene (LOC102136192) on the opposite strand.
  915. If counting is not performed in stranded mode (or if a non-strand-specific
  916. sequencing protocol is used), many reads mapping to the globin gene will
  917. be discarded as ambiguous due to their overlap with this ncRNA gene, resulting
  918. in significant undercounting of globin reads.
  919. Therefore, stranded sense counts were used for all further analysis in
  920. the present study to ensure that we accurately accounted for globin transcript
  921. reduction.
  922. However, we note that stranded reads are not necessary for RNA-seq using
  923. our protocol in standard practice.
  924. \end_layout
  925. \begin_layout Subsection*
  926. Normalization and Exploratory Data Analysis
  927. \end_layout
  928. \begin_layout Standard
  929. Libraries were normalized by computing scaling factors using the edgeR package’s
  930. Trimmed Mean of M-values method
  931. \begin_inset CommandInset citation
  932. LatexCommand cite
  933. key "Robinson2010"
  934. literal "false"
  935. \end_inset
  936. .
  937. Log2 counts per million values (logCPM) were calculated using the cpm function
  938. in edgeR for individual samples and aveLogCPM function for averages across
  939. groups of samples, using those functions’ default prior count values to
  940. avoid taking the logarithm of 0.
  941. Genes were considered “present” if their average normalized logCPM values
  942. across all libraries were at least -1.
  943. Normalizing for gene length was unnecessary because the sequencing protocol
  944. is 3’-biased and hence the expected read count for each gene is related
  945. to the transcript’s copy number but not its length.
  946. \end_layout
  947. \begin_layout Standard
  948. In order to assess the effect of blocking on reproducibility, Pearson and
  949. Spearman correlation coefficients were computed between the logCPM values
  950. for every pair of libraries within the globin-blocked (GB) and unblocked
  951. (non-GB) groups, and edgeR's “estimateDisp” function was used to compute
  952. negative binomial dispersions separately for the two groups
  953. \begin_inset CommandInset citation
  954. LatexCommand cite
  955. key "Chen2014"
  956. literal "false"
  957. \end_inset
  958. .
  959. \end_layout
  960. \begin_layout Subsection*
  961. Differential Expression Analysis
  962. \end_layout
  963. \begin_layout Standard
  964. All tests for differential gene expression were performed using edgeR, by
  965. first fitting a negative binomial generalized linear model to the counts
  966. and normalization factors and then performing a quasi-likelihood F-test
  967. with robust estimation of outlier gene dispersions
  968. \begin_inset CommandInset citation
  969. LatexCommand cite
  970. key "Lund2012,Phipson2016"
  971. literal "false"
  972. \end_inset
  973. .
  974. To investigate the effects of globin blocking on each gene, an additive
  975. model was fit to the full data with coefficients for globin blocking and
  976. SampleID.
  977. To test the effect of globin blocking on detection of differentially expressed
  978. genes, the GB samples and non-GB samples were each analyzed independently
  979. as follows: for each animal with both a pre-transplant and a post-transplant
  980. time point in the data set, the pre-transplant sample and the earliest
  981. post-transplant sample were selected, and all others were excluded, yielding
  982. a pre-/post-transplant pair of samples for each animal (N=7 animals with
  983. paired samples).
  984. These samples were analyzed for pre-transplant vs.
  985. post-transplant differential gene expression while controlling for inter-animal
  986. variation using an additive model with coefficients for transplant and
  987. animal ID.
  988. In all analyses, p-values were adjusted using the Benjamini-Hochberg procedure
  989. for FDR correction
  990. \begin_inset CommandInset citation
  991. LatexCommand cite
  992. key "Benjamini1995"
  993. literal "false"
  994. \end_inset
  995. .
  996. \end_layout
  997. \begin_layout Standard
  998. \begin_inset Note Note
  999. status open
  1000. \begin_layout Itemize
  1001. New blood RNA-seq protocol to block reverse transcription of globin genes
  1002. \end_layout
  1003. \begin_layout Itemize
  1004. Blood RNA-seq time course after transplants with/without MSC infusion
  1005. \end_layout
  1006. \end_inset
  1007. \end_layout
  1008. \begin_layout Section
  1009. Results
  1010. \end_layout
  1011. \begin_layout Subsection*
  1012. Globin blocking yields a larger and more consistent fraction of useful reads
  1013. \end_layout
  1014. \begin_layout Standard
  1015. The objective of the present study was to validate a new protocol for deep
  1016. RNA-seq of whole blood drawn into PAXgene tubes from cynomolgus monkeys
  1017. undergoing islet transplantation, with particular focus on minimizing the
  1018. loss of useful sequencing space to uninformative globin reads.
  1019. The details of the analysis with respect to transplant outcomes and the
  1020. impact of mesenchymal stem cell treatment will be reported in a separate
  1021. manuscript (in preparation).
  1022. To focus on the efficacy of our globin blocking protocol, 37 blood samples,
  1023. 16 from pre-transplant and 21 from post-transplant time points, were each
  1024. prepped once with and once without globin blocking oligos, and were then
  1025. sequenced on an Illumina NextSeq500 instrument.
  1026. The number of reads aligning to each gene in the cynomolgus genome was
  1027. counted.
  1028. Table 1 summarizes the distribution of read fractions among the GB and
  1029. non-GB libraries.
  1030. In the libraries with no globin blocking, globin reads made up an average
  1031. of 44.6% of total input reads, while reads assigned to all other genes made
  1032. up an average of 26.3%.
  1033. The remaining reads either aligned to intergenic regions (that include
  1034. long non-coding RNAs) or did not align with any annotated transcripts in
  1035. the current build of the cynomolgus genome.
  1036. In the GB libraries, globin reads made up only 3.48% and reads assigned
  1037. to all other genes increased to 50.4%.
  1038. Thus, globin blocking resulted in a 92.2% reduction in globin reads and
  1039. a 91.6% increase in yield of useful non-globin reads.
  1040. \end_layout
  1041. \begin_layout Standard
  1042. This reduction is not quite as efficient as the previous analysis showed
  1043. for human samples by DeepSAGE (<0.4% globin reads after globin reduction)
  1044. \begin_inset CommandInset citation
  1045. LatexCommand cite
  1046. key "Mastrokolias2012"
  1047. literal "false"
  1048. \end_inset
  1049. .
  1050. Nonetheless, this degree of globin reduction is sufficient to nearly double
  1051. the yield of useful reads.
  1052. Thus, globin blocking cuts the required sequencing effort (and costs) to
  1053. achieve a target coverage depth by almost 50%.
  1054. Consistent with this near doubling of yield, the average difference in
  1055. un-normalized logCPM across all genes between the GB libraries and non-GB
  1056. libraries is approximately 1 (mean = 1.01, median = 1.08), an overall 2-fold
  1057. increase.
  1058. Un-normalized values are used here because the TMM normalization correctly
  1059. identifies this 2-fold difference as biologically irrelevant and removes
  1060. it.
  1061. \end_layout
  1062. \begin_layout Standard
  1063. \begin_inset Float figure
  1064. wide false
  1065. sideways false
  1066. status open
  1067. \begin_layout Plain Layout
  1068. \align center
  1069. \begin_inset Graphics
  1070. filename graphics/Globin Paper/figure1 - globin-fractions.pdf
  1071. \end_inset
  1072. \end_layout
  1073. \begin_layout Plain Layout
  1074. \begin_inset Caption Standard
  1075. \begin_layout Plain Layout
  1076. \series bold
  1077. \begin_inset Argument 1
  1078. status collapsed
  1079. \begin_layout Plain Layout
  1080. Fraction of genic reads in each sample aligned to non-globin genes, with
  1081. and without globin blocking (GB).
  1082. \end_layout
  1083. \end_inset
  1084. \begin_inset CommandInset label
  1085. LatexCommand label
  1086. name "fig:Fraction-of-genic-reads"
  1087. \end_inset
  1088. Fraction of genic reads in each sample aligned to non-globin genes, with
  1089. and without globin blocking (GB).
  1090. \series default
  1091. All reads in each sequencing library were aligned to the cyno genome, and
  1092. the number of reads uniquely aligning to each gene was counted.
  1093. For each sample, counts were summed separately for all globin genes and
  1094. for the remainder of the genes (non-globin genes), and the fraction of
  1095. genic reads aligned to non-globin genes was computed.
  1096. Each point represents an individual sample.
  1097. Gray + signs indicate the means for globin-blocked libraries and unblocked
  1098. libraries.
  1099. The overall distribution for each group is represented as a notched box
  1100. plot.
  1101. Points are randomly spread vertically to avoid excessive overlapping.
  1102. \end_layout
  1103. \end_inset
  1104. \end_layout
  1105. \begin_layout Plain Layout
  1106. \end_layout
  1107. \end_inset
  1108. \end_layout
  1109. \begin_layout Standard
  1110. \begin_inset Float table
  1111. placement p
  1112. wide false
  1113. sideways true
  1114. status open
  1115. \begin_layout Plain Layout
  1116. \align center
  1117. \begin_inset Tabular
  1118. <lyxtabular version="3" rows="4" columns="7">
  1119. <features tabularvalignment="middle">
  1120. <column alignment="center" valignment="top">
  1121. <column alignment="center" valignment="top">
  1122. <column alignment="center" valignment="top">
  1123. <column alignment="center" valignment="top">
  1124. <column alignment="center" valignment="top">
  1125. <column alignment="center" valignment="top">
  1126. <column alignment="center" valignment="top">
  1127. <row>
  1128. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1129. \begin_inset Text
  1130. \begin_layout Plain Layout
  1131. \end_layout
  1132. \end_inset
  1133. </cell>
  1134. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1135. \begin_inset Text
  1136. \begin_layout Plain Layout
  1137. \family roman
  1138. \series medium
  1139. \shape up
  1140. \size normal
  1141. \emph off
  1142. \bar no
  1143. \strikeout off
  1144. \xout off
  1145. \uuline off
  1146. \uwave off
  1147. \noun off
  1148. \color none
  1149. Percent of Total Reads
  1150. \end_layout
  1151. \end_inset
  1152. </cell>
  1153. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1154. \begin_inset Text
  1155. \begin_layout Plain Layout
  1156. \end_layout
  1157. \end_inset
  1158. </cell>
  1159. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1160. \begin_inset Text
  1161. \begin_layout Plain Layout
  1162. \end_layout
  1163. \end_inset
  1164. </cell>
  1165. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1166. \begin_inset Text
  1167. \begin_layout Plain Layout
  1168. \end_layout
  1169. \end_inset
  1170. </cell>
  1171. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1172. \begin_inset Text
  1173. \begin_layout Plain Layout
  1174. \family roman
  1175. \series medium
  1176. \shape up
  1177. \size normal
  1178. \emph off
  1179. \bar no
  1180. \strikeout off
  1181. \xout off
  1182. \uuline off
  1183. \uwave off
  1184. \noun off
  1185. \color none
  1186. Percent of Genic Reads
  1187. \end_layout
  1188. \end_inset
  1189. </cell>
  1190. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1191. \begin_inset Text
  1192. \begin_layout Plain Layout
  1193. \end_layout
  1194. \end_inset
  1195. </cell>
  1196. </row>
  1197. <row>
  1198. <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none">
  1199. \begin_inset Text
  1200. \begin_layout Plain Layout
  1201. GB
  1202. \end_layout
  1203. \end_inset
  1204. </cell>
  1205. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1206. \begin_inset Text
  1207. \begin_layout Plain Layout
  1208. \family roman
  1209. \series medium
  1210. \shape up
  1211. \size normal
  1212. \emph off
  1213. \bar no
  1214. \strikeout off
  1215. \xout off
  1216. \uuline off
  1217. \uwave off
  1218. \noun off
  1219. \color none
  1220. Non-globin Reads
  1221. \end_layout
  1222. \end_inset
  1223. </cell>
  1224. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1225. \begin_inset Text
  1226. \begin_layout Plain Layout
  1227. \family roman
  1228. \series medium
  1229. \shape up
  1230. \size normal
  1231. \emph off
  1232. \bar no
  1233. \strikeout off
  1234. \xout off
  1235. \uuline off
  1236. \uwave off
  1237. \noun off
  1238. \color none
  1239. Globin Reads
  1240. \end_layout
  1241. \end_inset
  1242. </cell>
  1243. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1244. \begin_inset Text
  1245. \begin_layout Plain Layout
  1246. \family roman
  1247. \series medium
  1248. \shape up
  1249. \size normal
  1250. \emph off
  1251. \bar no
  1252. \strikeout off
  1253. \xout off
  1254. \uuline off
  1255. \uwave off
  1256. \noun off
  1257. \color none
  1258. All Genic Reads
  1259. \end_layout
  1260. \end_inset
  1261. </cell>
  1262. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1263. \begin_inset Text
  1264. \begin_layout Plain Layout
  1265. \family roman
  1266. \series medium
  1267. \shape up
  1268. \size normal
  1269. \emph off
  1270. \bar no
  1271. \strikeout off
  1272. \xout off
  1273. \uuline off
  1274. \uwave off
  1275. \noun off
  1276. \color none
  1277. All Aligned Reads
  1278. \end_layout
  1279. \end_inset
  1280. </cell>
  1281. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1282. \begin_inset Text
  1283. \begin_layout Plain Layout
  1284. \family roman
  1285. \series medium
  1286. \shape up
  1287. \size normal
  1288. \emph off
  1289. \bar no
  1290. \strikeout off
  1291. \xout off
  1292. \uuline off
  1293. \uwave off
  1294. \noun off
  1295. \color none
  1296. Non-globin Reads
  1297. \end_layout
  1298. \end_inset
  1299. </cell>
  1300. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  1301. \begin_inset Text
  1302. \begin_layout Plain Layout
  1303. \family roman
  1304. \series medium
  1305. \shape up
  1306. \size normal
  1307. \emph off
  1308. \bar no
  1309. \strikeout off
  1310. \xout off
  1311. \uuline off
  1312. \uwave off
  1313. \noun off
  1314. \color none
  1315. Globin Reads
  1316. \end_layout
  1317. \end_inset
  1318. </cell>
  1319. </row>
  1320. <row>
  1321. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1322. \begin_inset Text
  1323. \begin_layout Plain Layout
  1324. \family roman
  1325. \series medium
  1326. \shape up
  1327. \size normal
  1328. \emph off
  1329. \bar no
  1330. \strikeout off
  1331. \xout off
  1332. \uuline off
  1333. \uwave off
  1334. \noun off
  1335. \color none
  1336. Yes
  1337. \end_layout
  1338. \end_inset
  1339. </cell>
  1340. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1341. \begin_inset Text
  1342. \begin_layout Plain Layout
  1343. \family roman
  1344. \series medium
  1345. \shape up
  1346. \size normal
  1347. \emph off
  1348. \bar no
  1349. \strikeout off
  1350. \xout off
  1351. \uuline off
  1352. \uwave off
  1353. \noun off
  1354. \color none
  1355. 50.4% ± 6.82
  1356. \end_layout
  1357. \end_inset
  1358. </cell>
  1359. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1360. \begin_inset Text
  1361. \begin_layout Plain Layout
  1362. \family roman
  1363. \series medium
  1364. \shape up
  1365. \size normal
  1366. \emph off
  1367. \bar no
  1368. \strikeout off
  1369. \xout off
  1370. \uuline off
  1371. \uwave off
  1372. \noun off
  1373. \color none
  1374. 3.48% ± 2.94
  1375. \end_layout
  1376. \end_inset
  1377. </cell>
  1378. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1379. \begin_inset Text
  1380. \begin_layout Plain Layout
  1381. \family roman
  1382. \series medium
  1383. \shape up
  1384. \size normal
  1385. \emph off
  1386. \bar no
  1387. \strikeout off
  1388. \xout off
  1389. \uuline off
  1390. \uwave off
  1391. \noun off
  1392. \color none
  1393. 53.9% ± 6.81
  1394. \end_layout
  1395. \end_inset
  1396. </cell>
  1397. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1398. \begin_inset Text
  1399. \begin_layout Plain Layout
  1400. \family roman
  1401. \series medium
  1402. \shape up
  1403. \size normal
  1404. \emph off
  1405. \bar no
  1406. \strikeout off
  1407. \xout off
  1408. \uuline off
  1409. \uwave off
  1410. \noun off
  1411. \color none
  1412. 89.7% ± 2.40
  1413. \end_layout
  1414. \end_inset
  1415. </cell>
  1416. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1417. \begin_inset Text
  1418. \begin_layout Plain Layout
  1419. \family roman
  1420. \series medium
  1421. \shape up
  1422. \size normal
  1423. \emph off
  1424. \bar no
  1425. \strikeout off
  1426. \xout off
  1427. \uuline off
  1428. \uwave off
  1429. \noun off
  1430. \color none
  1431. 93.5% ± 5.25
  1432. \end_layout
  1433. \end_inset
  1434. </cell>
  1435. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1436. \begin_inset Text
  1437. \begin_layout Plain Layout
  1438. \family roman
  1439. \series medium
  1440. \shape up
  1441. \size normal
  1442. \emph off
  1443. \bar no
  1444. \strikeout off
  1445. \xout off
  1446. \uuline off
  1447. \uwave off
  1448. \noun off
  1449. \color none
  1450. 6.49% ± 5.25
  1451. \end_layout
  1452. \end_inset
  1453. </cell>
  1454. </row>
  1455. <row>
  1456. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1457. \begin_inset Text
  1458. \begin_layout Plain Layout
  1459. \family roman
  1460. \series medium
  1461. \shape up
  1462. \size normal
  1463. \emph off
  1464. \bar no
  1465. \strikeout off
  1466. \xout off
  1467. \uuline off
  1468. \uwave off
  1469. \noun off
  1470. \color none
  1471. No
  1472. \end_layout
  1473. \end_inset
  1474. </cell>
  1475. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1476. \begin_inset Text
  1477. \begin_layout Plain Layout
  1478. \family roman
  1479. \series medium
  1480. \shape up
  1481. \size normal
  1482. \emph off
  1483. \bar no
  1484. \strikeout off
  1485. \xout off
  1486. \uuline off
  1487. \uwave off
  1488. \noun off
  1489. \color none
  1490. 26.3% ± 8.95
  1491. \end_layout
  1492. \end_inset
  1493. </cell>
  1494. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1495. \begin_inset Text
  1496. \begin_layout Plain Layout
  1497. \family roman
  1498. \series medium
  1499. \shape up
  1500. \size normal
  1501. \emph off
  1502. \bar no
  1503. \strikeout off
  1504. \xout off
  1505. \uuline off
  1506. \uwave off
  1507. \noun off
  1508. \color none
  1509. 44.6% ± 16.6
  1510. \end_layout
  1511. \end_inset
  1512. </cell>
  1513. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1514. \begin_inset Text
  1515. \begin_layout Plain Layout
  1516. \family roman
  1517. \series medium
  1518. \shape up
  1519. \size normal
  1520. \emph off
  1521. \bar no
  1522. \strikeout off
  1523. \xout off
  1524. \uuline off
  1525. \uwave off
  1526. \noun off
  1527. \color none
  1528. 70.1% ± 9.38
  1529. \end_layout
  1530. \end_inset
  1531. </cell>
  1532. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1533. \begin_inset Text
  1534. \begin_layout Plain Layout
  1535. \family roman
  1536. \series medium
  1537. \shape up
  1538. \size normal
  1539. \emph off
  1540. \bar no
  1541. \strikeout off
  1542. \xout off
  1543. \uuline off
  1544. \uwave off
  1545. \noun off
  1546. \color none
  1547. 90.7% ± 5.16
  1548. \end_layout
  1549. \end_inset
  1550. </cell>
  1551. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1552. \begin_inset Text
  1553. \begin_layout Plain Layout
  1554. \family roman
  1555. \series medium
  1556. \shape up
  1557. \size normal
  1558. \emph off
  1559. \bar no
  1560. \strikeout off
  1561. \xout off
  1562. \uuline off
  1563. \uwave off
  1564. \noun off
  1565. \color none
  1566. 38.8% ± 17.1
  1567. \end_layout
  1568. \end_inset
  1569. </cell>
  1570. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  1571. \begin_inset Text
  1572. \begin_layout Plain Layout
  1573. \family roman
  1574. \series medium
  1575. \shape up
  1576. \size normal
  1577. \emph off
  1578. \bar no
  1579. \strikeout off
  1580. \xout off
  1581. \uuline off
  1582. \uwave off
  1583. \noun off
  1584. \color none
  1585. 61.2% ± 17.1
  1586. \end_layout
  1587. \end_inset
  1588. </cell>
  1589. </row>
  1590. </lyxtabular>
  1591. \end_inset
  1592. \end_layout
  1593. \begin_layout Plain Layout
  1594. \begin_inset Caption Standard
  1595. \begin_layout Plain Layout
  1596. \series bold
  1597. \begin_inset Argument 1
  1598. status collapsed
  1599. \begin_layout Plain Layout
  1600. Fractions of reads mapping to genomic features in GB and non-GB samples.
  1601. \end_layout
  1602. \end_inset
  1603. \begin_inset CommandInset label
  1604. LatexCommand label
  1605. name "tab:Fractions-of-reads"
  1606. \end_inset
  1607. Fractions of reads mapping to genomic features in GB and non-GB samples.
  1608. \series default
  1609. All values are given as mean ± standard deviation.
  1610. \end_layout
  1611. \end_inset
  1612. \end_layout
  1613. \begin_layout Plain Layout
  1614. \end_layout
  1615. \end_inset
  1616. \end_layout
  1617. \begin_layout Standard
  1618. Another important aspect is that the standard deviations in Table
  1619. \begin_inset CommandInset ref
  1620. LatexCommand ref
  1621. reference "tab:Fractions-of-reads"
  1622. plural "false"
  1623. caps "false"
  1624. noprefix "false"
  1625. \end_inset
  1626. are uniformly smaller in the GB samples than the non-GB ones, indicating
  1627. much greater consistency of yield.
  1628. This is best seen in the percentage of non-globin reads as a fraction of
  1629. total reads aligned to annotated genes (genic reads).
  1630. For the non-GB samples, this measure ranges from 10.9% to 80.9%, while for
  1631. the GB samples it ranges from 81.9% to 99.9% (Figure
  1632. \begin_inset CommandInset ref
  1633. LatexCommand ref
  1634. reference "fig:Fraction-of-genic-reads"
  1635. plural "false"
  1636. caps "false"
  1637. noprefix "false"
  1638. \end_inset
  1639. ).
  1640. This means that for applications where it is critical that each sample
  1641. achieve a specified minimum coverage in order to provide useful information,
  1642. it would be necessary to budget up to 10 times the sequencing depth per
  1643. sample without globin blocking, even though the average yield improvement
  1644. for globin blocking is only 2-fold, because every sample has a chance of
  1645. being 90% globin and 10% useful reads.
  1646. Hence, the more consistent behavior of GB samples makes planning an experiment
  1647. easier and more efficient because it eliminates the need to over-sequence
  1648. every sample in order to guard against the worst case of a high-globin
  1649. fraction.
  1650. \end_layout
  1651. \begin_layout Subsection*
  1652. Globin blocking lowers the noise floor and allows detection of about 2000
  1653. more genes
  1654. \end_layout
  1655. \begin_layout Standard
  1656. \begin_inset Flex TODO Note (inline)
  1657. status open
  1658. \begin_layout Plain Layout
  1659. Remove redundant titles from figures
  1660. \end_layout
  1661. \end_inset
  1662. \end_layout
  1663. \begin_layout Standard
  1664. \begin_inset Float figure
  1665. wide false
  1666. sideways false
  1667. status open
  1668. \begin_layout Plain Layout
  1669. \align center
  1670. \begin_inset Graphics
  1671. filename graphics/Globin Paper/figure2 - aveLogCPM-colored.pdf
  1672. \end_inset
  1673. \end_layout
  1674. \begin_layout Plain Layout
  1675. \begin_inset Caption Standard
  1676. \begin_layout Plain Layout
  1677. \series bold
  1678. \begin_inset Argument 1
  1679. status collapsed
  1680. \begin_layout Plain Layout
  1681. Distributions of average group gene abundances when normalized separately
  1682. or together.
  1683. \end_layout
  1684. \end_inset
  1685. \begin_inset CommandInset label
  1686. LatexCommand label
  1687. name "fig:logcpm-dists"
  1688. \end_inset
  1689. Distributions of average group gene abundances when normalized separately
  1690. or together.
  1691. \series default
  1692. All reads in each sequencing library were aligned to the cyno genome, and
  1693. the number of reads uniquely aligning to each gene was counted.
  1694. Genes with zero counts in all libraries were discarded.
  1695. Libraries were normalized using the TMM method.
  1696. Libraries were split into globin-blocked (GB) and non-GB groups and the
  1697. average abundance for each gene in both groups, measured in log2 counts
  1698. per million reads counted, was computed using the aveLogCPM function.
  1699. The distribution of average gene logCPM values was plotted for both groups
  1700. using a kernel density plot to approximate a continuous distribution.
  1701. The GB logCPM distributions are marked in red, non-GB in blue.
  1702. The black vertical line denotes the chosen detection threshold of -1.
  1703. Top panel: Libraries were split into GB and non-GB groups first and normalized
  1704. separately.
  1705. Bottom panel: Libraries were all normalized together first and then split
  1706. into groups.
  1707. \end_layout
  1708. \end_inset
  1709. \end_layout
  1710. \begin_layout Plain Layout
  1711. \end_layout
  1712. \end_inset
  1713. \end_layout
  1714. \begin_layout Standard
  1715. Since globin blocking yields more usable sequencing depth, it should also
  1716. allow detection of more genes at any given threshold.
  1717. When we looked at the distribution of average normalized logCPM values
  1718. across all libraries for genes with at least one read assigned to them,
  1719. we observed the expected bimodal distribution, with a high-abundance "signal"
  1720. peak representing detected genes and a low-abundance "noise" peak representing
  1721. genes whose read count did not rise above the noise floor (Figure
  1722. \begin_inset CommandInset ref
  1723. LatexCommand ref
  1724. reference "fig:logcpm-dists"
  1725. plural "false"
  1726. caps "false"
  1727. noprefix "false"
  1728. \end_inset
  1729. ).
  1730. Consistent with the 2-fold increase in raw counts assigned to non-globin
  1731. genes, the signal peak for GB samples is shifted to the right relative
  1732. to the non-GB signal peak.
  1733. When all the samples are normalized together, this difference is normalized
  1734. out, lining up the signal peaks, and this reveals that, as expected, the
  1735. noise floor for the GB samples is about 2-fold lower.
  1736. This greater separation between signal and noise peaks in the GB samples
  1737. means that low-expression genes should be more easily detected and more
  1738. precisely quantified than in the non-GB samples.
  1739. \end_layout
  1740. \begin_layout Standard
  1741. \begin_inset Float figure
  1742. wide false
  1743. sideways false
  1744. status open
  1745. \begin_layout Plain Layout
  1746. \align center
  1747. \begin_inset Graphics
  1748. filename graphics/Globin Paper/figure3 - detection.pdf
  1749. \end_inset
  1750. \end_layout
  1751. \begin_layout Plain Layout
  1752. \begin_inset Caption Standard
  1753. \begin_layout Plain Layout
  1754. \series bold
  1755. \begin_inset Argument 1
  1756. status collapsed
  1757. \begin_layout Plain Layout
  1758. Gene detections as a function of abundance thresholds in globin-blocked
  1759. (GB) and non-GB samples.
  1760. \end_layout
  1761. \end_inset
  1762. \begin_inset CommandInset label
  1763. LatexCommand label
  1764. name "fig:Gene-detections"
  1765. \end_inset
  1766. Gene detections as a function of abundance thresholds in globin-blocked
  1767. (GB) and non-GB samples.
  1768. \series default
  1769. Average abundance (logCPM,
  1770. \begin_inset Formula $\log_{2}$
  1771. \end_inset
  1772. counts per million reads counted) was computed by separate group normalization
  1773. as described in Figure
  1774. \begin_inset CommandInset ref
  1775. LatexCommand ref
  1776. reference "fig:logcpm-dists"
  1777. plural "false"
  1778. caps "false"
  1779. noprefix "false"
  1780. \end_inset
  1781. for both the GB and non-GB groups, as well as for all samples considered
  1782. as one large group.
  1783. For every integer threshold from -2 to 3, the number of genes detected
  1784. at or above that logCPM threshold was plotted for each group.
  1785. \end_layout
  1786. \end_inset
  1787. \end_layout
  1788. \begin_layout Plain Layout
  1789. \end_layout
  1790. \end_inset
  1791. \end_layout
  1792. \begin_layout Standard
  1793. Based on these distributions, we selected a detection threshold of -1, which
  1794. is approximately the leftmost edge of the trough between the signal and
  1795. noise peaks.
  1796. This represents the most liberal possible detection threshold that doesn't
  1797. call substantial numbers of noise genes as detected.
  1798. In the full dataset, 13429 genes were detected at this threshold, and
  1799. 22276 were not.
  1800. When considering the GB libraries and non-GB libraries separately and re-comput
  1801. ing normalization factors independently within each group, 14535 genes were
  1802. detected in the GB libraries while only 12460 were detected in the non-GB
  1803. libraries.
  1804. Thus, GB allowed the detection of about 2000 extra genes that were buried under
  1805. the noise floor without GB.
  1806. This pattern of at least 2000 additional genes detected with GB was also
  1807. consistent across a wide range of possible detection thresholds, from -2
  1808. to 3 (see Figure
  1809. \begin_inset CommandInset ref
  1810. LatexCommand ref
  1811. reference "fig:Gene-detections"
  1812. plural "false"
  1813. caps "false"
  1814. noprefix "false"
  1815. \end_inset
  1816. ).
  1817. \end_layout
  1818. \begin_layout Subsection*
  1819. Globin blocking does not add significant additional noise or decrease sample
  1820. quality
  1821. \end_layout
  1822. \begin_layout Standard
  1823. One potential worry is that the globin blocking protocol could perturb the
  1824. levels of non-globin genes.
  1825. There are two kinds of possible perturbations: systematic and random.
  1826. The former is not a major concern for detection of differential expression,
  1827. since a 2-fold change in every sample has no effect on the relative fold
  1828. change between samples.
  1829. In contrast, random perturbations would increase the noise and obscure
  1830. the signal in the dataset, reducing the capacity to detect differential
  1831. expression.
  1832. \end_layout
  1833. \begin_layout Standard
  1834. \begin_inset Float figure
  1835. wide false
  1836. sideways false
  1837. status open
  1838. \begin_layout Plain Layout
  1839. \align center
  1840. \begin_inset Graphics
  1841. filename graphics/Globin Paper/figure4 - maplot-colored.pdf
  1842. \end_inset
  1843. \end_layout
  1844. \begin_layout Plain Layout
  1845. \begin_inset Caption Standard
  1846. \begin_layout Plain Layout
  1847. \begin_inset Argument 1
  1848. status collapsed
  1849. \begin_layout Plain Layout
  1850. MA plot showing effects of globin blocking on each gene's abundance.
  1851. \end_layout
  1852. \end_inset
  1853. \begin_inset CommandInset label
  1854. LatexCommand label
  1855. name "fig:MA-plot"
  1856. \end_inset
  1857. \series bold
  1858. MA plot showing effects of globin blocking on each gene's abundance.
  1859. \series default
  1860. All libraries were normalized together as described in Figure
  1861. \begin_inset CommandInset ref
  1862. LatexCommand ref
  1863. reference "fig:logcpm-dists"
  1864. plural "false"
  1865. caps "false"
  1866. noprefix "false"
  1867. \end_inset
  1868. , and genes with an average logCPM below -1 were filtered out.
  1869. Each remaining gene was tested for differential abundance with respect
  1870. to globin blocking (GB) using edgeR’s quasi-likelihood F-test, fitting a
  1871. negative binomial generalized linear model to the table of read counts in each
  1872. library.
  1873. For each gene, edgeR reported average abundance (logCPM),
  1874. \begin_inset Formula $\log_{2}$
  1875. \end_inset
  1876. fold change (logFC), p-value, and Benjamini-Hochberg adjusted false discovery
  1877. rate (FDR).
  1878. Each gene's logFC was plotted against its logCPM, colored by FDR.
  1879. Red points are significant at ≤10% FDR, and blue are not significant at
  1880. that threshold.
  1881. The alpha and beta globin genes targeted for blocking are marked with large
  1882. triangles, while all other genes are represented as small points.
  1883. \end_layout
  1884. \end_inset
  1885. \end_layout
  1886. \begin_layout Plain Layout
  1887. \end_layout
  1888. \end_inset
  1889. \end_layout
  1890. \begin_layout Standard
  1891. \begin_inset Flex TODO Note (inline)
  1892. status open
  1893. \begin_layout Plain Layout
  1894. Standardize on
  1895. \begin_inset Quotes eld
  1896. \end_inset
  1897. log2
  1898. \begin_inset Quotes erd
  1899. \end_inset
  1900. notation
  1901. \end_layout
  1902. \end_inset
  1903. \end_layout
  1904. \begin_layout Standard
  1905. The data do indeed show small systematic perturbations in gene levels (Figure
  1906. \begin_inset CommandInset ref
  1907. LatexCommand ref
  1908. reference "fig:MA-plot"
  1909. plural "false"
  1910. caps "false"
  1911. noprefix "false"
  1912. \end_inset
  1913. ).
  1914. Other than the 3 designated alpha and beta globin genes, two other genes
  1915. stand out as having especially large negative log fold changes: HBD and
  1916. LOC1021365.
  1917. HBD, delta globin, is most likely targeted by the blocking oligos due to
  1918. high sequence homology with the other globin genes.
  1919. LOC1021365 is the aforementioned ncRNA that is reverse-complementary to
  1920. one of the alpha-like genes and that would be expected to be removed during
  1921. the globin blocking step.
  1922. All other genes appear in a cluster centered vertically at 0, and the vast
  1923. majority of genes in this cluster show an absolute log2(FC) of 0.5 or less.
  1924. Nevertheless, many of these small perturbations are still statistically
  1925. significant, indicating that the globin blocking oligos likely cause very
  1926. small but non-zero systematic perturbations in measured gene expression
  1927. levels.
  1928. \end_layout
  1929. \begin_layout Standard
  1930. \begin_inset Float figure
  1931. wide false
  1932. sideways false
  1933. status open
  1934. \begin_layout Plain Layout
  1935. \align center
  1936. \begin_inset Graphics
  1937. filename graphics/Globin Paper/figure5 - corrplot.pdf
  1938. \end_inset
  1939. \end_layout
  1940. \begin_layout Plain Layout
  1941. \begin_inset Caption Standard
  1942. \begin_layout Plain Layout
  1943. \series bold
  1944. \begin_inset Argument 1
  1945. status collapsed
  1946. \begin_layout Plain Layout
  1947. Comparison of inter-sample gene abundance correlations with and without
  1948. globin blocking.
  1949. \end_layout
  1950. \end_inset
  1951. \begin_inset CommandInset label
  1952. LatexCommand label
  1953. name "fig:gene-abundance-correlations"
  1954. \end_inset
  1955. Comparison of inter-sample gene abundance correlations with and without
  1956. globin blocking (GB).
  1957. \series default
  1958. All libraries were normalized together as described in Figure 2, and genes
  1959. with an average abundance (logCPM, log2 counts per million reads counted)
  1960. less than -1 were filtered out.
  1961. Each gene’s logCPM was computed in each library using the edgeR cpm function.
  1962. For each pair of biological samples, the Pearson correlation between those
  1963. samples' GB libraries was plotted against the correlation between the same
  1964. samples’ non-GB libraries.
  1965. Each point represents a unique pair of samples.
  1966. The solid gray line shows a quantile-quantile plot of the distribution of GB
  1967. correlations vs.
  1968. that of non-GB correlations.
  1969. The thin dashed line is the identity line, provided for reference.
  1970. \end_layout
  1971. \end_inset
  1972. \end_layout
  1973. \begin_layout Plain Layout
  1974. \end_layout
  1975. \end_inset
  1976. \end_layout
  1977. \begin_layout Standard
  1978. To evaluate the possibility of globin blocking causing random perturbations
  1979. and reducing sample quality, we computed the Pearson correlation between
  1980. logCPM values for every pair of samples with and without GB and plotted
  1981. them against each other (Figure
  1982. \begin_inset CommandInset ref
  1983. LatexCommand ref
  1984. reference "fig:gene-abundance-correlations"
  1985. plural "false"
  1986. caps "false"
  1987. noprefix "false"
  1988. \end_inset
  1989. ).
  1990. The plot indicated that the GB libraries have higher sample-to-sample correlati
  1991. ons than the non-GB libraries.
  1992. Parametric and nonparametric tests for differences between the correlations
  1993. with and without GB both confirmed that this difference was highly significant
  1994. (2-sided paired t-test: t = 37.2, df = 665, P ≪ 2.2e-16; 2-sided Wilcoxon
  1995. sign-rank test: V = 2195, P ≪ 2.2e-16).
  1996. Performing the same tests on the Spearman correlations gave the same conclusion
  1997. (t-test: t = 26.8, df = 665, P ≪ 2.2e-16; sign-rank test: V = 8781, P ≪ 2.2e-16).
  1998. The edgeR package was used to compute the overall biological coefficient
  1999. of variation (BCV) for GB and non-GB libraries, and we found that globin blocking
  2000. resulted in a negligible increase in the BCV (0.417 with GB vs.
  2001. 0.400 without).
  2002. The near equality of the BCVs for both sets indicates that the higher correlati
  2003. ons in the GB libraries are most likely a result of the increased yield
  2004. of useful reads, which reduces the contribution of Poisson counting uncertainty
  2005. to the overall variance of the logCPM values
  2006. \begin_inset CommandInset citation
  2007. LatexCommand cite
  2008. key "McCarthy2012"
  2009. literal "false"
  2010. \end_inset
  2011. .
  2012. This improves the precision of expression measurements and more than offsets
  2013. the negligible increase in BCV.
  2014. \end_layout
  2015. \begin_layout Subsection*
  2016. More differentially expressed genes are detected with globin blocking
  2017. \end_layout
  2018. \begin_layout Standard
  2019. \begin_inset Float table
  2020. wide false
  2021. sideways false
  2022. status open
  2023. \begin_layout Plain Layout
  2024. \align center
  2025. \begin_inset Tabular
  2026. <lyxtabular version="3" rows="5" columns="5">
  2027. <features tabularvalignment="middle">
  2028. <column alignment="center" valignment="top">
  2029. <column alignment="center" valignment="top">
  2030. <column alignment="center" valignment="top">
  2031. <column alignment="center" valignment="top">
  2032. <column alignment="center" valignment="top">
  2033. <row>
  2034. <cell alignment="center" valignment="top" usebox="none">
  2035. \begin_inset Text
  2036. \begin_layout Plain Layout
  2037. \end_layout
  2038. \end_inset
  2039. </cell>
  2040. <cell alignment="center" valignment="top" usebox="none">
  2041. \begin_inset Text
  2042. \begin_layout Plain Layout
  2043. \end_layout
  2044. \end_inset
  2045. </cell>
  2046. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2047. \begin_inset Text
  2048. \begin_layout Plain Layout
  2049. \series bold
  2050. No Globin Blocking
  2051. \end_layout
  2052. \end_inset
  2053. </cell>
  2054. <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2055. \begin_inset Text
  2056. \begin_layout Plain Layout
  2057. \end_layout
  2058. \end_inset
  2059. </cell>
  2060. <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2061. \begin_inset Text
  2062. \begin_layout Plain Layout
  2063. \end_layout
  2064. \end_inset
  2065. </cell>
  2066. </row>
  2067. <row>
  2068. <cell alignment="center" valignment="top" usebox="none">
  2069. \begin_inset Text
  2070. \begin_layout Plain Layout
  2071. \end_layout
  2072. \end_inset
  2073. </cell>
  2074. <cell alignment="center" valignment="top" usebox="none">
  2075. \begin_inset Text
  2076. \begin_layout Plain Layout
  2077. \end_layout
  2078. \end_inset
  2079. </cell>
  2080. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2081. \begin_inset Text
  2082. \begin_layout Plain Layout
  2083. \series bold
  2084. Up
  2085. \end_layout
  2086. \end_inset
  2087. </cell>
  2088. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2089. \begin_inset Text
  2090. \begin_layout Plain Layout
  2091. \series bold
  2092. NS
  2093. \end_layout
  2094. \end_inset
  2095. </cell>
  2096. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2097. \begin_inset Text
  2098. \begin_layout Plain Layout
  2099. \series bold
  2100. Down
  2101. \end_layout
  2102. \end_inset
  2103. </cell>
  2104. </row>
  2105. <row>
  2106. <cell multirow="3" alignment="center" valignment="middle" topline="true" bottomline="true" leftline="true" usebox="none">
  2107. \begin_inset Text
  2108. \begin_layout Plain Layout
  2109. \series bold
  2110. Globin Blocking
  2111. \end_layout
  2112. \end_inset
  2113. </cell>
  2114. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2115. \begin_inset Text
  2116. \begin_layout Plain Layout
  2117. \series bold
  2118. Up
  2119. \end_layout
  2120. \end_inset
  2121. </cell>
  2122. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2123. \begin_inset Text
  2124. \begin_layout Plain Layout
  2125. \family roman
  2126. \series medium
  2127. \shape up
  2128. \size normal
  2129. \emph off
  2130. \bar no
  2131. \strikeout off
  2132. \xout off
  2133. \uuline off
  2134. \uwave off
  2135. \noun off
  2136. \color none
  2137. 231
  2138. \end_layout
  2139. \end_inset
  2140. </cell>
  2141. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2142. \begin_inset Text
  2143. \begin_layout Plain Layout
  2144. \family roman
  2145. \series medium
  2146. \shape up
  2147. \size normal
  2148. \emph off
  2149. \bar no
  2150. \strikeout off
  2151. \xout off
  2152. \uuline off
  2153. \uwave off
  2154. \noun off
  2155. \color none
  2156. 515
  2157. \end_layout
  2158. \end_inset
  2159. </cell>
  2160. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2161. \begin_inset Text
  2162. \begin_layout Plain Layout
  2163. \family roman
  2164. \series medium
  2165. \shape up
  2166. \size normal
  2167. \emph off
  2168. \bar no
  2169. \strikeout off
  2170. \xout off
  2171. \uuline off
  2172. \uwave off
  2173. \noun off
  2174. \color none
  2175. 2
  2176. \end_layout
  2177. \end_inset
  2178. </cell>
  2179. </row>
  2180. <row>
  2181. <cell multirow="4" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2182. \begin_inset Text
  2183. \begin_layout Plain Layout
  2184. \end_layout
  2185. \end_inset
  2186. </cell>
  2187. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2188. \begin_inset Text
  2189. \begin_layout Plain Layout
  2190. \series bold
  2191. NS
  2192. \end_layout
  2193. \end_inset
  2194. </cell>
  2195. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2196. \begin_inset Text
  2197. \begin_layout Plain Layout
  2198. \family roman
  2199. \series medium
  2200. \shape up
  2201. \size normal
  2202. \emph off
  2203. \bar no
  2204. \strikeout off
  2205. \xout off
  2206. \uuline off
  2207. \uwave off
  2208. \noun off
  2209. \color none
  2210. 160
  2211. \end_layout
  2212. \end_inset
  2213. </cell>
  2214. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2215. \begin_inset Text
  2216. \begin_layout Plain Layout
  2217. \family roman
  2218. \series medium
  2219. \shape up
  2220. \size normal
  2221. \emph off
  2222. \bar no
  2223. \strikeout off
  2224. \xout off
  2225. \uuline off
  2226. \uwave off
  2227. \noun off
  2228. \color none
  2229. 11235
  2230. \end_layout
  2231. \end_inset
  2232. </cell>
  2233. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2234. \begin_inset Text
  2235. \begin_layout Plain Layout
  2236. \family roman
  2237. \series medium
  2238. \shape up
  2239. \size normal
  2240. \emph off
  2241. \bar no
  2242. \strikeout off
  2243. \xout off
  2244. \uuline off
  2245. \uwave off
  2246. \noun off
  2247. \color none
  2248. 136
  2249. \end_layout
  2250. \end_inset
  2251. </cell>
  2252. </row>
  2253. <row>
  2254. <cell multirow="4" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2255. \begin_inset Text
  2256. \begin_layout Plain Layout
  2257. \end_layout
  2258. \end_inset
  2259. </cell>
  2260. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2261. \begin_inset Text
  2262. \begin_layout Plain Layout
  2263. \series bold
  2264. Down
  2265. \end_layout
  2266. \end_inset
  2267. </cell>
  2268. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2269. \begin_inset Text
  2270. \begin_layout Plain Layout
  2271. \family roman
  2272. \series medium
  2273. \shape up
  2274. \size normal
  2275. \emph off
  2276. \bar no
  2277. \strikeout off
  2278. \xout off
  2279. \uuline off
  2280. \uwave off
  2281. \noun off
  2282. \color none
  2283. 0
  2284. \end_layout
  2285. \end_inset
  2286. </cell>
  2287. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2288. \begin_inset Text
  2289. \begin_layout Plain Layout
  2290. \family roman
  2291. \series medium
  2292. \shape up
  2293. \size normal
  2294. \emph off
  2295. \bar no
  2296. \strikeout off
  2297. \xout off
  2298. \uuline off
  2299. \uwave off
  2300. \noun off
  2301. \color none
  2302. 548
  2303. \end_layout
  2304. \end_inset
  2305. </cell>
  2306. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2307. \begin_inset Text
  2308. \begin_layout Plain Layout
  2309. \family roman
  2310. \series medium
  2311. \shape up
  2312. \size normal
  2313. \emph off
  2314. \bar no
  2315. \strikeout off
  2316. \xout off
  2317. \uuline off
  2318. \uwave off
  2319. \noun off
  2320. \color none
  2321. 127
  2322. \end_layout
  2323. \end_inset
  2324. </cell>
  2325. </row>
  2326. </lyxtabular>
  2327. \end_inset
  2328. \end_layout
  2329. \begin_layout Plain Layout
  2330. \begin_inset Caption Standard
  2331. \begin_layout Plain Layout
  2332. \series bold
  2333. \begin_inset Argument 1
  2334. status open
  2335. \begin_layout Plain Layout
  2336. Comparison of significantly differentially expressed genes with and without
  2337. globin blocking.
  2338. \end_layout
  2339. \end_inset
  2340. \begin_inset CommandInset label
  2341. LatexCommand label
  2342. name "tab:Comparison-of-significant"
  2343. \end_inset
  2344. Comparison of significantly differentially expressed genes with and without
  2345. globin blocking.
  2346. \series default
  2347. Up, Down: Genes significantly up/down-regulated in post-transplant samples
  2348. relative to pre-transplant samples, with a false discovery rate of 10%
  2349. or less.
  2350. NS: Non-significant genes (false discovery rate greater than 10%).
  2351. \end_layout
  2352. \end_inset
  2353. \end_layout
  2354. \begin_layout Plain Layout
  2355. \end_layout
  2356. \end_inset
  2357. \end_layout
  2358. \begin_layout Standard
  2359. To compare performance on differential gene expression tests, we took subsets
  2360. of both the GB and non-GB libraries with exactly one pre-transplant and
  2361. one post-transplant sample for each animal that had paired samples available
  2362. for analysis (N=7 animals, N=14 samples in each subset).
  2363. The same test for pre- vs.
  2364. post-transplant differential gene expression was performed on the same
  2365. 7 pairs of samples from GB libraries and non-GB libraries, in each case
  2366. using an FDR of 10% as the threshold of significance.
  2367. Out of 12954 genes that passed the detection threshold in both subsets,
  2368. 358 were called significantly differentially expressed in the same direction
  2369. in both sets; 1063 were differentially expressed in the GB set only; 296
  2370. were differentially expressed in the non-GB set only; 2 genes were called
  2371. significantly up in the GB set but significantly down in the non-GB set;
  2372. and the remaining 11235 were not called differentially expressed in either
  2373. set.
  2374. These data are summarized in Table
  2375. \begin_inset CommandInset ref
  2376. LatexCommand ref
  2377. reference "tab:Comparison-of-significant"
  2378. plural "false"
  2379. caps "false"
  2380. noprefix "false"
  2381. \end_inset
  2382. .
  2383. The differences in BCV calculated by EdgeR for these subsets of samples
  2384. were negligible (BCV = 0.302 for GB and 0.297 for non-GB).
  2385. \end_layout
  2386. \begin_layout Standard
  2387. The key point is that the GB data results in substantially more differentially
  2388. expressed gene calls than the non-GB data.
  2389. Since there is no gold standard for this dataset, it is impossible to be
  2390. certain whether this is due to under-calling of differential expression
  2391. in the non-GB samples or over-calling in the GB samples.
  2392. However, given that both datasets are derived from the same biological
  2393. samples and have nearly equal BCVs, it is more likely that the larger number
  2394. of DE calls in the GB samples are genuine detections that were enabled
  2395. by the higher sequencing depth and measurement precision of the GB samples.
  2396. Note that the same set of genes was considered in both subsets, so the
  2397. larger number of differentially expressed gene calls in the GB data set
  2398. reflects a greater sensitivity to detect significant differential gene
  2399. expression and not simply the larger total number of detected genes in
  2400. GB samples described earlier.
  2401. \end_layout
  2402. \begin_layout Section
  2403. Discussion
  2404. \end_layout
  2405. \begin_layout Standard
  2406. The original experience with whole blood gene expression profiling on DNA
  2407. microarrays demonstrated that the high concentration of globin transcripts
  2408. significantly reduced the sensitivity to detect genes with relatively low
  2409. expression levels.
  2410. To address this limitation, commercial protocols for globin reduction were
  2411. developed based on strategies to block globin transcript amplification
  2412. during labeling or physically removing globin transcripts by affinity bead
  2413. methods
  2414. \begin_inset CommandInset citation
  2415. LatexCommand cite
  2416. key "Winn2010"
  2417. literal "false"
  2418. \end_inset
  2419. .
  2420. More recently, using the latest generation of labeling protocols and arrays,
  2421. it was determined that globin reduction was no longer necessary to obtain
  2422. sufficient sensitivity to detect differential transcript expression
  2423. \begin_inset CommandInset citation
  2424. LatexCommand cite
  2425. key "NuGEN2010"
  2426. literal "false"
  2427. \end_inset
  2428. .
  2429. However, we are not aware of any publications using these currently available
  2430. protocols with the latest generation of microarrays that actually compare
  2431. the detection sensitivity with and without globin reduction.
  2432. Nevertheless, in practice, omitting globin reduction has now been generally
  2433. adopted, primarily driven by concerns for cost control.
  2434. The main objective of our work was to directly test the impact of globin
  2435. gene transcripts and a new globin blocking protocol for application to
  2436. the newest generation of differential gene expression profiling determined
  2437. using next generation sequencing.
  2438. \end_layout
  2439. \begin_layout Standard
  2440. The challenge of doing global gene expression profiling in cynomolgus monkeys
  2441. is that the currently available arrays were never designed to comprehensively
  2442. cover this genome and have not been updated since the first assemblies
  2443. of the cynomolgus genome were published.
  2444. Therefore, we determined that the best strategy for peripheral blood profiling
  2445. was to do deep RNA-seq and inform the workflow using the latest available
  2446. genome assembly and annotation
  2447. \begin_inset CommandInset citation
  2448. LatexCommand cite
  2449. key "Wilson2013"
  2450. literal "false"
  2451. \end_inset
  2452. .
  2453. However, it was not immediately clear whether globin reduction was necessary
  2454. for RNA-seq or how much improvement in efficiency or sensitivity to detect
  2455. differential gene expression would be achieved for the added cost and work.
  2456. \end_layout
  2457. \begin_layout Standard
  2458. We only found one report that demonstrated that globin reduction significantly
  2459. improved the effective read yields for sequencing of human peripheral blood
  2460. cell RNA using a DeepSAGE protocol
  2461. \begin_inset CommandInset citation
  2462. LatexCommand cite
  2463. key "Mastrokolias2012"
  2464. literal "false"
  2465. \end_inset
  2466. .
  2467. The approach to DeepSAGE involves two different restriction enzymes that
  2468. purify and then tag small fragments of transcripts at specific locations,
  2469. and thus significantly reduces the complexity of the transcriptome.
  2470. Therefore, we could not determine how DeepSAGE results would translate
  2471. to the common strategy in the field for assaying the entire transcript
  2472. population by whole-transcriptome 3’-end RNA-seq.
  2473. Furthermore, if globin reduction was necessary, we also needed a globin
  2474. reduction method specific to cynomolgus globin sequences that would work
  2475. in an organism for which no kit is available off the shelf.
  2476. \end_layout
  2477. \begin_layout Standard
  2478. As mentioned above, the addition of globin blocking oligos has a very small
  2479. impact on measured gene expression levels.
  2480. However, this is a non-issue for the purposes of differential expression
  2481. testing, since a systematic change in a gene in all samples does not affect
  2482. relative expression levels between samples.
  2483. That said, we must acknowledge that simple comparisons of gene expression
  2484. data obtained by GB and non-GB protocols are not possible without additional
  2485. normalization.
  2486. \end_layout
  2487. \begin_layout Standard
  2488. More importantly, globin blocking not only nearly doubles the yield of usable
  2489. reads, it also increases inter-sample correlation and sensitivity to detect
  2490. differential gene expression relative to the same set of samples profiled
  2491. without blocking.
  2492. In addition, globin blocking does not add a significant amount of random
  2493. noise to the data.
  2494. Globin blocking thus represents a cost-effective way to squeeze more data
  2495. and statistical power out of the same blood samples and the same amount
  2496. of sequencing.
  2497. In conclusion, globin reduction greatly increases the yield of useful RNA-seq
  2498. reads mapping to the rest of the genome, with minimal perturbations in
  2499. the relative levels of non-globin genes.
  2500. Based on these results, globin transcript reduction using sequence-specific,
  2501. complementary blocking oligonucleotides is recommended for all deep RNA-seq
  2502. of cynomolgus and other nonhuman primate blood samples.
  2503. \end_layout
  2504. \begin_layout Chapter
  2505. Future Directions
  2506. \end_layout
  2507. \begin_layout Itemize
  2508. Study other epigenetic marks in more contexts
  2509. \end_layout
  2510. \begin_deeper
  2511. \begin_layout Itemize
  2512. DNA methylation, histone marks, chromatin accessibility & conformation in
  2513. CD4 T-cells
  2514. \end_layout
  2515. \begin_layout Itemize
  2516. Also look at other types of lymphocytes: CD8 T-cells, B-cells, NK cells
  2517. \end_layout
  2518. \end_deeper
  2519. \begin_layout Itemize
  2520. Investigate epigenetic regulation of lifespan extension in
  2521. \emph on
  2522. C.
  2523. elegans
  2524. \end_layout
  2525. \begin_deeper
  2526. \begin_layout Itemize
  2527. ChIP-seq of important transcriptional regulators to see how transcriptional
  2528. drift is prevented
  2529. \end_layout
  2530. \end_deeper
  2531. \begin_layout Standard
  2532. \begin_inset ERT
  2533. status open
  2534. \begin_layout Plain Layout
  2535. % Use "References" instead of "Bibliography"
  2536. \end_layout
  2537. \begin_layout Plain Layout
  2538. \backslash
  2539. renewcommand{
  2540. \backslash
  2541. bibname}{References}
  2542. \end_layout
  2543. \end_inset
  2544. \end_layout
  2545. \begin_layout Standard
  2546. \begin_inset Flex TODO Note (inline)
  2547. status open
  2548. \begin_layout Plain Layout
  2549. Check bib entry formatting & sort order
  2550. \end_layout
  2551. \end_inset
  2552. \end_layout
  2553. \begin_layout Standard
  2554. \begin_inset CommandInset bibtex
  2555. LatexCommand bibtex
  2556. btprint "btPrintCited"
  2557. bibfiles "refs"
  2558. options "bibtotoc,unsrt"
  2559. \end_inset
  2560. \end_layout
  2561. \end_body
  2562. \end_document