thesis.lyx 75 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220
  1. #LyX 2.3 created this file. For more info see http://www.lyx.org/
  2. \lyxformat 544
  3. \begin_document
  4. \begin_header
  5. \save_transient_properties true
  6. \origin unavailable
  7. \textclass extbook
  8. \begin_preamble
  9. % List all used files in log output
  10. \listfiles
  11. % Add a DRAFT watermark
  12. \usepackage{draftwatermark}
  13. \SetWatermarkLightness{0.97}
  14. \SetWatermarkScale{1}
  15. % Set up required header format
  16. \usepackage{fancyhdr}
  17. \pagestyle{fancy}
  18. \renewcommand{\headrulewidth}{0pt}
  19. \rhead{}
  20. \lhead{}
  21. \rfoot{}
  22. \lfoot{}
  23. \cfoot{\thepage} % Page number bottom center
  24. % https://tex.stackexchange.com/questions/65680/automatically-bold-first-sentence-of-a-floats-caption
  25. \usepackage{xstring}
  26. \usepackage{etoolbox}
  27. \usepackage{caption}
  28. \captionsetup{labelfont=bf,tableposition=top}
  29. \makeatletter
  30. \newcommand\formatlabel[1]{%
  31. \noexpandarg
  32. \IfSubStr{#1}{.}{%
  33. \StrBefore{#1}{.}[\firstcaption]%
  34. \StrBehind{#1}{.}[\secondcaption]%
  35. \textbf{\firstcaption.} \secondcaption}{%
  36. #1}%
  37. }
  38. \patchcmd{\@caption}{#3}{\formatlabel{#3}}{}{}
  39. \makeatother
  40. \end_preamble
  41. \use_default_options true
  42. \maintain_unincluded_children false
  43. \language english
  44. \language_package default
  45. \inputencoding utf8
  46. \fontencoding global
  47. \font_roman "default" "default"
  48. \font_sans "default" "default"
  49. \font_typewriter "default" "default"
  50. \font_math "auto" "auto"
  51. \font_default_family default
  52. \use_non_tex_fonts false
  53. \font_sc false
  54. \font_osf false
  55. \font_sf_scale 100 100
  56. \font_tt_scale 100 100
  57. \use_microtype false
  58. \use_dash_ligatures true
  59. \graphics default
  60. \default_output_format pdf4
  61. \output_sync 0
  62. \bibtex_command default
  63. \index_command default
  64. \paperfontsize 12
  65. \spacing double
  66. \use_hyperref true
  67. \pdf_bookmarks true
  68. \pdf_bookmarksnumbered false
  69. \pdf_bookmarksopen false
  70. \pdf_bookmarksopenlevel 1
  71. \pdf_breaklinks false
  72. \pdf_pdfborder false
  73. \pdf_colorlinks false
  74. \pdf_backref false
  75. \pdf_pdfusetitle true
  76. \papersize letterpaper
  77. \use_geometry true
  78. \use_package amsmath 1
  79. \use_package amssymb 1
  80. \use_package cancel 1
  81. \use_package esint 1
  82. \use_package mathdots 1
  83. \use_package mathtools 1
  84. \use_package mhchem 1
  85. \use_package stackrel 1
  86. \use_package stmaryrd 1
  87. \use_package undertilde 1
  88. \cite_engine basic
  89. \cite_engine_type default
  90. \biblio_style plain
  91. \use_bibtopic false
  92. \use_indices false
  93. \paperorientation portrait
  94. \suppress_date false
  95. \justification true
  96. \use_refstyle 1
  97. \use_minted 0
  98. \index Index
  99. \shortcut idx
  100. \color #008000
  101. \end_index
  102. \leftmargin 1.5in
  103. \topmargin 1in
  104. \rightmargin 1in
  105. \bottommargin 1in
  106. \secnumdepth 3
  107. \tocdepth 3
  108. \paragraph_separation indent
  109. \paragraph_indentation default
  110. \is_math_indent 0
  111. \math_numbering_side default
  112. \quotes_style english
  113. \dynamic_quotes 0
  114. \papercolumns 1
  115. \papersides 2
  116. \paperpagestyle default
  117. \tracking_changes false
  118. \output_changes false
  119. \html_math_output 0
  120. \html_css_as_file 0
  121. \html_be_strict false
  122. \end_header
  123. \begin_body
  124. \begin_layout Title
  125. Bioinformatic analysis of complex, high-throughput genomic and epigenomic
  126. data in the context of immunology and transplant rejection
  127. \end_layout
  128. \begin_layout Author
  129. A thesis presented
  130. \begin_inset Newline newline
  131. \end_inset
  132. by
  133. \begin_inset Newline newline
  134. \end_inset
  135. Ryan C.
  136. Thompson
  137. \begin_inset Newline newline
  138. \end_inset
  139. to
  140. \begin_inset Newline newline
  141. \end_inset
  142. The Scripps Research Institute Graduate Program
  143. \begin_inset Newline newline
  144. \end_inset
  145. in partial fulfillment of the requirements for the degree of
  146. \begin_inset Newline newline
  147. \end_inset
  148. Doctor of Philosophy in the subject of Biology
  149. \begin_inset Newline newline
  150. \end_inset
  151. for
  152. \begin_inset Newline newline
  153. \end_inset
  154. The Scripps Research Institute
  155. \begin_inset Newline newline
  156. \end_inset
  157. La Jolla, California
  158. \end_layout
  159. \begin_layout Date
  160. May 2019
  161. \end_layout
  162. \begin_layout Standard
  163. [Copyright notice]
  164. \end_layout
  165. \begin_layout Standard
  166. [Thesis acceptance form]
  167. \end_layout
  168. \begin_layout Standard
  169. [Dedication]
  170. \end_layout
  171. \begin_layout Standard
  172. [Acknowledgements]
  173. \end_layout
  174. \begin_layout Standard
  175. \begin_inset CommandInset toc
  176. LatexCommand tableofcontents
  177. \end_inset
  178. \end_layout
  179. \begin_layout Standard
  180. \begin_inset FloatList table
  181. \end_inset
  182. \end_layout
  183. \begin_layout Standard
  184. \begin_inset FloatList figure
  185. \end_inset
  186. \end_layout
  187. \begin_layout Standard
  188. [List of Abbreviations]
  189. \begin_inset Note Note
  190. status open
  191. \begin_layout Plain Layout
  192. https://wiki.lyx.org/Tips/Nomenclature
  193. \end_layout
  194. \end_inset
  195. \end_layout
  196. \begin_layout Standard
  197. [Abstract]
  198. \end_layout
  199. \begin_layout Chapter*
  200. Abstract
  201. \end_layout
  202. \begin_layout Chapter
  203. Introduction
  204. \end_layout
  205. \begin_layout Section
  206. Background & Significance
  207. \end_layout
  208. \begin_layout Subsection
  209. Biological motivation
  210. \end_layout
  211. \begin_layout Itemize
  212. Rejection is the major long-term threat to organ and tissue grafts
  213. \end_layout
  214. \begin_deeper
  215. \begin_layout Itemize
  216. Common mechanisms of rejection
  217. \end_layout
  218. \begin_layout Itemize
  219. Effective immune suppression requires monitoring for rejection and tuning
  220. \end_layout
  221. \begin_layout Itemize
  222. Current tests for rejection (tissue biopsy) are invasive and biased
  223. \end_layout
  224. \begin_layout Itemize
  225. A blood test based on microarrays would be less biased and invasive
  226. \end_layout
  227. \end_deeper
  228. \begin_layout Itemize
  229. Memory cells are resistant to immune suppression
  230. \end_layout
  231. \begin_deeper
  232. \begin_layout Itemize
  233. Mechanisms of resistance in memory cells are poorly understood
  234. \end_layout
  235. \begin_layout Itemize
  236. A better understanding of immune memory formation is needed
  237. \end_layout
  238. \end_deeper
  239. \begin_layout Itemize
  240. Mesenchymal stem cell infusion is a promising new treatment to prevent/delay
  241. rejection
  242. \end_layout
  243. \begin_deeper
  244. \begin_layout Itemize
  245. Demonstrated in mice, but not yet in primates
  246. \end_layout
  247. \begin_layout Itemize
  248. Mechanism currently unknown, but MSC are known to be immune modulatory
  249. \end_layout
  250. \end_deeper
  251. \begin_layout Subsection
  252. Overview of bioinformatic analysis methods
  253. \end_layout
  254. \begin_layout Standard
  255. An overview of all the methods used, including what problem they solve,
  256. what assumptions they make, and a basic description of how they work.
  257. \end_layout
  258. \begin_layout Itemize
  259. ChIP-seq Peak calling
  260. \end_layout
  261. \begin_deeper
  262. \begin_layout Itemize
  263. Cross-correlation analysis to determine fragment size
  264. \end_layout
  265. \begin_layout Itemize
  266. Broad vs narrow peaks
  267. \end_layout
  268. \begin_layout Itemize
  269. SICER for broad peaks
  270. \end_layout
  271. \begin_layout Itemize
  272. IDR for biologically reproducible peaks
  273. \end_layout
  274. \begin_layout Itemize
  275. csaw peak filtering guidelines for unbiased downstream analysis
  276. \end_layout
  277. \end_deeper
  278. \begin_layout Itemize
  279. Normalization is non-trivial and application-dependent
  280. \end_layout
  281. \begin_deeper
  282. \begin_layout Itemize
  283. Expression arrays: RMA & fRMA; why fRMA is needed
  284. \end_layout
  285. \begin_layout Itemize
  286. Methylation arrays: M-value transformation approximates normal data but
  287. induces heteroskedasticity
  288. \end_layout
  289. \begin_layout Itemize
  290. RNA-seq: normalize based on assumption that the average gene is not changing
  291. \end_layout
  292. \begin_layout Itemize
  293. ChIP-seq: complex with many considerations, dependent on experimental methods,
  294. biological system, and analysis goals
  295. \end_layout
  296. \end_deeper
  297. \begin_layout Itemize
  298. Limma: The standard linear modeling framework for genomics
  299. \end_layout
  300. \begin_deeper
  301. \begin_layout Itemize
  302. empirical Bayes variance modeling: limma's core feature
  303. \end_layout
  304. \begin_layout Itemize
  305. edgeR & DESeq2: Extend with negative binomial GLM for RNA-seq and other
  306. count data
  307. \end_layout
  308. \begin_layout Itemize
  309. voom: Extend with precision weights to model mean-variance trend
  310. \end_layout
  311. \begin_layout Itemize
  312. arrayWeights and duplicateCorrelation to handle complex variance structures
  313. \end_layout
  314. \end_deeper
  315. \begin_layout Itemize
  316. sva and ComBat for batch correction
  317. \end_layout
  318. \begin_layout Itemize
  319. Factor analysis: PCA, MDS, MOFA
  320. \end_layout
  321. \begin_deeper
  322. \begin_layout Itemize
  323. Batch-corrected PCA is informative, but careful application is required
  324. to avoid bias
  325. \end_layout
  326. \end_deeper
  327. \begin_layout Itemize
  328. Gene set analysis: camera and SPIA
  329. \end_layout
  330. \begin_layout Section
  331. Innovation
  332. \end_layout
  333. \begin_layout Itemize
  334. MSC infusion to improve transplant outcomes (prevent/delay rejection)
  335. \end_layout
  336. \begin_deeper
  337. \begin_layout Itemize
  338. Characterize MSC response to interferon gamma
  339. \end_layout
  340. \begin_layout Itemize
  341. IFN-g is thought to stimulate their function
  342. \end_layout
  343. \begin_layout Itemize
  344. Test IFN-g treated MSC infusion as a therapy to delay graft rejection in
  345. cynomolgus monkeys
  346. \end_layout
  347. \begin_layout Itemize
  348. Monitor animals post-transplant using blood RNA-seq at serial time points
  349. \end_layout
  350. \end_deeper
  351. \begin_layout Itemize
  352. Investigate dynamics of histone marks in CD4 T-cell activation and memory
  353. \end_layout
  354. \begin_deeper
  355. \begin_layout Itemize
  356. Previous studies have looked at single snapshots of histone marks
  357. \end_layout
  358. \begin_layout Itemize
  359. Instead, look at changes in histone marks across activation and memory
  360. \end_layout
  361. \end_deeper
  362. \begin_layout Itemize
  363. High-throughput sequencing and microarray technologies
  364. \end_layout
  365. \begin_deeper
  366. \begin_layout Itemize
  367. Powerful methods for assaying gene expression and epigenetics across entire
  368. genomes
  369. \end_layout
  370. \begin_layout Itemize
  371. Proper analysis requires finding and exploiting systematic genome-wide trends
  372. \end_layout
  373. \end_deeper
  374. \begin_layout Chapter
  375. Reproducible genome-wide epigenetic analysis of H3K4 and H3K27 methylation
  376. in naive and memory CD4 T-cell activation
  377. \end_layout
  378. \begin_layout Standard
  379. \begin_inset Note Note
  380. status open
  381. \begin_layout Plain Layout
  382. Author list: Me, Sarah, Dan
  383. \end_layout
  384. \end_inset
  385. \end_layout
  386. \begin_layout Section
  387. Approach
  388. \end_layout
  389. \begin_layout Itemize
  390. CD4 T-cells are central to all adaptive immune responses and memory
  391. \end_layout
  392. \begin_layout Itemize
  393. H3K4 and H3K27 methylation are major epigenetic regulators of gene expression
  394. \end_layout
  395. \begin_layout Itemize
  396. Canonically, H3K4 is activating and H3K27 is inhibitory, but the reality
  397. is complex
  398. \end_layout
  399. \begin_layout Itemize
  400. Looking at these marks during CD4 activation and memory should reveal new
  401. mechanistic details
  402. \end_layout
  403. \begin_layout Itemize
  404. Test
  405. \begin_inset Quotes eld
  406. \end_inset
  407. poised promoter
  408. \begin_inset Quotes erd
  409. \end_inset
  410. hypothesis in which H3K4 and H3K27 are both methylated
  411. \end_layout
  412. \begin_layout Itemize
  413. Expand scope of analysis beyond simple promoter counts
  414. \end_layout
  415. \begin_deeper
  416. \begin_layout Itemize
  417. Analyze peaks genome-wide, including in intergenic regions
  418. \end_layout
  419. \begin_layout Itemize
  420. Analysis of coverage distribution shape within promoters, e.g.
  421. upstream vs downstream coverage
  422. \end_layout
  423. \end_deeper
  424. \begin_layout Section
  425. Methods
  426. \end_layout
  427. \begin_layout Itemize
  428. Re-analyze previously published CD4 ChIP-seq & RNA-seq data
  429. \begin_inset CommandInset citation
  430. LatexCommand cite
  431. key "LaMere2016,Lamere2017"
  432. literal "true"
  433. \end_inset
  434. \end_layout
  435. \begin_deeper
  436. \begin_layout Itemize
  437. Completely reimplement analysis from scratch as a reproducible workflow
  438. \end_layout
  439. \begin_layout Itemize
  440. Use newly published methods & algorithms not available during the original
  441. analysis: SICER, csaw, MOFA, ComBat, sva, GREAT, and more
  442. \end_layout
  443. \end_deeper
  444. \begin_layout Itemize
  445. SICER, IDR, csaw, & GREAT to call ChIP-seq peaks genome-wide, perform differenti
  446. al abundance analysis, and relate those peaks to gene expression
  447. \end_layout
  448. \begin_layout Itemize
  449. Promoter counts in sliding windows around each gene's highest-expressed
  450. TSS to investigate coverage distribution within promoters
  451. \end_layout
  452. \begin_layout Section
  453. Results
  454. \end_layout
  455. \begin_layout Itemize
  456. Different histone marks have different effective promoter radii
  457. \end_layout
  458. \begin_layout Itemize
  459. H3K4 and RNA-seq data show clear evidence of naive convergence with memory
  460. between days 1 and 5
  461. \end_layout
  462. \begin_layout Itemize
  463. Promoter coverage distribution affects gene expression independent of total
  464. promoter count
  465. \end_layout
  466. \begin_layout Itemize
  467. Remaining analyses to complete:
  468. \end_layout
  469. \begin_deeper
  470. \begin_layout Itemize
  471. Look for naive-to-memory convergence in H3K27 data
  472. \end_layout
  473. \begin_layout Itemize
  474. Look at enriched pathways for day 0 to day 1 (activation) compared to day
  475. 1 to day 5 (putative naive-to-memory differentiation)
  476. \end_layout
  477. \begin_layout Itemize
  478. Find genes with different expression patterns in naive vs.
  479. memory and try to explain the difference with the Day 0 histone mark data
  480. \end_layout
  481. \begin_deeper
  482. \begin_layout Itemize
  483. Determine whether co-occurrence of H3K4me3 and H3K27me3 (proposed
  484. \begin_inset Quotes eld
  485. \end_inset
  486. poised
  487. \begin_inset Quotes erd
  488. \end_inset
  489. state) has effects on post-activation expression dynamics
  490. \end_layout
  491. \begin_layout Itemize
  492. Promoter coverage distribution dynamics throughout activation for interesting
  493. subsets of genes
  494. \end_layout
  495. \end_deeper
  496. \begin_layout Itemize
  497. (Backup) Compare and contrast behavior of promoter peaks vs intergenic (putative
  498. enhancer) peaks (GREAT analysis)
  499. \end_layout
  500. \begin_deeper
  501. \begin_layout Itemize
  502. Put results in context of important T-cell pathways & gene expression data
  503. \end_layout
  504. \end_deeper
  505. \end_deeper
  506. \begin_layout Section
  507. Discussion
  508. \end_layout
  509. \begin_layout Itemize
  510. "Promoter radius" is not constant and must be defined empirically for a
  511. given data set
  512. \end_layout
  513. \begin_layout Itemize
  514. Evaluate evidence for poised promoters and enhancer effects on gene expression
  515. dynamics of naive-to-memory differentiation
  516. \end_layout
  517. \begin_layout Itemize
  518. Compare to published work on other epigenetic marks (e.g.
  519. chromatin accessibility)
  520. \end_layout
  521. \begin_layout Chapter
  522. Improving array-based analyses of transplant rejection by optimizing data
  523. preprocessing
  524. \end_layout
  525. \begin_layout Standard
  526. \begin_inset Note Note
  527. status open
  528. \begin_layout Plain Layout
  529. Author list: Me, Sunil, Padma, Dan
  530. \end_layout
  531. \end_inset
  532. \end_layout
  533. \begin_layout Section
  534. Approach
  535. \end_layout
  536. \begin_layout Itemize
  537. Machine-learning applications demand a "single-channel" normalization method
  538. \end_layout
  539. \begin_layout Itemize
  540. frozen RMA is a good solution, but not trivial to apply
  541. \end_layout
  542. \begin_layout Itemize
  543. Methylation array data preprocessing induces heteroskedasticity
  544. \end_layout
  545. \begin_layout Itemize
  546. Need to account for this mean-variance dependency in analysis
  547. \end_layout
  548. \begin_layout Section
  549. Methods
  550. \end_layout
  551. \begin_layout Itemize
  552. Expression array normalization for detecting acute rejection
  553. \end_layout
  554. \begin_layout Itemize
  555. Use frozen RMA, a single-channel variant of RMA
  556. \end_layout
  557. \begin_layout Itemize
  558. Generate custom fRMA normalization vectors for each tissue (biopsy, blood)
  559. \end_layout
  560. \begin_layout Itemize
  561. Methylation arrays for differential methylation in rejection vs.
  562. healthy transplant
  563. \end_layout
  564. \begin_layout Itemize
  565. Adapt voom method originally designed for RNA-seq to model mean-variance
  566. dependence
  567. \end_layout
  568. \begin_layout Itemize
  569. Use sample precision weighting and sva to adjust for other confounding factors
  570. \end_layout
  571. \begin_layout Section
  572. Results
  573. \end_layout
  574. \begin_layout Itemize
  575. custom fRMA normalization improved cross-validated classifier performance
  576. \begin_inset CommandInset citation
  577. LatexCommand cite
  578. key "Kurian2014"
  579. literal "true"
  580. \end_inset
  581. \end_layout
  582. \begin_layout Itemize
  583. voom, precision weights, and sva improved model fit
  584. \end_layout
  585. \begin_deeper
  586. \begin_layout Itemize
  587. Also increased sensitivity for detecting differential methylation
  588. \end_layout
  589. \end_deeper
  590. \begin_layout Section
  591. Discussion
  592. \end_layout
  593. \begin_layout Itemize
  594. fRMA enables classifying new samples without re-normalizing the entire data
  595. set
  596. \end_layout
  597. \begin_deeper
  598. \begin_layout Itemize
  599. Critical for translating a classifier into clinical practice
  600. \end_layout
  601. \end_deeper
  602. \begin_layout Itemize
  603. Methods like voom designed for RNA-seq can also help with array analysis
  604. \end_layout
  605. \begin_layout Itemize
  606. Extracting and modeling confounders common to many features improves model
  607. correspondence to known biology
  608. \end_layout
  609. \begin_layout Chapter
  610. Globin-blocking for more effective blood RNA-seq analysis in primate animal
  611. model
  612. \end_layout
  613. \begin_layout Standard
  614. \begin_inset Note Note
  615. status open
  616. \begin_layout Plain Layout
  617. TODO Choose between above and the paper title: Optimizing yield of deep
  618. RNA sequencing for gene expression profiling by globin reduction of peripheral
  619. blood samples from cynomolgus monkeys (Macaca fascicularis).
  620. \end_layout
  621. \end_inset
  622. \end_layout
  623. \begin_layout Standard
  624. \begin_inset Note Note
  625. status open
  626. \begin_layout Plain Layout
  627. How to integrate/credit sections written by others (e.g.
  628. wetlab methods)? (Majority of paper text is written by me.) Preprint the
  629. paper, then cite it.
  630. Every chapter has an author list, which may or may not be part of a citation
  631. to a published/preprinted paper.
  632. \end_layout
  633. \begin_layout Plain Layout
  634. TODO: Preprint the paper, then cite it.
  635. \end_layout
  636. \begin_layout Plain Layout
  637. TODO: Chapter author list: https://tex.stackexchange.com/questions/156862/displayi
  638. ng-author-for-each-chapter-in-book
  639. \end_layout
  640. \end_inset
  641. \end_layout
  642. \begin_layout Section*
  643. Abstract
  644. \end_layout
  645. \begin_layout Paragraph
  646. Background
  647. \end_layout
  648. \begin_layout Standard
  649. Primate blood contains high concentrations of globin messenger RNA.
  650. Globin reduction is a standard technique used to improve the expression
  651. results obtained by DNA microarrays on RNA from blood samples.
  652. However, with whole transcriptome RNA-sequencing (RNA-seq) quickly replacing
  653. microarrays for many applications, the impact of globin reduction for RNA-seq
  654. has not been previously studied.
  655. Moreover, no off-the-shelf kits are available for globin reduction in nonhuman
  656. primates.
  657. \end_layout
  658. \begin_layout Paragraph
  659. Results
  660. \end_layout
  661. \begin_layout Standard
  662. Here we report a protocol for RNA-seq in primate blood samples that uses
  663. complementary oligonucleotides to block reverse transcription of the alpha
  664. and beta globin genes.
  665. In test samples from cynomolgus monkeys (Macaca fascicularis), this globin
  666. blocking protocol approximately doubles the yield of informative (non-globin)
  667. reads by greatly reducing the fraction of globin reads, while also improving
  668. the consistency in sequencing depth between samples.
  669. The increased yield enables detection of about 2000 more genes, significantly
  670. increases the correlation in measured gene expression levels between samples,
  671. and increases the sensitivity of differential gene expression tests.
  672. \end_layout
  673. \begin_layout Paragraph
  674. Conclusions
  675. \end_layout
  676. \begin_layout Standard
  677. These results show that globin blocking significantly improves the cost-effectiv
  678. eness of mRNA sequencing in primate blood samples by doubling the yield
  679. of useful reads, allowing detection of more genes, and improving the precision
  680. of gene expression measurements.
  681. Based on these results, a globin reducing or blocking protocol is recommended
  682. for all RNA-seq studies of primate blood samples.
  683. \end_layout
  684. \begin_layout Section
  685. Approach
  686. \end_layout
  687. \begin_layout Itemize
  688. Cynomolgus monkeys as a model organism
  689. \end_layout
  690. \begin_deeper
  691. \begin_layout Itemize
  692. Highly related to humans
  693. \end_layout
  694. \begin_layout Itemize
  695. Small size and short life cycle - good research animal
  696. \end_layout
  697. \begin_layout Itemize
  698. Genomics resources still in development
  699. \end_layout
  700. \end_deeper
  701. \begin_layout Itemize
  702. Inadequacy of existing blood RNA-seq protocols
  703. \end_layout
  704. \begin_deeper
  705. \begin_layout Itemize
  706. Existing protocols use a separate globin pulldown step, slowing down processing
  707. \end_layout
  708. \end_deeper
  709. \begin_layout Standard
  710. Increasingly, researchers are turning to high-throughput mRNA sequencing
  711. technologies (RNA-seq) in preference to expression microarrays for analysis
  712. of gene expression
  713. \begin_inset CommandInset citation
  714. LatexCommand cite
  715. key "Mutz2012"
  716. literal "false"
  717. \end_inset
  718. .
  719. The advantages are even greater for study of model organisms with no well-estab
  720. lished array platforms available, such as the cynomolgus monkey (Macaca
  721. fascicularis).
  722. High fractions of globin mRNA are naturally present in mammalian peripheral
  723. blood samples (up to 70% of total mRNA) and these are known to interfere
  724. with the results of array-based expression profiling
  725. \begin_inset CommandInset citation
  726. LatexCommand cite
  727. key "Winn2010"
  728. literal "false"
  729. \end_inset
  730. .
  731. The importance of globin reduction for RNA-seq of blood has only been evaluated
  732. for a DeepSAGE protocol on human samples
  733. \begin_inset CommandInset citation
  734. LatexCommand cite
  735. key "Mastrokolias2012"
  736. literal "false"
  737. \end_inset
  738. .
  739. In the present report, we evaluated globin reduction using custom blocking
  740. oligonucleotides for deep RNA-seq of peripheral blood samples from a nonhuman
  741. primate, cynomolgus monkey, using the Illumina technology platform.
  742. We demonstrate that globin reduction significantly improves the cost-effectiven
  743. ess of RNA-seq in blood samples.
  744. Thus, our protocol offers a significant advantage to any investigator planning
  745. to use RNA-seq for gene expression profiling of nonhuman primate blood
  746. samples.
  747. Our method can be generally applied to any species by designing complementary
  748. oligonucleotide blocking probes to the globin gene sequences of that species.
  749. Indeed, any highly expressed but biologically uninformative transcripts
  750. can also be blocked to further increase sequencing efficiency and value
  751. \begin_inset CommandInset citation
  752. LatexCommand cite
  753. key "Arnaud2016"
  754. literal "false"
  755. \end_inset
  756. .
  757. \end_layout
  758. \begin_layout Section
  759. Methods
  760. \end_layout
  761. \begin_layout Subsection*
  762. Sample collection
  763. \end_layout
  764. \begin_layout Standard
  765. All research reported here was done under IACUC-approved protocols at the
  766. University of Miami and complied with all applicable federal and state
  767. regulations and ethical principles for nonhuman primate research.
  768. Blood draws occurred between 16 April 2012 and 18 June 2015.
  769. The experimental system involved intrahepatic pancreatic islet transplantation
  770. into Cynomolgus monkeys with induced diabetes mellitus with or without
  771. concomitant infusion of mesenchymal stem cells.
  772. Blood was collected at serial time points before and after transplantation
  773. into PAXgene Blood RNA tubes (PreAnalytiX/Qiagen, Valencia, CA) at the
  774. precise volume:volume ratio of 2.5 ml whole blood into 6.9 ml of PAXgene
  775. additive.
  776. \end_layout
  777. \begin_layout Subsection*
  778. Globin Blocking
  779. \end_layout
  780. \begin_layout Standard
  781. Four oligonucleotides were designed to hybridize to the 3’ end of the transcript
  782. s for Cynomolgus HBA1, HBA2 and HBB, with two hybridization sites for HBB
  783. and two sites for HBA (the chosen sites were identical in both HBA genes).
  784. All oligos were purchased from Sigma and were entirely composed of 2’O-Me
  785. bases with a C3 spacer positioned at the 3’ ends to prevent any polymerase
  786. mediated primer extension.
  787. \end_layout
  788. \begin_layout Quote
  789. HBA1/2 site 1: GCCCACUCAGACUUUAUUCAAAG-C3spacer
  790. \end_layout
  791. \begin_layout Quote
  792. HBA1/2 site 2: GGUGCAAGGAGGGGAGGAG-C3spacer
  793. \end_layout
  794. \begin_layout Quote
  795. HBB site 1: AAUGAAAAUAAAUGUUUUUUAUUAG-C3spacer
  796. \end_layout
  797. \begin_layout Quote
  798. HBB site 2: CUCAAGGCCCUUCAUAAUAUCCC-C3spacer
  799. \end_layout
  800. \begin_layout Subsection*
  801. RNA-seq Library Preparation
  802. \end_layout
  803. \begin_layout Standard
  804. Sequencing libraries were prepared with 200 ng total RNA from each sample.
  805. Polyadenylated mRNA was selected from 200 ng aliquots of cynomolgus blood-deri
  806. ved total RNA using Ambion Dynabeads Oligo(dT)25 beads (Invitrogen) following
  807. manufacturer’s recommended protocol.
  808. PolyA selected RNA was then combined with 8 pmol of HBA1/2 (site 1), 8
  809. pmol of HBA1/2 (site 2), 12 pmol of HBB (site 1) and 12 pmol of HBB (site
  810. 2) oligonucleotides.
  811. In addition, 20 pmol of RT primer containing a portion of the Illumina
  812. adapter sequence (B-oligo-dTV: GAGTTCCTTGGCACCCGAGAATTCCATTTTTTTTTTTTTTTTTTTV)
  813. and 4 µL of 5X First Strand buffer (250 mM Tris-HCl pH 8.3, 375 mM KCl,
  814. 15mM MgCl2) were added in a total volume of 15 µL.
  815. The RNA was fragmented by heating this cocktail for 3 minutes at 95°C and
  816. then placed on ice.
  817. This was followed by the addition of 2 µL 0.1 M DTT, 1 µL RNaseOUT, 1 µL
  818. 10mM dNTPs 10% biotin-16 aminoallyl-2’- dUTP and 10% biotin-16 aminoallyl-2’-
  819. dCTP (TriLink Biotech, San Diego, CA), 1 µL Superscript II (200U/ µL, Thermo-Fi
  820. sher).
  821. A second “unblocked” library was prepared in the same way for each sample
  822. but replacing the blocking oligos with an equivalent volume of water.
  823. The reaction was carried out at 25°C for 15 minutes and 42°C for 40 minutes,
  824. followed by incubation at 75°C for 10 minutes to inactivate the reverse
  825. transcriptase.
  826. \end_layout
  827. \begin_layout Standard
  828. The cDNA/RNA hybrid molecules were purified using 1.8X Ampure XP beads (Agencourt
  829. ) following supplier’s recommended protocol.
  830. The cDNA/RNA hybrid was eluted in 25 µL of 10 mM Tris-HCl pH 8.0, and then
  831. bound to 25 µL of M280 Magnetic Streptavidin beads washed per recommended
  832. protocol (Thermo-Fisher).
  833. After 30 minutes of binding, beads were washed one time in 100 µL 0.1N NaOH
  834. to denature and remove the bound RNA, followed by two 100 µL washes with
  835. 1X TE buffer.
  836. \end_layout
  837. \begin_layout Standard
  838. Subsequent attachment of the 5-prime Illumina A adapter was performed by
  839. on-bead random primer extension of the following sequence (A-N8 primer:
  840. TTCAGAGTTCTACAGTCCGACGATCNNNNNNNN).
  841. Briefly, beads were resuspended in a 20 µL reaction containing 5 µM A-N8
  842. primer, 40mM Tris-HCl pH 7.5, 20mM MgCl2, 50mM NaCl, 0.325U/µL Sequenase
  843. 2.0 (Affymetrix, Santa Clara, CA), 0.0025U/µL inorganic pyrophosphatase (Affymetr
  844. ix) and 300 µM each dNTP.
  845. Reaction was incubated at 22°C for 30 minutes, then beads were washed 2
  846. times with 1X TE buffer (200µL).
  847. \end_layout
  848. \begin_layout Standard
  849. The magnetic streptavidin beads were resuspended in 34 µL nuclease-free
  850. water and added directly to a PCR tube.
  851. The two Illumina protocol-specified PCR primers were added at 0.53 µM (Illumina
  852. TruSeq Universal Primer 1 and Illumina TruSeq barcoded PCR primer 2), along
  853. with 40 µL 2X KAPA HiFi Hotstart ReadyMix (KAPA, Wilmington, MA) and thermocycl
  854. ed as follows: starting with 98°C (2 min-hold); 15 cycles of 98°C, 20sec;
  855. 60°C, 30sec; 72°C, 30sec; and finished with a 72°C (2 min-hold).
  856. \end_layout
  857. \begin_layout Standard
  858. PCR products were purified with 1X Ampure Beads following manufacturer’s
  859. recommended protocol.
  860. Libraries were then analyzed using the Agilent TapeStation and quantitation
  861. of desired size range was performed by “smear analysis”.
  862. Samples were pooled in equimolar batches of 16 samples.
  863. Pooled libraries were size selected on 2% agarose gels (E-Gel EX Agarose
  864. Gels; Thermo-Fisher).
  865. Products were cut between 250 and 350 bp (corresponding to insert sizes
  866. of 130 to 230 bp).
  867. Finished library pools were then sequenced on the Illumina NextSeq500 instrumen
  868. t with 75 base read lengths.
  869. \end_layout
  870. \begin_layout Subsection*
  871. Read alignment and counting
  872. \end_layout
  873. \begin_layout Standard
  874. Reads were aligned to the cynomolgus genome using STAR
  875. \begin_inset CommandInset citation
  876. LatexCommand cite
  877. key "Dobin2013,Wilson2013"
  878. literal "false"
  879. \end_inset
  880. .
  881. Counts of uniquely mapped reads were obtained for every gene in each sample
  882. with the “featureCounts” function from the Rsubread package, using each
  883. of the three possibilities for the “strandSpecific” option: sense, antisense,
  884. and unstranded
  885. \begin_inset CommandInset citation
  886. LatexCommand cite
  887. key "Liao2014"
  888. literal "false"
  889. \end_inset
  890. .
  891. A few artifacts in the cynomolgus genome annotation complicated read counting.
  892. First, no ortholog is annotated for alpha globin in the cynomolgus genome,
  893. presumably because the human genome has two alpha globin genes with nearly
  894. identical sequences, making the orthology relationship ambiguous.
  895. However, two loci in the cynomolgus genome are annotated as “hemoglobin subunit alpha-lik
  896. e” (LOC102136192 and LOC102136846).
  897. LOC102136192 is annotated as a pseudogene while LOC102136846 is annotated
  898. as protein-coding.
  899. Our globin reduction protocol was designed to include blocking of these
  900. two genes.
  901. Indeed, these two genes have almost the same read counts in each library
  902. as the properly-annotated HBB gene and much larger counts than any other
  903. gene in the unblocked libraries, giving confidence that reads derived from
  904. the real alpha globin are mapping to both genes.
  905. Thus, reads from both of these loci were counted as alpha globin reads
  906. in all further analyses.
  907. The second artifact is a small, uncharacterized non-coding RNA gene (LOC1021365
  908. 91), which overlaps the HBA-like gene (LOC102136192) on the opposite strand.
  909. If counting is not performed in stranded mode (or if a non-strand-specific
  910. sequencing protocol is used), many reads mapping to the globin gene will
  911. be discarded as ambiguous due to their overlap with this ncRNA gene, resulting
  912. in significant undercounting of globin reads.
  913. Therefore, stranded sense counts were used for all further analysis in
  914. the present study to ensure that we accurately accounted for globin transcript
  915. reduction.
  916. However, we note that stranded reads are not necessary for RNA-seq using
  917. our protocol in standard practice.
  918. \end_layout
  919. \begin_layout Subsection*
  920. Normalization and Exploratory Data Analysis
  921. \end_layout
  922. \begin_layout Standard
  923. Libraries were normalized by computing scaling factors using the edgeR package’s
  924. Trimmed Mean of M-values method
  925. \begin_inset CommandInset citation
  926. LatexCommand cite
  927. key "Robinson2010"
  928. literal "false"
  929. \end_inset
  930. .
  931. Log2 counts per million values (logCPM) were calculated using the cpm function
  932. in edgeR for individual samples and aveLogCPM function for averages across
  933. groups of samples, using those functions’ default prior count values to
  934. avoid taking the logarithm of 0.
  935. Genes were considered “present” if their average normalized logCPM values
  936. across all libraries were at least -1.
  937. Normalizing for gene length was unnecessary because the sequencing protocol
  938. is 3’-biased and hence the expected read count for each gene is related
  939. to the transcript’s copy number but not its length.
  940. \end_layout
  941. \begin_layout Standard
  942. In order to assess the effect of blocking on reproducibility, Pearson and
  943. Spearman correlation coefficients were computed between the logCPM values
  944. for every pair of libraries within the globin-blocked (GB) and unblocked
  945. (non-GB) groups, and edgeR's “estimateDisp” function was used to compute
  946. negative binomial dispersions separately for the two groups
  947. \begin_inset CommandInset citation
  948. LatexCommand cite
  949. key "Chen2014"
  950. literal "false"
  951. \end_inset
  952. .
  953. \end_layout
  954. \begin_layout Subsection*
  955. Differential Expression Analysis
  956. \end_layout
  957. \begin_layout Standard
  958. All tests for differential gene expression were performed using edgeR, by
  959. first fitting a negative binomial generalized linear model to the counts
  960. and normalization factors and then performing a quasi-likelihood F-test
  961. with robust estimation of outlier gene dispersions
  962. \begin_inset CommandInset citation
  963. LatexCommand cite
  964. key "Lund2012,Phipson2016"
  965. literal "false"
  966. \end_inset
  967. .
  968. To investigate the effects of globin blocking on each gene, an additive
  969. model was fit to the full data with coefficients for globin blocking and
  970. SampleID.
  971. To test the effect of globin blocking on detection of differentially expressed
  972. genes, the GB samples and non-GB samples were each analyzed independently
  973. as follows: for each animal with both a pre-transplant and a post-transplant
  974. time point in the data set, the pre-transplant sample and the earliest
  975. post-transplant sample were selected, and all others were excluded, yielding
  976. a pre-/post-transplant pair of samples for each animal (N=7 animals with
  977. paired samples).
  978. These samples were analyzed for pre-transplant vs.
  979. post-transplant differential gene expression while controlling for inter-animal
  980. variation using an additive model with coefficients for transplant and
  981. animal ID.
  982. In all analyses, p-values were adjusted using the Benjamini-Hochberg procedure
  983. for FDR correction
  984. \begin_inset CommandInset citation
  985. LatexCommand cite
  986. key "Benjamini1995"
  987. literal "false"
  988. \end_inset
  989. .
  990. \end_layout
  991. \begin_layout Standard
  992. \begin_inset Note Note
  993. status open
  994. \begin_layout Itemize
  995. New blood RNA-seq protocol to block reverse transcription of globin genes
  996. \end_layout
  997. \begin_layout Itemize
  998. Blood RNA-seq time course after transplants with/without MSC infusion
  999. \end_layout
  1000. \end_inset
  1001. \end_layout
  1002. \begin_layout Section
  1003. Results
  1004. \end_layout
  1005. \begin_layout Subsection*
  1006. Globin blocking yields a larger and more consistent fraction of useful reads
  1007. \end_layout
  1008. \begin_layout Standard
  1009. The objective of the present study was to validate a new protocol for deep
  1010. RNA-seq of whole blood drawn into PAXgene tubes from cynomolgus monkeys
  1011. undergoing islet transplantation, with particular focus on minimizing the
  1012. loss of useful sequencing space to uninformative globin reads.
  1013. The details of the analysis with respect to transplant outcomes and the
  1014. impact of mesenchymal stem cell treatment will be reported in a separate
  1015. manuscript (in preparation).
  1016. To focus on the efficacy of our globin blocking protocol, 37 blood samples,
  1017. 16 from pre-transplant and 21 from post-transplant time points, were each
  1018. prepped once with and once without globin blocking oligos, and were then
  1019. sequenced on an Illumina NextSeq500 instrument.
  1020. The number of reads aligning to each gene in the cynomolgus genome was
  1021. counted.
  1022. Table 1 summarizes the distribution of read fractions among the GB and
  1023. non-GB libraries.
  1024. In the libraries with no globin blocking, globin reads made up an average
  1025. of 44.6% of total input reads, while reads assigned to all other genes made
  1026. up an average of 26.3%.
  1027. The remaining reads either aligned to intergenic regions (that include
  1028. long non-coding RNAs) or did not align with any annotated transcripts in
  1029. the current build of the cynomolgus genome.
  1030. In the GB libraries, globin reads made up only 3.48% and reads assigned
  1031. to all other genes increased to 50.4%.
  1032. Thus, globin blocking resulted in a 92.2% reduction in globin reads and
  1033. a 91.6% increase in yield of useful non-globin reads.
  1034. \end_layout
  1035. \begin_layout Standard
  1036. This reduction is not quite as efficient as the previous analysis showed
  1037. for human samples by DeepSAGE (<0.4% globin reads after globin reduction)
  1038. \begin_inset CommandInset citation
  1039. LatexCommand cite
  1040. key "Mastrokolias2012"
  1041. literal "false"
  1042. \end_inset
  1043. .
  1044. Nonetheless, this degree of globin reduction is sufficient to nearly double
  1045. the yield of useful reads.
  1046. Thus, globin blocking cuts the required sequencing effort (and costs) to
  1047. achieve a target coverage depth by almost 50%.
  1048. Consistent with this near doubling of yield, the average difference in
  1049. un-normalized logCPM across all genes between the GB libraries and non-GB
  1050. libraries is approximately 1 (mean = 1.01, median = 1.08), an overall 2-fold
  1051. increase.
  1052. Un-normalized values are used here because the TMM normalization correctly
  1053. identifies this 2-fold difference as biologically irrelevant and removes
  1054. it.
  1055. \end_layout
  1056. \begin_layout Standard
  1057. \begin_inset Float figure
  1058. wide false
  1059. sideways false
  1060. status open
  1061. \begin_layout Plain Layout
  1062. \align center
  1063. \begin_inset Graphics
  1064. filename graphics/Globin Paper/figure1 - globin-fractions.pdf
  1065. \end_inset
  1066. \end_layout
  1067. \begin_layout Plain Layout
  1068. \begin_inset Caption Standard
  1069. \begin_layout Plain Layout
  1070. \series bold
  1071. \begin_inset Argument 1
  1072. status collapsed
  1073. \begin_layout Plain Layout
  1074. Fraction of genic reads in each sample aligned to non-globin genes, with
  1075. and without globin blocking (GB).
  1076. \end_layout
  1077. \end_inset
  1078. \begin_inset CommandInset label
  1079. LatexCommand label
  1080. name "fig:Fraction-of-genic-reads"
  1081. \end_inset
  1082. Fraction of genic reads in each sample aligned to non-globin genes, with
  1083. and without globin blocking (GB).
  1084. \series default
  1085. All reads in each sequencing library were aligned to the cynomolgus genome, and
  1086. the number of reads uniquely aligning to each gene was counted.
  1087. For each sample, counts were summed separately for all globin genes and
  1088. for the remainder of the genes (non-globin genes), and the fraction of
  1089. genic reads aligned to non-globin genes was computed.
  1090. Each point represents an individual sample.
  1091. Gray + signs indicate the means for globin-blocked libraries and unblocked
  1092. libraries.
  1093. The overall distribution for each group is represented as a notched box
  1094. plot.
  1095. Points are randomly spread vertically to avoid excessive overlapping.
  1096. \end_layout
  1097. \end_inset
  1098. \end_layout
  1099. \begin_layout Plain Layout
  1100. \end_layout
  1101. \end_inset
  1102. \end_layout
  1103. \begin_layout Standard
  1104. \begin_inset Float table
  1105. placement p
  1106. wide false
  1107. sideways true
  1108. status open
  1109. \begin_layout Plain Layout
  1110. \align center
  1111. \begin_inset Tabular
  1112. <lyxtabular version="3" rows="4" columns="7">
  1113. <features tabularvalignment="middle">
  1114. <column alignment="center" valignment="top">
  1115. <column alignment="center" valignment="top">
  1116. <column alignment="center" valignment="top">
  1117. <column alignment="center" valignment="top">
  1118. <column alignment="center" valignment="top">
  1119. <column alignment="center" valignment="top">
  1120. <column alignment="center" valignment="top">
  1121. <row>
  1122. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1123. \begin_inset Text
  1124. \begin_layout Plain Layout
  1125. \end_layout
  1126. \end_inset
  1127. </cell>
  1128. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1129. \begin_inset Text
  1130. \begin_layout Plain Layout
  1131. \family roman
  1132. \series medium
  1133. \shape up
  1134. \size normal
  1135. \emph off
  1136. \bar no
  1137. \strikeout off
  1138. \xout off
  1139. \uuline off
  1140. \uwave off
  1141. \noun off
  1142. \color none
  1143. Percent of Total Reads
  1144. \end_layout
  1145. \end_inset
  1146. </cell>
  1147. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1148. \begin_inset Text
  1149. \begin_layout Plain Layout
  1150. \end_layout
  1151. \end_inset
  1152. </cell>
  1153. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1154. \begin_inset Text
  1155. \begin_layout Plain Layout
  1156. \end_layout
  1157. \end_inset
  1158. </cell>
  1159. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1160. \begin_inset Text
  1161. \begin_layout Plain Layout
  1162. \end_layout
  1163. \end_inset
  1164. </cell>
  1165. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1166. \begin_inset Text
  1167. \begin_layout Plain Layout
  1168. \family roman
  1169. \series medium
  1170. \shape up
  1171. \size normal
  1172. \emph off
  1173. \bar no
  1174. \strikeout off
  1175. \xout off
  1176. \uuline off
  1177. \uwave off
  1178. \noun off
  1179. \color none
  1180. Percent of Genic Reads
  1181. \end_layout
  1182. \end_inset
  1183. </cell>
  1184. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1185. \begin_inset Text
  1186. \begin_layout Plain Layout
  1187. \end_layout
  1188. \end_inset
  1189. </cell>
  1190. </row>
  1191. <row>
  1192. <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none">
  1193. \begin_inset Text
  1194. \begin_layout Plain Layout
  1195. GB
  1196. \end_layout
  1197. \end_inset
  1198. </cell>
  1199. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1200. \begin_inset Text
  1201. \begin_layout Plain Layout
  1202. \family roman
  1203. \series medium
  1204. \shape up
  1205. \size normal
  1206. \emph off
  1207. \bar no
  1208. \strikeout off
  1209. \xout off
  1210. \uuline off
  1211. \uwave off
  1212. \noun off
  1213. \color none
  1214. Non-globin Reads
  1215. \end_layout
  1216. \end_inset
  1217. </cell>
  1218. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1219. \begin_inset Text
  1220. \begin_layout Plain Layout
  1221. \family roman
  1222. \series medium
  1223. \shape up
  1224. \size normal
  1225. \emph off
  1226. \bar no
  1227. \strikeout off
  1228. \xout off
  1229. \uuline off
  1230. \uwave off
  1231. \noun off
  1232. \color none
  1233. Globin Reads
  1234. \end_layout
  1235. \end_inset
  1236. </cell>
  1237. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1238. \begin_inset Text
  1239. \begin_layout Plain Layout
  1240. \family roman
  1241. \series medium
  1242. \shape up
  1243. \size normal
  1244. \emph off
  1245. \bar no
  1246. \strikeout off
  1247. \xout off
  1248. \uuline off
  1249. \uwave off
  1250. \noun off
  1251. \color none
  1252. All Genic Reads
  1253. \end_layout
  1254. \end_inset
  1255. </cell>
  1256. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1257. \begin_inset Text
  1258. \begin_layout Plain Layout
  1259. \family roman
  1260. \series medium
  1261. \shape up
  1262. \size normal
  1263. \emph off
  1264. \bar no
  1265. \strikeout off
  1266. \xout off
  1267. \uuline off
  1268. \uwave off
  1269. \noun off
  1270. \color none
  1271. All Aligned Reads
  1272. \end_layout
  1273. \end_inset
  1274. </cell>
  1275. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1276. \begin_inset Text
  1277. \begin_layout Plain Layout
  1278. \family roman
  1279. \series medium
  1280. \shape up
  1281. \size normal
  1282. \emph off
  1283. \bar no
  1284. \strikeout off
  1285. \xout off
  1286. \uuline off
  1287. \uwave off
  1288. \noun off
  1289. \color none
  1290. Non-globin Reads
  1291. \end_layout
  1292. \end_inset
  1293. </cell>
  1294. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  1295. \begin_inset Text
  1296. \begin_layout Plain Layout
  1297. \family roman
  1298. \series medium
  1299. \shape up
  1300. \size normal
  1301. \emph off
  1302. \bar no
  1303. \strikeout off
  1304. \xout off
  1305. \uuline off
  1306. \uwave off
  1307. \noun off
  1308. \color none
  1309. Globin Reads
  1310. \end_layout
  1311. \end_inset
  1312. </cell>
  1313. </row>
  1314. <row>
  1315. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1316. \begin_inset Text
  1317. \begin_layout Plain Layout
  1318. \family roman
  1319. \series medium
  1320. \shape up
  1321. \size normal
  1322. \emph off
  1323. \bar no
  1324. \strikeout off
  1325. \xout off
  1326. \uuline off
  1327. \uwave off
  1328. \noun off
  1329. \color none
  1330. Yes
  1331. \end_layout
  1332. \end_inset
  1333. </cell>
  1334. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1335. \begin_inset Text
  1336. \begin_layout Plain Layout
  1337. \family roman
  1338. \series medium
  1339. \shape up
  1340. \size normal
  1341. \emph off
  1342. \bar no
  1343. \strikeout off
  1344. \xout off
  1345. \uuline off
  1346. \uwave off
  1347. \noun off
  1348. \color none
  1349. 50.4% ± 6.82
  1350. \end_layout
  1351. \end_inset
  1352. </cell>
  1353. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1354. \begin_inset Text
  1355. \begin_layout Plain Layout
  1356. \family roman
  1357. \series medium
  1358. \shape up
  1359. \size normal
  1360. \emph off
  1361. \bar no
  1362. \strikeout off
  1363. \xout off
  1364. \uuline off
  1365. \uwave off
  1366. \noun off
  1367. \color none
  1368. 3.48% ± 2.94
  1369. \end_layout
  1370. \end_inset
  1371. </cell>
  1372. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1373. \begin_inset Text
  1374. \begin_layout Plain Layout
  1375. \family roman
  1376. \series medium
  1377. \shape up
  1378. \size normal
  1379. \emph off
  1380. \bar no
  1381. \strikeout off
  1382. \xout off
  1383. \uuline off
  1384. \uwave off
  1385. \noun off
  1386. \color none
  1387. 53.9% ± 6.81
  1388. \end_layout
  1389. \end_inset
  1390. </cell>
  1391. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1392. \begin_inset Text
  1393. \begin_layout Plain Layout
  1394. \family roman
  1395. \series medium
  1396. \shape up
  1397. \size normal
  1398. \emph off
  1399. \bar no
  1400. \strikeout off
  1401. \xout off
  1402. \uuline off
  1403. \uwave off
  1404. \noun off
  1405. \color none
  1406. 89.7% ± 2.40
  1407. \end_layout
  1408. \end_inset
  1409. </cell>
  1410. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1411. \begin_inset Text
  1412. \begin_layout Plain Layout
  1413. \family roman
  1414. \series medium
  1415. \shape up
  1416. \size normal
  1417. \emph off
  1418. \bar no
  1419. \strikeout off
  1420. \xout off
  1421. \uuline off
  1422. \uwave off
  1423. \noun off
  1424. \color none
  1425. 93.5% ± 5.25
  1426. \end_layout
  1427. \end_inset
  1428. </cell>
  1429. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1430. \begin_inset Text
  1431. \begin_layout Plain Layout
  1432. \family roman
  1433. \series medium
  1434. \shape up
  1435. \size normal
  1436. \emph off
  1437. \bar no
  1438. \strikeout off
  1439. \xout off
  1440. \uuline off
  1441. \uwave off
  1442. \noun off
  1443. \color none
  1444. 6.49% ± 5.25
  1445. \end_layout
  1446. \end_inset
  1447. </cell>
  1448. </row>
  1449. <row>
  1450. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1451. \begin_inset Text
  1452. \begin_layout Plain Layout
  1453. \family roman
  1454. \series medium
  1455. \shape up
  1456. \size normal
  1457. \emph off
  1458. \bar no
  1459. \strikeout off
  1460. \xout off
  1461. \uuline off
  1462. \uwave off
  1463. \noun off
  1464. \color none
  1465. No
  1466. \end_layout
  1467. \end_inset
  1468. </cell>
  1469. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1470. \begin_inset Text
  1471. \begin_layout Plain Layout
  1472. \family roman
  1473. \series medium
  1474. \shape up
  1475. \size normal
  1476. \emph off
  1477. \bar no
  1478. \strikeout off
  1479. \xout off
  1480. \uuline off
  1481. \uwave off
  1482. \noun off
  1483. \color none
  1484. 26.3% ± 8.95
  1485. \end_layout
  1486. \end_inset
  1487. </cell>
  1488. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1489. \begin_inset Text
  1490. \begin_layout Plain Layout
  1491. \family roman
  1492. \series medium
  1493. \shape up
  1494. \size normal
  1495. \emph off
  1496. \bar no
  1497. \strikeout off
  1498. \xout off
  1499. \uuline off
  1500. \uwave off
  1501. \noun off
  1502. \color none
  1503. 44.6% ± 16.6
  1504. \end_layout
  1505. \end_inset
  1506. </cell>
  1507. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1508. \begin_inset Text
  1509. \begin_layout Plain Layout
  1510. \family roman
  1511. \series medium
  1512. \shape up
  1513. \size normal
  1514. \emph off
  1515. \bar no
  1516. \strikeout off
  1517. \xout off
  1518. \uuline off
  1519. \uwave off
  1520. \noun off
  1521. \color none
  1522. 70.1% ± 9.38
  1523. \end_layout
  1524. \end_inset
  1525. </cell>
  1526. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1527. \begin_inset Text
  1528. \begin_layout Plain Layout
  1529. \family roman
  1530. \series medium
  1531. \shape up
  1532. \size normal
  1533. \emph off
  1534. \bar no
  1535. \strikeout off
  1536. \xout off
  1537. \uuline off
  1538. \uwave off
  1539. \noun off
  1540. \color none
  1541. 90.7% ± 5.16
  1542. \end_layout
  1543. \end_inset
  1544. </cell>
  1545. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1546. \begin_inset Text
  1547. \begin_layout Plain Layout
  1548. \family roman
  1549. \series medium
  1550. \shape up
  1551. \size normal
  1552. \emph off
  1553. \bar no
  1554. \strikeout off
  1555. \xout off
  1556. \uuline off
  1557. \uwave off
  1558. \noun off
  1559. \color none
  1560. 38.8% ± 17.1
  1561. \end_layout
  1562. \end_inset
  1563. </cell>
  1564. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  1565. \begin_inset Text
  1566. \begin_layout Plain Layout
  1567. \family roman
  1568. \series medium
  1569. \shape up
  1570. \size normal
  1571. \emph off
  1572. \bar no
  1573. \strikeout off
  1574. \xout off
  1575. \uuline off
  1576. \uwave off
  1577. \noun off
  1578. \color none
  1579. 61.2% ± 17.1
  1580. \end_layout
  1581. \end_inset
  1582. </cell>
  1583. </row>
  1584. </lyxtabular>
  1585. \end_inset
  1586. \end_layout
  1587. \begin_layout Plain Layout
  1588. \begin_inset Caption Standard
  1589. \begin_layout Plain Layout
  1590. \series bold
  1591. \begin_inset Argument 1
  1592. status collapsed
  1593. \begin_layout Plain Layout
  1594. Fractions of reads mapping to genomic features in GB and non-GB samples.
  1595. \end_layout
  1596. \end_inset
  1597. \begin_inset CommandInset label
  1598. LatexCommand label
  1599. name "tab:Fractions-of-reads"
  1600. \end_inset
  1601. Fractions of reads mapping to genomic features in GB and non-GB samples.
  1602. \series default
  1603. All values are given as mean ± standard deviation.
  1604. \end_layout
  1605. \end_inset
  1606. \end_layout
  1607. \begin_layout Plain Layout
  1608. \end_layout
  1609. \end_inset
  1610. \end_layout
  1611. \begin_layout Standard
  1612. Another important aspect is that the standard deviations in Table
  1613. \begin_inset CommandInset ref
  1614. LatexCommand ref
  1615. reference "tab:Fractions-of-reads"
  1616. plural "false"
  1617. caps "false"
  1618. noprefix "false"
  1619. \end_inset
  1620. are uniformly smaller in the GB samples than the non-GB ones, indicating
  1621. much greater consistency of yield.
  1622. This is best seen in the percentage of non-globin reads as a fraction of
  1623. total reads aligned to annotated genes (genic reads).
  1624. For the non-GB samples, this measure ranges from 10.9% to 80.9%, while for
  1625. the GB samples it ranges from 81.9% to 99.9% (Figure
  1626. \begin_inset CommandInset ref
  1627. LatexCommand ref
  1628. reference "fig:Fraction-of-genic-reads"
  1629. plural "false"
  1630. caps "false"
  1631. noprefix "false"
  1632. \end_inset
  1633. ).
  1634. This means that for applications where it is critical that each sample
  1635. achieve a specified minimum coverage in order to provide useful information,
  1636. it would be necessary to budget up to 10 times the sequencing depth per
  1637. sample without globin blocking, even though the average yield improvement
  1638. for globin blocking is only 2-fold, because every sample has a chance of
  1639. being 90% globin and 10% useful reads.
  1640. Hence, the more consistent behavior of GB samples makes planning an experiment
  1641. easier and more efficient because it eliminates the need to over-sequence
  1642. every sample in order to guard against the worst case of a high-globin
  1643. fraction.
  1644. \end_layout
  1645. \begin_layout Subsection*
  1646. Globin blocking lowers the noise floor and allows detection of about 2000
  1647. more genes
  1648. \end_layout
  1649. \begin_layout Standard
  1650. \begin_inset Note Note
  1651. status collapsed
  1652. \begin_layout Plain Layout
  1653. TODO Remove extraneous titles from figures
  1654. \end_layout
  1655. \end_inset
  1656. \end_layout
  1657. \begin_layout Standard
  1658. \begin_inset Float figure
  1659. wide false
  1660. sideways false
  1661. status open
  1662. \begin_layout Plain Layout
  1663. \align center
  1664. \begin_inset Graphics
  1665. filename graphics/Globin Paper/figure2 - aveLogCPM-colored.pdf
  1666. \end_inset
  1667. \end_layout
  1668. \begin_layout Plain Layout
  1669. \begin_inset Caption Standard
  1670. \begin_layout Plain Layout
  1671. \series bold
  1672. \begin_inset Argument 1
  1673. status collapsed
  1674. \begin_layout Plain Layout
  1675. Distributions of average group gene abundances when normalized separately
  1676. or together.
  1677. \end_layout
  1678. \end_inset
  1679. \begin_inset CommandInset label
  1680. LatexCommand label
  1681. name "fig:logcpm-dists"
  1682. \end_inset
  1683. Distributions of average group gene abundances when normalized separately
  1684. or together.
  1685. \series default
  1686. All reads in each sequencing library were aligned to the cyno genome, and
  1687. the number of reads uniquely aligning to each gene was counted.
  1688. Genes with zero counts in all libraries were discarded.
  1689. Libraries were normalized using the TMM method.
  1690. Libraries were split into globin-blocked (GB) and non-GB groups and the
  1691. average abundance for each gene in both groups, measured in log2 counts
  1692. per million reads counted, was computed using the aveLogCPM function.
  1693. The distribution of average gene logCPM values was plotted for both groups
  1694. using a kernel density plot to approximate a continuous distribution.
  1695. The GB logCPM distributions are marked in red, and the non-GB ones in blue.
  1696. The black vertical line denotes the chosen detection threshold of -1.
  1697. Top panel: Libraries were split into GB and non-GB groups first and normalized
  1698. separately.
  1699. Bottom panel: Libraries were all normalized together first and then split
  1700. into groups.
  1701. \end_layout
  1702. \end_inset
  1703. \end_layout
  1704. \begin_layout Plain Layout
  1705. \end_layout
  1706. \end_inset
  1707. \end_layout
  1708. \begin_layout Standard
  1709. Since globin blocking yields more usable sequencing depth, it should also
  1710. allow detection of more genes at any given threshold.
  1711. When we looked at the distribution of average normalized logCPM values
  1712. across all libraries for genes with at least one read assigned to them,
  1713. we observed the expected bimodal distribution, with a high-abundance "signal"
  1714. peak representing detected genes and a low-abundance "noise" peak representing
  1715. genes whose read count did not rise above the noise floor (Figure
  1716. \begin_inset CommandInset ref
  1717. LatexCommand ref
  1718. reference "fig:logcpm-dists"
  1719. plural "false"
  1720. caps "false"
  1721. noprefix "false"
  1722. \end_inset
  1723. ).
  1724. Consistent with the 2-fold increase in raw counts assigned to non-globin
  1725. genes, the signal peak for GB samples is shifted to the right relative
  1726. to the non-GB signal peak.
  1727. When all the samples are normalized together, this difference is normalized
  1728. out, lining up the signal peaks, and this reveals that, as expected, the
  1729. noise floor for the GB samples is about 2-fold lower.
  1730. This greater separation between signal and noise peaks in the GB samples
  1731. means that low-expression genes should be more easily detected and more
  1732. precisely quantified than in the non-GB samples.
  1733. \end_layout
  1734. \begin_layout Standard
  1735. \begin_inset Float figure
  1736. wide false
  1737. sideways false
  1738. status open
  1739. \begin_layout Plain Layout
  1740. \align center
  1741. \begin_inset Graphics
  1742. filename graphics/Globin Paper/figure3 - detection.pdf
  1743. \end_inset
  1744. \end_layout
  1745. \begin_layout Plain Layout
  1746. \begin_inset Caption Standard
  1747. \begin_layout Plain Layout
  1748. \series bold
  1749. \begin_inset Argument 1
  1750. status collapsed
  1751. \begin_layout Plain Layout
  1752. Gene detections as a function of abundance thresholds in globin-blocked
  1753. (GB) and non-GB samples.
  1754. \end_layout
  1755. \end_inset
  1756. \begin_inset CommandInset label
  1757. LatexCommand label
  1758. name "fig:Gene-detections"
  1759. \end_inset
  1760. Gene detections as a function of abundance thresholds in globin-blocked
  1761. (GB) and non-GB samples.
  1762. \series default
  1763. Average abundance (logCPM,
  1764. \begin_inset Formula $\log_{2}$
  1765. \end_inset
  1766. counts per million reads counted) was computed by separate group normalization
  1767. as described in Figure
  1768. \begin_inset CommandInset ref
  1769. LatexCommand ref
  1770. reference "fig:logcpm-dists"
  1771. plural "false"
  1772. caps "false"
  1773. noprefix "false"
  1774. \end_inset
  1775. for both the GB and non-GB groups, as well as for all samples considered
  1776. as one large group.
  1777. For every integer threshold from -2 to 3, the number of genes detected
  1778. at or above that logCPM threshold was plotted for each group.
  1779. \end_layout
  1780. \end_inset
  1781. \end_layout
  1782. \begin_layout Plain Layout
  1783. \end_layout
  1784. \end_inset
  1785. \end_layout
  1786. \begin_layout Standard
  1787. Based on these distributions, we selected a detection threshold of -1, which
  1788. is approximately the leftmost edge of the trough between the signal and
  1789. noise peaks.
  1790. This represents the most liberal possible detection threshold that doesn't
  1791. call substantial numbers of noise genes as detected.
  1792. Among the full dataset, 13429 genes were detected at this threshold, and
  1793. 22276 were not.
  1794. When considering the GB libraries and non-GB libraries separately and re-comput
  1795. ing normalization factors independently within each group, 14535 genes were
  1796. detected in the GB libraries while only 12460 were detected in the non-GB
  1797. libraries.
  1798. Thus, GB allowed the detection of about 2000 extra genes that were buried under
  1799. the noise floor without GB.
  1800. This pattern of at least 2000 additional genes detected with GB was also
  1801. consistent across a wide range of possible detection thresholds, from -2
  1802. to 3 (see Figure
  1803. \begin_inset CommandInset ref
  1804. LatexCommand ref
  1805. reference "fig:Gene-detections"
  1806. plural "false"
  1807. caps "false"
  1808. noprefix "false"
  1809. \end_inset
  1810. ).
  1811. \end_layout
  1812. \begin_layout Subsection*
  1813. Globin blocking does not add significant additional noise or decrease sample
  1814. quality
  1815. \end_layout
  1816. \begin_layout Standard
  1817. One potential worry is that the globin blocking protocol could perturb the
  1818. levels of non-globin genes.
  1819. There are two kinds of possible perturbations: systematic and random.
  1820. The former is not a major concern for detection of differential expression,
  1821. since a 2-fold change in every sample has no effect on the relative fold
  1822. change between samples.
  1823. In contrast, random perturbations would increase the noise and obscure
  1824. the signal in the dataset, reducing the capacity to detect differential
  1825. expression.
  1826. \end_layout
  1827. \begin_layout Standard
  1828. \begin_inset Float figure
  1829. wide false
  1830. sideways false
  1831. status open
  1832. \begin_layout Plain Layout
  1833. \align center
  1834. \begin_inset Graphics
  1835. filename graphics/Globin Paper/figure4 - maplot-colored.pdf
  1836. \end_inset
  1837. \end_layout
  1838. \begin_layout Plain Layout
  1839. \begin_inset Caption Standard
  1840. \begin_layout Plain Layout
  1841. \begin_inset Argument 1
  1842. status collapsed
  1843. \begin_layout Plain Layout
  1844. MA plot showing effects of globin blocking on each gene's abundance.
  1845. \end_layout
  1846. \end_inset
  1847. \begin_inset CommandInset label
  1848. LatexCommand label
  1849. name "fig:MA-plot"
  1850. \end_inset
  1851. \series bold
  1852. MA plot showing effects of globin blocking on each gene's abundance.
  1853. \series default
  1854. All libraries were normalized together as described in Figure
  1855. \begin_inset CommandInset ref
  1856. LatexCommand ref
  1857. reference "fig:logcpm-dists"
  1858. plural "false"
  1859. caps "false"
  1860. noprefix "false"
  1861. \end_inset
  1862. , and genes with an average logCPM below -1 were filtered out.
  1863. Each remaining gene was tested for differential abundance with respect
  1864. to globin blocking (GB) using edgeR’s quasi-likelihood F-test, fitting a
  1865. negative binomial generalized linear model to the table of read counts in each
  1866. library.
  1867. For each gene, edgeR reported average abundance (logCPM),
  1868. \begin_inset Formula $\log_{2}$
  1869. \end_inset
  1870. fold change (logFC), p-value, and Benjamini-Hochberg adjusted false discovery
  1871. rate (FDR).
  1872. Each gene's logFC was plotted against its logCPM, colored by FDR.
  1873. Red points are significant at ≤10% FDR, and blue are not significant at
  1874. that threshold.
  1875. The alpha and beta globin genes targeted for blocking are marked with large
  1876. triangles, while all other genes are represented as small points.
  1877. \end_layout
  1878. \end_inset
  1879. \end_layout
  1880. \begin_layout Plain Layout
  1881. \end_layout
  1882. \end_inset
  1883. \end_layout
  1884. \begin_layout Standard
  1885. \begin_inset Note Note
  1886. status open
  1887. \begin_layout Plain Layout
  1888. TODO Standardize on
  1889. \begin_inset Quotes eld
  1890. \end_inset
  1891. log2
  1892. \begin_inset Quotes erd
  1893. \end_inset
  1894. notation
  1895. \end_layout
  1896. \end_inset
  1897. \end_layout
  1898. \begin_layout Standard
  1899. The data do indeed show small systematic perturbations in gene levels (Figure
  1900. \begin_inset CommandInset ref
  1901. LatexCommand ref
  1902. reference "fig:MA-plot"
  1903. plural "false"
  1904. caps "false"
  1905. noprefix "false"
  1906. \end_inset
  1907. ).
  1908. Other than the three designated alpha and beta globin genes, two other genes
  1909. stand out as having especially large negative log fold changes: HBD and
  1910. LOC1021365.
  1911. HBD, delta globin, is most likely targeted by the blocking oligos due to
  1912. high sequence homology with the other globin genes.
  1913. LOC1021365 is the aforementioned ncRNA that is reverse-complementary to
  1914. one of the alpha-like genes and that would be expected to be removed during
  1915. the globin blocking step.
  1916. All other genes appear in a cluster centered vertically at 0, and the vast
  1917. majority of genes in this cluster show an absolute log2(FC) of 0.5 or less.
  1918. Nevertheless, many of these small perturbations are still statistically
  1919. significant, indicating that the globin blocking oligos likely cause very
  1920. small but non-zero systematic perturbations in measured gene expression
  1921. levels.
  1922. \end_layout
  1923. \begin_layout Standard
  1924. \begin_inset Float figure
  1925. wide false
  1926. sideways false
  1927. status open
  1928. \begin_layout Plain Layout
  1929. \align center
  1930. \begin_inset Graphics
  1931. filename graphics/Globin Paper/figure5 - corrplot.pdf
  1932. \end_inset
  1933. \end_layout
  1934. \begin_layout Plain Layout
  1935. \begin_inset Caption Standard
  1936. \begin_layout Plain Layout
  1937. \series bold
  1938. \begin_inset Argument 1
  1939. status collapsed
  1940. \begin_layout Plain Layout
  1941. Comparison of inter-sample gene abundance correlations with and without
  1942. globin blocking.
  1943. \end_layout
  1944. \end_inset
  1945. \begin_inset CommandInset label
  1946. LatexCommand label
  1947. name "fig:gene-abundance-correlations"
  1948. \end_inset
  1949. Comparison of inter-sample gene abundance correlations with and without
  1950. globin blocking (GB).
  1951. \series default
  1952. All libraries were normalized together as described in Figure 2, and genes
  1953. with an average abundance (logCPM, log2 counts per million reads counted)
  1954. less than -1 were filtered out.
  1955. Each gene’s logCPM was computed in each library using the edgeR cpm function.
  1956. For each pair of biological samples, the Pearson correlation between those
  1957. samples' GB libraries was plotted against the correlation between the same
  1958. samples’ non-GB libraries.
  1959. Each point represents a unique pair of samples.
  1960. The solid gray line shows a quantile-quantile plot of distribution of GB
  1961. correlations vs.
  1962. that of non-GB correlations.
  1963. The thin dashed line is the identity line, provided for reference.
  1964. \end_layout
  1965. \end_inset
  1966. \end_layout
  1967. \begin_layout Plain Layout
  1968. \end_layout
  1969. \end_inset
  1970. \end_layout
  1971. \begin_layout Standard
  1972. To evaluate the possibility of globin blocking causing random perturbations
  1973. and reducing sample quality, we computed the Pearson correlation between
  1974. logCPM values for every pair of samples with and without GB and plotted
  1975. them against each other (Figure
  1976. \begin_inset CommandInset ref
  1977. LatexCommand ref
  1978. reference "fig:gene-abundance-correlations"
  1979. plural "false"
  1980. caps "false"
  1981. noprefix "false"
  1982. \end_inset
  1983. ).
  1984. The plot indicated that the GB libraries have higher sample-to-sample correlati
  1985. ons than the non-GB libraries.
  1986. Parametric and nonparametric tests for differences between the correlations
  1987. with and without GB both confirmed that this difference was highly significant
  1988. (2-sided paired t-test: t = 37.2, df = 665, P ≪ 2.2e-16; 2-sided Wilcoxon
  1989. sign-rank test: V = 2195, P ≪ 2.2e-16).
  1990. Performing the same tests on the Spearman correlations gave the same conclusion
  1991. (t-test: t = 26.8, df = 665, P ≪ 2.2e-16; sign-rank test: V = 8781, P ≪ 2.2e-16).
  1992. The edgeR package was used to compute the overall biological coefficient
  1993. of variation (BCV) for GB and non-GB libraries, and we found that globin blocking
  1994. resulted in a negligible increase in the BCV (0.417 with GB vs.
  1995. 0.400 without).
  1996. The near equality of the BCVs for both sets indicates that the higher correlati
  1997. ons in the GB libraries are most likely a result of the increased yield
  1998. of useful reads, which reduces the contribution of Poisson counting uncertainty
  1999. to the overall variance of the logCPM values
  2000. \begin_inset CommandInset citation
  2001. LatexCommand cite
  2002. key "McCarthy2012"
  2003. literal "false"
  2004. \end_inset
  2005. .
  2006. This improves the precision of expression measurements and more than offsets
  2007. the negligible increase in BCV.
  2008. \end_layout
  2009. \begin_layout Subsection*
  2010. More differentially expressed genes are detected with globin blocking
  2011. \end_layout
  2012. \begin_layout Standard
  2013. \begin_inset Float table
  2014. wide false
  2015. sideways false
  2016. status open
  2017. \begin_layout Plain Layout
  2018. \align center
  2019. \begin_inset Tabular
  2020. <lyxtabular version="3" rows="5" columns="5">
  2021. <features tabularvalignment="middle">
  2022. <column alignment="center" valignment="top">
  2023. <column alignment="center" valignment="top">
  2024. <column alignment="center" valignment="top">
  2025. <column alignment="center" valignment="top">
  2026. <column alignment="center" valignment="top">
  2027. <row>
  2028. <cell alignment="center" valignment="top" usebox="none">
  2029. \begin_inset Text
  2030. \begin_layout Plain Layout
  2031. \end_layout
  2032. \end_inset
  2033. </cell>
  2034. <cell alignment="center" valignment="top" usebox="none">
  2035. \begin_inset Text
  2036. \begin_layout Plain Layout
  2037. \end_layout
  2038. \end_inset
  2039. </cell>
  2040. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2041. \begin_inset Text
  2042. \begin_layout Plain Layout
  2043. \series bold
  2044. No Globin Blocking
  2045. \end_layout
  2046. \end_inset
  2047. </cell>
  2048. <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2049. \begin_inset Text
  2050. \begin_layout Plain Layout
  2051. \end_layout
  2052. \end_inset
  2053. </cell>
  2054. <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2055. \begin_inset Text
  2056. \begin_layout Plain Layout
  2057. \end_layout
  2058. \end_inset
  2059. </cell>
  2060. </row>
  2061. <row>
  2062. <cell alignment="center" valignment="top" usebox="none">
  2063. \begin_inset Text
  2064. \begin_layout Plain Layout
  2065. \end_layout
  2066. \end_inset
  2067. </cell>
  2068. <cell alignment="center" valignment="top" usebox="none">
  2069. \begin_inset Text
  2070. \begin_layout Plain Layout
  2071. \end_layout
  2072. \end_inset
  2073. </cell>
  2074. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2075. \begin_inset Text
  2076. \begin_layout Plain Layout
  2077. \series bold
  2078. Up
  2079. \end_layout
  2080. \end_inset
  2081. </cell>
  2082. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2083. \begin_inset Text
  2084. \begin_layout Plain Layout
  2085. \series bold
  2086. NS
  2087. \end_layout
  2088. \end_inset
  2089. </cell>
  2090. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2091. \begin_inset Text
  2092. \begin_layout Plain Layout
  2093. \series bold
  2094. Down
  2095. \end_layout
  2096. \end_inset
  2097. </cell>
  2098. </row>
  2099. <row>
  2100. <cell multirow="3" alignment="center" valignment="middle" topline="true" bottomline="true" leftline="true" usebox="none">
  2101. \begin_inset Text
  2102. \begin_layout Plain Layout
  2103. \series bold
  2104. Globin-Blocking
  2105. \end_layout
  2106. \end_inset
  2107. </cell>
  2108. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2109. \begin_inset Text
  2110. \begin_layout Plain Layout
  2111. \series bold
  2112. Up
  2113. \end_layout
  2114. \end_inset
  2115. </cell>
  2116. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2117. \begin_inset Text
  2118. \begin_layout Plain Layout
  2119. \family roman
  2120. \series medium
  2121. \shape up
  2122. \size normal
  2123. \emph off
  2124. \bar no
  2125. \strikeout off
  2126. \xout off
  2127. \uuline off
  2128. \uwave off
  2129. \noun off
  2130. \color none
  2131. 231
  2132. \end_layout
  2133. \end_inset
  2134. </cell>
  2135. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2136. \begin_inset Text
  2137. \begin_layout Plain Layout
  2138. \family roman
  2139. \series medium
  2140. \shape up
  2141. \size normal
  2142. \emph off
  2143. \bar no
  2144. \strikeout off
  2145. \xout off
  2146. \uuline off
  2147. \uwave off
  2148. \noun off
  2149. \color none
  2150. 515
  2151. \end_layout
  2152. \end_inset
  2153. </cell>
  2154. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2155. \begin_inset Text
  2156. \begin_layout Plain Layout
  2157. \family roman
  2158. \series medium
  2159. \shape up
  2160. \size normal
  2161. \emph off
  2162. \bar no
  2163. \strikeout off
  2164. \xout off
  2165. \uuline off
  2166. \uwave off
  2167. \noun off
  2168. \color none
  2169. 2
  2170. \end_layout
  2171. \end_inset
  2172. </cell>
  2173. </row>
  2174. <row>
  2175. <cell multirow="4" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2176. \begin_inset Text
  2177. \begin_layout Plain Layout
  2178. \end_layout
  2179. \end_inset
  2180. </cell>
  2181. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2182. \begin_inset Text
  2183. \begin_layout Plain Layout
  2184. \series bold
  2185. NS
  2186. \end_layout
  2187. \end_inset
  2188. </cell>
  2189. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2190. \begin_inset Text
  2191. \begin_layout Plain Layout
  2192. \family roman
  2193. \series medium
  2194. \shape up
  2195. \size normal
  2196. \emph off
  2197. \bar no
  2198. \strikeout off
  2199. \xout off
  2200. \uuline off
  2201. \uwave off
  2202. \noun off
  2203. \color none
  2204. 160
  2205. \end_layout
  2206. \end_inset
  2207. </cell>
  2208. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2209. \begin_inset Text
  2210. \begin_layout Plain Layout
  2211. \family roman
  2212. \series medium
  2213. \shape up
  2214. \size normal
  2215. \emph off
  2216. \bar no
  2217. \strikeout off
  2218. \xout off
  2219. \uuline off
  2220. \uwave off
  2221. \noun off
  2222. \color none
  2223. 11235
  2224. \end_layout
  2225. \end_inset
  2226. </cell>
  2227. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2228. \begin_inset Text
  2229. \begin_layout Plain Layout
  2230. \family roman
  2231. \series medium
  2232. \shape up
  2233. \size normal
  2234. \emph off
  2235. \bar no
  2236. \strikeout off
  2237. \xout off
  2238. \uuline off
  2239. \uwave off
  2240. \noun off
  2241. \color none
  2242. 136
  2243. \end_layout
  2244. \end_inset
  2245. </cell>
  2246. </row>
  2247. <row>
  2248. <cell multirow="4" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2249. \begin_inset Text
  2250. \begin_layout Plain Layout
  2251. \end_layout
  2252. \end_inset
  2253. </cell>
  2254. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2255. \begin_inset Text
  2256. \begin_layout Plain Layout
  2257. \series bold
  2258. Down
  2259. \end_layout
  2260. \end_inset
  2261. </cell>
  2262. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2263. \begin_inset Text
  2264. \begin_layout Plain Layout
  2265. \family roman
  2266. \series medium
  2267. \shape up
  2268. \size normal
  2269. \emph off
  2270. \bar no
  2271. \strikeout off
  2272. \xout off
  2273. \uuline off
  2274. \uwave off
  2275. \noun off
  2276. \color none
  2277. 0
  2278. \end_layout
  2279. \end_inset
  2280. </cell>
  2281. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2282. \begin_inset Text
  2283. \begin_layout Plain Layout
  2284. \family roman
  2285. \series medium
  2286. \shape up
  2287. \size normal
  2288. \emph off
  2289. \bar no
  2290. \strikeout off
  2291. \xout off
  2292. \uuline off
  2293. \uwave off
  2294. \noun off
  2295. \color none
  2296. 548
  2297. \end_layout
  2298. \end_inset
  2299. </cell>
  2300. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2301. \begin_inset Text
  2302. \begin_layout Plain Layout
  2303. \family roman
  2304. \series medium
  2305. \shape up
  2306. \size normal
  2307. \emph off
  2308. \bar no
  2309. \strikeout off
  2310. \xout off
  2311. \uuline off
  2312. \uwave off
  2313. \noun off
  2314. \color none
  2315. 127
  2316. \end_layout
  2317. \end_inset
  2318. </cell>
  2319. </row>
  2320. </lyxtabular>
  2321. \end_inset
  2322. \end_layout
  2323. \begin_layout Plain Layout
  2324. \begin_inset Caption Standard
  2325. \begin_layout Plain Layout
  2326. \series bold
  2327. \begin_inset Argument 1
  2328. status open
  2329. \begin_layout Plain Layout
  2330. Comparison of significantly differentially expressed genes with and without
  2331. globin blocking.
  2332. \end_layout
  2333. \end_inset
  2334. \begin_inset CommandInset label
  2335. LatexCommand label
  2336. name "tab:Comparison-of-significant"
  2337. \end_inset
  2338. Comparison of significantly differentially expressed genes with and without
  2339. globin blocking.
  2340. \series default
  2341. Up, Down: Genes significantly up/down-regulated in post-transplant samples
  2342. relative to pre-transplant samples, with a false discovery rate of 10%
  2343. or less.
  2344. NS: Non-significant genes (false discovery rate greater than 10%).
  2345. \end_layout
  2346. \end_inset
  2347. \end_layout
  2348. \begin_layout Plain Layout
  2349. \end_layout
  2350. \end_inset
  2351. \end_layout
  2352. \begin_layout Standard
  2353. To compare performance on differential gene expression tests, we took subsets
  2354. of both the GB and non-GB libraries with exactly one pre-transplant and
  2355. one post-transplant sample for each animal that had paired samples available
  2356. for analysis (N=7 animals, N=14 samples in each subset).
  2357. The same test for pre- vs.
  2358. post-transplant differential gene expression was performed on the same
  2359. 7 pairs of samples from GB libraries and non-GB libraries, in each case
  2360. using an FDR of 10% as the threshold of significance.
  2361. Out of 12954 genes that passed the detection threshold in both subsets,
  2362. 358 were called significantly differentially expressed in the same direction
  2363. in both sets; 1063 were differentially expressed in the GB set only; 296
  2364. were differentially expressed in the non-GB set only; 2 genes were called
  2365. significantly up in the GB set but significantly down in the non-GB set;
  2366. and the remaining 11235 were not called differentially expressed in either
  2367. set.
  2368. These data are summarized in Table
  2369. \begin_inset CommandInset ref
  2370. LatexCommand ref
  2371. reference "tab:Comparison-of-significant"
  2372. plural "false"
  2373. caps "false"
  2374. noprefix "false"
  2375. \end_inset
  2376. .
  2377. The differences in BCV calculated by EdgeR for these subsets of samples
  2378. were negligible (BCV = 0.302 for GB and 0.297 for non-GB).
  2379. \end_layout
  2380. \begin_layout Standard
  2381. The key point is that the GB data result in substantially more differentially
  2382. expressed gene calls than the non-GB data.
  2383. Since there is no gold standard for this dataset, it is impossible to be
  2384. certain whether this is due to under-calling of differential expression
  2385. in the non-GB samples or over-calling in the GB samples.
  2386. However, given that both datasets are derived from the same biological
  2387. samples and have nearly equal BCVs, it is more likely that the larger number
  2388. of DE calls in the GB samples reflects genuine detections that were enabled
  2389. by the higher sequencing depth and measurement precision of the GB samples.
  2390. Note that the same set of genes was considered in both subsets, so the
  2391. larger number of differentially expressed gene calls in the GB data set
  2392. reflects a greater sensitivity to detect significant differential gene
  2393. expression and not simply the larger total number of detected genes in
  2394. GB samples described earlier.
  2395. \end_layout
  2396. \begin_layout Section
  2397. Discussion
  2398. \end_layout
  2399. \begin_layout Standard
  2400. The original experience with whole blood gene expression profiling on DNA
  2401. microarrays demonstrated that the high concentration of globin transcripts
  2402. interfered with the detection of genes with relatively low expression
  2403. levels, in effect significantly reducing the sensitivity of the assay.
  2404. To address this limitation, commercial protocols for globin reduction were
  2405. developed based on strategies to block globin transcript amplification
  2406. during labeling or physically removing globin transcripts by affinity bead
  2407. methods
  2408. \begin_inset CommandInset citation
  2409. LatexCommand cite
  2410. key "Winn2010"
  2411. literal "false"
  2412. \end_inset
  2413. .
  2414. More recently, using the latest generation of labeling protocols and arrays,
  2415. it was determined that globin reduction was no longer necessary to obtain
  2416. sufficient sensitivity to detect differential transcript expression
  2417. \begin_inset CommandInset citation
  2418. LatexCommand cite
  2419. key "NuGEN2010"
  2420. literal "false"
  2421. \end_inset
  2422. .
  2423. However, we are not aware of any publications using these currently available
  2424. protocols with the latest generation of microarrays that actually compare
  2425. the detection sensitivity with and without globin reduction.
  2426. Nevertheless, in practice, omitting globin reduction has now been generally
  2427. adopted, primarily driven by concerns for cost control.
  2428. The main objective of our work was to directly test the impact of globin
  2429. gene transcripts and a new globin blocking protocol for application to
  2430. the newest generation of differential gene expression profiling determined
  2431. using next generation sequencing.
  2432. \end_layout
  2433. \begin_layout Standard
  2434. The challenge of doing global gene expression profiling in cynomolgus monkeys
  2435. is that the currently available arrays were never designed to comprehensively
  2436. cover this genome and have not been updated since the first assemblies
  2437. of the cynomolgus genome were published.
  2438. Therefore, we determined that the best strategy for peripheral blood profiling
  2439. was to do deep RNA-seq and inform the workflow using the latest available
  2440. genome assembly and annotation
  2441. \begin_inset CommandInset citation
  2442. LatexCommand cite
  2443. key "Wilson2013"
  2444. literal "false"
  2445. \end_inset
  2446. .
  2447. However, it was not immediately clear whether globin reduction was necessary
  2448. for RNA-seq or how much improvement in efficiency or sensitivity to detect
  2449. differential gene expression would be achieved for the added cost and work.
  2450. \end_layout
  2451. \begin_layout Standard
  2452. We only found one report that demonstrated that globin reduction significantly
  2453. improved the effective read yields for sequencing of human peripheral blood
  2454. cell RNA using a DeepSAGE protocol
  2455. \begin_inset CommandInset citation
  2456. LatexCommand cite
  2457. key "Mastrokolias2012"
  2458. literal "false"
  2459. \end_inset
  2460. .
  2461. The approach to DeepSAGE involves two different restriction enzymes that
  2462. purify and then tag small fragments of transcripts at specific locations
  2463. and thus significantly reduces the complexity of the transcriptome.
  2464. Therefore, we could not determine how DeepSAGE results would translate
  2465. to the common strategy in the field for assaying the entire transcript
  2466. population by whole-transcriptome 3’-end RNA-seq.
  2467. Furthermore, if globin reduction is necessary, we also needed a globin
  2468. reduction method specific to cynomolgus globin sequences that would work
  2469. in an organism for which no kit is available off the shelf.
  2470. \end_layout
  2471. \begin_layout Standard
  2472. As mentioned above, the addition of globin blocking oligos has a very small
  2473. impact on measured gene expression levels.
  2474. However, this is a non-issue for the purposes of differential expression
  2475. testing, since a systematic change in a gene in all samples does not affect
  2476. relative expression levels between samples.
  2477. However, we must acknowledge that simple comparisons of gene expression
  2478. data obtained by GB and non-GB protocols are not possible without additional
  2479. normalization.
  2480. \end_layout
  2481. \begin_layout Standard
  2482. More importantly, globin blocking not only nearly doubles the yield of usable
  2483. reads, it also increases inter-sample correlation and sensitivity to detect
  2484. differential gene expression relative to the same set of samples profiled
  2485. without blocking.
  2486. In addition, globin blocking does not add a significant amount of random
  2487. noise to the data.
  2488. Globin blocking thus represents a cost-effective way to squeeze more data
  2489. and statistical power out of the same blood samples and the same amount
  2490. of sequencing.
  2491. In conclusion, globin reduction greatly increases the yield of useful RNA-seq
  2492. reads mapping to the rest of the genome, with minimal perturbations in
  2493. the relative levels of non-globin genes.
  2494. Based on these results, globin transcript reduction using sequence-specific,
  2495. complementary blocking oligonucleotides is recommended for all deep RNA-seq
  2496. of cynomolgus and other nonhuman primate blood samples.
  2497. \end_layout
  2498. \begin_layout Chapter
  2499. Future Directions
  2500. \end_layout
  2501. \begin_layout Itemize
  2502. Study other epigenetic marks in more contexts
  2503. \end_layout
  2504. \begin_deeper
  2505. \begin_layout Itemize
  2506. DNA methylation, histone marks, chromatin accessibility & conformation in
  2507. CD4 T-cells
  2508. \end_layout
  2509. \begin_layout Itemize
  2510. Also look at other types of lymphocytes: CD8 T-cells, B-cells, NK cells
  2511. \end_layout
  2512. \end_deeper
  2513. \begin_layout Itemize
  2514. Investigate epigenetic regulation of lifespan extension in
  2515. \emph on
  2516. C.
  2517. elegans
  2518. \end_layout
  2519. \begin_deeper
  2520. \begin_layout Itemize
  2521. ChIP-seq of important transcriptional regulators to see how transcriptional
  2522. drift is prevented
  2523. \end_layout
  2524. \end_deeper
  2525. \begin_layout Standard
  2526. \begin_inset ERT
  2527. status open
  2528. \begin_layout Plain Layout
  2529. % Use "References" instead of "Bibliography"
  2530. \end_layout
  2531. \begin_layout Plain Layout
  2532. \backslash
  2533. renewcommand{
  2534. \backslash
  2535. bibname}{References}
  2536. \end_layout
  2537. \end_inset
  2538. \end_layout
  2539. \begin_layout Standard
  2540. \begin_inset Note Note
  2541. status open
  2542. \begin_layout Plain Layout
  2543. TODO: Check bib entry formatting & sort order
  2544. \end_layout
  2545. \end_inset
  2546. \end_layout
  2547. \begin_layout Standard
  2548. \begin_inset CommandInset bibtex
  2549. LatexCommand bibtex
  2550. btprint "btPrintCited"
  2551. bibfiles "refs"
  2552. options "plain"
  2553. \end_inset
  2554. \end_layout
  2555. \end_body
  2556. \end_document