12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812 |
- #LyX 2.3 created this file. For more info see http://www.lyx.org/
- \lyxformat 544
- \begin_document
- \begin_header
- \save_transient_properties true
- \origin unavailable
- \textclass extbook
- \begin_preamble
- % List all used files in log output
- \listfiles
- % Add a DRAFT watermark
- \usepackage{draftwatermark}
- \SetWatermarkLightness{0.97}
- \SetWatermarkScale{1}
- % Set up required header format
- \usepackage{fancyhdr}
- \pagestyle{fancy}
- \renewcommand{\headrulewidth}{0pt}
- \rhead{}
- \lhead{}
- \rfoot{}
- \lfoot{}
- \cfoot{\thepage} % Page number bottom center
- % https://tex.stackexchange.com/questions/65680/automatically-bold-first-sentence-of-a-floats-caption
- \usepackage{xstring}
- \usepackage{etoolbox}
- \usepackage{caption}
- \captionsetup{labelfont=bf,tableposition=top}
- \makeatletter
- \newcommand\formatlabel[1]{%
- \noexpandarg
- \IfSubStr{#1}{.}{%
- \StrBefore{#1}{.}[\firstcaption]%
- \StrBehind{#1}{.}[\secondcaption]%
- \textbf{\firstcaption.} \secondcaption}{%
- #1}%
- }
- \patchcmd{\@caption}{#3}{\formatlabel{#3}}
- \makeatother
- \end_preamble
- \use_default_options true
- \begin_modules
- todonotes
- \end_modules
- \maintain_unincluded_children false
- \language english
- \language_package default
- \inputencoding utf8
- \fontencoding default
- \font_roman "default" "default"
- \font_sans "default" "default"
- \font_typewriter "default" "default"
- \font_math "auto" "auto"
- \font_default_family default
- \use_non_tex_fonts false
- \font_sc false
- \font_osf false
- \font_sf_scale 100 100
- \font_tt_scale 100 100
- \use_microtype false
- \use_dash_ligatures true
- \graphics default
- \default_output_format pdf4
- \output_sync 0
- \bibtex_command default
- \index_command default
- \paperfontsize 12
- \spacing double
- \use_hyperref true
- \pdf_bookmarks true
- \pdf_bookmarksnumbered false
- \pdf_bookmarksopen false
- \pdf_bookmarksopenlevel 1
- \pdf_breaklinks false
- \pdf_pdfborder false
- \pdf_colorlinks false
- \pdf_backref false
- \pdf_pdfusetitle true
- \papersize letterpaper
- \use_geometry true
- \use_package amsmath 1
- \use_package amssymb 1
- \use_package cancel 1
- \use_package esint 1
- \use_package mathdots 1
- \use_package mathtools 1
- \use_package mhchem 1
- \use_package stackrel 1
- \use_package stmaryrd 1
- \use_package undertilde 1
- \cite_engine basic
- \cite_engine_type default
- \biblio_style plain
- \use_bibtopic false
- \use_indices false
- \paperorientation portrait
- \suppress_date false
- \justification true
- \use_refstyle 1
- \use_minted 0
- \index Index
- \shortcut idx
- \color #008000
- \end_index
- \leftmargin 1.5in
- \topmargin 1in
- \rightmargin 1in
- \bottommargin 1in
- \secnumdepth 3
- \tocdepth 3
- \paragraph_separation indent
- \paragraph_indentation default
- \is_math_indent 0
- \math_numbering_side default
- \quotes_style english
- \dynamic_quotes 0
- \papercolumns 1
- \papersides 2
- \paperpagestyle default
- \tracking_changes false
- \output_changes false
- \html_math_output 0
- \html_css_as_file 0
- \html_be_strict false
- \end_header
- \begin_body
- \begin_layout Title
- Bioinformatic analysis of complex, high-throughput genomic and epigenomic
- data in the context of immunology and transplant rejection
- \end_layout
- \begin_layout Author
- A thesis presented
- \begin_inset Newline newline
- \end_inset
- by
- \begin_inset Newline newline
- \end_inset
- Ryan C.
- Thompson
- \begin_inset Newline newline
- \end_inset
- to
- \begin_inset Newline newline
- \end_inset
- The Scripps Research Institute Graduate Program
- \begin_inset Newline newline
- \end_inset
- in partial fulfillment of the requirements for the degree of
- \begin_inset Newline newline
- \end_inset
- Doctor of Philosophy in the subject of Biology
- \begin_inset Newline newline
- \end_inset
- for
- \begin_inset Newline newline
- \end_inset
- The Scripps Research Institute
- \begin_inset Newline newline
- \end_inset
- La Jolla, California
- \end_layout
- \begin_layout Date
- May 2019
- \end_layout
- \begin_layout Standard
- [Copyright notice]
- \end_layout
- \begin_layout Standard
- [Thesis acceptance form]
- \end_layout
- \begin_layout Standard
- [Dedication]
- \end_layout
- \begin_layout Standard
- [Acknowledgements]
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset toc
- LatexCommand tableofcontents
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset FloatList table
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset FloatList figure
- \end_inset
- \end_layout
- \begin_layout Standard
- [List of Abbreviations]
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Look into auto-generated nomenclature list: https://wiki.lyx.org/Tips/Nomenclature
- \end_layout
- \end_inset
- \end_layout
- \begin_layout List of TODOs
- \end_layout
- \begin_layout Standard
- [Abstract]
- \end_layout
- \begin_layout Chapter*
- Abstract
- \end_layout
- \begin_layout Chapter
- Introduction
- \end_layout
- \begin_layout Section
- Background & Significance
- \end_layout
- \begin_layout Subsection
- Biological motivation
- \end_layout
- \begin_layout Itemize
- Rejection is the major long-term threat to organ and tissue grafts
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Common mechanisms of rejection
- \end_layout
- \begin_layout Itemize
- Effective immune suppression requires monitoring for rejection and tuning
-
- \end_layout
- \begin_layout Itemize
- Current tests for rejection (tissue biopsy) are invasive and biased
- \end_layout
- \begin_layout Itemize
- A blood test based on microarrays would be less biased and invasive
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Memory cells are resistant to immune suppression
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Mechanisms of resistance in memory cells are poorly understood
- \end_layout
- \begin_layout Itemize
- A better understanding of immune memory formation is needed
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Mesenchymal stem cell infusion is a promising new treatment to prevent/delay
- rejection
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Demonstrated in mice, but not yet in primates
- \end_layout
- \begin_layout Itemize
- Mechanism currently unknown, but MSC are known to be immune modulatory
- \end_layout
- \end_deeper
- \begin_layout Subsection
- Overview of bioinformatic analysis methods
- \end_layout
- \begin_layout Standard
- An overview of all the methods used, including what problem they solve,
- what assumptions they make, and a basic description of how they work.
- \end_layout
- \begin_layout Itemize
- ChIP-seq Peak calling
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Cross-correlation analysis to determine fragment size
- \end_layout
- \begin_layout Itemize
- Broad vs narrow peaks
- \end_layout
- \begin_layout Itemize
- SICER for broad peaks
- \end_layout
- \begin_layout Itemize
- IDR for biologically reproducible peaks
- \end_layout
- \begin_layout Itemize
- csaw peak filtering guidelines for unbiased downstream analysis
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Normalization is non-trivial and application-dependent
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Expression arrays: RMA & fRMA; why fRMA is needed
- \end_layout
- \begin_layout Itemize
- Methylation arrays: M-value transformation approximates normal data but
- induces heteroskedasticity
- \end_layout
- \begin_layout Itemize
- RNA-seq: normalize based on assumption that the average gene is not changing
- \end_layout
- \begin_layout Itemize
- ChIP-seq: complex with many considerations, dependent on experimental methods,
- biological system, and analysis goals
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Limma: The standard linear modeling framework for genomics
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- empirical Bayes variance modeling: limma's core feature
- \end_layout
- \begin_layout Itemize
- edgeR & DESeq2: Extend with negative binomial GLM for RNA-seq and other
- count data
- \end_layout
- \begin_layout Itemize
- voom: Extend with precision weights to model mean-variance trend
- \end_layout
- \begin_layout Itemize
- arrayWeights and duplicateCorrelation to handle complex variance structures
- \end_layout
- \end_deeper
- \begin_layout Itemize
- sva and ComBat for batch correction
- \end_layout
- \begin_layout Itemize
- Factor analysis: PCA, MDS, MOFA
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Batch-corrected PCA is informative, but careful application is required
- to avoid bias
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Gene set analysis: camera and SPIA
- \end_layout
- \begin_layout Section
- Innovation
- \end_layout
- \begin_layout Itemize
- MSC infusion to improve transplant outcomes (prevent/delay rejection)
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Characterize MSC response to interferon gamma
- \end_layout
- \begin_layout Itemize
- IFN-g is thought to stimulate their function
- \end_layout
- \begin_layout Itemize
- Test IFN-g treated MSC infusion as a therapy to delay graft rejection in
- cynomolgus monkeys
- \end_layout
- \begin_layout Itemize
- Monitor animals post-transplant using blood RNA-seq at serial time points
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Investigate dynamics of histone marks in CD4 T-cell activation and memory
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Previous studies have looked at single snapshots of histone marks
- \end_layout
- \begin_layout Itemize
- Instead, look at changes in histone marks across activation and memory
- \end_layout
- \end_deeper
- \begin_layout Itemize
- High-throughput sequencing and microarray technologies
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Powerful methods for assaying gene expression and epigenetics across entire
- genomes
- \end_layout
- \begin_layout Itemize
- Proper analysis requires finding and exploiting systematic genome-wide trends
- \end_layout
- \end_deeper
- \begin_layout Chapter
- Reproducible genome-wide epigenetic analysis of H3K4 and H3K27 methylation
- in naive and memory CD4 T-cell activation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Author list: Me, Sarah, Dan
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Itemize
- CD4 T-cells are central to all adaptive immune responses and memory
- \end_layout
- \begin_layout Itemize
- H3K4 and H3K27 methylation are major epigenetic regulators of gene expression
- \end_layout
- \begin_layout Itemize
- Canonically, H3K4 is activating and H3K27 is inhibitory, but the reality
- is complex
- \end_layout
- \begin_layout Itemize
- Looking at these marks during CD4 activation and memory should reveal new
- mechanistic details
- \end_layout
- \begin_layout Itemize
- Test
- \begin_inset Quotes eld
- \end_inset
- poised promoter
- \begin_inset Quotes erd
- \end_inset
- hypothesis in which H3K4 and H3K27 are both methylated
- \end_layout
- \begin_layout Itemize
- Expand scope of analysis beyond simple promoter counts
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Analyze peaks genome-wide, including in intergenic regions
- \end_layout
- \begin_layout Itemize
- Analysis of coverage distribution shape within promoters, e.g.
- upstream vs downstream coverage
- \end_layout
- \end_deeper
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Itemize
- Re-analyze previously published CD4 ChIP-seq & RNA-seq data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016,Lamere2017"
- literal "true"
- \end_inset
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Completely reimplement analysis from scratch as a reproducible workflow
- \end_layout
- \begin_layout Itemize
- Use newly published methods & algorithms not available during the original
- analysis: SICER, csaw, MOFA, ComBat, sva, GREAT, and more
- \end_layout
- \end_deeper
- \begin_layout Itemize
- SICER, IDR, csaw, & GREAT to call ChIP-seq peaks genome-wide, perform differenti
- al abundance analysis, and relate those peaks to gene expression
- \end_layout
- \begin_layout Itemize
- Promoter counts in sliding windows around each gene's highest-expressed
- TSS to investigate coverage distribution within promoters
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Focus on what hypotheses were tested, then select figures that show how
- those hypotheses were tested, even if the result is a negative.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- H3K4 and H3K27 methylation occur in broad regions and are enriched near
- promoters
- \end_layout
- \begin_layout Itemize
- Figures comparing MACS (non-broad peak caller) to SICER/epic (broad peak
- caller)
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Compare peak sizes and number of called peaks
- \end_layout
- \begin_layout Itemize
- Show representative IDR consistency plots for both
- \end_layout
- \end_deeper
- \begin_layout Itemize
- IDR analysis shows that SICER-called peaks are much more reproducible between
- biological replicates
- \end_layout
- \begin_layout Itemize
- Each histone mark is enriched within a certain radius of gene TSS positions,
- but that radius is different for each mark (figure)
- \end_layout
- \begin_layout Subsection
- RNA-seq has a large confounding batch effect
- \end_layout
- \begin_layout Itemize
- RNA-seq batch effect can be partially corrected, but still induces uncorrectable
- biases in downstream analysis
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Figure showing MDS plot before & after ComBat
- \end_layout
- \begin_layout Itemize
- Figure relating sample weights to batches, cell types, time points, etc.,
- showing that one batch is significantly worse quality
- \end_layout
- \begin_layout Itemize
- Figures showing p-value histograms for within-batch and cross-batch contrasts,
- showing that cross-batch contrasts have attenuated signal, as do comparisons
- within the bad batch
- \end_layout
- \end_deeper
- \begin_layout Subsection
- ChIP-seq must be corrected for hidden confounding factors
- \end_layout
- \begin_layout Itemize
- Figures showing pre- and post-SVA MDS plots for each histone mark
- \end_layout
- \begin_layout Itemize
- Figures showing BCV plots with and without SVA for each histone mark
- \end_layout
- \begin_layout Subsection
- H3K4 and H3K27 promoter methylation has broadly the expected correlation
- with gene expression
- \end_layout
- \begin_layout Itemize
- H3K4 is correlated with higher expression, and H3K27 is correlated with
- lower expression genome-wide
- \end_layout
- \begin_layout Itemize
- Figures showing these correlations: box/violin plots of expression distributions
- with every combination of peak presence/absence in promoter
- \end_layout
- \begin_layout Itemize
- Appropriate statistical tests showing significant differences in expected
- directions
- \end_layout
- \begin_layout Subsection
- MOFA recovers biologically relevant variation from blind analysis by correlating
- across datasets
- \end_layout
- \begin_layout Itemize
- MOFA
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Argelaguet2018"
- literal "false"
- \end_inset
- successfully separates biologically relevant patterns of variation from
- technical confounding factors without knowing the sample labels, by finding
- latent factors that explain variation across multiple data sets.
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Figure: show percent-variance-explained plot from MOFA and PCA-like plots
- for the relevant latent factors
- \end_layout
- \begin_layout Itemize
- MOFA analysis also shows that batch effect correction can't get much better
- than it already is (Figure comparing blind MOFA batch correction to ComBat
- correction)
- \end_layout
- \end_deeper
- \begin_layout Subsection
- Naive-to-memory convergence observed in H3K4 and RNA-seq data, not in H3K27me3
- \end_layout
- \begin_layout Itemize
- H3K4 and RNA-seq data show clear evidence of naive convergence with memory
- between days 1 and 5 (MDS plot figure, also compare with last figure from
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- )
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Get explicit permission from Sarah to include the figure
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Table of numbers of genes different between N & M at each time point, showing
- dwindling differences at later time points, consistent with convergence
- \end_layout
- \begin_layout Itemize
- Similar figure for H3K27me3 showing lack of convergence
- \end_layout
- \begin_layout Subsection
- Effect of promoter coverage upstream vs downstream of TSS
- \end_layout
- \begin_layout Itemize
- H3K4me peaks seem to correlate with increased expression as long as they
- are anywhere near the TSS
- \end_layout
- \begin_layout Itemize
- H3K27me3 peaks can have different correlations to gene expression depending
- on their position relative to TSS (e.g.
- upstream vs downstream). Results consistent with
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Young2011"
- literal "false"
- \end_inset
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Itemize
- "Promoter radius" is not constant and must be defined empirically for a
- given data set
- \end_layout
- \begin_layout Itemize
- MOFA shows great promise for accelerating discovery of major biological
- effects in multi-omics datasets
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- MOFA was added to this analysis late and played primarily a confirmatory
- role, but it was able to confirm earlier conclusions with much less prior
- information (no sample labels) and much less analyst effort
- \end_layout
- \begin_layout Itemize
- MOFA confirmed that the already-implemented batch correction in the RNA-seq
- data was already performing as well as possible given the limitations of
- the data
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Naive-to-memory convergence implies that naive cells are differentiating
- into memory cells, and that gene expression and H3K4 methylation are involved
- in this differentiation while H3K27me3 is less involved
- \end_layout
- \begin_layout Itemize
- H3K27me3, canonically regarded as a deactivating mark, seems to have a more
- complex relationship with gene expression
- \end_layout
- \begin_layout Itemize
- Discuss advantages of developing using a reproducible workflow
- \end_layout
- \begin_layout Chapter
- Improving array-based analyses of transplant rejection by optimizing data
- preprocessing
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Author list: Me, Sunil, Tom, Padma, Dan
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Subsection
- fRMA for classifiers
- \end_layout
- \begin_layout Itemize
- RMA makes the normalization of every sample depend on all other samples
- due to the quantile normalization and median polish steps
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- This makes standard RMA impractical to apply in a machine learning context,
- because adding in the new sample(s) to be classified changes the normalization
- of all samples
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Machine-learning applications demand a "single-channel" normalization method
- \end_layout
- \begin_layout Itemize
- Frozen RMA (fRMA) addresses these concerns by replacing the quantile normalizati
- on and median polish with alternatives that do not introduce inter-array
- dependence, allowing each array to be normalized independently of all others
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Quantile normalization is performed against a pre-generated set of quantiles
- learned from a large collection of publicly available array data in GEO
- \end_layout
- \begin_layout Itemize
- Median polish is replaced with a weighted average of probes, using weights
- learned from the same public GEO data
- \end_layout
- \begin_layout Itemize
- With fRMA, there is no difference between normalizing
- \begin_inset Quotes eld
- \end_inset
- together
- \begin_inset Quotes erd
- \end_inset
- or separately, and any normalized sample can be compared to any other
- \end_layout
- \end_deeper
- \begin_layout Itemize
- frozen RMA is a good solution for common array platforms with large amounts
- of publicly available data, but for less common platforms, ready-made
- normalization vectors are not provided, so custom vectors must be learned
- from in-house data
- \end_layout
- \begin_layout Subsection
- Adapting voom to model heteroskedasticity in methylation array data
- \end_layout
- \begin_layout Itemize
- Methylation array data preprocessing induces heteroskedasticity
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- β
- \series bold
-
- \series default
- values, interpreted as fraction of copies methylated, range from 0 to 1.
- \end_layout
- \begin_layout Itemize
- β
- \series bold
-
- \series default
- values, with their constrained range, are highly non-normal and not suitable
- for linear modeling
- \end_layout
- \begin_layout Itemize
- M-values, interpreted as ratio of methylated to unmethylated copies, map
- the beta values from
- \begin_inset Formula $[0,1]$
- \end_inset
- onto
- \begin_inset Formula $(-\infty,+\infty)$
- \end_inset
- , also transforming them to have approximately normally distributed error
- \end_layout
- \end_deeper
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Graphics
- filename graphics/methylvoom/sigmoid.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Sigmoid-beta-m-mapping"
- \end_inset
- \series bold
- Sigmoid shape of the mapping between β and M values
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- However, the sigmoid transformation (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Sigmoid-beta-m-mapping"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) over-exaggerates the variance of extreme values, leading to a U-shaped
- trend in the mean-variance curve
- \end_layout
- \begin_layout Itemize
- This mean-variance dependency must be accounted for when fitting the linear
- model for differential methylation
- \end_layout
- \begin_layout Itemize
- Voom method, originally developed for RNA-seq data, can model mean-variance
- dependence
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Standard implementation of voom assumes the input is read counts, and adjustment
- s are required to run it on M-values.
- \end_layout
- \begin_layout Itemize
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Put code on Github and reference it
- \end_layout
- \end_inset
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Other methods, such as duplicateCorrelation and arrayWeights, are also applicabl
- e with no need for custom adaptation
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Subsection
- fRMA
- \end_layout
- \begin_layout Itemize
- Expression array normalization for detecting acute rejection
- \end_layout
- \begin_layout Itemize
- Use frozen RMA, a single-channel variant of RMA
- \end_layout
- \begin_layout Itemize
- Generate custom fRMA normalization vectors for each tissue (biopsy, blood)
- \end_layout
- \begin_layout Subsubsection
- Methylation arrays
- \end_layout
- \begin_layout Itemize
- Methylation arrays for differential methylation in rejection vs.
- healthy transplant
- \end_layout
- \begin_layout Itemize
- Adapt voom method originally designed for RNA-seq to model mean-variance
- dependence
- \end_layout
- \begin_layout Itemize
- Use sample precision weighting, duplicateCorrelation, and sva to adjust
- for other confounding factors
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Improve subsection titles in this section
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- fRMA eliminates unwanted dependence of classifier training on normalization
- strategy caused by RMA
- \end_layout
- \begin_layout Itemize
- Data set consists of training set (23 TX, 35 AR, 21 ADNR), validation set
- (23 TX, 34 AR, 21 ADNR), and external validation set gathered from public
- GEO data (37 TX, 38 AR, no ADNR), all on standard hgu133plus2 Affy arrays
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kurian2014"
- literal "true"
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Graphics
- filename graphics/PAM/predplot.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Classifier-probabilities-RMA"
- \end_inset
- \series bold
- Classifier probabilities on validation samples when normalized with RMA
- together vs.
- separately.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- When validation samples are normalized separately from training samples,
- the classifier becomes biased relative to normalizing all samples together
- (Fig.
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Classifier-probabilities-RMA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- )
- \end_layout
- \begin_layout Itemize
- Normalizing all samples together is not feasible in a clinical context,
- so ordinary RMA is unsuitable
- \end_layout
- \begin_layout Itemize
- fRMA eliminates this issue by normalizing each sample independently to the
- same quantile distribution and summarizing probes using the same weights.
- \end_layout
- \begin_layout Itemize
- Classifier performance on validation set is identical for
- \begin_inset Quotes eld
- \end_inset
- RMA together
- \begin_inset Quotes erd
- \end_inset
- and fRMA, so switching to clinically applicable normalization does not
- sacrifice accuracy
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Check the published paper for any other possibly relevant figures to include
- here.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- fRMA with custom-generated vectors
- \end_layout
- \begin_layout Itemize
- Non-standard platform hthgu133pluspm - no pre-built fRMA vectors available,
- so custom vectors must be learned from in-house data
- \end_layout
- \begin_layout Itemize
- Large body of data available for training fRMA: 341 kidney graft biopsy
- samples, 965 blood samples from graft recipients
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- But not all samples can be used (see trade-off figure)
- \end_layout
- \begin_layout Itemize
- Figure showing trade-off between more samples per group and fewer groups
- with that many samples, to justify choice of number of samples per group
- \end_layout
- \begin_layout Itemize
- pre-generated normalization vectors use ~850 samples
- \begin_inset Flex TODO Note (Margin)
- status collapsed
- \begin_layout Plain Layout
- Look up the exact numbers
- \end_layout
- \end_inset
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- , but are designed to be general across all tissues.
- The samples we have are suitable for tissue-specific normalization vectors.
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Figure: MA plot, RMA vs fRMA, to show that the normalization is appreciably
- and non-linearly different
- \end_layout
- \begin_layout Itemize
- Figure: MA plot, fRMA vs fRMA with different randomly-chosen sample subsets
- to show consistency
- \end_layout
- \begin_layout Itemize
- custom fRMA normalization improved cross-validated classifier performance
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Get a figure from Tom showing classifier performance improvement (compared
- to all-sample RMA, I guess?), if possible
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Adapting voom to methylation array data improves model fit
- \end_layout
- \begin_layout Itemize
- voom, precision weights, and sva improved model fit
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Also increased sensitivity for detecting differential methylation
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Figure showing (a) heteroskedasticity without voom, (b) voom-modeled mean-variance
- trend, and (c) homoskedastic mean-variance trend after running voom
- \end_layout
- \begin_layout Itemize
- Figure showing sample weights and their relations to
- \end_layout
- \begin_layout Itemize
- Figure showing MDS plot with and without SVA correction
- \end_layout
- \begin_layout Itemize
- Figure and/or table showing improved p-value histograms/number of significant
- genes (might need to get this from Padma)
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Itemize
- fRMA enables classifying new samples without re-normalizing the entire data
- set
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Critical for translating a classifier into clinical practice
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Methods like voom designed for RNA-seq can also help with array analysis
- \end_layout
- \begin_layout Itemize
- Extracting and modeling confounders common to many features improves model
- correspondence to known biology
- \end_layout
- \begin_layout Chapter
- Globin-blocking for more effective blood RNA-seq analysis in primate animal
- model
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Choose between above and the paper title: Optimizing yield of deep RNA sequencin
- g for gene expression profiling by globin reduction of peripheral blood
- samples from cynomolgus monkeys (Macaca fascicularis).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Chapter author list: https://tex.stackexchange.com/questions/156862/displaying-aut
- hor-for-each-chapter-in-book Every chapter gets an author list, which may
- or may not be part of a citation to a published/preprinted paper.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Preprint then cite the paper
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section*
- Abstract
- \end_layout
- \begin_layout Paragraph
- Background
- \end_layout
- \begin_layout Standard
- Primate blood contains high concentrations of globin messenger RNA.
- Globin reduction is a standard technique used to improve the expression
- results obtained by DNA microarrays on RNA from blood samples.
- However, with whole transcriptome RNA-sequencing (RNA-seq) quickly replacing
- microarrays for many applications, the impact of globin reduction for RNA-seq
- has not been previously studied.
- Moreover, no off-the-shelf kits are available for globin reduction in nonhuman
- primates.
-
- \end_layout
- \begin_layout Paragraph
- Results
- \end_layout
- \begin_layout Standard
- Here we report a protocol for RNA-seq in primate blood samples that uses
- complementary oligonucleotides to block reverse transcription of the alpha
- and beta globin genes.
- In test samples from cynomolgus monkeys (Macaca fascicularis), this globin
- blocking protocol approximately doubles the yield of informative (non-globin)
- reads by greatly reducing the fraction of globin reads, while also improving
- the consistency in sequencing depth between samples.
- The increased yield enables detection of about 2000 more genes, significantly
- increases the correlation in measured gene expression levels between samples,
- and increases the sensitivity of differential gene expression tests.
- \end_layout
- \begin_layout Paragraph
- Conclusions
- \end_layout
- \begin_layout Standard
- These results show that globin blocking significantly improves the cost-effectiv
- eness of mRNA sequencing in primate blood samples by doubling the yield
- of useful reads, allowing detection of more genes, and improving the precision
- of gene expression measurements.
- Based on these results, a globin reducing or blocking protocol is recommended
- for all RNA-seq studies of primate blood samples.
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Consider putting some of this in the Intro chapter
- \end_layout
- \begin_layout Itemize
- Cynomolgus monkeys as a model organism
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Highly related to humans
- \end_layout
- \begin_layout Itemize
- Small size and short life cycle - good research animal
- \end_layout
- \begin_layout Itemize
- Genomics resources still in development
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Inadequacy of existing blood RNA-seq protocols
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Existing protocols use a separate globin pulldown step, slowing down processing
- \end_layout
- \end_deeper
- \end_inset
- \end_layout
- \begin_layout Standard
- Increasingly, researchers are turning to high-throughput mRNA sequencing
- technologies (RNA-seq) in preference to expression microarrays for analysis
- of gene expression
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mutz2012"
- literal "false"
- \end_inset
- .
- The advantages are even greater for study of model organisms with no well-estab
- lished array platforms available, such as the cynomolgus monkey (Macaca
- fascicularis).
- High fractions of globin mRNA are naturally present in mammalian peripheral
- blood samples (up to 70% of total mRNA) and these are known to interfere
- with the results of array-based expression profiling
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Winn2010"
- literal "false"
- \end_inset
- .
- The importance of globin reduction for RNA-seq of blood has only been evaluated
- for a deepSAGE protocol on human samples
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- In the present report, we evaluated globin reduction using custom blocking
- oligonucleotides for deep RNA-seq of peripheral blood samples from a nonhuman
- primate, cynomolgus monkey, using the Illumina technology platform.
- We demonstrate that globin reduction significantly improves the cost-effectiven
- ess of RNA-seq in blood samples.
- Thus, our protocol offers a significant advantage to any investigator planning
- to use RNA-seq for gene expression profiling of nonhuman primate blood
- samples.
- Our method can be generally applied to any species by designing complementary
- oligonucleotide blocking probes to the globin gene sequences of that species.
- Indeed, any highly expressed but biologically uninformative transcripts
- can also be blocked to further increase sequencing efficiency and value
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Arnaud2016"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Subsection*
- Sample collection
- \end_layout
- \begin_layout Standard
- All research reported here was done under IACUC-approved protocols at the
- University of Miami and complied with all applicable federal and state
- regulations and ethical principles for nonhuman primate research.
- Blood draws occurred between 16 April 2012 and 18 June 2015.
- The experimental system involved intrahepatic pancreatic islet transplantation
- into Cynomolgus monkeys with induced diabetes mellitus with or without
- concomitant infusion of mesenchymal stem cells.
- Blood was collected at serial time points before and after transplantation
- into PAXgene Blood RNA tubes (PreAnalytiX/Qiagen, Valencia, CA) at the
- precise volume:volume ratio of 2.5 ml whole blood into 6.9 ml of PAXgene
- additive.
- \end_layout
- \begin_layout Subsection*
- Globin Blocking
- \end_layout
- \begin_layout Standard
- Four oligonucleotides were designed to hybridize to the 3’ end of the transcript
- s for Cynomolgus HBA1, HBA2 and HBB, with two hybridization sites for HBB
- and 2 sites for HBA (the chosen sites were identical in both HBA genes).
- All oligos were purchased from Sigma and were entirely composed of 2’O-Me
- bases with a C3 spacer positioned at the 3’ ends to prevent any polymerase
- mediated primer extension.
- \end_layout
- \begin_layout Quote
- HBA1/2 site 1: GCCCACUCAGACUUUAUUCAAAG-C3spacer
- \end_layout
- \begin_layout Quote
- HBA1/2 site 2: GGUGCAAGGAGGGGAGGAG-C3spacer
- \end_layout
- \begin_layout Quote
- HBB site 1: AAUGAAAAUAAAUGUUUUUUAUUAG-C3spacer
- \end_layout
- \begin_layout Quote
- HBB site 2: CUCAAGGCCCUUCAUAAUAUCCC-C3spacer
- \end_layout
- \begin_layout Subsection*
- RNA-seq Library Preparation
- \end_layout
- \begin_layout Standard
- Sequencing libraries were prepared with 200ng total RNA from each sample.
- Polyadenylated mRNA was selected from 200 ng aliquots of cynomolgus blood-deri
- ved total RNA using Ambion Dynabeads Oligo(dT)25 beads (Invitrogen) following
- manufacturer’s recommended protocol.
- PolyA selected RNA was then combined with 8 pmol of HBA1/2 (site 1), 8
- pmol of HBA1/2 (site 2), 12 pmol of HBB (site 1) and 12 pmol of HBB (site
- 2) oligonucleotides.
- In addition, 20 pmol of RT primer containing a portion of the Illumina
- adapter sequence (B-oligo-dTV: GAGTTCCTTGGCACCCGAGAATTCCATTTTTTTTTTTTTTTTTTTV)
- and 4 µL of 5X First Strand buffer (250 mM Tris-HCl pH 8.3, 375 mM KCl,
- 15mM MgCl2) were added in a total volume of 15 µL.
- The RNA was fragmented by heating this cocktail for 3 minutes at 95°C and
- then placed on ice.
- This was followed by the addition of 2 µL 0.1 M DTT, 1 µL RNaseOUT, 1 µL
- 10mM dNTPs 10% biotin-16 aminoallyl-2’- dUTP and 10% biotin-16 aminoallyl-2’-
- dCTP (TriLink Biotech, San Diego, CA), 1 µL Superscript II (200U/ µL, Thermo-Fi
- sher).
- A second “unblocked” library was prepared in the same way for each sample
- but replacing the blocking oligos with an equivalent volume of water.
- The reaction was carried out at 25°C for 15 minutes and 42°C for 40 minutes,
- followed by incubation at 75°C for 10 minutes to inactivate the reverse
- transcriptase.
- \end_layout
- \begin_layout Standard
- The cDNA/RNA hybrid molecules were purified using 1.8X Ampure XP beads (Agencourt
- ) following supplier’s recommended protocol.
- The cDNA/RNA hybrid was eluted in 25 µL of 10 mM Tris-HCl pH 8.0, and then
- bound to 25 µL of M280 Magnetic Streptavidin beads washed per recommended
- protocol (Thermo-Fisher).
- After 30 minutes of binding, beads were washed one time in 100 µL 0.1N NaOH
- to denature and remove the bound RNA, followed by two 100 µL washes with
- 1X TE buffer.
- \end_layout
- \begin_layout Standard
- Subsequent attachment of the 5-prime Illumina A adapter was performed by
- on-bead random primer extension of the following sequence (A-N8 primer:
- TTCAGAGTTCTACAGTCCGACGATCNNNNNNNN).
- Briefly, beads were resuspended in a 20 µL reaction containing 5 µM A-N8
- primer, 40mM Tris-HCl pH 7.5, 20mM MgCl2, 50mM NaCl, 0.325U/µL Sequenase
- 2.0 (Affymetrix, Santa Clara, CA), 0.0025U/µL inorganic pyrophosphatase (Affymetr
- ix) and 300 µM each dNTP.
- Reaction was incubated at 22°C for 30 minutes, then beads were washed 2
- times with 1X TE buffer (200µL).
- \end_layout
- \begin_layout Standard
- The magnetic streptavidin beads were resuspended in 34 µL nuclease-free
- water and added directly to a PCR tube.
- The two Illumina protocol-specified PCR primers were added at 0.53 µM (Illumina
- TruSeq Universal Primer 1 and Illumina TruSeq barcoded PCR primer 2), along
- with 40 µL 2X KAPA HiFi Hotstart ReadyMix (KAPA, Wilmington, MA) and thermocycl
- ed as follows: starting with 98°C (2 min-hold); 15 cycles of 98°C, 20sec;
- 60°C, 30sec; 72°C, 30sec; and finished with a 72°C (2 min-hold).
- \end_layout
- \begin_layout Standard
- PCR products were purified with 1X Ampure Beads following manufacturer’s
- recommended protocol.
- Libraries were then analyzed using the Agilent TapeStation and quantitation
- of desired size range was performed by “smear analysis”.
- Samples were pooled in equimolar batches of 16 samples.
- Pooled libraries were size selected on 2% agarose gels (E-Gel EX Agarose
- Gels; Thermo-Fisher).
- Products were cut between 250 and 350 bp (corresponding to insert sizes
- of 130 to 230 bp).
- Finished library pools were then sequenced on the Illumina NextSeq500 instrumen
- t with 75 base read lengths.
-
- \end_layout
- \begin_layout Subsection*
- Read alignment and counting
- \end_layout
- \begin_layout Standard
- Reads were aligned to the cynomolgus genome using STAR
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Dobin2013,Wilson2013"
- literal "false"
- \end_inset
- .
- Counts of uniquely mapped reads were obtained for every gene in each sample
- with the “featureCounts” function from the Rsubread package, using each
- of the three possibilities for the “strandSpecific” option: sense, antisense,
- and unstranded
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liao2014"
- literal "false"
- \end_inset
- .
- A few artifacts in the cynomolgus genome annotation complicated read counting.
- First, no ortholog is annotated for alpha globin in the cynomolgus genome,
- presumably because the human genome has two alpha globin genes with nearly
- identical sequences, making the orthology relationship ambiguous.
- However, two loci in the cynomolgus genome are annotated as “hemoglobin subunit alpha-lik
- e” (LOC102136192 and LOC102136846).
- LOC102136192 is annotated as a pseudogene while LOC102136846 is annotated
- as protein-coding.
- Our globin reduction protocol was designed to include blocking of these
- two genes.
- Indeed, these two genes have almost the same read counts in each library
- as the properly-annotated HBB gene and much larger counts than any other
- gene in the unblocked libraries, giving confidence that reads derived from
- the real alpha globin are mapping to both genes.
- Thus, reads from both of these loci were counted as alpha globin reads
- in all further analyses.
- The second artifact is a small, uncharacterized non-coding RNA gene (LOC1021365
- 91), which overlaps the HBA-like gene (LOC102136192) on the opposite strand.
- If counting is not performed in stranded mode (or if a non-strand-specific
- sequencing protocol is used), many reads mapping to the globin gene will
- be discarded as ambiguous due to their overlap with this ncRNA gene, resulting
- in significant undercounting of globin reads.
- Therefore, stranded sense counts were used for all further analysis in
- the present study to ensure that we accurately accounted for globin transcript
- reduction.
- However, we note that stranded reads are not necessary for RNA-seq using
- our protocol in standard practice.
-
- \end_layout
- \begin_layout Subsection*
- Normalization and Exploratory Data Analysis
- \end_layout
- \begin_layout Standard
- Libraries were normalized by computing scaling factors using the edgeR package’s
- Trimmed Mean of M-values method
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Robinson2010"
- literal "false"
- \end_inset
- .
- Log2 counts per million values (logCPM) were calculated using the cpm function
- in edgeR for individual samples and aveLogCPM function for averages across
- groups of samples, using those functions’ default prior count values to
- avoid taking the logarithm of 0.
- Genes were considered “present” if their average normalized logCPM values
- across all libraries were at least -1.
- Normalizing for gene length was unnecessary because the sequencing protocol
- is 3’-biased and hence the expected read count for each gene is related
- to the transcript’s copy number but not its length.
- \end_layout
- \begin_layout Standard
- In order to assess the effect of blocking on reproducibility, Pearson and
- Spearman correlation coefficients were computed between the logCPM values
- for every pair of libraries within the globin-blocked (GB) and unblocked
- (non-GB) groups, and edgeR's “estimateDisp” function was used to compute
- negative binomial dispersions separately for the two groups
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Chen2014"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Subsection*
- Differential Expression Analysis
- \end_layout
- \begin_layout Standard
- All tests for differential gene expression were performed using edgeR, by
- first fitting a negative binomial generalized linear model to the counts
- and normalization factors and then performing a quasi-likelihood F-test
- with robust estimation of outlier gene dispersions
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lund2012,Phipson2016"
- literal "false"
- \end_inset
- .
- To investigate the effects of globin blocking on each gene, an additive
- model was fit to the full data with coefficients for globin blocking and
- SampleID.
- To test the effect of globin blocking on detection of differentially expressed
- genes, the GB samples and non-GB samples were each analyzed independently
- as follows: for each animal with both a pre-transplant and a post-transplant
- time point in the data set, the pre-transplant sample and the earliest
- post-transplant sample were selected, and all others were excluded, yielding
- a pre-/post-transplant pair of samples for each animal (N=7 animals with
- paired samples).
- These samples were analyzed for pre-transplant vs.
- post-transplant differential gene expression while controlling for inter-animal
- variation using an additive model with coefficients for transplant and
- animal ID.
- In all analyses, p-values were adjusted using the Benjamini-Hochberg procedure
- for FDR correction
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Itemize
- New blood RNA-seq protocol to block reverse transcription of globin genes
- \end_layout
- \begin_layout Itemize
- Blood RNA-seq time course after transplants with/without MSC infusion
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Subsection*
- Globin blocking yields a larger and more consistent fraction of useful reads
- \end_layout
- \begin_layout Standard
- The objective of the present study was to validate a new protocol for deep
- RNA-seq of whole blood drawn into PAXgene tubes from cynomolgus monkeys
- undergoing islet transplantation, with particular focus on minimizing the
- loss of useful sequencing space to uninformative globin reads.
- The details of the analysis with respect to transplant outcomes and the
- impact of mesenchymal stem cell treatment will be reported in a separate
- manuscript (in preparation).
- To focus on the efficacy of our globin blocking protocol, 37 blood samples,
- 16 from pre-transplant and 21 from post-transplant time points, were each
- prepped once with and once without globin blocking oligos, and were then
- sequenced on an Illumina NextSeq500 instrument.
- The number of reads aligning to each gene in the cynomolgus genome was
- counted.
- Table 1 summarizes the distribution of read fractions among the GB and
- non-GB libraries.
- In the libraries with no globin blocking, globin reads made up an average
- of 44.6% of total input reads, while reads assigned to all other genes made
- up an average of 26.3%.
- The remaining reads either aligned to intergenic regions (that include
- long non-coding RNAs) or did not align with any annotated transcripts in
- the current build of the cynomolgus genome.
- In the GB libraries, globin reads made up only 3.48% and reads assigned
- to all other genes increased to 50.4%.
- Thus, globin blocking resulted in a 92.2% reduction in globin reads and
- a 91.6% increase in yield of useful non-globin reads.
- \end_layout
- \begin_layout Standard
- This reduction is not quite as efficient as the previous analysis showed
- for human samples by DeepSAGE (<0.4% globin reads after globin reduction)
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- Nonetheless, this degree of globin reduction is sufficient to nearly double
- the yield of useful reads.
- Thus, globin blocking cuts the required sequencing effort (and costs) to
- achieve a target coverage depth by almost 50%.
- Consistent with this near doubling of yield, the average difference in
- un-normalized logCPM across all genes between the GB libraries and non-GB
- libraries is approximately 1 (mean = 1.01, median = 1.08), an overall 2-fold
- increase.
- Un-normalized values are used here because the TMM normalization correctly
- identifies this 2-fold difference as biologically irrelevant and removes
- it.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure1 - globin-fractions.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Fraction of genic reads in each sample aligned to non-globin genes, with
- and without globin blocking (GB).
-
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Fraction-of-genic-reads"
- \end_inset
- Fraction of genic reads in each sample aligned to non-globin genes, with
- and without globin blocking (GB).
- \series default
- All reads in each sequencing library were aligned to the cyno genome, and
- the number of reads uniquely aligning to each gene was counted.
- For each sample, counts were summed separately for all globin genes and
- for the remainder of the genes (non-globin genes), and the fraction of
- genic reads aligned to non-globin genes was computed.
- Each point represents an individual sample.
- Gray + signs indicate the means for globin-blocked libraries and unblocked
- libraries.
- The overall distribution for each group is represented as a notched box
- plot.
- Points are randomly spread vertically to avoid excessive overlapping.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- placement p
- wide false
- sideways true
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="7">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Percent of Total Reads
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Percent of Genic Reads
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Non-globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- All Genic Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- All Aligned Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Non-globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Globin Reads
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 50.4% ± 6.82
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 3.48% ± 2.94
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 53.9% ± 6.81
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 89.7% ± 2.40
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 93.5% ± 5.25
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 6.49% ± 5.25
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 26.3% ± 8.95
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 44.6% ± 16.6
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 70.1% ± 9.38
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 90.7% ± 5.16
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 38.8% ± 17.1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 61.2% ± 17.1
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Fractions of reads mapping to genomic features in GB and non-GB samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Fractions-of-reads"
- \end_inset
- Fractions of reads mapping to genomic features in GB and non-GB samples.
-
- \series default
- All values are given as mean ± standard deviation.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Another important aspect is that the standard deviations in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Fractions-of-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- are uniformly smaller in the GB samples than the non-GB ones, indicating
- much greater consistency of yield.
- This is best seen in the percentage of non-globin reads as a fraction of
- total reads aligned to annotated genes (genic reads).
- For the non-GB samples, this measure ranges from 10.9% to 80.9%, while for
- the GB samples it ranges from 81.9% to 99.9% (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Fraction-of-genic-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This means that for applications where it is critical that each sample
- achieve a specified minimum coverage in order to provide useful information,
- it would be necessary to budget up to 10 times the sequencing depth per
- sample without globin blocking, even though the average yield improvement
- for globin blocking is only 2-fold, because every sample has a chance of
- being 90% globin and 10% useful reads.
- Hence, the more consistent behavior of GB samples makes planning an experiment
- easier and more efficient because it eliminates the need to over-sequence
- every sample in order to guard against the worst case of a high-globin
- fraction.
- \end_layout
- \begin_layout Subsection*
- Globin blocking lowers the noise floor and allows detection of about 2000
- more genes
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Remove redundant titles from figures
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure2 - aveLogCPM-colored.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Distributions of average group gene abundances when normalized separately
- or together.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:logcpm-dists"
- \end_inset
- Distributions of average group gene abundances when normalized separately
- or together.
- \series default
- All reads in each sequencing library were aligned to the cyno genome, and
- the number of reads uniquely aligning to each gene was counted.
- Genes with zero counts in all libraries were discarded.
- Libraries were normalized using the TMM method.
- Libraries were split into globin-blocked (GB) and non-GB groups and the
- average abundance for each gene in both groups, measured in log2 counts
- per million reads counted, was computed using the aveLogCPM function.
- The distribution of average gene logCPM values was plotted for both groups
- using a kernel density plot to approximate a continuous distribution.
- The GB logCPM distributions are marked in red, and the non-GB distributions in blue.
- The black vertical line denotes the chosen detection threshold of -1.
- Top panel: Libraries were split into GB and non-GB groups first and normalized
- separately.
- Bottom panel: Libraries were all normalized together first and then split
- into groups.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Since globin blocking yields more usable sequencing depth, it should also
- allow detection of more genes at any given threshold.
- When we looked at the distribution of average normalized logCPM values
- across all libraries for genes with at least one read assigned to them,
- we observed the expected bimodal distribution, with a high-abundance "signal"
- peak representing detected genes and a low-abundance "noise" peak representing
- genes whose read count did not rise above the noise floor (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Consistent with the 2-fold increase in raw counts assigned to non-globin
- genes, the signal peak for GB samples is shifted to the right relative
- to the non-GB signal peak.
- When all the samples are normalized together, this difference is normalized
- out, lining up the signal peaks, and this reveals that, as expected, the
- noise floor for the GB samples is about 2-fold lower.
- This greater separation between signal and noise peaks in the GB samples
- means that low-expression genes should be more easily detected and more
- precisely quantified than in the non-GB samples.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure3 - detection.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Gene detections as a function of abundance thresholds in globin-blocked
- (GB) and non-GB samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Gene-detections"
- \end_inset
- Gene detections as a function of abundance thresholds in globin-blocked
- (GB) and non-GB samples.
- \series default
- Average abundance (logCPM,
- \begin_inset Formula $\log_{2}$
- \end_inset
- counts per million reads counted) was computed by separate group normalization
- as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- for both the GB and non-GB groups, as well as for all samples considered
- as one large group.
- For every integer threshold from -2 to 3, the number of genes detected
- at or above that logCPM threshold was plotted for each group.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Based on these distributions, we selected a detection threshold of -1, which
- is approximately the leftmost edge of the trough between the signal and
- noise peaks.
- This represents the most liberal possible detection threshold that doesn't
- call substantial numbers of noise genes as detected.
- Among the full dataset, 13429 genes were detected at this threshold, and
- 22276 were not.
- When considering the GB libraries and non-GB libraries separately and re-comput
- ing normalization factors independently within each group, 14535 genes were
- detected in the GB libraries while only 12460 were detected in the non-GB
- libraries.
- Thus, GB allowed the detection of about 2000 extra genes that were buried under
- the noise floor without GB.
- This pattern of at least 2000 additional genes detected with GB was also
- consistent across a wide range of possible detection thresholds, from -2
- to 3 (see Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Gene-detections"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- \end_layout
- \begin_layout Subsection*
- Globin blocking does not add significant additional noise or decrease sample
- quality
- \end_layout
- \begin_layout Standard
- One potential worry is that the globin blocking protocol could perturb the
- levels of non-globin genes.
- There are two kinds of possible perturbations: systematic and random.
- The former is not a major concern for detection of differential expression,
- since a 2-fold change in every sample has no effect on the relative fold
- change between samples.
- In contrast, random perturbations would increase the noise and obscure
- the signal in the dataset, reducing the capacity to detect differential
- expression.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure4 - maplot-colored.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- MA plot showing effects of globin blocking on each gene's abundance.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-plot"
- \end_inset
- \series bold
- MA plot showing effects of globin blocking on each gene's abundance.
-
- \series default
- All libraries were normalized together as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and genes with an average logCPM below -1 were filtered out.
- Each remaining gene was tested for differential abundance with respect
- to globin blocking (GB) using edgeR’s quasi-likelihood F-test, fitting a
- negative binomial generalized linear model to the table of read counts in each
- library.
- For each gene, edgeR reported average abundance (logCPM),
- \begin_inset Formula $\log_{2}$
- \end_inset
- fold change (logFC), p-value, and Benjamini-Hochberg adjusted false discovery
- rate (FDR).
- Each gene's logFC was plotted against its logCPM, colored by FDR.
- Red points are significant at ≤10% FDR, and blue are not significant at
- that threshold.
- The alpha and beta globin genes targeted for blocking are marked with large
- triangles, while all other genes are represented as small points.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Standardize on
- \begin_inset Quotes eld
- \end_inset
- log2
- \begin_inset Quotes erd
- \end_inset
- notation
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- The data do indeed show small systematic perturbations in gene levels (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-plot"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Other than the 3 designated alpha and beta globin genes, two other genes
- stand out as having especially large negative log fold changes: HBD and
- LOC1021365.
- HBD, delta globin, is most likely targeted by the blocking oligos due to
- high sequence homology with the other globin genes.
- LOC1021365 is the aforementioned ncRNA that is reverse-complementary to
- one of the alpha-like genes and that would be expected to be removed during
- the globin blocking step.
- All other genes appear in a cluster centered vertically at 0, and the vast
- majority of genes in this cluster show an absolute log2(FC) of 0.5 or less.
- Nevertheless, many of these small perturbations are still statistically
- significant, indicating that the globin blocking oligos likely cause very
- small but non-zero systematic perturbations in measured gene expression
- levels.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure5 - corrplot.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Comparison of inter-sample gene abundance correlations with and without
- globin blocking.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:gene-abundance-correlations"
- \end_inset
- Comparison of inter-sample gene abundance correlations with and without
- globin blocking (GB).
- \series default
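- All libraries were normalized together as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and genes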
- with an average abundance (logCPM, log2 counts per million reads counted)
- less than -1 were filtered out.
- Each gene’s logCPM was computed in each library using the edgeR cpm function.
- For each pair of biological samples, the Pearson correlation between those
- samples' GB libraries was plotted against the correlation between the same
- samples’ non-GB libraries.
- Each point represents a unique pair of samples.
- The solid gray line shows a quantile-quantile plot of the distribution of GB
- correlations vs.
- that of non-GB correlations.
- The thin dashed line is the identity line, provided for reference.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To evaluate the possibility of globin blocking causing random perturbations
- and reducing sample quality, we computed the Pearson correlation between
- logCPM values for every pair of samples with and without GB and plotted
- them against each other (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:gene-abundance-correlations"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The plot indicated that the GB libraries have higher sample-to-sample correlati
- ons than the non-GB libraries.
- Parametric and nonparametric tests for differences between the correlations
- with and without GB both confirmed that this difference was highly significant
- (2-sided paired t-test: t = 37.2, df = 665, P ≪ 2.2e-16; 2-sided Wilcoxon
- signed-rank test: V = 2195, P ≪ 2.2e-16).
- Performing the same tests on the Spearman correlations gave the same conclusion
- (t-test: t = 26.8, df = 665, P ≪ 2.2e-16; signed-rank test: V = 8781, P ≪ 2.2e-16).
- We used the edgeR package to compute the overall biological coefficient
- of variation (BCV) for GB and non-GB libraries, and found that globin blocking
- resulted in a negligible increase in the BCV (0.417 with GB vs.
- 0.400 without).
- The near equality of the BCVs for both sets indicates that the higher correlati
- ons in the GB libraries are most likely a result of the increased yield
- of useful reads, which reduces the contribution of Poisson counting uncertainty
- to the overall variance of the logCPM values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCarthy2012"
- literal "false"
- \end_inset
- .
- This improves the precision of expression measurements and more than offsets
- the negligible increase in BCV.
- \end_layout
- \begin_layout Subsection*
- More differentially expressed genes are detected with globin blocking
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="5">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- No Globin Blocking
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Up
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- NS
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Down
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="3" alignment="center" valignment="middle" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Globin-Blocking
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Up
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 231
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 515
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 2
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="4" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- NS
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 160
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 11235
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 136
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="4" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Down
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 548
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 127
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status open
- \begin_layout Plain Layout
- Comparison of significantly differentially expressed genes with and without
- globin blocking.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Comparison-of-significant"
- \end_inset
- Comparison of significantly differentially expressed genes with and without
- globin blocking.
- \series default
- Up, Down: Genes significantly up/down-regulated in post-transplant samples
- relative to pre-transplant samples, with a false discovery rate of 10%
- or less.
- NS: Non-significant genes (false discovery rate greater than 10%).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To compare performance on differential gene expression tests, we took subsets
- of both the GB and non-GB libraries with exactly one pre-transplant and
- one post-transplant sample for each animal that had paired samples available
- for analysis (N=7 animals, N=14 samples in each subset).
- The same test for pre- vs.
- post-transplant differential gene expression was performed on the same
- 7 pairs of samples from GB libraries and non-GB libraries, in each case
- using an FDR of 10% as the threshold of significance.
- Out of 12954 genes that passed the detection threshold in both subsets,
- 358 were called significantly differentially expressed in the same direction
- in both sets; 1063 were differentially expressed in the GB set only; 296
- were differentially expressed in the non-GB set only; 2 genes were called
- significantly up in the GB set but significantly down in the non-GB set;
- and the remaining 11235 were not called differentially expressed in either
- set.
- These data are summarized in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Comparison-of-significant"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The differences in BCV calculated by EdgeR for these subsets of samples
- were negligible (BCV = 0.302 for GB and 0.297 for non-GB).
- \end_layout
- \begin_layout Standard
- The key point is that the GB data result in substantially more differentially
- expressed calls than the non-GB data.
- Since there is no gold standard for this dataset, it is impossible to be
- certain whether this is due to under-calling of differential expression
- in the non-GB samples or over-calling in the GB samples.
- However, given that both datasets are derived from the same biological
- samples and have nearly equal BCVs, it is more likely that the additional
- DE calls in the GB samples are genuine detections that were enabled
- by the higher sequencing depth and measurement precision of the GB samples.
- Note that the same set of genes was considered in both subsets, so the
- larger number of differentially expressed gene calls in the GB data set
- reflects a greater sensitivity to detect significant differential gene
- expression and not simply the larger total number of detected genes in
- GB samples described earlier.
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Standard
- The original experience with whole blood gene expression profiling on DNA
- microarrays demonstrated that the high concentration of globin transcripts
- impaired the detection of genes with relatively low expression
- levels, in effect significantly reducing the sensitivity of the assay.
- To address this limitation, commercial protocols for globin reduction were
- developed based on strategies to block globin transcript amplification
- during labeling or physically removing globin transcripts by affinity bead
- methods
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Winn2010"
- literal "false"
- \end_inset
- .
- More recently, using the latest generation of labeling protocols and arrays,
- it was determined that globin reduction was no longer necessary to obtain
- sufficient sensitivity to detect differential transcript expression
- \begin_inset CommandInset citation
- LatexCommand cite
- key "NuGEN2010"
- literal "false"
- \end_inset
- .
- However, we are not aware of any publications using these currently available
- protocols with the latest generation of microarrays that actually compare
- the detection sensitivity with and without globin reduction.
- Nevertheless, in practice this has now been generally adopted, driven
- primarily by concerns for cost control.
- The main objective of our work was to directly test the impact of globin
- gene transcripts and a new globin blocking protocol for application to
- the newest generation of differential gene expression profiling determined
- using next generation sequencing.
-
- \end_layout
- \begin_layout Standard
- The challenge of doing global gene expression profiling in cynomolgus monkeys
- is that the currently available arrays were never designed to comprehensively
- cover this genome and have not been updated since the first assemblies
- of the cynomolgus genome were published.
- Therefore, we determined that the best strategy for peripheral blood profiling
- was to do deep RNA-seq and inform the workflow using the latest available
- genome assembly and annotation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Wilson2013"
- literal "false"
- \end_inset
- .
- However, it was not immediately clear whether globin reduction was necessary
- for RNA-seq or how much improvement in efficiency or sensitivity to detect
- differential gene expression would be achieved for the added cost and work.
-
- \end_layout
- \begin_layout Standard
- We only found one report that demonstrated that globin reduction significantly
- improved the effective read yields for sequencing of human peripheral blood
- cell RNA using a DeepSAGE protocol
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- The approach to DeepSAGE involves two different restriction enzymes that
- purify and then tag small fragments of transcripts at specific locations
- and thus significantly reduces the complexity of the transcriptome.
- Therefore, we could not determine how DeepSAGE results would translate
- to the common strategy in the field for assaying the entire transcript
- population by whole-transcriptome 3’-end RNA-seq.
- Furthermore, if globin reduction is necessary, we also needed a globin
- reduction method specific to cynomolgus globin sequences that would work
- in an organism for which no kit is available off the shelf.
- \end_layout
- \begin_layout Standard
- As mentioned above, the addition of globin blocking oligos has a very small
- impact on measured gene expression levels.
- However, this is a non-issue for the purposes of differential expression
- testing, since a systematic change in a gene in all samples does not affect
- relative expression levels between samples.
- However, we must acknowledge that simple comparisons of gene expression
- data obtained by GB and non-GB protocols are not possible without additional
- normalization.
-
- \end_layout
- \begin_layout Standard
- More importantly, globin blocking not only nearly doubles the yield of usable
- reads, it also increases inter-sample correlation and sensitivity to detect
- differential gene expression relative to the same set of samples profiled
- without blocking.
- In addition, globin blocking does not add a significant amount of random
- noise to the data.
- Globin blocking thus represents a cost-effective way to squeeze more data
- and statistical power out of the same blood samples and the same amount
- of sequencing.
- In conclusion, globin reduction greatly increases the yield of useful RNA-seq
- reads mapping to the rest of the genome, with minimal perturbations in
- the relative levels of non-globin genes.
- Based on these results, globin transcript reduction using sequence-specific,
- complementary blocking oligonucleotides is recommended for all deep RNA-seq
- of cynomolgus and other nonhuman primate blood samples.
- \end_layout
- \begin_layout Chapter
- Future Directions
- \end_layout
- \begin_layout Itemize
- Study other epigenetic marks in more contexts
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- DNA methylation, histone marks, chromatin accessibility & conformation in
- CD4 T-cells
- \end_layout
- \begin_layout Itemize
- Also look at other types of lymphocytes: CD8 T-cells, B-cells, NK cells
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Investigate epigenetic regulation of lifespan extension in
- \emph on
- C.
- elegans
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- ChIP-seq of important transcriptional regulators to see how transcriptional
- drift is prevented
- \end_layout
- \end_deeper
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- % Use "References" instead of "Bibliography"
- \end_layout
- \begin_layout Plain Layout
- \backslash
- renewcommand{
- \backslash
- bibname}{References}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Check bib entry formatting & sort order
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset bibtex
- LatexCommand bibtex
- btprint "btPrintCited"
- bibfiles "refs"
- options "bibtotoc,unsrt"
- \end_inset
- \end_layout
- \end_body
- \end_document
|