thesis.lyx 261 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915
  1. #LyX 2.3 created this file. For more info see http://www.lyx.org/
  2. \lyxformat 544
  3. \begin_document
  4. \begin_header
  5. \save_transient_properties true
  6. \origin unavailable
  7. \textclass extbook
  8. \begin_preamble
  9. % List all used files in log output
  10. \listfiles
  11. % Add a DRAFT watermark
  12. \usepackage{draftwatermark}
  13. \SetWatermarkLightness{0.97}
  14. \SetWatermarkScale{1}
  15. % Set up required header format
  16. \usepackage{fancyhdr}
  17. \pagestyle{fancy}
  18. \renewcommand{\headrulewidth}{0pt}
  19. \rhead{}
  20. \lhead{}
  21. \rfoot{}
  22. \lfoot{}
  23. \cfoot{\thepage} % Page number bottom center
  24. % Allow FloatBarrier command
  25. \usepackage{placeins}
  26. % Allow landscape pages
  27. \usepackage{pdflscape}
  28. % Allow doing things after the end of the current page
  29. % (to avoid landscape figures breaking up text)
  30. \usepackage{afterpage}
  31. % This one breaks subfigs so it's disabled
  32. % https://tex.stackexchange.com/questions/65680/automatically-bold-first-sentence-of-a-floats-caption
  33. \end_preamble
  34. \use_default_options true
  35. \begin_modules
  36. todonotes
  37. \end_modules
  38. \maintain_unincluded_children false
  39. \language english
  40. \language_package default
  41. \inputencoding utf8
  42. \fontencoding default
  43. \font_roman "default" "default"
  44. \font_sans "default" "default"
  45. \font_typewriter "default" "default"
  46. \font_math "auto" "auto"
  47. \font_default_family default
  48. \use_non_tex_fonts false
  49. \font_sc false
  50. \font_osf false
  51. \font_sf_scale 100 100
  52. \font_tt_scale 100 100
  53. \use_microtype false
  54. \use_dash_ligatures true
  55. \graphics default
  56. \default_output_format pdf4
  57. \output_sync 0
  58. \bibtex_command biber
  59. \index_command default
  60. \paperfontsize 12
  61. \spacing double
  62. \use_hyperref true
  63. \pdf_bookmarks true
  64. \pdf_bookmarksnumbered false
  65. \pdf_bookmarksopen false
  66. \pdf_bookmarksopenlevel 1
  67. \pdf_breaklinks false
  68. \pdf_pdfborder false
  69. \pdf_colorlinks false
  70. \pdf_backref false
  71. \pdf_pdfusetitle true
  72. \papersize letterpaper
  73. \use_geometry true
  74. \use_package amsmath 1
  75. \use_package amssymb 1
  76. \use_package cancel 1
  77. \use_package esint 1
  78. \use_package mathdots 1
  79. \use_package mathtools 1
  80. \use_package mhchem 1
  81. \use_package stackrel 1
  82. \use_package stmaryrd 1
  83. \use_package undertilde 1
  84. \cite_engine biblatex
  85. \cite_engine_type authoryear
  86. \biblio_style plain
  87. \biblatex_bibstyle authoryear
  88. \biblatex_citestyle numeric
  89. \use_bibtopic false
  90. \use_indices false
  91. \paperorientation portrait
  92. \suppress_date false
  93. \justification true
  94. \use_refstyle 1
  95. \use_minted 0
  96. \index Index
  97. \shortcut idx
  98. \color #008000
  99. \end_index
  100. \leftmargin 1.5in
  101. \topmargin 1in
  102. \rightmargin 1in
  103. \bottommargin 1in
  104. \secnumdepth 3
  105. \tocdepth 3
  106. \paragraph_separation indent
  107. \paragraph_indentation default
  108. \is_math_indent 0
  109. \math_numbering_side default
  110. \quotes_style english
  111. \dynamic_quotes 0
  112. \papercolumns 1
  113. \papersides 1
  114. \paperpagestyle default
  115. \tracking_changes false
  116. \output_changes false
  117. \html_math_output 0
  118. \html_css_as_file 0
  119. \html_be_strict false
  120. \end_header
  121. \begin_body
  122. \begin_layout Title
  123. Bioinformatic analysis of complex, high-throughput genomic and epigenomic
  124. data in the context of immunology and transplant rejection
  125. \end_layout
  126. \begin_layout Author
  127. A thesis presented
  128. \begin_inset Newline newline
  129. \end_inset
  130. by
  131. \begin_inset Newline newline
  132. \end_inset
  133. Ryan C.
  134. Thompson
  135. \begin_inset Newline newline
  136. \end_inset
  137. to
  138. \begin_inset Newline newline
  139. \end_inset
  140. The Scripps Research Institute Graduate Program
  141. \begin_inset Newline newline
  142. \end_inset
  143. in partial fulfillment of the requirements for the degree of
  144. \begin_inset Newline newline
  145. \end_inset
  146. Doctor of Philosophy in the subject of Biology
  147. \begin_inset Newline newline
  148. \end_inset
  149. for
  150. \begin_inset Newline newline
  151. \end_inset
  152. The Scripps Research Institute
  153. \begin_inset Newline newline
  154. \end_inset
  155. La Jolla, California
  156. \end_layout
  157. \begin_layout Date
  158. October 2019
  159. \end_layout
  160. \begin_layout Standard
  161. [Copyright notice]
  162. \end_layout
  163. \begin_layout Standard
  164. [Thesis acceptance form]
  165. \end_layout
  166. \begin_layout Standard
  167. [Dedication]
  168. \end_layout
  169. \begin_layout Standard
  170. [Acknowledgements]
  171. \end_layout
  172. \begin_layout Standard
  173. \begin_inset CommandInset toc
  174. LatexCommand tableofcontents
  175. \end_inset
  176. \end_layout
  177. \begin_layout Standard
  178. \begin_inset FloatList table
  179. \end_inset
  180. \end_layout
  181. \begin_layout Standard
  182. \begin_inset FloatList figure
  183. \end_inset
  184. \end_layout
  185. \begin_layout Standard
  186. [List of Abbreviations]
  187. \end_layout
  188. \begin_layout List of TODOs
  189. \end_layout
  190. \begin_layout Standard
  191. \begin_inset Flex TODO Note (inline)
  192. status open
  193. \begin_layout Plain Layout
  194. Check all figures to make sure they fit on the page with their legends.
  195. \end_layout
  196. \end_inset
  197. \end_layout
  198. \begin_layout Standard
  199. \begin_inset Flex TODO Note (inline)
  200. status open
  201. \begin_layout Plain Layout
  202. Search and replace: naive -> naïve
  203. \end_layout
  204. \end_inset
  205. \end_layout
  206. \begin_layout Standard
  207. \begin_inset Flex TODO Note (inline)
  208. status open
  209. \begin_layout Plain Layout
  210. Look into auto-generated nomenclature list: https://wiki.lyx.org/Tips/Nomenclature.
  211. Otherwise, do a manual pass for all abbreviations.
  212. Do nomenclature/abbreviations independently for each chapter.
  213. \end_layout
  214. \end_inset
  215. \end_layout
  216. \begin_layout Standard
  217. \begin_inset Flex TODO Note (inline)
  218. status open
  219. \begin_layout Plain Layout
  220. Make all descriptions consistent in terms of
  221. \begin_inset Quotes eld
  222. \end_inset
  223. we did X
  224. \begin_inset Quotes erd
  225. \end_inset
  226. vs
  227. \begin_inset Quotes eld
  228. \end_inset
  229. X was done
  230. \begin_inset Quotes erd
  231. \end_inset
  232. .
  233. \end_layout
  234. \end_inset
  235. \end_layout
  236. \begin_layout Chapter*
  237. Abstract
  238. \end_layout
  239. \begin_layout Standard
  240. \begin_inset Note Note
  241. status open
  242. \begin_layout Plain Layout
  243. It is included as an integral part of the thesis and should immediately
  244. precede the introduction.
  245. \end_layout
  246. \begin_layout Plain Layout
  247. Preparing your Abstract.
  248. Your abstract (a succinct description of your work) is limited to 350 words.
  249. UMI will shorten it if they must; please do not exceed the limit.
  250. \end_layout
  251. \begin_layout Itemize
  252. Include pertinent place names, names of persons (in full), and other proper
  253. nouns.
  254. These are useful in automated retrieval.
  255. \end_layout
  256. \begin_layout Itemize
  257. Display symbols, as well as foreign words and phrases, clearly and accurately.
  258. Include transliterations for characters other than Roman and Greek letters
  259. and Arabic numerals.
  260. Include accents and diacritical marks.
  261. \end_layout
  262. \begin_layout Itemize
  263. Do not include graphs, charts, tables, or illustrations in your abstract.
  264. \end_layout
  265. \end_inset
  266. \end_layout
  267. \begin_layout Chapter
  268. Introduction
  269. \end_layout
  270. \begin_layout Section
  271. Background & Significance
  272. \end_layout
  273. \begin_layout Subsection
  274. Biological motivation
  275. \end_layout
  276. \begin_layout Itemize
  277. Rejection is the major long-term threat to organ and tissue grafts
  278. \end_layout
  279. \begin_deeper
  280. \begin_layout Itemize
  281. Common mechanisms of rejection
  282. \end_layout
  283. \begin_layout Itemize
  284. Effective immune suppression requires monitoring for rejection and tuning
  285. \end_layout
  286. \begin_layout Itemize
  287. Current tests for rejection (tissue biopsy) are invasive and biased
  288. \end_layout
  289. \begin_layout Itemize
  290. A blood test based on microarrays would be less biased and invasive
  291. \end_layout
  292. \end_deeper
  293. \begin_layout Itemize
  294. Memory cells are resistant to immune suppression
  295. \end_layout
  296. \begin_deeper
  297. \begin_layout Itemize
  298. Mechanisms of resistance in memory cells are poorly understood
  299. \end_layout
  300. \begin_layout Itemize
  301. A better understanding of immune memory formation is needed
  302. \end_layout
  303. \end_deeper
  304. \begin_layout Itemize
  305. Mesenchymal stem cell infusion is a promising new treatment to prevent/delay
  306. rejection
  307. \end_layout
  308. \begin_deeper
  309. \begin_layout Itemize
  310. Demonstrated in mice, but not yet in primates
  311. \end_layout
  312. \begin_layout Itemize
  313. Mechanism currently unknown, but MSCs are known to be immunomodulatory
  314. \end_layout
  315. \end_deeper
  316. \begin_layout Subsection
  317. Overview of bioinformatic analysis methods
  318. \end_layout
  319. \begin_layout Standard
  320. An overview of all the methods used, including what problem they solve,
  321. what assumptions they make, and a basic description of how they work.
  322. \end_layout
  323. \begin_layout Itemize
  324. ChIP-seq Peak calling
  325. \end_layout
  326. \begin_deeper
  327. \begin_layout Itemize
  328. Cross-correlation analysis to determine fragment size
  329. \end_layout
  330. \begin_layout Itemize
  331. Broad vs narrow peaks
  332. \end_layout
  333. \begin_layout Itemize
  334. SICER for broad peaks
  335. \end_layout
  336. \begin_layout Itemize
  337. IDR for biologically reproducible peaks
  338. \end_layout
  339. \begin_layout Itemize
  340. csaw peak filtering guidelines for unbiased downstream analysis
  341. \end_layout
  342. \end_deeper
  343. \begin_layout Itemize
  344. Normalization is non-trivial and application-dependent
  345. \end_layout
  346. \begin_deeper
  347. \begin_layout Itemize
  348. Expression arrays: RMA & fRMA; why fRMA is needed
  349. \end_layout
  350. \begin_layout Itemize
  351. Methylation arrays: M-value transformation approximates normal data but
  352. induces heteroskedasticity
  353. \end_layout
  354. \begin_layout Itemize
  355. RNA-seq: normalize based on assumption that the average gene is not changing
  356. \end_layout
  357. \begin_layout Itemize
  358. ChIP-seq: complex with many considerations, dependent on experimental methods,
  359. biological system, and analysis goals
  360. \end_layout
  361. \end_deeper
  362. \begin_layout Itemize
  363. Limma: The standard linear modeling framework for genomics
  364. \end_layout
  365. \begin_deeper
  366. \begin_layout Itemize
  367. empirical Bayes variance modeling: limma's core feature
  368. \end_layout
  369. \begin_layout Itemize
  370. edgeR & DESeq2: Extend with negative binomial GLM for RNA-seq and other
  371. count data
  372. \end_layout
  373. \begin_layout Itemize
  374. voom: Extend with precision weights to model mean-variance trend
  375. \end_layout
  376. \begin_layout Itemize
  377. arrayWeights and duplicateCorrelation to handle complex variance structures
  378. \end_layout
  379. \end_deeper
  380. \begin_layout Itemize
  381. sva and ComBat for batch correction
  382. \end_layout
  383. \begin_layout Itemize
  384. Factor analysis: PCA, MDS, MOFA
  385. \end_layout
  386. \begin_deeper
  387. \begin_layout Itemize
  388. Batch-corrected PCA is informative, but careful application is required
  389. to avoid bias
  390. \end_layout
  391. \end_deeper
  392. \begin_layout Itemize
  393. Gene set analysis: camera and SPIA
  394. \end_layout
  395. \begin_layout Section
  396. Innovation
  397. \end_layout
  398. \begin_layout Itemize
  399. MSC infusion to improve transplant outcomes (prevent/delay rejection)
  400. \end_layout
  401. \begin_deeper
  402. \begin_layout Itemize
  403. Characterize MSC response to interferon gamma
  404. \end_layout
  405. \begin_layout Itemize
  406. IFN-g is thought to stimulate their function
  407. \end_layout
  408. \begin_layout Itemize
  409. Test IFN-g treated MSC infusion as a therapy to delay graft rejection in
  410. cynomolgus monkeys
  411. \end_layout
  412. \begin_layout Itemize
  413. Monitor animals post-transplant using blood RNA-seq at serial time points
  414. \end_layout
  415. \end_deeper
  416. \begin_layout Itemize
  417. Investigate dynamics of histone marks in CD4 T-cell activation and memory
  418. \end_layout
  419. \begin_deeper
  420. \begin_layout Itemize
  421. Previous studies have looked at single snapshots of histone marks
  422. \end_layout
  423. \begin_layout Itemize
  424. Instead, look at changes in histone marks across activation and memory
  425. \end_layout
  426. \end_deeper
  427. \begin_layout Itemize
  428. High-throughput sequencing and microarray technologies
  429. \end_layout
  430. \begin_deeper
  431. \begin_layout Itemize
  432. Powerful methods for assaying gene expression and epigenetics across entire
  433. genomes
  434. \end_layout
  435. \begin_layout Itemize
  436. Proper analysis requires finding and exploiting systematic genome-wide trends
  437. \end_layout
  438. \end_deeper
  439. \begin_layout Chapter
  440. Reproducible genome-wide epigenetic analysis of H3K4 and H3K27 methylation
  441. in naive and memory CD4 T-cell activation
  442. \end_layout
  443. \begin_layout Standard
  444. \begin_inset Flex TODO Note (inline)
  445. status open
  446. \begin_layout Plain Layout
  447. Chapter author list: Me, Sarah, Dan
  448. \end_layout
  449. \end_inset
  450. \end_layout
  451. \begin_layout Standard
  452. \begin_inset Flex TODO Note (inline)
  453. status open
  454. \begin_layout Plain Layout
  455. Need better section titles throughout the entire chapter
  456. \end_layout
  457. \end_inset
  458. \end_layout
  459. \begin_layout Section
  460. Approach
  461. \end_layout
  462. \begin_layout Itemize
  463. CD4 T-cells are central to all adaptive immune responses and memory
  464. \end_layout
  465. \begin_layout Itemize
  466. H3K4 and H3K27 methylation are major epigenetic regulators of gene expression
  467. \end_layout
  468. \begin_layout Itemize
  469. Canonically, H3K4 is activating and H3K27 is inhibitory, but the reality
  470. is complex
  471. \end_layout
  472. \begin_layout Itemize
  473. Looking at these marks during CD4 activation and memory should reveal new
  474. mechanistic details
  475. \end_layout
  476. \begin_layout Itemize
  477. Test
  478. \begin_inset Quotes eld
  479. \end_inset
  480. poised promoter
  481. \begin_inset Quotes erd
  482. \end_inset
  483. hypothesis in which H3K4 and H3K27 are both methylated
  484. \end_layout
  485. \begin_layout Itemize
  486. Expand scope of analysis beyond simple promoter counts
  487. \end_layout
  488. \begin_deeper
  489. \begin_layout Itemize
  490. Analyze peaks genome-wide, including in intergenic regions
  491. \end_layout
  492. \begin_layout Itemize
  493. Analysis of coverage distribution shape within promoters, e.g.
  494. upstream vs downstream coverage
  495. \end_layout
  496. \end_deeper
  497. \begin_layout Section
  498. Methods
  499. \end_layout
  500. \begin_layout Standard
  501. \begin_inset Flex TODO Note (inline)
  502. status open
  503. \begin_layout Plain Layout
  504. Look up some more details from the papers (e.g.
  505. activation method).
  506. \end_layout
  507. \end_inset
  508. \end_layout
  509. \begin_layout Standard
  510. A reproducible workflow was written to analyze the raw ChIP-seq and RNA-seq
  511. data from previous studies
  512. \begin_inset CommandInset citation
  513. LatexCommand cite
  514. key "LaMere2016,LaMere2017,gh-cd4-csaw"
  515. literal "true"
  516. \end_inset
  517. .
  518. Briefly, this data consists of RNA-seq and ChIP-seq from CD4 T-cells cultured
  519. from 4 donors.
  520. From each donor, naive and memory CD4 T-cells were isolated separately.
  521. Then cultures of both cells were activated [how?], and samples were taken
  522. at 4 time points: Day 0 (pre-activation), Day 1 (early activation), Day
  523. 5 (peak activation), and Day 14 (post-activation).
  524. For each combination of cell type and time point, RNA was isolated, and
  525. ChIP-seq was performed for each of 3 histone marks: H3K4me2, H3K4me3, and
  526. H3K27me3.
  527. The ChIP-seq input was also sequenced for each sample.
  528. The result was 32 samples for each assay.
  529. \end_layout
  530. \begin_layout Subsection
  531. ChIP-seq alignment and peak calling
  532. \end_layout
  533. \begin_layout Standard
  534. \begin_inset Flex TODO Note (inline)
  535. status open
  536. \begin_layout Plain Layout
  537. All info from this subsection belongs in other subsections.
  538. \end_layout
  539. \end_inset
  540. \end_layout
  541. \begin_layout Standard
  542. Sequence reads were retrieved from the Sequence Read Archive (SRA)
  543. \begin_inset CommandInset citation
  544. LatexCommand cite
  545. key "Leinonen2011"
  546. literal "false"
  547. \end_inset
  548. .
  549. ChIP-seq (and input) reads were aligned to the GRCh38 genome assembly using
  550. Bowtie 2
  551. \begin_inset CommandInset citation
  552. LatexCommand cite
  553. key "Langmead2012,Schneider2017,gh-hg38-ref"
  554. literal "false"
  555. \end_inset
  556. .
  557. Artifact regions were annotated using a custom implementation of the GreyListCh
  558. IP algorithm, and these
  559. \begin_inset Quotes eld
  560. \end_inset
  561. greylists
  562. \begin_inset Quotes erd
  563. \end_inset
  564. were merged with the ENCODE blacklist
  565. \begin_inset CommandInset citation
  566. LatexCommand cite
  567. key "greylistchip,Amemiya2019,Dunham2012"
  568. literal "false"
  569. \end_inset
  570. .
  571. Any read or peak overlapping one of these regions was regarded as artifactual
  572. and excluded from downstream analyses.
  573. \end_layout
  574. \begin_layout Standard
  575. Peaks were called using epic, an implementation of the SICER algorithm
  576. \begin_inset CommandInset citation
  577. LatexCommand cite
  578. key "Zang2009,gh-epic"
  579. literal "false"
  580. \end_inset
  581. .
  582. Peaks were also called separately using MACS, but MACS was determined to
  583. be a poor fit for the data, and these peak calls were not used in any further
  584. analyses
  585. \begin_inset CommandInset citation
  586. LatexCommand cite
  587. key "Zhang2008"
  588. literal "false"
  589. \end_inset
  590. .
  591. \end_layout
  592. \begin_layout Subsection
  593. RNA-seq align+quant method comparison
  594. \end_layout
  595. \begin_layout Standard
  596. \begin_inset Note Note
  597. status open
  598. \begin_layout Plain Layout
  599. \begin_inset Float figure
  600. wide false
  601. sideways false
  602. status open
  603. \begin_layout Plain Layout
  604. \align center
  605. \begin_inset Float figure
  606. wide false
  607. sideways false
  608. status collapsed
  609. \begin_layout Plain Layout
  610. \align center
  611. \begin_inset Graphics
  612. filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star-CROP.png
  613. lyxscale 25
  614. width 35col%
  615. groupId rna-comp-subfig
  616. \end_inset
  617. \end_layout
  618. \begin_layout Plain Layout
  619. \begin_inset Caption Standard
  620. \begin_layout Plain Layout
  621. STAR quantification, Entrez vs Ensembl gene annotation
  622. \end_layout
  623. \end_inset
  624. \end_layout
  625. \end_inset
  626. \begin_inset space \qquad{}
  627. \end_inset
  628. \begin_inset Float figure
  629. wide false
  630. sideways false
  631. status collapsed
  632. \begin_layout Plain Layout
  633. \align center
  634. \begin_inset Graphics
  635. filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal-CROP.png
  636. lyxscale 25
  637. width 35col%
  638. groupId rna-comp-subfig
  639. \end_inset
  640. \end_layout
  641. \begin_layout Plain Layout
  642. \begin_inset Caption Standard
  643. \begin_layout Plain Layout
  644. Salmon+Shoal quantification, Entrez vs Ensembl gene annotation
  645. \end_layout
  646. \end_inset
  647. \end_layout
  648. \end_inset
  649. \end_layout
  650. \begin_layout Plain Layout
  651. \align center
  652. \begin_inset Float figure
  653. wide false
  654. sideways false
  655. status collapsed
  656. \begin_layout Plain Layout
  657. \align center
  658. \begin_inset Graphics
  659. filename graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2-CROP.png
  660. lyxscale 25
  661. width 35col%
  662. groupId rna-comp-subfig
  663. \end_inset
  664. \end_layout
  665. \begin_layout Plain Layout
  666. \begin_inset Caption Standard
  667. \begin_layout Plain Layout
  668. STAR vs HISAT2 quantification, Ensembl gene annotation
  669. \end_layout
  670. \end_inset
  671. \end_layout
  672. \end_inset
  673. \begin_inset space \qquad{}
  674. \end_inset
  675. \begin_inset Float figure
  676. wide false
  677. sideways false
  678. status collapsed
  679. \begin_layout Plain Layout
  680. \align center
  681. \begin_inset Graphics
  682. filename graphics/CD4-csaw/rnaseq-compare/star-vs-salmon-CROP.png
  683. lyxscale 25
  684. width 35col%
  685. groupId rna-comp-subfig
  686. \end_inset
  687. \end_layout
  688. \begin_layout Plain Layout
  689. \begin_inset Caption Standard
  690. \begin_layout Plain Layout
  691. Salmon vs STAR quantification, Ensembl gene annotation
  692. \end_layout
  693. \end_inset
  694. \end_layout
  695. \end_inset
  696. \end_layout
  697. \begin_layout Plain Layout
  698. \align center
  699. \begin_inset Float figure
  700. wide false
  701. sideways false
  702. status collapsed
  703. \begin_layout Plain Layout
  704. \align center
  705. \begin_inset Graphics
  706. filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto-CROP.png
  707. lyxscale 25
  708. width 35col%
  709. groupId rna-comp-subfig
  710. \end_inset
  711. \end_layout
  712. \begin_layout Plain Layout
  713. \begin_inset Caption Standard
  714. \begin_layout Plain Layout
  715. Salmon vs Kallisto quantification, Ensembl gene annotation
  716. \end_layout
  717. \end_inset
  718. \end_layout
  719. \end_inset
  720. \begin_inset space \qquad{}
  721. \end_inset
  722. \begin_inset Float figure
  723. wide false
  724. sideways false
  725. status collapsed
  726. \begin_layout Plain Layout
  727. \align center
  728. \begin_inset Graphics
  729. filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal-CROP.png
  730. lyxscale 25
  731. width 35col%
  732. groupId rna-comp-subfig
  733. \end_inset
  734. \end_layout
  735. \begin_layout Plain Layout
  736. \begin_inset Caption Standard
  737. \begin_layout Plain Layout
  738. Salmon+Shoal vs Salmon alone, Ensembl gene annotation
  739. \end_layout
  740. \end_inset
  741. \end_layout
  742. \end_inset
  743. \end_layout
  744. \begin_layout Plain Layout
  745. \begin_inset Caption Standard
  746. \begin_layout Plain Layout
  747. \begin_inset CommandInset label
  748. LatexCommand label
  749. name "fig:RNA-norm-comp"
  750. \end_inset
  751. RNA-seq comparisons
  752. \end_layout
  753. \end_inset
  754. \end_layout
  755. \end_inset
  756. \end_layout
  757. \end_inset
  758. \end_layout
  759. \begin_layout Itemize
  760. Ultimately selected shoal as quantification, Ensembl as annotation.
  761. Why? Running downstream analyses with all quant methods and both annotations
  762. showed very little practical difference, so choice was not terribly important.
  763. Prefer shoal due to theoretical advantages.
  764. To note in discussion: reproducible workflow made it easy to do this, enabling
  765. an informed decision.
  766. \end_layout
  767. \begin_layout Subsection
  768. RNA-seq has a large confounding batch effect
  769. \end_layout
  770. \begin_layout Standard
  771. \begin_inset Float figure
  772. wide false
  773. sideways false
  774. status open
  775. \begin_layout Plain Layout
  776. \begin_inset Flex TODO Note (inline)
  777. status open
  778. \begin_layout Plain Layout
  779. Just take the top row
  780. \end_layout
  781. \end_inset
  782. \end_layout
  783. \begin_layout Plain Layout
  784. \align center
  785. \begin_inset Graphics
  786. filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-CROP.png
  787. lyxscale 25
  788. width 100col%
  789. groupId colwidth-raster
  790. \end_inset
  791. \end_layout
  792. \begin_layout Plain Layout
  793. \begin_inset Caption Standard
  794. \begin_layout Plain Layout
  795. \series bold
  796. \begin_inset CommandInset label
  797. LatexCommand label
  798. name "fig:RNA-seq-weights-vs-covars"
  799. \end_inset
  800. RNA-seq sample weights, grouped by experimental and technical covariates.
  801. \end_layout
  802. \end_inset
  803. \end_layout
  804. \end_inset
  805. \end_layout
  806. \begin_layout Itemize
  807. Batch 1 is garbage quality.
  808. Analyses involving batch 1 samples are expected to yield poor statistical
  809. power.
  810. \end_layout
  811. \begin_layout Standard
  812. \begin_inset Float figure
  813. wide false
  814. sideways false
  815. status open
  816. \begin_layout Plain Layout
  817. \align center
  818. \begin_inset Float figure
  819. wide false
  820. sideways false
  821. status open
  822. \begin_layout Plain Layout
  823. \align center
  824. \begin_inset Graphics
  825. filename graphics/CD4-csaw/RNA-seq/PCA-no-batchsub-CROP.png
  826. lyxscale 25
  827. width 75col%
  828. groupId rna-pca-subfig
  829. \end_inset
  830. \end_layout
  831. \begin_layout Plain Layout
  832. \begin_inset Caption Standard
  833. \begin_layout Plain Layout
  834. \series bold
  835. \begin_inset CommandInset label
  836. LatexCommand label
  837. name "fig:RNA-PCA-no-batchsub"
  838. \end_inset
  839. Before batch correction
  840. \end_layout
  841. \end_inset
  842. \end_layout
  843. \end_inset
  844. \end_layout
  845. \begin_layout Plain Layout
  846. \align center
  847. \begin_inset Float figure
  848. wide false
  849. sideways false
  850. status open
  851. \begin_layout Plain Layout
  852. \align center
  853. \begin_inset Graphics
  854. filename graphics/CD4-csaw/RNA-seq/PCA-combat-batchsub-CROP.png
  855. lyxscale 25
  856. width 75col%
  857. groupId rna-pca-subfig
  858. \end_inset
  859. \end_layout
  860. \begin_layout Plain Layout
  861. \begin_inset Caption Standard
  862. \begin_layout Plain Layout
  863. \series bold
  864. \begin_inset CommandInset label
  865. LatexCommand label
  866. name "fig:RNA-PCA-ComBat-batchsub"
  867. \end_inset
  868. After batch correction with ComBat
  869. \end_layout
  870. \end_inset
  871. \end_layout
  872. \end_inset
  873. \end_layout
  874. \begin_layout Plain Layout
  875. \begin_inset Caption Standard
  876. \begin_layout Plain Layout
  877. \series bold
  878. \begin_inset CommandInset label
  879. LatexCommand label
  880. name "fig:RNA-PCA"
  881. \end_inset
  882. PCoA plots of RNA-seq data showing effect of batch correction.
  883. \end_layout
  884. \end_inset
  885. \end_layout
  886. \end_inset
  887. \end_layout
  888. \begin_layout Itemize
  889. RNA-seq batch effect can be partially corrected, but still induces uncorrectable
  890. biases in downstream analysis
  891. \end_layout
  892. \begin_layout Subsection
  893. ChIP-seq blacklisting is important
  894. \end_layout
  895. \begin_layout Standard
  896. \begin_inset Float figure
  897. wide false
  898. sideways false
  899. status open
  900. \begin_layout Plain Layout
  901. \align center
  902. \begin_inset Float figure
  903. wide false
  904. sideways false
  905. status open
  906. \begin_layout Plain Layout
  907. \align center
  908. \begin_inset Graphics
  909. filename graphics/CD4-csaw/csaw/CCF-plots-PAGE2-CROP.pdf
  910. lyxscale 50
  911. height 40theight%
  912. groupId ccf-subfig
  913. \end_inset
  914. \end_layout
  915. \begin_layout Plain Layout
  916. \begin_inset Caption Standard
  917. \begin_layout Plain Layout
  918. \series bold
  919. \begin_inset CommandInset label
  920. LatexCommand label
  921. name "fig:CCF-with-blacklist"
  922. \end_inset
  923. Cross-correlation plots with blacklisted reads removed
  924. \end_layout
  925. \end_inset
  926. \end_layout
  927. \end_inset
  928. \end_layout
  929. \begin_layout Plain Layout
  930. \align center
  931. \begin_inset Float figure
  932. wide false
  933. sideways false
  934. status open
  935. \begin_layout Plain Layout
  936. \align center
  937. \begin_inset Graphics
  938. filename graphics/CD4-csaw/csaw/CCF-plots-noBL-PAGE2-CROP.pdf
  939. lyxscale 50
  940. height 40theight%
  941. groupId ccf-subfig
  942. \end_inset
  943. \end_layout
  944. \begin_layout Plain Layout
  945. \begin_inset Caption Standard
  946. \begin_layout Plain Layout
  947. \series bold
  948. \begin_inset CommandInset label
  949. LatexCommand label
  950. name "fig:CCF-without-blacklist"
  951. \end_inset
  952. Cross-correlation plots without removing blacklisted reads
  953. \end_layout
  954. \end_inset
  955. \end_layout
  956. \end_inset
  957. \end_layout
  958. \begin_layout Plain Layout
  959. \begin_inset Caption Standard
  960. \begin_layout Plain Layout
  961. \series bold
  962. \begin_inset CommandInset label
  963. LatexCommand label
  964. name "fig:CCF-master"
  965. \end_inset
  966. Strand cross-correlation plots for ChIP-seq data.
  967. \end_layout
  968. \end_inset
  969. \end_layout
  970. \end_inset
  971. \end_layout
  972. \begin_layout Subsection
  973. ChIP-seq peak calling
  974. \end_layout
  975. \begin_layout Standard
  976. \begin_inset Note Note
  977. status open
  978. \begin_layout Plain Layout
  979. \begin_inset Float figure
  980. wide false
  981. sideways false
  982. status open
  983. \begin_layout Plain Layout
  984. \align center
  985. \begin_inset Float figure
  986. wide false
  987. sideways false
  988. status collapsed
  989. \begin_layout Plain Layout
  990. \align center
  991. \begin_inset Graphics
  992. filename graphics/CD4-csaw/IDR/D4659vsD5053_epic-PAGE1-CROP.pdf
  993. lyxscale 50
  994. width 45col%
  995. groupId idr-rc-subfig
  996. \end_inset
  997. \end_layout
  998. \begin_layout Plain Layout
  999. \begin_inset Caption Standard
  1000. \begin_layout Plain Layout
  1001. Peak ranks from SICER peak caller
  1002. \end_layout
  1003. \end_inset
  1004. \end_layout
  1005. \end_inset
  1006. \begin_inset space \hfill{}
  1007. \end_inset
  1008. \begin_inset Float figure
  1009. wide false
  1010. sideways false
  1011. status collapsed
  1012. \begin_layout Plain Layout
  1013. \align center
  1014. \begin_inset Graphics
  1015. filename graphics/CD4-csaw/IDR/D4659vsD5053_macs-PAGE1-CROP.pdf
  1016. lyxscale 50
  1017. width 45col%
  1018. groupId idr-rc-subfig
  1019. \end_inset
  1020. \end_layout
  1021. \begin_layout Plain Layout
  1022. \begin_inset Caption Standard
  1023. \begin_layout Plain Layout
  1024. Peak ranks from MACS peak caller
  1025. \end_layout
  1026. \end_inset
  1027. \end_layout
  1028. \end_inset
  1029. \end_layout
  1030. \begin_layout Plain Layout
  1031. \begin_inset Caption Standard
  1032. \begin_layout Plain Layout
  1033. \series bold
  1034. \begin_inset CommandInset label
  1035. LatexCommand label
  1036. name "fig:IDR-rank-consist"
  1037. \end_inset
  1038. Irreproducible Discovery Rate rank consistency plots for H3K27me3.
  1039. \series default
  1040. Peaks are ranked by the scores assigned by the peak caller in each donor,
  1041. and then the ranks for two donors are plotted against each other.
  1042. Higher ranks are more significant (top right).
  1043. Peaks meeting various thresholds of reproducibility, measured by the irreproduc
  1044. ible discovery rate (IDR), are shaded accordingly.
  1045. [This could be explained better, or refer to the text.]
  1046. \end_layout
  1047. \end_inset
  1048. \end_layout
  1049. \end_inset
  1050. \end_layout
  1051. \end_inset
  1052. \end_layout
  1053. \begin_layout Standard
  1054. [IDR] When the peaks for each donor are ranked according to their scores,
  1055. SICER produces much more reproducible results between donors.
  1056. This is consistent with SICER's stated goal of identifying broad peaks,
  1057. in contrast to MACS, which is designed for identifying sharp peaks.
  1058. Based on this observation, the SICER peak calls were used for all downstream
  1059. analyses that involved ChIP-seq peaks.
  1060. \end_layout
  1061. \begin_layout Subsection
  1062. ChIP-seq normalization
  1063. \end_layout
  1064. \begin_layout Standard
  1065. \begin_inset Note Note
  1066. status open
  1067. \begin_layout Plain Layout
  1068. \begin_inset Float figure
  1069. wide false
  1070. sideways false
  1071. status collapsed
  1072. \begin_layout Plain Layout
  1073. \align center
  1074. \begin_inset Graphics
  1075. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-sample-MAplot-bins-CROP.png
  1076. lyxscale 25
  1077. width 100col%
  1078. groupId colwidth-raster
  1079. \end_inset
  1080. \end_layout
  1081. \begin_layout Plain Layout
  1082. \begin_inset Caption Standard
  1083. \begin_layout Plain Layout
  1084. \series bold
  1085. \begin_inset CommandInset label
  1086. LatexCommand label
  1087. name "fig:MA-plot-bigbins"
  1088. \end_inset
  1089. MA plot of H3K4me2 read counts in 10kb bins for two arbitrary samples.
  1090. \end_layout
  1091. \end_inset
  1092. \end_layout
  1093. \end_inset
  1094. \end_layout
  1095. \end_inset
  1096. \end_layout
  1097. \begin_layout Subsection
  1098. ChIP-seq must be corrected for hidden confounding factors
  1099. \end_layout
  1100. \begin_layout Standard
  1101. \begin_inset Float figure
  1102. wide false
  1103. sideways false
  1104. status open
  1105. \begin_layout Plain Layout
  1106. \begin_inset Float figure
  1107. wide false
  1108. sideways false
  1109. status collapsed
  1110. \begin_layout Plain Layout
  1111. \align center
  1112. \begin_inset Graphics
  1113. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-PCA-raw-CROP.png
  1114. lyxscale 25
  1115. width 45col%
  1116. groupId pcoa-subfig
  1117. \end_inset
  1118. \end_layout
  1119. \begin_layout Plain Layout
  1120. \begin_inset Caption Standard
  1121. \begin_layout Plain Layout
  1122. \series bold
  1123. \begin_inset CommandInset label
  1124. LatexCommand label
  1125. name "fig:PCoA-H3K4me2-bad"
  1126. \end_inset
  1127. H3K4me2, no correction
  1128. \end_layout
  1129. \end_inset
  1130. \end_layout
  1131. \end_inset
  1132. \begin_inset space \hfill{}
  1133. \end_inset
  1134. \begin_inset Float figure
  1135. wide false
  1136. sideways false
  1137. status collapsed
  1138. \begin_layout Plain Layout
  1139. \align center
  1140. \begin_inset Graphics
  1141. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-PCA-SVsub-CROP.png
  1142. lyxscale 25
  1143. width 45col%
  1144. groupId pcoa-subfig
  1145. \end_inset
  1146. \end_layout
  1147. \begin_layout Plain Layout
  1148. \begin_inset Caption Standard
  1149. \begin_layout Plain Layout
  1150. \series bold
  1151. \begin_inset CommandInset label
  1152. LatexCommand label
  1153. name "fig:PCoA-H3K4me2-good"
  1154. \end_inset
  1155. H3K4me2, SVs subtracted
  1156. \end_layout
  1157. \end_inset
  1158. \end_layout
  1159. \end_inset
  1160. \end_layout
  1161. \begin_layout Plain Layout
  1162. \begin_inset Float figure
  1163. wide false
  1164. sideways false
  1165. status collapsed
  1166. \begin_layout Plain Layout
  1167. \align center
  1168. \begin_inset Graphics
  1169. filename graphics/CD4-csaw/ChIP-seq/H3K4me3-PCA-raw-CROP.png
  1170. lyxscale 25
  1171. width 45col%
  1172. groupId pcoa-subfig
  1173. \end_inset
  1174. \end_layout
  1175. \begin_layout Plain Layout
  1176. \begin_inset Caption Standard
  1177. \begin_layout Plain Layout
  1178. \series bold
  1179. \begin_inset CommandInset label
  1180. LatexCommand label
  1181. name "fig:PCoA-H3K4me3-bad"
  1182. \end_inset
  1183. H3K4me3, no correction
  1184. \end_layout
  1185. \end_inset
  1186. \end_layout
  1187. \end_inset
  1188. \begin_inset space \hfill{}
  1189. \end_inset
  1190. \begin_inset Float figure
  1191. wide false
  1192. sideways false
  1193. status collapsed
  1194. \begin_layout Plain Layout
  1195. \align center
  1196. \begin_inset Graphics
  1197. filename graphics/CD4-csaw/ChIP-seq/H3K4me3-PCA-SVsub-CROP.png
  1198. lyxscale 25
  1199. width 45col%
  1200. groupId pcoa-subfig
  1201. \end_inset
  1202. \end_layout
  1203. \begin_layout Plain Layout
  1204. \begin_inset Caption Standard
  1205. \begin_layout Plain Layout
  1206. \series bold
  1207. \begin_inset CommandInset label
  1208. LatexCommand label
  1209. name "fig:PCoA-H3K4me3-good"
  1210. \end_inset
  1211. H3K4me3, SVs subtracted
  1212. \end_layout
  1213. \end_inset
  1214. \end_layout
  1215. \end_inset
  1216. \end_layout
  1217. \begin_layout Plain Layout
  1218. \begin_inset Float figure
  1219. wide false
  1220. sideways false
  1221. status collapsed
  1222. \begin_layout Plain Layout
  1223. \align center
  1224. \begin_inset Graphics
  1225. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-PCA-raw-CROP.png
  1226. lyxscale 25
  1227. width 45col%
  1228. groupId pcoa-subfig
  1229. \end_inset
  1230. \end_layout
  1231. \begin_layout Plain Layout
  1232. \begin_inset Caption Standard
  1233. \begin_layout Plain Layout
  1234. \series bold
  1235. \begin_inset CommandInset label
  1236. LatexCommand label
  1237. name "fig:PCoA-H3K27me3-bad"
  1238. \end_inset
  1239. H3K27me3, no correction
  1240. \end_layout
  1241. \end_inset
  1242. \end_layout
  1243. \end_inset
  1244. \begin_inset space \hfill{}
  1245. \end_inset
  1246. \begin_inset Float figure
  1247. wide false
  1248. sideways false
  1249. status collapsed
  1250. \begin_layout Plain Layout
  1251. \align center
  1252. \begin_inset Graphics
  1253. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-PCA-SVsub-CROP.png
  1254. lyxscale 25
  1255. width 45col%
  1256. groupId pcoa-subfig
  1257. \end_inset
  1258. \end_layout
  1259. \begin_layout Plain Layout
  1260. \begin_inset Caption Standard
  1261. \begin_layout Plain Layout
  1262. \series bold
  1263. \begin_inset CommandInset label
  1264. LatexCommand label
  1265. name "fig:PCoA-H3K27me3-good"
  1266. \end_inset
  1267. H3K27me3, SVs subtracted
  1268. \end_layout
  1269. \end_inset
  1270. \end_layout
  1271. \end_inset
  1272. \end_layout
  1273. \begin_layout Plain Layout
  1274. \begin_inset Caption Standard
  1275. \begin_layout Plain Layout
  1276. \series bold
  1277. \begin_inset CommandInset label
  1278. LatexCommand label
  1279. name "fig:PCoA-ChIP"
  1280. \end_inset
  1281. PCoA plots of ChIP-seq sliding window data, before and after subtracting
  1282. surrogate variables (SVs).
  1283. \end_layout
  1284. \end_inset
  1285. \end_layout
  1286. \begin_layout Plain Layout
  1287. \end_layout
  1288. \end_inset
  1289. \end_layout
  1290. \begin_layout Itemize
  1291. Figures showing BCV plots with and without SVA for each histone mark?
  1292. \end_layout
  1293. \begin_layout Subsection
  1294. MOFA recovers biologically relevant variation from blind analysis by correlating
  1295. across datasets
  1296. \end_layout
  1297. \begin_layout Standard
  1298. \begin_inset ERT
  1299. status open
  1300. \begin_layout Plain Layout
  1301. \backslash
  1302. afterpage{
  1303. \end_layout
  1304. \begin_layout Plain Layout
  1305. \backslash
  1306. begin{landscape}
  1307. \end_layout
  1308. \end_inset
  1309. \end_layout
  1310. \begin_layout Standard
  1311. \begin_inset Float figure
  1312. wide false
  1313. sideways false
  1314. status open
  1315. \begin_layout Plain Layout
  1316. \begin_inset Float figure
  1317. wide false
  1318. sideways false
  1319. status open
  1320. \begin_layout Plain Layout
  1321. \align center
  1322. \begin_inset Graphics
  1323. filename graphics/CD4-csaw/MOFA-varExplaiend-matrix-CROP.png
  1324. lyxscale 25
  1325. width 45col%
  1326. groupId mofa-subfig
  1327. \end_inset
  1328. \end_layout
  1329. \begin_layout Plain Layout
  1330. \begin_inset Caption Standard
  1331. \begin_layout Plain Layout
  1332. \series bold
  1333. \begin_inset CommandInset label
  1334. LatexCommand label
  1335. name "fig:mofa-varexplained"
  1336. \end_inset
  1337. Variance explained in each data set by each latent factor estimated by MOFA.
  1338. \series default
  1339. For each latent factor (LF) learned by MOFA, the variance explained by
  1340. that factor in each data set (
  1341. \begin_inset Quotes eld
  1342. \end_inset
  1343. view
  1344. \begin_inset Quotes erd
  1345. \end_inset
  1346. ) is shown by the shading of the cells in the lower section.
  1347. The upper section shows the total fraction of each data set's variance
  1348. that is explained by all LFs combined.
  1349. \end_layout
  1350. \end_inset
  1351. \end_layout
  1352. \end_inset
  1353. \begin_inset space \hfill{}
  1354. \end_inset
  1355. \begin_inset Float figure
  1356. wide false
  1357. sideways false
  1358. status open
  1359. \begin_layout Plain Layout
  1360. \align center
  1361. \begin_inset Graphics
  1362. filename graphics/CD4-csaw/MOFA-LF-scatter-CROP.png
  1363. lyxscale 25
  1364. width 45col%
  1365. groupId mofa-subfig
  1366. \end_inset
  1367. \end_layout
  1368. \begin_layout Plain Layout
  1369. \begin_inset Caption Standard
  1370. \begin_layout Plain Layout
  1371. \series bold
  1372. \begin_inset CommandInset label
  1373. LatexCommand label
  1374. name "fig:mofa-lf-scatter"
  1375. \end_inset
  1376. Scatter plots of specific pairs of MOFA latent factors.
  1377. \series default
  1378. LFs 1, 4, and 5 explain substantial variation in all data sets, so they
  1379. are plotted against each other in order to reveal patterns of variation
  1380. that are shared across all data sets.
  1381. \end_layout
  1382. \end_inset
  1383. \end_layout
  1384. \end_inset
  1385. \end_layout
  1386. \begin_layout Plain Layout
  1387. \begin_inset Caption Standard
  1388. \begin_layout Plain Layout
  1389. \series bold
  1390. \begin_inset CommandInset label
  1391. LatexCommand label
  1392. name "fig:MOFA-master"
  1393. \end_inset
  1394. MOFA latent factors separate technical confounders from biologically relevant patterns of variation.
  1395. \end_layout
  1396. \end_inset
  1397. \end_layout
  1398. \end_inset
  1399. \end_layout
  1400. \begin_layout Standard
  1401. \begin_inset ERT
  1402. status open
  1403. \begin_layout Plain Layout
  1404. \backslash
  1405. end{landscape}
  1406. \end_layout
  1407. \begin_layout Plain Layout
  1408. }
  1409. \end_layout
  1410. \end_inset
  1411. \end_layout
  1412. \begin_layout Itemize
  1413. Figure
  1414. \begin_inset CommandInset ref
  1415. LatexCommand ref
  1416. reference "fig:mofa-varexplained"
  1417. plural "false"
  1418. caps "false"
  1419. noprefix "false"
  1420. \end_inset
  1421. shows that LF1, 4, and 5 explain substantial variance in all data sets
  1422. \end_layout
  1423. \begin_layout Itemize
  1424. Figure
  1425. \begin_inset CommandInset ref
  1426. LatexCommand ref
  1427. reference "fig:mofa-lf-scatter"
  1428. plural "false"
  1429. caps "false"
  1430. noprefix "false"
  1431. \end_inset
  1432. shows that those same 3 LFs (1, 4, & 5) also correlate best with the experimen
  1433. tal factors (cell type & time point)
  1434. \end_layout
  1435. \begin_layout Itemize
  1436. LF2 is clearly the RNA-seq batch effect
  1437. \end_layout
  1438. \begin_layout Standard
  1439. \begin_inset Note Note
  1440. status open
  1441. \begin_layout Plain Layout
  1442. \begin_inset Float figure
  1443. wide false
  1444. sideways false
  1445. status open
  1446. \begin_layout Plain Layout
  1447. \align center
  1448. \begin_inset Graphics
  1449. filename graphics/CD4-csaw/MOFA-batch-correct-CROP.png
  1450. lyxscale 25
  1451. width 100col%
  1452. groupId colwidth-raster
  1453. \end_inset
  1454. \end_layout
  1455. \begin_layout Plain Layout
  1456. \begin_inset Caption Standard
  1457. \begin_layout Plain Layout
  1458. \series bold
  1459. \begin_inset CommandInset label
  1460. LatexCommand label
  1461. name "fig:mofa-batchsub"
  1462. \end_inset
  1463. Result of RNA-seq batch-correction using MOFA latent factors
  1464. \end_layout
  1465. \end_inset
  1466. \end_layout
  1467. \end_inset
  1468. \end_layout
  1469. \end_inset
  1470. \end_layout
  1471. \begin_layout Itemize
  1472. Attempting to remove the effect of LF2 results in batch correction comparable
  1473. to ComBat (Figure
  1474. \begin_inset CommandInset ref
  1475. LatexCommand ref
  1476. reference "fig:RNA-PCA-ComBat-batchsub"
  1477. plural "false"
  1478. caps "false"
  1479. noprefix "false"
  1480. \end_inset
  1481. )
  1482. \end_layout
  1483. \begin_layout Itemize
  1484. MOFA was able to do this batch subtraction without directly using the sample
  1485. labels (sample labels were used implicitly to select which factor to subtract)
  1486. \end_layout
  1487. \begin_layout Itemize
  1488. Similarity of results shows that batch correction can't get much better
  1489. than ComBat (despite ComBat ignoring time point)
  1490. \end_layout
  1491. \begin_layout Subsection
  1492. MOFA does some interesting stuff but is mostly confirmatory in this context
  1493. \end_layout
  1494. \begin_layout Standard
  1495. \begin_inset Flex TODO Note (inline)
  1496. status open
  1497. \begin_layout Plain Layout
  1498. MOFA should be a footnote to something else, not its own point
  1499. \end_layout
  1500. \end_inset
  1501. \end_layout
  1502. \begin_layout Standard
  1503. \begin_inset Flex TODO Note (inline)
  1504. status open
  1505. \begin_layout Plain Layout
  1506. Combine with previous subsection
  1507. \end_layout
  1508. \end_inset
  1509. \end_layout
  1510. \begin_layout Itemize
  1511. MOFA shows great promise for accelerating discovery of major biological
  1512. effects in multi-omics datasets
  1513. \end_layout
  1514. \begin_deeper
  1515. \begin_layout Itemize
  1516. MOFA successfully separates biologically relevant patterns of variation
  1517. from technical confounding factors without knowing the sample labels, by
  1518. finding latent factors that explain variation across multiple data sets.
  1519. \end_layout
  1520. \begin_layout Itemize
  1521. MOFA was added to this analysis late and played primarily a confirmatory
  1522. role, but it was able to confirm earlier conclusions with much less prior
  1523. information (no sample labels) and much less analyst effort/input
  1524. \end_layout
  1525. \begin_layout Itemize
  1526. Less input from analyst means less opportunity to introduce unwanted bias
  1527. into results
  1528. \end_layout
  1529. \begin_layout Itemize
  1530. MOFA confirmed that the already-implemented batch correction in the RNA-seq
  1531. data was already performing as well as possible given the limitations of
  1532. the data
  1533. \end_layout
  1534. \end_deeper
  1535. \begin_layout Section
  1536. Results
  1537. \end_layout
  1538. \begin_layout Standard
  1539. \begin_inset Flex TODO Note (inline)
  1540. status open
  1541. \begin_layout Plain Layout
  1542. Focus on what hypotheses were tested, then select figures that show how
  1543. those hypotheses were tested, even if the result is a negative.
  1544. Not every interesting result needs to be in here.
  1545. Chapter should tell a story.
  1546. \end_layout
  1547. \end_inset
  1548. \end_layout
  1549. \begin_layout Standard
  1550. \begin_inset Flex TODO Note (inline)
  1551. status open
  1552. \begin_layout Plain Layout
  1553. Maybe reorder these sections to do RNA-seq, then ChIP-seq, then combined
  1554. analyses?
  1555. \end_layout
  1556. \end_inset
  1557. \end_layout
  1558. \begin_layout Subsection
  1559. Interpretation of RNA-seq analysis is limited by a major confounding factor
  1560. \end_layout
  1561. \begin_layout Standard
  1562. \begin_inset Float table
  1563. wide false
  1564. sideways false
  1565. status collapsed
  1566. \begin_layout Plain Layout
  1567. \align center
  1568. \begin_inset Tabular
  1569. <lyxtabular version="3" rows="11" columns="3">
  1570. <features tabularvalignment="middle">
  1571. <column alignment="center" valignment="top">
  1572. <column alignment="center" valignment="top">
  1573. <column alignment="center" valignment="top">
  1574. <row>
  1575. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1576. \begin_inset Text
  1577. \begin_layout Plain Layout
  1578. Test
  1579. \end_layout
  1580. \end_inset
  1581. </cell>
  1582. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1583. \begin_inset Text
  1584. \begin_layout Plain Layout
  1585. Est.
  1586. non-null
  1587. \end_layout
  1588. \end_inset
  1589. </cell>
  1590. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  1591. \begin_inset Text
  1592. \begin_layout Plain Layout
  1593. \begin_inset Formula $\mathrm{FDR}\le10\%$
  1594. \end_inset
  1595. \end_layout
  1596. \end_inset
  1597. </cell>
  1598. </row>
  1599. <row>
  1600. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1601. \begin_inset Text
  1602. \begin_layout Plain Layout
  1603. Naive Day 0 vs Day 1
  1604. \end_layout
  1605. \end_inset
  1606. </cell>
  1607. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1608. \begin_inset Text
  1609. \begin_layout Plain Layout
  1610. 5992
  1611. \end_layout
  1612. \end_inset
  1613. </cell>
  1614. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1615. \begin_inset Text
  1616. \begin_layout Plain Layout
  1617. 1613
  1618. \end_layout
  1619. \end_inset
  1620. </cell>
  1621. </row>
  1622. <row>
  1623. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1624. \begin_inset Text
  1625. \begin_layout Plain Layout
  1626. Naive Day 0 vs Day 5
  1627. \end_layout
  1628. \end_inset
  1629. </cell>
  1630. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1631. \begin_inset Text
  1632. \begin_layout Plain Layout
  1633. 3038
  1634. \end_layout
  1635. \end_inset
  1636. </cell>
  1637. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1638. \begin_inset Text
  1639. \begin_layout Plain Layout
  1640. 32
  1641. \end_layout
  1642. \end_inset
  1643. </cell>
  1644. </row>
  1645. <row>
  1646. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1647. \begin_inset Text
  1648. \begin_layout Plain Layout
  1649. Naive Day 0 vs Day 14
  1650. \end_layout
  1651. \end_inset
  1652. </cell>
  1653. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1654. \begin_inset Text
  1655. \begin_layout Plain Layout
  1656. 1870
  1657. \end_layout
  1658. \end_inset
  1659. </cell>
  1660. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1661. \begin_inset Text
  1662. \begin_layout Plain Layout
  1663. 190
  1664. \end_layout
  1665. \end_inset
  1666. </cell>
  1667. </row>
  1668. <row>
  1669. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1670. \begin_inset Text
  1671. \begin_layout Plain Layout
  1672. Memory Day 0 vs Day 1
  1673. \end_layout
  1674. \end_inset
  1675. </cell>
  1676. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1677. \begin_inset Text
  1678. \begin_layout Plain Layout
  1679. 3195
  1680. \end_layout
  1681. \end_inset
  1682. </cell>
  1683. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1684. \begin_inset Text
  1685. \begin_layout Plain Layout
  1686. 411
  1687. \end_layout
  1688. \end_inset
  1689. </cell>
  1690. </row>
  1691. <row>
  1692. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1693. \begin_inset Text
  1694. \begin_layout Plain Layout
  1695. Memory Day 0 vs Day 5
  1696. \end_layout
  1697. \end_inset
  1698. </cell>
  1699. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1700. \begin_inset Text
  1701. \begin_layout Plain Layout
  1702. 2688
  1703. \end_layout
  1704. \end_inset
  1705. </cell>
  1706. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1707. \begin_inset Text
  1708. \begin_layout Plain Layout
  1709. 18
  1710. \end_layout
  1711. \end_inset
  1712. </cell>
  1713. </row>
  1714. <row>
  1715. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1716. \begin_inset Text
  1717. \begin_layout Plain Layout
  1718. Memory Day 0 vs Day 14
  1719. \end_layout
  1720. \end_inset
  1721. </cell>
  1722. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1723. \begin_inset Text
  1724. \begin_layout Plain Layout
  1725. 1911
  1726. \end_layout
  1727. \end_inset
  1728. </cell>
  1729. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1730. \begin_inset Text
  1731. \begin_layout Plain Layout
  1732. 227
  1733. \end_layout
  1734. \end_inset
  1735. </cell>
  1736. </row>
  1737. <row>
  1738. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1739. \begin_inset Text
  1740. \begin_layout Plain Layout
  1741. Day 0 Naive vs Memory
  1742. \end_layout
  1743. \end_inset
  1744. </cell>
  1745. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1746. \begin_inset Text
  1747. \begin_layout Plain Layout
  1748. 0
  1749. \end_layout
  1750. \end_inset
  1751. </cell>
  1752. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1753. \begin_inset Text
  1754. \begin_layout Plain Layout
  1755. 2
  1756. \end_layout
  1757. \end_inset
  1758. </cell>
  1759. </row>
  1760. <row>
  1761. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1762. \begin_inset Text
  1763. \begin_layout Plain Layout
  1764. Day 1 Naive vs Memory
  1765. \end_layout
  1766. \end_inset
  1767. </cell>
  1768. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1769. \begin_inset Text
  1770. \begin_layout Plain Layout
  1771. 9167
  1772. \end_layout
  1773. \end_inset
  1774. </cell>
  1775. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1776. \begin_inset Text
  1777. \begin_layout Plain Layout
  1778. 5532
  1779. \end_layout
  1780. \end_inset
  1781. </cell>
  1782. </row>
  1783. <row>
  1784. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1785. \begin_inset Text
  1786. \begin_layout Plain Layout
  1787. Day 5 Naive vs Memory
  1788. \end_layout
  1789. \end_inset
  1790. </cell>
  1791. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1792. \begin_inset Text
  1793. \begin_layout Plain Layout
  1794. 0
  1795. \end_layout
  1796. \end_inset
  1797. </cell>
  1798. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1799. \begin_inset Text
  1800. \begin_layout Plain Layout
  1801. 0
  1802. \end_layout
  1803. \end_inset
  1804. </cell>
  1805. </row>
  1806. <row>
  1807. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1808. \begin_inset Text
  1809. \begin_layout Plain Layout
  1810. Day 14 Naive vs Memory
  1811. \end_layout
  1812. \end_inset
  1813. </cell>
  1814. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1815. \begin_inset Text
  1816. \begin_layout Plain Layout
  1817. 6446
  1818. \end_layout
  1819. \end_inset
  1820. </cell>
  1821. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  1822. \begin_inset Text
  1823. \begin_layout Plain Layout
  1824. 2319
  1825. \end_layout
  1826. \end_inset
  1827. </cell>
  1828. </row>
  1829. </lyxtabular>
  1830. \end_inset
  1831. \end_layout
  1832. \begin_layout Plain Layout
  1833. \begin_inset Caption Standard
  1834. \begin_layout Plain Layout
  1835. \series bold
  1836. \begin_inset CommandInset label
  1837. LatexCommand label
  1838. name "tab:Estimated-and-detected-rnaseq"
  1839. \end_inset
  1840. Estimated and detected differentially expressed genes.
  1841. \series default
  1842. \begin_inset Quotes eld
  1843. \end_inset
  1844. Test
  1845. \begin_inset Quotes erd
  1846. \end_inset
  1847. : Which sample groups were compared;
  1848. \begin_inset Quotes eld
  1849. \end_inset
  1850. Est. non-null
  1851. \begin_inset Quotes erd
  1852. \end_inset
  1853. : Estimated number of differentially expressed genes, using the method of
  1854. averaging local FDR values
  1855. \begin_inset CommandInset citation
  1856. LatexCommand cite
  1857. key "Phipson2013Thesis"
  1858. literal "false"
  1859. \end_inset
  1860. ;
  1861. \begin_inset Quotes eld
  1862. \end_inset
  1863. \begin_inset Formula $\mathrm{FDR}\le10\%$
  1864. \end_inset
  1865. \begin_inset Quotes erd
  1866. \end_inset
  1867. : Number of significantly differentially expressed genes at an FDR threshold
  1868. of 10%.
  1869. The total number of genes tested was 16707.
  1870. \end_layout
  1871. \end_inset
  1872. \end_layout
  1873. \end_inset
  1874. \end_layout
  1875. \begin_layout Standard
  1876. \begin_inset Float figure
  1877. wide false
  1878. sideways false
  1879. status collapsed
  1880. \begin_layout Plain Layout
  1881. \align center
  1882. \begin_inset Graphics
  1883. filename graphics/CD4-csaw/RNA-seq/PCA-final-12-CROP.png
  1884. lyxscale 25
  1885. width 100col%
  1886. groupId colwidth-raster
  1887. \end_inset
  1888. \end_layout
  1889. \begin_layout Plain Layout
  1890. \begin_inset Caption Standard
  1891. \begin_layout Plain Layout
  1892. \series bold
  1893. \begin_inset CommandInset label
  1894. LatexCommand label
  1895. name "fig:rna-pca-final"
  1896. \end_inset
  1897. PCoA plot of RNA-seq samples after ComBat batch correction.
  1898. \series default
  1899. Each point represents an individual sample.
  1900. Samples with the same combination of cell type and time point are encircled
  1901. with a shaded region to aid in visual identification of the sample groups.
  1902. Samples of the same cell type from the same donor are connected by lines
  1903. to indicate the
  1904. \begin_inset Quotes eld
  1905. \end_inset
  1906. trajectory
  1907. \begin_inset Quotes erd
  1908. \end_inset
  1909. of each donor's cells over time in PCoA space.
  1910. \end_layout
  1911. \end_inset
  1912. \end_layout
  1913. \begin_layout Plain Layout
  1914. \end_layout
  1915. \end_inset
  1916. \end_layout
  1917. \begin_layout Standard
  1918. Genes called present in the RNA-seq data were tested for differential expression
  1919. between all time points and cell types.
  1920. The counts of differentially expressed genes are shown in Table
  1921. \begin_inset CommandInset ref
  1922. LatexCommand ref
  1923. reference "tab:Estimated-and-detected-rnaseq"
  1924. plural "false"
  1925. caps "false"
  1926. noprefix "false"
  1927. \end_inset
  1928. .
  1929. Notably, all the results for Day 0 and Day 5 have substantially fewer genes
  1930. called differentially expressed than any of the results for other time
  1931. points.
  1932. This is an unfortunate result of the difference in sample quality between
  1933. the two batches of RNA-seq data.
  1934. All the samples in Batch 1, which includes all the samples from Days 0
  1935. and 5, have substantially more variability than the samples in Batch 2,
  1936. which includes the other time points.
  1937. This is reflected in the substantially higher weights assigned to Batch
  1938. 2 (Figure
  1939. \begin_inset CommandInset ref
  1940. LatexCommand ref
  1941. reference "fig:RNA-seq-weights-vs-covars"
  1942. plural "false"
  1943. caps "false"
  1944. noprefix "false"
  1945. \end_inset
  1946. ).
  1947. The batch effect has both a systematic component and a random noise component.
  1948. While the systematic component was subtracted out using ComBat (Figure
  1949. \begin_inset CommandInset ref
  1950. LatexCommand ref
  1951. reference "fig:RNA-PCA"
  1952. plural "false"
  1953. caps "false"
  1954. noprefix "false"
  1955. \end_inset
  1956. ), no such correction is possible for the noise component: Batch 1 simply
  1957. has substantially more random noise in it, which reduces the statistical
  1958. power for any differential expression tests involving samples in that batch.
  1959. \end_layout
  1960. \begin_layout Standard
  1961. Despite the difficulty in detecting specific differentially expressed genes,
  1962. there is still evidence that differential expression is present for these
  1963. time points.
  1964. In Figure
  1965. \begin_inset CommandInset ref
  1966. LatexCommand ref
  1967. reference "fig:rna-pca-final"
  1968. plural "false"
  1969. caps "false"
  1970. noprefix "false"
  1971. \end_inset
  1972. , there is a clear separation between naive and memory samples at Day 0,
  1973. despite the fact that only 2 genes were significantly differentially expressed
  1974. for this comparison.
  1975. Similarly, the small numbers of genes detected for the Day 0 vs Day 5 compariso
  1976. ns do not reflect the large separation between these time points in Figure
  1977. \begin_inset CommandInset ref
  1978. LatexCommand ref
  1979. reference "fig:rna-pca-final"
  1980. plural "false"
  1981. caps "false"
  1982. noprefix "false"
  1983. \end_inset
  1984. .
  1985. In addition, a similar separation of sample groups is visible in the MOFA latent factor plots in Figure
  1986. \begin_inset CommandInset ref
  1987. LatexCommand ref
  1988. reference "fig:mofa-lf-scatter"
  1989. plural "false"
  1990. caps "false"
  1991. noprefix "false"
  1992. \end_inset
  1993. .
  1994. This suggests that there is indeed a differential expression signal present
  1995. in the data for these comparisons, but the large variability in the Batch
  1996. 1 samples obfuscates this signal at the individual gene level.
  1997. As a result, it is impossible to make any meaningful statements about the
  1998. \begin_inset Quotes eld
  1999. \end_inset
  2000. size
  2001. \begin_inset Quotes erd
  2002. \end_inset
  2003. of the gene signature for any time point, since the number of significant
  2004. genes as well as the estimated number of differentially expressed genes
  2005. depends so strongly on the variations in sample quality in addition to
  2006. the size of the differential expression signal in the data.
  2007. Gene-set enrichment analyses are impractical for the same reason.
  2008. However, analyses looking at genome-wide patterns of expression are still
  2009. practical.
  2010. \end_layout
  2011. \begin_layout Subsection
  2012. H3K4 and H3K27 methylation occur in broad regions and are enriched near
  2013. promoters
  2014. \end_layout
  2015. \begin_layout Standard
  2016. \begin_inset Float table
  2017. wide false
  2018. sideways false
  2019. status open
  2020. \begin_layout Plain Layout
  2021. \align center
  2022. \begin_inset Flex TODO Note (inline)
  2023. status open
  2024. \begin_layout Plain Layout
  2025. Also get
  2026. \emph on
  2027. median
  2028. \emph default
  2029. peak width and maybe other quantiles (25%, 75%)
  2030. \end_layout
  2031. \end_inset
  2032. \end_layout
  2033. \begin_layout Plain Layout
  2034. \align center
  2035. \begin_inset Tabular
  2036. <lyxtabular version="3" rows="4" columns="5">
  2037. <features tabularvalignment="middle">
  2038. <column alignment="center" valignment="top">
  2039. <column alignment="center" valignment="top">
  2040. <column alignment="center" valignment="top">
  2041. <column alignment="center" valignment="top">
  2042. <column alignment="center" valignment="top">
  2043. <row>
  2044. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2045. \begin_inset Text
  2046. \begin_layout Plain Layout
  2047. Histone Mark
  2048. \end_layout
  2049. \end_inset
  2050. </cell>
  2051. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2052. \begin_inset Text
  2053. \begin_layout Plain Layout
  2054. # Peaks
  2055. \end_layout
  2056. \end_inset
  2057. </cell>
  2058. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2059. \begin_inset Text
  2060. \begin_layout Plain Layout
  2061. Mean peak width
  2062. \end_layout
  2063. \end_inset
  2064. </cell>
  2065. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2066. \begin_inset Text
  2067. \begin_layout Plain Layout
  2068. genome coverage
  2069. \end_layout
  2070. \end_inset
  2071. </cell>
  2072. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2073. \begin_inset Text
  2074. \begin_layout Plain Layout
  2075. FRiP
  2076. \end_layout
  2077. \end_inset
  2078. </cell>
  2079. </row>
  2080. <row>
  2081. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2082. \begin_inset Text
  2083. \begin_layout Plain Layout
  2084. H3K4me2
  2085. \end_layout
  2086. \end_inset
  2087. </cell>
  2088. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2089. \begin_inset Text
  2090. \begin_layout Plain Layout
  2091. 14965
  2092. \end_layout
  2093. \end_inset
  2094. </cell>
  2095. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2096. \begin_inset Text
  2097. \begin_layout Plain Layout
  2098. 3970
  2099. \end_layout
  2100. \end_inset
  2101. </cell>
  2102. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2103. \begin_inset Text
  2104. \begin_layout Plain Layout
  2105. 1.92%
  2106. \end_layout
  2107. \end_inset
  2108. </cell>
  2109. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2110. \begin_inset Text
  2111. \begin_layout Plain Layout
  2112. 14.2%
  2113. \end_layout
  2114. \end_inset
  2115. </cell>
  2116. </row>
  2117. <row>
  2118. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2119. \begin_inset Text
  2120. \begin_layout Plain Layout
  2121. H3K4me3
  2122. \end_layout
  2123. \end_inset
  2124. </cell>
  2125. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2126. \begin_inset Text
  2127. \begin_layout Plain Layout
  2128. 6163
  2129. \end_layout
  2130. \end_inset
  2131. </cell>
  2132. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2133. \begin_inset Text
  2134. \begin_layout Plain Layout
  2135. 2946
  2136. \end_layout
  2137. \end_inset
  2138. </cell>
  2139. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2140. \begin_inset Text
  2141. \begin_layout Plain Layout
  2142. 0.588%
  2143. \end_layout
  2144. \end_inset
  2145. </cell>
  2146. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2147. \begin_inset Text
  2148. \begin_layout Plain Layout
  2149. 6.57%
  2150. \end_layout
  2151. \end_inset
  2152. </cell>
  2153. </row>
  2154. <row>
  2155. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2156. \begin_inset Text
  2157. \begin_layout Plain Layout
  2158. H3K27me3
  2159. \end_layout
  2160. \end_inset
  2161. </cell>
  2162. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2163. \begin_inset Text
  2164. \begin_layout Plain Layout
  2165. 18139
  2166. \end_layout
  2167. \end_inset
  2168. </cell>
  2169. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2170. \begin_inset Text
  2171. \begin_layout Plain Layout
  2172. 18967
  2173. \end_layout
  2174. \end_inset
  2175. </cell>
  2176. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2177. \begin_inset Text
  2178. \begin_layout Plain Layout
  2179. 11.1%
  2180. \end_layout
  2181. \end_inset
  2182. </cell>
  2183. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2184. \begin_inset Text
  2185. \begin_layout Plain Layout
  2186. 22.5%
  2187. \end_layout
  2188. \end_inset
  2189. </cell>
  2190. </row>
  2191. </lyxtabular>
  2192. \end_inset
  2193. \end_layout
  2194. \begin_layout Plain Layout
  2195. \begin_inset Caption Standard
  2196. \begin_layout Plain Layout
  2197. \series bold
  2198. \begin_inset CommandInset label
  2199. LatexCommand label
  2200. name "tab:peak-calling-summary"
  2201. \end_inset
  2202. Peak-calling summary.
  2203. \series default
  2204. For each histone mark, the number of peaks called using SICER at an IDR
  2205. threshold of ???, the mean width of those peaks, the fraction of the genome
  2206. covered by peaks, and the fraction of reads in peaks (FRiP).
  2207. \end_layout
  2208. \end_inset
  2209. \end_layout
  2210. \end_inset
  2211. \end_layout
  2212. \begin_layout Standard
  2213. Table
  2214. \begin_inset CommandInset ref
  2215. LatexCommand ref
  2216. reference "tab:peak-calling-summary"
  2217. plural "false"
  2218. caps "false"
  2219. noprefix "false"
  2220. \end_inset
  2221. gives a summary of the peak calling statistics for each histone mark.
  2222. Consistent with previous observations [CITATION NEEDED], all 3 histone
  2223. marks occur in broad regions spanning many consecutive nucleosomes, rather
  2224. than in sharp peaks as would be expected for a transcription factor or
  2225. other molecule that binds to specific sites.
  2226. This conclusion is further supported by Figure
  2227. \begin_inset CommandInset ref
  2228. LatexCommand ref
  2229. reference "fig:CCF-with-blacklist"
  2230. plural "false"
  2231. caps "false"
  2232. noprefix "false"
  2233. \end_inset
  2234. , in which a clear nucleosome-sized periodicity is visible in the cross-correlat
  2235. ion value for each sample, indicating that each time a given mark is present
  2236. on one histone, it is also likely to be found on adjacent histones.
  2237. H3K27me3 enrichment in particular is substantially broader than either
  2238. H3K4 mark, with a mean peak width of almost 19,000 bp.
  2239. This is also reflected in the periodicity observed in Figure
  2240. \begin_inset CommandInset ref
  2241. LatexCommand ref
  2242. reference "fig:CCF-with-blacklist"
  2243. plural "false"
  2244. caps "false"
  2245. noprefix "false"
  2246. \end_inset
  2247. , which remains strong much farther out for H3K27me3 than the other marks,
  2248. showing that H3K27me3 especially tends to be found on long runs of consecutive
  2249. histones.
  2250. \end_layout
  2251. \begin_layout Standard
  2252. \begin_inset Float figure
  2253. wide false
  2254. sideways false
  2255. status open
  2256. \begin_layout Plain Layout
  2257. \begin_inset Flex TODO Note (inline)
  2258. status open
  2259. \begin_layout Plain Layout
  2260. Ensure this figure uses the peak calls from the new analysis.
  2261. \end_layout
  2262. \end_inset
  2263. \end_layout
  2264. \begin_layout Plain Layout
  2265. \begin_inset Flex TODO Note (inline)
  2266. status open
  2267. \begin_layout Plain Layout
  2268. Need a control: shuffle all peaks and repeat, N times.
  2269. Do real vs shuffled control both in a top/bottom arrangement.
  2270. \end_layout
  2271. \end_inset
  2272. \end_layout
  2273. \begin_layout Plain Layout
  2274. \begin_inset Flex TODO Note (inline)
  2275. status open
  2276. \begin_layout Plain Layout
  2277. Consider counting TSS inside peaks as negative number indicating how far
  2278. \emph on
  2279. inside
  2280. \emph default
  2281. the peak the TSS is (i.e.
  2282. distance to nearest non-peak area).
  2283. \end_layout
  2284. \end_inset
  2285. \end_layout
  2286. \begin_layout Plain Layout
  2287. \begin_inset Flex TODO Note (inline)
  2288. status open
  2289. \begin_layout Plain Layout
  2290. The H3K4 part of this figure is included in
  2291. \begin_inset CommandInset citation
  2292. LatexCommand cite
  2293. key "LaMere2016"
  2294. literal "false"
  2295. \end_inset
  2296. as Fig.
  2297. S2.
  2298. Do I need to do anything about that?
  2299. \end_layout
  2300. \end_inset
  2301. \end_layout
  2302. \begin_layout Plain Layout
  2303. \align center
  2304. \begin_inset Graphics
  2305. filename graphics/CD4-csaw/Promoter Peak Distance Profile-PAGE1-CROP.pdf
  2306. lyxscale 50
  2307. width 80col%
  2308. \end_inset
  2309. \end_layout
  2310. \begin_layout Plain Layout
  2311. \begin_inset Caption Standard
  2312. \begin_layout Plain Layout
  2313. \series bold
  2314. \begin_inset CommandInset label
  2315. LatexCommand label
  2316. name "fig:near-promoter-peak-enrich"
  2317. \end_inset
  2318. Enrichment of peaks in promoter neighborhoods.
  2319. \series default
  2320. This plot shows the distribution of distances from each annotated transcription
  2321. start site in the genome to the nearest called peak.
  2322. Each line represents one combination of histone mark, cell type, and time
  2323. point.
  2324. Distributions are smoothed using kernel density estimation [CITE?].
  2325. Transcription start sites that occur
  2326. \emph on
  2327. within
  2328. \emph default
  2329. peaks were excluded from this plot to avoid a large spike at zero that
  2330. would overshadow the rest of the distribution.
  2331. \end_layout
  2332. \end_inset
  2333. \end_layout
  2334. \end_inset
  2335. \end_layout
  2336. \begin_layout Standard
  2337. \begin_inset Float table
  2338. wide false
  2339. sideways false
  2340. status open
  2341. \begin_layout Plain Layout
  2342. \align center
  2343. \begin_inset Tabular
  2344. <lyxtabular version="3" rows="4" columns="2">
  2345. <features tabularvalignment="middle">
  2346. <column alignment="center" valignment="top">
  2347. <column alignment="center" valignment="top">
  2348. <row>
  2349. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2350. \begin_inset Text
  2351. \begin_layout Plain Layout
  2352. Histone mark
  2353. \end_layout
  2354. \end_inset
  2355. </cell>
  2356. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2357. \begin_inset Text
  2358. \begin_layout Plain Layout
  2359. Effective promoter radius
  2360. \end_layout
  2361. \end_inset
  2362. </cell>
  2363. </row>
  2364. <row>
  2365. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2366. \begin_inset Text
  2367. \begin_layout Plain Layout
  2368. H3K4me2
  2369. \end_layout
  2370. \end_inset
  2371. </cell>
  2372. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2373. \begin_inset Text
  2374. \begin_layout Plain Layout
  2375. 1 kb
  2376. \end_layout
  2377. \end_inset
  2378. </cell>
  2379. </row>
  2380. <row>
  2381. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2382. \begin_inset Text
  2383. \begin_layout Plain Layout
  2384. H3K4me3
  2385. \end_layout
  2386. \end_inset
  2387. </cell>
  2388. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2389. \begin_inset Text
  2390. \begin_layout Plain Layout
  2391. 1 kb
  2392. \end_layout
  2393. \end_inset
  2394. </cell>
  2395. </row>
  2396. <row>
  2397. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2398. \begin_inset Text
  2399. \begin_layout Plain Layout
  2400. H3K27me3
  2401. \end_layout
  2402. \end_inset
  2403. </cell>
  2404. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2405. \begin_inset Text
  2406. \begin_layout Plain Layout
  2407. 2.5 kb
  2408. \end_layout
  2409. \end_inset
  2410. </cell>
  2411. </row>
  2412. </lyxtabular>
  2413. \end_inset
  2414. \end_layout
  2415. \begin_layout Plain Layout
  2416. \begin_inset Caption Standard
  2417. \begin_layout Plain Layout
  2418. \series bold
  2419. \begin_inset CommandInset label
  2420. LatexCommand label
  2421. name "tab:effective-promoter-radius"
  2422. \end_inset
  2423. Effective promoter radius for each histone mark.
  2424. \series default
  2425. These values represent the approximate distance from transcription start
  2426. site positions within which an excess of peaks are found, as shown in Figure
  2427. \begin_inset CommandInset ref
  2428. LatexCommand ref
  2429. reference "fig:near-promoter-peak-enrich"
  2430. plural "false"
  2431. caps "false"
  2432. noprefix "false"
  2433. \end_inset
  2434. .
  2435. \end_layout
  2436. \end_inset
  2437. \end_layout
  2438. \begin_layout Plain Layout
  2439. \end_layout
  2440. \end_inset
  2441. \end_layout
  2442. \begin_layout Standard
  2443. All 3 histone marks tend to occur more often near promoter regions, as shown
  2444. in Figure
  2445. \begin_inset CommandInset ref
  2446. LatexCommand ref
  2447. reference "fig:near-promoter-peak-enrich"
  2448. plural "false"
  2449. caps "false"
  2450. noprefix "false"
  2451. \end_inset
  2452. .
  2453. The majority of each density distribution is flat, representing the background
  2454. density of peaks genome-wide.
  2455. Each distribution has a peak near zero, representing an enrichment of peaks
  2456. close to transcription start site (TSS) positions relative to the remainder
  2457. of the genome.
  2458. Interestingly, the
  2459. \begin_inset Quotes eld
  2460. \end_inset
  2461. radius
  2462. \begin_inset Quotes erd
  2463. \end_inset
  2464. within which this enrichment occurs is not the same for every histone mark
  2465. (Table
  2466. \begin_inset CommandInset ref
  2467. LatexCommand ref
  2468. reference "tab:effective-promoter-radius"
  2469. plural "false"
  2470. caps "false"
  2471. noprefix "false"
  2472. \end_inset
  2473. ).
  2474. For H3K4me2 and H3K4me3, peaks are most enriched within 1
  2475. \begin_inset space ~
  2476. \end_inset
  2477. kbp of TSS positions, while for H3K27me3, enrichment is broader, extending
  2478. to 2.5
  2479. \begin_inset space ~
  2480. \end_inset
  2481. kbp.
  2482. These
  2483. \begin_inset Quotes eld
  2484. \end_inset
  2485. effective promoter radii
  2486. \begin_inset Quotes erd
  2487. \end_inset
  2488. remain approximately the same across all combinations of experimental condition
  2489. (cell type, time point, and donor), so they appear to be a property of
  2490. the histone mark itself.
  2491. Hence, these radii were used to define the promoter regions for each histone
  2492. mark in all further analyses.
  2493. \end_layout
  2494. \begin_layout Standard
  2495. \begin_inset Flex TODO Note (inline)
  2496. status open
  2497. \begin_layout Plain Layout
  2498. Consider also showing figure for distance to nearest peak center, and reference
  2499. median peak size once that is known.
  2500. \end_layout
  2501. \end_inset
  2502. \end_layout
  2503. \begin_layout Subsection
  2504. H3K4 and H3K27 promoter methylation has broadly the expected correlation
  2505. with gene expression
  2506. \end_layout
  2507. \begin_layout Standard
  2508. \begin_inset Float figure
  2509. wide false
  2510. sideways false
  2511. status open
  2512. \begin_layout Plain Layout
  2513. \begin_inset Flex TODO Note (inline)
  2514. status open
  2515. \begin_layout Plain Layout
  2516. This figure is generated from the old analysis.
  2517. Either note that in some way or re-generate it from the new peak calls.
  2518. \end_layout
  2519. \end_inset
  2520. \end_layout
  2521. \begin_layout Plain Layout
  2522. \align center
  2523. \begin_inset Graphics
  2524. filename graphics/CD4-csaw/FPKM by Peak Violin Plots-CROP.pdf
  2525. lyxscale 50
  2526. width 100col%
  2527. \end_inset
  2528. \end_layout
  2529. \begin_layout Plain Layout
  2530. \begin_inset Caption Standard
  2531. \begin_layout Plain Layout
  2532. \series bold
  2533. \begin_inset CommandInset label
  2534. LatexCommand label
  2535. name "fig:fpkm-by-peak"
  2536. \end_inset
  2537. Expression distributions of genes with and without promoter peaks.
  2538. \end_layout
  2539. \end_inset
  2540. \end_layout
  2541. \end_inset
  2542. \end_layout
  2543. \begin_layout Standard
  2544. H3K4me2 and H3K4me3 have previously been reported as activating marks, while
  2545. H3K27me3 has been reported as inactivating [CITE].
  2546. The data are consistent with this characterization: genes whose promoters
  2547. (as defined by the radii for each histone mark described above) overlap
  2548. with a H3K4me2 or H3K4me3 peak tend to have higher expression than those
  2549. that don't, while H3K27me3 is likewise associated with lower gene expression,
  2550. as shown in Figure
  2551. \begin_inset CommandInset ref
  2552. LatexCommand ref
  2553. reference "fig:fpkm-by-peak"
  2554. plural "false"
  2555. caps "false"
  2556. noprefix "false"
  2557. \end_inset
  2558. .
  2559. This pattern holds across all combinations of cell type and time point
  2560. (Welch's
  2561. \emph on
  2562. t
  2563. \emph default
  2564. -test, all
  2565. \begin_inset Formula $p\mathrm{-values}\ll2.2\times10^{-16}$
  2566. \end_inset
  2567. ).
  2568. The difference in average FPKM values when a peak overlaps the promoter
  2569. is about
  2570. \begin_inset Formula $+5.67$
  2571. \end_inset
  2572. for H3K4me2,
  2573. \begin_inset Formula $+5.76$
  2574. \end_inset
  2575. for H3K4me3, and
  2576. \begin_inset Formula $-4.00$
  2577. \end_inset
  2578. for H3K27me3.
  2579. \end_layout
  2580. \begin_layout Standard
  2581. \begin_inset Flex TODO Note (inline)
  2582. status open
  2583. \begin_layout Plain Layout
  2584. I also have some figures looking at interactions between marks (e.g.
  2585. what if a promoter has both H3K4me3 and H3K27me3), but I don't know if
  2586. that much detail is warranted here, since all the effects just seem approximate
  2587. ly additive anyway.
  2588. \end_layout
  2589. \end_inset
  2590. \end_layout
  2591. \begin_layout Subsection
  2592. RNA-seq and H3K4 methylation patterns in naive and memory show convergence
  2593. at day 14
  2594. \end_layout
  2595. \begin_layout Standard
  2596. \begin_inset ERT
  2597. status open
  2598. \begin_layout Plain Layout
  2599. \backslash
  2600. afterpage{
  2601. \end_layout
  2602. \begin_layout Plain Layout
  2603. \backslash
  2604. begin{landscape}
  2605. \end_layout
  2606. \end_inset
  2607. \end_layout
  2608. \begin_layout Standard
  2609. \begin_inset Float table
  2610. wide false
  2611. sideways false
  2612. status collapsed
  2613. \begin_layout Plain Layout
  2614. \align center
  2615. \begin_inset Tabular
  2616. <lyxtabular version="3" rows="6" columns="7">
  2617. <features tabularvalignment="middle">
  2618. <column alignment="center" valignment="top">
  2619. <column alignment="center" valignment="top">
  2620. <column alignment="center" valignment="top">
  2621. <column alignment="center" valignment="top">
  2622. <column alignment="center" valignment="top">
  2623. <column alignment="center" valignment="top">
  2624. <column alignment="center" valignment="top">
  2625. <row>
  2626. <cell alignment="center" valignment="top" usebox="none">
  2627. \begin_inset Text
  2628. \begin_layout Plain Layout
  2629. \end_layout
  2630. \end_inset
  2631. </cell>
  2632. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2633. \begin_inset Text
  2634. \begin_layout Plain Layout
  2635. Number of significant promoters
  2636. \end_layout
  2637. \end_inset
  2638. </cell>
  2639. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2640. \begin_inset Text
  2641. \begin_layout Plain Layout
  2642. \end_layout
  2643. \end_inset
  2644. </cell>
  2645. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2646. \begin_inset Text
  2647. \begin_layout Plain Layout
  2648. \end_layout
  2649. \end_inset
  2650. </cell>
  2651. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2652. \begin_inset Text
  2653. \begin_layout Plain Layout
  2654. Est.
  2655. differentially modified promoters
  2656. \end_layout
  2657. \end_inset
  2658. </cell>
  2659. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2660. \begin_inset Text
  2661. \begin_layout Plain Layout
  2662. \end_layout
  2663. \end_inset
  2664. </cell>
  2665. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2666. \begin_inset Text
  2667. \begin_layout Plain Layout
  2668. \end_layout
  2669. \end_inset
  2670. </cell>
  2671. </row>
  2672. <row>
  2673. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2674. \begin_inset Text
  2675. \begin_layout Plain Layout
  2676. Time Point
  2677. \end_layout
  2678. \end_inset
  2679. </cell>
  2680. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2681. \begin_inset Text
  2682. \begin_layout Plain Layout
  2683. H3K4me2
  2684. \end_layout
  2685. \end_inset
  2686. </cell>
  2687. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2688. \begin_inset Text
  2689. \begin_layout Plain Layout
  2690. H3K4me3
  2691. \end_layout
  2692. \end_inset
  2693. </cell>
  2694. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2695. \begin_inset Text
  2696. \begin_layout Plain Layout
  2697. H3K27me3
  2698. \end_layout
  2699. \end_inset
  2700. </cell>
  2701. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2702. \begin_inset Text
  2703. \begin_layout Plain Layout
  2704. H3K4me2
  2705. \end_layout
  2706. \end_inset
  2707. </cell>
  2708. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2709. \begin_inset Text
  2710. \begin_layout Plain Layout
  2711. H3K4me3
  2712. \end_layout
  2713. \end_inset
  2714. </cell>
  2715. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2716. \begin_inset Text
  2717. \begin_layout Plain Layout
  2718. H3K27me3
  2719. \end_layout
  2720. \end_inset
  2721. </cell>
  2722. </row>
  2723. <row>
  2724. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2725. \begin_inset Text
  2726. \begin_layout Plain Layout
  2727. Day 0
  2728. \end_layout
  2729. \end_inset
  2730. </cell>
  2731. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2732. \begin_inset Text
  2733. \begin_layout Plain Layout
  2734. 4553
  2735. \end_layout
  2736. \end_inset
  2737. </cell>
  2738. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2739. \begin_inset Text
  2740. \begin_layout Plain Layout
  2741. 927
  2742. \end_layout
  2743. \end_inset
  2744. </cell>
  2745. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2746. \begin_inset Text
  2747. \begin_layout Plain Layout
  2748. 6
  2749. \end_layout
  2750. \end_inset
  2751. </cell>
  2752. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2753. \begin_inset Text
  2754. \begin_layout Plain Layout
  2755. 9967
  2756. \end_layout
  2757. \end_inset
  2758. </cell>
  2759. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2760. \begin_inset Text
  2761. \begin_layout Plain Layout
  2762. 4149
  2763. \end_layout
  2764. \end_inset
  2765. </cell>
  2766. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2767. \begin_inset Text
  2768. \begin_layout Plain Layout
  2769. 2404
  2770. \end_layout
  2771. \end_inset
  2772. </cell>
  2773. </row>
  2774. <row>
  2775. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2776. \begin_inset Text
  2777. \begin_layout Plain Layout
  2778. Day 1
  2779. \end_layout
  2780. \end_inset
  2781. </cell>
  2782. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2783. \begin_inset Text
  2784. \begin_layout Plain Layout
  2785. 567
  2786. \end_layout
  2787. \end_inset
  2788. </cell>
  2789. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2790. \begin_inset Text
  2791. \begin_layout Plain Layout
  2792. 278
  2793. \end_layout
  2794. \end_inset
  2795. </cell>
  2796. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2797. \begin_inset Text
  2798. \begin_layout Plain Layout
  2799. 1570
  2800. \end_layout
  2801. \end_inset
  2802. </cell>
  2803. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2804. \begin_inset Text
  2805. \begin_layout Plain Layout
  2806. 4370
  2807. \end_layout
  2808. \end_inset
  2809. </cell>
  2810. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2811. \begin_inset Text
  2812. \begin_layout Plain Layout
  2813. 2145
  2814. \end_layout
  2815. \end_inset
  2816. </cell>
  2817. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2818. \begin_inset Text
  2819. \begin_layout Plain Layout
  2820. 6598
  2821. \end_layout
  2822. \end_inset
  2823. </cell>
  2824. </row>
  2825. <row>
  2826. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2827. \begin_inset Text
  2828. \begin_layout Plain Layout
  2829. Day 5
  2830. \end_layout
  2831. \end_inset
  2832. </cell>
  2833. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2834. \begin_inset Text
  2835. \begin_layout Plain Layout
  2836. 2313
  2837. \end_layout
  2838. \end_inset
  2839. </cell>
  2840. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2841. \begin_inset Text
  2842. \begin_layout Plain Layout
  2843. 139
  2844. \end_layout
  2845. \end_inset
  2846. </cell>
  2847. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2848. \begin_inset Text
  2849. \begin_layout Plain Layout
  2850. 490
  2851. \end_layout
  2852. \end_inset
  2853. </cell>
  2854. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2855. \begin_inset Text
  2856. \begin_layout Plain Layout
  2857. 9450
  2858. \end_layout
  2859. \end_inset
  2860. </cell>
  2861. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2862. \begin_inset Text
  2863. \begin_layout Plain Layout
  2864. 1148
  2865. \end_layout
  2866. \end_inset
  2867. </cell>
  2868. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2869. \begin_inset Text
  2870. \begin_layout Plain Layout
  2871. 4141
  2872. \end_layout
  2873. \end_inset
  2874. </cell>
  2875. </row>
  2876. <row>
  2877. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2878. \begin_inset Text
  2879. \begin_layout Plain Layout
  2880. Day 14
  2881. \end_layout
  2882. \end_inset
  2883. </cell>
  2884. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2885. \begin_inset Text
  2886. \begin_layout Plain Layout
  2887. 0
  2888. \end_layout
  2889. \end_inset
  2890. </cell>
  2891. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2892. \begin_inset Text
  2893. \begin_layout Plain Layout
  2894. 0
  2895. \end_layout
  2896. \end_inset
  2897. </cell>
  2898. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2899. \begin_inset Text
  2900. \begin_layout Plain Layout
  2901. 0
  2902. \end_layout
  2903. \end_inset
  2904. </cell>
  2905. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2906. \begin_inset Text
  2907. \begin_layout Plain Layout
  2908. 0
  2909. \end_layout
  2910. \end_inset
  2911. </cell>
  2912. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2913. \begin_inset Text
  2914. \begin_layout Plain Layout
  2915. 0
  2916. \end_layout
  2917. \end_inset
  2918. </cell>
  2919. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2920. \begin_inset Text
  2921. \begin_layout Plain Layout
  2922. 0
  2923. \end_layout
  2924. \end_inset
  2925. </cell>
  2926. </row>
  2927. </lyxtabular>
  2928. \end_inset
  2929. \end_layout
  2930. \begin_layout Plain Layout
  2931. \begin_inset Caption Standard
  2932. \begin_layout Plain Layout
  2933. \series bold
  2934. \begin_inset CommandInset label
  2935. LatexCommand label
  2936. name "tab:Number-signif-promoters"
  2937. \end_inset
  2938. Number of differentially modified promoters between naive and memory cells
  2939. at each time point after activation.
  2940. \series default
  2941. This table shows both the number of differentially modified promoters detected
  2942. at a 10% FDR threshold (left half), and the total number of differentially
  2943. modified promoters as estimated using the method of
  2944. \begin_inset CommandInset citation
  2945. LatexCommand cite
  2946. key "Phipson2013"
  2947. literal "false"
  2948. \end_inset
  2949. (right half).
  2950. \end_layout
  2951. \end_inset
  2952. \end_layout
  2953. \end_inset
  2954. \end_layout
  2955. \begin_layout Standard
  2956. \begin_inset ERT
  2957. status open
  2958. \begin_layout Plain Layout
  2959. \backslash
  2960. end{landscape}
  2961. \end_layout
  2962. \begin_layout Plain Layout
  2963. }
  2964. \end_layout
  2965. \end_inset
  2966. \end_layout
  2967. \begin_layout Standard
  2968. \begin_inset Float figure
  2969. placement p
  2970. wide false
  2971. sideways false
  2972. status open
  2973. \begin_layout Plain Layout
  2974. \align center
  2975. \begin_inset Float figure
  2976. wide false
  2977. sideways false
  2978. status collapsed
  2979. \begin_layout Plain Layout
  2980. \align center
  2981. \begin_inset Graphics
  2982. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-promoter-PCA-group-CROP.png
  2983. lyxscale 25
  2984. width 45col%
  2985. groupId pcoa-prom-subfig
  2986. \end_inset
  2987. \end_layout
  2988. \begin_layout Plain Layout
  2989. \begin_inset Caption Standard
  2990. \begin_layout Plain Layout
  2991. \series bold
  2992. \begin_inset CommandInset label
  2993. LatexCommand label
  2994. name "fig:PCoA-H3K4me2-prom"
  2995. \end_inset
  2996. PCoA plot of H3K4me2 promoters, after subtracting surrogate variables
  2997. \end_layout
  2998. \end_inset
  2999. \end_layout
  3000. \end_inset
  3001. \begin_inset space \hfill{}
  3002. \end_inset
  3003. \begin_inset Float figure
  3004. wide false
  3005. sideways false
  3006. status collapsed
  3007. \begin_layout Plain Layout
  3008. \align center
  3009. \begin_inset Graphics
  3010. filename graphics/CD4-csaw/ChIP-seq/H3K4me3-promoter-PCA-group-CROP.png
  3011. lyxscale 25
  3012. width 45col%
  3013. groupId pcoa-prom-subfig
  3014. \end_inset
  3015. \end_layout
  3016. \begin_layout Plain Layout
  3017. \begin_inset Caption Standard
  3018. \begin_layout Plain Layout
  3019. \series bold
  3020. \begin_inset CommandInset label
  3021. LatexCommand label
  3022. name "fig:PCoA-H3K4me3-prom"
  3023. \end_inset
  3024. PCoA plot of H3K4me3 promoters, after subtracting surrogate variables
  3025. \end_layout
  3026. \end_inset
  3027. \end_layout
  3028. \end_inset
  3029. \end_layout
  3030. \begin_layout Plain Layout
  3031. \align center
  3032. \begin_inset Float figure
  3033. wide false
  3034. sideways false
  3035. status collapsed
  3036. \begin_layout Plain Layout
  3037. \align center
  3038. \begin_inset Graphics
  3039. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-promoter-PCA-group-CROP.png
  3040. lyxscale 25
  3041. width 45col%
  3042. groupId pcoa-prom-subfig
  3043. \end_inset
  3044. \end_layout
  3045. \begin_layout Plain Layout
  3046. \begin_inset Caption Standard
  3047. \begin_layout Plain Layout
  3048. \series bold
  3049. \begin_inset CommandInset label
  3050. LatexCommand label
  3051. name "fig:PCoA-H3K27me3-prom"
  3052. \end_inset
  3053. PCoA plot of H3K27me3 promoters, after subtracting surrogate variables
  3054. \end_layout
  3055. \end_inset
  3056. \end_layout
  3057. \end_inset
  3058. \begin_inset space \hfill{}
  3059. \end_inset
  3060. \begin_inset Float figure
  3061. wide false
  3062. sideways false
  3063. status collapsed
  3064. \begin_layout Plain Layout
  3065. \align center
  3066. \begin_inset Graphics
  3067. filename graphics/CD4-csaw/RNA-seq/PCA-final-23-CROP.png
  3068. lyxscale 25
  3069. width 45col%
  3070. groupId pcoa-prom-subfig
  3071. \end_inset
  3072. \end_layout
  3073. \begin_layout Plain Layout
  3074. \begin_inset Caption Standard
  3075. \begin_layout Plain Layout
  3076. \series bold
  3077. \begin_inset CommandInset label
  3078. LatexCommand label
  3079. name "fig:RNA-PCA-group"
  3080. \end_inset
  3081. RNA-seq PCoA showing principal coordinates 2 and 3.
  3082. \end_layout
  3083. \end_inset
  3084. \end_layout
  3085. \end_inset
  3086. \end_layout
  3087. \begin_layout Plain Layout
  3088. \begin_inset Caption Standard
  3089. \begin_layout Plain Layout
  3090. \series bold
  3091. \begin_inset CommandInset label
  3092. LatexCommand label
  3093. name "fig:PCoA-promoters"
  3094. \end_inset
  3095. PCoA plots for promoter ChIP-seq and expression RNA-seq data
  3096. \end_layout
  3097. \end_inset
  3098. \end_layout
  3099. \end_inset
  3100. \end_layout
  3101. \begin_layout Standard
  3102. \begin_inset Flex TODO Note (inline)
  3103. status open
  3104. \begin_layout Plain Layout
  3105. Check up on figure refs in this paragraph
  3106. \end_layout
  3107. \end_inset
  3108. \end_layout
  3109. \begin_layout Standard
  3110. We hypothesized that if naive cells had differentiated into memory cells
  3111. by Day 14, then their patterns of expression and histone modification should
  3112. converge with those of memory cells at Day 14.
  3113. Figure
  3114. \begin_inset CommandInset ref
  3115. LatexCommand ref
  3116. reference "fig:PCoA-promoters"
  3117. plural "false"
  3118. caps "false"
  3119. noprefix "false"
  3120. \end_inset
  3121. shows the patterns of variation in all 3 histone marks in the promoter
  3122. regions of the genome using principal coordinate analysis.
  3123. All 3 marks show a noticeable convergence between the naive and memory
  3124. samples at day 14, visible as an overlapping of the day 14 groups on each
  3125. plot.
  3126. This is consistent with the counts of significantly differentially modified
  3127. promoters and estimates of the total numbers of differentially modified
  3128. promoters shown in Table
  3129. \begin_inset CommandInset ref
  3130. LatexCommand ref
  3131. reference "tab:Number-signif-promoters"
  3132. plural "false"
  3133. caps "false"
  3134. noprefix "false"
  3135. \end_inset
  3136. .
  3137. For all histone marks, evidence of differential modification between naive
  3138. and memory samples was detected at every time point except day 14.
  3139. The day 14 convergence pattern is also present in the RNA-seq data (Figure
  3140. \begin_inset CommandInset ref
  3141. LatexCommand ref
  3142. reference "fig:RNA-PCA-group"
  3143. plural "false"
  3144. caps "false"
  3145. noprefix "false"
  3146. \end_inset
  3147. ), albeit in the 2nd and 3rd principal coordinates, indicating that it is
  3148. not the most dominant pattern driving gene expression.
  3149. Taken together, the data show that promoter histone methylation for these
  3150. 3 histone marks and RNA expression for naive and memory cells are most
  3151. similar at day 14, the furthest time point after activation.
  3152. MOFA was also able to capture this day 14 convergence pattern in latent
  3153. factor 5 (Figure
  3154. \begin_inset CommandInset ref
  3155. LatexCommand ref
  3156. reference "fig:mofa-lf-scatter"
  3157. plural "false"
  3158. caps "false"
  3159. noprefix "false"
  3160. \end_inset
  3161. ), which accounts for shared variation across all 3 histone marks and the
  3162. RNA-seq data, confirming that this convergence is a coordinated pattern
  3163. across all 4 data sets.
  3164. While this observation does not prove that the naive cells have differentiated
  3165. into memory cells at Day 14, it is consistent with that hypothesis.
  3166. \end_layout
  3167. \begin_layout Subsection
  3168. Effect of promoter coverage upstream vs downstream of TSS
  3169. \end_layout
  3170. \begin_layout Standard
  3171. \begin_inset Flex TODO Note (inline)
  3172. status open
  3173. \begin_layout Plain Layout
  3174. There is enough here for multiple sections.
  3175. At least one each for H3K4me2 and H3K27me3.
  3176. \end_layout
  3177. \end_inset
  3178. \end_layout
  3179. \begin_layout Standard
  3180. \begin_inset Flex TODO Note (inline)
  3181. status open
  3182. \begin_layout Plain Layout
  3183. For the figures in this section, the group labels are arbitrary, so if time
  3184. allows, it would be good to manually reorder them in a logical way, e.g.
  3185. most upstream to most downstream.
  3186. If this is done, make sure to update the text with the correct group labels.
  3187. \end_layout
  3188. \end_inset
  3189. \end_layout
  3190. \begin_layout Standard
  3191. \begin_inset ERT
  3192. status open
  3193. \begin_layout Plain Layout
  3194. \backslash
  3195. afterpage{
  3196. \end_layout
  3197. \begin_layout Plain Layout
  3198. \backslash
  3199. begin{landscape}
  3200. \end_layout
  3201. \end_inset
  3202. \end_layout
  3203. \begin_layout Standard
  3204. \begin_inset Float figure
  3205. wide false
  3206. sideways false
  3207. status open
  3208. \begin_layout Plain Layout
  3209. \align center
  3210. \begin_inset Float figure
  3211. wide false
  3212. sideways false
  3213. status open
  3214. \begin_layout Plain Layout
  3215. \align center
  3216. \begin_inset Graphics
  3217. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-clusters-CROP.png
  3218. lyxscale 25
  3219. width 30col%
  3220. groupId covprof-subfig
  3221. \end_inset
  3222. \end_layout
  3223. \begin_layout Plain Layout
  3224. \begin_inset Caption Standard
  3225. \begin_layout Plain Layout
  3226. \series bold
  3227. \begin_inset CommandInset label
  3228. LatexCommand label
  3229. name "fig:H3K4me2-neighborhood-clusters"
  3230. \end_inset
  3231. Average relative coverage for each bin in each cluster
  3232. \end_layout
  3233. \end_inset
  3234. \end_layout
  3235. \end_inset
  3236. \begin_inset space \hfill{}
  3237. \end_inset
  3238. \begin_inset Float figure
  3239. wide false
  3240. sideways false
  3241. status open
  3242. \begin_layout Plain Layout
  3243. \align center
  3244. \begin_inset Graphics
  3245. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-PCA-CROP.png
  3246. lyxscale 25
  3247. width 30col%
  3248. groupId covprof-subfig
  3249. \end_inset
  3250. \end_layout
  3251. \begin_layout Plain Layout
  3252. \begin_inset Caption Standard
  3253. \begin_layout Plain Layout
  3254. \series bold
  3255. \begin_inset CommandInset label
  3256. LatexCommand label
  3257. name "fig:H3K4me2-neighborhood-pca"
  3258. \end_inset
  3259. PCA of relative coverage depth, colored by K-means cluster membership.
  3260. \end_layout
  3261. \end_inset
  3262. \end_layout
  3263. \end_inset
  3264. \begin_inset space \hfill{}
  3265. \end_inset
  3266. \begin_inset Float figure
  3267. wide false
  3268. sideways false
  3269. status open
  3270. \begin_layout Plain Layout
  3271. \align center
  3272. \begin_inset Graphics
  3273. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-expression-CROP.png
  3274. lyxscale 25
  3275. width 30col%
  3276. groupId covprof-subfig
  3277. \end_inset
  3278. \end_layout
  3279. \begin_layout Plain Layout
  3280. \begin_inset Caption Standard
  3281. \begin_layout Plain Layout
  3282. \series bold
  3283. \begin_inset CommandInset label
  3284. LatexCommand label
  3285. name "fig:H3K4me2-neighborhood-expression"
  3286. \end_inset
  3287. Gene expression grouped by promoter coverage clusters.
  3288. \end_layout
  3289. \end_inset
  3290. \end_layout
  3291. \end_inset
  3292. \end_layout
  3293. \begin_layout Plain Layout
  3294. \begin_inset Caption Standard
  3295. \begin_layout Plain Layout
  3296. \series bold
  3297. \begin_inset CommandInset label
  3298. LatexCommand label
  3299. name "fig:H3K4me2-neighborhood"
  3300. \end_inset
  3301. K-means clustering of promoter H3K4me2 relative coverage depth in naive
  3302. day 0 samples.
  3303. \series default
  3304. H3K4me2 ChIP-seq reads were binned into 500-bp windows tiled across each
  3305. promoter from 5
  3306. \begin_inset space ~
  3307. \end_inset
  3308. kbp upstream to 5
  3309. \begin_inset space ~
  3310. \end_inset
  3311. kbp downstream, and the logCPM values were normalized within each promoter
  3312. to an average of 0, yielding relative coverage depths.
  3313. These were then grouped using K-means clustering with
  3314. \begin_inset Formula $K=6$
  3315. \end_inset
  3316. ,
  3317. \series bold
  3318. \series default
  3319. and the average bin values were plotted for each cluster (a).
  3320. The
  3321. \begin_inset Formula $x$
  3322. \end_inset
  3323. -axis is the genomic coordinate of each bin relative to the transcription
  3324. start site, and the
  3325. \begin_inset Formula $y$
  3326. \end_inset
  3327. -axis is the mean relative coverage depth of that bin across all promoters
  3328. in the cluster.
  3329. Each line represents the average
  3330. \begin_inset Quotes eld
  3331. \end_inset
  3332. shape
  3333. \begin_inset Quotes erd
  3334. \end_inset
  3335. of the promoter coverage for promoters in that cluster.
  3336. PCA was performed on the same data, and the first two principal components
  3337. were plotted, coloring each point by its K-means cluster identity (b).
  3338. For each cluster, the distribution of gene expression values was plotted
  3339. (c).
  3340. \end_layout
  3341. \end_inset
  3342. \end_layout
  3343. \end_inset
  3344. \end_layout
  3345. \begin_layout Standard
  3346. \begin_inset ERT
  3347. status open
  3348. \begin_layout Plain Layout
  3349. \backslash
  3350. end{landscape}
  3351. \end_layout
  3352. \begin_layout Plain Layout
  3353. }
  3354. \end_layout
  3355. \end_inset
  3356. \end_layout
  3357. \begin_layout Standard
  3358. To test whether the position of a histone mark relative to a gene's transcriptio
  3359. n start site (TSS) was important, we looked at the
  3360. \begin_inset Quotes eld
  3361. \end_inset
  3362. landscape
  3363. \begin_inset Quotes erd
  3364. \end_inset
  3365. of ChIP-seq read coverage in naive Day 0 samples within 5 kb of each gene's
  3366. TSS by binning reads into 500-bp windows tiled across each promoter. LogCPM
  3367. values were calculated for the bins in each promoter and then the average
  3368. logCPM for each promoter's bins was normalized to zero, such that the values
  3369. represent coverage relative to other regions of the same promoter rather
  3370. than being proportional to absolute read count.
  3371. The promoters were then clustered based on the normalized bin abundances
  3372. using
  3373. \begin_inset Formula $k$
  3374. \end_inset
  3375. -means clustering with
  3376. \begin_inset Formula $K=6$
  3377. \end_inset
  3378. .
  3379. Different values of
  3380. \begin_inset Formula $K$
  3381. \end_inset
  3382. were also tested, but did not substantially change the interpretation of
  3383. the data.
  3384. \end_layout
  3385. \begin_layout Standard
  3386. For H3K4me2, plotting the average bin abundances for each cluster reveals
  3387. a simple pattern (Figure
  3388. \begin_inset CommandInset ref
  3389. LatexCommand ref
  3390. reference "fig:H3K4me2-neighborhood-clusters"
  3391. plural "false"
  3392. caps "false"
  3393. noprefix "false"
  3394. \end_inset
  3395. ): Cluster 5 represents a completely flat promoter coverage profile, likely
  3396. consisting of genes with no H3K4me2 methylation in the promoter.
  3397. All the other clusters represent a continuum of peak positions relative
  3398. to the TSS.
  3399. In order from most upstream to most downstream, they are Clusters 6, 4,
  3400. 3, 1, and 2.
  3401. There do not appear to be any clusters representing coverage patterns other
  3402. than lone peaks, such as coverage troughs or double peaks.
  3403. Next, all promoters were plotted in a PCA plot based on the same relative
  3404. bin abundance data, and colored based on cluster membership (Figure
  3405. \begin_inset CommandInset ref
  3406. LatexCommand ref
  3407. reference "fig:H3K4me2-neighborhood-pca"
  3408. plural "false"
  3409. caps "false"
  3410. noprefix "false"
  3411. \end_inset
  3412. ).
  3413. The PCA plot shows Cluster 5 (the
  3414. \begin_inset Quotes eld
  3415. \end_inset
  3416. no peak
  3417. \begin_inset Quotes erd
  3418. \end_inset
  3419. cluster) at the center, with the other clusters arranged in a counter-clockwise
  3420. arc around it in the order noted above, from most upstream peak to most
  3421. downstream.
  3422. Notably, the
  3423. \begin_inset Quotes eld
  3424. \end_inset
  3425. clusters
  3426. \begin_inset Quotes erd
  3427. \end_inset
  3428. form a single large
  3429. \begin_inset Quotes eld
  3430. \end_inset
  3431. cloud
  3432. \begin_inset Quotes erd
  3433. \end_inset
  3434. with no apparent separation between them, further supporting the conclusion
  3435. that these clusters represent an arbitrary partitioning of a continuous
  3436. distribution of promoter coverage landscapes.
  3437. While the clusters are a useful abstraction that aids in visualization,
  3438. they are ultimately not an accurate representation of the data.
  3439. A better representation might be something like a polar coordinate system
  3440. with the origin at the center of Cluster 5, where the radius represents
  3441. the peak height above the background and the angle represents the peak's
  3442. position upstream or downstream of the TSS.
  3443. The continuous nature of the distribution also explains why different values
  3444. of
  3445. \begin_inset Formula $K$
  3446. \end_inset
  3447. led to similar conclusions.
  3448. \end_layout
  3449. \begin_layout Standard
  3450. \begin_inset Flex TODO Note (inline)
  3451. status open
  3452. \begin_layout Plain Layout
  3453. RNA-seq values in the plots use logCPM but should really use logFPKM or
  3454. logTPM.
  3455. \end_layout
  3456. \end_inset
  3457. \end_layout
  3458. \begin_layout Standard
  3459. \begin_inset Flex TODO Note (inline)
  3460. status open
  3461. \begin_layout Plain Layout
  3462. Should have a table of p-values on difference of means between Cluster 5
  3463. and the others.
  3464. \end_layout
  3465. \end_inset
  3466. \end_layout
  3467. \begin_layout Standard
  3468. To investigate the association between relative peak position and gene expressio
  3469. n, we plotted the Naive Day 0 expression for the genes in each cluster (Figure
  3470. \begin_inset CommandInset ref
  3471. LatexCommand ref
  3472. reference "fig:H3K4me2-neighborhood-expression"
  3473. plural "false"
  3474. caps "false"
  3475. noprefix "false"
  3476. \end_inset
  3477. ).
  3478. Most genes in Cluster 5, the
  3479. \begin_inset Quotes eld
  3480. \end_inset
  3481. no peak
  3482. \begin_inset Quotes erd
  3483. \end_inset
  3484. cluster, have low expression values.
  3485. Taking this as the
  3486. \begin_inset Quotes eld
  3487. \end_inset
  3488. baseline
  3489. \begin_inset Quotes erd
  3490. \end_inset
  3491. distribution when no H3K4me2 methylation is present, we can compare the
  3492. other clusters' distributions to determine which peak positions are associated
  3493. with elevated expression.
  3494. As might be expected, the 3 clusters representing peaks closest to the
  3495. TSS, Clusters 1, 3, and 4, show the highest average expression distributions.
  3496. Specifically, these clusters all have their highest ChIP-seq abundance
  3497. within 1 kb of the TSS, consistent with the previously determined promoter
  3498. radius.
  3499. In contrast, Cluster 6, which represents peaks several kb upstream of the
  3500. TSS, shows a slightly higher average expression than baseline, while Cluster
  3501. 2, which represents peaks several kb downstream, doesn't appear to show
  3502. any appreciable difference.
  3503. Interestingly, the cluster with the highest average expression is Cluster
  3504. 1, which represents peaks about 1 kb downstream of the TSS, rather than
  3505. Cluster 3, which represents peaks centered directly at the TSS.
  3506. This suggests that conceptualizing the promoter as a region centered on
  3507. the TSS with a certain
  3508. \begin_inset Quotes eld
  3509. \end_inset
  3510. radius
  3511. \begin_inset Quotes erd
  3512. \end_inset
  3513. may be an oversimplification – a peak that is a specific distance from
  3514. the TSS may have a different degree of influence depending on whether it
  3515. is upstream or downstream of the TSS.
  3516. \end_layout
  3517. \begin_layout Standard
  3518. \begin_inset ERT
  3519. status open
  3520. \begin_layout Plain Layout
  3521. \backslash
  3522. afterpage{
  3523. \end_layout
  3524. \begin_layout Plain Layout
  3525. \backslash
  3526. begin{landscape}
  3527. \end_layout
  3528. \end_inset
  3529. \end_layout
  3530. \begin_layout Standard
  3531. \begin_inset Float figure
  3532. wide false
  3533. sideways false
  3534. status open
  3535. \begin_layout Plain Layout
  3536. \align center
  3537. \begin_inset Float figure
  3538. wide false
  3539. sideways false
  3540. status open
  3541. \begin_layout Plain Layout
  3542. \align center
  3543. \begin_inset Graphics
  3544. filename graphics/CD4-csaw/ChIP-seq/H3K4me3-neighborhood-clusters-CROP.png
  3545. lyxscale 25
  3546. width 30col%
  3547. groupId covprof-subfig
  3548. \end_inset
  3549. \end_layout
  3550. \begin_layout Plain Layout
  3551. \begin_inset Caption Standard
  3552. \begin_layout Plain Layout
  3553. \series bold
  3554. \begin_inset CommandInset label
  3555. LatexCommand label
  3556. name "fig:H3K4me3-neighborhood-clusters"
  3557. \end_inset
  3558. Average relative coverage for each bin in each cluster
  3559. \end_layout
  3560. \end_inset
  3561. \end_layout
  3562. \end_inset
  3563. \begin_inset space \hfill{}
  3564. \end_inset
  3565. \begin_inset Float figure
  3566. wide false
  3567. sideways false
  3568. status open
  3569. \begin_layout Plain Layout
  3570. \align center
  3571. \begin_inset Graphics
  3572. filename graphics/CD4-csaw/ChIP-seq/H3K4me3-neighborhood-PCA-CROP.png
  3573. lyxscale 25
  3574. width 30col%
  3575. groupId covprof-subfig
  3576. \end_inset
  3577. \end_layout
  3578. \begin_layout Plain Layout
  3579. \begin_inset Caption Standard
  3580. \begin_layout Plain Layout
  3581. \series bold
  3582. \begin_inset CommandInset label
  3583. LatexCommand label
  3584. name "fig:H3K4me3-neighborhood-pca"
  3585. \end_inset
  3586. PCA of relative coverage depth, colored by K-means cluster membership.
  3587. \end_layout
  3588. \end_inset
  3589. \end_layout
  3590. \end_inset
  3591. \begin_inset space \hfill{}
  3592. \end_inset
  3593. \begin_inset Float figure
  3594. wide false
  3595. sideways false
  3596. status open
  3597. \begin_layout Plain Layout
  3598. \align center
  3599. \begin_inset Graphics
  3600. filename graphics/CD4-csaw/ChIP-seq/H3K4me3-neighborhood-expression-CROP.png
  3601. lyxscale 25
  3602. width 30col%
  3603. groupId covprof-subfig
  3604. \end_inset
  3605. \end_layout
  3606. \begin_layout Plain Layout
  3607. \begin_inset Caption Standard
  3608. \begin_layout Plain Layout
  3609. \series bold
  3610. \begin_inset CommandInset label
  3611. LatexCommand label
  3612. name "fig:H3K4me3-neighborhood-expression"
  3613. \end_inset
  3614. Gene expression grouped by promoter coverage clusters.
  3615. \end_layout
  3616. \end_inset
  3617. \end_layout
  3618. \end_inset
  3619. \end_layout
  3620. \begin_layout Plain Layout
  3621. \begin_inset Caption Standard
  3622. \begin_layout Plain Layout
  3623. \series bold
  3624. \begin_inset CommandInset label
  3625. LatexCommand label
  3626. name "fig:H3K4me3-neighborhood"
  3627. \end_inset
  3628. K-means clustering of promoter H3K4me3 relative coverage depth in naive
  3629. day 0 samples.
  3630. \series default
  3631. H3K4me3 ChIP-seq reads were binned into 500-bp windows tiled across each
  3632. promoter from 5
  3633. \begin_inset space ~
  3634. \end_inset
  3635. kbp upstream to 5
  3636. \begin_inset space ~
  3637. \end_inset
  3638. kbp downstream, and the logCPM values were normalized within each promoter
  3639. to an average of 0, yielding relative coverage depths.
  3640. These were then grouped using K-means clustering with
  3641. \begin_inset Formula $K=6$
  3642. \end_inset
  3643. ,
  3644. \series bold
  3645. \series default
  3646. and the average bin values were plotted for each cluster (a).
  3647. The
  3648. \begin_inset Formula $x$
  3649. \end_inset
  3650. -axis is the genomic coordinate of each bin relative to the transcription
  3651. start site, and the
  3652. \begin_inset Formula $y$
  3653. \end_inset
  3654. -axis is the mean relative coverage depth of that bin across all promoters
  3655. in the cluster.
  3656. Each line represents the average
  3657. \begin_inset Quotes eld
  3658. \end_inset
  3659. shape
  3660. \begin_inset Quotes erd
  3661. \end_inset
  3662. of the promoter coverage for promoters in that cluster.
  3663. PCA was performed on the same data, and the first two principal components
  3664. were plotted, coloring each point by its K-means cluster identity (b).
  3665. For each cluster, the distribution of gene expression values was plotted
  3666. (c).
  3667. \end_layout
  3668. \end_inset
  3669. \end_layout
  3670. \end_inset
  3671. \end_layout
  3672. \begin_layout Standard
  3673. \begin_inset ERT
  3674. status open
  3675. \begin_layout Plain Layout
  3676. \backslash
  3677. end{landscape}
  3678. \end_layout
  3679. \begin_layout Plain Layout
  3680. }
  3681. \end_layout
  3682. \end_inset
  3683. \end_layout
  3684. \begin_layout Standard
  3685. \begin_inset Flex TODO Note (inline)
  3686. status open
  3687. \begin_layout Plain Layout
  3688. Is there more to say here?
  3689. \end_layout
  3690. \end_inset
  3691. \end_layout
  3692. \begin_layout Standard
  3693. All observations described above for H3K4me2 ChIP-seq also appear to hold
  3694. for H3K4me3 (Figure
  3695. \begin_inset CommandInset ref
  3696. LatexCommand ref
  3697. reference "fig:H3K4me3-neighborhood"
  3698. plural "false"
  3699. caps "false"
  3700. noprefix "false"
  3701. \end_inset
  3702. ).
  3703. \end_layout
  3704. \begin_layout Subsection
  3705. Promoter coverage H3K27me3
  3706. \end_layout
  3707. \begin_layout Standard
  3708. \begin_inset ERT
  3709. status open
  3710. \begin_layout Plain Layout
  3711. \backslash
  3712. afterpage{
  3713. \end_layout
  3714. \begin_layout Plain Layout
  3715. \backslash
  3716. begin{landscape}
  3717. \end_layout
  3718. \end_inset
  3719. \end_layout
  3720. \begin_layout Standard
  3721. \begin_inset Float figure
  3722. wide false
  3723. sideways false
  3724. status collapsed
  3725. \begin_layout Plain Layout
  3726. \align center
  3727. \begin_inset Float figure
  3728. wide false
  3729. sideways false
  3730. status collapsed
  3731. \begin_layout Plain Layout
  3732. \align center
  3733. \begin_inset Graphics
  3734. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-clusters-CROP.png
  3735. lyxscale 25
  3736. width 30col%
  3737. groupId covprof-subfig
  3738. \end_inset
  3739. \end_layout
  3740. \begin_layout Plain Layout
  3741. \begin_inset Caption Standard
  3742. \begin_layout Plain Layout
  3743. \series bold
  3744. \begin_inset CommandInset label
  3745. LatexCommand label
  3746. name "fig:H3K27me3-neighborhood-clusters"
  3747. \end_inset
  3748. Average relative coverage for each bin in each cluster
  3749. \end_layout
  3750. \end_inset
  3751. \end_layout
  3752. \end_inset
  3753. \begin_inset space \hfill{}
  3754. \end_inset
  3755. \begin_inset Float figure
  3756. wide false
  3757. sideways false
  3758. status collapsed
  3759. \begin_layout Plain Layout
  3760. \align center
  3761. \begin_inset Graphics
  3762. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-PCA-CROP.png
  3763. lyxscale 25
  3764. width 30col%
  3765. groupId covprof-subfig
  3766. \end_inset
  3767. \end_layout
  3768. \begin_layout Plain Layout
  3769. \begin_inset Caption Standard
  3770. \begin_layout Plain Layout
  3771. \series bold
  3772. \begin_inset CommandInset label
  3773. LatexCommand label
  3774. name "fig:H3K27me3-neighborhood-pca"
  3775. \end_inset
  3776. PCA of relative coverage depth, colored by K-means cluster membership.
  3777. \end_layout
  3778. \end_inset
  3779. \end_layout
  3780. \end_inset
  3781. \begin_inset space \hfill{}
  3782. \end_inset
  3783. \begin_inset Float figure
  3784. wide false
  3785. sideways false
  3786. status collapsed
  3787. \begin_layout Plain Layout
  3788. \align center
  3789. \begin_inset Graphics
  3790. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-expression-CROP.png
  3791. lyxscale 25
  3792. width 30col%
  3793. groupId covprof-subfig
  3794. \end_inset
  3795. \end_layout
  3796. \begin_layout Plain Layout
  3797. \begin_inset Caption Standard
  3798. \begin_layout Plain Layout
  3799. \series bold
  3800. \begin_inset CommandInset label
  3801. LatexCommand label
  3802. name "fig:H3K27me3-neighborhood-expression"
  3803. \end_inset
  3804. Gene expression grouped by promoter coverage clusters.
  3805. \end_layout
  3806. \end_inset
  3807. \end_layout
  3808. \end_inset
  3809. \end_layout
  3810. \begin_layout Plain Layout
  3811. \begin_inset Caption Standard
  3812. \begin_layout Plain Layout
  3813. \series bold
  3814. \begin_inset CommandInset label
  3815. LatexCommand label
  3816. name "fig:H3K27me3-neighborhood"
  3817. \end_inset
  3818. K-means clustering of promoter H3K27me3 relative coverage depth in naive
  3819. day 0 samples.
  3820. \series default
  3821. H3K27me3 ChIP-seq reads were binned into 500-bp windows tiled across each
  3822. promoter from 5
  3823. \begin_inset space ~
  3824. \end_inset
  3825. kbp upstream to 5
  3826. \begin_inset space ~
  3827. \end_inset
  3828. kbp downstream, and the logCPM values were normalized within each promoter
  3829. to an average of 0, yielding relative coverage depths.
  3830. These were then grouped using K-means clustering with
  3831. \begin_inset Formula $K=6$
  3832. \end_inset
  3833. ,
  3834. \series bold
  3835. \series default
  3836. and the average bin values were plotted for each cluster (a).
  3837. The
  3838. \begin_inset Formula $x$
  3839. \end_inset
  3840. -axis is the genomic coordinate of each bin relative to the transcription
  3841. start site, and the
  3842. \begin_inset Formula $y$
  3843. \end_inset
  3844. -axis is the mean relative coverage depth of that bin across all promoters
  3845. in the cluster.
  3846. Each line represents the average
  3847. \begin_inset Quotes eld
  3848. \end_inset
  3849. shape
  3850. \begin_inset Quotes erd
  3851. \end_inset
  3852. of the promoter coverage for promoters in that cluster.
  3853. PCA was performed on the same data, and the first two principal components
  3854. were plotted, coloring each point by its K-means cluster identity (b).
  3855. For each cluster, the distribution of gene expression values was plotted
  3856. (c).
  3857. \end_layout
  3858. \end_inset
  3859. \end_layout
  3860. \end_inset
  3861. \end_layout
  3862. \begin_layout Standard
  3863. \begin_inset ERT
  3864. status open
  3865. \begin_layout Plain Layout
  3866. \backslash
  3867. end{landscape}
  3868. \end_layout
  3869. \begin_layout Plain Layout
  3870. }
  3871. \end_layout
  3872. \end_inset
  3873. \end_layout
  3874. \begin_layout Itemize
  3875. H3K4me peaks seem to correlate with increased expression as long as they
  3876. are anywhere near the TSS
  3877. \end_layout
  3878. \begin_layout Itemize
  3879. H3K27me3 peaks can have different correlations to gene expression depending
  3880. on their position relative to TSS (e.g.
  3881. upstream vs downstream). Results consistent with
  3882. \begin_inset CommandInset citation
  3883. LatexCommand cite
  3884. key "Young2011"
  3885. literal "false"
  3886. \end_inset
  3887. \end_layout
  3888. \begin_layout Standard
  3889. \begin_inset Flex TODO Note (inline)
  3890. status open
  3891. \begin_layout Plain Layout
  3892. Show the figures where the negative result ended this line of inquiry
  3893. \end_layout
  3894. \end_inset
  3895. \end_layout
  3896. \begin_layout Section
  3897. Discussion
  3898. \end_layout
  3899. \begin_layout Subsection
  3900. Effective promoter radius
  3901. \end_layout
  3902. \begin_layout Itemize
  3903. "Promoter radius" is not constant and must be defined empirically for a
  3904. given data set.
  3905. Coverage within promoter radius has an expression correlation as well
  3906. \end_layout
  3907. \begin_layout Itemize
  3908. Further study required to demonstrate functional consequences of effective
  3909. promoter radius (e.g.
  3910. show diminished association with gene expression outside radius)
  3911. \end_layout
  3912. \begin_layout Subsection
  3913. Convergence
  3914. \end_layout
  3915. \begin_layout Standard
  3916. \begin_inset Flex TODO Note (inline)
  3917. status open
  3918. \begin_layout Plain Layout
  3919. Look up some more references for these histone marks being involved in memory
  3920. differentiation.
  3921. (Ask Sarah)
  3922. \end_layout
  3923. \end_inset
  3924. \end_layout
  3925. \begin_layout Itemize
  3926. Naive-to-memory convergence implies that naive cells are differentiating
  3927. into memory cells, and that gene expression and H3K4/K27 methylation are
  3928. involved in this differentiation
  3929. \end_layout
  3930. \begin_deeper
  3931. \begin_layout Itemize
  3932. Convergence is consistent with Lamere2016 fig 8
  3933. \begin_inset CommandInset citation
  3934. LatexCommand cite
  3935. key "LaMere2016"
  3936. literal "false"
  3937. \end_inset
  3938. (which was created without the benefit of SVA)
  3939. \end_layout
  3940. \begin_layout Itemize
  3941. H3K27me3, canonically regarded as a deactivating mark, seems to have a more
  3942. complex effect
  3943. \end_layout
  3944. \end_deeper
  3945. \begin_layout Standard
  3946. \begin_inset Float figure
  3947. wide false
  3948. sideways false
  3949. status open
  3950. \begin_layout Plain Layout
  3951. \begin_inset Flex TODO Note (inline)
  3952. status open
  3953. \begin_layout Plain Layout
  3954. This float should ideally go right after the section header, but doing so
  3955. crashes LaTeX.
  3956. \end_layout
  3957. \end_inset
  3958. \end_layout
  3959. \begin_layout Plain Layout
  3960. \align center
  3961. \begin_inset Graphics
  3962. filename graphics/CD4-csaw/LaMere2016_fig8.pdf
  3963. lyxscale 50
  3964. width 60col%
  3965. groupId colwidth
  3966. \end_inset
  3967. \end_layout
  3968. \begin_layout Plain Layout
  3969. \begin_inset Caption Standard
  3970. \begin_layout Plain Layout
  3971. \series bold
  3972. \begin_inset CommandInset label
  3973. LatexCommand label
  3974. name "fig:Lamere2016-Fig8"
  3975. \end_inset
  3976. Lamere 2016 Figure 8
  3977. \begin_inset CommandInset citation
  3978. LatexCommand cite
  3979. key "LaMere2016"
  3980. literal "false"
  3981. \end_inset
  3982. .
  3983. \series default
  3984. Reproduced with permission.
  3985. \end_layout
  3986. \end_inset
  3987. \end_layout
  3988. \end_inset
  3989. \end_layout
  3990. \begin_layout Subsection
  3991. Positional
  3992. \end_layout
  3993. \begin_layout Itemize
  3994. TSS positional coverage, hints of something interesting but no clear conclusions
  3995. \end_layout
  3996. \begin_layout Subsection
  3997. Workflow
  3998. \end_layout
  3999. \begin_layout Standard
  4000. \begin_inset ERT
  4001. status open
  4002. \begin_layout Plain Layout
  4003. \backslash
  4004. afterpage{
  4005. \end_layout
  4006. \begin_layout Plain Layout
  4007. \backslash
  4008. begin{landscape}
  4009. \end_layout
  4010. \end_inset
  4011. \end_layout
  4012. \begin_layout Standard
  4013. \begin_inset Float figure
  4014. wide false
  4015. sideways false
  4016. status open
  4017. \begin_layout Plain Layout
  4018. \align center
  4019. \begin_inset Graphics
  4020. filename graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
  4021. lyxscale 50
  4022. width 100col%
  4023. height 95theight%
  4024. \end_inset
  4025. \end_layout
  4026. \begin_layout Plain Layout
  4027. \begin_inset Caption Standard
  4028. \begin_layout Plain Layout
  4029. \begin_inset CommandInset label
  4030. LatexCommand label
  4031. name "fig:rulegraph"
  4032. \end_inset
  4033. \series bold
  4034. Dependency graph of steps in reproducible workflow
  4035. \end_layout
  4036. \end_inset
  4037. \end_layout
  4038. \end_inset
  4039. \end_layout
  4040. \begin_layout Standard
  4041. \begin_inset ERT
  4042. status open
  4043. \begin_layout Plain Layout
  4044. \backslash
  4045. end{landscape}
  4046. \end_layout
  4047. \begin_layout Plain Layout
  4048. }
  4049. \end_layout
  4050. \end_inset
  4051. \end_layout
  4052. \begin_layout Itemize
  4053. Discuss advantages of developing using a reproducible workflow
  4054. \end_layout
  4055. \begin_deeper
  4056. \begin_layout Itemize
  4057. Decision-making based on trying every option and running the workflow downstream
  4058. to see the effects
  4059. \end_layout
  4060. \end_deeper
  4061. \begin_layout Subsection
  4062. Data quality issues limit conclusions
  4063. \end_layout
  4064. \begin_layout Chapter
  4065. Improving array-based diagnostics for transplant rejection by optimizing
  4066. data preprocessing
  4067. \end_layout
  4068. \begin_layout Standard
  4069. \begin_inset Note Note
  4070. status open
  4071. \begin_layout Plain Layout
  4072. Chapter author list: Me, Sunil, Tom, Padma, Dan
  4073. \end_layout
  4074. \end_inset
  4075. \end_layout
  4076. \begin_layout Section
  4077. Approach
  4078. \end_layout
  4079. \begin_layout Subsection
  4080. Proper pre-processing is essential for array data
  4081. \end_layout
  4082. \begin_layout Standard
  4083. \begin_inset Flex TODO Note (inline)
  4084. status open
  4085. \begin_layout Plain Layout
  4086. This section could probably use some citations
  4087. \end_layout
  4088. \end_inset
  4089. \end_layout
  4090. \begin_layout Standard
  4091. Microarrays, bead arrays, and similar assays produce raw data in the form
  4092. of fluorescence intensity measurements, with each intensity measurement
  4093. proportional to the abundance of some fluorescently-labelled target DNA
  4094. or RNA sequence that base pairs to a specific probe sequence.
  4095. However, these measurements for each probe are also affected by many technical
  4096. confounding factors, such as the concentration of target material, strength
  4097. of off-target binding, and the sensitivity of the imaging sensor.
  4098. Some array designs also use multiple probe sequences for each target.
  4099. Hence, extensive pre-processing of array data is necessary to normalize
  4100. out the effects of these technical factors and summarize the information
  4101. from multiple probes to arrive at a single usable estimate of abundance
  4102. or other relevant quantity, such as a ratio of two abundances, for each
  4103. target.
  4104. \end_layout
  4105. \begin_layout Standard
  4106. The choice of pre-processing algorithms used in the analysis of an array
  4107. data set can have a large effect on the results of that analysis.
  4108. However, despite their importance, these steps are often neglected or rushed
  4109. in order to get to the more scientifically interesting analysis steps involving
  4110. the actual biology of the system under study.
  4111. Hence, it is often possible to achieve substantial gains in statistical
  4112. power, model goodness-of-fit, or other relevant performance measures, by
  4113. checking the assumptions made by each preprocessing step and choosing specific
  4114. normalization methods tailored to the specific goals of the current analysis.
  4115. \end_layout
  4116. \begin_layout Subsection
  4117. Clinical diagnostic applications for microarrays require single-channel
  4118. normalization
  4119. \end_layout
  4120. \begin_layout Standard
  4121. As the cost of performing microarray assays falls, there is increasing interest
  4122. in using genomic assays for diagnostic purposes, such as distinguishing
  4123. healthy transplants (TX) from transplants undergoing acute rejection (AR)
  4124. or acute dysfunction with no rejection (ADNR).
  4125. However, the standard normalization algorithm used for microarray data,
  4126. Robust Multi-chip Average (RMA)
  4127. \begin_inset CommandInset citation
  4128. LatexCommand cite
  4129. key "Irizarry2003a"
  4130. literal "false"
  4131. \end_inset
  4132. , is not applicable in a clinical setting.
  4133. Two of the steps in RMA, quantile normalization and probe summarization
  4134. by median polish, depend on every array in the data set being normalized.
  4135. This means that adding or removing any arrays from a data set changes the
  4136. normalized values for all arrays, and data sets that have been normalized
  4137. separately cannot be compared to each other.
  4138. Hence, when using RMA, any arrays to be analyzed together must also be
  4139. normalized together, and the set of arrays included in the data set must
  4140. be held constant throughout an analysis.
  4141. \end_layout
  4142. \begin_layout Standard
  4143. These limitations present serious impediments to the use of arrays as a
  4144. diagnostic tool.
  4145. When training a classifier, the samples to be classified must not be involved
  4146. in any step of the training process, lest their inclusion bias the training
  4147. process.
  4148. Once a classifier is deployed in a clinical setting, the samples to be
  4149. classified will not even
  4150. \emph on
  4151. exist
  4152. \emph default
  4153. at the time of training, so including them would be impossible even if
  4154. it were statistically justifiable.
  4155. Therefore, any machine learning application for microarrays demands that
  4156. the normalized expression values computed for an array must depend only
  4157. on information contained within that array.
  4158. This would ensure that each array's normalization is independent of every
  4159. other array, and that arrays normalized separately can still be compared
  4160. to each other without bias.
  4161. Such a normalization is commonly referred to as
  4162. \begin_inset Quotes eld
  4163. \end_inset
  4164. single-channel normalization
  4165. \begin_inset Quotes erd
  4166. \end_inset
  4167. .
  4168. \end_layout
  4169. \begin_layout Standard
  4170. Frozen RMA (fRMA) addresses these concerns by replacing the quantile normalizati
  4171. on and median polish with alternatives that do not introduce inter-array
  4172. dependence, allowing each array to be normalized independently of all others
  4173. \begin_inset CommandInset citation
  4174. LatexCommand cite
  4175. key "McCall2010"
  4176. literal "false"
  4177. \end_inset
  4178. .
  4179. Quantile normalization is performed against a pre-generated set of quantiles
  4180. learned from a collection of 850 publicly available arrays sampled from
  4181. a wide variety of tissues in the Gene Expression Omnibus (GEO).
  4182. Each array's probe intensity distribution is normalized against these pre-gener
  4183. ated quantiles.
  4184. The median polish step is replaced with a robust weighted average of probe
  4185. intensities, using inverse variance weights learned from the same public
  4186. GEO data.
  4187. The result is a normalization that satisfies the requirements mentioned
  4188. above: each array is normalized independently of all others, and any two
  4189. normalized arrays can be compared directly to each other.
  4190. \end_layout
  4191. \begin_layout Standard
  4192. One important limitation of fRMA is that it requires a separate reference
  4193. data set from which to learn the parameters (reference quantiles and probe
  4194. weights) that will be used to normalize each array.
  4195. These parameters are specific to a given array platform, and pre-generated
  4196. parameters are only provided for the most common platforms, such as Affymetrix
  4197. hgu133plus2.
  4198. For a less common platform, such as hthgu133pluspm, it is necessary to
  4199. learn custom parameters from in-house data before fRMA can be used to normalize
  4200. samples on that platform
  4201. \begin_inset CommandInset citation
  4202. LatexCommand cite
  4203. key "McCall2011"
  4204. literal "false"
  4205. \end_inset
  4206. .
  4207. \end_layout
  4208. \begin_layout Standard
  4209. One other option is the aptly-named Single Channel Array Normalization (SCAN),
  4210. which adapts a normalization method originally designed for tiling arrays
  4211. \begin_inset CommandInset citation
  4212. LatexCommand cite
  4213. key "Piccolo2012"
  4214. literal "false"
  4215. \end_inset
  4216. .
  4217. SCAN is truly single-channel in that it does not require a set of normalization
  4218. parameters estimated from an external set of reference samples like fRMA
  4219. does.
  4220. \end_layout
  4221. \begin_layout Subsection
  4222. Heteroskedasticity must be accounted for in methylation array data
  4223. \end_layout
  4224. \begin_layout Standard
  4225. DNA methylation arrays are a relatively new kind of assay that uses microarrays
  4226. to measure the degree of methylation on cytosines in specific regions arrayed
  4227. across the genome.
  4228. First, bisulfite treatment converts all unmethylated cytosines to uracil
  4229. (which then become thymine after amplification) while leaving methylated
  4230. cytosines unaffected.
  4231. Then, each target region is interrogated with two probes: one binds to
  4232. the original genomic sequence and interrogates the level of methylated
  4233. DNA, and the other binds to the same sequence with all cytosines replaced
  4234. by thymines and interrogates the level of unmethylated DNA.
  4235. \end_layout
  4236. \begin_layout Standard
  4237. \begin_inset Float figure
  4238. wide false
  4239. sideways false
  4240. status collapsed
  4241. \begin_layout Plain Layout
  4242. \align center
  4243. \begin_inset Graphics
  4244. filename graphics/methylvoom/sigmoid.pdf
  4245. lyxscale 50
  4246. width 60col%
  4247. groupId colwidth
  4248. \end_inset
  4249. \end_layout
  4250. \begin_layout Plain Layout
  4251. \begin_inset Caption Standard
  4252. \begin_layout Plain Layout
  4253. \begin_inset CommandInset label
  4254. LatexCommand label
  4255. name "fig:Sigmoid-beta-m-mapping"
  4256. \end_inset
  4257. \series bold
  4258. Sigmoid shape of the mapping between β and M values
  4259. \end_layout
  4260. \end_inset
  4261. \end_layout
  4262. \end_inset
  4263. \end_layout
  4264. \begin_layout Standard
  4265. After normalization, these two probe intensities are summarized in one of
  4266. two ways, each with advantages and disadvantages.
  4267. β
  4268. \series bold
  4269. \series default
  4270. values, interpreted as fraction of DNA copies methylated, range from 0 to
  4271. 1.
  4272. β
  4273. \series bold
  4274. \series default
  4275. values are conceptually easy to interpret, but the constrained range makes
  4276. them unsuitable for linear modeling, and their error distributions are
  4277. highly non-normal, which also frustrates linear modeling.
  4278. M-values, interpreted as the log ratio of methylated to unmethylated copies,
  4279. are computed by mapping the beta values from
  4280. \begin_inset Formula $[0,1]$
  4281. \end_inset
  4282. onto
  4283. \begin_inset Formula $(-\infty,+\infty)$
  4284. \end_inset
  4285. using a sigmoid curve (Figure
  4286. \begin_inset CommandInset ref
  4287. LatexCommand ref
  4288. reference "fig:Sigmoid-beta-m-mapping"
  4289. plural "false"
  4290. caps "false"
  4291. noprefix "false"
  4292. \end_inset
  4293. ).
  4294. This transformation results in values with better statistical properties:
  4295. the unconstrained range is suitable for linear modeling, and the error
  4296. distributions are more normal.
  4297. Hence, most linear modeling and other statistical testing on methylation
  4298. arrays is performed using M-values.
  4299. \end_layout
  4300. \begin_layout Standard
  4301. However, the steep slope of the sigmoid transformation near 0 and 1 tends
  4302. to exaggerate small differences in β values near those extremes, which
  4303. in turn amplifies the error in those values, leading to a U-shaped trend
  4304. in the mean-variance curve: extreme values have higher variances than values
  4305. near the middle.
  4306. This mean-variance dependency must be accounted for when fitting the linear
  4307. model for differential methylation, or else the variance will be systematically
  4308. overestimated for probes with moderate M-values and underestimated for
  4309. probes with extreme M-values.
  4310. This is particularly undesirable for methylation data because the intermediate
  4311. M-values are the ones of most interest, since they are more likely to represent
  4312. areas of varying methylation, whereas extreme M-values typically represent
  4313. complete methylation or complete lack of methylation.
  4314. \end_layout
  4315. \begin_layout Standard
  4316. RNA-seq read count data are also known to show heteroskedasticity, and the
  4317. voom method was introduced for modeling this heteroskedasticity by estimating
  4318. the mean-variance trend in the data and using this trend to assign precision
  4319. weights to each observation
  4320. \begin_inset CommandInset citation
  4321. LatexCommand cite
  4322. key "Law2013"
  4323. literal "false"
  4324. \end_inset
  4325. .
  4326. While methylation array data are not derived from counts and have a very
  4327. different mean-variance relationship from that of typical RNA-seq data,
  4328. the voom method makes no specific assumptions on the shape of the mean-variance
  4329. relationship – it only assumes that the relationship can be modeled as
  4330. a smooth curve.
  4331. Hence, the method is sufficiently general to model the mean-variance relationsh
  4332. ip in methylation array data.
  4333. However, the standard implementation of voom assumes that the input is
  4334. given in raw read counts, and it must be adapted to run on methylation
  4335. M-values.
  4336. \end_layout
  4337. \begin_layout Section
  4338. Methods
  4339. \end_layout
  4340. \begin_layout Subsection
  4341. Evaluation of classifier performance with different normalization methods
  4342. \end_layout
  4343. \begin_layout Standard
  4344. For testing different expression microarray normalizations, a data set of
  4345. 157 hgu133plus2 arrays was used, consisting of blood samples from kidney
  4346. transplant patients whose grafts had been graded as TX, AR, or ADNR via
  4347. biopsy and histology (46 TX, 69 AR, 42 ADNR)
  4348. \begin_inset CommandInset citation
  4349. LatexCommand cite
  4350. key "Kurian2014"
  4351. literal "true"
  4352. \end_inset
  4353. .
  4354. Additionally, an external validation set of 75 samples was gathered from
  4355. public GEO data (37 TX, 38 AR, no ADNR).
  4356. \end_layout
  4357. \begin_layout Standard
  4358. \begin_inset Flex TODO Note (inline)
  4359. status open
  4360. \begin_layout Plain Layout
  4361. Find appropriate GEO identifiers if possible.
  4362. Kurian 2014 says GSE15296, but this seems to be different data.
  4363. I also need to look up the GEO accession for the external validation set.
  4364. \end_layout
  4365. \end_inset
  4366. \end_layout
  4367. \begin_layout Standard
  4368. To evaluate the effect of each normalization on classifier performance,
  4369. the same classifier training and validation procedure was used after each
  4370. normalization method.
  4371. The PAM package was used to train a nearest shrunken centroid classifier
  4372. on the training set and select the appropriate threshold for centroid shrinking.
  4373. Then the trained classifier was used to predict the class probabilities
  4374. of each validation sample.
  4375. From these class probabilities, ROC curves and area-under-curve (AUC) values
  4376. were generated
  4377. \begin_inset CommandInset citation
  4378. LatexCommand cite
  4379. key "Turck2011"
  4380. literal "false"
  4381. \end_inset
  4382. .
  4383. Each normalization was tested on two different sets of training and validation
  4384. samples.
  4385. For internal validation, the 115 TX and AR arrays in the internal set were
  4386. split at random into two equal sized sets, one for training and one for
  4387. validation, each containing the same numbers of TX and AR samples as the
  4388. other set.
  4389. For external validation, the full set of 115 TX and AR samples was used
  4390. as a training set, and the 75 external TX and AR samples were used as the
  4391. validation set.
  4392. Thus, 2 ROC curves and AUC values were generated for each normalization
  4393. method: one internal and one external.
  4394. Because the external validation set contains no ADNR samples, only classificati
  4395. on of TX and AR samples was considered.
  4396. The ADNR samples were included during normalization but excluded from all
  4397. classifier training and validation.
  4398. This ensures that the performance on internal and external validation sets
  4399. is directly comparable, since both are performing the same task: distinguishing
  4400. TX from AR.
  4401. \end_layout
  4402. \begin_layout Standard
  4403. \begin_inset Flex TODO Note (inline)
  4404. status open
  4405. \begin_layout Plain Layout
  4406. Summarize the get.best.threshold algorithm for PAM threshold selection, or
  4407. just put the code online?
  4408. \end_layout
  4409. \end_inset
  4410. \end_layout
  4411. \begin_layout Standard
  4412. Six different normalization strategies were evaluated.
  4413. First, 2 well-known non-single-channel normalization methods were considered:
  4414. RMA and dChip
  4415. \begin_inset CommandInset citation
  4416. LatexCommand cite
  4417. key "Li2001,Irizarry2003a"
  4418. literal "false"
  4419. \end_inset
  4420. .
  4421. Since RMA produces expression values on a log2 scale and dChip does not,
  4422. the values from dChip were log2 transformed after normalization.
  4423. Next, RMA and dChip followed by Global Rank-invariant Set Normalization
  4424. (GRSN) were tested
  4425. \begin_inset CommandInset citation
  4426. LatexCommand cite
  4427. key "Pelz2008"
  4428. literal "false"
  4429. \end_inset
  4430. .
  4431. Post-processing with GRSN does not turn RMA or dChip into single-channel
  4432. methods, but it may help mitigate batch effects and is therefore useful
  4433. as a benchmark.
  4434. Lastly, the two single-channel normalization methods, fRMA and SCAN, were
  4435. tested
  4436. \begin_inset CommandInset citation
  4437. LatexCommand cite
  4438. key "McCall2010,Piccolo2012"
  4439. literal "false"
  4440. \end_inset
  4441. .
  4442. When evaluating internal validation performance, only the 157 internal samples
  4443. were normalized; when evaluating external validation performance, all 157
  4444. internal samples and 75 external samples were normalized together.
  4445. \end_layout
  4446. \begin_layout Standard
  4447. For demonstrating the problem with separate normalization of training and
  4448. validation data, one additional normalization was performed: the internal
  4449. and external sets were each normalized separately using RMA, and the normalized
  4450. data for each set were combined into a single set with no further attempts
  4451. at normalizing between the two sets.
  4452. This represents approximately how RMA would have to be used in a clinical
  4453. setting, where the samples to be classified are not available at the time
  4454. the classifier is trained.
  4455. \end_layout
  4456. \begin_layout Subsection
  4457. Generating custom fRMA vectors for hthgu133pluspm array platform
  4458. \end_layout
  4459. \begin_layout Standard
  4460. In order to enable fRMA normalization for the hthgu133pluspm array platform,
  4461. custom fRMA normalization vectors were trained using the frmaTools package
  4462. \begin_inset CommandInset citation
  4463. LatexCommand cite
  4464. key "McCall2011"
  4465. literal "false"
  4466. \end_inset
  4467. .
  4468. Separate vectors were created for two types of samples: kidney graft biopsy
  4469. samples and blood samples from graft recipients.
  4470. For training, 341 kidney biopsy samples from 2 data sets and 965 blood
  4471. samples from 5 data sets were used as the reference set.
  4472. Arrays were grouped into batches based on unique combinations of sample
  4473. type (blood or biopsy), diagnosis (TX, AR, etc.), data set, and scan date.
  4474. Thus, each batch represents arrays of the same kind that were run together
  4475. on the same day.
  4476. For estimating the probe inverse variance weights, frmaTools requires equal-siz
  4477. ed batches, which means a batch size must be chosen, and then batches smaller
  4478. than that size must be ignored, while batches larger than the chosen size
  4479. must be downsampled.
  4480. This downsampling is performed randomly, so the sampling process is repeated
  4481. 5 times and the resulting normalizations are compared to each other.
  4482. \end_layout
  4483. \begin_layout Standard
  4484. To evaluate the consistency of the generated normalization vectors, the
  4485. 5 fRMA vector sets generated from 5 random batch samplings were each used
  4486. to normalize the same 20 randomly selected samples from each tissue.
  4487. Then the normalized expression values for each probe on each array were
  4488. compared across all normalizations.
  4489. Each fRMA normalization was also compared against the normalized expression
  4490. values obtained by normalizing the same 20 samples with ordinary RMA.
  4491. \end_layout
  4492. \begin_layout Subsection
  4493. Modeling methylation array M-value heteroskedasticity in linear models with
  4494. modified voom implementation
  4495. \end_layout
  4496. \begin_layout Standard
  4497. \begin_inset Flex TODO Note (inline)
  4498. status open
  4499. \begin_layout Plain Layout
  4500. Put code on Github and reference it.
  4501. \end_layout
  4502. \end_inset
  4503. \end_layout
  4504. \begin_layout Standard
  4505. To investigate whether DNA methylation could be used to distinguish
  4506. between healthy and dysfunctional transplants, a data set of 78 Illumina
  4507. 450k methylation arrays from human kidney graft biopsies was analyzed for
  4508. differential methylation between 4 transplant statuses: healthy transplant
  4509. (TX), transplants undergoing acute rejection (AR), acute dysfunction with
  4510. no rejection (ADNR), and chronic allograft nephropathy (CAN).
  4511. The data consisted of 33 TX, 9 AR, 8 ADNR, and 28 CAN samples.
  4512. The uneven group sizes are a result of taking the biopsy samples before
  4513. the eventual fate of the transplant was known.
  4514. Each sample was additionally annotated with a donor ID (anonymized), Sex,
  4515. Age, Ethnicity, Creatinine Level, and Diabetes diagnosis (all samples
  4516. in this data set came from patients with either Type 1 or Type 2 diabetes).
  4517. \end_layout
  4518. \begin_layout Standard
  4519. The intensity data were first normalized using subset-quantile within array
  4520. normalization (SWAN)
  4521. \begin_inset CommandInset citation
  4522. LatexCommand cite
  4523. key "Maksimovic2012"
  4524. literal "false"
  4525. \end_inset
  4526. , then converted to intensity ratios (beta values)
  4527. \begin_inset CommandInset citation
  4528. LatexCommand cite
  4529. key "Aryee2014"
  4530. literal "false"
  4531. \end_inset
  4532. .
  4533. Any probes binding to loci that overlapped annotated SNPs were dropped,
  4534. and the annotated sex of each sample was verified against the sex inferred
  4535. from the ratio of median probe intensities for the X and Y chromosomes.
  4536. Then, the ratios were transformed to M-values.
  4537. \end_layout
  4538. \begin_layout Standard
  4539. \begin_inset Float table
  4540. wide false
  4541. sideways false
  4542. status open
  4543. \begin_layout Plain Layout
  4544. \align center
  4545. \begin_inset Tabular
  4546. <lyxtabular version="3" rows="4" columns="6">
  4547. <features tabularvalignment="middle">
  4548. <column alignment="center" valignment="top">
  4549. <column alignment="center" valignment="top">
  4550. <column alignment="center" valignment="top">
  4551. <column alignment="center" valignment="top">
  4552. <column alignment="center" valignment="top">
  4553. <column alignment="center" valignment="top">
  4554. <row>
  4555. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4556. \begin_inset Text
  4557. \begin_layout Plain Layout
  4558. Analysis
  4559. \end_layout
  4560. \end_inset
  4561. </cell>
  4562. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4563. \begin_inset Text
  4564. \begin_layout Plain Layout
  4565. random effect
  4566. \end_layout
  4567. \end_inset
  4568. </cell>
  4569. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4570. \begin_inset Text
  4571. \begin_layout Plain Layout
  4572. eBayes
  4573. \end_layout
  4574. \end_inset
  4575. </cell>
  4576. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4577. \begin_inset Text
  4578. \begin_layout Plain Layout
  4579. SVA
  4580. \end_layout
  4581. \end_inset
  4582. </cell>
  4583. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4584. \begin_inset Text
  4585. \begin_layout Plain Layout
  4586. weights
  4587. \end_layout
  4588. \end_inset
  4589. </cell>
  4590. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  4591. \begin_inset Text
  4592. \begin_layout Plain Layout
  4593. voom
  4594. \end_layout
  4595. \end_inset
  4596. </cell>
  4597. </row>
  4598. <row>
  4599. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4600. \begin_inset Text
  4601. \begin_layout Plain Layout
  4602. A
  4603. \end_layout
  4604. \end_inset
  4605. </cell>
  4606. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4607. \begin_inset Text
  4608. \begin_layout Plain Layout
  4609. Yes
  4610. \end_layout
  4611. \end_inset
  4612. </cell>
  4613. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4614. \begin_inset Text
  4615. \begin_layout Plain Layout
  4616. Yes
  4617. \end_layout
  4618. \end_inset
  4619. </cell>
  4620. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4621. \begin_inset Text
  4622. \begin_layout Plain Layout
  4623. No
  4624. \end_layout
  4625. \end_inset
  4626. </cell>
  4627. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4628. \begin_inset Text
  4629. \begin_layout Plain Layout
  4630. No
  4631. \end_layout
  4632. \end_inset
  4633. </cell>
  4634. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  4635. \begin_inset Text
  4636. \begin_layout Plain Layout
  4637. No
  4638. \end_layout
  4639. \end_inset
  4640. </cell>
  4641. </row>
  4642. <row>
  4643. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4644. \begin_inset Text
  4645. \begin_layout Plain Layout
  4646. B
  4647. \end_layout
  4648. \end_inset
  4649. </cell>
  4650. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4651. \begin_inset Text
  4652. \begin_layout Plain Layout
  4653. Yes
  4654. \end_layout
  4655. \end_inset
  4656. </cell>
  4657. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4658. \begin_inset Text
  4659. \begin_layout Plain Layout
  4660. Yes
  4661. \end_layout
  4662. \end_inset
  4663. </cell>
  4664. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4665. \begin_inset Text
  4666. \begin_layout Plain Layout
  4667. Yes
  4668. \end_layout
  4669. \end_inset
  4670. </cell>
  4671. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4672. \begin_inset Text
  4673. \begin_layout Plain Layout
  4674. Yes
  4675. \end_layout
  4676. \end_inset
  4677. </cell>
  4678. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  4679. \begin_inset Text
  4680. \begin_layout Plain Layout
  4681. No
  4682. \end_layout
  4683. \end_inset
  4684. </cell>
  4685. </row>
  4686. <row>
  4687. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4688. \begin_inset Text
  4689. \begin_layout Plain Layout
  4690. C
  4691. \end_layout
  4692. \end_inset
  4693. </cell>
  4694. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4695. \begin_inset Text
  4696. \begin_layout Plain Layout
  4697. Yes
  4698. \end_layout
  4699. \end_inset
  4700. </cell>
  4701. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4702. \begin_inset Text
  4703. \begin_layout Plain Layout
  4704. Yes
  4705. \end_layout
  4706. \end_inset
  4707. </cell>
  4708. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4709. \begin_inset Text
  4710. \begin_layout Plain Layout
  4711. Yes
  4712. \end_layout
  4713. \end_inset
  4714. </cell>
  4715. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4716. \begin_inset Text
  4717. \begin_layout Plain Layout
  4718. Yes
  4719. \end_layout
  4720. \end_inset
  4721. </cell>
  4722. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  4723. \begin_inset Text
  4724. \begin_layout Plain Layout
  4725. Yes
  4726. \end_layout
  4727. \end_inset
  4728. </cell>
  4729. </row>
  4730. </lyxtabular>
  4731. \end_inset
  4732. \end_layout
  4733. \begin_layout Plain Layout
  4734. \begin_inset Caption Standard
  4735. \begin_layout Plain Layout
  4736. \series bold
  4737. \begin_inset CommandInset label
  4738. LatexCommand label
  4739. name "tab:Summary-of-meth-analysis"
  4740. \end_inset
  4741. Summary of analysis variants for methylation array data.
  4742. \series default
  4743. Each analysis included a different set of steps to adjust or account for
  4744. various systematic features of the data.
  4745. Random effect: The model included a random effect accounting for correlation
  4746. between samples from the same patient
  4747. \begin_inset CommandInset citation
  4748. LatexCommand cite
  4749. key "Smyth2005a"
  4750. literal "false"
  4751. \end_inset
  4752. ; eBayes: Empirical Bayes squeezing of per-probe variances toward the mean-varia
  4753. nce trend
  4754. \begin_inset CommandInset citation
  4755. LatexCommand cite
  4756. key "Ritchie2015"
  4757. literal "false"
  4758. \end_inset
  4759. ; SVA: Surrogate variable analysis to account for unobserved confounders
  4760. \begin_inset CommandInset citation
  4761. LatexCommand cite
  4762. key "Leek2007"
  4763. literal "false"
  4764. \end_inset
  4765. ; Weights: Estimate sample weights to account for differences in sample
  4766. quality
  4767. \begin_inset CommandInset citation
  4768. LatexCommand cite
  4769. key "Liu2015,Ritchie2006"
  4770. literal "false"
  4771. \end_inset
  4772. ; voom: Use mean-variance trend to assign individual observation weights
  4773. \begin_inset CommandInset citation
  4774. LatexCommand cite
  4775. key "Law2013"
  4776. literal "false"
  4777. \end_inset
  4778. .
  4779. See the text for a more detailed explanation of each step.
  4780. \end_layout
  4781. \end_inset
  4782. \end_layout
  4783. \end_inset
  4784. \end_layout
  4785. \begin_layout Standard
  4786. From the M-values, a series of parallel analyses was performed, each adding
  4787. additional steps into the model fit to accommodate a feature of the data
  4788. (see Table
  4789. \begin_inset CommandInset ref
  4790. LatexCommand ref
  4791. reference "tab:Summary-of-meth-analysis"
  4792. plural "false"
  4793. caps "false"
  4794. noprefix "false"
  4795. \end_inset
  4796. ).
  4797. For analysis A, a
  4798. \begin_inset Quotes eld
  4799. \end_inset
  4800. basic
  4801. \begin_inset Quotes erd
  4802. \end_inset
  4803. linear modeling analysis was performed, compensating for known confounders
  4804. by including terms for the factor of interest (transplant status) as well
  4805. as the known biological confounders: sex, age, ethnicity, and diabetes.
  4806. Since some samples came from the same patients at different times, the
  4807. intra-patient correlation was modeled as a random effect, estimating a
  4808. shared correlation value across all probes
  4809. \begin_inset CommandInset citation
  4810. LatexCommand cite
  4811. key "Smyth2005a"
  4812. literal "false"
  4813. \end_inset
  4814. .
  4815. Then the linear model was fit, and the variance was modeled using empirical
  4816. Bayes squeezing toward the mean-variance trend
  4817. \begin_inset CommandInset citation
  4818. LatexCommand cite
  4819. key "Ritchie2015"
  4820. literal "false"
  4821. \end_inset
  4822. .
  4823. Finally, t-tests or F-tests were performed as appropriate for each test:
  4824. t-tests for single contrasts, and F-tests for multiple contrasts.
  4825. P-values were corrected for multiple testing using the Benjamini-Hochberg
  4826. procedure for FDR control
  4827. \begin_inset CommandInset citation
  4828. LatexCommand cite
  4829. key "Benjamini1995"
  4830. literal "false"
  4831. \end_inset
  4832. .
  4833. \end_layout
  4834. \begin_layout Standard
  4835. For analysis B, surrogate variable analysis (SVA) was used to infer
  4836. additional unobserved sources of heterogeneity in the data
  4837. \begin_inset CommandInset citation
  4838. LatexCommand cite
  4839. key "Leek2007"
  4840. literal "false"
  4841. \end_inset
  4842. .
  4843. These surrogate variables were added to the design matrix before fitting
  4844. the linear model.
  4845. In addition, sample quality weights were estimated from the data and used
  4846. during linear modeling to down-weight the contribution of highly variable
  4847. arrays while increasing the weight to arrays with lower variability
  4848. \begin_inset CommandInset citation
  4849. LatexCommand cite
  4850. key "Ritchie2006"
  4851. literal "false"
  4852. \end_inset
  4853. .
  4854. The remainder of the analysis proceeded as in analysis A.
  4855. For analysis C, the voom method was adapted to run on methylation array
  4856. data and used to model and correct for the mean-variance trend using individual
  4857. observation weights
  4858. \begin_inset CommandInset citation
  4859. LatexCommand cite
  4860. key "Law2013"
  4861. literal "false"
  4862. \end_inset
  4863. , which were combined with the sample weights
  4864. \begin_inset CommandInset citation
  4865. LatexCommand cite
  4866. key "Liu2015,Ritchie2006"
  4867. literal "false"
  4868. \end_inset
  4869. .
  4870. Each time weights were used, they were estimated once before estimating
  4871. the random effect correlation value, and then the weights were re-estimated
  4872. taking the random effect into account.
  4873. The remainder of the analysis proceeded as in analysis B.
  4874. \end_layout
  4875. \begin_layout Section
  4876. Results
  4877. \end_layout
  4878. \begin_layout Standard
  4879. \begin_inset Flex TODO Note (inline)
  4880. status open
  4881. \begin_layout Plain Layout
  4882. Improve subsection titles in this section
  4883. \end_layout
  4884. \end_inset
  4885. \end_layout
  4886. \begin_layout Subsection
  4887. Separate normalization with RMA introduces unwanted biases in classification
  4888. \end_layout
  4889. \begin_layout Standard
  4890. \begin_inset Float figure
  4891. wide false
  4892. sideways false
  4893. status open
  4894. \begin_layout Plain Layout
  4895. \align center
  4896. \begin_inset Graphics
  4897. filename graphics/PAM/predplot.pdf
  4898. lyxscale 50
  4899. width 60col%
  4900. groupId colwidth
  4901. \end_inset
  4902. \end_layout
  4903. \begin_layout Plain Layout
  4904. \begin_inset Caption Standard
  4905. \begin_layout Plain Layout
  4906. \begin_inset CommandInset label
  4907. LatexCommand label
  4908. name "fig:Classifier-probabilities-RMA"
  4909. \end_inset
  4910. \series bold
  4911. Classifier probabilities on validation samples when normalized with RMA
  4912. together vs.
  4913. separately.
  4914. \series default
  4915. The PAM classifier algorithm was trained on the training set of arrays to
  4916. distinguish AR from TX and then used to assign class probabilities to the
  4917. validation set.
  4918. The process was performed after normalizing all samples together and after
  4919. normalizing the training and test sets separately, and the class probabilities
  4920. assigned to each sample in the validation set were plotted against each
  4921. other (PP(AR), posterior probability of being AR).
  4922. The color of each point indicates the true classification of that sample.
  4923. \end_layout
  4924. \end_inset
  4925. \end_layout
  4926. \end_inset
  4927. \end_layout
  4928. \begin_layout Standard
  4929. To demonstrate the problem with non-single-channel normalization methods,
  4930. we considered the problem of training a classifier to distinguish TX from
  4931. AR using the samples from the internal set as training data, evaluating
  4932. performance on the external set.
  4933. First, training and evaluation were performed after normalizing all array
  4934. samples together as a single set using RMA, and second, the internal samples
  4935. were normalized separately from the external samples and the training and
  4936. evaluation were repeated.
  4937. For each sample in the validation set, the classifier probabilities from
  4938. both classifiers were plotted against each other (Fig.
  4939. \begin_inset CommandInset ref
  4940. LatexCommand ref
  4941. reference "fig:Classifier-probabilities-RMA"
  4942. plural "false"
  4943. caps "false"
  4944. noprefix "false"
  4945. \end_inset
  4946. ).
  4947. As expected, separate normalization biases the classifier probabilities,
  4948. resulting in several misclassifications.
  4949. In this case, the bias from separate normalization causes the classifier
  4950. to assign a lower probability of AR to every sample.
  4951. \end_layout
  4952. \begin_layout Subsection
  4953. fRMA and SCAN maintain classification performance while eliminating dependence
  4954. on normalization strategy
  4955. \end_layout
  4956. \begin_layout Standard
  4957. \begin_inset Float figure
  4958. wide false
  4959. sideways false
  4960. status open
  4961. \begin_layout Plain Layout
  4962. \align center
  4963. \begin_inset Float figure
  4964. placement tb
  4965. wide false
  4966. sideways false
  4967. status open
  4968. \begin_layout Plain Layout
  4969. \align center
  4970. \begin_inset Graphics
  4971. filename graphics/PAM/ROC-TXvsAR-internal.pdf
  4972. lyxscale 50
  4973. height 40theight%
  4974. groupId roc-pam
  4975. \end_inset
  4976. \end_layout
  4977. \begin_layout Plain Layout
  4978. \begin_inset Caption Standard
  4979. \begin_layout Plain Layout
  4980. \begin_inset CommandInset label
  4981. LatexCommand label
  4982. name "fig:ROC-PAM-int"
  4983. \end_inset
  4984. ROC curves for PAM on internal validation data
  4985. \end_layout
  4986. \end_inset
  4987. \end_layout
  4988. \end_inset
  4989. \end_layout
  4990. \begin_layout Plain Layout
  4991. \align center
  4992. \begin_inset Float figure
  4993. placement tb
  4994. wide false
  4995. sideways false
  4996. status open
  4997. \begin_layout Plain Layout
  4998. \align center
  4999. \begin_inset Graphics
  5000. filename graphics/PAM/ROC-TXvsAR-external.pdf
  5001. lyxscale 50
  5002. height 40theight%
  5003. groupId roc-pam
  5004. \end_inset
  5005. \end_layout
  5006. \begin_layout Plain Layout
  5007. \begin_inset Caption Standard
  5008. \begin_layout Plain Layout
  5009. \begin_inset CommandInset label
  5010. LatexCommand label
  5011. name "fig:ROC-PAM-ext"
  5012. \end_inset
  5013. ROC curves for PAM on external validation data
  5014. \end_layout
  5015. \end_inset
  5016. \end_layout
  5017. \end_inset
  5018. \end_layout
  5019. \begin_layout Plain Layout
  5020. \begin_inset Caption Standard
  5021. \begin_layout Plain Layout
  5022. \series bold
  5023. \begin_inset CommandInset label
  5024. LatexCommand label
  5025. name "fig:ROC-PAM-main"
  5026. \end_inset
  5027. ROC curves for PAM using different normalization strategies.
  5028. \series default
  5029. ROC curves were generated for PAM classification of AR vs TX after 6 different
  5030. normalization strategies applied to the same data sets.
  5031. Only fRMA and SCAN are single-channel normalizations.
  5032. The other normalizations are for comparison.
  5033. \end_layout
  5034. \end_inset
  5035. \end_layout
  5036. \end_inset
  5037. \end_layout
  5038. \begin_layout Standard
  5039. \begin_inset Float table
  5040. wide false
  5041. sideways false
  5042. status open
  5043. \begin_layout Plain Layout
  5044. \align center
  5045. \begin_inset Tabular
  5046. <lyxtabular version="3" rows="7" columns="4">
  5047. <features tabularvalignment="middle">
  5048. <column alignment="center" valignment="top">
  5049. <column alignment="center" valignment="top">
  5050. <column alignment="center" valignment="top">
  5051. <column alignment="center" valignment="top">
  5052. <row>
  5053. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5054. \begin_inset Text
  5055. \begin_layout Plain Layout
  5056. \family roman
  5057. \series medium
  5058. \shape up
  5059. \size normal
  5060. \emph off
  5061. \bar no
  5062. \strikeout off
  5063. \xout off
  5064. \uuline off
  5065. \uwave off
  5066. \noun off
  5067. \color none
  5068. Normalization
  5069. \end_layout
  5070. \end_inset
  5071. </cell>
  5072. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5073. \begin_inset Text
  5074. \begin_layout Plain Layout
  5075. Single-channel?
  5076. \end_layout
  5077. \end_inset
  5078. </cell>
  5079. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5080. \begin_inset Text
  5081. \begin_layout Plain Layout
  5082. \family roman
  5083. \series medium
  5084. \shape up
  5085. \size normal
  5086. \emph off
  5087. \bar no
  5088. \strikeout off
  5089. \xout off
  5090. \uuline off
  5091. \uwave off
  5092. \noun off
  5093. \color none
  5094. Internal Val.
  5095. AUC
  5096. \end_layout
  5097. \end_inset
  5098. </cell>
  5099. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  5100. \begin_inset Text
  5101. \begin_layout Plain Layout
  5102. External Val.
  5103. AUC
  5104. \end_layout
  5105. \end_inset
  5106. </cell>
  5107. </row>
  5108. <row>
  5109. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5110. \begin_inset Text
  5111. \begin_layout Plain Layout
  5112. \family roman
  5113. \series medium
  5114. \shape up
  5115. \size normal
  5116. \emph off
  5117. \bar no
  5118. \strikeout off
  5119. \xout off
  5120. \uuline off
  5121. \uwave off
  5122. \noun off
  5123. \color none
  5124. RMA
  5125. \end_layout
  5126. \end_inset
  5127. </cell>
  5128. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5129. \begin_inset Text
  5130. \begin_layout Plain Layout
  5131. No
  5132. \end_layout
  5133. \end_inset
  5134. </cell>
  5135. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5136. \begin_inset Text
  5137. \begin_layout Plain Layout
  5138. \family roman
  5139. \series medium
  5140. \shape up
  5141. \size normal
  5142. \emph off
  5143. \bar no
  5144. \strikeout off
  5145. \xout off
  5146. \uuline off
  5147. \uwave off
  5148. \noun off
  5149. \color none
  5150. 0.852
  5151. \end_layout
  5152. \end_inset
  5153. </cell>
  5154. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  5155. \begin_inset Text
  5156. \begin_layout Plain Layout
  5157. \family roman
  5158. \series medium
  5159. \shape up
  5160. \size normal
  5161. \emph off
  5162. \bar no
  5163. \strikeout off
  5164. \xout off
  5165. \uuline off
  5166. \uwave off
  5167. \noun off
  5168. \color none
  5169. 0.713
  5170. \end_layout
  5171. \end_inset
  5172. </cell>
  5173. </row>
  5174. <row>
  5175. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5176. \begin_inset Text
  5177. \begin_layout Plain Layout
  5178. \family roman
  5179. \series medium
  5180. \shape up
  5181. \size normal
  5182. \emph off
  5183. \bar no
  5184. \strikeout off
  5185. \xout off
  5186. \uuline off
  5187. \uwave off
  5188. \noun off
  5189. \color none
  5190. dChip
  5191. \end_layout
  5192. \end_inset
  5193. </cell>
  5194. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5195. \begin_inset Text
  5196. \begin_layout Plain Layout
  5197. No
  5198. \end_layout
  5199. \end_inset
  5200. </cell>
  5201. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5202. \begin_inset Text
  5203. \begin_layout Plain Layout
  5204. \family roman
  5205. \series medium
  5206. \shape up
  5207. \size normal
  5208. \emph off
  5209. \bar no
  5210. \strikeout off
  5211. \xout off
  5212. \uuline off
  5213. \uwave off
  5214. \noun off
  5215. \color none
  5216. 0.891
  5217. \end_layout
  5218. \end_inset
  5219. </cell>
  5220. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  5221. \begin_inset Text
  5222. \begin_layout Plain Layout
  5223. \family roman
  5224. \series medium
  5225. \shape up
  5226. \size normal
  5227. \emph off
  5228. \bar no
  5229. \strikeout off
  5230. \xout off
  5231. \uuline off
  5232. \uwave off
  5233. \noun off
  5234. \color none
  5235. 0.657
  5236. \end_layout
  5237. \end_inset
  5238. </cell>
  5239. </row>
  5240. <row>
  5241. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5242. \begin_inset Text
  5243. \begin_layout Plain Layout
  5244. \family roman
  5245. \series medium
  5246. \shape up
  5247. \size normal
  5248. \emph off
  5249. \bar no
  5250. \strikeout off
  5251. \xout off
  5252. \uuline off
  5253. \uwave off
  5254. \noun off
  5255. \color none
  5256. RMA + GRSN
  5257. \end_layout
  5258. \end_inset
  5259. </cell>
  5260. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5261. \begin_inset Text
  5262. \begin_layout Plain Layout
  5263. No
  5264. \end_layout
  5265. \end_inset
  5266. </cell>
  5267. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5268. \begin_inset Text
  5269. \begin_layout Plain Layout
  5270. \family roman
  5271. \series medium
  5272. \shape up
  5273. \size normal
  5274. \emph off
  5275. \bar no
  5276. \strikeout off
  5277. \xout off
  5278. \uuline off
  5279. \uwave off
  5280. \noun off
  5281. \color none
  5282. 0.816
  5283. \end_layout
  5284. \end_inset
  5285. </cell>
  5286. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  5287. \begin_inset Text
  5288. \begin_layout Plain Layout
  5289. \family roman
  5290. \series medium
  5291. \shape up
  5292. \size normal
  5293. \emph off
  5294. \bar no
  5295. \strikeout off
  5296. \xout off
  5297. \uuline off
  5298. \uwave off
  5299. \noun off
  5300. \color none
  5301. 0.750
  5302. \end_layout
  5303. \end_inset
  5304. </cell>
  5305. </row>
  5306. <row>
  5307. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5308. \begin_inset Text
  5309. \begin_layout Plain Layout
  5310. \family roman
  5311. \series medium
  5312. \shape up
  5313. \size normal
  5314. \emph off
  5315. \bar no
  5316. \strikeout off
  5317. \xout off
  5318. \uuline off
  5319. \uwave off
  5320. \noun off
  5321. \color none
  5322. dChip + GRSN
  5323. \end_layout
  5324. \end_inset
  5325. </cell>
  5326. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5327. \begin_inset Text
  5328. \begin_layout Plain Layout
  5329. No
  5330. \end_layout
  5331. \end_inset
  5332. </cell>
  5333. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5334. \begin_inset Text
  5335. \begin_layout Plain Layout
  5336. \family roman
  5337. \series medium
  5338. \shape up
  5339. \size normal
  5340. \emph off
  5341. \bar no
  5342. \strikeout off
  5343. \xout off
  5344. \uuline off
  5345. \uwave off
  5346. \noun off
  5347. \color none
  5348. 0.875
  5349. \end_layout
  5350. \end_inset
  5351. </cell>
  5352. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  5353. \begin_inset Text
  5354. \begin_layout Plain Layout
  5355. \family roman
  5356. \series medium
  5357. \shape up
  5358. \size normal
  5359. \emph off
  5360. \bar no
  5361. \strikeout off
  5362. \xout off
  5363. \uuline off
  5364. \uwave off
  5365. \noun off
  5366. \color none
  5367. 0.642
  5368. \end_layout
  5369. \end_inset
  5370. </cell>
  5371. </row>
  5372. <row>
  5373. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5374. \begin_inset Text
  5375. \begin_layout Plain Layout
  5376. \family roman
  5377. \series medium
  5378. \shape up
  5379. \size normal
  5380. \emph off
  5381. \bar no
  5382. \strikeout off
  5383. \xout off
  5384. \uuline off
  5385. \uwave off
  5386. \noun off
  5387. \color none
  5388. fRMA
  5389. \end_layout
  5390. \end_inset
  5391. </cell>
  5392. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5393. \begin_inset Text
  5394. \begin_layout Plain Layout
  5395. Yes
  5396. \end_layout
  5397. \end_inset
  5398. </cell>
  5399. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5400. \begin_inset Text
  5401. \begin_layout Plain Layout
  5402. \family roman
  5403. \series medium
  5404. \shape up
  5405. \size normal
  5406. \emph off
  5407. \bar no
  5408. \strikeout off
  5409. \xout off
  5410. \uuline off
  5411. \uwave off
  5412. \noun off
  5413. \color none
  5414. 0.863
  5415. \end_layout
  5416. \end_inset
  5417. </cell>
  5418. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  5419. \begin_inset Text
  5420. \begin_layout Plain Layout
  5421. \family roman
  5422. \series medium
  5423. \shape up
  5424. \size normal
  5425. \emph off
  5426. \bar no
  5427. \strikeout off
  5428. \xout off
  5429. \uuline off
  5430. \uwave off
  5431. \noun off
  5432. \color none
  5433. 0.718
  5434. \end_layout
  5435. \end_inset
  5436. </cell>
  5437. </row>
  5438. <row>
  5439. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5440. \begin_inset Text
  5441. \begin_layout Plain Layout
  5442. \family roman
  5443. \series medium
  5444. \shape up
  5445. \size normal
  5446. \emph off
  5447. \bar no
  5448. \strikeout off
  5449. \xout off
  5450. \uuline off
  5451. \uwave off
  5452. \noun off
  5453. \color none
  5454. SCAN
  5455. \end_layout
  5456. \end_inset
  5457. </cell>
  5458. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5459. \begin_inset Text
  5460. \begin_layout Plain Layout
  5461. Yes
  5462. \end_layout
  5463. \end_inset
  5464. </cell>
  5465. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5466. \begin_inset Text
  5467. \begin_layout Plain Layout
  5468. \family roman
  5469. \series medium
  5470. \shape up
  5471. \size normal
  5472. \emph off
  5473. \bar no
  5474. \strikeout off
  5475. \xout off
  5476. \uuline off
  5477. \uwave off
  5478. \noun off
  5479. \color none
  5480. 0.853
  5481. \end_layout
  5482. \end_inset
  5483. </cell>
  5484. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  5485. \begin_inset Text
  5486. \begin_layout Plain Layout
  5487. \family roman
  5488. \series medium
  5489. \shape up
  5490. \size normal
  5491. \emph off
  5492. \bar no
  5493. \strikeout off
  5494. \xout off
  5495. \uuline off
  5496. \uwave off
  5497. \noun off
  5498. \color none
  5499. 0.689
  5500. \end_layout
  5501. \end_inset
  5502. </cell>
  5503. </row>
  5504. </lyxtabular>
  5505. \end_inset
  5506. \end_layout
  5507. \begin_layout Plain Layout
  5508. \begin_inset Caption Standard
  5509. \begin_layout Plain Layout
  5510. \begin_inset CommandInset label
  5511. LatexCommand label
  5512. name "tab:AUC-PAM"
  5513. \end_inset
  5514. \series bold
  5515. ROC curve AUC values for internal and external validation with 6 different
  5516. normalization strategies.
  5517. \series default
  5518. These AUC values correspond to the ROC curves in Figure
  5519. \begin_inset CommandInset ref
  5520. LatexCommand ref
  5521. reference "fig:ROC-PAM-main"
  5522. plural "false"
  5523. caps "false"
  5524. noprefix "false"
  5525. \end_inset
  5526. .
  5527. \end_layout
  5528. \end_inset
  5529. \end_layout
  5530. \end_inset
  5531. \end_layout
  5532. \begin_layout Standard
  5533. For internal validation, the 6 methods' AUC values ranged from 0.816 to 0.891,
  5534. as shown in Table
  5535. \begin_inset CommandInset ref
  5536. LatexCommand ref
  5537. reference "tab:AUC-PAM"
  5538. plural "false"
  5539. caps "false"
  5540. noprefix "false"
  5541. \end_inset
  5542. .
  5543. Among the non-single-channel normalizations, dChip outperformed RMA, while
  5544. GRSN reduced the AUC values for both dChip and RMA.
  5545. Both single-channel methods, fRMA and SCAN, slightly outperformed RMA,
  5546. with fRMA ahead of SCAN.
  5547. However, the difference between RMA and fRMA is still quite small.
  5548. Figure
  5549. \begin_inset CommandInset ref
  5550. LatexCommand ref
  5551. reference "fig:ROC-PAM-int"
  5552. plural "false"
  5553. caps "false"
  5554. noprefix "false"
  5555. \end_inset
  5556. shows that the ROC curves for RMA, dChip, and fRMA look very similar and
  5557. relatively smooth, while both GRSN curves and the curve for SCAN have a
  5558. more jagged appearance.
  5559. \end_layout
  5560. \begin_layout Standard
  5561. For external validation, as expected, all the AUC values are lower than
  5562. those for internal validation, ranging from 0.642 to 0.750 (Table
  5563. \begin_inset CommandInset ref
  5564. LatexCommand ref
  5565. reference "tab:AUC-PAM"
  5566. plural "false"
  5567. caps "false"
  5568. noprefix "false"
  5569. \end_inset
  5570. ).
  5571. With or without GRSN, RMA shows its dominance over dChip in this more challengi
  5572. ng test.
  5573. Unlike in the internal validation, GRSN actually improves the classifier
  5574. performance for RMA, although it does not for dChip.
  5575. Once again, both single-channel methods perform about on par with RMA,
  5576. with fRMA performing slightly better and SCAN performing a bit worse.
  5577. Figure
  5578. \begin_inset CommandInset ref
  5579. LatexCommand ref
  5580. reference "fig:ROC-PAM-ext"
  5581. plural "false"
  5582. caps "false"
  5583. noprefix "false"
  5584. \end_inset
  5585. shows the ROC curves for the external validation test.
  5586. As expected, none of them are as clean-looking as the internal validation
  5587. ROC curves.
  5588. The curves for RMA, RMA+GRSN, and fRMA all look similar, while the other
  5589. curves look more divergent.
  5590. \end_layout
  5591. \begin_layout Subsection
  5592. fRMA with custom-generated vectors enables single-channel normalization
  5593. on hthgu133pluspm platform
  5594. \end_layout
  5595. \begin_layout Standard
  5596. \begin_inset Float figure
  5597. wide false
  5598. sideways false
  5599. status open
  5600. \begin_layout Plain Layout
  5601. \align center
  5602. \begin_inset Float figure
  5603. placement tb
  5604. wide false
  5605. sideways false
  5606. status collapsed
  5607. \begin_layout Plain Layout
  5608. \align center
  5609. \begin_inset Graphics
  5610. filename graphics/frma-pax-bx/batchsize_batches.pdf
  5611. lyxscale 50
  5612. height 35theight%
  5613. groupId frmatools-subfig
  5614. \end_inset
  5615. \end_layout
  5616. \begin_layout Plain Layout
  5617. \begin_inset Caption Standard
  5618. \begin_layout Plain Layout
  5619. \begin_inset CommandInset label
  5620. LatexCommand label
  5621. name "fig:batch-size-batches"
  5622. \end_inset
  5623. \series bold
  5624. Number of batches usable in fRMA probe weight learning as a function of
  5625. batch size.
  5626. \end_layout
  5627. \end_inset
  5628. \end_layout
  5629. \end_inset
  5630. \end_layout
  5631. \begin_layout Plain Layout
  5632. \align center
  5633. \begin_inset Float figure
  5634. placement tb
  5635. wide false
  5636. sideways false
  5637. status collapsed
  5638. \begin_layout Plain Layout
  5639. \align center
  5640. \begin_inset Graphics
  5641. filename graphics/frma-pax-bx/batchsize_samples.pdf
  5642. lyxscale 50
  5643. height 35theight%
  5644. groupId frmatools-subfig
  5645. \end_inset
  5646. \end_layout
  5647. \begin_layout Plain Layout
  5648. \begin_inset Caption Standard
  5649. \begin_layout Plain Layout
  5650. \begin_inset CommandInset label
  5651. LatexCommand label
  5652. name "fig:batch-size-samples"
  5653. \end_inset
  5654. \series bold
  5655. Number of samples usable in fRMA probe weight learning as a function of
  5656. batch size.
  5657. \end_layout
  5658. \end_inset
  5659. \end_layout
  5660. \end_inset
  5661. \end_layout
  5662. \begin_layout Plain Layout
  5663. \begin_inset Caption Standard
  5664. \begin_layout Plain Layout
  5665. \series bold
  5666. \begin_inset CommandInset label
  5667. LatexCommand label
  5668. name "fig:frmatools-batch-size"
  5669. \end_inset
  5670. Effect of batch size selection on number of batches and number of samples
  5671. included in fRMA probe weight learning.
  5672. \series default
  5673. For batch sizes ranging from 3 to 15, the number of batches (a) and samples
  5674. (b) included in probe weight training were plotted for biopsy (BX) and
  5675. blood (PAX) samples.
  5676. The selected batch size, 5, is marked with a dotted vertical line.
  5677. \end_layout
  5678. \end_inset
  5679. \end_layout
  5680. \end_inset
  5681. \end_layout
  5682. \begin_layout Standard
  5683. In order to enable use of fRMA to normalize hthgu133pluspm, a custom set
  5684. of fRMA vectors was created.
  5685. First, an appropriate batch size was chosen by looking at the number of
  5686. batches and number of samples included as a function of batch size (Figure
  5687. \begin_inset CommandInset ref
  5688. LatexCommand ref
  5689. reference "fig:frmatools-batch-size"
  5690. plural "false"
  5691. caps "false"
  5692. noprefix "false"
  5693. \end_inset
  5694. ).
  5695. For a given batch size, all batches with fewer samples than the chosen
  5696. size must be ignored during training, while larger batches must be randomly
  5697. downsampled to the chosen size.
  5698. Hence, the number of samples included for a given batch size equals the
  5699. batch size times the number of batches with at least that many samples.
  5700. From Figure
  5701. \begin_inset CommandInset ref
  5702. LatexCommand ref
  5703. reference "fig:batch-size-samples"
  5704. plural "false"
  5705. caps "false"
  5706. noprefix "false"
  5707. \end_inset
  5708. , it is apparent that a batch size of 8 maximizes the number of samples
  5709. included in training.
  5710. Increasing the batch size beyond this causes too many smaller batches to
  5711. be excluded, reducing the total number of samples for both tissue types.
  5712. However, a batch size of 8 is not necessarily optimal.
  5713. The article introducing frmaTools concluded that it was highly advantageous
  5714. to use a smaller batch size in order to include more batches, even at the
  5715. expense of including fewer total samples in training
  5716. \begin_inset CommandInset citation
  5717. LatexCommand cite
  5718. key "McCall2011"
  5719. literal "false"
  5720. \end_inset
  5721. .
  5722. To strike an appropriate balance between more batches and more samples,
  5723. a batch size of 5 was chosen.
  5724. For both blood and biopsy samples, this increased the number of batches
  5725. included by 10, with only a modest reduction in the number of samples compared
  5726. to a batch size of 8.
  5727. With a batch size of 5, 26 batches of biopsy samples and 46 batches of
  5728. blood samples were available.
  5729. \end_layout
  5730. \begin_layout Standard
  5731. \begin_inset Float figure
  5732. wide false
  5733. sideways false
  5734. status open
  5735. \begin_layout Plain Layout
  5736. \begin_inset Float figure
  5737. wide false
  5738. sideways false
  5739. status collapsed
  5740. \begin_layout Plain Layout
  5741. \align center
  5742. \begin_inset Graphics
  5743. filename graphics/frma-pax-bx/M-BX-violin.pdf
  5744. lyxscale 40
  5745. width 45col%
  5746. groupId m-violin
  5747. \end_inset
  5748. \end_layout
  5749. \begin_layout Plain Layout
  5750. \begin_inset Caption Standard
  5751. \begin_layout Plain Layout
  5752. \begin_inset CommandInset label
  5753. LatexCommand label
  5754. name "fig:m-bx-violin"
  5755. \end_inset
  5756. \series bold
  5757. Violin plot of inter-normalization log ratios for biopsy samples.
  5758. \end_layout
  5759. \end_inset
  5760. \end_layout
  5761. \end_inset
  5762. \begin_inset space \hfill{}
  5763. \end_inset
  5764. \begin_inset Float figure
  5765. wide false
  5766. sideways false
  5767. status collapsed
  5768. \begin_layout Plain Layout
  5769. \align center
  5770. \begin_inset Graphics
  5771. filename graphics/frma-pax-bx/M-PAX-violin.pdf
  5772. lyxscale 40
  5773. width 45col%
  5774. groupId m-violin
  5775. \end_inset
  5776. \end_layout
  5777. \begin_layout Plain Layout
  5778. \begin_inset Caption Standard
  5779. \begin_layout Plain Layout
  5780. \begin_inset CommandInset label
  5781. LatexCommand label
  5782. name "fig:m-pax-violin"
  5783. \end_inset
  5784. \series bold
  5785. Violin plot of inter-normalization log ratios for blood samples.
  5786. \end_layout
  5787. \end_inset
  5788. \end_layout
  5789. \end_inset
  5790. \end_layout
  5791. \begin_layout Plain Layout
  5792. \begin_inset Caption Standard
  5793. \begin_layout Plain Layout
  5794. \series bold
  5795. Violin plot of log ratios between normalizations for 20 biopsy samples.
  5796. \series default
  5797. Each of 20 randomly selected samples was normalized with RMA and with 5
  5798. different sets of fRMA vectors.
  5799. The distribution of log ratios between normalized expression values, aggregated
  5800. across all 20 arrays, was plotted for each pair of normalizations.
  5801. \end_layout
  5802. \end_inset
  5803. \end_layout
  5804. \end_inset
  5805. \end_layout
  5806. \begin_layout Standard
  5807. Since fRMA training requires equal-size batches, larger batches are downsampled
  5808. randomly.
  5809. This introduces a nondeterministic step in the generation of normalization
  5810. vectors.
  5811. To show that this randomness does not substantially change the outcome,
  5812. the random downsampling and subsequent vector learning were repeated 5 times,
  5813. with a different random seed each time.
  5814. 20 samples were selected at random as a test set and normalized with each
  5815. of the 5 sets of fRMA normalization vectors as well as ordinary RMA, and
  5816. the normalized expression values were compared across normalizations.
  5817. Figure
  5818. \begin_inset CommandInset ref
  5819. LatexCommand ref
  5820. reference "fig:m-bx-violin"
  5821. plural "false"
  5822. caps "false"
  5823. noprefix "false"
  5824. \end_inset
  5825. shows a summary of these comparisons for biopsy samples.
  5826. Comparing RMA to each of the 5 fRMA normalizations, the distribution of
  5827. log ratios is somewhat wide, indicating that the normalizations disagree
  5828. on the expression values of a fair number of probe sets.
  5829. In contrast, in comparisons of fRMA against fRMA, the vast majority of probe
  5830. sets have very small log ratios, indicating a very high agreement between
  5831. the normalized values generated by the two normalizations.
  5832. This shows that the fRMA normalization's behavior is not very sensitive
  5833. to the random downsampling of larger batches during training.
  5834. \end_layout
  5835. \begin_layout Standard
  5836. \begin_inset Float figure
  5837. wide false
  5838. sideways false
  5839. status open
  5840. \begin_layout Plain Layout
  5841. \align center
  5842. \begin_inset Float figure
  5843. wide false
  5844. sideways false
  5845. status collapsed
  5846. \begin_layout Plain Layout
  5847. \align center
  5848. \begin_inset Graphics
  5849. filename graphics/frma-pax-bx/MA-BX-RMA.fRMA-RASTER.png
  5850. lyxscale 10
  5851. width 45col%
  5852. groupId ma-frma
  5853. \end_inset
  5854. \end_layout
  5855. \begin_layout Plain Layout
  5856. \begin_inset Caption Standard
  5857. \begin_layout Plain Layout
  5858. \begin_inset CommandInset label
  5859. LatexCommand label
  5860. name "fig:ma-bx-rma-frma"
  5861. \end_inset
  5862. RMA vs.
  5863. fRMA for biopsy samples.
  5864. \end_layout
  5865. \end_inset
  5866. \end_layout
  5867. \end_inset
  5868. \begin_inset space \hfill{}
  5869. \end_inset
  5870. \begin_inset Float figure
  5871. wide false
  5872. sideways false
  5873. status collapsed
  5874. \begin_layout Plain Layout
  5875. \align center
  5876. \begin_inset Graphics
  5877. filename graphics/frma-pax-bx/MA-BX-fRMA.fRMA-RASTER.png
  5878. lyxscale 10
  5879. width 45col%
  5880. groupId ma-frma
  5881. \end_inset
  5882. \end_layout
  5883. \begin_layout Plain Layout
  5884. \begin_inset Caption Standard
  5885. \begin_layout Plain Layout
  5886. \begin_inset CommandInset label
  5887. LatexCommand label
  5888. name "fig:ma-bx-frma-frma"
  5889. \end_inset
  5890. fRMA vs fRMA for biopsy samples.
  5891. \end_layout
  5892. \end_inset
  5893. \end_layout
  5894. \end_inset
  5895. \end_layout
  5896. \begin_layout Plain Layout
  5897. \align center
  5898. \begin_inset Float figure
  5899. wide false
  5900. sideways false
  5901. status collapsed
  5902. \begin_layout Plain Layout
  5903. \align center
  5904. \begin_inset Graphics
  5905. filename graphics/frma-pax-bx/MA-PAX-RMA.fRMA-RASTER.png
  5906. lyxscale 10
  5907. width 45col%
  5908. groupId ma-frma
  5909. \end_inset
  5910. \end_layout
  5911. \begin_layout Plain Layout
  5912. \begin_inset Caption Standard
  5913. \begin_layout Plain Layout
  5914. \begin_inset CommandInset label
  5915. LatexCommand label
  5916. name "fig:MA-PAX-rma-frma"
  5917. \end_inset
  5918. RMA vs.
  5919. fRMA for blood samples.
  5920. \end_layout
  5921. \end_inset
  5922. \end_layout
  5923. \end_inset
  5924. \begin_inset space \hfill{}
  5925. \end_inset
  5926. \begin_inset Float figure
  5927. wide false
  5928. sideways false
  5929. status collapsed
  5930. \begin_layout Plain Layout
  5931. \align center
  5932. \begin_inset Graphics
  5933. filename graphics/frma-pax-bx/MA-PAX-fRMA.fRMA-RASTER.png
  5934. lyxscale 10
  5935. width 45col%
  5936. groupId ma-frma
  5937. \end_inset
  5938. \end_layout
  5939. \begin_layout Plain Layout
  5940. \begin_inset Caption Standard
  5941. \begin_layout Plain Layout
  5942. \begin_inset CommandInset label
  5943. LatexCommand label
  5944. name "fig:MA-PAX-frma-frma"
  5945. \end_inset
  5946. fRMA vs fRMA for blood samples.
  5947. \end_layout
  5948. \end_inset
  5949. \end_layout
  5950. \end_inset
  5951. \end_layout
  5952. \begin_layout Plain Layout
  5953. \begin_inset Caption Standard
  5954. \begin_layout Plain Layout
  5955. \series bold
  5956. \begin_inset CommandInset label
  5957. LatexCommand label
  5958. name "fig:Representative-MA-plots"
  5959. \end_inset
  5960. Representative MA plots comparing RMA and custom fRMA normalizations.
  5961. \series default
  5962. For each plot, 20 samples were normalized using 2 different normalizations,
  5963. and then averages (A) and log ratios (M) were plotted between the two different
  5964. normalizations for every probe.
  5965. For the
  5966. \begin_inset Quotes eld
  5967. \end_inset
  5968. fRMA vs fRMA
  5969. \begin_inset Quotes erd
  5970. \end_inset
  5971. plots (b & d), two different fRMA normalizations using vectors from two
  5972. independent batch samplings were compared.
  5973. Density of points is represented by blue shading, and individual outlier
  5974. points are plotted.
  5975. \end_layout
  5976. \end_inset
  5977. \end_layout
  5978. \end_inset
  5979. \end_layout
  5980. \begin_layout Standard
  5981. Figure
  5982. \begin_inset CommandInset ref
  5983. LatexCommand ref
  5984. reference "fig:ma-bx-rma-frma"
  5985. plural "false"
  5986. caps "false"
  5987. noprefix "false"
  5988. \end_inset
  5989. shows an MA plot of the RMA-normalized values against the fRMA-normalized
  5990. values for the same probe sets and arrays, corresponding to the first row
  5991. of Figure
  5992. \begin_inset CommandInset ref
  5993. LatexCommand ref
  5994. reference "fig:m-bx-violin"
  5995. plural "false"
  5996. caps "false"
  5997. noprefix "false"
  5998. \end_inset
  5999. .
  6000. This MA plot shows that not only is there a wide distribution of M-values,
  6001. but the trend of M-values is dependent on the average normalized intensity.
  6002. This is expected, since the overall trend represents the differences in
  6003. the quantile normalization step.
  6004. When running RMA, only the quantiles for these specific 20 arrays are used,
  6005. while for fRMA the quantile distribution is taken from all arrays used
  6006. in training.
  6007. Figure
  6008. \begin_inset CommandInset ref
  6009. LatexCommand ref
  6010. reference "fig:ma-bx-frma-frma"
  6011. plural "false"
  6012. caps "false"
  6013. noprefix "false"
  6014. \end_inset
  6015. shows a similar MA plot comparing 2 different fRMA normalizations, correspondin
  6016. g to the 6th row of Figure
  6017. \begin_inset CommandInset ref
  6018. LatexCommand ref
  6019. reference "fig:m-bx-violin"
  6020. plural "false"
  6021. caps "false"
  6022. noprefix "false"
  6023. \end_inset
  6024. .
  6025. The MA plot is very tightly centered around zero with no visible trend.
  6026. Figures
  6027. \begin_inset CommandInset ref
  6028. LatexCommand ref
  6029. reference "fig:m-pax-violin"
  6030. plural "false"
  6031. caps "false"
  6032. noprefix "false"
  6033. \end_inset
  6034. ,
  6035. \begin_inset CommandInset ref
  6036. LatexCommand ref
  6037. reference "fig:MA-PAX-rma-frma"
  6038. plural "false"
  6039. caps "false"
  6040. noprefix "false"
  6041. \end_inset
  6042. , and
  6043. \begin_inset CommandInset ref
  6044. LatexCommand ref
  6045. reference "fig:MA-PAX-frma-frma"
  6046. plural "false"
  6047. caps "false"
  6048. noprefix "false"
  6049. \end_inset
  6050. show exactly the same information for the blood samples, once again comparing
  6051. the normalized expression values between normalizations for all probe sets
  6052. across 20 randomly selected test arrays.
  6053. Once again, there is a wider distribution of log ratios between RMA-normalized
  6054. values and fRMA-normalized values, and a much tighter distribution when comparing
  6055. different fRMA normalizations to each other, indicating that the fRMA training
  6056. process is robust to random batch downsampling for the blood samples as
  6057. well.
  6058. \end_layout
  6059. \begin_layout Subsection
  6060. SVA, voom, and array weights improve model fit for methylation array data
  6061. \end_layout
  6062. \begin_layout Standard
  6063. \begin_inset ERT
  6064. status open
  6065. \begin_layout Plain Layout
  6066. \backslash
  6067. afterpage{
  6068. \end_layout
  6069. \begin_layout Plain Layout
  6070. \backslash
  6071. begin{landscape}
  6072. \end_layout
  6073. \end_inset
  6074. \end_layout
  6075. \begin_layout Standard
  6076. \begin_inset Float figure
  6077. wide false
  6078. sideways false
  6079. status open
  6080. \begin_layout Plain Layout
  6081. \begin_inset Flex TODO Note (inline)
  6082. status open
  6083. \begin_layout Plain Layout
  6084. Fix axis labels:
  6085. \begin_inset Quotes eld
  6086. \end_inset
  6087. log2 M-value
  6088. \begin_inset Quotes erd
  6089. \end_inset
  6090. is redundant because M-values are already log scale
  6091. \end_layout
  6092. \end_inset
  6093. \end_layout
  6094. \begin_layout Plain Layout
  6095. \begin_inset Float figure
  6096. wide false
  6097. sideways false
  6098. status collapsed
  6099. \begin_layout Plain Layout
  6100. \align center
  6101. \begin_inset Graphics
  6102. filename graphics/methylvoom/unadj.dupcor/meanvar-trends-PAGE1-CROP-RASTER.png
  6103. lyxscale 15
  6104. width 30col%
  6105. groupId voomaw-subfig
  6106. \end_inset
  6107. \end_layout
  6108. \begin_layout Plain Layout
  6109. \begin_inset Caption Standard
  6110. \begin_layout Plain Layout
  6111. \begin_inset CommandInset label
  6112. LatexCommand label
  6113. name "fig:meanvar-basic"
  6114. \end_inset
  6115. Mean-variance trend for analysis A.
  6116. \end_layout
  6117. \end_inset
  6118. \end_layout
  6119. \end_inset
  6120. \begin_inset space \hfill{}
  6121. \end_inset
  6122. \begin_inset Float figure
  6123. wide false
  6124. sideways false
  6125. status collapsed
  6126. \begin_layout Plain Layout
  6127. \align center
  6128. \begin_inset Graphics
  6129. filename graphics/methylvoom/unadj.dupcor.sva.aw/meanvar-trends-PAGE1-CROP-RASTER.png
  6130. lyxscale 15
  6131. width 30col%
  6132. groupId voomaw-subfig
  6133. \end_inset
  6134. \end_layout
  6135. \begin_layout Plain Layout
  6136. \begin_inset Caption Standard
  6137. \begin_layout Plain Layout
  6138. \begin_inset CommandInset label
  6139. LatexCommand label
  6140. name "fig:meanvar-sva-aw"
  6141. \end_inset
  6142. Mean-variance trend for analysis B.
  6143. \end_layout
  6144. \end_inset
  6145. \end_layout
  6146. \end_inset
  6147. \begin_inset space \hfill{}
  6148. \end_inset
  6149. \begin_inset Float figure
  6150. wide false
  6151. sideways false
  6152. status collapsed
  6153. \begin_layout Plain Layout
  6154. \align center
  6155. \begin_inset Graphics
  6156. filename graphics/methylvoom/unadj.dupcor.sva.voomaw/meanvar-trends-PAGE2-CROP-RASTER.png
  6157. lyxscale 15
  6158. width 30col%
  6159. groupId voomaw-subfig
  6160. \end_inset
  6161. \end_layout
  6162. \begin_layout Plain Layout
  6163. \begin_inset Caption Standard
  6164. \begin_layout Plain Layout
  6165. \begin_inset CommandInset label
  6166. LatexCommand label
  6167. name "fig:meanvar-sva-voomaw"
  6168. \end_inset
  6169. Mean-variance trend after voom modeling in analysis C.
  6170. \end_layout
  6171. \end_inset
  6172. \end_layout
  6173. \end_inset
  6174. \end_layout
  6175. \begin_layout Plain Layout
  6176. \begin_inset Caption Standard
  6177. \begin_layout Plain Layout
  6178. \series bold
  6179. Mean-variance trend modeling in methylation array data.
  6180. \series default
  6181. The estimated log2(standard deviation) for each probe is plotted against
  6182. the probe's average M-value across all samples as a black point, with some
  6183. transparency to make overplotting more visible, since there are about 450,000
  6184. points.
  6185. Density of points is also indicated by the dark blue contour lines.
  6186. The prior variance trend estimated by eBayes is shown in light blue, while
  6187. the lowess trend of the points is shown in red.
  6188. \end_layout
  6189. \end_inset
  6190. \end_layout
  6191. \end_inset
  6192. \end_layout
  6193. \begin_layout Standard
  6194. \begin_inset ERT
  6195. status open
  6196. \begin_layout Plain Layout
  6197. \backslash
  6198. end{landscape}
  6199. \end_layout
  6200. \begin_layout Plain Layout
  6201. }
  6202. \end_layout
  6203. \end_inset
  6204. \end_layout
  6205. \begin_layout Standard
  6206. Figure
  6207. \begin_inset CommandInset ref
  6208. LatexCommand ref
  6209. reference "fig:meanvar-basic"
  6210. plural "false"
  6211. caps "false"
  6212. noprefix "false"
  6213. \end_inset
  6214. shows the relationship between the mean M-value and the standard deviation
  6215. calculated for each probe in the methylation array data set.
  6216. A few features of the data are apparent.
  6217. First, the data are very strongly bimodal, with peaks in the density around
  6218. M-values of +4 and -4.
  6219. These modes correspond to methylation sites that are nearly 100% methylated
  6220. and nearly 100% unmethylated, respectively.
  6221. The strong bimodality indicates that a majority of probes interrogate sites
  6222. that fall into one of these two categories.
  6223. The points in between these modes represent sites that are either partially
  6224. methylated in many samples, or are fully methylated in some samples and
  6225. fully unmethylated in other samples, or some combination.
  6226. The next visible feature of the data is the W-shaped variance trend.
  6227. The upticks in the variance trend on either side are expected, based on
  6228. the sigmoid transformation exaggerating small differences at extreme M-values
  6229. (Figure
  6230. \begin_inset CommandInset ref
  6231. LatexCommand ref
  6232. reference "fig:Sigmoid-beta-m-mapping"
  6233. plural "false"
  6234. caps "false"
  6235. noprefix "false"
  6236. \end_inset
  6237. ).
  6238. However, the uptick in the center is interesting: it indicates that sites
  6239. that are not constitutively methylated or unmethylated have a higher
  6240. variance.
  6241. This could be a genuine biological effect, or it could be spurious noise
  6242. that is only observable at sites with varying methylation.
  6243. \end_layout
  6244. \begin_layout Standard
  6245. In Figure
  6246. \begin_inset CommandInset ref
  6247. LatexCommand ref
  6248. reference "fig:meanvar-sva-aw"
  6249. plural "false"
  6250. caps "false"
  6251. noprefix "false"
  6252. \end_inset
  6253. , we see the mean-variance trend for the same methylation array data, this
  6254. time with surrogate variables and sample quality weights estimated from
  6255. the data and included in the model.
  6256. As expected, the overall average variance is smaller, since the surrogate
  6257. variables account for some of the variance.
  6258. In addition, the uptick in variance in the middle of the M-value range
  6259. has disappeared, turning the W shape into a wide U shape.
  6260. This indicates that the excess variance in the probes with intermediate
  6261. M-values was explained by systematic variations not correlated with known
  6262. covariates, and these variations were modeled by the surrogate variables.
  6263. The result is a nearly flat variance trend for the entire intermediate
  6264. M-value range from about -3 to +3.
  6265. Note that this corresponds closely to the range within which the M-value
  6266. transformation shown in Figure
  6267. \begin_inset CommandInset ref
  6268. LatexCommand ref
  6269. reference "fig:Sigmoid-beta-m-mapping"
  6270. plural "false"
  6271. caps "false"
  6272. noprefix "false"
  6273. \end_inset
  6274. is nearly linear.
  6275. In contrast, the excess variance at the extremes (greater than +3 and less
  6276. than -3) was not
  6277. \begin_inset Quotes eld
  6278. \end_inset
  6279. absorbed
  6280. \begin_inset Quotes erd
  6281. \end_inset
  6282. by the surrogate variables and remains in the plot, indicating that this
  6283. variation has no systematic component: probes with extreme M-values are
  6284. uniformly more variable across all samples, as expected.
  6285. \end_layout
  6286. \begin_layout Standard
  6287. Figure
  6288. \begin_inset CommandInset ref
  6289. LatexCommand ref
  6290. reference "fig:meanvar-sva-voomaw"
  6291. plural "false"
  6292. caps "false"
  6293. noprefix "false"
  6294. \end_inset
  6295. shows the mean-variance trend after fitting the model with the observation
  6296. weights assigned by voom based on the mean-variance trend shown in Figure
  6297. \begin_inset CommandInset ref
  6298. LatexCommand ref
  6299. reference "fig:meanvar-sva-aw"
  6300. plural "false"
  6301. caps "false"
  6302. noprefix "false"
  6303. \end_inset
  6304. .
  6305. As expected, the weights exactly counteract the trend in the data, resulting
  6306. in a nearly flat trend centered vertically at 1 (i.e.
  6307. 0 on the log scale).
  6308. This shows that the observations with extreme M-values have been appropriately
  6309. down-weighted to account for the fact that the noise in those observations
  6310. has been amplified by the non-linear M-value transformation.
  6311. In turn, this gives relatively more weight to observations in the middle
  6312. region, which are more likely to correspond to probes measuring interesting
  6313. biology (not constitutively methylated or unmethylated).
  6314. \end_layout
  6315. \begin_layout Standard
  6316. \begin_inset Float table
  6317. wide false
  6318. sideways false
  6319. status open
  6320. \begin_layout Plain Layout
  6321. \align center
  6322. \begin_inset Tabular
  6323. <lyxtabular version="3" rows="5" columns="3">
  6324. <features tabularvalignment="middle">
  6325. <column alignment="center" valignment="top">
  6326. <column alignment="center" valignment="top">
  6327. <column alignment="center" valignment="top">
  6328. <row>
  6329. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6330. \begin_inset Text
  6331. \begin_layout Plain Layout
  6332. Covariate
  6333. \end_layout
  6334. \end_inset
  6335. </cell>
  6336. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6337. \begin_inset Text
  6338. \begin_layout Plain Layout
  6339. Test used
  6340. \end_layout
  6341. \end_inset
  6342. </cell>
  6343. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6344. \begin_inset Text
  6345. \begin_layout Plain Layout
  6346. p-value
  6347. \end_layout
  6348. \end_inset
  6349. </cell>
  6350. </row>
  6351. <row>
  6352. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6353. \begin_inset Text
  6354. \begin_layout Plain Layout
  6355. Transplant Status
  6356. \end_layout
  6357. \end_inset
  6358. </cell>
  6359. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6360. \begin_inset Text
  6361. \begin_layout Plain Layout
  6362. F-test
  6363. \end_layout
  6364. \end_inset
  6365. </cell>
  6366. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6367. \begin_inset Text
  6368. \begin_layout Plain Layout
  6369. 0.404
  6370. \end_layout
  6371. \end_inset
  6372. </cell>
  6373. </row>
  6374. <row>
  6375. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6376. \begin_inset Text
  6377. \begin_layout Plain Layout
  6378. Diabetes Diagnosis
  6379. \end_layout
  6380. \end_inset
  6381. </cell>
  6382. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6383. \begin_inset Text
  6384. \begin_layout Plain Layout
  6385. \emph on
  6386. t
  6387. \emph default
  6388. -test
  6389. \end_layout
  6390. \end_inset
  6391. </cell>
  6392. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6393. \begin_inset Text
  6394. \begin_layout Plain Layout
  6395. 0.00106
  6396. \end_layout
  6397. \end_inset
  6398. </cell>
  6399. </row>
  6400. <row>
  6401. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6402. \begin_inset Text
  6403. \begin_layout Plain Layout
  6404. Sex
  6405. \end_layout
  6406. \end_inset
  6407. </cell>
  6408. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6409. \begin_inset Text
  6410. \begin_layout Plain Layout
  6411. \emph on
  6412. t
  6413. \emph default
  6414. -test
  6415. \end_layout
  6416. \end_inset
  6417. </cell>
  6418. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6419. \begin_inset Text
  6420. \begin_layout Plain Layout
  6421. 0.148
  6422. \end_layout
  6423. \end_inset
  6424. </cell>
  6425. </row>
  6426. <row>
  6427. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6428. \begin_inset Text
  6429. \begin_layout Plain Layout
  6430. Age
  6431. \end_layout
  6432. \end_inset
  6433. </cell>
  6434. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6435. \begin_inset Text
  6436. \begin_layout Plain Layout
  6437. linear regression
  6438. \end_layout
  6439. \end_inset
  6440. </cell>
  6441. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6442. \begin_inset Text
  6443. \begin_layout Plain Layout
  6444. 0.212
  6445. \end_layout
  6446. \end_inset
  6447. </cell>
  6448. </row>
  6449. </lyxtabular>
  6450. \end_inset
  6451. \end_layout
  6452. \begin_layout Plain Layout
  6453. \begin_inset Caption Standard
  6454. \begin_layout Plain Layout
  6455. \series bold
  6456. \begin_inset CommandInset label
  6457. LatexCommand label
  6458. name "tab:weight-covariate-tests"
  6459. \end_inset
  6460. Association of sample weights with clinical covariates in methylation array
  6461. data.
  6462. \series default
  6463. Computed sample quality log weights were tested for significant association
  6464. with each of the variables in the model (1st column).
  6465. An appropriate test was selected for each variable based on whether the
  6466. variable had 2 categories (
  6467. \emph on
  6468. t
  6469. \emph default
  6470. -test), had more than 2 categories (F-test), or was numeric (linear regression).
  6471. The test selected is shown in the 2nd column.
  6472. P-values for association with the log weights are shown in the 3rd column.
  6473. No multiple testing adjustment was performed for these p-values.
  6474. \end_layout
  6475. \end_inset
  6476. \end_layout
  6477. \end_inset
  6478. \end_layout
  6479. \begin_layout Standard
  6480. \begin_inset Float figure
  6481. wide false
  6482. sideways false
  6483. status open
  6484. \begin_layout Plain Layout
  6485. \begin_inset Flex TODO Note (inline)
  6486. status open
  6487. \begin_layout Plain Layout
  6488. Redo the sample weight boxplot with notches, and remove fill colors
  6489. \end_layout
  6490. \end_inset
  6491. \end_layout
  6492. \begin_layout Plain Layout
  6493. \align center
  6494. \begin_inset Graphics
  6495. filename graphics/methylvoom/unadj.dupcor.sva.voomaw/sample-weights-PAGE3-CROP.pdf
  6496. lyxscale 50
  6497. width 60col%
  6498. groupId colwidth
  6499. \end_inset
  6500. \end_layout
  6501. \begin_layout Plain Layout
  6502. \begin_inset Caption Standard
  6503. \begin_layout Plain Layout
  6504. \begin_inset CommandInset label
  6505. LatexCommand label
  6506. name "fig:diabetes-sample-weights"
  6507. \end_inset
  6508. \series bold
  6509. Box-and-whiskers plot of sample quality weights grouped by diabetes diagnosis.
  6510. \series default
  6511. Samples were grouped based on diabetes diagnosis, and the distribution of
  6512. sample quality weights for each diagnosis was plotted as a box-and-whiskers
  6513. plot
  6514. \begin_inset CommandInset citation
  6515. LatexCommand cite
  6516. key "McGill1978"
  6517. literal "false"
  6518. \end_inset
  6519. .
  6520. \end_layout
  6521. \end_inset
  6522. \end_layout
  6523. \begin_layout Plain Layout
  6524. \end_layout
  6525. \end_inset
  6526. \end_layout
  6527. \begin_layout Standard
  6528. To determine whether any of the known experimental factors had an impact
  6529. on data quality, the sample quality weights estimated from the data were
  6530. tested for association with each of the experimental factors (Table
  6531. \begin_inset CommandInset ref
  6532. LatexCommand ref
  6533. reference "tab:weight-covariate-tests"
  6534. plural "false"
  6535. caps "false"
  6536. noprefix "false"
  6537. \end_inset
  6538. ).
  6539. Diabetes diagnosis was found to have a potentially significant association
  6540. with the sample weights, with a t-test p-value of
  6541. \begin_inset Formula $1.06\times10^{-3}$
  6542. \end_inset
  6543. .
  6544. Figure
  6545. \begin_inset CommandInset ref
  6546. LatexCommand ref
  6547. reference "fig:diabetes-sample-weights"
  6548. plural "false"
  6549. caps "false"
  6550. noprefix "false"
  6551. \end_inset
  6552. shows the distribution of sample weights grouped by diabetes diagnosis.
  6553. The samples from patients with Type 2 diabetes were assigned significantly
  6554. lower weights than those from patients with Type 1 diabetes.
  6555. This indicates that the Type 2 diabetes samples had an overall higher variance
  6556. on average across all probes.
  6557. \end_layout
  6558. \begin_layout Standard
  6559. \begin_inset Float table
  6560. wide false
  6561. sideways false
  6562. status open
  6563. \begin_layout Plain Layout
  6564. \align center
  6565. \begin_inset Flex TODO Note (inline)
  6566. status open
  6567. \begin_layout Plain Layout
  6568. Consider transposing these tables
  6569. \end_layout
  6570. \end_inset
  6571. \end_layout
  6572. \begin_layout Plain Layout
  6573. \begin_inset Float table
  6574. wide false
  6575. sideways false
  6576. status open
  6577. \begin_layout Plain Layout
  6578. \align center
  6579. \begin_inset Tabular
  6580. <lyxtabular version="3" rows="5" columns="4">
  6581. <features tabularvalignment="middle">
  6582. <column alignment="center" valignment="top">
  6583. <column alignment="center" valignment="top">
  6584. <column alignment="center" valignment="top">
  6585. <column alignment="center" valignment="top">
  6586. <row>
  6587. <cell alignment="center" valignment="top" usebox="none">
  6588. \begin_inset Text
  6589. \begin_layout Plain Layout
  6590. \end_layout
  6591. \end_inset
  6592. </cell>
  6593. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6594. \begin_inset Text
  6595. \begin_layout Plain Layout
  6596. Analysis
  6597. \end_layout
  6598. \end_inset
  6599. </cell>
  6600. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6601. \begin_inset Text
  6602. \begin_layout Plain Layout
  6603. \end_layout
  6604. \end_inset
  6605. </cell>
  6606. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6607. \begin_inset Text
  6608. \begin_layout Plain Layout
  6609. \end_layout
  6610. \end_inset
  6611. </cell>
  6612. </row>
  6613. <row>
  6614. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6615. \begin_inset Text
  6616. \begin_layout Plain Layout
  6617. Contrast
  6618. \end_layout
  6619. \end_inset
  6620. </cell>
  6621. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6622. \begin_inset Text
  6623. \begin_layout Plain Layout
  6624. A
  6625. \end_layout
  6626. \end_inset
  6627. </cell>
  6628. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6629. \begin_inset Text
  6630. \begin_layout Plain Layout
  6631. B
  6632. \end_layout
  6633. \end_inset
  6634. </cell>
  6635. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6636. \begin_inset Text
  6637. \begin_layout Plain Layout
  6638. C
  6639. \end_layout
  6640. \end_inset
  6641. </cell>
  6642. </row>
  6643. <row>
  6644. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6645. \begin_inset Text
  6646. \begin_layout Plain Layout
  6647. TX vs AR
  6648. \end_layout
  6649. \end_inset
  6650. </cell>
  6651. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6652. \begin_inset Text
  6653. \begin_layout Plain Layout
  6654. 0
  6655. \end_layout
  6656. \end_inset
  6657. </cell>
  6658. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6659. \begin_inset Text
  6660. \begin_layout Plain Layout
  6661. 25
  6662. \end_layout
  6663. \end_inset
  6664. </cell>
  6665. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6666. \begin_inset Text
  6667. \begin_layout Plain Layout
  6668. 22
  6669. \end_layout
  6670. \end_inset
  6671. </cell>
  6672. </row>
  6673. <row>
  6674. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6675. \begin_inset Text
  6676. \begin_layout Plain Layout
  6677. TX vs ADNR
  6678. \end_layout
  6679. \end_inset
  6680. </cell>
  6681. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6682. \begin_inset Text
  6683. \begin_layout Plain Layout
  6684. 7
  6685. \end_layout
  6686. \end_inset
  6687. </cell>
  6688. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6689. \begin_inset Text
  6690. \begin_layout Plain Layout
  6691. 338
  6692. \end_layout
  6693. \end_inset
  6694. </cell>
  6695. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6696. \begin_inset Text
  6697. \begin_layout Plain Layout
  6698. 369
  6699. \end_layout
  6700. \end_inset
  6701. </cell>
  6702. </row>
  6703. <row>
  6704. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6705. \begin_inset Text
  6706. \begin_layout Plain Layout
  6707. TX vs CAN
  6708. \end_layout
  6709. \end_inset
  6710. </cell>
  6711. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6712. \begin_inset Text
  6713. \begin_layout Plain Layout
  6714. 0
  6715. \end_layout
  6716. \end_inset
  6717. </cell>
  6718. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6719. \begin_inset Text
  6720. \begin_layout Plain Layout
  6721. 231
  6722. \end_layout
  6723. \end_inset
  6724. </cell>
  6725. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6726. \begin_inset Text
  6727. \begin_layout Plain Layout
  6728. 278
  6729. \end_layout
  6730. \end_inset
  6731. </cell>
  6732. </row>
  6733. </lyxtabular>
  6734. \end_inset
  6735. \end_layout
  6736. \begin_layout Plain Layout
  6737. \begin_inset Caption Standard
  6738. \begin_layout Plain Layout
  6739. \begin_inset CommandInset label
  6740. LatexCommand label
  6741. name "tab:methyl-num-signif"
  6742. \end_inset
  6743. Number of probes significant at 10% FDR.
  6744. \end_layout
  6745. \end_inset
  6746. \end_layout
  6747. \end_inset
  6748. \begin_inset space \hfill{}
  6749. \end_inset
  6750. \begin_inset Float table
  6751. wide false
  6752. sideways false
  6753. status open
  6754. \begin_layout Plain Layout
  6755. \align center
  6756. \begin_inset Tabular
  6757. <lyxtabular version="3" rows="5" columns="4">
  6758. <features tabularvalignment="middle">
  6759. <column alignment="center" valignment="top">
  6760. <column alignment="center" valignment="top">
  6761. <column alignment="center" valignment="top">
  6762. <column alignment="center" valignment="top">
  6763. <row>
  6764. <cell alignment="center" valignment="top" usebox="none">
  6765. \begin_inset Text
  6766. \begin_layout Plain Layout
  6767. \end_layout
  6768. \end_inset
  6769. </cell>
  6770. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6771. \begin_inset Text
  6772. \begin_layout Plain Layout
  6773. Analysis
  6774. \end_layout
  6775. \end_inset
  6776. </cell>
  6777. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6778. \begin_inset Text
  6779. \begin_layout Plain Layout
  6780. \end_layout
  6781. \end_inset
  6782. </cell>
  6783. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6784. \begin_inset Text
  6785. \begin_layout Plain Layout
  6786. \end_layout
  6787. \end_inset
  6788. </cell>
  6789. </row>
  6790. <row>
  6791. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6792. \begin_inset Text
  6793. \begin_layout Plain Layout
  6794. Contrast
  6795. \end_layout
  6796. \end_inset
  6797. </cell>
  6798. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6799. \begin_inset Text
  6800. \begin_layout Plain Layout
  6801. A
  6802. \end_layout
  6803. \end_inset
  6804. </cell>
  6805. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6806. \begin_inset Text
  6807. \begin_layout Plain Layout
  6808. B
  6809. \end_layout
  6810. \end_inset
  6811. </cell>
  6812. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6813. \begin_inset Text
  6814. \begin_layout Plain Layout
  6815. C
  6816. \end_layout
  6817. \end_inset
  6818. </cell>
  6819. </row>
  6820. <row>
  6821. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6822. \begin_inset Text
  6823. \begin_layout Plain Layout
  6824. TX vs AR
  6825. \end_layout
  6826. \end_inset
  6827. </cell>
  6828. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6829. \begin_inset Text
  6830. \begin_layout Plain Layout
  6831. 0
  6832. \end_layout
  6833. \end_inset
  6834. </cell>
  6835. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6836. \begin_inset Text
  6837. \begin_layout Plain Layout
  6838. 10,063
  6839. \end_layout
  6840. \end_inset
  6841. </cell>
  6842. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6843. \begin_inset Text
  6844. \begin_layout Plain Layout
  6845. 11,225
  6846. \end_layout
  6847. \end_inset
  6848. </cell>
  6849. </row>
  6850. <row>
  6851. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6852. \begin_inset Text
  6853. \begin_layout Plain Layout
  6854. TX vs ADNR
  6855. \end_layout
  6856. \end_inset
  6857. </cell>
  6858. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6859. \begin_inset Text
  6860. \begin_layout Plain Layout
  6861. 27
  6862. \end_layout
  6863. \end_inset
  6864. </cell>
  6865. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6866. \begin_inset Text
  6867. \begin_layout Plain Layout
  6868. 12,674
  6869. \end_layout
  6870. \end_inset
  6871. </cell>
  6872. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6873. \begin_inset Text
  6874. \begin_layout Plain Layout
  6875. 13,086
  6876. \end_layout
  6877. \end_inset
  6878. </cell>
  6879. </row>
  6880. <row>
  6881. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6882. \begin_inset Text
  6883. \begin_layout Plain Layout
  6884. TX vs CAN
  6885. \end_layout
  6886. \end_inset
  6887. </cell>
  6888. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6889. \begin_inset Text
  6890. \begin_layout Plain Layout
  6891. 966
  6892. \end_layout
  6893. \end_inset
  6894. </cell>
  6895. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6896. \begin_inset Text
  6897. \begin_layout Plain Layout
  6898. 20,039
  6899. \end_layout
  6900. \end_inset
  6901. </cell>
  6902. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6903. \begin_inset Text
  6904. \begin_layout Plain Layout
  6905. 20,955
  6906. \end_layout
  6907. \end_inset
  6908. </cell>
  6909. </row>
  6910. </lyxtabular>
  6911. \end_inset
  6912. \end_layout
  6913. \begin_layout Plain Layout
  6914. \begin_inset Caption Standard
  6915. \begin_layout Plain Layout
  6916. \begin_inset CommandInset label
  6917. LatexCommand label
  6918. name "tab:methyl-est-nonnull"
  6919. \end_inset
  6920. Estimated number of non-null tests, using the method of averaging local
  6921. FDR values
  6922. \begin_inset CommandInset citation
  6923. LatexCommand cite
  6924. key "Phipson2013Thesis"
  6925. literal "false"
  6926. \end_inset
  6927. .
  6928. \end_layout
  6929. \end_inset
  6930. \end_layout
  6931. \end_inset
  6932. \end_layout
  6933. \begin_layout Plain Layout
  6934. \begin_inset Caption Standard
  6935. \begin_layout Plain Layout
  6936. \series bold
  6937. Estimates of degree of differential methylation for each contrast in
  6938. each analysis.
  6939. \series default
  6940. For each of the analyses in Table
  6941. \begin_inset CommandInset ref
  6942. LatexCommand ref
  6943. reference "tab:Summary-of-meth-analysis"
  6944. plural "false"
  6945. caps "false"
  6946. noprefix "false"
  6947. \end_inset
  6948. , these tables show the number of probes called significantly differentially
  6949. methylated at a threshold of 10% FDR for each comparison between TX and
  6950. the other 3 transplant statuses (a) and the estimated total number of probes
  6951. that are differentially methylated (b).
  6952. \end_layout
  6953. \end_inset
  6954. \end_layout
  6955. \end_inset
  6956. \end_layout
  6957. \begin_layout Standard
  6958. \begin_inset Float figure
  6959. wide false
  6960. sideways false
  6961. status open
  6962. \begin_layout Plain Layout
  6963. \align center
  6964. \series bold
  6965. \begin_inset Float figure
  6966. wide false
  6967. sideways false
  6968. status collapsed
  6969. \begin_layout Plain Layout
  6970. \align center
  6971. \begin_inset Graphics
  6972. filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE1.pdf
  6973. lyxscale 33
  6974. width 30col%
  6975. groupId meth-pval-hist
  6976. \end_inset
  6977. \end_layout
  6978. \begin_layout Plain Layout
  6979. \series bold
  6980. \begin_inset Caption Standard
  6981. \begin_layout Plain Layout
  6982. AR vs.
  6983. TX, Analysis A
  6984. \end_layout
  6985. \end_inset
  6986. \end_layout
  6987. \begin_layout Plain Layout
  6988. \end_layout
  6989. \end_inset
  6990. \begin_inset space \hfill{}
  6991. \end_inset
  6992. \begin_inset Float figure
  6993. wide false
  6994. sideways false
  6995. status collapsed
  6996. \begin_layout Plain Layout
  6997. \align center
  6998. \begin_inset Graphics
  6999. filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE2.pdf
  7000. lyxscale 33
  7001. width 30col%
  7002. groupId meth-pval-hist
  7003. \end_inset
  7004. \end_layout
  7005. \begin_layout Plain Layout
  7006. \series bold
  7007. \begin_inset Caption Standard
  7008. \begin_layout Plain Layout
  7009. ADNR vs.
  7010. TX, Analysis A
  7011. \end_layout
  7012. \end_inset
  7013. \end_layout
  7014. \end_inset
  7015. \begin_inset space \hfill{}
  7016. \end_inset
  7017. \begin_inset Float figure
  7018. wide false
  7019. sideways false
  7020. status collapsed
  7021. \begin_layout Plain Layout
  7022. \align center
  7023. \begin_inset Graphics
  7024. filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE3.pdf
  7025. lyxscale 33
  7026. width 30col%
  7027. groupId meth-pval-hist
  7028. \end_inset
  7029. \end_layout
  7030. \begin_layout Plain Layout
  7031. \series bold
  7032. \begin_inset Caption Standard
  7033. \begin_layout Plain Layout
  7034. CAN vs.
  7035. TX, Analysis A
  7036. \end_layout
  7037. \end_inset
  7038. \end_layout
  7039. \end_inset
  7040. \end_layout
  7041. \begin_layout Plain Layout
  7042. \align center
  7043. \series bold
  7044. \begin_inset Float figure
  7045. wide false
  7046. sideways false
  7047. status collapsed
  7048. \begin_layout Plain Layout
  7049. \align center
  7050. \begin_inset Graphics
  7051. filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE1.pdf
  7052. lyxscale 33
  7053. width 30col%
  7054. groupId meth-pval-hist
  7055. \end_inset
  7056. \end_layout
  7057. \begin_layout Plain Layout
  7058. \series bold
  7059. \begin_inset Caption Standard
  7060. \begin_layout Plain Layout
  7061. AR vs.
  7062. TX, Analysis B
  7063. \end_layout
  7064. \end_inset
  7065. \end_layout
  7066. \end_inset
  7067. \begin_inset space \hfill{}
  7068. \end_inset
  7069. \begin_inset Float figure
  7070. wide false
  7071. sideways false
  7072. status collapsed
  7073. \begin_layout Plain Layout
  7074. \align center
  7075. \begin_inset Graphics
  7076. filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE2.pdf
  7077. lyxscale 33
  7078. width 30col%
  7079. groupId meth-pval-hist
  7080. \end_inset
  7081. \end_layout
  7082. \begin_layout Plain Layout
  7083. \series bold
  7084. \begin_inset Caption Standard
  7085. \begin_layout Plain Layout
  7086. ADNR vs.
  7087. TX, Analysis B
  7088. \end_layout
  7089. \end_inset
  7090. \end_layout
  7091. \end_inset
  7092. \begin_inset space \hfill{}
  7093. \end_inset
  7094. \begin_inset Float figure
  7095. wide false
  7096. sideways false
  7097. status collapsed
  7098. \begin_layout Plain Layout
  7099. \align center
  7100. \begin_inset Graphics
  7101. filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE3.pdf
  7102. lyxscale 33
  7103. width 30col%
  7104. groupId meth-pval-hist
  7105. \end_inset
  7106. \end_layout
  7107. \begin_layout Plain Layout
  7108. \series bold
  7109. \begin_inset Caption Standard
  7110. \begin_layout Plain Layout
  7111. CAN vs.
  7112. TX, Analysis B
  7113. \end_layout
  7114. \end_inset
  7115. \end_layout
  7116. \end_inset
  7117. \end_layout
  7118. \begin_layout Plain Layout
  7119. \align center
  7120. \series bold
  7121. \begin_inset Float figure
  7122. wide false
  7123. sideways false
  7124. status collapsed
  7125. \begin_layout Plain Layout
  7126. \align center
  7127. \begin_inset Graphics
  7128. filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE1.pdf
  7129. lyxscale 33
  7130. width 30col%
  7131. groupId meth-pval-hist
  7132. \end_inset
  7133. \end_layout
  7134. \begin_layout Plain Layout
  7135. \series bold
  7136. \begin_inset Caption Standard
  7137. \begin_layout Plain Layout
  7138. AR vs.
  7139. TX, Analysis C
  7140. \end_layout
  7141. \end_inset
  7142. \end_layout
  7143. \end_inset
  7144. \begin_inset space \hfill{}
  7145. \end_inset
  7146. \begin_inset Float figure
  7147. wide false
  7148. sideways false
  7149. status collapsed
  7150. \begin_layout Plain Layout
  7151. \align center
  7152. \begin_inset Graphics
  7153. filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE2.pdf
  7154. lyxscale 33
  7155. width 30col%
  7156. groupId meth-pval-hist
  7157. \end_inset
  7158. \end_layout
  7159. \begin_layout Plain Layout
  7160. \series bold
  7161. \begin_inset Caption Standard
  7162. \begin_layout Plain Layout
  7163. ADNR vs.
  7164. TX, Analysis C
  7165. \end_layout
  7166. \end_inset
  7167. \end_layout
  7168. \end_inset
  7169. \begin_inset space \hfill{}
  7170. \end_inset
  7171. \begin_inset Float figure
  7172. wide false
  7173. sideways false
  7174. status collapsed
  7175. \begin_layout Plain Layout
  7176. \align center
  7177. \begin_inset Graphics
  7178. filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE3.pdf
  7179. lyxscale 33
  7180. width 30col%
  7181. groupId meth-pval-hist
  7182. \end_inset
  7183. \end_layout
  7184. \begin_layout Plain Layout
  7185. \series bold
  7186. \begin_inset Caption Standard
  7187. \begin_layout Plain Layout
  7188. CAN vs.
  7189. TX, Analysis C
  7190. \end_layout
  7191. \end_inset
  7192. \end_layout
  7193. \end_inset
  7194. \end_layout
  7195. \begin_layout Plain Layout
  7196. \begin_inset Caption Standard
  7197. \begin_layout Plain Layout
  7198. \series bold
  7199. \begin_inset CommandInset label
  7200. LatexCommand label
  7201. name "fig:meth-p-value-histograms"
  7202. \end_inset
  7203. Probe p-value histograms for each contrast in each analysis.
  7204. \series default
  7205. For each differential methylation test of interest, the distribution of
  7206. p-values across all probes is plotted as a histogram.
  7207. The red solid line indicates the density that would be expected under the
  7208. null hypothesis for all probes (a
  7209. \begin_inset Formula $\mathrm{Uniform}(0,1)$
  7210. \end_inset
  7211. distribution), while the blue dotted line indicates the fraction of p-values
  7212. that actually follow the null hypothesis (
  7213. \begin_inset Formula $\hat{\pi}_{0}$
  7214. \end_inset
  7215. ) estimated using the method of averaging local FDR values
  7216. \begin_inset CommandInset citation
  7217. LatexCommand cite
  7218. key "Phipson2013Thesis"
  7219. literal "false"
  7220. \end_inset
  7221. .
  7222. The blue line is only shown in each plot if the estimate of
  7223. \begin_inset Formula $\hat{\pi}_{0}$
  7224. \end_inset
  7225. for that p-value distribution is different from 1.
  7226. \end_layout
  7227. \end_inset
  7228. \end_layout
  7229. \end_inset
  7230. \end_layout
  7231. \begin_layout Standard
  7232. Table
  7233. \begin_inset CommandInset ref
  7234. LatexCommand ref
  7235. reference "tab:methyl-num-signif"
  7236. plural "false"
  7237. caps "false"
  7238. noprefix "false"
  7239. \end_inset
  7240. shows the number of significantly differentially methylated probes reported
  7241. by each analysis for each comparison of interest at an FDR of 10%.
  7242. As expected, the more elaborate analyses, B and C, report more significant
  7243. probes than the more basic analysis A, consistent with the conclusions
  7244. above that the data contain hidden systematic variations that must be modeled.
  7245. Table
  7246. \begin_inset CommandInset ref
  7247. LatexCommand ref
  7248. reference "tab:methyl-est-nonnull"
  7249. plural "false"
  7250. caps "false"
  7251. noprefix "false"
  7252. \end_inset
  7253. shows the estimated number of differentially methylated probes for each test
  7254. from each analysis.
  7255. This was computed by estimating the proportion of null hypotheses that
  7256. were true using the method of
  7257. \begin_inset CommandInset citation
  7258. LatexCommand cite
  7259. key "Phipson2013Thesis"
  7260. literal "false"
  7261. \end_inset
  7262. and subtracting that fraction from the total number of probes, yielding
  7263. an estimate of the number of null hypotheses that are false based on the
  7264. distribution of p-values across the entire dataset.
  7265. Note that this does not identify which null hypotheses should be rejected
  7266. (i.e.
  7267. which probes are significant); it only estimates the true number of such
  7268. probes.
  7269. Once again, analyses B and C result in much larger estimates for the number
  7270. of differentially methylated probes.
  7271. In this case, analysis C, the only analysis that includes voom, estimates
  7272. the largest number of differentially methylated probes for all 3 contrasts.
  7273. If the assumptions of all the methods employed hold, then this represents
  7274. a gain in statistical power over the simpler analysis A.
  7275. Figure
  7276. \begin_inset CommandInset ref
  7277. LatexCommand ref
  7278. reference "fig:meth-p-value-histograms"
  7279. plural "false"
  7280. caps "false"
  7281. noprefix "false"
  7282. \end_inset
  7283. shows the p-value distributions for each test, from which the numbers in
  7284. Table
  7285. \begin_inset CommandInset ref
  7286. LatexCommand ref
  7287. reference "tab:methyl-est-nonnull"
  7288. plural "false"
  7289. caps "false"
  7290. noprefix "false"
  7291. \end_inset
  7292. were generated.
  7293. The distributions for analysis A all have a dip in density near zero, which
  7294. is a strong sign of a poor model fit.
  7295. The histograms for analyses B and C are more well-behaved, with a uniform
  7296. component stretching all the way from 0 to 1 representing the probes for
  7297. which the null hypothesis is true (no differential methylation), and a
  7298. zero-biased component representing the probes for which the null hypothesis
  7299. is false (differentially methylated).
  7300. These histograms do not indicate any major issues with the model fit.
  7301. \end_layout
  7302. \begin_layout Standard
  7303. \begin_inset Flex TODO Note (inline)
  7304. status open
  7305. \begin_layout Plain Layout
  7306. If time allows, maybe generate the PCA plots before/after SVA effect subtraction
  7307. ?
  7308. \end_layout
  7309. \end_inset
  7310. \end_layout
  7311. \begin_layout Section
  7312. Discussion
  7313. \end_layout
  7314. \begin_layout Subsection
  7315. fRMA achieves clinically applicable normalization without sacrificing classifica
  7316. tion performance
  7317. \end_layout
  7318. \begin_layout Standard
  7319. As shown in Figure
  7320. \begin_inset CommandInset ref
  7321. LatexCommand ref
  7322. reference "fig:Classifier-probabilities-RMA"
  7323. plural "false"
  7324. caps "false"
  7325. noprefix "false"
  7326. \end_inset
  7327. , improper normalization, particularly separate normalization of training
  7328. and test samples, leads to unwanted biases in classification.
  7329. In a controlled experimental context, it is always possible to correct
  7330. this issue by normalizing all experimental samples together.
  7331. However, because it is not feasible to normalize all samples together in
  7332. a clinical context, a single-channel normalization is required.
  7333. \end_layout
  7334. \begin_layout Standard
  7335. The major concern in using a single-channel normalization is that non-single-cha
  7336. nnel methods can share information between arrays to improve the normalization,
  7337. and single-channel methods risk sacrificing the gains in normalization
  7338. accuracy that come from this information sharing.
  7339. In the case of RMA, this information sharing is accomplished through quantile
  7340. normalization and median polish steps.
  7341. The need for information sharing in quantile normalization can easily be
  7342. removed by learning a fixed set of quantiles from external data and normalizing
  7343. each array to these fixed quantiles, instead of the quantiles of the data
  7344. itself.
  7345. As long as the fixed quantiles are reasonable, the result will be similar
  7346. to standard RMA.
  7347. However, there is no analogous way to eliminate cross-array information
  7348. sharing in the median polish step, so fRMA replaces this with a weighted
  7349. average of probes on each array, with the weights learned from external
  7350. data.
  7351. This step of fRMA has the greatest potential to diverge from RMA in undesirable
  7352. ways.
  7353. \end_layout
  7354. \begin_layout Standard
  7355. However, when run on real data, fRMA performed at least as well as RMA in
  7356. both the internal validation and external validation tests.
  7357. This shows that fRMA can be used to normalize individual clinical samples
  7358. in a class prediction context without sacrificing the classifier performance
  7359. that would be obtained by using the more well-established RMA for normalization.
  7360. The other single-channel normalization method considered, SCAN, showed
  7361. some loss of AUC in the external validation test.
  7362. Based on these results, fRMA is the preferred normalization for clinical
  7363. samples in a class prediction context.
  7364. \end_layout
  7365. \begin_layout Subsection
  7366. Robust fRMA vectors can be generated for new array platforms
  7367. \end_layout
  7368. \begin_layout Standard
  7369. \begin_inset Flex TODO Note (inline)
  7370. status open
  7371. \begin_layout Plain Layout
  7372. Look up the exact numbers, do a find & replace for
  7373. \begin_inset Quotes eld
  7374. \end_inset
  7375. 850
  7376. \begin_inset Quotes erd
  7377. \end_inset
  7378. \end_layout
  7379. \end_inset
  7380. \end_layout
  7381. \begin_layout Standard
  7382. The published fRMA normalization vectors for the hgu133plus2 platform were
  7383. generated from a set of about 850 samples chosen from a wide range of tissues,
  7384. which the authors determined was sufficient to generate a robust set of
  7385. normalization vectors that could be applied across all tissues
  7386. \begin_inset CommandInset citation
  7387. LatexCommand cite
  7388. key "McCall2010"
  7389. literal "false"
  7390. \end_inset
  7391. .
  7392. Since we only had hthgu133pluspm data for 2 tissues of interest, our needs were
  7393. more modest.
  7394. Even using only 130 samples in 26 batches of 5 samples each for kidney
  7395. biopsies, we were able to train a robust set of fRMA normalization vectors
  7396. that were not meaningfully affected by the random selection of 5 samples
  7397. from each batch.
  7398. As expected, the training process was just as robust for the blood samples
  7399. with 230 samples in 46 batches of 5 samples each.
  7400. Because these vectors were each generated using training samples from a
  7401. single tissue, they are not suitable for general use, unlike the vectors
  7402. provided with fRMA itself.
  7403. They are purpose-built for normalizing a specific type of sample on a specific
  7404. platform.
  7405. This is a mostly acceptable limitation in the context of developing a machine
  7406. learning classifier for diagnosing a disease based on samples of a specific
  7407. tissue.
  7408. \end_layout
  7409. \begin_layout Standard
  7410. \begin_inset Flex TODO Note (inline)
  7411. status open
  7412. \begin_layout Plain Layout
  7413. Talk about how these vectors can be used for any data from these tissues
  7414. on this platform even though they were custom made for this data set.
  7415. \end_layout
  7416. \end_inset
  7417. \end_layout
  7418. \begin_layout Standard
  7419. \begin_inset Flex TODO Note (inline)
  7420. status open
  7421. \begin_layout Plain Layout
  7422. How to bring up that these custom vectors were used in another project by
  7423. someone else that was never published?
  7424. \end_layout
  7425. \end_inset
  7426. \end_layout
  7427. \begin_layout Subsection
  7428. Methylation array data can be successfully analyzed using existing techniques,
  7429. but machine learning poses additional challenges
  7430. \end_layout
  7431. \begin_layout Standard
  7432. Analysis strategies B and C both yield a reasonable analysis, with
  7433. a mean-variance trend that matches the expected behavior for the non-linear
  7434. M-value transformation (Figure
  7435. \begin_inset CommandInset ref
  7436. LatexCommand ref
  7437. reference "fig:meanvar-sva-aw"
  7438. plural "false"
  7439. caps "false"
  7440. noprefix "false"
  7441. \end_inset
  7442. ) and well-behaved p-value distributions (Figure
  7443. \begin_inset CommandInset ref
  7444. LatexCommand ref
  7445. reference "fig:meth-p-value-histograms"
  7446. plural "false"
  7447. caps "false"
  7448. noprefix "false"
  7449. \end_inset
  7450. ).
  7451. These two analyses also yield similar numbers of significant probes (Table
  7452. \begin_inset CommandInset ref
  7453. LatexCommand ref
  7454. reference "tab:methyl-num-signif"
  7455. plural "false"
  7456. caps "false"
  7457. noprefix "false"
  7458. \end_inset
  7459. ) and similar estimates of the number of differentially methylated probes
  7460. (Table
  7461. \begin_inset CommandInset ref
  7462. LatexCommand ref
  7463. reference "tab:methyl-est-nonnull"
  7464. plural "false"
  7465. caps "false"
  7466. noprefix "false"
  7467. \end_inset
  7468. ).
  7469. The main difference between these two analyses is the method used to account
  7470. for the mean-variance trend.
  7471. In analysis B, the trend is estimated and applied at the probe level: each
  7472. probe's estimated variance is squeezed toward the trend using an empirical
  7473. Bayes procedure (Figure
  7474. \begin_inset CommandInset ref
  7475. LatexCommand ref
  7476. reference "fig:meanvar-sva-aw"
  7477. plural "false"
  7478. caps "false"
  7479. noprefix "false"
  7480. \end_inset
  7481. ).
  7482. In analysis C, the trend is still estimated at the probe level, but instead
  7483. of estimating a single variance value shared across all observations for
  7484. a given probe, the voom method computes an initial estimate of the variance
  7485. for each observation individually based on where its model-fitted M-value
  7486. falls on the trend line and then assigns inverse-variance weights to model
  7487. the difference in variance between observations.
  7488. An overall variance is still estimated for each probe using the same empirical
  7489. Bayes method, but now the residual trend is flat (Figure
  7490. \begin_inset CommandInset ref
  7491. LatexCommand ref
  7492. reference "fig:meanvar-sva-voomaw"
  7493. plural "false"
  7494. caps "false"
  7495. noprefix "false"
  7496. \end_inset
  7497. ), indicating that the mean-variance trend is adequately modeled by scaling
  7498. the estimated variance for each observation using the weights computed
  7499. by voom.
  7500. \end_layout
  7501. \begin_layout Standard
  7502. The difference between the standard empirical Bayes trended variance modeling
  7503. (analysis B) and voom (analysis C) is analogous to the difference between
  7504. a t-test with equal variance and a t-test with unequal variance, except
  7505. that the unequal group variances used in the latter test are estimated
  7506. based on the mean-variance trend from all the probes rather than the data
  7507. for the specific probe being tested, thus stabilizing the group variance
  7508. estimates by sharing information between probes.
  7509. Allowing voom to model the variance using observation weights in this manner
  7510. allows the linear model fit to concentrate statistical power where it will
  7511. do the most good.
  7512. For example, if a particular probe's M-values are always at the extreme
  7513. of the M-value range (e.g.
  7514. less than -4) for ADNR samples, but the M-values for that probe in TX and
  7515. CAN samples are within the flat region of the mean-variance trend (between
  7516. -3 and +3), voom is able to down-weight the contribution of the high-variance
  7517. M-values from the ADNR samples in order to gain more statistical power
  7518. while testing for differential methylation between TX and CAN.
  7519. In contrast, modeling the mean-variance trend only at the probe level would
  7520. combine the high-variance ADNR samples and lower-variance samples from
  7521. other conditions and estimate an intermediate variance for this probe.
  7522. In practice, analysis B shows that this approach is adequate, but the voom
  7523. approach in analysis C is at least as good on all model fit criteria and
  7524. yields a larger estimate for the number of differentially methylated probes,
  7525. \emph on
  7526. and
  7527. \emph default
  7528. it matches up better with the theoretical expectations.
  7529. \end_layout
  7530. \begin_layout Standard
  7531. The significant association of diabetes diagnosis with sample quality is
  7532. interesting.
  7533. The samples with Type 2 diabetes tended to have more variation, averaged
  7534. across all probes, than those with Type 1 diabetes.
  7535. This is consistent with the consensus that Type 2 diabetes and the associated
  7536. metabolic syndrome represent a broad dysregulation of the body's endocrine
  7537. signalling related to metabolism [citation needed].
  7538. This dysregulation could easily manifest as a greater degree of variation
  7539. in the DNA methylation patterns of affected tissues.
  7540. In contrast, Type 1 diabetes has a more specific cause and effect, so a
  7541. less variable methylation signature is expected.
  7542. \end_layout
  7543. \begin_layout Standard
  7544. This preliminary analysis suggests that some degree of differential methylation
  7545. exists between TX and each of the three types of transplant dysfunction
  7546. studied.
  7547. Hence, it may be feasible to train a classifier to diagnose transplant
  7548. dysfunction from DNA methylation array data.
  7549. However, the major importance of both SVA and sample quality weighting
  7550. for proper modeling of this data poses significant challenges for any attempt
  7551. at machine learning on data of similar quality.
  7552. While these are easily used in a modeling context with full sample information,
  7553. neither of these methods is directly applicable in a machine learning context,
  7554. where the diagnosis is not known ahead of time.
  7555. If a machine learning approach for methylation-based diagnosis is to be
  7556. pursued, it will either require machine-learning-friendly methods to address
  7557. the same systematic trends in the data that SVA and sample quality weighting
  7558. address, or it will require higher quality data with substantially less
  7559. systematic perturbation of the data.
  7560. \end_layout
  7561. \begin_layout Chapter
  7562. Globin-blocking for more effective blood RNA-seq analysis in primate animal
  7563. model
  7564. \end_layout
  7565. \begin_layout Standard
  7566. \begin_inset Flex TODO Note (inline)
  7567. status open
  7568. \begin_layout Plain Layout
  7569. Choose between above and the paper title: Optimizing yield of deep RNA sequencin
  7570. g for gene expression profiling by globin reduction of peripheral blood
  7571. samples from cynomolgus monkeys (Macaca fascicularis).
  7572. \end_layout
  7573. \end_inset
  7574. \end_layout
  7575. \begin_layout Standard
  7576. \begin_inset Flex TODO Note (inline)
  7577. status open
  7578. \begin_layout Plain Layout
  7579. Chapter author list: https://tex.stackexchange.com/questions/156862/displaying-aut
  7580. hor-for-each-chapter-in-book Every chapter gets an author list, which may
  7581. or may not be part of a citation to a published/preprinted paper.
  7582. \end_layout
  7583. \end_inset
  7584. \end_layout
  7585. \begin_layout Standard
  7586. \begin_inset Flex TODO Note (inline)
  7587. status open
  7588. \begin_layout Plain Layout
  7589. Preprint then cite the paper
  7590. \end_layout
  7591. \end_inset
  7592. \end_layout
  7593. \begin_layout Section*
  7594. Abstract
  7595. \end_layout
  7596. \begin_layout Paragraph
  7597. Background
  7598. \end_layout
  7599. \begin_layout Standard
  7600. Primate blood contains high concentrations of globin messenger RNA.
  7601. Globin reduction is a standard technique used to improve the expression
  7602. results obtained by DNA microarrays on RNA from blood samples.
  7603. However, with whole transcriptome RNA-sequencing (RNA-seq) quickly replacing
  7604. microarrays for many applications, the impact of globin reduction for RNA-seq
  7605. has not been previously studied.
  7606. Moreover, no off-the-shelf kits are available for globin reduction in nonhuman
  7607. primates.
  7608. \end_layout
  7609. \begin_layout Paragraph
  7610. Results
  7611. \end_layout
  7612. \begin_layout Standard
  7613. Here we report a protocol for RNA-seq in primate blood samples that uses
  7614. complementary oligonucleotides to block reverse transcription of the alpha
  7615. and beta globin genes.
  7616. In test samples from cynomolgus monkeys (Macaca fascicularis), this globin
  7617. blocking protocol approximately doubles the yield of informative (non-globin)
  7618. reads by greatly reducing the fraction of globin reads, while also improving
  7619. the consistency in sequencing depth between samples.
  7620. The increased yield enables detection of about 2000 more genes, significantly
  7621. increases the correlation in measured gene expression levels between samples,
  7622. and increases the sensitivity of differential gene expression tests.
  7623. \end_layout
  7624. \begin_layout Paragraph
  7625. Conclusions
  7626. \end_layout
  7627. \begin_layout Standard
  7628. These results show that globin blocking significantly improves the cost-effectiv
  7629. eness of mRNA sequencing in primate blood samples by doubling the yield
  7630. of useful reads, allowing detection of more genes, and improving the precision
  7631. of gene expression measurements.
  7632. Based on these results, a globin reducing or blocking protocol is recommended
  7633. for all RNA-seq studies of primate blood samples.
  7634. \end_layout
  7635. \begin_layout Section
  7636. Approach
  7637. \end_layout
  7638. \begin_layout Standard
  7639. \begin_inset Note Note
  7640. status open
  7641. \begin_layout Plain Layout
  7642. Consider putting some of this in the Intro chapter
  7643. \end_layout
  7644. \begin_layout Itemize
  7645. Cynomolgus monkeys as a model organism
  7646. \end_layout
  7647. \begin_deeper
  7648. \begin_layout Itemize
  7649. Highly related to humans
  7650. \end_layout
  7651. \begin_layout Itemize
  7652. Small size and short life cycle - good research animal
  7653. \end_layout
  7654. \begin_layout Itemize
  7655. Genomics resources still in development
  7656. \end_layout
  7657. \end_deeper
  7658. \begin_layout Itemize
  7659. Inadequacy of existing blood RNA-seq protocols
  7660. \end_layout
  7661. \begin_deeper
  7662. \begin_layout Itemize
  7663. Existing protocols use a separate globin pulldown step, slowing down processing
  7664. \end_layout
  7665. \end_deeper
  7666. \end_inset
  7667. \end_layout
  7668. \begin_layout Standard
  7669. Increasingly, researchers are turning to high-throughput mRNA sequencing
  7670. technologies (RNA-seq) in preference to expression microarrays for analysis
  7671. of gene expression
  7672. \begin_inset CommandInset citation
  7673. LatexCommand cite
  7674. key "Mutz2012"
  7675. literal "false"
  7676. \end_inset
  7677. .
  7678. The advantages are even greater for study of model organisms with no well-estab
  7679. lished array platforms available, such as the cynomolgus monkey (Macaca
  7680. fascicularis).
  7681. High fractions of globin mRNA are naturally present in mammalian peripheral
  7682. blood samples (up to 70% of total mRNA) and these are known to interfere
  7683. with the results of array-based expression profiling
  7684. \begin_inset CommandInset citation
  7685. LatexCommand cite
  7686. key "Winn2010"
  7687. literal "false"
  7688. \end_inset
  7689. .
  7690. The importance of globin reduction for RNA-seq of blood has only been evaluated
  7691. for a deepSAGE protocol on human samples
  7692. \begin_inset CommandInset citation
  7693. LatexCommand cite
  7694. key "Mastrokolias2012"
  7695. literal "false"
  7696. \end_inset
  7697. .
  7698. In the present report, we evaluated globin reduction using custom blocking
  7699. oligonucleotides for deep RNA-seq of peripheral blood samples from a nonhuman
  7700. primate, cynomolgus monkey, using the Illumina technology platform.
  7701. We demonstrate that globin reduction significantly improves the cost-effectiven
  7702. ess of RNA-seq in blood samples.
  7703. Thus, our protocol offers a significant advantage to any investigator planning
  7704. to use RNA-seq for gene expression profiling of nonhuman primate blood
  7705. samples.
  7706. Our method can be generally applied to any species by designing complementary
  7707. oligonucleotide blocking probes to the globin gene sequences of that species.
  7708. Indeed, any highly expressed but biologically uninformative transcripts
  7709. can also be blocked to further increase sequencing efficiency and value
  7710. \begin_inset CommandInset citation
  7711. LatexCommand cite
  7712. key "Arnaud2016"
  7713. literal "false"
  7714. \end_inset
  7715. .
  7716. \end_layout
  7717. \begin_layout Section
  7718. Methods
  7719. \end_layout
  7720. \begin_layout Subsection
  7721. Sample collection
  7722. \end_layout
  7723. \begin_layout Standard
  7724. All research reported here was done under IACUC-approved protocols at the
  7725. University of Miami and complied with all applicable federal and state
  7726. regulations and ethical principles for nonhuman primate research.
  7727. Blood draws occurred between 16 April 2012 and 18 June 2015.
  7728. The experimental system involved intrahepatic pancreatic islet transplantation
  7729. into Cynomolgus monkeys with induced diabetes mellitus with or without
  7730. concomitant infusion of mesenchymal stem cells.
  7731. Blood was collected at serial time points before and after transplantation
  7732. into PAXgene Blood RNA tubes (PreAnalytiX/Qiagen, Valencia, CA) at the
  7733. precise volume:volume ratio of 2.5 ml whole blood into 6.9 ml of PAXgene
  7734. additive.
  7735. \end_layout
  7736. \begin_layout Subsection
  7737. Globin Blocking
  7738. \end_layout
  7739. \begin_layout Standard
  7740. Four oligonucleotides were designed to hybridize to the 3’ end of the transcript
  7741. s for Cynomolgus HBA1, HBA2 and HBB, with two hybridization sites for HBB
  7742. and two sites for HBA (the chosen sites were identical in both HBA genes).
  7743. All oligos were purchased from Sigma and were entirely composed of 2’O-Me
  7744. bases with a C3 spacer positioned at the 3’ ends to prevent any polymerase
  7745. mediated primer extension.
  7746. \end_layout
  7747. \begin_layout Quote
  7748. HBA1/2 site 1: GCCCACUCAGACUUUAUUCAAAG-C3spacer
  7749. \end_layout
  7750. \begin_layout Quote
  7751. HBA1/2 site 2: GGUGCAAGGAGGGGAGGAG-C3spacer
  7752. \end_layout
  7753. \begin_layout Quote
  7754. HBB site 1: AAUGAAAAUAAAUGUUUUUUAUUAG-C3spacer
  7755. \end_layout
  7756. \begin_layout Quote
  7757. HBB site 2: CUCAAGGCCCUUCAUAAUAUCCC-C3spacer
  7758. \end_layout
  7759. \begin_layout Subsection
  7760. RNA-seq Library Preparation
  7761. \end_layout
  7762. \begin_layout Standard
  7763. Sequencing libraries were prepared with 200 ng total RNA from each sample.
  7764. Polyadenylated mRNA was selected from 200 ng aliquots of cynomolgus blood-deri
  7765. ved total RNA using Ambion Dynabeads Oligo(dT)25 beads (Invitrogen) following
  7766. manufacturer’s recommended protocol.
  7767. PolyA selected RNA was then combined with 8 pmol of HBA1/2 (site 1), 8
  7768. pmol of HBA1/2 (site 2), 12 pmol of HBB (site 1) and 12 pmol of HBB (site
  7769. 2) oligonucleotides.
  7770. In addition, 20 pmol of RT primer containing a portion of the Illumina
  7771. adapter sequence (B-oligo-dTV: GAGTTCCTTGGCACCCGAGAATTCCATTTTTTTTTTTTTTTTTTTV)
  7772. and 4 µL of 5X First Strand buffer (250 mM Tris-HCl pH 8.3, 375 mM KCl,
  7773. 15mM MgCl2) were added in a total volume of 15 µL.
  7774. The RNA was fragmented by heating this cocktail for 3 minutes at 95°C and
  7775. then placed on ice.
  7776. This was followed by the addition of 2 µL 0.1 M DTT, 1 µL RNaseOUT, 1 µL
  7777. 10 mM dNTPs containing 10% biotin-16 aminoallyl-2’-dUTP and 10% biotin-16 aminoallyl-2’-dCTP
  7778. (TriLink Biotech, San Diego, CA), and 1 µL Superscript II (200 U/µL, Thermo-Fi
  7779. sher).
  7780. A second “unblocked” library was prepared in the same way for each sample
  7781. but replacing the blocking oligos with an equivalent volume of water.
  7782. The reaction was carried out at 25°C for 15 minutes and 42°C for 40 minutes,
  7783. followed by incubation at 75°C for 10 minutes to inactivate the reverse
  7784. transcriptase.
  7785. \end_layout
  7786. \begin_layout Standard
  7787. The cDNA/RNA hybrid molecules were purified using 1.8X Ampure XP beads (Agencourt
  7788. ) following supplier’s recommended protocol.
  7789. The cDNA/RNA hybrid was eluted in 25 µL of 10 mM Tris-HCl pH 8.0, and then
  7790. bound to 25 µL of M280 Magnetic Streptavidin beads washed per recommended
  7791. protocol (Thermo-Fisher).
  7792. After 30 minutes of binding, beads were washed one time in 100 µL 0.1N NaOH
  7793. to denature and remove the bound RNA, followed by two 100 µL washes with
  7794. 1X TE buffer.
  7795. \end_layout
  7796. \begin_layout Standard
  7797. Subsequent attachment of the 5-prime Illumina A adapter was performed by
  7798. on-bead random primer extension of the following sequence (A-N8 primer:
  7799. TTCAGAGTTCTACAGTCCGACGATCNNNNNNNN).
  7800. Briefly, beads were resuspended in a 20 µL reaction containing 5 µM A-N8
  7801. primer, 40mM Tris-HCl pH 7.5, 20mM MgCl2, 50mM NaCl, 0.325U/µL Sequenase
  7802. 2.0 (Affymetrix, Santa Clara, CA), 0.0025U/µL inorganic pyrophosphatase (Affymetr
  7803. ix) and 300 µM each dNTP.
  7804. Reaction was incubated at 22°C for 30 minutes, then beads were washed 2
  7805. times with 1X TE buffer (200µL).
  7806. \end_layout
  7807. \begin_layout Standard
  7808. The magnetic streptavidin beads were resuspended in 34 µL nuclease-free
  7809. water and added directly to a PCR tube.
  7810. The two Illumina protocol-specified PCR primers were added at 0.53 µM (Illumina
  7811. TruSeq Universal Primer 1 and Illumina TruSeq barcoded PCR primer 2), along
  7812. with 40 µL 2X KAPA HiFi Hotstart ReadyMix (KAPA, Wilmington, MA) and thermocycl
  7813. ed as follows: starting with 98°C (2 min-hold); 15 cycles of 98°C, 20sec;
  7814. 60°C, 30sec; 72°C, 30sec; and finished with a 72°C (2 min-hold).
  7815. \end_layout
  7816. \begin_layout Standard
  7817. PCR products were purified with 1X Ampure Beads following manufacturer’s
  7818. recommended protocol.
  7819. Libraries were then analyzed using the Agilent TapeStation and quantitation
  7820. of desired size range was performed by “smear analysis”.
  7821. Samples were pooled in equimolar batches of 16 samples.
  7822. Pooled libraries were size selected on 2% agarose gels (E-Gel EX Agarose
  7823. Gels; Thermo-Fisher).
  7824. Products were cut between 250 and 350 bp (corresponding to insert sizes
  7825. of 130 to 230 bp).
  7826. Finished library pools were then sequenced on the Illumina NextSeq500 instrumen
  7827. t with 75 base read lengths.
  7828. \end_layout
  7829. \begin_layout Subsection
  7830. Read alignment and counting
  7831. \end_layout
  7832. \begin_layout Standard
  7833. Reads were aligned to the cynomolgus genome using STAR
  7834. \begin_inset CommandInset citation
  7835. LatexCommand cite
  7836. key "Dobin2013,Wilson2013"
  7837. literal "false"
  7838. \end_inset
  7839. .
  7840. Counts of uniquely mapped reads were obtained for every gene in each sample
  7841. with the “featureCounts” function from the Rsubread package, using each
  7842. of the three possibilities for the “strandSpecific” option: sense, antisense,
  7843. and unstranded
  7844. \begin_inset CommandInset citation
  7845. LatexCommand cite
  7846. key "Liao2014"
  7847. literal "false"
  7848. \end_inset
  7849. .
  7850. A few artifacts in the cynomolgus genome annotation complicated read counting.
  7851. First, no ortholog is annotated for alpha globin in the cynomolgus genome,
  7852. presumably because the human genome has two alpha globin genes with nearly
  7853. identical sequences, making the orthology relationship ambiguous.
  7854. However, two loci in the cynomolgus genome are annotated as “hemoglobin subunit alpha-lik
  7855. e” (LOC102136192 and LOC102136846).
  7856. LOC102136192 is annotated as a pseudogene while LOC102136846 is annotated
  7857. as protein-coding.
  7858. Our globin reduction protocol was designed to include blocking of these
  7859. two genes.
  7860. Indeed, these two genes have almost the same read counts in each library
  7861. as the properly-annotated HBB gene and much larger counts than any other
  7862. gene in the unblocked libraries, giving confidence that reads derived from
  7863. the real alpha globin are mapping to both genes.
  7864. Thus, reads from both of these loci were counted as alpha globin reads
  7865. in all further analyses.
  7866. The second artifact is a small, uncharacterized non-coding RNA gene (LOC1021365
  7867. 91), which overlaps the HBA-like gene (LOC102136192) on the opposite strand.
  7868. If counting is not performed in stranded mode (or if a non-strand-specific
  7869. sequencing protocol is used), many reads mapping to the globin gene will
  7870. be discarded as ambiguous due to their overlap with this ncRNA gene, resulting
  7871. in significant undercounting of globin reads.
  7872. Therefore, stranded sense counts were used for all further analysis in
  7873. the present study to ensure that we accurately accounted for globin transcript
  7874. reduction.
  7875. However, we note that stranded reads are not necessary for RNA-seq using
  7876. our protocol in standard practice.
  7877. \end_layout
  7878. \begin_layout Subsection
  7879. Normalization and Exploratory Data Analysis
  7880. \end_layout
  7881. \begin_layout Standard
  7882. Libraries were normalized by computing scaling factors using the edgeR package’s
  7883. Trimmed Mean of M-values method
  7884. \begin_inset CommandInset citation
  7885. LatexCommand cite
  7886. key "Robinson2010"
  7887. literal "false"
  7888. \end_inset
  7889. .
  7890. Log2 counts per million values (logCPM) were calculated using the cpm function
  7891. in edgeR for individual samples and aveLogCPM function for averages across
  7892. groups of samples, using those functions’ default prior count values to
  7893. avoid taking the logarithm of 0.
  7894. Genes were considered “present” if their average normalized logCPM values
  7895. across all libraries were at least -1.
  7896. Normalizing for gene length was unnecessary because the sequencing protocol
  7897. is 3’-biased and hence the expected read count for each gene is related
  7898. to the transcript’s copy number but not its length.
  7899. \end_layout
  7900. \begin_layout Standard
  7901. In order to assess the effect of blocking on reproducibility, Pearson and
  7902. Spearman correlation coefficients were computed between the logCPM values
  7903. for every pair of libraries within the globin-blocked (GB) and unblocked
  7904. (non-GB) groups, and edgeR's “estimateDisp” function was used to compute
  7905. negative binomial dispersions separately for the two groups
  7906. \begin_inset CommandInset citation
  7907. LatexCommand cite
  7908. key "Chen2014"
  7909. literal "false"
  7910. \end_inset
  7911. .
  7912. \end_layout
  7913. \begin_layout Subsection
  7914. Differential Expression Analysis
  7915. \end_layout
  7916. \begin_layout Standard
  7917. All tests for differential gene expression were performed using edgeR, by
  7918. first fitting a negative binomial generalized linear model to the counts
  7919. and normalization factors and then performing a quasi-likelihood F-test
  7920. with robust estimation of outlier gene dispersions
  7921. \begin_inset CommandInset citation
  7922. LatexCommand cite
  7923. key "Lund2012,Phipson2016"
  7924. literal "false"
  7925. \end_inset
  7926. .
  7927. To investigate the effects of globin blocking on each gene, an additive
  7928. model was fit to the full data with coefficients for globin blocking and
  7929. SampleID.
  7930. To test the effect of globin blocking on detection of differentially expressed
  7931. genes, the GB samples and non-GB samples were each analyzed independently
  7932. as follows: for each animal with both a pre-transplant and a post-transplant
  7933. time point in the data set, the pre-transplant sample and the earliest
  7934. post-transplant sample were selected, and all others were excluded, yielding
  7935. a pre-/post-transplant pair of samples for each animal (N=7 animals with
  7936. paired samples).
  7937. These samples were analyzed for pre-transplant vs.
  7938. post-transplant differential gene expression while controlling for inter-animal
  7939. variation using an additive model with coefficients for transplant and
  7940. animal ID.
  7941. In all analyses, p-values were adjusted using the Benjamini-Hochberg procedure
  7942. for FDR control
  7943. \begin_inset CommandInset citation
  7944. LatexCommand cite
  7945. key "Benjamini1995"
  7946. literal "false"
  7947. \end_inset
  7948. .
  7949. \end_layout
  7950. \begin_layout Standard
  7951. \begin_inset Note Note
  7952. status open
  7953. \begin_layout Itemize
  7954. New blood RNA-seq protocol to block reverse transcription of globin genes
  7955. \end_layout
  7956. \begin_layout Itemize
  7957. Blood RNA-seq time course after transplants with/without MSC infusion
  7958. \end_layout
  7959. \end_inset
  7960. \end_layout
  7961. \begin_layout Section
  7962. Results
  7963. \end_layout
  7964. \begin_layout Subsection
  7965. Globin blocking yields a larger and more consistent fraction of useful reads
  7966. \end_layout
  7967. \begin_layout Standard
  7968. \begin_inset ERT
  7969. status open
  7970. \begin_layout Plain Layout
  7971. \backslash
  7972. afterpage{
  7973. \end_layout
  7974. \begin_layout Plain Layout
  7975. \backslash
  7976. begin{landscape}
  7977. \end_layout
  7978. \end_inset
  7979. \end_layout
  7980. \begin_layout Standard
  7981. \begin_inset Float table
  7982. placement p
  7983. wide false
  7984. sideways false
  7985. status collapsed
  7986. \begin_layout Plain Layout
  7987. \align center
  7988. \begin_inset Tabular
  7989. <lyxtabular version="3" rows="4" columns="7">
  7990. <features tabularvalignment="middle">
  7991. <column alignment="center" valignment="top">
  7992. <column alignment="center" valignment="top">
  7993. <column alignment="center" valignment="top">
  7994. <column alignment="center" valignment="top">
  7995. <column alignment="center" valignment="top">
  7996. <column alignment="center" valignment="top">
  7997. <column alignment="center" valignment="top">
  7998. <row>
  7999. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8000. \begin_inset Text
  8001. \begin_layout Plain Layout
  8002. \end_layout
  8003. \end_inset
  8004. </cell>
  8005. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8006. \begin_inset Text
  8007. \begin_layout Plain Layout
  8008. \family roman
  8009. \series medium
  8010. \shape up
  8011. \size normal
  8012. \emph off
  8013. \bar no
  8014. \strikeout off
  8015. \xout off
  8016. \uuline off
  8017. \uwave off
  8018. \noun off
  8019. \color none
  8020. Percent of Total Reads
  8021. \end_layout
  8022. \end_inset
  8023. </cell>
  8024. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8025. \begin_inset Text
  8026. \begin_layout Plain Layout
  8027. \end_layout
  8028. \end_inset
  8029. </cell>
  8030. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8031. \begin_inset Text
  8032. \begin_layout Plain Layout
  8033. \end_layout
  8034. \end_inset
  8035. </cell>
  8036. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8037. \begin_inset Text
  8038. \begin_layout Plain Layout
  8039. \end_layout
  8040. \end_inset
  8041. </cell>
  8042. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  8043. \begin_inset Text
  8044. \begin_layout Plain Layout
  8045. \family roman
  8046. \series medium
  8047. \shape up
  8048. \size normal
  8049. \emph off
  8050. \bar no
  8051. \strikeout off
  8052. \xout off
  8053. \uuline off
  8054. \uwave off
  8055. \noun off
  8056. \color none
  8057. Percent of Genic Reads
  8058. \end_layout
  8059. \end_inset
  8060. </cell>
  8061. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  8062. \begin_inset Text
  8063. \begin_layout Plain Layout
  8064. \end_layout
  8065. \end_inset
  8066. </cell>
  8067. </row>
  8068. <row>
  8069. <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none">
  8070. \begin_inset Text
  8071. \begin_layout Plain Layout
  8072. GB
  8073. \end_layout
  8074. \end_inset
  8075. </cell>
  8076. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8077. \begin_inset Text
  8078. \begin_layout Plain Layout
  8079. \family roman
  8080. \series medium
  8081. \shape up
  8082. \size normal
  8083. \emph off
  8084. \bar no
  8085. \strikeout off
  8086. \xout off
  8087. \uuline off
  8088. \uwave off
  8089. \noun off
  8090. \color none
  8091. Non-globin Reads
  8092. \end_layout
  8093. \end_inset
  8094. </cell>
  8095. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8096. \begin_inset Text
  8097. \begin_layout Plain Layout
  8098. \family roman
  8099. \series medium
  8100. \shape up
  8101. \size normal
  8102. \emph off
  8103. \bar no
  8104. \strikeout off
  8105. \xout off
  8106. \uuline off
  8107. \uwave off
  8108. \noun off
  8109. \color none
  8110. Globin Reads
  8111. \end_layout
  8112. \end_inset
  8113. </cell>
  8114. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8115. \begin_inset Text
  8116. \begin_layout Plain Layout
  8117. \family roman
  8118. \series medium
  8119. \shape up
  8120. \size normal
  8121. \emph off
  8122. \bar no
  8123. \strikeout off
  8124. \xout off
  8125. \uuline off
  8126. \uwave off
  8127. \noun off
  8128. \color none
  8129. All Genic Reads
  8130. \end_layout
  8131. \end_inset
  8132. </cell>
  8133. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8134. \begin_inset Text
  8135. \begin_layout Plain Layout
  8136. \family roman
  8137. \series medium
  8138. \shape up
  8139. \size normal
  8140. \emph off
  8141. \bar no
  8142. \strikeout off
  8143. \xout off
  8144. \uuline off
  8145. \uwave off
  8146. \noun off
  8147. \color none
  8148. All Aligned Reads
  8149. \end_layout
  8150. \end_inset
  8151. </cell>
  8152. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8153. \begin_inset Text
  8154. \begin_layout Plain Layout
  8155. \family roman
  8156. \series medium
  8157. \shape up
  8158. \size normal
  8159. \emph off
  8160. \bar no
  8161. \strikeout off
  8162. \xout off
  8163. \uuline off
  8164. \uwave off
  8165. \noun off
  8166. \color none
  8167. Non-globin Reads
  8168. \end_layout
  8169. \end_inset
  8170. </cell>
  8171. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  8172. \begin_inset Text
  8173. \begin_layout Plain Layout
  8174. \family roman
  8175. \series medium
  8176. \shape up
  8177. \size normal
  8178. \emph off
  8179. \bar no
  8180. \strikeout off
  8181. \xout off
  8182. \uuline off
  8183. \uwave off
  8184. \noun off
  8185. \color none
  8186. Globin Reads
  8187. \end_layout
  8188. \end_inset
  8189. </cell>
  8190. </row>
  8191. <row>
  8192. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8193. \begin_inset Text
  8194. \begin_layout Plain Layout
  8195. \family roman
  8196. \series medium
  8197. \shape up
  8198. \size normal
  8199. \emph off
  8200. \bar no
  8201. \strikeout off
  8202. \xout off
  8203. \uuline off
  8204. \uwave off
  8205. \noun off
  8206. \color none
  8207. Yes
  8208. \end_layout
  8209. \end_inset
  8210. </cell>
  8211. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8212. \begin_inset Text
  8213. \begin_layout Plain Layout
  8214. \family roman
  8215. \series medium
  8216. \shape up
  8217. \size normal
  8218. \emph off
  8219. \bar no
  8220. \strikeout off
  8221. \xout off
  8222. \uuline off
  8223. \uwave off
  8224. \noun off
  8225. \color none
  8226. 50.4% ± 6.82
  8227. \end_layout
  8228. \end_inset
  8229. </cell>
  8230. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8231. \begin_inset Text
  8232. \begin_layout Plain Layout
  8233. \family roman
  8234. \series medium
  8235. \shape up
  8236. \size normal
  8237. \emph off
  8238. \bar no
  8239. \strikeout off
  8240. \xout off
  8241. \uuline off
  8242. \uwave off
  8243. \noun off
  8244. \color none
  8245. 3.48% ± 2.94
  8246. \end_layout
  8247. \end_inset
  8248. </cell>
  8249. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8250. \begin_inset Text
  8251. \begin_layout Plain Layout
  8252. \family roman
  8253. \series medium
  8254. \shape up
  8255. \size normal
  8256. \emph off
  8257. \bar no
  8258. \strikeout off
  8259. \xout off
  8260. \uuline off
  8261. \uwave off
  8262. \noun off
  8263. \color none
  8264. 53.9% ± 6.81
  8265. \end_layout
  8266. \end_inset
  8267. </cell>
  8268. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8269. \begin_inset Text
  8270. \begin_layout Plain Layout
  8271. \family roman
  8272. \series medium
  8273. \shape up
  8274. \size normal
  8275. \emph off
  8276. \bar no
  8277. \strikeout off
  8278. \xout off
  8279. \uuline off
  8280. \uwave off
  8281. \noun off
  8282. \color none
  8283. 89.7% ± 2.40
  8284. \end_layout
  8285. \end_inset
  8286. </cell>
  8287. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8288. \begin_inset Text
  8289. \begin_layout Plain Layout
  8290. \family roman
  8291. \series medium
  8292. \shape up
  8293. \size normal
  8294. \emph off
  8295. \bar no
  8296. \strikeout off
  8297. \xout off
  8298. \uuline off
  8299. \uwave off
  8300. \noun off
  8301. \color none
  8302. 93.5% ± 5.25
  8303. \end_layout
  8304. \end_inset
  8305. </cell>
  8306. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  8307. \begin_inset Text
  8308. \begin_layout Plain Layout
  8309. \family roman
  8310. \series medium
  8311. \shape up
  8312. \size normal
  8313. \emph off
  8314. \bar no
  8315. \strikeout off
  8316. \xout off
  8317. \uuline off
  8318. \uwave off
  8319. \noun off
  8320. \color none
  8321. 6.49% ± 5.25
  8322. \end_layout
  8323. \end_inset
  8324. </cell>
  8325. </row>
  8326. <row>
  8327. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8328. \begin_inset Text
  8329. \begin_layout Plain Layout
  8330. \family roman
  8331. \series medium
  8332. \shape up
  8333. \size normal
  8334. \emph off
  8335. \bar no
  8336. \strikeout off
  8337. \xout off
  8338. \uuline off
  8339. \uwave off
  8340. \noun off
  8341. \color none
  8342. No
  8343. \end_layout
  8344. \end_inset
  8345. </cell>
  8346. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8347. \begin_inset Text
  8348. \begin_layout Plain Layout
  8349. \family roman
  8350. \series medium
  8351. \shape up
  8352. \size normal
  8353. \emph off
  8354. \bar no
  8355. \strikeout off
  8356. \xout off
  8357. \uuline off
  8358. \uwave off
  8359. \noun off
  8360. \color none
  8361. 26.3% ± 8.95
  8362. \end_layout
  8363. \end_inset
  8364. </cell>
  8365. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8366. \begin_inset Text
  8367. \begin_layout Plain Layout
  8368. \family roman
  8369. \series medium
  8370. \shape up
  8371. \size normal
  8372. \emph off
  8373. \bar no
  8374. \strikeout off
  8375. \xout off
  8376. \uuline off
  8377. \uwave off
  8378. \noun off
  8379. \color none
  8380. 44.6% ± 16.6
  8381. \end_layout
  8382. \end_inset
  8383. </cell>
  8384. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8385. \begin_inset Text
  8386. \begin_layout Plain Layout
  8387. \family roman
  8388. \series medium
  8389. \shape up
  8390. \size normal
  8391. \emph off
  8392. \bar no
  8393. \strikeout off
  8394. \xout off
  8395. \uuline off
  8396. \uwave off
  8397. \noun off
  8398. \color none
  8399. 70.1% ± 9.38
  8400. \end_layout
  8401. \end_inset
  8402. </cell>
  8403. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8404. \begin_inset Text
  8405. \begin_layout Plain Layout
  8406. \family roman
  8407. \series medium
  8408. \shape up
  8409. \size normal
  8410. \emph off
  8411. \bar no
  8412. \strikeout off
  8413. \xout off
  8414. \uuline off
  8415. \uwave off
  8416. \noun off
  8417. \color none
  8418. 90.7% ± 5.16
  8419. \end_layout
  8420. \end_inset
  8421. </cell>
  8422. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8423. \begin_inset Text
  8424. \begin_layout Plain Layout
  8425. \family roman
  8426. \series medium
  8427. \shape up
  8428. \size normal
  8429. \emph off
  8430. \bar no
  8431. \strikeout off
  8432. \xout off
  8433. \uuline off
  8434. \uwave off
  8435. \noun off
  8436. \color none
  8437. 38.8% ± 17.1
  8438. \end_layout
  8439. \end_inset
  8440. </cell>
  8441. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  8442. \begin_inset Text
  8443. \begin_layout Plain Layout
  8444. \family roman
  8445. \series medium
  8446. \shape up
  8447. \size normal
  8448. \emph off
  8449. \bar no
  8450. \strikeout off
  8451. \xout off
  8452. \uuline off
  8453. \uwave off
  8454. \noun off
  8455. \color none
  8456. 61.2% ± 17.1
  8457. \end_layout
  8458. \end_inset
  8459. </cell>
  8460. </row>
  8461. </lyxtabular>
  8462. \end_inset
  8463. \end_layout
  8464. \begin_layout Plain Layout
  8465. \begin_inset Caption Standard
  8466. \begin_layout Plain Layout
  8467. \series bold
  8468. \begin_inset Argument 1
  8469. status collapsed
  8470. \begin_layout Plain Layout
  8471. Fractions of reads mapping to genomic features in GB and non-GB samples.
  8472. \end_layout
  8473. \end_inset
  8474. \begin_inset CommandInset label
  8475. LatexCommand label
  8476. name "tab:Fractions-of-reads"
  8477. \end_inset
  8478. Fractions of reads mapping to genomic features in GB and non-GB samples.
  8479. \series default
  8480. All values are given as mean ± standard deviation.
  8481. \end_layout
  8482. \end_inset
  8483. \end_layout
  8484. \end_inset
  8485. \end_layout
  8486. \begin_layout Standard
  8487. \begin_inset ERT
  8488. status open
  8489. \begin_layout Plain Layout
  8490. \backslash
  8491. end{landscape}
  8492. \end_layout
  8493. \begin_layout Plain Layout
  8494. }
  8495. \end_layout
  8496. \end_inset
  8497. \end_layout
  8498. \begin_layout Standard
  8499. The objective of the present study was to validate a new protocol for deep
  8500. RNA-seq of whole blood drawn into PAXgene tubes from cynomolgus monkeys
  8501. undergoing islet transplantation, with particular focus on minimizing the
  8502. loss of useful sequencing space to uninformative globin reads.
  8503. The details of the analysis with respect to transplant outcomes and the
  8504. impact of mesenchymal stem cell treatment will be reported in a separate
  8505. manuscript (in preparation).
  8506. To focus on the efficacy of our globin blocking protocol, 37 blood samples,
  8507. 16 from pre-transplant and 21 from post-transplant time points, were each
  8508. prepped once with and once without globin blocking oligos, and were then
  8509. sequenced on an Illumina NextSeq500 instrument.
  8510. The number of reads aligning to each gene in the cynomolgus genome was
  8511. counted.
  8512. Table 1 summarizes the distribution of read fractions among the GB and
  8513. non-GB libraries.
  8514. In the libraries with no globin blocking, globin reads made up an average
  8515. of 44.6% of total input reads, while reads assigned to all other genes made
  8516. up an average of 26.3%.
  8517. The remaining reads either aligned to intergenic regions (that include
  8518. long non-coding RNAs) or did not align with any annotated transcripts in
  8519. the current build of the cynomolgus genome.
  8520. In the GB libraries, globin reads made up only 3.48% and reads assigned
  8521. to all other genes increased to 50.4%.
  8522. Thus, globin blocking resulted in a 92.2% reduction in globin reads and
  8523. a 91.6% increase in yield of useful non-globin reads.
  8524. \end_layout
  8525. \begin_layout Standard
  8526. This reduction is not quite as efficient as the previous analysis showed
  8527. for human samples by DeepSAGE (<0.4% globin reads after globin reduction)
  8528. \begin_inset CommandInset citation
  8529. LatexCommand cite
  8530. key "Mastrokolias2012"
  8531. literal "false"
  8532. \end_inset
  8533. .
  8534. Nonetheless, this degree of globin reduction is sufficient to nearly double
  8535. the yield of useful reads.
  8536. Thus, globin blocking cuts the required sequencing effort (and costs) to
  8537. achieve a target coverage depth by almost 50%.
  8538. Consistent with this near doubling of yield, the average difference in
  8539. un-normalized logCPM across all genes between the GB libraries and non-GB
  8540. libraries is approximately 1 (mean = 1.01, median = 1.08), an overall 2-fold
  8541. increase.
  8542. Un-normalized values are used here because the TMM normalization correctly
  8543. identifies this 2-fold difference as biologically irrelevant and removes
  8544. it.
  8545. \end_layout
  8546. \begin_layout Standard
  8547. \begin_inset Float figure
  8548. wide false
  8549. sideways false
  8550. status collapsed
  8551. \begin_layout Plain Layout
  8552. \align center
  8553. \begin_inset Graphics
  8554. filename graphics/Globin Paper/figure1 - globin-fractions.pdf
  8555. lyxscale 50
  8556. width 75col%
  8557. \end_inset
  8558. \end_layout
  8559. \begin_layout Plain Layout
  8560. \begin_inset Caption Standard
  8561. \begin_layout Plain Layout
  8562. \series bold
  8563. \begin_inset Argument 1
  8564. status collapsed
  8565. \begin_layout Plain Layout
  8566. Fraction of genic reads in each sample aligned to non-globin genes, with
  8567. and without globin blocking (GB).
  8568. \end_layout
  8569. \end_inset
  8570. \begin_inset CommandInset label
  8571. LatexCommand label
  8572. name "fig:Fraction-of-genic-reads"
  8573. \end_inset
  8574. Fraction of genic reads in each sample aligned to non-globin genes, with
  8575. and without globin blocking (GB).
  8576. \series default
  8577. All reads in each sequencing library were aligned to the cynomolgus genome, and
  8578. the number of reads uniquely aligning to each gene was counted.
  8579. For each sample, counts were summed separately for all globin genes and
  8580. for the remainder of the genes (non-globin genes), and the fraction of
  8581. genic reads aligned to non-globin genes was computed.
  8582. Each point represents an individual sample.
  8583. Gray + signs indicate the means for globin-blocked libraries and unblocked
  8584. libraries.
  8585. The overall distribution for each group is represented as a notched box
  8586. plot.
  8587. Points are randomly spread vertically to avoid excessive overlapping.
  8588. \end_layout
  8589. \end_inset
  8590. \end_layout
  8591. \end_inset
  8592. \end_layout
  8593. \begin_layout Standard
  8594. Another important aspect is that the standard deviations in Table
  8595. \begin_inset CommandInset ref
  8596. LatexCommand ref
  8597. reference "tab:Fractions-of-reads"
  8598. plural "false"
  8599. caps "false"
  8600. noprefix "false"
  8601. \end_inset
  8602. are uniformly smaller in the GB samples than the non-GB ones, indicating
  8603. much greater consistency of yield.
  8604. This is best seen in the percentage of non-globin reads as a fraction of
  8605. total reads aligned to annotated genes (genic reads).
  8606. For the non-GB samples, this measure ranges from 10.9% to 80.9%, while for
  8607. the GB samples it ranges from 81.9% to 99.9% (Figure
  8608. \begin_inset CommandInset ref
  8609. LatexCommand ref
  8610. reference "fig:Fraction-of-genic-reads"
  8611. plural "false"
  8612. caps "false"
  8613. noprefix "false"
  8614. \end_inset
  8615. ).
  8616. This means that for applications where it is critical that each sample
  8617. achieve a specified minimum coverage in order to provide useful information,
  8618. it would be necessary to budget up to 10 times the sequencing depth per
  8619. sample without globin blocking, even though the average yield improvement
  8620. for globin blocking is only 2-fold, because every sample has a chance of
  8621. being 90% globin and 10% useful reads.
  8622. Hence, the more consistent behavior of GB samples makes planning an experiment
  8623. easier and more efficient because it eliminates the need to over-sequence
  8624. every sample in order to guard against the worst case of a high-globin
  8625. fraction.
  8626. \end_layout
  8627. \begin_layout Subsection
  8628. Globin blocking lowers the noise floor and allows detection of about 2000
  8629. more low-expression genes
  8630. \end_layout
  8631. \begin_layout Standard
  8632. \begin_inset Flex TODO Note (inline)
  8633. status open
  8634. \begin_layout Plain Layout
  8635. Remove redundant titles from figures
  8636. \end_layout
  8637. \end_inset
  8638. \end_layout
  8639. \begin_layout Standard
  8640. \begin_inset Float figure
  8641. wide false
  8642. sideways false
  8643. status collapsed
  8644. \begin_layout Plain Layout
  8645. \align center
  8646. \begin_inset Graphics
  8647. filename graphics/Globin Paper/figure2 - aveLogCPM-colored.pdf
  8648. lyxscale 50
  8649. height 60theight%
  8650. \end_inset
  8651. \end_layout
  8652. \begin_layout Plain Layout
  8653. \begin_inset Caption Standard
  8654. \begin_layout Plain Layout
  8655. \series bold
  8656. \begin_inset Argument 1
  8657. status collapsed
  8658. \begin_layout Plain Layout
  8659. Distributions of average group gene abundances when normalized separately
  8660. or together.
  8661. \end_layout
  8662. \end_inset
  8663. \begin_inset CommandInset label
  8664. LatexCommand label
  8665. name "fig:logcpm-dists"
  8666. \end_inset
  8667. Distributions of average group gene abundances when normalized separately
  8668. or together.
  8669. \series default
  8670. All reads in each sequencing library were aligned to the cyno genome, and
  8671. the number of reads uniquely aligning to each gene was counted.
  8672. Genes with zero counts in all libraries were discarded.
  8673. Libraries were normalized using the TMM method.
  8674. Libraries were split into globin-blocked (GB) and non-GB groups and the
  8675. average abundance for each gene in both groups, measured in log2 counts
  8676. per million reads counted, was computed using the aveLogCPM function.
  8677. The distribution of average gene logCPM values was plotted for both groups
  8678. using a kernel density plot to approximate a continuous distribution.
  8679. The logCPM GB distributions are marked in red, non-GB in blue.
  8680. The black vertical line denotes the chosen detection threshold of -1.
  8681. Top panel: Libraries were split into GB and non-GB groups first and normalized
  8682. separately.
  8683. Bottom panel: Libraries were all normalized together first and then split
  8684. into groups.
  8685. \end_layout
  8686. \end_inset
  8687. \end_layout
  8688. \begin_layout Plain Layout
  8689. \end_layout
  8690. \end_inset
  8691. \end_layout
  8692. \begin_layout Standard
  8693. Since globin blocking yields more usable sequencing depth, it should also
  8694. allow detection of more genes at any given threshold.
  8695. When we looked at the distribution of average normalized logCPM values
  8696. across all libraries for genes with at least one read assigned to them,
  8697. we observed the expected bimodal distribution, with a high-abundance "signal"
  8698. peak representing detected genes and a low-abundance "noise" peak representing
  8699. genes whose read count did not rise above the noise floor (Figure
  8700. \begin_inset CommandInset ref
  8701. LatexCommand ref
  8702. reference "fig:logcpm-dists"
  8703. plural "false"
  8704. caps "false"
  8705. noprefix "false"
  8706. \end_inset
  8707. ).
  8708. Consistent with the 2-fold increase in raw counts assigned to non-globin
  8709. genes, the signal peak for GB samples is shifted to the right relative
  8710. to the non-GB signal peak.
  8711. When all the samples are normalized together, this difference is normalized
  8712. out, lining up the signal peaks, and this reveals that, as expected, the
  8713. noise floor for the GB samples is about 2-fold lower.
  8714. This greater separation between signal and noise peaks in the GB samples
  8715. means that low-expression genes should be more easily detected and more
  8716. precisely quantified than in the non-GB samples.
  8717. \end_layout
  8718. \begin_layout Standard
  8719. \begin_inset Float figure
  8720. wide false
  8721. sideways false
  8722. status collapsed
  8723. \begin_layout Plain Layout
  8724. \align center
  8725. \begin_inset Graphics
  8726. filename graphics/Globin Paper/figure3 - detection.pdf
  8727. lyxscale 50
  8728. width 70col%
  8729. \end_inset
  8730. \end_layout
  8731. \begin_layout Plain Layout
  8732. \begin_inset Caption Standard
  8733. \begin_layout Plain Layout
  8734. \series bold
  8735. \begin_inset Argument 1
  8736. status collapsed
  8737. \begin_layout Plain Layout
  8738. Gene detections as a function of abundance thresholds in globin-blocked
  8739. (GB) and non-GB samples.
  8740. \end_layout
  8741. \end_inset
  8742. \begin_inset CommandInset label
  8743. LatexCommand label
  8744. name "fig:Gene-detections"
  8745. \end_inset
  8746. Gene detections as a function of abundance thresholds in globin-blocked
  8747. (GB) and non-GB samples.
  8748. \series default
  8749. Average abundance (logCPM,
  8750. \begin_inset Formula $\log_{2}$
  8751. \end_inset
  8752. counts per million reads counted) was computed by separate group normalization
  8753. as described in Figure
  8754. \begin_inset CommandInset ref
  8755. LatexCommand ref
  8756. reference "fig:logcpm-dists"
  8757. plural "false"
  8758. caps "false"
  8759. noprefix "false"
  8760. \end_inset
  8761. for both the GB and non-GB groups, as well as for all samples considered
  8762. as one large group.
  8763. For every integer threshold from -2 to 3, the number of genes detected
  8764. at or above that logCPM threshold was plotted for each group.
  8765. \end_layout
  8766. \end_inset
  8767. \end_layout
  8768. \begin_layout Plain Layout
  8769. \end_layout
  8770. \end_inset
  8771. \end_layout
  8772. \begin_layout Standard
  8773. Based on these distributions, we selected a detection threshold of -1, which
  8774. is approximately the leftmost edge of the trough between the signal and
  8775. noise peaks.
  8776. This represents the most liberal possible detection threshold that doesn't
  8777. call substantial numbers of noise genes as detected.
  8778. Among the full dataset, 13429 genes were detected at this threshold, and
  8779. 22276 were not.
  8780. When considering the GB libraries and non-GB libraries separately and re-comput
  8781. ing normalization factors independently within each group, 14535 genes were
  8782. detected in the GB libraries while only 12460 were detected in the non-GB
  8783. libraries.
  8784. Thus, GB allowed the detection of about 2000 extra genes that were buried under
  8785. the noise floor without GB.
  8786. This pattern of at least 2000 additional genes detected with GB was also
  8787. consistent across a wide range of possible detection thresholds, from -2
  8788. to 3 (see Figure
  8789. \begin_inset CommandInset ref
  8790. LatexCommand ref
  8791. reference "fig:Gene-detections"
  8792. plural "false"
  8793. caps "false"
  8794. noprefix "false"
  8795. \end_inset
  8796. ).
  8797. \end_layout
  8798. \begin_layout Subsection
  8799. Globin blocking does not add significant additional noise or decrease sample
  8800. quality
  8801. \end_layout
  8802. \begin_layout Standard
  8803. One potential worry is that the globin blocking protocol could perturb the
  8804. levels of non-globin genes.
  8805. There are two kinds of possible perturbations: systematic and random.
  8806. The former is not a major concern for detection of differential expression,
  8807. since a 2-fold change in every sample has no effect on the relative fold
  8808. change between samples.
  8809. In contrast, random perturbations would increase the noise and obscure
  8810. the signal in the dataset, reducing the capacity to detect differential
  8811. expression.
  8812. \end_layout
  8813. \begin_layout Standard
  8814. \begin_inset Float figure
  8815. wide false
  8816. sideways false
  8817. status collapsed
  8818. \begin_layout Plain Layout
  8819. \align center
  8820. \begin_inset Graphics
  8821. filename graphics/Globin Paper/figure4 - maplot-colored.pdf
  8822. lyxscale 50
  8823. width 60col%
  8824. groupId colwidth
  8825. \end_inset
  8826. \end_layout
  8827. \begin_layout Plain Layout
  8828. \begin_inset Caption Standard
  8829. \begin_layout Plain Layout
  8830. \begin_inset Argument 1
  8831. status collapsed
  8832. \begin_layout Plain Layout
  8833. MA plot showing effects of globin blocking on each gene's abundance.
  8834. \end_layout
  8835. \end_inset
  8836. \begin_inset CommandInset label
  8837. LatexCommand label
  8838. name "fig:MA-plot"
  8839. \end_inset
  8840. \series bold
  8841. MA plot showing effects of globin blocking on each gene's abundance.
  8842. \series default
  8843. All libraries were normalized together as described in Figure
  8844. \begin_inset CommandInset ref
  8845. LatexCommand ref
  8846. reference "fig:logcpm-dists"
  8847. plural "false"
  8848. caps "false"
  8849. noprefix "false"
  8850. \end_inset
  8851. , and genes with an average logCPM below -1 were filtered out.
  8852. Each remaining gene was tested for differential abundance with respect
  8853. to globin blocking (GB) using edgeR’s quasi-likelihood F-test, fitting a
  8854. negative binomial generalized linear model to the table of read counts in each
  8855. library.
  8856. For each gene, edgeR reported average abundance (logCPM),
  8857. \begin_inset Formula $\log_{2}$
  8858. \end_inset
  8859. fold change (logFC), p-value, and Benjamini-Hochberg adjusted false discovery
  8860. rate (FDR).
  8861. Each gene's logFC was plotted against its logCPM, colored by FDR.
  8862. Red points are significant at ≤10% FDR, and blue are not significant at
  8863. that threshold.
  8864. The alpha and beta globin genes targeted for blocking are marked with large
  8865. triangles, while all other genes are represented as small points.
  8866. \end_layout
  8867. \end_inset
  8868. \end_layout
  8869. \begin_layout Plain Layout
  8870. \end_layout
  8871. \end_inset
  8872. \end_layout
  8873. \begin_layout Standard
  8874. \begin_inset Flex TODO Note (inline)
  8875. status open
  8876. \begin_layout Plain Layout
  8877. Standardize on
  8878. \begin_inset Quotes eld
  8879. \end_inset
  8880. log2
  8881. \begin_inset Quotes erd
  8882. \end_inset
  8883. notation
  8884. \end_layout
  8885. \end_inset
  8886. \end_layout
  8887. \begin_layout Standard
  8888. The data do indeed show small systematic perturbations in gene levels (Figure
  8889. \begin_inset CommandInset ref
  8890. LatexCommand ref
  8891. reference "fig:MA-plot"
  8892. plural "false"
  8893. caps "false"
  8894. noprefix "false"
  8895. \end_inset
  8896. ).
  8897. Other than the 3 designated alpha and beta globin genes, two other genes
  8898. stand out as having especially large negative log fold changes: HBD and
  8899. LOC1021365.
  8900. HBD, delta globin, is most likely targeted by the blocking oligos due to
  8901. high sequence homology with the other globin genes.
  8902. LOC1021365 is the aforementioned ncRNA that is reverse-complementary to
  8903. one of the alpha-like genes and that would be expected to be removed during
  8904. the globin blocking step.
  8905. All other genes appear in a cluster centered vertically at 0, and the vast
  8906. majority of genes in this cluster show an absolute log2(FC) of 0.5 or less.
  8907. Nevertheless, many of these small perturbations are still statistically
  8908. significant, indicating that the globin blocking oligos likely cause very
  8909. small but non-zero systematic perturbations in measured gene expression
  8910. levels.
  8911. \end_layout
  8912. \begin_layout Standard
  8913. \begin_inset Float figure
  8914. wide false
  8915. sideways false
  8916. status collapsed
  8917. \begin_layout Plain Layout
  8918. \align center
  8919. \begin_inset Graphics
  8920. filename graphics/Globin Paper/figure5 - corrplot.pdf
  8921. lyxscale 50
  8922. width 70col%
  8923. \end_inset
  8924. \end_layout
  8925. \begin_layout Plain Layout
  8926. \begin_inset Caption Standard
  8927. \begin_layout Plain Layout
  8928. \series bold
  8929. \begin_inset Argument 1
  8930. status collapsed
  8931. \begin_layout Plain Layout
  8932. Comparison of inter-sample gene abundance correlations with and without
  8933. globin blocking.
  8934. \end_layout
  8935. \end_inset
  8936. \begin_inset CommandInset label
  8937. LatexCommand label
  8938. name "fig:gene-abundance-correlations"
  8939. \end_inset
  8940. Comparison of inter-sample gene abundance correlations with and without
  8941. globin blocking (GB).
  8942. \series default
  8943. All libraries were normalized together as described in Figure 2, and genes
  8944. with an average abundance (logCPM, log2 counts per million reads counted)
  8945. less than -1 were filtered out.
  8946. Each gene’s logCPM was computed in each library using the edgeR cpm function.
  8947. For each pair of biological samples, the Pearson correlation between those
  8948. samples' GB libraries was plotted against the correlation between the same
  8949. samples’ non-GB libraries.
  8950. Each point represents a unique pair of samples.
  8951. The solid gray line shows a quantile-quantile plot of the distribution of GB
  8952. correlations vs.
  8953. that of non-GB correlations.
  8954. The thin dashed line is the identity line, provided for reference.
  8955. \end_layout
  8956. \end_inset
  8957. \end_layout
  8958. \begin_layout Plain Layout
  8959. \end_layout
  8960. \end_inset
  8961. \end_layout
  8962. \begin_layout Standard
  8963. To evaluate the possibility of globin blocking causing random perturbations
  8964. and reducing sample quality, we computed the Pearson correlation between
  8965. logCPM values for every pair of samples with and without GB and plotted
  8966. them against each other (Figure
  8967. \begin_inset CommandInset ref
  8968. LatexCommand ref
  8969. reference "fig:gene-abundance-correlations"
  8970. plural "false"
  8971. caps "false"
  8972. noprefix "false"
  8973. \end_inset
  8974. ).
  8975. The plot indicated that the GB libraries have higher sample-to-sample correlati
  8976. ons than the non-GB libraries.
  8977. Parametric and nonparametric tests for differences between the correlations
  8978. with and without GB both confirmed that this difference was highly significant
  8979. (2-sided paired t-test: t = 37.2, df = 665, P ≪ 2.2e-16; 2-sided Wilcoxon
  8980. signed-rank test: V = 2195, P ≪ 2.2e-16).
  8981. Performing the same tests on the Spearman correlations gave the same conclusion
  8982. (t-test: t = 26.8, df = 665, P ≪ 2.2e-16; signed-rank test: V = 8781, P ≪ 2.2e-16).
  8983. We used the edgeR package to compute the overall biological coefficient
  8984. of variation (BCV) for GB and non-GB libraries, and found that globin blocking
  8985. resulted in a negligible increase in the BCV (0.417 with GB vs.
  8986. 0.400 without).
  8987. The near equality of the BCVs for both sets indicates that the higher correlati
  8988. ons in the GB libraries are most likely a result of the increased yield
  8989. of useful reads, which reduces the contribution of Poisson counting uncertainty
  8990. to the overall variance of the logCPM values
  8991. \begin_inset CommandInset citation
  8992. LatexCommand cite
  8993. key "McCarthy2012"
  8994. literal "false"
  8995. \end_inset
  8996. .
  8997. This improves the precision of expression measurements and more than offsets
  8998. the negligible increase in BCV.
  8999. \end_layout
  9000. \begin_layout Subsection
  9001. More differentially expressed genes are detected with globin blocking
  9002. \end_layout
  9003. \begin_layout Standard
  9004. \begin_inset Float table
  9005. wide false
  9006. sideways false
  9007. status collapsed
  9008. \begin_layout Plain Layout
  9009. \align center
  9010. \begin_inset Tabular
  9011. <lyxtabular version="3" rows="5" columns="5">
  9012. <features tabularvalignment="middle">
  9013. <column alignment="center" valignment="top">
  9014. <column alignment="center" valignment="top">
  9015. <column alignment="center" valignment="top">
  9016. <column alignment="center" valignment="top">
  9017. <column alignment="center" valignment="top">
  9018. <row>
  9019. <cell alignment="center" valignment="top" usebox="none">
  9020. \begin_inset Text
  9021. \begin_layout Plain Layout
  9022. \end_layout
  9023. \end_inset
  9024. </cell>
  9025. <cell alignment="center" valignment="top" usebox="none">
  9026. \begin_inset Text
  9027. \begin_layout Plain Layout
  9028. \end_layout
  9029. \end_inset
  9030. </cell>
  9031. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  9032. \begin_inset Text
  9033. \begin_layout Plain Layout
  9034. \series bold
  9035. No Globin Blocking
  9036. \end_layout
  9037. \end_inset
  9038. </cell>
  9039. <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  9040. \begin_inset Text
  9041. \begin_layout Plain Layout
  9042. \end_layout
  9043. \end_inset
  9044. </cell>
  9045. <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  9046. \begin_inset Text
  9047. \begin_layout Plain Layout
  9048. \end_layout
  9049. \end_inset
  9050. </cell>
  9051. </row>
  9052. <row>
  9053. <cell alignment="center" valignment="top" usebox="none">
  9054. \begin_inset Text
  9055. \begin_layout Plain Layout
  9056. \end_layout
  9057. \end_inset
  9058. </cell>
  9059. <cell alignment="center" valignment="top" usebox="none">
  9060. \begin_inset Text
  9061. \begin_layout Plain Layout
  9062. \end_layout
  9063. \end_inset
  9064. </cell>
  9065. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  9066. \begin_inset Text
  9067. \begin_layout Plain Layout
  9068. \series bold
  9069. Up
  9070. \end_layout
  9071. \end_inset
  9072. </cell>
  9073. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  9074. \begin_inset Text
  9075. \begin_layout Plain Layout
  9076. \series bold
  9077. NS
  9078. \end_layout
  9079. \end_inset
  9080. </cell>
  9081. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  9082. \begin_inset Text
  9083. \begin_layout Plain Layout
  9084. \series bold
  9085. Down
  9086. \end_layout
  9087. \end_inset
  9088. </cell>
  9089. </row>
  9090. <row>
  9091. <cell multirow="3" alignment="center" valignment="middle" topline="true" bottomline="true" leftline="true" usebox="none">
  9092. \begin_inset Text
  9093. \begin_layout Plain Layout
  9094. \series bold
  9095. Globin Blocking
  9096. \end_layout
  9097. \end_inset
  9098. </cell>
  9099. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  9100. \begin_inset Text
  9101. \begin_layout Plain Layout
  9102. \series bold
  9103. Up
  9104. \end_layout
  9105. \end_inset
  9106. </cell>
  9107. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  9108. \begin_inset Text
  9109. \begin_layout Plain Layout
  9110. \family roman
  9111. \series medium
  9112. \shape up
  9113. \size normal
  9114. \emph off
  9115. \bar no
  9116. \strikeout off
  9117. \xout off
  9118. \uuline off
  9119. \uwave off
  9120. \noun off
  9121. \color none
  9122. 231
  9123. \end_layout
  9124. \end_inset
  9125. </cell>
  9126. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  9127. \begin_inset Text
  9128. \begin_layout Plain Layout
  9129. \family roman
  9130. \series medium
  9131. \shape up
  9132. \size normal
  9133. \emph off
  9134. \bar no
  9135. \strikeout off
  9136. \xout off
  9137. \uuline off
  9138. \uwave off
  9139. \noun off
  9140. \color none
  9141. 515
  9142. \end_layout
  9143. \end_inset
  9144. </cell>
  9145. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  9146. \begin_inset Text
  9147. \begin_layout Plain Layout
  9148. \family roman
  9149. \series medium
  9150. \shape up
  9151. \size normal
  9152. \emph off
  9153. \bar no
  9154. \strikeout off
  9155. \xout off
  9156. \uuline off
  9157. \uwave off
  9158. \noun off
  9159. \color none
  9160. 2
  9161. \end_layout
  9162. \end_inset
  9163. </cell>
  9164. </row>
  9165. <row>
  9166. <cell multirow="4" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  9167. \begin_inset Text
  9168. \begin_layout Plain Layout
  9169. \end_layout
  9170. \end_inset
  9171. </cell>
  9172. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  9173. \begin_inset Text
  9174. \begin_layout Plain Layout
  9175. \series bold
  9176. NS
  9177. \end_layout
  9178. \end_inset
  9179. </cell>
  9180. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  9181. \begin_inset Text
  9182. \begin_layout Plain Layout
  9183. \family roman
  9184. \series medium
  9185. \shape up
  9186. \size normal
  9187. \emph off
  9188. \bar no
  9189. \strikeout off
  9190. \xout off
  9191. \uuline off
  9192. \uwave off
  9193. \noun off
  9194. \color none
  9195. 160
  9196. \end_layout
  9197. \end_inset
  9198. </cell>
  9199. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  9200. \begin_inset Text
  9201. \begin_layout Plain Layout
  9202. \family roman
  9203. \series medium
  9204. \shape up
  9205. \size normal
  9206. \emph off
  9207. \bar no
  9208. \strikeout off
  9209. \xout off
  9210. \uuline off
  9211. \uwave off
  9212. \noun off
  9213. \color none
  9214. 11235
  9215. \end_layout
  9216. \end_inset
  9217. </cell>
  9218. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  9219. \begin_inset Text
  9220. \begin_layout Plain Layout
  9221. \family roman
  9222. \series medium
  9223. \shape up
  9224. \size normal
  9225. \emph off
  9226. \bar no
  9227. \strikeout off
  9228. \xout off
  9229. \uuline off
  9230. \uwave off
  9231. \noun off
  9232. \color none
  9233. 136
  9234. \end_layout
  9235. \end_inset
  9236. </cell>
  9237. </row>
  9238. <row>
  9239. <cell multirow="4" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  9240. \begin_inset Text
  9241. \begin_layout Plain Layout
  9242. \end_layout
  9243. \end_inset
  9244. </cell>
  9245. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  9246. \begin_inset Text
  9247. \begin_layout Plain Layout
  9248. \series bold
  9249. Down
  9250. \end_layout
  9251. \end_inset
  9252. </cell>
  9253. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  9254. \begin_inset Text
  9255. \begin_layout Plain Layout
  9256. \family roman
  9257. \series medium
  9258. \shape up
  9259. \size normal
  9260. \emph off
  9261. \bar no
  9262. \strikeout off
  9263. \xout off
  9264. \uuline off
  9265. \uwave off
  9266. \noun off
  9267. \color none
  9268. 0
  9269. \end_layout
  9270. \end_inset
  9271. </cell>
  9272. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  9273. \begin_inset Text
  9274. \begin_layout Plain Layout
  9275. \family roman
  9276. \series medium
  9277. \shape up
  9278. \size normal
  9279. \emph off
  9280. \bar no
  9281. \strikeout off
  9282. \xout off
  9283. \uuline off
  9284. \uwave off
  9285. \noun off
  9286. \color none
  9287. 548
  9288. \end_layout
  9289. \end_inset
  9290. </cell>
  9291. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  9292. \begin_inset Text
  9293. \begin_layout Plain Layout
  9294. \family roman
  9295. \series medium
  9296. \shape up
  9297. \size normal
  9298. \emph off
  9299. \bar no
  9300. \strikeout off
  9301. \xout off
  9302. \uuline off
  9303. \uwave off
  9304. \noun off
  9305. \color none
  9306. 127
  9307. \end_layout
  9308. \end_inset
  9309. </cell>
  9310. </row>
  9311. </lyxtabular>
  9312. \end_inset
  9313. \end_layout
  9314. \begin_layout Plain Layout
  9315. \begin_inset Caption Standard
  9316. \begin_layout Plain Layout
  9317. \series bold
  9318. \begin_inset Argument 1
  9319. status open
  9320. \begin_layout Plain Layout
  9321. Comparison of significantly differentially expressed genes with and without
  9322. globin blocking.
  9323. \end_layout
  9324. \end_inset
  9325. \begin_inset CommandInset label
  9326. LatexCommand label
  9327. name "tab:Comparison-of-significant"
  9328. \end_inset
  9329. Comparison of significantly differentially expressed genes with and without
  9330. globin blocking.
  9331. \series default
  9332. Up, Down: Genes significantly up/down-regulated in post-transplant samples
  9333. relative to pre-transplant samples, with a false discovery rate of 10%
  9334. or less.
  9335. NS: Non-significant genes (false discovery rate greater than 10%).
  9336. \end_layout
  9337. \end_inset
  9338. \end_layout
  9339. \begin_layout Plain Layout
  9340. \end_layout
  9341. \end_inset
  9342. \end_layout
  9343. \begin_layout Standard
  9344. To compare performance on differential gene expression tests, we took subsets
  9345. of both the GB and non-GB libraries with exactly one pre-transplant and
  9346. one post-transplant sample for each animal that had paired samples available
  9347. for analysis (N=7 animals, N=14 samples in each subset).
  9348. The same test for pre- vs.
  9349. post-transplant differential gene expression was performed on the same
  9350. 7 pairs of samples from GB libraries and non-GB libraries, in each case
  9351. using an FDR of 10% as the threshold of significance.
  9352. Out of 12954 genes that passed the detection threshold in both subsets,
  9353. 358 were called significantly differentially expressed in the same direction
  9354. in both sets; 1063 were differentially expressed in the GB set only; 296
  9355. were differentially expressed in the non-GB set only; 2 genes were called
  9356. significantly up in the GB set but significantly down in the non-GB set;
  9357. and the remaining 11235 were not called differentially expressed in either
  9358. set.
  9359. These data are summarized in Table
  9360. \begin_inset CommandInset ref
  9361. LatexCommand ref
  9362. reference "tab:Comparison-of-significant"
  9363. plural "false"
  9364. caps "false"
  9365. noprefix "false"
  9366. \end_inset
  9367. .
  9368. The differences in BCV calculated by edgeR for these subsets of samples
  9369. were negligible (BCV = 0.302 for GB and 0.297 for non-GB).
  9370. \end_layout
  9371. \begin_layout Standard
  9372. The key point is that the GB data result in substantially more differential
  9373. expression calls than the non-GB data.
  9374. Since there is no gold standard for this dataset, it is impossible to be
  9375. certain whether this is due to under-calling of differential expression
  9376. in the non-GB samples or over-calling in the GB samples.
  9377. However, given that both datasets are derived from the same biological
  9378. samples and have nearly equal BCVs, it is more likely that the larger number
  9379. of DE calls in the GB samples are genuine detections that were enabled
  9380. by the higher sequencing depth and measurement precision of the GB samples.
  9381. Note that the same set of genes was considered in both subsets, so the
  9382. larger number of differentially expressed gene calls in the GB data set
  9383. reflects a greater sensitivity to detect significant differential gene
  9384. expression and not simply the larger total number of detected genes in
  9385. GB samples described earlier.
  9386. \end_layout
  9387. \begin_layout Section
  9388. Discussion
  9389. \end_layout
  9390. \begin_layout Standard
  9391. The original experience with whole blood gene expression profiling on DNA
  9392. microarrays demonstrated that the high concentration of globin transcripts
  9393. interfered with the detection of genes with relatively low expression
  9394. levels, in effect significantly reducing the sensitivity.
  9395. To address this limitation, commercial protocols for globin reduction were
  9396. developed based on strategies to block globin transcript amplification
  9397. during labeling or physically removing globin transcripts by affinity bead
  9398. methods
  9399. \begin_inset CommandInset citation
  9400. LatexCommand cite
  9401. key "Winn2010"
  9402. literal "false"
  9403. \end_inset
  9404. .
  9405. More recently, using the latest generation of labeling protocols and arrays,
  9406. it was determined that globin reduction was no longer necessary to obtain
  9407. sufficient sensitivity to detect differential transcript expression
  9408. \begin_inset CommandInset citation
  9409. LatexCommand cite
  9410. key "NuGEN2010"
  9411. literal "false"
  9412. \end_inset
  9413. .
  9414. However, we are not aware of any publications using these currently available
  9415. protocols with the latest generation of microarrays that actually compare
  9416. the detection sensitivity with and without globin reduction.
  9417. Nevertheless, in practice this has now been generally adopted, driven primarily
  9418. by concerns for cost control.
  9419. The main objective of our work was to directly test the impact of globin
  9420. gene transcripts and a new globin blocking protocol for application to
  9421. the newest generation of differential gene expression profiling determined
  9422. using next generation sequencing.
  9423. \end_layout
  9424. \begin_layout Standard
  9425. The challenge of doing global gene expression profiling in cynomolgus monkeys
  9426. is that the currently available arrays were never designed to comprehensively
  9427. cover this genome and have not been updated since the first assemblies
  9428. of the cynomolgus genome were published.
  9429. Therefore, we determined that the best strategy for peripheral blood profiling
  9430. was to do deep RNA-seq and inform the workflow using the latest available
  9431. genome assembly and annotation
  9432. \begin_inset CommandInset citation
  9433. LatexCommand cite
  9434. key "Wilson2013"
  9435. literal "false"
  9436. \end_inset
  9437. .
  9438. However, it was not immediately clear whether globin reduction was necessary
  9439. for RNA-seq or how much improvement in efficiency or sensitivity to detect
  9440. differential gene expression would be achieved for the added cost and work.
  9441. \end_layout
  9442. \begin_layout Standard
  9443. We found only one report that demonstrated that globin reduction significantly
  9444. improved the effective read yields for sequencing of human peripheral blood
  9445. cell RNA using a DeepSAGE protocol
  9446. \begin_inset CommandInset citation
  9447. LatexCommand cite
  9448. key "Mastrokolias2012"
  9449. literal "false"
  9450. \end_inset
  9451. .
  9452. The approach to DeepSAGE involves two different restriction enzymes that
  9453. purify and then tag small fragments of transcripts at specific locations
  9454. and thus significantly reduces the complexity of the transcriptome.
  9455. Therefore, we could not determine how DeepSAGE results would translate
  9456. to the common strategy in the field for assaying the entire transcript
  9457. population by whole-transcriptome 3’-end RNA-seq.
  9458. Furthermore, if globin reduction was necessary, we also needed a globin
  9459. reduction method specific to cynomolgus globin sequences that would work
  9460. in an organism for which no kit is available off the shelf.
  9461. \end_layout
  9462. \begin_layout Standard
  9463. As mentioned above, the addition of globin blocking oligos has a very small
  9464. impact on measured gene expression levels.
  9465. However, this is a non-issue for the purposes of differential expression
  9466. testing, since a systematic change in a gene in all samples does not affect
  9467. relative expression levels between samples.
  9468. Nevertheless, we must acknowledge that simple comparisons of gene expression
  9469. data obtained by GB and non-GB protocols are not possible without additional
  9470. normalization.
  9471. \end_layout
  9472. \begin_layout Standard
  9473. More importantly, globin blocking not only nearly doubles the yield of usable
  9474. reads, it also increases inter-sample correlation and sensitivity to detect
  9475. differential gene expression relative to the same set of samples profiled
  9476. without blocking.
  9477. In addition, globin blocking does not add a significant amount of random
  9478. noise to the data.
  9479. Globin blocking thus represents a cost-effective way to squeeze more data
  9480. and statistical power out of the same blood samples and the same amount
  9481. of sequencing.
  9482. In conclusion, globin reduction greatly increases the yield of useful RNA-seq
  9483. reads mapping to the rest of the genome, with minimal perturbations in
  9484. the relative levels of non-globin genes.
  9485. Based on these results, globin transcript reduction using sequence-specific,
  9486. complementary blocking oligonucleotides is recommended for all deep RNA-seq
  9487. of cynomolgus and other nonhuman primate blood samples.
  9488. \end_layout
  9489. \begin_layout Chapter
  9490. Future Directions
  9491. \end_layout
  9492. \begin_layout Standard
  9493. \begin_inset Flex TODO Note (inline)
  9494. status open
  9495. \begin_layout Plain Layout
  9496. Consider per-chapter future directions.
  9497. Check instructions.
  9498. \end_layout
  9499. \end_inset
  9500. \end_layout
  9501. \begin_layout Section*
  9502. Ch2
  9503. \end_layout
  9504. \begin_layout Itemize
  9505. Functional validation of effective promoter radius
  9506. \end_layout
  9507. \begin_layout Itemize
  9508. Current definition of promoter radius is dependent on peak calling.
  9509. Would be nice to have a better way of defining promoter radius independent
  9510. of peak calling.
  9511. Possibly based on the promoter coverage profiles
  9512. \end_layout
  9513. \begin_layout Itemize
  9514. N-to-M convergence deserves further study of some kind
  9515. \end_layout
  9516. \begin_layout Itemize
  9517. Promoter positional coverage: follow up on hints of interesting patterns
  9518. \end_layout
  9519. \begin_layout Itemize
  9520. Study other epigenetic marks in more contexts
  9521. \end_layout
  9522. \begin_deeper
  9523. \begin_layout Itemize
  9524. DNA methylation, histone marks, chromatin accessibility & conformation in
  9525. CD4 T-cells
  9526. \end_layout
  9527. \begin_layout Itemize
  9528. Also look at other types of lymphocytes: CD8 T-cells, B-cells, NK cells
  9529. \end_layout
  9530. \end_deeper
  9531. \begin_layout Section*
  9532. Ch3
  9533. \end_layout
  9534. \begin_layout Itemize
  9535. Use CV or bootstrap to better evaluate classifiers
  9536. \end_layout
  9537. \begin_layout Itemize
  9538. fRMAtools could be adapted to not require equal-sized groups
  9539. \end_layout
  9540. \begin_layout Section*
  9541. Ch4
  9542. \end_layout
  9543. \begin_layout Itemize
  9544. Look in discussion, I think there's some stuff there already
  9545. \end_layout
  9546. \begin_layout Standard
  9547. \begin_inset ERT
  9548. status open
  9549. \begin_layout Plain Layout
  9550. % Call it "References" instead of "Bibliography"
  9551. \end_layout
  9552. \begin_layout Plain Layout
  9553. \backslash
  9554. renewcommand{
  9555. \backslash
  9556. bibname}{References}
  9557. \end_layout
  9558. \end_inset
  9559. \end_layout
  9560. \begin_layout Standard
  9561. \begin_inset Flex TODO Note (inline)
  9562. status open
  9563. \begin_layout Plain Layout
  9564. Check bib entry formatting & sort order
  9565. \end_layout
  9566. \end_inset
  9567. \end_layout
  9568. \begin_layout Standard
  9569. \begin_inset Flex TODO Note (inline)
  9570. status open
  9571. \begin_layout Plain Layout
  9572. Check in-text citation format.
  9573. Probably don't just want [1], [2], etc.
  9574. \end_layout
  9575. \end_inset
  9576. \end_layout
  9577. \begin_layout Standard
  9578. \begin_inset CommandInset bibtex
  9579. LatexCommand bibtex
  9580. btprint "btPrintCited"
  9581. bibfiles "code-refs,refs-PROCESSED"
  9582. options "bibtotoc,unsrt"
  9583. \end_inset
  9584. \end_layout
  9585. \end_body
  9586. \end_document