12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
1422114231142411425114261142711428114291143011431114321143311434114351143611437114381143911440114411144211443114441144511446114471144811449114501145111452114531145411455114561145711458114591146011461114621146311464114651146611467114681146911470114711147211473114741147511476114771147811479 |
- #LyX 2.3 created this file. For more info see http://www.lyx.org/
- \lyxformat 544
- \begin_document
- \begin_header
- \save_transient_properties true
- \origin unavailable
- \textclass extbook
- \begin_preamble
- % List all used files in log output
- \listfiles
- % Add a DRAFT watermark
- \usepackage{draftwatermark}
- \SetWatermarkLightness{0.97}
- \SetWatermarkScale{1}
- % Set up required header format
- \usepackage{fancyhdr}
- \pagestyle{fancy}
- \renewcommand{\headrulewidth}{0pt}
- \rhead{}
- \lhead{}
- \rfoot{}
- \lfoot{}
- \cfoot{\thepage} % Page number bottom center
- % Allow FloatBarrier command
- \usepackage{placeins}
- % Allow landscape pages
- \usepackage{pdflscape}
- % This one breaks subfigs so it's disabled
- % https://tex.stackexchange.com/questions/65680/automatically-bold-first-sentence-of-a-floats-caption
- \end_preamble
- \use_default_options true
- \begin_modules
- todonotes
- \end_modules
- \maintain_unincluded_children false
- \language english
- \language_package default
- \inputencoding utf8
- \fontencoding default
- \font_roman "default" "default"
- \font_sans "default" "default"
- \font_typewriter "default" "default"
- \font_math "auto" "auto"
- \font_default_family default
- \use_non_tex_fonts false
- \font_sc false
- \font_osf false
- \font_sf_scale 100 100
- \font_tt_scale 100 100
- \use_microtype false
- \use_dash_ligatures true
- \graphics default
- \default_output_format pdf4
- \output_sync 0
- \bibtex_command default
- \index_command default
- \paperfontsize 12
- \spacing double
- \use_hyperref true
- \pdf_bookmarks true
- \pdf_bookmarksnumbered false
- \pdf_bookmarksopen false
- \pdf_bookmarksopenlevel 1
- \pdf_breaklinks false
- \pdf_pdfborder false
- \pdf_colorlinks false
- \pdf_backref false
- \pdf_pdfusetitle true
- \papersize letterpaper
- \use_geometry true
- \use_package amsmath 1
- \use_package amssymb 1
- \use_package cancel 1
- \use_package esint 1
- \use_package mathdots 1
- \use_package mathtools 1
- \use_package mhchem 1
- \use_package stackrel 1
- \use_package stmaryrd 1
- \use_package undertilde 1
- \cite_engine basic
- \cite_engine_type default
- \biblio_style plain
- \use_bibtopic false
- \use_indices false
- \paperorientation portrait
- \suppress_date false
- \justification true
- \use_refstyle 1
- \use_minted 0
- \index Index
- \shortcut idx
- \color #008000
- \end_index
- \leftmargin 1.5in
- \topmargin 1in
- \rightmargin 1in
- \bottommargin 1in
- \secnumdepth 3
- \tocdepth 3
- \paragraph_separation indent
- \paragraph_indentation default
- \is_math_indent 0
- \math_numbering_side default
- \quotes_style english
- \dynamic_quotes 0
- \papercolumns 1
- \papersides 1
- \paperpagestyle default
- \tracking_changes false
- \output_changes false
- \html_math_output 0
- \html_css_as_file 0
- \html_be_strict false
- \end_header
- \begin_body
- \begin_layout Title
- Bioinformatic analysis of complex, high-throughput genomic and epigenomic
- data in the context of immunology and transplant rejection
- \end_layout
- \begin_layout Author
- A thesis presented
- \begin_inset Newline newline
- \end_inset
- by
- \begin_inset Newline newline
- \end_inset
- Ryan C.
- Thompson
- \begin_inset Newline newline
- \end_inset
- to
- \begin_inset Newline newline
- \end_inset
- The Scripps Research Institute Graduate Program
- \begin_inset Newline newline
- \end_inset
- in partial fulfillment of the requirements for the degree of
- \begin_inset Newline newline
- \end_inset
- Doctor of Philosophy in the subject of Biology
- \begin_inset Newline newline
- \end_inset
- for
- \begin_inset Newline newline
- \end_inset
- The Scripps Research Institute
- \begin_inset Newline newline
- \end_inset
- La Jolla, California
- \end_layout
- \begin_layout Date
- October 2019
- \end_layout
- \begin_layout Standard
- [Copyright notice]
- \end_layout
- \begin_layout Standard
- [Thesis acceptance form]
- \end_layout
- \begin_layout Standard
- [Dedication]
- \end_layout
- \begin_layout Standard
- [Acknowledgements]
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset toc
- LatexCommand tableofcontents
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset FloatList table
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset FloatList figure
- \end_inset
- \end_layout
- \begin_layout Standard
- [List of Abbreviations]
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Look into auto-generated nomenclature list: https://wiki.lyx.org/Tips/Nomenclature
- \end_layout
- \end_inset
- \end_layout
- \begin_layout List of TODOs
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- On final pass: Check all figures to make sure they fit on the page with
- their legends.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Chapter*
- Abstract
- \end_layout
- \begin_layout Chapter
- Introduction
- \end_layout
- \begin_layout Section
- Background & Significance
- \end_layout
- \begin_layout Subsection
- Biological motivation
- \end_layout
- \begin_layout Itemize
- Rejection is the major long-term threat to organ and tissue grafts
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Common mechanisms of rejection
- \end_layout
- \begin_layout Itemize
- Effective immune suppression requires monitoring for rejection and tuning
-
- \end_layout
- \begin_layout Itemize
- Current tests for rejection (tissue biopsy) are invasive and biased
- \end_layout
- \begin_layout Itemize
- A blood test based on microarrays would be less biased and invasive
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Memory cells are resistant to immune suppression
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Mechanisms of resistance in memory cells are poorly understood
- \end_layout
- \begin_layout Itemize
- A better understanding of immune memory formation is needed
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Mesenchymal stem cell infusion is a promising new treatment to prevent/delay
- rejection
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Demonstrated in mice, but not yet in primates
- \end_layout
- \begin_layout Itemize
- Mechanism currently unknown, but MSC are known to be immune modulatory
- \end_layout
- \end_deeper
- \begin_layout Subsection
- Overview of bioinformatic analysis methods
- \end_layout
- \begin_layout Standard
- An overview of all the methods used, including what problem they solve,
- what assumptions they make, and a basic description of how they work.
- \end_layout
- \begin_layout Itemize
- ChIP-seq Peak calling
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Cross-correlation analysis to determine fragment size
- \end_layout
- \begin_layout Itemize
- Broad vs narrow peaks
- \end_layout
- \begin_layout Itemize
- SICER for broad peaks
- \end_layout
- \begin_layout Itemize
- IDR for biologically reproducible peaks
- \end_layout
- \begin_layout Itemize
- csaw peak filtering guidelines for unbiased downstream analysis
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Normalization is non-trivial and application-dependent
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Expression arrays: RMA & fRMA; why fRMA is needed
- \end_layout
- \begin_layout Itemize
- Methylation arrays: M-value transformation approximates normal data but
- induces heteroskedasticity
- \end_layout
- \begin_layout Itemize
- RNA-seq: normalize based on assumption that the average gene is not changing
- \end_layout
- \begin_layout Itemize
- ChIP-seq: complex with many considerations, dependent on experimental methods,
- biological system, and analysis goals
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Limma: The standard linear modeling framework for genomics
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Empirical Bayes variance modeling: limma's core feature
- \end_layout
- \begin_layout Itemize
- edgeR & DESeq2: Extend with negative binomial GLM for RNA-seq and other
- count data
- \end_layout
- \begin_layout Itemize
- voom: Extend with precision weights to model mean-variance trend
- \end_layout
- \begin_layout Itemize
- arrayWeights and duplicateCorrelation to handle complex variance structures
- \end_layout
- \end_deeper
- \begin_layout Itemize
- sva and ComBat for batch correction
- \end_layout
- \begin_layout Itemize
- Factor analysis: PCA, MDS, MOFA
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Batch-corrected PCA is informative, but careful application is required
- to avoid bias
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Gene set analysis: camera and SPIA
- \end_layout
- \begin_layout Section
- Innovation
- \end_layout
- \begin_layout Itemize
- MSC infusion to improve transplant outcomes (prevent/delay rejection)
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Characterize MSC response to interferon gamma
- \end_layout
- \begin_layout Itemize
- IFN-g is thought to stimulate their function
- \end_layout
- \begin_layout Itemize
- Test IFN-g treated MSC infusion as a therapy to delay graft rejection in
- cynomolgus monkeys
- \end_layout
- \begin_layout Itemize
- Monitor animals post-transplant using blood RNA-seq at serial time points
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Investigate dynamics of histone marks in CD4 T-cell activation and memory
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Previous studies have looked at single snapshots of histone marks
- \end_layout
- \begin_layout Itemize
- Instead, look at changes in histone marks across activation and memory
- \end_layout
- \end_deeper
- \begin_layout Itemize
- High-throughput sequencing and microarray technologies
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Powerful methods for assaying gene expression and epigenetics across entire
- genomes
- \end_layout
- \begin_layout Itemize
- Proper analysis requires finding and exploiting systematic genome-wide trends
- \end_layout
- \end_deeper
- \begin_layout Chapter
- Reproducible genome-wide epigenetic analysis of H3K4 and H3K27 methylation
- in naive and memory CD4 T-cell activation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Chapter author list: Me, Sarah, Dan
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Need better section titles throughout the chapter
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Itemize
- CD4 T-cells are central to all adaptive immune responses and memory
- \end_layout
- \begin_layout Itemize
- H3K4 and H3K27 methylation are major epigenetic regulators of gene expression
- \end_layout
- \begin_layout Itemize
- Canonically, H3K4 is activating and H3K27 is inhibitory, but the reality
- is complex
- \end_layout
- \begin_layout Itemize
- Looking at these marks during CD4 activation and memory should reveal new
- mechanistic details
- \end_layout
- \begin_layout Itemize
- Test
- \begin_inset Quotes eld
- \end_inset
- poised promoter
- \begin_inset Quotes erd
- \end_inset
- hypothesis in which H3K4 and H3K27 are both methylated
- \end_layout
- \begin_layout Itemize
- Expand scope of analysis beyond simple promoter counts
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Analyze peaks genome-wide, including in intergenic regions
- \end_layout
- \begin_layout Itemize
- Analysis of coverage distribution shape within promoters, e.g.
- upstream vs downstream coverage
- \end_layout
- \end_deeper
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Standard
- A reproducible workflow
- \begin_inset CommandInset citation
- LatexCommand cite
- key "gh-cd4-csaw"
- literal "false"
- \end_inset
- was written to analyze the raw ChIP-seq and RNA-seq data from previous
- studies
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016,LaMere2017"
- literal "true"
- \end_inset
- .
- Briefly, this data consists of RNA-seq and ChIP-seq from CD4 T-cells cultured
- from 4 donors.
- From each donor, naive and memory CD4 T-cells were isolated separately.
- Then cultures of both cells were activated [how?], and samples were taken
- at 4 time points: Day 0 (pre-activation), Day 1 (early activation), Day
- 5 (peak activation), and Day 14 (post-activation).
- For each combination of cell type and time point, RNA was isolated, and
- ChIP-seq was performed for each of 3 histone marks: H3K4me2, H3K4me3, and
- H3K27me3.
- The ChIP-seq input was also sequenced for each sample.
- The result was 32 samples for each assay.
- \end_layout
- \begin_layout Standard
- Sequence reads were retrieved from the Sequence Read Archive (SRA)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leinonen2011"
- literal "false"
- \end_inset
- .
- ChIP-seq (and input) reads were aligned to the GRCh38 genome assembly using
- Bowtie 2
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Langmead2012,Schneider2017,gh-hg38-ref"
- literal "false"
- \end_inset
- .
- Artifact regions were annotated using a custom implementation of the GreyListCh
- IP algorithm, and these
- \begin_inset Quotes eld
- \end_inset
- greylists
- \begin_inset Quotes erd
- \end_inset
- were merged with the ENCODE blacklist
- \begin_inset CommandInset citation
- LatexCommand cite
- key "greylistchip,Amemiya2019,Dunham2012"
- literal "false"
- \end_inset
- .
- Any read or peak overlapping one of these regions was regarded as artifactual
- and excluded from downstream analyses.
-
- \end_layout
- \begin_layout Standard
- Peaks were called using epic, an implementation of the SICER algorithm
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zang2009,gh-epic"
- literal "false"
- \end_inset
- .
- Peaks were also called separately using MACS, but MACS was determined to
- be a poor fit for the data, and these peak calls were not used further
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zhang2008"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Itemize
- Re-analyze previously published CD4 ChIP-seq & RNA-seq data
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Completely reimplement analysis from scratch as a reproducible workflow
- \end_layout
- \begin_layout Itemize
- Use newly published methods & algorithms not available during the original
- analysis: SICER, csaw, MOFA
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Argelaguet2018"
- literal "false"
- \end_inset
- , ComBat, sva, GREAT, and more
- \end_layout
- \end_deeper
- \begin_layout Itemize
- SICER, IDR, csaw, & GREAT to call ChIP-seq peaks genome-wide, perform differenti
- al abundance analysis, and relate those peaks to gene expression
- \end_layout
- \begin_layout Itemize
- Promoter counts in sliding windows around each gene's highest-expressed
- TSS to investigate coverage distribution within promoters
- \end_layout
- \begin_layout Subsection
- RNA-seq align+quant method comparison
- \end_layout
- \begin_layout Standard
- \align left
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Write a legend for Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-norm-comp"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- STAR quantification, Entrez vs Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \qquad{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon+Shoal quantification, Entrez vs Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- STAR vs HISAT2 quantification, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \qquad{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/star-vs-salmon-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon vs STAR quantification, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon vs Kallisto quantification, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \qquad{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon+Shoal vs Salmon alone, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-norm-comp"
- \end_inset
- RNA-seq comparisons
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Ultimately selected shoal as quantification, Ensembl as annotation.
- Why? Running downstream analyses with all quant methods and both annotations
- showed very little practical difference, so choice was not terribly important.
- Prefer shoal due to theoretical advantages.
- To note in discussion: reproducible workflow made it easy to do this, enabling
- an informed decision.
- \end_layout
- \begin_layout Subsection
- RNA-seq has a large confounding batch effect
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Just take the top row
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-seq-weights-vs-covars"
- \end_inset
- RNA-seq sample weights, grouped by experimental and technical covariates.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Batch 1 is garbage quality.
- Analyses involving batch 1 samples are expected to yield poor statistical
- power.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-no-batchsub-CROP.png
- lyxscale 25
- width 75col%
- groupId rna-pca-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA-no-batchsub"
- \end_inset
- Before batch correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-combat-batchsub-CROP.png
- lyxscale 25
- width 75col%
- groupId rna-pca-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA-ComBat-batchsub"
- \end_inset
- After batch correction with ComBat
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA"
- \end_inset
- PCoA plots of RNA-seq data showing effect of batch correction.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- RNA-seq batch effect can be partially corrected, but still induces uncorrectable
- biases in downstream analysis
- \end_layout
- \begin_layout Subsection
- ChIP-seq blacklisting is important
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/csaw/CCF-plots-PAGE2-CROP.pdf
- lyxscale 50
- height 40theight%
- groupId ccf-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:CCF-with-blacklist"
- \end_inset
- Cross-correlation plots with blacklisted reads removed
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/csaw/CCF-plots-noBL-PAGE2-CROP.pdf
- lyxscale 50
- height 40theight%
- groupId ccf-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:CCF-without-blacklist"
- \end_inset
- Cross-correlation plots without removing blacklisted reads
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:CCF-master"
- \end_inset
- Strand cross-correlation plots for ChIP-seq data.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- ChIP-seq peak calling
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/IDR/D4659vsD5053_epic-PAGE1-CROP.pdf
- lyxscale 50
- width 45col%
- groupId idr-rc-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Peak ranks from SICER peak caller
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/IDR/D4659vsD5053_macs-PAGE1-CROP.pdf
- lyxscale 50
- width 45col%
- groupId idr-rc-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Peak ranks from MACS peak caller
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:IDR-rank-consist"
- \end_inset
- Irreproducible Discovery Rate rank consistency plots for H3K27me3.
- \series default
- Peaks are ranked by the scores assigned by the peak caller in each donor,
- and then the ranks for two donors are plotted against each other.
- Higher ranks are more significant (top right).
- Peaks meeting various thresholds of reproducibility, measured by the irreproduc
- ible discovery rate (IDR), are shaded accordingly.
- [This could be explained better, or refer to the text.]
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:IDR-rank-consist"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the IDR rank-consistency plots for peaks called in an arbitrarily-chosen
- pair of donors.
- When the peaks for each donor are ranked according to their scores, SICER
- produces much more reproducible results between donors.
- This is consistent with SICER's stated goal of identifying broad peaks,
- in contrast to MACS, which is designed for identifying sharp peaks.
- Based on this observation, the SICER peak calls were used for all downstream
- analyses that involved ChIP-seq peaks.
-
- \end_layout
- \begin_layout Subsection
- ChIP-seq normalization
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-sample-MAplot-bins-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-plot-bigbins"
- \end_inset
- MA plot of H3K4me2 read counts in 10 kb bins for two arbitrary samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- ChIP-seq must be corrected for hidden confounding factors
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-PCA-raw-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me2-bad"
- \end_inset
- H3K4me2, no correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-PCA-SVsub-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me2-good"
- \end_inset
- H3K4me2, SVs subtracted
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-PCA-raw-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me3-bad"
- \end_inset
- H3K4me3, no correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-PCA-SVsub-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me3-good"
- \end_inset
- H3K4me3, SVs subtracted
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-PCA-raw-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K27me3-bad"
- \end_inset
- H3K27me3, no correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-PCA-SVsub-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K27me3-good"
- \end_inset
- H3K27me3, SVs subtracted
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-ChIP"
- \end_inset
- PCoA plots of ChIP-seq sliding window data, before and after subtracting
- surrogate variables (SVs).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Figures showing BCV plots with and without SVA for each histone mark?
- \end_layout
- \begin_layout Subsection
- MOFA recovers biologically relevant variation from blind analysis by correlating
- across datasets
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/MOFA-varExplaiend-matrix-CROP.png
- lyxscale 25
- width 45col%
- groupId mofa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:mofa-varexplained"
- \end_inset
- Variance explained in each data set by each latent factor estimated by MOFA.
- \series default
- For each latent factor (LF) learned by MOFA, the variance explained by
- that factor in each data set (
- \begin_inset Quotes eld
- \end_inset
- view
- \begin_inset Quotes erd
- \end_inset
- ) is shown by the shading of the cells in the lower section.
- The upper section shows the total fraction of each data set's variance
- that is explained by all LFs combined.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/MOFA-LF-scatter-CROP.png
- lyxscale 25
- width 45col%
- groupId mofa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:mofa-lf-scatter"
- \end_inset
- Scatter plots of specific pairs of MOFA latent factors.
- \series default
- LFs 1, 4, and 5 explain substantial variation in all data sets, so they
- are plotted against each other in order to reveal patterns of variation
- that are shared across all data sets.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MOFA-master"
- \end_inset
- MOFA latent factors separate technical confounders from biologically relevant
- variation.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-varexplained"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows that LF1, 4, and 5 explain substantial variance in all data sets
- \end_layout
- \begin_layout Itemize
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-lf-scatter"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows that those same 3 LFs (1, 4, & 5) also correlate best with the experimen
- tal factors (cell type & time point)
- \end_layout
- \begin_layout Itemize
- LF2 is clearly the RNA-seq batch effect
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/MOFA-batch-correct-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:mofa-batchsub"
- \end_inset
- Result of RNA-seq batch-correction using MOFA latent factors
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Attempting to remove the effect of LF2 (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-batchsub"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) results in batch correction comparable to ComBat (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA-ComBat-batchsub"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- )
- \end_layout
- \begin_layout Itemize
- MOFA was able to do this batch subtraction without directly using the sample
- labels (sample labels were used implicitly to select which factor to subtract)
- \end_layout
- \begin_layout Itemize
- Similarity of results shows that batch correction can't get much better
- than ComBat (despite ComBat ignoring time point)
- \end_layout
- \begin_layout Subsection
- MOFA does some interesting stuff but is mostly confirmatory in this context
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- MOFA should be a footnote to something else, not its own point
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Combine with previous subsection
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- MOFA shows great promise for accelerating discovery of major biological
- effects in multi-omics datasets
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- MOFA successfully separates biologically relevant patterns of variation
- from technical confounding factors without knowing the sample labels, by
- finding latent factors that explain variation across multiple data sets.
- \end_layout
- \begin_layout Itemize
- MOFA was added to this analysis late and played primarily a confirmatory
- role, but it was able to confirm earlier conclusions with much less prior
- information (no sample labels) and much less analyst effort/input
- \end_layout
- \begin_layout Itemize
- Less input from analyst means less opportunity to introduce unwanted bias
- into results
- \end_layout
- \begin_layout Itemize
- MOFA confirmed that the already-implemented batch correction in the RNA-seq
- data was already performing as well as possible given the limitations of
- the data
- \end_layout
- \end_deeper
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Focus on what hypotheses were tested, then select figures that show how
- those hypotheses were tested, even if the result is a negative.
- \end_layout
- \begin_layout Plain Layout
- Not every interesting result needs to be in here.
- Chapter should tell a story.
-
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Maybe reorder these sections to do RNA-seq, then ChIP-seq, then combined
- analyses?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- H3K4 and H3K27 methylation occur in broad regions and are enriched near
- promoters
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Also get
- \emph on
- median
- \emph default
- peak width and maybe other quantiles (25%, 75%)
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="5">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Histone Mark
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- # Peaks
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Mean peak width
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Genome coverage
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- FRiP
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 14965
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 3970
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1.92%
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 14.2%
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6163
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2946
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.588%
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6.57%
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 18139
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 18967
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 11.1%
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 22.5%
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:peak-calling-summary"
- \end_inset
- Peak-calling summary.
-
- \series default
- For each histone mark, the number of peaks called using SICER at an IDR
- threshold of ???, the mean width of those peaks, the fraction of the genome
- covered by peaks, and the fraction of reads in peaks (FRiP).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:peak-calling-summary"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- gives a summary of the peak calling statistics for each histone mark.
- Consistent with previous observations [CITATION NEEDED], all 3 histone
- marks occur in broad regions spanning many consecutive nucleosomes, rather
- than in sharp peaks as would be expected for a transcription factor or
- other molecule that binds to specific sites.
- This conclusion is further supported by Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:CCF-with-blacklist"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , in which a clear nucleosome-sized periodicity is visible in the cross-correlat
- ion value for each sample, indicating that each time a given mark is present
- on one histone, it is also likely to be found on adjacent histones.
- H3K27me3 enrichment in particular is substantially broader than either
- H3K4 mark, with a mean peak width of almost 19,000 bp.
- This is also reflected in the periodicity observed in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:CCF-with-blacklist"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , which remains strong much farther out for H3K27me3 than the other marks,
- showing H3K27me3 especially tends to be found on long runs of consecutive
- histones.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Ensure this figure uses the peak calls from the new analysis.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Need a control: shuffle all peaks and repeat, N times.
- Do real vs shuffled control both in a top/bottom arrangement.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Consider counting TSS inside peaks as negative number indicating how far
-
- \emph on
- inside
- \emph default
- the peak the TSS is (i.e.
- distance to nearest non-peak area).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- The H3K4 part of this figure is included in
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- as Fig.
- S2.
- Do I need to do anything about that?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/Promoter Peak Distance Profile-PAGE1-CROP.pdf
- lyxscale 50
- width 80col%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:near-promoter-peak-enrich"
- \end_inset
- Enrichment of peaks in promoter neighborhoods.
-
- \series default
- This plot shows the distribution of distances from each annotated transcription
- start site in the genome to the nearest called peak.
- Each line represents one combination of histone mark, cell type, and time
- point.
- Distributions are smoothed using kernel density estimation [CITE?].
- Transcription start sites that occur
- \emph on
- within
- \emph default
- peaks were excluded from this plot to avoid a large spike at zero that
- would overshadow the rest of the distribution.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="2">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Histone mark
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Effective promoter radius
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1 kb
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1 kb
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2.5 kb
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:effective-promoter-radius"
- \end_inset
- Effective promoter radius for each histone mark.
- \series default
- These values represent the approximate distance from transcription start
- site positions within which an excess of peaks is found, as shown in Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Problem: the effective promoter radius concept is an interesting result
- on its own, hence its placement here.
- However, it is also important in the methods section, which comes first.
- What do? Refer forward to this section? Move this section to Methods?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- All 3 histone marks tend to occur more often near promoter regions, as shown
- in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The majority of each density distribution is flat, representing the background
- density of peaks genome-wide.
- Each distribution has a peak near zero, representing an enrichment of peaks
- close to transcription start site (TSS) positions relative to the remainder
- of the genome.
- Interestingly, the
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- within which this enrichment occurs is not the same for every histone mark
- (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:effective-promoter-radius"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- For H3K4me2 and H3K4me3, peaks are most enriched within 1
- \begin_inset space ~
- \end_inset
- kbp of TSS positions, while for H3K27me3, enrichment is broader, extending
- to 2.5
- \begin_inset space ~
- \end_inset
- kbp.
- These
- \begin_inset Quotes eld
- \end_inset
- effective promoter radii
- \begin_inset Quotes erd
- \end_inset
- were used to define the promoter regions for all further analyses.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Clarify that radius depends on histone mark but
- \emph on
- not
- \emph default
- experimental condition.
-
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Consider also showing figure for distance to nearest peak center, and reference
- median peak size once that is known.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- H3K4 and H3K27 promoter methylation has broadly the expected correlation
- with gene expression
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- This section can easily be cut, especially if I can't find those plots.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- H3K4 is correlated with higher expression, and H3K27 is correlated with
- lower expression genome-wide
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Grr, gotta find these figures.
- Maybe in the old analysis? At least one of these plots is definitely in
- Sarah's paper.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Figures showing these correlations: box/violin plots of expression distributions
- with every combination of peak presence/absence in promoter
- \end_layout
- \begin_layout Itemize
- Appropriate statistical tests showing significant differences in expected
- directions
- \end_layout
- \begin_layout Subsection
- RNA-seq and H3K4 methylation patterns in naive and memory show convergence
- at day 14
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- placement p
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-promoter-PCA-group-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me2-prom"
- \end_inset
- PCoA plot of H3K4me2 promoters, after subtracting surrogate variables
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-promoter-PCA-group-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me3-prom"
- \end_inset
- PCoA plot of H3K4me3 promoters, after subtracting surrogate variables
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-promoter-PCA-group-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K27me3-prom"
- \end_inset
- PCoA plot of H3K27me3 promoters, after subtracting surrogate variables
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-final-23-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA-group"
- \end_inset
- RNA-seq PCoA showing principal coordinates 2 and 3.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-promoters"
- \end_inset
- PCoA plots for promoter ChIP-seq and expression RNA-seq data
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Check up on figure refs in this paragraph
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the patterns of variation in all 3 histone marks in the promoter
- regions of the genome using principal coordinate analysis.
- All 3 marks show a noticeable convergence between the naive and memory
- samples at day 14, visible as an overlapping of the day 14 groups on each
- plot.
- This is consistent with the counts of significantly differentially modified
- promoters and estimates of the total numbers of differentially modified
- promoters shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Number-signif-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- For all histone marks, evidence of differential modification between naive
- and memory samples was detected at every time point except day 14.
- The day 14 convergence pattern is also present in the RNA-seq data (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA-group"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), albeit in the 2nd and 3rd principal coordinates, indicating that it is
- not the most dominant pattern driving gene expression.
- Taken together, the data show that promoter histone methylation for these
- 3 histone marks and RNA expression for naive and memory cells are most
- similar at day 14, the furthest time point after activation.
- MOFA was also able to capture this day 14 convergence pattern in latent
- factor 5 (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-lf-scatter"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), which accounts for shared variation across all 3 histone marks and the
- RNA-seq data, confirming that this is a coordinated pattern across all
- 4 data sets.
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- This table is placed at the end of the subsection because the landscape
- causes a page break, which is not desired between the subsection header
- and the text.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="6" columns="7">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Number of significant promoters
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Est.
- differentially modified promoters
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Time Point
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4553
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 927
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 9967
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4149
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2404
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 567
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 278
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1570
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4370
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2145
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6598
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 5
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2313
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 139
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 490
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 9450
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1148
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4141
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 14
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Number-signif-promoters"
- \end_inset
- Number of differentially modified promoters between naive and memory cells
- at each time point after activation.
-
- \series default
- This table shows both the number of differentially modified promoters detected
- at a 10% FDR threshold (left half), and the total number of differentially
- modified promoters as estimated using the method of
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013"
- literal "false"
- \end_inset
- (right half).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Effect of promoter coverage upstream vs downstream of TSS
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- For the figures in this section, the group labels are arbitrary, so if time
- allows, it would be good to manually reorder them in a logical way, e.g.
- most upstream to most downstream.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-clusters-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood-clusters"
- \end_inset
- Average relative coverage for each bin in each cluster
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-PCA-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood-pca"
- \end_inset
- PCA of relative coverage depth, colored by K-means cluster membership.
-
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-expression-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood-expression"
- \end_inset
- Gene expression grouped by promoter coverage clusters.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- K-means clustering of promoter H3K4me2 relative coverage depth in naive
- day 0 samples.
-
- \series default
- H3K4me2 ChIP-seq reads were binned into 500-bp windows tiled across each
- promoter from 5
- \begin_inset space ~
- \end_inset
- kbp upstream to 5
- \begin_inset space ~
- \end_inset
- kbp downstream, and the logCPM values were normalized within each promoter
- to an average of 0, yielding relative coverage depths.
- These were then grouped using K-means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- ,
- \series bold
-
- \series default
- and the average bin values were plotted for each cluster (a).
- The
- \begin_inset Formula $x$
- \end_inset
- -axis is the genomic coordinate of each bin relative to the transcription
- start site, and the
- \begin_inset Formula $y$
- \end_inset
- -axis is the mean relative coverage depth of that bin across all promoters
- in the cluster.
- Each line represents the average
- \begin_inset Quotes eld
- \end_inset
- shape
- \begin_inset Quotes erd
- \end_inset
- of the promoter coverage for promoters in that cluster.
- PCA was performed on the same data, and the first two principal components
- were plotted, coloring each point by its K-means cluster identity (b).
- For each cluster, the distribution of gene expression values was plotted
- (c).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-clusters-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood-clusters"
- \end_inset
- Average relative coverage for each bin in each cluster
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-PCA-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood-pca"
- \end_inset
- PCA of relative coverage depth, colored by K-means cluster membership.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-expression-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood-expression"
- \end_inset
- Gene expression grouped by promoter coverage clusters.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- K-means clustering of promoter H3K27me3 relative coverage depth in naive
- day 0 samples.
-
- \series default
- H3K27me3 ChIP-seq reads were binned into 500-bp windows tiled across each
- promoter from 5
- \begin_inset space ~
- \end_inset
- kbp upstream to 5
- \begin_inset space ~
- \end_inset
- kbp downstream, and the logCPM values were normalized within each promoter
- to an average of 0, yielding relative coverage depths.
- These were then grouped using K-means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- ,
- \series bold
-
- \series default
- and the average bin values were plotted for each cluster (a).
- The
- \begin_inset Formula $x$
- \end_inset
- -axis is the genomic coordinate of each bin relative to the transcription
- start site, and the
- \begin_inset Formula $y$
- \end_inset
- -axis is the mean relative coverage depth of that bin across all promoters
- in the cluster.
- Each line represents the average
- \begin_inset Quotes eld
- \end_inset
- shape
- \begin_inset Quotes erd
- \end_inset
- of the promoter coverage for promoters in that cluster.
- PCA was performed on the same data, and the first two principal components
- were plotted, coloring each point by its K-means cluster identity (b).
- For each cluster, the distribution of gene expression values was plotted
- (c).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- H3K4me peaks seem to correlate with increased expression as long as they
- are anywhere near the TSS
- \end_layout
- \begin_layout Itemize
- H3K27me3 peaks can have different correlations to gene expression depending
- on their position relative to TSS (e.g.
- upstream vs downstream). Results consistent with
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Young2011"
- literal "false"
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Show the figures where the negative result ended this line of inquiry
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Subsection
- Effective promoter radius
- \end_layout
- \begin_layout Itemize
- "Promoter radius" is not constant and must be defined empirically for a
- given data set.
- Coverage within promoter radius has an expression correlation as well
- \end_layout
- \begin_layout Itemize
- Further study required to demonstrate functional consequences of effective
- promoter radius (e.g.
- show diminished association with gene expression outside radius)
- \end_layout
- \begin_layout Subsection
- Convergence
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/LaMere2016_fig8.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- LaMere 2016 Figure 8, reproduced with permission.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Look up some more references for these histone marks being involved in memory
- differentiation.
- (Ask Sarah)
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Naive-to-memory convergence implies that naive cells are differentiating
- into memory cells, and that gene expression and H3K4/K27 methylation are
- involved in this differentiation
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Convergence is consistent with LaMere 2016 Figure 8
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- (which was created without the benefit of SVA)
- \end_layout
- \begin_layout Itemize
- H3K27me3, canonically regarded as a deactivating mark, seems to have a more
- complex effect
- \end_layout
- \end_deeper
- \begin_layout Subsection
- Positional
- \end_layout
- \begin_layout Itemize
- TSS positional coverage, hints of something interesting but no clear conclusions
- \end_layout
- \begin_layout Subsection
- Workflow
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
- lyxscale 50
- width 100col%
- height 95theight%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:rulegraph"
- \end_inset
- \series bold
- Dependency graph of steps in reproducible workflow
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Discuss advantages of developing using a reproducible workflow
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Decision-making based on trying every option and running the workflow downstream
- to see the effects
- \end_layout
- \end_deeper
- \begin_layout Subsection
- Data quality issues limit conclusions
- \end_layout
- \begin_layout Chapter
- Improving array-based analyses of transplant rejection by optimizing data
- preprocessing
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Chapter author list: Me, Sunil, Tom, Padma, Dan
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Subsection
- Proper pre-processing is essential for array data
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- This section could probably use some citations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Microarrays, bead arrays, and similar assays produce raw data in the form
- of fluorescence intensity measurements, with each intensity measurement
- proportional to the abundance of some fluorescently-labelled target DNA
- or RNA sequence that base pairs to a specific probe sequence.
- However, these measurements for each probe are also affected by many technical
- confounding factors, such as the concentration of target material, strength
- of off-target binding, and the sensitivity of the imaging sensor.
- Some array designs also use multiple probe sequences for each target.
- Hence, extensive pre-processing of array data is necessary to normalize
- out the effects of these technical factors and summarize the information
- from multiple probes to arrive at a single usable estimate of abundance
- or other relevant quantity, such as a ratio of two abundances, for each
- target.
- \end_layout
- \begin_layout Standard
- The choice of pre-processing algorithms used in the analysis of an array
- data set can have a large effect on the results of that analysis.
- However, despite their importance, these steps are often neglected or rushed
- in order to get to the more scientifically interesting analysis steps involving
- the actual biology of the system under study.
- Hence, it is often possible to achieve substantial gains in statistical
- power, model goodness-of-fit, or other relevant performance measures, by
- checking the assumptions made by each preprocessing step and choosing specific
- normalization methods tailored to the specific goals of the current analysis.
- \end_layout
- \begin_layout Subsection
- Normalization for clinical microarray classifiers must be single-channel
- \end_layout
- \begin_layout Subsubsection
- Standard normalization methods are unsuitable for clinical application
- \end_layout
- \begin_layout Standard
- As the cost of performing microarray assays falls, there is increasing interest
- in using genomic assays for diagnostic purposes, such as distinguishing
- healthy transplants (TX) from transplants undergoing acute rejection (AR)
- or acute dysfunction with no rejection (ADNR).
- However, the standard normalization algorithm used for microarray data,
- Robust Multi-chip Average (RMA)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Irizarry2003a"
- literal "false"
- \end_inset
- , is not applicable in a clinical setting.
- Two of the steps in RMA, quantile normalization and probe summarization
- by median polish, depend on every array in the data set being normalized.
- This means that adding or removing any arrays from a data set changes the
- normalized values for all arrays, and data sets that have been normalized
- separately cannot be compared to each other.
- Hence, when using RMA, any arrays to be analyzed together must also be
- normalized together, and the set of arrays included in the data set must
- be held constant throughout an analysis.
- \end_layout
- \begin_layout Standard
- These limitations present serious impediments to the use of arrays as a
- diagnostic tool.
- When training a classifier, the samples to be classified must not be involved
- in any step of the training process, lest their inclusion bias the training
- process.
- Once a classifier is deployed in a clinical setting, the samples to be
- classified will not even
- \emph on
- exist
- \emph default
- at the time of training, so including them would be impossible even if
- it were statistically justifiable.
- Therefore, any machine learning application for microarrays demands that
- the normalized expression values computed for an array must depend only
- on information contained within that array.
- This would ensure that each array's normalization is independent of every
- other array, and that arrays normalized separately can still be compared
- to each other without bias.
- Such a normalization is commonly referred to as
- \begin_inset Quotes eld
- \end_inset
- single-channel normalization
- \begin_inset Quotes erd
- \end_inset
- .
- \end_layout
- \begin_layout Subsubsection
- Several strategies are available to meet clinical normalization requirements
- \end_layout
- \begin_layout Standard
- Frozen RMA (fRMA) addresses these concerns by replacing the quantile normalizati
- on and median polish with alternatives that do not introduce inter-array
- dependence, allowing each array to be normalized independently of all others
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- Quantile normalization is performed against a pre-generated set of quantiles
- learned from a collection of 850 publicly available arrays sampled from
- a wide variety of tissues in the Gene Expression Omnibus (GEO).
- Each array's probe intensity distribution is normalized against these pre-gener
- ated quantiles.
- The median polish step is replaced with a robust weighted average of probe
- intensities, using inverse variance weights learned from the same public
- GEO data.
- The result is a normalization that satisfies the requirements mentioned
- above: each array is normalized independently of all others, and any two
- normalized arrays can be compared directly to each other.
- \end_layout
- \begin_layout Standard
- One important limitation of fRMA is that it requires a separate reference
- data set from which to learn the parameters (reference quantiles and probe
- weights) that will be used to normalize each array.
- These parameters are specific to a given array platform, and pre-generated
- parameters are only provided for the most common platforms, such as Affymetrix
- hgu133plus2.
- For a less common platform, such as hthgu133pluspm, it is necessary to
- learn custom parameters from in-house data before fRMA can be used to normalize
- samples on that platform
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- One other option is the aptly-named Single Channel Array Normalization (SCAN),
- which adapts a normalization method originally designed for tiling arrays
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Piccolo2012"
- literal "false"
- \end_inset
- .
- SCAN is truly single-channel in that it does not require a set of normalization
- parameters estimated from an external set of reference samples like fRMA
- does.
- \end_layout
- \begin_layout Subsection
- Heteroskedasticity must be accounted for in methylation array data
- \end_layout
- \begin_layout Subsubsection
- Methylation array preprocessing induces heteroskedasticity
- \end_layout
- \begin_layout Standard
- DNA methylation arrays are a relatively new kind of assay that uses microarrays
- to measure the degree of methylation on cytosines in specific regions arrayed
- across the genome.
- First, bisulfite treatment converts all unmethylated cytosines to uracil
- (which then become thymine after amplification) while leaving methylated
- cytosines unaffected.
- Then, each target region is interrogated with two probes: one binds to
- the original genomic sequence and interrogates the level of methylated
- DNA, and the other binds to the same sequence with all cytosines replaced
- by thymidines and interrogates the level of unmethylated DNA.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/sigmoid.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Sigmoid-beta-m-mapping"
- \end_inset
- \series bold
- Sigmoid shape of the mapping between β and M values
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- After normalization, these two probe intensities are summarized in one of
- two ways, each with advantages and disadvantages.
- β
- \series bold
-
- \series default
- values, interpreted as fraction of DNA copies methylated, range from 0 to
- 1.
- β
- \series bold
-
- \series default
- values are conceptually easy to interpret, but the constrained range makes
- them unsuitable for linear modeling, and their error distributions are
- highly non-normal, which also frustrates linear modeling.
- M-values, interpreted as the log ratio of methylated to unmethylated copies,
- are computed by mapping the beta values from
- \begin_inset Formula $[0,1]$
- \end_inset
- onto
- \begin_inset Formula $(-\infty,+\infty)$
- \end_inset
- using a sigmoid curve (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Sigmoid-beta-m-mapping"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This transformation results in values with better statistical properties:
- the unconstrained range is suitable for linear modeling, and the error
- distributions are more normal.
- Hence, most linear modeling and other statistical testing on methylation
- arrays is performed using M-values.
- \end_layout
- \begin_layout Standard
- However, the steep slope of the sigmoid transformation near 0 and 1 tends
- to exaggerate small differences in β values near those extremes, which
- in turn amplifies the error in those values, leading to a U-shaped trend
- in the mean-variance curve: extreme values have higher variances than values
- near the middle.
- This mean-variance dependency must be accounted for when fitting the linear
- model for differential methylation, or else the variance will be systematically
- overestimated for probes with moderate M-values and underestimated for
- probes with extreme M-values.
- \end_layout
- \begin_layout Subsubsection
- The voom method for RNA-seq data can model M-value heteroskedasticity
- \end_layout
- \begin_layout Standard
- RNA-seq read count data are also known to show heteroskedasticity, and the
- voom method was developed for modeling this heteroskedasticity by estimating
- the mean-variance trend in the data and using this trend to assign precision
- weights to each observation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2013"
- literal "false"
- \end_inset
- .
- While methylation array data are not derived from counts and have a very
- different mean-variance relationship from that of typical RNA-seq data,
- the voom method makes no specific assumptions on the shape of the mean-variance
- relationship - it only assumes that the relationship is smooth enough to
- model using a lowess curve.
- Hence, the method is sufficiently general to model the mean-variance relationsh
- ip in methylation array data.
- However, the standard implementation of voom assumes that the input is
- given in raw read counts, and it must be adapted to run on methylation
- M-values.
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Subsection
- Evaluation of classifier performance with different normalization methods
- \end_layout
- \begin_layout Standard
- For testing different expression microarray normalizations, a data set of
- 157 hgu133plus2 arrays was used, consisting of blood samples from kidney
- transplant patients whose grafts had been graded as TX, AR, or ADNR via
- biopsy and histology (46 TX, 69 AR, 42 ADNR)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kurian2014"
- literal "true"
- \end_inset
- .
- Additionally, an external validation set of 75 samples was gathered from
- public GEO data (37 TX, 38 AR, no ADNR).
-
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Find appropriate GEO identifiers if possible.
- Kurian 2014 says GSE15296, but this seems to be different data.
- I also need to look up the GEO accession for the external validation set.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To evaluate the effect of each normalization on classifier performance,
- the same classifier training and validation procedure was used after each
- normalization method.
- The PAM package was used to train a nearest shrunken centroid classifier
- on the training set and select the appropriate threshold for centroid shrinking.
- Then the trained classifier was used to predict the class probabilities
- of each validation sample.
- From these class probabilities, ROC curves and area-under-curve (AUC) values
- were generated
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Turck2011"
- literal "false"
- \end_inset
- .
- Each normalization was tested on two different sets of training and validation
- samples.
- For internal validation, the 115 TX and AR arrays in the internal set were
- split at random into two equal sized sets, one for training and one for
- validation, each containing the same numbers of TX and AR samples as the
- other set.
- For external validation, the full set of 115 TX and AR samples was used
- as a training set, and the 75 external TX and AR samples were used as the
- validation set.
- Thus, 2 ROC curves and AUC values were generated for each normalization
- method: one internal and one external.
- Because the external validation set contains no ADNR samples, only classificati
- on of TX and AR samples was considered.
- The ADNR samples were included during normalization but excluded from all
- classifier training and validation.
- This ensures that the performance on internal and external validation sets
- is directly comparable, since both are performing the same task: distinguishing
- TX from AR.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Summarize the get.best.threshold algorithm for PAM threshold selection, or
- just put the code online?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Six different normalization strategies were evaluated.
- First, 2 well-known non-single-channel normalization methods were considered:
- RMA and dChip
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Li2001,Irizarry2003a"
- literal "false"
- \end_inset
- .
- Since RMA produces expression values on a log2 scale and dChip does not,
- the values from dChip were log2 transformed after normalization.
- Next, RMA and dChip followed by Global Rank-invariant Set Normalization
- (GRSN) were tested
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Pelz2008"
- literal "false"
- \end_inset
- .
- Post-processing with GRSN does not turn RMA or dChip into single-channel
- methods, but it may help mitigate batch effects and is therefore useful
- as a benchmark.
- Lastly, the two single-channel normalization methods, fRMA and SCAN, were
- tested
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010,Piccolo2012"
- literal "false"
- \end_inset
- .
- When evaluating internal validation performance, only the 157 internal samples
- were normalized; when evaluating external validation performance, all 157
- internal samples and 75 external samples were normalized together.
- \end_layout
- \begin_layout Standard
- For demonstrating the problem with separate normalization of training and
- validation data, one additional normalization was performed: the internal
- and external sets were each normalized separately using RMA, and the normalized
- data for each set were combined into a single set with no further attempts
- at normalizing between the two sets.
- This represents approximately how RMA would have to be used in a clinical
- setting, where the samples to be classified are not available at the time
- the classifier is trained.
- \end_layout
- \begin_layout Subsection
- Generating custom fRMA vectors for hthgu133pluspm array platform
- \end_layout
- \begin_layout Standard
- In order to enable fRMA normalization for the hthgu133pluspm array platform,
- custom fRMA normalization vectors were trained using the frmaTools package
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- Separate vectors were created for two types of samples: kidney graft biopsy
- samples and blood samples from graft recipients.
- For training, 341 kidney biopsy samples from 2 data sets and 965 blood
- samples from 5 data sets were used as the reference set.
- Arrays were grouped into batches based on unique combinations of sample
- type (blood or biopsy), diagnosis (TX, AR, etc.), data set, and scan date.
- Thus, each batch represents arrays of the same kind that were run together
- on the same day.
- For estimating the probe inverse variance weights, frmaTools requires equal-siz
- ed batches, which means a batch size must be chosen, and then batches smaller
- than that size must be ignored, while batches larger than the chosen size
- must be downsampled.
- This downsampling is performed randomly, so the sampling process is repeated
- 5 times and the resulting normalizations are compared to each other.
- \end_layout
- \begin_layout Standard
- To evaluate the consistency of the generated normalization vectors, the
- 5 fRMA vector sets generated from 5 random batch samplings were each used
- to normalize the same 20 randomly selected samples from each tissue.
- Then the normalized expression values for each probe on each array were
- compared across all normalizations.
- Each fRMA normalization was also compared against the normalized expression
- values obtained by normalizing the same 20 samples with ordinary RMA.
- \end_layout
- \begin_layout Subsection
- Modeling methylation array M-value heteroskedasticity in linear models with
- modified voom implementation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Put code on Github and reference it.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To investigate whether DNA methylation could be used to distinguish
- between healthy and dysfunctional transplants, a data set of 78 Illumina
- 450k methylation arrays from human kidney graft biopsies was analyzed for
- differential methylation between 4 transplant statuses: healthy transplant
- (TX), transplants undergoing acute rejection (AR), acute dysfunction with
- no rejection (ADNR), and chronic allograft nephropathy (CAN).
- The data consisted of 33 TX, 9 AR, 8 ADNR, and 28 CAN samples.
- The uneven group sizes are a result of taking the biopsy samples before
- the eventual fate of the transplant was known.
- Each sample was additionally annotated with a donor ID (anonymized), Sex,
- Age, Ethnicity, Creatinine Level, and Diabetes diagnosis (all samples
- in this data set came from patients with either Type 1 or Type 2 diabetes).
-
- \end_layout
- \begin_layout Standard
- The intensity data were first normalized using subset-quantile within array
- normalization (SWAN)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Maksimovic2012"
- literal "false"
- \end_inset
- , then converted to intensity ratios (beta values)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Aryee2014"
- literal "false"
- \end_inset
- .
- Any probes binding to loci that overlapped annotated SNPs were dropped,
- and the annotated sex of each sample was verified against the sex inferred
- from the ratio of median probe intensities for the X and Y chromosomes.
- Then, the ratios were transformed to M-values.
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="6">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- random effect
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- eBayes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- weights
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- voom
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Summary-of-meth-analysis"
- \end_inset
- Summary of analysis variants for methylation array data.
-
- \series default
- Each analysis included a different set of steps to adjust or account for
- various systematic features of the data.
- Random effect: The model included a random effect accounting for correlation
- between samples from the same patient
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005a"
- literal "false"
- \end_inset
- ; eBayes: Empirical Bayes squeezing of per-probe variances toward the mean-varia
- nce trend
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2015"
- literal "false"
- \end_inset
- ; SVA: Surrogate variable analysis to account for unobserved confounders
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007"
- literal "false"
- \end_inset
- ; Weights: Estimate sample weights to account for differences in sample
- quality
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liu2015,Ritchie2006"
- literal "false"
- \end_inset
- ; voom: Use mean-variance trend to assign individual observation weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2013"
- literal "false"
- \end_inset
- .
- See the text for a more detailed explanation of each step.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- From the M-values, a series of parallel analyses was performed, each adding
- additional steps into the model fit to accommodate a feature of the data
- (see Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Summary-of-meth-analysis"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- For analysis A, a
- \begin_inset Quotes eld
- \end_inset
- basic
- \begin_inset Quotes erd
- \end_inset
- linear modeling analysis was performed, compensating for known confounders
- by including terms for the factor of interest (transplant status) as well
- as the known biological confounders: sex, age, ethnicity, and diabetes.
- Since some samples came from the same patients at different times, the
- intra-patient correlation was modeled as a random effect, estimating a
- shared correlation value across all probes
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005a"
- literal "false"
- \end_inset
- .
- Then the linear model was fit, and the variance was modeled using empirical
- Bayes squeezing toward the mean-variance trend
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2015"
- literal "false"
- \end_inset
- .
- Finally, t-tests or F-tests were performed as appropriate for each test:
- t-tests for single contrasts, and F-tests for multiple contrasts.
- P-values were corrected for multiple testing using the Benjamini-Hochberg
- procedure for FDR control
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- For analysis B, surrogate variable analysis (SVA) was used to infer
- additional unobserved sources of heterogeneity in the data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007"
- literal "false"
- \end_inset
- .
- These surrogate variables were added to the design matrix before fitting
- the linear model.
- In addition, sample quality weights were estimated from the data and used
- during linear modeling to down-weight the contribution of highly variable
- arrays while increasing the weight to arrays with lower variability
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2006"
- literal "false"
- \end_inset
- .
- The remainder of the analysis proceeded as in analysis A.
- For analysis C, the voom method was adapted to run on methylation array
- data and used to model and correct for the mean-variance trend using individual
- observation weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2013"
- literal "false"
- \end_inset
- , which were combined with the sample weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liu2015,Ritchie2006"
- literal "false"
- \end_inset
- .
- Each time weights were used, they were estimated once before estimating
- the random effect correlation value, and then the weights were re-estimated
- taking the random effect into account.
- The remainder of the analysis proceeded as in analysis B.
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Improve subsection titles in this section
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- fRMA eliminates unwanted dependence of classifier training on normalization
- strategy caused by RMA
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Write figure legends
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsubsection
- Separate normalization with RMA introduces unwanted biases in classification
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/predplot.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Classifier-probabilities-RMA"
- \end_inset
- \series bold
- Classifier probabilities on validation samples when normalized with RMA
- together vs.
- separately.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To demonstrate the problem with non-single-channel normalization methods,
- we considered the problem of training a classifier to distinguish TX from
- AR using the samples from the internal set as training data, evaluating
- performance on the external set.
- First, training and evaluation were performed after normalizing all array
- samples together as a single set using RMA, and second, the internal samples
- were normalized separately from the external samples and the training and
- evaluation were repeated.
- For each sample in the validation set, the classifier probabilities from
- both classifiers were plotted against each other (Fig.
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Classifier-probabilities-RMA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- As expected, separate normalization biases the classifier probabilities,
- resulting in several misclassifications.
- In this case, the bias from separate normalization causes the classifier
- to assign a lower probability of AR to every sample.
-
- \end_layout
- \begin_layout Subsubsection
- fRMA and SCAN maintain classification performance while eliminating
- dependence on normalization strategy
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/ROC-TXvsAR-internal.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ROC-PAM-int"
- \end_inset
- ROC curves for PAM on internal validation data using different normalization
- strategies
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="7" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Normalization
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Single-channel?
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Internal Val.
- AUC
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- External Val.
- AUC
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- RMA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.852
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.713
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- dChip
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.891
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.657
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- RMA + GRSN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.816
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.750
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- dChip + GRSN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.875
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.642
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- fRMA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.863
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.718
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- SCAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.853
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.689
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:AUC-PAM"
- \end_inset
- \series bold
- AUC values for internal and external validation with 6 different normalization
- strategies.
- \series default
- Only fRMA and SCAN are single-channel normalizations.
- The other 4 normalizations are for comparison.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- For internal validation, the 6 methods' AUC values ranged from 0.816 to 0.891,
- as shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:AUC-PAM"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- Among the non-single-channel normalizations, dChip outperformed RMA, while
- GRSN reduced the AUC values for both dChip and RMA.
- Both single-channel methods, fRMA and SCAN, slightly outperformed RMA,
- with fRMA ahead of SCAN.
- However, the difference between RMA and fRMA is still quite small.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ROC-PAM-int"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows that the ROC curves for RMA, dChip, and fRMA look very similar and
- relatively smooth, while both GRSN curves and the curve for SCAN have a
- more jagged appearance.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/ROC-TXvsAR-external.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ROC-PAM-ext"
- \end_inset
- ROC curves for PAM on external validation data using different normalization
- strategies
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- For external validation, as expected, all the AUC values are lower than
- the internal validations, ranging from 0.642 to 0.750 (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:AUC-PAM"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- With or without GRSN, RMA shows its dominance over dChip in this more challengi
- ng test.
- Unlike in the internal validation, GRSN actually improves the classifier
- performance for RMA, although it does not for dChip.
- Once again, both single-channel methods perform about on par with RMA,
- with fRMA performing slightly better and SCAN performing a bit worse.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ROC-PAM-ext"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the ROC curves for the external validation test.
- As expected, none of them are as clean-looking as the internal validation
- ROC curves.
- The curves for RMA, RMA+GRSN, and fRMA all look similar, while the other
- curves look more divergent.
- \end_layout
- \begin_layout Subsection
- fRMA with custom-generated vectors enables normalization on hthgu133pluspm
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/batchsize_batches.pdf
- lyxscale 50
- height 35theight%
- groupId frmatools-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:batch-size-batches"
- \end_inset
- \series bold
- Number of batches usable in fRMA probe weight learning as a function of
- batch size.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/batchsize_samples.pdf
- lyxscale 50
- height 35theight%
- groupId frmatools-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:batch-size-samples"
- \end_inset
- \series bold
- Number of samples usable in fRMA probe weight learning as a function of
- batch size.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:frmatools-batch-size"
- \end_inset
- Effect of batch size selection on number of batches and number of samples
- included in fRMA probe weight learning.
-
- \series default
- For batch sizes ranging from 3 to 15, the number of batches (a) and samples
- (b) included in probe weight training were plotted for biopsy (BX) and
- blood (PAX) samples.
- The selected batch size, 5, is marked with a dotted vertical line.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In order to enable use of fRMA to normalize hthgu133pluspm, a custom set
- of fRMA vectors was created.
- First, an appropriate batch size was chosen by looking at the number of
- batches and number of samples included as a function of batch size (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:frmatools-batch-size"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- For a given batch size, all batches with fewer samples than the chosen
- size must be ignored during training, while larger batches must be randomly
- downsampled to the chosen size.
- Hence, the number of samples included for a given batch size equals the
- batch size times the number of batches with at least that many samples.
- From Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:batch-size-samples"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , it is apparent that a batch size of 8 maximizes the number of samples
- included in training.
- Increasing the batch size beyond this causes too many smaller batches to
- be excluded, reducing the total number of samples for both tissue types.
- However, a batch size of 8 is not necessarily optimal.
- The article introducing frmaTools concluded that it was highly advantageous
- to use a smaller batch size in order to include more batches, even at the
- expense of including fewer total samples in training
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- To strike an appropriate balance between more batches and more samples,
- a batch size of 5 was chosen.
- For both blood and biopsy samples, this increased the number of batches
- included by 10, with only a modest reduction in the number of samples compared
- to a batch size of 8.
- With a batch size of 5, 26 batches of biopsy samples and 46 batches of
- blood samples were available.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/M-BX-violin.pdf
- lyxscale 40
- width 45col%
- groupId m-violin
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:m-bx-violin"
- \end_inset
- \series bold
- Violin plot of inter-normalization log ratios for biopsy samples.
-
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/M-PAX-violin.pdf
- lyxscale 40
- width 45col%
- groupId m-violin
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:m-pax-violin"
- \end_inset
- \series bold
- Violin plot of inter-normalization log ratios for blood samples.
-
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- Violin plot of log ratios between normalizations for 20 biopsy samples.
-
- \series default
- Each of 20 randomly selected samples was normalized with RMA and with 5
- different sets of fRMA vectors.
- The distribution of log ratios between normalized expression values, aggregated
- across all 20 arrays, was plotted for each pair of normalizations.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Since fRMA training requires equal-size batches, larger batches are downsampled
- randomly.
- This introduces a nondeterministic step in the generation of normalization
- vectors.
- To show that this randomness does not substantially change the outcome,
- the random downsampling and subsequent vector learning was repeated 5 times,
- with a different random seed each time.
- 20 samples were selected at random as a test set and normalized with each
- of the 5 sets of fRMA normalization vectors as well as ordinary RMA, and
- the normalized expression values were compared across normalizations.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows a summary of these comparisons for biopsy samples.
- Comparing RMA to each of the 5 fRMA normalizations, the distribution of
- log ratios is somewhat wide, indicating that the normalizations disagree
- on the expression values of a fair number of probe sets.
- In contrast, in comparisons of fRMA against fRMA, the vast majority of probe
- sets have very small log ratios, indicating a very high agreement between
- the normalized values generated by the two normalizations.
- This shows that the fRMA normalization's behavior is not very sensitive
- to the random downsampling of larger batches during training.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-BX-RMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ma-bx-rma-frma"
- \end_inset
- \series bold
- RMA vs.
- fRMA for biopsy samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-BX-fRMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ma-bx-frma-frma"
- \end_inset
- \series bold
- fRMA vs fRMA for biopsy samples.
-
- \series default
- Two different fRMA normalizations using vectors from two different batch
- samplings were compared.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-PAX-RMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-PAX-rma-frma"
- \end_inset
- \series bold
- RMA vs.
- fRMA for blood samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-PAX-fRMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-PAX-frma-frma"
- \end_inset
- \series bold
- fRMA vs fRMA for blood samples.
-
- \series default
- Two different fRMA normalizations using vectors from two different batch
- samplings were compared.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Representative-MA-plots"
- \end_inset
- Representative MA plots comparing RMA and custom fRMA normalizations.
-
- \series default
- For each plot, 20 samples were normalized using 2 different normalizations,
- and then averages and log ratios were computed between the two different
- normalizations for every probe.
- Density of points is represented by darkness of shading, and individual
- outlier points are plotted.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ma-bx-rma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows an MA plot of the RMA-normalized values against the fRMA-normalized
- values for the same probe sets and arrays, corresponding to the first row
- of Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This MA plot shows that not only is there a wide distribution of M-values,
- but the trend of M-values is dependent on the average normalized intensity.
- This is expected, since the overall trend represents the differences in
- the quantile normalization step.
- When running RMA, only the quantiles for these specific 20 arrays are used,
- while for fRMA the quantile distribution is taken from all arrays used
- in training.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ma-bx-frma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows a similar MA plot comparing 2 different fRMA normalizations, correspondin
- g to the 6th row of Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The MA plot is very tightly centered around zero with no visible trend.
- Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-pax-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-PAX-rma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-PAX-frma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- show exactly the same information for the blood samples, once again comparing
- the normalized expression values between normalizations for all probe sets
- across 20 randomly selected test arrays.
- Once again, there is a wider distribution of log ratios between RMA-normalized
- values and fRMA-normalized values, and a much tighter distribution when comparing
- different fRMA normalizations to each other, indicating that the fRMA training
- process is robust to random batch downsampling for the blood samples as
- well.
- \end_layout
- \begin_layout Subsection
- SVA, voom, and array weights improve model fit for methylation array data
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Fix axis labels:
- \begin_inset Quotes eld
- \end_inset
- log2 M-value
- \begin_inset Quotes erd
- \end_inset
- is redundant because M-values are already log scale
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/meanvar-trends-PAGE1-CROP-RASTER.png
- lyxscale 15
- width 30col%
- groupId voomaw-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-basic"
- \end_inset
- Mean-variance trend for analysis A.
-
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/meanvar-trends-PAGE1-CROP-RASTER.png
- lyxscale 15
- width 30col%
- groupId voomaw-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-sva-aw"
- \end_inset
- Mean-variance trend for analysis B.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/meanvar-trends-PAGE2-CROP-RASTER.png
- lyxscale 15
- width 30col%
- groupId voomaw-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-sva-voomaw"
- \end_inset
- Mean-variance trend after voom modeling in analysis C.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- Mean-variance trend modeling in methylation array data.
-
- \series default
- The log2(standard deviation) for each probe is plotted against the probe's
- average M-value across all samples as a black point, with some transparency
- to make overplotting more visible, since there are about 450,000 points.
- Density of points is also indicated by the dark blue contour lines.
- The prior variance trend estimated by eBayes is shown in light blue, while
- the lowess trend of the points is shown in red.
-
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-basic"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the relationship between the mean M-value and the standard deviation
- calculated for each probe in the methylation array data set.
- A few features of the data are apparent.
- First, the data are very strongly bimodal, with peaks in the density around
- M-values of +4 and -4.
- These modes correspond to methylation sites that are nearly 100% methylated
- and nearly 100% unmethylated, respectively.
- The strong bimodality indicates that a majority of probes interrogate sites
- that fall into one of these two categories.
- The points in between these modes represent sites that are either partially
- methylated in many samples, or are fully methylated in some samples and
- fully unmethylated in other samples, or some combination.
- The next visible feature of the data is the W-shaped variance trend.
- The upticks in the variance trend on either side are expected, based on
- the sigmoid transformation exaggerating small differences at extreme M-values
- (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Sigmoid-beta-m-mapping"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- However, the uptick in the center is interesting: it indicates that sites
- that are not constitutively methylated or unmethylated have a higher
- variance.
- This could be a genuine biological effect, or it could be spurious noise
- that is only observable at sites with varying methylation.
- \end_layout
- \begin_layout Standard
- In Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , we see the mean-variance trend for the same methylation array data, this
- time with surrogate variables and sample quality weights estimated from
- the data and included in the model.
- As expected, the overall average variance is smaller, since the surrogate
- variables account for some of the variance.
- In addition, the uptick in variance in the middle of the M-value range
- has disappeared, turning the W shape into a wide U shape.
- This indicates that the excess variance in the probes with intermediate
- M-values was explained by systematic variations not correlated with known
- covariates, and these variations were modeled by the surrogate variables.
- The result is a nearly flat variance trend for the entire intermediate
- M-value range from about -3 to +3.
- In contrast, the excess variance at the extremes was not
- \begin_inset Quotes eld
- \end_inset
- absorbed
- \begin_inset Quotes erd
- \end_inset
- by the surrogate variables and remains in the plot, indicating that this
- variation has no systematic component: probes with extreme M-values are
- uniformly more variable across all samples, as expected.
-
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-voomaw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the mean-variance trend after fitting the model with the observation
- weights assigned by voom based on the mean-variance trend shown in Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- As expected, the weights exactly counteract the trend in the data, resulting
- in a nearly flat trend centered vertically at 1 (i.e.
- 0 on the log scale).
- This shows that the observations with extreme M-values have been appropriately
- down-weighted to account for the fact that the noise in those observations
- has been amplified by the non-linear M-value transformation.
- In turn, this gives relatively more weight to observations in the middle
- region, which are more likely to correspond to probes measuring interesting
- biology (not constitutively methylated or unmethylated).
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="3">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Covariate
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Test used
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- p-value
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Transplant Status
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- F-test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.404
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Diabetes Diagnosis
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- t-test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.00106
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Sex
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- t-test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.148
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Age
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- linear regression
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.212
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:weight-covariate-tests"
- \end_inset
- Association of sample weights with clinical covariates in methylation array
- data.
-
- \series default
- Computed sample quality log weights were tested for significant association
- with each of the variables in the model (1st column).
- An appropriate test was selected for each variable (2nd column).
- P-values for significant association are shown in the 3rd column.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Redo the sample weight boxplot with notches and without fill colors (and
- update the legend)
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/sample-weights-PAGE3-CROP.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:diabetes-sample-weights"
- \end_inset
- \series bold
- Boxplot of sample quality weights grouped by diabetes diagnosis.
-
- \series default
- Samples were grouped based on diabetes diagnosis, and the distribution of
- sample quality weights for each diagnosis was plotted.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To determine whether any of the known experimental factors had an impact
- on data quality, the sample quality weights estimated from the data were
- tested for association with each of the experimental factors (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:weight-covariate-tests"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Diabetes diagnosis was found to have a potentially significant association
- with the sample weights, with a t-test p-value of
- \begin_inset Formula $1.06\times10^{-3}$
- \end_inset
- .
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:diabetes-sample-weights"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the distribution of sample weights grouped by diabetes diagnosis.
- The samples from patients with Type 2 diabetes were assigned significantly
- lower weights than those from patients with Type 1 diabetes.
- This indicates that the Type 2 diabetes samples had an overall higher variance
- on average across all probes.
-
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Consider transposing these tables
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Contrast
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs AR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 25
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 22
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs ADNR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 7
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 338
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 369
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs CAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 231
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 278
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:methyl-num-signif"
- \end_inset
- Number of probes significant at 10% FDR.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Contrast
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs AR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 10,063
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 11,225
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs ADNR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 27
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 12,674
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 13,086
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs CAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 966
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 20,039
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 20,955
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:methyl-est-nonnull"
- \end_inset
- Estimated number of non-null tests, using the method of
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013"
- literal "false"
- \end_inset
- .
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- Estimates of degree of differential methylation for each contrast in
- each analysis.
-
- \series default
- For each of the analyses in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Summary-of-meth-analysis"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , these tables show the number of probes called significantly differentially
- methylated at a threshold of 10% FDR for each comparison between TX and
- the other 3 transplant statuses (
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-num-signif"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) and the estimated total number of probes that are differentially methylated
- (
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-est-nonnull"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \series bold
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE1.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- AR vs.
- TX, Analysis A
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE2.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- ADNR vs.
- TX, Analysis A
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE3.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- CAN vs.
- TX, Analysis A
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \series bold
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE1.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- AR vs.
- TX, Analysis B
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE2.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- ADNR vs.
- TX, Analysis B
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE3.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- CAN vs.
- TX, Analysis B
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \series bold
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE1.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- AR vs.
- TX, Analysis C
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE2.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- ADNR vs.
- TX, Analysis C
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE3.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- CAN vs.
- TX, Analysis C
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meth-p-value-histograms"
- \end_inset
- Probe p-value histograms for each contrast in each analysis.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-num-signif"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the number of significantly differentially methylated probes reported
- by each analysis for each comparison of interest at an FDR of 10%.
- As expected, the more elaborate analyses, B and C, report more significant
- probes than the more basic analysis A, consistent with the conclusions
- above that the data contain hidden systematic variations that must be modeled.
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-est-nonnull"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the estimated number of differentially methylated probes for each test
- from each analysis.
- This was computed by estimating the proportion of null hypotheses that
- were true using the method of
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013"
- literal "false"
- \end_inset
- and subtracting that fraction of the probes from the total number of probes, yielding
- an estimate of the number of null hypotheses that are false based on the
- distribution of p-values across the entire dataset.
- Note that this does not identify which null hypotheses should be rejected
- (i.e.
- which probes are significant); it only estimates the true number of such
- probes.
- Once again, analyses B and C result in much larger estimates for the number
- of differentially methylated probes.
- In this case, analysis C, the only analysis that includes voom, estimates
- the largest number of differentially methylated probes for all 3 contrasts.
- If the assumptions of all the methods employed hold, then this represents
- a gain in statistical power over the simpler analysis A.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meth-p-value-histograms"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the p-value distributions for each test, from which the numbers in
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-est-nonnull"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- were generated.
- The distributions for analysis A all have a dip in density near zero, which
- is a strong sign of a poor model fit.
- The histograms for analyses B and C are more well-behaved, with a uniform
- component stretching all the way from 0 to 1 representing the probes for
- which the null hypothesis is true (no differential methylation), and a
- zero-biased component representing the probes for which the null hypothesis
- is false (differentially methylated).
- These histograms do not indicate any major issues with the model fit.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Maybe include the PCA plots before/after SVA effect subtraction?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Subsection
- fRMA achieves clinically applicable normalization without sacrificing classifica
- tion performance
- \end_layout
- \begin_layout Standard
- As shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Classifier-probabilities-RMA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , improper normalization, particularly separate normalization of training
- and test samples, leads to unwanted biases in classification.
- In a controlled experimental context, it is always possible to correct
- this issue by normalizing all experimental samples together.
- However, because it is not feasible to normalize all samples together in
- a clinical context, a single-channel normalization is required.
-
- \end_layout
- \begin_layout Standard
- The major concern in using a single-channel normalization is that non-single-cha
- nnel methods can share information between arrays to improve the normalization,
- and single-channel methods risk sacrificing the gains in normalization
- accuracy that come from this information sharing.
- In the case of RMA, this information sharing is accomplished through quantile
- normalization and median polish steps.
- The need for information sharing in quantile normalization can easily be
- removed by learning a fixed set of quantiles from external data and normalizing
- each array to these fixed quantiles, instead of the quantiles of the data
- itself.
- As long as the fixed quantiles are reasonable, the result will be similar
- to standard RMA.
- However, there is no analogous way to eliminate cross-array information
- sharing in the median polish step, so fRMA replaces this with a weighted
- average of probes on each array, with the weights learned from external
- data.
- This step of fRMA has the greatest potential to diverge from RMA in undesirable
- ways.
- \end_layout
- \begin_layout Standard
- However, when run on real data, fRMA performed at least as well as RMA in
- both the internal validation and external validation tests.
- This shows that fRMA can be used to normalize individual clinical samples
- in a class prediction context without sacrificing the classifier performance
- that would be obtained by using the more well-established RMA for normalization.
- The other single-channel normalization method considered, SCAN, showed
- some loss of AUC in the external validation test.
- Based on these results, fRMA is the preferred normalization for clinical
- samples in a class prediction context.
- \end_layout
- \begin_layout Subsection
- Robust fRMA vectors can be generated for new array platforms
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Look up the exact numbers, do a find & replace for
- \begin_inset Quotes eld
- \end_inset
- 850
- \begin_inset Quotes erd
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- The published fRMA normalization vectors for the hgu133plus2 platform were
- generated from a set of about 850 samples chosen from a wide range of tissues,
- which the authors determined was sufficient to generate a robust set of
- normalization vectors that could be applied across all tissues
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- Since we only had hthgu133pluspm samples for 2 tissues of interest, our needs were
- more modest.
- Even using only 130 samples in 26 batches of 5 samples each for kidney
- biopsies, we were able to train a robust set of fRMA normalization vectors
- that were not meaningfully affected by the random selection of 5 samples
- from each batch.
- As expected, the training process was just as robust for the blood samples
- with 230 samples in 46 batches of 5 samples each.
- Because these vectors were each generated using training samples from a
- single tissue, they are not suitable for general use, unlike the vectors
- provided with fRMA itself.
- They are purpose-built for normalizing a specific type of sample on a specific
- platform.
- This is a mostly acceptable limitation in the context of developing a machine
- learning classifier for diagnosing a disease based on samples of a specific
- tissue.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- How to bring up that these custom vectors were used in another project by
- someone else that was never published?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Methylation array data can be successfully analyzed using existing techniques,
- but machine learning poses additional challenges
- \end_layout
- \begin_layout Standard
- Analysis strategies B and C both yield a reasonable analysis, with
- a mean-variance trend that matches the expected behavior for the non-linear
- M-value transformation (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) and well-behaved p-value distributions (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meth-p-value-histograms"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- These two analyses also yield similar numbers of significant probes (Table
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-num-signif"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) and similar estimates of the number of differentially methylated probes
- (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-est-nonnull"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The main difference between these two analyses is the method used to account
- for the mean-variance trend.
- In analysis B, the trend is estimated and applied at the probe level: each
- probe's estimated variance is squeezed toward the trend using an empirical
- Bayes procedure (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- In analysis C, the trend is still estimated at the probe level, but instead
- of estimating a single variance value shared across all observations for
- a given probe, the voom method computes an initial estimate of the variance
- for each observation individually based on where its model-fitted M-value
- falls on the trend line and then assigns inverse-variance weights to model
- the difference in variance between observations.
- An overall variance is still estimated for each probe using the same empirical
- Bayes method, but now the residual trend is flat (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-voomaw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), and the mean-variance trend is modeled by scaling the probe's estimated
- variance for each observation using the weights computed by voom.
- The difference between these two methods is analogous to the difference
- between a t-test with equal variance and a t-test with unequal variance,
- except that the unequal group variances used in the latter test are estimated
- based on the mean-variance trend from all the probes rather than the data
- for the specific probe being tested, thus stabilizing the group variance
- estimates by sharing information between probes.
- In practice, allowing voom to model the variance using observation weights
- in this manner allows the linear model fit to concentrate statistical power
- where it will do the most good.
- For example, if a particular probe's M-values are always at the extreme
- of the M-value range (e.g.
- less than -4) for ADNR samples, but the M-values for that probe in TX and
- CAN samples are within the flat region of the mean-variance trend (between
- -3 and +3), voom is able to down-weight the contribution of the high-variance
- M-values from the ADNR samples in order to gain more statistical power
- while testing for differential methylation between TX and CAN.
- In contrast, modeling the mean-variance trend only at the probe level would
- combine the high-variance ADNR samples and lower-variance samples from
- other conditions and estimate an intermediate variance for this probe.
- In practice, analysis B shows that this approach is adequate, but the voom
- approach in analysis C is at least as good on all model fit criteria and
- yields a larger estimate for the number of differentially methylated genes.
- \end_layout
- \begin_layout Standard
- The significant association of diabetes diagnosis with sample quality is
- interesting.
- The samples with Type 2 diabetes tended to have more variation, averaged
- across all probes, than those with Type 1 diabetes.
- This is consistent with the consensus that Type 2 diabetes and the associated
- metabolic syndrome represent a broad dysregulation of the body's endocrine
- signalling related to metabolism [citation needed].
- This dysregulation could easily manifest as a greater degree of variation
- in the DNA methylation patterns of affected tissues.
- In contrast, Type 1 diabetes has a more specific cause and effect, so a
- less variable methylation signature is expected.
- \end_layout
- \begin_layout Standard
- This preliminary analysis suggests that some degree of differential methylation
- exists between TX and each of the three types of transplant dysfunction
- studied.
- Hence, it may be feasible to train a classifier to diagnose transplant
- dysfunction from DNA methylation array data.
- However, the major importance of both SVA and sample quality weighting
- for proper modeling of this data poses significant challenges for any attempt
- at machine learning on data of similar quality.
- While these are easily used in a modeling context with full sample information,
- neither of these methods is directly applicable in a machine learning context,
- where the diagnosis is not known ahead of time.
- If a machine learning approach for methylation-based diagnosis is to be
- pursued, it will either require machine-learning-friendly methods to address
- the same systematic trends in the data that SVA and sample quality weighting
- address, or it will require higher quality data with substantially less
- systematic perturbation of the data.
- \end_layout
- \begin_layout Chapter
- Globin-blocking for more effective blood RNA-seq analysis in primate animal
- model
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Choose between above and the paper title: Optimizing yield of deep RNA sequencin
- g for gene expression profiling by globin reduction of peripheral blood
- samples from cynomolgus monkeys (Macaca fascicularis).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Chapter author list: https://tex.stackexchange.com/questions/156862/displaying-aut
- hor-for-each-chapter-in-book Every chapter gets an author list, which may
- or may not be part of a citation to a published/preprinted paper.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Preprint then cite the paper
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section*
- Abstract
- \end_layout
- \begin_layout Paragraph
- Background
- \end_layout
- \begin_layout Standard
- Primate blood contains high concentrations of globin messenger RNA.
- Globin reduction is a standard technique used to improve the expression
- results obtained by DNA microarrays on RNA from blood samples.
- However, with whole transcriptome RNA-sequencing (RNA-seq) quickly replacing
- microarrays for many applications, the impact of globin reduction for RNA-seq
- has not been previously studied.
- Moreover, no off-the-shelf kits are available for globin reduction in nonhuman
- primates.
-
- \end_layout
- \begin_layout Paragraph
- Results
- \end_layout
- \begin_layout Standard
- Here we report a protocol for RNA-seq in primate blood samples that uses
- complementary oligonucleotides to block reverse transcription of the alpha
- and beta globin genes.
- In test samples from cynomolgus monkeys (Macaca fascicularis), this globin
- blocking protocol approximately doubles the yield of informative (non-globin)
- reads by greatly reducing the fraction of globin reads, while also improving
- the consistency in sequencing depth between samples.
- The increased yield enables detection of about 2000 more genes, significantly
- increases the correlation in measured gene expression levels between samples,
- and increases the sensitivity of differential gene expression tests.
- \end_layout
- \begin_layout Paragraph
- Conclusions
- \end_layout
- \begin_layout Standard
- These results show that globin blocking significantly improves the cost-effectiv
- eness of mRNA sequencing in primate blood samples by doubling the yield
- of useful reads, allowing detection of more genes, and improving the precision
- of gene expression measurements.
- Based on these results, a globin reducing or blocking protocol is recommended
- for all RNA-seq studies of primate blood samples.
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Consider putting some of this in the Intro chapter
- \end_layout
- \begin_layout Itemize
- Cynomolgus monkeys as a model organism
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Highly related to humans
- \end_layout
- \begin_layout Itemize
- Small size and short life cycle - good research animal
- \end_layout
- \begin_layout Itemize
- Genomics resources still in development
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Inadequacy of existing blood RNA-seq protocols
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Existing protocols use a separate globin pulldown step, slowing down processing
- \end_layout
- \end_deeper
- \end_inset
- \end_layout
- \begin_layout Standard
- Increasingly, researchers are turning to high-throughput mRNA sequencing
- technologies (RNA-seq) in preference to expression microarrays for analysis
- of gene expression
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mutz2012"
- literal "false"
- \end_inset
- .
- The advantages are even greater for study of model organisms with no well-estab
- lished array platforms available, such as the cynomolgus monkey (Macaca
- fascicularis).
- High fractions of globin mRNA are naturally present in mammalian peripheral
- blood samples (up to 70% of total mRNA) and these are known to interfere
- with the results of array-based expression profiling
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Winn2010"
- literal "false"
- \end_inset
- .
- The importance of globin reduction for RNA-seq of blood has only been evaluated
- for a deepSAGE protocol on human samples
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- In the present report, we evaluated globin reduction using custom blocking
- oligonucleotides for deep RNA-seq of peripheral blood samples from a nonhuman
- primate, cynomolgus monkey, using the Illumina technology platform.
- We demonstrate that globin reduction significantly improves the cost-effectiven
- ess of RNA-seq in blood samples.
- Thus, our protocol offers a significant advantage to any investigator planning
- to use RNA-seq for gene expression profiling of nonhuman primate blood
- samples.
- Our method can be generally applied to any species by designing complementary
- oligonucleotide blocking probes to the globin gene sequences of that species.
- Indeed, any highly expressed but biologically uninformative transcripts
- can also be blocked to further increase sequencing efficiency and value
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Arnaud2016"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Subsection
- Sample collection
- \end_layout
- \begin_layout Standard
- All research reported here was done under IACUC-approved protocols at the
- University of Miami and complied with all applicable federal and state
- regulations and ethical principles for nonhuman primate research.
- Blood draws occurred between 16 April 2012 and 18 June 2015.
- The experimental system involved intrahepatic pancreatic islet transplantation
- into Cynomolgus monkeys with induced diabetes mellitus with or without
- concomitant infusion of mesenchymal stem cells.
- Blood was collected at serial time points before and after transplantation
- into PAXgene Blood RNA tubes (PreAnalytiX/Qiagen, Valencia, CA) at the
- precise volume:volume ratio of 2.5 ml whole blood into 6.9 ml of PAXgene
- additive.
- \end_layout
- \begin_layout Subsection
- Globin Blocking
- \end_layout
- \begin_layout Standard
- Four oligonucleotides were designed to hybridize to the 3’ end of the transcript
- s for Cynomolgus HBA1, HBA2 and HBB, with two hybridization sites for HBB
- and 2 sites for HBA (the chosen sites were identical in both HBA genes).
- All oligos were purchased from Sigma and were entirely composed of 2’O-Me
- bases with a C3 spacer positioned at the 3’ ends to prevent any polymerase
- mediated primer extension.
- \end_layout
- \begin_layout Quote
- HBA1/2 site 1: GCCCACUCAGACUUUAUUCAAAG-C3spacer
- \end_layout
- \begin_layout Quote
- HBA1/2 site 2: GGUGCAAGGAGGGGAGGAG-C3spacer
- \end_layout
- \begin_layout Quote
- HBB site 1: AAUGAAAAUAAAUGUUUUUUAUUAG-C3spacer
- \end_layout
- \begin_layout Quote
- HBB site 2: CUCAAGGCCCUUCAUAAUAUCCC-C3spacer
- \end_layout
- \begin_layout Subsection
- RNA-seq Library Preparation
- \end_layout
- \begin_layout Standard
- Sequencing libraries were prepared with 200ng total RNA from each sample.
- Polyadenylated mRNA was selected from 200 ng aliquots of cynomolgus blood-deri
- ved total RNA using Ambion Dynabeads Oligo(dT)25 beads (Invitrogen) following
- manufacturer’s recommended protocol.
- PolyA selected RNA was then combined with 8 pmol of HBA1/2 (site 1), 8
- pmol of HBA1/2 (site 2), 12 pmol of HBB (site 1) and 12 pmol of HBB (site
- 2) oligonucleotides.
- In addition, 20 pmol of RT primer containing a portion of the Illumina
- adapter sequence (B-oligo-dTV: GAGTTCCTTGGCACCCGAGAATTCCATTTTTTTTTTTTTTTTTTTV)
- and 4 µL of 5X First Strand buffer (250 mM Tris-HCl pH 8.3, 375 mM KCl,
- 15mM MgCl2) were added in a total volume of 15 µL.
- The RNA was fragmented by heating this cocktail for 3 minutes at 95°C and
- then placed on ice.
- This was followed by the addition of 2 µL 0.1 M DTT, 1 µL RNaseOUT, 1 µL
- 10mM dNTPs 10% biotin-16 aminoallyl-2’- dUTP and 10% biotin-16 aminoallyl-2’-
- dCTP (TriLink Biotech, San Diego, CA), 1 µL Superscript II (200U/ µL, Thermo-Fi
- sher).
- A second “unblocked” library was prepared in the same way for each sample
- but replacing the blocking oligos with an equivalent volume of water.
- The reaction was carried out at 25°C for 15 minutes and 42°C for 40 minutes,
- followed by incubation at 75°C for 10 minutes to inactivate the reverse
- transcriptase.
- \end_layout
- \begin_layout Standard
- The cDNA/RNA hybrid molecules were purified using 1.8X Ampure XP beads (Agencourt
- ) following supplier’s recommended protocol.
- The cDNA/RNA hybrid was eluted in 25 µL of 10 mM Tris-HCl pH 8.0, and then
- bound to 25 µL of M280 Magnetic Streptavidin beads washed per recommended
- protocol (Thermo-Fisher).
- After 30 minutes of binding, beads were washed one time in 100 µL 0.1N NaOH
- to denature and remove the bound RNA, followed by two 100 µL washes with
- 1X TE buffer.
- \end_layout
- \begin_layout Standard
- Subsequent attachment of the 5-prime Illumina A adapter was performed by
- on-bead random primer extension of the following sequence (A-N8 primer:
- TTCAGAGTTCTACAGTCCGACGATCNNNNNNNN).
- Briefly, beads were resuspended in a 20 µL reaction containing 5 µM A-N8
- primer, 40mM Tris-HCl pH 7.5, 20mM MgCl2, 50mM NaCl, 0.325U/µL Sequenase
- 2.0 (Affymetrix, Santa Clara, CA), 0.0025U/µL inorganic pyrophosphatase (Affymetr
- ix) and 300 µM each dNTP.
- Reaction was incubated at 22°C for 30 minutes, then beads were washed 2
- times with 1X TE buffer (200µL).
- \end_layout
- \begin_layout Standard
- The magnetic streptavidin beads were resuspended in 34 µL nuclease-free
- water and added directly to a PCR tube.
- The two Illumina protocol-specified PCR primers were added at 0.53 µM (Illumina
- TruSeq Universal Primer 1 and Illumina TruSeq barcoded PCR primer 2), along
- with 40 µL 2X KAPA HiFi Hotstart ReadyMix (KAPA, Wilmington, MA) and thermocycl
- ed as follows: starting with 98°C (2 min-hold); 15 cycles of 98°C, 20sec;
- 60°C, 30sec; 72°C, 30sec; and finished with a 72°C (2 min-hold).
- \end_layout
- \begin_layout Standard
- PCR products were purified with 1X Ampure Beads following manufacturer’s
- recommended protocol.
- Libraries were then analyzed using the Agilent TapeStation and quantitation
- of desired size range was performed by “smear analysis”.
- Samples were pooled in equimolar batches of 16 samples.
- Pooled libraries were size selected on 2% agarose gels (E-Gel EX Agarose
- Gels; Thermo-Fisher).
- Products were cut between 250 and 350 bp (corresponding to insert sizes
- of 130 to 230 bps).
- Finished library pools were then sequenced on the Illumina NextSeq500 instrumen
- t with 75 base read lengths.
-
- \end_layout
- \begin_layout Subsection
- Read alignment and counting
- \end_layout
- \begin_layout Standard
- Reads were aligned to the cynomolgus genome using STAR
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Dobin2013,Wilson2013"
- literal "false"
- \end_inset
- .
- Counts of uniquely mapped reads were obtained for every gene in each sample
- with the “featureCounts” function from the Rsubread package, using each
- of the three possibilities for the “strandSpecific” option: sense, antisense,
- and unstranded
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liao2014"
- literal "false"
- \end_inset
- .
- A few artifacts in the cynomolgus genome annotation complicated read counting.
- First, no ortholog is annotated for alpha globin in the cynomolgus genome,
- presumably because the human genome has two alpha globin genes with nearly
- identical sequences, making the orthology relationship ambiguous.
- However, two loci in the cynomolgus genome are annotated as “hemoglobin subunit alpha-lik
- e” (LOC102136192 and LOC102136846).
- LOC102136192 is annotated as a pseudogene while LOC102136846 is annotated
- as protein-coding.
- Our globin reduction protocol was designed to include blocking of these
- two genes.
- Indeed, these two genes have almost the same read counts in each library
- as the properly-annotated HBB gene and much larger counts than any other
- gene in the unblocked libraries, giving confidence that reads derived from
- the real alpha globin are mapping to both genes.
- Thus, reads from both of these loci were counted as alpha globin reads
- in all further analyses.
- The second artifact is a small, uncharacterized non-coding RNA gene (LOC1021365
- 91), which overlaps the HBA-like gene (LOC102136192) on the opposite strand.
- If counting is not performed in stranded mode (or if a non-strand-specific
- sequencing protocol is used), many reads mapping to the globin gene will
- be discarded as ambiguous due to their overlap with this ncRNA gene, resulting
- in significant undercounting of globin reads.
- Therefore, stranded sense counts were used for all further analysis in
- the present study to ensure that we accurately accounted for globin transcript
- reduction.
- However, we note that stranded reads are not necessary for RNA-seq using
- our protocol in standard practice.
-
- \end_layout
- \begin_layout Subsection
- Normalization and Exploratory Data Analysis
- \end_layout
- \begin_layout Standard
- Libraries were normalized by computing scaling factors using the edgeR package’s
- Trimmed Mean of M-values method
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Robinson2010"
- literal "false"
- \end_inset
- .
- Log2 counts per million values (logCPM) were calculated using the cpm function
- in edgeR for individual samples and aveLogCPM function for averages across
- groups of samples, using those functions’ default prior count values to
- avoid taking the logarithm of 0.
- Genes were considered “present” if their average normalized logCPM values
- across all libraries were at least -1.
- Normalizing for gene length was unnecessary because the sequencing protocol
- is 3’-biased and hence the expected read count for each gene is related
- to the transcript’s copy number but not its length.
- \end_layout
- \begin_layout Standard
- In order to assess the effect of blocking on reproducibility, Pearson and
- Spearman correlation coefficients were computed between the logCPM values
- for every pair of libraries within the globin-blocked (GB) and unblocked
- (non-GB) groups, and edgeR's “estimateDisp” function was used to compute
- negative binomial dispersions separately for the two groups
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Chen2014"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- Differential Expression Analysis
- \end_layout
- \begin_layout Standard
- All tests for differential gene expression were performed using edgeR, by
- first fitting a negative binomial generalized linear model to the counts
- and normalization factors and then performing a quasi-likelihood F-test
- with robust estimation of outlier gene dispersions
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lund2012,Phipson2016"
- literal "false"
- \end_inset
- .
- To investigate the effects of globin blocking on each gene, an additive
- model was fit to the full data with coefficients for globin blocking and
- SampleID.
- To test the effect of globin blocking on detection of differentially expressed
- genes, the GB samples and non-GB samples were each analyzed independently
- as follows: for each animal with both a pre-transplant and a post-transplant
- time point in the data set, the pre-transplant sample and the earliest
- post-transplant sample were selected, and all others were excluded, yielding
- a pre-/post-transplant pair of samples for each animal (N=7 animals with
- paired samples).
- These samples were analyzed for pre-transplant vs.
- post-transplant differential gene expression while controlling for inter-animal
- variation using an additive model with coefficients for transplant and
- animal ID.
- In all analyses, p-values were adjusted using the Benjamini-Hochberg procedure
- for FDR control
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Itemize
- New blood RNA-seq protocol to block reverse transcription of globin genes
- \end_layout
- \begin_layout Itemize
- Blood RNA-seq time course after transplants with/without MSC infusion
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Subsection
- Globin blocking yields a larger and more consistent fraction of useful reads
- \end_layout
- \begin_layout Standard
- The objective of the present study was to validate a new protocol for deep
- RNA-seq of whole blood drawn into PaxGene tubes from cynomolgus monkeys
- undergoing islet transplantation, with particular focus on minimizing the
- loss of useful sequencing space to uninformative globin reads.
- The details of the analysis with respect to transplant outcomes and the
- impact of mesenchymal stem cell treatment will be reported in a separate
- manuscript (in preparation).
- To focus on the efficacy of our globin blocking protocol, 37 blood samples,
- 16 from pre-transplant and 21 from post-transplant time points, were each
- prepped once with and once without globin blocking oligos, and were then
- sequenced on an Illumina NextSeq500 instrument.
- The number of reads aligning to each gene in the cynomolgus genome was
- counted.
- Table 1 summarizes the distribution of read fractions among the GB and
- non-GB libraries.
- In the libraries with no globin blocking, globin reads made up an average
- of 44.6% of total input reads, while reads assigned to all other genes made
- up an average of 26.3%.
- The remaining reads either aligned to intergenic regions (that include
- long non-coding RNAs) or did not align with any annotated transcripts in
- the current build of the cynomolgus genome.
- In the GB libraries, globin reads made up only 3.48% and reads assigned
- to all other genes increased to 50.4%.
- Thus, globin blocking resulted in a 92.2% reduction in globin reads and
- a 91.6% increase in yield of useful non-globin reads.
- \end_layout
- \begin_layout Standard
- This reduction is not quite as efficient as the previous analysis showed
- for human samples by DeepSAGE (<0.4% globin reads after globin reduction)
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- Nonetheless, this degree of globin reduction is sufficient to nearly double
- the yield of useful reads.
- Thus, globin blocking cuts the required sequencing effort (and costs) to
- achieve a target coverage depth by almost 50%.
- Consistent with this near doubling of yield, the average difference in
- un-normalized logCPM across all genes between the GB libraries and non-GB
- libraries is approximately 1 (mean = 1.01, median = 1.08), an overall 2-fold
- increase.
- Un-normalized values are used here because the TMM normalization correctly
- identifies this 2-fold difference as biologically irrelevant and removes
- it.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure1 - globin-fractions.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Fraction of genic reads in each sample aligned to non-globin genes, with
- and without globin blocking (GB).
-
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Fraction-of-genic-reads"
- \end_inset
- Fraction of genic reads in each sample aligned to non-globin genes, with
- and without globin blocking (GB).
- \series default
- All reads in each sequencing library were aligned to the cyno genome, and
- the number of reads uniquely aligning to each gene was counted.
- For each sample, counts were summed separately for all globin genes and
- for the remainder of the genes (non-globin genes), and the fraction of
- genic reads aligned to non-globin genes was computed.
- Each point represents an individual sample.
- Gray + signs indicate the means for globin-blocked libraries and unblocked
- libraries.
- The overall distribution for each group is represented as a notched box
- plot.
- Points are randomly spread vertically to avoid excessive overlapping.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- placement p
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="7">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Percent of Total Reads
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Percent of Genic Reads
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Non-globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- All Genic Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- All Aligned Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Non-globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Globin Reads
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 50.4% ± 6.82
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 3.48% ± 2.94
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 53.9% ± 6.81
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 89.7% ± 2.40
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 93.5% ± 5.25
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 6.49% ± 5.25
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 26.3% ± 8.95
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 44.6% ± 16.6
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 70.1% ± 9.38
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 90.7% ± 5.16
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 38.8% ± 17.1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 61.2% ± 17.1
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Fractions of reads mapping to genomic features in GB and non-GB samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Fractions-of-reads"
- \end_inset
- Fractions of reads mapping to genomic features in GB and non-GB samples.
-
- \series default
- All values are given as mean ± standard deviation.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Another important aspect is that the standard deviations in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Fractions-of-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- are uniformly smaller in the GB samples than the non-GB ones, indicating
- much greater consistency of yield.
- This is best seen in the percentage of non-globin reads as a fraction of
- total reads aligned to annotated genes (genic reads).
- For the non-GB samples, this measure ranges from 10.9% to 80.9%, while for
- the GB samples it ranges from 81.9% to 99.9% (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Fraction-of-genic-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This means that for applications where it is critical that each sample
- achieve a specified minimum coverage in order to provide useful information,
- it would be necessary to budget up to 10 times the sequencing depth per
- sample without globin blocking, even though the average yield improvement
- for globin blocking is only 2-fold, because every sample has a chance of
- being 90% globin and 10% useful reads.
- Hence, the more consistent behavior of GB samples makes planning an experiment
- easier and more efficient because it eliminates the need to over-sequence
- every sample in order to guard against the worst case of a high-globin
- fraction.
- \end_layout
- \begin_layout Subsection
- Globin blocking lowers the noise floor and allows detection of about 2000
- more genes
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Remove redundant titles from figures
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure2 - aveLogCPM-colored.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Distributions of average group gene abundances when normalized separately
- or together.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:logcpm-dists"
- \end_inset
- Distributions of average group gene abundances when normalized separately
- or together.
- \series default
- All reads in each sequencing library were aligned to the cyno genome, and
- the number of reads uniquely aligning to each gene was counted.
- Genes with zero counts in all libraries were discarded.
- Libraries were normalized using the TMM method.
- Libraries were split into globin-blocked (GB) and non-GB groups and the
- average abundance for each gene in both groups, measured in log2 counts
- per million reads counted, was computed using the aveLogCPM function.
- The distribution of average gene logCPM values was plotted for both groups
- using a kernel density plot to approximate a continuous distribution.
- The GB logCPM distributions are marked in red, the non-GB ones in blue.
- The black vertical line denotes the chosen detection threshold of -1.
- Top panel: Libraries were split into GB and non-GB groups first and normalized
- separately.
- Bottom panel: Libraries were all normalized together first and then split
- into groups.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Since globin blocking yields more usable sequencing depth, it should also
- allow detection of more genes at any given threshold.
- When we looked at the distribution of average normalized logCPM values
- across all libraries for genes with at least one read assigned to them,
- we observed the expected bimodal distribution, with a high-abundance "signal"
- peak representing detected genes and a low-abundance "noise" peak representing
- genes whose read count did not rise above the noise floor (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Consistent with the 2-fold increase in raw counts assigned to non-globin
- genes, the signal peak for GB samples is shifted to the right relative
- to the non-GB signal peak.
- When all the samples are normalized together, this difference is normalized
- out, lining up the signal peaks, and this reveals that, as expected, the
- noise floor for the GB samples is about 2-fold lower.
- This greater separation between signal and noise peaks in the GB samples
- means that low-expression genes should be more easily detected and more
- precisely quantified than in the non-GB samples.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure3 - detection.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Gene detections as a function of abundance thresholds in globin-blocked
- (GB) and non-GB samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Gene-detections"
- \end_inset
- Gene detections as a function of abundance thresholds in globin-blocked
- (GB) and non-GB samples.
- \series default
- Average abundance (logCPM,
- \begin_inset Formula $\log_{2}$
- \end_inset
- counts per million reads counted) was computed by separate group normalization
- as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- for both the GB and non-GB groups, as well as for all samples considered
- as one large group.
- For every integer threshold from -2 to 3, the number of genes detected
- at or above that logCPM threshold was plotted for each group.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Based on these distributions, we selected a detection threshold of -1, which
- is approximately the leftmost edge of the trough between the signal and
- noise peaks.
- This represents the most liberal possible detection threshold that doesn't
- call substantial numbers of noise genes as detected.
- Among the full dataset, 13429 genes were detected at this threshold, and
- 22276 were not.
- When considering the GB libraries and non-GB libraries separately and re-comput
- ing normalization factors independently within each group, 14535 genes were
- detected in the GB libraries while only 12460 were detected in the non-GB
- libraries.
- Thus, GB allowed the detection of about 2000 extra genes that were buried under
- the noise floor without GB.
- This pattern of at least 2000 additional genes detected with GB was also
- consistent across a wide range of possible detection thresholds, from -2
- to 3 (see Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Gene-detections"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- \end_layout
- \begin_layout Subsection
- Globin blocking does not add significant additional noise or decrease sample
- quality
- \end_layout
- \begin_layout Standard
- One potential worry is that the globin blocking protocol could perturb the
- levels of non-globin genes.
- There are two kinds of possible perturbations: systematic and random.
- The former is not a major concern for detection of differential expression,
- since a 2-fold change in every sample has no effect on the relative fold
- change between samples.
- In contrast, random perturbations would increase the noise and obscure
- the signal in the dataset, reducing the capacity to detect differential
- expression.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure4 - maplot-colored.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- MA plot showing effects of globin blocking on each gene's abundance.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-plot"
- \end_inset
- \series bold
- MA plot showing effects of globin blocking on each gene's abundance.
-
- \series default
- All libraries were normalized together as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and genes with an average logCPM below -1 were filtered out.
- Each remaining gene was tested for differential abundance with respect
- to globin blocking (GB) using edgeR’s quasi-likelihood F-test, fitting a
- negative binomial generalized linear model to the table of read counts in each
- library.
- For each gene, edgeR reported average abundance (logCPM),
- \begin_inset Formula $\log_{2}$
- \end_inset
- fold change (logFC), p-value, and Benjamini-Hochberg adjusted false discovery
- rate (FDR).
- Each gene's logFC was plotted against its logCPM, colored by FDR.
- Red points are significant at ≤10% FDR, and blue are not significant at
- that threshold.
- The alpha and beta globin genes targeted for blocking are marked with large
- triangles, while all other genes are represented as small points.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Standardize on
- \begin_inset Quotes eld
- \end_inset
- log2
- \begin_inset Quotes erd
- \end_inset
- notation
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- The data do indeed show small systematic perturbations in gene levels (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-plot"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Other than the 3 designated alpha and beta globin genes, two other genes
- stand out as having especially large negative log fold changes: HBD and
- LOC1021365.
- HBD, delta globin, is most likely targeted by the blocking oligos due to
- high sequence homology with the other globin genes.
- LOC1021365 is the aforementioned ncRNA that is reverse-complementary to
- one of the alpha-like genes and that would be expected to be removed during
- the globin blocking step.
- All other genes appear in a cluster centered vertically at 0, and the vast
- majority of genes in this cluster show an absolute log2(FC) of 0.5 or less.
- Nevertheless, many of these small perturbations are still statistically
- significant, indicating that the globin blocking oligos likely cause very
- small but non-zero systematic perturbations in measured gene expression
- levels.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure5 - corrplot.pdf
- lyxscale 50
- width 50col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Comparison of inter-sample gene abundance correlations with and without
- globin blocking.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:gene-abundance-correlations"
- \end_inset
- Comparison of inter-sample gene abundance correlations with and without
- globin blocking (GB).
- \series default
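- All libraries were normalized together as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and genes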
- with an average abundance (logCPM, log2 counts per million reads counted)
- less than -1 were filtered out.
- Each gene’s logCPM was computed in each library using the edgeR cpm function.
- For each pair of biological samples, the Pearson correlation between those
- samples' GB libraries was plotted against the correlation between the same
- samples’ non-GB libraries.
- Each point represents a unique pair of samples.
- The solid gray line shows a quantile-quantile plot of distribution of GB
- correlations vs.
- that of non-GB correlations.
- The thin dashed line is the identity line, provided for reference.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To evaluate the possibility of globin blocking causing random perturbations
- and reducing sample quality, we computed the Pearson correlation between
- logCPM values for every pair of samples with and without GB and plotted
- them against each other (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:gene-abundance-correlations"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The plot indicated that the GB libraries have higher sample-to-sample correlati
- ons than the non-GB libraries.
- Parametric and nonparametric tests for differences between the correlations
- with and without GB both confirmed that this difference was highly significant
- (2-sided paired t-test: t = 37.2, df = 665, P ≪ 2.2e-16; 2-sided Wilcoxon
- sign-rank test: V = 2195, P ≪ 2.2e-16).
- Performing the same tests on the Spearman correlations gave the same conclusion
- (t-test: t = 26.8, df = 665, P ≪ 2.2e-16; sign-rank test: V = 8781, P ≪ 2.2e-16).
- We used the edgeR package to compute the overall biological coefficient
- of variation (BCV) for GB and non-GB libraries, and found that globin blocking
- resulted in a negligible increase in the BCV (0.417 with GB vs.
- 0.400 without).
- The near equality of the BCVs for both sets indicates that the higher correlati
- ons in the GB libraries are most likely a result of the increased yield
- of useful reads, which reduces the contribution of Poisson counting uncertainty
- to the overall variance of the logCPM values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCarthy2012"
- literal "false"
- \end_inset
- .
- This improves the precision of expression measurements and more than offsets
- the negligible increase in BCV.
- \end_layout
- \begin_layout Subsection
- More differentially expressed genes are detected with globin blocking
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="5">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- No Globin Blocking
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Up
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- NS
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Down
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="3" alignment="center" valignment="middle" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Globin Blocking
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Up
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 231
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 515
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 2
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="4" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- NS
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 160
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 11235
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 136
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="4" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Down
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 548
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 127
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status open
- \begin_layout Plain Layout
- Comparison of significantly differentially expressed genes with and without
- globin blocking.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Comparison-of-significant"
- \end_inset
- Comparison of significantly differentially expressed genes with and without
- globin blocking.
- \series default
- Up, Down: Genes significantly up/down-regulated in post-transplant samples
- relative to pre-transplant samples, with a false discovery rate of 10%
- or less.
- NS: Non-significant genes (false discovery rate greater than 10%).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To compare performance on differential gene expression tests, we took subsets
- of both the GB and non-GB libraries with exactly one pre-transplant and
- one post-transplant sample for each animal that had paired samples available
- for analysis (N=7 animals, N=14 samples in each subset).
- The same test for pre- vs.
- post-transplant differential gene expression was performed on the same
- 7 pairs of samples from GB libraries and non-GB libraries, in each case
- using an FDR of 10% as the threshold of significance.
- Out of 12954 genes that passed the detection threshold in both subsets,
- 358 were called significantly differentially expressed in the same direction
- in both sets; 1063 were differentially expressed in the GB set only; 296
- were differentially expressed in the non-GB set only; 2 genes were called
- significantly up in the GB set but significantly down in the non-GB set;
- and the remaining 11235 were not called differentially expressed in either
- set.
- These data are summarized in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Comparison-of-significant"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The differences in BCV calculated by EdgeR for these subsets of samples
- were negligible (BCV = 0.302 for GB and 0.297 for non-GB).
- \end_layout
- \begin_layout Standard
- The key point is that the GB data result in substantially more differential
- expression calls than the non-GB data.
- Since there is no gold standard for this dataset, it is impossible to be
- certain whether this is due to under-calling of differential expression
- in the non-GB samples or over-calling in the GB samples.
- However, given that both datasets are derived from the same biological
- samples and have nearly equal BCVs, it is more likely that the larger number
- of DE calls in the GB samples are genuine detections that were enabled
- by the higher sequencing depth and measurement precision of the GB samples.
- Note that the same set of genes was considered in both subsets, so the
- larger number of differentially expressed gene calls in the GB data set
- reflects a greater sensitivity to detect significant differential gene
- expression and not simply the larger total number of detected genes in
- GB samples described earlier.
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Standard
- The original experience with whole blood gene expression profiling on DNA
- microarrays demonstrated that the high concentration of globin transcripts
- significantly reduced the sensitivity to detect genes with relatively low
- expression levels.
- To address this limitation, commercial protocols for globin reduction were
- developed based on strategies to block globin transcript amplification
- during labeling or physically removing globin transcripts by affinity bead
- methods
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Winn2010"
- literal "false"
- \end_inset
- .
- More recently, using the latest generation of labeling protocols and arrays,
- it was determined that globin reduction was no longer necessary to obtain
- sufficient sensitivity to detect differential transcript expression
- \begin_inset CommandInset citation
- LatexCommand cite
- key "NuGEN2010"
- literal "false"
- \end_inset
- .
- However, we are not aware of any publications using these currently available
- protocols with the latest generation of microarrays that actually compare
- the detection sensitivity with and without globin reduction.
- Nevertheless, omitting globin reduction has now been generally adopted in
- practice, driven primarily by concerns for cost control.
- The main objective of our work was to directly test the impact of globin
- gene transcripts and a new globin blocking protocol for application to
- the newest generation of differential gene expression profiling determined
- using next generation sequencing.
-
- \end_layout
- \begin_layout Standard
- The challenge of doing global gene expression profiling in cynomolgus monkeys
- is that the currently available arrays were never designed to comprehensively
- cover this genome and have not been updated since the first assemblies
- of the cynomolgus genome were published.
- Therefore, we determined that the best strategy for peripheral blood profiling
- was to do deep RNA-seq and inform the workflow using the latest available
- genome assembly and annotation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Wilson2013"
- literal "false"
- \end_inset
- .
- However, it was not immediately clear whether globin reduction was necessary
- for RNA-seq or how much improvement in efficiency or sensitivity to detect
- differential gene expression would be achieved for the added cost and work.
-
- \end_layout
- \begin_layout Standard
- We only found one report that demonstrated that globin reduction significantly
- improved the effective read yields for sequencing of human peripheral blood
- cell RNA using a DeepSAGE protocol
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- The approach to DeepSAGE involves two different restriction enzymes that
- purify and then tag small fragments of transcripts at specific locations
- and thus significantly reduces the complexity of the transcriptome.
- Therefore, we could not determine how DeepSAGE results would translate
- to the common strategy in the field for assaying the entire transcript
- population by whole-transcriptome 3’-end RNA-seq.
- Furthermore, if globin reduction is necessary, we also needed a globin
- reduction method specific to cynomolgus globin sequences that would work
- in an organism for which no kit is available off the shelf.
- \end_layout
- \begin_layout Standard
- As mentioned above, the addition of globin blocking oligos has a very small
- impact on measured gene expression levels.
- However, this is a non-issue for the purposes of differential expression
- testing, since a systematic change in a gene in all samples does not affect
- relative expression levels between samples.
- Nevertheless, we must acknowledge that simple comparisons of gene expression
- data obtained by GB and non-GB protocols are not possible without additional
- normalization.
-
- \end_layout
- \begin_layout Standard
- More importantly, globin blocking not only nearly doubles the yield of usable
- reads, it also increases inter-sample correlation and sensitivity to detect
- differential gene expression relative to the same set of samples profiled
- without blocking.
- In addition, globin blocking does not add a significant amount of random
- noise to the data.
- Globin blocking thus represents a cost-effective way to squeeze more data
- and statistical power out of the same blood samples and the same amount
- of sequencing.
- In conclusion, globin reduction greatly increases the yield of useful RNA-seq
- reads mapping to the rest of the genome, with minimal perturbations in
- the relative levels of non-globin genes.
- Based on these results, globin transcript reduction using sequence-specific,
- complementary blocking oligonucleotides is recommended for all deep RNA-seq
- of cynomolgus and other nonhuman primate blood samples.
- \end_layout
- \begin_layout Chapter
- Future Directions
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Consider per-chapter future directions.
- Check instructions.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Functional validation of effective promoter radius
- \end_layout
- \begin_layout Itemize
- N-to-M convergence deserves further study of some kind
- \end_layout
- \begin_layout Itemize
- Promoter positional coverage: follow up on hints of interesting patterns
- \end_layout
- \begin_layout Itemize
- Study other epigenetic marks in more contexts
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- DNA methylation, histone marks, chromatin accessibility & conformation in
- CD4 T-cells
- \end_layout
- \begin_layout Itemize
- Also look at other types of lymphocytes: CD8 T-cells, B-cells, NK cells
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Use CV or bootstrap to better evaluate classifiers
- \end_layout
- \begin_layout Itemize
- fRMAtools could be adapted to not require equal-sized groups
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- % Call it "References" instead of "Bibliography"
- \end_layout
- \begin_layout Plain Layout
- \backslash
- renewcommand{
- \backslash
- bibname}{References}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Check bib entry formatting & sort order
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Check in-text citation format.
- Probably don't just want [1], [2], etc.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset bibtex
- LatexCommand bibtex
- btprint "btPrintCited"
- bibfiles "code-refs,refs-PROCESSED"
- options "bibtotoc,unsrt"
- \end_inset
- \end_layout
- \end_body
- \end_document
|