12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
10222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182141921420214212
14222142321424214252142621427214282142921430214312143221433214342143521436214372143821439214402144121442214432144421445214462144721448214492145021451214522145321454214552145621457214582145921460214612146221463214642146521466214672146821469214702147121472214732147421475214762147721478214792148021481214822148321484214852148621487214882148921490214912149221493214942149521496214972149821499215002150121502215032150421505215062150721508215092151021511215122151321514215152151621517215182151921520215212152221523215242152521526215272152821529215302153121532215332153421535215362153721538215392154021541215422154321544215452154621547215482154921550215512155221553215542155521556215572155821559215602156121562215632156421565215662156721568215692157021571215722157321574215752157621577215782157921580215812158221583215842158521586215872158821589215902159121592215932159421595215962159721598215992160021601216022160321604216052160621607216082160921610216112161221613216142161521616216172161821619216202162121622216232162421625216262162721628216292163021631216322163321634216352163621637216382163921640216412164221643216442164521646216472164821649216502165121652216532165421655216562165721658216592166021661216622166321664216652166621667216682166921670216712167221673216742167521676216772167821679216802168121682216832168421685216862168721688216892169021691216922169321694216952169621697216982169921700217012170221703217042170521706217072170821709217102171121712217132171421715217162171721718217192172021721217222172321724217252172621727217282172921730217312173221733217342173521736217372173821739217402174121742217432174421745217462174721748217492175021751217522175321754217552175621757217582175921760217612176221763217642176521766217672176821769217702177121772217732177421775217762177721778217792178021781217822178321784217852178621787217882178921790217912179221793217942179521796217972179821799218002180121802218032180421805218062180721808218092181021811218122181321814218152181621817218182181921820218212
18222182321824218252182621827218282182921830218312183221833218342183521836218372183821839218402184121842218432184421845218462184721848218492185021851218522185321854218552185621857218582185921860218612186221863218642186521866218672186821869218702187121872218732187421875218762187721878218792188021881218822188321884218852188621887218882188921890218912189221893218942189521896218972189821899219002190121902219032190421905219062190721908219092191021911219122191321914219152191621917219182191921920219212192221923219242192521926219272192821929219302193121932219332193421935219362193721938219392194021941219422194321944219452194621947219482194921950219512195221953219542195521956219572195821959219602196121962219632196421965219662196721968219692197021971219722197321974219752197621977219782197921980219812198221983219842198521986219872198821989219902199121992219932199421995219962199721998219992200022001220022200322004220052200622007220082200922010220112201222013220142201522016220172201822019220202202122022220232202422025220262202722028220292203022031220322203322034220352203622037220382203922040220412204222043220442204522046220472204822049220502205122052220532205422055220562205722058220592206022061220622206322064220652206622067220682206922070220712207222073220742207522076220772207822079220802208122082220832208422085220862208722088220892209022091220922209322094220952209622097220982209922100221012210222103221042210522106221072210822109221102211122112221132211422115221162211722118221192212022121221222212322124221252212622127221282212922130221312213222133221342213522136221372213822139221402214122142221432214422145221462214722148221492215022151221522215322154221552215622157221582215922160221612216222163221642216522166221672216822169221702217122172221732217422175221762217722178221792218022181221822218322184221852218622187221882218922190221912219222193221942219522196221972219822199222002220122202222032220422205222062220722208222092221022211222122221322214222152221622217222182221922220222212
22222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212
26222262322624226252262622627226282262922630226312263222633226342263522636226372263822639226402264122642226432264422645226462264722648226492265022651226522265322654226552265622657226582265922660226612266222663226642266522666226672266822669226702267122672226732267422675226762267722678226792268022681226822268322684226852268622687226882268922690226912269222693226942269522696226972269822699227002270122702227032270422705227062270722708227092271022711227122271322714227152271622717227182271922720227212272222723227242272522726227272272822729227302273122732227332273422735227362273722738227392274022741227422274322744227452274622747227482274922750227512275222753227542275522756227572275822759227602276122762227632276422765227662276722768227692277022771227722277322774227752277622777227782277922780227812278222783227842278522786227872278822789227902279122792227932279422795227962279722798227992280022801228022280322804228052280622807228082280922810228112281222813228142281522816228172281822819228202282122822228232282422825228262282722828228292283022831228322283322834228352283622837228382283922840228412284222843228442284522846228472284822849228502285122852228532285422855228562285722858228592286022861228622286322864228652286622867228682286922870228712287222873228742287522876228772287822879228802288122882228832288422885228862288722888228892289022891228922289322894228952289622897228982289922900229012290222903229042290522906229072290822909229102291122912229132291422915229162291722918229192292022921229222292322924229252292622927229282292922930229312293222933229342293522936229372293822939229402294122942229432294422945229462294722948229492295022951229522295322954229552295622957229582295922960229612296222963229642296522966229672296822969229702297122972229732297422975229762297722978229792298022981229822298322984229852298622987229882298922990229912299222993229942299522996229972299822999230002300123002230032300423005230062300723008230092301023011230122301323014230152301623017230182301923020230212
30222302323024230252302623027230282302923030230312303223033230342303523036230372303823039230402304123042230432304423045230462304723048230492305023051230522305323054230552305623057230582305923060230612306223063230642306523066230672306823069230702307123072230732307423075230762307723078230792308023081230822308323084230852308623087230882308923090230912309223093230942309523096230972309823099231002310123102231032310423105231062310723108231092311023111231122311323114231152311623117231182311923120231212312223123231242312523126231272312823129231302313123132231332313423135231362313723138231392314023141231422314323144231452314623147231482314923150231512315223153231542315523156231572315823159231602316123162231632316423165231662316723168231692317023171231722317323174231752317623177231782317923180231812318223183231842318523186231872318823189231902319123192231932319423195231962319723198231992320023201232022320323204232052320623207232082320923210232112321223213232142321523216232172321823219232202322123222232232322423225232262322723228232292323023231232322323323234232352323623237232382323923240232412324223243232442324523246232472324823249232502325123252232532325423255232562325723258232592326023261232622326323264232652326623267232682326923270232712327223273232742327523276232772327823279232802328123282232832328423285232862328723288232892329023291232922329323294232952329623297232982329923300233012330223303233042330523306233072330823309233102331123312233132331423315233162331723318233192332023321233222332323324233252332623327233282332923330233312333223333233342333523336233372333823339233402334123342233432334423345233462334723348233492335023351233522335323354233552335623357233582335923360233612336223363233642336523366233672336823369233702337123372233732337423375233762337723378233792338023381233822338323384233852338623387233882338923390233912339223393233942339523396233972339823399234002340123402234032340423405234062340723408234092341023411234122341323414234152341623417234182341923420234212
342223423234242342523426234272342823429234302343123432234332343423435234362343723438234392344023441234422344323444234452344623447234482344923450234512345223453234542345523456234572345823459234602346123462234632346423465234662346723468234692347023471234722347323474234752347623477234782347923480234812348223483234842348523486234872348823489234902349123492234932349423495234962349723498234992350023501235022350323504235052350623507235082350923510235112351223513235142351523516235172351823519235202352123522235232352423525235262352723528235292353023531235322353323534235352353623537235382353923540235412354223543235442354523546235472354823549235502355123552235532355423555235562355723558235592356023561235622356323564235652356623567235682356923570235712357223573235742357523576 |
- #LyX 2.3 created this file. For more info see http://www.lyx.org/
- \lyxformat 544
- \begin_document
- \begin_header
- \save_transient_properties true
- \origin unavailable
- \textclass extbook
- \begin_preamble
- % List all used files in log output
- \listfiles
- %% Add TOC, List of Figures, etc. to TOC
- \usepackage{tocbibind}
- % Add a DRAFT watermark
- \usepackage{draftwatermark}
- \usepackage{accsupp}
- \SetWatermarkLightness{0.97}
- \SetWatermarkScale{1}
- % Make watermark not copyable (in Adobe Reader)
- \SetWatermarkText{\BeginAccSupp{method=escape,ActualText={}}DRAFT\EndAccSupp{}}
- % Set up required header format
- \usepackage{fancyhdr}
- \pagestyle{fancy}
- \renewcommand{\headrulewidth}{0pt}
- \rhead{}
- \lhead{}
- \chead{}
- \rfoot{}
- \lfoot{}
- % Make page number not copyable (in Adobe Reader)
- \cfoot{\BeginAccSupp{method=escape,ActualText={}}\thepage\EndAccSupp{}} % Page number bottom center
- % Allow FloatBarrier command
- \usepackage{placeins}
- % Allow landscape pages
- \usepackage{pdflscape}
- % Allow doing things after the end of the current page
- % (to avoid landscape figures breaking up text)
- \usepackage{afterpage}
- % Consider: force floats after placement in text
- % https://tex.stackexchange.com/questions/15706/force-floats-to-be-typeset-after-their-occurrence-in-the-source-text
- % This one breaks subfigs so it's disabled
- % https://tex.stackexchange.com/questions/65680/automatically-bold-first-sentence-of-a-floats-caption
- \usepackage[automake=immediate,nonumberlist,nohypertypes={abbreviation}]{glossaries-extra}
- \setabbreviationstyle{long-short}
- \loadglsentries{abbrevs.tex}
- \makeglossaries
- % arara: xelatex
- % arara: biber
- % arara: makeglossaries
- % arara: xelatex
- \end_preamble
- \use_default_options true
- \begin_modules
- todonotes
- logicalmkup
- \end_modules
- \maintain_unincluded_children false
- \begin_local_layout
- Format 66
- InsetLayout "Flex:Glossary Term"
- LyxType custom
- LabelString gls
- LatexType command
- LatexName gls*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (Capital)"
- LyxType custom
- LabelString Gls
- LatexType command
- LatexName Gls*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (pl)"
- LyxType custom
- LabelString glspl
- LatexType command
- LatexName glspl*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (Capital, pl)"
- LyxType custom
- LabelString Glspl
- LatexType command
- LatexName Glspl*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (glstext)"
- LyxType custom
- LabelString glstext
- LatexType command
- LatexName glstext*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (Glstext)"
- LyxType custom
- LabelString Glstext
- LatexType command
- LatexName Glstext*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (glsfirst)"
- LyxType custom
- LabelString glsfirst
- LatexType command
- LatexName glsfirst*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (Glsfirst)"
- LyxType custom
- LabelString Glsfirst
- LatexType command
- LatexName Glsfirst*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (glsdesc)"
- LyxType custom
- LabelString glsdesc
- LatexType command
- LatexName glsdesc*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (Glsdesc)"
- LyxType custom
- LabelString Glsdesc
- LatexType command
- LatexName Glsdesc*
- InToc true
- CustomPars false
- End
- \end_local_layout
- \language english
- \language_package default
- \inputencoding utf8
- \fontencoding default
- \font_roman "default" "default"
- \font_sans "default" "default"
- \font_typewriter "default" "default"
- \font_math "auto" "auto"
- \font_default_family default
- \use_non_tex_fonts false
- \font_sc false
- \font_osf false
- \font_sf_scale 100 100
- \font_tt_scale 100 100
- \use_microtype false
- \use_dash_ligatures true
- \graphics default
- \default_output_format pdf4
- \output_sync 0
- \bibtex_command biber
- \index_command default
- \paperfontsize 12
- \spacing double
- \use_hyperref true
- \pdf_author "Ryan C. Thompson"
- \pdf_bookmarks true
- \pdf_bookmarksnumbered true
- \pdf_bookmarksopen true
- \pdf_bookmarksopenlevel 1
- \pdf_breaklinks true
- \pdf_pdfborder true
- \pdf_colorlinks false
- \pdf_backref false
- \pdf_pdfusetitle true
- \papersize letterpaper
- \use_geometry true
- \use_package amsmath 1
- \use_package amssymb 1
- \use_package cancel 1
- \use_package esint 1
- \use_package mathdots 1
- \use_package mathtools 1
- \use_package mhchem 1
- \use_package stackrel 1
- \use_package stmaryrd 1
- \use_package undertilde 1
- \cite_engine biblatex
- \cite_engine_type numerical
- \biblio_style plain
- \biblio_options sorting=none
- \biblatex_bibstyle numeric
- \biblatex_citestyle numeric
- \use_bibtopic false
- \use_indices false
- \paperorientation portrait
- \suppress_date false
- \justification true
- \use_refstyle 1
- \use_minted 0
- \index Index
- \shortcut idx
- \color #008000
- \end_index
- \leftmargin 1.5in
- \topmargin 1in
- \rightmargin 1in
- \bottommargin 1in
- \secnumdepth 3
- \tocdepth 3
- \paragraph_separation indent
- \paragraph_indentation default
- \is_math_indent 0
- \math_numbering_side default
- \quotes_style english
- \dynamic_quotes 0
- \papercolumns 1
- \papersides 1
- \paperpagestyle default
- \tracking_changes false
- \output_changes false
- \html_math_output 0
- \html_css_as_file 0
- \html_be_strict false
- \end_header
- \begin_body
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- pdfbookmark{Title page}{title}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Title
- Bioinformatic analysis of complex, high-throughput genomic and epigenomic
- data in the context of immunology and transplant rejection
- \end_layout
- \begin_layout Author
- A thesis presented
- \begin_inset Newline newline
- \end_inset
- by
- \begin_inset Newline newline
- \end_inset
- Ryan C.
- Thompson
- \begin_inset Newline newline
- \end_inset
- to
- \begin_inset Newline newline
- \end_inset
- The Scripps Research Institute Graduate Program
- \begin_inset Newline newline
- \end_inset
- in partial fulfillment of the requirements for the degree of
- \begin_inset Newline newline
- \end_inset
- Doctor of Philosophy in the subject of Biology
- \begin_inset Newline newline
- \end_inset
- for
- \begin_inset Newline newline
- \end_inset
- The Scripps Research Institute
- \begin_inset Newline newline
- \end_inset
- La Jolla, California
- \end_layout
- \begin_layout Date
- October 2019
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- To remove TODOs and watermark: Add
- \begin_inset Quotes eld
- \end_inset
- final
- \begin_inset Quotes erd
- \end_inset
- to the document class custom options.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- frontmatter
- \end_layout
- \end_inset
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Use roman numeral page numbers
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Newpage newpage
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- phantomsection
- \end_layout
- \begin_layout Plain Layout
- \backslash
- addcontentsline{toc}{chapter}{Copyright notice}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- vspace*{
- \backslash
- stretch{1}}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- © 2019 by Ryan C.
- Thompson
- \end_layout
- \begin_layout Standard
- \align center
- All rights reserved.
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- vspace*{
- \backslash
- stretch{2}}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- \begin_inset Newpage newpage
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- phantomsection
- \end_layout
- \begin_layout Plain Layout
- \backslash
- addcontentsline{toc}{chapter}{Thesis acceptance form}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- [Thesis acceptance form]
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Newpage newpage
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- phantomsection
- \end_layout
- \begin_layout Plain Layout
- \backslash
- addcontentsline{toc}{chapter}{Dedication}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- vspace*{
- \backslash
- stretch{1}}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- [Dedication]
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- vspace*{
- \backslash
- stretch{2}}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Newpage newpage
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- phantomsection
- \end_layout
- \begin_layout Plain Layout
- \backslash
- addcontentsline{toc}{chapter}{Acknowledgements}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section*
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- hspace*{
- \backslash
- stretch{1}}
- \end_layout
- \end_inset
- Acknowledgements
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- hspace*{
- \backslash
- stretch{1}}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- [Acknowledgements]
- \end_layout
- \begin_layout Standard
- \begin_inset Newpage newpage
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset toc
- LatexCommand tableofcontents
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset FloatList table
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset FloatList figure
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- To create a new abbreviation:
- \end_layout
- \begin_layout Enumerate
- Add an entry to abbrevs.tex
- \end_layout
- \begin_layout Enumerate
- Wrap every occurrence of the term in Insert -> Custom Insets -> Glossary
- Term (use appropriate variants for capital, plural, etc.), using Edit ->
- Find & Replace (Advanced).
- Skip section headers and float captions.
- \end_layout
- \begin_layout Plain Layout
- \begin_inset CommandInset href
- LatexCommand href
- target "https://ctan.org/pkg/glossaries?lang=en"
- literal "false"
- \end_inset
- \begin_inset CommandInset href
- LatexCommand href
- target "https://ctan.org/pkg/glossaries-extra"
- literal "false"
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- renewcommand*{
- \backslash
- glossaryname}{List of Abbreviations}%
- \end_layout
- \begin_layout Plain Layout
- \backslash
- printglossaries
- \end_layout
- \end_inset
- \end_layout
- \begin_layout List of TODOs
- \end_layout
- \begin_layout Chapter*
- Abstract
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- It is included as an integral part of the thesis and should immediately
- precede the introduction.
- \end_layout
- \begin_layout Plain Layout
- Preparing your Abstract.
- Your abstract (a succinct description of your work) is limited to 350 words.
- UMI will shorten it if they must; please do not exceed the limit.
- \end_layout
- \begin_layout Itemize
- Include pertinent place names, names of persons (in full), and other proper
- nouns.
- These are useful in automated retrieval.
- \end_layout
- \begin_layout Itemize
- Display symbols, as well as foreign words and phrases, clearly and accurately.
- Include transliterations for characters other than Roman and Greek letters
- and Arabic numerals.
- Include accents and diacritical marks.
- \end_layout
- \begin_layout Itemize
- Do not include graphs, charts, tables, or illustrations in your abstract.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Obviously the abstract gets written last.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status collapsed
- \begin_layout Chapter*
- Notes to draft readers
- \end_layout
- \begin_layout Plain Layout
- Thank you so much for agreeing to read my thesis and give me feedback on
- it.
- What you are currently reading is a rough draft, in need of many revisions.
- You can always find the latest version at
- \begin_inset CommandInset href
- LatexCommand href
- target "https://mneme.dedyn.io/~ryan/Thesis/thesis.pdf"
- literal "false"
- \end_inset
- .
- The PDF at this link is updated periodically with my latest revisions,
- but you can just download the current version and give me feedback on that.
- Don't worry about keeping up with the updates.
- \end_layout
- \begin_layout Plain Layout
- As for what feedback I'm looking for, first of all, don't waste your time
- marking spelling mistakes and such.
- I haven't run a spell checker on it yet, so let me worry about that.
- Also, I'm aware that many abbreviations are not properly introduced the
- first time they are used, so don't worry about that either.
- However, if you see any glaring formatting issues, such as a figure being
- too large and getting cut off at the edge of the page, please note them.
- In addition, if any of the text in the figures is too small, please note
- that as well.
- \end_layout
- \begin_layout Plain Layout
- Beyond that, what I'm mainly interested in is feedback on the content.
- For example: does the introduction flow logically, and does it provide
- enough background to understand the other chapters? Does each chapter make
- it clear what work and analyses I have done? Do the figures clearly communicate
- the results I'm trying to show? Do you feel that the claims in the results
- and discussion sections are well-supported? There's no need to suggest
- improvements; just note areas that you feel need improvement.
- Additionally, if you notice any un-cited claims in any chapter, please
- flag them for my attention.
- Similarly, if you discover any factual errors, please note them as well.
- \end_layout
- \begin_layout Plain Layout
- You can provide your feedback in whatever way is most convenient to you.
- You could mark up this PDF with highlights and notes, then send it back
- to me.
- Or you could collect your comments in a separate text file and send that
- to me, or whatever else you like.
- However, if you send me your feedback in a separate document, please note
- a section/figure/table number for each comment, and
- \emph on
- also
- \emph default
- send me the exact PDF that you read so I can reference it while reading
- your comments, since as mentioned above, the current version I'm working
- on will have changed by that point (which might include shuffling sections
- and figures around, changing their numbers).
- One last thing: you'll see a bunch of text in orange boxes throughout the
- PDF.
- These are notes to myself about things that need to be fixed later, so
- if you see a problem noted in an orange box, that means I'm already aware
- of it, and there's no need to comment on it.
- \end_layout
- \begin_layout Plain Layout
- My thesis is due Thursday, October 10th, so in order to be useful to me,
- I'll need your feedback at least several days before that, ideally by Monday,
- October 7th.
- If you have limited time and are unable to get through the whole thesis,
- please focus your efforts on Chapters 1 and 2, since those are the roughest
- and most in need of revision.
- Chapter 3 is fairly short and straightforward, and Chapter 4 is an adaptation
- of a paper that's already been through a few rounds of revision, so they
- should be a lot tighter.
- If you can't spare any time between now and then, or if something unexpected
- comes up, I understand.
- Just let me know.
- \end_layout
- \begin_layout Plain Layout
- Thanks again for your help, and happy reading!
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- mainmatter
- \end_layout
- \end_inset
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Switch from roman numerals to arabic for page numbers.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Chapter
- Introduction
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Reintroduce all abbreviations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- \begin_inset CommandInset label
- LatexCommand label
- name "sec:Biological-motivation"
- \end_inset
- Biological motivation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Find some figures to include even if permission is not obtained.
- Try to obtain permission, and if it cannot be obtained, remove/replace
- them later.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Rethink the subsection organization after the intro is written.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Rejection is the major long-term threat to organ and tissue allografts
- \end_layout
- \begin_layout Standard
- Organ and tissue transplants are a life-saving treatment for people who
- have lost the function of an important organ.
- In some cases, it is possible to transplant a patient's own tissue from
- one area of their body to another, referred to as an autograft.
- This is common for tissues that are distributed throughout many areas of
- the body, such as skin and bone.
- However, in cases of organ failure, there is no functional self tissue
- remaining, and a transplant from another person – a donor – is required.
- This is referred to as an allograft
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Valenzuela2017"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- Because an allograft comes from a donor of the same species who is genetically
- distinct from the recipient (with rare exceptions), genetic variants in
- protein-coding regions affect the polypeptide sequences encoded by the
- affected genes, resulting in protein products in the allograft that differ
- from the equivalent proteins produced by the graft recipient's own tissue.
- As a result, without intervention, the recipient's immune system will eventually
- identify the graft as foreign tissue and begin attacking it.
- This is called an alloimmune response, and if left unchecked, it eventually
- results in failure and death of the graft, a process referred to as transplant
- rejection
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- Rejection is the primary obstacle to long-term health and survival of an
- allograft
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Valenzuela2017"
- literal "false"
- \end_inset
- .
- Like any adaptive immune response, an alloimmune response generally occurs
- via two broad mechanisms: cellular immunity, in which CD8
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells recognizing graft-specific antigens induce apoptosis in the graft
- cells; and humoral immunity, in which B-cells produce antibodies that bind
- to graft proteins and direct an immune response against the graft
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- In either case, alloimmunity and rejection show most of the typical hallmarks
- of an adaptive immune response, in particular mediation by CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells and formation of immune memory.
-
- \end_layout
- \begin_layout Subsection
- Diagnosis and treatment of allograft rejection is a major challenge
- \end_layout
- \begin_layout Standard
- To prevent rejection, allograft recipients are treated with immune suppressive
- drugs
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kowalski2003,Murphy2012"
- literal "false"
- \end_inset
- .
- The goal is to achieve sufficient suppression of the immune system to prevent
- rejection of the graft without compromising the ability of the immune system
- to raise a normal response against infection.
- As such, a delicate balance must be struck: insufficient immune suppression
- may lead to rejection and ultimately loss of the graft; excessive suppression
- leaves the patient vulnerable to life-threatening opportunistic infections
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- Because every patient's metabolism is different, achieving this delicate
- balance requires drug dosage to be tailored for each patient.
- Furthermore, dosage must be tuned over time, as the immune system's activity
- varies over time and in response to external stimuli with no fixed pattern.
- In order to properly adjust the dosage of immune suppression drugs, it
- is necessary to monitor the health of the transplant and increase the dosage
- if evidence of rejection or alloimmune activity is observed.
- \end_layout
- \begin_layout Standard
- However, diagnosis of rejection is a significant challenge.
- Early diagnosis is essential in order to step up immune suppression before
- the immune system damages the graft beyond recovery
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Israeli2007"
- literal "false"
- \end_inset
- .
- The current gold standard test for graft rejection is a tissue biopsy,
- examined for visible signs of rejection by a trained histologist
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kurian2014"
- literal "false"
- \end_inset
- .
- When a patient shows symptoms of possible rejection, a
- \begin_inset Quotes eld
- \end_inset
- for cause
- \begin_inset Quotes erd
- \end_inset
- biopsy is performed to confirm the diagnosis, and immune suppression is
- adjusted as necessary.
- However, in many cases, the early stages of rejection are asymptomatic,
- known as
- \begin_inset Quotes eld
- \end_inset
- sub-clinical
- \begin_inset Quotes erd
- \end_inset
- rejection.
- In light of this, it is now common to perform
- \begin_inset Quotes eld
- \end_inset
- protocol biopsies
- \begin_inset Quotes erd
- \end_inset
- at specific times after transplantation of a graft, even if no symptoms
- of rejection are apparent, in addition to
- \begin_inset Quotes eld
- \end_inset
- for cause
- \begin_inset Quotes erd
- \end_inset
- biopsies
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Salomon2002,Wilkinson2006,Patel2018,Zachariah2018"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- However, biopsies have a number of downsides that limit their effectiveness
- as a diagnostic tool.
- First, the need for manual inspection by a histologist means that diagnosis
- is subject to the biases of the particular histologist examining the biopsy
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kurian2014"
- literal "false"
- \end_inset
- .
- In marginal cases, two different histologists may give two different diagnoses
- to the same biopsy.
- Second, a biopsy can only evaluate if rejection is occurring in the section
- of the graft from which the tissue was extracted.
- If rejection is localized to one section of the graft and the tissue is
- extracted from a different section, a false negative diagnosis may result.
- Most importantly, extraction of tissue from a graft is invasive and is
- treated as an injury by the body, which results in inflammation that in
- turn promotes increased immune system activity.
- Hence, the invasiveness of biopsies severely limits the frequency with
- which they can safely be performed
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Patel2018"
- literal "false"
- \end_inset
- .
- Typically, protocol biopsies are not scheduled more than about once per
- month
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Wilkinson2006"
- literal "false"
- \end_inset
- .
- A less invasive diagnostic test for rejection would bring manifold benefits.
- Such a test would enable more frequent testing and therefore earlier detection
- of rejection events.
- In addition, having a larger pool of historical data for a given patient
- would make it easier to evaluate when a given test is outside the normal
- parameters for that specific patient, rather than relying on normal ranges
- for the population as a whole.
- Lastly, the accumulated data from more frequent tests would be a boon to
- the transplant research community.
- Beyond simply providing more data overall, the better time granularity
- of the tests will enable studying the progression of a rejection event
- on the scale of days to weeks, rather than months.
- \end_layout
- \begin_layout Subsection
- Memory cells are resistant to immune suppression
- \end_layout
- \begin_layout Standard
- One of the defining features of the adaptive immune system is immune memory:
- the ability of the immune system to recognize a previously encountered
- foreign antigen and respond more quickly and more strongly to that antigen
- in subsequent encounters
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- When the immune system first encounters a new antigen, the T-cells that
- respond are known as naïve cells – T-cells that have never detected their
- target antigens before.
- Once activated by their specific antigen presented by an antigen-presenting
- cell in the proper co-stimulatory context, naïve cells differentiate into
- effector cells that carry out their respective functions in targeting and
- destroying the source of the foreign antigen.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TCR
- \end_layout
- \end_inset
- is a cell-surface protein complex produced by T-cells that is responsible
- for recognizing the T-cell's specific antigen, presented on a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MHC
- \end_layout
- \end_inset
- , the cell-surface protein complex used by an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- APC
- \end_layout
- \end_inset
- to present antigens to the T-cell.
- However, a naïve T-cell that recognizes its antigen also requires a co-stimulat
- ory signal, delivered through other interactions between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- APC
- \end_layout
- \end_inset
- surface proteins and T-cell surface proteins such as CD28.
- Without proper co-stimulation, a T-cell that recognizes its antigen either
- dies or enters an unresponsive state known as anergy, in which the T-cell
- becomes much more resistant to subsequent activation even with proper co-stimul
- ation.
- The dependency of activation on co-stimulation is an important feature
- of naïve lymphocytes that limits
- \begin_inset Quotes eld
- \end_inset
- false positive
- \begin_inset Quotes erd
- \end_inset
- immune responses against self antigens, because
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- APC
- \end_layout
- \end_inset
- usually only express the proper co-stimulation after the innate immune
- system detects signs of an active infection, such as the presence of common
- bacterial cell components or inflamed tissue.
-
- \end_layout
- \begin_layout Standard
- After the foreign antigen is cleared, most effector cells die since they
- are no longer needed, but some differentiate into memory cells and remain
- alive indefinitely.
- Like naïve cells, memory cells respond to detection of their specific antigen
- by differentiating into effector cells, ready to fight an infection
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- However, the memory response to antigen is qualitatively different: memory
- cells are more sensitive to detection of their antigen, and a lower concentrati
- on of antigen is sufficient to activate them
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Rogers2000,London2000,Berard2002"
- literal "false"
- \end_inset
- .
- In addition, memory cells are much less dependent on co-stimulation for
- activation: they can activate without certain co-stimulatory signals that
- are required by naïve cells, and the signals they do require are only required
- at lower levels in order to cause activation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "London2000"
- literal "false"
- \end_inset
- .
- Furthermore, mechanisms that induce tolerance (non-response to antigen)
- in naïve cells are much less effective on memory cells
- \begin_inset CommandInset citation
- LatexCommand cite
- key "London2000"
- literal "false"
- \end_inset
- .
- Lastly, once activated, memory cells proliferate and differentiate into
- effector cells more quickly than naïve cells do
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Berard2002"
- literal "false"
- \end_inset
- .
- In combination, these changes in lymphocyte behavior upon differentiation
- into memory cells account for the much quicker and stronger response of
- the immune system to subsequent exposure to a previously-encountered antigen.
- \end_layout
- \begin_layout Standard
- In the context of a pathogenic infection, immune memory is a major advantage,
- allowing an organism to fight off a previously encountered pathogen
- much more quickly and effectively than the first time it was encountered
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- However, if effector cells that recognize an antigen from an allograft
- are allowed to differentiate into memory cells, preventing rejection of
- the graft becomes much more difficult.
- Many immune suppression drugs work by interfering with the co-stimulation
- that naïve cells require in order to mount an immune response.
- Since memory cells do not require the same degree of co-stimulation, these
- drugs are not effective at suppressing an immune response that is mediated
- by memory cells.
- Secondly, because memory cells are able to mount a stronger and faster
- response to an antigen, all else being equal, stronger immune suppression
- is required to prevent an immune response mediated by memory cells.
- \end_layout
- \begin_layout Standard
- However, immune suppression affects the entire immune system, not just cells
- recognizing a specific antigen, so increasing the dosage of immune suppression
- drugs also increases the risk of complications from a compromised immune
- system, such as opportunistic infections
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- While the differences in cell surface markers between naïve and memory
- cells have been fairly well characterized, the internal regulatory mechanisms
- that allow memory cells to respond more quickly and without co-stimulation
- are still poorly understood.
- In order to develop methods of immune suppression that either prevent the
- formation of memory cells or work more effectively against memory cells,
- a more complete understanding of the mechanisms of immune memory formation
- and regulation is required.
- \end_layout
- \begin_layout Subsection
- Infusion of allogenic mesenchymal stem cells modulates the alloimmune response
- \end_layout
- \begin_layout Standard
- One promising experimental treatment for transplant rejection involves the
- infusion of allogenic
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- .
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- have been shown to have immune modulatory effects, both in general and
- specifically in the case of immune responses against allografts
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LeBlanc2003,Aggarwal2005,Bartholomew2009,Berman2010"
- literal "false"
- \end_inset
- .
- Furthermore, allogenic
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- themselves are immune-evasive and are rejected by the recipient's immune
- system more slowly than most allogenic tissues
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ankrum2014,Berglund2017"
- literal "false"
- \end_inset
- .
- In addition, treating
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- in culture with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IFNg
- \end_layout
- \end_inset
- is shown to enhance their immunosuppressive properties and homogenize their
- cellular phenotype, making them more amenable to development into a well-contro
- lled treatment
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Majumdar2003,Ryan2007"
- literal "false"
- \end_inset
- .
- The mechanisms by which
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- modulate the immune system are still poorly understood.
- Despite this, there is significant interest in using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IFNg
- \end_layout
- \end_inset
- -activated
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- infusion as a supplementary immune suppressive treatment for allograft
- transplantation.
-
- \end_layout
- \begin_layout Standard
- Note that despite the name, none of the above properties of
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- are believed to involve their ability as stem cells to differentiate into
- multiple different mature cell types, but rather the intercellular signals
- they produce
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ankrum2014"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- An overview of high-throughput assays would have been nice to have, but
- it's a bit late now.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- \begin_inset CommandInset label
- LatexCommand label
- name "sec:Overview-of-bioinformatic"
- \end_inset
- Overview of bioinformatic analysis methods
- \end_layout
- \begin_layout Standard
- The studies presented in this work all involve the analysis of high-throughput
- genomic and epigenomic assay data.
- Assays like microarrays and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- HTS
- \end_layout
- \end_inset
- are powerful methods for interrogating gene expression and epigenetic
- state across the entire genome.
- However, these data present many unique analysis challenges, and proper
- analysis requires identifying and exploiting genome-wide trends in the
- data to make up for the small sample sizes.
- A wide array of software tools is available to analyze these data.
- This section presents an overview of the most important methods and tools
- used throughout the following analyses, including what problems they solve,
- what assumptions they make, and a basic description of how they work.
- \end_layout
- \begin_layout Subsection
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- Limma
- \end_layout
- \end_inset
- : The standard linear modeling framework for genomics
- \end_layout
- \begin_layout Standard
- Linear models are a generalization of the
- \begin_inset Formula $t$
- \end_inset
- -test and ANOVA to arbitrarily complex experimental designs
- \begin_inset CommandInset citation
- LatexCommand cite
- key "chambers:1992"
- literal "false"
- \end_inset
- .
- In a typical linear model, there is one dependent variable observation
- per sample and a large number of samples.
- For example, in a linear model of height as a function of age and sex,
- there is one height measurement per person.
- However, when analyzing genomic data, each sample consists of observations
- of thousands of dependent variables.
- For example, in a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- experiment, the dependent variables may be the count of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- reads for each annotated gene, and there are tens of thousands of genes
- in the human genome.
- Since many assays measure other things than gene expression, the abstract
- term
- \begin_inset Quotes eld
- \end_inset
- feature
- \begin_inset Quotes erd
- \end_inset
- is used to refer to each dependent variable being measured, which may include
- any genomic element, such as genes, promoters, peaks, enhancers, exons,
- etc.
-
- \end_layout
- \begin_layout Standard
- The simplest approach to analyzing such data would be to fit the same model
- independently to each feature.
- However, this is undesirable for most genomics data sets.
- Genomics assays like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- HTS
- \end_layout
- \end_inset
- are expensive, and often the process of generating the samples is also
- quite expensive and time-consuming.
- This expense limits the sample sizes typically employed in genomics experiments
- , so a typical genomic data set has far more features being measured than
- observations (samples) per feature.
- As a result, the statistical power of the linear model for each individual
- feature is likewise limited by the small number of samples.
- However, because thousands of features from the same set of samples are
- analyzed together, there is an opportunity to improve the statistical power
- of the analysis by exploiting shared patterns of variation across features.
- This is the core feature of
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- , a linear modeling framework designed for genomic data.
-
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- Limma
- \end_layout
- \end_inset
- is typically used to analyze expression microarray data, and more recently
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, but it can also be used to analyze any other data for which linear
- modeling is appropriate.
- \end_layout
- \begin_layout Standard
- The central challenge when fitting a linear model is to estimate the variance
- of the data accurately.
- Out of all parameters required to evaluate statistical significance of
- an effect, the variance is the most difficult to estimate when sample sizes
- are small.
- A single shared variance could be estimated for all of the features together,
- and this estimate would be very stable, in contrast to the individual feature
- variance estimates.
- However, this would require the assumption that all features have equal
- variance, which is known to be false for most genomic data sets (for example,
- some genes' expression is known to be more variable than others').
-
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- Limma
- \end_layout
- \end_inset
- offers a compromise between these two extremes by using a method called
- empirical Bayes moderation to
- \begin_inset Quotes eld
- \end_inset
- squeeze
- \begin_inset Quotes erd
- \end_inset
- the distribution of estimated variances toward a single common value that
- represents the variance of an average feature in the data (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ebayes-example"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- )
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2004"
- literal "false"
- \end_inset
- .
- While the individual feature variance estimates are not stable, the common
- variance estimate for the entire data set is quite stable, so using a combinati
- on of the two yields a variance estimate for each feature with greater precision
- than the individual feature variances.
- The trade-off for this improvement is that squeezing each estimated variance
- toward the common value introduces some bias – the variance will be underestima
- ted for features with high variance and overestimated for features with
- low variance.
- Essentially,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- assumes that extreme variances are less common than variances close to
- the common value.
- The squeezed variance estimates from this empirical Bayes procedure are
- shown empirically to yield greater statistical power than either the individual
- feature variances or the single common value.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Intro/eBayes-CROP-RASTER.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Example of empirical Bayes squeezing of per-gene variances.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ebayes-example"
- \end_inset
- \series bold
- Example of empirical Bayes squeezing of per-gene variances.
- \series default
- A smooth trend line (red) is fitted to the individual gene variances (light
- blue) as a function of average gene abundance (logCPM).
- Then the individual gene variances are
- \begin_inset Quotes eld
- \end_inset
- squeezed
- \begin_inset Quotes erd
- \end_inset
- toward the trend (dark blue).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- On top of this core framework,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- also implements many other enhancements that further relax the assumptions
- of the model and extend the scope of what kinds of data it can analyze.
- Instead of squeezing toward a single common variance value,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- can model the common variance as a function of a covariate, such as average
- expression
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014"
- literal "false"
- \end_inset
- .
- This is essential for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, where higher gene counts yield more precise expression measurements
- and therefore smaller variances than low-count genes.
- While linear models typically assume that all samples have equal variance,
-
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- is able to relax this assumption by identifying and down-weighting samples
- that diverge more strongly from the linear model across many features
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2006,Liu2015"
- literal "false"
- \end_inset
- .
- In addition,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- is also able to fit simple mixed models incorporating one random effect
- in addition to the fixed effects represented by an ordinary linear model
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005a"
- literal "false"
- \end_inset
- .
- Once again,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- shares information between features to obtain a robust estimate for the
- random effect correlation.
- \end_layout
- \begin_layout Subsection
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- provides
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- -like analysis features for read count data
- \end_layout
- \begin_layout Standard
- Although
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- can be applied to read counts from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, it is less suitable for counts from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- and other sources, which tend to be much smaller and therefore violate
- the assumption of a normal distribution more severely.
- For all count-based data, the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- package works similarly to
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- , but uses a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GLM
- \end_layout
- \end_inset
- instead of a linear model.
- Relative to a linear model, a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GLM
- \end_layout
- \end_inset
- gains flexibility by relaxing several assumptions, the most important of
- which is the assumption of normally distributed errors.
- This allows the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GLM
- \end_layout
- \end_inset
- in
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- to model the counts directly using a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- distribution rather than modeling the normalized log counts using a normal
- distribution as
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- does
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Chen2014,McCarthy2012,Robinson2010a"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- distribution is a good fit for count data because it can be derived as
- a gamma-distributed mixture of Poisson distributions.
- The reads in an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- sample are assumed to be sampled from a much larger population, such that
- the sampling process does not significantly affect the proportions.
- Under this assumption, a gene's read count in an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- sample is distributed as
- \begin_inset Formula $\mathrm{Binomial}(n,p)$
- \end_inset
- , where
- \begin_inset Formula $n$
- \end_inset
- is the total number of reads sequenced from the sample and
- \begin_inset Formula $p$
- \end_inset
- is the proportion of total fragments in the sample derived from that gene.
- When
- \begin_inset Formula $n$
- \end_inset
- is large and
- \begin_inset Formula $p$
- \end_inset
- is small, a
- \begin_inset Formula $\mathrm{Binomial}(n,p)$
- \end_inset
- distribution is well-approximated by
- \begin_inset Formula $\mathrm{Poisson}(np)$
- \end_inset
- .
- Hence, if multiple sequencing runs are performed on the same
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- sample (with the same gene mixing proportions each time), each gene's read
- count is expected to follow a Poisson distribution.
- If the abundance of a gene,
- \begin_inset Formula $p,$
- \end_inset
- varies across biological replicates according to a gamma distribution,
- and
- \begin_inset Formula $n$
- \end_inset
- is held constant, then the result is a gamma-distributed mixture of Poisson
- distributions, which is equivalent to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- distribution.
- The assumption of a gamma distribution for the mixing weights is arbitrary,
- motivated by the convenience of the numerically tractable
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- distribution and the need to select
- \emph on
- some
- \emph default
- distribution, since the true shape of the distribution of biological variance
- is unknown.
- \end_layout
- \begin_layout Standard
- Thus,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- 's use of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- is equivalent to an
- \emph on
- a priori
- \emph default
- assumption that the variation in gene abundances between replicates follows
- a gamma distribution.
- The gamma shape parameter in the context of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- is called the dispersion, and the square root of this dispersion is referred
- to as the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- , since it represents the variability in abundance that was present in the
- biological samples prior to the Poisson
- \begin_inset Quotes eld
- \end_inset
- noise
- \begin_inset Quotes erd
- \end_inset
- that was generated by the random sampling of reads in proportion to feature
- abundances.
- Like
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- ,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- estimates the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- for each feature using an empirical Bayes procedure that represents a compromis
- e between per-feature dispersions and a single pooled dispersion estimate
- shared across all features.
- For differential abundance testing,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- offers a likelihood ratio test based on the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GLM
- \end_layout
- \end_inset
- .
- However, this test assumes the dispersion parameter is known exactly rather
- than estimated from the data, which can result in overstating the significance
- of differential abundance results.
- More recently, a quasi-likelihood test has been introduced that properly
- factors the uncertainty in dispersion estimation into the estimates of
- statistical significance, and this test is recommended over the likelihood
- ratio test in most cases
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lund2012"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- Calling consensus peaks from ChIP-seq data
- \end_layout
- \begin_layout Standard
- Unlike
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, in which gene annotations provide a well-defined set of discrete
- genomic regions in which to count reads,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads can potentially occur anywhere in the genome.
- However, most genome regions will not contain significant
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- read coverage, and analyzing every position in the entire genome is statistical
- ly and computationally infeasible, so it is necessary to identify regions
- of interest inside which
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads will be counted and analyzed.
- One option is to define a set of interesting regions
- \emph on
- a priori
- \emph default
- , for example by defining a promoter region for each annotated gene.
- However, it is also possible to use the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data itself to identify regions with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- read coverage significantly above the background level, known as peaks.
-
- \end_layout
- \begin_layout Standard
- The challenge in peak calling is that the immunoprecipitation step is not
- 100% selective, so some fraction of reads are
- \emph on
- not
- \emph default
- derived from DNA fragments that were bound by the immunoprecipitated protein.
- These are referred to as background reads.
- Biases in amplification and sequencing, as well as the aforementioned Poisson
- randomness of the sequencing itself, can cause fluctuations in the background
- level of reads that resemble peaks, and the true peaks must be distinguished
- from these.
- It is common to sequence the input DNA to the ChIP-seq reaction alongside
- the immunoprecipitated product in order to aid in estimating the fluctuations
- in background level across the genome.
- \end_layout
- \begin_layout Standard
- There are generally two kinds of peaks that can be identified: narrow peaks
- and broadly enriched regions.
- Proteins that bind specific sites in the genome (such as many transcription
- factors) typically show most of their
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- read coverage at these specific sites and very little coverage anywhere
- else.
- Because the footprint of the protein is consistent wherever it binds, each
- peak has a consistent width, typically tens to hundreds of base pairs,
- representing the length of DNA that it binds to.
- Algorithms like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MACS
- \end_layout
- \end_inset
- exploit this pattern to identify specific loci at which such
- \begin_inset Quotes eld
- \end_inset
- narrow peaks
- \begin_inset Quotes erd
- \end_inset
- occur by looking for the characteristic peak shape in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- coverage rising above the surrounding background coverage
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zhang2008"
- literal "false"
- \end_inset
- .
- In contrast, some proteins, chief among them histones, do not bind only
- at a small number of specific sites, but rather bind potentially almost
- everywhere in the entire genome.
- When looking at histone marks, adjacent histones tend to be similarly marked,
- and a given mark may be present on an arbitrary number of consecutive histones
- along the genome.
- Hence, there is no consistent
- \begin_inset Quotes eld
- \end_inset
- footprint size
- \begin_inset Quotes erd
- \end_inset
- for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- peaks based on histone marks, and peaks typically span many histones.
- As a result, typical peaks span many hundreds or even thousands of base pairs.
- Instead of identifying specific loci of strong enrichment, algorithms like
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SICER
- \end_layout
- \end_inset
- assume that peaks are represented in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data by modest enrichment above background occurring across broad regions,
- and they attempt to identify the extent of those regions
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zang2009"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- Regardless of the type of peak identified, it is important to identify peaks
- that occur consistently across biological replicates.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ENCODE
- \end_layout
- \end_inset
- project has developed a method called
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- for this purpose
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Li2006"
- literal "false"
- \end_inset
- .
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- is defined as the probability that a peak identified in one biological
- replicate will
- \emph on
- not
- \emph default
- also be identified in a second replicate.
- Where the more familiar false discovery rate measures the degree of corresponde
- nce between a data-derived ranked list and the (unknown) true list of significan
- t features,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- instead measures the degree of correspondence between two ranked lists
- derived from different data.
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- assumes that the highest-ranked features are
- \begin_inset Quotes eld
- \end_inset
- signal
- \begin_inset Quotes erd
- \end_inset
- peaks that tend to be listed in the same order in both lists, while the
- lowest-ranked features are essentially noise peaks, listed in random order
- with no correspondence between the lists.
-
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- attempts to locate the
- \begin_inset Quotes eld
- \end_inset
- crossover point
- \begin_inset Quotes erd
- \end_inset
- between the signal and the noise by determining how far down the list the
- rank consistency breaks down into randomness (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Example-IDR"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/IDR/D4659vsD5053_epic-PAGE1-CROP-RASTER.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Example IDR consistency plot.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Example-IDR"
- \end_inset
- \series bold
- Example IDR consistency plot.
- \series default
- Peak calls in two replicates are ranked from highest score (top and right)
- to lowest score (bottom and left).
- IDR identifies reproducible peaks, which rank highly in both replicates
- (light blue), separating them from
- \begin_inset Quotes eld
- \end_inset
- noise
- \begin_inset Quotes erd
- \end_inset
- peak calls whose ranking is not reproducible between replicates (dark blue).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In addition to other considerations, if called peaks are to be used as regions
- of interest for differential abundance analysis, then care must be taken
- to call peaks in a way that is blind to differential abundance between
- experimental conditions, or else the statistical significance calculations
- for differential abundance will overstate their confidence in the results.
- The
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- csaw
- \end_layout
- \end_inset
- package provides guidelines for calling peaks in this way: peaks are called
- based on a combination of all
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads from all experimental conditions, so that the identified peaks are
- based on the average abundance across all conditions, which is independent
- of any differential abundance between conditions
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lun2015a"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- Normalization of high-throughput data is non-trivial and application-dependent
- \end_layout
- \begin_layout Standard
- High-throughput data sets invariably require some kind of normalization
- before further analysis can be conducted.
- In general, the goal of normalization is to remove effects in the data
- that are caused by technical factors that have nothing to do with the biology
- being studied.
- \end_layout
- \begin_layout Standard
- For Affymetrix expression arrays, the standard normalization algorithm used
- in most analyses is
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Irizarry2003a"
- literal "false"
- \end_inset
- .
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- is designed with the assumption that some fraction of probes on each array
- will be artifactual and takes advantage of the fact that each gene is represent
- ed by multiple probes by implementing normalization and summarization steps
- that are robust against outlier probes.
- However,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- uses the probe intensities of all arrays in the data set in the normalization
- of each individual array, meaning that the normalized expression values
- in each array depend on every array in the data set, and will necessarily
- change each time an array is added to or removed from the data set.
- If this is undesirable,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- implements a variant of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- where the relevant distributional parameters are learned from a large reference
- set of diverse public array data sets and then
- \begin_inset Quotes eld
- \end_inset
- frozen
- \begin_inset Quotes erd
- \end_inset
- , so that each array is effectively normalized against this frozen reference
- set rather than the other arrays in the data set under study
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- Other available array normalization methods considered include dChip,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- , and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Li2001,Pelz2008,Piccolo2012"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- In contrast,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- HTS
- \end_layout
- \end_inset
- data present very different normalization challenges.
- The simplest case is
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- in which read counts are obtained for a set of gene annotations, yielding
- a matrix of counts with rows representing genes and columns representing
- samples.
- Because
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- approximates a process of sampling from a population with replacement,
- each gene's count is only interpretable as a fraction of the total reads
- for that sample.
- For that reason,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- abundances are often reported as
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- CPM
- \end_layout
- \end_inset
- .
- Furthermore, if the abundance of a single gene increases, then in order
- for its fraction of the total reads to increase, all other genes' fractions
- must decrease to accommodate it.
- This effect is known as composition bias, and it is an artifact of the
- read sampling process that has nothing to do with the biology of the samples
- and must therefore be normalized out.
- The most commonly used methods to normalize for composition bias in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data seek to equalize the average gene abundance across samples, under
- the assumption that the average gene is likely not changing
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Robinson2010,Anders2010"
- literal "false"
- \end_inset
- .
- The effect of such normalizations is to center the distribution of
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- logFC
- \end_layout
- \end_inset
- at zero.
- Note that if a true global difference in gene expression is present in
- the data, this difference will be normalized out as well, since it is indisting
- uishable from composition bias.
- In other words,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- cannot measure absolute gene expression, only gene expression as a fraction
- of total reads.
- \end_layout
- \begin_layout Standard
- In
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data, normalization is not as straightforward.
- The
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- csaw
- \end_layout
- \end_inset
- package implements several different normalization strategies and provides
- guidance on when to use each one
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lun2015a"
- literal "false"
- \end_inset
- .
- Briefly, a typical
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- sample has a bimodal distribution of read counts: a low-abundance mode
- representing background regions and a high-abundance mode representing
- signal regions.
- This offers two mutually incompatible normalization strategies: equalizing
- background coverage or equalizing signal coverage (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:chipseq-norm-example"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- If the experiment is well controlled and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP
- \end_layout
- \end_inset
- efficiency is known to be consistent across all samples, then normalizing
- the background coverage to be equal across all samples is a reasonable
- strategy.
- If this is not a safe assumption, then the preferred strategy is to normalize
- the signal regions in a way similar to
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data by assuming that the average signal region is not changing abundance
- between samples.
- Beyond this, if a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- experiment has a more complicated structure that doesn't show the typical
- bimodal count distribution, it may be necessary to implement a normalization
- as a smooth function of abundance.
- However, this strategy makes a much stronger assumption about the data:
- that the average
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logFC
- \end_layout
- \end_inset
- is zero across all abundance levels.
- Hence, the simpler scaling normalizations based on background or signal
- regions are generally preferred whenever possible.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-sample-MAplot-bins-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Example MA plot of ChIP-seq read counts in 10kb bins for two arbitrary samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:chipseq-norm-example"
- \end_inset
- \series bold
- Example MA plot of ChIP-seq read counts in 10kb bins for two arbitrary samples.
-
- \series default
- The distribution of bins is bimodal along the x axis (average abundance),
- with the left mode representing
- \begin_inset Quotes eld
- \end_inset
- background
- \begin_inset Quotes erd
- \end_inset
- regions with no protein binding and the right mode representing bound regions.
- The modes are also separated on the y axis (logFC), motivating two conflicting
- normalization strategies: background normalization (red) and signal normalizati
- on (blue and green, two similar signal normalizations).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- ComBat and SVA for correction of known and unknown batch effects
- \end_layout
- \begin_layout Standard
- In addition to well-understood effects that can be easily normalized out,
- a data set often contains confounding biological effects that must be accounted
- for in the modeling step.
- For instance, in an experiment with pre-treatment and post-treatment samples
- of cells from several different donors, donor variability represents a
- known batch effect.
- The most straightforward correction for known batches is to estimate the
- mean for each batch independently and subtract out the differences, so
- that all batches have identical means for each feature.
- However, as with variance estimation, estimating the differences in batch
- means is not necessarily robust at the feature level, so the ComBat method
- adds empirical Bayes squeezing of the batch mean differences toward a common
- value, analogous to
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- 's empirical Bayes squeezing of feature variance estimates
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Johnson2007"
- literal "false"
- \end_inset
- .
- Effectively, ComBat assumes that modest differences between batch means
- are real batch effects, but extreme differences between batch means are
- more likely to be the result of outlier observations that happen to line
- up with the batches rather than a genuine batch effect.
- The result is a batch correction that is more robust against outliers than
- simple subtraction of mean differences.
- \end_layout
- \begin_layout Standard
- In some data sets, unknown batch effects may be present due to inherent
- variability in the data, caused by either technical or biological effects.
- Examples of unknown batch effects include variations in enrichment efficiency
- between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- samples, variations in populations of different cell types, and the effects
- of uncontrolled environmental factors on gene expression in humans or live
- animals.
- In an ordinary linear model context, unknown batch effects cannot be inferred
- and must be treated as random noise.
- However, in high-throughput experiments, once again information can be
- shared across features to identify patterns of un-modeled variation that
- are repeated in many features.
- One attractive strategy would be to perform
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVD
- \end_layout
- \end_inset
- on the matrix of linear model residuals (which contain all the un-modeled
- variation in the data) and take the first few singular vectors as batch
- effects.
- While this can be effective, it makes the unreasonable assumption that
- all batch effects are completely uncorrelated with any of the effects being
- modeled.
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- starts with this approach, but takes some additional steps to identify
- batch effects in the full data that are both highly correlated with the
- singular vectors in the residuals and least correlated with the effects
- of interest
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007"
- literal "false"
- \end_inset
- .
- Since the final batch effects are estimated from the full data, moderate
- correlations between the batch effects and effects of interest are allowed,
- which gives
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- much more freedom to estimate the true extent of the batch effects compared
- to simple residual
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVD
- \end_layout
- \end_inset
- .
- Once the surrogate variables are estimated, they can be included as covariates
- in the linear model in a similar fashion to known batch effects in order
- to subtract out their effects on each feature's abundance.
- \end_layout
- \begin_layout Subsection
- Interpreting p-value distributions and estimating false discovery rates
- \end_layout
- \begin_layout Standard
- When testing thousands of genes for differential expression or performing
- thousands of statistical tests for other kinds of genomic data, the result
- is thousands of p-values.
- By construction, p-values have a
- \begin_inset Formula $\mathrm{Uniform}(0,1)$
- \end_inset
- distribution under the null hypothesis.
- This means that if all null hypotheses are true in a large number
- \begin_inset Formula $N$
- \end_inset
- of tests, then for any significance threshold
- \begin_inset Formula $T$
- \end_inset
- , approximately
- \begin_inset Formula $N\times T$
- \end_inset
- p-values would be called
- \begin_inset Quotes eld
- \end_inset
- significant
- \begin_inset Quotes erd
- \end_inset
- at that threshold even though the null hypotheses are all true.
- These are called false discoveries.
- \end_layout
- \begin_layout Standard
- When only a fraction of null hypotheses are true, the p-value distribution
- will be a mixture of a uniform component representing the null hypotheses
- that are true and a non-uniform component representing the null hypotheses
- that are not true (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Example-pval-hist"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The fraction belonging to the uniform component is referred to as
- \begin_inset Formula $\pi_{0}$
- \end_inset
- , which ranges from 1 (all null hypotheses true) to 0 (all null hypotheses
- false).
- Furthermore, the non-uniform component must be biased toward zero, since
- any evidence against the null hypothesis pushes the p-value for a test
- toward zero.
- We can exploit this fact to estimate the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- for any significance threshold by estimating the degree to which the density
- of p-values left of that threshold exceeds what would be expected for a
- uniform distribution.
- In genomics, the most commonly used
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- estimation method, and the one used in this work, is that of
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- glsdisp{BH}{Benjamini and Hochberg}
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- This is a conservative method that effectively assumes
- \begin_inset Formula $\pi_{0}=1$
- \end_inset
- .
- Hence it gives an estimated upper bound for the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- at any significance threshold, rather than a point estimate.
-
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Intro/med-pval-hist-colored-CROP.pdf
- lyxscale 50
- width 100col%
- groupId colfullwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Example p-value histogram.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Example-pval-hist"
- \end_inset
- \series bold
- Example p-value histogram.
-
- \series default
- The distribution of p-values from a large number of independent tests (such
- as differential expression tests for each gene in the genome) is a mixture
- of a uniform component representing the null hypotheses that are true (blue
- shading) and a zero-biased component representing the null hypotheses that
- are false (red shading).
- The FDR for any column in the histogram is the fraction of that column
- that is blue.
- The line
- \begin_inset Formula $y=\pi_{0}$
- \end_inset
- represents the theoretical uniform component of this p-value distribution,
- while the line
- \begin_inset Formula $y=1$
- \end_inset
- represents the uniform component when all null hypotheses are true.
- Note that in real data, the true status of each hypothesis is unknown,
- so only the overall shape of the distribution is known.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- We can also estimate
- \begin_inset Formula $\pi_{0}$
- \end_inset
- for the entire distribution of p-values, which can give an idea of the
- overall signal size in the data without setting any significance threshold
- or making any decisions about which specific null hypotheses to reject.
- As with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- estimation, there are many methods proposed for estimating
- \begin_inset Formula $\pi_{0}$
- \end_inset
- .
- The one used in this work is the Phipson method of averaging local
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013Thesis"
- literal "false"
- \end_inset
- .
- Once
- \begin_inset Formula $\pi_{0}$
- \end_inset
- is estimated, the number of null hypotheses that are false can be estimated
- as
- \begin_inset Formula $(1-\pi_{0})\times N$
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- Conversely, a p-value distribution that is neither uniform nor zero-biased
- is evidence of a modeling failure.
- Such a distribution would imply that there is less than zero evidence against
- the null hypothesis, which is not possible (in a frequentist setting).
- Attempting to estimate
- \begin_inset Formula $\pi_{0}$
- \end_inset
- from such a distribution would yield an estimate greater than 1, a nonsensical
- result.
- The usual cause of a poorly-behaving p-value distribution is a model assumption
- that is violated by the data, such as assuming equal variance between groups
- (homoskedasticity) when the variance of each group is not equal (heteroskedasti
- city) or failing to model a strong confounding batch effect.
- In particular, such a p-value distribution is
- \emph on
- not
- \emph default
- consistent with a simple lack of signal in the data, as this should result
- in a uniform distribution.
- Hence, observing such a p-value distribution should prompt a search for
- violated model assumptions.
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Subsection
- Factor analysis: PCA, PCoA, MOFA
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Not sure if this merits a subsection here.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Batch-corrected
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCA
- \end_layout
- \end_inset
- is informative, but careful application is required to avoid bias
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Structure of the thesis
- \end_layout
- \begin_layout Standard
- This thesis presents three instances of using high-throughput genomic and epigenomic
- assays to investigate hypotheses or solve problems relating to the study
- of transplant rejection.
- In Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:CD4-ChIP-seq"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- are used to investigate the dynamics of promoter histone methylation as
- it relates to gene expression in T-cell activation and memory.
- Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:Improving-array-based-diagnostic"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- looks at several array-based assays with the potential to diagnose transplant
- rejection and shows that analyses of this array data are greatly improved
- by paying careful attention to normalization and preprocessing.
- Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:Globin-blocking-cyno"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- presents a custom method for improving
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- of non-human primate blood samples by preventing reverse transcription
- of unwanted globin transcripts.
- Finally, Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:Conclusions"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- summarizes the overarching lessons and strategies learned through these
- analyses that can be applied to all future analyses of high-throughput
- genomic assays.
- \end_layout
- \begin_layout Chapter
- \begin_inset CommandInset label
- LatexCommand label
- name "chap:CD4-ChIP-seq"
- \end_inset
- Reproducible genome-wide epigenetic analysis of H3K4 and H3K27 methylation
- in naïve and memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell activation
- \end_layout
- \begin_layout Standard
- \size large
- Ryan C.
- Thompson, Sarah A.
- Lamere, Daniel R.
- Salomon
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- This causes all abbreviations to be reintroduced.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Introduction
- \end_layout
- \begin_layout Standard
- CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells are central to all adaptive immune responses, as well as immune
- memory
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- After an infection is cleared, a subset of the naïve CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells that responded to that infection differentiate into memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells, which are responsible for responding to the same pathogen in the
- future.
- Memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells are functionally distinct, able to respond to an infection more
- quickly and without the co-stimulation required by naïve CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells.
- However, the molecular mechanisms underlying this functional distinction
- are not well understood.
- Epigenetic regulation via histone modification is thought to play an important
- role, but while many studies have looked at static snapshots of histone
- methylation in T-cells, few studies have looked at the dynamics of histone
- regulation after T-cell activation or at the differences in histone methylation
- between naïve and memory T-cells.
- H3K4me2, H3K4me3 and H3K27me3 are three histone marks thought to be major
- epigenetic regulators of gene expression.
- The goal of the present study is to investigate the role of these histone
- marks in CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell activation kinetics and memory differentiation.
- In static snapshots, H3K4me2 and H3K4me3 are often observed in the promoters
- of highly transcribed genes, while H3K27me3 is more often observed in promoters
- of inactive genes with little to no transcription occurring.
- As a result, the two H3K4 marks have been characterized as
- \begin_inset Quotes eld
- \end_inset
- activating
- \begin_inset Quotes erd
- \end_inset
- marks, while H3K27me3 has been characterized as
- \begin_inset Quotes eld
- \end_inset
- deactivating
- \begin_inset Quotes erd
- \end_inset
- .
- Despite these characterizations, the actual causal relationship between
- these histone modifications and gene transcription is complex and likely
- involves positive and negative feedback loops between the two.
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Standard
- In order to investigate the relationship between gene expression and these
- histone modifications in the context of naïve and memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell activation, a previously published data set of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data was re-analyzed using up-to-date methods designed to address the specific
- analysis challenges posed by this data set.
- The data set contains naïve and memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell samples in a time course before and after activation.
- Like the original analysis, this analysis looks at the dynamics of these
- histone marks and compares them to gene expression dynamics at the same
- time points during activation, and also compares them between naïve and
- memory cells, in the hope of discovering evidence of new mechanistic details
- in the interplay between them.
- The original analysis of this data treated each gene promoter as a monolithic
- unit and mostly assumed that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads or peaks occurring anywhere within a promoter were equivalent, regardless
- of where they occurred relative to the gene structure.
- For an initial analysis of the data, this was a necessary simplifying assumptio
- n.
- The current analysis aims to relax this assumption, first by directly analyzing
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- peaks for differential modification, and second by taking a more granular
- look at the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- read coverage within promoter regions to ask whether the location of histone
- modifications relative to the gene's
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- is an important factor, as opposed to simple proximity.
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Standard
- A reproducible workflow was written to analyze the raw
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data from previous studies (
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
- accession number
- \begin_inset CommandInset href
- LatexCommand href
- name "GSE73214"
- target "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE73214"
- literal "false"
- \end_inset
- )
- \begin_inset CommandInset citation
- LatexCommand cite
- key "gh-cd4-csaw,LaMere2015,LaMere2016,LaMere2017"
- literal "true"
- \end_inset
- .
- Briefly, this data consists of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- from CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells from 4 donors.
- From each donor, naïve and memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells were isolated separately.
- Then cultures of both cell types were activated with CD3/CD28 beads, and samples
- were taken at 4 time points: Day 0 (pre-activation), Day 1 (early activation),
- Day 5 (peak activation), and Day 14 (post-activation).
- For each combination of cell type and time point, RNA was isolated and
- sequenced, and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- was performed for each of 3 histone marks: H3K4me2, H3K4me3, and H3K27me3.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- input DNA was also sequenced for each sample.
- The result was 32 samples for each assay.
- \end_layout
- \begin_layout Subsection
- RNA-seq differential expression analysis
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- STAR quantification, Entrez vs Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \qquad{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon+Shoal quantification, Entrez vs Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- STAR vs HISAT2 quantification, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \qquad{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/star-vs-salmon-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon vs STAR quantification, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon vs Kallisto quantification, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \qquad{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon+Shoal vs Salmon alone, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-norm-comp"
- \end_inset
- RNA-seq comparisons
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Sequence reads were retrieved from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SRA
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leinonen2011"
- literal "false"
- \end_inset
- .
- Five different alignment and quantification methods were tested for the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Dobin2012,Kim2019,Liao2014,Pimentel2016,Patro2017,gh-shoal,gh-hg38-ref"
- literal "false"
- \end_inset
- .
- Each quantification was tested with both Ensembl transcripts and GENCODE
- known gene annotations
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zerbino2018,Harrow2012"
- literal "false"
- \end_inset
- .
- Comparisons of downstream results from each combination of quantification
- method and reference revealed that all quantifications gave broadly similar
- results for most genes, with none being obviously superior.
- Salmon quantification with regularization by shoal with the Ensembl annotation
- was chosen as the method theoretically most likely to partially mitigate
- some of the batch effect in the data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Patro2017,gh-shoal"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- Due to an error in sample preparation, the RNA from the samples for days
- 0 and 5 was sequenced using a different kit than that for days 1 and
- 14.
- This induced a substantial batch effect in the data due to differences
- in sequencing biases between the two kits, and this batch effect is unfortunate
- ly confounded with the time point variable (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA-no-batchsub"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- To do the best possible analysis with this data, this batch effect was
- subtracted out from the data using ComBat
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Johnson2007"
- literal "false"
- \end_inset
- , ignoring the time point variable due to the confounding with the batch
- variable.
- The result is a marked improvement, but the unavoidable confounding with
- time point means that certain real patterns of gene expression will be
- indistinguishable from the batch effect and subtracted out as a result.
- Specifically, any
- \begin_inset Quotes eld
- \end_inset
- zig-zag
- \begin_inset Quotes erd
- \end_inset
- pattern, such as a gene whose expression goes up on day 1, down on day
- 5, and back up again on day 14, will be attenuated or eliminated entirely.
- In the context of a T-cell activation time course, it is unlikely that
- many genes of interest will follow such an expression pattern, so this
- loss was deemed an acceptable cost for correcting the batch effect.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-no-batchsub-CROP.png
- lyxscale 25
- width 75col%
- groupId rna-pca-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA-no-batchsub"
- \end_inset
- Before batch correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-combat-batchsub-CROP.png
- lyxscale 25
- width 75col%
- groupId rna-pca-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA-ComBat-batchsub"
- \end_inset
- After batch correction with ComBat
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- PCoA plots of RNA-seq data showing effect of batch correction.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA"
- \end_inset
- \series bold
- PCoA plots of RNA-seq data showing effect of batch correction.
-
- \series default
- The uncorrected data (a) shows a clear separation between samples from the
- two batches (red and blue) dominating the first principal coordinate.
- After correction with ComBat (b), the two batches now have approximately
- the same center, and the first two principal coordinates both show separation
- between experimental conditions rather than batches.
- (Note that time points are shown in hours rather than days in these plots.)
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- However, removing the systematic component of the batch effect still leaves
- the noise component.
- The gene quantifications from the first batch are substantially noisier
- than those in the second batch.
- This analysis corrected for this by using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- 's sample weighting method to assign lower weights to the noisy samples
- of batch 1 (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-seq-weights-vs-covars"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- )
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2006,Liu2015"
- literal "false"
- \end_inset
- .
- The resulting analysis gives an accurate assessment of statistical significance
- for all comparisons, which unfortunately means a loss of statistical power
- for comparisons involving samples in batch 1.
- \end_layout
- \begin_layout Standard
- In any case, the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- counts were first normalized using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TMM
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Robinson2010"
- literal "false"
- \end_inset
- , converted to normalized
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- with quality weights using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- voomWithQualityWeights
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014,Liu2015"
- literal "false"
- \end_inset
- , and batch-corrected at this point using ComBat.
- A linear model was fit to the batch-corrected, quality-weighted data for
- each gene using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- , and each gene was tested for differential expression using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- 's empirical Bayes moderated
- \begin_inset Formula $t$
- \end_inset
- -test
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005,Law2014,Phipson2016"
- literal "false"
- \end_inset
- .
- P-values were corrected for multiple testing using the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BH
- \end_layout
- \end_inset
- procedure for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- control
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-nobcv-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- RNA-seq sample weights, grouped by experimental and technical covariates.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-seq-weights-vs-covars"
- \end_inset
- \series bold
- RNA-seq sample weights, grouped by experimental and technical covariates.
-
- \series default
- Inverse variance weights were estimated for each sample using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- 's
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- arrayWeights
- \end_layout
- \end_inset
- function (part of
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- voomWithQualityWeights
- \end_layout
- \end_inset
- ).
- The samples were grouped by each known covariate and the distribution of
- weights was plotted for each group.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- ChIP-seq analyses
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Be consistent about use of
- \begin_inset Quotes eld
- \end_inset
- differential binding
- \begin_inset Quotes erd
- \end_inset
- vs
- \begin_inset Quotes eld
- \end_inset
- differential modification
- \begin_inset Quotes erd
- \end_inset
- throughout this chapter.
- The latter is usually preferred.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Sequence reads were retrieved from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SRA
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leinonen2011"
- literal "false"
- \end_inset
- .
-
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- (and input) reads were aligned to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRCh38
- \end_layout
- \end_inset
- genome assembly using Bowtie 2
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Langmead2012,Schneider2017,gh-hg38-ref"
- literal "false"
- \end_inset
- .
- Artifact regions were annotated using a custom implementation of the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- GreyListChIP
- \end_layout
- \end_inset
- algorithm, and these
- \begin_inset Quotes eld
- \end_inset
- greylists
- \begin_inset Quotes erd
- \end_inset
- were merged with the published
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ENCODE
- \end_layout
- \end_inset
- blacklists
- \begin_inset CommandInset citation
- LatexCommand cite
- key "greylistchip,Dunham2012,Amemiya2019,gh-cd4-csaw"
- literal "false"
- \end_inset
- .
- Any read or called peak overlapping one of these regions was regarded as
- artifactual and excluded from downstream analyses.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:CCF-master"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the improvement after blacklisting in the strand cross-correlation
- plots, a common quality control plot for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kharchenko2008,Lun2015a"
- literal "false"
- \end_inset
- .
- Peaks were called using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- epic
- \end_layout
- \end_inset
- , an implementation of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SICER
- \end_layout
- \end_inset
- algorithm
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zang2009,gh-epic"
- literal "false"
- \end_inset
- .
- Peaks were also called separately using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MACS
- \end_layout
- \end_inset
- , but
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MACS
- \end_layout
- \end_inset
- was determined to be a poor fit for the data, and these peak calls are
- not used in any further analyses
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zhang2008"
- literal "false"
- \end_inset
- .
- Consensus peaks were determined by applying the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- framework
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Li2006,gh-idr"
- literal "false"
- \end_inset
- to find peaks consistently called in the same locations across all 4 donors.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/csaw/CCF-plots-noBL-PAGE2-CROP.pdf
- lyxscale 75
- width 47col%
- groupId ccf-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:CCF-without-blacklist"
- \end_inset
- Cross-correlation plots without removing blacklisted reads.
-
- \series default
- Without blacklisting, many artifactual peaks are visible in the cross-correlatio
- ns of the ChIP-seq samples, and the peak at the true fragment size (147
- \begin_inset space ~
- \end_inset
- bp) is frequently overshadowed by the artifactual peak at the read length
- (100
- \begin_inset space ~
- \end_inset
- bp).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/csaw/CCF-plots-PAGE2-CROP.pdf
- lyxscale 75
- width 47col%
- groupId ccf-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:CCF-with-blacklist"
- \end_inset
- Cross-correlation plots with blacklisted reads removed.
- \series default
- After blacklisting, most ChIP-seq samples have clean-looking periodic cross-cor
- relation plots, with the largest peak around 147
- \begin_inset space ~
- \end_inset
- bp, the expected size for a fragment of DNA from a single nucleosome, and
- little to no peak at the read length, 100
- \begin_inset space ~
- \end_inset
- bp.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Figure font too small
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Strand cross-correlation plots for ChIP-seq data, before and after blacklisting.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:CCF-master"
- \end_inset
- \series bold
- Strand cross-correlation plots for ChIP-seq data, before and after blacklisting.
-
- \series default
- The number of reads starting at each position in the genome was counted
- separately for the plus and minus strands, and then the correlation coefficient
- between the read start counts for both strands (cross-correlation) was
- computed after shifting the plus strand counts forward by a specified interval
- (the delay).
- This was repeated for every delay value from 0 to 1000, and the cross-correlati
- on values were plotted as a function of the delay.
- In good quality samples, cross-correlation is maximized when the delay
- equals the fragment size; in poor quality samples, cross-correlation is
- often maximized when the delay equals the read length, an artifactual peak
- whose cause is not fully understood.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Promoters were defined by computing the distance from each annotated
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- to the nearest called peak and examining the distribution of distances,
- observing that peaks for each histone mark were enriched within a certain
- distance of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- (Note: this analysis was performed using the original peak calls and expression
- values from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- .) For H3K4me2 and H3K4me3, this distance was about 1
- \begin_inset space ~
- \end_inset
- kbp, while for H3K27me3 it was 2.5
- \begin_inset space ~
- \end_inset
- kbp.
- These distances were used as an
- \begin_inset Quotes eld
- \end_inset
- effective promoter radius
- \begin_inset Quotes erd
- \end_inset
- for each mark.
- The promoter region for each gene was defined as the region of the genome
- within this distance upstream or downstream of the gene's annotated
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- For genes with multiple annotated
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , a promoter region was defined for each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- individually, and any promoters that overlapped (due to multiple
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- being closer than 2 times the radius) were merged into one large promoter.
- Thus, some genes had multiple promoters defined, which were each analyzed
- separately for differential modification.
- \end_layout
- \begin_layout Standard
- Reads in promoters, peaks, and sliding windows across the genome were counted
- and normalized using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- csaw
- \end_layout
- \end_inset
- and analyzed for differential modification using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lun2014,Lun2015a,Lund2012,Phipson2016"
- literal "false"
- \end_inset
- .
- Unobserved confounding factors in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data were corrected using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007,Leek2014"
- literal "false"
- \end_inset
- .
- Principal coordinate plots of the promoter count data for each histone
- mark before and after subtracting surrogate variable effects are shown
- in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-ChIP"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-PCA-raw-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me2-bad"
- \end_inset
- H3K4me2, no correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-PCA-SVsub-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me2-good"
- \end_inset
- H3K4me2, SVs subtracted
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-PCA-raw-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me3-bad"
- \end_inset
- H3K4me3, no correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-PCA-SVsub-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me3-good"
- \end_inset
- H3K4me3, SVs subtracted
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-PCA-raw-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K27me3-bad"
- \end_inset
- H3K27me3, no correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-PCA-SVsub-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K27me3-good"
- \end_inset
- H3K27me3, SVs subtracted
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status collapsed
- \begin_layout Plain Layout
- Figure font too small
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- PCoA plots of ChIP-seq sliding window data, before and after subtracting
- surrogate variables.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-ChIP"
- \end_inset
- \series bold
- PCoA plots of ChIP-seq sliding window data, before and after subtracting
- surrogate variables (SVs).
-
- \series default
- For each histone mark, a PCoA plot of the first 2 principal coordinates
- was created before and after subtraction of SV effects.
- Time points are shown by color and cell type by shape, and samples from
- the same time point and cell type are enclosed in a shaded area to aid
- in visual recognition (this shaded area has no meaning on the plot).
- Samples of the same cell type from the same donor are connected with a
- line in time point order, showing the
- \begin_inset Quotes eld
- \end_inset
- trajectory
- \begin_inset Quotes erd
- \end_inset
- of each donor's samples over time.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To investigate whether the location of a peak within the promoter region
- was important,
- \begin_inset Quotes eld
- \end_inset
- relative coverage profiles
- \begin_inset Quotes erd
- \end_inset
- were generated.
- First, 500-bp sliding windows were tiled around each annotated
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- : one window centered on the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- itself, and 10 windows each upstream and downstream, thus covering a 10.5-kb
- region centered on the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- with 21 windows.
- Reads in each window for each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- were counted in each sample, and the counts were normalized and converted
- to
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- as in the differential modification analysis.
- Then, the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values within each promoter were normalized to an average of zero, such
- that each window's normalized abundance now represents the relative read
- depth of that window compared to all other windows in the same promoter.
- The normalized abundance values for each window in a promoter are collectively
- referred to as that promoter's
- \begin_inset Quotes eld
- \end_inset
- relative coverage profile
- \begin_inset Quotes erd
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- MOFA analysis of cross-dataset variation patterns
- \end_layout
- \begin_layout Standard
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
- was run on all the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- windows overlapping consensus peaks for each histone mark, as well as the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, in order to identify patterns of coordinated variation across all
- data sets
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Argelaguet2018"
- literal "false"
- \end_inset
- .
- The results are summarized in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MOFA-master"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
-
- \begin_inset Flex Glossary Term (Capital, pl)
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- 1, 4, and 5 were determined to explain the most variation consistently
- across all data sets (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-varexplained"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), and scatter plots of these factors show that they also correlate best
- with the experimental factors (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-lf-scatter"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- 2 captures the batch effect in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data.
- Removing the effect of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- 2 using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
- theoretically yields a batch correction that does not depend on knowing
- the experimental factors.
- When this was attempted, the resulting batch correction was comparable
- to ComBat (see Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA-ComBat-batchsub"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), indicating that the ComBat-based batch correction has little room for
- improvement given the problems with the data set.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/MOFA-varExplaiend-matrix-CROP.png
- lyxscale 25
- width 45col%
- groupId mofa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:mofa-varexplained"
- \end_inset
- Variance explained in each data set by each latent factor estimated by MOFA.
- \series default
- For each LF learned by MOFA, the variance explained by that factor in each
- data set (
- \begin_inset Quotes eld
- \end_inset
- view
- \begin_inset Quotes erd
- \end_inset
- ) is shown by the shading of the cells in the lower section.
- The upper section shows the total fraction of each data set's variance
- that is explained by all LFs combined.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/MOFA-LF-scatter-small.png
- lyxscale 25
- width 45col%
- groupId mofa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:mofa-lf-scatter"
- \end_inset
- Scatter plots of specific pairs of MOFA latent factors.
- \series default
- LFs 1, 4, and 5 explain substantial variation in all data sets, so they
- were plotted against each other in order to reveal patterns of variation
- that are shared across all data sets.
- These plots can be interpreted similarly to PCA and PCoA plots.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Figure font a bit too small
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- MOFA latent factors identify shared patterns of variation.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MOFA-master"
- \end_inset
- \series bold
- MOFA latent factors identify shared patterns of variation.
-
- \series default
- MOFA was used to estimate latent factors (LFs) that explain substantial
- variation in the RNA-seq data and the ChIP-seq data (a).
- Then specific LFs of interest were selected and plotted (b).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/MOFA-batch-correct-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:mofa-batchsub"
- \end_inset
- Result of RNA-seq batch-correction using MOFA latent factors
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Focus on what hypotheses were tested, then select figures that show how
- those hypotheses were tested, even if the result is a negative.
- Not every interesting result needs to be in here.
- Chapter should tell a story.
-
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Interpretation of RNA-seq analysis is limited by a major confounding factor
- \end_layout
- \begin_layout Standard
- Genes called as present in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data were tested for differential expression between all time points and
- cell types.
- The counts of differentially expressed genes are shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Estimated-and-detected-rnaseq"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- Notably, all the results for Day 0 and Day 5 have substantially fewer genes
- called differentially expressed than any of the results for other time
- points.
- This is an unfortunate result of the difference in sample quality between
- the two batches of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data.
- All the samples in Batch 1, which includes all the samples from Days 0
- and 5, have substantially more variability than the samples in Batch 2,
- which includes the other time points.
- This is reflected in the substantially higher weights assigned to Batch
- 2 (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-seq-weights-vs-covars"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
-
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="11" columns="3">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Est.
- non-null
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \begin_inset Formula $\mathrm{FDR}\le10\%$
- \end_inset
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Naïve Day 0 vs Day 1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 5992
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1613
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Naïve Day 0 vs Day 5
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 3038
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 32
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Naïve Day 0 vs Day 14
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1870
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 190
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Memory Day 0 vs Day 1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 3195
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 411
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Memory Day 0 vs Day 5
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2688
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 18
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Memory Day 0 vs Day 14
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1911
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 227
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 0 Naïve vs Memory
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 1 Naïve vs Memory
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 9167
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 5532
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 5 Naïve vs Memory
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 14 Naïve vs Memory
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6446
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2319
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Estimated and detected differentially expressed genes.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Estimated-and-detected-rnaseq"
- \end_inset
- \series bold
- Estimated and detected differentially expressed genes.
- \series default
-
- \begin_inset Quotes eld
- \end_inset
- Test
- \begin_inset Quotes erd
- \end_inset
- : Which sample groups were compared;
- \begin_inset Quotes eld
- \end_inset
- Est.
- non-null
- \begin_inset Quotes erd
- \end_inset
- : Estimated number of differentially expressed genes, using the method of
- averaging local FDR values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013Thesis"
- literal "false"
- \end_inset
- ;
- \begin_inset Quotes eld
- \end_inset
- \begin_inset Formula $\mathrm{FDR}\le10\%$
- \end_inset
- \begin_inset Quotes erd
- \end_inset
- : Number of significantly differentially expressed genes at an FDR threshold
- of 10%.
- The total number of genes tested was 16707.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- If float lost issues, reposition randomly until success.
- \end_layout
- \end_inset
- The batch effect has both a systematic component and a random noise component.
- While the systematic component was subtracted out using ComBat (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), no such correction is possible for the noise component: Batch 1 simply
- has substantially more random noise in it, which reduces the statistical
- power for any differential expression tests involving samples in that batch.
-
- \end_layout
- \begin_layout Standard
- Despite the difficulty in detecting specific differentially expressed genes,
- there is still evidence that differential expression is present for these
- time points.
- In Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:rna-pca-final"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , there is a clear separation between naïve and memory samples at Day 0,
- despite the fact that only 2 genes were significantly differentially expressed
- for this comparison.
- Similarly, the small numbers of genes detected for the Day 0 vs Day 5 compariso
- ns do not reflect the large separation between these time points in Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:rna-pca-final"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- These separations are also visible in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- plots in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-lf-scatter"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This suggests that there is indeed a differential expression signal present
- in the data for these comparisons, but the large variability in the Batch
- 1 samples obscures this signal at the individual gene level.
- As a result, it is impossible to make any meaningful statements about the
-
- \begin_inset Quotes eld
- \end_inset
- size
- \begin_inset Quotes erd
- \end_inset
- of the gene signature for any time point, since the number of significant
- genes as well as the estimated number of differentially expressed genes
- depends so strongly on the variations in sample quality in addition to
- the size of the differential expression signal in the data.
- Gene-set enrichment analyses are similarly impractical.
- However, analyses looking at genome-wide patterns of expression are still
- practical.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-final-12-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- PCoA plot of RNA-seq samples after ComBat batch correction.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:rna-pca-final"
- \end_inset
- \series bold
- PCoA plot of RNA-seq samples after ComBat batch correction.
-
- \series default
- Each point represents an individual sample.
- Samples with the same combination of cell type and time point are encircled
- with a shaded region to aid in visual identification of the sample groups.
- Samples of the same cell type from the same donor are connected by lines
- to indicate the
- \begin_inset Quotes eld
- \end_inset
- trajectory
- \begin_inset Quotes erd
- \end_inset
- of each donor's cells over time in PCoA space.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- H3K4 and H3K27 methylation occur in broad regions and are enriched near
- promoters
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Also get
- \emph on
- median
- \emph default
- peak width and maybe other quantiles (25%, 75%)
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="5">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Histone Mark
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- # Peaks
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Mean peak width
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Genome coverage
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- FRiP
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 14,965
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 3,970
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1.92%
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 14.2%
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6,163
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2,946
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.588%
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6.57%
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 18,139
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 18,967
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 11.1%
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 22.5%
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Summary of peak-calling statistics.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:peak-calling-summary"
- \end_inset
- \series bold
- Summary of peak-calling statistics.
-
- \series default
- For each histone mark, the number of peaks called using SICER at an IDR
- threshold of 0.05, the mean width of those peaks, the fraction of the genome
- covered by peaks, and the fraction of reads in peaks (FRiP).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:peak-calling-summary"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- gives a summary of the peak calling statistics for each histone mark.
- Consistent with previous observations, all 3 histone marks occur in broad
- regions spanning many consecutive nucleosomes, rather than in sharp peaks
- as would be expected for a transcription factor or other molecule that
- binds to specific sites.
- This conclusion is further supported by Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:CCF-with-blacklist"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , in which a clear nucleosome-sized periodicity is visible in the cross-correlat
- ion value for each sample, indicating that each time a given mark is present
- on one histone, it is likely to be found on adjacent histones as well.
- H3K27me3 enrichment in particular is substantially more broad than either
- H3K4 mark, with a mean peak width of almost 19,000 bp.
- This is also reflected in the periodicity observed in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:CCF-with-blacklist"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , which remains strong much farther out for H3K27me3 than the other marks,
- showing that H3K27me3 especially tends to be found on long runs of consecutive
- histones.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- All 3 histone marks tend to occur more often near promoter regions, as shown
- in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The majority of each density distribution is flat, representing the background
- density of peaks genome-wide.
- Each distribution has a peak near zero, representing an enrichment of peaks
- close to
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- positions relative to the remainder of the genome.
- Interestingly, the
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- within which this enrichment occurs is not the same for every histone mark
- (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:effective-promoter-radius"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- For H3K4me2 and H3K4me3, peaks are most enriched within 1
- \begin_inset space ~
- \end_inset
- kbp of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- positions, while for H3K27me3, enrichment is broader, extending to 2.5
- \begin_inset space ~
- \end_inset
- kbp.
- These
- \begin_inset Quotes eld
- \end_inset
- effective promoter radii
- \begin_inset Quotes erd
- \end_inset
- remain approximately the same across all combinations of experimental condition
- (cell type, time point, and donor), so they appear to be a property of
- the histone mark itself.
- Hence, these radii were used to define the promoter regions for each histone
- mark in all further analyses.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/Promoter-Peak-Distance-Profile-PAGE1-CROP.pdf
- lyxscale 50
- width 80col%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Future direction idea: Need a control: shuffle all peaks and repeat, N times.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Enrichment of peaks in promoter neighborhoods.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:near-promoter-peak-enrich"
- \end_inset
- \series bold
- Enrichment of peaks in promoter neighborhoods.
-
- \series default
- This plot shows the distribution of distances from each annotated transcription
- start site in the genome to the nearest called peak.
- Each line represents one combination of histone mark, cell type, and time
- point.
- Distributions are smoothed using kernel density estimation.
- TSSs that occur
- \emph on
- within
- \emph default
- peaks were excluded from this plot to avoid a large spike at zero that
- would overshadow the rest of the distribution.
- (Note: this figure was generated using the original peak calls and expression
- values from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- .)
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="2">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Histone mark
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Effective promoter radius
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1 kbp
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1 kbp
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2.5 kbp
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Effective promoter radius for each histone mark.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:effective-promoter-radius"
- \end_inset
- \series bold
- Effective promoter radius for each histone mark.
- \series default
- These values represent the approximate distance from transcription start
- site positions within which an excess of peaks is found, as shown in Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Consider also showing figure for distance to nearest peak center, and reference
- median peak size once that is known.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Correlations between gene expression and promoter methylation follow expected
- genome-wide trends
- \end_layout
- \begin_layout Standard
- H3K4me2 and H3K4me3 have previously been reported as activating marks whose
- presence in a gene's promoter is associated with higher gene expression,
- while H3K27me3 has been reported as inactivating
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016,LaMere2017"
- literal "false"
- \end_inset
- .
- The data are consistent with this characterization: genes whose promoters
- (as defined by the radii for each histone mark listed in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:effective-promoter-radius"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) overlap with a H3K4me2 or H3K4me3 peak tend to have higher expression
- than those that don't, while H3K27me3 is likewise associated with lower
- gene expression, as shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:fpkm-by-peak"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This pattern holds across all combinations of cell type and time point
- (Welch's
- \emph on
- t
- \emph default
- -test, all
- \begin_inset Formula $p\textrm{-values}\ll2.2\times10^{-16}$
- \end_inset
- ).
- The difference in average
- \begin_inset Formula $\log_{2}$
- \end_inset
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FPKM
- \end_layout
- \end_inset
- values when a peak overlaps the promoter is about
- \begin_inset Formula $+5.67$
- \end_inset
- for H3K4me2,
- \begin_inset Formula $+5.76$
- \end_inset
- for H3K4me3, and
- \begin_inset Formula $-4.00$
- \end_inset
- for H3K27me3.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/FPKM-by-Peak-Violin-Plots-CROP.pdf
- lyxscale 50
- height 80theight%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Expression distributions of genes with and without promoter peaks.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:fpkm-by-peak"
- \end_inset
- \series bold
- Expression distributions of genes with and without promoter peaks.
-
- \series default
- For each histone mark in each experimental condition, the average RNA-seq
- abundance (
- \begin_inset Formula $\log_{2}$
- \end_inset
- FPKM) of each gene across all 4 donors was calculated.
- Genes were grouped based on whether or not a peak was called in their promoters
- in that condition, and the distribution of abundance values was plotted
- for the no-peak and peak groups.
- (Note: this figure was generated using the original peak calls and expression
- values from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- .)
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Gene expression and promoter histone methylation patterns show convergence
- between naïve and memory cells at day 14
- \end_layout
- \begin_layout Standard
- We hypothesized that if naïve cells had differentiated into memory cells
- by Day 14, then their patterns of expression and histone modification should
- converge with those of memory cells at Day 14.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the patterns of variation in all 3 histone marks in the promoter
- regions of the genome using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCoA
- \end_layout
- \end_inset
- .
- All 3 marks show a noticeable convergence between the naïve and memory
- samples at day 14, visible as an overlapping of the day 14 groups on each
- plot.
- This is consistent with the counts of significantly differentially modified
- promoters and estimates of the total numbers of differentially modified
- promoters shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Number-signif-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- For all histone marks, evidence of differential modification between naïve
- and memory samples was detected at every time point except day 14.
- The day 14 convergence pattern is also present in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA-group"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), albeit in the 2nd and 3rd principal coordinates, indicating that it is
- not the most dominant pattern driving gene expression.
- Taken together, the data show that promoter histone methylation for these
- 3 histone marks and RNA expression for naïve and memory cells are most
- similar at day 14, the furthest time point after activation.
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
- was also able to capture this day 14 convergence pattern in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- 5 (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-lf-scatter"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), which accounts for shared variation across all 3 histone marks and the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, confirming that this convergence is a coordinated pattern across
- all 4 data sets.
- While this observation does not prove that the naïve cells have differentiated
- into memory cells at Day 14, it is consistent with that hypothesis.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- placement p
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-promoter-PCA-group-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me2-prom"
- \end_inset
- PCoA plot of H3K4me2 promoters, after subtracting surrogate variables.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-promoter-PCA-group-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me3-prom"
- \end_inset
- PCoA plot of H3K4me3 promoters, after subtracting surrogate variables.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-promoter-PCA-group-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K27me3-prom"
- \end_inset
- PCoA plot of H3K27me3 promoters, after subtracting surrogate variables.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-final-23-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA-group"
- \end_inset
- RNA-seq PCoA, after ComBat batch correction, showing principal coordinates
- 2 and 3.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Figure font too small
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- PCoA plots for promoter ChIP-seq and expression RNA-seq data.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-promoters"
- \end_inset
- \series bold
- PCoA plots for promoter ChIP-seq and expression RNA-seq data.
-
- \series default
- Each point represents an individual sample.
- Samples with the same combination of cell type and time point are encircled
- with a shaded region to aid in visual identification of the sample groups.
- Samples of the same cell type from the same donor are connected by lines
- to indicate the
- \begin_inset Quotes eld
- \end_inset
- trajectory
- \begin_inset Quotes erd
- \end_inset
- of each donor's cells over time in PCoA space.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="6" columns="7">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Number of significant promoters
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Est.
- differentially modified promoters
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Time Point
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4553
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 927
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 9967
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4149
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2404
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 567
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 278
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1570
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4370
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2145
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6598
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 5
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2313
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 139
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 490
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 9450
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1148
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4141
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 14
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Number of differentially modified promoters between naïve and memory cells
- at each time point after activation.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Number-signif-promoters"
- \end_inset
- \series bold
- Number of differentially modified promoters between naïve and memory cells
- at each time point after activation.
-
- \series default
- This table shows both the number of differentially modified promoters detected
- at a 10% FDR threshold (left half), and the total number of differentially
- modified promoters estimated using the method of averaging local FDR estimates
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2016"
- literal "false"
- \end_inset
- (right half).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Location of H3K4me2 and H3K4me3 promoter coverage associates with gene expressio
- n
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Make sure use of coverage/abundance/whatever is consistent.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- For the figures in this section and the next, the group labels are arbitrary,
- so if time allows, it would be good to manually reorder them in a logical
- way, e.g.
- most upstream to most downstream.
- If this is done, make sure to update the text with the correct group labels.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To test whether the position of a histone mark relative to a gene's
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- was important, we looked at the
- \begin_inset Quotes eld
- \end_inset
- landscape
- \begin_inset Quotes erd
- \end_inset
- of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- read coverage in naïve Day 0 samples within 5 kbp of each gene's
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- by binning reads into 500-bp windows tiled across each promoter.
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values were calculated for the bins in each promoter and then the average
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- for each promoter's bins was normalized to zero, such that the values represent
- coverage relative to other regions of the same promoter rather than being
- proportional to absolute read count.
- The promoters were then clustered based on the normalized bin abundances
- using
- \begin_inset Formula $k$
- \end_inset
- -means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- .
- Different values of
- \begin_inset Formula $K$
- \end_inset
- were also tested, but did not substantially change the interpretation of
- the data.
- \end_layout
- \begin_layout Standard
- For H3K4me2, plotting the average bin abundances for each cluster reveals
- a simple pattern (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood-clusters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ): Cluster 5 represents a completely flat promoter coverage profile, likely
- consisting of genes with no H3K4me2 methylation in the promoter.
- All the other clusters represent a continuum of peak positions relative
- to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- In order from most upstream to most downstream, they are Clusters 6, 4,
- 3, 1, and 2.
- There do not appear to be any clusters representing coverage patterns other
- than lone peaks, such as coverage troughs or double peaks.
- Next, all promoters were plotted in a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCA
- \end_layout
- \end_inset
- plot based on the same relative bin abundance data, and colored based on
- cluster membership (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood-pca"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCA
- \end_layout
- \end_inset
- plot shows Cluster 5 (the
- \begin_inset Quotes eld
- \end_inset
- no peak
- \begin_inset Quotes erd
- \end_inset
- cluster) at the center, with the other clusters arranged in a counter-clockwise
- arc around it in the order noted above, from most upstream peak to most
- downstream.
- Notably, the
- \begin_inset Quotes eld
- \end_inset
- clusters
- \begin_inset Quotes erd
- \end_inset
- form a single large
- \begin_inset Quotes eld
- \end_inset
- cloud
- \begin_inset Quotes erd
- \end_inset
- with no apparent separation between them, further supporting the conclusion
- that these clusters represent an arbitrary partitioning of a continuous
- distribution of promoter coverage landscapes.
- While the clusters are a useful abstraction that aids in visualization,
- they are ultimately not an accurate representation of the data.
- The continuous nature of the distribution also explains why different values
- of
- \begin_inset Formula $K$
- \end_inset
- led to similar conclusions.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-clusters-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood-clusters"
- \end_inset
- Average relative coverage for each bin in each cluster.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-PCA-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood-pca"
- \end_inset
- PCA of relative coverage depth, colored by K-means cluster membership.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-expression-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood-expression"
- \end_inset
- Gene expression grouped by promoter coverage clusters.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Figure font too small
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- K-means clustering of promoter H3K4me2 relative coverage depth in naïve
- day 0 samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood"
- \end_inset
- \series bold
- K-means clustering of promoter H3K4me2 relative coverage depth in naïve
- day 0 samples.
-
- \series default
- H3K4me2 ChIP-seq reads were binned into 500-bp windows tiled across each
- promoter from 5
- \begin_inset space ~
- \end_inset
- kbp upstream to 5
- \begin_inset space ~
- \end_inset
- kbp downstream, and the logCPM values were normalized within each promoter
- to an average of 0, yielding relative coverage depths.
- These were then grouped using K-means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- ,
- \series bold
-
- \series default
- and the average bin values were plotted for each cluster (a).
- The
- \begin_inset Formula $x$
- \end_inset
- -axis is the genomic coordinate of each bin relative to the transcription
- start site, and the
- \begin_inset Formula $y$
- \end_inset
- -axis is the mean relative coverage depth of that bin across all promoters
- in the cluster.
- Each line represents the average
- \begin_inset Quotes eld
- \end_inset
- shape
- \begin_inset Quotes erd
- \end_inset
- of the promoter coverage for promoters in that cluster.
- PCA was performed on the same data, and the first two PCs were plotted,
- coloring each point by its K-means cluster identity (b).
- For each cluster, the distribution of gene expression values was plotted
- (c).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Should have a table of p-values on difference of means between Cluster 5
- and the others.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To investigate the association between relative peak position and gene expressio
- n, we plotted the naïve Day 0 expression for the genes in each cluster (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood-expression"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Most genes in Cluster 5, the
- \begin_inset Quotes eld
- \end_inset
- no peak
- \begin_inset Quotes erd
- \end_inset
- cluster, have low expression values.
- Taking this as the
- \begin_inset Quotes eld
- \end_inset
- baseline
- \begin_inset Quotes erd
- \end_inset
- distribution when no H3K4me2 methylation is present, we can compare the
- other clusters' distributions to determine which peak positions are associated
- with elevated expression.
- As might be expected, the 3 clusters representing peaks closest to the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , Clusters 1, 3, and 4, show the highest average expression distributions.
- Specifically, these clusters all have their highest
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- abundance within 1 kbp of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , consistent with the previously determined promoter radius.
- In contrast, Cluster 6, which represents peaks several kbp upstream of
- the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , shows a slightly higher average expression than baseline, while Cluster
- 2, which represents peaks several kbp downstream, doesn't appear to show
- any appreciable difference.
- Interestingly, the cluster with the highest average expression is Cluster
- 1, which represents peaks about 1 kbp downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , rather than Cluster 3, which represents peaks centered directly at the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- This suggests that conceptualizing the promoter as a region centered on
- the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- with a certain
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- may be an oversimplification – a peak that is a specific distance from
- the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- may have a different degree of influence depending on whether it is upstream
- or downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- All observations described above for H3K4me2
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- appear to hold for H3K4me3 as well (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This is expected, since there is a high correlation between the positions
- where both histone marks occur.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-neighborhood-clusters-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me3-neighborhood-clusters"
- \end_inset
- Average relative coverage for each bin in each cluster.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-neighborhood-PCA-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me3-neighborhood-pca"
- \end_inset
- PCA of relative coverage depth, colored by K-means cluster membership.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-neighborhood-expression-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me3-neighborhood-expression"
- \end_inset
- Gene expression grouped by promoter coverage clusters.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- K-means clustering of promoter H3K4me3 relative coverage depth in naïve
- day 0 samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me3-neighborhood"
- \end_inset
- \series bold
- K-means clustering of promoter H3K4me3 relative coverage depth in naïve
- day 0 samples.
-
- \series default
- H3K4me3 ChIP-seq reads were binned into 500-bp windows tiled across each
- promoter from 5
- \begin_inset space ~
- \end_inset
- kbp upstream to 5
- \begin_inset space ~
- \end_inset
- kbp downstream, and the logCPM values were normalized within each promoter
- to an average of 0, yielding relative coverage depths.
- These were then grouped using K-means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- ,
- \series bold
-
- \series default
- and the average bin values were plotted for each cluster (a).
- The
- \begin_inset Formula $x$
- \end_inset
- -axis is the genomic coordinate of each bin relative to the transcription
- start site, and the
- \begin_inset Formula $y$
- \end_inset
- -axis is the mean relative coverage depth of that bin across all promoters
- in the cluster.
- Each line represents the average
- \begin_inset Quotes eld
- \end_inset
- shape
- \begin_inset Quotes erd
- \end_inset
- of the promoter coverage for promoters in that cluster.
- PCA was performed on the same data, and the first two PCs were plotted,
- coloring each point by its K-means cluster identity (b).
- For each cluster, the distribution of gene expression values was plotted
- (c).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Patterns of H3K27me3 promoter coverage associate with gene expression
- \end_layout
- \begin_layout Standard
- Unlike both H3K4 marks, whose main patterns of variation appear directly
- related to the size and position of a single peak within the promoter,
- the patterns of H3K27me3 methylation in promoters are more complex (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Once again looking at the relative coverage in 500-bp wide bins in a
- 5 kbp radius around each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , promoters were clustered based on the normalized relative coverage values
- in each bin using
- \begin_inset Formula $k$
- \end_inset
- -means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood-clusters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This time, 3
- \begin_inset Quotes eld
- \end_inset
- axes
- \begin_inset Quotes erd
- \end_inset
- of variation can be observed, each represented by 2 clusters with opposing
- patterns.
- The first axis is greater upstream coverage (Cluster 1) vs.
- greater downstream coverage (Cluster 3); the second axis is the coverage
- at the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- itself: peak (Cluster 4) or trough (Cluster 2); lastly, the third axis
- represents a trough upstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- (Cluster 5) vs.
- downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- (Cluster 6).
- Referring to these opposing pairs of clusters as axes of variation is justified
- , because they correspond precisely to the first 3
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- PC
- \end_layout
- \end_inset
- in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCA
- \end_layout
- \end_inset
- plot of the relative coverage values (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood-pca"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCA
- \end_layout
- \end_inset
- plot reveals that as in the case of H3K4me2, all the
- \begin_inset Quotes eld
- \end_inset
- clusters
- \begin_inset Quotes erd
- \end_inset
- are really just sections of a single connected cloud rather than discrete
- clusters.
- The cloud is approximately ellipsoid-shaped, with each PC being an axis
- of the ellipsoid, and each cluster consisting of a pyramidal section of the
- ellipsoid.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-clusters-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood-clusters"
- \end_inset
- Average relative coverage for each bin in each cluster.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-PCA-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood-pca"
- \end_inset
- PCA of relative coverage depth, colored by K-means cluster membership.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-expression-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood-expression"
- \end_inset
- Gene expression grouped by promoter coverage clusters.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Repeated figure legends are kind of an issue here.
- What to do?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- K-means clustering of promoter H3K27me3 relative coverage depth in naïve
- day 0 samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood"
- \end_inset
- \series bold
- K-means clustering of promoter H3K27me3 relative coverage depth in naïve
- day 0 samples.
-
- \series default
- H3K27me3 ChIP-seq reads were binned into 500-bp windows tiled across each
- promoter from 5
- \begin_inset space ~
- \end_inset
- kbp upstream to 5
- \begin_inset space ~
- \end_inset
- kbp downstream, and the logCPM values were normalized within each promoter
- to an average of 0, yielding relative coverage depths.
- These were then grouped using
- \begin_inset Formula $k$
- \end_inset
- -means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- ,
- \series bold
-
- \series default
- and the average bin values were plotted for each cluster (a).
- The
- \begin_inset Formula $x$
- \end_inset
- -axis is the genomic coordinate of each bin relative to the transcription
- start site, and the
- \begin_inset Formula $y$
- \end_inset
- -axis is the mean relative coverage depth of that bin across all promoters
- in the cluster.
- Each line represents the average
- \begin_inset Quotes eld
- \end_inset
- shape
- \begin_inset Quotes erd
- \end_inset
- of the promoter coverage for promoters in that cluster.
- PCA was performed on the same data, and the first two PCs were plotted,
- coloring each point by its K-means cluster identity (b).
- (Note: In (b), Cluster 6 is hidden behind all the other clusters.) For each
- cluster, the distribution of gene expression values was plotted (c).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood-expression"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , we can see that Clusters 1 and 2 are the only clusters with higher gene
- expression than the others.
- For Cluster 2, this is expected, since this cluster represents genes with
- depletion of H3K27me3 near the promoter.
- Hence, elevated expression in Cluster 2 is consistent with the conventional
- view of H3K27me3 as a deactivating mark.
- However, Cluster 1, the cluster with the most elevated gene expression,
- represents genes with elevated coverage upstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , or equivalently, decreased coverage downstream, inside the gene body.
- The opposite pattern, in which H3K27me3 is more abundant within the gene
- body and less abundant in the upstream promoter region, does not show
- any elevation in gene expression.
- As with H3K4me2, this shows that the location of H3K27 trimethylation relative
- to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- is potentially an important factor beyond simple proximity.
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Show the figures where the negative result ended this line of inquiry.
- I need to debug some errors resulting from an R upgrade to do this.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Defined pattern analysis
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- This was where I defined interesting expression patterns and then looked
- at initial relative promoter coverage for each expression pattern.
- Negative result.
- I forgot about this until recently.
- Worth including? Remember to also write methods.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Promoter CpG islands?
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- I forgot until recently about the work I did on this.
- Worth including? Remember to also write methods.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Write better section headers
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Each histone mark's
- \begin_inset Quotes eld
- \end_inset
- effective promoter extent
- \begin_inset Quotes erd
- \end_inset
- must be determined empirically
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows that H3K4me2, H3K4me3, and H3K27me3 are all enriched near promoters,
- relative to the rest of the genome, consistent with their conventionally
- understood role in regulating gene transcription.
- Interestingly, the radius within which this enrichment occurs is not the same
- for each histone mark.
- H3K4me2 and H3K4me3 are enriched within a 1
- \begin_inset space ~
- \end_inset
- kbp radius, while H3K27me3 is enriched within 2.5
- \begin_inset space ~
- \end_inset
- kbp.
- Notably, the determined promoter radius was consistent across all experimental
- conditions, varying only between different histone marks.
- This suggests that the conventional
- \begin_inset Quotes eld
- \end_inset
- one size fits all
- \begin_inset Quotes erd
- \end_inset
- approach of defining a single promoter region for each gene (or each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- ) and using that same promoter region for analyzing all types of genomic
- data within an experiment may not be appropriate, and a better approach
- may be to use a separate promoter radius for each kind of data, with each
- radius being derived from the data itself.
- Furthermore, the apparent asymmetry of upstream and downstream promoter
- histone modification with respect to gene expression, seen in Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , shows that even the concept of a promoter
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- is likely an oversimplification.
- At a minimum, nearby enrichment of peaks should be evaluated separately
- for both upstream and downstream peaks, and an appropriate
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- should be selected for each direction.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Sarah: I would have to search the literature, but I believe this has been
- observed before.
- The position relative to the TSS likely has to do with recruitment of the
- transcriptional machinery and the space required for that.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- show that the determined promoter radius of 1
- \begin_inset space ~
- \end_inset
- kbp is approximately consistent with the distance from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- at which enrichment of H3K4 methylation correlates with increased expression,
- showing that this radius, which was determined by a simple analysis of
- measuring the distance from each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- to the nearest peak, also has functional significance.
- For H3K27me3, the correlation between histone modification near the promoter
- and gene expression is more complex, involving non-peak variations such
- as troughs in coverage at the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- and asymmetric coverage upstream and downstream, so it is difficult in
- this case to evaluate whether the 2.5
- \begin_inset space ~
- \end_inset
- kbp radius determined from TSS-to-peak distances is functionally significant.
- However, the two patterns of coverage associated with elevated expression
- levels both have interesting features within this radius.
- \end_layout
- \begin_layout Subsection
- Day 14 convergence is consistent with naïve-to-memory differentiation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Look up some more references for these histone marks being involved in memory
- differentiation.
- (Ask Sarah)
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- We observed that all 3 histone marks and the gene expression data exhibit
- evidence of convergence in abundance between naïve and memory cells by
- day 14 after activation (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Number-signif-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- scatter plots (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-lf-scatter"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) show that this pattern of convergence is captured in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- 5.
- Like all the
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- in this plot, this factor explains a substantial portion of the variance
- in all 4 data sets, indicating a coordinated pattern of variation shared
- across all histone marks and gene expression.
- This is consistent with the expectation that any naïve CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells remaining at day 14 should have differentiated into memory cells
- by that time, and should therefore have a genomic and epigenomic state
- similar to memory cells.
- This convergence is evidence that these histone marks all play an important
- role in the naïve-to-memory differentiation process.
- A histone mark that was not involved in naïve-to-memory differentiation
- would not be expected to converge in this way after activation.
- \end_layout
- \begin_layout Standard
- In H3K4me2, H3K4me3, and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- , this convergence appears to be in progress already by Day 5, shown by
- the smaller distance between naïve and memory cells at day 5 along the
-
- \begin_inset Formula $y$
- \end_inset
- -axes in Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-H3K4me2-prom"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-H3K4me3-prom"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA-group"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This agrees with the model proposed by Sarah Lamere based on a prior analysis
- of the same data, shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Lamere2016-Fig8"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , which shows the pattern of H3K4 methylation and expression for naïve cells
- and memory cells converging at day 5.
- This model was developed without the benefit of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCoA
- \end_layout
- \end_inset
- plots in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , which have been corrected for confounding factors by ComBat and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- .
- This shows that proper batch correction assists in extracting meaningful
- patterns in the data while eliminating systematic sources of irrelevant
- variation in the data, allowing simple automated procedures like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCoA
- \end_layout
- \end_inset
- to reveal interesting behaviors in the data that were previously only detectabl
- e by a detailed manual analysis.
- While the ideal comparison to demonstrate this convergence would be naïve
- cells at day 14 to memory cells at day 0, this is not feasible in this
- experimental system, since neither naïve nor memory cells are able to fully
- return to their pre-activation state, as shown by the lack of overlap between
- days 0 and 14 for either naïve or memory cells in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/LaMere2016_fig8.pdf
- lyxscale 50
- width 100col%
- groupId colfullwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Lamere 2016 Figure 8 “Model for the role of H3K4 methylation during CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell activation.
- \begin_inset Quotes erd
- \end_inset
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Lamere2016-Fig8"
- \end_inset
- \series bold
- Lamere 2016 Figure 8
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- ,
- \begin_inset Quotes eld
- \end_inset
- Model for the role of H3K4 methylation during CD4
- \begin_inset Formula $\mathbf{^{+}}$
- \end_inset
- T-cell activation.
- \begin_inset Quotes erd
- \end_inset
-
- \series default
- (Reproduced with permission.)
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- The location of histone modifications within the promoter is important
- \end_layout
- \begin_layout Standard
- When looking at patterns in the relative coverage of each histone mark near
- the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- of each gene, several interesting patterns were apparent.
- For H3K4me2 and H3K4me3, the pattern was straightforward: the consistent
- pattern across all promoters was a single peak a few kbp wide, with the
- main axis of variation being the position of this peak relative to the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- (Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- &
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- There were no obvious
- \begin_inset Quotes eld
- \end_inset
- preferred
- \begin_inset Quotes erd
- \end_inset
- positions, but rather a continuous distribution of relative positions ranging
- all across the promoter region.
- The association with gene expression was also straightforward: peaks closer
- to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- were more strongly associated with elevated gene expression.
- Coverage downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- appears to be more strongly associated with elevated expression than coverage
- at the same distance upstream, indicating that the
- \begin_inset Quotes eld
- \end_inset
- effective promoter region
- \begin_inset Quotes erd
- \end_inset
- for H3K4me2 and H3K4me3 may be centered downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- The relative promoter coverage for H3K27me3 had a more complex pattern,
- with two specific patterns of promoter coverage associated with elevated
- expression: a sharp depletion of H3K27me3 around the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- relative to the surrounding area, and a depletion of H3K27me3 downstream
- of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- relative to upstream (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- A previous study found that H3K27me3 depletion within the gene body was
- associated with elevated gene expression in 4 different cell types in mice
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Young2011"
- literal "false"
- \end_inset
- .
- This is consistent with the second pattern described here.
- This study also reported that a spike in coverage at the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- was associated with
- \emph on
- lower
- \emph default
- expression, which is indirectly consistent with the first pattern described
- here, in the sense that it associates lower H3K27me3 levels near the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- with higher expression.
- \end_layout
- \begin_layout Subsection
- A reproducible workflow aids in analysis
- \end_layout
- \begin_layout Standard
- The analyses described in this chapter were organized into a reproducible
- workflow using the Snakemake workflow management system
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Koster2012"
- literal "false"
- \end_inset
- .
- As shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:rulegraph"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , the workflow includes many steps with complex dependencies between them.
- For example, the step that counts the number of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads in 500
- \begin_inset space ~
- \end_inset
- bp windows in each promoter (the starting point for Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), named
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- chipseq_count_tss_neighborhoods
- \end_layout
- \end_inset
- , depends on the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- abundance estimates in order to select the most-used
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- for each gene, the aligned
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads, the index for those reads, and the blacklist of regions to be excluded
- from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- analysis.
- Each step declares its inputs and outputs, and Snakemake uses these to
- determine the dependencies between steps.
- Each step is marked as depending on all the steps whose outputs match its
- inputs, generating the workflow graph in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:rulegraph"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , which Snakemake uses to determine the order in which to execute each step
- so that each step is executed only after all of the steps it depends on
- have completed, thereby automating the entire workflow from start to finish.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
- lyxscale 50
- width 100col%
- height 95theight%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Dependency graph of steps in reproducible workflow.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:rulegraph"
- \end_inset
- \series bold
- Dependency graph of steps in reproducible workflow.
-
- \series default
- The analysis flows from left to right.
- Arrows indicate which analysis steps depend on the output of other steps.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In addition to simply making it easier to organize the steps in the analysis,
- structuring the analysis as a workflow allowed for some analysis strategies
- that would not have been practical otherwise.
- For example, 5 different
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- quantification methods were tested against two different reference transcriptom
- e annotations for a total of 10 different quantifications of the same
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data.
- These were then compared against each other in the exploratory data analysis
- step, to determine that the results were not very sensitive to either the
- choice of quantification method or the choice of annotation.
- This was possible with a single script for the exploratory data analysis,
- because Snakemake was able to automate running this script for every combinatio
- n of method and reference.
- In a similar manner, two different peak calling methods were tested against
- each other, and in this case it was determined that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SICER
- \end_layout
- \end_inset
- was unambiguously superior to
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MACS
- \end_layout
- \end_inset
- for all histone marks studied.
- By enabling these types of comparisons, structuring the analysis as an
- automated workflow allowed important analysis decisions to be made in a
- data-driven way, by running every reasonable option through the downstream
- steps, seeing the consequences of choosing each option, and deciding accordingl
- y.
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Subsection
- Data quality issues limit conclusions
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Is this needed?
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Future Directions
- \end_layout
- \begin_layout Standard
- The analysis of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- in CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells in Chapter 2 is in many ways a preliminary study that suggests
- a multitude of new avenues of investigation.
- Here we consider a selection of such avenues.
- \end_layout
- \begin_layout Subsection
- Previous negative results
- \end_layout
- \begin_layout Standard
- Two additional analyses were conducted beyond those reported in the results.
- First, we searched for evidence that the presence or absence of a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- CpGi
- \end_layout
- \end_inset
- in the promoter was correlated with increases or decreases in gene expression
- or any histone mark in any of the tested contrasts.
- Second, we searched for evidence that the relative
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- coverage profiles prior to activation could predict the change in expression
- of a gene after activation.
- Neither analysis turned up any clear positive results.
- \end_layout
- \begin_layout Subsection
- Improve on the idea of an effective promoter radius
- \end_layout
- \begin_layout Standard
- This study introduced the concept of an
- \begin_inset Quotes eld
- \end_inset
- effective promoter radius
- \begin_inset Quotes erd
- \end_inset
- specific to each histone mark based on distance from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- within which an excess of peaks was called for that mark.
- This concept was then used to guide further analyses throughout the study.
- However, while the effective promoter radius was useful in those analyses,
- it is both limited in theory and shown in practice to be a possible oversimplif
- ication.
- First, the effective promoter radii used in this study were chosen based
- on manual inspection of the TSS-to-peak distance distributions in Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , selecting round numbers for analyst convenience (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:effective-promoter-radius"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- It would be better to define an algorithm that selects a more precise radius
- based on the features of the graph.
- One possible way to do this would be to randomly rearrange the called peaks
- throughout the genome many times (while preserving the distribution of peak widths)
- and re-generate the same plot as in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This would yield a better
- \begin_inset Quotes eld
- \end_inset
- background
- \begin_inset Quotes erd
- \end_inset
- distribution that demonstrates the degree of near-TSS enrichment that would
- be expected by random chance.
- The effective promoter radius could be defined as the point where the true
- distribution diverges from the randomized background distribution.
-
- \end_layout
- \begin_layout Standard
- Furthermore, the above definition of effective promoter radius has the significa
- nt limitation of being based on the peak calling method.
- It is thus very sensitive to the choice of peak caller and significance
- threshold for calling peaks, as well as the degree of saturation in the
- sequencing.
- Calling peaks from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- samples with insufficient coverage depth, with the wrong peak caller, or
- with a different significance threshold could give a drastically different
- number of called peaks, and hence a drastically different distribution
- of peak-to-TSS distances.
- To address this, it is desirable to develop a better method of determining
- the effective promoter radius that relies only on the distribution of read
- coverage around the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , independent of the peak calling.
- Furthermore, as demonstrated by the upstream-downstream asymmetries observed
- in Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , this definition should determine a different radius for the upstream and
- downstream directions.
- At this point, it may be better to rename this concept
- \begin_inset Quotes eld
- \end_inset
- effective promoter extent
- \begin_inset Quotes erd
- \end_inset
- and avoid the word
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- , since a radius implies a symmetry about the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- that is not supported by the data.
- \end_layout
- \begin_layout Standard
- Beyond improving the definition of effective promoter extent, functional
- validation is necessary to show that this measure of near-TSS enrichment
- has biological meaning.
- Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- already provide a very limited functional validation of the chosen promoter
- extents for H3K4me2 and H3K4me3 by showing that spikes in coverage within
- this region are most strongly correlated with elevated gene expression.
- However, there are other ways to show functional relevance of the promoter
- extent.
- For example, correlations could be computed between read counts in peaks
- near gene promoters and the expression level of those genes, and these
- correlations could be plotted against the distance of the peak upstream
- or downstream of the gene's
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- If the promoter extent truly defines a
- \begin_inset Quotes eld
- \end_inset
- sphere of influence
- \begin_inset Quotes erd
- \end_inset
- within which a histone mark is involved with the regulation of a gene,
- then the correlations for peaks within this extent should be significantly
- higher than those further upstream or downstream.
- Peaks within these extents may also be more likely to show differential
- modification than those outside genic regions of the genome.
- \end_layout
- \begin_layout Subsection
- Design experiments to focus on post-activation convergence of naïve & memory
- cells
- \end_layout
- \begin_layout Standard
- In this study, a convergence between naïve and memory cells was observed
- in both the pattern of gene expression and the epigenetic state of the 3
- histone marks studied, consistent with the hypothesis that any naïve cells
- remaining 14 days after activation have differentiated into memory cells,
- and that both gene expression and these histone marks are involved in this
- differentiation.
- However, the current study was not designed with this specific hypothesis
- in mind, and it therefore has some deficiencies with regard to testing
- it.
- The memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- samples at day 14 do not resemble the memory samples at day 0, indicating
- that in the specific model of activation used for this experiment, the
- cells are not guaranteed to return to their original pre-activation state,
- or perhaps this process takes substantially longer than 14 days.
- This difference is expected, as the cell cultures in this experiment were
- treated with IL2 from day 5 onward
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- , so the signalling environments in which the cells are cultured are different
- at day 0 and day 14.
- This is a challenge for testing the convergence hypothesis because the
- ideal comparison to prove that naïve cells are converging to a resting
- memory state would be to compare the final naïve time point to the Day
- 0 memory samples, but this comparison is only meaningful if memory cells
- generally return to the same
- \begin_inset Quotes eld
- \end_inset
- resting
- \begin_inset Quotes erd
- \end_inset
- state that they started at.
- \end_layout
- \begin_layout Standard
- Because pre-culture and post-culture cells will probably never behave identicall
- y even if they both nominally have a
- \begin_inset Quotes eld
- \end_inset
- resting
- \begin_inset Quotes erd
- \end_inset
- phenotype, a different experiment should be designed in which post-activation
- naïve cells are compared to memory cells that were cultured for the same
- amount of time but never activated, in addition to post-activation memory
- cells.
- If the convergence hypothesis is correct, both post-activation cultures
- should converge on the culture of never-activated memory cells.
- \end_layout
- \begin_layout Standard
- In addition, if naïve-to-memory convergence is a general pattern, it should
- also be detectable in other epigenetic marks, including other histone marks
- and DNA methylation.
- An experiment should be designed studying a large number of epigenetic
- marks known or suspected to be involved in regulation of gene expression,
- assaying all of these at the same pre- and post-activation time points.
- Multi-dataset factor analysis methods like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
- can then be used to identify coordinated patterns of regulation shared
- across many epigenetic marks.
- Of course, CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells are not the only adaptive immune cells that exhibit memory formation.
- A similar study could be designed for CD8
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells, B-cells, and even specific subsets of CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells, such as Th1, Th2, Treg, and Th17 cells, to determine whether these
- also show convergence.
- \end_layout
- \begin_layout Subsection
- Follow up on hints of interesting patterns in promoter relative coverage
- profiles
- \end_layout
- \begin_layout Standard
- The analysis of promoter coverage landscapes in resting naïve CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells and their correlations with gene expression raises many interesting
- questions.
- The chosen analysis strategy used a clustering approach, but this approach
- was subsequently shown to be a poor fit for the data.
- In light of this, a better means of dimension reduction for promoter landscape
- data is required.
- In the case of H3K27me3, one option is to define the first 3
- principal components as orthogonal promoter
- \begin_inset Quotes eld
- \end_inset
- state variables
- \begin_inset Quotes erd
- \end_inset
- : upstream vs downstream coverage, TSS-centered peak vs trough, and proximal
- upstream trough vs proximal downstream trough.
- Gene expression could then be modeled as a function of these three variables,
- or possibly as a function of the first
- \begin_inset Formula $N$
- \end_inset
- principal components for
- \begin_inset Formula $N$
- \end_inset
- larger than 3.
- For H3K4me2 and H3K4me3, a better representation might be obtained by transform
- ing the first 2 principal coordinates into a polar coordinate system
- \begin_inset Formula $(r,\theta)$
- \end_inset
- with the origin at the center of the
- \begin_inset Quotes eld
- \end_inset
- no peak
- \begin_inset Quotes erd
- \end_inset
- cluster, where the radius
- \begin_inset Formula $r$
- \end_inset
- represents the peak height above the background and the angle
- \begin_inset Formula $\theta$
- \end_inset
- represents the peak's position upstream or downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
-
- \end_layout
- \begin_layout Standard
- Another weakness in the current analysis is the normalization of the average
- abundance of each promoter to an average of zero.
- This allows the abundance value in each window to represent the relative
- abundance of that window compared to all the other windows in the interrogated
- area.
- However, while using the remainder of the windows to set the
- \begin_inset Quotes eld
- \end_inset
- background
- \begin_inset Quotes erd
- \end_inset
- level against which each window is normalized is convenient, it is far
- from optimal.
- As shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:peak-calling-summary"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , many enriched regions are larger than the 5
- \begin_inset space ~
- \end_inset
- kbp radius, which means there may not be any
- \begin_inset Quotes eld
- \end_inset
- background
- \begin_inset Quotes erd
- \end_inset
- regions within 5
- \begin_inset space ~
- \end_inset
- kbp of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- to normalize against.
- For example, this normalization strategy fails to distinguish between a
- trough in coverage at the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- and a pair of wide peaks upstream and downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- Both cases would present as lower coverage in the windows immediately adjacent
- to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- and higher coverage in windows further away, but the functional implications
- of these two cases might be completely different.
- To improve the normalization, the background estimation method used by
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SICER
- \end_layout
- \end_inset
- , which is specifically designed for finding broad regions of enrichment,
- should be adapted to estimate the background sequencing depth in each window
- from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- input samples, and each window's read count should be normalized against
- the background and reported as a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logFC
- \end_layout
- \end_inset
- relative to that background.
- \end_layout
- \begin_layout Standard
- Lastly, the analysis of promoter coverage landscapes presented in this work
- only looked at promoter coverage of resting naïve CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells, with the goal of determining whether this initial promoter state
- was predictive of post-activation changes in gene expression.
- Changes in the promoter coverage landscape over time have not yet been
- considered.
- This represents a significant analysis challenge, by adding yet another
- dimension (genomic coordinate) to the data.
- \end_layout
- \begin_layout Subsection
- Investigate causes of high correlation between mutually exclusive histone
- marks
- \end_layout
- \begin_layout Standard
- The high correlation in coverage depth observed between H3K4me2 and
- H3K4me3 is both expected and unexpected.
- Since both marks are associated with elevated gene transcription, a positive
- correlation between them is not surprising.
- However, these two marks represent different post-translational modifications
- of the
- \emph on
- same
- \emph default
- lysine residue on the histone H3 polypeptide, which means that they cannot
- both be present on the same H3 subunit.
- Thus, the high correlation between them has several potential explanations.
- One possible reason is cell population heterogeneity: perhaps some genomic
- loci are frequently marked with H3K4me2 in some cells, while in other cells
- the same loci are marked with H3K4me3.
- Another possibility is allele-specific modifications: the loci are marked
- in each diploid cell with H3K4me2 on one allele and H3K4me3 on the other
- allele.
- Lastly, since each histone octamer contains 2 H3 subunits, it is possible
- that having one H3K4me2 mark and one H3K4me3 mark on a given histone octamer
- represents a distinct epigenetic state with a different function than either
- double H3K4me2 or double H3K4me3.
-
- \end_layout
- \begin_layout Standard
- The hypothesis of allele-specific histone modification can easily be tested
- with existing data by locating all heterozygous loci occurring within both
- H3K4me3 and H3K4me2 peaks and checking for opposite allelic imbalance between
- H3K4me3 and H3K4me2 reads at each locus.
- If the allele fractions in the reads from the two histone marks for each
- locus are plotted against each other, there should be a negative correlation.
- If no such negative correlation is found, then allele-specific histone
- modification is unlikely to be the reason for the high correlation between
- these histone marks.
- \end_layout
- \begin_layout Standard
- The hypothesis that H3K4me2 and H3K4me3 marks are occurring on the same
- histones can also be tested experimentally.
- A double
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP
- \end_layout
- \end_inset
- experiment can be performed
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Jin2007"
- literal "false"
- \end_inset
- .
- In this assay, the input DNA goes through two sequential immunoprecipitations
- with different antibodies: first the anti-H3K4me2 antibody, then the anti-H3K4m
- e3 antibody.
- Only histones bearing both histone marks, and the DNA associated with them, should
- be isolated.
- This can be followed by
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- HTS
- \end_layout
- \end_inset
- to form a
- \begin_inset Quotes eld
- \end_inset
- double
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- \begin_inset Quotes erd
- \end_inset
- assay that can be used to identify DNA regions bound by the isolated histones
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Jin2009"
- literal "false"
- \end_inset
- .
- If peaks called from this double
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- assay are highly correlated with both H3K4me2 and H3K4me3 peaks, then this
- is strong evidence that the correlation between the two marks is actually
- caused by physical co-location on the same histone.
- \end_layout
- \begin_layout Chapter
- \begin_inset CommandInset label
- LatexCommand label
- name "chap:Improving-array-based-diagnostic"
- \end_inset
- Improving array-based diagnostics for transplant rejection by optimizing
- data preprocessing
- \end_layout
- \begin_layout Standard
- \size large
- Ryan C.
- Thompson, Sunil M.
- Kurian, Thomas Whisnant, Padmaja Natarajan, Daniel R.
- Salomon
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Reintroduce all abbreviations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Introduction
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Fill this out
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Arrays for diagnostics
- \end_layout
- \begin_layout Standard
- Arrays are an attractive platform for diagnostics
- \end_layout
- \begin_layout Subsection
- Proper pre-processing is essential for array data
- \end_layout
- \begin_layout Standard
- Microarrays, bead arrays, and similar assays produce raw data in the form
- of fluorescence intensity measurements, with each intensity measurement
- proportional to the abundance of some fluorescently labelled target DNA
- or RNA sequence that base pairs to a specific probe sequence.
- However, the fluorescence measurements for each probe are also affected
- by many technical confounding factors, such as the concentration of target
- material, strength of off-target binding, the sensitivity of the imaging
- sensor, and visual artifacts in the image.
- Some array designs also use multiple probe sequences for each target.
- Hence, extensive pre-processing of array data is necessary to normalize
- out the effects of these technical factors and summarize the information
- from multiple probes to arrive at a single usable estimate of abundance
- or other relevant quantity, such as a ratio of two abundances, for each
- target
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Gentleman2005"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- The choice of pre-processing algorithms used in the analysis of an array
- data set can have a large effect on the results of that analysis.
- However, despite their importance, these steps are often neglected or rushed
- in order to get to the more scientifically interesting analysis steps involving
- the actual biology of the system under study.
- Hence, it is often possible to achieve substantial gains in statistical
- power, model goodness-of-fit, or other relevant performance measures, by
- checking the assumptions made by each preprocessing step and choosing specific
- normalization methods tailored to the specific goals of the current analysis.
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Subsection
- Clinical diagnostic applications for microarrays require single-channel
- normalization
- \end_layout
- \begin_layout Standard
- As the cost of performing microarray assays falls, there is increasing interest
- in using genomic assays for diagnostic purposes, such as distinguishing
-
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsdisp*{TX}{healthy transplants (TX)}
- \end_layout
- \end_inset
- from transplants undergoing
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- or
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- .
- However, the standard normalization algorithm used for microarray data,
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Irizarry2003a"
- literal "false"
- \end_inset
- , is not applicable in a clinical setting.
- Two of the steps in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , quantile normalization and probe summarization by median polish, depend
- on every array in the data set being normalized.
- This means that adding or removing any arrays from a data set changes the
- normalized values for all arrays, and data sets that have been normalized
- separately cannot be compared to each other.
- Hence, when using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , any arrays to be analyzed together must also be normalized together, and
- the set of arrays included in the data set must be held constant throughout
- an analysis.
- \end_layout
- \begin_layout Standard
- These limitations present serious impediments to the use of arrays as a
- diagnostic tool.
- When training a classifier, the samples to be classified must not be involved
- in any step of the training process, lest their inclusion bias the training
- process.
- Once a classifier is deployed in a clinical setting, the samples to be
- classified will not even
- \emph on
- exist
- \emph default
- at the time of training, so including them would be impossible even if
- it were statistically justifiable.
- Therefore, any machine learning application for microarrays demands that
- the normalized expression values computed for an array must depend only
- on information contained within that array.
- This would ensure that each array's normalization is independent of every
- other array, and that arrays normalized separately can still be compared
- to each other without bias.
- Such a normalization is commonly referred to as
- \begin_inset Quotes eld
- \end_inset
- single-channel normalization
- \begin_inset Quotes erd
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- addresses these concerns by replacing the quantile normalization and median
- polish with alternatives that do not introduce inter-array dependence,
- allowing each array to be normalized independently of all others
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- Quantile normalization is performed against a pre-generated set of quantiles
- learned from a collection of 850 publicly available arrays sampled from
- a wide variety of tissues in
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsdisp*{GEO}{the Gene Expression Omnibus (GEO)}
- \end_layout
- \end_inset
- .
- Each array's probe intensity distribution is normalized against these pre-gener
- ated quantiles.
- The median polish step is replaced with a robust weighted average of probe
- intensities, using inverse variance weights learned from the same public
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
- data.
- The result is a normalization that satisfies the requirements mentioned
- above: each array is normalized independently of all others, and any two
- normalized arrays can be compared directly to each other.
- \end_layout
- \begin_layout Standard
- One important limitation of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- is that it requires a separate reference data set from which to learn the
- parameters (reference quantiles and probe weights) that will be used to
- normalize each array.
- These parameters are specific to a given array platform, and pre-generated
- parameters are only provided for the most common platforms, such as Affymetrix
- hgu133plus2.
- For a less common platform, such as hthgu133pluspm, it is necessary to
- learn custom parameters from in-house data before
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- can be used to normalize samples on that platform
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- One other option is the aptly-named
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsdisp*{SCAN}{Single Channel Array Normalization (SCAN)}
- \end_layout
- \end_inset
- , which adapts a normalization method originally designed for tiling arrays
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Piccolo2012"
- literal "false"
- \end_inset
- .
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- is truly single-channel in that it does not require a set of normalization
- parameters estimated from an external set of reference samples like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- does.
- \end_layout
- \begin_layout Subsection
- Heteroskedasticity must be accounted for in methylation array data
- \end_layout
- \begin_layout Standard
- DNA methylation arrays are a relatively new kind of assay that uses microarrays
- to measure the degree of methylation on cytosines in specific regions arrayed
- across the genome.
- First, bisulfite treatment converts all unmethylated cytosines to uracil
- (which are read as thymine during amplification and sequencing) while leaving
- methylated cytosines unaffected.
- Then, each target region is interrogated with two probes: one binds to
- the original genomic sequence and interrogates the level of methylated
- DNA, and the other binds to the same sequence with all cytosines replaced
- by thymidines and interrogates the level of unmethylated DNA.
- \end_layout
- \begin_layout Standard
- After normalization, these two probe intensities are summarized in one of
- two ways, each with advantages and disadvantages.
- β
- \series bold
-
- \series default
- values, interpreted as the fraction of DNA copies methylated, range from 0 to
- 1.
- β
- \series bold
-
- \series default
- values are conceptually easy to interpret, but the constrained range makes
- them unsuitable for linear modeling, and their error distributions are
- highly non-normal, which also frustrates linear modeling.
-
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsdisp*{M-value}{M-values}
- \end_layout
- \end_inset
- , interpreted as the log ratios of methylated to unmethylated copies for
- each probe region, are computed by mapping the beta values from
- \begin_inset Formula $[0,1]$
- \end_inset
- onto
- \begin_inset Formula $(-\infty,+\infty)$
- \end_inset
- using a sigmoid curve (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Sigmoid-beta-m-mapping"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This transformation results in values with better statistical properties:
- the unconstrained range is suitable for linear modeling, and the error
- distributions are more normal.
- Hence, most linear modeling and other statistical testing on methylation
- arrays is performed using
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/sigmoid.pdf
- lyxscale 50
- width 60col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Sigmoid shape of the mapping between β and M values.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Sigmoid-beta-m-mapping"
- \end_inset
- \series bold
- Sigmoid shape of the mapping between β and M values.
-
- \series default
- This mapping is monotonic and non-linear, but it is approximately linear
- in the neighborhood of
- \begin_inset Formula $(\beta=0.5,M=0)$
- \end_inset
- .
-
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- However, the steep slope of the sigmoid transformation near 0 and 1 tends
- to exaggerate small differences in β values near those extremes, which
- in turn amplifies the error in those values, leading to a U-shaped trend
- in the mean-variance curve: extreme values have higher variances than values
- near the middle.
- This mean-variance dependency must be accounted for when fitting the linear
- model for differential methylation, or else the variance will be systematically
- overestimated for probes with moderate
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- and underestimated for probes with extreme
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- .
- This is particularly undesirable for methylation data because the intermediate
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- are the ones of most interest, since they are more likely to represent
- areas of varying methylation, whereas extreme
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- typically represent complete methylation or complete lack of methylation.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- read count data are also known to show heteroskedasticity, and the voom
- method was introduced for modeling this heteroskedasticity by estimating
- the mean-variance trend in the data and using this trend to assign precision
- weights to each observation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014"
- literal "false"
- \end_inset
- .
- While methylation array data are not derived from counts and have a very
- different mean-variance relationship from that of typical
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, the voom method makes no specific assumptions about the shape of the
- mean-variance relationship – it only assumes that the relationship can
- be modeled as a smooth curve.
- Hence, the method is sufficiently general to model the mean-variance relationsh
- ip in methylation array data.
- However, while the method does not require count data as input, the standard
- implementation of voom assumes that the input is given in raw read counts,
- and it must be adapted to run on methylation
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Subsection
- Evaluation of classifier performance with different normalization methods
- \end_layout
- \begin_layout Standard
- For testing different expression microarray normalizations, a data set of
- 157 hgu133plus2 arrays was used, consisting of blood samples from kidney
- transplant patients whose grafts had been graded as
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- ,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- , or
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- via biopsy and histology (46 TX, 69 AR, 42 ADNR)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kurian2014"
- literal "true"
- \end_inset
- .
- Additionally, an external validation set of 75 samples was gathered from
- public
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
- data (37 TX, 38 AR, no ADNR).
-
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Find appropriate GEO identifiers if possible.
- Kurian 2014 says GSE15296, but this seems to be different data.
- I also need to look up the GEO accession for the external validation set.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To evaluate the effect of each normalization on classifier performance,
- the same classifier training and validation procedure was used after each
- normalization method.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PAM
- \end_layout
- \end_inset
- algorithm was used to train a nearest shrunken centroid classifier on the
- training set and select the appropriate threshold for centroid shrinking
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Tibshirani2002"
- literal "false"
- \end_inset
- .
- Then the trained classifier was used to predict the class probabilities
- of each validation sample.
- From these class probabilities,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ROC
- \end_layout
- \end_inset
- curves and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AUC
- \end_layout
- \end_inset
- values were generated
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Turck2011"
- literal "false"
- \end_inset
- .
- Each normalization was tested on two different sets of training and validation
- samples.
- For internal validation, the 115
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- arrays in the internal set were split at random into two equal-sized sets,
- one for training and one for validation, each containing the same numbers
- of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- samples as the other set.
- For external validation, the full set of 115
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- samples were used as a training set, and the 75 external
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- samples were used as the validation set.
- Thus, 2
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ROC
- \end_layout
- \end_inset
- curves and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AUC
- \end_layout
- \end_inset
- values were generated for each normalization method: one internal and one
- external.
- Because the external validation set contains no
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- samples, only classification of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- samples was considered.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- samples were included during normalization but excluded from all classifier
- training and validation.
- This ensures that the performance on internal and external validation sets
- is directly comparable, since both are performing the same task: distinguishing
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Summarize the get.best.threshold algorithm for PAM threshold selection, or
- just put the code online?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Six different normalization strategies were evaluated.
- First, 2 well-known non-single-channel normalization methods were considered:
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- and dChip
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Li2001,Irizarry2003a"
- literal "false"
- \end_inset
- .
- Since
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- produces expression values on a
- \begin_inset Formula $\log_{2}$
- \end_inset
- scale and dChip does not, the values from dChip were
- \begin_inset Formula $\log_{2}$
- \end_inset
- transformed after normalization.
- Next,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- and dChip followed by
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- were tested
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Pelz2008"
- literal "false"
- \end_inset
- .
- Post-processing with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- does not turn
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- or dChip into single-channel methods, but it may help mitigate batch effects
- and is therefore useful as a benchmark.
- Lastly, the two single-channel normalization methods,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- , were tested
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010,Piccolo2012"
- literal "false"
- \end_inset
- .
- When evaluating internal validation performance, only the 157 internal
- samples were normalized; when evaluating external validation performance,
- all 157 internal samples and 75 external samples were normalized together.
- \end_layout
- \begin_layout Standard
- For demonstrating the problem with separate normalization of training and
- validation data, one additional normalization was performed: the internal
- and external sets were each normalized separately using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , and the normalized data for each set were combined into a single set with
- no further attempts at normalizing between the two sets.
- This represents approximately how
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- would have to be used in a clinical setting, where the samples to be classified
- are not available at the time the classifier is trained.
- \end_layout
- \begin_layout Subsection
- Generating custom fRMA vectors for hthgu133pluspm array platform
- \end_layout
- \begin_layout Standard
- In order to enable
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization for the hthgu133pluspm array platform, custom
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization vectors were trained using the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- frmaTools
- \end_layout
- \end_inset
- package
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- Separate vectors were created for two types of samples: kidney graft biopsy
- samples and blood samples from graft recipients.
- For training, 341 kidney biopsy samples from 2 data sets and 965 blood
- samples from 5 data sets were used as the reference set.
- Arrays were grouped into batches based on unique combinations of sample
- type (blood or biopsy), diagnosis (TX, AR, etc.), data set, and scan date.
- Thus, each batch represents arrays of the same kind that were run together
- on the same day.
- For estimating the probe inverse variance weights, frmaTools requires equal-siz
- ed batches, which means a batch size must be chosen, and then batches smaller
- than that size must be ignored, while batches larger than the chosen size
- must be downsampled.
- This downsampling is performed randomly, so the sampling process was repeated
- 5 times and the resulting normalizations were compared to each other.
- \end_layout
- \begin_layout Standard
- To evaluate the consistency of the generated normalization vectors, the
- 5
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- vector sets generated from 5 random batch samplings were each used to normalize
- the same 20 randomly selected samples from each tissue.
- Then the normalized expression values for each probe on each array were
- compared across all normalizations.
- Each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization was also compared against the normalized expression values
- obtained by normalizing the same 20 samples with ordinary
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- Modeling methylation array M-value heteroskedasticity with a modified voom
- implementation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Put code on Github and reference it.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To investigate whether DNA methylation could be used to distinguish
- between healthy and dysfunctional transplants, a data set of 78 Illumina
- 450k methylation arrays from human kidney graft biopsies was analyzed for
- differential methylation between 4 transplant statuses:
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- , transplants undergoing
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- ,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- , and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- CAN
- \end_layout
- \end_inset
- .
- The data consisted of 33 TX, 9 AR, 8 ADNR, and 28 CAN samples.
- The uneven group sizes are a result of taking the biopsy samples before
- the eventual fate of the transplant was known.
- Each sample was additionally annotated with a donor
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ID
- \end_layout
- \end_inset
- (anonymized), sex, age, ethnicity, creatinine level, and diabetes diagnosis
- (all samples in this data set came from patients with either
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T1D
- \end_layout
- \end_inset
- or
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T2D
- \end_layout
- \end_inset
- ).
-
- \end_layout
- \begin_layout Standard
- The intensity data were first normalized using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SWAN
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Maksimovic2012"
- literal "false"
- \end_inset
- , then converted to intensity ratios (beta values)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Aryee2014"
- literal "false"
- \end_inset
- .
- Any probes binding to loci that overlapped annotated SNPs were dropped,
- and the annotated sex of each sample was verified against the sex inferred
- from the ratio of median probe intensities for the X and Y chromosomes.
- Then, the ratios were transformed to
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="6">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- random effect
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- eBayes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- weights
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- voom
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Summary of analysis variants for methylation array data.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Summary-of-meth-analysis"
- \end_inset
- \series bold
- Summary of analysis variants for methylation array data.
-
- \series default
- Each analysis included a different set of steps to adjust or account for
- various systematic features of the data.
- Random effect: The model included a random effect accounting for correlation
- between samples from the same patient
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005a"
- literal "false"
- \end_inset
- ; eBayes: Empirical Bayes squeezing of per-probe variances toward the mean-varia
- nce trend
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2015"
- literal "false"
- \end_inset
- ; SVA: Surrogate variable analysis to account for unobserved confounders
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007"
- literal "false"
- \end_inset
- ; Weights: Estimate sample weights to account for differences in sample
- quality
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liu2015,Ritchie2006"
- literal "false"
- \end_inset
- ; voom: Use mean-variance trend to assign individual observation weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014"
- literal "false"
- \end_inset
- .
- See the text for a more detailed explanation of each step.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- From the
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- , a series of parallel analyses was performed, each adding additional steps
- into the model fit to accommodate a feature of the data (see Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Summary-of-meth-analysis"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- For analysis A, a
- \begin_inset Quotes eld
- \end_inset
- basic
- \begin_inset Quotes erd
- \end_inset
- linear modeling analysis was performed, compensating for known confounders
- by including terms for the factor of interest (transplant status) as well
- as the known biological confounders: sex, age, ethnicity, and diabetes.
- Since some samples came from the same patients at different times, the
- intra-patient correlation was modeled as a random effect, estimating a
- shared correlation value across all probes
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005a"
- literal "false"
- \end_inset
- .
- Then the linear model was fit, and the variance was modeled using empirical
- Bayes squeezing toward the mean-variance trend
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2015"
- literal "false"
- \end_inset
- .
- Finally, t-tests or F-tests were performed as appropriate for each test:
- t-tests for single contrasts, and F-tests for multiple contrasts.
- P-values were corrected for multiple testing using the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BH
- \end_layout
- \end_inset
- procedure for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- control
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- For analysis B,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- was used to infer additional unobserved sources of heterogeneity in the
- data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007"
- literal "false"
- \end_inset
- .
- These surrogate variables were added to the design matrix before fitting
- the linear model.
- In addition, sample quality weights were estimated from the data and used
- during linear modeling to down-weight the contribution of highly variable
- arrays while increasing the weight to arrays with lower variability
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2006"
- literal "false"
- \end_inset
- .
- The remainder of the analysis proceeded as in analysis A.
- For analysis C, the voom method was adapted to run on methylation array
- data and used to model and correct for the mean-variance trend using individual
- observation weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014"
- literal "false"
- \end_inset
- , which were combined with the sample weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liu2015,Ritchie2006"
- literal "false"
- \end_inset
- .
- Each time weights were used, they were estimated once before estimating
- the random effect correlation value, and then the weights were re-estimated
- taking the random effect into account.
- The remainder of the analysis proceeded as in analysis B.
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Improve subsection titles in this section.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Reconsider subsection organization?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Separate normalization with RMA introduces unwanted biases in classification
- \end_layout
- \begin_layout Standard
- To demonstrate the problem with non-single-channel normalization methods,
- we considered the problem of training a classifier to distinguish
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- using the samples from the internal set as training data, evaluating performanc
- e on the external set.
- First, training and evaluation were performed after normalizing all array
- samples together as a single set using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , and second, the internal samples were normalized separately from the external
- samples and the training and evaluation were repeated.
- For each sample in the validation set, the classifier probabilities from
- both classifiers were plotted against each other (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Classifier-probabilities-RMA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- As expected, separate normalization biases the classifier probabilities,
- resulting in several misclassifications.
- In this case, the bias from separate normalization causes the classifier
- to assign a lower probability of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- to every sample.
-
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/predplot.pdf
- lyxscale 50
- width 60col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Classifier probabilities on validation samples when normalized with RMA
- together vs.
- separately.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Classifier-probabilities-RMA"
- \end_inset
- \series bold
- Classifier probabilities on validation samples when normalized with RMA
- together vs.
- separately.
-
- \series default
- The PAM classifier algorithm was trained on the training set of arrays to
- distinguish AR from TX and then used to assign class probabilities to the
- validation set.
- The process was performed after normalizing all samples together and after
- normalizing the training and validation sets separately, and the class probabilities
- assigned to each sample in the validation set were plotted against each
- other.
- Each axis indicates the posterior probability of AR assigned to a sample
- by the classifier in the specified analysis.
- The color of each point indicates the true classification of that sample.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- fRMA and SCAN maintain classification performance while eliminating dependence
- on normalization strategy
- \end_layout
- \begin_layout Standard
- For internal validation, the 6 methods' AUC values ranged from 0.816 to 0.891,
- as shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:AUC-PAM"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- Among the non-single-channel normalizations, dChip outperformed
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , while
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- reduced the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AUC
- \end_layout
- \end_inset
- values for both dChip and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- .
- Both single-channel methods,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- , slightly outperformed
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- ahead of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- .
- However, the difference between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- is still quite small.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ROC-PAM-int"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ROC
- \end_layout
- \end_inset
- curves for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , dChip, and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- look very similar and relatively smooth, while both
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- curves and the curve for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- have a more jagged appearance.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/ROC-TXvsAR-internal.pdf
- lyxscale 50
- height 40theight%
- groupId roc-pam
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ROC-PAM-int"
- \end_inset
- ROC curves for PAM on internal validation data
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/ROC-TXvsAR-external.pdf
- lyxscale 50
- height 40theight%
- groupId roc-pam
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ROC-PAM-ext"
- \end_inset
- ROC curves for PAM on external validation data
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- ROC curves for PAM using different normalization strategies.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ROC-PAM-main"
- \end_inset
- \series bold
- ROC curves for PAM using different normalization strategies.
-
- \series default
- ROC curves were generated for PAM classification of AR vs TX after 6 different
- normalization strategies were applied to the same data sets.
- Only fRMA and SCAN are single-channel normalizations.
- The other normalizations are for comparison.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="7" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Normalization
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Single-channel?
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Internal Val.
- AUC
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- External Val.
- AUC
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- RMA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.852
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.713
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- dChip
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.891
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.657
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- RMA + GRSN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.816
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.750
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- dChip + GRSN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.875
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.642
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- fRMA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.863
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.718
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- SCAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.853
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.689
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- ROC curve AUC values for internal and external validation with 6 different
- normalization strategies.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:AUC-PAM"
- \end_inset
- \series bold
- ROC curve AUC values for internal and external validation with 6 different
- normalization strategies.
- \series default
- These AUC values correspond to the ROC curves in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ROC-PAM-main"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- For external validation, as expected, all the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AUC
- \end_layout
- \end_inset
- values are lower than in the internal validation, ranging from 0.642 to 0.750
- (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:AUC-PAM"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- With or without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- ,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- shows its dominance over dChip in this more challenging test.
- Unlike in the internal validation,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- actually improves the classifier performance for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , although it does not for dChip.
- Once again, both single-channel methods perform about on par with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- performing slightly better and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- performing a bit worse.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ROC-PAM-ext"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ROC
- \end_layout
- \end_inset
- curves for the external validation test.
- As expected, none of them are as clean-looking as the internal validation
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ROC
- \end_layout
- \end_inset
- curves.
- The curves for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , RMA+GRSN, and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- all look similar, while the other curves look more divergent.
- \end_layout
- \begin_layout Subsection
- fRMA with custom-generated vectors enables single-channel normalization
- on hthgu133pluspm platform
- \end_layout
- \begin_layout Standard
- In order to enable use of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- to normalize hthgu133pluspm, a custom set of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- vectors was created.
- First, an appropriate batch size was chosen by looking at the number of
- batches and number of samples included as a function of batch size (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:frmatools-batch-size"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- For a given batch size, all batches with fewer samples than the chosen
- size must be ignored during training, while larger batches must be randomly
- downsampled to the chosen size.
- Hence, the number of samples included for a given batch size equals the
- batch size times the number of batches with at least that many samples.
- From Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:batch-size-samples"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , it is apparent that a batch size of 8 maximizes the number of samples
- included in training.
- Increasing the batch size beyond this causes too many smaller batches to
- be excluded, reducing the total number of samples for both tissue types.
- However, a batch size of 8 is not necessarily optimal.
- The article introducing frmaTools concluded that it was highly advantageous
- to use a smaller batch size in order to include more batches, even at the
- cost of including fewer total samples in training
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- To strike an appropriate balance between more batches and more samples,
- a batch size of 5 was chosen.
- For both blood and biopsy samples, this increased the number of batches
- included by 10, with only a modest reduction in the number of samples compared
- to a batch size of 8.
- With a batch size of 5, 26 batches of biopsy samples and 46 batches of
- blood samples were available.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/batchsize_batches.pdf
- lyxscale 50
- height 35theight%
- groupId frmatools-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:batch-size-batches"
- \end_inset
- \series bold
- Number of batches usable in fRMA probe weight learning as a function of
- batch size.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/batchsize_samples.pdf
- lyxscale 50
- height 35theight%
- groupId frmatools-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:batch-size-samples"
- \end_inset
- \series bold
- Number of samples usable in fRMA probe weight learning as a function of
- batch size.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Effect of batch size selection on number of batches and number of samples
- included in fRMA probe weight learning.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:frmatools-batch-size"
- \end_inset
- \series bold
- Effect of batch size selection on number of batches and number of samples
- included in fRMA probe weight learning.
-
- \series default
- For batch sizes ranging from 3 to 15, the number of batches (a) and samples
- (b) included in probe weight training were plotted for biopsy (BX) and
- blood (PAX) samples.
- The selected batch size, 5, is marked with a dotted vertical line.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Since
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- training requires equal-size batches, larger batches are downsampled randomly.
- This introduces a nondeterministic step in the generation of normalization
- vectors.
- To show that this randomness does not substantially change the outcome,
- the random downsampling and subsequent vector learning were repeated 5 times,
- with a different random seed each time.
- 20 samples were selected at random as a test set and normalized with each
- of the 5 sets of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization vectors as well as ordinary RMA, and the normalized expression
- values were compared across normalizations.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows a summary of these comparisons for biopsy samples.
- Comparing RMA to each of the 5
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalizations, the distribution of log ratios is somewhat wide, indicating
- that the normalizations disagree on the expression values of a fair number
- of probe sets.
- In contrast, in comparisons of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- against
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- , the vast majority of probe sets have very small log ratios, indicating
- a very high agreement between the normalized values generated by the two
- normalizations.
- This shows that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization's behavior is not very sensitive to the random downsampling
- of larger batches during training.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/M-BX-violin.pdf
- lyxscale 40
- height 90theight%
- groupId m-violin
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Violin plot of log ratios between normalizations for 20 biopsy samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:m-bx-violin"
- \end_inset
- \series bold
- Violin plot of log ratios between normalizations for 20 biopsy samples.
-
- \series default
- Each of 20 randomly selected samples was normalized with RMA and with 5
- different sets of fRMA vectors.
- The distribution of log ratios between normalized expression values, aggregated
- across all 20 arrays, was plotted for each pair of normalizations.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/M-PAX-violin.pdf
- lyxscale 40
- height 90theight%
- groupId m-violin
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:m-pax-violin"
- \end_inset
- \begin_inset Argument 1
- status open
- \begin_layout Plain Layout
- Violin plot of log ratios between normalizations for 20 blood samples.
- \end_layout
- \end_inset
- \series bold
- Violin plot of log ratios between normalizations for 20 blood samples.
-
- \series default
- Each of 20 randomly selected samples was normalized with RMA and with 5
- different sets of fRMA vectors.
- The distribution of log ratios between normalized expression values, aggregated
- across all 20 arrays, was plotted for each pair of normalizations.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ma-bx-rma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows an MA plot of the RMA-normalized values against the fRMA-normalized
- values for the same probe sets and arrays, corresponding to the first row
- of Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This MA plot shows that not only is there a wide distribution of
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- , but the trend of
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- is dependent on the average normalized intensity.
- This is expected, since the overall trend represents the differences in
- the quantile normalization step.
- When running
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , only the quantiles for these specific 20 arrays are used, while for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- the quantile distribution is taken from all arrays used in training.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ma-bx-frma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows a similar MA plot comparing 2 different
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalizations, corresponding to the 6th row of Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The MA plot is very tightly centered around zero with no visible trend.
- Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-pax-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-PAX-rma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-PAX-frma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- show exactly the same information for the blood samples, once again comparing
- the normalized expression values between normalizations for all probe sets
- across 20 randomly selected test arrays.
- Once again, there is a wider distribution of log ratios between RMA-normalized
- values and fRMA-normalized values, and a much tighter distribution when comparing
- different
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalizations to each other, indicating that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- training process is robust to random batch sub-sampling for the blood samples
- as well.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-BX-RMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ma-bx-rma-frma"
- \end_inset
- RMA vs.
- fRMA for biopsy samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-BX-fRMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ma-bx-frma-frma"
- \end_inset
- fRMA vs fRMA for biopsy samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-PAX-RMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-PAX-rma-frma"
- \end_inset
- RMA vs.
- fRMA for blood samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-PAX-fRMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-PAX-frma-frma"
- \end_inset
- fRMA vs fRMA for blood samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Representative MA plots comparing RMA and custom fRMA normalizations.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Representative-MA-plots"
- \end_inset
- \series bold
- Representative MA plots comparing RMA and custom fRMA normalizations.
-
- \series default
- For each plot, 20 samples were normalized using 2 different normalizations,
- and then averages (A) and log ratios (M) were plotted between the two different
- normalizations for every probe.
- For the
- \begin_inset Quotes eld
- \end_inset
- fRMA vs fRMA
- \begin_inset Quotes erd
- \end_inset
- plots (b & d), two different fRMA normalizations using vectors from two
- independent batch samplings were compared.
- Density of points is represented by blue shading, and individual outlier
- points are plotted.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- SVA, voom, and array weights improve model fit for methylation array data
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-basic"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the relationship between the mean
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- and the standard deviation calculated for each probe in the methylation
- array data set.
- A few features of the data are apparent.
- First, the data are very strongly bimodal, with peaks in the density around
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- of +4 and -4.
- These modes correspond to methylation sites that are nearly 100% methylated
- and nearly 100% unmethylated, respectively.
- The strong bimodality indicates that a majority of probes interrogate sites
- that fall into one of these two categories.
- The points in between these modes represent sites that are either partially
- methylated in many samples, or are fully methylated in some samples and
- fully unmethylated in other samples, or some combination.
- The next visible feature of the data is the W-shaped variance trend.
- The upticks in the variance trend on either side are expected, based on
- the sigmoid transformation exaggerating small differences at extreme
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Sigmoid-beta-m-mapping"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- However, the uptick in the center is interesting: it indicates that sites
- that are not constitutively methylated or unmethylated have a higher variance.
- This could be a genuine biological effect, or it could be spurious noise
- that is only observable at sites with varying methylation.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Fix axis labels:
- \begin_inset Quotes eld
- \end_inset
- log2 M-value
- \begin_inset Quotes erd
- \end_inset
- is redundant because M-values are already log scale
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/meanvar-trends-PAGE1-CROP-RASTER.png
- lyxscale 15
- width 30col%
- groupId voomaw-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-basic"
- \end_inset
- Mean-variance trend for analysis A.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/meanvar-trends-PAGE1-CROP-RASTER.png
- lyxscale 15
- width 30col%
- groupId voomaw-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-sva-aw"
- \end_inset
- Mean-variance trend for analysis B.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/meanvar-trends-PAGE2-CROP-RASTER.png
- lyxscale 15
- width 30col%
- groupId voomaw-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-sva-voomaw"
- \end_inset
- Mean-variance trend after voom modeling in analysis C.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Mean-variance trend modeling in methylation array data.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:-Meanvar-trend-methyl"
- \end_inset
- \series bold
- Mean-variance trend modeling in methylation array data.
-
- \series default
- The estimated
- \begin_inset Formula $\log_{2}$
- \end_inset
- (standard deviation) for each probe is plotted against the probe's average
- M-value across all samples as a black point, with some transparency to
- make over-plotting more visible, since there are about 450,000 points.
- Density of points is also indicated by the dark blue contour lines.
- The prior variance trend estimated by eBayes is shown in light blue, while
- the lowess trend of the points is shown in red.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , we see the mean-variance trend for the same methylation array data, this
- time with surrogate variables and sample quality weights estimated from
- the data and included in the model.
- As expected, the overall average variance is smaller, since the surrogate
- variables account for some of the variance.
- In addition, the uptick in variance in the middle of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- range has disappeared, turning the W shape into a wide U shape.
- This indicates that the excess variance in the probes with intermediate
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- was explained by systematic variations not correlated with known covariates,
- and these variations were modeled by the surrogate variables.
- The result is a nearly flat variance trend for the entire intermediate
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- range from about -3 to +3.
- Note that this corresponds closely to the range within which the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- transformation shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Sigmoid-beta-m-mapping"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- is nearly linear.
- In contrast, the excess variance at the extremes (greater than +3 and less
- than -3) was not
- \begin_inset Quotes eld
- \end_inset
- absorbed
- \begin_inset Quotes erd
- \end_inset
- by the surrogate variables and remains in the plot, indicating that this
- variation has no systematic component: probes with extreme
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- are uniformly more variable across all samples, as expected.
-
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-voomaw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the mean-variance trend after fitting the model with the observation
- weights assigned by voom based on the mean-variance trend shown in Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- As expected, the weights exactly counteract the trend in the data, resulting
- in a nearly flat trend centered vertically at 1 (i.e.
- 0 on the log scale).
- This shows that the observations with extreme
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- have been appropriately down-weighted to account for the fact that the
- noise in those observations has been amplified by the non-linear
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- transformation.
- In turn, this gives relatively more weight to observations in the middle
- region, which are more likely to correspond to probes measuring interesting
- biology (not constitutively methylated or unmethylated).
- \end_layout
- \begin_layout Standard
- To determine whether any of the known experimental factors had an impact
- on data quality, the sample quality weights estimated from the data were
- tested for association with each of the experimental factors (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:weight-covariate-tests"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Diabetes diagnosis was found to have a potentially significant association
- with the sample weights, with a t-test p-value of
- \begin_inset Formula $1.06\times10^{-3}$
- \end_inset
- .
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:diabetes-sample-weights"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the distribution of sample weights grouped by diabetes diagnosis.
- The samples from patients with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T2D
- \end_layout
- \end_inset
- were assigned significantly lower weights than those from patients with
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T1D
- \end_layout
- \end_inset
- .
- This indicates that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T2D
- \end_layout
- \end_inset
- samples had an overall higher variance on average across all probes.
-
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="3">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Covariate
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Test used
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- p-value
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Transplant Status
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- F-test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.404
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Diabetes Diagnosis
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \emph on
- t
- \emph default
- -test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.00106
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Sex
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \emph on
- t
- \emph default
- -test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.148
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Age
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- linear regression
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.212
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Association of sample weights with clinical covariates in methylation array
- data.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:weight-covariate-tests"
- \end_inset
- \series bold
- Association of sample weights with clinical covariates in methylation array
- data.
-
- \series default
- Computed sample quality log weights were tested for significant association
- with each of the variables in the model (1st column).
- An appropriate test was selected for each variable based on whether the
- variable had 2 categories (
- \emph on
- t
- \emph default
- -test), had more than 2 categories (F-test), or was numeric (linear regression).
- The test selected is shown in the 2nd column.
- P-values for association with the log weights are shown in the 3rd column.
- No multiple testing adjustment was performed for these p-values.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Redo the sample weight boxplot with notches, and remove fill colors
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/sample-weights-PAGE3-CROP.pdf
- lyxscale 50
- width 60col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Box-and-whiskers plot of sample quality weights grouped by diabetes diagnosis.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:diabetes-sample-weights"
- \end_inset
- \series bold
- Box-and-whiskers plot of sample quality weights grouped by diabetes diagnosis.
-
- \series default
- Samples were grouped based on diabetes diagnosis, and the distribution of
- sample quality weights for each diagnosis was plotted as a box-and-whiskers
- plot
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McGill1978"
- literal "false"
- \end_inset
- .
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-num-signif"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the number of significantly differentially methylated probes reported
- by each analysis for each comparison of interest at an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- of 10%.
- As expected, the more elaborate analyses, B and C, report more significant
- probes than the more basic analysis A, consistent with the conclusions
- above that the data contain hidden systematic variations that must be modeled.
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-est-nonnull"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the estimated number of differentially methylated probes for each test
- from each analysis.
- This was computed by estimating the proportion of null hypotheses that
- were true using the method of
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013Thesis"
- literal "false"
- \end_inset
- and multiplying the complementary fraction by the total number of probes, yielding
- an estimate of the number of null hypotheses that are false based on the
- distribution of p-values across the entire dataset.
- Note that this does not identify which null hypotheses should be rejected
- (i.e.
- which probes are significant); it only estimates the true number of such
- probes.
- Once again, analyses B and C result in much larger estimates for the number
- of differentially methylated probes.
- In this case, analysis C, the only analysis that includes voom, estimates
- the largest number of differentially methylated probes for all 3 contrasts.
- If the assumptions of all the methods employed hold, then this represents
- a gain in statistical power over the simpler analysis A.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meth-p-value-histograms"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the p-value distributions for each test, from which the numbers in
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-est-nonnull"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- were generated.
- The distributions for analysis A all have a dip in density near zero, which
- is a strong sign of a poor model fit.
- The histograms for analyses B and C are more well-behaved, with a uniform
- component stretching all the way from 0 to 1 representing the probes for
- which the null hypothesis is true (no differential methylation), and a
- zero-biased component representing the probes for which the null hypothesis
- is false (differentially methylated).
- These histograms do not indicate any major issues with the model fit.
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Consider transposing these tables
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Contrast
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs AR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 25
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 22
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs ADNR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 7
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 338
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 369
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs CAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 231
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 278
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:methyl-num-signif"
- \end_inset
- Number of probes significant at 10% FDR.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Contrast
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs AR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 10,063
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 11,225
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs ADNR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 27
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 12,674
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 13,086
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs CAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 966
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 20,039
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 20,955
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:methyl-est-nonnull"
- \end_inset
- Estimated number of non-null tests, using the method of averaging local
- FDR values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013Thesis"
- literal "false"
- \end_inset
- .
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Estimates of degree of differential methylation for each contrast in
- each analysis.
- \end_layout
- \end_inset
- \series bold
- Estimates of degree of differential methylation for each contrast in
- each analysis.
-
- \series default
- For each of the analyses in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Summary-of-meth-analysis"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , these tables show the number of probes called significantly differentially
- methylated at a threshold of 10% FDR for each comparison between TX and
- the other 3 transplant statuses (a) and the estimated total number of probes
- that are differentially methylated (b).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \series bold
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE1.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- AR vs.
- TX, Analysis A
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE2.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- ADNR vs.
- TX, Analysis A
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE3.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- CAN vs.
- TX, Analysis A
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \series bold
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE1.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- AR vs.
- TX, Analysis B
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE2.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- ADNR vs.
- TX, Analysis B
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE3.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- CAN vs.
- TX, Analysis B
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \series bold
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE1.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- AR vs.
- TX, Analysis C
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE2.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- ADNR vs.
- TX, Analysis C
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE3.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- CAN vs.
- TX, Analysis C
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Probe p-value histograms for each contrast in each analysis.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meth-p-value-histograms"
- \end_inset
- \series bold
- Probe p-value histograms for each contrast in each analysis.
-
- \series default
- For each differential methylation test of interest, the distribution of
- p-values across all probes is plotted as a histogram.
- The red solid line indicates the density that would be expected under the
- null hypothesis for all probes (a
- \begin_inset Formula $\mathrm{Uniform}(0,1)$
- \end_inset
- distribution), while the blue dotted line indicates the fraction of p-values
- that actually follow the null hypothesis (
- \begin_inset Formula $\hat{\pi}_{0}$
- \end_inset
- ) estimated using the method of averaging local FDR values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013Thesis"
- literal "false"
- \end_inset
- .
- A blue line is only shown in each plot if the estimate of
- \begin_inset Formula $\hat{\pi}_{0}$
- \end_inset
- for that p-value distribution is smaller than 1.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- If time allows, maybe generate the PCA plots before/after SVA effect subtraction
- ?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Subsection
- fRMA achieves clinically applicable normalization without sacrificing classifica
- tion performance
- \end_layout
- \begin_layout Standard
- As shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Classifier-probabilities-RMA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , improper normalization, particularly separate normalization of training
- and test samples, leads to unwanted biases in classification.
- In a controlled experimental context, it is always possible to correct
- this issue by normalizing all experimental samples together.
- However, because it is not feasible to normalize all samples together in
- a clinical context, a single-channel normalization is required.
-
- \end_layout
- \begin_layout Standard
- The major concern in using a single-channel normalization is that non-single-cha
- nnel methods can share information between arrays to improve the normalization,
- and single-channel methods risk sacrificing the gains in normalization
- accuracy that come from this information sharing.
- In the case of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , this information sharing is accomplished through quantile normalization
- and median polish steps.
- The need for information sharing in quantile normalization can easily be
- removed by learning a fixed set of quantiles from external data and normalizing
- each array to these fixed quantiles, instead of the quantiles of the data
- itself.
- As long as the fixed quantiles are reasonable, the result will be similar
- to standard
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- .
- However, there is no analogous way to eliminate cross-array information
- sharing in the median polish step, so
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- replaces this with a weighted average of probes on each array, with the
- weights learned from external data.
- This step of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- has the greatest potential to diverge from RMA in undesirable ways.
- \end_layout
- \begin_layout Standard
- However, when run on real data,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- performed at least as well as
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- in both the internal validation and external validation tests.
- This shows that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- can be used to normalize individual clinical samples in a class prediction
- context without sacrificing the classifier performance that would be obtained
- by using the more well-established
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- for normalization.
- The other single-channel normalization method considered,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- , showed some loss of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AUC
- \end_layout
- \end_inset
- in the external validation test.
- Based on these results,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- is the preferred normalization for clinical samples in a class prediction
- context.
- \end_layout
- \begin_layout Subsection
- Robust fRMA vectors can be generated for new array platforms
- \end_layout
- \begin_layout Standard
- The published
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization vectors for the hgu133plus2 platform were generated from
- a set of 850 samples chosen from a wide range of tissues, which the authors
- determined was sufficient to generate a robust set of normalization vectors
- that could be applied across all tissues
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- Since we only had hthgu133pluspm data for 2 tissues of interest, our needs were
- more modest.
- Even using only 130 samples in 26 batches of 5 samples each for kidney
- biopsies, we were able to train a robust set of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization vectors that were not meaningfully affected by the random
- selection of 5 samples from each batch.
- As expected, the training process was just as robust for the blood samples
- with 230 samples in 46 batches of 5 samples each.
- Because these vectors were each generated using training samples from a
- single tissue, they are not suitable for general use, unlike the vectors
- provided with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- itself.
- They are purpose-built for normalizing a specific type of sample on a specific
- platform.
- This is a mostly acceptable limitation in the context of developing a machine
- learning classifier for diagnosing a disease from samples of a specific
- tissue.
- \end_layout
- \begin_layout Subsection
- Methylation array data can be successfully analyzed using existing techniques,
- but machine learning poses additional challenges
- \end_layout
- \begin_layout Standard
- Analysis strategies B and C both yield a reasonable analysis, with
- a mean-variance trend that matches the expected behavior for the non-linear
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- transformation (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) and well-behaved p-value distributions (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meth-p-value-histograms"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- These two analyses also yield similar numbers of significant probes (Table
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-num-signif"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) and similar estimates of the number of differentially methylated probes
- (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-est-nonnull"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The main difference between these two analyses is the method used to account
- for the mean-variance trend.
- In analysis B, the trend is estimated and applied at the probe level: each
- probe's estimated variance is squeezed toward the trend using an empirical
- Bayes procedure (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- In analysis C, the trend is still estimated at the probe level, but instead
- of estimating a single variance value shared across all observations for
- a given probe, the voom method computes an initial estimate of the variance
- for each observation individually based on where its model-fitted
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- falls on the trend line and then assigns inverse-variance weights to model
- the difference in variance between observations.
- An overall variance is still estimated for each probe using the same empirical
- Bayes method, but now the residual trend is flat (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-voomaw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), indicating that the mean-variance trend is adequately modeled by scaling
- the estimated variance for each observation using the weights computed
- by voom.
-
- \end_layout
- \begin_layout Standard
- The difference between the standard empirical Bayes trended variance modeling
- (analysis B) and voom (analysis C) is analogous to the difference between
- a t-test with equal variance and a t-test with unequal variance, except
- that the unequal group variances used in the latter test are estimated
- based on the mean-variance trend from all the probes rather than the data
- for the specific probe being tested, thus stabilizing the group variance
- estimates by sharing information between probes.
- Allowing voom to model the variance using observation weights in this manner
- allows the linear model fit to concentrate statistical power where it will
- do the most good.
- For example, if a particular probe's
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- are always at the extreme of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- range (e.g.
- less than -4) for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- samples, but the
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- for that probe in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- CAN
- \end_layout
- \end_inset
- samples are within the flat region of the mean-variance trend (between
-
- \begin_inset Formula $-3$
- \end_inset
- and
- \begin_inset Formula $+3$
- \end_inset
- ), voom is able to down-weight the contribution of the high-variance
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- samples in order to gain more statistical power while testing for differential
- methylation between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- CAN
- \end_layout
- \end_inset
- .
- In contrast, modeling the mean-variance trend only at the probe level would
- combine the high-variance
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- samples and lower-variance samples from other conditions and estimate an
- intermediate variance for this probe.
- In practice, analysis B shows that this approach is adequate, but the voom
- approach in analysis C performs at least as well on all model fit criteria
- and yields a larger estimate for the number of differentially methylated
- probes,
- \emph on
- and
- \emph default
- it matches up slightly better with the theoretical properties of the data.
- \end_layout
- \begin_layout Standard
- The significant association of diabetes diagnosis with sample quality is
- interesting.
- The samples with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T2D
- \end_layout
- \end_inset
- tended to have more variation, averaged across all probes, than those with
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T1D
- \end_layout
- \end_inset
- .
- This is consistent with the consensus that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T2D
- \end_layout
- \end_inset
- and the associated metabolic syndrome represent a broad dysregulation of
- the body's endocrine signaling related to metabolism
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Volkmar2012,Hall2018,Yokoi2018"
- literal "false"
- \end_inset
- .
- This dysregulation could easily manifest as a greater degree of variation
- in the DNA methylation patterns of affected tissues.
- In contrast,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T1D
- \end_layout
- \end_inset
- has a more specific cause and effect, so a less variable methylation signature
- is expected.
- \end_layout
- \begin_layout Standard
- This preliminary analysis suggests that some degree of differential methylation
- exists between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and each of the three types of transplant dysfunction studied.
- Hence, it may be feasible to train a classifier to diagnose transplant
- dysfunction from DNA methylation array data.
- However, the major importance of both
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- and sample quality weighting for proper modeling of this data poses significant
- challenges for any attempt at machine learning on data of similar quality.
- While these are easily used in a modeling context with full sample information,
- neither of these methods is directly applicable in a machine learning context,
- where the diagnosis is not known ahead of time.
- If a machine learning approach for methylation-based diagnosis is to be
- pursued, it will either require machine-learning-friendly methods to address
- the same systematic trends in the data that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- and sample quality weighting address, or it will require higher quality
- data with substantially less systematic perturbation of the data.
- \end_layout
- \begin_layout Section
- Future Directions
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Some work was already being done with the existing fRMA vectors.
- Do I mention that here?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Improving fRMA to allow training from batches of unequal size
- \end_layout
- \begin_layout Standard
- Because the tools for building
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization vectors require equal-size batches, many samples must be
- discarded from the training data.
- This is undesirable for a few reasons.
- First, more data is simply better, all other things being equal.
- In this case,
- \begin_inset Quotes eld
- \end_inset
- better
- \begin_inset Quotes erd
- \end_inset
- means a more precise estimate of normalization parameters.
- In addition, the samples to be discarded must be chosen arbitrarily, which
- introduces an unnecessary element of randomness into the estimation process.
- While the randomness can be made deterministic by setting a consistent
- random seed, the need for equal-size batches also introduces a need for
- the analyst to decide on the appropriate trade-off between batch size and
- the number of batches.
- This introduces an unnecessary and undesirable
- \begin_inset Quotes eld
- \end_inset
- researcher degree of freedom
- \begin_inset Quotes erd
- \end_inset
- into the analysis, since the generated normalization vectors now depend
- on the choice of batch size based on vague selection criteria and instinct,
- which can unintentionally introduce bias if the researcher chooses a batch
- size based on what seems to yield the most favorable downstream results
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Simmons2011"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- Fortunately, the requirement for equal-size batches is not inherent to the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- algorithm but rather a limitation of the implementation in the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- frmaTools
- \end_layout
- \end_inset
- package.
- In personal communication, the package's author, Matthew McCall, has indicated
- that with some work, it should be possible to improve the implementation
- to work with batches of unequal sizes.
- The current implementation ignores the batch size when calculating within-batch
- and between-batch residual variances, since the batch size constant cancels
- out later in the calculations as long as all batches are of equal size.
- Hence, the calculations of these parameters would need to be modified to
- remove this optimization and properly calculate the variances using the
- full formula.
- Once this modification is made, a new strategy would need to be developed
- for assessing the stability of parameter estimates, since the random sub-sampli
- ng step is eliminated, meaning that different sub-samplings can no longer
- be compared as in Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:frma-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Representative-MA-plots"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- Bootstrap resampling is likely a good candidate here: sample many training
- sets of equal size from the existing training set with replacement, estimate
- parameters from each resampled training set, and compare the estimated
- parameters between bootstraps in order to quantify the variability in each
- parameter's estimation.
- \end_layout
- \begin_layout Subsection
- Developing methylation arrays as a diagnostic tool for kidney transplant
- rejection
- \end_layout
- \begin_layout Standard
- The current study has shown that DNA methylation, as assayed by Illumina
- 450k methylation arrays, has some potential for diagnosing transplant dysfuncti
- ons, including rejection.
- However, very few probes could be confidently identified as differentially
- methylated between healthy and dysfunctional transplants.
- One likely explanation for this is the predominant influence of unobserved
- confounding factors.
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- can model and correct for such factors, but the correction can never be
- perfect, so some degree of unwanted systematic variation will always remain
- after
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- correction.
- If the effect size of the confounding factors was similar to that of the
- factor of interest (in this case, transplant status), this would be an
- acceptable limitation, since removing most of the confounding factors'
- effects would allow the main effect to stand out.
- However, in this data set, the confounding factors have a much larger effect
- size than transplant status, which means that the small degree of remaining
- variation not removed by
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- can still swamp the effect of interest, making it difficult to detect.
- This is, of course, a major issue when the end goal is to develop a classifier
- to diagnose transplant rejection from methylation data, since batch-correction
- methods like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- that work in a linear modeling context cannot be applied in a machine learning
- context.
- \end_layout
- \begin_layout Standard
- Currently, the source of these unwanted systematic variations in the data
- is unknown.
- The best solution would be to determine the cause of the variation and
- eliminate it, thereby eliminating the need to model and remove that variation.
- However, if this proves impractical, another option is to use
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- to identify probes that are highly associated with the surrogate variables
- that describe the unwanted variation in the data.
- These probes could be discarded prior to classifier training, in order
- to maximize the chance that the training algorithm will be able to identify
- highly predictive probes from those remaining.
- Lastly, it is possible that some of this unwanted variation is a result
- of the array-based assay being used and would be eliminated by switching
- to assaying DNA methylation using bisulphite sequencing.
- However, this carries the risk that the sequencing assay will have its
- own set of biases that must be corrected for in a different way.
- \end_layout
- \begin_layout Chapter
- \begin_inset CommandInset label
- LatexCommand label
- name "chap:Globin-blocking-cyno"
- \end_inset
- Globin-blocking for more effective blood RNA-seq analysis in primate animal
- model
- \end_layout
- \begin_layout Standard
- \size large
- Ryan C.
- Thompson, Terri Gelbart, Steven R.
- Head, Phillip Ordoukhanian, Courtney Mullen, Dongmei Han, Dora Berman,
- Amelia Bartholomew, Norma Kenyon, Daniel R.
- Salomon
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Reintroduce all abbreviations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Choose between above and the paper title: Optimizing yield of deep RNA sequencin
- g for gene expression profiling by globin reduction of peripheral blood
- samples from cynomolgus monkeys (
- \emph on
- Macaca fascicularis
- \emph default
- ).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section*
- Abstract
- \end_layout
- \begin_layout Paragraph
- Background
- \end_layout
- \begin_layout Standard
- Primate blood contains high concentrations of globin
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- mRNA
- \end_layout
- \end_inset
- .
- Globin reduction is a standard technique used to improve the expression
- results obtained by DNA microarrays on RNA from blood samples.
- However, with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- quickly replacing microarrays for many applications, the impact of globin
- reduction for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- is less well-studied.
- Moreover, no off-the-shelf kits are available for globin reduction in nonhuman
- primates.
- \end_layout
- \begin_layout Paragraph
- Results
- \end_layout
- \begin_layout Standard
- Here we report a protocol for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- in primate blood samples that uses complementary
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- to block reverse transcription of the alpha and beta globin genes.
- In test samples from cynomolgus monkeys (
- \emph on
- Macaca fascicularis
- \emph default
- ), this
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- protocol approximately doubles the yield of informative (non-globin) reads
- by greatly reducing the fraction of globin reads, while also improving
- the consistency in sequencing depth between samples.
- The increased yield enables detection of about 2000 more genes, significantly
- increases the correlation in measured gene expression levels between samples,
- and increases the sensitivity of differential gene expression tests.
- \end_layout
- \begin_layout Paragraph
- Conclusions
- \end_layout
- \begin_layout Standard
- These results show that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- significantly improves the cost-effectiveness of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- in primate blood samples by doubling the yield of useful reads, allowing
- detection of more genes, and improving the precision of gene expression
- measurements.
- Based on these results, a globin reducing or blocking protocol is recommended
- for all
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- studies of primate blood samples.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Introduction
- \end_layout
- \begin_layout Standard
- As part of a multi-lab PO1 grant to study
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- infusion as a treatment for graft rejection in cynomolgus monkeys (
- \emph on
- Macaca fascicularis
- \emph default
- ), a large number of serial blood draws from cynomolgus monkeys were planned
- in order to monitor the progress of graft healing and eventual rejection
- after transplantation.
- In order to streamline the process of performing
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- on these blood samples, we developed a custom sequencing protocol.
- In the development of this protocol, we required a solution for the problem
- of excess globin reads.
- High fractions of globin
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- mRNA
- \end_layout
- \end_inset
- are naturally present in mammalian peripheral blood samples (up to 70%
- of total
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- mRNA
- \end_layout
- \end_inset
- ) and these are known to interfere with the results of array-based expression
- profiling
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Winn2010"
- literal "false"
- \end_inset
- .
- Globin reduction is also necessary for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- of blood samples, though for unrelated reasons: without globin reduction,
- many
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- reads will be derived from the globin genes, leaving fewer for the remainder
- of the genes in the transcriptome.
- However, existing strategies for globin reduction require an additional
- step during sample preparation to deplete the population of globin transcripts
- from the sample prior to reverse transcription
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012,Choi2014,Shin2014"
- literal "false"
- \end_inset
- .
- Furthermore, off-the-shelf globin reduction kits are generally targeted
- at human or mouse globin, not cynomolgus monkey, and sequence identity
- between human and cyno globin genes cannot be automatically assumed.
- Hence, we sought to incorporate a custom globin reduction method into our
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- protocol purely by adding additional reagents to an existing step in the
- sample preparation.
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Consider putting some of this in the Intro chapter
- \end_layout
- \begin_layout Itemize
- Cynomolgus monkeys as a model organism
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Highly related to humans
- \end_layout
- \begin_layout Itemize
- Small size and short life cycle - good research animal
- \end_layout
- \begin_layout Itemize
- Genomics resources still in development
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Inadequacy of existing blood RNA-seq protocols
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Existing protocols use a separate globin pulldown step, slowing down processing
- \end_layout
- \end_deeper
- \end_inset
- \end_layout
- \begin_layout Standard
- We evaluated globin reduction for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- by blocking reverse transcription of globin transcripts using custom blocking
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- .
- We demonstrate that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- significantly improves the cost-effectiveness of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- in blood samples.
- Thus, our protocol offers a significant advantage to any investigator planning
- to use
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- for gene expression profiling of nonhuman primate blood samples.
- Our method can be generally applied to any species by designing complementary
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- blocking probes to the globin gene sequences of that species.
- Indeed, any highly expressed but biologically uninformative transcripts
- can also be blocked to further increase sequencing efficiency and value
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Arnaud2016"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Subsection
- Sample collection
- \end_layout
- \begin_layout Standard
- All research reported here was done under IACUC-approved protocols at the
- University of Miami and complied with all applicable federal and state
- regulations and ethical principles for nonhuman primate research.
- Blood draws occurred between 16
- \begin_inset space ~
- \end_inset
- April
- \begin_inset space ~
- \end_inset
- 2012 and 18
- \begin_inset space ~
- \end_inset
- June
- \begin_inset space ~
- \end_inset
- 2015.
- The experimental system involved intrahepatic pancreatic islet transplantation
- into Cynomolgus monkeys with induced diabetes mellitus with or without
- concomitant infusion of mesenchymal stem cells.
- Blood was collected at serial time points before and after transplantation
- into PAXgene Blood RNA tubes (PreAnalytiX/Qiagen, Valencia, CA) at the
- precise volume:volume ratio of 2.5
- \begin_inset space ~
- \end_inset
- ml whole blood into 6.9
- \begin_inset space ~
- \end_inset
- ml of PAXgene additive.
- \end_layout
- \begin_layout Subsection
- Globin blocking oligonucleotide design
- \end_layout
- \begin_layout Standard
- Four
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- were designed to hybridize to the
- \begin_inset Formula $3^{\prime}$
- \end_inset
- end of the transcripts for the Cynomolgus alpha and beta globin, with two
- hybridization sites for each gene.
- All
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- were purchased from Sigma and were entirely composed of 2
- \begin_inset Formula $^{\prime}$
- \end_inset
- O-Me bases with a C3 spacer positioned at the
- \begin_inset Formula $3^{\prime}$
- \end_inset
- ends to prevent any polymerase mediated primer extension.
- \end_layout
- \begin_layout Description
- HBA1/2
- \begin_inset space ~
- \end_inset
- site
- \begin_inset space ~
- \end_inset
- 1:
- \family typewriter
- GCCCACUCAGACUUUAUUCAAAG-C3spacer
- \end_layout
- \begin_layout Description
- HBA1/2
- \begin_inset space ~
- \end_inset
- site
- \begin_inset space ~
- \end_inset
- 2:
- \family typewriter
- GGUGCAAGGAGGGGAGGAG-C3spacer
- \end_layout
- \begin_layout Description
- HBB
- \begin_inset space ~
- \end_inset
- site
- \begin_inset space ~
- \end_inset
- 1:
- \family typewriter
- AAUGAAAAUAAAUGUUUUUUAUUAG-C3spacer
- \end_layout
- \begin_layout Description
- HBB
- \begin_inset space ~
- \end_inset
- site
- \begin_inset space ~
- \end_inset
- 2:
- \family typewriter
- CUCAAGGCCCUUCAUAAUAUCCC-C3spacer
- \end_layout
- \begin_layout Subsection
- RNA-seq library preparation
- \end_layout
- \begin_layout Standard
- Sequencing libraries were prepared with 200
- \begin_inset space ~
- \end_inset
- ng total RNA from each sample.
- Polyadenylated
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- mRNA
- \end_layout
- \end_inset
- was selected from 200
- \begin_inset space ~
- \end_inset
- ng aliquots of cynomolgus blood-derived total RNA using Ambion Dynabeads
- Oligo(dT)25 beads (Invitrogen) following the manufacturer’s recommended
- protocol.
- PolyA selected RNA was then combined with 8
- \begin_inset space ~
- \end_inset
- pmol of HBA1/2
- \begin_inset space ~
- \end_inset
- (site
- \begin_inset space ~
- \end_inset
- 1), 8
- \begin_inset space ~
- \end_inset
- pmol of HBA1/2
- \begin_inset space ~
- \end_inset
- (site
- \begin_inset space ~
- \end_inset
- 2), 12
- \begin_inset space ~
- \end_inset
- pmol of HBB
- \begin_inset space ~
- \end_inset
- (site
- \begin_inset space ~
- \end_inset
- 1) and 12
- \begin_inset space ~
- \end_inset
- pmol of HBB
- \begin_inset space ~
- \end_inset
- (site
- \begin_inset space ~
- \end_inset
- 2)
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- .
- In addition, 20
- \begin_inset space ~
- \end_inset
- pmol of RT primer containing a portion of the Illumina adapter sequence
- (B-oligo-dTV: GAGTTCCTTGGCACCCGAGAATTCCATTTTTTTTTTTTTTTTTTTV) and 4
- \begin_inset space ~
- \end_inset
- \emph on
- μ
- \emph default
- L of 5X First Strand buffer (250
- \begin_inset space ~
- \end_inset
- mM Tris-HCl pH
- \begin_inset space ~
- \end_inset
- 8.3, 375
- \begin_inset space ~
- \end_inset
- mM KCl, 15
- \begin_inset space ~
- \end_inset
- mM
- \begin_inset Formula $\textrm{MgCl}_{2}$
- \end_inset
- ) were added in a total volume of 15
- \begin_inset space ~
- \end_inset
- µL.
- The RNA was fragmented by heating this cocktail for 3 minutes at 95°C and
- then placed on ice.
- This was followed by the addition of 2
- \begin_inset space ~
- \end_inset
- µL 0.1
- \begin_inset space ~
- \end_inset
- M DTT, 1
- \begin_inset space ~
- \end_inset
- µL RNaseOUT, 1
- \begin_inset space ~
- \end_inset
- µL 10
- \begin_inset space ~
- \end_inset
- mM dNTPs 10% biotin-16 aminoallyl-
- \begin_inset Formula $2^{\prime}$
- \end_inset
- - dUTP and 10% biotin-16 aminoallyl-
- \begin_inset Formula $2^{\prime}$
- \end_inset
- -dCTP (TriLink Biotech, San Diego, CA), 1
- \begin_inset space ~
- \end_inset
- µL Superscript II (200
- \begin_inset space ~
- \end_inset
- U/µL, Thermo-Fisher).
- A second “unblocked” library was prepared in the same way for each sample
- but replacing the blocking
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- with an equivalent volume of water.
- The reaction was carried out at 25°C for 15 minutes and 42°C for 40 minutes,
- followed by incubation at 75°C for 10 minutes to inactivate the reverse
- transcriptase.
- \end_layout
- \begin_layout Standard
- The cDNA/RNA hybrid molecules were purified using 1.8X Ampure XP beads (Agencourt
- ) following supplier’s recommended protocol.
- The cDNA/RNA hybrid was eluted in 25
- \begin_inset space ~
- \end_inset
- µL of 10
- \begin_inset space ~
- \end_inset
- mM Tris-HCl pH
- \begin_inset space ~
- \end_inset
- 8.0, and then bound to 25
- \begin_inset space ~
- \end_inset
- µL of M280 Magnetic Streptavidin beads washed per recommended protocol (Thermo-F
- isher).
- After 30 minutes of binding, beads were washed one time in 100
- \begin_inset space ~
- \end_inset
- µL 0.1
- \begin_inset space ~
- \end_inset
- N NaOH to denature and remove the bound RNA, followed by two 100
- \begin_inset space ~
- \end_inset
- µL washes with 1X TE buffer.
- \end_layout
- \begin_layout Standard
- Subsequent attachment of the
- \begin_inset Formula $5^{\prime}$
- \end_inset
- Illumina A adapter was performed by on-bead random primer extension of
- the following sequence (A-N8 primer:
- \family typewriter
- TTCAGAGTTCTACAGTCCGACGATCNNNNNNNN
- \family default
- ).
- Briefly, beads were resuspended in a 20
- \begin_inset space ~
- \end_inset
- µL reaction containing 5
- \begin_inset space ~
- \end_inset
- µM A-N8 primer, 40
- \begin_inset space ~
- \end_inset
- mM Tris-HCl pH
- \begin_inset space ~
- \end_inset
- 7.5, 20
- \begin_inset space ~
- \end_inset
- mM
- \begin_inset Formula $\textrm{MgCl}_{2}$
- \end_inset
- , 50
- \begin_inset space ~
- \end_inset
- mM NaCl, 0.325
- \begin_inset space ~
- \end_inset
- U/µL Sequenase
- \begin_inset space ~
- \end_inset
- 2.0 (Affymetrix, Santa Clara, CA), 0.0025
- \begin_inset space ~
- \end_inset
- U/µL inorganic pyrophosphatase (Affymetrix) and 300
- \begin_inset space ~
- \end_inset
- µM each dNTP.
- Reaction was incubated at 22°C for 30 minutes, then beads were washed 2
- times with 1X TE buffer (200
- \begin_inset space ~
- \end_inset
- µL).
- \end_layout
- \begin_layout Standard
- The magnetic streptavidin beads were resuspended in 34
- \begin_inset space ~
- \end_inset
- µL nuclease-free water and added directly to a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCR
- \end_layout
- \end_inset
- tube.
- The two Illumina protocol-specified
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCR
- \end_layout
- \end_inset
- primers were added at 0.53
- \begin_inset space ~
- \end_inset
- µM (Illumina TruSeq Universal Primer 1 and Illumina TruSeq barcoded
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCR
- \end_layout
- \end_inset
- primer 2), along with 40
- \begin_inset space ~
- \end_inset
- µL 2X KAPA HiFi Hotstart ReadyMix (KAPA, Wilmington, MA) and thermocycled
- as follows: starting with 98°C (2 min-hold); 15 cycles of 98°C, 20sec;
- 60°C, 30sec; 72°C, 30sec; and finished with a 72°C (2 min-hold).
- \end_layout
- \begin_layout Standard
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCR
- \end_layout
- \end_inset
- products were purified with 1X Ampure Beads following manufacturer’s recommende
- d protocol.
- Libraries were then analyzed using the Agilent TapeStation and quantitation
- of desired size range was performed by “smear analysis”.
- Samples were pooled in equimolar batches of 16 samples.
- Pooled libraries were size selected on 2% agarose gels (E-Gel EX Agarose
- Gels; Thermo-Fisher).
- Products were cut between 250 and 350
- \begin_inset space ~
- \end_inset
- bp (corresponding to insert sizes of 130 to 230
- \begin_inset space ~
- \end_inset
- bp).
- Finished library pools were then sequenced on the Illumina NextSeq500 instrumen
- t with 75
- \begin_inset space ~
- \end_inset
- bp read lengths.
-
- \end_layout
- \begin_layout Subsection
- Read alignment and counting
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- emergencystretch 3em
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Need to relax the justification parameters just for this paragraph, or else
- featureCounts can break out of the margin.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Reads were aligned to the cynomolgus genome using STAR
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Wilson2013,Dobin2012"
- literal "false"
- \end_inset
- .
- Counts of uniquely mapped reads were obtained for every gene in each sample
- with the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- featureCounts
- \end_layout
- \end_inset
- function from the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- Rsubread
- \end_layout
- \end_inset
- package, using each of the three possibilities for the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- strandSpecific
- \end_layout
- \end_inset
- option: sense, antisense, and unstranded
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liao2014"
- literal "false"
- \end_inset
- .
- A few artifacts in the cynomolgus genome annotation complicated read counting.
- First, no ortholog is annotated for alpha globin in the cynomolgus genome,
- presumably because the human genome has two alpha globin genes with nearly
- identical sequences, making the orthology relationship ambiguous.
- However, two loci in the cynomolgus genome are annotated as “hemoglobin
- subunit alpha-like” (LOC102136192 and LOC102136846).
- LOC102136192 is annotated as a pseudogene while LOC102136846 is annotated
- as protein-coding.
- Our globin reduction protocol was designed to include blocking of these
- two genes.
- Indeed, these two genes together have almost the same read counts in each
- library as the properly-annotated HBB gene and much larger counts than
- any other gene in the unblocked libraries, giving confidence that reads
- derived from the real alpha globin are mapping to both genes.
- Thus, reads from both of these loci were counted as alpha globin reads
- in all further analyses.
- The second artifact is a small, uncharacterized non-coding RNA gene (LOC1021365
- 91), which overlaps the HBA-like gene (LOC102136192) on the opposite strand.
- If counting is not performed in stranded mode (or if a non-strand-specific
- sequencing protocol is used), many reads mapping to the globin gene will
- be discarded as ambiguous due to their overlap with this
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ncRNA
- \end_layout
- \end_inset
- gene, resulting in significant undercounting of globin reads.
- Therefore, stranded sense counts were used for all further analysis in
- the present study to ensure that we accurately accounted for globin transcript
- reduction.
- However, we note that stranded reads are not necessary for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- using our protocol in standard practice.
-
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- emergencystretch 0em
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Normalization and exploratory data analysis
- \end_layout
- \begin_layout Standard
- Libraries were normalized by computing scaling factors using the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- package's
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TMM
- \end_layout
- \end_inset
- method
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Robinson2010"
- literal "false"
- \end_inset
- .
-
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values were calculated using the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- cpm
- \end_layout
- \end_inset
- function in
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- for individual samples and the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- aveLogCPM
- \end_layout
- \end_inset
- function for averages across groups of samples, using those functions’
- default prior count values to avoid taking the logarithm of 0.
- Genes were considered “present” if their average normalized
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values across all libraries were at least
- \begin_inset Formula $-1$
- \end_inset
- .
- Normalizing for gene length was unnecessary because the sequencing protocol
- is
- \begin_inset Formula $3^{\prime}$
- \end_inset
- -biased and hence the expected read count for each gene is related to the
- transcript’s copy number but not its length.
- \end_layout
- \begin_layout Standard
- In order to assess the effect of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- on reproducibility, Pearson and Spearman correlation coefficients were
- computed between the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values for every pair of libraries within the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and non-GB groups, and
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- 's
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- estimateDisp
- \end_layout
- \end_inset
- function was used to compute
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- dispersions separately for the two groups
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Chen2014"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- Differential expression analysis
- \end_layout
- \begin_layout Standard
- All tests for differential gene expression were performed using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- , by first fitting a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GLM
- \end_layout
- \end_inset
- to the counts and normalization factors and then performing a quasi-likelihood
- F-test with robust estimation of outlier gene dispersions
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lund2012,Phipson2016"
- literal "false"
- \end_inset
- .
- To investigate the effects of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- on each gene, an additive model was fit to the full data with coefficients
- for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and Sample
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ID
- \end_layout
- \end_inset
- .
- To test the effect of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- on detection of differentially expressed genes, the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples and non-GB samples were each analyzed independently as follows:
- for each animal with both a pre-transplant and a post-transplant time point
- in the data set, the pre-transplant sample and the earliest post-transplant
- sample were selected, and all others were excluded, yielding a pre-/post-transp
- lant pair of samples for each animal (
- \begin_inset Formula $N=7$
- \end_inset
- animals with paired samples).
- These samples were analyzed for pre-transplant vs.
- post-transplant differential gene expression while controlling for inter-animal
- variation using an additive model with coefficients for transplant and
- animal
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ID
- \end_layout
- \end_inset
- .
- In all analyses, p-values were adjusted using the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BH
- \end_layout
- \end_inset
- procedure for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- control
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Itemize
- New blood RNA-seq protocol to block reverse transcription of globin genes
- \end_layout
- \begin_layout Itemize
- Blood RNA-seq time course after transplants with/without MSC infusion
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Subsection
- Globin blocking yields a larger and more consistent fraction of useful reads
- \end_layout
- \begin_layout Standard
- The objective of the present study was to validate a new protocol for deep
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- of whole blood drawn into PaxGene tubes from cynomolgus monkeys undergoing
- islet transplantation, with particular focus on minimizing the loss of
- useful sequencing space to uninformative globin reads.
- The details of the analysis with respect to transplant outcomes and the
- impact of mesenchymal stem cell treatment will be reported in a separate
- manuscript (in preparation).
- To focus on the efficacy of our
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- protocol, 37 blood samples, 16 from pre-transplant and 21 from post-transplant
- time points, were each prepped once with and once without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- , and were then sequenced on an Illumina NextSeq500 instrument.
- The number of reads aligning to each gene in the cynomolgus genome was
- counted.
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Fractions-of-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- summarizes the distribution of read fractions among the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and non-GB libraries.
- In the libraries with no
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- , globin reads made up an average of 44.6% of total input reads, while reads
- assigned to all other genes made up an average of 26.3%.
- The remaining reads either aligned to intergenic regions (that include
- long non-coding RNAs) or did not align with any annotated transcripts in
- the current build of the cynomolgus genome.
- In the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries, globin reads made up only 3.48% and reads assigned to all other
- genes increased to 50.4%.
- Thus,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- resulted in a 92.2% reduction in globin reads and a 91.6% increase in yield
- of useful non-globin reads.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- placement p
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="7">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Percent of Total Reads
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Percent of Genic Reads
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Non-globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- All Genic Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- All Aligned Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Non-globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Globin Reads
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 50.4% ± 6.82
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 3.48% ± 2.94
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 53.9% ± 6.81
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 89.7% ± 2.40
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 93.5% ± 5.25
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 6.49% ± 5.25
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 26.3% ± 8.95
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 44.6% ± 16.6
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 70.1% ± 9.38
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 90.7% ± 5.16
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 38.8% ± 17.1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 61.2% ± 17.1
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Fractions of reads mapping to genomic features in GB and non-GB samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Fractions-of-reads"
- \end_inset
- \series bold
- Fractions of reads mapping to genomic features in GB and non-GB samples.
-
- \series default
- All values are given as mean ± standard deviation.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- This reduction is not quite as efficient as the previous analysis showed
- for human samples by DeepSAGE (<0.4% globin reads after globin reduction)
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- Nonetheless, this degree of globin reduction is sufficient to nearly double
- the yield of useful reads.
- Thus,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- cuts the required sequencing effort (and costs) to achieve a target coverage
- depth by almost 50%.
- Consistent with this near doubling of yield, the average difference in
- un-normalized
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- across all genes between the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries and non-GB libraries is approximately 1 (mean = 1.01, median =
- 1.08), an overall 2-fold increase.
- Un-normalized values are used here because the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TMM
- \end_layout
- \end_inset
- normalization correctly identifies this 2-fold difference as biologically
- irrelevant and removes it.
- \end_layout
- \begin_layout Standard
- Another important aspect is that the standard deviations in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Fractions-of-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- are uniformly smaller in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples than the non-GB ones, indicating much greater consistency of yield.
- This is best seen in the percentage of non-globin reads as a fraction of
- total reads aligned to annotated genes (genic reads).
- For the non-GB samples, this measure ranges from 10.9% to 80.9%, while for
- the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples it ranges from 81.9% to 99.9% (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Fraction-of-genic-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/globin-paper/figure1-globin-fractions.pdf
- lyxscale 50
- width 100col%
- groupId colfullwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Fraction of genic reads in each sample aligned to non-globin genes, with
- and without GB.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Fraction-of-genic-reads"
- \end_inset
- \series bold
- Fraction of genic reads in each sample aligned to non-globin genes, with
- and without GB.
- \series default
- All reads in each sequencing library were aligned to the cyno genome, and
- the number of reads uniquely aligning to each gene was counted.
- For each sample, counts were summed separately for all globin genes and
- for the remainder of the genes (non-globin genes), and the fraction of
- genic reads aligned to non-globin genes was computed.
- Each point represents an individual sample.
- Gray + signs indicate the means for globin-blocked libraries and unblocked
- libraries.
- The overall distribution for each group is represented as a notched box
- plot.
- Points are randomly spread vertically to avoid excessive overlapping.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Float lost issues
- \end_layout
- \end_inset
- ).
- This means that for applications where it is critical that each sample
- achieve a specified minimum coverage in order to provide useful information,
- it would be necessary to budget up to 10 times the sequencing depth per
- sample without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- , even though the average yield improvement for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- is only 2-fold, because every sample has a chance of being 90% globin and
- 10% useful reads.
- Hence, the more consistent behavior of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples makes planning an experiment easier and more efficient because
- it eliminates the need to over-sequence every sample in order to guard
- against the worst case of a high-globin fraction.
- \end_layout
- \begin_layout Subsection
- Globin blocking lowers the noise floor and allows detection of about 2000
- more low-expression genes
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Remove redundant titles from figures
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Since
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- yields more usable sequencing depth, it should also allow detection of
- more genes at any given threshold.
- When we looked at the distribution of average normalized
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values across all libraries for genes with at least one read assigned to
- them, we observed the expected bimodal distribution, with a high-abundance
- "signal" peak representing detected genes and a low-abundance "noise" peak
- representing genes whose read count did not rise above the noise floor
- (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Consistent with the 2-fold increase in raw counts assigned to non-globin
- genes, the signal peak for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples is shifted to the right relative to the non-GB signal peak.
- When all the samples are normalized together, this difference is normalized
- out, lining up the signal peaks, and this reveals that, as expected, the
- noise floor for the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples is about 2-fold lower.
- This greater separation between signal and noise peaks in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples means that low-expression genes should be more easily detected
- and more precisely quantified than in the non-GB samples.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/globin-paper/figure2-aveLogCPM-colored.pdf
- lyxscale 50
- height 60theight%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Distributions of average group gene abundances when normalized separately
- or together.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:logcpm-dists"
- \end_inset
- \series bold
- Distributions of average group gene abundances when normalized separately
- or together.
- \series default
- All reads in each sequencing library were aligned to the cyno genome, and
- the number of reads uniquely aligning to each gene was counted.
- Genes with zero counts in all libraries were discarded.
- Libraries were normalized using the TMM method.
- Libraries were split into GB and non-GB groups and the average logCPM was
- computed.
- The distribution of average gene logCPM values was plotted for both groups
- using a kernel density plot to approximate a continuous distribution.
- The GB logCPM distributions are marked in red, non-GB in blue.
- The black vertical line denotes the chosen detection threshold of
- \begin_inset Formula $-1$
- \end_inset
- .
- Top panel: Libraries were split into GB and non-GB groups first and normalized
- separately.
- Bottom panel: Libraries were all normalized together first and then split
- into groups.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Based on these distributions, we selected a detection threshold of
- \begin_inset Formula $-1$
- \end_inset
- , which is approximately the leftmost edge of the trough between the signal
- and noise peaks.
- This represents the most liberal possible detection threshold that doesn't
- call substantial numbers of noise genes as detected.
- Among the full dataset, 13429 genes were detected at this threshold, and
- 22276 were not.
- When considering the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries and non-GB libraries separately and re-computing normalization
- factors independently within each group, 14535 genes were detected in the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries while only 12460 were detected in the non-GB libraries.
- Thus,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- allowed the detection of more than 2000 extra genes that were buried under the noise
- floor without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- .
- This pattern of at least 2000 additional genes detected with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- was also consistent across a wide range of possible detection thresholds,
- from -2 to 3 (see Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Gene-detections"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/globin-paper/figure3-detection.pdf
- lyxscale 50
- width 70col%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Gene detections as a function of abundance thresholds in GB and non-GB samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Gene-detections"
- \end_inset
- \series bold
- Gene detections as a function of abundance thresholds in GB and non-GB samples.
- \series default
- Average logCPM was computed by separate group normalization as described
- in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- for both the GB and non-GB groups, as well as for all samples considered
- as one large group.
- For every integer threshold from
- \begin_inset Formula $-2$
- \end_inset
- to 3, the number of genes detected at or above that logCPM threshold was
- plotted for each group.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Globin blocking does not add significant additional noise or decrease sample
- quality
- \end_layout
- \begin_layout Standard
- One potential worry is that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- protocol could perturb the levels of non-globin genes.
- There are two kinds of possible perturbations: systematic and random.
- The former is not a major concern for detection of differential expression,
- since a 2-fold change in every sample has no effect on the relative fold
- change between samples.
- In contrast, random perturbations would increase the noise and obscure
- the signal in the dataset, reducing the capacity to detect differential
- expression.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Standardize on
- \begin_inset Quotes eld
- \end_inset
- log2
- \begin_inset Quotes erd
- \end_inset
- notation
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- The data do indeed show small systematic perturbations in gene levels (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-plot"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Other than the 3 designated alpha and beta globin genes, two other genes
- stand out as having especially large negative
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- logFC
- \end_layout
- \end_inset
- : HBD and LOC1021365.
- HBD, delta globin, is most likely targeted by the blocking
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- due to high sequence homology with the other globin genes.
- LOC1021365 is the aforementioned
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ncRNA
- \end_layout
- \end_inset
- that is reverse-complementary to one of the alpha-like genes and that would
- be expected to be removed during the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- step.
- All other genes appear in a cluster centered vertically at 0, and the vast
- majority of genes in this cluster show an absolute
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logFC
- \end_layout
- \end_inset
- of 0.5 or less.
- Nevertheless, many of these small perturbations are still statistically
- significant, indicating that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- likely cause very small but non-zero systematic perturbations in measured
- gene expression levels.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/globin-paper/figure4-maplot-colored.pdf
- lyxscale 50
- width 100col%
- groupId colfullwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- MA plot showing effects of GB on each gene's abundance.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-plot"
- \end_inset
- \series bold
- MA plot showing effects of GB on each gene's abundance.
-
- \series default
- All libraries were normalized together as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and genes with an average logCPM below
- \begin_inset Formula $-1$
- \end_inset
- were filtered out.
- Each remaining gene was tested for differential abundance with respect
- to
- \begin_inset Flex Glossary Term (glstext)
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- ’s quasi-likelihood F-test, fitting an NB GLM to the table of read counts in
- each library.
- For each gene,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- reported average logCPM, logFC, p-value, and BH-adjusted FDR.
- Each gene's logFC was plotted against its logCPM, colored by FDR.
- Red points are significant at
- \begin_inset Formula $≤10\%$
- \end_inset
- FDR, and blue are not significant at that threshold.
- The alpha and beta globin genes targeted for blocking are marked with large
- triangles, while all other genes are represented as small points.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Give these numbers the LaTeX math treatment
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To evaluate the possibility of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- causing random perturbations and reducing sample quality, we computed the
- Pearson correlation between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values for every pair of samples with and without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and plotted them against each other (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:gene-abundance-correlations"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The plot indicated that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries have higher sample-to-sample correlations than the non-GB libraries.
- Parametric and nonparametric tests for differences between the correlations
- with and without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- both confirmed that this difference was highly significant (2-sided paired
- t-test:
- \begin_inset Formula $t=37.2$
- \end_inset
- ,
- \begin_inset Formula $d.f.=665$
- \end_inset
- ,
- \begin_inset Formula $P\ll2.2\times10^{-16}$
- \end_inset
- ; 2-sided Wilcoxon signed-rank test:
- \begin_inset Formula $V=2195$
- \end_inset
- ,
- \begin_inset Formula $P\ll2.2\times10^{-16}$
- \end_inset
- ).
- Performing the same tests on the Spearman correlations gave the same conclusion
- (t-test:
- \begin_inset Formula $t=26.8$
- \end_inset
- ,
- \begin_inset Formula $d.f.=665$
- \end_inset
- ,
- \begin_inset Formula $P\ll2.2\times10^{-16}$
- \end_inset
- ; signed-rank test:
- \begin_inset Formula $V=8781$
- \end_inset
- ,
- \begin_inset Formula $P\ll2.2\times10^{-16}$
- \end_inset
- ).
- The
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- package was used to compute the overall
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and non-GB libraries, and we found that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- resulted in a negligible increase in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- (0.417 with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- vs.
- 0.400 without).
- The near equality of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- for both sets indicates that the higher correlations in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries are most likely a result of the increased yield of useful reads,
- which reduces the contribution of Poisson counting uncertainty to the overall
- variance of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCarthy2012"
- literal "false"
- \end_inset
- .
- This improves the precision of expression measurements and more than offsets
- the negligible increase in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/globin-paper/figure5-corrplot.pdf
- lyxscale 50
- width 100col%
- groupId colfullwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Comparison of inter-sample gene abundance correlations with and without
- GB.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:gene-abundance-correlations"
- \end_inset
- \series bold
- Comparison of inter-sample gene abundance correlations with and without
- GB.
- \series default
- All libraries were normalized together as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and genes with an average logCPM less than
- \begin_inset Formula $-1$
- \end_inset
- were filtered out.
- Each gene’s logCPM was computed in each library using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- 's
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- cpm
- \end_layout
- \end_inset
- function.
- For each pair of biological samples, the Pearson correlation between those
- samples' GB libraries was plotted against the correlation between the same
- samples’ non-GB libraries.
- Each point represents a unique pair of samples.
- The solid gray line shows a quantile-quantile plot of the distribution of GB
- correlations vs.
- that of non-GB correlations.
- The thin dashed line is the identity line, provided for reference.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- More differentially expressed genes are detected with globin blocking
- \end_layout
- \begin_layout Standard
- To compare performance on differential gene expression tests, we took subsets
- of both the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and non-GB libraries with exactly one pre-transplant and one post-transplant
- sample for each animal that had paired samples available for analysis (
- \begin_inset Formula $N=7$
- \end_inset
- animals,
- \begin_inset Formula $N=14$
- \end_inset
- samples in each subset).
- The same test for pre- vs.
- post-transplant differential gene expression was performed on the same
- 7 pairs of samples from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries and non-GB libraries, in each case using an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- of 10% as the threshold of significance.
- Out of 12,954 genes that passed the detection threshold in both subsets,
- 358 were called significantly differentially expressed in the same direction
- in both sets; 1063 were differentially expressed in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- set only; 296 were differentially expressed in the non-GB set only; 2 genes
- were called significantly up in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- set but significantly down in the non-GB set; and the remaining 11,235
- were not called differentially expressed in either set.
- These data are summarized in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Comparison-of-significant"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The differences in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- calculated by
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- for these subsets of samples were negligible (
- \begin_inset Formula $\textrm{BCV}=0.302$
- \end_inset
- for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and 0.297 for non-GB).
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="5">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- No Globin Blocking
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Up
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- NS
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Down
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="3" alignment="center" valignment="middle" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Globin-Blocking
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Up
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 231
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 515
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 2
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="4" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- NS
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 160
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 11235
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 136
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="4" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Down
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 548
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 127
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Comparison of significantly differentially expressed genes with and without
- globin blocking.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Comparison-of-significant"
- \end_inset
- \series bold
- Comparison of significantly differentially expressed genes with and without
- globin blocking.
- \series default
- Up, Down: Genes significantly up/down-regulated in post-transplant samples
- relative to pre-transplant samples, with a false discovery rate of 10%
- or less.
- NS: Non-significant genes (false discovery rate greater than 10%).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- The key point is that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- data results in substantially more differentially expressed calls than
- the non-GB data.
- Since there is no gold standard for this dataset, it is impossible to be
- certain whether this is due to under-calling of differential expression
- in the non-GB samples or over-calling in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples.
- However, given that both datasets are derived from the same biological
- samples and have nearly equal
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- , it is more likely that the larger number of differential expression calls
- in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples are genuine detections that were enabled by the higher sequencing
- depth and measurement precision of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples.
- Note that the same set of genes was considered in both subsets, so the
- larger number of differentially expressed gene calls in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- data set reflects a greater sensitivity to detect significant differential
- gene expression and not simply the larger total number of detected genes
- in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples described earlier.
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Standard
- The original experience with whole blood gene expression profiling on DNA
- microarrays demonstrated that the high concentration of globin transcripts
- reduced the ability to detect genes with relatively low expression
- levels, in effect significantly reducing the sensitivity of the assay.
- To address this limitation, commercial protocols for globin reduction were
- developed based on strategies to block globin transcript amplification
- during labeling or physically removing globin transcripts by affinity bead
- methods
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Winn2010"
- literal "false"
- \end_inset
- .
- More recently, using the latest generation of labeling protocols and arrays,
- it was determined that globin reduction was no longer necessary to obtain
- sufficient sensitivity to detect differential transcript expression
- \begin_inset CommandInset citation
- LatexCommand cite
- key "NuGEN2010"
- literal "false"
- \end_inset
- .
- However, we are not aware of any publications using these currently available
- protocols with the latest generation of microarrays that actually compare
- the detection sensitivity with and without globin reduction.
- Nevertheless, in practice this has now been generally adopted, driven primarily
- by concerns for cost control.
- The main objective of our work was to directly test the impact of globin
- gene transcripts and a new
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- protocol for application to the newest generation of differential gene
- expression profiling determined using next generation sequencing.
-
- \end_layout
- \begin_layout Standard
- The challenge of doing global gene expression profiling in cynomolgus monkeys
- is that the currently available arrays were never designed to comprehensively
- cover this genome and have not been updated since the first assemblies
- of the cynomolgus genome were published.
- Therefore, we determined that the best strategy for peripheral blood profiling
- was to perform deep
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- and inform the workflow using the latest available genome assembly and
- annotation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Wilson2013"
- literal "false"
- \end_inset
- .
- However, it was not immediately clear whether globin reduction was necessary
- for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- or how much improvement in efficiency or sensitivity to detect differential
- gene expression would be achieved for the added cost and effort.
-
- \end_layout
- \begin_layout Standard
- Existing strategies for globin reduction involve degradation or physical
- removal of globin transcripts in a separate step prior to reverse transcription
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012,Choi2014,Shin2014"
- literal "false"
- \end_inset
- .
- This additional step adds significant time, complexity, and cost to sample
- preparation.
- Faced with the need to perform
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- on large numbers of blood samples, we sought a solution to globin reduction
- that could be achieved purely by adding additional reagents during the
- reverse transcription reaction.
- Furthermore, we needed a globin reduction method specific to cynomolgus
- globin sequences that would work for an organism for which no kit is available
- off the shelf.
- \end_layout
- \begin_layout Standard
- As mentioned above, the addition of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- has a very small impact on measured gene expression levels.
- However, this is a non-issue for the purposes of differential expression
- testing, since a systematic change in a gene in all samples does not affect
- relative expression levels between samples.
- Nonetheless, we must acknowledge that simple comparisons of gene expression
- data obtained by
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and non-GB protocols are not possible without additional normalization.
-
- \end_layout
- \begin_layout Standard
- More importantly,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- not only nearly doubles the yield of usable reads, it also increases inter-samp
- le correlation and sensitivity to detect differential gene expression relative
- to the same set of samples profiled without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- .
- In addition,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- does not add a significant amount of random noise to the data.
-
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- thus represents a cost-effective and low-effort way to squeeze more data
- and statistical power out of the same blood samples and the same amount
- of sequencing.
- In conclusion,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- greatly increases the yield of useful
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- reads mapping to the rest of the genome, with minimal perturbations in
- the relative levels of non-globin genes.
- Based on these results, globin transcript reduction using sequence-specific,
- complementary blocking
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- is recommended for all deep
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- of cynomolgus and other nonhuman primate blood samples.
- \end_layout
- \begin_layout Section
- Future Directions
- \end_layout
- \begin_layout Standard
- One drawback of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- method presented in this analysis is a poor yield of genic reads, only
- around 50%.
- In a separate experiment, the reagent mixture was modified so as to address
- this drawback, resulting in a method that produces an even better reduction
- in globin reads without reducing the overall fraction of genic reads.
- However, the data showing this improvement consists of only a few test
- samples, so the larger data set analyzed above was chosen in order to demonstra
- te the effectiveness of the method in reducing globin reads while preserving
- the biological signal.
- \end_layout
- \begin_layout Standard
- The motivation for developing a fast practical way to enrich for non-globin
- reads in cyno blood samples was to enable a large-scale
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- experiment investigating the effects of mesenchymal stem cell infusion
- on blood gene expression in cynomolgus transplant recipients in a time
- course after transplantation.
- With the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- method in place, the way is now clear for this experiment to proceed.
- \end_layout
- \begin_layout Chapter
- \begin_inset CommandInset label
- LatexCommand label
- name "chap:Conclusions"
- \end_inset
- Conclusions
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Reintroduce all abbreviations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In this work, I have presented a wide range of applications for high-throughput
- genomic and epigenomic assays based on sequencing and arrays in the context
- of immunology and transplant rejection.
- Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:CD4-ChIP-seq"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- described the use of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- to investigate the interplay between promoter histone marks and gene expression
- during activation of naive and memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells.
- Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:Improving-array-based-diagnostic"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- explored the use of expression microarrays and methylation arrays for diagnosin
- g transplant rejection.
- Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:Globin-blocking-cyno"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- introduced a new
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- protocol for sequencing blood samples from cynomolgus monkeys designed
- to expedite gene expression profiling in serial blood samples from monkeys
- who received an experimental treatment for transplant rejection based on
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- .
- These applications range from basic science to translational medicine,
- but in all cases, high-throughput genomic assays were central to the results.
-
- \end_layout
- \begin_layout Section
- Every high-throughput analysis presents unique analysis challenges
- \end_layout
- \begin_layout Standard
- In addition, each of these applications of high-throughput genomic assays
- presented unique analysis challenges that could not be solved simply by
- stringing together standard off-the-shelf methods into a straightforward
- analysis pipeline.
- In every case, a bespoke analysis workflow tailored to the data was required,
- and in no case was it possible to determine every step in the workflow
- fully prior to seeing the data.
- For example, exploratory data analysis of the CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data uncovered the batch effect, and the analysis was adjusted to compensate
- for it.
- Similarly, analysis of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data required choosing a
- \begin_inset Quotes eld
- \end_inset
- effective promoter radius
- \begin_inset Quotes erd
- \end_inset
- based on the data itself, and several different peak callers were tested
- before the correct choice became clear.
- In the development of custom
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- vectors, an appropriate batch size had to be chosen based on the properties
- of the training data.
- In the analysis of methylation array data, the appropriate analysis strategy
- was not obvious and was determined by trying several plausible strategies
- and inspecting the model parameters afterward to determine which strategy
- appeared to best capture the observed properties of the data and which
- strategies appeared to have systematic errors as a result of failing to
- capture those properties.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- protocol went through several rounds of testing before satisfactory performance
- was achieved, and as mentioned, optimization of the protocol has continued
- past the version described here.
- These are only a few examples out of many instances of analysis decisions
- motivated by the properties of the data.
- \end_layout
- \begin_layout Section
- Successful data analysis requires a toolbox, not a pipeline
- \end_layout
- \begin_layout Standard
- Multiple times throughout this work, I have attempted to construct standard,
- reusable pipelines for analysis of specific kinds of data, such as
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- or
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- .
- Each time, the very next data set of that type broke one or more
- of the assumptions I had built into the pipeline, such as an RNA-seq data set
- where some samples aligned to the sense strand while others aligned to
- the antisense strand, or the discovery that the effective promoter radius
- varies by histone mark.
- Each violation of an assumption required a significant rewrite of the pipeline'
- s code in order to accommodate the new aspect of the data.
- The prospect of reusability turned out to be a pipe(line) dream.
- After several attempts to extend my pipelines to be general enough to handle
- an ever-increasing variety of data idiosyncrasies, I realized that it was
- actually
- \emph on
- less
- \emph default
- work to reimplement an analysis workflow from scratch each time rather
- than try to adapt an existing workflow that was originally designed for
- a different data set.
- \end_layout
- \begin_layout Standard
- Once I embraced the idea of writing a bespoke analysis workflow for every
- data set instead of a one-size-fits-all pipeline, I stopped thinking of
- the pipeline as the atomic unit of analysis.
- Instead, I focused on developing an understanding of the component parts
- of each pipeline, which problems each part solves, and what assumptions
- it makes, so that when I was presented with a new data set, I could quickly
- select the appropriate analysis methods for that data set and compose them
- into a new workflow that answers its specific demands.
- In cases where no off-the-shelf method existed to address a specific aspect
- of the data, knowing about a wide range of analysis methods allowed me
- to select the one that was closest to what I needed and adapt it accordingly,
- even if it was not originally designed to handle the kind of data I was
- analyzing.
- For example, when analyzing heteroskedastic methylation array data, I adapted
- the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- voom
- \end_layout
- \end_inset
- method from
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- , which was originally designed to model heteroskedasticity in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014"
- literal "false"
- \end_inset
- .
- While
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- voom
- \end_layout
- \end_inset
- was designed to accept read counts, I determined that this was not a fundamenta
- l assumption of the method but rather a limitation of the specific implementatio
- n, and I was able to craft a modified implementation that accepted
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- from methylation arrays.
- In contrast, adapting something like
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- for methylation arrays would not be possible, since many steps of the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- workflow, from normalization to dispersion estimation to model fitting,
- assume that the input is given on the scale of raw counts and take full
- advantage of this assumption
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Robinson2010,Robinson2010a,McCarthy2012,Chen2014"
- literal "false"
- \end_inset
- .
- In short, I collected a
- \begin_inset Quotes eld
- \end_inset
- toolbox
- \begin_inset Quotes erd
- \end_inset
- full of useful modular analysis methods and developed the knowledge of
- when and where each could be applied, as well as how to compose them on
- demand into pipelines for specific data sets.
- This prepared me to handle the idiosyncrasies of any new data set, even
- when the new data has problems that I have not previously encountered in
- any other data set.
-
- \end_layout
- \begin_layout Standard
- Reusable pipelines have their place, but that place is in automating established
- processes, not researching new science.
- For example, the custom
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- vectors developed in Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:Improving-array-based-diagnostic"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- are being incorporated into an automated pipeline for diagnosing transplant
- rejection using biopsy and blood samples from transplant recipients.
- Once ready, this diagnostic method will consist of normalization using
- the pre-trained
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- vectors, followed by classification of the sample by a pre-trained classifier,
- which outputs a posterior probability of acute rejection.
- This is a perfect use case for a proper pipeline: repeating the exact same
- sequence of analysis steps many times.
- The input to the pipeline is sufficiently well-controlled that we can guarantee
- it will satisfy the assumptions of the pipeline.
- But research data is not so well-controlled, so when analyzing data in
- a research context, the analysis must conform to the data, rather than
- trying to force the data to conform to a preferred analysis strategy.
- That means having a toolbox full of composable methods ready to respond
- to the observed properties of the data.
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- % Use "References" as the title of the Bibliography
- \end_layout
- \begin_layout Plain Layout
- \backslash
- renewcommand{
- \backslash
- bibname}{References}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset bibtex
- LatexCommand bibtex
- btprint "btPrintCited"
- bibfiles "code-refs,refs-PROCESSED"
- options "bibtotoc"
- \end_inset
- \end_layout
- \end_body
- \end_document
|