12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
10222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182141921420214212
14222142321424214252142621427214282142921430214312143221433214342143521436214372143821439214402144121442214432144421445214462144721448214492145021451214522145321454214552145621457214582145921460214612146221463214642146521466214672146821469214702147121472214732147421475214762147721478214792148021481214822148321484214852148621487214882148921490214912149221493214942149521496214972149821499215002150121502215032150421505215062150721508215092151021511215122151321514215152151621517215182151921520215212152221523215242152521526215272152821529215302153121532215332153421535215362153721538215392154021541215422154321544215452154621547215482154921550215512155221553215542155521556215572155821559215602156121562215632156421565215662156721568215692157021571215722157321574215752157621577215782157921580215812158221583215842158521586215872158821589215902159121592215932159421595215962159721598215992160021601216022160321604216052160621607216082160921610216112161221613216142161521616216172161821619216202162121622216232162421625216262162721628216292163021631216322163321634216352163621637216382163921640216412164221643216442164521646216472164821649216502165121652216532165421655216562165721658216592166021661216622166321664216652166621667216682166921670216712167221673216742167521676216772167821679216802168121682216832168421685216862168721688216892169021691216922169321694216952169621697216982169921700217012170221703217042170521706217072170821709217102171121712217132171421715217162171721718217192172021721217222172321724217252172621727217282172921730217312173221733217342173521736217372173821739217402174121742217432174421745217462174721748217492175021751217522175321754217552175621757217582175921760217612176221763217642176521766217672176821769217702177121772217732177421775217762177721778217792178021781217822178321784217852178621787217882178921790217912179221793217942179521796217972179821799218002180121802218032180421805218062180721808218092181021811218122181321814218152181621817218182181921820218212
18222182321824218252182621827218282182921830218312183221833218342183521836218372183821839218402184121842218432184421845218462184721848218492185021851218522185321854218552185621857218582185921860218612186221863218642186521866218672186821869218702187121872218732187421875218762187721878218792188021881218822188321884218852188621887218882188921890218912189221893218942189521896218972189821899219002190121902219032190421905219062190721908219092191021911219122191321914219152191621917219182191921920219212192221923219242192521926219272192821929219302193121932219332193421935219362193721938219392194021941219422194321944219452194621947219482194921950219512195221953219542195521956219572195821959219602196121962219632196421965219662196721968219692197021971219722197321974219752197621977219782197921980219812198221983219842198521986219872198821989219902199121992219932199421995219962199721998219992200022001220022200322004220052200622007220082200922010220112201222013220142201522016220172201822019220202202122022220232202422025220262202722028220292203022031220322203322034220352203622037220382203922040220412204222043220442204522046220472204822049220502205122052220532205422055220562205722058220592206022061220622206322064220652206622067220682206922070220712207222073220742207522076220772207822079220802208122082220832208422085220862208722088220892209022091220922209322094220952209622097220982209922100221012210222103221042210522106221072210822109221102211122112221132211422115221162211722118221192212022121221222212322124221252212622127221282212922130221312213222133221342213522136221372213822139221402214122142221432214422145221462214722148221492215022151221522215322154221552215622157221582215922160221612216222163221642216522166221672216822169221702217122172221732217422175221762217722178221792218022181221822218322184221852218622187221882218922190221912219222193221942219522196221972219822199222002220122202222032220422205222062220722208222092221022211222122221322214222152221622217222182221922220222212
22222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212
26222262322624226252262622627226282262922630226312263222633226342263522636226372263822639226402264122642226432264422645226462264722648226492265022651226522265322654226552265622657226582265922660226612266222663226642266522666226672266822669226702267122672226732267422675226762267722678226792268022681226822268322684226852268622687226882268922690226912269222693226942269522696226972269822699227002270122702227032270422705227062270722708227092271022711227122271322714227152271622717227182271922720227212272222723227242272522726227272272822729227302273122732227332273422735227362273722738227392274022741227422274322744227452274622747227482274922750227512275222753227542275522756227572275822759227602276122762227632276422765227662276722768227692277022771227722277322774227752277622777227782277922780227812278222783227842278522786227872278822789227902279122792227932279422795227962279722798227992280022801228022280322804228052280622807228082280922810228112281222813228142281522816228172281822819228202282122822228232282422825228262282722828228292283022831228322283322834228352283622837228382283922840228412284222843228442284522846228472284822849228502285122852228532285422855228562285722858228592286022861228622286322864228652286622867228682286922870228712287222873228742287522876228772287822879228802288122882228832288422885228862288722888228892289022891228922289322894228952289622897228982289922900229012290222903229042290522906229072290822909229102291122912229132291422915229162291722918229192292022921229222292322924229252292622927229282292922930229312293222933229342293522936229372293822939229402294122942229432294422945229462294722948229492295022951229522295322954229552295622957229582295922960229612296222963229642296522966229672296822969229702297122972229732297422975229762297722978229792298022981229822298322984229852298622987229882298922990229912299222993229942299522996229972299822999230002300123002230032300423005230062300723008230092301023011230122301323014230152301623017230182301923020230212
3022230232302423025230262302723028230292303023031230322303323034230352303623037230382303923040230412304223043230442304523046230472304823049230502305123052230532305423055230562305723058230592306023061230622306323064230652306623067230682306923070230712307223073230742307523076230772307823079230802308123082230832308423085230862308723088230892309023091230922309323094230952309623097230982309923100231012310223103231042310523106231072310823109231102311123112231132311423115231162311723118231192312023121231222312323124231252312623127231282312923130231312313223133231342313523136231372313823139231402314123142231432314423145231462314723148231492315023151231522315323154231552315623157231582315923160231612316223163231642316523166231672316823169231702317123172231732317423175231762317723178231792318023181231822318323184231852318623187231882318923190231912319223193231942319523196231972319823199232002320123202232032320423205232062320723208232092321023211232122321323214232152321623217232182321923220232212322223223232242322523226232272322823229232302323123232232332323423235232362323723238232392324023241232422324323244232452324623247232482324923250232512325223253232542325523256232572325823259232602326123262232632326423265232662326723268232692327023271232722327323274232752327623277232782327923280232812328223283232842328523286232872328823289232902329123292232932329423295232962329723298232992330023301233022330323304233052330623307233082330923310233112331223313233142331523316233172331823319233202332123322233232332423325233262332723328233292333023331233322333323334233352333623337233382333923340233412334223343233442334523346233472334823349 |
- #LyX 2.3 created this file. For more info see http://www.lyx.org/
- \lyxformat 544
- \begin_document
- \begin_header
- \save_transient_properties true
- \origin unavailable
- \textclass extbook
- \begin_preamble
- % List all used files in log output
- \listfiles
- %% Add TOC, List of Figures, etc. to TOC
- \usepackage{tocbibind}
- % Add a DRAFT watermark
- \usepackage{draftwatermark}
- \usepackage{accsupp}
- \SetWatermarkLightness{0.97}
- \SetWatermarkScale{1}
- % Make watermark not copyable (in Adobe Reader)
- \SetWatermarkText{\BeginAccSupp{method=escape,ActualText={}}DRAFT\EndAccSupp{}}
- % Set up required header format
- \usepackage{fancyhdr}
- \pagestyle{fancy}
- \renewcommand{\headrulewidth}{0pt}
- \rhead{}
- \lhead{}
- \chead{}
- \rfoot{}
- \lfoot{}
- % Make page number not copyable (in Adobe Reader)
- \cfoot{\BeginAccSupp{method=escape,ActualText={}}\thepage\EndAccSupp{}} % Page number bottom center
- % Allow FloatBarrier command
- \usepackage{placeins}
- % Allow landscape pages
- \usepackage{pdflscape}
- % Allow doing things after the end of the current page
- % (to avoid landscape figures breaking up text)
- \usepackage{afterpage}
- % Consider: force floats after placement in text
- % https://tex.stackexchange.com/questions/15706/force-floats-to-be-typeset-after-their-occurrence-in-the-source-text
- % This one breaks subfigs so it's disabled
- % https://tex.stackexchange.com/questions/65680/automatically-bold-first-sentence-of-a-floats-caption
- \usepackage[automake=immediate,nonumberlist,nohypertypes={abbreviation}]{glossaries-extra}
- \setabbreviationstyle{long-short}
- \loadglsentries{abbrevs.tex}
- \makeglossaries
- % arara: xelatex
- % arara: biber
- % arara: makeglossaries
- % arara: xelatex
- \end_preamble
- \use_default_options true
- \begin_modules
- todonotes
- logicalmkup
- \end_modules
- \maintain_unincluded_children false
- \begin_local_layout
- Format 66
- InsetLayout "Flex:Glossary Term"
- LyxType custom
- LabelString gls
- LatexType command
- LatexName gls*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (Capital)"
- LyxType custom
- LabelString Gls
- LatexType command
- LatexName Gls*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (pl)"
- LyxType custom
- LabelString glspl
- LatexType command
- LatexName glspl*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (Capital, pl)"
- LyxType custom
- LabelString Glspl
- LatexType command
- LatexName Glspl*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (glstext)"
- LyxType custom
- LabelString glstext
- LatexType command
- LatexName glstext*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (Glstext)"
- LyxType custom
- LabelString Glstext
- LatexType command
- LatexName Glstext*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (glsfirst)"
- LyxType custom
- LabelString glsfirst
- LatexType command
- LatexName glsfirst*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (Glsfirst)"
- LyxType custom
- LabelString Glsfirst
- LatexType command
- LatexName Glsfirst*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (glsdesc)"
- LyxType custom
- LabelString glsdesc
- LatexType command
- LatexName glsdesc*
- InToc true
- CustomPars false
- End
- InsetLayout "Flex:Glossary Term (Glsdesc)"
- LyxType custom
- LabelString Glsdesc
- LatexType command
- LatexName Glsdesc*
- InToc true
- CustomPars false
- End
- \end_local_layout
- \language english
- \language_package default
- \inputencoding utf8
- \fontencoding default
- \font_roman "default" "default"
- \font_sans "default" "default"
- \font_typewriter "default" "default"
- \font_math "auto" "auto"
- \font_default_family default
- \use_non_tex_fonts false
- \font_sc false
- \font_osf false
- \font_sf_scale 100 100
- \font_tt_scale 100 100
- \use_microtype false
- \use_dash_ligatures true
- \graphics default
- \default_output_format pdf4
- \output_sync 0
- \bibtex_command biber
- \index_command default
- \paperfontsize 12
- \spacing double
- \use_hyperref true
- \pdf_author "Ryan C. Thompson"
- \pdf_bookmarks true
- \pdf_bookmarksnumbered true
- \pdf_bookmarksopen true
- \pdf_bookmarksopenlevel 1
- \pdf_breaklinks true
- \pdf_pdfborder true
- \pdf_colorlinks false
- \pdf_backref false
- \pdf_pdfusetitle true
- \papersize letterpaper
- \use_geometry true
- \use_package amsmath 1
- \use_package amssymb 1
- \use_package cancel 1
- \use_package esint 1
- \use_package mathdots 1
- \use_package mathtools 1
- \use_package mhchem 1
- \use_package stackrel 1
- \use_package stmaryrd 1
- \use_package undertilde 1
- \cite_engine biblatex
- \cite_engine_type numerical
- \biblio_style plain
- \biblio_options sorting=none
- \biblatex_bibstyle numeric
- \biblatex_citestyle numeric
- \use_bibtopic false
- \use_indices false
- \paperorientation portrait
- \suppress_date false
- \justification true
- \use_refstyle 1
- \use_minted 0
- \index Index
- \shortcut idx
- \color #008000
- \end_index
- \leftmargin 1.5in
- \topmargin 1in
- \rightmargin 1in
- \bottommargin 1in
- \secnumdepth 3
- \tocdepth 3
- \paragraph_separation indent
- \paragraph_indentation default
- \is_math_indent 0
- \math_numbering_side default
- \quotes_style english
- \dynamic_quotes 0
- \papercolumns 1
- \papersides 1
- \paperpagestyle default
- \tracking_changes false
- \output_changes false
- \html_math_output 0
- \html_css_as_file 0
- \html_be_strict false
- \end_header
- \begin_body
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- pdfbookmark{Title page}{title}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Title
- Bioinformatic analysis of complex, high-throughput genomic and epigenomic
- data in the context of immunology and transplant rejection
- \end_layout
- \begin_layout Author
- A thesis presented
- \begin_inset Newline newline
- \end_inset
- by
- \begin_inset Newline newline
- \end_inset
- Ryan C.
- Thompson
- \begin_inset Newline newline
- \end_inset
- to
- \begin_inset Newline newline
- \end_inset
- The Scripps Research Institute Graduate Program
- \begin_inset Newline newline
- \end_inset
- in partial fulfillment of the requirements for the degree of
- \begin_inset Newline newline
- \end_inset
- Doctor of Philosophy in the subject of Biology
- \begin_inset Newline newline
- \end_inset
- for
- \begin_inset Newline newline
- \end_inset
- The Scripps Research Institute
- \begin_inset Newline newline
- \end_inset
- La Jolla, California
- \end_layout
- \begin_layout Date
- October 2019
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- To remove TODOs and watermark: Add
- \begin_inset Quotes eld
- \end_inset
- final
- \begin_inset Quotes erd
- \end_inset
- to the document class custom options.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- frontmatter
- \end_layout
- \end_inset
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Use roman numeral page numbers
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Newpage newpage
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- phantomsection
- \end_layout
- \begin_layout Plain Layout
- \backslash
- addcontentsline{toc}{chapter}{Copyright notice}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- vspace*{
- \backslash
- stretch{1}}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- © 2019 by Ryan C.
- Thompson
- \end_layout
- \begin_layout Standard
- \align center
- All rights reserved.
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- vspace*{
- \backslash
- stretch{2}}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Newpage newpage
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- phantomsection
- \end_layout
- \begin_layout Plain Layout
- \backslash
- addcontentsline{toc}{chapter}{Thesis acceptance form}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- [Thesis acceptance form]
- \end_layout
- \begin_layout Standard
- \begin_inset Newpage newpage
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- phantomsection
- \end_layout
- \begin_layout Plain Layout
- \backslash
- addcontentsline{toc}{chapter}{Dedication}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- vspace*{
- \backslash
- stretch{1}}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- [Dedication]
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- vspace*{
- \backslash
- stretch{2}}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Newpage newpage
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- phantomsection
- \end_layout
- \begin_layout Plain Layout
- \backslash
- addcontentsline{toc}{chapter}{Acknowledgements}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section*
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- hspace*{
- \backslash
- stretch{1}}
- \end_layout
- \end_inset
- Acknowledgements
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- hspace*{
- \backslash
- stretch{1}}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- [Acknowledgements]
- \end_layout
- \begin_layout Standard
- \begin_inset Newpage newpage
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset toc
- LatexCommand tableofcontents
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset FloatList table
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset FloatList figure
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- To create a new abbreviation:
- \end_layout
- \begin_layout Enumerate
- Add an entry to abbrevs.tex
- \end_layout
- \begin_layout Enumerate
- Wrap every occurrence of the term in Insert -> Custom Insets -> Glossary
- Term (use appropriate variants for capital, plural, etc.), using Edit ->
- Find & Replace (Advanced).
- Skip section headers and float captions.
- \end_layout
- \begin_layout Plain Layout
- \begin_inset CommandInset href
- LatexCommand href
- target "https://ctan.org/pkg/glossaries?lang=en"
- literal "false"
- \end_inset
- \begin_inset CommandInset href
- LatexCommand href
- target "https://ctan.org/pkg/glossaries-extra"
- literal "false"
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- renewcommand*{
- \backslash
- glossaryname}{List of Abbreviations}%
- \end_layout
- \begin_layout Plain Layout
- \backslash
- printglossaries
- \end_layout
- \end_inset
- \end_layout
- \begin_layout List of TODOs
- \end_layout
- \begin_layout Chapter*
- Abstract
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- It is included as an integral part of the thesis and should immediately
- precede the introduction.
- \end_layout
- \begin_layout Plain Layout
- Preparing your Abstract.
- Your abstract (a succinct description of your work) is limited to 350 words.
- UMI will shorten it if they must; please do not exceed the limit.
- \end_layout
- \begin_layout Itemize
- Include pertinent place names, names of persons (in full), and other proper
- nouns.
- These are useful in automated retrieval.
- \end_layout
- \begin_layout Itemize
- Display symbols, as well as foreign words and phrases, clearly and accurately.
- Include transliterations for characters other than Roman and Greek letters
- and Arabic numerals.
- Include accents and diacritical marks.
- \end_layout
- \begin_layout Itemize
- Do not include graphs, charts, tables, or illustrations in your abstract.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Obviously the abstract gets written last.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status collapsed
- \begin_layout Chapter*
- Notes to draft readers
- \end_layout
- \begin_layout Plain Layout
- Thank you so much for agreeing to read my thesis and give me feedback on
- it.
- What you are currently reading is a rough draft, in need of many revisions.
- You can always find the latest version at
- \begin_inset CommandInset href
- LatexCommand href
- target "https://mneme.dedyn.io/~ryan/Thesis/thesis.pdf"
- literal "false"
- \end_inset
- .
- The PDF at this link is updated periodically with my latest revisions,
- but you can just download the current version and give me feedback on that.
- Don't worry about keeping up with the updates.
- \end_layout
- \begin_layout Plain Layout
- As for what feedback I'm looking for, first of all, don't waste your time
- marking spelling mistakes and such.
- I haven't run a spell checker on it yet, so let me worry about that.
- Also, I'm aware that many abbreviations are not properly introduced the
- first time they are used, so don't worry about that either.
- However, if you see any glaring formatting issues, such as a figure being
- too large and getting cut off at the edge of the page, please note them.
- In addition, if any of the text in the figures is too small, please note
- that as well.
- \end_layout
- \begin_layout Plain Layout
- Beyond that, what I'm mainly interested in is feedback on the content.
- For example: does the introduction flow logically, and does it provide
- enough background to understand the other chapters? Does each chapter make
- it clear what work and analyses I have done? Do the figures clearly communicate
- the results I'm trying to show? Do you feel that the claims in the results
- and discussion sections are well-supported? There's no need to suggest
- improvements; just note areas that you feel need improvement.
- Additionally, if you notice any un-cited claims in any chapter, please
- flag them for my attention.
- Similarly, if you discover any factual errors, please note them as well.
- \end_layout
- \begin_layout Plain Layout
- You can provide your feedback in whatever way is most convenient to you.
- You could mark up this PDF with highlights and notes, then send it back
- to me.
- Or you could collect your comments in a separate text file and send that
- to me, or whatever else you like.
- However, if you send me your feedback in a separate document, please note
- a section/figure/table number for each comment, and
- \emph on
- also
- \emph default
- send me the exact PDF that you read so I can reference it while reading
- your comments, since as mentioned above, the current version I'm working
- on will have changed by that point (which might include shuffling sections
- and figures around, changing their numbers).
- One last thing: you'll see a bunch of text in orange boxes throughout the
- PDF.
- These are notes to myself about things that need to be fixed later, so
- if you see a problem noted in an orange box, that means I'm already aware
- of it, and there's no need to comment on it.
- \end_layout
- \begin_layout Plain Layout
- My thesis is due Thursday, October 10th, so in order to be useful to me,
- I'll need your feedback at least several days before that, ideally by Monday,
- October 7th.
- If you have limited time and are unable to get through the whole thesis,
- please focus your efforts on Chapters 1 and 2, since those are the roughest
- and most in need of revision.
- Chapter 3 is fairly short and straightforward, and Chapter 4 is an adaptation
- of a paper that's already been through a few rounds of revision, so they
- should be a lot tighter.
- If you can't spare any time between now and then, or if something unexpected
- comes up, I understand.
- Just let me know.
- \end_layout
- \begin_layout Plain Layout
- Thanks again for your help, and happy reading!
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- mainmatter
- \end_layout
- \end_inset
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Switch from roman numerals to arabic for page numbers.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Chapter
- Introduction
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Reintroduce all abbreviations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- \begin_inset CommandInset label
- LatexCommand label
- name "sec:Biological-motivation"
- \end_inset
- Biological motivation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Find some figures to include even if permission is not obtained.
- Try to obtain permission, and if it cannot be obtained, remove/replace
- them later.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Rethink the subsection organization after the intro is written.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Rejection is the major long-term threat to organ and tissue allografts
- \end_layout
- \begin_layout Standard
- Organ and tissue transplants are a life-saving treatment for people who
- have lost the function of an important organ.
- In some cases, it is possible to transplant a patient's own tissue from
- one area of their body to another, referred to as an autograft.
- This is common for tissues that are distributed throughout many areas of
- the body, such as skin and bone.
- However, in cases of organ failure, there is no functional self tissue
- remaining, and a transplant from another person – a donor – is required.
- This is referred to as an allograft
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Valenzuela2017"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- How much mechanistic detail is needed here? My work doesn't really go into
- specific rejection mechanisms, so I think it's best to keep it basic.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Because an allograft comes from a donor of the same species who is genetically
- distinct from the recipient (with rare exceptions), genetic variants in
- protein-coding regions affect the polypeptide sequences encoded by the
- affected genes, resulting in protein products in the allograft that differ
- from the equivalent proteins produced by the graft recipient's own tissue.
- As a result, without intervention, the recipient's immune system will eventuall
- y identify the graft as foreign tissue and begin attacking it.
- This is called an alloimmune response, and if left unchecked, it eventually
- results in failure and death of the graft, a process referred to as transplant
- rejection
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- Rejection is the primary obstacle to long-term health and survival of an
- allograft
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Valenzuela2017"
- literal "false"
- \end_inset
- .
- Like any adaptive immune response, an alloimmune response generally occurs
- via two broad mechanisms: cellular immunity, in which CD8
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells recognizing graft-specific antigens induce apoptosis in the graft
- cells; and humoral immunity, in which B-cells produce antibodies that bind
- to graft proteins and direct an immune response against the graft
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- In either case, alloimmunity and rejection show most of the typical hallmarks
- of an adaptive immune response, in particular mediation by CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells and formation of immune memory.
-
- \end_layout
- \begin_layout Subsection
- Diagnosis and treatment of allograft rejection is a major challenge
- \end_layout
- \begin_layout Standard
- To prevent rejection, allograft recipients are treated with immune suppressive
- drugs
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kowalski2003,Murphy2012"
- literal "false"
- \end_inset
- .
- The goal is to achieve sufficient suppression of the immune system to prevent
- rejection of the graft without compromising the ability of the immune system
- to raise a normal response against infection.
- As such, a delicate balance must be struck: insufficient immune suppression
- may lead to rejection and ultimately loss of the graft; excessive suppression
- leaves the patient vulnerable to life-threatening opportunistic infections
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- Because every patient's metabolism is different, achieving this delicate
- balance requires drug dosage to be tailored for each patient.
- Furthermore, dosage must be tuned over time, as the immune system's activity
- varies over time and in response to external stimuli with no fixed pattern.
- In order to properly adjust the dosage of immune suppression drugs, it
- is necessary to monitor the health of the transplant and increase the dosage
- if evidence of rejection or alloimmune activity is observed.
- \end_layout
- \begin_layout Standard
- However, diagnosis of rejection is a significant challenge.
- Early diagnosis is essential in order to step up immune suppression before
- the immune system damages the graft beyond recovery
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Israeli2007"
- literal "false"
- \end_inset
- .
- The current gold standard test for graft rejection is a tissue biopsy,
- examined for visible signs of rejection by a trained histologist
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kurian2014"
- literal "false"
- \end_inset
- .
- When a patient shows symptoms of possible rejection, a
- \begin_inset Quotes eld
- \end_inset
- for cause
- \begin_inset Quotes erd
- \end_inset
- biopsy is performed to confirm the diagnosis, and immune suppression is
- adjusted as necessary.
- However, in many cases, the early stages of rejection are asymptomatic,
- a condition known as
- \begin_inset Quotes eld
- \end_inset
- sub-clinical
- \begin_inset Quotes erd
- \end_inset
- rejection.
- In light of this, it is now common to perform
- \begin_inset Quotes eld
- \end_inset
- protocol biopsies
- \begin_inset Quotes erd
- \end_inset
- at specific times after transplantation of a graft, even if no symptoms
- of rejection are apparent, in addition to
- \begin_inset Quotes eld
- \end_inset
- for cause
- \begin_inset Quotes erd
- \end_inset
- biopsies
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Salomon2002,Wilkinson2006,Patel2018,Zachariah2018"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- However, biopsies have a number of downsides that limit their effectiveness
- as a diagnostic tool.
- First, the need for manual inspection by a histologist means that diagnosis
- is subject to the biases of the particular histologist examining the biopsy
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kurian2014"
- literal "false"
- \end_inset
- .
- In marginal cases, two different histologists may give two different diagnoses
- to the same biopsy.
- Second, a biopsy can only evaluate if rejection is occurring in the section
- of the graft from which the tissue was extracted.
- If rejection is localized to one section of the graft and the tissue is
- extracted from a different section, a false negative diagnosis may result.
- Most importantly, extraction of tissue from a graft is invasive and is
- treated as an injury by the body, which results in inflammation that in
- turn promotes increased immune system activity.
- Hence, the invasiveness of biopsies severely limits the frequency with
- which they can safely be performed
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Patel2018"
- literal "false"
- \end_inset
- .
- Typically, protocol biopsies are not scheduled more than about once per
- month
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Wilkinson2006"
- literal "false"
- \end_inset
- .
- A less invasive diagnostic test for rejection would bring manifold benefits.
- Such a test would enable more frequent testing and therefore earlier detection
- of rejection events.
- In addition, having a larger pool of historical data for a given patient
- would make it easier to evaluate when a given test is outside the normal
- parameters for that specific patient, rather than relying on normal ranges
- for the population as a whole.
- Lastly, the accumulated data from more frequent tests would be a boon to
- the transplant research community.
- Beyond simply providing more data overall, the better time granularity
- of the tests will enable studying the progression of a rejection event
- on the scale of days to weeks, rather than months.
- \end_layout
- \begin_layout Subsection
- Memory cells are resistant to immune suppression
- \end_layout
- \begin_layout Standard
- One of the defining features of the adaptive immune system is immune memory:
- the ability of the immune system to recognize a previously encountered
- foreign antigen and respond more quickly and more strongly to that antigen
- in subsequent encounters
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- When the immune system first encounters a new antigen, the T-cells that
- respond are known as naïve cells – T-cells that have never detected their
- target antigens before.
- Once activated by their specific antigen presented by an antigen-presenting
- cell in the proper co-stimulatory context, naïve cells differentiate into
- effector cells that carry out their respective functions in targeting and
- destroying the source of the foreign antigen.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TCR
- \end_layout
- \end_inset
- is a cell-surface protein complex produced by T-cells that is responsible
- for recognizing the T-cell's specific antigen, presented on a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MHC
- \end_layout
- \end_inset
- , the cell-surface protein complex used by an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- APC
- \end_layout
- \end_inset
- to present antigens to the T-cell.
- However, a naïve T-cell that recognizes its antigen also requires a co-stimulat
- ory signal, delivered through other interactions between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- APC
- \end_layout
- \end_inset
- surface proteins and T-cell surface proteins such as CD28.
- Without proper co-stimulation, a T-cell that recognizes its antigen either
- dies or enters an unresponsive state known as anergy, in which the T-cell
- becomes much more resistant to subsequent activation even with proper co-stimul
- ation.
- The dependency of activation on co-stimulation is an important feature
- of naïve lymphocytes that limits
- \begin_inset Quotes eld
- \end_inset
- false positive
- \begin_inset Quotes erd
- \end_inset
- immune responses against self antigens, because
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- APC
- \end_layout
- \end_inset
- usually only express the proper co-stimulation after the innate immune
- system detects signs of an active infection, such as the presence of common
- bacterial cell components or inflamed tissue.
-
- \end_layout
- \begin_layout Standard
- After the foreign antigen is cleared, most effector cells die since they
- are no longer needed, but some differentiate into memory cells and remain
- alive indefinitely.
- Like naïve cells, memory cells respond to detection of their specific antigen
- by differentiating into effector cells, ready to fight an infection
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- However, the memory response to antigen is qualitatively different: memory
- cells are more sensitive to detection of their antigen, and a lower concentrati
- on of antigen is sufficient to activate them
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Rogers2000,London2000,Berard2002"
- literal "false"
- \end_inset
- .
- In addition, memory cells are much less dependent on co-stimulation for
- activation: they can activate without certain co-stimulatory signals that
- are required by naïve cells, and the signals they do require are only required
- at lower levels in order to cause activation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "London2000"
- literal "false"
- \end_inset
- .
- Furthermore, mechanisms that induce tolerance (non-response to antigen)
- in naïve cells are much less effective on memory cells
- \begin_inset CommandInset citation
- LatexCommand cite
- key "London2000"
- literal "false"
- \end_inset
- .
- Lastly, once activated, memory cells proliferate and differentiate into
- effector cells more quickly than naïve cells do
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Berard2002"
- literal "false"
- \end_inset
- .
- In combination, these changes in lymphocyte behavior upon differentiation
- into memory cells account for the much quicker and stronger response of
- the immune system to subsequent exposure to a previously-encountered antigen.
- \end_layout
- \begin_layout Standard
- In the context of a pathogenic infection, immune memory is a major advantage,
- allowing an organism to fight off a previously encountered pathogen
- much more quickly and effectively than the first time it was encountered
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- However, if effector cells that recognize an antigen from an allograft
- are allowed to differentiate into memory cells, preventing rejection of
- the graft becomes much more difficult.
- Many immune suppression drugs work by interfering with the co-stimulation
- that naïve cells require in order to mount an immune response.
- Since memory cells do not require the same degree of co-stimulation, these
- drugs are not effective at suppressing an immune response that is mediated
- by memory cells.
- Secondly, because memory cells are able to mount a stronger and faster
- response to an antigen, all else being equal, stronger immune suppression
- is required to prevent an immune response mediated by memory cells.
- \end_layout
- \begin_layout Standard
- However, immune suppression affects the entire immune system, not just cells
- recognizing a specific antigen, so increasing the dosage of immune suppression
- drugs also increases the risk of complications from a compromised immune
- system, such as opportunistic infections
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- While the differences in cell surface markers between naïve and memory
- cells have been fairly well characterized, the internal regulatory mechanisms
- that allow memory cells to respond more quickly and without co-stimulation
- are still poorly understood.
- In order to develop methods of immune suppression that either prevent the
- formation of memory cells or work more effectively against memory cells,
- a more complete understanding of the mechanisms of immune memory formation
- and regulation is required.
- \end_layout
- \begin_layout Subsection
- Infusion of allogenic mesenchymal stem cells modulates the alloimmune response
- \end_layout
- \begin_layout Standard
- One promising experimental treatment for transplant rejection involves the
- infusion of allogenic
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- .
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- have been shown to have immune modulatory effects, both in general and
- specifically in the case of immune responses against allografts
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LeBlanc2003,Aggarwal2005,Bartholomew2009,Berman2010"
- literal "false"
- \end_inset
- .
- Furthermore, allogenic
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- themselves are immune-evasive and are rejected by the recipient's immune
- system more slowly than most allogenic tissues
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ankrum2014,Berglund2017"
- literal "false"
- \end_inset
- .
- In addition, treating
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- in culture with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IFNg
- \end_layout
- \end_inset
- is shown to enhance their immunosuppressive properties and homogenize their
- cellular phenotype, making them more amenable to development into a well-contro
- lled treatment
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Majumdar2003,Ryan2007"
- literal "false"
- \end_inset
- .
- The mechanisms by which
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- modulate the immune system are still poorly understood.
- Despite this, there is significant interest in using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IFNg
- \end_layout
- \end_inset
- -activated
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- infusion as a supplementary immune suppressive treatment for allograft
- transplantation.
-
- \end_layout
- \begin_layout Standard
- Note that despite the name, none of the above properties of
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- are believed to involve their ability as stem cells to differentiate into
- multiple different mature cell types, but rather the intercellular signals
- they produce
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ankrum2014"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Section
- \begin_inset CommandInset label
- LatexCommand label
- name "sec:Overview-of-bioinformatic"
- \end_inset
- Overview of bioinformatic analysis methods
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Also cite somewhere: R, Bioconductor
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Powerful methods for assaying gene expression and epigenetics across entire
- genomes
- \end_layout
- \begin_layout Itemize
- Proper analysis requires finding and exploiting systematic genome-wide trends
- \end_layout
- \begin_layout Standard
- The studies presented in this work all involve the analysis of high-throughput
- genomic and epigenomic assay data.
- These data present many unique analysis challenges, and a wide array of
- software tools are available to analyze them.
- This section presents an overview of the most important methods and tools
- used throughout the following analyses, including what problems they solve,
- what assumptions they make, and a basic description of how they work.
- \end_layout
- \begin_layout Subsection
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- Limma
- \end_layout
- \end_inset
- : The standard linear modeling framework for genomics
- \end_layout
- \begin_layout Standard
- Linear models are a generalization of the
- \begin_inset Formula $t$
- \end_inset
- -test and ANOVA to arbitrarily complex experimental designs
- \begin_inset CommandInset citation
- LatexCommand cite
- key "chambers:1992"
- literal "false"
- \end_inset
- .
- In a typical linear model, there is one dependent variable observation
- per sample and a large number of samples.
- For example, in a linear model of height as a function of age and sex,
- there is one height measurement per person.
- However, when analyzing genomic data, each sample consists of observations
- of thousands of dependent variables.
- For example, in an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- experiment, the dependent variables may be the count of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- reads for each annotated gene, and there are tens of thousands of genes
- in the human genome.
- Since many assays measure things other than gene expression, the abstract
- term
- \begin_inset Quotes eld
- \end_inset
- feature
- \begin_inset Quotes erd
- \end_inset
- is used to refer to each dependent variable being measured, which may include
- any genomic element, such as genes, promoters, peaks, enhancers, exons,
- etc.
-
- \end_layout
- \begin_layout Standard
- The simplest approach to analyzing such data would be to fit the same model
- independently to each feature.
- However, this is undesirable for most genomics data sets.
- Genomics assays like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- HTS
- \end_layout
- \end_inset
- are expensive, and often the process of generating the samples is also
- quite expensive and time-consuming.
- This expense limits the sample sizes typically employed in genomics experiments
- , so a typical genomic data set has far more features being measured than
- observations (samples) per feature.
- As a result, the statistical power of the linear model for each individual
- feature is likewise limited by the small number of samples.
- However, because thousands of features from the same set of samples are
- analyzed together, there is an opportunity to improve the statistical power
- of the analysis by exploiting shared patterns of variation across features.
- This is the core feature of
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- , a linear modeling framework designed for genomic data.
-
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- Limma
- \end_layout
- \end_inset
- is typically used to analyze expression microarray data, and more recently
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, but it can also be used to analyze any other data for which linear
- modeling is appropriate.
- \end_layout
- \begin_layout Standard
- The central challenge when fitting a linear model is to estimate the variance
- of the data accurately.
- Out of all parameters required to evaluate statistical significance of
- an effect, the variance is the most difficult to estimate when sample sizes
- are small.
- A single shared variance could be estimated for all of the features together,
- and this estimate would be very stable, in contrast to the individual feature
- variance estimates.
- However, this would require the assumption that all features have equal
- variance, which is known to be false for most genomic data sets (for example,
- some genes' expression is known to be more variable than others').
-
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- Limma
- \end_layout
- \end_inset
- offers a compromise between these two extremes by using a method called
- empirical Bayes moderation to
- \begin_inset Quotes eld
- \end_inset
- squeeze
- \begin_inset Quotes erd
- \end_inset
- the distribution of estimated variances toward a single common value that
- represents the variance of an average feature in the data (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ebayes-example"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- )
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2004"
- literal "false"
- \end_inset
- .
- While the individual feature variance estimates are not stable, the common
- variance estimate for the entire data set is quite stable, so using a combinati
- on of the two yields a variance estimate for each feature with greater precision
- than the individual feature variances.
- The trade-off for this improvement is that squeezing each estimated variance
- toward the common value introduces some bias – the variance will be underestima
- ted for features with high variance and overestimated for features with
- low variance.
- Essentially,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- assumes that extreme variances are less common than variances close to
- the common value.
- The squeezed variance estimates from this empirical Bayes procedure are
- shown empirically to yield greater statistical power than either the individual
- feature variances or the single common value.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Intro/eBayes-CROP-RASTER.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Example of empirical Bayes squeezing of per-gene variances.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ebayes-example"
- \end_inset
- \series bold
- Example of empirical Bayes squeezing of per-gene variances.
- \series default
- A smooth trend line (red) is fitted to the individual gene variances (light
- blue) as a function of average gene abundance (logCPM).
- Then the individual gene variances are
- \begin_inset Quotes eld
- \end_inset
- squeezed
- \begin_inset Quotes erd
- \end_inset
- toward the trend (dark blue).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- On top of this core framework,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- also implements many other enhancements that further relax the assumptions
- of the model and extend the scope of what kinds of data it can analyze.
- Instead of squeezing toward a single common variance value,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- can model the common variance as a function of a covariate, such as average
- expression
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014"
- literal "false"
- \end_inset
- .
- This is essential for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, where higher gene counts yield more precise expression measurements
- and therefore smaller variances than low-count genes.
- While linear models typically assume that all samples have equal variance,
-
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- is able to relax this assumption by identifying and down-weighting samples
- that diverge more strongly from the linear model across many features
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2006,Liu2015"
- literal "false"
- \end_inset
- .
- In addition,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- is also able to fit simple mixed models incorporating one random effect
- in addition to the fixed effects represented by an ordinary linear model
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005a"
- literal "false"
- \end_inset
- .
- Once again,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- shares information between features to obtain a robust estimate for the
- random effect correlation.
- \end_layout
- \begin_layout Subsection
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- provides
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- -like analysis features for read count data
- \end_layout
- \begin_layout Standard
- Although
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- can be applied to read counts from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, it is less suitable for counts from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- and other sources, which tend to be much smaller and therefore violate
- the assumption of a normal distribution more severely.
- For all count-based data, the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- package works similarly to
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- , but uses a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GLM
- \end_layout
- \end_inset
- instead of a linear model.
- Relative to a linear model, a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GLM
- \end_layout
- \end_inset
- gains flexibility by relaxing several assumptions, the most important of
- which is the assumption of normally distributed errors.
- This allows the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GLM
- \end_layout
- \end_inset
- in
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- to model the counts directly using a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- distribution rather than modeling the normalized log counts using a normal
- distribution as
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- does
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Chen2014,McCarthy2012,Robinson2010a"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- distribution is a good fit for count data because it can be derived as
- a gamma-distributed mixture of Poisson distributions.
- The reads in an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- sample are assumed to be sampled from a much larger population, such that
- the sampling process does not significantly affect the proportions.
- Under this assumption, a gene's read count in an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- sample is distributed as
- \begin_inset Formula $\mathrm{Binomial}(n,p)$
- \end_inset
- , where
- \begin_inset Formula $n$
- \end_inset
- is the total number of reads sequenced from the sample and
- \begin_inset Formula $p$
- \end_inset
- is the proportion of total fragments in the sample derived from that gene.
- When
- \begin_inset Formula $n$
- \end_inset
- is large and
- \begin_inset Formula $p$
- \end_inset
- is small, a
- \begin_inset Formula $\mathrm{Binomial}(n,p)$
- \end_inset
- distribution is well-approximated by
- \begin_inset Formula $\mathrm{Poisson}(np)$
- \end_inset
- .
- Hence, if multiple sequencing runs are performed on the same
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- sample (with the same gene mixing proportions each time), each gene's read
- count is expected to follow a Poisson distribution.
- If the abundance of a gene,
- \begin_inset Formula $p,$
- \end_inset
- varies across biological replicates according to a gamma distribution,
- and
- \begin_inset Formula $n$
- \end_inset
- is held constant, then the result is a gamma-distributed mixture of Poisson
- distributions, which is equivalent to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- distribution.
- The assumption of a gamma distribution for the mixing weights is arbitrary,
- motivated by the convenience of the numerically tractable
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- distribution and the need to select
- \emph on
- some
- \emph default
- distribution, since the true shape of the distribution of biological variance
- is unknown.
- \end_layout
- \begin_layout Standard
- Thus,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- 's use of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- is equivalent to an
- \emph on
- a priori
- \emph default
- assumption that the variation in gene abundances between replicates follows
- a gamma distribution.
- The reciprocal of the gamma shape parameter in the context of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- is called the dispersion, and the square root of this dispersion is referred
- to as the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- , since it represents the variability in abundance that was present in the
- biological samples prior to the Poisson
- \begin_inset Quotes eld
- \end_inset
- noise
- \begin_inset Quotes erd
- \end_inset
- that was generated by the random sampling of reads in proportion to feature
- abundances.
- Like
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- ,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- estimates the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- for each feature using an empirical Bayes procedure that represents a compromis
- e between per-feature dispersions and a single pooled dispersion estimate
- shared across all features.
- For differential abundance testing,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- offers a likelihood ratio test based on the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GLM
- \end_layout
- \end_inset
- .
- However, this test assumes the dispersion parameter is known exactly rather
- than estimated from the data, which can result in overstating the significance
- of differential abundance results.
- More recently, a quasi-likelihood test has been introduced that properly
- factors the uncertainty in dispersion estimation into the estimates of
- statistical significance, and this test is recommended over the likelihood
- ratio test in most cases
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lund2012"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- Calling consensus peaks from ChIP-seq data
- \end_layout
- \begin_layout Standard
- Unlike
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, in which gene annotations provide a well-defined set of discrete
- genomic regions in which to count reads,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads can potentially occur anywhere in the genome.
- However, most genome regions will not contain significant
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- read coverage, and analyzing every position in the entire genome is statistical
- ly and computationally infeasible, so it is necessary to identify regions
- of interest inside which
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads will be counted and analyzed.
- One option is to define a set of interesting regions
- \emph on
- a priori
- \emph default
- , for example by defining a promoter region for each annotated gene.
- However, it is also possible to use the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data itself to identify regions with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- read coverage significantly above the background level, known as peaks.
-
- \end_layout
- \begin_layout Standard
- The challenge in peak calling is that the immunoprecipitation step is not
- 100% selective, so some fraction of reads are
- \emph on
- not
- \emph default
- derived from DNA fragments that were bound by the immunoprecipitated protein.
- These are referred to as background reads.
- Biases in amplification and sequencing, as well as the aforementioned Poisson
- randomness of the sequencing itself, can cause fluctuations in the background
- level of reads that resemble peaks, and the true peaks must be distinguished
- from these.
- It is common to sequence the input DNA to the ChIP-seq reaction alongside
- the immunoprecipitated product in order to aid in estimating the fluctuations
- in background level across the genome.
- \end_layout
- \begin_layout Standard
- There are generally two kinds of peaks that can be identified: narrow peaks
- and broadly enriched regions.
- Proteins that bind specific sites in the genome (such as many transcription
- factors) typically show most of their
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- read coverage at these specific sites and very little coverage anywhere
- else.
- Because the footprint of the protein is consistent wherever it binds, each
- peak has a consistent width, typically tens to hundreds of base pairs,
- representing the length of DNA that it binds to.
- Algorithms like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MACS
- \end_layout
- \end_inset
- exploit this pattern to identify specific loci at which such
- \begin_inset Quotes eld
- \end_inset
- narrow peaks
- \begin_inset Quotes erd
- \end_inset
- occur by looking for the characteristic peak shape in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- coverage rising above the surrounding background coverage
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zhang2008"
- literal "false"
- \end_inset
- .
- In contrast, some proteins, chief among them histones, do not bind only
- at a small number of specific sites, but rather bind potentially almost
- everywhere in the entire genome.
- When looking at histone marks, adjacent histones tend to be similarly marked,
- and a given mark may be present on an arbitrary number of consecutive histones
- along the genome.
- Hence, there is no consistent
- \begin_inset Quotes eld
- \end_inset
- footprint size
- \begin_inset Quotes erd
- \end_inset
- for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- peaks based on histone marks, and peaks typically span many histones.
- As a result, typical peaks span many hundreds or even thousands of base pairs.
- Instead of identifying specific loci of strong enrichment, algorithms like
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SICER
- \end_layout
- \end_inset
- assume that peaks are represented in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data by modest enrichment above background occurring across broad regions,
- and they attempt to identify the extent of those regions
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zang2009"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- Regardless of the type of peak identified, it is important to identify peaks
- that occur consistently across biological replicates.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ENCODE
- \end_layout
- \end_inset
- project has developed a method called
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- for this purpose
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Li2006"
- literal "false"
- \end_inset
- .
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- is defined as the probability that a peak identified in one biological
- replicate will
- \emph on
- not
- \emph default
- also be identified in a second replicate.
- Where the more familiar false discovery rate measures the degree of corresponde
- nce between a data-derived ranked list and the (unknown) true list of significan
- t features,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- instead measures the degree of correspondence between two ranked lists
- derived from different data.
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- assumes that the highest-ranked features are
- \begin_inset Quotes eld
- \end_inset
- signal
- \begin_inset Quotes erd
- \end_inset
- peaks that tend to be listed in the same order in both lists, while the
- lowest-ranked features are essentially noise peaks, listed in random order
- with no correspondence between the lists.
-
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- attempts to locate the
- \begin_inset Quotes eld
- \end_inset
- crossover point
- \begin_inset Quotes erd
- \end_inset
- between the signal and the noise by determining how far down the list the
- rank consistency breaks down into randomness (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Example-IDR"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/IDR/D4659vsD5053_epic-PAGE1-CROP-RASTER.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Example IDR consistency plot.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Example-IDR"
- \end_inset
- \series bold
- Example IDR consistency plot.
- \series default
- Peak calls in two replicates are ranked from highest score (top and right)
- to lowest score (bottom and left).
- IDR identifies reproducible peaks, which rank highly in both replicates
- (light blue), separating them from
- \begin_inset Quotes eld
- \end_inset
- noise
- \begin_inset Quotes erd
- \end_inset
- peak calls whose ranking is not reproducible between replicates (dark blue).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In addition to other considerations, if called peaks are to be used as regions
- of interest for differential abundance analysis, then care must be taken
- to call peaks in a way that is blind to differential abundance between
- experimental conditions, or else the statistical significance calculations
- for differential abundance will overstate their confidence in the results.
- The
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- csaw
- \end_layout
- \end_inset
- package provides guidelines for calling peaks in this way: peaks are called
- based on a combination of all
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads from all experimental conditions, so that the identified peaks are
- based on the average abundance across all conditions, which is independent
- of any differential abundance between conditions
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lun2015a"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- Normalization of high-throughput data is non-trivial and application-dependent
- \end_layout
- \begin_layout Standard
- High-throughput data sets invariably require some kind of normalization
- before further analysis can be conducted.
- In general, the goal of normalization is to remove effects in the data
- that are caused by technical factors that have nothing to do with the biology
- being studied.
- \end_layout
- \begin_layout Standard
- For Affymetrix expression arrays, the standard normalization algorithm used
- in most analyses is
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Irizarry2003a"
- literal "false"
- \end_inset
- .
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- is designed with the assumption that some fraction of probes on each array
- will be artifactual and takes advantage of the fact that each gene is represent
- ed by multiple probes by implementing normalization and summarization steps
- that are robust against outlier probes.
- However,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- uses the probe intensities of all arrays in the data set in the normalization
- of each individual array, meaning that the normalized expression values
- in each array depend on every array in the data set, and will necessarily
- change each time an array is added or removed from the data set.
- If this is undesirable,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- implements a variant of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- where the relevant distributional parameters are learned from a large reference
- set of diverse public array data sets and then
- \begin_inset Quotes eld
- \end_inset
- frozen
- \begin_inset Quotes erd
- \end_inset
- , so that each array is effectively normalized against this frozen reference
- set rather than the other arrays in the data set under study
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- Other available array normalization methods considered include dChip,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- , and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Li2001,Pelz2008,Piccolo2012"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- In contrast,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- HTS
- \end_layout
- \end_inset
- data present very different normalization challenges.
- The simplest case is
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- in which read counts are obtained for a set of gene annotations, yielding
- a matrix of counts with rows representing genes and columns representing
- samples.
- Because
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- approximates a process of sampling from a population with replacement,
- each gene's count is only interpretable as a fraction of the total reads
- for that sample.
- For that reason,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- abundances are often reported as
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- CPM
- \end_layout
- \end_inset
- .
- Furthermore, if the abundance of a single gene increases, then in order
- for its fraction of the total reads to increase, all other genes' fractions
- must decrease to accommodate it.
- This effect is known as composition bias, and it is an artifact of the
- read sampling process that has nothing to do with the biology of the samples
- and must therefore be normalized out.
- The most commonly used methods to normalize for composition bias in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data seek to equalize the average gene abundance across samples, under
- the assumption that the average gene is likely not changing
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Robinson2010,Anders2010"
- literal "false"
- \end_inset
- .
- The effect of such normalizations is to center the distribution of
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- logFC
- \end_layout
- \end_inset
- at zero.
- Note that if a true global difference in gene expression is present in
- the data, this difference will be normalized out as well, since it is indisting
- uishable from composition bias.
- In other words,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- cannot measure absolute gene expression, only gene expression as a fraction
- of total reads.
- \end_layout
- \begin_layout Standard
- In
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data, normalization is not as straightforward.
- The
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- csaw
- \end_layout
- \end_inset
- package implements several different normalization strategies and provides
- guidance on when to use each one
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lun2015a"
- literal "false"
- \end_inset
- .
- Briefly, a typical
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- sample has a bimodal distribution of read counts: a low-abundance mode
- representing background regions and a high-abundance mode representing
- signal regions.
- This offers two mutually incompatible normalization strategies: equalizing
- background coverage or equalizing signal coverage (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:chipseq-norm-example"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- If the experiment is well controlled and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP
- \end_layout
- \end_inset
- efficiency is known to be consistent across all samples, then normalizing
- the background coverage to be equal across all samples is a reasonable
- strategy.
- If this is not a safe assumption, then the preferred strategy is to normalize
- the signal regions in a way similar to
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data by assuming that the average signal region is not changing abundance
- between samples.
- Beyond this, if a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- experiment has a more complicated structure that doesn't show the typical
- bimodal count distribution, it may be necessary to implement a normalization
- as a smooth function of abundance.
- However, this strategy makes a much stronger assumption about the data:
- that the average
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logFC
- \end_layout
- \end_inset
- is zero across all abundance levels.
- Hence, the simpler scaling normalizations based on background or signal
- regions are generally preferred whenever possible.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-sample-MAplot-bins-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Example MA plot of ChIP-seq read counts in 10kb bins for two arbitrary samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:chipseq-norm-example"
- \end_inset
- \series bold
- Example MA plot of ChIP-seq read counts in 10kb bins for two arbitrary samples.
-
- \series default
- The distribution of bins is bimodal along the x axis (average abundance),
- with the left mode representing
- \begin_inset Quotes eld
- \end_inset
- background
- \begin_inset Quotes erd
- \end_inset
- regions with no protein binding and the right mode representing bound regions.
- The modes are also separated on the y axis (logFC), motivating two conflicting
- normalization strategies: background normalization (red) and signal normalizati
- on (blue and green, two similar signal normalizations).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- ComBat and SVA for correction of known and unknown batch effects
- \end_layout
- \begin_layout Standard
- In addition to well-understood effects that can be easily normalized out,
- a data set often contains confounding biological effects that must be accounted
- for in the modeling step.
- For instance, in an experiment with pre-treatment and post-treatment samples
- of cells from several different donors, donor variability represents a
- known batch effect.
- The most straightforward correction for known batches is to estimate the
- mean for each batch independently and subtract out the differences, so
- that all batches have identical means for each feature.
- However, as with variance estimation, estimating the differences in batch
- means is not necessarily robust at the feature level, so the ComBat method
- adds empirical Bayes squeezing of the batch mean differences toward a common
- value, analogous to
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- 's empirical Bayes squeezing of feature variance estimates
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Johnson2007"
- literal "false"
- \end_inset
- .
- Effectively, ComBat assumes that modest differences between batch means
- are real batch effects, but extreme differences between batch means are
- more likely to be the result of outlier observations that happen to line
- up with the batches rather than a genuine batch effect.
- The result is a batch correction that is more robust against outliers than
- simple subtraction of mean differences.
- \end_layout
- \begin_layout Standard
- In some data sets, unknown batch effects may be present due to inherent
- variability in the data, caused by either technical or biological effects.
- Examples of unknown batch effects include variations in enrichment efficiency
- between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- samples, variations in populations of different cell types, and the effects
- of uncontrolled environmental factors on gene expression in humans or live
- animals.
- In an ordinary linear model context, unknown batch effects cannot be inferred
- and must be treated as random noise.
- However, in high-throughput experiments, once again information can be
- shared across features to identify patterns of un-modeled variation that
- are repeated in many features.
- One attractive strategy would be to perform
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVD
- \end_layout
- \end_inset
- on the matrix of linear model residuals (which contain all the un-modeled
- variation in the data) and take the first few singular vectors as batch
- effects.
- While this can be effective, it makes the unreasonable assumption that
- all batch effects are completely uncorrelated with any of the effects being
- modeled.
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- starts with this approach, but takes some additional steps to identify
- batch effects in the full data that are both highly correlated with the
- singular vectors in the residuals and least correlated with the effects
- of interest
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007"
- literal "false"
- \end_inset
- .
- Since the final batch effects are estimated from the full data, moderate
- correlations between the batch effects and effects of interest are allowed,
- which gives
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- much more freedom to estimate the true extent of the batch effects compared
- to simple residual
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVD
- \end_layout
- \end_inset
- .
- Once the surrogate variables are estimated, they can be included as coefficient
- s in the linear model in a similar fashion to known batch effects in order
- to subtract out their effects on each feature's abundance.
- \end_layout
- \begin_layout Subsection
- Interpreting p-value distributions and estimating false discovery rates
- \end_layout
- \begin_layout Standard
- When testing thousands of genes for differential expression or performing
- thousands of statistical tests for other kinds of genomic data, the result
- is thousands of p-values.
- By construction, p-values have a
- \begin_inset Formula $\mathrm{Uniform}(0,1)$
- \end_inset
- distribution under the null hypothesis.
- This means that if all null hypotheses are true in a large number
- \begin_inset Formula $N$
- \end_inset
- of tests, then for any significance threshold
- \begin_inset Formula $T$
- \end_inset
- , approximately
- \begin_inset Formula $N*T$
- \end_inset
- p-values would be called
- \begin_inset Quotes eld
- \end_inset
- significant
- \begin_inset Quotes erd
- \end_inset
- at that threshold even though the null hypotheses are all true.
- These are called false discoveries.
- \end_layout
- \begin_layout Standard
- When only a fraction of null hypotheses are true, the p-value distribution
- will be a mixture of a uniform component representing the null hypotheses
- that are true and a non-uniform component representing the null hypotheses
- that are not true (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Example-pval-hist"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The fraction belonging to the uniform component is referred to as
- \begin_inset Formula $\pi_{0}$
- \end_inset
- , which ranges from 1 (all null hypotheses true) to 0 (all null hypotheses
- false).
- Furthermore, the non-uniform component must be biased toward zero, since
- any evidence against the null hypothesis pushes the p-value for a test
- toward zero.
- We can exploit this fact to estimate the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- for any significance threshold by estimating the degree to which the density
- of p-values left of that threshold exceeds what would be expected for a
- uniform distribution.
- In genomics, the most commonly used
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- estimation method, and the one used in this work, is that of
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- glsdisp{BH}{Benjamini and Hochberg}
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- This is a conservative method that effectively assumes
- \begin_inset Formula $\pi_{0}=1$
- \end_inset
- .
- Hence it gives an estimated upper bound for the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- at any significance threshold, rather than a point estimate.
-
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Intro/med-pval-hist-colored-CROP.pdf
- lyxscale 50
- width 100col%
- groupId colfullwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Example p-value histogram.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Example-pval-hist"
- \end_inset
- \series bold
- Example p-value histogram.
-
- \series default
- The distribution of p-values from a large number of independent tests (such
- as differential expression tests for each gene in the genome) is a mixture
- of a uniform component representing the null hypotheses that are true (blue
- shading) and a zero-biased component representing the null hypotheses that
- are false (red shading).
- The FDR for any column in the histogram is the fraction of that column
- that is blue.
- The line
- \begin_inset Formula $y=\pi_{0}$
- \end_inset
- represents the theoretical uniform component of this p-value distribution,
- while the line
- \begin_inset Formula $y=1$
- \end_inset
- represents the uniform component when all null hypotheses are true.
- Note that in real data, the true status of each hypothesis is unknown,
- so only the overall shape of the distribution is known.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- We can also estimate
- \begin_inset Formula $\pi_{0}$
- \end_inset
- for the entire distribution of p-values, which can give an idea of the
- overall signal size in the data without setting any significance threshold
- or making any decisions about which specific null hypotheses to reject.
- As with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- estimation, there are many methods proposed for estimating
- \begin_inset Formula $\pi_{0}$
- \end_inset
- .
- The one used in this work is the Phipson method of averaging local
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013Thesis"
- literal "false"
- \end_inset
- .
- Once
- \begin_inset Formula $\pi_{0}$
- \end_inset
- is estimated, the number of null hypotheses that are false can be estimated
- as
- \begin_inset Formula $(1-\pi_{0})*N$
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- Conversely, a p-value distribution that is neither uniform nor zero-biased
- is evidence of a modeling failure.
- Such a distribution would imply that there is less than zero evidence against
- the null hypothesis, which is not possible (in a frequentist setting).
- Attempting to estimate
- \begin_inset Formula $\pi_{0}$
- \end_inset
- from such a distribution would yield an estimate greater than 1, a nonsensical
- result.
- The usual cause of a poorly behaved p-value distribution is a model assumption
- that is violated by the data, such as assuming equal variance between groups
- (homoskedasticity) when the variance of each group is not equal (heteroskedasti
- city) or failing to model a strong confounding batch effect.
- In particular, such a p-value distribution is
- \emph on
- not
- \emph default
- consistent with a simple lack of signal in the data, as this should result
- in a uniform distribution.
- Hence, observing such a p-value distribution should prompt a search for
- violated model assumptions.
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Subsection
- Factor analysis: PCA, PCoA, MOFA
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Not sure if this merits a subsection here.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Batch-corrected
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCA
- \end_layout
- \end_inset
- is informative, but careful application is required to avoid bias
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Structure of the thesis
- \end_layout
- \begin_layout Standard
- This thesis presents three instances of using high-throughput genomic and epigenomic
- assays to investigate hypotheses or solve problems relating to the study
- of transplant rejection.
- In Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:CD4-ChIP-seq"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- are used to investigate the dynamics of promoter histone methylation as
- it relates to gene expression in T-cell activation and memory.
- Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:Improving-array-based-diagnostic"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- looks at several array-based assays with the potential to diagnose transplant
- rejection and shows that analyses of this array data are greatly improved
- by paying careful attention to normalization and preprocessing.
- Finally, Chapter
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "chap:Globin-blocking-cyno"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- presents a custom method for improving
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- of non-human primate blood samples by preventing reverse transcription
- of unwanted globin transcripts.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Add a sentence about Ch5 once written
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Chapter
- \begin_inset CommandInset label
- LatexCommand label
- name "chap:CD4-ChIP-seq"
- \end_inset
- Reproducible genome-wide epigenetic analysis of H3K4 and H3K27 methylation
- in naïve and memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell activation
- \end_layout
- \begin_layout Standard
- \size large
- Ryan C.
- Thompson, Sarah A.
- Lamere, Daniel R.
- Salomon
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- This causes all abbreviations to be reintroduced.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Introduction
- \end_layout
- \begin_layout Standard
- CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells are central to all adaptive immune responses, as well as immune
- memory
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Murphy2012"
- literal "false"
- \end_inset
- .
- After an infection is cleared, a subset of the naïve CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells that responded to that infection differentiate into memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells, which are responsible for responding to the same pathogen in the
- future.
- Memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells are functionally distinct, able to respond to an infection more
- quickly and without the co-stimulation required by naïve CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells.
- However, the molecular mechanisms underlying this functional distinction
- are not well-understood.
- Epigenetic regulation via histone modification is thought to play an important
- role, but while many studies have looked at static snapshots of histone
- methylation in T-cells, few studies have looked at the dynamics of histone
- regulation after T-cell activation, or at the differences in histone methylation
- between naïve and memory T-cells.
- H3K4me2, H3K4me3 and H3K27me3 are three histone marks thought to be major
- epigenetic regulators of gene expression.
- The goal of the present study is to investigate the role of these histone
- marks in CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell activation kinetics and memory differentiation.
- In static snapshots, H3K4me2 and H3K4me3 are often observed in the promoters
- of highly transcribed genes, while H3K27me3 is more often observed in promoters
- of inactive genes with little to no transcription occurring.
- As a result, the two H3K4 marks have been characterized as
- \begin_inset Quotes eld
- \end_inset
- activating
- \begin_inset Quotes erd
- \end_inset
- marks, while H3K27me3 has been characterized as
- \begin_inset Quotes eld
- \end_inset
- deactivating
- \begin_inset Quotes erd
- \end_inset
- .
- Despite these characterizations, the actual causal relationship between
- these histone modifications and gene transcription is complex and likely
- involves positive and negative feedback loops between the two.
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Standard
- In order to investigate the relationship between gene expression and these
- histone modifications in the context of naïve and memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell activation, a previously published data set of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data was re-analyzed using up-to-date methods designed to address the specific
- analysis challenges posed by this data set.
- The data set contains naïve and memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell samples in a time course before and after activation.
- Like the original analysis, this analysis looks at the dynamics of these
- histone marks and compares them to gene expression dynamics at the same
- time points during activation, and also compares them between naïve and
- memory cells, in the hope of discovering evidence of new mechanistic details
- in the interplay between them.
- The original analysis of this data treated each gene promoter as a monolithic
- unit and mostly assumed that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads or peaks occurring anywhere within a promoter were equivalent, regardless
- of where they occurred relative to the gene structure.
- For an initial analysis of the data, this was a necessary simplifying assumptio
- n.
- The current analysis aims to relax this assumption, first by directly analyzing
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- peaks for differential modification, and second by taking a more granular
- look at the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- read coverage within promoter regions to ask whether the location of histone
- modifications relative to the gene's
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- is an important factor, as opposed to simple proximity.
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Standard
- A reproducible workflow was written to analyze the raw
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data from previous studies (
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
- accession number
- \begin_inset CommandInset href
- LatexCommand href
- name "GSE73214"
- target "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE73214"
- literal "false"
- \end_inset
- )
- \begin_inset CommandInset citation
- LatexCommand cite
- key "gh-cd4-csaw,LaMere2016,LaMere2017"
- literal "true"
- \end_inset
- .
- Briefly, this data consists of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- from CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells from 4 donors.
- From each donor, naïve and memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells were isolated separately.
- Then cultures of both cell types were activated with CD3/CD28 beads, and samples
- were taken at 4 time points: Day 0 (pre-activation), Day 1 (early activation),
- Day 5 (peak activation), and Day 14 (post-activation).
- For each combination of cell type and time point, RNA was isolated and
- sequenced, and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- was performed for each of 3 histone marks: H3K4me2, H3K4me3, and H3K27me3.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- input DNA was also sequenced for each sample.
- The result was 32 samples for each assay.
- \end_layout
- \begin_layout Subsection
- RNA-seq differential expression analysis
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- STAR quantification, Entrez vs Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \qquad{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon+Shoal quantification, Entrez vs Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- STAR vs HISAT2 quantification, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \qquad{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/star-vs-salmon-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon vs STAR quantification, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon vs Kallisto quantification, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \qquad{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal-CROP.png
- lyxscale 25
- width 35col%
- groupId rna-comp-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- Salmon+Shoal vs Salmon alone, Ensembl gene annotation
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-norm-comp"
- \end_inset
- RNA-seq comparisons
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Sequence reads were retrieved from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SRA
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leinonen2011"
- literal "false"
- \end_inset
- .
- Five different alignment and quantification methods were tested for the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Dobin2012,Kim2019,Liao2014,Pimentel2016,Patro2017,gh-shoal,gh-hg38-ref"
- literal "false"
- \end_inset
- .
- Each quantification was tested with both Ensembl transcripts and GENCODE
- known gene annotations
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zerbino2018,Harrow2012"
- literal "false"
- \end_inset
- .
- Comparisons of downstream results from each combination of quantification
- method and reference revealed that all quantifications gave broadly similar
- results for most genes, with none being obviously superior.
- Salmon quantification with regularization by shoal with the Ensembl annotation
- was chosen as the method theoretically most likely to partially mitigate
- some of the batch effect in the data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Patro2017,gh-shoal"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- Due to an error in sample preparation, the RNA from the samples for days
- 0 and 5 was sequenced using a different kit than that for days 1 and
- 14.
- This induced a substantial batch effect in the data due to differences
- in sequencing biases between the two kits, and this batch effect is unfortunate
- ly confounded with the time point variable (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA-no-batchsub"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- To do the best possible analysis with this data, this batch effect was
- subtracted out from the data using ComBat
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Johnson2007"
- literal "false"
- \end_inset
- , ignoring the time point variable due to the confounding with the batch
- variable.
- The result is a marked improvement, but the unavoidable confounding with
- time point means that certain real patterns of gene expression will be
- indistinguishable from the batch effect and subtracted out as a result.
- Specifically, any
- \begin_inset Quotes eld
- \end_inset
- zig-zag
- \begin_inset Quotes erd
- \end_inset
- pattern, such as a gene whose expression goes up on day 1, down on day
- 5, and back up again on day 14, will be attenuated or eliminated entirely.
- In the context of a T-cell activation time course, it is unlikely that
- many genes of interest will follow such an expression pattern, so this
- loss was deemed an acceptable cost for correcting the batch effect.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-no-batchsub-CROP.png
- lyxscale 25
- width 75col%
- groupId rna-pca-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA-no-batchsub"
- \end_inset
- Before batch correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-combat-batchsub-CROP.png
- lyxscale 25
- width 75col%
- groupId rna-pca-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA-ComBat-batchsub"
- \end_inset
- After batch correction with ComBat
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- PCoA plots of RNA-seq data showing effect of batch correction.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA"
- \end_inset
- \series bold
- PCoA plots of RNA-seq data showing effect of batch correction.
-
- \series default
- The uncorrected data (a) shows a clear separation between samples from the
- two batches (red and blue) dominating the first principal coordinate.
- After correction with ComBat (b), the two batches now have approximately
- the same center, and the first two principal coordinates both show separation
- between experimental conditions rather than batches.
- (Note that time points are shown in hours rather than days in these plots.)
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- However, removing the systematic component of the batch effect still leaves
- the noise component.
- The gene quantifications from the first batch are substantially noisier
- than those in the second batch.
- This analysis corrected for this by using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- 's sample weighting method to assign lower weights to the noisy samples
- of batch 1 (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-seq-weights-vs-covars"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- )
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2006,Liu2015"
- literal "false"
- \end_inset
- .
- The resulting analysis gives an accurate assessment of statistical significance
- for all comparisons, which unfortunately means a loss of statistical power
- for comparisons involving samples in batch 1.
- \end_layout
- \begin_layout Standard
- In any case, the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- counts were first normalized using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TMM
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Robinson2010"
- literal "false"
- \end_inset
- , converted to normalized
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- with quality weights using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- voomWithQualityWeights
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014,Liu2015"
- literal "false"
- \end_inset
- , and batch-corrected at this point using ComBat.
- A linear model was fit to the batch-corrected, quality-weighted data for
- each gene using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- , and each gene was tested for differential expression using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- 's empirical Bayes moderated
- \begin_inset Formula $t$
- \end_inset
- -test
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005,Law2014,Phipson2016"
- literal "false"
- \end_inset
- .
- P-values were corrected for multiple testing using the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BH
- \end_layout
- \end_inset
- procedure for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- control
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-nobcv-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- RNA-seq sample weights, grouped by experimental and technical covariates.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-seq-weights-vs-covars"
- \end_inset
- \series bold
- RNA-seq sample weights, grouped by experimental and technical covariates.
-
- \series default
- Inverse variance weights were estimated for each sample using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- limma
- \end_layout
- \end_inset
- 's
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- arrayWeights
- \end_layout
- \end_inset
- function (part of
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- voomWithQualityWeights
- \end_layout
- \end_inset
- ).
- The samples were grouped by each known covariate and the distribution of
- weights was plotted for each group.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- ChIP-seq analyses
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Be consistent about use of
- \begin_inset Quotes eld
- \end_inset
- differential binding
- \begin_inset Quotes erd
- \end_inset
- vs
- \begin_inset Quotes eld
- \end_inset
- differential modification
- \begin_inset Quotes erd
- \end_inset
- throughout this chapter.
- The latter is usually preferred.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Sequence reads were retrieved from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SRA
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leinonen2011"
- literal "false"
- \end_inset
- .
-
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- (and input) reads were aligned to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRCh38
- \end_layout
- \end_inset
- genome assembly using Bowtie 2
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Langmead2012,Schneider2017,gh-hg38-ref"
- literal "false"
- \end_inset
- .
- Artifact regions were annotated using a custom implementation of the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- GreyListChIP
- \end_layout
- \end_inset
- algorithm, and these
- \begin_inset Quotes eld
- \end_inset
- greylists
- \begin_inset Quotes erd
- \end_inset
- were merged with the published
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ENCODE
- \end_layout
- \end_inset
- blacklists
- \begin_inset CommandInset citation
- LatexCommand cite
- key "greylistchip,Dunham2012,Amemiya2019,gh-cd4-csaw"
- literal "false"
- \end_inset
- .
- Any read or called peak overlapping one of these regions was regarded as
- artifactual and excluded from downstream analyses.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:CCF-master"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the improvement after blacklisting in the strand cross-correlation
- plots, a common quality control plot for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kharchenko2008,Lun2015a"
- literal "false"
- \end_inset
- .
- Peaks were called using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- epic
- \end_layout
- \end_inset
- , an implementation of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SICER
- \end_layout
- \end_inset
- algorithm
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zang2009,gh-epic"
- literal "false"
- \end_inset
- .
- Peaks were also called separately using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MACS
- \end_layout
- \end_inset
- , but
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MACS
- \end_layout
- \end_inset
- was determined to be a poor fit for the data, and these peak calls are
- not used in any further analyses
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zhang2008"
- literal "false"
- \end_inset
- .
- Consensus peaks were determined by applying the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- IDR
- \end_layout
- \end_inset
- framework
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Li2006,gh-idr"
- literal "false"
- \end_inset
- to find peaks consistently called in the same locations across all 4 donors.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/csaw/CCF-plots-noBL-PAGE2-CROP.pdf
- lyxscale 75
- width 47col%
- groupId ccf-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:CCF-without-blacklist"
- \end_inset
- Cross-correlation plots without removing blacklisted reads.
-
- \series default
- Without blacklisting, many artifactual peaks are visible in the cross-correlatio
- ns of the ChIP-seq samples, and the peak at the true fragment size (147
- \begin_inset space ~
- \end_inset
- bp) is frequently overshadowed by the artifactual peak at the read length
- (100
- \begin_inset space ~
- \end_inset
- bp).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/csaw/CCF-plots-PAGE2-CROP.pdf
- lyxscale 75
- width 47col%
- groupId ccf-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:CCF-with-blacklist"
- \end_inset
- Cross-correlation plots with blacklisted reads removed.
- \series default
- After blacklisting, most ChIP-seq samples have clean-looking periodic cross-cor
- relation plots, with the largest peak around 147
- \begin_inset space ~
- \end_inset
- bp, the expected size for a fragment of DNA from a single nucleosome, and
- little to no peak at the read length, 100
- \begin_inset space ~
- \end_inset
- bp.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Figure font too small
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Strand cross-correlation plots for ChIP-seq data, before and after blacklisting.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:CCF-master"
- \end_inset
- \series bold
- Strand cross-correlation plots for ChIP-seq data, before and after blacklisting.
-
- \series default
- The number of reads starting at each position in the genome was counted
- separately for the plus and minus strands, and then the correlation coefficient
- between the read start counts for both strands (cross-correlation) was
- computed after shifting the plus strand counts forward by a specified interval
- (the delay).
- This was repeated for every delay value from 0 to 1000, and the cross-correlati
- on values were plotted as a function of the delay.
- In good quality samples, cross-correlation is maximized when the delay
- equals the fragment size; in poor quality samples, cross-correlation is
- often maximized when the delay equals the read length, an artifactual peak
- whose cause is not fully understood.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Promoters were defined by computing the distance from each annotated
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- to the nearest called peak and examining the distribution of distances,
- observing that peaks for each histone mark were enriched within a certain
- distance of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- (Note: this analysis was performed using the original peak calls and expression
- values from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- .) For H3K4me2 and H3K4me3, this distance was about 1
- \begin_inset space ~
- \end_inset
- kbp, while for H3K27me3 it was 2.5
- \begin_inset space ~
- \end_inset
- kbp.
- These distances were used as an
- \begin_inset Quotes eld
- \end_inset
- effective promoter radius
- \begin_inset Quotes erd
- \end_inset
- for each mark.
- The promoter region for each gene was defined as the region of the genome
- within this distance upstream or downstream of the gene's annotated
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- For genes with multiple annotated
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , a promoter region was defined for each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- individually, and any promoters that overlapped (due to multiple
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- being closer than 2 times the radius) were merged into one large promoter.
- Thus, some genes had multiple promoters defined, which were each analyzed
- separately for differential modification.
- \end_layout
- \begin_layout Standard
- Reads in promoters, peaks, and sliding windows across the genome were counted
- and normalized using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- csaw
- \end_layout
- \end_inset
- and analyzed for differential modification using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lun2014,Lun2015a,Lund2012,Phipson2016"
- literal "false"
- \end_inset
- .
- Unobserved confounding factors in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- data were corrected using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007,Leek2014"
- literal "false"
- \end_inset
- .
- Principal coordinate plots of the promoter count data for each histone
- mark before and after subtracting surrogate variable effects are shown
- in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-ChIP"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-PCA-raw-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me2-bad"
- \end_inset
- H3K4me2, no correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-PCA-SVsub-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me2-good"
- \end_inset
- H3K4me2, SVs subtracted
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-PCA-raw-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me3-bad"
- \end_inset
- H3K4me3, no correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-PCA-SVsub-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me3-good"
- \end_inset
- H3K4me3, SVs subtracted
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-PCA-raw-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K27me3-bad"
- \end_inset
- H3K27me3, no correction
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-PCA-SVsub-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K27me3-good"
- \end_inset
- H3K27me3, SVs subtracted
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status collapsed
- \begin_layout Plain Layout
- Figure font too small
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- PCoA plots of ChIP-seq sliding window data, before and after subtracting
- surrogate variables.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-ChIP"
- \end_inset
- \series bold
- PCoA plots of ChIP-seq sliding window data, before and after subtracting
- surrogate variables (SVs).
-
- \series default
- For each histone mark, a PCoA plot of the first 2 principal coordinates
- was created before and after subtraction of SV effects.
- Time points are shown by color and cell type by shape, and samples from
- the same time point and cell type are enclosed in a shaded area to aid
- in visual recognition (this shaded area has no meaning on the plot).
- Samples of the same cell type from the same donor are connected with a
- line in time point order, showing the
- \begin_inset Quotes eld
- \end_inset
- trajectory
- \begin_inset Quotes erd
- \end_inset
- of each donor's samples over time.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To investigate whether the location of a peak within the promoter region
- was important,
- \begin_inset Quotes eld
- \end_inset
- relative coverage profiles
- \begin_inset Quotes erd
- \end_inset
- were generated.
- First, 500-bp sliding windows were tiled around each annotated
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- : one window centered on the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- itself, and 10 windows each upstream and downstream, thus covering a 10.5-kb
- region centered on the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- with 21 windows.
- Reads in each window for each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- were counted in each sample, and the counts were normalized and converted
- to
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- as in the differential modification analysis.
- Then, the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values within each promoter were normalized to an average of zero, such
- that each window's normalized abundance now represents the relative read
- depth of that window compared to all other windows in the same promoter.
- The normalized abundance values for each window in a promoter are collectively
- referred to as that promoter's
- \begin_inset Quotes eld
- \end_inset
- relative coverage profile
- \begin_inset Quotes erd
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- MOFA analysis of cross-dataset variation patterns
- \end_layout
- \begin_layout Standard
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
- was run on all the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- windows overlapping consensus peaks for each histone mark, as well as the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, in order to identify patterns of coordinated variation across all
- data sets
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Argelaguet2018"
- literal "false"
- \end_inset
- .
- The results are summarized in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MOFA-master"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
-
- \begin_inset Flex Glossary Term (Capital, pl)
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- 1, 4, and 5 were determined to explain the most variation consistently
- across all data sets (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-varexplained"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), and scatter plots of these factors show that they also correlate best
- with the experimental factors (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-lf-scatter"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- 2 captures the batch effect in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data.
- Removing the effect of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- 2 using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
- theoretically yields a batch correction that does not depend on knowing
- the experimental factors.
- When this was attempted, the resulting batch correction was comparable
- to ComBat (see Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA-ComBat-batchsub"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), indicating that the ComBat-based batch correction has little room for
- improvement given the problems with the data set.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/MOFA-varExplaiend-matrix-CROP.png
- lyxscale 25
- width 45col%
- groupId mofa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:mofa-varexplained"
- \end_inset
- Variance explained in each data set by each latent factor estimated by MOFA.
- \series default
- For each LF learned by MOFA, the variance explained by that factor in each
- data set (
- \begin_inset Quotes eld
- \end_inset
- view
- \begin_inset Quotes erd
- \end_inset
- ) is shown by the shading of the cells in the lower section.
- The upper section shows the total fraction of each data set's variance
- that is explained by all LFs combined.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/MOFA-LF-scatter-small.png
- lyxscale 25
- width 45col%
- groupId mofa-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:mofa-lf-scatter"
- \end_inset
- Scatter plots of specific pairs of MOFA latent factors.
- \series default
- LFs 1, 4, and 5 explain substantial variation in all data sets, so they
- were plotted against each other in order to reveal patterns of variation
- that are shared across all data sets.
- These plots can be interpreted similarly to PCA and PCoA plots.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Figure font a bit too small
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- MOFA latent factors identify shared patterns of variation.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MOFA-master"
- \end_inset
- \series bold
- MOFA latent factors identify shared patterns of variation.
-
- \series default
- MOFA was used to estimate latent factors (LFs) that explain substantial
- variation in the RNA-seq data and the ChIP-seq data (a).
- Then specific LFs of interest were selected and plotted (b).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/MOFA-batch-correct-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:mofa-batchsub"
- \end_inset
- Result of RNA-seq batch-correction using MOFA latent factors
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Focus on what hypotheses were tested, then select figures that show how
- those hypotheses were tested, even if the result is a negative.
- Not every interesting result needs to be in here.
- Chapter should tell a story.
-
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Interpretation of RNA-seq analysis is limited by a major confounding factor
- \end_layout
- \begin_layout Standard
- Genes called as present in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data were tested for differential expression between all time points and
- cell types.
- The counts of differentially expressed genes are shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Estimated-and-detected-rnaseq"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- Notably, all the results for Day 0 and Day 5 have substantially fewer genes
- called differentially expressed than any of the results for other time
- points.
- This is an unfortunate result of the difference in sample quality between
- the two batches of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data.
- All the samples in Batch 1, which includes all the samples from Days 0
- and 5, have substantially more variability than the samples in Batch 2,
- which includes the other time points.
- This is reflected in the substantially higher weights assigned to Batch
- 2 (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-seq-weights-vs-covars"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
-
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="11" columns="3">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Est.
- non-null
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \begin_inset Formula $\mathrm{FDR}\le10\%$
- \end_inset
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Naïve Day 0 vs Day 1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 5992
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1613
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Naïve Day 0 vs Day 5
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 3038
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 32
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Naïve Day 0 vs Day 14
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1870
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 190
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Memory Day 0 vs Day 1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 3195
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 411
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Memory Day 0 vs Day 5
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2688
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 18
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Memory Day 0 vs Day 14
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1911
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 227
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 0 Naïve vs Memory
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 1 Naïve vs Memory
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 9167
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 5532
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 5 Naïve vs Memory
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 14 Naïve vs Memory
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6446
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2319
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Estimated and detected differentially expressed genes.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Estimated-and-detected-rnaseq"
- \end_inset
- \series bold
- Estimated and detected differentially expressed genes.
- \series default
-
- \begin_inset Quotes eld
- \end_inset
- Test
- \begin_inset Quotes erd
- \end_inset
- : Which sample groups were compared;
- \begin_inset Quotes eld
- \end_inset
- Est. non-null
- \begin_inset Quotes erd
- \end_inset
- : Estimated number of differentially expressed genes, using the method of
- averaging local FDR values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013Thesis"
- literal "false"
- \end_inset
- ;
- \begin_inset Quotes eld
- \end_inset
- \begin_inset Formula $\mathrm{FDR}\le10\%$
- \end_inset
- \begin_inset Quotes erd
- \end_inset
- : Number of significantly differentially expressed genes at an FDR threshold
- of 10%.
- The total number of genes tested was 16707.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- If float lost issues, reposition randomly until success.
- \end_layout
- \end_inset
- The batch effect has both a systematic component and a random noise component.
- While the systematic component was subtracted out using ComBat (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), no such correction is possible for the noise component: Batch 1 simply
- has substantially more random noise in it, which reduces the statistical
- power for any differential expression tests involving samples in that batch.
-
- \end_layout
- \begin_layout Standard
- Despite the difficulty in detecting specific differentially expressed genes,
- there is still evidence that differential expression is present for these
- time points.
- In Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:rna-pca-final"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , there is a clear separation between naïve and memory samples at Day 0,
- despite the fact that only 2 genes were significantly differentially expressed
- for this comparison.
- Similarly, the small numbers of genes detected for the Day 0 vs Day 5 compariso
- ns do not reflect the large separation between these time points in Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:rna-pca-final"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- In addition, the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- plots show similar separation in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-lf-scatter"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This suggests that there is indeed a differential expression signal present
- in the data for these comparisons, but the large variability in the Batch
- 1 samples obfuscates this signal at the individual gene level.
- As a result, it is impossible to make any meaningful statements about the
-
- \begin_inset Quotes eld
- \end_inset
- size
- \begin_inset Quotes erd
- \end_inset
- of the gene signature for any time point, since the number of significant
- genes as well as the estimated number of differentially expressed genes
- depends so strongly on the variations in sample quality in addition to
- the size of the differential expression signal in the data.
- Gene-set enrichment analyses are similarly impractical.
- However, analyses looking at genome-wide patterns of expression are still
- practical.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-final-12-CROP.png
- lyxscale 25
- width 100col%
- groupId colwidth-raster
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- PCoA plot of RNA-seq samples after ComBat batch correction.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:rna-pca-final"
- \end_inset
- \series bold
- PCoA plot of RNA-seq samples after ComBat batch correction.
-
- \series default
- Each point represents an individual sample.
- Samples with the same combination of cell type and time point are encircled
- with a shaded region to aid in visual identification of the sample groups.
- Samples of the same cell type from the same donor are connected by lines
- to indicate the
- \begin_inset Quotes eld
- \end_inset
- trajectory
- \begin_inset Quotes erd
- \end_inset
- of each donor's cells over time in PCoA space.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- H3K4 and H3K27 methylation occur in broad regions and are enriched near
- promoters
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Also get
- \emph on
- median
- \emph default
- peak width and maybe other quantiles (25%, 75%)
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="5">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Histone Mark
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- # Peaks
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Mean peak width
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Genome coverage
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- FRiP
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 14,965
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 3,970
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1.92%
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 14.2%
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6,163
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2,946
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.588%
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6.57%
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 18,139
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 18,967
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 11.1%
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 22.5%
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Get the IDR threshold
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Summary of peak-calling statistics.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:peak-calling-summary"
- \end_inset
- \series bold
- Summary of peak-calling statistics.
-
- \series default
- For each histone mark, the number of peaks called using SICER at an IDR
- threshold of ???, the mean width of those peaks, the fraction of the genome
- covered by peaks, and the fraction of reads in peaks (FRiP).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:peak-calling-summary"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- gives a summary of the peak calling statistics for each histone mark.
- Consistent with previous observations, all 3 histone marks occur in broad
- regions spanning many consecutive nucleosomes, rather than in sharp peaks
- as would be expected for a transcription factor or other molecule that
- binds to specific sites.
- This conclusion is further supported by Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:CCF-with-blacklist"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , in which a clear nucleosome-sized periodicity is visible in the cross-correlat
- ion value for each sample, indicating that each time a given mark is present
- on one histone, it is likely to be found on adjacent histones as well.
- H3K27me3 enrichment in particular is substantially more broad than either
- H3K4 mark, with a mean peak width of almost 19,000 bp.
- This is also reflected in the periodicity observed in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:CCF-with-blacklist"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , which remains strong much farther out for H3K27me3 than the other marks,
- showing H3K27me3 especially tends to be found on long runs of consecutive
- histones.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- All 3 histone marks tend to occur more often near promoter regions, as shown
- in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The majority of each density distribution is flat, representing the background
- density of peaks genome-wide.
- Each distribution has a peak near zero, representing an enrichment of peaks
- close to
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- positions relative to the remainder of the genome.
- Interestingly, the
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- within which this enrichment occurs is not the same for every histone mark
- (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:effective-promoter-radius"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- For H3K4me2 and H3K4me3, peaks are most enriched within 1
- \begin_inset space ~
- \end_inset
- kbp of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- positions, while for H3K27me3, enrichment is broader, extending to 2.5
- \begin_inset space ~
- \end_inset
- kbp.
- These
- \begin_inset Quotes eld
- \end_inset
- effective promoter radii
- \begin_inset Quotes erd
- \end_inset
- remain approximately the same across all combinations of experimental conditions
- (cell type, time point, and donor), so they appear to be a property of
- the histone mark itself.
- Hence, these radii were used to define the promoter regions for each histone
- mark in all further analyses.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/Promoter-Peak-Distance-Profile-PAGE1-CROP.pdf
- lyxscale 50
- width 80col%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Future direction idea: Need a control: shuffle all peaks and repeat, N times.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Enrichment of peaks in promoter neighborhoods.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:near-promoter-peak-enrich"
- \end_inset
- \series bold
- Enrichment of peaks in promoter neighborhoods.
-
- \series default
- This plot shows the distribution of distances from each annotated transcription
- start site in the genome to the nearest called peak.
- Each line represents one combination of histone mark, cell type, and time
- point.
- Distributions are smoothed using kernel density estimation.
- TSSs that occur
- \emph on
- within
- \emph default
- peaks were excluded from this plot to avoid a large spike at zero that
- would overshadow the rest of the distribution.
- (Note: this figure was generated using the original peak calls and expression
- values from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- .)
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="2">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Histone mark
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Effective promoter radius
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1 kbp
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1 kbp
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2.5 kbp
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Effective promoter radius for each histone mark.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:effective-promoter-radius"
- \end_inset
- \series bold
- Effective promoter radius for each histone mark.
- \series default
- These values represent the approximate distance from transcription start
- site positions within which an excess of peaks is found, as shown in Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Consider also showing figure for distance to nearest peak center, and reference
- median peak size once that is known.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Correlations between gene expression and promoter methylation follow expected
- genome-wide trends
- \end_layout
- \begin_layout Standard
- H3K4me2 and H3K4me3 have previously been reported as activating marks whose
- presence in a gene's promoter is associated with higher gene expression,
- while H3K27me3 has been reported as inactivating
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016,LaMere2017"
- literal "false"
- \end_inset
- .
- The data are consistent with this characterization: genes whose promoters
- (as defined by the radii for each histone mark listed in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:effective-promoter-radius"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) overlap with a H3K4me2 or H3K4me3 peak tend to have higher expression
- than those that don't, while H3K27me3 is likewise associated with lower
- gene expression, as shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:fpkm-by-peak"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This pattern holds across all combinations of cell type and time point
- (Welch's
- \emph on
- t
- \emph default
- -test, all
- \begin_inset Formula $p\textrm{-values}\ll2.2\times10^{-16}$
- \end_inset
- ).
- The difference in average
- \begin_inset Formula $\log_{2}$
- \end_inset
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FPKM
- \end_layout
- \end_inset
- values when a peak overlaps the promoter is about
- \begin_inset Formula $+5.67$
- \end_inset
- for H3K4me2,
- \begin_inset Formula $+5.76$
- \end_inset
- for H3K4me3, and
- \begin_inset Formula $-4.00$
- \end_inset
- for H3K27me3.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/FPKM-by-Peak-Violin-Plots-CROP.pdf
- lyxscale 50
- height 80theight%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Expression distributions of genes with and without promoter peaks.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:fpkm-by-peak"
- \end_inset
- \series bold
- Expression distributions of genes with and without promoter peaks.
-
- \series default
- For each histone mark in each experimental condition, the average RNA-seq
- abundance (
- \begin_inset Formula $\log_{2}$
- \end_inset
- FPKM) of each gene across all 4 donors was calculated.
- Genes were grouped based on whether or not a peak was called in their promoters
- in that condition, and the distribution of abundance values was plotted
- for the no-peak and peak groups.
- (Note: this figure was generated using the original peak calls and expression
- values from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- .)
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Gene expression and promoter histone methylation patterns show convergence
- between naïve and memory cells at day 14
- \end_layout
- \begin_layout Standard
- We hypothesized that if naïve cells had differentiated into memory cells
- by Day 14, then their patterns of expression and histone modification should
- converge with those of memory cells at Day 14.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the patterns of variation in all 3 histone marks in the promoter
- regions of the genome using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCoA
- \end_layout
- \end_inset
- .
- All 3 marks show a noticeable convergence between the naïve and memory
- samples at day 14, visible as an overlapping of the day 14 groups on each
- plot.
- This is consistent with the counts of significantly differentially modified
- promoters and estimates of the total numbers of differentially modified
- promoters shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Number-signif-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- For all histone marks, evidence of differential modification between naïve
- and memory samples was detected at every time point except day 14.
- The day 14 convergence pattern is also present in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA-group"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), albeit in the 2nd and 3rd principal coordinates, indicating that it is
- not the most dominant pattern driving gene expression.
- Taken together, the data show that promoter histone methylation for these
- 3 histone marks and RNA expression for naïve and memory cells are most
- similar at day 14, the furthest time point after activation.
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
- was also able to capture this day 14 convergence pattern in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- 5 (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-lf-scatter"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), which accounts for shared variation across all 3 histone marks and the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, confirming that this convergence is a coordinated pattern across
- all 4 data sets.
- While this observation does not prove that the naïve cells have differentiated
- into memory cells at Day 14, it is consistent with that hypothesis.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- placement p
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-promoter-PCA-group-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me2-prom"
- \end_inset
- PCoA plot of H3K4me2 promoters, after subtracting surrogate variables.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-promoter-PCA-group-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K4me3-prom"
- \end_inset
- PCoA plot of H3K4me3 promoters, after subtracting surrogate variables.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-promoter-PCA-group-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-H3K27me3-prom"
- \end_inset
- PCoA plot of H3K27me3 promoters, after subtracting surrogate variables.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/RNA-seq/PCA-final-23-CROP.png
- lyxscale 25
- width 45col%
- groupId pcoa-prom-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:RNA-PCA-group"
- \end_inset
- RNA-seq PCoA, after ComBat batch correction, showing principal coordinates
- 2 and 3.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Figure font too small
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- PCoA plots for promoter ChIP-seq and expression RNA-seq data
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:PCoA-promoters"
- \end_inset
- \series bold
- PCoA plots for promoter ChIP-seq and expression RNA-seq data.
-
- \series default
- Each point represents an individual sample.
- Samples with the same combination of cell type and time point are encircled
- with a shaded region to aid in visual identification of the sample groups.
- Samples of the same cell type from the same donor are connected by lines
- to indicate the
- \begin_inset Quotes eld
- \end_inset
- trajectory
- \begin_inset Quotes erd
- \end_inset
- of each donor's cells over time in PCoA space.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="6" columns="7">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Number of significant promoters
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Est.
- differentially modified promoters
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Time Point
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me2
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K4me3
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- H3K27me3
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4553
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 927
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 9967
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4149
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2404
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 567
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 278
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1570
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4370
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2145
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 6598
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 5
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 2313
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 139
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 490
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 9450
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 1148
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 4141
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Day 14
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Number of differentially modified promoters between naïve and memory cells
- at each time point after activation.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Number-signif-promoters"
- \end_inset
- \series bold
- Number of differentially modified promoters between naïve and memory cells
- at each time point after activation.
-
- \series default
- This table shows both the number of differentially modified promoters detected
- at a 10% FDR threshold (left half), and the total number of differentially
- modified promoters estimated using the method of averaging local FDR estimates
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2016"
- literal "false"
- \end_inset
- (right half).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Association between resting H3K4me2 and H3K4me3 promoter coverage landscapes
- and gene expression
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Need a better section title, for this and the next one.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Make sure use of coverage/abundance/whatever is consistent.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- For the figures in this section and the next, the group labels are arbitrary,
- so if time allows, it would be good to manually reorder them in a logical
- way, e.g.
- most upstream to most downstream.
- If this is done, make sure to update the text with the correct group labels.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To test whether the position of a histone mark relative to a gene's
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- was important, we looked at the
- \begin_inset Quotes eld
- \end_inset
- landscape
- \begin_inset Quotes erd
- \end_inset
- of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- read coverage in naïve Day 0 samples within 5 kbp of each gene's
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- by binning reads into 500-bp windows tiled across each promoter.
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values were calculated for the bins in each promoter and then the average
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- for each promoter's bins was normalized to zero, such that the values represent
- coverage relative to other regions of the same promoter rather than being
- proportional to absolute read count.
- The promoters were then clustered based on the normalized bin abundances
- using
- \begin_inset Formula $k$
- \end_inset
- -means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- .
- Different values of
- \begin_inset Formula $K$
- \end_inset
- were also tested, but did not substantially change the interpretation of
- the data.
- \end_layout
- \begin_layout Standard
- For H3K4me2, plotting the average bin abundances for each cluster reveals
- a simple pattern (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood-clusters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ): Cluster 5 represents a completely flat promoter coverage profile, likely
- consisting of genes with no H3K4me2 methylation in the promoter.
- All the other clusters represent a continuum of peak positions relative
- to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- In order from most upstream to most downstream, they are Clusters 6, 4,
- 3, 1, and 2.
- There do not appear to be any clusters representing coverage patterns other
- than lone peaks, such as coverage troughs or double peaks.
- Next, all promoters were plotted in a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCA
- \end_layout
- \end_inset
- plot based on the same relative bin abundance data, and colored based on
- cluster membership (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood-pca"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCA
- \end_layout
- \end_inset
- plot shows Cluster 5 (the
- \begin_inset Quotes eld
- \end_inset
- no peak
- \begin_inset Quotes erd
- \end_inset
- cluster) at the center, with the other clusters arranged in a counter-clockwise
- arc around it in the order noted above, from most upstream peak to most
- downstream.
- Notably, the
- \begin_inset Quotes eld
- \end_inset
- clusters
- \begin_inset Quotes erd
- \end_inset
- form a single large
- \begin_inset Quotes eld
- \end_inset
- cloud
- \begin_inset Quotes erd
- \end_inset
- with no apparent separation between them, further supporting the conclusion
- that these clusters represent an arbitrary partitioning of a continuous
- distribution of promoter coverage landscapes.
- While the clusters are a useful abstraction that aids in visualization,
- they are ultimately not an accurate representation of the data.
- The continuous nature of the distribution also explains why different values
- of
- \begin_inset Formula $K$
- \end_inset
- led to similar conclusions.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-clusters-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood-clusters"
- \end_inset
- Average relative coverage for each bin in each cluster.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-PCA-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood-pca"
- \end_inset
- PCA of relative coverage depth, colored by K-means cluster membership.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-expression-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood-expression"
- \end_inset
- Gene expression grouped by promoter coverage clusters.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Figure font too small
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- K-means clustering of promoter H3K4me2 relative coverage depth in naïve
- day 0 samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me2-neighborhood"
- \end_inset
- \series bold
- K-means clustering of promoter H3K4me2 relative coverage depth in naïve
- day 0 samples.
-
- \series default
- H3K4me2 ChIP-seq reads were binned into 500-bp windows tiled across each
- promoter from 5
- \begin_inset space ~
- \end_inset
- kbp upstream to 5
- \begin_inset space ~
- \end_inset
- kbp downstream, and the logCPM values were normalized within each promoter
- to an average of 0, yielding relative coverage depths.
- These were then grouped using K-means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- ,
- \series bold
-
- \series default
- and the average bin values were plotted for each cluster (a).
- The
- \begin_inset Formula $x$
- \end_inset
- -axis is the genomic coordinate of each bin relative to the transcription
- start site, and the
- \begin_inset Formula $y$
- \end_inset
- -axis is the mean relative coverage depth of that bin across all promoters
- in the cluster.
- Each line represents the average
- \begin_inset Quotes eld
- \end_inset
- shape
- \begin_inset Quotes erd
- \end_inset
- of the promoter coverage for promoters in that cluster.
- PCA was performed on the same data, and the first two PCs were plotted,
- coloring each point by its K-means cluster identity (b).
- For each cluster, the distribution of gene expression values was plotted
- (c).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Should have a table of p-values on difference of means between Cluster 5
- and the others.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To investigate the association between relative peak position and gene expressio
- n, we plotted the Naïve Day 0 expression for the genes in each cluster (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood-expression"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Most genes in Cluster 5, the
- \begin_inset Quotes eld
- \end_inset
- no peak
- \begin_inset Quotes erd
- \end_inset
- cluster, have low expression values.
- Taking this as the
- \begin_inset Quotes eld
- \end_inset
- baseline
- \begin_inset Quotes erd
- \end_inset
- distribution when no H3K4me2 methylation is present, we can compare the
- other clusters' distributions to determine which peak positions are associated
- with elevated expression.
- As might be expected, the 3 clusters representing peaks closest to the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , Clusters 1, 3, and 4, show the highest average expression distributions.
- Specifically, these clusters all have their highest
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- abundance within 1 kbp of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , consistent with the previously determined promoter radius.
- In contrast, Cluster 6, which represents peaks several kbp upstream of
- the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , shows a slightly higher average expression than baseline, while Cluster
- 2, which represents peaks several kbp downstream, doesn't appear to show
- any appreciable difference.
- Interestingly, the cluster with the highest average expression is Cluster
- 1, which represents peaks about 1 kbp downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , rather than Cluster 3, which represents peaks centered directly at the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- This suggests that conceptualizing the promoter as a region centered on
- the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- with a certain
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- may be an oversimplification – a peak that is a specific distance from
- the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- may have a different degree of influence depending on whether it is upstream
- or downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- All observations described above for H3K4me2
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- also appear to hold for H3K4me3 (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This is expected, since there is a high correlation between the positions
- where both histone marks occur.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-neighborhood-clusters-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me3-neighborhood-clusters"
- \end_inset
- Average relative coverage for each bin in each cluster.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-neighborhood-PCA-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me3-neighborhood-pca"
- \end_inset
- PCA of relative coverage depth, colored by K-means cluster membership.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K4me3-neighborhood-expression-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me3-neighborhood-expression"
- \end_inset
- Gene expression grouped by promoter coverage clusters.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- K-means clustering of promoter H3K4me3 relative coverage depth in naïve
- day 0 samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K4me3-neighborhood"
- \end_inset
- \series bold
- K-means clustering of promoter H3K4me3 relative coverage depth in naïve
- day 0 samples.
-
- \series default
- H3K4me3 ChIP-seq reads were binned into 500-bp windows tiled across each
- promoter from 5
- \begin_inset space ~
- \end_inset
- kbp upstream to 5
- \begin_inset space ~
- \end_inset
- kbp downstream, and the logCPM values were normalized within each promoter
- to an average of 0, yielding relative coverage depths.
- These were then grouped using K-means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- ,
- \series bold
-
- \series default
- and the average bin values were plotted for each cluster (a).
- The
- \begin_inset Formula $x$
- \end_inset
- -axis is the genomic coordinate of each bin relative to the transcription
- start site, and the
- \begin_inset Formula $y$
- \end_inset
- -axis is the mean relative coverage depth of that bin across all promoters
- in the cluster.
- Each line represents the average
- \begin_inset Quotes eld
- \end_inset
- shape
- \begin_inset Quotes erd
- \end_inset
- of the promoter coverage for promoters in that cluster.
- PCA was performed on the same data, and the first two PCs were plotted,
- coloring each point by its K-means cluster identity (b).
- For each cluster, the distribution of gene expression values was plotted
- (c).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Association between resting H3K27me3 promoter coverage landscapes and gene
- expression
- \end_layout
- \begin_layout Standard
- Unlike both H3K4 marks, whose main patterns of variation appear directly
- related to the size and position of a single peak within the promoter,
- the patterns of H3K27me3 methylation in promoters are more complex (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Once again looking at the relative coverage in 500-bp-wide bins within a
- 5 kbp radius around each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , promoters were clustered based on the normalized relative coverage values
- in each bin using
- \begin_inset Formula $k$
- \end_inset
- -means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood-clusters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This time, 3
- \begin_inset Quotes eld
- \end_inset
- axes
- \begin_inset Quotes erd
- \end_inset
- of variation can be observed, each represented by 2 clusters with opposing
- patterns.
- The first axis is greater upstream coverage (Cluster 1) vs.
- greater downstream coverage (Cluster 3); the second axis is the coverage
- at the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- itself: peak (Cluster 4) or trough (Cluster 2); lastly, the third axis
- represents a trough upstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- (Cluster 5) vs.
- downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- (Cluster 6).
- Referring to these opposing pairs of clusters as axes of variation is justified
- , because they correspond precisely to the first 3
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- PC
- \end_layout
- \end_inset
- in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCA
- \end_layout
- \end_inset
- plot of the relative coverage values (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood-pca"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCA
- \end_layout
- \end_inset
- plot reveals that as in the case of H3K4me2, all the
- \begin_inset Quotes eld
- \end_inset
- clusters
- \begin_inset Quotes erd
- \end_inset
- are really just sections of a single connected cloud rather than discrete
- clusters.
- The cloud is approximately ellipsoid-shaped, with each PC being an axis
- of the ellipsoid, and each cluster consisting of a pyramidal section of the
- ellipsoid.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-clusters-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood-clusters"
- \end_inset
- Average relative coverage for each bin in each cluster.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-PCA-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood-pca"
- \end_inset
- PCA of relative coverage depth, colored by K-means cluster membership.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-expression-CROP.png
- lyxscale 25
- width 30col%
- groupId covprof-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood-expression"
- \end_inset
- Gene expression grouped by promoter coverage clusters.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Repeated figure legends are kind of an issue here.
- What to do?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- K-means clustering of promoter H3K27me3 relative coverage depth in naïve
- day 0 samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:H3K27me3-neighborhood"
- \end_inset
- \series bold
- K-means clustering of promoter H3K27me3 relative coverage depth in naïve
- day 0 samples.
-
- \series default
- H3K27me3 ChIP-seq reads were binned into 500-bp windows tiled across each
- promoter from 5
- \begin_inset space ~
- \end_inset
- kbp upstream to 5
- \begin_inset space ~
- \end_inset
- kbp downstream, and the logCPM values were normalized within each promoter
- to an average of 0, yielding relative coverage depths.
- These were then grouped using
- \begin_inset Formula $k$
- \end_inset
- -means clustering with
- \begin_inset Formula $K=6$
- \end_inset
- ,
- \series bold
-
- \series default
- and the average bin values were plotted for each cluster (a).
- The
- \begin_inset Formula $x$
- \end_inset
- -axis is the genomic coordinate of each bin relative to the transcription
- start site, and the
- \begin_inset Formula $y$
- \end_inset
- -axis is the mean relative coverage depth of that bin across all promoters
- in the cluster.
- Each line represents the average
- \begin_inset Quotes eld
- \end_inset
- shape
- \begin_inset Quotes erd
- \end_inset
- of the promoter coverage for promoters in that cluster.
- PCA was performed on the same data, and the first two PCs were plotted,
- coloring each point by its K-means cluster identity (b).
- (Note: In (b), Cluster 6 is hidden behind all the other clusters.) For each
- cluster, the distribution of gene expression values was plotted (c).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood-expression"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , we can see that Clusters 1 and 2 are the only clusters with higher gene
- expression than the others.
- For Cluster 2, this is expected, since this cluster represents genes with
- depletion of H3K27me3 near the promoter.
- Hence, elevated expression in Cluster 2 is consistent with the conventional
- view of H3K27me3 as a deactivating mark.
- However, Cluster 1, the cluster with the most elevated gene expression,
- represents genes with elevated coverage upstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , or equivalently, decreased coverage downstream, inside the gene body.
- The opposite pattern, in which H3K27me3 is more abundant within the gene
- body and less abundant in the upstream promoter region, does not show
- any elevation in gene expression.
- As with H3K4me2, this shows that the location of H3K27 trimethylation relative
- to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- is potentially an important factor beyond simple proximity.
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Show the figures where the negative result ended this line of inquiry.
- I need to debug some errors resulting from an R upgrade to do this.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Defined pattern analysis
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- This was where I defined interesting expression patterns and then looked
- at initial relative promoter coverage for each expression pattern.
- Negative result.
- I forgot about this until recently.
- Worth including? Remember to also write methods.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Promoter CpG islands?
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- I forgot until recently about the work I did on this.
- Worth including? Remember to also write methods.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Write better section headers
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Each histone mark's
- \begin_inset Quotes eld
- \end_inset
- effective promoter extent
- \begin_inset Quotes erd
- \end_inset
- must be determined empirically
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows that H3K4me2, H3K4me3, and H3K27me3 are all enriched near promoters,
- relative to the rest of the genome, consistent with their conventionally
- understood role in regulating gene transcription.
- Interestingly, the radius within which this enrichment occurs is not the same
- for each histone mark.
- H3K4me2 and H3K4me3 are enriched within a 1
- \begin_inset space ~
- \end_inset
- kbp radius, while H3K27me3 is enriched within 2.5
- \begin_inset space ~
- \end_inset
- kbp.
- Notably, the determined promoter radius was consistent across all experimental
- conditions, varying only between different histone marks.
- This suggests that the conventional
- \begin_inset Quotes eld
- \end_inset
- one size fits all
- \begin_inset Quotes erd
- \end_inset
- approach of defining a single promoter region for each gene (or each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- ) and using that same promoter region for analyzing all types of genomic
- data within an experiment may not be appropriate, and a better approach
- may be to use a separate promoter radius for each kind of data, with each
- radius being derived from the data itself.
- Furthermore, the apparent asymmetry of upstream and downstream promoter
- histone modification with respect to gene expression, seen in Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , shows that even the concept of a promoter
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- is likely an oversimplification.
- At a minimum, nearby enrichment of peaks should be evaluated separately
- for both upstream and downstream peaks, and an appropriate
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- should be selected for each direction.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Sarah: I would have to search the literature, but I believe this has been
- observed before.
- The position relative to the TSS likely has to do with recruitment of the
- transcriptional machinery and the space required for that.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- show that the determined promoter radius of 1
- \begin_inset space ~
- \end_inset
- kbp is approximately consistent with the distance from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- at which enrichment of H3K4 methylation correlates with increased expression,
- showing that this radius, which was determined simply by measuring
- the distance from each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- to the nearest peak, also has functional significance.
- For H3K27me3, the correlation between histone modification near the promoter
- and gene expression is more complex, involving non-peak variations such
- as troughs in coverage at the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- and asymmetric coverage upstream and downstream, so it is difficult in
- this case to evaluate whether the 2.5
- \begin_inset space ~
- \end_inset
- kbp radius determined from TSS-to-peak distances is functionally significant.
- However, the two patterns of coverage associated with elevated expression
- levels both have interesting features within this radius.
- \end_layout
- \begin_layout Subsection
- Day 14 convergence is consistent with naïve-to-memory differentiation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Look up some more references for these histone marks being involved in memory
- differentiation.
- (Ask Sarah)
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- We observed that all 3 histone marks and the gene expression data exhibit
- evidence of convergence in abundance between naïve and memory cells by
- day 14 after activation (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Number-signif-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- scatter plots (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:mofa-lf-scatter"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) show that this pattern of convergence is captured in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- 5.
- Like all the
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- LF
- \end_layout
- \end_inset
- in this plot, this factor explains a substantial portion of the variance
- in all 4 data sets, indicating a coordinated pattern of variation shared
- across all histone marks and gene expression.
- This is consistent with the expectation that any naïve CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells remaining at day 14 should have differentiated into memory cells
- by that time, and should therefore have a genomic and epigenomic state
- similar to memory cells.
- This convergence is evidence that these histone marks all play an important
- role in the naïve-to-memory differentiation process.
- A histone mark that was not involved in naïve-to-memory differentiation
- would not be expected to converge in this way after activation.
- \end_layout
- \begin_layout Standard
- In H3K4me2, H3K4me3, and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- , this convergence appears to be in progress already by Day 5, shown by
- the smaller distance between naïve and memory cells at day 5 along the
-
- \begin_inset Formula $y$
- \end_inset
- -axes in Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-H3K4me2-prom"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-H3K4me3-prom"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:RNA-PCA-group"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This agrees with the model proposed by Sarah Lamere based on a prior analysis
- of the same data, shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Lamere2016-Fig8"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , which shows the pattern of H3K4 methylation and expression for naïve cells
- and memory cells converging at day 5.
- This model was developed without the benefit of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCoA
- \end_layout
- \end_inset
- plots in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , which have been corrected for confounding factors by ComBat and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- .
- This shows that proper batch correction assists in extracting meaningful
- patterns in the data while eliminating systematic sources of irrelevant
- variation in the data, allowing simple automated procedures like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCoA
- \end_layout
- \end_inset
- to reveal interesting behaviors in the data that were previously only detectabl
- e by a detailed manual analysis.
- While the ideal comparison to demonstrate this convergence would be naïve
- cells at day 14 to memory cells at day 0, this is not feasible in this
- experimental system, since neither naïve nor memory cells are able to fully
- return to their pre-activation state, as shown by the lack of overlap between
- days 0 and 14 for either naïve or memory cells in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:PCoA-promoters"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/LaMere2016_fig8.pdf
- lyxscale 50
- width 100col%
- groupId colfullwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Lamere 2016 Figure 8 “Model for the role of H3K4 methylation during CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cell activation.
- \begin_inset Quotes erd
- \end_inset
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Lamere2016-Fig8"
- \end_inset
- \series bold
- Lamere 2016 Figure 8
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- ,
- \begin_inset Quotes eld
- \end_inset
- Model for the role of H3K4 methylation during CD4
- \begin_inset Formula $\mathbf{^{+}}$
- \end_inset
- T-cell activation.
- \begin_inset Quotes erd
- \end_inset
-
- \series default
- (Reproduced with permission.)
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- The location of histone modifications within the promoter is important
- \end_layout
- \begin_layout Standard
- When looking at patterns in the relative coverage of each histone mark near
- the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- of each gene, several interesting patterns were apparent.
- For H3K4me2 and H3K4me3, the pattern was straightforward: the consistent
- pattern across all promoters was a single peak a few kbp wide, with the
- main axis of variation being the position of this peak relative to the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- (Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- &
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- There were no obvious
- \begin_inset Quotes eld
- \end_inset
- preferred
- \begin_inset Quotes erd
- \end_inset
- positions, but rather a continuous distribution of relative positions ranging
- all across the promoter region.
- The association with gene expression was also straightforward: peaks closer
- to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- were more strongly associated with elevated gene expression.
- Coverage downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- appears to be more strongly associated with elevated expression than coverage
- at the same distance upstream, indicating that the
- \begin_inset Quotes eld
- \end_inset
- effective promoter region
- \begin_inset Quotes erd
- \end_inset
- for H3K4me2 and H3K4me3 may be centered downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- The relative promoter coverage for H3K27me3 had a more complex pattern,
- with two specific patterns of promoter coverage associated with elevated
- expression: a sharp depletion of H3K27me3 around the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- relative to the surrounding area, and a depletion of H3K27me3 downstream
- of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- relative to upstream (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- A previous study found that H3K27me3 depletion within the gene body was
- associated with elevated gene expression in 4 different cell types in mice
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Young2011"
- literal "false"
- \end_inset
- .
- This is consistent with the second pattern described here.
- This study also reported that a spike in coverage at the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- was associated with
- \emph on
- lower
- \emph default
- expression, which is indirectly consistent with the first pattern described
- here, in the sense that it associates lower H3K27me3 levels near the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- with higher expression.
- \end_layout
- \begin_layout Subsection
- A reproducible workflow aids in analysis
- \end_layout
- \begin_layout Standard
- The analyses described in this chapter were organized into a reproducible
- workflow using the Snakemake workflow management system
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Koster2012"
- literal "false"
- \end_inset
- .
- As shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:rulegraph"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , the workflow includes many steps with complex dependencies between them.
- For example, the step that counts the number of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads in 500
- \begin_inset space ~
- \end_inset
- bp windows in each promoter (the starting point for Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), named
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- chipseq_count_tss_neighborhoods
- \end_layout
- \end_inset
- , depends on the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- abundance estimates in order to select the most-used
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- for each gene, the aligned
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- reads, the index for those reads, and the blacklist of regions to be excluded
- from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- analysis.
- Each step declares its inputs and outputs, and Snakemake uses these to
- determine the dependencies between steps.
- Each step is marked as depending on all the steps whose outputs match its
- inputs, generating the workflow graph in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:rulegraph"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , which Snakemake uses to determine the order in which to execute each step
- so that each step is executed only after all of the steps it depends on
- have completed, thereby automating the entire workflow from start to finish.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
- lyxscale 50
- width 100col%
- height 95theight%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Dependency graph of steps in reproducible workflow.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:rulegraph"
- \end_inset
- \series bold
- Dependency graph of steps in reproducible workflow.
-
- \series default
- The analysis flows from left to right.
- Arrows indicate which analysis steps depend on the output of other steps.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In addition to simply making it easier to organize the steps in the analysis,
- structuring the analysis as a workflow allowed for some analysis strategies
- that would not have been practical otherwise.
- For example, 5 different
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- quantification methods were tested against two different reference transcriptom
- e annotations for a total of 10 different quantifications of the same
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data.
- These were then compared against each other in the exploratory data analysis
- step, to determine that the results were not very sensitive to either the
- choice of quantification method or the choice of annotation.
- This was possible with a single script for the exploratory data analysis,
- because Snakemake was able to automate running this script for every combinatio
- n of method and reference.
- In a similar manner, two different peak calling methods were tested against
- each other, and in this case it was determined that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SICER
- \end_layout
- \end_inset
- was unambiguously superior to
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MACS
- \end_layout
- \end_inset
- for all histone marks studied.
- By enabling these types of comparisons, structuring the analysis as an
- automated workflow allowed important analysis decisions to be made in a
- data-driven way, by running every reasonable option through the downstream
- steps, seeing the consequences of choosing each option, and deciding accordingl
- y.
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Subsection
- Data quality issues limit conclusions
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Is this needed?
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Future Directions
- \end_layout
- \begin_layout Standard
- The analysis of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- in CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells in Chapter 2 is in many ways a preliminary study that suggests
- a multitude of new avenues of investigation.
- Here we consider a selection of such avenues.
- \end_layout
- \begin_layout Subsection
- Previous negative results
- \end_layout
- \begin_layout Standard
- Two additional analyses were conducted beyond those reported in the results.
- First, we searched for evidence that the presence or absence of a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- CpGi
- \end_layout
- \end_inset
- in the promoter was correlated with increases or decreases in gene expression
- or any histone mark in any of the tested contrasts.
- Second, we searched for evidence that the relative
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- coverage profiles prior to activation could predict the change in expression
- of a gene after activation.
- Neither analysis turned up any clear positive results.
- \end_layout
- \begin_layout Subsection
- Improve on the idea of an effective promoter radius
- \end_layout
- \begin_layout Standard
- This study introduced the concept of an
- \begin_inset Quotes eld
- \end_inset
- effective promoter radius
- \begin_inset Quotes erd
- \end_inset
- specific to each histone mark based on distance from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- within which an excess of peaks was called for that mark.
- This concept was then used to guide further analyses throughout the study.
- However, while the effective promoter radius was useful in those analyses,
- it is both limited in theory and shown in practice to be a possible oversimplif
- ication.
- First, the effective promoter radii used in this study were chosen based
- on manual inspection of the TSS-to-peak distance distributions in Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , selecting round numbers for analyst convenience (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:effective-promoter-radius"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- It would be better to define an algorithm that selects a more precise radius
- based on the features of the graph.
- One possible way to do this would be to randomly rearrange the called peaks
- throughout the genome many times (while preserving the distribution of peak widths)
- and re-generate the same plot as in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:near-promoter-peak-enrich"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This would yield a better
- \begin_inset Quotes eld
- \end_inset
- background
- \begin_inset Quotes erd
- \end_inset
- distribution that demonstrates the degree of near-TSS enrichment that would
- be expected by random chance.
- The effective promoter radius could be defined as the point where the true
- distribution diverges from the randomized background distribution.
-
- \end_layout
- \begin_layout Standard
- Furthermore, the above definition of effective promoter radius has the significa
- nt limitation of being based on the peak calling method.
- It is thus very sensitive to the choice of peak caller and significance
- threshold for calling peaks, as well as the degree of saturation in the
- sequencing.
- Calling peaks from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- samples with insufficient coverage depth, with the wrong peak caller, or
- with a different significance threshold could give a drastically different
- number of called peaks, and hence a drastically different distribution
- of peak-to-TSS distances.
- To address this, it is desirable to develop a better method of determining
- the effective promoter radius that relies only on the distribution of read
- coverage around the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- , independent of the peak calling.
- Furthermore, as demonstrated by the upstream-downstream asymmetries observed
- in Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K27me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , this definition should determine a different radius for the upstream and
- downstream directions.
- At this point, it may be better to rename this concept
- \begin_inset Quotes eld
- \end_inset
- effective promoter extent
- \begin_inset Quotes erd
- \end_inset
- and avoid the word
- \begin_inset Quotes eld
- \end_inset
- radius
- \begin_inset Quotes erd
- \end_inset
- , since a radius implies a symmetry about the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- that is not supported by the data.
- \end_layout
- \begin_layout Standard
- Beyond improving the definition of effective promoter extent, functional
- validation is necessary to show that this measure of near-TSS enrichment
- has biological meaning.
- Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me2-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:H3K4me3-neighborhood"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- already provide a very limited functional validation of the chosen promoter
- extents for H3K4me2 and H3K4me3 by showing that spikes in coverage within
- this region are most strongly correlated with elevated gene expression.
- However, there are other ways to show functional relevance of the promoter
- extent.
- For example, correlations could be computed between read counts in peaks
- near gene promoters and the expression level of those genes, and these
- correlations could be plotted against the distance of the peak upstream
- or downstream of the gene's
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- If the promoter extent truly defines a
- \begin_inset Quotes eld
- \end_inset
- sphere of influence
- \begin_inset Quotes erd
- \end_inset
- within which a histone mark is involved with the regulation of a gene,
- then the correlations for peaks within this extent should be significantly
- higher than those further upstream or downstream.
- Peaks within these extents may also be more likely to show differential
- modification than those outside genic regions of the genome.
- \end_layout
- \begin_layout Subsection
- Design experiments to focus on post-activation convergence of naïve & memory
- cells
- \end_layout
- \begin_layout Standard
- In this study, a convergence between naïve and memory cells was observed
- in both the pattern of gene expression and the epigenetic state of the 3
- histone marks studied, consistent with the hypothesis that any naïve cells
- remaining 14 days after activation have differentiated into memory cells,
- and that both gene expression and these histone marks are involved in this
- differentiation.
- However, the current study was not designed with this specific hypothesis
- in mind, and it therefore has some deficiencies with regard to testing
- it.
- The memory CD4
- \begin_inset Formula $^{+}$
- \end_inset
- samples at day 14 do not resemble the memory samples at day 0, indicating
- that in the specific model of activation used for this experiment, the
- cells are not guaranteed to return to their original pre-activation state,
- or perhaps this process takes substantially longer than 14 days.
- This difference is expected, as the cell cultures in this experiment were
- treated with IL2 from day 5 onward
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- , so the signalling environments in which the cells are cultured are different
- at day 0 and day 14.
- This is a challenge for testing the convergence hypothesis because the
- ideal comparison to prove that naïve cells are converging to a resting
- memory state would be to compare the final naïve time point to the Day
- 0 memory samples, but this comparison is only meaningful if memory cells
- generally return to the same
- \begin_inset Quotes eld
- \end_inset
- resting
- \begin_inset Quotes erd
- \end_inset
- state that they started at.
- \end_layout
- \begin_layout Standard
- Because pre-culture and post-culture cells will probably never behave identicall
- y even if they both nominally have a
- \begin_inset Quotes eld
- \end_inset
- resting
- \begin_inset Quotes erd
- \end_inset
- phenotype, a different experiment should be designed in which post-activation
- naïve cells are compared to memory cells that were cultured for the same
- amount of time but never activated, in addition to post-activation memory
- cells.
- If the convergence hypothesis is correct, both post-activation cultures
- should converge on the culture of never-activated memory cells.
- \end_layout
- \begin_layout Standard
- In addition, if naïve-to-memory convergence is a general pattern, it should
- also be detectable in other epigenetic marks, including other histone marks
- and DNA methylation.
- An experiment should be designed studying a large number of epigenetic
- marks known or suspected to be involved in regulation of gene expression,
- assaying all of these at the same pre- and post-activation time points.
- Multi-dataset factor analysis methods like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MOFA
- \end_layout
- \end_inset
- can then be used to identify coordinated patterns of regulation shared
- across many epigenetic marks.
- Of course, CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells are not the only adaptive immune cells that exhibit memory formation.
- A similar study could be designed for CD8
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells, B-cells, and even specific subsets of CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells, such as Th1, Th2, Treg, and Th17 cells, to determine whether these
- also show convergence.
- \end_layout
- \begin_layout Subsection
- Follow up on hints of interesting patterns in promoter relative coverage
- profiles
- \end_layout
- \begin_layout Standard
- The analysis of promoter coverage landscapes in resting naïve CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells and their correlations with gene expression raises many interesting
- questions.
- The chosen analysis strategy used a clustering approach, but this approach
- was subsequently shown to be a poor fit for the data.
- In light of this, a better means of dimension reduction for promoter landscape
- data is required.
- In the case of H3K27me3, one option is to define the first 3
- principal components as orthogonal promoter
- \begin_inset Quotes eld
- \end_inset
- state variables
- \begin_inset Quotes erd
- \end_inset
- : upstream vs downstream coverage, TSS-centered peak vs trough, and proximal
- upstream trough vs proximal downstream trough.
- Gene expression could then be modeled as a function of these three variables,
- or possibly as a function of the first
- \begin_inset Formula $N$
- \end_inset
- principal components for
- \begin_inset Formula $N$
- \end_inset
- larger than 3.
- For H3K4me2 and H3K4me3, a better representation might be obtained by transform
- ing the first 2 principal coordinates into a polar coordinate system
- \begin_inset Formula $(r,\theta)$
- \end_inset
- with the origin at the center of the
- \begin_inset Quotes eld
- \end_inset
- no peak
- \begin_inset Quotes erd
- \end_inset
- cluster, where the radius
- \begin_inset Formula $r$
- \end_inset
- represents the peak height above the background and the angle
- \begin_inset Formula $\theta$
- \end_inset
- represents the peak's position upstream or downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
-
- \end_layout
- \begin_layout Standard
- Another weakness in the current analysis is the normalization of the average
- abundance of each promoter to an average of zero.
- This allows the abundance value in each window to represent the relative
- abundance of that window compared to all the other windows in the interrogated
- area.
- However, while using the remainder of the windows to set the
- \begin_inset Quotes eld
- \end_inset
- background
- \begin_inset Quotes erd
- \end_inset
- level against which each window is normalized is convenient, it is far
- from optimal.
- As shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:peak-calling-summary"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , many enriched regions are larger than the 5
- \begin_inset space ~
- \end_inset
- kbp radius, which means there may not be any
- \begin_inset Quotes eld
- \end_inset
- background
- \begin_inset Quotes erd
- \end_inset
- regions within 5
- \begin_inset space ~
- \end_inset
- kbp of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- to normalize against.
- For example, this normalization strategy fails to distinguish between a
- trough in coverage at the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- and a pair of wide peaks upstream and downstream of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- .
- Both cases would present as lower coverage in the windows immediately adjacent
- to the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TSS
- \end_layout
- \end_inset
- and higher coverage in windows further away, but the functional implications
- of these two cases might be completely different.
- To improve the normalization, the background estimation method used by
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SICER
- \end_layout
- \end_inset
- , which is specifically designed for finding broad regions of enrichment,
- should be adapted to estimate the background sequencing depth in each window
- from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- input samples, and each window's read count should be normalized against
- the background and reported as a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logFC
- \end_layout
- \end_inset
- relative to that background.
- \end_layout
- \begin_layout Standard
- Lastly, the analysis of promoter coverage landscapes presented in this work
- only looked at promoter coverage of resting naïve CD4
- \begin_inset Formula $^{+}$
- \end_inset
- T-cells, with the goal of determining whether this initial promoter state
- was predictive of post-activation changes in gene expression.
- Changes in the promoter coverage landscape over time have not yet been
- considered.
- This represents a significant analysis challenge, by adding yet another
- dimension (genomic coordinate) into the data.
- \end_layout
- \begin_layout Subsection
- Investigate causes of high correlation between mutually exclusive histone
- marks
- \end_layout
- \begin_layout Standard
- The high correlation in coverage depth observed between H3K4me2 and
- H3K4me3 is both expected and unexpected.
- Since both marks are associated with elevated gene transcription, a positive
- correlation between them is not surprising.
- However, these two marks represent different post-translational modifications
- of the
- \emph on
- same
- \emph default
- lysine residue on the histone H3 polypeptide, which means that they cannot
- both be present on the same H3 subunit.
- Thus, the high correlation between them has several potential explanations.
- One possible reason is cell population heterogeneity: perhaps some genomic
- loci are frequently marked with H3K4me2 in some cells, while in other cells
- the same loci are marked with H3K4me3.
- Another possibility is allele-specific modifications: the loci are marked
- in each diploid cell with H3K4me2 on one allele and H3K4me3 on the other
- allele.
- Lastly, since each histone octamer contains 2 H3 subunits, it is possible
- that having one H3K4me2 mark and one H3K4me3 mark on a given histone octamer
- represents a distinct epigenetic state with a different function than either
- double H3K4me2 or double H3K4me3.
-
- \end_layout
- \begin_layout Standard
- The hypothesis of allele-specific histone modification can easily be tested
- with existing data by locating all heterozygous loci occurring within both
- H3K4me3 and H3K4me2 peaks and checking for opposite allelic imbalance between
- H3K4me3 and H3K4me2 reads at each locus.
- If the allele fractions in the reads from the two histone marks for each
- locus are plotted against each other, there should be a negative correlation.
- If no such negative correlation is found, then allele-specific histone
- modification is unlikely to be the reason for the high correlation between
- these histone marks.
- \end_layout
- \begin_layout Standard
- To test the hypothesis that H3K4me2 and H3K4me3 marks are occurring on the
- same histones,
- a double
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP
- \end_layout
- \end_inset
- experiment can be performed
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Jin2007"
- literal "false"
- \end_inset
- .
- In this assay, the input DNA goes through two sequential immunoprecipitations
- with different antibodies: first the anti-H3K4me2 antibody, then the anti-H3K4m
- e3 antibody.
- Only histones bearing both histone marks, and the DNA associated with them, should
- be isolated.
- This can be followed by
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- HTS
- \end_layout
- \end_inset
- to form a
- \begin_inset Quotes eld
- \end_inset
- double
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- \begin_inset Quotes erd
- \end_inset
- assay that can be used to identify DNA regions bound by the isolated histones
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Jin2009"
- literal "false"
- \end_inset
- .
- If peaks called from this double
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ChIP-seq
- \end_layout
- \end_inset
- assay are highly correlated with both H3K4me2 and H3K4me3 peaks, then this
- is strong evidence that the correlation between the two marks is actually
- caused by physical co-location on the same histone.
- \end_layout
- \begin_layout Chapter
- \begin_inset CommandInset label
- LatexCommand label
- name "chap:Improving-array-based-diagnostic"
- \end_inset
- Improving array-based diagnostics for transplant rejection by optimizing
- data preprocessing
- \end_layout
- \begin_layout Standard
- \size large
- Ryan C.
- Thompson, Sunil M.
- Kurian, Thomas Whisnant, Padmaja Natarajan, Daniel R.
- Salomon
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Reintroduce all abbreviations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Introduction
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Fill this out
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Arrays for diagnostics
- \end_layout
- \begin_layout Standard
- Arrays are an attractive platform for diagnostics
- \end_layout
- \begin_layout Subsection
- Proper pre-processing is essential for array data
- \end_layout
- \begin_layout Standard
- Microarrays, bead arrays, and similar assays produce raw data in the form
- of fluorescence intensity measurements, with each intensity measurement
- proportional to the abundance of some fluorescently labelled target DNA
- or RNA sequence that base pairs to a specific probe sequence.
- However, the fluorescence measurements for each probe are also affected
- by many technical confounding factors, such as the concentration of target
- material, strength of off-target binding, the sensitivity of the imaging
- sensor, and visual artifacts in the image.
- Some array designs also use multiple probe sequences for each target.
- Hence, extensive pre-processing of array data is necessary to normalize
- out the effects of these technical factors and summarize the information
- from multiple probes to arrive at a single usable estimate of abundance
- or other relevant quantity, such as a ratio of two abundances, for each
- target
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Gentleman2005"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- The choice of pre-processing algorithms used in the analysis of an array
- data set can have a large effect on the results of that analysis.
- However, despite their importance, these steps are often neglected or rushed
- in order to get to the more scientifically interesting analysis steps involving
- the actual biology of the system under study.
- Hence, it is often possible to achieve substantial gains in statistical
- power, model goodness-of-fit, or other relevant performance measures, by
- checking the assumptions made by each preprocessing step and choosing specific
- normalization methods tailored to the specific goals of the current analysis.
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Subsection
- Clinical diagnostic applications for microarrays require single-channel
- normalization
- \end_layout
- \begin_layout Standard
- As the cost of performing microarray assays falls, there is increasing interest
- in using genomic assays for diagnostic purposes, such as distinguishing
-
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsdisp*{TX}{healthy transplants (TX)}
- \end_layout
- \end_inset
- from transplants undergoing
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- or
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- .
- However, the standard normalization algorithm used for microarray data,
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Irizarry2003a"
- literal "false"
- \end_inset
- , is not applicable in a clinical setting.
- Two of the steps in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , quantile normalization and probe summarization by median polish, depend
- on every array in the data set being normalized.
- This means that adding or removing any arrays from a data set changes the
- normalized values for all arrays, and data sets that have been normalized
- separately cannot be compared to each other.
- Hence, when using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , any arrays to be analyzed together must also be normalized together, and
- the set of arrays included in the data set must be held constant throughout
- an analysis.
- \end_layout
- \begin_layout Standard
- These limitations present serious impediments to the use of arrays as a
- diagnostic tool.
- When training a classifier, the samples to be classified must not be involved
- in any step of the training process, lest their inclusion bias the training
- process.
- Once a classifier is deployed in a clinical setting, the samples to be
- classified will not even
- \emph on
- exist
- \emph default
- at the time of training, so including them would be impossible even if
- it were statistically justifiable.
- Therefore, any machine learning application for microarrays demands that
- the normalized expression values computed for an array must depend only
- on information contained within that array.
- This would ensure that each array's normalization is independent of every
- other array, and that arrays normalized separately can still be compared
- to each other without bias.
- Such a normalization is commonly referred to as
- \begin_inset Quotes eld
- \end_inset
- single-channel normalization
- \begin_inset Quotes erd
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- addresses these concerns by replacing the quantile normalization and median
- polish with alternatives that do not introduce inter-array dependence,
- allowing each array to be normalized independently of all others
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- Quantile normalization is performed against a pre-generated set of quantiles
- learned from a collection of 850 publicly available arrays sampled from
- a wide variety of tissues in
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsdisp*{GEO}{the Gene Expression Omnibus (GEO)}
- \end_layout
- \end_inset
- .
- Each array's probe intensity distribution is normalized against these pre-gener
- ated quantiles.
- The median polish step is replaced with a robust weighted average of probe
- intensities, using inverse variance weights learned from the same public
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
- data.
- The result is a normalization that satisfies the requirements mentioned
- above: each array is normalized independently of all others, and any two
- normalized arrays can be compared directly to each other.
- \end_layout
- \begin_layout Standard
- One important limitation of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- is that it requires a separate reference data set from which to learn the
- parameters (reference quantiles and probe weights) that will be used to
- normalize each array.
- These parameters are specific to a given array platform, and pre-generated
- parameters are only provided for the most common platforms, such as Affymetrix
- hgu133plus2.
- For a less common platform, such as hthgu133pluspm, it is necessary to
- learn custom parameters from in-house data before
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- can be used to normalize samples on that platform
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- One other option is the aptly-named
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsdisp*{SCAN}{Single Channel Array Normalization (SCAN)}
- \end_layout
- \end_inset
- , which adapts a normalization method originally designed for tiling arrays
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Piccolo2012"
- literal "false"
- \end_inset
- .
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- is truly single-channel in that it does not require a set of normalization
- parameters estimated from an external set of reference samples like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- does.
- \end_layout
- \begin_layout Subsection
- Heteroskedasticity must be accounted for in methylation array data
- \end_layout
- \begin_layout Standard
- DNA methylation arrays are a relatively new kind of assay that uses microarrays
- to measure the degree of methylation on cytosines in specific regions arrayed
- across the genome.
- First, bisulfite treatment converts all unmethylated cytosines to uracil
- (which are read as thymine during amplification and sequencing) while leaving
- methylated cytosines unaffected.
- Then, each target region is interrogated with two probes: one binds to
- the original genomic sequence and interrogates the level of methylated
- DNA, and the other binds to the same sequence with all cytosines replaced
- by thymidines and interrogates the level of unmethylated DNA.
- \end_layout
- \begin_layout Standard
- After normalization, these two probe intensities are summarized in one of
- two ways, each with advantages and disadvantages.
- β
- \series bold
-
- \series default
- values, interpreted as fraction of DNA copies methylated, range from 0 to
- 1.
- β
- \series bold
-
- \series default
- values are conceptually easy to interpret, but the constrained range makes
- them unsuitable for linear modeling, and their error distributions are
- highly non-normal, which also frustrates linear modeling.
-
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsdisp*{M-value}{M-values}
- \end_layout
- \end_inset
- , interpreted as the log ratios of methylated to unmethylated copies for
- each probe region, are computed by mapping the beta values from
- \begin_inset Formula $[0,1]$
- \end_inset
- onto
- \begin_inset Formula $(-\infty,+\infty)$
- \end_inset
- using a sigmoid curve (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Sigmoid-beta-m-mapping"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This transformation results in values with better statistical properties:
- the unconstrained range is suitable for linear modeling, and the error
- distributions are more normal.
- Hence, most linear modeling and other statistical testing on methylation
- arrays is performed using
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/sigmoid.pdf
- lyxscale 50
- width 60col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Sigmoid shape of the mapping between β and M values.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Sigmoid-beta-m-mapping"
- \end_inset
- \series bold
- Sigmoid shape of the mapping between β and M values.
-
- \series default
- This mapping is monotonic and non-linear, but it is approximately linear
- in the neighborhood of
- \begin_inset Formula $(\beta=0.5,M=0)$
- \end_inset
- .
-
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- However, the steep slope of the sigmoid transformation near 0 and 1 tends
- to exaggerate small differences in β values near those extremes, which
- in turn amplifies the error in those values, leading to a U-shaped trend
- in the mean-variance curve: extreme values have higher variances than values
- near the middle.
- This mean-variance dependency must be accounted for when fitting the linear
- model for differential methylation, or else the variance will be systematically
- overestimated for probes with moderate
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- and underestimated for probes with extreme
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- .
- This is particularly undesirable for methylation data because the intermediate
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- are the ones of most interest, since they are more likely to represent
- areas of varying methylation, whereas extreme
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- typically represent complete methylation or complete lack of methylation.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- read count data are also known to show heteroskedasticity, and the voom
- method was introduced for modeling this heteroskedasticity by estimating
- the mean-variance trend in the data and using this trend to assign precision
- weights to each observation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014"
- literal "false"
- \end_inset
- .
- While methylation array data are not derived from counts and have a very
- different mean-variance relationship from that of typical
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- data, the voom method makes no specific assumptions on the shape of the
- mean-variance relationship – it only assumes that the relationship can
- be modeled as a smooth curve.
- Hence, the method is sufficiently general to model the mean-variance relationsh
- ip in methylation array data.
- However, while the method does not require count data as input, the standard
- implementation of voom assumes that the input is given in raw read counts,
- and it must be adapted to run on methylation
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Subsection
- Evaluation of classifier performance with different normalization methods
- \end_layout
- \begin_layout Standard
- For testing different expression microarray normalizations, a data set of
- 157 hgu133plus2 arrays was used, consisting of blood samples from kidney
- transplant patients whose grafts had been graded as
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- ,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- , or
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- via biopsy and histology (46 TX, 69 AR, 42 ADNR)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kurian2014"
- literal "true"
- \end_inset
- .
- Additionally, an external validation set of 75 samples was gathered from
- public
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GEO
- \end_layout
- \end_inset
- data (37 TX, 38 AR, no ADNR).
-
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Find appropriate GEO identifiers if possible.
- Kurian 2014 says GSE15296, but this seems to be different data.
- I also need to look up the GEO accession for the external validation set.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To evaluate the effect of each normalization on classifier performance,
- the same classifier training and validation procedure was used after each
- normalization method.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PAM
- \end_layout
- \end_inset
- algorithm was used to train a nearest shrunken centroid classifier on the
- training set and select the appropriate threshold for centroid shrinking
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Tibshirani2002"
- literal "false"
- \end_inset
- .
- Then the trained classifier was used to predict the class probabilities
- of each validation sample.
- From these class probabilities,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ROC
- \end_layout
- \end_inset
- curves and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AUC
- \end_layout
- \end_inset
- values were generated
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Turck2011"
- literal "false"
- \end_inset
- .
- Each normalization was tested on two different sets of training and validation
- samples.
- For internal validation, the 115
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- arrays in the internal set were split at random into two equal-sized sets,
- one for training and one for validation, each containing the same numbers
- of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- samples as the other set.
- For external validation, the full set of 115
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- samples was used as a training set, and the 75 external
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- samples were used as the validation set.
- Thus, 2
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ROC
- \end_layout
- \end_inset
- curves and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AUC
- \end_layout
- \end_inset
- values were generated for each normalization method: one internal and one
- external.
- Because the external validation set contains no
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- samples, only classification of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- samples was considered.
- The
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- samples were included during normalization but excluded from all classifier
- training and validation.
- This ensures that the performance on internal and external validation sets
- is directly comparable, since both are performing the same task: distinguishing
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Summarize the get.best.threshold algorithm for PAM threshold selection, or
- just put the code online?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Six different normalization strategies were evaluated.
- First, 2 well-known non-single-channel normalization methods were considered:
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- and dChip
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Li2001,Irizarry2003a"
- literal "false"
- \end_inset
- .
- Since
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- produces expression values on a
- \begin_inset Formula $\log_{2}$
- \end_inset
- scale and dChip does not, the values from dChip were
- \begin_inset Formula $\log_{2}$
- \end_inset
- transformed after normalization.
- Next,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- and dChip followed by
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- were tested
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Pelz2008"
- literal "false"
- \end_inset
- .
- Post-processing with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- does not turn
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- or dChip into single-channel methods, but it may help mitigate batch effects
- and is therefore useful as a benchmark.
- Lastly, the two single-channel normalization methods,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- , were tested
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010,Piccolo2012"
- literal "false"
- \end_inset
- .
- When evaluating internal validation performance, only the 157 internal
- samples were normalized; when evaluating external validation performance,
- all 157 internal samples and 75 external samples were normalized together.
- \end_layout
- \begin_layout Standard
- For demonstrating the problem with separate normalization of training and
- validation data, one additional normalization was performed: the internal
- and external sets were each normalized separately using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , and the normalized data for each set were combined into a single set with
- no further attempts at normalizing between the two sets.
- This represents approximately how
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- would have to be used in a clinical setting, where the samples to be classified
- are not available at the time the classifier is trained.
- \end_layout
- \begin_layout Subsection
- Generating custom fRMA vectors for hthgu133pluspm array platform
- \end_layout
- \begin_layout Standard
- In order to enable
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization for the hthgu133pluspm array platform, custom
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization vectors were trained using the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- frmaTools
- \end_layout
- \end_inset
- package
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- Separate vectors were created for two types of samples: kidney graft biopsy
- samples and blood samples from graft recipients.
- For training, 341 kidney biopsy samples from 2 data sets and 965 blood
- samples from 5 data sets were used as the reference set.
- Arrays were grouped into batches based on unique combinations of sample
- type (blood or biopsy), diagnosis (TX, AR, etc.), data set, and scan date.
- Thus, each batch represents arrays of the same kind that were run together
- on the same day.
- For estimating the probe inverse variance weights, frmaTools requires equal-siz
- ed batches, which means a batch size must be chosen, and then batches smaller
- than that size must be ignored, while batches larger than the chosen size
- must be downsampled.
- This downsampling is performed randomly, so the sampling process is repeated
- 5 times and the resulting normalizations are compared to each other.
- \end_layout
- \begin_layout Standard
- To evaluate the consistency of the generated normalization vectors, the
- 5
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- vector sets generated from 5 random batch samplings were each used to normalize
- the same 20 randomly selected samples from each tissue.
- Then the normalized expression values for each probe on each array were
- compared across all normalizations.
- Each
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization was also compared against the normalized expression values
- obtained by normalizing the same 20 samples with ordinary
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- Modeling methylation array M-value heteroskedasticity with a modified voom
- implementation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Put code on Github and reference it.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To investigate whether DNA methylation could be used to distinguish
- between healthy and dysfunctional transplants, a data set of 78 Illumina
- 450k methylation arrays from human kidney graft biopsies was analyzed for
- differential methylation between 4 transplant statuses:
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- , transplants undergoing
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- ,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- , and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- CAN
- \end_layout
- \end_inset
- .
- The data consisted of 33 TX, 9 AR, 8 ADNR, and 28 CAN samples.
- The uneven group sizes are a result of taking the biopsy samples before
- the eventual fate of the transplant was known.
- Each sample was additionally annotated with a donor
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ID
- \end_layout
- \end_inset
- (anonymized), sex, age, ethnicity, creatinine level, and diabetes diagnosis
- (all samples in this data set came from patients with either
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T1D
- \end_layout
- \end_inset
- or
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T2D
- \end_layout
- \end_inset
- ).
-
- \end_layout
- \begin_layout Standard
- The intensity data were first normalized using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SWAN
- \end_layout
- \end_inset
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Maksimovic2012"
- literal "false"
- \end_inset
- , then converted to intensity ratios (beta values)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Aryee2014"
- literal "false"
- \end_inset
- .
- Any probes binding to loci that overlapped annotated SNPs were dropped,
- and the annotated sex of each sample was verified against the sex inferred
- from the ratio of median probe intensities for the X and Y chromosomes.
- Then, the ratios were transformed to
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="6">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- random effect
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- eBayes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- weights
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- voom
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Summary of analysis variants for methylation array data.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Summary-of-meth-analysis"
- \end_inset
- \series bold
- Summary of analysis variants for methylation array data.
-
- \series default
- Each analysis included a different set of steps to adjust or account for
- various systematic features of the data.
- Random effect: The model included a random effect accounting for correlation
- between samples from the same patient
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005a"
- literal "false"
- \end_inset
- ; eBayes: Empirical Bayes squeezing of per-probe variances toward the mean-varia
- nce trend
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2015"
- literal "false"
- \end_inset
- ; SVA: Surrogate variable analysis to account for unobserved confounders
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007"
- literal "false"
- \end_inset
- ; Weights: Estimate sample weights to account for differences in sample
- quality
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liu2015,Ritchie2006"
- literal "false"
- \end_inset
- ; voom: Use mean-variance trend to assign individual observation weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014"
- literal "false"
- \end_inset
- .
- See the text for a more detailed explanation of each step.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- From the
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- , a series of parallel analyses was performed, each adding additional steps
- into the model fit to accommodate a feature of the data (see Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Summary-of-meth-analysis"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- For analysis A, a
- \begin_inset Quotes eld
- \end_inset
- basic
- \begin_inset Quotes erd
- \end_inset
- linear modeling analysis was performed, compensating for known confounders
- by including terms for the factor of interest (transplant status) as well
- as the known biological confounders: sex, age, ethnicity, and diabetes.
- Since some samples came from the same patients at different times, the
- intra-patient correlation was modeled as a random effect, estimating a
- shared correlation value across all probes
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005a"
- literal "false"
- \end_inset
- .
- Then the linear model was fit, and the variance was modeled using empirical
- Bayes squeezing toward the mean-variance trend
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2015"
- literal "false"
- \end_inset
- .
- Finally, t-tests or F-tests were performed as appropriate for each test:
- t-tests for single contrasts, and F-tests for multiple contrasts.
- P-values were corrected for multiple testing using the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BH
- \end_layout
- \end_inset
- procedure for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- control
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- For analysis B,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- was used to infer additional unobserved sources of heterogeneity in the
- data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007"
- literal "false"
- \end_inset
- .
- These surrogate variables were added to the design matrix before fitting
- the linear model.
- In addition, sample quality weights were estimated from the data and used
- during linear modeling to down-weight the contribution of highly variable
- arrays while increasing the weight to arrays with lower variability
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2006"
- literal "false"
- \end_inset
- .
- The remainder of the analysis proceeded as in analysis A.
- For analysis C, the voom method was adapted to run on methylation array
- data and used to model and correct for the mean-variance trend using individual
- observation weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2014"
- literal "false"
- \end_inset
- , which were combined with the sample weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liu2015,Ritchie2006"
- literal "false"
- \end_inset
- .
- Each time weights were used, they were estimated once before estimating
- the random effect correlation value, and then the weights were re-estimated
- taking the random effect into account.
- The remainder of the analysis proceeded as in analysis B.
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Improve subsection titles in this section.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Reconsider subsection organization?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Separate normalization with RMA introduces unwanted biases in classification
- \end_layout
- \begin_layout Standard
- To demonstrate the problem with non-single-channel normalization methods,
- we considered the task of training a classifier to distinguish
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- using the samples from the internal set as training data, evaluating performanc
- e on the external set.
- First, training and evaluation were performed after normalizing all array
- samples together as a single set using
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , and second, the internal samples were normalized separately from the external
- samples and the training and evaluation were repeated.
- For each sample in the validation set, the classifier probabilities from
- both classifiers were plotted against each other (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Classifier-probabilities-RMA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- As expected, separate normalization biases the classifier probabilities,
- resulting in several misclassifications.
- In this case, the bias from separate normalization causes the classifier
- to assign a lower probability of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AR
- \end_layout
- \end_inset
- to every sample.
-
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/predplot.pdf
- lyxscale 50
- width 60col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Classifier probabilities on validation samples when normalized with RMA
- together vs.
- separately.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Classifier-probabilities-RMA"
- \end_inset
- \series bold
- Classifier probabilities on validation samples when normalized with RMA
- together vs.
- separately.
-
- \series default
- The PAM classifier algorithm was trained on the training set of arrays to
- distinguish AR from TX and then used to assign class probabilities to the
- validation set.
- The process was performed after normalizing all samples together and after
- normalizing the training and validation sets separately, and the class probabilities
- assigned to each sample in the validation set were plotted against each
- other.
- Each axis indicates the posterior probability of AR assigned to a sample
- by the classifier in the specified analysis.
- The color of each point indicates the true classification of that sample.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- fRMA and SCAN maintain classification performance while eliminating dependence
- on normalization strategy
- \end_layout
- \begin_layout Standard
- For internal validation, the 6 methods' AUC values ranged from 0.816 to 0.891,
- as shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:AUC-PAM"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- Among the non-single-channel normalizations, dChip outperformed
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , while
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- reduced the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AUC
- \end_layout
- \end_inset
- values for both dChip and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- .
- Both single-channel methods,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- , slightly outperformed
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- ahead of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- .
- However, the difference between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- is still quite small.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ROC-PAM-int"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ROC
- \end_layout
- \end_inset
- curves for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , dChip, and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- look very similar and relatively smooth, while both
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- curves and the curve for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- have a more jagged appearance.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/ROC-TXvsAR-internal.pdf
- lyxscale 50
- height 40theight%
- groupId roc-pam
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ROC-PAM-int"
- \end_inset
- ROC curves for PAM on internal validation data
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/ROC-TXvsAR-external.pdf
- lyxscale 50
- height 40theight%
- groupId roc-pam
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ROC-PAM-ext"
- \end_inset
- ROC curves for PAM on external validation data
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- ROC curves for PAM using different normalization strategies.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ROC-PAM-main"
- \end_inset
- \series bold
- ROC curves for PAM using different normalization strategies.
-
- \series default
- ROC curves were generated for PAM classification of AR vs TX after 6 different
- normalization strategies were applied to the same data sets.
- Only fRMA and SCAN are single-channel normalizations.
- The other normalizations are for comparison.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="7" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Normalization
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Single-channel?
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Internal Val.
- AUC
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- External Val.
- AUC
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- RMA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.852
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.713
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- dChip
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.891
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.657
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- RMA + GRSN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.816
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.750
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- dChip + GRSN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.875
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.642
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- fRMA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.863
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.718
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- SCAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.853
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.689
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- ROC curve AUC values for internal and external validation with 6 different
- normalization strategies.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:AUC-PAM"
- \end_inset
- \series bold
- ROC curve AUC values for internal and external validation with 6 different
- normalization strategies.
- \series default
- These AUC values correspond to the ROC curves in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ROC-PAM-main"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- For external validation, as expected, all the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AUC
- \end_layout
- \end_inset
- values are lower than those from internal validation, ranging from 0.642 to 0.750
- (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:AUC-PAM"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- With or without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- ,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- shows its dominance over dChip in this more challenging test.
- Unlike in the internal validation,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GRSN
- \end_layout
- \end_inset
- actually improves the classifier performance for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , although it does not for dChip.
- Once again, both single-channel methods perform about on par with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- performing slightly better and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- performing a bit worse.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ROC-PAM-ext"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ROC
- \end_layout
- \end_inset
- curves for the external validation test.
- As expected, none of them are as clean-looking as the internal validation
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ROC
- \end_layout
- \end_inset
- curves.
- The curves for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , RMA+GRSN, and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- all look similar, while the other curves look more divergent.
- \end_layout
- \begin_layout Subsection
- fRMA with custom-generated vectors enables single-channel normalization
- on hthgu133pluspm platform
- \end_layout
- \begin_layout Standard
- In order to enable use of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- to normalize hthgu133pluspm arrays, a custom set of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- vectors was created.
- First, an appropriate batch size was chosen by looking at the number of
- batches and number of samples included as a function of batch size (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:frmatools-batch-size"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- For a given batch size, all batches with fewer samples than the chosen
- size must be ignored during training, while larger batches must be randomly
- downsampled to the chosen size.
- Hence, the number of samples included for a given batch size equals the
- batch size times the number of batches with at least that many samples.
- From Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:batch-size-samples"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , it is apparent that a batch size of 8 maximizes the number of samples
- included in training.
- Increasing the batch size beyond this causes too many smaller batches to
- be excluded, reducing the total number of samples for both tissue types.
- However, a batch size of 8 is not necessarily optimal.
- The article introducing frmaTools concluded that it was highly advantageous
- to use a smaller batch size in order to include more batches, even at the
- cost of including fewer total samples in training
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- To strike an appropriate balance between more batches and more samples,
- a batch size of 5 was chosen.
- For both blood and biopsy samples, this increased the number of batches
- included by 10, with only a modest reduction in the number of samples compared
- to a batch size of 8.
- With a batch size of 5, 26 batches of biopsy samples and 46 batches of
- blood samples were available.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/batchsize_batches.pdf
- lyxscale 50
- height 35theight%
- groupId frmatools-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:batch-size-batches"
- \end_inset
- \series bold
- Number of batches usable in fRMA probe weight learning as a function of
- batch size.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- placement tb
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/batchsize_samples.pdf
- lyxscale 50
- height 35theight%
- groupId frmatools-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:batch-size-samples"
- \end_inset
- \series bold
- Number of samples usable in fRMA probe weight learning as a function of
- batch size.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Effect of batch size selection on number of batches and number of samples
- included in fRMA probe weight learning.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:frmatools-batch-size"
- \end_inset
- \series bold
- Effect of batch size selection on number of batches and number of samples
- included in fRMA probe weight learning.
-
- \series default
- For batch sizes ranging from 3 to 15, the number of batches (a) and samples
- (b) included in probe weight training were plotted for biopsy (BX) and
- blood (PAX) samples.
- The selected batch size, 5, is marked with a dotted vertical line.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Since
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- training requires equal-size batches, larger batches are downsampled randomly.
- This introduces a nondeterministic step in the generation of normalization
- vectors.
- To show that this randomness does not substantially change the outcome,
- the random downsampling and subsequent vector learning were repeated 5 times,
- with a different random seed each time.
- 20 samples were selected at random as a test set and normalized with each
- of the 5 sets of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization vectors as well as ordinary RMA, and the normalized expression
- values were compared across normalizations.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows a summary of these comparisons for biopsy samples.
- Comparing RMA to each of the 5
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalizations, the distribution of log ratios is somewhat wide, indicating
- that the normalizations disagree on the expression values of a fair number
- of probe sets.
- In contrast, in comparisons of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- against
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- , the vast majority of probe sets have very small log ratios, indicating
- a very high agreement between the normalized values generated by the two
- normalizations.
- This shows that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization's behavior is not very sensitive to the random downsampling
- of larger batches during training.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/M-BX-violin.pdf
- lyxscale 40
- height 90theight%
- groupId m-violin
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Violin plot of log ratios between normalizations for 20 biopsy samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:m-bx-violin"
- \end_inset
- \series bold
- Violin plot of log ratios between normalizations for 20 biopsy samples.
-
- \series default
- Each of 20 randomly selected samples was normalized with RMA and with 5
- different sets of fRMA vectors.
- The distribution of log ratios between normalized expression values, aggregated
- across all 20 arrays, was plotted for each pair of normalizations.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/M-PAX-violin.pdf
- lyxscale 40
- height 90theight%
- groupId m-violin
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:m-pax-violin"
- \end_inset
- \begin_inset Argument 1
- status open
- \begin_layout Plain Layout
- Violin plot of log ratios between normalizations for 20 blood samples.
- \end_layout
- \end_inset
- \series bold
- Violin plot of log ratios between normalizations for 20 blood samples.
-
- \series default
- Each of 20 randomly selected samples was normalized with RMA and with 5
- different sets of fRMA vectors.
- The distribution of log ratios between normalized expression values, aggregated
- across all 20 arrays, was plotted for each pair of normalizations.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ma-bx-rma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows an MA plot of the RMA-normalized values against the fRMA-normalized
- values for the same probe sets and arrays, corresponding to the first row
- of Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This MA plot shows that not only is there a wide distribution of
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- , but the trend of
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- is dependent on the average normalized intensity.
- This is expected, since the overall trend represents the differences in
- the quantile normalization step.
- When running
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , only the quantiles for these specific 20 arrays are used, while for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- the quantile distribution is taken from all arrays used in training.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ma-bx-frma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows a similar MA plot comparing 2 different
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalizations, corresponding to the 6th row of Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The MA plot is very tightly centered around zero with no visible trend.
- Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-pax-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-PAX-rma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-PAX-frma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- show exactly the same information for the blood samples, once again comparing
- the normalized expression values between normalizations for all probe sets
- across 20 randomly selected test arrays.
- Once again, there is a wider distribution of log ratios between RMA-normalized
- values and fRMA-normalized values, and a much tighter distribution when comparing
- different
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalizations to each other, indicating that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- training process is robust to random batch sub-sampling for the blood samples
- as well.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-BX-RMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ma-bx-rma-frma"
- \end_inset
- RMA vs.
- fRMA for biopsy samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-BX-fRMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ma-bx-frma-frma"
- \end_inset
- fRMA vs fRMA for biopsy samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-PAX-RMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-PAX-rma-frma"
- \end_inset
- RMA vs.
- fRMA for blood samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-PAX-fRMA.fRMA-RASTER.png
- lyxscale 10
- width 45col%
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-PAX-frma-frma"
- \end_inset
- fRMA vs fRMA for blood samples.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Representative MA plots comparing RMA and custom fRMA normalizations.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Representative-MA-plots"
- \end_inset
- \series bold
- Representative MA plots comparing RMA and custom fRMA normalizations.
-
- \series default
- For each plot, 20 samples were normalized using 2 different normalizations,
- and then averages (A) and log ratios (M) were plotted between the two different
- normalizations for every probe.
- For the
- \begin_inset Quotes eld
- \end_inset
- fRMA vs fRMA
- \begin_inset Quotes erd
- \end_inset
- plots (b & d), two different fRMA normalizations using vectors from two
- independent batch samplings were compared.
- Density of points is represented by blue shading, and individual outlier
- points are plotted.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- SVA, voom, and array weights improve model fit for methylation array data
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-basic"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the relationship between the mean
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- and the standard deviation calculated for each probe in the methylation
- array data set.
- A few features of the data are apparent.
- First, the data are very strongly bimodal, with peaks in the density around
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- of +4 and -4.
- These modes correspond to methylation sites that are nearly 100% methylated
- and nearly 100% unmethylated, respectively.
- The strong bimodality indicates that a majority of probes interrogate sites
- that fall into one of these two categories.
- The points in between these modes represent sites that are either partially
- methylated in many samples, or are fully methylated in some samples and
- fully unmethylated in other samples, or some combination.
- The next visible feature of the data is the W-shaped variance trend.
- The upticks in the variance trend on either side are expected, based on
- the sigmoid transformation exaggerating small differences at extreme
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Sigmoid-beta-m-mapping"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- However, the uptick in the center is interesting: it indicates that sites
- that are not constitutively methylated or unmethylated have a higher variance.
- This could be a genuine biological effect, or it could be spurious noise
- that is only observable at sites with varying methylation.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Fix axis labels:
- \begin_inset Quotes eld
- \end_inset
- log2 M-value
- \begin_inset Quotes erd
- \end_inset
- is redundant because M-values are already log scale
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/meanvar-trends-PAGE1-CROP-RASTER.png
- lyxscale 15
- width 30col%
- groupId voomaw-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-basic"
- \end_inset
- Mean-variance trend for analysis A.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/meanvar-trends-PAGE1-CROP-RASTER.png
- lyxscale 15
- width 30col%
- groupId voomaw-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-sva-aw"
- \end_inset
- Mean-variance trend for analysis B.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/meanvar-trends-PAGE2-CROP-RASTER.png
- lyxscale 15
- width 30col%
- groupId voomaw-subfig
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-sva-voomaw"
- \end_inset
- Mean-variance trend after voom modeling in analysis C.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Mean-variance trend modeling in methylation array data.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:-Meanvar-trend-methyl"
- \end_inset
- \series bold
- Mean-variance trend modeling in methylation array data.
-
- \series default
- The estimated
- \begin_inset Formula $\log_{2}$
- \end_inset
- (standard deviation) for each probe is plotted against the probe's average
- M-value across all samples as a black point, with some transparency to
- make over-plotting more visible, since there are about 450,000 points.
- Density of points is also indicated by the dark blue contour lines.
- The prior variance trend estimated by eBayes is shown in light blue, while
- the lowess trend of the points is shown in red.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , we see the mean-variance trend for the same methylation array data, this
- time with surrogate variables and sample quality weights estimated from
- the data and included in the model.
- As expected, the overall average variance is smaller, since the surrogate
- variables account for some of the variance.
- In addition, the uptick in variance in the middle of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- range has disappeared, turning the W shape into a wide U shape.
- This indicates that the excess variance in the probes with intermediate
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- was explained by systematic variations not correlated with known covariates,
- and these variations were modeled by the surrogate variables.
- The result is a nearly flat variance trend for the entire intermediate
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- range from about -3 to +3.
- Note that this corresponds closely to the range within which the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- transformation shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Sigmoid-beta-m-mapping"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- is nearly linear.
- In contrast, the excess variance at the extremes (greater than +3 and less
- than -3) was not
- \begin_inset Quotes eld
- \end_inset
- absorbed
- \begin_inset Quotes erd
- \end_inset
- by the surrogate variables and remains in the plot, indicating that this
- variation has no systematic component: probes with extreme
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- are uniformly more variable across all samples, as expected.
-
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-voomaw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the mean-variance trend after fitting the model with the observation
- weights assigned by voom based on the mean-variance trend shown in Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- As expected, the weights exactly counteract the trend in the data, resulting
- in a nearly flat trend centered vertically at 1 (i.e.
- 0 on the log scale).
- This shows that the observations with extreme
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- have been appropriately down-weighted to account for the fact that the
- noise in those observations has been amplified by the non-linear
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- transformation.
- In turn, this gives relatively more weight to observations in the middle
- region, which are more likely to correspond to probes measuring interesting
- biology (not constitutively methylated or unmethylated).
- \end_layout
- \begin_layout Standard
- To determine whether any of the known experimental factors had an impact
- on data quality, the sample quality weights estimated from the data were
- tested for association with each of the experimental factors (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:weight-covariate-tests"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Diabetes diagnosis was found to have a potentially significant association
- with the sample weights, with a t-test p-value of
- \begin_inset Formula $1.06\times10^{-3}$
- \end_inset
- .
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:diabetes-sample-weights"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the distribution of sample weights grouped by diabetes diagnosis.
- The samples from patients with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T2D
- \end_layout
- \end_inset
- were assigned significantly lower weights than those from patients with
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T1D
- \end_layout
- \end_inset
- .
- This indicates that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T2D
- \end_layout
- \end_inset
- samples had an overall higher variance on average across all probes.
-
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="3">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Covariate
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Test used
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- p-value
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Transplant Status
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- F-test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.404
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Diabetes Diagnosis
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \emph on
- t
- \emph default
- -test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.00106
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Sex
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \emph on
- t
- \emph default
- -test
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.148
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Age
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- linear regression
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.212
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Association of sample weights with clinical covariates in methylation array
- data.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:weight-covariate-tests"
- \end_inset
- \series bold
- Association of sample weights with clinical covariates in methylation array
- data.
-
- \series default
- Computed sample quality log weights were tested for significant association
- with each of the variables in the model (1st column).
- An appropriate test was selected for each variable based on whether the
- variable had 2 categories (
- \emph on
- t
- \emph default
- -test), had more than 2 categories (F-test), or was numeric (linear regression).
- The test selected is shown in the 2nd column.
- P-values for association with the log weights are shown in the 3rd column.
- No multiple testing adjustment was performed for these p-values.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Redo the sample weight boxplot with notches, and remove fill colors
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/sample-weights-PAGE3-CROP.pdf
- lyxscale 50
- width 60col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Box-and-whiskers plot of sample quality weights grouped by diabetes diagnosis.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:diabetes-sample-weights"
- \end_inset
- \series bold
- Box-and-whiskers plot of sample quality weights grouped by diabetes diagnosis.
-
- \series default
- Samples were grouped based on diabetes diagnosis, and the distribution of
- sample quality weights for each diagnosis was plotted as a box-and-whiskers
- plot
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McGill1978"
- literal "false"
- \end_inset
- .
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-num-signif"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the number of significantly differentially methylated probes reported
- by each analysis for each comparison of interest at an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- of 10%.
- As expected, the more elaborate analyses, B and C, report more significant
- probes than the more basic analysis A, consistent with the conclusions
- above that the data contain hidden systematic variations that must be modeled.
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-est-nonnull"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the estimated number of differentially methylated probes for each test
- from each analysis.
- This was computed by estimating the proportion of null hypotheses that
- were true using the method of
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013Thesis"
- literal "false"
- \end_inset
- and subtracting the corresponding number of probes from the total, yielding
- an estimate of the number of null hypotheses that are false based on the
- distribution of p-values across the entire dataset.
- Note that this does not identify which null hypotheses should be rejected
- (i.e.
- which probes are significant); it only estimates the true number of such
- probes.
- Once again, analyses B and C result in much larger estimates for the number
- of differentially methylated probes.
- In this case, analysis C, the only analysis that includes voom, estimates
- the largest number of differentially methylated probes for all 3 contrasts.
- If the assumptions of all the methods employed hold, then this represents
- a gain in statistical power over the simpler analysis A.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meth-p-value-histograms"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the p-value distributions for each test, from which the numbers in
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-est-nonnull"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- were generated.
- The distributions for analysis A all have a dip in density near zero, which
- is a strong sign of a poor model fit.
- The histograms for analyses B and C are more well-behaved, with a uniform
- component stretching all the way from 0 to 1 representing the probes for
- which the null hypothesis is true (no differential methylation), and a
- zero-biased component representing the probes for which the null hypothesis
- is false (differentially methylated).
- These histograms do not indicate any major issues with the model fit.
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Consider transposing these tables
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Contrast
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs AR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 25
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 22
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs ADNR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 7
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 338
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 369
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs CAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 231
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 278
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:methyl-num-signif"
- \end_inset
- Number of probes significant at 10% FDR.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Contrast
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs AR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 10,063
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 11,225
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs ADNR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 27
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 12,674
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 13,086
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs CAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 966
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 20,039
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 20,955
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:methyl-est-nonnull"
- \end_inset
- Estimated number of non-null tests, using the method of averaging local
- FDR values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013Thesis"
- literal "false"
- \end_inset
- .
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Estimates of the degree of differential methylation for each contrast in
- each analysis.
- \end_layout
- \end_inset
- \series bold
- Estimates of the degree of differential methylation for each contrast in
- each analysis.
-
- \series default
- For each of the analyses in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Summary-of-meth-analysis"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , these tables show the number of probes called significantly differentially
- methylated at a threshold of 10% FDR for each comparison between TX and
- the other 3 transplant statuses (a) and the estimated total number of probes
- that are differentially methylated (b).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \series bold
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE1.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- AR vs.
- TX, Analysis A
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE2.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- ADNR vs.
- TX, Analysis A
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE3.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- CAN vs.
- TX, Analysis A
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \series bold
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE1.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- AR vs.
- TX, Analysis B
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE2.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- ADNR vs.
- TX, Analysis B
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE3.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- CAN vs.
- TX, Analysis B
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \align center
- \series bold
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE1.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- AR vs.
- TX, Analysis C
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE2.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- ADNR vs.
- TX, Analysis C
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset space \hfill{}
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE3.pdf
- lyxscale 33
- width 30col%
- groupId meth-pval-hist
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \series bold
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- CAN vs.
- TX, Analysis C
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Probe p-value histograms for each contrast in each analysis.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meth-p-value-histograms"
- \end_inset
- \series bold
- Probe p-value histograms for each contrast in each analysis.
-
- \series default
- For each differential methylation test of interest, the distribution of
- p-values across all probes is plotted as a histogram.
- The red solid line indicates the density that would be expected under the
- null hypothesis for all probes (a
- \begin_inset Formula $\mathrm{Uniform}(0,1)$
- \end_inset
- distribution), while the blue dotted line indicates the fraction of p-values
- that actually follow the null hypothesis (
- \begin_inset Formula $\hat{\pi}_{0}$
- \end_inset
- ) estimated using the method of averaging local FDR values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Phipson2013Thesis"
- literal "false"
- \end_inset
- .
- A blue line is only shown in each plot if the estimate of
- \begin_inset Formula $\hat{\pi}_{0}$
- \end_inset
- for that p-value distribution is smaller than 1.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- If time allows, maybe generate the PCA plots before/after SVA effect subtraction
- ?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Subsection
- fRMA achieves clinically applicable normalization without sacrificing classifica
- tion performance
- \end_layout
- \begin_layout Standard
- As shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Classifier-probabilities-RMA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , improper normalization, particularly separate normalization of training
- and test samples, leads to unwanted biases in classification.
- In a controlled experimental context, it is always possible to correct
- this issue by normalizing all experimental samples together.
- However, because it is not feasible to normalize all samples together in
- a clinical context, a single-channel normalization is required.
-
- \end_layout
- \begin_layout Standard
- The major concern in using a single-channel normalization is that non-single-cha
- nnel methods can share information between arrays to improve the normalization,
- and single-channel methods risk sacrificing the gains in normalization
- accuracy that come from this information sharing.
- In the case of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- , this information sharing is accomplished through quantile normalization
- and median polish steps.
- The need for information sharing in quantile normalization can easily be
- removed by learning a fixed set of quantiles from external data and normalizing
- each array to these fixed quantiles, instead of the quantiles of the data
- itself.
- As long as the fixed quantiles are reasonable, the result will be similar
- to standard
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- .
- However, there is no analogous way to eliminate cross-array information
- sharing in the median polish step, so
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- replaces this with a weighted average of probes on each array, with the
- weights learned from external data.
- This step of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- has the greatest potential to diverge from RMA in undesirable ways.
- \end_layout
- \begin_layout Standard
- However, when run on real data,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- performed at least as well as
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- in both the internal validation and external validation tests.
- This shows that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- can be used to normalize individual clinical samples in a class prediction
- context without sacrificing the classifier performance that would be obtained
- by using the more well-established
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RMA
- \end_layout
- \end_inset
- for normalization.
- The other single-channel normalization method considered,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SCAN
- \end_layout
- \end_inset
- , showed some loss of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- AUC
- \end_layout
- \end_inset
- in the external validation test.
- Based on these results,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- is the preferred normalization for clinical samples in a class prediction
- context.
- \end_layout
- \begin_layout Subsection
- Robust fRMA vectors can be generated for new array platforms
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Look up the exact numbers, do a find & replace for
- \begin_inset Quotes eld
- \end_inset
- 850
- \begin_inset Quotes erd
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- The published
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization vectors for the hgu133plus2 platform were generated from
- a set of about 850 samples chosen from a wide range of tissues, which the
- authors determined was sufficient to generate a robust set of normalization
- vectors that could be applied across all tissues
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- Since we only had hthgu133pluspm data for 2 tissues of interest, our needs were
- more modest.
- Even using only 130 samples in 26 batches of 5 samples each for kidney
- biopsies, we were able to train a robust set of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization vectors that were not meaningfully affected by the random
- selection of 5 samples from each batch.
- As expected, the training process was just as robust for the blood samples
- with 230 samples in 46 batches of 5 samples each.
- Because these vectors were each generated using training samples from a
- single tissue, they are not suitable for general use, unlike the vectors
- provided with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- itself.
- They are purpose-built for normalizing a specific type of sample on a specific
- platform.
- This is a mostly acceptable limitation in the context of developing a machine
- learning classifier for diagnosing a disease based on samples of a specific
- tissue.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Talk about how these vectors can be used for any data from these tissues
- on this platform even though they were custom made for this data set.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- How to bring up that these custom vectors were used in another project by
- someone else that was never published?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Methylation array data can be successfully analyzed using existing techniques,
- but machine learning poses additional challenges
- \end_layout
- \begin_layout Standard
- Analysis strategies B and C both yield a reasonable analysis, with
- a mean-variance trend that matches the expected behavior for the non-linear
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- transformation (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) and well-behaved p-value distributions (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meth-p-value-histograms"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- These two analyses also yield similar numbers of significant probes (Table
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-num-signif"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ) and similar estimates of the number of differentially methylated probes
- (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:methyl-est-nonnull"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The main difference between these two analyses is the method used to account
- for the mean-variance trend.
- In analysis B, the trend is estimated and applied at the probe level: each
- probe's estimated variance is squeezed toward the trend using an empirical
- Bayes procedure (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-aw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- In analysis C, the trend is still estimated at the probe level, but instead
- of estimating a single variance value shared across all observations for
- a given probe, the voom method computes an initial estimate of the variance
- for each observation individually based on where its model-fitted
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- falls on the trend line and then assigns inverse-variance weights to model
- the difference in variance between observations.
- An overall variance is still estimated for each probe using the same empirical
- Bayes method, but now the residual trend is flat (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:meanvar-sva-voomaw"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ), indicating that the mean-variance trend is adequately modeled by scaling
- the estimated variance for each observation using the weights computed
- by voom.
-
- \end_layout
- \begin_layout Standard
- The difference between the standard empirical Bayes trended variance modeling
- (analysis B) and voom (analysis C) is analogous to the difference between
- a t-test with equal variance and a t-test with unequal variance, except
- that the unequal group variances used in the latter test are estimated
- based on the mean-variance trend from all the probes rather than the data
- for the specific probe being tested, thus stabilizing the group variance
- estimates by sharing information between probes.
- Allowing voom to model the variance using observation weights in this manner
- allows the linear model fit to concentrate statistical power where it will
- do the most good.
- For example, if a particular probe's
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- are always at the extreme of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- range (e.g.
- less than -4) for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- samples, but the
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- for that probe in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- CAN
- \end_layout
- \end_inset
- samples are within the flat region of the mean-variance trend (between
-
- \begin_inset Formula $-3$
- \end_inset
- and
- \begin_inset Formula $+3$
- \end_inset
- ), voom is able to down-weight the contribution of the high-variance
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- M-value
- \end_layout
- \end_inset
- from the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- samples in order to gain more statistical power while testing for differential
- methylation between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- CAN
- \end_layout
- \end_inset
- .
- In contrast, modeling the mean-variance trend only at the probe level would
- combine the high-variance
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ADNR
- \end_layout
- \end_inset
- samples and lower-variance samples from other conditions and estimate an
- intermediate variance for this probe.
- In practice, analysis B shows that this approach is adequate, but the voom
- approach in analysis C performs at least as well on all model fit criteria
- and yields a larger estimate for the number of differentially methylated
- probes,
- \emph on
- and
- \emph default
- it matches up slightly better with the theoretical properties of the data.
- \end_layout
- \begin_layout Standard
- The significant association of diabetes diagnosis with sample quality is
- interesting.
- The samples with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T2D
- \end_layout
- \end_inset
- tended to have more variation, averaged across all probes, than those with
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T1D
- \end_layout
- \end_inset
- .
- This is consistent with the consensus that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T2D
- \end_layout
- \end_inset
- and the associated metabolic syndrome represent a broad dysregulation of
- the body's endocrine signaling related to metabolism
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Volkmar2012,Hall2018,Yokoi2018"
- literal "false"
- \end_inset
- .
- This dysregulation could easily manifest as a greater degree of variation
- in the DNA methylation patterns of affected tissues.
- In contrast,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- T1D
- \end_layout
- \end_inset
- has a more specific cause and effect, so a less variable methylation signature
- is expected.
- \end_layout
- \begin_layout Standard
- This preliminary analysis suggests that some degree of differential methylation
- exists between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TX
- \end_layout
- \end_inset
- and each of the three types of transplant dysfunction studied.
- Hence, it may be feasible to train a classifier to diagnose transplant
- dysfunction from DNA methylation array data.
- However, the major importance of both
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- and sample quality weighting for proper modeling of this data poses significant
- challenges for any attempt at machine learning on data of similar quality.
- While these are easily used in a modeling context with full sample information,
- neither of these methods is directly applicable in a machine learning context,
- where the diagnosis is not known ahead of time.
- If a machine learning approach for methylation-based diagnosis is to be
- pursued, it will either require machine-learning-friendly methods to address
- the same systematic trends in the data that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- and sample quality weighting address, or it will require higher quality
- data with substantially less systematic perturbation of the data.
- \end_layout
- \begin_layout Section
- Future Directions
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Some work was already being done with the existing fRMA vectors.
- Do I mention that here?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Improving fRMA to allow training from batches of unequal size
- \end_layout
- \begin_layout Standard
- Because the tools for building
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- normalization vectors require equal-size batches, many samples must be
- discarded from the training data.
- This is undesirable for a few reasons.
- First, more data is simply better, all other things being equal.
- In this case,
- \begin_inset Quotes eld
- \end_inset
- better
- \begin_inset Quotes erd
- \end_inset
- means a more precise estimate of normalization parameters.
- In addition, the samples to be discarded must be chosen arbitrarily, which
- introduces an unnecessary element of randomness into the estimation process.
- While the randomness can be made deterministic by setting a consistent
- random seed, the need for equal-size batches also introduces a need for
- the analyst to decide on the appropriate trade-off between batch size and
- the number of batches.
- This introduces an unnecessary and undesirable
- \begin_inset Quotes eld
- \end_inset
- researcher degree of freedom
- \begin_inset Quotes erd
- \end_inset
- into the analysis, since the generated normalization vectors now depend
- on the choice of batch size based on vague selection criteria and instinct,
- which can unintentionally introduce bias if the researcher chooses a batch
- size based on what seems to yield the most favorable downstream results
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Simmons2011"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- Fortunately, the requirement for equal-size batches is not inherent to the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- fRMA
- \end_layout
- \end_inset
- algorithm but rather a limitation of the implementation in the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- frmaTools
- \end_layout
- \end_inset
- package.
- In personal communication, the package's author, Matthew McCall, has indicated
- that with some work, it should be possible to improve the implementation
- to work with batches of unequal sizes.
- The current implementation ignores the batch size when calculating within-batch
- and between-batch residual variances, since the batch size constant cancels
- out later in the calculations as long as all batches are of equal size.
- Hence, the calculations of these parameters would need to be modified to
- remove this optimization and properly calculate the variances using the
- full formula.
- Once this modification is made, a new strategy would need to be developed
- for assessing the stability of parameter estimates, since the random sub-sampli
- ng step is eliminated, meaning that different sub-samplings can no longer
- be compared as in Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:frma-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Representative-MA-plots"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- Bootstrap resampling is likely a good candidate here: sample many training
- sets of equal size from the existing training set with replacement, estimate
- parameters from each resampled training set, and compare the estimated
- parameters between bootstraps in order to quantify the variability in each
- parameter's estimation.
- \end_layout
- \begin_layout Subsection
- Developing methylation arrays as a diagnostic tool for kidney transplant
- rejection
- \end_layout
- \begin_layout Standard
- The current study has shown that DNA methylation, as assayed by Illumina
- 450k methylation arrays, has some potential for diagnosing transplant dysfuncti
- ons, including rejection.
- However, very few probes could be confidently identified as differentially
- methylated between healthy and dysfunctional transplants.
- One likely explanation for this is the predominant influence of unobserved
- confounding factors.
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- can model and correct for such factors, but the correction can never be
- perfect, so some degree of unwanted systematic variation will always remain
- after
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- correction.
- If the effect size of the confounding factors was similar to that of the
- factor of interest (in this case, transplant status), this would be an
- acceptable limitation, since removing most of the confounding factors'
- effects would allow the main effect to stand out.
- However, in this data set, the confounding factors have a much larger effect
- size than transplant status, which means that the small degree of remaining
- variation not removed by
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- can still swamp the effect of interest, making it difficult to detect.
- This is, of course, a major issue when the end goal is to develop a classifier
- to diagnose transplant rejection from methylation data, since batch-correction
- methods like
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- that work in a linear modeling context cannot be applied in a machine learning
- context.
- \end_layout
- \begin_layout Standard
- Currently, the source of these unwanted systematic variations in the data
- is unknown.
- The best solution would be to determine the cause of the variation and
- eliminate it, thereby eliminating the need to model and remove that variation.
- However, if this proves impractical, another option is to use
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- SVA
- \end_layout
- \end_inset
- to identify probes that are highly associated with the surrogate variables
- that describe the unwanted variation in the data.
- These probes could be discarded prior to classifier training, in order
- to maximize the chance that the training algorithm will be able to identify
- highly predictive probes from those remaining.
- Lastly, it is possible that some of this unwanted variation is a result
- of the array-based assay being used and would be eliminated by switching
- to assaying DNA methylation using bisulphite sequencing.
- However, this carries the risk that the sequencing assay will have its
- own set of biases that must be corrected for in a different way.
- \end_layout
- \begin_layout Chapter
- \begin_inset CommandInset label
- LatexCommand label
- name "chap:Globin-blocking-cyno"
- \end_inset
- Globin-blocking for more effective blood RNA-seq analysis in primate animal
- model
- \end_layout
- \begin_layout Standard
- \size large
- Ryan C.
- Thompson, Terri Gelbart, Steven R.
- Head, Phillip Ordoukhanian, Courtney Mullen, Dongmei Han, Dora Berman,
- Amelia Bartholomew, Norma Kenyon, Daniel R.
- Salomon
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Reintroduce all abbreviations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Choose between above and the paper title: Optimizing yield of deep RNA sequencin
- g for gene expression profiling by globin reduction of peripheral blood
- samples from cynomolgus monkeys (
- \emph on
- Macaca fascicularis
- \emph default
- ).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section*
- Abstract
- \end_layout
- \begin_layout Paragraph
- Background
- \end_layout
- \begin_layout Standard
- Primate blood contains high concentrations of globin
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- mRNA
- \end_layout
- \end_inset
- .
- Globin reduction is a standard technique used to improve the expression
- results obtained by DNA microarrays on RNA from blood samples.
- However, with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- quickly replacing microarrays for many applications, the impact of globin
- reduction for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- is less well-studied.
- Moreover, no off-the-shelf kits are available for globin reduction in nonhuman
- primates.
- \end_layout
- \begin_layout Paragraph
- Results
- \end_layout
- \begin_layout Standard
- Here we report a protocol for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- in primate blood samples that uses complementary
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- to block reverse transcription of the alpha and beta globin genes.
- In test samples from cynomolgus monkeys (
- \emph on
- Macaca fascicularis
- \emph default
- ), this
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- protocol approximately doubles the yield of informative (non-globin) reads
- by greatly reducing the fraction of globin reads, while also improving
- the consistency in sequencing depth between samples.
- The increased yield enables detection of about 2000 more genes, significantly
- increases the correlation in measured gene expression levels between samples,
- and increases the sensitivity of differential gene expression tests.
- \end_layout
- \begin_layout Paragraph
- Conclusions
- \end_layout
- \begin_layout Standard
- These results show that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- significantly improves the cost-effectiveness of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- in primate blood samples by doubling the yield of useful reads, allowing
- detection of more genes, and improving the precision of gene expression
- measurements.
- Based on these results, a globin reducing or blocking protocol is recommended
- for all
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- studies of primate blood samples.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Introduction
- \end_layout
- \begin_layout Standard
- As part of a multi-lab P01 grant to study
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- MSC
- \end_layout
- \end_inset
- infusion as a treatment for graft rejection in cynomolgus monkeys (
- \emph on
- Macaca fascicularis
- \emph default
- ), a large number of serial blood draws from cynomolgus monkeys were planned
- in order to monitor the progress of graft healing and eventual rejection
- after transplantation.
- In order to streamline the process of performing
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- on these blood samples, we developed a custom sequencing protocol.
- In the development of this protocol, we required a solution for the problem
- of excess globin reads.
- High fractions of globin
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- mRNA
- \end_layout
- \end_inset
- are naturally present in mammalian peripheral blood samples (up to 70%
- of total
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- mRNA
- \end_layout
- \end_inset
- ) and these are known to interfere with the results of array-based expression
- profiling
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Winn2010"
- literal "false"
- \end_inset
- .
- Globin reduction is also necessary for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- of blood samples, though for unrelated reasons: without globin reduction,
- many
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- reads will be derived from the globin genes, leaving fewer for the remainder
- of the genes in the transcriptome.
- However, existing strategies for globin reduction require an additional
- step during sample preparation to deplete the population of globin transcripts
- from the sample prior to reverse transcription
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012,Choi2014,Shin2014"
- literal "false"
- \end_inset
- .
- Furthermore, off-the-shelf globin reduction kits are generally targeted
- at human or mouse globin, not cynomolgus monkey, and sequence identity
- between human and cyno globin genes cannot be automatically assumed.
- Hence, we sought to incorporate a custom globin reduction method into our
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- protocol purely by adding additional reagents to an existing step in the
- sample preparation.
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Consider putting some of this in the Intro chapter
- \end_layout
- \begin_layout Itemize
- Cynomolgus monkeys as a model organism
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Highly related to humans
- \end_layout
- \begin_layout Itemize
- Small size and short life cycle - good research animal
- \end_layout
- \begin_layout Itemize
- Genomics resources still in development
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Inadequacy of existing blood RNA-seq protocols
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Existing protocols use a separate globin pulldown step, slowing down processing
- \end_layout
- \end_deeper
- \end_inset
- \end_layout
- \begin_layout Standard
- We evaluated globin reduction for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- by blocking reverse transcription of globin transcripts using custom blocking
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- .
- We demonstrate that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- significantly improves the cost-effectiveness of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- in blood samples.
- Thus, our protocol offers a significant advantage to any investigator planning
- to use
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- for gene expression profiling of nonhuman primate blood samples.
- Our method can be generally applied to any species by designing complementary
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- blocking probes to the globin gene sequences of that species.
- Indeed, any highly expressed but biologically uninformative transcripts
- can also be blocked to further increase sequencing efficiency and value
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Arnaud2016"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Subsection
- Sample collection
- \end_layout
- \begin_layout Standard
- All research reported here was done under IACUC-approved protocols at the
- University of Miami and complied with all applicable federal and state
- regulations and ethical principles for nonhuman primate research.
- Blood draws occurred between 16
- \begin_inset space ~
- \end_inset
- April
- \begin_inset space ~
- \end_inset
- 2012 and 18
- \begin_inset space ~
- \end_inset
- June
- \begin_inset space ~
- \end_inset
- 2015.
- The experimental system involved intrahepatic pancreatic islet transplantation
- into Cynomolgus monkeys with induced diabetes mellitus with or without
- concomitant infusion of mesenchymal stem cells.
- Blood was collected at serial time points before and after transplantation
- into PAXgene Blood RNA tubes (PreAnalytiX/Qiagen, Valencia, CA) at the
- precise volume:volume ratio of 2.5
- \begin_inset space ~
- \end_inset
- ml whole blood into 6.9
- \begin_inset space ~
- \end_inset
- ml of PAXgene additive.
- \end_layout
- \begin_layout Subsection
- Globin blocking oligonucleotide design
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- HBA1 and HBA2 is wrong for cyno?
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Four
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- were designed to hybridize to the
- \begin_inset Formula $3^{\prime}$
- \end_inset
- end of the transcripts for the Cynomolgus HBA1, HBA2 and HBB genes, with
- two hybridization sites for HBB and two sites for HBA (the chosen sites were
- identical in both HBA genes).
- All
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- were purchased from Sigma and were entirely composed of 2
- \begin_inset Formula $^{\prime}$
- \end_inset
- O-Me bases with a C3 spacer positioned at the
- \begin_inset Formula $3^{\prime}$
- \end_inset
- ends to prevent any polymerase-mediated primer extension.
- \end_layout
- \begin_layout Description
- HBA1/2
- \begin_inset space ~
- \end_inset
- site
- \begin_inset space ~
- \end_inset
- 1:
- \family typewriter
- GCCCACUCAGACUUUAUUCAAAG-C3spacer
- \end_layout
- \begin_layout Description
- HBA1/2
- \begin_inset space ~
- \end_inset
- site
- \begin_inset space ~
- \end_inset
- 2:
- \family typewriter
- GGUGCAAGGAGGGGAGGAG-C3spacer
- \end_layout
- \begin_layout Description
- HBB
- \begin_inset space ~
- \end_inset
- site
- \begin_inset space ~
- \end_inset
- 1:
- \family typewriter
- AAUGAAAAUAAAUGUUUUUUAUUAG-C3spacer
- \end_layout
- \begin_layout Description
- HBB
- \begin_inset space ~
- \end_inset
- site
- \begin_inset space ~
- \end_inset
- 2:
- \family typewriter
- CUCAAGGCCCUUCAUAAUAUCCC-C3spacer
- \end_layout
- \begin_layout Subsection
- RNA-seq library preparation
- \end_layout
- \begin_layout Standard
- Sequencing libraries were prepared with 200
- \begin_inset space ~
- \end_inset
- ng total RNA from each sample.
- Polyadenylated
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- mRNA
- \end_layout
- \end_inset
- was selected from 200
- \begin_inset space ~
- \end_inset
- ng aliquots of cynomolgus blood-derived total RNA using Ambion Dynabeads
- Oligo(dT)25 beads (Invitrogen) following the manufacturer’s recommended
- protocol.
- PolyA-selected RNA was then combined with 8
- \begin_inset space ~
- \end_inset
- pmol of HBA1/2
- \begin_inset space ~
- \end_inset
- (site
- \begin_inset space ~
- \end_inset
- 1), 8
- \begin_inset space ~
- \end_inset
- pmol of HBA1/2
- \begin_inset space ~
- \end_inset
- (site
- \begin_inset space ~
- \end_inset
- 2), 12
- \begin_inset space ~
- \end_inset
- pmol of HBB
- \begin_inset space ~
- \end_inset
- (site
- \begin_inset space ~
- \end_inset
- 1) and 12
- \begin_inset space ~
- \end_inset
- pmol of HBB
- \begin_inset space ~
- \end_inset
- (site
- \begin_inset space ~
- \end_inset
- 2)
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- .
- In addition, 20
- \begin_inset space ~
- \end_inset
- pmol of RT primer containing a portion of the Illumina adapter sequence
- (B-oligo-dTV: GAGTTCCTTGGCACCCGAGAATTCCATTTTTTTTTTTTTTTTTTTV) and 4
- \begin_inset space ~
- \end_inset
- µL of 5X First Strand buffer (250
- \begin_inset space ~
- \end_inset
- mM Tris-HCl pH
- \begin_inset space ~
- \end_inset
- 8.3, 375
- \begin_inset space ~
- \end_inset
- mM KCl, 15
- \begin_inset space ~
- \end_inset
- mM
- \begin_inset Formula $\textrm{MgCl}_{2}$
- \end_inset
- ) were added in a total volume of 15
- \begin_inset space ~
- \end_inset
- µL.
- The RNA was fragmented by heating this cocktail for 3 minutes at 95°C and
- then placed on ice.
- This was followed by the addition of 2
- \begin_inset space ~
- \end_inset
- µL 0.1
- \begin_inset space ~
- \end_inset
- M DTT, 1
- \begin_inset space ~
- \end_inset
- µL RNaseOUT, 1
- \begin_inset space ~
- \end_inset
- µL 10
- \begin_inset space ~
- \end_inset
- mM dNTPs 10% biotin-16 aminoallyl-
- \begin_inset Formula $2^{\prime}$
- \end_inset
- -dUTP and 10% biotin-16 aminoallyl-
- \begin_inset Formula $2^{\prime}$
- \end_inset
- -dCTP (TriLink Biotech, San Diego, CA), 1
- \begin_inset space ~
- \end_inset
- µL Superscript II (200
- \begin_inset space ~
- \end_inset
- U/µL, Thermo-Fisher).
- A second “unblocked” library was prepared in the same way for each sample
- but replacing the blocking
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- with an equivalent volume of water.
- The reaction was carried out at 25°C for 15 minutes and 42°C for 40 minutes,
- followed by incubation at 75°C for 10 minutes to inactivate the reverse
- transcriptase.
- \end_layout
- \begin_layout Standard
- The cDNA/RNA hybrid molecules were purified using 1.8X Ampure XP beads (Agencourt
- ) following the supplier’s recommended protocol.
- The cDNA/RNA hybrid was eluted in 25
- \begin_inset space ~
- \end_inset
- µL of 10
- \begin_inset space ~
- \end_inset
- mM Tris-HCl pH
- \begin_inset space ~
- \end_inset
- 8.0, and then bound to 25
- \begin_inset space ~
- \end_inset
- µL of M280 Magnetic Streptavidin beads washed per recommended protocol (Thermo-F
- isher).
- After 30 minutes of binding, beads were washed one time in 100
- \begin_inset space ~
- \end_inset
- µL 0.1
- \begin_inset space ~
- \end_inset
- N NaOH to denature and remove the bound RNA, followed by two 100
- \begin_inset space ~
- \end_inset
- µL washes with 1X TE buffer.
- \end_layout
- \begin_layout Standard
- Subsequent attachment of the
- \begin_inset Formula $5^{\prime}$
- \end_inset
- Illumina A adapter was performed by on-bead random primer extension of
- the following sequence (A-N8 primer:
- \family typewriter
- TTCAGAGTTCTACAGTCCGACGATCNNNNNNNN
- \family default
- ).
- Briefly, beads were resuspended in a 20
- \begin_inset space ~
- \end_inset
- µL reaction containing 5
- \begin_inset space ~
- \end_inset
- µM A-N8 primer, 40
- \begin_inset space ~
- \end_inset
- mM Tris-HCl pH
- \begin_inset space ~
- \end_inset
- 7.5, 20
- \begin_inset space ~
- \end_inset
- mM
- \begin_inset Formula $\textrm{MgCl}_{2}$
- \end_inset
- , 50
- \begin_inset space ~
- \end_inset
- mM NaCl, 0.325
- \begin_inset space ~
- \end_inset
- U/µL Sequenase
- \begin_inset space ~
- \end_inset
- 2.0 (Affymetrix, Santa Clara, CA), 0.0025
- \begin_inset space ~
- \end_inset
- U/µL inorganic pyrophosphatase (Affymetrix) and 300
- \begin_inset space ~
- \end_inset
- µM each dNTP.
- Reaction was incubated at 22°C for 30 minutes, then beads were washed 2
- times with 1X TE buffer (200
- \begin_inset space ~
- \end_inset
- µL).
- \end_layout
- \begin_layout Standard
- The magnetic streptavidin beads were resuspended in 34
- \begin_inset space ~
- \end_inset
- µL nuclease-free water and added directly to a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCR
- \end_layout
- \end_inset
- tube.
- The two Illumina protocol-specified
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCR
- \end_layout
- \end_inset
- primers were added at 0.53
- \begin_inset space ~
- \end_inset
- µM (Illumina TruSeq Universal Primer 1 and Illumina TruSeq barcoded
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCR
- \end_layout
- \end_inset
- primer 2), along with 40
- \begin_inset space ~
- \end_inset
- µL 2X KAPA HiFi Hotstart ReadyMix (KAPA, Wilmington, MA) and thermocycled
- as follows: starting with 98°C (2 min-hold); 15 cycles of 98°C, 20sec;
- 60°C, 30sec; 72°C, 30sec; and finished with a 72°C (2 min-hold).
- \end_layout
- \begin_layout Standard
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- PCR
- \end_layout
- \end_inset
- products were purified with 1X Ampure Beads following manufacturer’s recommende
- d protocol.
- Libraries were then analyzed using the Agilent TapeStation and quantitation
- of the desired size range was performed by “smear analysis”.
- Samples were pooled in equimolar batches of 16 samples.
- Pooled libraries were size selected on 2% agarose gels (E-Gel EX Agarose
- Gels; Thermo-Fisher).
- Products were cut between 250 and 350
- \begin_inset space ~
- \end_inset
- bp (corresponding to insert sizes of 130 to 230
- \begin_inset space ~
- \end_inset
- bp).
- Finished library pools were then sequenced on the Illumina NextSeq500 instrumen
- t with 75
- \begin_inset space ~
- \end_inset
- bp read lengths.
-
- \end_layout
- \begin_layout Subsection
- Read alignment and counting
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- emergencystretch 3em
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Need to relax the justification parameters just for this paragraph, or else
- featureCounts can break out of the margin.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Reads were aligned to the cynomolgus genome using STAR
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Wilson2013,Dobin2012"
- literal "false"
- \end_inset
- .
- Counts of uniquely mapped reads were obtained for every gene in each sample
- with the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- featureCounts
- \end_layout
- \end_inset
- function from the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- Rsubread
- \end_layout
- \end_inset
- package, using each of the three possibilities for the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- strandSpecific
- \end_layout
- \end_inset
- option: sense, antisense, and unstranded
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liao2014"
- literal "false"
- \end_inset
- .
- A few artifacts in the cynomolgus genome annotation complicated read counting.
- First, no ortholog is annotated for alpha globin in the cynomolgus genome,
- presumably because the human genome has two alpha globin genes with nearly
- identical sequences, making the orthology relationship ambiguous.
- However, two loci in the cynomolgus genome are annotated as “hemoglobin
- subunit alpha-like” (LOC102136192 and LOC102136846).
- LOC102136192 is annotated as a pseudogene while LOC102136846 is annotated
- as protein-coding.
- Our globin reduction protocol was designed to include blocking of these
- two genes.
- Indeed, these two genes together have almost the same read counts in each
- library as the properly-annotated HBB gene and much larger counts than
- any other gene in the unblocked libraries, giving confidence that reads
- derived from the real alpha globin are mapping to both genes.
- Thus, reads from both of these loci were counted as alpha globin reads
- in all further analyses.
- The second artifact is a small, uncharacterized non-coding RNA gene (LOC1021365
- 91), which overlaps the HBA-like gene (LOC102136192) on the opposite strand.
- If counting is not performed in stranded mode (or if a non-strand-specific
- sequencing protocol is used), many reads mapping to the globin gene will
- be discarded as ambiguous due to their overlap with this
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ncRNA
- \end_layout
- \end_inset
- gene, resulting in significant undercounting of globin reads.
- Therefore, stranded sense counts were used for all further analysis in
- the present study to ensure that we accurately accounted for globin transcript
- reduction.
- However, we note that stranded reads are not necessary for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- using our protocol in standard practice.
-
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- emergencystretch 0em
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Normalization and exploratory data analysis
- \end_layout
- \begin_layout Standard
- Libraries were normalized by computing scaling factors using the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- package's
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TMM
- \end_layout
- \end_inset
- method
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Robinson2010"
- literal "false"
- \end_inset
- .
-
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values were calculated using the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- cpm
- \end_layout
- \end_inset
- function in
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- for individual samples and the
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- aveLogCPM
- \end_layout
- \end_inset
- function for averages across groups of samples, using those functions’
- default prior count values to avoid taking the logarithm of 0.
- Genes were considered “present” if their average normalized
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values across all libraries were at least
- \begin_inset Formula $-1$
- \end_inset
- .
- Normalizing for gene length was unnecessary because the sequencing protocol
- is
- \begin_inset Formula $3^{\prime}$
- \end_inset
- -biased and hence the expected read count for each gene is related to the
- transcript’s copy number but not its length.
- \end_layout
- \begin_layout Standard
- In order to assess the effect of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- on reproducibility, Pearson and Spearman correlation coefficients were
- computed between the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values for every pair of libraries within the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and non-GB groups, and
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- 's
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- estimateDisp
- \end_layout
- \end_inset
- function was used to compute
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
- dispersions separately for the two groups
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Chen2014"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- Differential expression analysis
- \end_layout
- \begin_layout Standard
- All tests for differential gene expression were performed using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- , by first fitting a
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- NB
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GLM
- \end_layout
- \end_inset
- to the counts and normalization factors and then performing a quasi-likelihood
- F-test with robust estimation of outlier gene dispersions
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lund2012,Phipson2016"
- literal "false"
- \end_inset
- .
- To investigate the effects of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- on each gene, an additive model was fit to the full data with coefficients
- for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and Sample
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ID
- \end_layout
- \end_inset
- .
- To test the effect of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- on detection of differentially expressed genes, the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples and non-GB samples were each analyzed independently as follows:
- for each animal with both a pre-transplant and a post-transplant time point
- in the data set, the pre-transplant sample and the earliest post-transplant
- sample were selected, and all others were excluded, yielding a pre-/post-transp
- lant pair of samples for each animal (
- \begin_inset Formula $N=7$
- \end_inset
- animals with paired samples).
- These samples were analyzed for pre-transplant vs.
- post-transplant differential gene expression while controlling for inter-animal
- variation using an additive model with coefficients for transplant and
- animal
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ID
- \end_layout
- \end_inset
- .
- In all analyses, p-values were adjusted using the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BH
- \end_layout
- \end_inset
- procedure for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- control
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Itemize
- New blood RNA-seq protocol to block reverse transcription of globin genes
- \end_layout
- \begin_layout Itemize
- Blood RNA-seq time course after transplants with/without MSC infusion
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Subsection
- Globin blocking yields a larger and more consistent fraction of useful reads
- \end_layout
- \begin_layout Standard
- The objective of the present study was to validate a new protocol for deep
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- of whole blood drawn into PaxGene tubes from cynomolgus monkeys undergoing
- islet transplantation, with particular focus on minimizing the loss of
- useful sequencing space to uninformative globin reads.
- The details of the analysis with respect to transplant outcomes and the
- impact of mesenchymal stem cell treatment will be reported in a separate
- manuscript (in preparation).
- To focus on the efficacy of our
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- protocol, 37 blood samples, 16 from pre-transplant and 21 from post-transplant
- time points, were each prepped once with and once without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- , and were then sequenced on an Illumina NextSeq500 instrument.
- The number of reads aligning to each gene in the cynomolgus genome was
- counted.
- Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Fractions-of-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- summarizes the distribution of read fractions among the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and non-GB libraries.
- In the libraries with no
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- , globin reads made up an average of 44.6% of total input reads, while reads
- assigned to all other genes made up an average of 26.3%.
- The remaining reads either aligned to intergenic regions (that include
- long non-coding RNAs) or did not align with any annotated transcripts in
- the current build of the cynomolgus genome.
- In the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries, globin reads made up only 3.48% and reads assigned to all other
- genes increased to 50.4%.
- Thus,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- resulted in a 92.2% reduction in globin reads and a 91.6% increase in yield
- of useful non-globin reads.
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- afterpage{
- \end_layout
- \begin_layout Plain Layout
- \backslash
- begin{landscape}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- placement p
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="7">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Percent of Total Reads
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Percent of Genic Reads
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Non-globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- All Genic Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- All Aligned Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Non-globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Globin Reads
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 50.4% ± 6.82
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 3.48% ± 2.94
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 53.9% ± 6.81
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 89.7% ± 2.40
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 93.5% ± 5.25
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 6.49% ± 5.25
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 26.3% ± 8.95
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 44.6% ± 16.6
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 70.1% ± 9.38
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 90.7% ± 5.16
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 38.8% ± 17.1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 61.2% ± 17.1
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Fractions of reads mapping to genomic features in GB and non-GB samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Fractions-of-reads"
- \end_inset
- \series bold
- Fractions of reads mapping to genomic features in GB and non-GB samples.
-
- \series default
- All values are given as mean ± standard deviation.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- end{landscape}
- \end_layout
- \begin_layout Plain Layout
- }
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- This reduction is not quite as efficient as the previous analysis showed
- for human samples by DeepSAGE (<0.4% globin reads after globin reduction)
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- Nonetheless, this degree of globin reduction is sufficient to nearly double
- the yield of useful reads.
- Thus,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- cuts the required sequencing effort (and costs) to achieve a target coverage
- depth by almost 50%.
- Consistent with this near doubling of yield, the average difference in
- un-normalized
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- across all genes between the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries and non-GB libraries is approximately 1 (mean = 1.01, median =
- 1.08), an overall 2-fold increase.
- Un-normalized values are used here because the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- TMM
- \end_layout
- \end_inset
- normalization correctly identifies this 2-fold difference as biologically
- irrelevant and removes it.
- \end_layout
- \begin_layout Standard
- Another important aspect is that the standard deviations in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Fractions-of-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- are uniformly smaller in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples than the non-GB ones, indicating much greater consistency of yield.
- This is best seen in the percentage of non-globin reads as a fraction of
- total reads aligned to annotated genes (genic reads).
- For the non-GB samples, this measure ranges from 10.9% to 80.9%, while for
- the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples it ranges from 81.9% to 99.9% (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Fraction-of-genic-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/globin-paper/figure1-globin-fractions.pdf
- lyxscale 50
- width 100col%
- groupId colfullwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Fraction of genic reads in each sample aligned to non-globin genes, with
- and without GB.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Fraction-of-genic-reads"
- \end_inset
- \series bold
- Fraction of genic reads in each sample aligned to non-globin genes, with
- and without GB.
- \series default
- All reads in each sequencing library were aligned to the cyno genome, and
- the number of reads uniquely aligning to each gene was counted.
- For each sample, counts were summed separately for all globin genes and
- for the remainder of the genes (non-globin genes), and the fraction of
- genic reads aligned to non-globin genes was computed.
- Each point represents an individual sample.
- Gray + signs indicate the means for globin-blocked libraries and unblocked
- libraries.
- The overall distribution for each group is represented as a notched box
- plot.
- Points are randomly spread vertically to avoid excessive overlapping.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Float lost issues
- \end_layout
- \end_inset
- ).
- This means that for applications where it is critical that each sample
- achieve a specified minimum coverage in order to provide useful information,
- it would be necessary to budget up to 10 times the sequencing depth per
- sample without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- , even though the average yield improvement for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- is only 2-fold, because every sample has a chance of being 90% globin and
- 10% useful reads.
- Hence, the more consistent behavior of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples makes planning an experiment easier and more efficient because
- it eliminates the need to over-sequence every sample in order to guard
- against the worst case of a high-globin fraction.
- \end_layout
- \begin_layout Subsection
- Globin blocking lowers the noise floor and allows detection of about 2000
- more low-expression genes
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Remove redundant titles from figures
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Since
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- yields more usable sequencing depth, it should also allow detection of
- more genes at any given threshold.
- When we looked at the distribution of average normalized
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values across all libraries for genes with at least one read assigned to
- them, we observed the expected bimodal distribution, with a high-abundance
- "signal" peak representing detected genes and a low-abundance "noise" peak
- representing genes whose read count did not rise above the noise floor
- (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Consistent with the 2-fold increase in raw counts assigned to non-globin
- genes, the signal peak for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples is shifted to the right relative to the non-GB signal peak.
- When all the samples are normalized together, this difference is normalized
- out, lining up the signal peaks, and this reveals that, as expected, the
- noise floor for the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples is about 2-fold lower.
- This greater separation between signal and noise peaks in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples means that low-expression genes should be more easily detected
- and more precisely quantified than in the non-GB samples.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/globin-paper/figure2-aveLogCPM-colored.pdf
- lyxscale 50
- height 60theight%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Distributions of average group gene abundances when normalized separately
- or together.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:logcpm-dists"
- \end_inset
- \series bold
- Distributions of average group gene abundances when normalized separately
- or together.
- \series default
- All reads in each sequencing library were aligned to the cyno genome, and
- the number of reads uniquely aligning to each gene was counted.
- Genes with zero counts in all libraries were discarded.
- Libraries were normalized using the TMM method.
- Libraries were split into GB and non-GB groups and the average logCPM was
- computed.
- The distribution of average gene logCPM values was plotted for both groups
- using a kernel density plot to approximate a continuous distribution.
- The GB logCPM distributions are marked in red, non-GB in blue.
- The black vertical line denotes the chosen detection threshold of
- \begin_inset Formula $-1$
- \end_inset
- .
- Top panel: Libraries were split into GB and non-GB groups first and normalized
- separately.
- Bottom panel: Libraries were all normalized together first and then split
- into groups.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Based on these distributions, we selected a detection threshold of
- \begin_inset Formula $-1$
- \end_inset
- , which is approximately the leftmost edge of the trough between the signal
- and noise peaks.
- This represents the most liberal possible detection threshold that doesn't
- call substantial numbers of noise genes as detected.
- Among the full dataset, 13429 genes were detected at this threshold, and
- 22276 were not.
- When considering the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries and non-GB libraries separately and re-computing normalization
- factors independently within each group, 14535 genes were detected in the
-
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries while only 12460 were detected in the non-GB libraries.
- Thus,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- allowed the detection of more than 2000 extra genes that were buried under the noise
- floor without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- .
- This pattern of at least 2000 additional genes detected with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- was also consistent across a wide range of possible detection thresholds,
- from -2 to 3 (see Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Gene-detections"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/globin-paper/figure3-detection.pdf
- lyxscale 50
- width 70col%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Gene detections as a function of abundance thresholds in GB and non-GB samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Gene-detections"
- \end_inset
- \series bold
- Gene detections as a function of abundance thresholds in GB and non-GB samples.
- \series default
- Average logCPM was computed by separate group normalization as described
- in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- for both the GB and non-GB groups, as well as for all samples considered
- as one large group.
- For every integer threshold from
- \begin_inset Formula $-2$
- \end_inset
- to 3, the number of genes detected at or above that logCPM threshold was
- plotted for each group.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Globin blocking does not add significant additional noise or decrease sample
- quality
- \end_layout
- \begin_layout Standard
- One potential worry is that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- protocol could perturb the levels of non-globin genes.
- There are two kinds of possible perturbations: systematic and random.
- The former is not a major concern for detection of differential expression,
- since a 2-fold change in every sample has no effect on the relative fold
- change between samples.
- In contrast, random perturbations would increase the noise and obscure
- the signal in the dataset, reducing the capacity to detect differential
- expression.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Standardize on
- \begin_inset Quotes eld
- \end_inset
- log2
- \begin_inset Quotes erd
- \end_inset
- notation
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- The data do indeed show small systematic perturbations in gene levels (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-plot"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Other than the 3 designated alpha and beta globin genes, two other genes
- stand out as having especially large negative
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- logFC
- \end_layout
- \end_inset
- : HBD and LOC1021365.
- HBD, delta globin, is most likely targeted by the blocking
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- due to high sequence homology with the other globin genes.
- LOC1021365 is the aforementioned
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- ncRNA
- \end_layout
- \end_inset
- that is reverse-complementary to one of the alpha-like genes and that would
- be expected to be removed during the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- step.
- All other genes appear in a cluster centered vertically at 0, and the vast
- majority of genes in this cluster show an absolute
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logFC
- \end_layout
- \end_inset
- of 0.5 or less.
- Nevertheless, many of these small perturbations are still statistically
- significant, indicating that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- likely cause very small but non-zero systematic perturbations in measured
- gene expression levels.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/globin-paper/figure4-maplot-colored.pdf
- lyxscale 50
- width 100col%
- groupId colfullwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- MA plot showing effects of GB on each gene's abundance.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-plot"
- \end_inset
- \series bold
- MA plot showing effects of GB on each gene's abundance.
-
- \series default
- All libraries were normalized together as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and genes with an average logCPM below
- \begin_inset Formula $-1$
- \end_inset
- were filtered out.
- Each remaining gene was tested for differential abundance with respect
- to
- \begin_inset Flex Glossary Term (glstext)
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- ’s quasi-likelihood F-test, fitting an NB GLM to the table of read counts in
- each library.
- For each gene,
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- reported average logCPM, logFC, p-value, and BH-adjusted FDR.
- Each gene's logFC was plotted against its logCPM, colored by FDR.
- Red points are significant at
- \begin_inset Formula $\leq10\%$
- \end_inset
- FDR, and blue are not significant at that threshold.
- The alpha and beta globin genes targeted for blocking are marked with large
- triangles, while all other genes are represented as small points.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Give these numbers the LaTeX math treatment
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To evaluate the possibility of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- causing random perturbations and reducing sample quality, we computed the
- Pearson correlation between
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values for every pair of samples with and without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and plotted them against each other (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:gene-abundance-correlations"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The plot indicated that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries have higher sample-to-sample correlations than the non-GB libraries.
- Parametric and nonparametric tests for differences between the correlations
- with and without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- both confirmed that this difference was highly significant (2-sided paired
- t-test:
- \begin_inset Formula $t=37.2$
- \end_inset
- ,
- \begin_inset Formula $\textrm{d.f.}=665$
- \end_inset
- ,
- \begin_inset Formula $P\ll2.2\times10^{-16}$
- \end_inset
- ; 2-sided Wilcoxon signed-rank test:
- \begin_inset Formula $V=2195$
- \end_inset
- ,
- \begin_inset Formula $P\ll2.2\times10^{-16}$
- \end_inset
- ).
- Performing the same tests on the Spearman correlations gave the same conclusion
- (t-test:
- \begin_inset Formula $t=26.8$
- \end_inset
- ,
- \begin_inset Formula $\textrm{d.f.}=665$
- \end_inset
- ,
- \begin_inset Formula $P\ll2.2\times10^{-16}$
- \end_inset
- ; signed-rank test:
- \begin_inset Formula $V=8781$
- \end_inset
- ,
- \begin_inset Formula $P\ll2.2\times10^{-16}$
- \end_inset
- ).
- The
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- package was used to compute the overall
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and non-GB libraries, which showed that
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- resulted in a negligible increase in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- (0.417 with
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- vs.
- 0.400 without).
- The near equality of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- for both sets indicates that the higher correlations in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries are most likely a result of the increased yield of useful reads,
- which reduces the contribution of Poisson counting uncertainty to the overall
- variance of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- logCPM
- \end_layout
- \end_inset
- values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCarthy2012"
- literal "false"
- \end_inset
- .
- This improves the precision of expression measurements and more than offsets
- the negligible increase in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/globin-paper/figure5-corrplot.pdf
- lyxscale 50
- width 100col%
- groupId colfullwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Comparison of inter-sample gene abundance correlations with and without
- GB.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:gene-abundance-correlations"
- \end_inset
- \series bold
- Comparison of inter-sample gene abundance correlations with and without
- GB.
- \series default
- All libraries were normalized together as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and genes with an average logCPM less than
- \begin_inset Formula $-1$
- \end_inset
- were filtered out.
- Each gene’s logCPM was computed in each library using
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- 's
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- cpm
- \end_layout
- \end_inset
- function.
- For each pair of biological samples, the Pearson correlation between those
- samples' GB libraries was plotted against the correlation between the same
- samples’ non-GB libraries.
- Each point represents a unique pair of samples.
- The solid gray line shows a quantile-quantile plot of the distribution of GB
- correlations vs.
- that of non-GB correlations.
- The thin dashed line is the identity line, provided for reference.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- More differentially expressed genes are detected with globin blocking
- \end_layout
- \begin_layout Standard
- To compare performance on differential gene expression tests, we took subsets
- of both the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and non-GB libraries with exactly one pre-transplant and one post-transplant
- sample for each animal that had paired samples available for analysis (
- \begin_inset Formula $N=7$
- \end_inset
- animals,
- \begin_inset Formula $N=14$
- \end_inset
- samples in each subset).
- The same test for pre- vs.
- post-transplant differential gene expression was performed on the same
- 7 pairs of samples from
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- libraries and non-GB libraries, in each case using an
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- FDR
- \end_layout
- \end_inset
- of 10% as the threshold of significance.
- Out of 12,954 genes that passed the detection threshold in both subsets,
- 358 were called significantly differentially expressed in the same direction
- in both sets; 1063 were differentially expressed in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- set only; 296 were differentially expressed in the non-GB set only; 2 genes
- were called significantly up in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- set but significantly down in the non-GB set; and the remaining 11,235
- were not called differentially expressed in either set.
- These data are summarized in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Comparison-of-significant"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The differences in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- calculated by
- \begin_inset Flex Code
- status open
- \begin_layout Plain Layout
- edgeR
- \end_layout
- \end_inset
- for these subsets of samples were negligible (
- \begin_inset Formula $\textrm{BCV}=0.302$
- \end_inset
- for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and 0.297 for non-GB).
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="5">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- No Globin Blocking
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Up
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- NS
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Down
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="3" alignment="center" valignment="middle" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Globin-Blocking
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Up
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 231
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 515
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 2
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="4" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- NS
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 160
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 11235
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 136
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="4" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Down
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 548
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 127
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Comparison of significantly differentially expressed genes with and without
- globin blocking.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Comparison-of-significant"
- \end_inset
- \series bold
- Comparison of significantly differentially expressed genes with and without
- globin blocking.
- \series default
- Up, Down: Genes significantly up/down-regulated in post-transplant samples
- relative to pre-transplant samples, with a false discovery rate of 10%
- or less.
- NS: Non-significant genes (false discovery rate greater than 10%).
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- The key point is that the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- data result in substantially more differential expression calls than
- the non-GB data.
- Since there is no gold standard for this dataset, it is impossible to be
- certain whether this is due to under-calling of differential expression
- in the non-GB samples or over-calling in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples.
- However, given that both datasets are derived from the same biological
- samples and have nearly equal
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- BCV
- \end_layout
- \end_inset
- , it is more likely that the larger number of differential expression calls
- in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples are genuine detections that were enabled by the higher sequencing
- depth and measurement precision of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples.
- Note that the same set of genes was considered in both subsets, so the
- larger number of differentially expressed gene calls in the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- data set reflects a greater sensitivity to detect significant differential
- gene expression and not simply the larger total number of detected genes
- in
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- samples described earlier.
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Standard
- The original experience with whole blood gene expression profiling on DNA
- microarrays demonstrated that the high concentration of globin transcripts
- significantly reduced the sensitivity to detect genes with relatively low
- expression levels.
- To address this limitation, commercial protocols for globin reduction were
- developed based on strategies to block globin transcript amplification
- during labeling or physically remove globin transcripts by affinity bead
- methods
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Winn2010"
- literal "false"
- \end_inset
- .
- More recently, using the latest generation of labeling protocols and arrays,
- it was determined that globin reduction was no longer necessary to obtain
- sufficient sensitivity to detect differential transcript expression
- \begin_inset CommandInset citation
- LatexCommand cite
- key "NuGEN2010"
- literal "false"
- \end_inset
- .
- However, we are not aware of any publications using these currently available
- protocols with the latest generation of microarrays that actually compare
- the detection sensitivity with and without globin reduction.
- Nevertheless, in practice this has now been generally adopted, driven primarily
- by concerns for cost control.
- The main objective of our work was to directly test the impact of globin
- gene transcripts and a new
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- protocol for application to the newest generation of differential gene
- expression profiling determined using next generation sequencing.
-
- \end_layout
- \begin_layout Standard
- The challenge of doing global gene expression profiling in cynomolgus monkeys
- is that the currently available arrays were never designed to comprehensively
- cover this genome and have not been updated since the first assemblies
- of the cynomolgus genome were published.
- Therefore, we determined that the best strategy for peripheral blood profiling
- was to perform deep
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- and inform the workflow using the latest available genome assembly and
- annotation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Wilson2013"
- literal "false"
- \end_inset
- .
- However, it was not immediately clear whether globin reduction was necessary
- for
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- or how much improvement in efficiency or sensitivity to detect differential
- gene expression would be achieved for the added cost and effort.
-
- \end_layout
- \begin_layout Standard
- Existing strategies for globin reduction involve degradation or physical
- removal of globin transcripts in a separate step prior to reverse transcription
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012,Choi2014,Shin2014"
- literal "false"
- \end_inset
- .
- This additional step adds significant time, complexity, and cost to sample
- preparation.
- Faced with the need to perform
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- on large numbers of blood samples, we sought a solution to globin reduction
- that could be achieved purely by adding additional reagents during the
- reverse transcription reaction.
- Furthermore, we needed a globin reduction method specific to cynomolgus
- globin sequences that would work in an organism for which no kit is available
- off the shelf.
- \end_layout
- \begin_layout Standard
- As mentioned above, the addition of
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
-
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- has a very small impact on measured gene expression levels.
- However, this is a non-issue for the purposes of differential expression
- testing, since a systematic change in a gene in all samples does not affect
- relative expression levels between samples.
- That said, we must acknowledge that simple comparisons of gene expression
- data obtained by
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- and non-GB protocols are not possible without additional normalization.
-
- \end_layout
- \begin_layout Standard
- More importantly,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- not only nearly doubles the yield of usable reads, it also increases inter-samp
- le correlation and sensitivity to detect differential gene expression relative
- to the same set of samples profiled without
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- .
- In addition,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- does not add a significant amount of random noise to the data.
-
- \begin_inset Flex Glossary Term (Capital)
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- thus represents a cost-effective and low-effort way to squeeze more data
- and statistical power out of the same blood samples and the same amount
- of sequencing.
- In conclusion,
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- greatly increases the yield of useful
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- reads mapping to the rest of the genome, with minimal perturbations in
- the relative levels of non-globin genes.
- Based on these results, globin transcript reduction using sequence-specific,
- complementary blocking
- \begin_inset Flex Glossary Term (pl)
- status open
- \begin_layout Plain Layout
- oligo
- \end_layout
- \end_inset
- is recommended for all deep
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- of cynomolgus and other nonhuman primate blood samples.
- \end_layout
- \begin_layout Section
- Future Directions
- \end_layout
- \begin_layout Standard
- One drawback of the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- method presented in this analysis is a poor yield of genic reads, only
- around 50%.
- In a separate experiment, the reagent mixture was modified so as to address
- this drawback, resulting in a method that produces an even better reduction
- in globin reads without reducing the overall fraction of genic reads.
- However, the data showing this improvement consists of only a few test
- samples, so the larger data set analyzed above was chosen in order to demonstra
- te the effectiveness of the method in reducing globin reads while preserving
- the biological signal.
- \end_layout
- \begin_layout Standard
- The motivation for developing a fast, practical way to enrich for non-globin
- reads in cyno blood samples was to enable a large-scale
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- RNA-seq
- \end_layout
- \end_inset
- experiment investigating the effects of mesenchymal stem cell infusion
- on blood gene expression in cynomolgus transplant recipients in a time
- course after transplantation.
- With the
- \begin_inset Flex Glossary Term
- status open
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- method in place, the way is now clear for this experiment to proceed.
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Chapter*
- Future Directions
- \end_layout
- \begin_layout Plain Layout
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Reintroduce all abbreviations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- If there are any chapter-independent future directions, put them here.
- Otherwise, delete this section.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Chapter
- Closing remarks
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- \backslash
- addcontentsline{toc}{chapter}{Test}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- \backslash
- glsresetall
- \end_layout
- \end_inset
- \begin_inset Note Note
- status collapsed
- \begin_layout Plain Layout
- Reintroduce all abbreviations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \align center
- \begin_inset ERT
- status collapsed
- \begin_layout Plain Layout
- % Use "References" as the title of the Bibliography
- \end_layout
- \begin_layout Plain Layout
- \backslash
- renewcommand{
- \backslash
- bibname}{References}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset bibtex
- LatexCommand bibtex
- btprint "btPrintCited"
- bibfiles "code-refs,refs-PROCESSED"
- options "bibtotoc"
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Reference URLs that span pages have clickable links that include the page
- numbers and watermark.
- Try to fix that.
- \end_layout
- \end_inset
- \end_layout
- \end_body
- \end_document
|