thesis.lyx 249 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317
  1. #LyX 2.3 created this file. For more info see http://www.lyx.org/
  2. \lyxformat 544
  3. \begin_document
  4. \begin_header
  5. \save_transient_properties true
  6. \origin unavailable
  7. \textclass extbook
  8. \begin_preamble
  9. % List all used files in log output
  10. \listfiles
  11. % Add a DRAFT watermark
  12. \usepackage{draftwatermark}
  13. \SetWatermarkLightness{0.97}
  14. \SetWatermarkScale{1}
  15. % Set up required header format
  16. \usepackage{fancyhdr}
  17. \pagestyle{fancy}
  18. \renewcommand{\headrulewidth}{0pt}
  19. \rhead{}
  20. \lhead{}
  21. \rfoot{}
  22. \lfoot{}
  23. \cfoot{\thepage} % Page number bottom center
  24. % Allow FloatBarrier command
  25. \usepackage{placeins}
  26. % Allow landscape pages
  27. \usepackage{pdflscape}
  28. % Allow doing things after the end of the current page
  29. % (to avoid landscape figures breaking up text)
  30. \usepackage{afterpage}
  31. % This one breaks subfigs so it's disabled
  32. % https://tex.stackexchange.com/questions/65680/automatically-bold-first-sentence-of-a-floats-caption
  33. \end_preamble
  34. \use_default_options true
  35. \begin_modules
  36. todonotes
  37. \end_modules
  38. \maintain_unincluded_children false
  39. \language english
  40. \language_package default
  41. \inputencoding utf8
  42. \fontencoding default
  43. \font_roman "default" "default"
  44. \font_sans "default" "default"
  45. \font_typewriter "default" "default"
  46. \font_math "auto" "auto"
  47. \font_default_family default
  48. \use_non_tex_fonts false
  49. \font_sc false
  50. \font_osf false
  51. \font_sf_scale 100 100
  52. \font_tt_scale 100 100
  53. \use_microtype false
  54. \use_dash_ligatures true
  55. \graphics default
  56. \default_output_format pdf4
  57. \output_sync 0
  58. \bibtex_command biber
  59. \index_command default
  60. \paperfontsize 12
  61. \spacing double
  62. \use_hyperref true
  63. \pdf_bookmarks true
  64. \pdf_bookmarksnumbered false
  65. \pdf_bookmarksopen false
  66. \pdf_bookmarksopenlevel 1
  67. \pdf_breaklinks false
  68. \pdf_pdfborder false
  69. \pdf_colorlinks false
  70. \pdf_backref false
  71. \pdf_pdfusetitle true
  72. \papersize letterpaper
  73. \use_geometry true
  74. \use_package amsmath 1
  75. \use_package amssymb 1
  76. \use_package cancel 1
  77. \use_package esint 1
  78. \use_package mathdots 1
  79. \use_package mathtools 1
  80. \use_package mhchem 1
  81. \use_package stackrel 1
  82. \use_package stmaryrd 1
  83. \use_package undertilde 1
  84. \cite_engine biblatex
  85. \cite_engine_type authoryear
  86. \biblio_style plain
  87. \biblatex_bibstyle authoryear
  88. \biblatex_citestyle numeric
  89. \use_bibtopic false
  90. \use_indices false
  91. \paperorientation portrait
  92. \suppress_date false
  93. \justification true
  94. \use_refstyle 1
  95. \use_minted 0
  96. \index Index
  97. \shortcut idx
  98. \color #008000
  99. \end_index
  100. \leftmargin 1.5in
  101. \topmargin 1in
  102. \rightmargin 1in
  103. \bottommargin 1in
  104. \secnumdepth 3
  105. \tocdepth 3
  106. \paragraph_separation indent
  107. \paragraph_indentation default
  108. \is_math_indent 0
  109. \math_numbering_side default
  110. \quotes_style english
  111. \dynamic_quotes 0
  112. \papercolumns 1
  113. \papersides 1
  114. \paperpagestyle default
  115. \tracking_changes false
  116. \output_changes false
  117. \html_math_output 0
  118. \html_css_as_file 0
  119. \html_be_strict false
  120. \end_header
  121. \begin_body
  122. \begin_layout Title
  123. Bioinformatic analysis of complex, high-throughput genomic and epigenomic
  124. data in the context of immunology and transplant rejection
  125. \end_layout
  126. \begin_layout Author
  127. A thesis presented
  128. \begin_inset Newline newline
  129. \end_inset
  130. by
  131. \begin_inset Newline newline
  132. \end_inset
  133. Ryan C.
  134. Thompson
  135. \begin_inset Newline newline
  136. \end_inset
  137. to
  138. \begin_inset Newline newline
  139. \end_inset
  140. The Scripps Research Institute Graduate Program
  141. \begin_inset Newline newline
  142. \end_inset
  143. in partial fulfillment of the requirements for the degree of
  144. \begin_inset Newline newline
  145. \end_inset
  146. Doctor of Philosophy in the subject of Biology
  147. \begin_inset Newline newline
  148. \end_inset
  149. for
  150. \begin_inset Newline newline
  151. \end_inset
  152. The Scripps Research Institute
  153. \begin_inset Newline newline
  154. \end_inset
  155. La Jolla, California
  156. \end_layout
  157. \begin_layout Date
  158. October 2019
  159. \end_layout
  160. \begin_layout Standard
  161. [Copyright notice]
  162. \end_layout
  163. \begin_layout Standard
  164. [Thesis acceptance form]
  165. \end_layout
  166. \begin_layout Standard
  167. [Dedication]
  168. \end_layout
  169. \begin_layout Standard
  170. [Acknowledgements]
  171. \end_layout
  172. \begin_layout Standard
  173. \begin_inset CommandInset toc
  174. LatexCommand tableofcontents
  175. \end_inset
  176. \end_layout
  177. \begin_layout Standard
  178. \begin_inset FloatList table
  179. \end_inset
  180. \end_layout
  181. \begin_layout Standard
  182. \begin_inset FloatList figure
  183. \end_inset
  184. \end_layout
  185. \begin_layout Standard
  186. [List of Abbreviations]
  187. \end_layout
  188. \begin_layout Standard
  189. \begin_inset Flex TODO Note (inline)
  190. status open
  191. \begin_layout Plain Layout
  192. Look into auto-generated nomenclature list: https://wiki.lyx.org/Tips/Nomenclature
  193. \end_layout
  194. \end_inset
  195. \end_layout
  196. \begin_layout List of TODOs
  197. \end_layout
  198. \begin_layout Standard
  199. \begin_inset Flex TODO Note (inline)
  200. status open
  201. \begin_layout Plain Layout
  202. On final pass: Check all figures to make sure they fit on the page with
  203. their legends.
  204. \end_layout
  205. \end_inset
  206. \end_layout
  207. \begin_layout Standard
  208. \begin_inset Flex TODO Note (inline)
  209. status open
  210. \begin_layout Plain Layout
  211. Search and replace: naive -> naïve
  212. \end_layout
  213. \end_inset
  214. \end_layout
  215. \begin_layout Chapter*
  216. Abstract
  217. \end_layout
  218. \begin_layout Standard
  219. \begin_inset Note Note
  220. status open
  221. \begin_layout Plain Layout
  222. It is included as an integral part of the thesis and should immediately
  223. precede the introduction.
  224. \end_layout
  225. \begin_layout Plain Layout
  226. Preparing your Abstract.
  227. Your abstract (a succinct description of your work) is limited to 350 words.
  228. UMI will shorten it if they must; please do not exceed the limit.
  229. \end_layout
  230. \begin_layout Itemize
  231. Include pertinent place names, names of persons (in full), and other proper
  232. nouns.
  233. These are useful in automated retrieval.
  234. \end_layout
  235. \begin_layout Itemize
  236. Display symbols, as well as foreign words and phrases, clearly and accurately.
  237. Include transliterations for characters other than Roman and Greek letters
  238. and Arabic numerals.
  239. Include accents and diacritical marks.
  240. \end_layout
  241. \begin_layout Itemize
  242. Do not include graphs, charts, tables, or illustrations in your abstract.
  243. \end_layout
  244. \end_inset
  245. \end_layout
  246. \begin_layout Chapter
  247. Introduction
  248. \end_layout
  249. \begin_layout Section
  250. Background & Significance
  251. \end_layout
  252. \begin_layout Subsection
  253. Biological motivation
  254. \end_layout
  255. \begin_layout Itemize
  256. Rejection is the major long-term threat to organ and tissue grafts
  257. \end_layout
  258. \begin_deeper
  259. \begin_layout Itemize
  260. Common mechanisms of rejection
  261. \end_layout
  262. \begin_layout Itemize
  263. Effective immune suppression requires monitoring for rejection and tuning
  264. \end_layout
  265. \begin_layout Itemize
  266. Current tests for rejection (tissue biopsy) are invasive and biased
  267. \end_layout
  268. \begin_layout Itemize
  269. A blood test based on microarrays would be less biased and invasive
  270. \end_layout
  271. \end_deeper
  272. \begin_layout Itemize
  273. Memory cells are resistant to immune suppression
  274. \end_layout
  275. \begin_deeper
  276. \begin_layout Itemize
  277. Mechanisms of resistance in memory cells are poorly understood
  278. \end_layout
  279. \begin_layout Itemize
  280. A better understanding of immune memory formation is needed
  281. \end_layout
  282. \end_deeper
  283. \begin_layout Itemize
  284. Mesenchymal stem cell infusion is a promising new treatment to prevent/delay
  285. rejection
  286. \end_layout
  287. \begin_deeper
  288. \begin_layout Itemize
  289. Demonstrated in mice, but not yet in primates
  290. \end_layout
  291. \begin_layout Itemize
  292. Mechanism currently unknown, but MSC are known to be immunomodulatory
  293. \end_layout
  294. \end_deeper
  295. \begin_layout Subsection
  296. Overview of bioinformatic analysis methods
  297. \end_layout
  298. \begin_layout Standard
  299. An overview of all the methods used, including what problem they solve,
  300. what assumptions they make, and a basic description of how they work.
  301. \end_layout
  302. \begin_layout Itemize
  303. ChIP-seq Peak calling
  304. \end_layout
  305. \begin_deeper
  306. \begin_layout Itemize
  307. Cross-correlation analysis to determine fragment size
  308. \end_layout
  309. \begin_layout Itemize
  310. Broad vs narrow peaks
  311. \end_layout
  312. \begin_layout Itemize
  313. SICER for broad peaks
  314. \end_layout
  315. \begin_layout Itemize
  316. IDR for biologically reproducible peaks
  317. \end_layout
  318. \begin_layout Itemize
  319. csaw peak filtering guidelines for unbiased downstream analysis
  320. \end_layout
  321. \end_deeper
  322. \begin_layout Itemize
  323. Normalization is non-trivial and application-dependent
  324. \end_layout
  325. \begin_deeper
  326. \begin_layout Itemize
  327. Expression arrays: RMA & fRMA; why fRMA is needed
  328. \end_layout
  329. \begin_layout Itemize
  330. Methylation arrays: M-value transformation approximates normal data but
  331. induces heteroskedasticity
  332. \end_layout
  333. \begin_layout Itemize
  334. RNA-seq: normalize based on assumption that the average gene is not changing
  335. \end_layout
  336. \begin_layout Itemize
  337. ChIP-seq: complex with many considerations, dependent on experimental methods,
  338. biological system, and analysis goals
  339. \end_layout
  340. \end_deeper
  341. \begin_layout Itemize
  342. Limma: The standard linear modeling framework for genomics
  343. \end_layout
  344. \begin_deeper
  345. \begin_layout Itemize
  346. empirical Bayes variance modeling: limma's core feature
  347. \end_layout
  348. \begin_layout Itemize
  349. edgeR & DESeq2: Extend with negative binomial GLM for RNA-seq and other
  350. count data
  351. \end_layout
  352. \begin_layout Itemize
  353. voom: Extend with precision weights to model mean-variance trend
  354. \end_layout
  355. \begin_layout Itemize
  356. arrayWeights and duplicateCorrelation to handle complex variance structures
  357. \end_layout
  358. \end_deeper
  359. \begin_layout Itemize
  360. sva and ComBat for batch correction
  361. \end_layout
  362. \begin_layout Itemize
  363. Factor analysis: PCA, MDS, MOFA
  364. \end_layout
  365. \begin_deeper
  366. \begin_layout Itemize
  367. Batch-corrected PCA is informative, but careful application is required
  368. to avoid bias
  369. \end_layout
  370. \end_deeper
  371. \begin_layout Itemize
  372. Gene set analysis: camera and SPIA
  373. \end_layout
  374. \begin_layout Section
  375. Innovation
  376. \end_layout
  377. \begin_layout Itemize
  378. MSC infusion to improve transplant outcomes (prevent/delay rejection)
  379. \end_layout
  380. \begin_deeper
  381. \begin_layout Itemize
  382. Characterize MSC response to interferon gamma
  383. \end_layout
  384. \begin_layout Itemize
  385. IFN-g is thought to stimulate their function
  386. \end_layout
  387. \begin_layout Itemize
  388. Test IFN-g treated MSC infusion as a therapy to delay graft rejection in
  389. cynomolgus monkeys
  390. \end_layout
  391. \begin_layout Itemize
  392. Monitor animals post-transplant using blood RNA-seq at serial time points
  393. \end_layout
  394. \end_deeper
  395. \begin_layout Itemize
  396. Investigate dynamics of histone marks in CD4 T-cell activation and memory
  397. \end_layout
  398. \begin_deeper
  399. \begin_layout Itemize
  400. Previous studies have looked at single snapshots of histone marks
  401. \end_layout
  402. \begin_layout Itemize
  403. Instead, look at changes in histone marks across activation and memory
  404. \end_layout
  405. \end_deeper
  406. \begin_layout Itemize
  407. High-throughput sequencing and microarray technologies
  408. \end_layout
  409. \begin_deeper
  410. \begin_layout Itemize
  411. Powerful methods for assaying gene expression and epigenetics across entire
  412. genomes
  413. \end_layout
  414. \begin_layout Itemize
  415. Proper analysis requires finding and exploiting systematic genome-wide trends
  416. \end_layout
  417. \end_deeper
  418. \begin_layout Chapter
  419. Reproducible genome-wide epigenetic analysis of H3K4 and H3K27 methylation
  420. in naïve and memory CD4 T-cell activation
  421. \end_layout
  422. \begin_layout Standard
  423. \begin_inset Flex TODO Note (inline)
  424. status open
  425. \begin_layout Plain Layout
  426. Chapter author list: Me, Sarah, Dan
  427. \end_layout
  428. \end_inset
  429. \end_layout
  430. \begin_layout Standard
  431. \begin_inset Flex TODO Note (inline)
  432. status open
  433. \begin_layout Plain Layout
  434. Need better section titles throughout the chapter
  435. \end_layout
  436. \end_inset
  437. \end_layout
  438. \begin_layout Section
  439. Approach
  440. \end_layout
  441. \begin_layout Itemize
  442. CD4 T-cells are central to all adaptive immune responses and memory
  443. \end_layout
  444. \begin_layout Itemize
  445. H3K4 and H3K27 methylation are major epigenetic regulators of gene expression
  446. \end_layout
  447. \begin_layout Itemize
  448. Canonically, H3K4 is activating and H3K27 is inhibitory, but the reality
  449. is complex
  450. \end_layout
  451. \begin_layout Itemize
  452. Looking at these marks during CD4 activation and memory should reveal new
  453. mechanistic details
  454. \end_layout
  455. \begin_layout Itemize
  456. Test
  457. \begin_inset Quotes eld
  458. \end_inset
  459. poised promoter
  460. \begin_inset Quotes erd
  461. \end_inset
  462. hypothesis in which H3K4 and H3K27 are both methylated
  463. \end_layout
  464. \begin_layout Itemize
  465. Expand scope of analysis beyond simple promoter counts
  466. \end_layout
  467. \begin_deeper
  468. \begin_layout Itemize
  469. Analyze peaks genome-wide, including in intergenic regions
  470. \end_layout
  471. \begin_layout Itemize
  472. Analysis of coverage distribution shape within promoters, e.g.
  473. upstream vs downstream coverage
  474. \end_layout
  475. \end_deeper
  476. \begin_layout Section
  477. Methods
  478. \end_layout
  479. \begin_layout Standard
  480. \begin_inset Flex TODO Note (inline)
  481. status open
  482. \begin_layout Plain Layout
  483. Look up some more details from the papers (e.g.
  484. activation method).
  485. \end_layout
  486. \end_inset
  487. \end_layout
  488. \begin_layout Standard
  489. A reproducible workflow was written to analyze the raw ChIP-seq and RNA-seq
  490. data from previous studies
  491. \begin_inset CommandInset citation
  492. LatexCommand cite
  493. key "LaMere2016,LaMere2017,gh-cd4-csaw"
  494. literal "true"
  495. \end_inset
  496. .
  497. Briefly, this data consists of RNA-seq and ChIP-seq from CD4 T-cells cultured
  498. from 4 donors.
  499. From each donor, naive and memory CD4 T-cells were isolated separately.
  500. Then cultures of both cells were activated [how?], and samples were taken
  501. at 4 time points: Day 0 (pre-activation), Day 1 (early activation), Day
  502. 5 (peak activation), and Day 14 (post-activation).
  503. For each combination of cell type and time point, RNA was isolated, and
  504. ChIP-seq was performed for each of 3 histone marks: H3K4me2, H3K4me3, and
  505. H3K27me3.
  506. The ChIP-seq input was also sequenced for each sample.
  507. The result was 32 samples for each assay.
  508. \end_layout
  509. \begin_layout Subsection
  510. ChIP-seq alignment and peak calling
  511. \end_layout
  512. \begin_layout Standard
  513. \begin_inset Flex TODO Note (inline)
  514. status open
  515. \begin_layout Plain Layout
  516. All info from this subsection belongs in other subsections.
  517. \end_layout
  518. \end_inset
  519. \end_layout
  520. \begin_layout Standard
  521. Sequence reads were retrieved from the Sequence Read Archive (SRA)
  522. \begin_inset CommandInset citation
  523. LatexCommand cite
  524. key "Leinonen2011"
  525. literal "false"
  526. \end_inset
  527. .
  528. ChIP-seq (and input) reads were aligned to the GRCh38 genome assembly using
  529. Bowtie 2
  530. \begin_inset CommandInset citation
  531. LatexCommand cite
  532. key "Langmead2012,Schneider2017,gh-hg38-ref"
  533. literal "false"
  534. \end_inset
  535. .
  536. Artifact regions were annotated using a custom implementation of the GreyListCh
  537. IP algorithm, and these
  538. \begin_inset Quotes eld
  539. \end_inset
  540. greylists
  541. \begin_inset Quotes erd
  542. \end_inset
  543. were merged with the ENCODE blacklist
  544. \begin_inset CommandInset citation
  545. LatexCommand cite
  546. key "greylistchip,Amemiya2019,Dunham2012"
  547. literal "false"
  548. \end_inset
  549. .
  550. Any read or peak overlapping one of these regions was regarded as artifactual
  551. and excluded from downstream analyses.
  552. \end_layout
  553. \begin_layout Standard
  554. Peaks were called using epic, an implementation of the SICER algorithm
  555. \begin_inset CommandInset citation
  556. LatexCommand cite
  557. key "Zang2009,gh-epic"
  558. literal "false"
  559. \end_inset
  560. .
  561. Peaks were also called separately using MACS, but MACS was determined to
  562. be a poor fit for the data, and these peak calls were not used in any further
  563. analyses
  564. \begin_inset CommandInset citation
  565. LatexCommand cite
  566. key "Zhang2008"
  567. literal "false"
  568. \end_inset
  569. .
  570. \end_layout
  571. \begin_layout Subsection
  572. RNA-seq align+quant method comparison
  573. \end_layout
  574. \begin_layout Standard
  575. \align left
  576. \begin_inset Flex TODO Note (inline)
  577. status open
  578. \begin_layout Plain Layout
  579. Write a legend for Figure
  580. \begin_inset CommandInset ref
  581. LatexCommand ref
  582. reference "fig:RNA-norm-comp"
  583. plural "false"
  584. caps "false"
  585. noprefix "false"
  586. \end_inset
  587. \end_layout
  588. \end_inset
  589. \end_layout
  590. \begin_layout Standard
  591. \begin_inset Float figure
  592. wide false
  593. sideways false
  594. status open
  595. \begin_layout Plain Layout
  596. \align center
  597. \begin_inset Float figure
  598. wide false
  599. sideways false
  600. status collapsed
  601. \begin_layout Plain Layout
  602. \align center
  603. \begin_inset Graphics
  604. filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-star-CROP.png
  605. lyxscale 25
  606. width 35col%
  607. groupId rna-comp-subfig
  608. \end_inset
  609. \end_layout
  610. \begin_layout Plain Layout
  611. \begin_inset Caption Standard
  612. \begin_layout Plain Layout
  613. STAR quantification, Entrez vs Ensembl gene annotation
  614. \end_layout
  615. \end_inset
  616. \end_layout
  617. \end_inset
  618. \begin_inset space \qquad{}
  619. \end_inset
  620. \begin_inset Float figure
  621. wide false
  622. sideways false
  623. status collapsed
  624. \begin_layout Plain Layout
  625. \align center
  626. \begin_inset Graphics
  627. filename graphics/CD4-csaw/rnaseq-compare/ensmebl-vs-entrez-shoal-CROP.png
  628. lyxscale 25
  629. width 35col%
  630. groupId rna-comp-subfig
  631. \end_inset
  632. \end_layout
  633. \begin_layout Plain Layout
  634. \begin_inset Caption Standard
  635. \begin_layout Plain Layout
  636. Salmon+Shoal quantification, Entrez vs Ensembl gene annotation
  637. \end_layout
  638. \end_inset
  639. \end_layout
  640. \end_inset
  641. \end_layout
  642. \begin_layout Plain Layout
  643. \align center
  644. \begin_inset Float figure
  645. wide false
  646. sideways false
  647. status collapsed
  648. \begin_layout Plain Layout
  649. \align center
  650. \begin_inset Graphics
  651. filename graphics/CD4-csaw/rnaseq-compare/star-vs-hisat2-CROP.png
  652. lyxscale 25
  653. width 35col%
  654. groupId rna-comp-subfig
  655. \end_inset
  656. \end_layout
  657. \begin_layout Plain Layout
  658. \begin_inset Caption Standard
  659. \begin_layout Plain Layout
  660. STAR vs HISAT2 quantification, Ensembl gene annotation
  661. \end_layout
  662. \end_inset
  663. \end_layout
  664. \end_inset
  665. \begin_inset space \qquad{}
  666. \end_inset
  667. \begin_inset Float figure
  668. wide false
  669. sideways false
  670. status collapsed
  671. \begin_layout Plain Layout
  672. \align center
  673. \begin_inset Graphics
  674. filename graphics/CD4-csaw/rnaseq-compare/star-vs-salmon-CROP.png
  675. lyxscale 25
  676. width 35col%
  677. groupId rna-comp-subfig
  678. \end_inset
  679. \end_layout
  680. \begin_layout Plain Layout
  681. \begin_inset Caption Standard
  682. \begin_layout Plain Layout
  683. Salmon vs STAR quantification, Ensembl gene annotation
  684. \end_layout
  685. \end_inset
  686. \end_layout
  687. \end_inset
  688. \end_layout
  689. \begin_layout Plain Layout
  690. \align center
  691. \begin_inset Float figure
  692. wide false
  693. sideways false
  694. status collapsed
  695. \begin_layout Plain Layout
  696. \align center
  697. \begin_inset Graphics
  698. filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-kallisto-CROP.png
  699. lyxscale 25
  700. width 35col%
  701. groupId rna-comp-subfig
  702. \end_inset
  703. \end_layout
  704. \begin_layout Plain Layout
  705. \begin_inset Caption Standard
  706. \begin_layout Plain Layout
  707. Salmon vs Kallisto quantification, Ensembl gene annotation
  708. \end_layout
  709. \end_inset
  710. \end_layout
  711. \end_inset
  712. \begin_inset space \qquad{}
  713. \end_inset
  714. \begin_inset Float figure
  715. wide false
  716. sideways false
  717. status collapsed
  718. \begin_layout Plain Layout
  719. \align center
  720. \begin_inset Graphics
  721. filename graphics/CD4-csaw/rnaseq-compare/salmon-vs-shoal-CROP.png
  722. lyxscale 25
  723. width 35col%
  724. groupId rna-comp-subfig
  725. \end_inset
  726. \end_layout
  727. \begin_layout Plain Layout
  728. \begin_inset Caption Standard
  729. \begin_layout Plain Layout
  730. Salmon+Shoal vs Salmon alone, Ensembl gene annotation
  731. \end_layout
  732. \end_inset
  733. \end_layout
  734. \end_inset
  735. \end_layout
  736. \begin_layout Plain Layout
  737. \begin_inset Caption Standard
  738. \begin_layout Plain Layout
  739. \begin_inset CommandInset label
  740. LatexCommand label
  741. name "fig:RNA-norm-comp"
  742. \end_inset
  743. RNA-seq comparisons
  744. \end_layout
  745. \end_inset
  746. \end_layout
  747. \end_inset
  748. \end_layout
  749. \begin_layout Itemize
  750. Ultimately selected shoal as quantification, Ensembl as annotation.
  751. Why? Running downstream analyses with all quant methods and both annotations
  752. showed very little practical difference, so choice was not terribly important.
  753. Prefer shoal due to theoretical advantages.
  754. To note in discussion: reproducible workflow made it easy to do this, enabling
  755. an informed decision.
  756. \end_layout
  757. \begin_layout Subsection
  758. RNA-seq has a large confounding batch effect
  759. \end_layout
  760. \begin_layout Standard
  761. \begin_inset Float figure
  762. wide false
  763. sideways false
  764. status open
  765. \begin_layout Plain Layout
  766. \begin_inset Flex TODO Note (inline)
  767. status open
  768. \begin_layout Plain Layout
  769. Just take the top row
  770. \end_layout
  771. \end_inset
  772. \end_layout
  773. \begin_layout Plain Layout
  774. \align center
  775. \begin_inset Graphics
  776. filename graphics/CD4-csaw/RNA-seq/weights-vs-covars-CROP.png
  777. lyxscale 25
  778. width 100col%
  779. groupId colwidth-raster
  780. \end_inset
  781. \end_layout
  782. \begin_layout Plain Layout
  783. \begin_inset Caption Standard
  784. \begin_layout Plain Layout
  785. \series bold
  786. \begin_inset CommandInset label
  787. LatexCommand label
  788. name "fig:RNA-seq-weights-vs-covars"
  789. \end_inset
  790. RNA-seq sample weights, grouped by experimental and technical covariates.
  791. \end_layout
  792. \end_inset
  793. \end_layout
  794. \end_inset
  795. \end_layout
  796. \begin_layout Itemize
  797. Batch 1 is garbage quality.
  798. Analyses involving batch 1 samples are expected to yield poor statistical
  799. power.
  800. \end_layout
  801. \begin_layout Standard
  802. \begin_inset Float figure
  803. wide false
  804. sideways false
  805. status open
  806. \begin_layout Plain Layout
  807. \align center
  808. \begin_inset Float figure
  809. wide false
  810. sideways false
  811. status open
  812. \begin_layout Plain Layout
  813. \align center
  814. \begin_inset Graphics
  815. filename graphics/CD4-csaw/RNA-seq/PCA-no-batchsub-CROP.png
  816. lyxscale 25
  817. width 75col%
  818. groupId rna-pca-subfig
  819. \end_inset
  820. \end_layout
  821. \begin_layout Plain Layout
  822. \begin_inset Caption Standard
  823. \begin_layout Plain Layout
  824. \series bold
  825. \begin_inset CommandInset label
  826. LatexCommand label
  827. name "fig:RNA-PCA-no-batchsub"
  828. \end_inset
  829. Before batch correction
  830. \end_layout
  831. \end_inset
  832. \end_layout
  833. \end_inset
  834. \end_layout
  835. \begin_layout Plain Layout
  836. \align center
  837. \begin_inset Float figure
  838. wide false
  839. sideways false
  840. status open
  841. \begin_layout Plain Layout
  842. \align center
  843. \begin_inset Graphics
  844. filename graphics/CD4-csaw/RNA-seq/PCA-combat-batchsub-CROP.png
  845. lyxscale 25
  846. width 75col%
  847. groupId rna-pca-subfig
  848. \end_inset
  849. \end_layout
  850. \begin_layout Plain Layout
  851. \begin_inset Caption Standard
  852. \begin_layout Plain Layout
  853. \series bold
  854. \begin_inset CommandInset label
  855. LatexCommand label
  856. name "fig:RNA-PCA-ComBat-batchsub"
  857. \end_inset
  858. After batch correction with ComBat
  859. \end_layout
  860. \end_inset
  861. \end_layout
  862. \end_inset
  863. \end_layout
  864. \begin_layout Plain Layout
  865. \begin_inset Caption Standard
  866. \begin_layout Plain Layout
  867. \series bold
  868. \begin_inset CommandInset label
  869. LatexCommand label
  870. name "fig:RNA-PCA"
  871. \end_inset
  872. PCoA plots of RNA-seq data showing effect of batch correction.
  873. \end_layout
  874. \end_inset
  875. \end_layout
  876. \end_inset
  877. \end_layout
  878. \begin_layout Itemize
  879. RNA-seq batch effect can be partially corrected, but still induces uncorrectable
  880. biases in downstream analysis
  881. \end_layout
  882. \begin_layout Subsection
  883. ChIP-seq blacklisting is important
  884. \end_layout
  885. \begin_layout Standard
  886. \begin_inset Float figure
  887. wide false
  888. sideways false
  889. status open
  890. \begin_layout Plain Layout
  891. \align center
  892. \begin_inset Float figure
  893. wide false
  894. sideways false
  895. status open
  896. \begin_layout Plain Layout
  897. \align center
  898. \begin_inset Graphics
  899. filename graphics/CD4-csaw/csaw/CCF-plots-PAGE2-CROP.pdf
  900. lyxscale 50
  901. height 40theight%
  902. groupId ccf-subfig
  903. \end_inset
  904. \end_layout
  905. \begin_layout Plain Layout
  906. \begin_inset Caption Standard
  907. \begin_layout Plain Layout
  908. \series bold
  909. \begin_inset CommandInset label
  910. LatexCommand label
  911. name "fig:CCF-with-blacklist"
  912. \end_inset
  913. Cross-correlation plots with blacklisted reads removed
  914. \end_layout
  915. \end_inset
  916. \end_layout
  917. \end_inset
  918. \end_layout
  919. \begin_layout Plain Layout
  920. \align center
  921. \begin_inset Float figure
  922. wide false
  923. sideways false
  924. status open
  925. \begin_layout Plain Layout
  926. \align center
  927. \begin_inset Graphics
  928. filename graphics/CD4-csaw/csaw/CCF-plots-noBL-PAGE2-CROP.pdf
  929. lyxscale 50
  930. height 40theight%
  931. groupId ccf-subfig
  932. \end_inset
  933. \end_layout
  934. \begin_layout Plain Layout
  935. \begin_inset Caption Standard
  936. \begin_layout Plain Layout
  937. \series bold
  938. \begin_inset CommandInset label
  939. LatexCommand label
  940. name "fig:CCF-without-blacklist"
  941. \end_inset
  942. Cross-correlation plots without removing blacklisted reads
  943. \end_layout
  944. \end_inset
  945. \end_layout
  946. \end_inset
  947. \end_layout
  948. \begin_layout Plain Layout
  949. \begin_inset Caption Standard
  950. \begin_layout Plain Layout
  951. \series bold
  952. \begin_inset CommandInset label
  953. LatexCommand label
  954. name "fig:CCF-master"
  955. \end_inset
  956. Strand cross-correlation plots for ChIP-seq data.
  957. \end_layout
  958. \end_inset
  959. \end_layout
  960. \end_inset
  961. \end_layout
  962. \begin_layout Subsection
  963. ChIP-seq peak calling
  964. \end_layout
  965. \begin_layout Standard
  966. \begin_inset Float figure
  967. wide false
  968. sideways false
  969. status open
  970. \begin_layout Plain Layout
  971. \align center
  972. \begin_inset Float figure
  973. wide false
  974. sideways false
  975. status collapsed
  976. \begin_layout Plain Layout
  977. \align center
  978. \begin_inset Graphics
  979. filename graphics/CD4-csaw/IDR/D4659vsD5053_epic-PAGE1-CROP.pdf
  980. lyxscale 50
  981. width 45col%
  982. groupId idr-rc-subfig
  983. \end_inset
  984. \end_layout
  985. \begin_layout Plain Layout
  986. \begin_inset Caption Standard
  987. \begin_layout Plain Layout
  988. Peak ranks from SICER peak caller
  989. \end_layout
  990. \end_inset
  991. \end_layout
  992. \begin_layout Plain Layout
  993. \end_layout
  994. \end_inset
  995. \begin_inset space \hfill{}
  996. \end_inset
  997. \begin_inset Float figure
  998. wide false
  999. sideways false
  1000. status collapsed
  1001. \begin_layout Plain Layout
  1002. \align center
  1003. \begin_inset Graphics
  1004. filename graphics/CD4-csaw/IDR/D4659vsD5053_macs-PAGE1-CROP.pdf
  1005. lyxscale 50
  1006. width 45col%
  1007. groupId idr-rc-subfig
  1008. \end_inset
  1009. \end_layout
  1010. \begin_layout Plain Layout
  1011. \begin_inset Caption Standard
  1012. \begin_layout Plain Layout
  1013. Peak ranks from MACS peak caller
  1014. \end_layout
  1015. \end_inset
  1016. \end_layout
  1017. \end_inset
  1018. \end_layout
  1019. \begin_layout Plain Layout
  1020. \begin_inset Caption Standard
  1021. \begin_layout Plain Layout
  1022. \series bold
  1023. \begin_inset CommandInset label
  1024. LatexCommand label
  1025. name "fig:IDR-rank-consist"
  1026. \end_inset
  1027. Irreproducible Discovery Rate rank consistency plots for H3K27me3.
  1028. \series default
  1029. Peaks are ranked by the scores assigned by the peak caller in each donor,
  1030. and then the ranks for two donors are plotted against each other.
  1031. Higher ranks are more significant (top right).
  1032. Peaks meeting various thresholds of reproducibility, measured by the irreproduc
  1033. ible discovery rate (IDR), are shaded accordingly.
  1034. [This could be explained better, or refer to the text.]
  1035. \end_layout
  1036. \end_inset
  1037. \end_layout
  1038. \begin_layout Plain Layout
  1039. \end_layout
  1040. \end_inset
  1041. \end_layout
  1042. \begin_layout Standard
  1043. Figure
  1044. \begin_inset CommandInset ref
  1045. LatexCommand ref
  1046. reference "fig:IDR-rank-consist"
  1047. plural "false"
  1048. caps "false"
  1049. noprefix "false"
  1050. \end_inset
  1051. shows the IDR rank-consistency plots for peaks called in an arbitrarily-chosen
  1052. pair of donors.
  1053. When the peaks for each donor are ranked according to their scores, SICER
  1054. produces much more reproducible results between donors.
  1055. This is consistent with SICER's stated goal of identifying broad peaks,
  1056. in contrast to MACS, which is designed for identifying sharp peaks.
  1057. Based on this observation, the SICER peak calls were used for all downstream
  1058. analyses that involved ChIP-seq peaks.
  1059. \end_layout
  1060. \begin_layout Subsection
  1061. ChIP-seq normalization
  1062. \end_layout
  1063. \begin_layout Standard
  1064. \begin_inset Float figure
  1065. wide false
  1066. sideways false
  1067. status collapsed
  1068. \begin_layout Plain Layout
  1069. \align center
  1070. \begin_inset Graphics
  1071. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-sample-MAplot-bins-CROP.png
  1072. lyxscale 25
  1073. width 100col%
  1074. groupId colwidth-raster
  1075. \end_inset
  1076. \end_layout
  1077. \begin_layout Plain Layout
  1078. \begin_inset Caption Standard
  1079. \begin_layout Plain Layout
  1080. \series bold
  1081. \begin_inset CommandInset label
  1082. LatexCommand label
  1083. name "fig:MA-plot-bigbins"
  1084. \end_inset
  1085. MA plot of H3K4me2 read counts in 10kb bins for two arbitrary samples.
  1086. \end_layout
  1087. \end_inset
  1088. \end_layout
  1089. \end_inset
  1090. \end_layout
  1091. \begin_layout Subsection
  1092. ChIP-seq must be corrected for hidden confounding factors
  1093. \end_layout
  1094. \begin_layout Standard
  1095. \begin_inset Float figure
  1096. wide false
  1097. sideways false
  1098. status open
  1099. \begin_layout Plain Layout
  1100. \begin_inset Float figure
  1101. wide false
  1102. sideways false
  1103. status collapsed
  1104. \begin_layout Plain Layout
  1105. \align center
  1106. \begin_inset Graphics
  1107. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-PCA-raw-CROP.png
  1108. lyxscale 25
  1109. width 45col%
  1110. groupId pcoa-subfig
  1111. \end_inset
  1112. \end_layout
  1113. \begin_layout Plain Layout
  1114. \begin_inset Caption Standard
  1115. \begin_layout Plain Layout
  1116. \series bold
  1117. \begin_inset CommandInset label
  1118. LatexCommand label
  1119. name "fig:PCoA-H3K4me2-bad"
  1120. \end_inset
  1121. H3K4me2, no correction
  1122. \end_layout
  1123. \end_inset
  1124. \end_layout
  1125. \end_inset
  1126. \begin_inset space \hfill{}
  1127. \end_inset
  1128. \begin_inset Float figure
  1129. wide false
  1130. sideways false
  1131. status collapsed
  1132. \begin_layout Plain Layout
  1133. \align center
  1134. \begin_inset Graphics
  1135. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-PCA-SVsub-CROP.png
  1136. lyxscale 25
  1137. width 45col%
  1138. groupId pcoa-subfig
  1139. \end_inset
  1140. \end_layout
  1141. \begin_layout Plain Layout
  1142. \begin_inset Caption Standard
  1143. \begin_layout Plain Layout
  1144. \series bold
  1145. \begin_inset CommandInset label
  1146. LatexCommand label
  1147. name "fig:PCoA-H3K4me2-good"
  1148. \end_inset
  1149. H3K4me2, SVs subtracted
  1150. \end_layout
  1151. \end_inset
  1152. \end_layout
  1153. \end_inset
  1154. \end_layout
  1155. \begin_layout Plain Layout
  1156. \begin_inset Float figure
  1157. wide false
  1158. sideways false
  1159. status collapsed
  1160. \begin_layout Plain Layout
  1161. \align center
  1162. \begin_inset Graphics
  1163. filename graphics/CD4-csaw/ChIP-seq/H3K4me3-PCA-raw-CROP.png
  1164. lyxscale 25
  1165. width 45col%
  1166. groupId pcoa-subfig
  1167. \end_inset
  1168. \end_layout
  1169. \begin_layout Plain Layout
  1170. \begin_inset Caption Standard
  1171. \begin_layout Plain Layout
  1172. \series bold
  1173. \begin_inset CommandInset label
  1174. LatexCommand label
  1175. name "fig:PCoA-H3K4me3-bad"
  1176. \end_inset
  1177. H3K4me3, no correction
  1178. \end_layout
  1179. \end_inset
  1180. \end_layout
  1181. \end_inset
  1182. \begin_inset space \hfill{}
  1183. \end_inset
  1184. \begin_inset Float figure
  1185. wide false
  1186. sideways false
  1187. status collapsed
  1188. \begin_layout Plain Layout
  1189. \align center
  1190. \begin_inset Graphics
  1191. filename graphics/CD4-csaw/ChIP-seq/H3K4me3-PCA-SVsub-CROP.png
  1192. lyxscale 25
  1193. width 45col%
  1194. groupId pcoa-subfig
  1195. \end_inset
  1196. \end_layout
  1197. \begin_layout Plain Layout
  1198. \begin_inset Caption Standard
  1199. \begin_layout Plain Layout
  1200. \series bold
  1201. \begin_inset CommandInset label
  1202. LatexCommand label
  1203. name "fig:PCoA-H3K4me3-good"
  1204. \end_inset
  1205. H3K4me3, SVs subtracted
  1206. \end_layout
  1207. \end_inset
  1208. \end_layout
  1209. \end_inset
  1210. \end_layout
  1211. \begin_layout Plain Layout
  1212. \begin_inset Float figure
  1213. wide false
  1214. sideways false
  1215. status collapsed
  1216. \begin_layout Plain Layout
  1217. \align center
  1218. \begin_inset Graphics
  1219. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-PCA-raw-CROP.png
  1220. lyxscale 25
  1221. width 45col%
  1222. groupId pcoa-subfig
  1223. \end_inset
  1224. \end_layout
  1225. \begin_layout Plain Layout
  1226. \begin_inset Caption Standard
  1227. \begin_layout Plain Layout
  1228. \series bold
  1229. \begin_inset CommandInset label
  1230. LatexCommand label
  1231. name "fig:PCoA-H3K27me3-bad"
  1232. \end_inset
  1233. H3K27me3, no correction
  1234. \end_layout
  1235. \end_inset
  1236. \end_layout
  1237. \end_inset
  1238. \begin_inset space \hfill{}
  1239. \end_inset
  1240. \begin_inset Float figure
  1241. wide false
  1242. sideways false
  1243. status collapsed
  1244. \begin_layout Plain Layout
  1245. \align center
  1246. \begin_inset Graphics
  1247. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-PCA-SVsub-CROP.png
  1248. lyxscale 25
  1249. width 45col%
  1250. groupId pcoa-subfig
  1251. \end_inset
  1252. \end_layout
  1253. \begin_layout Plain Layout
  1254. \begin_inset Caption Standard
  1255. \begin_layout Plain Layout
  1256. \series bold
  1257. \begin_inset CommandInset label
  1258. LatexCommand label
  1259. name "fig:PCoA-H3K27me3-good"
  1260. \end_inset
  1261. H3K27me3, SVs subtracted
  1262. \end_layout
  1263. \end_inset
  1264. \end_layout
  1265. \end_inset
  1266. \end_layout
  1267. \begin_layout Plain Layout
  1268. \begin_inset Caption Standard
  1269. \begin_layout Plain Layout
  1270. \series bold
  1271. \begin_inset CommandInset label
  1272. LatexCommand label
  1273. name "fig:PCoA-ChIP"
  1274. \end_inset
  1275. PCoA plots of ChIP-seq sliding window data, before and after subtracting
  1276. surrogate variables (SVs).
  1277. \end_layout
  1278. \end_inset
  1279. \end_layout
  1280. \begin_layout Plain Layout
  1281. \end_layout
  1282. \end_inset
  1283. \end_layout
  1284. \begin_layout Itemize
  1285. Figures showing BCV plots with and without SVA for each histone mark?
  1286. \end_layout
  1287. \begin_layout Subsection
  1288. MOFA recovers biologically relevant variation from blind analysis by correlating
  1289. across datasets
  1290. \end_layout
  1291. \begin_layout Standard
  1292. \begin_inset ERT
  1293. status open
  1294. \begin_layout Plain Layout
  1295. \backslash
  1296. afterpage{
  1297. \end_layout
  1298. \begin_layout Plain Layout
  1299. \backslash
  1300. begin{landscape}
  1301. \end_layout
  1302. \end_inset
  1303. \end_layout
  1304. \begin_layout Standard
  1305. \begin_inset Float figure
  1306. wide false
  1307. sideways false
  1308. status open
  1309. \begin_layout Plain Layout
  1310. \begin_inset Float figure
  1311. wide false
  1312. sideways false
  1313. status open
  1314. \begin_layout Plain Layout
  1315. \align center
  1316. \begin_inset Graphics
  1317. filename graphics/CD4-csaw/MOFA-varExplaiend-matrix-CROP.png
  1318. lyxscale 25
  1319. width 45col%
  1320. groupId mofa-subfig
  1321. \end_inset
  1322. \end_layout
  1323. \begin_layout Plain Layout
  1324. \begin_inset Caption Standard
  1325. \begin_layout Plain Layout
  1326. \series bold
  1327. \begin_inset CommandInset label
  1328. LatexCommand label
  1329. name "fig:mofa-varexplained"
  1330. \end_inset
  1331. Variance explained in each data set by each latent factor estimated by MOFA.
  1332. \series default
  1333. For each latent factor (LF) learned by MOFA, the variance explained by
  1334. that factor in each data set (
  1335. \begin_inset Quotes eld
  1336. \end_inset
  1337. view
  1338. \begin_inset Quotes erd
  1339. \end_inset
  1340. ) is shown by the shading of the cells in the lower section.
  1341. The upper section shows the total fraction of each data set's variance
  1342. that is explained by all LFs combined.
  1343. \end_layout
  1344. \end_inset
  1345. \end_layout
  1346. \end_inset
  1347. \begin_inset space \hfill{}
  1348. \end_inset
  1349. \begin_inset Float figure
  1350. wide false
  1351. sideways false
  1352. status open
  1353. \begin_layout Plain Layout
  1354. \align center
  1355. \begin_inset Graphics
  1356. filename graphics/CD4-csaw/MOFA-LF-scatter-CROP.png
  1357. lyxscale 25
  1358. width 45col%
  1359. groupId mofa-subfig
  1360. \end_inset
  1361. \end_layout
  1362. \begin_layout Plain Layout
  1363. \begin_inset Caption Standard
  1364. \begin_layout Plain Layout
  1365. \series bold
  1366. \begin_inset CommandInset label
  1367. LatexCommand label
  1368. name "fig:mofa-lf-scatter"
  1369. \end_inset
  1370. Scatter plots of specific pairs of MOFA latent factors.
  1371. \series default
  1372. LFs 1, 4, and 5 explain substantial variation in all data sets, so they
  1373. are plotted against each other in order to reveal patterns of variation
  1374. that are shared across all data sets.
  1375. \end_layout
  1376. \end_inset
  1377. \end_layout
  1378. \end_inset
  1379. \end_layout
  1380. \begin_layout Plain Layout
  1381. \begin_inset Caption Standard
  1382. \begin_layout Plain Layout
  1383. \series bold
  1384. \begin_inset CommandInset label
  1385. LatexCommand label
  1386. name "fig:MOFA-master"
  1387. \end_inset
  1388. MOFA latent factors separate technical confounders from biologically relevant variation.
  1389. \end_layout
  1390. \end_inset
  1391. \end_layout
  1392. \end_inset
  1393. \end_layout
  1394. \begin_layout Standard
  1395. \begin_inset ERT
  1396. status open
  1397. \begin_layout Plain Layout
  1398. \backslash
  1399. end{landscape}
  1400. \end_layout
  1401. \begin_layout Plain Layout
  1402. }
  1403. \end_layout
  1404. \end_inset
  1405. \end_layout
  1406. \begin_layout Itemize
  1407. Figure
  1408. \begin_inset CommandInset ref
  1409. LatexCommand ref
  1410. reference "fig:mofa-varexplained"
  1411. plural "false"
  1412. caps "false"
  1413. noprefix "false"
  1414. \end_inset
  1415. shows that LFs 1, 4, and 5 explain substantial variation in all data sets
  1416. \end_layout
  1417. \begin_layout Itemize
  1418. Figure
  1419. \begin_inset CommandInset ref
  1420. LatexCommand ref
  1421. reference "fig:mofa-lf-scatter"
  1422. plural "false"
  1423. caps "false"
  1424. noprefix "false"
  1425. \end_inset
  1426. shows that those same 3 LFs (1, 4, & 5) also correlate best with the experimen
  1427. tal factors (cell type & time point)
  1428. \end_layout
  1429. \begin_layout Itemize
  1430. LF2 is clearly the RNA-seq batch effect
  1431. \end_layout
  1432. \begin_layout Standard
  1433. \begin_inset Float figure
  1434. wide false
  1435. sideways false
  1436. status collapsed
  1437. \begin_layout Plain Layout
  1438. \align center
  1439. \begin_inset Graphics
  1440. filename graphics/CD4-csaw/MOFA-batch-correct-CROP.png
  1441. lyxscale 25
  1442. width 100col%
  1443. groupId colwidth-raster
  1444. \end_inset
  1445. \end_layout
  1446. \begin_layout Plain Layout
  1447. \begin_inset Caption Standard
  1448. \begin_layout Plain Layout
  1449. \series bold
  1450. \begin_inset CommandInset label
  1451. LatexCommand label
  1452. name "fig:mofa-batchsub"
  1453. \end_inset
  1454. Result of RNA-seq batch-correction using MOFA latent factors
  1455. \end_layout
  1456. \end_inset
  1457. \end_layout
  1458. \end_inset
  1459. \end_layout
  1460. \begin_layout Itemize
  1461. Attempting to remove the effect of LF2 (Figure
  1462. \begin_inset CommandInset ref
  1463. LatexCommand ref
  1464. reference "fig:mofa-batchsub"
  1465. plural "false"
  1466. caps "false"
  1467. noprefix "false"
  1468. \end_inset
  1469. ) results in batch correction comparable to ComBat (Figure
  1470. \begin_inset CommandInset ref
  1471. LatexCommand ref
  1472. reference "fig:RNA-PCA-ComBat-batchsub"
  1473. plural "false"
  1474. caps "false"
  1475. noprefix "false"
  1476. \end_inset
  1477. )
  1478. \end_layout
  1479. \begin_layout Itemize
  1480. MOFA was able to do this batch subtraction without directly using the sample
  1481. labels (sample labels were used implicitly to select which factor to subtract)
  1482. \end_layout
  1483. \begin_layout Itemize
  1484. Similarity of results shows that batch correction can't get much better
  1485. than ComBat (despite ComBat ignoring time point)
  1486. \end_layout
  1487. \begin_layout Subsection
  1488. MOFA does some interesting stuff but is mostly confirmatory in this context
  1489. \end_layout
  1490. \begin_layout Standard
  1491. \begin_inset Flex TODO Note (inline)
  1492. status open
  1493. \begin_layout Plain Layout
  1494. MOFA should be a footnote to something else, not its own point
  1495. \end_layout
  1496. \end_inset
  1497. \end_layout
  1498. \begin_layout Standard
  1499. \begin_inset Flex TODO Note (inline)
  1500. status open
  1501. \begin_layout Plain Layout
  1502. Combine with previous subsection
  1503. \end_layout
  1504. \end_inset
  1505. \end_layout
  1506. \begin_layout Itemize
  1507. MOFA shows great promise for accelerating discovery of major biological
  1508. effects in multi-omics datasets
  1509. \end_layout
  1510. \begin_deeper
  1511. \begin_layout Itemize
  1512. MOFA successfully separates biologically relevant patterns of variation
  1513. from technical confounding factors without knowing the sample labels, by
  1514. finding latent factors that explain variation across multiple data sets.
  1515. \end_layout
  1516. \begin_layout Itemize
  1517. MOFA was added to this analysis late and played primarily a confirmatory
  1518. role, but it was able to confirm earlier conclusions with much less prior
  1519. information (no sample labels) and much less analyst effort/input
  1520. \end_layout
  1521. \begin_layout Itemize
  1522. Less input from analyst means less opportunity to introduce unwanted bias
  1523. into results
  1524. \end_layout
  1525. \begin_layout Itemize
  1526. MOFA confirmed that the already-implemented batch correction in the RNA-seq
  1527. data was already performing as well as possible given the limitations of
  1528. the data
  1529. \end_layout
  1530. \end_deeper
  1531. \begin_layout Section
  1532. Results
  1533. \end_layout
  1534. \begin_layout Standard
  1535. \begin_inset Flex TODO Note (inline)
  1536. status open
  1537. \begin_layout Plain Layout
  1538. Focus on what hypotheses were tested, then select figures that show how
  1539. those hypotheses were tested, even if the result is a negative.
  1540. Not every interesting result needs to be in here.
  1541. Chapter should tell a story.
  1542. \end_layout
  1543. \end_inset
  1544. \end_layout
  1545. \begin_layout Standard
  1546. \begin_inset Flex TODO Note (inline)
  1547. status open
  1548. \begin_layout Plain Layout
  1549. Maybe reorder these sections to do RNA-seq, then ChIP-seq, then combined
  1550. analyses?
  1551. \end_layout
  1552. \end_inset
  1553. \end_layout
  1554. \begin_layout Subsection
  1555. Interpretation of RNA-seq analysis is limited by a major confounding factor
  1556. \end_layout
  1557. \begin_layout Standard
  1558. \begin_inset Float table
  1559. wide false
  1560. sideways false
  1561. status collapsed
  1562. \begin_layout Plain Layout
  1563. \align center
  1564. \begin_inset Tabular
  1565. <lyxtabular version="3" rows="11" columns="3">
  1566. <features tabularvalignment="middle">
  1567. <column alignment="center" valignment="top">
  1568. <column alignment="center" valignment="top">
  1569. <column alignment="center" valignment="top">
  1570. <row>
  1571. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1572. \begin_inset Text
  1573. \begin_layout Plain Layout
  1574. Test
  1575. \end_layout
  1576. \end_inset
  1577. </cell>
  1578. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1579. \begin_inset Text
  1580. \begin_layout Plain Layout
  1581. Est.
  1582. non-null
  1583. \end_layout
  1584. \end_inset
  1585. </cell>
  1586. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  1587. \begin_inset Text
  1588. \begin_layout Plain Layout
  1589. \begin_inset Formula $\mathrm{FDR}\le10\%$
  1590. \end_inset
  1591. \end_layout
  1592. \end_inset
  1593. </cell>
  1594. </row>
  1595. <row>
  1596. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1597. \begin_inset Text
  1598. \begin_layout Plain Layout
  1599. Naive Day 0 vs Day 1
  1600. \end_layout
  1601. \end_inset
  1602. </cell>
  1603. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1604. \begin_inset Text
  1605. \begin_layout Plain Layout
  1606. 5992
  1607. \end_layout
  1608. \end_inset
  1609. </cell>
  1610. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1611. \begin_inset Text
  1612. \begin_layout Plain Layout
  1613. 1613
  1614. \end_layout
  1615. \end_inset
  1616. </cell>
  1617. </row>
  1618. <row>
  1619. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1620. \begin_inset Text
  1621. \begin_layout Plain Layout
  1622. Naive Day 0 vs Day 5
  1623. \end_layout
  1624. \end_inset
  1625. </cell>
  1626. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1627. \begin_inset Text
  1628. \begin_layout Plain Layout
  1629. 3038
  1630. \end_layout
  1631. \end_inset
  1632. </cell>
  1633. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1634. \begin_inset Text
  1635. \begin_layout Plain Layout
  1636. 32
  1637. \end_layout
  1638. \end_inset
  1639. </cell>
  1640. </row>
  1641. <row>
  1642. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1643. \begin_inset Text
  1644. \begin_layout Plain Layout
  1645. Naive Day 0 vs Day 14
  1646. \end_layout
  1647. \end_inset
  1648. </cell>
  1649. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1650. \begin_inset Text
  1651. \begin_layout Plain Layout
  1652. 1870
  1653. \end_layout
  1654. \end_inset
  1655. </cell>
  1656. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1657. \begin_inset Text
  1658. \begin_layout Plain Layout
  1659. 190
  1660. \end_layout
  1661. \end_inset
  1662. </cell>
  1663. </row>
  1664. <row>
  1665. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1666. \begin_inset Text
  1667. \begin_layout Plain Layout
  1668. Memory Day 0 vs Day 1
  1669. \end_layout
  1670. \end_inset
  1671. </cell>
  1672. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1673. \begin_inset Text
  1674. \begin_layout Plain Layout
  1675. 3195
  1676. \end_layout
  1677. \end_inset
  1678. </cell>
  1679. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1680. \begin_inset Text
  1681. \begin_layout Plain Layout
  1682. 411
  1683. \end_layout
  1684. \end_inset
  1685. </cell>
  1686. </row>
  1687. <row>
  1688. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1689. \begin_inset Text
  1690. \begin_layout Plain Layout
  1691. Memory Day 0 vs Day 5
  1692. \end_layout
  1693. \end_inset
  1694. </cell>
  1695. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1696. \begin_inset Text
  1697. \begin_layout Plain Layout
  1698. 2688
  1699. \end_layout
  1700. \end_inset
  1701. </cell>
  1702. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1703. \begin_inset Text
  1704. \begin_layout Plain Layout
  1705. 18
  1706. \end_layout
  1707. \end_inset
  1708. </cell>
  1709. </row>
  1710. <row>
  1711. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1712. \begin_inset Text
  1713. \begin_layout Plain Layout
  1714. Memory Day 0 vs Day 14
  1715. \end_layout
  1716. \end_inset
  1717. </cell>
  1718. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1719. \begin_inset Text
  1720. \begin_layout Plain Layout
  1721. 1911
  1722. \end_layout
  1723. \end_inset
  1724. </cell>
  1725. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1726. \begin_inset Text
  1727. \begin_layout Plain Layout
  1728. 227
  1729. \end_layout
  1730. \end_inset
  1731. </cell>
  1732. </row>
  1733. <row>
  1734. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1735. \begin_inset Text
  1736. \begin_layout Plain Layout
  1737. Day 0 Naive vs Memory
  1738. \end_layout
  1739. \end_inset
  1740. </cell>
  1741. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1742. \begin_inset Text
  1743. \begin_layout Plain Layout
  1744. 0
  1745. \end_layout
  1746. \end_inset
  1747. </cell>
  1748. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1749. \begin_inset Text
  1750. \begin_layout Plain Layout
  1751. 2
  1752. \end_layout
  1753. \end_inset
  1754. </cell>
  1755. </row>
  1756. <row>
  1757. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1758. \begin_inset Text
  1759. \begin_layout Plain Layout
  1760. Day 1 Naive vs Memory
  1761. \end_layout
  1762. \end_inset
  1763. </cell>
  1764. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1765. \begin_inset Text
  1766. \begin_layout Plain Layout
  1767. 9167
  1768. \end_layout
  1769. \end_inset
  1770. </cell>
  1771. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1772. \begin_inset Text
  1773. \begin_layout Plain Layout
  1774. 5532
  1775. \end_layout
  1776. \end_inset
  1777. </cell>
  1778. </row>
  1779. <row>
  1780. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1781. \begin_inset Text
  1782. \begin_layout Plain Layout
  1783. Day 5 Naive vs Memory
  1784. \end_layout
  1785. \end_inset
  1786. </cell>
  1787. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  1788. \begin_inset Text
  1789. \begin_layout Plain Layout
  1790. 0
  1791. \end_layout
  1792. \end_inset
  1793. </cell>
  1794. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  1795. \begin_inset Text
  1796. \begin_layout Plain Layout
  1797. 0
  1798. \end_layout
  1799. \end_inset
  1800. </cell>
  1801. </row>
  1802. <row>
  1803. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1804. \begin_inset Text
  1805. \begin_layout Plain Layout
  1806. Day 14 Naive vs Memory
  1807. \end_layout
  1808. \end_inset
  1809. </cell>
  1810. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  1811. \begin_inset Text
  1812. \begin_layout Plain Layout
  1813. 6446
  1814. \end_layout
  1815. \end_inset
  1816. </cell>
  1817. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  1818. \begin_inset Text
  1819. \begin_layout Plain Layout
  1820. 2319
  1821. \end_layout
  1822. \end_inset
  1823. </cell>
  1824. </row>
  1825. </lyxtabular>
  1826. \end_inset
  1827. \end_layout
  1828. \begin_layout Plain Layout
  1829. \begin_inset Caption Standard
  1830. \begin_layout Plain Layout
  1831. \series bold
  1832. \begin_inset CommandInset label
  1833. LatexCommand label
  1834. name "tab:Estimated-and-detected-rnaseq"
  1835. \end_inset
  1836. Estimated and detected differentially expressed genes.
  1837. \series default
  1838. \begin_inset Quotes eld
  1839. \end_inset
  1840. Test
  1841. \begin_inset Quotes erd
  1842. \end_inset
  1843. : Which sample groups were compared;
  1844. \begin_inset Quotes eld
  1845. \end_inset
  1846. Est. non-null
  1847. \begin_inset Quotes erd
  1848. \end_inset
  1849. : Estimated number of differentially expressed genes, using the method of
  1850. averaging local FDR values
  1851. \begin_inset CommandInset citation
  1852. LatexCommand cite
  1853. key "Phipson2013Thesis"
  1854. literal "false"
  1855. \end_inset
  1856. ;
  1857. \begin_inset Quotes eld
  1858. \end_inset
  1859. \begin_inset Formula $\mathrm{FDR}\le10\%$
  1860. \end_inset
  1861. \begin_inset Quotes erd
  1862. \end_inset
  1863. : Number of significantly differentially expressed genes at an FDR threshold
  1864. of 10%.
  1865. The total number of genes tested was 16707.
  1866. \end_layout
  1867. \end_inset
  1868. \end_layout
  1869. \end_inset
  1870. \end_layout
  1871. \begin_layout Standard
  1872. \begin_inset Float figure
  1873. wide false
  1874. sideways false
  1875. status collapsed
  1876. \begin_layout Plain Layout
  1877. \align center
  1878. \begin_inset Graphics
  1879. filename graphics/CD4-csaw/RNA-seq/PCA-final-12-CROP.png
  1880. lyxscale 25
  1881. width 100col%
  1882. groupId colwidth-raster
  1883. \end_inset
  1884. \end_layout
  1885. \begin_layout Plain Layout
  1886. \begin_inset Caption Standard
  1887. \begin_layout Plain Layout
  1888. \series bold
  1889. \begin_inset CommandInset label
  1890. LatexCommand label
  1891. name "fig:rna-pca-final"
  1892. \end_inset
  1893. PCoA plot of RNA-seq samples after ComBat batch correction.
  1894. \series default
  1895. Each point represents an individual sample.
  1896. Samples with the same combination of cell type and time point are encircled
  1897. with a shaded region to aid in visual identification of the sample groups.
  1898. Samples of the same cell type from the same donor are connected by lines
  1899. to indicate the
  1900. \begin_inset Quotes eld
  1901. \end_inset
  1902. trajectory
  1903. \begin_inset Quotes erd
  1904. \end_inset
  1905. of each donor's cells over time in PCoA space.
  1906. \end_layout
  1907. \end_inset
  1908. \end_layout
  1909. \begin_layout Plain Layout
  1910. \end_layout
  1911. \end_inset
  1912. \end_layout
  1913. \begin_layout Standard
  1914. Genes called present in the RNA-seq data were tested for differential expression
  1915. between all time points and cell types.
  1916. The counts of differentially expressed genes are shown in Table
  1917. \begin_inset CommandInset ref
  1918. LatexCommand ref
  1919. reference "tab:Estimated-and-detected-rnaseq"
  1920. plural "false"
  1921. caps "false"
  1922. noprefix "false"
  1923. \end_inset
  1924. .
  1925. Notably, all the results for Day 0 and Day 5 have substantially fewer genes
  1926. called differentially expressed than any of the results for other time
  1927. points.
  1928. This is an unfortunate result of the difference in sample quality between
  1929. the two batches of RNA-seq data.
  1930. All the samples in Batch 1, which includes all the samples from Days 0
  1931. and 5, have substantially more variability than the samples in Batch 2,
  1932. which includes the other time points.
  1933. This is reflected in the substantially higher weights assigned to Batch
  1934. 2 (Figure
  1935. \begin_inset CommandInset ref
  1936. LatexCommand ref
  1937. reference "fig:RNA-seq-weights-vs-covars"
  1938. plural "false"
  1939. caps "false"
  1940. noprefix "false"
  1941. \end_inset
  1942. ).
  1943. The batch effect has both a systematic component and a random noise component.
  1944. While the systematic component was subtracted out using ComBat (Figure
  1945. \begin_inset CommandInset ref
  1946. LatexCommand ref
  1947. reference "fig:RNA-PCA"
  1948. plural "false"
  1949. caps "false"
  1950. noprefix "false"
  1951. \end_inset
  1952. ), no such correction is possible for the noise component: Batch 1 simply
  1953. has substantially more random noise in it, which reduces the statistical
  1954. power for any differential expression tests involving samples in that batch.
  1955. \end_layout
  1956. \begin_layout Standard
  1957. Despite the difficulty in detecting specific differentially expressed genes,
  1958. there is still evidence that differential expression is present for these
  1959. time points.
  1960. In Figure
  1961. \begin_inset CommandInset ref
  1962. LatexCommand ref
  1963. reference "fig:rna-pca-final"
  1964. plural "false"
  1965. caps "false"
  1966. noprefix "false"
  1967. \end_inset
  1968. , there is a clear separation between naive and memory samples at Day 0,
  1969. despite the fact that only 2 genes were significantly differentially expressed
  1970. for this comparison.
  1971. Similarly, the small numbers of genes detected for the Day 0 vs Day 5 compariso
  1972. ns do not reflect the large separation between these time points in Figure
  1973. \begin_inset CommandInset ref
  1974. LatexCommand ref
  1975. reference "fig:rna-pca-final"
  1976. plural "false"
  1977. caps "false"
  1978. noprefix "false"
  1979. \end_inset
  1980. .
  1981. In addition, the MOFA latent factor plots in Figure
  1982. \begin_inset CommandInset ref
  1983. LatexCommand ref
  1984. reference "fig:mofa-lf-scatter"
  1985. plural "false"
  1986. caps "false"
  1987. noprefix "false"
  1988. \end_inset
  1989. show that the dominant latent factors correlate with cell type and time point.
  1990. This suggests that there is indeed a differential expression signal present
  1991. in the data for these comparisons, but the large variability in the Batch
  1992. 1 samples obfuscates this signal at the individual gene level.
  1993. As a result, it is impossible to make any meaningful statements about the
  1994. \begin_inset Quotes eld
  1995. \end_inset
  1996. size
  1997. \begin_inset Quotes erd
  1998. \end_inset
  1999. of the gene signature for any time point, since the number of significant
  2000. genes as well as the estimated number of differentially expressed genes
  2001. depends so strongly on the variations in sample quality in addition to
  2002. the size of the differential expression signal in the data.
  2003. Gene-set enrichment analyses are impractical for the same reason.
  2004. However, analyses looking at genome-wide patterns of expression are still
  2005. practical.
  2006. \end_layout
  2007. \begin_layout Subsection
  2008. H3K4 and H3K27 methylation occur in broad regions and are enriched near
  2009. promoters
  2010. \end_layout
  2011. \begin_layout Standard
  2012. \begin_inset Float table
  2013. wide false
  2014. sideways false
  2015. status open
  2016. \begin_layout Plain Layout
  2017. \align center
  2018. \begin_inset Flex TODO Note (inline)
  2019. status open
  2020. \begin_layout Plain Layout
  2021. Also get
  2022. \emph on
  2023. median
  2024. \emph default
  2025. peak width and maybe other quantiles (25%, 75%)
  2026. \end_layout
  2027. \end_inset
  2028. \end_layout
  2029. \begin_layout Plain Layout
  2030. \align center
  2031. \begin_inset Tabular
  2032. <lyxtabular version="3" rows="4" columns="5">
  2033. <features tabularvalignment="middle">
  2034. <column alignment="center" valignment="top">
  2035. <column alignment="center" valignment="top">
  2036. <column alignment="center" valignment="top">
  2037. <column alignment="center" valignment="top">
  2038. <column alignment="center" valignment="top">
  2039. <row>
  2040. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2041. \begin_inset Text
  2042. \begin_layout Plain Layout
  2043. Histone Mark
  2044. \end_layout
  2045. \end_inset
  2046. </cell>
  2047. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2048. \begin_inset Text
  2049. \begin_layout Plain Layout
  2050. # Peaks
  2051. \end_layout
  2052. \end_inset
  2053. </cell>
  2054. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2055. \begin_inset Text
  2056. \begin_layout Plain Layout
  2057. Mean peak width
  2058. \end_layout
  2059. \end_inset
  2060. </cell>
  2061. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2062. \begin_inset Text
  2063. \begin_layout Plain Layout
  2064. Genome coverage
  2065. \end_layout
  2066. \end_inset
  2067. </cell>
  2068. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2069. \begin_inset Text
  2070. \begin_layout Plain Layout
  2071. FRiP
  2072. \end_layout
  2073. \end_inset
  2074. </cell>
  2075. </row>
  2076. <row>
  2077. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2078. \begin_inset Text
  2079. \begin_layout Plain Layout
  2080. H3K4me2
  2081. \end_layout
  2082. \end_inset
  2083. </cell>
  2084. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2085. \begin_inset Text
  2086. \begin_layout Plain Layout
  2087. 14965
  2088. \end_layout
  2089. \end_inset
  2090. </cell>
  2091. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2092. \begin_inset Text
  2093. \begin_layout Plain Layout
  2094. 3970
  2095. \end_layout
  2096. \end_inset
  2097. </cell>
  2098. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2099. \begin_inset Text
  2100. \begin_layout Plain Layout
  2101. 1.92%
  2102. \end_layout
  2103. \end_inset
  2104. </cell>
  2105. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2106. \begin_inset Text
  2107. \begin_layout Plain Layout
  2108. 14.2%
  2109. \end_layout
  2110. \end_inset
  2111. </cell>
  2112. </row>
  2113. <row>
  2114. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2115. \begin_inset Text
  2116. \begin_layout Plain Layout
  2117. H3K4me3
  2118. \end_layout
  2119. \end_inset
  2120. </cell>
  2121. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2122. \begin_inset Text
  2123. \begin_layout Plain Layout
  2124. 6163
  2125. \end_layout
  2126. \end_inset
  2127. </cell>
  2128. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2129. \begin_inset Text
  2130. \begin_layout Plain Layout
  2131. 2946
  2132. \end_layout
  2133. \end_inset
  2134. </cell>
  2135. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2136. \begin_inset Text
  2137. \begin_layout Plain Layout
  2138. 0.588%
  2139. \end_layout
  2140. \end_inset
  2141. </cell>
  2142. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2143. \begin_inset Text
  2144. \begin_layout Plain Layout
  2145. 6.57%
  2146. \end_layout
  2147. \end_inset
  2148. </cell>
  2149. </row>
  2150. <row>
  2151. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2152. \begin_inset Text
  2153. \begin_layout Plain Layout
  2154. H3K27me3
  2155. \end_layout
  2156. \end_inset
  2157. </cell>
  2158. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2159. \begin_inset Text
  2160. \begin_layout Plain Layout
  2161. 18139
  2162. \end_layout
  2163. \end_inset
  2164. </cell>
  2165. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2166. \begin_inset Text
  2167. \begin_layout Plain Layout
  2168. 18967
  2169. \end_layout
  2170. \end_inset
  2171. </cell>
  2172. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2173. \begin_inset Text
  2174. \begin_layout Plain Layout
  2175. 11.1%
  2176. \end_layout
  2177. \end_inset
  2178. </cell>
  2179. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2180. \begin_inset Text
  2181. \begin_layout Plain Layout
  2182. 22.5%
  2183. \end_layout
  2184. \end_inset
  2185. </cell>
  2186. </row>
  2187. </lyxtabular>
  2188. \end_inset
  2189. \end_layout
  2190. \begin_layout Plain Layout
  2191. \begin_inset Caption Standard
  2192. \begin_layout Plain Layout
  2193. \series bold
  2194. \begin_inset CommandInset label
  2195. LatexCommand label
  2196. name "tab:peak-calling-summary"
  2197. \end_inset
  2198. Peak-calling summary.
  2199. \series default
  2200. For each histone mark, the number of peaks called using SICER at an IDR
  2201. threshold of ???, the mean width of those peaks, the fraction of the genome
  2202. covered by peaks, and the fraction of reads in peaks (FRiP).
  2203. \end_layout
  2204. \end_inset
  2205. \end_layout
  2206. \end_inset
  2207. \end_layout
  2208. \begin_layout Standard
  2209. Table
  2210. \begin_inset CommandInset ref
  2211. LatexCommand ref
  2212. reference "tab:peak-calling-summary"
  2213. plural "false"
  2214. caps "false"
  2215. noprefix "false"
  2216. \end_inset
  2217. gives a summary of the peak calling statistics for each histone mark.
  2218. Consistent with previous observations [CITATION NEEDED], all 3 histone
  2219. marks occur in broad regions spanning many consecutive nucleosomes, rather
  2220. than in sharp peaks as would be expected for a transcription factor or
  2221. other molecule that binds to specific sites.
  2222. This conclusion is further supported by Figure
  2223. \begin_inset CommandInset ref
  2224. LatexCommand ref
  2225. reference "fig:CCF-with-blacklist"
  2226. plural "false"
  2227. caps "false"
  2228. noprefix "false"
  2229. \end_inset
  2230. , in which a clear nucleosome-sized periodicity is visible in the cross-correlat
  2231. ion value for each sample, indicating that each time a given mark is present
  2232. on one histone, it is also likely to be found on adjacent histones.
  2233. H3K27me3 enrichment in particular is substantially broader than either
  2234. H3K4 mark, with a mean peak width of almost 19,000 bp.
  2235. This is also reflected in the periodicity observed in Figure
  2236. \begin_inset CommandInset ref
  2237. LatexCommand ref
  2238. reference "fig:CCF-with-blacklist"
  2239. plural "false"
  2240. caps "false"
  2241. noprefix "false"
  2242. \end_inset
  2243. , which remains strong much farther out for H3K27me3 than the other marks,
  2244. showing that H3K27me3 especially tends to be found on long runs of consecutive
  2245. histones.
  2246. \end_layout
  2247. \begin_layout Standard
  2248. \begin_inset Float figure
  2249. wide false
  2250. sideways false
  2251. status open
  2252. \begin_layout Plain Layout
  2253. \begin_inset Flex TODO Note (inline)
  2254. status open
  2255. \begin_layout Plain Layout
  2256. Ensure this figure uses the peak calls from the new analysis.
  2257. \end_layout
  2258. \end_inset
  2259. \end_layout
  2260. \begin_layout Plain Layout
  2261. \begin_inset Flex TODO Note (inline)
  2262. status open
  2263. \begin_layout Plain Layout
  2264. Need a control: shuffle all peaks and repeat, N times.
  2265. Do real vs shuffled control both in a top/bottom arrangement.
  2266. \end_layout
  2267. \end_inset
  2268. \end_layout
  2269. \begin_layout Plain Layout
  2270. \begin_inset Flex TODO Note (inline)
  2271. status open
  2272. \begin_layout Plain Layout
  2273. Consider counting TSS inside peaks as negative number indicating how far
  2274. \emph on
  2275. inside
  2276. \emph default
  2277. the peak the TSS is (i.e.
  2278. distance to nearest non-peak area).
  2279. \end_layout
  2280. \end_inset
  2281. \end_layout
  2282. \begin_layout Plain Layout
  2283. \begin_inset Flex TODO Note (inline)
  2284. status open
  2285. \begin_layout Plain Layout
  2286. The H3K4 part of this figure is included in
  2287. \begin_inset CommandInset citation
  2288. LatexCommand cite
  2289. key "LaMere2016"
  2290. literal "false"
  2291. \end_inset
  2292. as Fig.
  2293. S2.
  2294. Do I need to do anything about that?
  2295. \end_layout
  2296. \end_inset
  2297. \end_layout
  2298. \begin_layout Plain Layout
  2299. \align center
  2300. \begin_inset Graphics
  2301. filename graphics/CD4-csaw/Promoter Peak Distance Profile-PAGE1-CROP.pdf
  2302. lyxscale 50
  2303. width 80col%
  2304. \end_inset
  2305. \end_layout
  2306. \begin_layout Plain Layout
  2307. \begin_inset Caption Standard
  2308. \begin_layout Plain Layout
  2309. \series bold
  2310. \begin_inset CommandInset label
  2311. LatexCommand label
  2312. name "fig:near-promoter-peak-enrich"
  2313. \end_inset
  2314. Enrichment of peaks in promoter neighborhoods.
  2315. \series default
  2316. This plot shows the distribution of distances from each annotated transcription
  2317. start site in the genome to the nearest called peak.
  2318. Each line represents one combination of histone mark, cell type, and time
  2319. point.
  2320. Distributions are smoothed using kernel density estimation [CITE?].
  2321. Transcription start sites that occur
  2322. \emph on
  2323. within
  2324. \emph default
  2325. peaks were excluded from this plot to avoid a large spike at zero that
  2326. would overshadow the rest of the distribution.
  2327. \end_layout
  2328. \end_inset
  2329. \end_layout
  2330. \end_inset
  2331. \end_layout
  2332. \begin_layout Standard
  2333. \begin_inset Float table
  2334. wide false
  2335. sideways false
  2336. status open
  2337. \begin_layout Plain Layout
  2338. \align center
  2339. \begin_inset Tabular
  2340. <lyxtabular version="3" rows="4" columns="2">
  2341. <features tabularvalignment="middle">
  2342. <column alignment="center" valignment="top">
  2343. <column alignment="center" valignment="top">
  2344. <row>
  2345. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2346. \begin_inset Text
  2347. \begin_layout Plain Layout
  2348. Histone mark
  2349. \end_layout
  2350. \end_inset
  2351. </cell>
  2352. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2353. \begin_inset Text
  2354. \begin_layout Plain Layout
  2355. Effective promoter radius
  2356. \end_layout
  2357. \end_inset
  2358. </cell>
  2359. </row>
  2360. <row>
  2361. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2362. \begin_inset Text
  2363. \begin_layout Plain Layout
  2364. H3K4me2
  2365. \end_layout
  2366. \end_inset
  2367. </cell>
  2368. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2369. \begin_inset Text
  2370. \begin_layout Plain Layout
  2371. 1 kb
  2372. \end_layout
  2373. \end_inset
  2374. </cell>
  2375. </row>
  2376. <row>
  2377. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2378. \begin_inset Text
  2379. \begin_layout Plain Layout
  2380. H3K4me3
  2381. \end_layout
  2382. \end_inset
  2383. </cell>
  2384. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2385. \begin_inset Text
  2386. \begin_layout Plain Layout
  2387. 1 kb
  2388. \end_layout
  2389. \end_inset
  2390. </cell>
  2391. </row>
  2392. <row>
  2393. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2394. \begin_inset Text
  2395. \begin_layout Plain Layout
  2396. H3K27me3
  2397. \end_layout
  2398. \end_inset
  2399. </cell>
  2400. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2401. \begin_inset Text
  2402. \begin_layout Plain Layout
  2403. 2.5 kb
  2404. \end_layout
  2405. \end_inset
  2406. </cell>
  2407. </row>
  2408. </lyxtabular>
  2409. \end_inset
  2410. \end_layout
  2411. \begin_layout Plain Layout
  2412. \begin_inset Caption Standard
  2413. \begin_layout Plain Layout
  2414. \series bold
  2415. \begin_inset CommandInset label
  2416. LatexCommand label
  2417. name "tab:effective-promoter-radius"
  2418. \end_inset
  2419. Effective promoter radius for each histone mark.
  2420. \series default
  2421. These values represent the approximate distance from transcription start
  2422. site positions within which an excess of peaks is found, as shown in Figure
  2423. \begin_inset CommandInset ref
  2424. LatexCommand ref
  2425. reference "fig:near-promoter-peak-enrich"
  2426. plural "false"
  2427. caps "false"
  2428. noprefix "false"
  2429. \end_inset
  2430. .
  2431. \end_layout
  2432. \end_inset
  2433. \end_layout
  2434. \begin_layout Plain Layout
  2435. \end_layout
  2436. \end_inset
  2437. \end_layout
  2438. \begin_layout Standard
  2439. All 3 histone marks tend to occur more often near promoter regions, as shown
  2440. in Figure
  2441. \begin_inset CommandInset ref
  2442. LatexCommand ref
  2443. reference "fig:near-promoter-peak-enrich"
  2444. plural "false"
  2445. caps "false"
  2446. noprefix "false"
  2447. \end_inset
  2448. .
  2449. The majority of each density distribution is flat, representing the background
  2450. density of peaks genome-wide.
  2451. Each distribution has a peak near zero, representing an enrichment of peaks
  2452. close to transcription start site (TSS) positions relative to the remainder
  2453. of the genome.
  2454. Interestingly, the
  2455. \begin_inset Quotes eld
  2456. \end_inset
  2457. radius
  2458. \begin_inset Quotes erd
  2459. \end_inset
  2460. within which this enrichment occurs is not the same for every histone mark
  2461. (Table
  2462. \begin_inset CommandInset ref
  2463. LatexCommand ref
  2464. reference "tab:effective-promoter-radius"
  2465. plural "false"
  2466. caps "false"
  2467. noprefix "false"
  2468. \end_inset
  2469. ).
  2470. For H3K4me2 and H3K4me3, peaks are most enriched within 1
  2471. \begin_inset space ~
  2472. \end_inset
  2473. kbp of TSS positions, while for H3K27me3, enrichment is broader, extending
  2474. to 2.5
  2475. \begin_inset space ~
  2476. \end_inset
  2477. kbp.
  2478. These
  2479. \begin_inset Quotes eld
  2480. \end_inset
  2481. effective promoter radii
  2482. \begin_inset Quotes erd
  2483. \end_inset
  2484. were used to define the promoter regions for all further analyses.
  2485. \end_layout
  2486. \begin_layout Standard
  2487. \begin_inset Flex TODO Note (inline)
  2488. status open
  2489. \begin_layout Plain Layout
  2490. Clarify that radius depends on histone mark but
  2491. \emph on
  2492. not
  2493. \emph default
  2494. experimental condition.
  2495. \end_layout
  2496. \end_inset
  2497. \end_layout
  2498. \begin_layout Standard
  2499. \begin_inset Flex TODO Note (inline)
  2500. status open
  2501. \begin_layout Plain Layout
  2502. Consider also showing figure for distance to nearest peak center, and reference
  2503. median peak size once that is known.
  2504. \end_layout
  2505. \end_inset
  2506. \end_layout
  2507. \begin_layout Subsection
  2508. H3K4 and H3K27 promoter methylation has broadly the expected correlation
  2509. with gene expression
  2510. \end_layout
  2511. \begin_layout Standard
  2512. \begin_inset Float figure
  2513. wide false
  2514. sideways false
  2515. status open
  2516. \begin_layout Plain Layout
  2517. \align center
  2518. \begin_inset Graphics
  2519. filename graphics/CD4-csaw/FPKM by Peak Violin Plots-CROP.pdf
  2520. lyxscale 50
  2521. width 100col%
  2522. \end_inset
  2523. \end_layout
  2524. \begin_layout Plain Layout
  2525. \begin_inset Caption Standard
  2526. \begin_layout Plain Layout
  2527. \series bold
  2528. \begin_inset CommandInset label
  2529. LatexCommand label
  2530. name "fig:fpkm-by-peak"
  2531. \end_inset
  2532. Expression distributions of genes with and without promoter peaks.
  2533. \end_layout
  2534. \end_inset
  2535. \end_layout
  2536. \begin_layout Plain Layout
  2537. \end_layout
  2538. \end_inset
  2539. \end_layout
  2540. \begin_layout Itemize
  2541. H3K4 is correlated with higher expression, and H3K27 is correlated with
  2542. lower expression genome-wide
  2543. \end_layout
  2544. \begin_layout Standard
  2545. \begin_inset Flex TODO Note (inline)
  2546. status open
  2547. \begin_layout Plain Layout
  2548. Grr, gotta find these figures.
  2549. Maybe in the old analysis? At least one of these plots is definitely in
  2550. Sarah's paper.
  2551. \end_layout
  2552. \end_inset
  2553. \end_layout
  2554. \begin_layout Itemize
  2555. Figures showing these correlations: box/violin plots of expression distributions
  2556. with every combination of peak presence/absence in promoter
  2557. \end_layout
  2558. \begin_layout Itemize
  2559. Appropriate statistical tests showing significant differences in expected
  2560. directions
  2561. \end_layout
  2562. \begin_layout Subsection
  2563. RNA-seq and H3K4 methylation patterns in naive and memory show convergence
  2564. at day 14
  2565. \end_layout
  2566. \begin_layout Standard
  2567. \end_layout
  2568. \begin_layout Standard
  2569. \begin_inset ERT
  2570. status open
  2571. \begin_layout Plain Layout
  2572. \backslash
  2573. afterpage{
  2574. \end_layout
  2575. \begin_layout Plain Layout
  2576. \backslash
  2577. begin{landscape}
  2578. \end_layout
  2579. \end_inset
  2580. \end_layout
  2581. \begin_layout Standard
  2582. \begin_inset Float table
  2583. wide false
  2584. sideways false
  2585. status collapsed
  2586. \begin_layout Plain Layout
  2587. \align center
  2588. \begin_inset Tabular
  2589. <lyxtabular version="3" rows="6" columns="7">
  2590. <features tabularvalignment="middle">
  2591. <column alignment="center" valignment="top">
  2592. <column alignment="center" valignment="top">
  2593. <column alignment="center" valignment="top">
  2594. <column alignment="center" valignment="top">
  2595. <column alignment="center" valignment="top">
  2596. <column alignment="center" valignment="top">
  2597. <column alignment="center" valignment="top">
  2598. <row>
  2599. <cell alignment="center" valignment="top" usebox="none">
  2600. \begin_inset Text
  2601. \begin_layout Plain Layout
  2602. \end_layout
  2603. \end_inset
  2604. </cell>
  2605. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2606. \begin_inset Text
  2607. \begin_layout Plain Layout
  2608. Number of significant promoters
  2609. \end_layout
  2610. \end_inset
  2611. </cell>
  2612. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2613. \begin_inset Text
  2614. \begin_layout Plain Layout
  2615. \end_layout
  2616. \end_inset
  2617. </cell>
  2618. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2619. \begin_inset Text
  2620. \begin_layout Plain Layout
  2621. \end_layout
  2622. \end_inset
  2623. </cell>
  2624. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2625. \begin_inset Text
  2626. \begin_layout Plain Layout
  2627. Est.
  2628. differentially modified promoters
  2629. \end_layout
  2630. \end_inset
  2631. </cell>
  2632. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2633. \begin_inset Text
  2634. \begin_layout Plain Layout
  2635. \end_layout
  2636. \end_inset
  2637. </cell>
  2638. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2639. \begin_inset Text
  2640. \begin_layout Plain Layout
  2641. \end_layout
  2642. \end_inset
  2643. </cell>
  2644. </row>
  2645. <row>
  2646. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2647. \begin_inset Text
  2648. \begin_layout Plain Layout
  2649. Time Point
  2650. \end_layout
  2651. \end_inset
  2652. </cell>
  2653. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2654. \begin_inset Text
  2655. \begin_layout Plain Layout
  2656. H3K4me2
  2657. \end_layout
  2658. \end_inset
  2659. </cell>
  2660. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2661. \begin_inset Text
  2662. \begin_layout Plain Layout
  2663. H3K4me3
  2664. \end_layout
  2665. \end_inset
  2666. </cell>
  2667. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2668. \begin_inset Text
  2669. \begin_layout Plain Layout
  2670. H3K27me3
  2671. \end_layout
  2672. \end_inset
  2673. </cell>
  2674. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2675. \begin_inset Text
  2676. \begin_layout Plain Layout
  2677. H3K4me2
  2678. \end_layout
  2679. \end_inset
  2680. </cell>
  2681. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2682. \begin_inset Text
  2683. \begin_layout Plain Layout
  2684. H3K4me3
  2685. \end_layout
  2686. \end_inset
  2687. </cell>
  2688. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2689. \begin_inset Text
  2690. \begin_layout Plain Layout
  2691. H3K27me3
  2692. \end_layout
  2693. \end_inset
  2694. </cell>
  2695. </row>
  2696. <row>
  2697. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2698. \begin_inset Text
  2699. \begin_layout Plain Layout
  2700. Day 0
  2701. \end_layout
  2702. \end_inset
  2703. </cell>
  2704. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2705. \begin_inset Text
  2706. \begin_layout Plain Layout
  2707. 4553
  2708. \end_layout
  2709. \end_inset
  2710. </cell>
  2711. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2712. \begin_inset Text
  2713. \begin_layout Plain Layout
  2714. 927
  2715. \end_layout
  2716. \end_inset
  2717. </cell>
  2718. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2719. \begin_inset Text
  2720. \begin_layout Plain Layout
  2721. 6
  2722. \end_layout
  2723. \end_inset
  2724. </cell>
  2725. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2726. \begin_inset Text
  2727. \begin_layout Plain Layout
  2728. 9967
  2729. \end_layout
  2730. \end_inset
  2731. </cell>
  2732. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2733. \begin_inset Text
  2734. \begin_layout Plain Layout
  2735. 4149
  2736. \end_layout
  2737. \end_inset
  2738. </cell>
  2739. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2740. \begin_inset Text
  2741. \begin_layout Plain Layout
  2742. 2404
  2743. \end_layout
  2744. \end_inset
  2745. </cell>
  2746. </row>
  2747. <row>
  2748. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2749. \begin_inset Text
  2750. \begin_layout Plain Layout
  2751. Day 1
  2752. \end_layout
  2753. \end_inset
  2754. </cell>
  2755. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2756. \begin_inset Text
  2757. \begin_layout Plain Layout
  2758. 567
  2759. \end_layout
  2760. \end_inset
  2761. </cell>
  2762. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2763. \begin_inset Text
  2764. \begin_layout Plain Layout
  2765. 278
  2766. \end_layout
  2767. \end_inset
  2768. </cell>
  2769. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2770. \begin_inset Text
  2771. \begin_layout Plain Layout
  2772. 1570
  2773. \end_layout
  2774. \end_inset
  2775. </cell>
  2776. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2777. \begin_inset Text
  2778. \begin_layout Plain Layout
  2779. 4370
  2780. \end_layout
  2781. \end_inset
  2782. </cell>
  2783. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2784. \begin_inset Text
  2785. \begin_layout Plain Layout
  2786. 2145
  2787. \end_layout
  2788. \end_inset
  2789. </cell>
  2790. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2791. \begin_inset Text
  2792. \begin_layout Plain Layout
  2793. 6598
  2794. \end_layout
  2795. \end_inset
  2796. </cell>
  2797. </row>
  2798. <row>
  2799. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2800. \begin_inset Text
  2801. \begin_layout Plain Layout
  2802. Day 5
  2803. \end_layout
  2804. \end_inset
  2805. </cell>
  2806. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2807. \begin_inset Text
  2808. \begin_layout Plain Layout
  2809. 2313
  2810. \end_layout
  2811. \end_inset
  2812. </cell>
  2813. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2814. \begin_inset Text
  2815. \begin_layout Plain Layout
  2816. 139
  2817. \end_layout
  2818. \end_inset
  2819. </cell>
  2820. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2821. \begin_inset Text
  2822. \begin_layout Plain Layout
  2823. 490
  2824. \end_layout
  2825. \end_inset
  2826. </cell>
  2827. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2828. \begin_inset Text
  2829. \begin_layout Plain Layout
  2830. 9450
  2831. \end_layout
  2832. \end_inset
  2833. </cell>
  2834. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  2835. \begin_inset Text
  2836. \begin_layout Plain Layout
  2837. 1148
  2838. \end_layout
  2839. \end_inset
  2840. </cell>
  2841. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  2842. \begin_inset Text
  2843. \begin_layout Plain Layout
  2844. 4141
  2845. \end_layout
  2846. \end_inset
  2847. </cell>
  2848. </row>
  2849. <row>
  2850. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2851. \begin_inset Text
  2852. \begin_layout Plain Layout
  2853. Day 14
  2854. \end_layout
  2855. \end_inset
  2856. </cell>
  2857. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2858. \begin_inset Text
  2859. \begin_layout Plain Layout
  2860. 0
  2861. \end_layout
  2862. \end_inset
  2863. </cell>
  2864. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2865. \begin_inset Text
  2866. \begin_layout Plain Layout
  2867. 0
  2868. \end_layout
  2869. \end_inset
  2870. </cell>
  2871. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2872. \begin_inset Text
  2873. \begin_layout Plain Layout
  2874. 0
  2875. \end_layout
  2876. \end_inset
  2877. </cell>
  2878. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2879. \begin_inset Text
  2880. \begin_layout Plain Layout
  2881. 0
  2882. \end_layout
  2883. \end_inset
  2884. </cell>
  2885. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  2886. \begin_inset Text
  2887. \begin_layout Plain Layout
  2888. 0
  2889. \end_layout
  2890. \end_inset
  2891. </cell>
  2892. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  2893. \begin_inset Text
  2894. \begin_layout Plain Layout
  2895. 0
  2896. \end_layout
  2897. \end_inset
  2898. </cell>
  2899. </row>
  2900. </lyxtabular>
  2901. \end_inset
  2902. \end_layout
  2903. \begin_layout Plain Layout
  2904. \begin_inset Caption Standard
  2905. \begin_layout Plain Layout
  2906. \series bold
  2907. \begin_inset CommandInset label
  2908. LatexCommand label
  2909. name "tab:Number-signif-promoters"
  2910. \end_inset
  2911. Number of differentially modified promoters between naive and memory cells
  2912. at each time point after activation.
  2913. \series default
  2914. This table shows both the number of differentially modified promoters detected
  2915. at a 10% FDR threshold (left half), and the total number of differentially
  2916. modified promoters as estimated using the method of
  2917. \begin_inset CommandInset citation
  2918. LatexCommand cite
  2919. key "Phipson2013"
  2920. literal "false"
  2921. \end_inset
  2922. (right half).
  2923. \end_layout
  2924. \end_inset
  2925. \end_layout
  2926. \end_inset
  2927. \end_layout
  2928. \begin_layout Standard
  2929. \begin_inset ERT
  2930. status open
  2931. \begin_layout Plain Layout
  2932. \backslash
  2933. end{landscape}
  2934. \end_layout
  2935. \begin_layout Plain Layout
  2936. }
  2937. \end_layout
  2938. \end_inset
  2939. \end_layout
  2940. \begin_layout Standard
  2941. \begin_inset Float figure
  2942. placement p
  2943. wide false
  2944. sideways false
  2945. status open
  2946. \begin_layout Plain Layout
  2947. \align center
  2948. \begin_inset Float figure
  2949. wide false
  2950. sideways false
  2951. status collapsed
  2952. \begin_layout Plain Layout
  2953. \align center
  2954. \begin_inset Graphics
  2955. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-promoter-PCA-group-CROP.png
  2956. lyxscale 25
  2957. width 45col%
  2958. groupId pcoa-prom-subfig
  2959. \end_inset
  2960. \end_layout
  2961. \begin_layout Plain Layout
  2962. \begin_inset Caption Standard
  2963. \begin_layout Plain Layout
  2964. \series bold
  2965. \begin_inset CommandInset label
  2966. LatexCommand label
  2967. name "fig:PCoA-H3K4me2-prom"
  2968. \end_inset
  2969. PCoA plot of H3K4me2 promoters, after subtracting surrogate variables
  2970. \end_layout
  2971. \end_inset
  2972. \end_layout
  2973. \end_inset
  2974. \begin_inset space \hfill{}
  2975. \end_inset
  2976. \begin_inset Float figure
  2977. wide false
  2978. sideways false
  2979. status collapsed
  2980. \begin_layout Plain Layout
  2981. \align center
  2982. \begin_inset Graphics
  2983. filename graphics/CD4-csaw/ChIP-seq/H3K4me3-promoter-PCA-group-CROP.png
  2984. lyxscale 25
  2985. width 45col%
  2986. groupId pcoa-prom-subfig
  2987. \end_inset
  2988. \end_layout
  2989. \begin_layout Plain Layout
  2990. \begin_inset Caption Standard
  2991. \begin_layout Plain Layout
  2992. \series bold
  2993. \begin_inset CommandInset label
  2994. LatexCommand label
  2995. name "fig:PCoA-H3K4me3-prom"
  2996. \end_inset
  2997. PCoA plot of H3K4me3 promoters, after subtracting surrogate variables
  2998. \end_layout
  2999. \end_inset
  3000. \end_layout
  3001. \end_inset
  3002. \end_layout
  3003. \begin_layout Plain Layout
  3004. \align center
  3005. \begin_inset Float figure
  3006. wide false
  3007. sideways false
  3008. status collapsed
  3009. \begin_layout Plain Layout
  3010. \align center
  3011. \begin_inset Graphics
  3012. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-promoter-PCA-group-CROP.png
  3013. lyxscale 25
  3014. width 45col%
  3015. groupId pcoa-prom-subfig
  3016. \end_inset
  3017. \end_layout
  3018. \begin_layout Plain Layout
  3019. \begin_inset Caption Standard
  3020. \begin_layout Plain Layout
  3021. \series bold
  3022. \begin_inset CommandInset label
  3023. LatexCommand label
  3024. name "fig:PCoA-H3K27me3-prom"
  3025. \end_inset
  3026. PCoA plot of H3K27me3 promoters, after subtracting surrogate variables
  3027. \end_layout
  3028. \end_inset
  3029. \end_layout
  3030. \end_inset
  3031. \begin_inset space \hfill{}
  3032. \end_inset
  3033. \begin_inset Float figure
  3034. wide false
  3035. sideways false
  3036. status collapsed
  3037. \begin_layout Plain Layout
  3038. \align center
  3039. \begin_inset Graphics
  3040. filename graphics/CD4-csaw/RNA-seq/PCA-final-23-CROP.png
  3041. lyxscale 25
  3042. width 45col%
  3043. groupId pcoa-prom-subfig
  3044. \end_inset
  3045. \end_layout
  3046. \begin_layout Plain Layout
  3047. \begin_inset Caption Standard
  3048. \begin_layout Plain Layout
  3049. \series bold
  3050. \begin_inset CommandInset label
  3051. LatexCommand label
  3052. name "fig:RNA-PCA-group"
  3053. \end_inset
  3054. RNA-seq PCoA showing principal coordinates 2 and 3.
  3055. \end_layout
  3056. \end_inset
  3057. \end_layout
  3058. \end_inset
  3059. \end_layout
  3060. \begin_layout Plain Layout
  3061. \begin_inset Caption Standard
  3062. \begin_layout Plain Layout
  3063. \series bold
  3064. \begin_inset CommandInset label
  3065. LatexCommand label
  3066. name "fig:PCoA-promoters"
  3067. \end_inset
  3068. PCoA plots for promoter ChIP-seq and expression RNA-seq data
  3069. \end_layout
  3070. \end_inset
  3071. \end_layout
  3072. \end_inset
  3073. \end_layout
  3074. \begin_layout Standard
  3075. \begin_inset Flex TODO Note (inline)
  3076. status open
  3077. \begin_layout Plain Layout
  3078. Check up on figure refs in this paragraph
  3079. \end_layout
  3080. \end_inset
  3081. \end_layout
  3082. \begin_layout Standard
  3083. Figure
  3084. \begin_inset CommandInset ref
  3085. LatexCommand ref
  3086. reference "fig:PCoA-promoters"
  3087. plural "false"
  3088. caps "false"
  3089. noprefix "false"
  3090. \end_inset
  3091. shows the patterns of variation in all 3 histone marks in the promoter
  3092. regions of the genome using principal coordinate analysis.
  3093. All 3 marks show a noticeable convergence between the naive and memory
  3094. samples at day 14, visible as an overlapping of the day 14 groups on each
  3095. plot.
  3096. This is consistent with the counts of significantly differentially modified
  3097. promoters and estimates of the total numbers of differentially modified
  3098. promoters shown in Table
  3099. \begin_inset CommandInset ref
  3100. LatexCommand ref
  3101. reference "tab:Number-signif-promoters"
  3102. plural "false"
  3103. caps "false"
  3104. noprefix "false"
  3105. \end_inset
  3106. .
  3107. For all histone marks, evidence of differential modification between naive
  3108. and memory samples was detected at every time point except day 14.
  3109. The day 14 convergence pattern is also present in the RNA-seq data (Figure
  3110. \begin_inset CommandInset ref
  3111. LatexCommand ref
  3112. reference "fig:RNA-PCA-group"
  3113. plural "false"
  3114. caps "false"
  3115. noprefix "false"
  3116. \end_inset
  3117. ), albeit in the 2nd and 3rd principal coordinates, indicating that it is
  3118. not the most dominant pattern driving gene expression.
  3119. Taken together, the data show that promoter histone methylation for these
  3120. 3 histone marks and RNA expression for naive and memory cells are most
  3121. similar at day 14, the furthest time point after activation.
  3122. MOFA was also able to capture this day 14 convergence pattern in latent
  3123. factor 5 (Figure
  3124. \begin_inset CommandInset ref
  3125. LatexCommand ref
  3126. reference "fig:mofa-lf-scatter"
  3127. plural "false"
  3128. caps "false"
  3129. noprefix "false"
  3130. \end_inset
  3131. ), which accounts for shared variation across all 3 histone marks and the
  3132. RNA-seq data, confirming that this is a coordinated pattern across all
  3133. 4 data sets.
  3134. \end_layout
  3135. \begin_layout Subsection
  3136. Effect of promoter coverage upstream vs downstream of TSS
  3137. \end_layout
  3138. \begin_layout Standard
  3139. \begin_inset Flex TODO Note (inline)
  3140. status open
  3141. \begin_layout Plain Layout
  3142. For the figures in this section, the group labels are arbitrary, so if time
  3143. allows, it would be good to manually reorder them in a logical way, e.g.
  3144. most upstream to most downstream.
  3145. \end_layout
  3146. \end_inset
  3147. \end_layout
  3148. \begin_layout Standard
  3149. \begin_inset ERT
  3150. status open
  3151. \begin_layout Plain Layout
  3152. \backslash
  3153. afterpage{
  3154. \end_layout
  3155. \begin_layout Plain Layout
  3156. \backslash
  3157. begin{landscape}
  3158. \end_layout
  3159. \end_inset
  3160. \end_layout
  3161. \begin_layout Standard
  3162. \begin_inset Float figure
  3163. wide false
  3164. sideways false
  3165. status collapsed
  3166. \begin_layout Plain Layout
  3167. \align center
  3168. \begin_inset Float figure
  3169. wide false
  3170. sideways false
  3171. status open
  3172. \begin_layout Plain Layout
  3173. \align center
  3174. \begin_inset Graphics
  3175. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-clusters-CROP.png
  3176. lyxscale 25
  3177. width 30col%
  3178. groupId covprof-subfig
  3179. \end_inset
  3180. \end_layout
  3181. \begin_layout Plain Layout
  3182. \begin_inset Caption Standard
  3183. \begin_layout Plain Layout
  3184. \series bold
  3185. \begin_inset CommandInset label
  3186. LatexCommand label
  3187. name "fig:H3K4me2-neighborhood-clusters"
  3188. \end_inset
  3189. Average relative coverage for each bin in each cluster
  3190. \end_layout
  3191. \end_inset
  3192. \end_layout
  3193. \end_inset
  3194. \begin_inset space \hfill{}
  3195. \end_inset
  3196. \begin_inset Float figure
  3197. wide false
  3198. sideways false
  3199. status collapsed
  3200. \begin_layout Plain Layout
  3201. \align center
  3202. \begin_inset Graphics
  3203. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-PCA-CROP.png
  3204. lyxscale 25
  3205. width 30col%
  3206. groupId covprof-subfig
  3207. \end_inset
  3208. \end_layout
  3209. \begin_layout Plain Layout
  3210. \begin_inset Caption Standard
  3211. \begin_layout Plain Layout
  3212. \series bold
  3213. \begin_inset CommandInset label
  3214. LatexCommand label
  3215. name "fig:H3K4me2-neighborhood-pca"
  3216. \end_inset
  3217. PCA of relative coverage depth, colored by K-means cluster membership.
  3218. \end_layout
  3219. \end_inset
  3220. \end_layout
  3221. \end_inset
  3222. \begin_inset space \hfill{}
  3223. \end_inset
  3224. \begin_inset Float figure
  3225. wide false
  3226. sideways false
  3227. status collapsed
  3228. \begin_layout Plain Layout
  3229. \align center
  3230. \begin_inset Graphics
  3231. filename graphics/CD4-csaw/ChIP-seq/H3K4me2-neighborhood-expression-CROP.png
  3232. lyxscale 25
  3233. width 30col%
  3234. groupId covprof-subfig
  3235. \end_inset
  3236. \end_layout
  3237. \begin_layout Plain Layout
  3238. \begin_inset Caption Standard
  3239. \begin_layout Plain Layout
  3240. \series bold
  3241. \begin_inset CommandInset label
  3242. LatexCommand label
  3243. name "fig:H3K4me2-neighborhood-expression"
  3244. \end_inset
  3245. Gene expression grouped by promoter coverage clusters.
  3246. \end_layout
  3247. \end_inset
  3248. \end_layout
  3249. \end_inset
  3250. \end_layout
  3251. \begin_layout Plain Layout
  3252. \begin_inset Caption Standard
  3253. \begin_layout Plain Layout
  3254. \series bold
  3255. K-means clustering of promoter H3K4me2 relative coverage depth in naive
  3256. day 0 samples.
  3257. \series default
  3258. H3K4me2 ChIP-seq reads were binned into 500-bp windows tiled across each
  3259. promoter from 5
  3260. \begin_inset space ~
  3261. \end_inset
  3262. kbp upstream to 5
  3263. \begin_inset space ~
  3264. \end_inset
  3265. kbp downstream, and the logCPM values were normalized within each promoter
  3266. to an average of 0, yielding relative coverage depths.
  3267. These were then grouped using K-means clustering with
  3268. \begin_inset Formula $K=6$
  3269. \end_inset
  3270. ,
  3271. \series bold
  3272. \series default
  3273. and the average bin values were plotted for each cluster (a).
  3274. The
  3275. \begin_inset Formula $x$
  3276. \end_inset
  3277. -axis is the genomic coordinate of each bin relative to the transcription
  3278. start site, and the
  3279. \begin_inset Formula $y$
  3280. \end_inset
  3281. -axis is the mean relative coverage depth of that bin across all promoters
  3282. in the cluster.
  3283. Each line represents the average
  3284. \begin_inset Quotes eld
  3285. \end_inset
  3286. shape
  3287. \begin_inset Quotes erd
  3288. \end_inset
  3289. of the promoter coverage for promoters in that cluster.
  3290. PCA was performed on the same data, and the first two principal components
  3291. were plotted, coloring each point by its K-means cluster identity (b).
  3292. For each cluster, the distribution of gene expression values was plotted
  3293. (c).
  3294. \end_layout
  3295. \end_inset
  3296. \end_layout
  3297. \end_inset
  3298. \end_layout
  3299. \begin_layout Standard
  3300. \begin_inset Float figure
  3301. wide false
  3302. sideways false
  3303. status collapsed
  3304. \begin_layout Plain Layout
  3305. \align center
  3306. \begin_inset Float figure
  3307. wide false
  3308. sideways false
  3309. status collapsed
  3310. \begin_layout Plain Layout
  3311. \align center
  3312. \begin_inset Graphics
  3313. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-clusters-CROP.png
  3314. lyxscale 25
  3315. width 30col%
  3316. groupId covprof-subfig
  3317. \end_inset
  3318. \end_layout
  3319. \begin_layout Plain Layout
  3320. \begin_inset Caption Standard
  3321. \begin_layout Plain Layout
  3322. \series bold
  3323. \begin_inset CommandInset label
  3324. LatexCommand label
  3325. name "fig:H3K27me3-neighborhood-clusters"
  3326. \end_inset
  3327. Average relative coverage for each bin in each cluster
  3328. \end_layout
  3329. \end_inset
  3330. \end_layout
  3331. \end_inset
  3332. \begin_inset space \hfill{}
  3333. \end_inset
  3334. \begin_inset Float figure
  3335. wide false
  3336. sideways false
  3337. status collapsed
  3338. \begin_layout Plain Layout
  3339. \align center
  3340. \begin_inset Graphics
  3341. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-PCA-CROP.png
  3342. lyxscale 25
  3343. width 30col%
  3344. groupId covprof-subfig
  3345. \end_inset
  3346. \end_layout
  3347. \begin_layout Plain Layout
  3348. \begin_inset Caption Standard
  3349. \begin_layout Plain Layout
  3350. \series bold
  3351. \begin_inset CommandInset label
  3352. LatexCommand label
  3353. name "fig:H3K27me3-neighborhood-pca"
  3354. \end_inset
  3355. PCA of relative coverage depth, colored by K-means cluster membership.
  3356. \end_layout
  3357. \end_inset
  3358. \end_layout
  3359. \end_inset
  3360. \begin_inset space \hfill{}
  3361. \end_inset
  3362. \begin_inset Float figure
  3363. wide false
  3364. sideways false
  3365. status collapsed
  3366. \begin_layout Plain Layout
  3367. \align center
  3368. \begin_inset Graphics
  3369. filename graphics/CD4-csaw/ChIP-seq/H3K27me3-neighborhood-expression-CROP.png
  3370. lyxscale 25
  3371. width 30col%
  3372. groupId covprof-subfig
  3373. \end_inset
  3374. \end_layout
  3375. \begin_layout Plain Layout
  3376. \begin_inset Caption Standard
  3377. \begin_layout Plain Layout
  3378. \series bold
  3379. \begin_inset CommandInset label
  3380. LatexCommand label
  3381. name "fig:H3K27me3-neighborhood-expression"
  3382. \end_inset
  3383. Gene expression grouped by promoter coverage clusters.
  3384. \end_layout
  3385. \end_inset
  3386. \end_layout
  3387. \end_inset
  3388. \end_layout
  3389. \begin_layout Plain Layout
  3390. \begin_inset Caption Standard
  3391. \begin_layout Plain Layout
  3392. \series bold
  3393. K-means clustering of promoter H3K27me3 relative coverage depth in naive
  3394. day 0 samples.
  3395. \series default
  3396. H3K27me3 ChIP-seq reads were binned into 500-bp windows tiled across each
  3397. promoter from 5
  3398. \begin_inset space ~
  3399. \end_inset
  3400. kbp upstream to 5
  3401. \begin_inset space ~
  3402. \end_inset
  3403. kbp downstream, and the logCPM values were normalized within each promoter
  3404. to an average of 0, yielding relative coverage depths.
  3405. These were then grouped using K-means clustering with
  3406. \begin_inset Formula $K=6$
  3407. \end_inset
  3408. ,
  3409. \series bold
  3410. \series default
  3411. and the average bin values were plotted for each cluster (a).
  3412. The
  3413. \begin_inset Formula $x$
  3414. \end_inset
  3415. -axis is the genomic coordinate of each bin relative to the transcription
  3416. start site, and the
  3417. \begin_inset Formula $y$
  3418. \end_inset
  3419. -axis is the mean relative coverage depth of that bin across all promoters
  3420. in the cluster.
  3421. Each line represents the average
  3422. \begin_inset Quotes eld
  3423. \end_inset
  3424. shape
  3425. \begin_inset Quotes erd
  3426. \end_inset
  3427. of the promoter coverage for promoters in that cluster.
  3428. PCA was performed on the same data, and the first two principal components
  3429. were plotted, coloring each point by its K-means cluster identity (b).
  3430. For each cluster, the distribution of gene expression values was plotted
  3431. (c).
  3432. \end_layout
  3433. \end_inset
  3434. \end_layout
  3435. \end_inset
  3436. \end_layout
  3437. \begin_layout Standard
  3438. \begin_inset ERT
  3439. status open
  3440. \begin_layout Plain Layout
  3441. \backslash
  3442. end{landscape}
  3443. \end_layout
  3444. \begin_layout Plain Layout
  3445. }
  3446. \end_layout
  3447. \end_inset
  3448. \end_layout
  3449. \begin_layout Itemize
  3450. H3K4me peaks seem to correlate with increased expression as long as they
  3451. are anywhere near the TSS
  3452. \end_layout
  3453. \begin_layout Itemize
  3454. H3K27me3 peaks can have different correlations to gene expression depending
  3455. on their position relative to TSS (e.g.
  3456. upstream vs downstream) Results consistent with
  3457. \begin_inset CommandInset citation
  3458. LatexCommand cite
  3459. key "Young2011"
  3460. literal "false"
  3461. \end_inset
  3462. \end_layout
  3463. \begin_layout Standard
  3464. \begin_inset Flex TODO Note (inline)
  3465. status open
  3466. \begin_layout Plain Layout
  3467. Show the figures where the negative result ended this line of inquiry
  3468. \end_layout
  3469. \end_inset
  3470. \end_layout
  3471. \begin_layout Section
  3472. Discussion
  3473. \end_layout
  3474. \begin_layout Subsection
  3475. Effective promoter radius
  3476. \end_layout
  3477. \begin_layout Itemize
  3478. "Promoter radius" is not constant and must be defined empirically for a
  3479. given data set.
  3480. Coverage within promoter radius has an expression correlation as well
  3481. \end_layout
  3482. \begin_layout Itemize
  3483. Further study required to demonstrate functional consequences of effective
  3484. promoter radius (e.g.
  3485. show diminished association with gene expression outside radius)
  3486. \end_layout
  3487. \begin_layout Subsection
  3488. Convergence
  3489. \end_layout
  3490. \begin_layout Standard
  3491. \begin_inset Flex TODO Note (inline)
  3492. status open
  3493. \begin_layout Plain Layout
  3494. Look up some more references for these histone marks being involved in memory
  3495. differentiation.
  3496. (Ask Sarah)
  3497. \end_layout
  3498. \end_inset
  3499. \end_layout
  3500. \begin_layout Itemize
  3501. Naive-to-memory convergence implies that naive cells are differentiating
  3502. into memory cells, and that gene expression and H3K4/K27 methylation are
  3503. involved in this differentiation
  3504. \end_layout
  3505. \begin_deeper
  3506. \begin_layout Itemize
  3507. Convergence is consistent with Lamere2016 fig 8
  3508. \begin_inset CommandInset citation
  3509. LatexCommand cite
  3510. key "LaMere2016"
  3511. literal "false"
  3512. \end_inset
  3513. (which was created without the benefit of SVA)
  3514. \end_layout
  3515. \begin_layout Itemize
  3516. H3K27me3, canonically regarded as a deactivating mark, seems to have a more
  3517. complex effect
  3518. \end_layout
  3519. \end_deeper
  3520. \begin_layout Standard
  3521. \begin_inset Float figure
  3522. wide false
  3523. sideways false
  3524. status open
  3525. \begin_layout Plain Layout
  3526. \begin_inset Flex TODO Note (inline)
  3527. status open
  3528. \begin_layout Plain Layout
  3529. This float should ideally go right after the section header, but doing so
  3530. crashes LaTeX.
  3531. \end_layout
  3532. \end_inset
  3533. \end_layout
  3534. \begin_layout Plain Layout
  3535. \align center
  3536. \begin_inset Graphics
  3537. filename graphics/CD4-csaw/LaMere2016_fig8.pdf
  3538. lyxscale 50
  3539. width 60col%
  3540. groupId colwidth
  3541. \end_inset
  3542. \end_layout
  3543. \begin_layout Plain Layout
  3544. \begin_inset Caption Standard
  3545. \begin_layout Plain Layout
  3546. \series bold
  3547. \begin_inset CommandInset label
  3548. LatexCommand label
  3549. name "fig:Lamere2016-Fig8"
  3550. \end_inset
  3551. Lamere 2016 Figure 8
  3552. \begin_inset CommandInset citation
  3553. LatexCommand cite
  3554. key "LaMere2016"
  3555. literal "false"
  3556. \end_inset
  3557. .
  3558. \series default
  3559. Reproduced with permission.
  3560. \end_layout
  3561. \end_inset
  3562. \end_layout
  3563. \end_inset
  3564. \end_layout
  3565. \begin_layout Subsection
  3566. Positional
  3567. \end_layout
  3568. \begin_layout Itemize
  3569. TSS positional coverage, hints of something interesting but no clear conclusions
  3570. \end_layout
  3571. \begin_layout Subsection
  3572. Workflow
  3573. \end_layout
  3574. \begin_layout Standard
  3575. \begin_inset ERT
  3576. status open
  3577. \begin_layout Plain Layout
  3578. \backslash
  3579. afterpage{
  3580. \end_layout
  3581. \begin_layout Plain Layout
  3582. \backslash
  3583. begin{landscape}
  3584. \end_layout
  3585. \end_inset
  3586. \end_layout
  3587. \begin_layout Standard
  3588. \begin_inset Float figure
  3589. wide false
  3590. sideways false
  3591. status open
  3592. \begin_layout Plain Layout
  3593. \align center
  3594. \begin_inset Graphics
  3595. filename graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
  3596. lyxscale 50
  3597. width 100col%
  3598. height 95theight%
  3599. \end_inset
  3600. \end_layout
  3601. \begin_layout Plain Layout
  3602. \begin_inset Caption Standard
  3603. \begin_layout Plain Layout
  3604. \begin_inset CommandInset label
  3605. LatexCommand label
  3606. name "fig:rulegraph"
  3607. \end_inset
  3608. \series bold
  3609. Dependency graph of steps in reproducible workflow
  3610. \end_layout
  3611. \end_inset
  3612. \end_layout
  3613. \end_inset
  3614. \end_layout
  3615. \begin_layout Standard
  3616. \begin_inset ERT
  3617. status open
  3618. \begin_layout Plain Layout
  3619. \backslash
  3620. end{landscape}
  3621. \end_layout
  3622. \begin_layout Plain Layout
  3623. }
  3624. \end_layout
  3625. \end_inset
  3626. \end_layout
  3627. \begin_layout Itemize
  3628. Discuss advantages of developing using a reproducible workflow
  3629. \end_layout
  3630. \begin_deeper
  3631. \begin_layout Itemize
  3632. Decision-making based on trying every option and running the workflow downstream
  3633. to see the effects
  3634. \end_layout
  3635. \end_deeper
  3636. \begin_layout Subsection
  3637. Data quality issues limit conclusions
  3638. \end_layout
  3639. \begin_layout Chapter
  3640. Improving array-based diagnostics for transplant rejection by optimizing
  3641. data preprocessing
  3642. \end_layout
  3643. \begin_layout Standard
  3644. \begin_inset Note Note
  3645. status open
  3646. \begin_layout Plain Layout
  3647. Chapter author list: Me, Sunil, Tom, Padma, Dan
  3648. \end_layout
  3649. \end_inset
  3650. \end_layout
  3651. \begin_layout Section
  3652. Approach
  3653. \end_layout
  3654. \begin_layout Subsection
  3655. Proper pre-processing is essential for array data
  3656. \end_layout
  3657. \begin_layout Standard
  3658. \begin_inset Flex TODO Note (inline)
  3659. status open
  3660. \begin_layout Plain Layout
  3661. This section could probably use some citations
  3662. \end_layout
  3663. \end_inset
  3664. \end_layout
  3665. \begin_layout Standard
  3666. Microarrays, bead arrays, and similar assays produce raw data in the form
  3667. of fluorescence intensity measurements, with each intensity measurement
  3668. proportional to the abundance of some fluorescently-labelled target DNA
  3669. or RNA sequence that base pairs to a specific probe sequence.
  3670. However, these measurements for each probe are also affected by many technical
  3671. confounding factors, such as the concentration of target material, strength
  3672. of off-target binding, and the sensitivity of the imaging sensor.
  3673. Some array designs also use multiple probe sequences for each target.
  3674. Hence, extensive pre-processing of array data is necessary to normalize
  3675. out the effects of these technical factors and summarize the information
  3676. from multiple probes to arrive at a single usable estimate of abundance
  3677. or other relevant quantity, such as a ratio of two abundances, for each
  3678. target.
  3679. \end_layout
  3680. \begin_layout Standard
  3681. The choice of pre-processing algorithms used in the analysis of an array
  3682. data set can have a large effect on the results of that analysis.
  3683. However, despite their importance, these steps are often neglected or rushed
  3684. in order to get to the more scientifically interesting analysis steps involving
  3685. the actual biology of the system under study.
  3686. Hence, it is often possible to achieve substantial gains in statistical
  3687. power, model goodness-of-fit, or other relevant performance measures, by
  3688. checking the assumptions made by each preprocessing step and choosing specific
  3689. normalization methods tailored to the specific goals of the current analysis.
  3690. \end_layout
  3691. \begin_layout Subsection
  3692. Clinical diagnostic applications for microarrays require single-channel
  3693. normalization
  3694. \end_layout
  3695. \begin_layout Standard
  3696. As the cost of performing microarray assays falls, there is increasing interest
  3697. in using genomic assays for diagnostic purposes, such as distinguishing
  3698. healthy transplants (TX) from transplants undergoing acute rejection (AR)
  3699. or acute dysfunction with no rejection (ADNR).
  3700. However, the standard normalization algorithm used for microarray data,
  3701. Robust Multi-chip Average (RMA)
  3702. \begin_inset CommandInset citation
  3703. LatexCommand cite
  3704. key "Irizarry2003a"
  3705. literal "false"
  3706. \end_inset
  3707. , is not applicable in a clinical setting.
  3708. Two of the steps in RMA, quantile normalization and probe summarization
  3709. by median polish, depend on every array in the data set being normalized.
  3710. This means that adding or removing any arrays from a data set changes the
  3711. normalized values for all arrays, and data sets that have been normalized
  3712. separately cannot be compared to each other.
  3713. Hence, when using RMA, any arrays to be analyzed together must also be
  3714. normalized together, and the set of arrays included in the data set must
  3715. be held constant throughout an analysis.
  3716. \end_layout
  3717. \begin_layout Standard
  3718. These limitations present serious impediments to the use of arrays as a
  3719. diagnostic tool.
  3720. When training a classifier, the samples to be classified must not be involved
  3721. in any step of the training process, lest their inclusion bias the training
  3722. process.
  3723. Once a classifier is deployed in a clinical setting, the samples to be
  3724. classified will not even
  3725. \emph on
  3726. exist
  3727. \emph default
  3728. at the time of training, so including them would be impossible even if
  3729. it were statistically justifiable.
  3730. Therefore, any machine learning application for microarrays demands that
  3731. the normalized expression values computed for an array must depend only
  3732. on information contained within that array.
  3733. This would ensure that each array's normalization is independent of every
  3734. other array, and that arrays normalized separately can still be compared
  3735. to each other without bias.
  3736. Such a normalization is commonly referred to as
  3737. \begin_inset Quotes eld
  3738. \end_inset
  3739. single-channel normalization
  3740. \begin_inset Quotes erd
  3741. \end_inset
  3742. .
  3743. \end_layout
  3744. \begin_layout Standard
  3745. Frozen RMA (fRMA) addresses these concerns by replacing the quantile normalizati
  3746. on and median polish with alternatives that do not introduce inter-array
  3747. dependence, allowing each array to be normalized independently of all others
  3748. \begin_inset CommandInset citation
  3749. LatexCommand cite
  3750. key "McCall2010"
  3751. literal "false"
  3752. \end_inset
  3753. .
  3754. Quantile normalization is performed against a pre-generated set of quantiles
  3755. learned from a collection of 850 publicly available arrays sampled from
  3756. a wide variety of tissues in the Gene Expression Omnibus (GEO).
  3757. Each array's probe intensity distribution is normalized against these pre-gener
  3758. ated quantiles.
  3759. The median polish step is replaced with a robust weighted average of probe
  3760. intensities, using inverse variance weights learned from the same public
  3761. GEO data.
  3762. The result is a normalization that satisfies the requirements mentioned
  3763. above: each array is normalized independently of all others, and any two
  3764. normalized arrays can be compared directly to each other.
  3765. \end_layout
  3766. \begin_layout Standard
  3767. One important limitation of fRMA is that it requires a separate reference
  3768. data set from which to learn the parameters (reference quantiles and probe
  3769. weights) that will be used to normalize each array.
  3770. These parameters are specific to a given array platform, and pre-generated
  3771. parameters are only provided for the most common platforms, such as Affymetrix
  3772. hgu133plus2.
  3773. For a less common platform, such as hthgu133pluspm, it is necessary to
  3774. learn custom parameters from in-house data before fRMA can be used to normalize
  3775. samples on that platform
  3776. \begin_inset CommandInset citation
  3777. LatexCommand cite
  3778. key "McCall2011"
  3779. literal "false"
  3780. \end_inset
  3781. .
  3782. \end_layout
  3783. \begin_layout Standard
  3784. One other option is the aptly-named Single Channel Array Normalization (SCAN),
  3785. which adapts a normalization method originally designed for tiling arrays
  3786. \begin_inset CommandInset citation
  3787. LatexCommand cite
  3788. key "Piccolo2012"
  3789. literal "false"
  3790. \end_inset
  3791. .
  3792. SCAN is truly single-channel in that it does not require a set of normalization
  3793. parameters estimated from an external set of reference samples like fRMA
  3794. does.
  3795. \end_layout
  3796. \begin_layout Subsection
  3797. Heteroskedasticity must be accounted for in methylation array data
  3798. \end_layout
  3799. \begin_layout Standard
  3800. DNA methylation arrays are a relatively new kind of assay that uses microarrays
  3801. to measure the degree of methylation on cytosines in specific regions arrayed
  3802. across the genome.
  3803. First, bisulfite treatment converts all unmethylated cytosines to uracil
  3804. (which then become thymine after amplification) while leaving methylated
  3805. cytosines unaffected.
  3806. Then, each target region is interrogated with two probes: one binds to
  3807. the original genomic sequence and interrogates the level of methylated
  3808. DNA, and the other binds to the same sequence with all cytosines replaced
  3809. by thymidines and interrogates the level of unmethylated DNA.
  3810. \end_layout
  3811. \begin_layout Standard
  3812. \begin_inset Float figure
  3813. wide false
  3814. sideways false
  3815. status collapsed
  3816. \begin_layout Plain Layout
  3817. \align center
  3818. \begin_inset Graphics
  3819. filename graphics/methylvoom/sigmoid.pdf
  3820. lyxscale 50
  3821. width 60col%
  3822. groupId colwidth
  3823. \end_inset
  3824. \end_layout
  3825. \begin_layout Plain Layout
  3826. \begin_inset Caption Standard
  3827. \begin_layout Plain Layout
  3828. \begin_inset CommandInset label
  3829. LatexCommand label
  3830. name "fig:Sigmoid-beta-m-mapping"
  3831. \end_inset
  3832. \series bold
  3833. Sigmoid shape of the mapping between β and M values
  3834. \end_layout
  3835. \end_inset
  3836. \end_layout
  3837. \end_inset
  3838. \end_layout
  3839. \begin_layout Standard
  3840. After normalization, these two probe intensities are summarized in one of
  3841. two ways, each with advantages and disadvantages.
  3842. β
  3843. \series bold
  3844. \series default
  3845. values, interpreted as fraction of DNA copies methylated, range from 0 to
  3846. 1.
  3847. β
  3848. \series bold
  3849. \series default
  3850. values are conceptually easy to interpret, but the constrained range makes
  3851. them unsuitable for linear modeling, and their error distributions are
  3852. highly non-normal, which also frustrates linear modeling.
  3853. M-values, interpreted as the log ratio of methylated to unmethylated copies,
  3854. are computed by mapping the beta values from
  3855. \begin_inset Formula $[0,1]$
  3856. \end_inset
  3857. onto
  3858. \begin_inset Formula $(-\infty,+\infty)$
  3859. \end_inset
  3860. using a sigmoid curve (Figure
  3861. \begin_inset CommandInset ref
  3862. LatexCommand ref
  3863. reference "fig:Sigmoid-beta-m-mapping"
  3864. plural "false"
  3865. caps "false"
  3866. noprefix "false"
  3867. \end_inset
  3868. ).
  3869. This transformation results in values with better statistical properties:
  3870. the unconstrained range is suitable for linear modeling, and the error
  3871. distributions are more normal.
  3872. Hence, most linear modeling and other statistical testing on methylation
  3873. arrays is performed using M-values.
  3874. \end_layout
  3875. \begin_layout Standard
  3876. However, the steep slope of the sigmoid transformation near 0 and 1 tends
  3877. to over-exaggerate small differences in β values near those extremes, which
  3878. in turn amplifies the error in those values, leading to a U-shaped trend
  3879. in the mean-variance curve: extreme values have higher variances than values
  3880. near the middle.
  3881. This mean-variance dependency must be accounted for when fitting the linear
  3882. model for differential methylation, or else the variance will be systematically
  3883. overestimated for probes with moderate M-values and underestimated for
  3884. probes with extreme M-values.
  3885. This is particularly undesirable for methylation data because the intermediate
  3886. M-values are the ones of most interest, since they are more likely to represent
  3887. areas of varying methylation, whereas extreme M-values typically represent
  3888. complete methylation or complete lack of methylation.
  3889. \end_layout
  3890. \begin_layout Standard
  3891. RNA-seq read count data are also known to show heteroskedasticity, and the
  3892. voom method was introduced for modeling this heteroskedasticity by estimating
  3893. the mean-variance trend in the data and using this trend to assign precision
  3894. weights to each observation
  3895. \begin_inset CommandInset citation
  3896. LatexCommand cite
  3897. key "Law2013"
  3898. literal "false"
  3899. \end_inset
  3900. .
  3901. While methylation array data are not derived from counts and have a very
  3902. different mean-variance relationship from that of typical RNA-seq data,
  3903. the voom method makes no specific assumptions on the shape of the mean-variance
  3904. relationship – it only assumes that the relationship can be modeled as
  3905. a smooth curve.
  3906. Hence, the method is sufficiently general to model the mean-variance relationsh
  3907. ip in methylation array data.
  3908. However, the standard implementation of voom assumes that the input is
  3909. given in raw read counts, and it must be adapted to run on methylation
  3910. M-values.
  3911. \end_layout
  3912. \begin_layout Section
  3913. Methods
  3914. \end_layout
  3915. \begin_layout Subsection
  3916. Evaluation of classifier performance with different normalization methods
  3917. \end_layout
  3918. \begin_layout Standard
  3919. For testing different expression microarray normalizations, a data set of
  3920. 157 hgu133plus2 arrays was used, consisting of blood samples from kidney
  3921. transplant patients whose grafts had been graded as TX, AR, or ADNR via
  3922. biopsy and histology (46 TX, 69 AR, 42 ADNR)
  3923. \begin_inset CommandInset citation
  3924. LatexCommand cite
  3925. key "Kurian2014"
  3926. literal "true"
  3927. \end_inset
  3928. .
  3929. Additionally, an external validation set of 75 samples was gathered from
  3930. public GEO data (37 TX, 38 AR, no ADNR).
  3931. \end_layout
  3932. \begin_layout Standard
  3933. \begin_inset Flex TODO Note (inline)
  3934. status open
  3935. \begin_layout Plain Layout
  3936. Find appropriate GEO identifiers if possible.
  3937. Kurian 2014 says GSE15296, but this seems to be different data.
  3938. I also need to look up the GEO accession for the external validation set.
  3939. \end_layout
  3940. \end_inset
  3941. \end_layout
  3942. \begin_layout Standard
  3943. To evaluate the effect of each normalization on classifier performance,
  3944. the same classifier training and validation procedure was used after each
  3945. normalization method.
  3946. The PAM package was used to train a nearest shrunken centroid classifier
  3947. on the training set and select the appropriate threshold for centroid shrinking.
  3948. Then the trained classifier was used to predict the class probabilities
  3949. of each validation sample.
  3950. From these class probabilities, ROC curves and area-under-curve (AUC) values
  3951. were generated
  3952. \begin_inset CommandInset citation
  3953. LatexCommand cite
  3954. key "Turck2011"
  3955. literal "false"
  3956. \end_inset
  3957. .
  3958. Each normalization was tested on two different sets of training and validation
  3959. samples.
  3960. For internal validation, the 115 TX and AR arrays in the internal set were
  3961. split at random into two equal sized sets, one for training and one for
  3962. validation, each containing the same numbers of TX and AR samples as the
  3963. other set.
  3964. For external validation, the full set of 115 TX and AR samples was used
  3965. as a training set, and the 75 external TX and AR samples were used as the
  3966. validation set.
  3967. Thus, 2 ROC curves and AUC values were generated for each normalization
  3968. method: one internal and one external.
  3969. Because the external validation set contains no ADNR samples, only classificati
  3970. on of TX and AR samples was considered.
  3971. The ADNR samples were included during normalization but excluded from all
  3972. classifier training and validation.
  3973. This ensures that the performance on internal and external validation sets
  3974. is directly comparable, since both are performing the same task: distinguishing
  3975. TX from AR.
  3976. \end_layout
  3977. \begin_layout Standard
  3978. \begin_inset Flex TODO Note (inline)
  3979. status open
  3980. \begin_layout Plain Layout
  3981. Summarize the get.best.threshold algorithm for PAM threshold selection, or
  3982. just put the code online?
  3983. \end_layout
  3984. \end_inset
  3985. \end_layout
  3986. \begin_layout Standard
  3987. Six different normalization strategies were evaluated.
  3988. First, 2 well-known non-single-channel normalization methods were considered:
  3989. RMA and dChip
  3990. \begin_inset CommandInset citation
  3991. LatexCommand cite
  3992. key "Li2001,Irizarry2003a"
  3993. literal "false"
  3994. \end_inset
  3995. .
  3996. Since RMA produces expression values on a log2 scale and dChip does not,
  3997. the values from dChip were log2 transformed after normalization.
  3998. Next, RMA and dChip followed by Global Rank-invariant Set Normalization
  3999. (GRSN) were tested
  4000. \begin_inset CommandInset citation
  4001. LatexCommand cite
  4002. key "Pelz2008"
  4003. literal "false"
  4004. \end_inset
  4005. .
  4006. Post-processing with GRSN does not turn RMA or dChip into single-channel
  4007. methods, but it may help mitigate batch effects and is therefore useful
  4008. as a benchmark.
  4009. Lastly, the two single-channel normalization methods, fRMA and SCAN, were
  4010. tested
  4011. \begin_inset CommandInset citation
  4012. LatexCommand cite
  4013. key "McCall2010,Piccolo2012"
  4014. literal "false"
  4015. \end_inset
  4016. .
  4017. When evaluating internal validation performance, only the 157 internal samples
  4018. were normalized; when evaluating external validation performance, all 157
  4019. internal samples and 75 external samples were normalized together.
  4020. \end_layout
  4021. \begin_layout Standard
  4022. For demonstrating the problem with separate normalization of training and
  4023. validation data, one additional normalization was performed: the internal
  4024. and external sets were each normalized separately using RMA, and the normalized
  4025. data for each set were combined into a single set with no further attempts
  4026. at normalizing between the two sets.
  4027. This represents approximately how RMA would have to be used in a clinical
  4028. setting, where the samples to be classified are not available at the time
  4029. the classifier is trained.
  4030. \end_layout
  4031. \begin_layout Subsection
  4032. Generating custom fRMA vectors for hthgu133pluspm array platform
  4033. \end_layout
  4034. \begin_layout Standard
  4035. In order to enable fRMA normalization for the hthgu133pluspm array platform,
  4036. custom fRMA normalization vectors were trained using the frmaTools package
  4037. \begin_inset CommandInset citation
  4038. LatexCommand cite
  4039. key "McCall2011"
  4040. literal "false"
  4041. \end_inset
  4042. .
  4043. Separate vectors were created for two types of samples: kidney graft biopsy
  4044. samples and blood samples from graft recipients.
  4045. For training, 341 kidney biopsy samples from 2 data sets and 965 blood
  4046. samples from 5 data sets were used as the reference set.
  4047. Arrays were grouped into batches based on unique combinations of sample
  4048. type (blood or biopsy), diagnosis (TX, AR, etc.), data set, and scan date.
  4049. Thus, each batch represents arrays of the same kind that were run together
  4050. on the same day.
  4051. For estimating the probe inverse variance weights, frmaTools requires equal-siz
  4052. ed batches, which means a batch size must be chosen, and then batches smaller
  4053. than that size must be ignored, while batches larger than the chosen size
  4054. must be downsampled.
  4055. This downsampling is performed randomly, so the sampling process is repeated
  4056. 5 times and the resulting normalizations are compared to each other.
  4057. \end_layout
  4058. \begin_layout Standard
  4059. To evaluate the consistency of the generated normalization vectors, the
  4060. 5 fRMA vector sets generated from 5 random batch samplings were each used
  4061. to normalize the same 20 randomly selected samples from each tissue.
  4062. Then the normalized expression values for each probe on each array were
  4063. compared across all normalizations.
  4064. Each fRMA normalization was also compared against the normalized expression
  4065. values obtained by normalizing the same 20 samples with ordinary RMA.
  4066. \end_layout
  4067. \begin_layout Subsection
  4068. Modeling methylation array M-value heteroskedasticity in linear models with
  4069. modified voom implementation
  4070. \end_layout
  4071. \begin_layout Standard
  4072. \begin_inset Flex TODO Note (inline)
  4073. status open
  4074. \begin_layout Plain Layout
  4075. Put code on Github and reference it.
  4076. \end_layout
  4077. \end_inset
  4078. \end_layout
  4079. \begin_layout Standard
  4080. To investigate whether DNA methylation could be used to distinguish
  4081. between healthy and dysfunctional transplants, a data set of 78 Illumina
  4082. 450k methylation arrays from human kidney graft biopsies was analyzed for
  4083. differential methylation between 4 transplant statuses: healthy transplant
  4084. (TX), transplants undergoing acute rejection (AR), acute dysfunction with
  4085. no rejection (ADNR), and chronic allograft nephropathy (CAN).
  4086. The data consisted of 33 TX, 9 AR, 8 ADNR, and 28 CAN samples.
  4087. The uneven group sizes are a result of taking the biopsy samples before
  4088. the eventual fate of the transplant was known.
  4089. Each sample was additionally annotated with a donor ID (anonymized), Sex,
  4090. Age, Ethnicity, Creatinine Level, and Diabetes diagnosis (all samples
  4091. in this data set came from patients with either Type 1 or Type 2 diabetes).
  4092. \end_layout
  4093. \begin_layout Standard
  4094. The intensity data were first normalized using subset-quantile within array
  4095. normalization (SWAN)
  4096. \begin_inset CommandInset citation
  4097. LatexCommand cite
  4098. key "Maksimovic2012"
  4099. literal "false"
  4100. \end_inset
  4101. , then converted to intensity ratios (beta values)
  4102. \begin_inset CommandInset citation
  4103. LatexCommand cite
  4104. key "Aryee2014"
  4105. literal "false"
  4106. \end_inset
  4107. .
  4108. Any probes binding to loci that overlapped annotated SNPs were dropped,
  4109. and the annotated sex of each sample was verified against the sex inferred
  4110. from the ratio of median probe intensities for the X and Y chromosomes.
  4111. Then, the ratios were transformed to M-values.
  4112. \end_layout
  4113. \begin_layout Standard
  4114. \begin_inset Float table
  4115. wide false
  4116. sideways false
  4117. status open
  4118. \begin_layout Plain Layout
  4119. \align center
  4120. \begin_inset Tabular
  4121. <lyxtabular version="3" rows="4" columns="6">
  4122. <features tabularvalignment="middle">
  4123. <column alignment="center" valignment="top">
  4124. <column alignment="center" valignment="top">
  4125. <column alignment="center" valignment="top">
  4126. <column alignment="center" valignment="top">
  4127. <column alignment="center" valignment="top">
  4128. <column alignment="center" valignment="top">
  4129. <row>
  4130. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4131. \begin_inset Text
  4132. \begin_layout Plain Layout
  4133. Analysis
  4134. \end_layout
  4135. \end_inset
  4136. </cell>
  4137. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4138. \begin_inset Text
  4139. \begin_layout Plain Layout
  4140. random effect
  4141. \end_layout
  4142. \end_inset
  4143. </cell>
  4144. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4145. \begin_inset Text
  4146. \begin_layout Plain Layout
  4147. eBayes
  4148. \end_layout
  4149. \end_inset
  4150. </cell>
  4151. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4152. \begin_inset Text
  4153. \begin_layout Plain Layout
  4154. SVA
  4155. \end_layout
  4156. \end_inset
  4157. </cell>
  4158. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4159. \begin_inset Text
  4160. \begin_layout Plain Layout
  4161. weights
  4162. \end_layout
  4163. \end_inset
  4164. </cell>
  4165. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  4166. \begin_inset Text
  4167. \begin_layout Plain Layout
  4168. voom
  4169. \end_layout
  4170. \end_inset
  4171. </cell>
  4172. </row>
  4173. <row>
  4174. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4175. \begin_inset Text
  4176. \begin_layout Plain Layout
  4177. A
  4178. \end_layout
  4179. \end_inset
  4180. </cell>
  4181. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4182. \begin_inset Text
  4183. \begin_layout Plain Layout
  4184. Yes
  4185. \end_layout
  4186. \end_inset
  4187. </cell>
  4188. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4189. \begin_inset Text
  4190. \begin_layout Plain Layout
  4191. Yes
  4192. \end_layout
  4193. \end_inset
  4194. </cell>
  4195. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4196. \begin_inset Text
  4197. \begin_layout Plain Layout
  4198. No
  4199. \end_layout
  4200. \end_inset
  4201. </cell>
  4202. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4203. \begin_inset Text
  4204. \begin_layout Plain Layout
  4205. No
  4206. \end_layout
  4207. \end_inset
  4208. </cell>
  4209. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  4210. \begin_inset Text
  4211. \begin_layout Plain Layout
  4212. No
  4213. \end_layout
  4214. \end_inset
  4215. </cell>
  4216. </row>
  4217. <row>
  4218. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4219. \begin_inset Text
  4220. \begin_layout Plain Layout
  4221. B
  4222. \end_layout
  4223. \end_inset
  4224. </cell>
  4225. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4226. \begin_inset Text
  4227. \begin_layout Plain Layout
  4228. Yes
  4229. \end_layout
  4230. \end_inset
  4231. </cell>
  4232. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4233. \begin_inset Text
  4234. \begin_layout Plain Layout
  4235. Yes
  4236. \end_layout
  4237. \end_inset
  4238. </cell>
  4239. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4240. \begin_inset Text
  4241. \begin_layout Plain Layout
  4242. Yes
  4243. \end_layout
  4244. \end_inset
  4245. </cell>
  4246. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4247. \begin_inset Text
  4248. \begin_layout Plain Layout
  4249. Yes
  4250. \end_layout
  4251. \end_inset
  4252. </cell>
  4253. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  4254. \begin_inset Text
  4255. \begin_layout Plain Layout
  4256. No
  4257. \end_layout
  4258. \end_inset
  4259. </cell>
  4260. </row>
  4261. <row>
  4262. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4263. \begin_inset Text
  4264. \begin_layout Plain Layout
  4265. C
  4266. \end_layout
  4267. \end_inset
  4268. </cell>
  4269. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4270. \begin_inset Text
  4271. \begin_layout Plain Layout
  4272. Yes
  4273. \end_layout
  4274. \end_inset
  4275. </cell>
  4276. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4277. \begin_inset Text
  4278. \begin_layout Plain Layout
  4279. Yes
  4280. \end_layout
  4281. \end_inset
  4282. </cell>
  4283. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4284. \begin_inset Text
  4285. \begin_layout Plain Layout
  4286. Yes
  4287. \end_layout
  4288. \end_inset
  4289. </cell>
  4290. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4291. \begin_inset Text
  4292. \begin_layout Plain Layout
  4293. Yes
  4294. \end_layout
  4295. \end_inset
  4296. </cell>
  4297. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  4298. \begin_inset Text
  4299. \begin_layout Plain Layout
  4300. Yes
  4301. \end_layout
  4302. \end_inset
  4303. </cell>
  4304. </row>
  4305. </lyxtabular>
  4306. \end_inset
  4307. \end_layout
  4308. \begin_layout Plain Layout
  4309. \begin_inset Caption Standard
  4310. \begin_layout Plain Layout
  4311. \series bold
  4312. \begin_inset CommandInset label
  4313. LatexCommand label
  4314. name "tab:Summary-of-meth-analysis"
  4315. \end_inset
  4316. Summary of analysis variants for methylation array data.
  4317. \series default
  4318. Each analysis included a different set of steps to adjust or account for
  4319. various systematic features of the data.
  4320. Random effect: The model included a random effect accounting for correlation
  4321. between samples from the same patient
  4322. \begin_inset CommandInset citation
  4323. LatexCommand cite
  4324. key "Smyth2005a"
  4325. literal "false"
  4326. \end_inset
  4327. ; eBayes: Empirical Bayes squeezing of per-probe variances toward the mean-varia
  4328. nce trend
  4329. \begin_inset CommandInset citation
  4330. LatexCommand cite
  4331. key "Ritchie2015"
  4332. literal "false"
  4333. \end_inset
  4334. ; SVA: Surrogate variable analysis to account for unobserved confounders
  4335. \begin_inset CommandInset citation
  4336. LatexCommand cite
  4337. key "Leek2007"
  4338. literal "false"
  4339. \end_inset
  4340. ; Weights: Estimate sample weights to account for differences in sample
  4341. quality
  4342. \begin_inset CommandInset citation
  4343. LatexCommand cite
  4344. key "Liu2015,Ritchie2006"
  4345. literal "false"
  4346. \end_inset
  4347. ; voom: Use mean-variance trend to assign individual sample weights
  4348. \begin_inset CommandInset citation
  4349. LatexCommand cite
  4350. key "Law2013"
  4351. literal "false"
  4352. \end_inset
  4353. .
  4354. See the text for a more detailed explanation of each step.
  4355. \end_layout
  4356. \end_inset
  4357. \end_layout
  4358. \end_inset
  4359. \end_layout
  4360. \begin_layout Standard
  4361. From the M-values, a series of parallel analyses was performed, each adding
  4362. additional steps into the model fit to accommodate a feature of the data
  4363. (see Table
  4364. \begin_inset CommandInset ref
  4365. LatexCommand ref
  4366. reference "tab:Summary-of-meth-analysis"
  4367. plural "false"
  4368. caps "false"
  4369. noprefix "false"
  4370. \end_inset
  4371. ).
  4372. For analysis A, a
  4373. \begin_inset Quotes eld
  4374. \end_inset
  4375. basic
  4376. \begin_inset Quotes erd
  4377. \end_inset
  4378. linear modeling analysis was performed, compensating for known confounders
  4379. by including terms for the factor of interest (transplant status) as well
  4380. as the known biological confounders: sex, age, ethnicity, and diabetes.
  4381. Since some samples came from the same patients at different times, the
  4382. intra-patient correlation was modeled as a random effect, estimating a
  4383. shared correlation value across all probes
  4384. \begin_inset CommandInset citation
  4385. LatexCommand cite
  4386. key "Smyth2005a"
  4387. literal "false"
  4388. \end_inset
  4389. .
  4390. Then the linear model was fit, and the variance was modeled using empirical
  4391. Bayes squeezing toward the mean-variance trend
  4392. \begin_inset CommandInset citation
  4393. LatexCommand cite
  4394. key "Ritchie2015"
  4395. literal "false"
  4396. \end_inset
  4397. .
  4398. Finally, t-tests or F-tests were performed as appropriate for each test:
  4399. t-tests for single contrasts, and F-tests for multiple contrasts.
  4400. P-values were corrected for multiple testing using the Benjamini-Hochberg
  4401. procedure for FDR control
  4402. \begin_inset CommandInset citation
  4403. LatexCommand cite
  4404. key "Benjamini1995"
  4405. literal "false"
  4406. \end_inset
  4407. .
  4408. \end_layout
  4409. \begin_layout Standard
  4410. For analysis B, surrogate variable analysis (SVA) was used to infer
  4411. additional unobserved sources of heterogeneity in the data
  4412. \begin_inset CommandInset citation
  4413. LatexCommand cite
  4414. key "Leek2007"
  4415. literal "false"
  4416. \end_inset
  4417. .
  4418. These surrogate variables were added to the design matrix before fitting
  4419. the linear model.
  4420. In addition, sample quality weights were estimated from the data and used
  4421. during linear modeling to down-weight the contribution of highly variable
  4422. arrays while increasing the weight to arrays with lower variability
  4423. \begin_inset CommandInset citation
  4424. LatexCommand cite
  4425. key "Ritchie2006"
  4426. literal "false"
  4427. \end_inset
  4428. .
  4429. The remainder of the analysis proceeded as in analysis A.
  4430. For analysis C, the voom method was adapted to run on methylation array
  4431. data and used to model and correct for the mean-variance trend using individual
  4432. observation weights
  4433. \begin_inset CommandInset citation
  4434. LatexCommand cite
  4435. key "Law2013"
  4436. literal "false"
  4437. \end_inset
  4438. , which were combined with the sample weights
  4439. \begin_inset CommandInset citation
  4440. LatexCommand cite
  4441. key "Liu2015,Ritchie2006"
  4442. literal "false"
  4443. \end_inset
  4444. .
  4445. Each time weights were used, they were estimated once before estimating
  4446. the random effect correlation value, and then the weights were re-estimated
  4447. taking the random effect into account.
  4448. The remainder of the analysis proceeded as in analysis B.
  4449. \end_layout
  4450. \begin_layout Section
  4451. Results
  4452. \end_layout
  4453. \begin_layout Standard
  4454. \begin_inset Flex TODO Note (inline)
  4455. status open
  4456. \begin_layout Plain Layout
  4457. Improve subsection titles in this section
  4458. \end_layout
  4459. \end_inset
  4460. \end_layout
  4461. \begin_layout Subsection
  4462. Separate normalization with RMA introduces unwanted biases in classification
  4463. \end_layout
  4464. \begin_layout Standard
  4465. \begin_inset Float figure
  4466. wide false
  4467. sideways false
  4468. status open
  4469. \begin_layout Plain Layout
  4470. \align center
  4471. \begin_inset Graphics
  4472. filename graphics/PAM/predplot.pdf
  4473. lyxscale 50
  4474. width 60col%
  4475. groupId colwidth
  4476. \end_inset
  4477. \end_layout
  4478. \begin_layout Plain Layout
  4479. \begin_inset Caption Standard
  4480. \begin_layout Plain Layout
  4481. \begin_inset CommandInset label
  4482. LatexCommand label
  4483. name "fig:Classifier-probabilities-RMA"
  4484. \end_inset
  4485. \series bold
  4486. Classifier probabilities on validation samples when normalized with RMA
  4487. together vs.
  4488. separately.
  4489. \series default
  4490. The PAM classifier algorithm was trained on the training set of arrays to
  4491. distinguish AR from TX and then used to assign class probabilities to the
  4492. validation set.
  4493. The process was performed after normalizing all samples together and after
  4494. normalizing the training and validation sets separately, and the class probabilities
  4495. assigned to each sample in the validation set were plotted against each
  4496. other (PP(AR), posterior probability of being AR).
  4497. The color of each point indicates the true classification of that sample.
  4498. \end_layout
  4499. \end_inset
  4500. \end_layout
  4501. \end_inset
  4502. \end_layout
  4503. \begin_layout Standard
  4504. To demonstrate the problem with non-single-channel normalization methods,
  4505. we considered the problem of training a classifier to distinguish TX from
  4506. AR using the samples from the internal set as training data, evaluating
  4507. performance on the external set.
  4508. First, training and evaluation were performed after normalizing all array
  4509. samples together as a single set using RMA, and second, the internal samples
  4510. were normalized separately from the external samples and the training and
  4511. evaluation were repeated.
  4512. For each sample in the validation set, the classifier probabilities from
  4513. both classifiers were plotted against each other (Fig.
  4514. \begin_inset CommandInset ref
  4515. LatexCommand ref
  4516. reference "fig:Classifier-probabilities-RMA"
  4517. plural "false"
  4518. caps "false"
  4519. noprefix "false"
  4520. \end_inset
  4521. ).
  4522. As expected, separate normalization biases the classifier probabilities,
  4523. resulting in several misclassifications.
  4524. In this case, the bias from separate normalization causes the classifier
  4525. to assign a lower probability of AR to every sample.
  4526. \end_layout
  4527. \begin_layout Subsection
  4528. fRMA and SCAN maintain classification performance while eliminating dependence
  4529. on normalization strategy
  4530. \end_layout
  4531. \begin_layout Standard
  4532. \begin_inset Float figure
  4533. wide false
  4534. sideways false
  4535. status open
  4536. \begin_layout Plain Layout
  4537. \align center
  4538. \begin_inset Float figure
  4539. placement tb
  4540. wide false
  4541. sideways false
  4542. status open
  4543. \begin_layout Plain Layout
  4544. \align center
  4545. \begin_inset Graphics
  4546. filename graphics/PAM/ROC-TXvsAR-internal.pdf
  4547. lyxscale 50
  4548. height 40theight%
  4549. groupId roc-pam
  4550. \end_inset
  4551. \end_layout
  4552. \begin_layout Plain Layout
  4553. \begin_inset Caption Standard
  4554. \begin_layout Plain Layout
  4555. \begin_inset CommandInset label
  4556. LatexCommand label
  4557. name "fig:ROC-PAM-int"
  4558. \end_inset
  4559. ROC curves for PAM on internal validation data
  4560. \end_layout
  4561. \end_inset
  4562. \end_layout
  4563. \end_inset
  4564. \end_layout
  4565. \begin_layout Plain Layout
  4566. \align center
  4567. \begin_inset Float figure
  4568. placement tb
  4569. wide false
  4570. sideways false
  4571. status open
  4572. \begin_layout Plain Layout
  4573. \align center
  4574. \begin_inset Graphics
  4575. filename graphics/PAM/ROC-TXvsAR-external.pdf
  4576. lyxscale 50
  4577. height 40theight%
  4578. groupId roc-pam
  4579. \end_inset
  4580. \end_layout
  4581. \begin_layout Plain Layout
  4582. \begin_inset Caption Standard
  4583. \begin_layout Plain Layout
  4584. \begin_inset CommandInset label
  4585. LatexCommand label
  4586. name "fig:ROC-PAM-ext"
  4587. \end_inset
  4588. ROC curves for PAM on external validation data
  4589. \end_layout
  4590. \end_inset
  4591. \end_layout
  4592. \end_inset
  4593. \end_layout
  4594. \begin_layout Plain Layout
  4595. \begin_inset Caption Standard
  4596. \begin_layout Plain Layout
  4597. \series bold
  4598. \begin_inset CommandInset label
  4599. LatexCommand label
  4600. name "fig:ROC-PAM-main"
  4601. \end_inset
  4602. ROC curves for PAM using different normalization strategies.
  4603. \series default
  4604. ROC curves were generated for PAM classification of AR vs TX after 6 different
  4605. normalization strategies applied to the same data sets.
  4606. Only fRMA and SCAN are single-channel normalizations.
  4607. The other normalizations are for comparison.
  4608. \end_layout
  4609. \end_inset
  4610. \end_layout
  4611. \end_inset
  4612. \end_layout
  4613. \begin_layout Standard
  4614. \begin_inset Float table
  4615. wide false
  4616. sideways false
  4617. status open
  4618. \begin_layout Plain Layout
  4619. \align center
  4620. \begin_inset Tabular
  4621. <lyxtabular version="3" rows="7" columns="4">
  4622. <features tabularvalignment="middle">
  4623. <column alignment="center" valignment="top">
  4624. <column alignment="center" valignment="top">
  4625. <column alignment="center" valignment="top">
  4626. <column alignment="center" valignment="top">
  4627. <row>
  4628. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4629. \begin_inset Text
  4630. \begin_layout Plain Layout
  4631. \family roman
  4632. \series medium
  4633. \shape up
  4634. \size normal
  4635. \emph off
  4636. \bar no
  4637. \strikeout off
  4638. \xout off
  4639. \uuline off
  4640. \uwave off
  4641. \noun off
  4642. \color none
  4643. Normalization
  4644. \end_layout
  4645. \end_inset
  4646. </cell>
  4647. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4648. \begin_inset Text
  4649. \begin_layout Plain Layout
  4650. Single-channel?
  4651. \end_layout
  4652. \end_inset
  4653. </cell>
  4654. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  4655. \begin_inset Text
  4656. \begin_layout Plain Layout
  4657. \family roman
  4658. \series medium
  4659. \shape up
  4660. \size normal
  4661. \emph off
  4662. \bar no
  4663. \strikeout off
  4664. \xout off
  4665. \uuline off
  4666. \uwave off
  4667. \noun off
  4668. \color none
  4669. Internal Val.
  4670. AUC
  4671. \end_layout
  4672. \end_inset
  4673. </cell>
  4674. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  4675. \begin_inset Text
  4676. \begin_layout Plain Layout
  4677. External Val.
  4678. AUC
  4679. \end_layout
  4680. \end_inset
  4681. </cell>
  4682. </row>
  4683. <row>
  4684. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4685. \begin_inset Text
  4686. \begin_layout Plain Layout
  4687. \family roman
  4688. \series medium
  4689. \shape up
  4690. \size normal
  4691. \emph off
  4692. \bar no
  4693. \strikeout off
  4694. \xout off
  4695. \uuline off
  4696. \uwave off
  4697. \noun off
  4698. \color none
  4699. RMA
  4700. \end_layout
  4701. \end_inset
  4702. </cell>
  4703. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4704. \begin_inset Text
  4705. \begin_layout Plain Layout
  4706. No
  4707. \end_layout
  4708. \end_inset
  4709. </cell>
  4710. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4711. \begin_inset Text
  4712. \begin_layout Plain Layout
  4713. \family roman
  4714. \series medium
  4715. \shape up
  4716. \size normal
  4717. \emph off
  4718. \bar no
  4719. \strikeout off
  4720. \xout off
  4721. \uuline off
  4722. \uwave off
  4723. \noun off
  4724. \color none
  4725. 0.852
  4726. \end_layout
  4727. \end_inset
  4728. </cell>
  4729. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  4730. \begin_inset Text
  4731. \begin_layout Plain Layout
  4732. \family roman
  4733. \series medium
  4734. \shape up
  4735. \size normal
  4736. \emph off
  4737. \bar no
  4738. \strikeout off
  4739. \xout off
  4740. \uuline off
  4741. \uwave off
  4742. \noun off
  4743. \color none
  4744. 0.713
  4745. \end_layout
  4746. \end_inset
  4747. </cell>
  4748. </row>
  4749. <row>
  4750. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4751. \begin_inset Text
  4752. \begin_layout Plain Layout
  4753. \family roman
  4754. \series medium
  4755. \shape up
  4756. \size normal
  4757. \emph off
  4758. \bar no
  4759. \strikeout off
  4760. \xout off
  4761. \uuline off
  4762. \uwave off
  4763. \noun off
  4764. \color none
  4765. dChip
  4766. \end_layout
  4767. \end_inset
  4768. </cell>
  4769. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4770. \begin_inset Text
  4771. \begin_layout Plain Layout
  4772. No
  4773. \end_layout
  4774. \end_inset
  4775. </cell>
  4776. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4777. \begin_inset Text
  4778. \begin_layout Plain Layout
  4779. \family roman
  4780. \series medium
  4781. \shape up
  4782. \size normal
  4783. \emph off
  4784. \bar no
  4785. \strikeout off
  4786. \xout off
  4787. \uuline off
  4788. \uwave off
  4789. \noun off
  4790. \color none
  4791. 0.891
  4792. \end_layout
  4793. \end_inset
  4794. </cell>
  4795. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  4796. \begin_inset Text
  4797. \begin_layout Plain Layout
  4798. \family roman
  4799. \series medium
  4800. \shape up
  4801. \size normal
  4802. \emph off
  4803. \bar no
  4804. \strikeout off
  4805. \xout off
  4806. \uuline off
  4807. \uwave off
  4808. \noun off
  4809. \color none
  4810. 0.657
  4811. \end_layout
  4812. \end_inset
  4813. </cell>
  4814. </row>
  4815. <row>
  4816. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4817. \begin_inset Text
  4818. \begin_layout Plain Layout
  4819. \family roman
  4820. \series medium
  4821. \shape up
  4822. \size normal
  4823. \emph off
  4824. \bar no
  4825. \strikeout off
  4826. \xout off
  4827. \uuline off
  4828. \uwave off
  4829. \noun off
  4830. \color none
  4831. RMA + GRSN
  4832. \end_layout
  4833. \end_inset
  4834. </cell>
  4835. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4836. \begin_inset Text
  4837. \begin_layout Plain Layout
  4838. No
  4839. \end_layout
  4840. \end_inset
  4841. </cell>
  4842. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4843. \begin_inset Text
  4844. \begin_layout Plain Layout
  4845. \family roman
  4846. \series medium
  4847. \shape up
  4848. \size normal
  4849. \emph off
  4850. \bar no
  4851. \strikeout off
  4852. \xout off
  4853. \uuline off
  4854. \uwave off
  4855. \noun off
  4856. \color none
  4857. 0.816
  4858. \end_layout
  4859. \end_inset
  4860. </cell>
  4861. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  4862. \begin_inset Text
  4863. \begin_layout Plain Layout
  4864. \family roman
  4865. \series medium
  4866. \shape up
  4867. \size normal
  4868. \emph off
  4869. \bar no
  4870. \strikeout off
  4871. \xout off
  4872. \uuline off
  4873. \uwave off
  4874. \noun off
  4875. \color none
  4876. 0.750
  4877. \end_layout
  4878. \end_inset
  4879. </cell>
  4880. </row>
  4881. <row>
  4882. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4883. \begin_inset Text
  4884. \begin_layout Plain Layout
  4885. \family roman
  4886. \series medium
  4887. \shape up
  4888. \size normal
  4889. \emph off
  4890. \bar no
  4891. \strikeout off
  4892. \xout off
  4893. \uuline off
  4894. \uwave off
  4895. \noun off
  4896. \color none
  4897. dChip + GRSN
  4898. \end_layout
  4899. \end_inset
  4900. </cell>
  4901. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4902. \begin_inset Text
  4903. \begin_layout Plain Layout
  4904. No
  4905. \end_layout
  4906. \end_inset
  4907. </cell>
  4908. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4909. \begin_inset Text
  4910. \begin_layout Plain Layout
  4911. \family roman
  4912. \series medium
  4913. \shape up
  4914. \size normal
  4915. \emph off
  4916. \bar no
  4917. \strikeout off
  4918. \xout off
  4919. \uuline off
  4920. \uwave off
  4921. \noun off
  4922. \color none
  4923. 0.875
  4924. \end_layout
  4925. \end_inset
  4926. </cell>
  4927. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  4928. \begin_inset Text
  4929. \begin_layout Plain Layout
  4930. \family roman
  4931. \series medium
  4932. \shape up
  4933. \size normal
  4934. \emph off
  4935. \bar no
  4936. \strikeout off
  4937. \xout off
  4938. \uuline off
  4939. \uwave off
  4940. \noun off
  4941. \color none
  4942. 0.642
  4943. \end_layout
  4944. \end_inset
  4945. </cell>
  4946. </row>
  4947. <row>
  4948. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4949. \begin_inset Text
  4950. \begin_layout Plain Layout
  4951. \family roman
  4952. \series medium
  4953. \shape up
  4954. \size normal
  4955. \emph off
  4956. \bar no
  4957. \strikeout off
  4958. \xout off
  4959. \uuline off
  4960. \uwave off
  4961. \noun off
  4962. \color none
  4963. fRMA
  4964. \end_layout
  4965. \end_inset
  4966. </cell>
  4967. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4968. \begin_inset Text
  4969. \begin_layout Plain Layout
  4970. Yes
  4971. \end_layout
  4972. \end_inset
  4973. </cell>
  4974. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  4975. \begin_inset Text
  4976. \begin_layout Plain Layout
  4977. \family roman
  4978. \series medium
  4979. \shape up
  4980. \size normal
  4981. \emph off
  4982. \bar no
  4983. \strikeout off
  4984. \xout off
  4985. \uuline off
  4986. \uwave off
  4987. \noun off
  4988. \color none
  4989. 0.863
  4990. \end_layout
  4991. \end_inset
  4992. </cell>
  4993. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  4994. \begin_inset Text
  4995. \begin_layout Plain Layout
  4996. \family roman
  4997. \series medium
  4998. \shape up
  4999. \size normal
  5000. \emph off
  5001. \bar no
  5002. \strikeout off
  5003. \xout off
  5004. \uuline off
  5005. \uwave off
  5006. \noun off
  5007. \color none
  5008. 0.718
  5009. \end_layout
  5010. \end_inset
  5011. </cell>
  5012. </row>
  5013. <row>
  5014. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5015. \begin_inset Text
  5016. \begin_layout Plain Layout
  5017. \family roman
  5018. \series medium
  5019. \shape up
  5020. \size normal
  5021. \emph off
  5022. \bar no
  5023. \strikeout off
  5024. \xout off
  5025. \uuline off
  5026. \uwave off
  5027. \noun off
  5028. \color none
  5029. SCAN
  5030. \end_layout
  5031. \end_inset
  5032. </cell>
  5033. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5034. \begin_inset Text
  5035. \begin_layout Plain Layout
  5036. Yes
  5037. \end_layout
  5038. \end_inset
  5039. </cell>
  5040. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5041. \begin_inset Text
  5042. \begin_layout Plain Layout
  5043. \family roman
  5044. \series medium
  5045. \shape up
  5046. \size normal
  5047. \emph off
  5048. \bar no
  5049. \strikeout off
  5050. \xout off
  5051. \uuline off
  5052. \uwave off
  5053. \noun off
  5054. \color none
  5055. 0.853
  5056. \end_layout
  5057. \end_inset
  5058. </cell>
  5059. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  5060. \begin_inset Text
  5061. \begin_layout Plain Layout
  5062. \family roman
  5063. \series medium
  5064. \shape up
  5065. \size normal
  5066. \emph off
  5067. \bar no
  5068. \strikeout off
  5069. \xout off
  5070. \uuline off
  5071. \uwave off
  5072. \noun off
  5073. \color none
  5074. 0.689
  5075. \end_layout
  5076. \end_inset
  5077. </cell>
  5078. </row>
  5079. </lyxtabular>
  5080. \end_inset
  5081. \end_layout
  5082. \begin_layout Plain Layout
  5083. \begin_inset Caption Standard
  5084. \begin_layout Plain Layout
  5085. \begin_inset CommandInset label
  5086. LatexCommand label
  5087. name "tab:AUC-PAM"
  5088. \end_inset
  5089. \series bold
  5090. ROC curve AUC values for internal and external validation with 6 different
  5091. normalization strategies.
  5092. \series default
  5093. These AUC values correspond to the ROC curves in Figure
  5094. \begin_inset CommandInset ref
  5095. LatexCommand ref
  5096. reference "fig:ROC-PAM-main"
  5097. plural "false"
  5098. caps "false"
  5099. noprefix "false"
  5100. \end_inset
  5101. .
  5102. \end_layout
  5103. \end_inset
  5104. \end_layout
  5105. \end_inset
  5106. \end_layout
  5107. \begin_layout Standard
  5108. For internal validation, the 6 methods' AUC values ranged from 0.816 to 0.891,
  5109. as shown in Table
  5110. \begin_inset CommandInset ref
  5111. LatexCommand ref
  5112. reference "tab:AUC-PAM"
  5113. plural "false"
  5114. caps "false"
  5115. noprefix "false"
  5116. \end_inset
  5117. .
  5118. Among the non-single-channel normalizations, dChip outperformed RMA, while
  5119. GRSN reduced the AUC values for both dChip and RMA.
  5120. Both single-channel methods, fRMA and SCAN, slightly outperformed RMA,
  5121. with fRMA ahead of SCAN.
  5122. However, the difference between RMA and fRMA is still quite small.
  5123. Figure
  5124. \begin_inset CommandInset ref
  5125. LatexCommand ref
  5126. reference "fig:ROC-PAM-int"
  5127. plural "false"
  5128. caps "false"
  5129. noprefix "false"
  5130. \end_inset
  5131. shows that the ROC curves for RMA, dChip, and fRMA look very similar and
  5132. relatively smooth, while both GRSN curves and the curve for SCAN have a
  5133. more jagged appearance.
  5134. \end_layout
  5135. \begin_layout Standard
  5136. For external validation, as expected, all the AUC values are lower than
  5137. those for internal validation, ranging from 0.642 to 0.750 (Table
  5138. \begin_inset CommandInset ref
  5139. LatexCommand ref
  5140. reference "tab:AUC-PAM"
  5141. plural "false"
  5142. caps "false"
  5143. noprefix "false"
  5144. \end_inset
  5145. ).
  5146. With or without GRSN, RMA shows its dominance over dChip in this more challengi
  5147. ng test.
  5148. Unlike in the internal validation, GRSN actually improves the classifier
  5149. performance for RMA, although it does not for dChip.
  5150. Once again, both single-channel methods perform about on par with RMA,
  5151. with fRMA performing slightly better and SCAN performing a bit worse.
  5152. Figure
  5153. \begin_inset CommandInset ref
  5154. LatexCommand ref
  5155. reference "fig:ROC-PAM-ext"
  5156. plural "false"
  5157. caps "false"
  5158. noprefix "false"
  5159. \end_inset
  5160. shows the ROC curves for the external validation test.
  5161. As expected, none of them are as clean-looking as the internal validation
  5162. ROC curves.
  5163. The curves for RMA, RMA+GRSN, and fRMA all look similar, while the other
  5164. curves look more divergent.
  5165. \end_layout
  5166. \begin_layout Subsection
  5167. fRMA with custom-generated vectors enables single-channel normalization
  5168. on hthgu133pluspm platform
  5169. \end_layout
  5170. \begin_layout Standard
  5171. \begin_inset Float figure
  5172. wide false
  5173. sideways false
  5174. status open
  5175. \begin_layout Plain Layout
  5176. \align center
  5177. \begin_inset Float figure
  5178. placement tb
  5179. wide false
  5180. sideways false
  5181. status collapsed
  5182. \begin_layout Plain Layout
  5183. \align center
  5184. \begin_inset Graphics
  5185. filename graphics/frma-pax-bx/batchsize_batches.pdf
  5186. lyxscale 50
  5187. height 35theight%
  5188. groupId frmatools-subfig
  5189. \end_inset
  5190. \end_layout
  5191. \begin_layout Plain Layout
  5192. \begin_inset Caption Standard
  5193. \begin_layout Plain Layout
  5194. \begin_inset CommandInset label
  5195. LatexCommand label
  5196. name "fig:batch-size-batches"
  5197. \end_inset
  5198. \series bold
  5199. Number of batches usable in fRMA probe weight learning as a function of
  5200. batch size.
  5201. \end_layout
  5202. \end_inset
  5203. \end_layout
  5204. \end_inset
  5205. \end_layout
  5206. \begin_layout Plain Layout
  5207. \align center
  5208. \begin_inset Float figure
  5209. placement tb
  5210. wide false
  5211. sideways false
  5212. status collapsed
  5213. \begin_layout Plain Layout
  5214. \align center
  5215. \begin_inset Graphics
  5216. filename graphics/frma-pax-bx/batchsize_samples.pdf
  5217. lyxscale 50
  5218. height 35theight%
  5219. groupId frmatools-subfig
  5220. \end_inset
  5221. \end_layout
  5222. \begin_layout Plain Layout
  5223. \begin_inset Caption Standard
  5224. \begin_layout Plain Layout
  5225. \begin_inset CommandInset label
  5226. LatexCommand label
  5227. name "fig:batch-size-samples"
  5228. \end_inset
  5229. \series bold
  5230. Number of samples usable in fRMA probe weight learning as a function of
  5231. batch size.
  5232. \end_layout
  5233. \end_inset
  5234. \end_layout
  5235. \end_inset
  5236. \end_layout
  5237. \begin_layout Plain Layout
  5238. \begin_inset Caption Standard
  5239. \begin_layout Plain Layout
  5240. \series bold
  5241. \begin_inset CommandInset label
  5242. LatexCommand label
  5243. name "fig:frmatools-batch-size"
  5244. \end_inset
  5245. Effect of batch size selection on number of batches and number of samples
  5246. included in fRMA probe weight learning.
  5247. \series default
  5248. For batch sizes ranging from 3 to 15, the number of batches (a) and samples
  5249. (b) included in probe weight training were plotted for biopsy (BX) and
  5250. blood (PAX) samples.
  5251. The selected batch size, 5, is marked with a dotted vertical line.
  5252. \end_layout
  5253. \end_inset
  5254. \end_layout
  5255. \end_inset
  5256. \end_layout
  5257. \begin_layout Standard
  5258. In order to enable use of fRMA to normalize hthgu133pluspm, a custom set
  5259. of fRMA vectors was created.
  5260. First, an appropriate batch size was chosen by looking at the number of
  5261. batches and number of samples included as a function of batch size (Figure
  5262. \begin_inset CommandInset ref
  5263. LatexCommand ref
  5264. reference "fig:frmatools-batch-size"
  5265. plural "false"
  5266. caps "false"
  5267. noprefix "false"
  5268. \end_inset
  5269. ).
  5270. For a given batch size, all batches with fewer samples than the chosen
  5271. size must be ignored during training, while larger batches must be randomly
  5272. downsampled to the chosen size.
  5273. Hence, the number of samples included for a given batch size equals the
  5274. batch size times the number of batches with at least that many samples.
  5275. From Figure
  5276. \begin_inset CommandInset ref
  5277. LatexCommand ref
  5278. reference "fig:batch-size-samples"
  5279. plural "false"
  5280. caps "false"
  5281. noprefix "false"
  5282. \end_inset
  5283. , it is apparent that a batch size of 8 maximizes the number of samples
  5284. included in training.
  5285. Increasing the batch size beyond this causes too many smaller batches to
  5286. be excluded, reducing the total number of samples for both tissue types.
  5287. However, a batch size of 8 is not necessarily optimal.
  5288. The article introducing frmaTools concluded that it was highly advantageous
  5289. to use a smaller batch size in order to include more batches, even at the
  5290. expense of including fewer total samples in training
  5291. \begin_inset CommandInset citation
  5292. LatexCommand cite
  5293. key "McCall2011"
  5294. literal "false"
  5295. \end_inset
  5296. .
  5297. To strike an appropriate balance between more batches and more samples,
  5298. a batch size of 5 was chosen.
  5299. For both blood and biopsy samples, this increased the number of batches
  5300. included by 10, with only a modest reduction in the number of samples compared
  5301. to a batch size of 8.
  5302. With a batch size of 5, 26 batches of biopsy samples and 46 batches of
  5303. blood samples were available.
  5304. \end_layout
  5305. \begin_layout Standard
  5306. \begin_inset Float figure
  5307. wide false
  5308. sideways false
  5309. status open
  5310. \begin_layout Plain Layout
  5311. \begin_inset Float figure
  5312. wide false
  5313. sideways false
  5314. status collapsed
  5315. \begin_layout Plain Layout
  5316. \align center
  5317. \begin_inset Graphics
  5318. filename graphics/frma-pax-bx/M-BX-violin.pdf
  5319. lyxscale 40
  5320. width 45col%
  5321. groupId m-violin
  5322. \end_inset
  5323. \end_layout
  5324. \begin_layout Plain Layout
  5325. \begin_inset Caption Standard
  5326. \begin_layout Plain Layout
  5327. \begin_inset CommandInset label
  5328. LatexCommand label
  5329. name "fig:m-bx-violin"
  5330. \end_inset
  5331. \series bold
  5332. Violin plot of inter-normalization log ratios for biopsy samples.
  5333. \end_layout
  5334. \end_inset
  5335. \end_layout
  5336. \end_inset
  5337. \begin_inset space \hfill{}
  5338. \end_inset
  5339. \begin_inset Float figure
  5340. wide false
  5341. sideways false
  5342. status collapsed
  5343. \begin_layout Plain Layout
  5344. \align center
  5345. \begin_inset Graphics
  5346. filename graphics/frma-pax-bx/M-PAX-violin.pdf
  5347. lyxscale 40
  5348. width 45col%
  5349. groupId m-violin
  5350. \end_inset
  5351. \end_layout
  5352. \begin_layout Plain Layout
  5353. \begin_inset Caption Standard
  5354. \begin_layout Plain Layout
  5355. \begin_inset CommandInset label
  5356. LatexCommand label
  5357. name "fig:m-pax-violin"
  5358. \end_inset
  5359. \series bold
  5360. Violin plot of inter-normalization log ratios for blood samples.
  5361. \end_layout
  5362. \end_inset
  5363. \end_layout
  5364. \end_inset
  5365. \end_layout
  5366. \begin_layout Plain Layout
  5367. \begin_inset Caption Standard
  5368. \begin_layout Plain Layout
  5369. \series bold
  5370. Violin plot of log ratios between normalizations for 20 biopsy samples.
  5371. \series default
  5372. Each of 20 randomly selected samples was normalized with RMA and with 5
  5373. different sets of fRMA vectors.
  5374. The distribution of log ratios between normalized expression values, aggregated
  5375. across all 20 arrays, was plotted for each pair of normalizations.
  5376. \end_layout
  5377. \end_inset
  5378. \end_layout
  5379. \end_inset
  5380. \end_layout
  5381. \begin_layout Standard
  5382. Since fRMA training requires equal-size batches, larger batches are downsampled
  5383. randomly.
  5384. This introduces a nondeterministic step in the generation of normalization
  5385. vectors.
  5386. To show that this randomness does not substantially change the outcome,
  5387. the random downsampling and subsequent vector learning were repeated 5 times,
  5388. with a different random seed each time.
  5389. 20 samples were selected at random as a test set and normalized with each
  5390. of the 5 sets of fRMA normalization vectors as well as ordinary RMA, and
  5391. the normalized expression values were compared across normalizations.
  5392. Figure
  5393. \begin_inset CommandInset ref
  5394. LatexCommand ref
  5395. reference "fig:m-bx-violin"
  5396. plural "false"
  5397. caps "false"
  5398. noprefix "false"
  5399. \end_inset
  5400. shows a summary of these comparisons for biopsy samples.
  5401. Comparing RMA to each of the 5 fRMA normalizations, the distribution of
  5402. log ratios is somewhat wide, indicating that the normalizations disagree
  5403. on the expression values of a fair number of probe sets.
  5404. In contrast, in comparisons of fRMA against fRMA, the vast majority of probe
  5405. sets have very small log ratios, indicating a very high agreement between
  5406. the normalized values generated by the two normalizations.
  5407. This shows that the fRMA normalization's behavior is not very sensitive
  5408. to the random downsampling of larger batches during training.
  5409. \end_layout
  5410. \begin_layout Standard
  5411. \begin_inset Float figure
  5412. wide false
  5413. sideways false
  5414. status open
  5415. \begin_layout Plain Layout
  5416. \align center
  5417. \begin_inset Float figure
  5418. wide false
  5419. sideways false
  5420. status collapsed
  5421. \begin_layout Plain Layout
  5422. \align center
  5423. \begin_inset Graphics
  5424. filename graphics/frma-pax-bx/MA-BX-RMA.fRMA-RASTER.png
  5425. lyxscale 10
  5426. width 45col%
  5427. groupId ma-frma
  5428. \end_inset
  5429. \end_layout
  5430. \begin_layout Plain Layout
  5431. \begin_inset Caption Standard
  5432. \begin_layout Plain Layout
  5433. \begin_inset CommandInset label
  5434. LatexCommand label
  5435. name "fig:ma-bx-rma-frma"
  5436. \end_inset
  5437. RMA vs.
  5438. fRMA for biopsy samples.
  5439. \end_layout
  5440. \end_inset
  5441. \end_layout
  5442. \end_inset
  5443. \begin_inset space \hfill{}
  5444. \end_inset
  5445. \begin_inset Float figure
  5446. wide false
  5447. sideways false
  5448. status collapsed
  5449. \begin_layout Plain Layout
  5450. \align center
  5451. \begin_inset Graphics
  5452. filename graphics/frma-pax-bx/MA-BX-fRMA.fRMA-RASTER.png
  5453. lyxscale 10
  5454. width 45col%
  5455. groupId ma-frma
  5456. \end_inset
  5457. \end_layout
  5458. \begin_layout Plain Layout
  5459. \begin_inset Caption Standard
  5460. \begin_layout Plain Layout
  5461. \begin_inset CommandInset label
  5462. LatexCommand label
  5463. name "fig:ma-bx-frma-frma"
  5464. \end_inset
  5465. fRMA vs fRMA for biopsy samples.
  5466. \end_layout
  5467. \end_inset
  5468. \end_layout
  5469. \end_inset
  5470. \end_layout
  5471. \begin_layout Plain Layout
  5472. \align center
  5473. \begin_inset Float figure
  5474. wide false
  5475. sideways false
  5476. status collapsed
  5477. \begin_layout Plain Layout
  5478. \align center
  5479. \begin_inset Graphics
  5480. filename graphics/frma-pax-bx/MA-PAX-RMA.fRMA-RASTER.png
  5481. lyxscale 10
  5482. width 45col%
  5483. groupId ma-frma
  5484. \end_inset
  5485. \end_layout
  5486. \begin_layout Plain Layout
  5487. \begin_inset Caption Standard
  5488. \begin_layout Plain Layout
  5489. \begin_inset CommandInset label
  5490. LatexCommand label
  5491. name "fig:MA-PAX-rma-frma"
  5492. \end_inset
  5493. RMA vs.
  5494. fRMA for blood samples.
  5495. \end_layout
  5496. \end_inset
  5497. \end_layout
  5498. \end_inset
  5499. \begin_inset space \hfill{}
  5500. \end_inset
  5501. \begin_inset Float figure
  5502. wide false
  5503. sideways false
  5504. status collapsed
  5505. \begin_layout Plain Layout
  5506. \align center
  5507. \begin_inset Graphics
  5508. filename graphics/frma-pax-bx/MA-PAX-fRMA.fRMA-RASTER.png
  5509. lyxscale 10
  5510. width 45col%
  5511. groupId ma-frma
  5512. \end_inset
  5513. \end_layout
  5514. \begin_layout Plain Layout
  5515. \begin_inset Caption Standard
  5516. \begin_layout Plain Layout
  5517. \begin_inset CommandInset label
  5518. LatexCommand label
  5519. name "fig:MA-PAX-frma-frma"
  5520. \end_inset
  5521. fRMA vs fRMA for blood samples.
  5522. \end_layout
  5523. \end_inset
  5524. \end_layout
  5525. \end_inset
  5526. \end_layout
  5527. \begin_layout Plain Layout
  5528. \begin_inset Caption Standard
  5529. \begin_layout Plain Layout
  5530. \series bold
  5531. \begin_inset CommandInset label
  5532. LatexCommand label
  5533. name "fig:Representative-MA-plots"
  5534. \end_inset
  5535. Representative MA plots comparing RMA and custom fRMA normalizations.
  5536. \series default
  5537. For each plot, 20 samples were normalized using 2 different normalizations,
  5538. and then averages (A) and log ratios (M) were plotted between the two different
  5539. normalizations for every probe.
  5540. For the
  5541. \begin_inset Quotes eld
  5542. \end_inset
  5543. fRMA vs fRMA
  5544. \begin_inset Quotes erd
  5545. \end_inset
  5546. plots (b & d), two different fRMA normalizations using vectors from two
  5547. independent batch samplings were compared.
  5548. Density of points is represented by blue shading, and individual outlier
  5549. points are plotted.
  5550. \end_layout
  5551. \end_inset
  5552. \end_layout
  5553. \end_inset
  5554. \end_layout
  5555. \begin_layout Standard
  5556. Figure
  5557. \begin_inset CommandInset ref
  5558. LatexCommand ref
  5559. reference "fig:ma-bx-rma-frma"
  5560. plural "false"
  5561. caps "false"
  5562. noprefix "false"
  5563. \end_inset
  5564. shows an MA plot of the RMA-normalized values against the fRMA-normalized
  5565. values for the same probe sets and arrays, corresponding to the first row
  5566. of Figure
  5567. \begin_inset CommandInset ref
  5568. LatexCommand ref
  5569. reference "fig:m-bx-violin"
  5570. plural "false"
  5571. caps "false"
  5572. noprefix "false"
  5573. \end_inset
  5574. .
  5575. This MA plot shows that not only is there a wide distribution of M-values,
  5576. but the trend of M-values is dependent on the average normalized intensity.
  5577. This is expected, since the overall trend represents the differences in
  5578. the quantile normalization step.
  5579. When running RMA, only the quantiles for these specific 20 arrays are used,
  5580. while for fRMA the quantile distribution is taken from all arrays used
  5581. in training.
  5582. Figure
  5583. \begin_inset CommandInset ref
  5584. LatexCommand ref
  5585. reference "fig:ma-bx-frma-frma"
  5586. plural "false"
  5587. caps "false"
  5588. noprefix "false"
  5589. \end_inset
  5590. shows a similar MA plot comparing 2 different fRMA normalizations, correspondin
  5591. g to the 6th row of Figure
  5592. \begin_inset CommandInset ref
  5593. LatexCommand ref
  5594. reference "fig:m-bx-violin"
  5595. plural "false"
  5596. caps "false"
  5597. noprefix "false"
  5598. \end_inset
  5599. .
  5600. The MA plot is very tightly centered around zero with no visible trend.
  5601. Figures
  5602. \begin_inset CommandInset ref
  5603. LatexCommand ref
  5604. reference "fig:m-pax-violin"
  5605. plural "false"
  5606. caps "false"
  5607. noprefix "false"
  5608. \end_inset
  5609. ,
  5610. \begin_inset CommandInset ref
  5611. LatexCommand ref
  5612. reference "fig:MA-PAX-rma-frma"
  5613. plural "false"
  5614. caps "false"
  5615. noprefix "false"
  5616. \end_inset
  5617. , and
  5618. \begin_inset CommandInset ref
  5619. LatexCommand ref
  5620. reference "fig:MA-PAX-frma-frma"
  5621. plural "false"
  5622. caps "false"
  5623. noprefix "false"
  5624. \end_inset
  5625. show exactly the same information for the blood samples, once again comparing
  5626. the normalized expression values between normalizations for all probe sets
  5627. across 20 randomly selected test arrays.
  5628. Once again, there is a wider distribution of log ratios between RMA-normalized
  5629. values and fRMA-normalized values, and a much tighter distribution when comparing
  5630. different fRMA normalizations to each other, indicating that the fRMA training
  5631. process is robust to random batch downsampling for the blood samples as
  5632. well.
  5633. \end_layout
  5634. \begin_layout Subsection
  5635. SVA, voom, and array weights improve model fit for methylation array data
  5636. \end_layout
  5637. \begin_layout Standard
  5638. \begin_inset ERT
  5639. status open
  5640. \begin_layout Plain Layout
  5641. \backslash
  5642. afterpage{
  5643. \end_layout
  5644. \begin_layout Plain Layout
  5645. \backslash
  5646. begin{landscape}
  5647. \end_layout
  5648. \end_inset
  5649. \end_layout
  5650. \begin_layout Standard
  5651. \begin_inset Float figure
  5652. wide false
  5653. sideways false
  5654. status open
  5655. \begin_layout Plain Layout
  5656. \begin_inset Flex TODO Note (inline)
  5657. status open
  5658. \begin_layout Plain Layout
  5659. Fix axis labels:
  5660. \begin_inset Quotes eld
  5661. \end_inset
  5662. log2 M-value
  5663. \begin_inset Quotes erd
  5664. \end_inset
  5665. is redundant because M-values are already log scale
  5666. \end_layout
  5667. \end_inset
  5668. \end_layout
  5669. \begin_layout Plain Layout
  5670. \begin_inset Float figure
  5671. wide false
  5672. sideways false
  5673. status collapsed
  5674. \begin_layout Plain Layout
  5675. \align center
  5676. \begin_inset Graphics
  5677. filename graphics/methylvoom/unadj.dupcor/meanvar-trends-PAGE1-CROP-RASTER.png
  5678. lyxscale 15
  5679. width 30col%
  5680. groupId voomaw-subfig
  5681. \end_inset
  5682. \end_layout
  5683. \begin_layout Plain Layout
  5684. \begin_inset Caption Standard
  5685. \begin_layout Plain Layout
  5686. \begin_inset CommandInset label
  5687. LatexCommand label
  5688. name "fig:meanvar-basic"
  5689. \end_inset
  5690. Mean-variance trend for analysis A.
  5691. \end_layout
  5692. \end_inset
  5693. \end_layout
  5694. \end_inset
  5695. \begin_inset space \hfill{}
  5696. \end_inset
  5697. \begin_inset Float figure
  5698. wide false
  5699. sideways false
  5700. status collapsed
  5701. \begin_layout Plain Layout
  5702. \align center
  5703. \begin_inset Graphics
  5704. filename graphics/methylvoom/unadj.dupcor.sva.aw/meanvar-trends-PAGE1-CROP-RASTER.png
  5705. lyxscale 15
  5706. width 30col%
  5707. groupId voomaw-subfig
  5708. \end_inset
  5709. \end_layout
  5710. \begin_layout Plain Layout
  5711. \begin_inset Caption Standard
  5712. \begin_layout Plain Layout
  5713. \begin_inset CommandInset label
  5714. LatexCommand label
  5715. name "fig:meanvar-sva-aw"
  5716. \end_inset
  5717. Mean-variance trend for analysis B.
  5718. \end_layout
  5719. \end_inset
  5720. \end_layout
  5721. \end_inset
  5722. \begin_inset space \hfill{}
  5723. \end_inset
  5724. \begin_inset Float figure
  5725. wide false
  5726. sideways false
  5727. status collapsed
  5728. \begin_layout Plain Layout
  5729. \align center
  5730. \begin_inset Graphics
  5731. filename graphics/methylvoom/unadj.dupcor.sva.voomaw/meanvar-trends-PAGE2-CROP-RASTER.png
  5732. lyxscale 15
  5733. width 30col%
  5734. groupId voomaw-subfig
  5735. \end_inset
  5736. \end_layout
  5737. \begin_layout Plain Layout
  5738. \begin_inset Caption Standard
  5739. \begin_layout Plain Layout
  5740. \begin_inset CommandInset label
  5741. LatexCommand label
  5742. name "fig:meanvar-sva-voomaw"
  5743. \end_inset
  5744. Mean-variance trend after voom modeling in analysis C.
  5745. \end_layout
  5746. \end_inset
  5747. \end_layout
  5748. \end_inset
  5749. \end_layout
  5750. \begin_layout Plain Layout
  5751. \begin_inset Caption Standard
  5752. \begin_layout Plain Layout
  5753. \series bold
  5754. Mean-variance trend modeling in methylation array data.
  5755. \series default
  5756. The estimated log2(standard deviation) for each probe is plotted against
  5757. the probe's average M-value across all samples as a black point, with some
  5758. transparency to make overplotting more visible, since there are about 450,000
  5759. points.
  5760. Density of points is also indicated by the dark blue contour lines.
  5761. The prior variance trend estimated by eBayes is shown in light blue, while
  5762. the lowess trend of the points is shown in red.
  5763. \end_layout
  5764. \end_inset
  5765. \end_layout
  5766. \end_inset
  5767. \end_layout
  5768. \begin_layout Standard
  5769. \begin_inset ERT
  5770. status open
  5771. \begin_layout Plain Layout
  5772. \backslash
  5773. end{landscape}
  5774. \end_layout
  5775. \begin_layout Plain Layout
  5776. }
  5777. \end_layout
  5778. \end_inset
  5779. \end_layout
  5780. \begin_layout Standard
  5781. Figure
  5782. \begin_inset CommandInset ref
  5783. LatexCommand ref
  5784. reference "fig:meanvar-basic"
  5785. plural "false"
  5786. caps "false"
  5787. noprefix "false"
  5788. \end_inset
  5789. shows the relationship between the mean M-value and the standard deviation
  5790. calculated for each probe in the methylation array data set.
  5791. A few features of the data are apparent.
  5792. First, the data are very strongly bimodal, with peaks in the density around
  5793. M-values of +4 and -4.
  5794. These modes correspond to methylation sites that are nearly 100% methylated
  5795. and nearly 100% unmethylated, respectively.
  5796. The strong bimodality indicates that a majority of probes interrogate sites
  5797. that fall into one of these two categories.
  5798. The points in between these modes represent sites that are either partially
  5799. methylated in many samples, or are fully methylated in some samples and
  5800. fully unmethylated in other samples, or some combination.
  5801. The next visible feature of the data is the W-shaped variance trend.
  5802. The upticks in the variance trend on either side are expected, based on
  5803. the sigmoid transformation exaggerating small differences at extreme M-values
  5804. (Figure
  5805. \begin_inset CommandInset ref
  5806. LatexCommand ref
  5807. reference "fig:Sigmoid-beta-m-mapping"
  5808. plural "false"
  5809. caps "false"
  5810. noprefix "false"
  5811. \end_inset
  5812. ).
  5813. However, the uptick in the center is interesting: it indicates that sites
  5814. that are not constitutively methylated or unmethylated have a higher
  5815. variance.
  5816. This could be a genuine biological effect, or it could be spurious noise
  5817. that is only observable at sites with varying methylation.
  5818. \end_layout
  5819. \begin_layout Standard
  5820. In Figure
  5821. \begin_inset CommandInset ref
  5822. LatexCommand ref
  5823. reference "fig:meanvar-sva-aw"
  5824. plural "false"
  5825. caps "false"
  5826. noprefix "false"
  5827. \end_inset
  5828. , we see the mean-variance trend for the same methylation array data, this
  5829. time with surrogate variables and sample quality weights estimated from
  5830. the data and included in the model.
  5831. As expected, the overall average variance is smaller, since the surrogate
  5832. variables account for some of the variance.
  5833. In addition, the uptick in variance in the middle of the M-value range
  5834. has disappeared, turning the W shape into a wide U shape.
  5835. This indicates that the excess variance in the probes with intermediate
  5836. M-values was explained by systematic variations not correlated with known
  5837. covariates, and these variations were modeled by the surrogate variables.
  5838. The result is a nearly flat variance trend for the entire intermediate
  5839. M-value range from about -3 to +3.
  5840. Note that this corresponds closely to the range within which the M-value
  5841. transformation shown in Figure
  5842. \begin_inset CommandInset ref
  5843. LatexCommand ref
  5844. reference "fig:Sigmoid-beta-m-mapping"
  5845. plural "false"
  5846. caps "false"
  5847. noprefix "false"
  5848. \end_inset
  5849. is nearly linear.
  5850. In contrast, the excess variance at the extremes (greater than +3 and less
  5851. than -3) was not
  5852. \begin_inset Quotes eld
  5853. \end_inset
  5854. absorbed
  5855. \begin_inset Quotes erd
  5856. \end_inset
  5857. by the surrogate variables and remains in the plot, indicating that this
  5858. variation has no systematic component: probes with extreme M-values are
  5859. uniformly more variable across all samples, as expected.
  5860. \end_layout
  5861. \begin_layout Standard
  5862. Figure
  5863. \begin_inset CommandInset ref
  5864. LatexCommand ref
  5865. reference "fig:meanvar-sva-voomaw"
  5866. plural "false"
  5867. caps "false"
  5868. noprefix "false"
  5869. \end_inset
  5870. shows the mean-variance trend after fitting the model with the observation
  5871. weights assigned by voom based on the mean-variance trend shown in Figure
  5872. \begin_inset CommandInset ref
  5873. LatexCommand ref
  5874. reference "fig:meanvar-sva-aw"
  5875. plural "false"
  5876. caps "false"
  5877. noprefix "false"
  5878. \end_inset
  5879. .
  5880. As expected, the weights exactly counteract the trend in the data, resulting
  5881. in a nearly flat trend centered vertically at 1 (i.e.
  5882. 0 on the log scale).
  5883. This shows that the observations with extreme M-values have been appropriately
  5884. down-weighted to account for the fact that the noise in those observations
  5885. has been amplified by the non-linear M-value transformation.
  5886. In turn, this gives relatively more weight to observations in the middle
  5887. region, which are more likely to correspond to probes measuring interesting
  5888. biology (not constitutively methylated or unmethylated).
  5889. \end_layout
  5890. \begin_layout Standard
  5891. \begin_inset Float table
  5892. wide false
  5893. sideways false
  5894. status open
  5895. \begin_layout Plain Layout
  5896. \align center
  5897. \begin_inset Tabular
  5898. <lyxtabular version="3" rows="5" columns="3">
  5899. <features tabularvalignment="middle">
  5900. <column alignment="center" valignment="top">
  5901. <column alignment="center" valignment="top">
  5902. <column alignment="center" valignment="top">
  5903. <row>
  5904. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5905. \begin_inset Text
  5906. \begin_layout Plain Layout
  5907. Covariate
  5908. \end_layout
  5909. \end_inset
  5910. </cell>
  5911. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  5912. \begin_inset Text
  5913. \begin_layout Plain Layout
  5914. Test used
  5915. \end_layout
  5916. \end_inset
  5917. </cell>
  5918. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  5919. \begin_inset Text
  5920. \begin_layout Plain Layout
  5921. p-value
  5922. \end_layout
  5923. \end_inset
  5924. </cell>
  5925. </row>
  5926. <row>
  5927. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5928. \begin_inset Text
  5929. \begin_layout Plain Layout
  5930. Transplant Status
  5931. \end_layout
  5932. \end_inset
  5933. </cell>
  5934. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5935. \begin_inset Text
  5936. \begin_layout Plain Layout
  5937. F-test
  5938. \end_layout
  5939. \end_inset
  5940. </cell>
  5941. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  5942. \begin_inset Text
  5943. \begin_layout Plain Layout
  5944. 0.404
  5945. \end_layout
  5946. \end_inset
  5947. </cell>
  5948. </row>
  5949. <row>
  5950. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5951. \begin_inset Text
  5952. \begin_layout Plain Layout
  5953. Diabetes Diagnosis
  5954. \end_layout
  5955. \end_inset
  5956. </cell>
  5957. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5958. \begin_inset Text
  5959. \begin_layout Plain Layout
  5960. \emph on
  5961. t
  5962. \emph default
  5963. -test
  5964. \end_layout
  5965. \end_inset
  5966. </cell>
  5967. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  5968. \begin_inset Text
  5969. \begin_layout Plain Layout
  5970. 0.00106
  5971. \end_layout
  5972. \end_inset
  5973. </cell>
  5974. </row>
  5975. <row>
  5976. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5977. \begin_inset Text
  5978. \begin_layout Plain Layout
  5979. Sex
  5980. \end_layout
  5981. \end_inset
  5982. </cell>
  5983. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  5984. \begin_inset Text
  5985. \begin_layout Plain Layout
  5986. \emph on
  5987. t
  5988. \emph default
  5989. -test
  5990. \end_layout
  5991. \end_inset
  5992. </cell>
  5993. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  5994. \begin_inset Text
  5995. \begin_layout Plain Layout
  5996. 0.148
  5997. \end_layout
  5998. \end_inset
  5999. </cell>
  6000. </row>
  6001. <row>
  6002. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6003. \begin_inset Text
  6004. \begin_layout Plain Layout
  6005. Age
  6006. \end_layout
  6007. \end_inset
  6008. </cell>
  6009. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6010. \begin_inset Text
  6011. \begin_layout Plain Layout
  6012. linear regression
  6013. \end_layout
  6014. \end_inset
  6015. </cell>
  6016. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6017. \begin_inset Text
  6018. \begin_layout Plain Layout
  6019. 0.212
  6020. \end_layout
  6021. \end_inset
  6022. </cell>
  6023. </row>
  6024. </lyxtabular>
  6025. \end_inset
  6026. \end_layout
  6027. \begin_layout Plain Layout
  6028. \begin_inset Caption Standard
  6029. \begin_layout Plain Layout
  6030. \series bold
  6031. \begin_inset CommandInset label
  6032. LatexCommand label
  6033. name "tab:weight-covariate-tests"
  6034. \end_inset
  6035. Association of sample weights with clinical covariates in methylation array
  6036. data.
  6037. \series default
  6038. Computed sample quality log weights were tested for significant association
  6039. with each of the variables in the model (1st column).
  6040. An appropriate test was selected for each variable based on whether the
  6041. variable had 2 categories (
  6042. \emph on
  6043. t
  6044. \emph default
  6045. -test), had more than 2 categories (F-test), or was numeric (linear regression).
  6046. The test selected is shown in the 2nd column.
  6047. P-values for association with the log weights are shown in the 3rd column.
  6048. No multiple testing adjustment was performed for these p-values.
  6049. \end_layout
  6050. \end_inset
  6051. \end_layout
  6052. \end_inset
  6053. \end_layout
  6054. \begin_layout Standard
  6055. \begin_inset Float figure
  6056. wide false
  6057. sideways false
  6058. status open
  6059. \begin_layout Plain Layout
  6060. \begin_inset Flex TODO Note (inline)
  6061. status open
  6062. \begin_layout Plain Layout
  6063. Redo the sample weight boxplot with notches, and remove fill colors
  6064. \end_layout
  6065. \end_inset
  6066. \end_layout
  6067. \begin_layout Plain Layout
  6068. \align center
  6069. \begin_inset Graphics
  6070. filename graphics/methylvoom/unadj.dupcor.sva.voomaw/sample-weights-PAGE3-CROP.pdf
  6071. lyxscale 50
  6072. width 60col%
  6073. groupId colwidth
  6074. \end_inset
  6075. \end_layout
  6076. \begin_layout Plain Layout
  6077. \begin_inset Caption Standard
  6078. \begin_layout Plain Layout
  6079. \begin_inset CommandInset label
  6080. LatexCommand label
  6081. name "fig:diabetes-sample-weights"
  6082. \end_inset
  6083. \series bold
  6084. Box-and-whiskers plot of sample quality weights grouped by diabetes diagnosis.
  6085. \series default
  6086. Samples were grouped based on diabetes diagnosis, and the distribution of
  6087. sample quality weights for each diagnosis was plotted as a box-and-whiskers
  6088. plot
  6089. \begin_inset CommandInset citation
  6090. LatexCommand cite
  6091. key "McGill1978"
  6092. literal "false"
  6093. \end_inset
  6094. .
  6095. \end_layout
  6096. \end_inset
  6097. \end_layout
  6098. \begin_layout Plain Layout
  6099. \end_layout
  6100. \end_inset
  6101. \end_layout
  6102. \begin_layout Standard
  6103. To determine whether any of the known experimental factors had an impact
  6104. on data quality, the sample quality weights estimated from the data were
  6105. tested for association with each of the experimental factors (Table
  6106. \begin_inset CommandInset ref
  6107. LatexCommand ref
  6108. reference "tab:weight-covariate-tests"
  6109. plural "false"
  6110. caps "false"
  6111. noprefix "false"
  6112. \end_inset
  6113. ).
  6114. Diabetes diagnosis was found to have a potentially significant association
  6115. with the sample weights, with a t-test p-value of
  6116. \begin_inset Formula $1.06\times10^{-3}$
  6117. \end_inset
  6118. .
  6119. Figure
  6120. \begin_inset CommandInset ref
  6121. LatexCommand ref
  6122. reference "fig:diabetes-sample-weights"
  6123. plural "false"
  6124. caps "false"
  6125. noprefix "false"
  6126. \end_inset
  6127. shows the distribution of sample weights grouped by diabetes diagnosis.
  6128. The samples from patients with Type 2 diabetes were assigned significantly
  6129. lower weights than those from patients with Type 1 diabetes.
  6130. This indicates that the Type 2 diabetes samples had an overall higher variance
  6131. on average across all probes.
  6132. \end_layout
  6133. \begin_layout Standard
  6134. \begin_inset Float table
  6135. wide false
  6136. sideways false
  6137. status open
  6138. \begin_layout Plain Layout
  6139. \align center
  6140. \begin_inset Flex TODO Note (inline)
  6141. status open
  6142. \begin_layout Plain Layout
  6143. Consider transposing these tables
  6144. \end_layout
  6145. \end_inset
  6146. \end_layout
  6147. \begin_layout Plain Layout
  6148. \begin_inset Float table
  6149. wide false
  6150. sideways false
  6151. status open
  6152. \begin_layout Plain Layout
  6153. \align center
  6154. \begin_inset Tabular
  6155. <lyxtabular version="3" rows="5" columns="4">
  6156. <features tabularvalignment="middle">
  6157. <column alignment="center" valignment="top">
  6158. <column alignment="center" valignment="top">
  6159. <column alignment="center" valignment="top">
  6160. <column alignment="center" valignment="top">
  6161. <row>
  6162. <cell alignment="center" valignment="top" usebox="none">
  6163. \begin_inset Text
  6164. \begin_layout Plain Layout
  6165. \end_layout
  6166. \end_inset
  6167. </cell>
  6168. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6169. \begin_inset Text
  6170. \begin_layout Plain Layout
  6171. Analysis
  6172. \end_layout
  6173. \end_inset
  6174. </cell>
  6175. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6176. \begin_inset Text
  6177. \begin_layout Plain Layout
  6178. \end_layout
  6179. \end_inset
  6180. </cell>
  6181. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6182. \begin_inset Text
  6183. \begin_layout Plain Layout
  6184. \end_layout
  6185. \end_inset
  6186. </cell>
  6187. </row>
  6188. <row>
  6189. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6190. \begin_inset Text
  6191. \begin_layout Plain Layout
  6192. Contrast
  6193. \end_layout
  6194. \end_inset
  6195. </cell>
  6196. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6197. \begin_inset Text
  6198. \begin_layout Plain Layout
  6199. A
  6200. \end_layout
  6201. \end_inset
  6202. </cell>
  6203. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6204. \begin_inset Text
  6205. \begin_layout Plain Layout
  6206. B
  6207. \end_layout
  6208. \end_inset
  6209. </cell>
  6210. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6211. \begin_inset Text
  6212. \begin_layout Plain Layout
  6213. C
  6214. \end_layout
  6215. \end_inset
  6216. </cell>
  6217. </row>
  6218. <row>
  6219. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6220. \begin_inset Text
  6221. \begin_layout Plain Layout
  6222. TX vs AR
  6223. \end_layout
  6224. \end_inset
  6225. </cell>
  6226. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6227. \begin_inset Text
  6228. \begin_layout Plain Layout
  6229. 0
  6230. \end_layout
  6231. \end_inset
  6232. </cell>
  6233. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6234. \begin_inset Text
  6235. \begin_layout Plain Layout
  6236. 25
  6237. \end_layout
  6238. \end_inset
  6239. </cell>
  6240. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6241. \begin_inset Text
  6242. \begin_layout Plain Layout
  6243. 22
  6244. \end_layout
  6245. \end_inset
  6246. </cell>
  6247. </row>
  6248. <row>
  6249. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6250. \begin_inset Text
  6251. \begin_layout Plain Layout
  6252. TX vs ADNR
  6253. \end_layout
  6254. \end_inset
  6255. </cell>
  6256. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6257. \begin_inset Text
  6258. \begin_layout Plain Layout
  6259. 7
  6260. \end_layout
  6261. \end_inset
  6262. </cell>
  6263. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6264. \begin_inset Text
  6265. \begin_layout Plain Layout
  6266. 338
  6267. \end_layout
  6268. \end_inset
  6269. </cell>
  6270. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6271. \begin_inset Text
  6272. \begin_layout Plain Layout
  6273. 369
  6274. \end_layout
  6275. \end_inset
  6276. </cell>
  6277. </row>
  6278. <row>
  6279. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6280. \begin_inset Text
  6281. \begin_layout Plain Layout
  6282. TX vs CAN
  6283. \end_layout
  6284. \end_inset
  6285. </cell>
  6286. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6287. \begin_inset Text
  6288. \begin_layout Plain Layout
  6289. 0
  6290. \end_layout
  6291. \end_inset
  6292. </cell>
  6293. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6294. \begin_inset Text
  6295. \begin_layout Plain Layout
  6296. 231
  6297. \end_layout
  6298. \end_inset
  6299. </cell>
  6300. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6301. \begin_inset Text
  6302. \begin_layout Plain Layout
  6303. 278
  6304. \end_layout
  6305. \end_inset
  6306. </cell>
  6307. </row>
  6308. </lyxtabular>
  6309. \end_inset
  6310. \end_layout
  6311. \begin_layout Plain Layout
  6312. \begin_inset Caption Standard
  6313. \begin_layout Plain Layout
  6314. \begin_inset CommandInset label
  6315. LatexCommand label
  6316. name "tab:methyl-num-signif"
  6317. \end_inset
  6318. Number of probes significant at 10% FDR.
  6319. \end_layout
  6320. \end_inset
  6321. \end_layout
  6322. \end_inset
  6323. \begin_inset space \hfill{}
  6324. \end_inset
  6325. \begin_inset Float table
  6326. wide false
  6327. sideways false
  6328. status open
  6329. \begin_layout Plain Layout
  6330. \align center
  6331. \begin_inset Tabular
  6332. <lyxtabular version="3" rows="5" columns="4">
  6333. <features tabularvalignment="middle">
  6334. <column alignment="center" valignment="top">
  6335. <column alignment="center" valignment="top">
  6336. <column alignment="center" valignment="top">
  6337. <column alignment="center" valignment="top">
  6338. <row>
  6339. <cell alignment="center" valignment="top" usebox="none">
  6340. \begin_inset Text
  6341. \begin_layout Plain Layout
  6342. \end_layout
  6343. \end_inset
  6344. </cell>
  6345. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6346. \begin_inset Text
  6347. \begin_layout Plain Layout
  6348. Analysis
  6349. \end_layout
  6350. \end_inset
  6351. </cell>
  6352. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6353. \begin_inset Text
  6354. \begin_layout Plain Layout
  6355. \end_layout
  6356. \end_inset
  6357. </cell>
  6358. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6359. \begin_inset Text
  6360. \begin_layout Plain Layout
  6361. \end_layout
  6362. \end_inset
  6363. </cell>
  6364. </row>
  6365. <row>
  6366. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6367. \begin_inset Text
  6368. \begin_layout Plain Layout
  6369. Contrast
  6370. \end_layout
  6371. \end_inset
  6372. </cell>
  6373. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6374. \begin_inset Text
  6375. \begin_layout Plain Layout
  6376. A
  6377. \end_layout
  6378. \end_inset
  6379. </cell>
  6380. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6381. \begin_inset Text
  6382. \begin_layout Plain Layout
  6383. B
  6384. \end_layout
  6385. \end_inset
  6386. </cell>
  6387. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6388. \begin_inset Text
  6389. \begin_layout Plain Layout
  6390. C
  6391. \end_layout
  6392. \end_inset
  6393. </cell>
  6394. </row>
  6395. <row>
  6396. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6397. \begin_inset Text
  6398. \begin_layout Plain Layout
  6399. TX vs AR
  6400. \end_layout
  6401. \end_inset
  6402. </cell>
  6403. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6404. \begin_inset Text
  6405. \begin_layout Plain Layout
  6406. 0
  6407. \end_layout
  6408. \end_inset
  6409. </cell>
  6410. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6411. \begin_inset Text
  6412. \begin_layout Plain Layout
  6413. 10,063
  6414. \end_layout
  6415. \end_inset
  6416. </cell>
  6417. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6418. \begin_inset Text
  6419. \begin_layout Plain Layout
  6420. 11,225
  6421. \end_layout
  6422. \end_inset
  6423. </cell>
  6424. </row>
  6425. <row>
  6426. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6427. \begin_inset Text
  6428. \begin_layout Plain Layout
  6429. TX vs ADNR
  6430. \end_layout
  6431. \end_inset
  6432. </cell>
  6433. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6434. \begin_inset Text
  6435. \begin_layout Plain Layout
  6436. 27
  6437. \end_layout
  6438. \end_inset
  6439. </cell>
  6440. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  6441. \begin_inset Text
  6442. \begin_layout Plain Layout
  6443. 12,674
  6444. \end_layout
  6445. \end_inset
  6446. </cell>
  6447. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  6448. \begin_inset Text
  6449. \begin_layout Plain Layout
  6450. 13,086
  6451. \end_layout
  6452. \end_inset
  6453. </cell>
  6454. </row>
  6455. <row>
  6456. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6457. \begin_inset Text
  6458. \begin_layout Plain Layout
  6459. TX vs CAN
  6460. \end_layout
  6461. \end_inset
  6462. </cell>
  6463. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6464. \begin_inset Text
  6465. \begin_layout Plain Layout
  6466. 966
  6467. \end_layout
  6468. \end_inset
  6469. </cell>
  6470. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  6471. \begin_inset Text
  6472. \begin_layout Plain Layout
  6473. 20,039
  6474. \end_layout
  6475. \end_inset
  6476. </cell>
  6477. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  6478. \begin_inset Text
  6479. \begin_layout Plain Layout
  6480. 20,955
  6481. \end_layout
  6482. \end_inset
  6483. </cell>
  6484. </row>
  6485. </lyxtabular>
  6486. \end_inset
  6487. \end_layout
  6488. \begin_layout Plain Layout
  6489. \begin_inset Caption Standard
  6490. \begin_layout Plain Layout
  6491. \begin_inset CommandInset label
  6492. LatexCommand label
  6493. name "tab:methyl-est-nonnull"
  6494. \end_inset
  6495. Estimated number of non-null tests, using the method of averaging local
  6496. FDR values
  6497. \begin_inset CommandInset citation
  6498. LatexCommand cite
  6499. key "Phipson2013Thesis"
  6500. literal "false"
  6501. \end_inset
  6502. .
  6503. \end_layout
  6504. \end_inset
  6505. \end_layout
  6506. \end_inset
  6507. \end_layout
  6508. \begin_layout Plain Layout
  6509. \begin_inset Caption Standard
  6510. \begin_layout Plain Layout
  6511. \series bold
  6512. Estimates of degree of differential methylation for each contrast in
  6513. each analysis.
  6514. \series default
  6515. For each of the analyses in Table
  6516. \begin_inset CommandInset ref
  6517. LatexCommand ref
  6518. reference "tab:Summary-of-meth-analysis"
  6519. plural "false"
  6520. caps "false"
  6521. noprefix "false"
  6522. \end_inset
  6523. , these tables show the number of probes called significantly differentially
  6524. methylated at a threshold of 10% FDR for each comparison between TX and
  6525. the other 3 transplant statuses (a) and the estimated total number of probes
  6526. that are differentially methylated (b).
  6527. \end_layout
  6528. \end_inset
  6529. \end_layout
  6530. \end_inset
  6531. \end_layout
  6532. \begin_layout Standard
  6533. \begin_inset Float figure
  6534. wide false
  6535. sideways false
  6536. status open
  6537. \begin_layout Plain Layout
  6538. \align center
  6539. \series bold
  6540. \begin_inset Float figure
  6541. wide false
  6542. sideways false
  6543. status collapsed
  6544. \begin_layout Plain Layout
  6545. \align center
  6546. \begin_inset Graphics
  6547. filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE1.pdf
  6548. lyxscale 33
  6549. width 30col%
  6550. groupId meth-pval-hist
  6551. \end_inset
  6552. \end_layout
  6553. \begin_layout Plain Layout
  6554. \series bold
  6555. \begin_inset Caption Standard
  6556. \begin_layout Plain Layout
  6557. AR vs.
  6558. TX, Analysis A
  6559. \end_layout
  6560. \end_inset
  6561. \end_layout
  6562. \begin_layout Plain Layout
  6563. \end_layout
  6564. \end_inset
  6565. \begin_inset space \hfill{}
  6566. \end_inset
  6567. \begin_inset Float figure
  6568. wide false
  6569. sideways false
  6570. status collapsed
  6571. \begin_layout Plain Layout
  6572. \align center
  6573. \begin_inset Graphics
  6574. filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE2.pdf
  6575. lyxscale 33
  6576. width 30col%
  6577. groupId meth-pval-hist
  6578. \end_inset
  6579. \end_layout
  6580. \begin_layout Plain Layout
  6581. \series bold
  6582. \begin_inset Caption Standard
  6583. \begin_layout Plain Layout
  6584. ADNR vs.
  6585. TX, Analysis A
  6586. \end_layout
  6587. \end_inset
  6588. \end_layout
  6589. \end_inset
  6590. \begin_inset space \hfill{}
  6591. \end_inset
  6592. \begin_inset Float figure
  6593. wide false
  6594. sideways false
  6595. status collapsed
  6596. \begin_layout Plain Layout
  6597. \align center
  6598. \begin_inset Graphics
  6599. filename graphics/methylvoom/unadj.dupcor/pval-histograms-PAGE3.pdf
  6600. lyxscale 33
  6601. width 30col%
  6602. groupId meth-pval-hist
  6603. \end_inset
  6604. \end_layout
  6605. \begin_layout Plain Layout
  6606. \series bold
  6607. \begin_inset Caption Standard
  6608. \begin_layout Plain Layout
  6609. CAN vs.
  6610. TX, Analysis A
  6611. \end_layout
  6612. \end_inset
  6613. \end_layout
  6614. \end_inset
  6615. \end_layout
  6616. \begin_layout Plain Layout
  6617. \align center
  6618. \series bold
  6619. \begin_inset Float figure
  6620. wide false
  6621. sideways false
  6622. status collapsed
  6623. \begin_layout Plain Layout
  6624. \align center
  6625. \begin_inset Graphics
  6626. filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE1.pdf
  6627. lyxscale 33
  6628. width 30col%
  6629. groupId meth-pval-hist
  6630. \end_inset
  6631. \end_layout
  6632. \begin_layout Plain Layout
  6633. \series bold
  6634. \begin_inset Caption Standard
  6635. \begin_layout Plain Layout
  6636. AR vs.
  6637. TX, Analysis B
  6638. \end_layout
  6639. \end_inset
  6640. \end_layout
  6641. \end_inset
  6642. \begin_inset space \hfill{}
  6643. \end_inset
  6644. \begin_inset Float figure
  6645. wide false
  6646. sideways false
  6647. status collapsed
  6648. \begin_layout Plain Layout
  6649. \align center
  6650. \begin_inset Graphics
  6651. filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE2.pdf
  6652. lyxscale 33
  6653. width 30col%
  6654. groupId meth-pval-hist
  6655. \end_inset
  6656. \end_layout
  6657. \begin_layout Plain Layout
  6658. \series bold
  6659. \begin_inset Caption Standard
  6660. \begin_layout Plain Layout
  6661. ADNR vs.
  6662. TX, Analysis B
  6663. \end_layout
  6664. \end_inset
  6665. \end_layout
  6666. \end_inset
  6667. \begin_inset space \hfill{}
  6668. \end_inset
  6669. \begin_inset Float figure
  6670. wide false
  6671. sideways false
  6672. status collapsed
  6673. \begin_layout Plain Layout
  6674. \align center
  6675. \begin_inset Graphics
  6676. filename graphics/methylvoom/unadj.dupcor.sva.aw/pval-histograms-PAGE3.pdf
  6677. lyxscale 33
  6678. width 30col%
  6679. groupId meth-pval-hist
  6680. \end_inset
  6681. \end_layout
  6682. \begin_layout Plain Layout
  6683. \series bold
  6684. \begin_inset Caption Standard
  6685. \begin_layout Plain Layout
  6686. CAN vs.
  6687. TX, Analysis B
  6688. \end_layout
  6689. \end_inset
  6690. \end_layout
  6691. \end_inset
  6692. \end_layout
  6693. \begin_layout Plain Layout
  6694. \align center
  6695. \series bold
  6696. \begin_inset Float figure
  6697. wide false
  6698. sideways false
  6699. status collapsed
  6700. \begin_layout Plain Layout
  6701. \align center
  6702. \begin_inset Graphics
  6703. filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE1.pdf
  6704. lyxscale 33
  6705. width 30col%
  6706. groupId meth-pval-hist
  6707. \end_inset
  6708. \end_layout
  6709. \begin_layout Plain Layout
  6710. \series bold
  6711. \begin_inset Caption Standard
  6712. \begin_layout Plain Layout
  6713. AR vs.
  6714. TX, Analysis C
  6715. \end_layout
  6716. \end_inset
  6717. \end_layout
  6718. \end_inset
  6719. \begin_inset space \hfill{}
  6720. \end_inset
  6721. \begin_inset Float figure
  6722. wide false
  6723. sideways false
  6724. status collapsed
  6725. \begin_layout Plain Layout
  6726. \align center
  6727. \begin_inset Graphics
  6728. filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE2.pdf
  6729. lyxscale 33
  6730. width 30col%
  6731. groupId meth-pval-hist
  6732. \end_inset
  6733. \end_layout
  6734. \begin_layout Plain Layout
  6735. \series bold
  6736. \begin_inset Caption Standard
  6737. \begin_layout Plain Layout
  6738. ADNR vs.
  6739. TX, Analysis C
  6740. \end_layout
  6741. \end_inset
  6742. \end_layout
  6743. \end_inset
  6744. \begin_inset space \hfill{}
  6745. \end_inset
  6746. \begin_inset Float figure
  6747. wide false
  6748. sideways false
  6749. status collapsed
  6750. \begin_layout Plain Layout
  6751. \align center
  6752. \begin_inset Graphics
  6753. filename graphics/methylvoom/unadj.dupcor.sva.voomaw/pval-histograms-PAGE3.pdf
  6754. lyxscale 33
  6755. width 30col%
  6756. groupId meth-pval-hist
  6757. \end_inset
  6758. \end_layout
  6759. \begin_layout Plain Layout
  6760. \series bold
  6761. \begin_inset Caption Standard
  6762. \begin_layout Plain Layout
  6763. CAN vs.
  6764. TX, Analysis C
  6765. \end_layout
  6766. \end_inset
  6767. \end_layout
  6768. \end_inset
  6769. \end_layout
  6770. \begin_layout Plain Layout
  6771. \begin_inset Caption Standard
  6772. \begin_layout Plain Layout
  6773. \series bold
  6774. \begin_inset CommandInset label
  6775. LatexCommand label
  6776. name "fig:meth-p-value-histograms"
  6777. \end_inset
  6778. Probe p-value histograms for each contrast in each analysis.
  6779. \series default
  6780. For each differential methylation test of interest, the distribution of
  6781. p-values across all probes is plotted as a histogram.
  6782. The red solid line indicates the density that would be expected under the
  6783. null hypothesis for all probes (a
  6784. \begin_inset Formula $\mathrm{Uniform}(0,1)$
  6785. \end_inset
  6786. distribution), while the blue dotted line indicates the fraction of p-values
  6787. that actually follow the null hypothesis (
  6788. \begin_inset Formula $\hat{\pi}_{0}$
  6789. \end_inset
  6790. ) estimated using the method of averaging local FDR values
  6791. \begin_inset CommandInset citation
  6792. LatexCommand cite
  6793. key "Phipson2013Thesis"
  6794. literal "false"
  6795. \end_inset
  6796. .
  6797. The blue line is only shown in each plot if the estimate of
  6798. \begin_inset Formula $\hat{\pi}_{0}$
  6799. \end_inset
  6800. for that p-value distribution is different from 1.
  6801. \end_layout
  6802. \end_inset
  6803. \end_layout
  6804. \end_inset
  6805. \end_layout
  6806. \begin_layout Standard
  6807. Table
  6808. \begin_inset CommandInset ref
  6809. LatexCommand ref
  6810. reference "tab:methyl-num-signif"
  6811. plural "false"
  6812. caps "false"
  6813. noprefix "false"
  6814. \end_inset
  6815. shows the number of significantly differentially methylated probes reported
  6816. by each analysis for each comparison of interest at an FDR of 10%.
  6817. As expected, the more elaborate analyses, B and C, report more significant
  6818. probes than the more basic analysis A, consistent with the conclusions
  6819. above that the data contain hidden systematic variations that must be modeled.
  6820. Table
  6821. \begin_inset CommandInset ref
  6822. LatexCommand ref
  6823. reference "tab:methyl-est-nonnull"
  6824. plural "false"
  6825. caps "false"
  6826. noprefix "false"
  6827. \end_inset
  6828. shows the estimated number of differentially methylated probes for each test
  6829. from each analysis.
  6830. This was computed by estimating the proportion of null hypotheses that
  6831. were true using the method of
  6832. \begin_inset CommandInset citation
  6833. LatexCommand cite
  6834. key "Phipson2013Thesis"
  6835. literal "false"
  6836. \end_inset
  6837. and subtracting the implied number of null probes from the total number of probes, yielding
  6838. an estimate of the number of null hypotheses that are false based on the
  6839. distribution of p-values across the entire dataset.
  6840. Note that this does not identify which null hypotheses should be rejected
  6841. (i.e.
  6842. which probes are significant); it only estimates the true number of such
  6843. probes.
  6844. Once again, analyses B and C result in much larger estimates for the number
  6845. of differentially methylated probes.
  6846. In this case, analysis C, the only analysis that includes voom, estimates
  6847. the largest number of differentially methylated probes for all 3 contrasts.
  6848. If the assumptions of all the methods employed hold, then this represents
  6849. a gain in statistical power over the simpler analysis A.
  6850. Figure
  6851. \begin_inset CommandInset ref
  6852. LatexCommand ref
  6853. reference "fig:meth-p-value-histograms"
  6854. plural "false"
  6855. caps "false"
  6856. noprefix "false"
  6857. \end_inset
  6858. shows the p-value distributions for each test, from which the numbers in
  6859. Table
  6860. \begin_inset CommandInset ref
  6861. LatexCommand ref
  6862. reference "tab:methyl-est-nonnull"
  6863. plural "false"
  6864. caps "false"
  6865. noprefix "false"
  6866. \end_inset
  6867. were generated.
  6868. The distributions for analysis A all have a dip in density near zero, which
  6869. is a strong sign of a poor model fit.
  6870. The histograms for analyses B and C are more well-behaved, with a uniform
  6871. component stretching all the way from 0 to 1 representing the probes for
  6872. which the null hypothesis is true (no differential methylation), and a
  6873. zero-biased component representing the probes for which the null hypothesis
  6874. is false (differentially methylated).
  6875. These histograms do not indicate any major issues with the model fit.
  6876. \end_layout
  6877. \begin_layout Standard
  6878. \begin_inset Flex TODO Note (inline)
  6879. status open
  6880. \begin_layout Plain Layout
  6881. If time allows, maybe generate the PCA plots before/after SVA effect subtraction
  6882. ?
  6883. \end_layout
  6884. \end_inset
  6885. \end_layout
  6886. \begin_layout Section
  6887. Discussion
  6888. \end_layout
  6889. \begin_layout Subsection
  6890. fRMA achieves clinically applicable normalization without sacrificing classifica
  6891. tion performance
  6892. \end_layout
  6893. \begin_layout Standard
  6894. As shown in Figure
  6895. \begin_inset CommandInset ref
  6896. LatexCommand ref
  6897. reference "fig:Classifier-probabilities-RMA"
  6898. plural "false"
  6899. caps "false"
  6900. noprefix "false"
  6901. \end_inset
  6902. , improper normalization, particularly separate normalization of training
  6903. and test samples, leads to unwanted biases in classification.
  6904. In a controlled experimental context, it is always possible to correct
  6905. this issue by normalizing all experimental samples together.
  6906. However, because it is not feasible to normalize all samples together in
  6907. a clinical context, a single-channel normalization is required.
  6908. \end_layout
  6909. \begin_layout Standard
  6910. The major concern in using a single-channel normalization is that non-single-cha
  6911. nnel methods can share information between arrays to improve the normalization,
  6912. and single-channel methods risk sacrificing the gains in normalization
  6913. accuracy that come from this information sharing.
  6914. In the case of RMA, this information sharing is accomplished through quantile
  6915. normalization and median polish steps.
  6916. The need for information sharing in quantile normalization can easily be
  6917. removed by learning a fixed set of quantiles from external data and normalizing
  6918. each array to these fixed quantiles, instead of the quantiles of the data
  6919. itself.
  6920. As long as the fixed quantiles are reasonable, the result will be similar
  6921. to standard RMA.
  6922. However, there is no analogous way to eliminate cross-array information
  6923. sharing in the median polish step, so fRMA replaces this with a weighted
  6924. average of probes on each array, with the weights learned from external
  6925. data.
  6926. This step of fRMA has the greatest potential to diverge from RMA in undesirable
  6927. ways.
  6928. \end_layout
  6929. \begin_layout Standard
  6930. However, when run on real data, fRMA performed at least as well as RMA in
  6931. both the internal validation and external validation tests.
  6932. This shows that fRMA can be used to normalize individual clinical samples
  6933. in a class prediction context without sacrificing the classifier performance
  6934. that would be obtained by using the more well-established RMA for normalization.
  6935. The other single-channel normalization method considered, SCAN, showed
  6936. some loss of AUC in the external validation test.
  6937. Based on these results, fRMA is the preferred normalization for clinical
  6938. samples in a class prediction context.
  6939. \end_layout
  6940. \begin_layout Subsection
  6941. Robust fRMA vectors can be generated for new array platforms
  6942. \end_layout
  6943. \begin_layout Standard
  6944. \begin_inset Flex TODO Note (inline)
  6945. status open
  6946. \begin_layout Plain Layout
  6947. Look up the exact numbers, do a find & replace for
  6948. \begin_inset Quotes eld
  6949. \end_inset
  6950. 850
  6951. \begin_inset Quotes erd
  6952. \end_inset
  6953. \end_layout
  6954. \end_inset
  6955. \end_layout
  6956. \begin_layout Standard
  6957. The published fRMA normalization vectors for the hgu133plus2 platform were
  6958. generated from a set of about 850 samples chosen from a wide range of tissues,
  6959. which the authors determined was sufficient to generate a robust set of
  6960. normalization vectors that could be applied across all tissues
  6961. \begin_inset CommandInset citation
  6962. LatexCommand cite
  6963. key "McCall2010"
  6964. literal "false"
  6965. \end_inset
  6966. .
  6967. Since we only had hthgu133pluspm data for 2 tissues of interest, our needs were
  6968. more modest.
  6969. Even using only 130 samples in 26 batches of 5 samples each for kidney
  6970. biopsies, we were able to train a robust set of fRMA normalization vectors
  6971. that were not meaningfully affected by the random selection of 5 samples
  6972. from each batch.
  6973. As expected, the training process was just as robust for the blood samples
  6974. with 230 samples in 46 batches of 5 samples each.
  6975. Because these vectors were each generated using training samples from a
  6976. single tissue, they are not suitable for general use, unlike the vectors
  6977. provided with fRMA itself.
  6978. They are purpose-built for normalizing a specific type of sample on a specific
  6979. platform.
  6980. This is a mostly acceptable limitation in the context of developing a machine
  6981. learning classifier for diagnosing a disease based on samples of a specific
  6982. tissue.
  6983. \end_layout
  6984. \begin_layout Standard
  6985. \begin_inset Flex TODO Note (inline)
  6986. status open
  6987. \begin_layout Plain Layout
  6988. Talk about how these vectors can be used for any data from these tissues
  6989. on this platform even though they were custom made for this data set.
  6990. \end_layout
  6991. \end_inset
  6992. \end_layout
  6993. \begin_layout Standard
  6994. \begin_inset Flex TODO Note (inline)
  6995. status open
  6996. \begin_layout Plain Layout
  6997. How to bring up that these custom vectors were used in another project by
  6998. someone else that was never published?
  6999. \end_layout
  7000. \end_inset
  7001. \end_layout
  7002. \begin_layout Subsection
  7003. Methylation array data can be successfully analyzed using existing techniques,
  7004. but machine learning poses additional challenges
  7005. \end_layout
  7006. \begin_layout Standard
  7007. Analysis strategies B and C both yield a reasonable analysis, with
  7008. a mean-variance trend that matches the expected behavior for the non-linear
  7009. M-value transformation (Figure
  7010. \begin_inset CommandInset ref
  7011. LatexCommand ref
  7012. reference "fig:meanvar-sva-aw"
  7013. plural "false"
  7014. caps "false"
  7015. noprefix "false"
  7016. \end_inset
  7017. ) and well-behaved p-value distributions (Figure
  7018. \begin_inset CommandInset ref
  7019. LatexCommand ref
  7020. reference "fig:meth-p-value-histograms"
  7021. plural "false"
  7022. caps "false"
  7023. noprefix "false"
  7024. \end_inset
  7025. ).
  7026. These two analyses also yield similar numbers of significant probes (Table
  7027. \begin_inset CommandInset ref
  7028. LatexCommand ref
  7029. reference "tab:methyl-num-signif"
  7030. plural "false"
  7031. caps "false"
  7032. noprefix "false"
  7033. \end_inset
  7034. ) and similar estimates of the number of differentially methylated probes
  7035. (Table
  7036. \begin_inset CommandInset ref
  7037. LatexCommand ref
  7038. reference "tab:methyl-est-nonnull"
  7039. plural "false"
  7040. caps "false"
  7041. noprefix "false"
  7042. \end_inset
  7043. ).
  7044. The main difference between these two analyses is the method used to account
  7045. for the mean-variance trend.
  7046. In analysis B, the trend is estimated and applied at the probe level: each
  7047. probe's estimated variance is squeezed toward the trend using an empirical
  7048. Bayes procedure (Figure
  7049. \begin_inset CommandInset ref
  7050. LatexCommand ref
  7051. reference "fig:meanvar-sva-aw"
  7052. plural "false"
  7053. caps "false"
  7054. noprefix "false"
  7055. \end_inset
  7056. ).
  7057. In analysis C, the trend is still estimated at the probe level, but instead
  7058. of estimating a single variance value shared across all observations for
  7059. a given probe, the voom method computes an initial estimate of the variance
  7060. for each observation individually based on where its model-fitted M-value
  7061. falls on the trend line and then assigns inverse-variance weights to model
  7062. the difference in variance between observations.
  7063. An overall variance is still estimated for each probe using the same empirical
  7064. Bayes method, but now the residual trend is flat (Figure
  7065. \begin_inset CommandInset ref
  7066. LatexCommand ref
  7067. reference "fig:meanvar-sva-voomaw"
  7068. plural "false"
  7069. caps "false"
  7070. noprefix "false"
  7071. \end_inset
  7072. ), indicating that the mean-variance trend is adequately modeled by scaling
  7073. the estimated variance for each observation using the weights computed
  7074. by voom.
  7075. \end_layout
  7076. \begin_layout Standard
  7077. The difference between the standard empirical Bayes trended variance modeling
  7078. (analysis B) and voom (analysis C) is analogous to the difference between
  7079. a t-test with equal variance and a t-test with unequal variance, except
  7080. that the unequal group variances used in the latter test are estimated
  7081. based on the mean-variance trend from all the probes rather than the data
  7082. for the specific probe being tested, thus stabilizing the group variance
  7083. estimates by sharing information between probes.
  7084. Allowing voom to model the variance using observation weights in this manner
  7085. allows the linear model fit to concentrate statistical power where it will
  7086. do the most good.
  7087. For example, if a particular probe's M-values are always at the extreme
  7088. of the M-value range (e.g.
  7089. less than -4) for ADNR samples, but the M-values for that probe in TX and
  7090. CAN samples are within the flat region of the mean-variance trend (between
  7091. -3 and +3), voom is able to down-weight the contribution of the high-variance
  7092. M-values from the ADNR samples in order to gain more statistical power
  7093. while testing for differential methylation between TX and CAN.
  7094. In contrast, modeling the mean-variance trend only at the probe level would
  7095. combine the high-variance ADNR samples and lower-variance samples from
  7096. other conditions and estimate an intermediate variance for this probe.
  7097. In practice, analysis B shows that this approach is adequate, but the voom
  7098. approach in analysis C is at least as good on all model fit criteria and
  7099. yields a larger estimate for the number of differentially methylated genes,
  7100. \emph on
  7101. and
  7102. \emph default
  7103. it matches up better with the theoretical properties of the data.
  7104. \end_layout
  7105. \begin_layout Standard
  7106. The significant association of diabetes diagnosis with sample quality is
  7107. interesting.
  7108. The samples with Type 2 diabetes tended to have more variation, averaged
  7109. across all probes, than those with Type 1 diabetes.
  7110. This is consistent with the consensus that Type 2 diabetes and the associated
  7111. metabolic syndrome represent a broad dysregulation of the body's endocrine
  7112. signalling related to metabolism [citation needed].
  7113. This dysregulation could easily manifest as a greater degree of variation
  7114. in the DNA methylation patterns of affected tissues.
  7115. In contrast, Type 1 diabetes has a more specific cause and effect, so a
  7116. less variable methylation signature is expected.
  7117. \end_layout
  7118. \begin_layout Standard
  7119. This preliminary analysis suggests that some degree of differential methylation
  7120. exists between TX and each of the three types of transplant dysfunction
  7121. studied.
  7122. Hence, it may be feasible to train a classifier to diagnose transplant
  7123. dysfunction from DNA methylation array data.
  7124. However, the major importance of both SVA and sample quality weighting
  7125. for proper modeling of this data poses significant challenges for any attempt
  7126. at machine learning on data of similar quality.
  7127. While these are easily used in a modeling context with full sample information,
  7128. neither of these methods is directly applicable in a machine learning context,
  7129. where the diagnosis is not known ahead of time.
  7130. If a machine learning approach for methylation-based diagnosis is to be
  7131. pursued, it will either require machine-learning-friendly methods to address
  7132. the same systematic trends in the data that SVA and sample quality weighting
  7133. address, or it will require higher quality data with substantially less
  7134. systematic perturbation of the data.
  7135. \end_layout
  7136. \begin_layout Chapter
  7137. Globin-blocking for more effective blood RNA-seq analysis in primate animal
  7138. model
  7139. \end_layout
  7140. \begin_layout Standard
  7141. \begin_inset Flex TODO Note (inline)
  7142. status open
  7143. \begin_layout Plain Layout
  7144. Choose between above and the paper title: Optimizing yield of deep RNA sequencin
  7145. g for gene expression profiling by globin reduction of peripheral blood
  7146. samples from cynomolgus monkeys (Macaca fascicularis).
  7147. \end_layout
  7148. \end_inset
  7149. \end_layout
  7150. \begin_layout Standard
  7151. \begin_inset Flex TODO Note (inline)
  7152. status open
  7153. \begin_layout Plain Layout
  7154. Chapter author list: https://tex.stackexchange.com/questions/156862/displaying-aut
  7155. hor-for-each-chapter-in-book Every chapter gets an author list, which may
  7156. or may not be part of a citation to a published/preprinted paper.
  7157. \end_layout
  7158. \end_inset
  7159. \end_layout
  7160. \begin_layout Standard
  7161. \begin_inset Flex TODO Note (inline)
  7162. status open
  7163. \begin_layout Plain Layout
  7164. Preprint then cite the paper
  7165. \end_layout
  7166. \end_inset
  7167. \end_layout
  7168. \begin_layout Section*
  7169. Abstract
  7170. \end_layout
  7171. \begin_layout Paragraph
  7172. Background
  7173. \end_layout
  7174. \begin_layout Standard
  7175. Primate blood contains high concentrations of globin messenger RNA.
  7176. Globin reduction is a standard technique used to improve the expression
  7177. results obtained by DNA microarrays on RNA from blood samples.
  7178. However, with whole transcriptome RNA-sequencing (RNA-seq) quickly replacing
  7179. microarrays for many applications, the impact of globin reduction for RNA-seq
  7180. has not been previously studied.
  7181. Moreover, no off-the-shelf kits are available for globin reduction in nonhuman
  7182. primates.
  7183. \end_layout
  7184. \begin_layout Paragraph
  7185. Results
  7186. \end_layout
  7187. \begin_layout Standard
  7188. Here we report a protocol for RNA-seq in primate blood samples that uses
  7189. complementary oligonucleotides to block reverse transcription of the alpha
  7190. and beta globin genes.
  7191. In test samples from cynomolgus monkeys (Macaca fascicularis), this globin
  7192. blocking protocol approximately doubles the yield of informative (non-globin)
  7193. reads by greatly reducing the fraction of globin reads, while also improving
  7194. the consistency in sequencing depth between samples.
  7195. The increased yield enables detection of about 2000 more genes, significantly
  7196. increases the correlation in measured gene expression levels between samples,
  7197. and increases the sensitivity of differential gene expression tests.
  7198. \end_layout
  7199. \begin_layout Paragraph
  7200. Conclusions
  7201. \end_layout
  7202. \begin_layout Standard
  7203. These results show that globin blocking significantly improves the cost-effectiv
  7204. eness of mRNA sequencing in primate blood samples by doubling the yield
  7205. of useful reads, allowing detection of more genes, and improving the precision
  7206. of gene expression measurements.
  7207. Based on these results, a globin reducing or blocking protocol is recommended
  7208. for all RNA-seq studies of primate blood samples.
  7209. \end_layout
  7210. \begin_layout Section
  7211. Approach
  7212. \end_layout
  7213. \begin_layout Standard
  7214. \begin_inset Note Note
  7215. status open
  7216. \begin_layout Plain Layout
  7217. Consider putting some of this in the Intro chapter
  7218. \end_layout
  7219. \begin_layout Itemize
  7220. Cynomolgus monkeys as a model organism
  7221. \end_layout
  7222. \begin_deeper
  7223. \begin_layout Itemize
  7224. Highly related to humans
  7225. \end_layout
  7226. \begin_layout Itemize
  7227. Small size and short life cycle - good research animal
  7228. \end_layout
  7229. \begin_layout Itemize
  7230. Genomics resources still in development
  7231. \end_layout
  7232. \end_deeper
  7233. \begin_layout Itemize
  7234. Inadequacy of existing blood RNA-seq protocols
  7235. \end_layout
  7236. \begin_deeper
  7237. \begin_layout Itemize
  7238. Existing protocols use a separate globin pulldown step, slowing down processing
  7239. \end_layout
  7240. \end_deeper
  7241. \end_inset
  7242. \end_layout
  7243. \begin_layout Standard
  7244. Increasingly, researchers are turning to high-throughput mRNA sequencing
  7245. technologies (RNA-seq) in preference to expression microarrays for analysis
  7246. of gene expression
  7247. \begin_inset CommandInset citation
  7248. LatexCommand cite
  7249. key "Mutz2012"
  7250. literal "false"
  7251. \end_inset
  7252. .
  7253. The advantages are even greater for study of model organisms with no well-estab
  7254. lished array platforms available, such as the cynomolgus monkey (Macaca
  7255. fascicularis).
  7256. High fractions of globin mRNA are naturally present in mammalian peripheral
  7257. blood samples (up to 70% of total mRNA) and these are known to interfere
  7258. with the results of array-based expression profiling
  7259. \begin_inset CommandInset citation
  7260. LatexCommand cite
  7261. key "Winn2010"
  7262. literal "false"
  7263. \end_inset
  7264. .
  7265. The importance of globin reduction for RNA-seq of blood has only been evaluated
  7266. for a deepSAGE protocol on human samples
  7267. \begin_inset CommandInset citation
  7268. LatexCommand cite
  7269. key "Mastrokolias2012"
  7270. literal "false"
  7271. \end_inset
  7272. .
  7273. In the present report, we evaluated globin reduction using custom blocking
  7274. oligonucleotides for deep RNA-seq of peripheral blood samples from a nonhuman
  7275. primate, cynomolgus monkey, using the Illumina technology platform.
  7276. We demonstrate that globin reduction significantly improves the cost-effectiven
  7277. ess of RNA-seq in blood samples.
  7278. Thus, our protocol offers a significant advantage to any investigator planning
  7279. to use RNA-seq for gene expression profiling of nonhuman primate blood
  7280. samples.
  7281. Our method can be generally applied to any species by designing complementary
  7282. oligonucleotide blocking probes to the globin gene sequences of that species.
  7283. Indeed, any highly expressed but biologically uninformative transcripts
  7284. can also be blocked to further increase sequencing efficiency and value
  7285. \begin_inset CommandInset citation
  7286. LatexCommand cite
  7287. key "Arnaud2016"
  7288. literal "false"
  7289. \end_inset
  7290. .
  7291. \end_layout
  7292. \begin_layout Section
  7293. Methods
  7294. \end_layout
  7295. \begin_layout Subsection
  7296. Sample collection
  7297. \end_layout
  7298. \begin_layout Standard
  7299. All research reported here was done under IACUC-approved protocols at the
  7300. University of Miami and complied with all applicable federal and state
  7301. regulations and ethical principles for nonhuman primate research.
  7302. Blood draws occurred between 16 April 2012 and 18 June 2015.
  7303. The experimental system involved intrahepatic pancreatic islet transplantation
  7304. into Cynomolgus monkeys with induced diabetes mellitus with or without
  7305. concomitant infusion of mesenchymal stem cells.
  7306. Blood was collected at serial time points before and after transplantation
  7307. into PAXgene Blood RNA tubes (PreAnalytiX/Qiagen, Valencia, CA) at the
  7308. precise volume:volume ratio of 2.5 ml whole blood into 6.9 ml of PAXgene
  7309. additive.
  7310. \end_layout
  7311. \begin_layout Subsection
  7312. Globin Blocking
  7313. \end_layout
  7314. \begin_layout Standard
  7315. Four oligonucleotides were designed to hybridize to the 3’ end of the transcript
  7316. s for Cynomolgus HBA1, HBA2 and HBB, with two hybridization sites for HBB
  7317. and 2 sites for HBA (the chosen sites were identical in both HBA genes).
  7318. All oligos were purchased from Sigma and were entirely composed of 2’O-Me
  7319. bases with a C3 spacer positioned at the 3’ ends to prevent any polymerase
  7320. mediated primer extension.
  7321. \end_layout
  7322. \begin_layout Quote
  7323. HBA1/2 site 1: GCCCACUCAGACUUUAUUCAAAG-C3spacer
  7324. \end_layout
  7325. \begin_layout Quote
  7326. HBA1/2 site 2: GGUGCAAGGAGGGGAGGAG-C3spacer
  7327. \end_layout
  7328. \begin_layout Quote
  7329. HBB site 1: AAUGAAAAUAAAUGUUUUUUAUUAG-C3spacer
  7330. \end_layout
  7331. \begin_layout Quote
  7332. HBB site 2: CUCAAGGCCCUUCAUAAUAUCCC-C3spacer
  7333. \end_layout
  7334. \begin_layout Subsection
  7335. RNA-seq Library Preparation
  7336. \end_layout
  7337. \begin_layout Standard
  7338. Sequencing libraries were prepared with 200 ng total RNA from each sample.
  7339. Polyadenylated mRNA was selected from 200 ng aliquots of cynomolgus blood-deri
  7340. ved total RNA using Ambion Dynabeads Oligo(dT)25 beads (Invitrogen) following
  7341. manufacturer’s recommended protocol.
  7342. PolyA selected RNA was then combined with 8 pmol of HBA1/2 (site 1), 8
  7343. pmol of HBA1/2 (site 2), 12 pmol of HBB (site 1) and 12 pmol of HBB (site
  7344. 2) oligonucleotides.
  7345. In addition, 20 pmol of RT primer containing a portion of the Illumina
  7346. adapter sequence (B-oligo-dTV: GAGTTCCTTGGCACCCGAGAATTCCATTTTTTTTTTTTTTTTTTTV)
  7347. and 4 µL of 5X First Strand buffer (250 mM Tris-HCl pH 8.3, 375 mM KCl,
  7348. 15mM MgCl2) were added in a total volume of 15 µL.
  7349. The RNA was fragmented by heating this cocktail for 3 minutes at 95°C and
  7350. then placed on ice.
  7351. This was followed by the addition of 2 µL 0.1 M DTT, 1 µL RNaseOUT, 1 µL
  7352. 10mM dNTPs 10% biotin-16 aminoallyl-2’- dUTP and 10% biotin-16 aminoallyl-2’-
  7353. dCTP (TriLink Biotech, San Diego, CA), 1 µL Superscript II (200U/ µL, Thermo-Fi
  7354. sher).
  7355. A second “unblocked” library was prepared in the same way for each sample
  7356. but replacing the blocking oligos with an equivalent volume of water.
  7357. The reaction was carried out at 25°C for 15 minutes and 42°C for 40 minutes,
  7358. followed by incubation at 75°C for 10 minutes to inactivate the reverse
  7359. transcriptase.
  7360. \end_layout
  7361. \begin_layout Standard
  7362. The cDNA/RNA hybrid molecules were purified using 1.8X Ampure XP beads (Agencourt
  7363. ) following supplier’s recommended protocol.
  7364. The cDNA/RNA hybrid was eluted in 25 µL of 10 mM Tris-HCl pH 8.0, and then
  7365. bound to 25 µL of M280 Magnetic Streptavidin beads washed per recommended
  7366. protocol (Thermo-Fisher).
  7367. After 30 minutes of binding, beads were washed one time in 100 µL 0.1N NaOH
  7368. to denature and remove the bound RNA, followed by two 100 µL washes with
  7369. 1X TE buffer.
  7370. \end_layout
  7371. \begin_layout Standard
  7372. Subsequent attachment of the 5-prime Illumina A adapter was performed by
  7373. on-bead random primer extension of the following sequence (A-N8 primer:
  7374. TTCAGAGTTCTACAGTCCGACGATCNNNNNNNN).
  7375. Briefly, beads were resuspended in a 20 µL reaction containing 5 µM A-N8
  7376. primer, 40mM Tris-HCl pH 7.5, 20mM MgCl2, 50mM NaCl, 0.325U/µL Sequenase
  7377. 2.0 (Affymetrix, Santa Clara, CA), 0.0025U/µL inorganic pyrophosphatase (Affymetr
  7378. ix) and 300 µM each dNTP.
  7379. Reaction was incubated at 22°C for 30 minutes, then beads were washed 2
  7380. times with 1X TE buffer (200µL).
  7381. \end_layout
  7382. \begin_layout Standard
  7383. The magnetic streptavidin beads were resuspended in 34 µL nuclease-free
  7384. water and added directly to a PCR tube.
  7385. The two Illumina protocol-specified PCR primers were added at 0.53 µM (Illumina
  7386. TruSeq Universal Primer 1 and Illumina TruSeq barcoded PCR primer 2), along
  7387. with 40 µL 2X KAPA HiFi Hotstart ReadyMix (KAPA, Wilmington, MA) and thermocycl
  7388. ed as follows: starting with 98°C (2 min-hold); 15 cycles of 98°C, 20sec;
  7389. 60°C, 30sec; 72°C, 30sec; and finished with a 72°C (2 min-hold).
  7390. \end_layout
  7391. \begin_layout Standard
  7392. PCR products were purified with 1X Ampure Beads following manufacturer’s
  7393. recommended protocol.
  7394. Libraries were then analyzed using the Agilent TapeStation and quantitation
  7395. of desired size range was performed by “smear analysis”.
  7396. Samples were pooled in equimolar batches of 16 samples.
  7397. Pooled libraries were size selected on 2% agarose gels (E-Gel EX Agarose
  7398. Gels; Thermo-Fisher).
  7399. Products were cut between 250 and 350 bp (corresponding to insert sizes
  7400. of 130 to 230 bps).
  7401. Finished library pools were then sequenced on the Illumina NextSeq500 instrumen
  7402. t with 75 base read lengths.
  7403. \end_layout
  7404. \begin_layout Subsection
  7405. Read alignment and counting
  7406. \end_layout
  7407. \begin_layout Standard
  7408. Reads were aligned to the cynomolgus genome using STAR
  7409. \begin_inset CommandInset citation
  7410. LatexCommand cite
  7411. key "Dobin2013,Wilson2013"
  7412. literal "false"
  7413. \end_inset
  7414. .
  7415. Counts of uniquely mapped reads were obtained for every gene in each sample
  7416. with the “featureCounts” function from the Rsubread package, using each
  7417. of the three possibilities for the “strandSpecific” option: sense, antisense,
  7418. and unstranded
  7419. \begin_inset CommandInset citation
  7420. LatexCommand cite
  7421. key "Liao2014"
  7422. literal "false"
  7423. \end_inset
  7424. .
  7425. A few artifacts in the cynomolgus genome annotation complicated read counting.
  7426. First, no ortholog is annotated for alpha globin in the cynomolgus genome,
  7427. presumably because the human genome has two alpha globin genes with nearly
  7428. identical sequences, making the orthology relationship ambiguous.
  7429. However, two loci in the cynomolgus genome are annotated as “hemoglobin subunit
  7430. alpha-like” (LOC102136192 and LOC102136846).
  7431. LOC102136192 is annotated as a pseudogene while LOC102136846 is annotated
  7432. as protein-coding.
  7433. Our globin reduction protocol was designed to include blocking of these
  7434. two genes.
  7435. Indeed, these two genes have almost the same read counts in each library
  7436. as the properly-annotated HBB gene and much larger counts than any other
  7437. gene in the unblocked libraries, giving confidence that reads derived from
  7438. the real alpha globin are mapping to both genes.
  7439. Thus, reads from both of these loci were counted as alpha globin reads
  7440. in all further analyses.
  7441. The second artifact is a small, uncharacterized non-coding RNA gene (LOC1021365
  7442. 91), which overlaps the HBA-like gene (LOC102136192) on the opposite strand.
  7443. If counting is not performed in stranded mode (or if a non-strand-specific
  7444. sequencing protocol is used), many reads mapping to the globin gene will
  7445. be discarded as ambiguous due to their overlap with this ncRNA gene, resulting
  7446. in significant undercounting of globin reads.
  7447. Therefore, stranded sense counts were used for all further analysis in
  7448. the present study to ensure that we accurately accounted for globin transcript
  7449. reduction.
  7450. However, we note that stranded reads are not necessary for RNA-seq using
  7451. our protocol in standard practice.
  7452. \end_layout
  7453. \begin_layout Subsection
  7454. Normalization and Exploratory Data Analysis
  7455. \end_layout
  7456. \begin_layout Standard
  7457. Libraries were normalized by computing scaling factors using the edgeR package’s
  7458. Trimmed Mean of M-values method
  7459. \begin_inset CommandInset citation
  7460. LatexCommand cite
  7461. key "Robinson2010"
  7462. literal "false"
  7463. \end_inset
  7464. .
  7465. Log2 counts per million values (logCPM) were calculated using the cpm function
  7466. in edgeR for individual samples and aveLogCPM function for averages across
  7467. groups of samples, using those functions’ default prior count values to
  7468. avoid taking the logarithm of 0.
  7469. Genes were considered “present” if their average normalized logCPM values
  7470. across all libraries were at least -1.
  7471. Normalizing for gene length was unnecessary because the sequencing protocol
  7472. is 3’-biased and hence the expected read count for each gene is related
  7473. to the transcript’s copy number but not its length.
  7474. \end_layout
  7475. \begin_layout Standard
  7476. In order to assess the effect of blocking on reproducibility, Pearson and
  7477. Spearman correlation coefficients were computed between the logCPM values
  7478. for every pair of libraries within the globin-blocked (GB) and unblocked
  7479. (non-GB) groups, and edgeR's “estimateDisp” function was used to compute
  7480. negative binomial dispersions separately for the two groups
  7481. \begin_inset CommandInset citation
  7482. LatexCommand cite
  7483. key "Chen2014"
  7484. literal "false"
  7485. \end_inset
  7486. .
  7487. \end_layout
  7488. \begin_layout Subsection
  7489. Differential Expression Analysis
  7490. \end_layout
  7491. \begin_layout Standard
  7492. All tests for differential gene expression were performed using edgeR, by
  7493. first fitting a negative binomial generalized linear model to the counts
  7494. and normalization factors and then performing a quasi-likelihood F-test
  7495. with robust estimation of outlier gene dispersions
  7496. \begin_inset CommandInset citation
  7497. LatexCommand cite
  7498. key "Lund2012,Phipson2016"
  7499. literal "false"
  7500. \end_inset
  7501. .
  7502. To investigate the effects of globin blocking on each gene, an additive
  7503. model was fit to the full data with coefficients for globin blocking and
  7504. SampleID.
  7505. To test the effect of globin blocking on detection of differentially expressed
  7506. genes, the GB samples and non-GB samples were each analyzed independently
  7507. as follows: for each animal with both a pre-transplant and a post-transplant
  7508. time point in the data set, the pre-transplant sample and the earliest
  7509. post-transplant sample were selected, and all others were excluded, yielding
  7510. a pre-/post-transplant pair of samples for each animal (N=7 animals with
  7511. paired samples).
  7512. These samples were analyzed for pre-transplant vs.
  7513. post-transplant differential gene expression while controlling for inter-animal
  7514. variation using an additive model with coefficients for transplant and
  7515. animal ID.
  7516. In all analyses, p-values were adjusted using the Benjamini-Hochberg procedure
  7517. for FDR control
  7518. \begin_inset CommandInset citation
  7519. LatexCommand cite
  7520. key "Benjamini1995"
  7521. literal "false"
  7522. \end_inset
  7523. .
  7524. \end_layout
  7525. \begin_layout Standard
  7526. \begin_inset Note Note
  7527. status open
  7528. \begin_layout Itemize
  7529. New blood RNA-seq protocol to block reverse transcription of globin genes
  7530. \end_layout
  7531. \begin_layout Itemize
  7532. Blood RNA-seq time course after transplants with/without MSC infusion
  7533. \end_layout
  7534. \end_inset
  7535. \end_layout
  7536. \begin_layout Section
  7537. Results
  7538. \end_layout
  7539. \begin_layout Subsection
  7540. Globin blocking yields a larger and more consistent fraction of useful reads
  7541. \end_layout
  7542. \begin_layout Standard
  7543. \begin_inset ERT
  7544. status open
  7545. \begin_layout Plain Layout
  7546. \backslash
  7547. afterpage{
  7548. \end_layout
  7549. \begin_layout Plain Layout
  7550. \backslash
  7551. begin{landscape}
  7552. \end_layout
  7553. \end_inset
  7554. \end_layout
  7555. \begin_layout Standard
  7556. \begin_inset Float table
  7557. placement p
  7558. wide false
  7559. sideways false
  7560. status collapsed
  7561. \begin_layout Plain Layout
  7562. \align center
  7563. \begin_inset Tabular
  7564. <lyxtabular version="3" rows="4" columns="7">
  7565. <features tabularvalignment="middle">
  7566. <column alignment="center" valignment="top">
  7567. <column alignment="center" valignment="top">
  7568. <column alignment="center" valignment="top">
  7569. <column alignment="center" valignment="top">
  7570. <column alignment="center" valignment="top">
  7571. <column alignment="center" valignment="top">
  7572. <column alignment="center" valignment="top">
  7573. <row>
  7574. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7575. \begin_inset Text
  7576. \begin_layout Plain Layout
  7577. \end_layout
  7578. \end_inset
  7579. </cell>
  7580. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7581. \begin_inset Text
  7582. \begin_layout Plain Layout
  7583. \family roman
  7584. \series medium
  7585. \shape up
  7586. \size normal
  7587. \emph off
  7588. \bar no
  7589. \strikeout off
  7590. \xout off
  7591. \uuline off
  7592. \uwave off
  7593. \noun off
  7594. \color none
  7595. Percent of Total Reads
  7596. \end_layout
  7597. \end_inset
  7598. </cell>
  7599. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7600. \begin_inset Text
  7601. \begin_layout Plain Layout
  7602. \end_layout
  7603. \end_inset
  7604. </cell>
  7605. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7606. \begin_inset Text
  7607. \begin_layout Plain Layout
  7608. \end_layout
  7609. \end_inset
  7610. </cell>
  7611. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7612. \begin_inset Text
  7613. \begin_layout Plain Layout
  7614. \end_layout
  7615. \end_inset
  7616. </cell>
  7617. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  7618. \begin_inset Text
  7619. \begin_layout Plain Layout
  7620. \family roman
  7621. \series medium
  7622. \shape up
  7623. \size normal
  7624. \emph off
  7625. \bar no
  7626. \strikeout off
  7627. \xout off
  7628. \uuline off
  7629. \uwave off
  7630. \noun off
  7631. \color none
  7632. Percent of Genic Reads
  7633. \end_layout
  7634. \end_inset
  7635. </cell>
  7636. <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  7637. \begin_inset Text
  7638. \begin_layout Plain Layout
  7639. \end_layout
  7640. \end_inset
  7641. </cell>
  7642. </row>
  7643. <row>
  7644. <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none">
  7645. \begin_inset Text
  7646. \begin_layout Plain Layout
  7647. GB
  7648. \end_layout
  7649. \end_inset
  7650. </cell>
  7651. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7652. \begin_inset Text
  7653. \begin_layout Plain Layout
  7654. \family roman
  7655. \series medium
  7656. \shape up
  7657. \size normal
  7658. \emph off
  7659. \bar no
  7660. \strikeout off
  7661. \xout off
  7662. \uuline off
  7663. \uwave off
  7664. \noun off
  7665. \color none
  7666. Non-globin Reads
  7667. \end_layout
  7668. \end_inset
  7669. </cell>
  7670. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7671. \begin_inset Text
  7672. \begin_layout Plain Layout
  7673. \family roman
  7674. \series medium
  7675. \shape up
  7676. \size normal
  7677. \emph off
  7678. \bar no
  7679. \strikeout off
  7680. \xout off
  7681. \uuline off
  7682. \uwave off
  7683. \noun off
  7684. \color none
  7685. Globin Reads
  7686. \end_layout
  7687. \end_inset
  7688. </cell>
  7689. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7690. \begin_inset Text
  7691. \begin_layout Plain Layout
  7692. \family roman
  7693. \series medium
  7694. \shape up
  7695. \size normal
  7696. \emph off
  7697. \bar no
  7698. \strikeout off
  7699. \xout off
  7700. \uuline off
  7701. \uwave off
  7702. \noun off
  7703. \color none
  7704. All Genic Reads
  7705. \end_layout
  7706. \end_inset
  7707. </cell>
  7708. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7709. \begin_inset Text
  7710. \begin_layout Plain Layout
  7711. \family roman
  7712. \series medium
  7713. \shape up
  7714. \size normal
  7715. \emph off
  7716. \bar no
  7717. \strikeout off
  7718. \xout off
  7719. \uuline off
  7720. \uwave off
  7721. \noun off
  7722. \color none
  7723. All Aligned Reads
  7724. \end_layout
  7725. \end_inset
  7726. </cell>
  7727. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7728. \begin_inset Text
  7729. \begin_layout Plain Layout
  7730. \family roman
  7731. \series medium
  7732. \shape up
  7733. \size normal
  7734. \emph off
  7735. \bar no
  7736. \strikeout off
  7737. \xout off
  7738. \uuline off
  7739. \uwave off
  7740. \noun off
  7741. \color none
  7742. Non-globin Reads
  7743. \end_layout
  7744. \end_inset
  7745. </cell>
  7746. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  7747. \begin_inset Text
  7748. \begin_layout Plain Layout
  7749. \family roman
  7750. \series medium
  7751. \shape up
  7752. \size normal
  7753. \emph off
  7754. \bar no
  7755. \strikeout off
  7756. \xout off
  7757. \uuline off
  7758. \uwave off
  7759. \noun off
  7760. \color none
  7761. Globin Reads
  7762. \end_layout
  7763. \end_inset
  7764. </cell>
  7765. </row>
  7766. <row>
  7767. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7768. \begin_inset Text
  7769. \begin_layout Plain Layout
  7770. \family roman
  7771. \series medium
  7772. \shape up
  7773. \size normal
  7774. \emph off
  7775. \bar no
  7776. \strikeout off
  7777. \xout off
  7778. \uuline off
  7779. \uwave off
  7780. \noun off
  7781. \color none
  7782. Yes
  7783. \end_layout
  7784. \end_inset
  7785. </cell>
  7786. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7787. \begin_inset Text
  7788. \begin_layout Plain Layout
  7789. \family roman
  7790. \series medium
  7791. \shape up
  7792. \size normal
  7793. \emph off
  7794. \bar no
  7795. \strikeout off
  7796. \xout off
  7797. \uuline off
  7798. \uwave off
  7799. \noun off
  7800. \color none
  7801. 50.4% ± 6.82
  7802. \end_layout
  7803. \end_inset
  7804. </cell>
  7805. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7806. \begin_inset Text
  7807. \begin_layout Plain Layout
  7808. \family roman
  7809. \series medium
  7810. \shape up
  7811. \size normal
  7812. \emph off
  7813. \bar no
  7814. \strikeout off
  7815. \xout off
  7816. \uuline off
  7817. \uwave off
  7818. \noun off
  7819. \color none
  7820. 3.48% ± 2.94
  7821. \end_layout
  7822. \end_inset
  7823. </cell>
  7824. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7825. \begin_inset Text
  7826. \begin_layout Plain Layout
  7827. \family roman
  7828. \series medium
  7829. \shape up
  7830. \size normal
  7831. \emph off
  7832. \bar no
  7833. \strikeout off
  7834. \xout off
  7835. \uuline off
  7836. \uwave off
  7837. \noun off
  7838. \color none
  7839. 53.9% ± 6.81
  7840. \end_layout
  7841. \end_inset
  7842. </cell>
  7843. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7844. \begin_inset Text
  7845. \begin_layout Plain Layout
  7846. \family roman
  7847. \series medium
  7848. \shape up
  7849. \size normal
  7850. \emph off
  7851. \bar no
  7852. \strikeout off
  7853. \xout off
  7854. \uuline off
  7855. \uwave off
  7856. \noun off
  7857. \color none
  7858. 89.7% ± 2.40
  7859. \end_layout
  7860. \end_inset
  7861. </cell>
  7862. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  7863. \begin_inset Text
  7864. \begin_layout Plain Layout
  7865. \family roman
  7866. \series medium
  7867. \shape up
  7868. \size normal
  7869. \emph off
  7870. \bar no
  7871. \strikeout off
  7872. \xout off
  7873. \uuline off
  7874. \uwave off
  7875. \noun off
  7876. \color none
  7877. 93.5% ± 5.25
  7878. \end_layout
  7879. \end_inset
  7880. </cell>
  7881. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  7882. \begin_inset Text
  7883. \begin_layout Plain Layout
  7884. \family roman
  7885. \series medium
  7886. \shape up
  7887. \size normal
  7888. \emph off
  7889. \bar no
  7890. \strikeout off
  7891. \xout off
  7892. \uuline off
  7893. \uwave off
  7894. \noun off
  7895. \color none
  7896. 6.49% ± 5.25
  7897. \end_layout
  7898. \end_inset
  7899. </cell>
  7900. </row>
  7901. <row>
  7902. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7903. \begin_inset Text
  7904. \begin_layout Plain Layout
  7905. \family roman
  7906. \series medium
  7907. \shape up
  7908. \size normal
  7909. \emph off
  7910. \bar no
  7911. \strikeout off
  7912. \xout off
  7913. \uuline off
  7914. \uwave off
  7915. \noun off
  7916. \color none
  7917. No
  7918. \end_layout
  7919. \end_inset
  7920. </cell>
  7921. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7922. \begin_inset Text
  7923. \begin_layout Plain Layout
  7924. \family roman
  7925. \series medium
  7926. \shape up
  7927. \size normal
  7928. \emph off
  7929. \bar no
  7930. \strikeout off
  7931. \xout off
  7932. \uuline off
  7933. \uwave off
  7934. \noun off
  7935. \color none
  7936. 26.3% ± 8.95
  7937. \end_layout
  7938. \end_inset
  7939. </cell>
  7940. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7941. \begin_inset Text
  7942. \begin_layout Plain Layout
  7943. \family roman
  7944. \series medium
  7945. \shape up
  7946. \size normal
  7947. \emph off
  7948. \bar no
  7949. \strikeout off
  7950. \xout off
  7951. \uuline off
  7952. \uwave off
  7953. \noun off
  7954. \color none
  7955. 44.6% ± 16.6
  7956. \end_layout
  7957. \end_inset
  7958. </cell>
  7959. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7960. \begin_inset Text
  7961. \begin_layout Plain Layout
  7962. \family roman
  7963. \series medium
  7964. \shape up
  7965. \size normal
  7966. \emph off
  7967. \bar no
  7968. \strikeout off
  7969. \xout off
  7970. \uuline off
  7971. \uwave off
  7972. \noun off
  7973. \color none
  7974. 70.1% ± 9.38
  7975. \end_layout
  7976. \end_inset
  7977. </cell>
  7978. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7979. \begin_inset Text
  7980. \begin_layout Plain Layout
  7981. \family roman
  7982. \series medium
  7983. \shape up
  7984. \size normal
  7985. \emph off
  7986. \bar no
  7987. \strikeout off
  7988. \xout off
  7989. \uuline off
  7990. \uwave off
  7991. \noun off
  7992. \color none
  7993. 90.7% ± 5.16
  7994. \end_layout
  7995. \end_inset
  7996. </cell>
  7997. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  7998. \begin_inset Text
  7999. \begin_layout Plain Layout
  8000. \family roman
  8001. \series medium
  8002. \shape up
  8003. \size normal
  8004. \emph off
  8005. \bar no
  8006. \strikeout off
  8007. \xout off
  8008. \uuline off
  8009. \uwave off
  8010. \noun off
  8011. \color none
  8012. 38.8% ± 17.1
  8013. \end_layout
  8014. \end_inset
  8015. </cell>
  8016. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  8017. \begin_inset Text
  8018. \begin_layout Plain Layout
  8019. \family roman
  8020. \series medium
  8021. \shape up
  8022. \size normal
  8023. \emph off
  8024. \bar no
  8025. \strikeout off
  8026. \xout off
  8027. \uuline off
  8028. \uwave off
  8029. \noun off
  8030. \color none
  8031. 61.2% ± 17.1
  8032. \end_layout
  8033. \end_inset
  8034. </cell>
  8035. </row>
  8036. </lyxtabular>
  8037. \end_inset
  8038. \end_layout
  8039. \begin_layout Plain Layout
  8040. \begin_inset Caption Standard
  8041. \begin_layout Plain Layout
  8042. \series bold
  8043. \begin_inset Argument 1
  8044. status collapsed
  8045. \begin_layout Plain Layout
  8046. Fractions of reads mapping to genomic features in GB and non-GB samples.
  8047. \end_layout
  8048. \end_inset
  8049. \begin_inset CommandInset label
  8050. LatexCommand label
  8051. name "tab:Fractions-of-reads"
  8052. \end_inset
  8053. Fractions of reads mapping to genomic features in GB and non-GB samples.
  8054. \series default
  8055. All values are given as mean ± standard deviation.
  8056. \end_layout
  8057. \end_inset
  8058. \end_layout
  8059. \end_inset
  8060. \end_layout
  8061. \begin_layout Standard
  8062. \begin_inset ERT
  8063. status open
  8064. \begin_layout Plain Layout
  8065. \backslash
  8066. end{landscape}
  8067. \end_layout
  8068. \begin_layout Plain Layout
  8069. }
  8070. \end_layout
  8071. \end_inset
  8072. \end_layout
  8073. \begin_layout Standard
  8074. The objective of the present study was to validate a new protocol for deep
  8075. RNA-seq of whole blood drawn into PaxGene tubes from cynomolgus monkeys
  8076. undergoing islet transplantation, with particular focus on minimizing the
  8077. loss of useful sequencing space to uninformative globin reads.
  8078. The details of the analysis with respect to transplant outcomes and the
  8079. impact of mesenchymal stem cell treatment will be reported in a separate
  8080. manuscript (in preparation).
  8081. To focus on the efficacy of our globin blocking protocol, 37 blood samples,
  8082. 16 from pre-transplant and 21 from post-transplant time points, were each
  8083. prepped once with and once without globin blocking oligos, and were then
  8084. sequenced on an Illumina NextSeq500 instrument.
  8085. The number of reads aligning to each gene in the cynomolgus genome was
  8086. counted.
  8087. Table 1 summarizes the distribution of read fractions among the GB and
  8088. non-GB libraries.
  8089. In the libraries with no globin blocking, globin reads made up an average
  8090. of 44.6% of total input reads, while reads assigned to all other genes made
  8091. up an average of 26.3%.
  8092. The remaining reads either aligned to intergenic regions (that include
  8093. long non-coding RNAs) or did not align with any annotated transcripts in
  8094. the current build of the cynomolgus genome.
  8095. In the GB libraries, globin reads made up only 3.48% and reads assigned
  8096. to all other genes increased to 50.4%.
  8097. Thus, globin blocking resulted in a 92.2% reduction in globin reads and
  8098. a 91.6% increase in yield of useful non-globin reads.
  8099. \end_layout
  8100. \begin_layout Standard
  8101. This reduction is not quite as efficient as the previous analysis showed
  8102. for human samples by DeepSAGE (<0.4% globin reads after globin reduction)
  8103. \begin_inset CommandInset citation
  8104. LatexCommand cite
  8105. key "Mastrokolias2012"
  8106. literal "false"
  8107. \end_inset
  8108. .
  8109. Nonetheless, this degree of globin reduction is sufficient to nearly double
  8110. the yield of useful reads.
  8111. Thus, globin blocking cuts the required sequencing effort (and costs) to
  8112. achieve a target coverage depth by almost 50%.
  8113. Consistent with this near doubling of yield, the average difference in
  8114. un-normalized logCPM across all genes between the GB libraries and non-GB
  8115. libraries is approximately 1 (mean = 1.01, median = 1.08), an overall 2-fold
  8116. increase.
  8117. Un-normalized values are used here because the TMM normalization correctly
  8118. identifies this 2-fold difference as biologically irrelevant and removes
  8119. it.
  8120. \end_layout
  8121. \begin_layout Standard
  8122. \begin_inset Float figure
  8123. wide false
  8124. sideways false
  8125. status collapsed
  8126. \begin_layout Plain Layout
  8127. \align center
  8128. \begin_inset Graphics
  8129. filename graphics/Globin Paper/figure1 - globin-fractions.pdf
  8130. lyxscale 50
  8131. width 75col%
  8132. \end_inset
  8133. \end_layout
  8134. \begin_layout Plain Layout
  8135. \begin_inset Caption Standard
  8136. \begin_layout Plain Layout
  8137. \series bold
  8138. \begin_inset Argument 1
  8139. status collapsed
  8140. \begin_layout Plain Layout
  8141. Fraction of genic reads in each sample aligned to non-globin genes, with
  8142. and without globin blocking (GB).
  8143. \end_layout
  8144. \end_inset
  8145. \begin_inset CommandInset label
  8146. LatexCommand label
  8147. name "fig:Fraction-of-genic-reads"
  8148. \end_inset
  8149. Fraction of genic reads in each sample aligned to non-globin genes, with
  8150. and without globin blocking (GB).
  8151. \series default
  8152. All reads in each sequencing library were aligned to the cyno genome, and
  8153. the number of reads uniquely aligning to each gene was counted.
  8154. For each sample, counts were summed separately for all globin genes and
  8155. for the remainder of the genes (non-globin genes), and the fraction of
  8156. genic reads aligned to non-globin genes was computed.
  8157. Each point represents an individual sample.
  8158. Gray + signs indicate the means for globin-blocked libraries and unblocked
  8159. libraries.
  8160. The overall distribution for each group is represented as a notched box
  8161. plot.
  8162. Points are randomly spread vertically to avoid excessive overlapping.
  8163. \end_layout
  8164. \end_inset
  8165. \end_layout
  8166. \end_inset
  8167. \end_layout
  8168. \begin_layout Standard
  8169. Another important aspect is that the standard deviations in Table
  8170. \begin_inset CommandInset ref
  8171. LatexCommand ref
  8172. reference "tab:Fractions-of-reads"
  8173. plural "false"
  8174. caps "false"
  8175. noprefix "false"
  8176. \end_inset
  8177. are uniformly smaller in the GB samples than the non-GB ones, indicating
  8178. much greater consistency of yield.
  8179. This is best seen in the percentage of non-globin reads as a fraction of
  8180. total reads aligned to annotated genes (genic reads).
  8181. For the non-GB samples, this measure ranges from 10.9% to 80.9%, while for
  8182. the GB samples it ranges from 81.9% to 99.9% (Figure
  8183. \begin_inset CommandInset ref
  8184. LatexCommand ref
  8185. reference "fig:Fraction-of-genic-reads"
  8186. plural "false"
  8187. caps "false"
  8188. noprefix "false"
  8189. \end_inset
  8190. ).
  8191. This means that for applications where it is critical that each sample
  8192. achieve a specified minimum coverage in order to provide useful information,
  8193. it would be necessary to budget up to 10 times the sequencing depth per
  8194. sample without globin blocking, even though the average yield improvement
  8195. for globin blocking is only 2-fold, because every sample has a chance of
  8196. being 90% globin and 10% useful reads.
  8197. Hence, the more consistent behavior of GB samples makes planning an experiment
  8198. easier and more efficient because it eliminates the need to over-sequence
  8199. every sample in order to guard against the worst case of a high-globin
  8200. fraction.
  8201. \end_layout
  8202. \begin_layout Subsection
  8203. Globin blocking lowers the noise floor and allows detection of about 2000
  8204. more low-expression genes
  8205. \end_layout
  8206. \begin_layout Standard
  8207. \begin_inset Flex TODO Note (inline)
  8208. status open
  8209. \begin_layout Plain Layout
  8210. Remove redundant titles from figures
  8211. \end_layout
  8212. \end_inset
  8213. \end_layout
  8214. \begin_layout Standard
  8215. \begin_inset Float figure
  8216. wide false
  8217. sideways false
  8218. status collapsed
  8219. \begin_layout Plain Layout
  8220. \align center
  8221. \begin_inset Graphics
  8222. filename graphics/Globin Paper/figure2 - aveLogCPM-colored.pdf
  8223. lyxscale 50
  8224. height 60theight%
  8225. \end_inset
  8226. \end_layout
  8227. \begin_layout Plain Layout
  8228. \begin_inset Caption Standard
  8229. \begin_layout Plain Layout
  8230. \series bold
  8231. \begin_inset Argument 1
  8232. status collapsed
  8233. \begin_layout Plain Layout
  8234. Distributions of average group gene abundances when normalized separately
  8235. or together.
  8236. \end_layout
  8237. \end_inset
  8238. \begin_inset CommandInset label
  8239. LatexCommand label
  8240. name "fig:logcpm-dists"
  8241. \end_inset
  8242. Distributions of average group gene abundances when normalized separately
  8243. or together.
  8244. \series default
  8245. All reads in each sequencing library were aligned to the cyno genome, and
  8246. the number of reads uniquely aligning to each gene was counted.
  8247. Genes with zero counts in all libraries were discarded.
  8248. Libraries were normalized using the TMM method.
  8249. Libraries were split into globin-blocked (GB) and non-GB groups and the
  8250. average abundance for each gene in both groups, measured in log2 counts
  8251. per million reads counted, was computed using the aveLogCPM function.
  8252. The distribution of average gene logCPM values was plotted for both groups
  8253. using a kernel density plot to approximate a continuous distribution.
  8254. The logCPM GB distributions are marked in red, non-GB in blue.
  8255. The black vertical line denotes the chosen detection threshold of -1.
  8256. Top panel: Libraries were split into GB and non-GB groups first and normalized
  8257. separately.
  8258. Bottom panel: Libraries were all normalized together first and then split
  8259. into groups.
  8260. \end_layout
  8261. \end_inset
  8262. \end_layout
  8263. \begin_layout Plain Layout
  8264. \end_layout
  8265. \end_inset
  8266. \end_layout
  8267. \begin_layout Standard
  8268. Since globin blocking yields more usable sequencing depth, it should also
  8269. allow detection of more genes at any given threshold.
  8270. When we looked at the distribution of average normalized logCPM values
  8271. across all libraries for genes with at least one read assigned to them,
  8272. we observed the expected bimodal distribution, with a high-abundance "signal"
  8273. peak representing detected genes and a low-abundance "noise" peak representing
  8274. genes whose read count did not rise above the noise floor (Figure
  8275. \begin_inset CommandInset ref
  8276. LatexCommand ref
  8277. reference "fig:logcpm-dists"
  8278. plural "false"
  8279. caps "false"
  8280. noprefix "false"
  8281. \end_inset
  8282. ).
  8283. Consistent with the 2-fold increase in raw counts assigned to non-globin
  8284. genes, the signal peak for GB samples is shifted to the right relative
  8285. to the non-GB signal peak.
  8286. When all the samples are normalized together, this difference is normalized
  8287. out, lining up the signal peaks, and this reveals that, as expected, the
  8288. noise floor for the GB samples is about 2-fold lower.
  8289. This greater separation between signal and noise peaks in the GB samples
  8290. means that low-expression genes should be more easily detected and more
  8291. precisely quantified than in the non-GB samples.
  8292. \end_layout
  8293. \begin_layout Standard
  8294. \begin_inset Float figure
  8295. wide false
  8296. sideways false
  8297. status collapsed
  8298. \begin_layout Plain Layout
  8299. \align center
  8300. \begin_inset Graphics
  8301. filename graphics/Globin Paper/figure3 - detection.pdf
  8302. lyxscale 50
  8303. width 70col%
  8304. \end_inset
  8305. \end_layout
  8306. \begin_layout Plain Layout
  8307. \begin_inset Caption Standard
  8308. \begin_layout Plain Layout
  8309. \series bold
  8310. \begin_inset Argument 1
  8311. status collapsed
  8312. \begin_layout Plain Layout
  8313. Gene detections as a function of abundance thresholds in globin-blocked
  8314. (GB) and non-GB samples.
  8315. \end_layout
  8316. \end_inset
  8317. \begin_inset CommandInset label
  8318. LatexCommand label
  8319. name "fig:Gene-detections"
  8320. \end_inset
  8321. Gene detections as a function of abundance thresholds in globin-blocked
  8322. (GB) and non-GB samples.
  8323. \series default
  8324. Average abundance (logCPM,
  8325. \begin_inset Formula $\log_{2}$
  8326. \end_inset
  8327. counts per million reads counted) was computed by separate group normalization
  8328. as described in Figure
  8329. \begin_inset CommandInset ref
  8330. LatexCommand ref
  8331. reference "fig:logcpm-dists"
  8332. plural "false"
  8333. caps "false"
  8334. noprefix "false"
  8335. \end_inset
  8336. for both the GB and non-GB groups, as well as for all samples considered
  8337. as one large group.
  8338. For every integer threshold from -2 to 3, the number of genes detected
  8339. at or above that logCPM threshold was plotted for each group.
  8340. \end_layout
  8341. \end_inset
  8342. \end_layout
  8343. \begin_layout Plain Layout
  8344. \end_layout
  8345. \end_inset
  8346. \end_layout
  8347. \begin_layout Standard
  8348. Based on these distributions, we selected a detection threshold of -1, which
  8349. is approximately the leftmost edge of the trough between the signal and
  8350. noise peaks.
  8351. This represents the most liberal possible detection threshold that doesn't
  8352. call substantial numbers of noise genes as detected.
  8353. Among the full dataset, 13429 genes were detected at this threshold, and
  8354. 22276 were not.
  8355. When considering the GB libraries and non-GB libraries separately and re-comput
  8356. ing normalization factors independently within each group, 14535 genes were
  8357. detected in the GB libraries while only 12460 were detected in the non-GB
  8358. libraries.
  8359. Thus, GB allowed the detection of approximately 2000 extra genes that were buried under
  8360. the noise floor without GB.
  8361. This pattern of at least 2000 additional genes detected with GB was also
  8362. consistent across a wide range of possible detection thresholds, from -2
  8363. to 3 (see Figure
  8364. \begin_inset CommandInset ref
  8365. LatexCommand ref
  8366. reference "fig:Gene-detections"
  8367. plural "false"
  8368. caps "false"
  8369. noprefix "false"
  8370. \end_inset
  8371. ).
  8372. \end_layout
  8373. \begin_layout Subsection
  8374. Globin blocking does not add significant additional noise or decrease sample
  8375. quality
  8376. \end_layout
  8377. \begin_layout Standard
  8378. One potential worry is that the globin blocking protocol could perturb the
  8379. levels of non-globin genes.
  8380. There are two kinds of possible perturbations: systematic and random.
  8381. The former is not a major concern for detection of differential expression,
  8382. since a 2-fold change in every sample has no effect on the relative fold
  8383. change between samples.
  8384. In contrast, random perturbations would increase the noise and obscure
  8385. the signal in the dataset, reducing the capacity to detect differential
  8386. expression.
  8387. \end_layout
  8388. \begin_layout Standard
  8389. \begin_inset Float figure
  8390. wide false
  8391. sideways false
  8392. status collapsed
  8393. \begin_layout Plain Layout
  8394. \align center
  8395. \begin_inset Graphics
  8396. filename graphics/Globin Paper/figure4 - maplot-colored.pdf
  8397. lyxscale 50
  8398. width 60col%
  8399. groupId colwidth
  8400. \end_inset
  8401. \end_layout
  8402. \begin_layout Plain Layout
  8403. \begin_inset Caption Standard
  8404. \begin_layout Plain Layout
  8405. \begin_inset Argument 1
  8406. status collapsed
  8407. \begin_layout Plain Layout
  8408. MA plot showing effects of globin blocking on each gene's abundance.
  8409. \end_layout
  8410. \end_inset
  8411. \begin_inset CommandInset label
  8412. LatexCommand label
  8413. name "fig:MA-plot"
  8414. \end_inset
  8415. \series bold
  8416. MA plot showing effects of globin blocking on each gene's abundance.
  8417. \series default
  8418. All libraries were normalized together as described in Figure
  8419. \begin_inset CommandInset ref
  8420. LatexCommand ref
  8421. reference "fig:logcpm-dists"
  8422. plural "false"
  8423. caps "false"
  8424. noprefix "false"
  8425. \end_inset
  8426. , and genes with an average logCPM below -1 were filtered out.
  8427. Each remaining gene was tested for differential abundance with respect
  8428. to globin blocking (GB) using edgeR’s quasi-likelihood F-test, fitting a
  8429. negative binomial generalized linear model to the table of read counts in each
  8430. library.
  8431. For each gene, edgeR reported average abundance (logCPM),
  8432. \begin_inset Formula $\log_{2}$
  8433. \end_inset
  8434. fold change (logFC), p-value, and Benjamini-Hochberg adjusted false discovery
  8435. rate (FDR).
  8436. Each gene's logFC was plotted against its logCPM, colored by FDR.
  8437. Red points are significant at ≤10% FDR, and blue are not significant at
  8438. that threshold.
  8439. The alpha and beta globin genes targeted for blocking are marked with large
  8440. triangles, while all other genes are represented as small points.
  8441. \end_layout
  8442. \end_inset
  8443. \end_layout
  8444. \begin_layout Plain Layout
  8445. \end_layout
  8446. \end_inset
  8447. \end_layout
  8448. \begin_layout Standard
  8449. \begin_inset Flex TODO Note (inline)
  8450. status open
  8451. \begin_layout Plain Layout
  8452. Standardize on
  8453. \begin_inset Quotes eld
  8454. \end_inset
  8455. log2
  8456. \begin_inset Quotes erd
  8457. \end_inset
  8458. notation
  8459. \end_layout
  8460. \end_inset
  8461. \end_layout
  8462. \begin_layout Standard
  8463. The data do indeed show small systematic perturbations in gene levels (Figure
  8464. \begin_inset CommandInset ref
  8465. LatexCommand ref
  8466. reference "fig:MA-plot"
  8467. plural "false"
  8468. caps "false"
  8469. noprefix "false"
  8470. \end_inset
  8471. ).
  8472. Other than the 3 designated alpha and beta globin genes, two other genes
  8473. stand out as having especially large negative log fold changes: HBD and
  8474. LOC1021365.
  8475. HBD, delta globin, is most likely targeted by the blocking oligos due to
  8476. high sequence homology with the other globin genes.
  8477. LOC1021365 is the aforementioned ncRNA that is reverse-complementary to
  8478. one of the alpha-like genes and that would be expected to be removed during
  8479. the globin blocking step.
  8480. All other genes appear in a cluster centered vertically at 0, and the vast
  8481. majority of genes in this cluster show an absolute log2(FC) of 0.5 or less.
  8482. Nevertheless, many of these small perturbations are still statistically
  8483. significant, indicating that the globin blocking oligos likely cause very
  8484. small but non-zero systematic perturbations in measured gene expression
  8485. levels.
  8486. \end_layout
  8487. \begin_layout Standard
  8488. \begin_inset Float figure
  8489. wide false
  8490. sideways false
  8491. status collapsed
  8492. \begin_layout Plain Layout
  8493. \align center
  8494. \begin_inset Graphics
  8495. filename graphics/Globin Paper/figure5 - corrplot.pdf
  8496. lyxscale 50
  8497. width 70col%
  8498. \end_inset
  8499. \end_layout
  8500. \begin_layout Plain Layout
  8501. \begin_inset Caption Standard
  8502. \begin_layout Plain Layout
  8503. \series bold
  8504. \begin_inset Argument 1
  8505. status collapsed
  8506. \begin_layout Plain Layout
  8507. Comparison of inter-sample gene abundance correlations with and without
  8508. globin blocking.
  8509. \end_layout
  8510. \end_inset
  8511. \begin_inset CommandInset label
  8512. LatexCommand label
  8513. name "fig:gene-abundance-correlations"
  8514. \end_inset
  8515. Comparison of inter-sample gene abundance correlations with and without
  8516. globin blocking (GB).
  8517. \series default
  8518. All libraries were normalized together as described in Figure 2, and genes
  8519. with an average abundance (logCPM, log2 counts per million reads counted)
  8520. less than -1 were filtered out.
  8521. Each gene’s logCPM was computed in each library using the edgeR cpm function.
  8522. For each pair of biological samples, the Pearson correlation between those
  8523. samples' GB libraries was plotted against the correlation between the same
  8524. samples’ non-GB libraries.
  8525. Each point represents a unique pair of samples.
  8526. The solid gray line shows a quantile-quantile plot of the distribution of GB
  8527. correlations vs.
  8528. that of non-GB correlations.
  8529. The thin dashed line is the identity line, provided for reference.
  8530. \end_layout
  8531. \end_inset
  8532. \end_layout
  8533. \begin_layout Plain Layout
  8534. \end_layout
  8535. \end_inset
  8536. \end_layout
  8537. \begin_layout Standard
  8538. To evaluate the possibility of globin blocking causing random perturbations
  8539. and reducing sample quality, we computed the Pearson correlation between
  8540. logCPM values for every pair of samples with and without GB and plotted
  8541. them against each other (Figure
  8542. \begin_inset CommandInset ref
  8543. LatexCommand ref
  8544. reference "fig:gene-abundance-correlations"
  8545. plural "false"
  8546. caps "false"
  8547. noprefix "false"
  8548. \end_inset
  8549. ).
  8550. The plot indicated that the GB libraries have higher sample-to-sample correlati
  8551. ons than the non-GB libraries.
  8552. Parametric and nonparametric tests for differences between the correlations
  8553. with and without GB both confirmed that this difference was highly significant
  8554. (2-sided paired t-test: t = 37.2, df = 665, P ≪ 2.2e-16; 2-sided Wilcoxon
  8555. sign-rank test: V = 2195, P ≪ 2.2e-16).
  8556. Performing the same tests on the Spearman correlations gave the same conclusion
  8557. (t-test: t = 26.8, df = 665, P ≪ 2.2e-16; sign-rank test: V = 8781, P ≪ 2.2e-16).
  8558. We used the edgeR package to compute the overall biological coefficient
  8559. of variation (BCV) for GB and non-GB libraries, and found that globin blocking
  8560. resulted in a negligible increase in the BCV (0.417 with GB vs.
  8561. 0.400 without).
  8562. The near equality of the BCVs for both sets indicates that the higher correlati
  8563. ons in the GB libraries are most likely a result of the increased yield
  8564. of useful reads, which reduces the contribution of Poisson counting uncertainty
  8565. to the overall variance of the logCPM values
  8566. \begin_inset CommandInset citation
  8567. LatexCommand cite
  8568. key "McCarthy2012"
  8569. literal "false"
  8570. \end_inset
  8571. .
  8572. This improves the precision of expression measurements and more than offsets
  8573. the negligible increase in BCV.
  8574. \end_layout
  8575. \begin_layout Subsection
  8576. More differentially expressed genes are detected with globin blocking
  8577. \end_layout
  8578. \begin_layout Standard
  8579. \begin_inset Float table
  8580. wide false
  8581. sideways false
  8582. status collapsed
  8583. \begin_layout Plain Layout
  8584. \align center
  8585. \begin_inset Tabular
  8586. <lyxtabular version="3" rows="5" columns="5">
  8587. <features tabularvalignment="middle">
  8588. <column alignment="center" valignment="top">
  8589. <column alignment="center" valignment="top">
  8590. <column alignment="center" valignment="top">
  8591. <column alignment="center" valignment="top">
  8592. <column alignment="center" valignment="top">
  8593. <row>
  8594. <cell alignment="center" valignment="top" usebox="none">
  8595. \begin_inset Text
  8596. \begin_layout Plain Layout
  8597. \end_layout
  8598. \end_inset
  8599. </cell>
  8600. <cell alignment="center" valignment="top" usebox="none">
  8601. \begin_inset Text
  8602. \begin_layout Plain Layout
  8603. \end_layout
  8604. \end_inset
  8605. </cell>
  8606. <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  8607. \begin_inset Text
  8608. \begin_layout Plain Layout
  8609. \series bold
  8610. No Globin Blocking
  8611. \end_layout
  8612. \end_inset
  8613. </cell>
  8614. <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8615. \begin_inset Text
  8616. \begin_layout Plain Layout
  8617. \end_layout
  8618. \end_inset
  8619. </cell>
  8620. <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  8621. \begin_inset Text
  8622. \begin_layout Plain Layout
  8623. \end_layout
  8624. \end_inset
  8625. </cell>
  8626. </row>
  8627. <row>
  8628. <cell alignment="center" valignment="top" usebox="none">
  8629. \begin_inset Text
  8630. \begin_layout Plain Layout
  8631. \end_layout
  8632. \end_inset
  8633. </cell>
  8634. <cell alignment="center" valignment="top" usebox="none">
  8635. \begin_inset Text
  8636. \begin_layout Plain Layout
  8637. \end_layout
  8638. \end_inset
  8639. </cell>
  8640. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8641. \begin_inset Text
  8642. \begin_layout Plain Layout
  8643. \series bold
  8644. Up
  8645. \end_layout
  8646. \end_inset
  8647. </cell>
  8648. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8649. \begin_inset Text
  8650. \begin_layout Plain Layout
  8651. \series bold
  8652. NS
  8653. \end_layout
  8654. \end_inset
  8655. </cell>
  8656. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  8657. \begin_inset Text
  8658. \begin_layout Plain Layout
  8659. \series bold
  8660. Down
  8661. \end_layout
  8662. \end_inset
  8663. </cell>
  8664. </row>
  8665. <row>
  8666. <cell multirow="3" alignment="center" valignment="middle" topline="true" bottomline="true" leftline="true" usebox="none">
  8667. \begin_inset Text
  8668. \begin_layout Plain Layout
  8669. \series bold
  8670. Globin-Blocking
  8671. \end_layout
  8672. \end_inset
  8673. </cell>
  8674. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8675. \begin_inset Text
  8676. \begin_layout Plain Layout
  8677. \series bold
  8678. Up
  8679. \end_layout
  8680. \end_inset
  8681. </cell>
  8682. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8683. \begin_inset Text
  8684. \begin_layout Plain Layout
  8685. \family roman
  8686. \series medium
  8687. \shape up
  8688. \size normal
  8689. \emph off
  8690. \bar no
  8691. \strikeout off
  8692. \xout off
  8693. \uuline off
  8694. \uwave off
  8695. \noun off
  8696. \color none
  8697. 231
  8698. \end_layout
  8699. \end_inset
  8700. </cell>
  8701. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8702. \begin_inset Text
  8703. \begin_layout Plain Layout
  8704. \family roman
  8705. \series medium
  8706. \shape up
  8707. \size normal
  8708. \emph off
  8709. \bar no
  8710. \strikeout off
  8711. \xout off
  8712. \uuline off
  8713. \uwave off
  8714. \noun off
  8715. \color none
  8716. 515
  8717. \end_layout
  8718. \end_inset
  8719. </cell>
  8720. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  8721. \begin_inset Text
  8722. \begin_layout Plain Layout
  8723. \family roman
  8724. \series medium
  8725. \shape up
  8726. \size normal
  8727. \emph off
  8728. \bar no
  8729. \strikeout off
  8730. \xout off
  8731. \uuline off
  8732. \uwave off
  8733. \noun off
  8734. \color none
  8735. 2
  8736. \end_layout
  8737. \end_inset
  8738. </cell>
  8739. </row>
  8740. <row>
  8741. <cell multirow="4" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8742. \begin_inset Text
  8743. \begin_layout Plain Layout
  8744. \end_layout
  8745. \end_inset
  8746. </cell>
  8747. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8748. \begin_inset Text
  8749. \begin_layout Plain Layout
  8750. \series bold
  8751. NS
  8752. \end_layout
  8753. \end_inset
  8754. </cell>
  8755. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8756. \begin_inset Text
  8757. \begin_layout Plain Layout
  8758. \family roman
  8759. \series medium
  8760. \shape up
  8761. \size normal
  8762. \emph off
  8763. \bar no
  8764. \strikeout off
  8765. \xout off
  8766. \uuline off
  8767. \uwave off
  8768. \noun off
  8769. \color none
  8770. 160
  8771. \end_layout
  8772. \end_inset
  8773. </cell>
  8774. <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
  8775. \begin_inset Text
  8776. \begin_layout Plain Layout
  8777. \family roman
  8778. \series medium
  8779. \shape up
  8780. \size normal
  8781. \emph off
  8782. \bar no
  8783. \strikeout off
  8784. \xout off
  8785. \uuline off
  8786. \uwave off
  8787. \noun off
  8788. \color none
  8789. 11235
  8790. \end_layout
  8791. \end_inset
  8792. </cell>
  8793. <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
  8794. \begin_inset Text
  8795. \begin_layout Plain Layout
  8796. \family roman
  8797. \series medium
  8798. \shape up
  8799. \size normal
  8800. \emph off
  8801. \bar no
  8802. \strikeout off
  8803. \xout off
  8804. \uuline off
  8805. \uwave off
  8806. \noun off
  8807. \color none
  8808. 136
  8809. \end_layout
  8810. \end_inset
  8811. </cell>
  8812. </row>
  8813. <row>
  8814. <cell multirow="4" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8815. \begin_inset Text
  8816. \begin_layout Plain Layout
  8817. \end_layout
  8818. \end_inset
  8819. </cell>
  8820. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8821. \begin_inset Text
  8822. \begin_layout Plain Layout
  8823. \series bold
  8824. Down
  8825. \end_layout
  8826. \end_inset
  8827. </cell>
  8828. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8829. \begin_inset Text
  8830. \begin_layout Plain Layout
  8831. \family roman
  8832. \series medium
  8833. \shape up
  8834. \size normal
  8835. \emph off
  8836. \bar no
  8837. \strikeout off
  8838. \xout off
  8839. \uuline off
  8840. \uwave off
  8841. \noun off
  8842. \color none
  8843. 0
  8844. \end_layout
  8845. \end_inset
  8846. </cell>
  8847. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
  8848. \begin_inset Text
  8849. \begin_layout Plain Layout
  8850. \family roman
  8851. \series medium
  8852. \shape up
  8853. \size normal
  8854. \emph off
  8855. \bar no
  8856. \strikeout off
  8857. \xout off
  8858. \uuline off
  8859. \uwave off
  8860. \noun off
  8861. \color none
  8862. 548
  8863. \end_layout
  8864. \end_inset
  8865. </cell>
  8866. <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
  8867. \begin_inset Text
  8868. \begin_layout Plain Layout
  8869. \family roman
  8870. \series medium
  8871. \shape up
  8872. \size normal
  8873. \emph off
  8874. \bar no
  8875. \strikeout off
  8876. \xout off
  8877. \uuline off
  8878. \uwave off
  8879. \noun off
  8880. \color none
  8881. 127
  8882. \end_layout
  8883. \end_inset
  8884. </cell>
  8885. </row>
  8886. </lyxtabular>
  8887. \end_inset
  8888. \end_layout
  8889. \begin_layout Plain Layout
  8890. \begin_inset Caption Standard
  8891. \begin_layout Plain Layout
  8892. \series bold
  8893. \begin_inset Argument 1
  8894. status open
  8895. \begin_layout Plain Layout
  8896. Comparison of significantly differentially expressed genes with and without
  8897. globin blocking.
  8898. \end_layout
  8899. \end_inset
  8900. \begin_inset CommandInset label
  8901. LatexCommand label
  8902. name "tab:Comparison-of-significant"
  8903. \end_inset
  8904. Comparison of significantly differentially expressed genes with and without
  8905. globin blocking.
  8906. \series default
  8907. Up, Down: Genes significantly up/down-regulated in post-transplant samples
  8908. relative to pre-transplant samples, with a false discovery rate of 10%
  8909. or less.
  8910. NS: Non-significant genes (false discovery rate greater than 10%).
  8911. \end_layout
  8912. \end_inset
  8913. \end_layout
  8914. \begin_layout Plain Layout
  8915. \end_layout
  8916. \end_inset
  8917. \end_layout
  8918. \begin_layout Standard
  8919. To compare performance on differential gene expression tests, we took subsets
  8920. of both the GB and non-GB libraries with exactly one pre-transplant and
  8921. one post-transplant sample for each animal that had paired samples available
  8922. for analysis (N=7 animals, N=14 samples in each subset).
  8923. The same test for pre- vs.
  8924. post-transplant differential gene expression was performed on the same
  8925. 7 pairs of samples from GB libraries and non-GB libraries, in each case
  8926. using an FDR of 10% as the threshold of significance.
  8927. Out of 12954 genes that passed the detection threshold in both subsets,
  8928. 358 were called significantly differentially expressed in the same direction
  8929. in both sets; 1063 were differentially expressed in the GB set only; 296
  8930. were differentially expressed in the non-GB set only; 2 genes were called
  8931. significantly up in the GB set but significantly down in the non-GB set;
  8932. and the remaining 11235 were not called differentially expressed in either
  8933. set.
  8934. These data are summarized in Table
  8935. \begin_inset CommandInset ref
  8936. LatexCommand ref
  8937. reference "tab:Comparison-of-significant"
  8938. plural "false"
  8939. caps "false"
  8940. noprefix "false"
  8941. \end_inset
  8942. .
  8943. The differences in BCV calculated by edgeR for these subsets of samples
  8944. were negligible (BCV = 0.302 for GB and 0.297 for non-GB).
  8945. \end_layout
  8946. \begin_layout Standard
  8947. The key point is that the GB data results in substantially more differential
  8948. expression calls than the non-GB data.
  8949. Since there is no gold standard for this dataset, it is impossible to be
  8950. certain whether this is due to under-calling of differential expression
  8951. in the non-GB samples or over-calling in the GB samples.
  8952. However, given that both datasets are derived from the same biological
  8953. samples and have nearly equal BCVs, it is more likely that the larger number
  8954. of DE calls in the GB samples are genuine detections that were enabled
  8955. by the higher sequencing depth and measurement precision of the GB samples.
  8956. Note that the same set of genes was considered in both subsets, so the
  8957. larger number of differentially expressed gene calls in the GB data set
  8958. reflects a greater sensitivity to detect significant differential gene
  8959. expression and not simply the larger total number of detected genes in
  8960. GB samples described earlier.
  8961. \end_layout
  8962. \begin_layout Section
  8963. Discussion
  8964. \end_layout
  8965. \begin_layout Standard
  8966. The original experience with whole blood gene expression profiling on DNA
  8967. microarrays demonstrated that the high concentration of globin transcripts
  8968. significantly reduced the sensitivity to detect genes with relatively low
  8969. expression levels.
  8970. To address this limitation, commercial protocols for globin reduction were
  8971. developed based on strategies to block globin transcript amplification
  8972. during labeling or physically removing globin transcripts by affinity bead
  8973. methods
  8974. \begin_inset CommandInset citation
  8975. LatexCommand cite
  8976. key "Winn2010"
  8977. literal "false"
  8978. \end_inset
  8979. .
  8980. More recently, using the latest generation of labeling protocols and arrays,
  8981. it was determined that globin reduction was no longer necessary to obtain
  8982. sufficient sensitivity to detect differential transcript expression
  8983. \begin_inset CommandInset citation
  8984. LatexCommand cite
  8985. key "NuGEN2010"
  8986. literal "false"
  8987. \end_inset
  8988. .
  8989. However, we are not aware of any publications using these currently available
  8990. protocols with the latest generation of microarrays that actually compare
  8991. the detection sensitivity with and without globin reduction.
  8992. Nevertheless, in practice this has now been generally adopted, driven primarily
  8993. by concerns for cost control.
  8994. The main objective of our work was to directly test the impact of globin
  8995. gene transcripts and a new globin blocking protocol for application to
  8996. the newest generation of differential gene expression profiling determined
  8997. using next generation sequencing.
  8998. \end_layout
  8999. \begin_layout Standard
  9000. The challenge of doing global gene expression profiling in cynomolgus monkeys
  9001. is that the currently available arrays were never designed to comprehensively
  9002. cover this genome and have not been updated since the first assemblies
  9003. of the cynomolgus genome were published.
  9004. Therefore, we determined that the best strategy for peripheral blood profiling
  9005. was to do deep RNA-seq and inform the workflow using the latest available
  9006. genome assembly and annotation
  9007. \begin_inset CommandInset citation
  9008. LatexCommand cite
  9009. key "Wilson2013"
  9010. literal "false"
  9011. \end_inset
  9012. .
  9013. However, it was not immediately clear whether globin reduction was necessary
  9014. for RNA-seq or how much improvement in efficiency or sensitivity to detect
  9015. differential gene expression would be achieved for the added cost and work.
  9016. \end_layout
  9017. \begin_layout Standard
  9018. We only found one report that demonstrated that globin reduction significantly
  9019. improved the effective read yields for sequencing of human peripheral blood
  9020. cell RNA using a DeepSAGE protocol
  9021. \begin_inset CommandInset citation
  9022. LatexCommand cite
  9023. key "Mastrokolias2012"
  9024. literal "false"
  9025. \end_inset
  9026. .
  9027. The approach to DeepSAGE involves two different restriction enzymes that
  9028. purify and then tag small fragments of transcripts at specific locations
  9029. and thus significantly reduces the complexity of the transcriptome.
  9030. Therefore, we could not determine how DeepSAGE results would translate
  9031. to the common strategy in the field for assaying the entire transcript
  9032. population by whole-transcriptome 3’-end RNA-seq.
  9033. Furthermore, if globin reduction was necessary, we also needed a globin
  9034. reduction method specific to cynomolgus globin sequences that would work
  9035. for an organism for which no kit is available off the shelf.
  9036. \end_layout
  9037. \begin_layout Standard
  9038. As mentioned above, the addition of globin blocking oligos has a very small
  9039. impact on measured gene expression levels.
  9040. However, this is a non-issue for the purposes of differential expression
  9041. testing, since a systematic change in a gene in all samples does not affect
  9042. relative expression levels between samples.
  9043. Nevertheless, we must acknowledge that simple comparisons of gene expression
  9044. data obtained by GB and non-GB protocols are not possible without additional
  9045. normalization.
  9046. \end_layout
  9047. \begin_layout Standard
  9048. More importantly, globin blocking not only nearly doubles the yield of usable
  9049. reads, it also increases inter-sample correlation and sensitivity to detect
  9050. differential gene expression relative to the same set of samples profiled
  9051. without blocking.
  9052. In addition, globin blocking does not add a significant amount of random
  9053. noise to the data.
  9054. Globin blocking thus represents a cost-effective way to squeeze more data
  9055. and statistical power out of the same blood samples and the same amount
  9056. of sequencing.
  9057. In conclusion, globin reduction greatly increases the yield of useful RNA-seq
  9058. reads mapping to the rest of the genome, with minimal perturbations in
  9059. the relative levels of non-globin genes.
  9060. Based on these results, globin transcript reduction using sequence-specific,
  9061. complementary blocking oligonucleotides is recommended for all deep RNA-seq
  9062. of cynomolgus and other nonhuman primate blood samples.
  9063. \end_layout
  9064. \begin_layout Chapter
  9065. Future Directions
  9066. \end_layout
  9067. \begin_layout Standard
  9068. \begin_inset Flex TODO Note (inline)
  9069. status open
  9070. \begin_layout Plain Layout
  9071. Consider per-chapter future directions.
  9072. Check instructions.
  9073. \end_layout
  9074. \end_inset
  9075. \end_layout
  9076. \begin_layout Section*
  9077. Ch2
  9078. \end_layout
  9079. \begin_layout Itemize
  9080. Functional validation of effective promoter radius
  9081. \end_layout
  9082. \begin_layout Itemize
  9083. Current definition of promoter radius is dependent on peak calling.
  9084. Would be nice to have a better way of defining promoter radius independent
  9085. of peak calling.
  9086. Possibly based on the promoter coverage profiles
  9087. \end_layout
  9088. \begin_layout Itemize
  9089. N-to-M convergence deserves further study of some kind
  9090. \end_layout
  9091. \begin_layout Itemize
  9092. Promoter positional coverage: follow up on hints of interesting patterns
  9093. \end_layout
  9094. \begin_layout Itemize
  9095. Study other epigenetic marks in more contexts
  9096. \end_layout
  9097. \begin_deeper
  9098. \begin_layout Itemize
  9099. DNA methylation, histone marks, chromatin accessibility & conformation in
  9100. CD4 T-cells
  9101. \end_layout
  9102. \begin_layout Itemize
  9103. Also look at other types of lymphocytes: CD8 T-cells, B-cells, NK cells
  9104. \end_layout
  9105. \end_deeper
  9106. \begin_layout Section*
  9107. Ch3
  9108. \end_layout
  9109. \begin_layout Itemize
  9110. Use CV or bootstrap to better evaluate classifiers
  9111. \end_layout
  9112. \begin_layout Itemize
  9113. fRMAtools could be adapted to not require equal-sized groups
  9114. \end_layout
  9115. \begin_layout Section*
  9116. Ch4
  9117. \end_layout
  9118. \begin_layout Itemize
  9119. Look in discussion, I think there's some stuff there already
  9120. \end_layout
  9121. \begin_layout Standard
  9122. \begin_inset ERT
  9123. status open
  9124. \begin_layout Plain Layout
  9125. % Call it "References" instead of "Bibliography"
  9126. \end_layout
  9127. \begin_layout Plain Layout
  9128. \backslash
  9129. renewcommand{
  9130. \backslash
  9131. bibname}{References}
  9132. \end_layout
  9133. \end_inset
  9134. \end_layout
  9135. \begin_layout Standard
  9136. \begin_inset Flex TODO Note (inline)
  9137. status open
  9138. \begin_layout Plain Layout
  9139. Check bib entry formatting & sort order
  9140. \end_layout
  9141. \end_inset
  9142. \end_layout
  9143. \begin_layout Standard
  9144. \begin_inset Flex TODO Note (inline)
  9145. status open
  9146. \begin_layout Plain Layout
  9147. Check in-text citation format.
  9148. Probably don't just want [1], [2], etc.
  9149. \end_layout
  9150. \end_inset
  9151. \end_layout
  9152. \begin_layout Standard
  9153. \begin_inset CommandInset bibtex
  9154. LatexCommand bibtex
  9155. btprint "btPrintCited"
  9156. bibfiles "code-refs,refs-PROCESSED"
  9157. options "bibtotoc,unsrt"
  9158. \end_inset
  9159. \end_layout
  9160. \end_body
  9161. \end_document