12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
7627862796280628162826283628462856286628762886289629062916292629362946295629662976298629963006301630263036304630563066307630863096310631163126313631463156316631763186319632063216322632363246325632663276328632963306331633263336334633563366337633863396340634163426343634463456346634763486349635063516352635363546355635663576358635963606361636263636364636563666367636863696370637163726373637463756376637763786379638063816382638363846385638663876388638963906391639263936394639563966397639863996400640164026403640464056406640764086409641064116412641364146415641664176418641964206421642264236424642564266427642864296430643164326433643464356436643764386439644064416442644364446445644664476448644964506451645264536454645564566457645864596460646164626463646464656466646764686469647064716472647364746475647664776478647964806481648264836484648564866487648864896490649164926493649464956496649764986499 |
- #LyX 2.3 created this file. For more info see http://www.lyx.org/
- \lyxformat 544
- \begin_document
- \begin_header
- \save_transient_properties true
- \origin unavailable
- \textclass extbook
- \begin_preamble
- % List all used files in log output
- \listfiles
- % Add a DRAFT watermark
- \usepackage{draftwatermark}
- \SetWatermarkLightness{0.97}
- \SetWatermarkScale{1}
- % Set up required header format
- \usepackage{fancyhdr}
- \pagestyle{fancy}
- \renewcommand{\headrulewidth}{0pt}
- \rhead{}
- \lhead{}
- \rfoot{}
- \lfoot{}
- \cfoot{\thepage} % Page number bottom center
- % https://tex.stackexchange.com/questions/65680/automatically-bold-first-sentence-of-a-floats-caption
- \usepackage{xstring}
- \usepackage{etoolbox}
- \usepackage{caption}
- \captionsetup{labelfont=bf,tableposition=top}
- \makeatletter
- \newcommand\formatlabel[1]{%
- \noexpandarg
- \IfSubStr{#1}{.}{%
- \StrBefore{#1}{.}[\firstcaption]%
- \StrBehind{#1}{.}[\secondcaption]%
- \textbf{\firstcaption.} \secondcaption}{%
- #1}%
- }
- \patchcmd{\@caption}{#3}{\formatlabel{#3}}{}{}
- \makeatother
- \end_preamble
- \use_default_options true
- \begin_modules
- todonotes
- \end_modules
- \maintain_unincluded_children false
- \language english
- \language_package default
- \inputencoding utf8
- \fontencoding default
- \font_roman "default" "default"
- \font_sans "default" "default"
- \font_typewriter "default" "default"
- \font_math "auto" "auto"
- \font_default_family default
- \use_non_tex_fonts false
- \font_sc false
- \font_osf false
- \font_sf_scale 100 100
- \font_tt_scale 100 100
- \use_microtype false
- \use_dash_ligatures true
- \graphics default
- \default_output_format pdf4
- \output_sync 0
- \bibtex_command default
- \index_command default
- \paperfontsize 12
- \spacing double
- \use_hyperref true
- \pdf_bookmarks true
- \pdf_bookmarksnumbered false
- \pdf_bookmarksopen false
- \pdf_bookmarksopenlevel 1
- \pdf_breaklinks false
- \pdf_pdfborder false
- \pdf_colorlinks false
- \pdf_backref false
- \pdf_pdfusetitle true
- \papersize letterpaper
- \use_geometry true
- \use_package amsmath 1
- \use_package amssymb 1
- \use_package cancel 1
- \use_package esint 1
- \use_package mathdots 1
- \use_package mathtools 1
- \use_package mhchem 1
- \use_package stackrel 1
- \use_package stmaryrd 1
- \use_package undertilde 1
- \cite_engine basic
- \cite_engine_type default
- \biblio_style plain
- \use_bibtopic false
- \use_indices false
- \paperorientation portrait
- \suppress_date false
- \justification true
- \use_refstyle 1
- \use_minted 0
- \index Index
- \shortcut idx
- \color #008000
- \end_index
- \leftmargin 1.5in
- \topmargin 1in
- \rightmargin 1in
- \bottommargin 1in
- \secnumdepth 3
- \tocdepth 3
- \paragraph_separation indent
- \paragraph_indentation default
- \is_math_indent 0
- \math_numbering_side default
- \quotes_style english
- \dynamic_quotes 0
- \papercolumns 1
- \papersides 2
- \paperpagestyle default
- \tracking_changes false
- \output_changes false
- \html_math_output 0
- \html_css_as_file 0
- \html_be_strict false
- \end_header
- \begin_body
- \begin_layout Title
- Bioinformatic analysis of complex, high-throughput genomic and epigenomic
- data in the context of immunology and transplant rejection
- \end_layout
- \begin_layout Author
- A thesis presented
- \begin_inset Newline newline
- \end_inset
- by
- \begin_inset Newline newline
- \end_inset
- Ryan C.
- Thompson
- \begin_inset Newline newline
- \end_inset
- to
- \begin_inset Newline newline
- \end_inset
- The Scripps Research Institute Graduate Program
- \begin_inset Newline newline
- \end_inset
- in partial fulfillment of the requirements for the degree of
- \begin_inset Newline newline
- \end_inset
- Doctor of Philosophy in the subject of Biology
- \begin_inset Newline newline
- \end_inset
- for
- \begin_inset Newline newline
- \end_inset
- The Scripps Research Institute
- \begin_inset Newline newline
- \end_inset
- La Jolla, California
- \end_layout
- \begin_layout Date
- May 2019
- \end_layout
- \begin_layout Standard
- [Copyright notice]
- \end_layout
- \begin_layout Standard
- [Thesis acceptance form]
- \end_layout
- \begin_layout Standard
- [Dedication]
- \end_layout
- \begin_layout Standard
- [Acknowledgements]
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset toc
- LatexCommand tableofcontents
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset FloatList table
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset FloatList figure
- \end_inset
- \end_layout
- \begin_layout Standard
- [List of Abbreviations]
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Look into auto-generated nomenclature list: https://wiki.lyx.org/Tips/Nomenclature
- \end_layout
- \end_inset
- \end_layout
- \begin_layout List of TODOs
- \end_layout
- \begin_layout Standard
- [Abstract]
- \end_layout
- \begin_layout Chapter*
- Abstract
- \end_layout
- \begin_layout Chapter
- Introduction
- \end_layout
- \begin_layout Section
- Background & Significance
- \end_layout
- \begin_layout Subsection
- Biological motivation
- \end_layout
- \begin_layout Itemize
- Rejection is the major long-term threat to organ and tissue grafts
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Common mechanisms of rejection
- \end_layout
- \begin_layout Itemize
- Effective immune suppression requires monitoring for rejection and tuning
-
- \end_layout
- \begin_layout Itemize
- Current tests for rejection (tissue biopsy) are invasive and biased
- \end_layout
- \begin_layout Itemize
- A blood test based on microarrays would be less biased and invasive
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Memory cells are resistant to immune suppression
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Mechanisms of resistance in memory cells are poorly understood
- \end_layout
- \begin_layout Itemize
- A better understanding of immune memory formation is needed
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Mesenchymal stem cell infusion is a promising new treatment to prevent/delay
- rejection
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Demonstrated in mice, but not yet in primates
- \end_layout
- \begin_layout Itemize
- Mechanism currently unknown, but MSCs are known to be immunomodulatory
- \end_layout
- \end_deeper
- \begin_layout Subsection
- Overview of bioinformatic analysis methods
- \end_layout
- \begin_layout Standard
- An overview of all the methods used, including what problem they solve,
- what assumptions they make, and a basic description of how they work.
- \end_layout
- \begin_layout Itemize
- ChIP-seq Peak calling
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Cross-correlation analysis to determine fragment size
- \end_layout
- \begin_layout Itemize
- Broad vs narrow peaks
- \end_layout
- \begin_layout Itemize
- SICER for broad peaks
- \end_layout
- \begin_layout Itemize
- IDR for biologically reproducible peaks
- \end_layout
- \begin_layout Itemize
- csaw peak filtering guidelines for unbiased downstream analysis
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Normalization is non-trivial and application-dependent
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Expression arrays: RMA & fRMA; why fRMA is needed
- \end_layout
- \begin_layout Itemize
- Methylation arrays: M-value transformation approximates normal data but
- induces heteroskedasticity
- \end_layout
- \begin_layout Itemize
- RNA-seq: normalize based on assumption that the average gene is not changing
- \end_layout
- \begin_layout Itemize
- ChIP-seq: complex with many considerations, dependent on experimental methods,
- biological system, and analysis goals
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Limma: The standard linear modeling framework for genomics
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- empirical Bayes variance modeling: limma's core feature
- \end_layout
- \begin_layout Itemize
- edgeR & DESeq2: Extend with negative binomial GLM for RNA-seq and other
- count data
- \end_layout
- \begin_layout Itemize
- voom: Extend with precision weights to model mean-variance trend
- \end_layout
- \begin_layout Itemize
- arrayWeights and duplicateCorrelation to handle complex variance structures
- \end_layout
- \end_deeper
- \begin_layout Itemize
- sva and ComBat for batch correction
- \end_layout
- \begin_layout Itemize
- Factor analysis: PCA, MDS, MOFA
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Batch-corrected PCA is informative, but careful application is required
- to avoid bias
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Gene set analysis: camera and SPIA
- \end_layout
- \begin_layout Section
- Innovation
- \end_layout
- \begin_layout Itemize
- MSC infusion to improve transplant outcomes (prevent/delay rejection)
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Characterize MSC response to interferon gamma
- \end_layout
- \begin_layout Itemize
- IFN-g is thought to stimulate their function
- \end_layout
- \begin_layout Itemize
- Test IFN-g treated MSC infusion as a therapy to delay graft rejection in
- cynomolgus monkeys
- \end_layout
- \begin_layout Itemize
- Monitor animals post-transplant using blood RNA-seq at serial time points
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Investigate dynamics of histone marks in CD4 T-cell activation and memory
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Previous studies have looked at single snapshots of histone marks
- \end_layout
- \begin_layout Itemize
- Instead, look at changes in histone marks across activation and memory
- \end_layout
- \end_deeper
- \begin_layout Itemize
- High-throughput sequencing and microarray technologies
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Powerful methods for assaying gene expression and epigenetics across entire
- genomes
- \end_layout
- \begin_layout Itemize
- Proper analysis requires finding and exploiting systematic genome-wide trends
- \end_layout
- \end_deeper
- \begin_layout Chapter
- Reproducible genome-wide epigenetic analysis of H3K4 and H3K27 methylation
- in naive and memory CD4 T-cell activation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Author list: Me, Sarah, Dan
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Itemize
- CD4 T-cells are central to all adaptive immune responses and memory
- \end_layout
- \begin_layout Itemize
- H3K4 and H3K27 methylation are major epigenetic regulators of gene expression
- \end_layout
- \begin_layout Itemize
- Canonically, H3K4 is activating and H3K27 is inhibitory, but the reality
- is complex
- \end_layout
- \begin_layout Itemize
- Looking at these marks during CD4 activation and memory should reveal new
- mechanistic details
- \end_layout
- \begin_layout Itemize
- Test
- \begin_inset Quotes eld
- \end_inset
- poised promoter
- \begin_inset Quotes erd
- \end_inset
- hypothesis in which H3K4 and H3K27 are both methylated
- \end_layout
- \begin_layout Itemize
- Expand scope of analysis beyond simple promoter counts
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Analyze peaks genome-wide, including in intergenic regions
- \end_layout
- \begin_layout Itemize
- Analysis of coverage distribution shape within promoters, e.g.
- upstream vs downstream coverage
- \end_layout
- \end_deeper
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways true
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/CD4-csaw/rulegraphs/rulegraph-all.pdf
- width 100theight%
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:rulegraph"
- \end_inset
- \series bold
- Dependency graph of steps in reproducible workflow
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- A reproducible workflow
- \begin_inset CommandInset citation
- LatexCommand cite
- key "gh-cd4-csaw"
- literal "false"
- \end_inset
- was written to analyze the raw ChIP-seq and RNA-seq data from previous
- studies
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016,LaMere2017"
- literal "true"
- \end_inset
- .
- Briefly, this data consists of RNA-seq and ChIP-seq from CD4 T-cells cultured
- from 4 donors.
- From each donor, naive and memory CD4 T-cells were isolated separately.
- Then cultures of both cells were activated [how?], and samples were taken
- at 4 time points: Day 0 (pre-activation), Day 1 (early activation), Day
- 5 (peak activation), and Day 14 (post-activation).
- For each combination of cell type and time point, RNA was isolated, and
- ChIP-seq was performed for each of 3 histone marks: H3K4me2, H3K4me3, and
- H3K27me3.
- The ChIP-seq input was also sequenced for each sample.
- The result was 32 samples for each assay.
- \end_layout
- \begin_layout Standard
- Sequence reads were retrieved from the Sequence Read Archive (SRA)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leinonen2011"
- literal "false"
- \end_inset
- .
- ChIP-seq (and input) reads were aligned to the GRCh38 genome assembly using
- Bowtie 2
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Langmead2012,Schneider2017,gh-hg38-ref"
- literal "false"
- \end_inset
- .
- Artifact regions were annotated using a custom implementation of the GreyListCh
- IP algorithm, and these
- \begin_inset Quotes eld
- \end_inset
- greylists
- \begin_inset Quotes erd
- \end_inset
- were merged with the ENCODE blacklist
- \begin_inset CommandInset citation
- LatexCommand cite
- key "greylistchip,Amemiya2019,Dunham2012"
- literal "false"
- \end_inset
- .
- Any read or peak overlapping one of these regions was regarded as artifactual
- and excluded from downstream analyses.
-
- \end_layout
- \begin_layout Standard
- Peaks were called using epic, an implementation of the SICER algorithm
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zang2009,gh-epic"
- literal "false"
- \end_inset
- .
- Peaks were also called separately using MACS, but MACS was determined to
- be a poor fit for the data, and these peak calls were not used further
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Zhang2008"
- literal "false"
- \end_inset
- .
-
- \end_layout
- \begin_layout Itemize
- Re-analyze previously published CD4 ChIP-seq & RNA-seq data
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Completely reimplement analysis from scratch as a reproducible workflow
- \end_layout
- \begin_layout Itemize
- Use newly published methods & algorithms not available during the original
- analysis: SICER, csaw, MOFA, ComBat, sva, GREAT, and more
- \end_layout
- \end_deeper
- \begin_layout Itemize
- SICER, IDR, csaw, & GREAT to call ChIP-seq peaks genome-wide, perform differenti
- al abundance analysis, and relate those peaks to gene expression
- \end_layout
- \begin_layout Itemize
- Promoter counts in sliding windows around each gene's highest-expressed
- TSS to investigate coverage distribution within promoters
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Focus on what hypotheses were tested, then select figures that show how
- those hypotheses were tested, even if the result is a negative.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- H3K4 and H3K27 methylation occur in broad regions and are enriched near
- promoters
- \end_layout
- \begin_layout Itemize
- Figures comparing MACS (non-broad peak caller) to SICER/epic (broad peak
- caller)
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Compare peak sizes and number of called peaks
- \end_layout
- \begin_layout Itemize
- Show representative IDR consistency plots for both
- \end_layout
- \end_deeper
- \begin_layout Itemize
- IDR analysis shows that SICER-called peaks are much more reproducible between
- biological replicates
- \end_layout
- \begin_layout Itemize
- Each histone mark is enriched within a certain radius of gene TSS positions,
- but that radius is different for each mark (figure)
- \end_layout
- \begin_layout Subsection
- RNA-seq has a large confounding batch effect
- \end_layout
- \begin_layout Itemize
- RNA-seq batch effect can be partially corrected, but still induces uncorrectable
- biases in downstream analysis
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Figure showing MDS plot before & after ComBat
- \end_layout
- \begin_layout Itemize
- Figure relating sample weights to batches, cell types, time points, etc.,
- showing that one batch is significantly worse quality
- \end_layout
- \begin_layout Itemize
- Figures showing p-value histograms for within-batch and cross-batch contrasts,
- showing that cross-batch contrasts have attenuated signal, as do comparisons
- within the bad batch
- \end_layout
- \end_deeper
- \begin_layout Subsection
- ChIP-seq must be corrected for hidden confounding factors
- \end_layout
- \begin_layout Itemize
- Figures showing pre- and post-SVA MDS plots for each histone mark
- \end_layout
- \begin_layout Itemize
- Figures showing BCV plots with and without SVA for each histone mark
- \end_layout
- \begin_layout Subsection
- H3K4 and H3K27 promoter methylation has broadly the expected correlation
- with gene expression
- \end_layout
- \begin_layout Itemize
- H3K4 is correlated with higher expression, and H3K27 is correlated with
- lower expression genome-wide
- \end_layout
- \begin_layout Itemize
- Figures showing these correlations: box/violin plots of expression distributions
- with every combination of peak presence/absence in promoter
- \end_layout
- \begin_layout Itemize
- Appropriate statistical tests showing significant differences in expected
- directions
- \end_layout
- \begin_layout Subsection
- MOFA recovers biologically relevant variation from blind analysis by correlating
- across datasets
- \end_layout
- \begin_layout Itemize
- MOFA
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Argelaguet2018"
- literal "false"
- \end_inset
- successfully separates biologically relevant patterns of variation from
- technical confounding factors without knowing the sample labels, by finding
- latent factors that explain variation across multiple data sets.
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Figure: show percent-variance-explained plot from MOFA and PCA-like plots
- for the relevant latent factors
- \end_layout
- \begin_layout Itemize
- MOFA analysis also shows that batch effect correction can't get much better
- than it already is (Figure comparing blind MOFA batch correction to ComBat
- correction)
- \end_layout
- \end_deeper
- \begin_layout Subsection
- Naive-to-memory convergence observed in H3K4 and RNA-seq data, not in H3K27me3
- \end_layout
- \begin_layout Itemize
- H3K4 and RNA-seq data show clear evidence of naive convergence with memory
- between days 1 and 5 (MDS plot figure, also compare with last figure from
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "LaMere2016"
- literal "false"
- \end_inset
- )
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Note that Sarah has granted permission to use her figures
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Table of numbers of genes different between N & M at each time point, showing
- dwindling differences at later time points, consistent with convergence
- \end_layout
- \begin_layout Itemize
- Similar figure for H3K27me3 showing lack of convergence
- \end_layout
- \begin_layout Subsection
- Effect of promoter coverage upstream vs downstream of TSS
- \end_layout
- \begin_layout Itemize
- H3K4me peaks seem to correlate with increased expression as long as they
- are anywhere near the TSS
- \end_layout
- \begin_layout Itemize
- H3K27me3 peaks can have different correlations to gene expression depending
- on their position relative to TSS (e.g.
- upstream vs downstream). Results are consistent with
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Young2011"
- literal "false"
- \end_inset
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Itemize
- "Promoter radius" is not constant and must be defined empirically for a
- given data set
- \end_layout
- \begin_layout Itemize
- MOFA shows great promise for accelerating discovery of major biological
- effects in multi-omics datasets
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- MOFA was added to this analysis late and played primarily a confirmatory
- role, but it was able to confirm earlier conclusions with much less prior
- information (no sample labels) and much less analyst effort
- \end_layout
- \begin_layout Itemize
- MOFA confirmed that the already-implemented batch correction in the RNA-seq
- data was already performing as well as possible given the limitations of
- the data
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Naive-to-memory convergence implies that naive cells are differentiating
- into memory cells, and that gene expression and H3K4 methylation are involved
- in this differentiation while H3K27me3 is less involved
- \end_layout
- \begin_layout Itemize
- H3K27me3, canonically regarded as a deactivating mark, seems to have a more
- complex role
- \end_layout
- \begin_layout Itemize
- Discuss advantages of developing using a reproducible workflow
- \end_layout
- \begin_layout Chapter
- Improving array-based analyses of transplant rejection by optimizing data
- preprocessing
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Author list: Me, Sunil, Tom, Padma, Dan
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Subsection
- Proper pre-processing is essential for array data
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- This section could probably use some citations
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Microarrays, bead arrays, and similar assays produce raw data in the form
- of fluorescence intensity measurements, with each intensity measurement
- proportional to the abundance of some fluorescently-labelled target DNA
- or RNA sequence that base pairs to a specific probe sequence.
- However, these measurements for each probe are also affected by many technical
- confounding factors, such as the concentration of target material, strength
- of off-target binding, and the sensitivity of the imaging sensor.
- Some array designs also use multiple probe sequences for each target.
- Hence, extensive pre-processing of array data is necessary to normalize
- out the effects of these technical factors and summarize the information
- from multiple probes to arrive at a single usable estimate of abundance
- or other relevant quantity, such as a ratio of two abundances, for each
- target.
- \end_layout
- \begin_layout Standard
- The choice of pre-processing algorithms used in the analysis of an array
- data set can have a large effect on the results of that analysis.
- However, despite their importance, these steps are often neglected or rushed
- in order to get to the more scientifically interesting analysis steps involving
- the actual biology of the system under study.
- Hence, it is often possible to achieve substantial gains in statistical
- power, model goodness-of-fit, or other relevant performance measures, by
- checking the assumptions made by each preprocessing step and choosing specific
- normalization methods tailored to the specific goals of the current analysis.
- \end_layout
- \begin_layout Subsection
- Normalization for clinical microarray classifiers must be single-channel
- \end_layout
- \begin_layout Subsubsection
- Standard normalization methods are unsuitable for clinical application
- \end_layout
- \begin_layout Standard
- As the cost of performing microarray assays falls, there is increasing interest
- in using genomic assays for diagnostic purposes, such as distinguishing
- healthy transplants (TX) from transplants undergoing acute rejection (AR)
- or acute dysfunction with no rejection (ADNR).
- However, the standard normalization algorithm used for microarray data,
- Robust Multi-chip Average (RMA)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Irizarry2003a"
- literal "false"
- \end_inset
- , is not applicable in a clinical setting.
- Two of the steps in RMA, quantile normalization and probe summarization
- by median polish, depend on every array in the data set being normalized.
- This means that adding or removing any arrays from a data set changes the
- normalized values for all arrays, and data sets that have been normalized
- separately cannot be compared to each other.
- Hence, when using RMA, any arrays to be analyzed together must also be
- normalized together, and the set of arrays included in the data set must
- be held constant throughout an analysis.
- \end_layout
- \begin_layout Standard
- These limitations present serious impediments to the use of arrays as a
- diagnostic tool.
- When training a classifier, the samples to be classified must not be involved
- in any step of the training process, lest their inclusion bias the training
- process.
- Once a classifier is deployed in a clinical setting, the samples to be
- classified will not even
- \emph on
- exist
- \emph default
- at the time of training, so including them would be impossible even if
- it were statistically justifiable.
- Therefore, any machine learning application for microarrays demands that
- the normalized expression values computed for an array must depend only
- on information contained within that array.
- This would ensure that each array's normalization is independent of every
- other array, and that arrays normalized separately can still be compared
- to each other without bias.
- Such a normalization is commonly referred to as
- \begin_inset Quotes eld
- \end_inset
- single-channel normalization
- \begin_inset Quotes erd
- \end_inset
- .
- \end_layout
- \begin_layout Subsubsection
- Several strategies are available to meet clinical normalization requirements
- \end_layout
- \begin_layout Standard
- Frozen RMA (fRMA) addresses these concerns by replacing the quantile normalizati
- on and median polish with alternatives that do not introduce inter-array
- dependence, allowing each array to be normalized independently of all others
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- Quantile normalization is performed against a pre-generated set of quantiles
- learned from a collection of 850 publicly available arrays sampled from
- a wide variety of tissues in the Gene Expression Omnibus (GEO).
- Each array's probe intensity distribution is normalized against these pre-gener
- ated quantiles.
- The median polish step is replaced with a robust weighted average of probe
- intensities, using inverse variance weights learned from the same public
- GEO data.
- The result is a normalization that satisfies the requirements mentioned
- above: each array is normalized independently of all others, and any two
- normalized arrays can be compared directly to each other.
- \end_layout
- \begin_layout Standard
- One important limitation of fRMA is that it requires a separate reference
- data set from which to learn the parameters (reference quantiles and probe
- weights) that will be used to normalize each array.
- These parameters are specific to a given array platform, and pre-generated
- parameters are only provided for the most common platforms, such as Affymetrix
- hgu133plus2.
- For a less common platform, such as hthgu133pluspm, it is necessary to
- learn custom parameters from in-house data before fRMA can be used to normalize
- samples on that platform
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- One other option is the aptly-named Single Channel Array Normalization (SCAN),
- which adapts a normalization method originally designed for tiling arrays
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Piccolo2012"
- literal "false"
- \end_inset
- .
- SCAN is truly single-channel in that it does not require a set of normalization
- parameters estimated from an external set of reference samples like fRMA
- does.
- \end_layout
- \begin_layout Subsection
- Heteroskedasticity must be accounted for in methylation array data
- \end_layout
- \begin_layout Subsubsection
- Methylation array preprocessing induces heteroskedasticity
- \end_layout
- \begin_layout Standard
- DNA methylation arrays are a relatively new kind of assay that uses microarrays
- to measure the degree of methylation on cytosines in specific regions arrayed
- across the genome.
- First, bisulfite treatment converts all unmethylated cytosines to uracil
- (which then become thymine after amplification) while leaving methylated
- cytosines unaffected.
- Then, each target region is interrogated with two probes: one binds to
- the original genomic sequence and interrogates the level of methylated
- DNA, and the other binds to the same sequence with all cytosines replaced
- by thymidines and interrogates the level of unmethylated DNA.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/methylvoom/sigmoid.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Sigmoid-beta-m-mapping"
- \end_inset
- \series bold
- Sigmoid shape of the mapping between β and M values
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- After normalization, these two probe intensities are summarized in one of
- two ways, each with advantages and disadvantages.
- β
- \series bold
-
- \series default
- values, interpreted as fraction of DNA copies methylated, range from 0 to
- 1.
- β
- \series bold
-
- \series default
- values are conceptually easy to interpret, but the constrained range makes
- them unsuitable for linear modeling, and their error distributions are
- highly non-normal, which also frustrates linear modeling.
- M-values, interpreted as the log ratio of methylated to unmethylated copies,
- are computed by mapping the beta values from
- \begin_inset Formula $[0,1]$
- \end_inset
- onto
- \begin_inset Formula $(-\infty,+\infty)$
- \end_inset
- using a sigmoid curve (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Sigmoid-beta-m-mapping"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This transformation results in values with better statistical properties:
- the unconstrained range is suitable for linear modeling, and the error
- distributions are more normal.
- Hence, most linear modeling and other statistical testing on methylation
- arrays is performed using M-values.
- \end_layout
- \begin_layout Standard
- However, the steep slope of the sigmoid transformation near 0 and 1 tends
- to exaggerate small differences in β values near those extremes, which
- in turn amplifies the error in those values, leading to a U-shaped trend
- in the mean-variance curve: extreme values have higher variances than values
- near the middle.
- This mean-variance dependency must be accounted for when fitting the linear
- model for differential methylation, or else the variance will be systematically
- overestimated for probes with moderate M-values and underestimated for
- probes with extreme M-values.
- \end_layout
- \begin_layout Subsubsection
- The voom method for RNA-seq data can model M-value heteroskedasticity
- \end_layout
- \begin_layout Standard
- RNA-seq read count data are also known to show heteroskedasticity, and the
- voom method was developed for modeling this heteroskedasticity by estimating
- the mean-variance trend in the data and using this trend to assign precision
- weights to each observation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2013"
- literal "false"
- \end_inset
- .
- While methylation array data are not derived from counts and have a very
- different mean-variance relationship from that of typical RNA-seq data,
- the voom method makes no specific assumptions on the shape of the mean-variance
- relationship - it only assumes that the relationship is smooth enough to
- model using a lowess curve.
- Hence, the method is sufficiently general to model the mean-variance relationsh
- ip in methylation array data.
- However, the standard implementation of voom assumes that the input is
- given in raw read counts, and it must be adapted to run on methylation
- M-values.
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Subsection
- Evaluation of classifier performance with different normalization methods
- \end_layout
- \begin_layout Standard
- For testing different expression microarray normalizations, a data set of
- 157 hgu133plus2 arrays was used, consisting of blood samples from kidney
- transplant patients whose grafts had been graded as TX, AR, or ADNR via
- biopsy and histology (46 TX, 69 AR, 42 ADNR)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Kurian2014"
- literal "true"
- \end_inset
- .
- Additionally, an external validation set of 75 samples was gathered from
- public GEO data (37 TX, 38 AR, no ADNR).
-
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status collapsed
- \begin_layout Plain Layout
- Find appropriate GEO identifiers if possible.
- Kurian 2014 says GSE15296, but this seems to be different data.
- I also need to look up the GEO accession for the external validation set.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To evaluate the effect of each normalization on classifier performance,
- the same classifier training and validation procedure was used after each
- normalization method.
- The PAM package was used to train a nearest shrunken centroid classifier
- on the training set and select the appropriate threshold for centroid shrinking.
- Then the trained classifier was used to predict the class probabilities
- of each validation sample.
- From these class probabilities, ROC curves and area-under-curve (AUC) values
- were generated
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Turck2011"
- literal "false"
- \end_inset
- .
- Each normalization was tested on two different sets of training and validation
- samples.
- For internal validation, the 115 TX and AR arrays in the internal set were
- split at random into two equal sized sets, one for training and one for
- validation, each containing the same numbers of TX and AR samples as the
- other set.
- For external validation, the full set of 115 TX and AR samples was used
- as a training set, and the 75 external TX and AR samples were used as the
- validation set.
- Thus, 2 ROC curves and AUC values were generated for each normalization
- method: one internal and one external.
- Because the external validation set contains no ADNR samples, only classificati
- on of TX and AR samples was considered.
- The ADNR samples were included during normalization but excluded from all
- classifier training and validation.
- This ensures that the performance on internal and external validation sets
- is directly comparable, since both are performing the same task: distinguishing
- TX from AR.
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status collapsed
- \begin_layout Plain Layout
- Summarize the get.best.threshold algorithm for PAM threshold selection
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Six different normalization strategies were evaluated.
- First, 2 well-known non-single-channel normalization methods were considered:
- RMA and dChip
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Li2001,Irizarry2003a"
- literal "false"
- \end_inset
- .
- Since RMA produces expression values on a log2 scale and dChip does not,
- the values from dChip were log2 transformed after normalization.
- Next, RMA and dChip followed by Global Rank-invariant Set Normalization
- (GRSN) were tested
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Pelz2008"
- literal "false"
- \end_inset
- .
- Post-processing with GRSN does not turn RMA or dChip into single-channel
- methods, but it may help mitigate batch effects and is therefore useful
- as a benchmark.
- Lastly, the two single-channel normalization methods, fRMA and SCAN, were
- tested
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010,Piccolo2012"
- literal "false"
- \end_inset
- .
- When evaluating internal validation performance, only the 157 internal samples
- were normalized; when evaluating external validation performance, all 157
- internal samples and 75 external samples were normalized together.
- \end_layout
- \begin_layout Standard
- For demonstrating the problem with separate normalization of training and
- validation data, one additional normalization was performed: the internal
- and external sets were each normalized separately using RMA, and the normalized
- data for each set were combined into a single set with no further attempts
- at normalizing between the two sets.
- This represents approximately how RMA would have to be used in a clinical
- setting, where the samples to be classified are not available at the time
- the classifier is trained.
- \end_layout
- \begin_layout Subsection
- Generating custom fRMA vectors for hthgu133pluspm array platform
- \end_layout
- \begin_layout Standard
- In order to enable fRMA normalization for the hthgu133pluspm array platform,
- custom fRMA normalization vectors were trained using the frmaTools package
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- Separate vectors were created for two types of samples: kidney graft biopsy
- samples and blood samples from graft recipients.
- For training, 341 kidney biopsy samples from 2 data sets and 965 blood
- samples from 5 data sets were used as the reference set.
- Arrays were grouped into batches based on unique combinations of sample
- type (blood or biopsy), diagnosis (TX, AR, etc.), data set, and scan date.
- Thus, each batch represents arrays of the same kind that were run together
- on the same day.
- For estimating the probe inverse variance weights, frmaTools requires equal-siz
- ed batches, which means a batch size must be chosen, and then batches smaller
- than that size must be ignored, while batches larger than the chosen size
- must be downsampled.
- This downsampling was performed randomly, so the sampling process was repeated
- 5 times and the resulting normalizations were compared to each other.
- \end_layout
- \begin_layout Standard
- To evaluate the consistency of the generated normalization vectors, the
- 5 fRMA vector sets generated from 5 random batch samplings were each used
- to normalize the same 20 randomly selected samples from each tissue.
- Then the normalized expression values for each probe on each array were
- compared across all normalizations.
- Each fRMA normalization was also compared against the normalized expression
- values obtained by normalizing the same 20 samples with ordinary RMA.
- \end_layout
- \begin_layout Subsection
- Modeling methylation array M-value heteroskedasticity in linear models with
- modified voom implementation
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Put code on Github and reference it.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To investigate whether DNA methylation could be used to distinguish
- between healthy and dysfunctional transplants, a data set of 78 Illumina
- 450k methylation arrays from human kidney graft biopsies was analyzed for
- differential methylation between 4 transplant statuses: healthy transplant
- (TX), transplants undergoing acute rejection (AR), acute dysfunction with
- no rejection (ADNR), and chronic allograft nephropathy (CAN).
- The data consisted of 33 TX, 9 AR, 8 ADNR, and 28 CAN samples.
- The uneven group sizes are a result of taking the biopsy samples before
- the eventual fate of the transplant was known.
- Each sample was additionally annotated with a donor ID (anonymized), Sex,
- Age, Ethnicity, Creatinine Level, and Diabetes diagnosis (all samples
- in this data set came from patients with either Type 1 or Type 2 diabetes).
-
- \end_layout
- \begin_layout Standard
- The intensity data were first normalized using subset-quantile within array
- normalization (SWAN)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Maksimovic2012"
- literal "false"
- \end_inset
- , then converted to intensity ratios (beta values)
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Aryee2014"
- literal "false"
- \end_inset
- .
- Any probes binding to loci that overlapped annotated SNPs were dropped,
- and the annotated sex of each sample was verified against the sex inferred
- from the ratio of median probe intensities for the X and Y chromosomes.
- Then, the ratios were transformed to M-values.
- \end_layout
- \begin_layout Standard
- From the M-values, a series of parallel analyses was performed, each adding
- additional steps into the model fit to accommodate a feature of the data.
- First, a
- \begin_inset Quotes eld
- \end_inset
- basic
- \begin_inset Quotes erd
- \end_inset
- linear modeling analysis was performed, compensating for known features
- of the data using existing tools.
- A design matrix was prepared including terms for the factor of interest
- as well as the known biological confounders: sex, age, ethnicity, and diabetes.
- Since some samples came from the same patients at different times, the intra-pat
- ient correlation was modeled as a random effect, estimating a shared correlation
- value across all probes
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Smyth2005a"
- literal "false"
- \end_inset
- .
- Then the linear model was fit, and the variance was modeled using empirical
- Bayes squeezing toward the mean-variance trend
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2015"
- literal "false"
- \end_inset
- .
- Finally, t-tests or F-tests were performed as appropriate for each test:
- t-tests for single contrasts, and F-tests for multiple contrasts.
- \end_layout
- \begin_layout Standard
- For the second analysis, surrogate variable analysis (SVA) was used to infer
- additional unobserved sources of heterogeneity in the data
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Leek2007"
- literal "false"
- \end_inset
- .
- These surrogate variables were added to the design matrix before fitting
- the linear model.
- In addition, sample quality weights were estimated from the data and used
- during linear modeling to down-weight the contribution of highly variable
- arrays while increasing the weight to arrays with lower variability
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Ritchie2006"
- literal "false"
- \end_inset
- .
- For the third analysis, the voom method was adapted to run on methylation
- array data and used to model the mean-variance trend as individual observation
- weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Law2013"
- literal "false"
- \end_inset
- , which were combined with the sample weights
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liu2015"
- literal "false"
- \end_inset
- .
- Each time weights were used, they were estimated once before estimating
- the random effect correlation value, and then the weights were re-estimated
- taking the random effect into account.
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Improve subsection titles in this section
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- fRMA eliminates unwanted dependence of classifier training on normalization
- strategy caused by RMA
- \end_layout
- \begin_layout Subsubsection
- Separate normalization with RMA introduces unwanted biases in classification
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/predplot.pdf
- width 100col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Classifier-probabilities-RMA"
- \end_inset
- \series bold
- Classifier probabilities on validation samples when normalized with RMA
- together vs.
- separately.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To demonstrate the problem with non-single-channel methods, we considered
- the problem of training a classifier to distinguish TX from AR using the
- samples from the internal set as training data, evaluating performance
- on the external set.
- First, training and evaluation were performed after normalizing all array
- samples together as a single set using RMA, and second, the internal samples
- were normalized separately from the external samples and the training and
- evaluation were repeated.
- For each sample in the validation set, the classifier probabilities from
- both classifiers were plotted against each other (Fig.
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Classifier-probabilities-RMA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- As expected, separate normalization biases the classifier probabilities,
- resulting in several misclassifications.
- In this case, the bias from separate normalization causes the classifier
- to assign a lower probability of AR to every sample.
-
- \end_layout
- \begin_layout Subsubsection
- fRMA and SCAN maintain classification performance while eliminating
- dependence on normalization strategy
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/ROC-TXvsAR-internal.pdf
- width 100col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ROC-PAM-int"
- \end_inset
- ROC curves for PAM on internal validation data using different normalization
- strategies
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status collapsed
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="7" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Normalization
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Single-channel?
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Internal Val.
- AUC
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- External Val.
- AUC
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- RMA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.852
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.713
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- dChip
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.891
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.657
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- RMA + GRSN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.816
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.750
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- dChip + GRSN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.875
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.642
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- fRMA
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.863
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.718
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- SCAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.853
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0.689
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:AUC-PAM"
- \end_inset
- \series bold
- AUC values for internal and external validation with 6 different normalization
- strategies.
- \series default
- Only fRMA and SCAN are single-channel normalizations.
- The other 4 normalizations are for comparison.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- For internal validation, the 6 methods' AUC values ranged from 0.816 to 0.891,
- as shown in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:AUC-PAM"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- Among the non-single-channel normalizations, dChip outperformed RMA, while
- GRSN reduced the AUC values for both dChip and RMA.
- Both single-channel methods, fRMA and SCAN, slightly outperformed RMA,
- with fRMA ahead of SCAN.
- However, the difference between RMA and fRMA is still quite small.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ROC-PAM-int"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows that the ROC curves for RMA, dChip, and fRMA look very similar and
- relatively smooth, while both GRSN curves and the curve for SCAN have a
- more jagged appearance.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/PAM/ROC-TXvsAR-external.pdf
- width 100col%
- groupId colwidth
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ROC-PAM-ext"
- \end_inset
- ROC curves for PAM on external validation data using different normalization
- strategies
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- For external validation, as expected, all the AUC values are lower than
- the internal validations, ranging from 0.642 to 0.750 (Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:AUC-PAM"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- With or without GRSN, RMA shows its dominance over dChip in this more challengi
- ng test.
- Unlike in the internal validation, GRSN actually improves the classifier
- performance for RMA, although it does not for dChip.
- Once again, both single-channel methods perform about on par with RMA,
- with fRMA performing slightly better and SCAN performing a bit worse.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ROC-PAM-ext"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows the ROC curves for the external validation test.
- As expected, none of them are as clean-looking as the internal validation
- ROC curves.
- The curves for RMA, RMA+GRSN, and fRMA all look similar, while the other
- curves look more divergent.
- \end_layout
- \begin_layout Subsection
- fRMA with custom-generated vectors enables normalization on hthgu133pluspm
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/batchsize_batches.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:batch-size-batches"
- \end_inset
- \series bold
- Effect of batch size selection on number of batches included in fRMA probe
- weight learning.
-
- \series default
- For batch sizes ranging from 3 to 15, the number of batches with at least
- that many samples was plotted for biopsy (BX) and blood (PAX) samples.
- The selected batch size, 5, is marked with a dotted vertical line.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/batchsize_samples.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:batch-size-samples"
- \end_inset
- \series bold
- Effect of batch size selection on number of samples included in fRMA probe
- weight learning.
-
- \series default
- For batch sizes ranging from 3 to 15, the number of samples included in
- probe weight training was plotted for biopsy (BX) and blood (PAX) samples.
- The selected batch size, 5, is marked with a dotted vertical line.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- In order to enable use of fRMA to normalize hthgu133pluspm, a custom set
- of fRMA vectors was created.
- First, an appropriate batch size was chosen by looking at the number of
- batches and number of samples included as a function of batch size (Figures
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:batch-size-batches"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:batch-size-samples"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , respectively).
- For a given batch size, all batches with fewer samples than the chosen
- size must be ignored during training, while larger batches must be randomly
- downsampled to the chosen size.
- Hence, the number of samples included for a given batch size equals the
- batch size times the number of batches with at least that many samples.
- From Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:batch-size-samples"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , it is apparent that a batch size of 8 maximizes the number of samples
- included in training.
- Increasing the batch size beyond this causes too many smaller batches to
- be excluded, reducing the total number of samples for both tissue types.
- However, a batch size of 8 is not necessarily optimal.
- The article introducing frmaTools concluded that it was highly advantageous
- to use a smaller batch size in order to include more batches, even at the
- expense of including fewer total samples in training
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2011"
- literal "false"
- \end_inset
- .
- To strike an appropriate balance between more batches and more samples,
- a batch size of 5 was chosen.
- For both blood and biopsy samples, this increased the number of batches
- included by 10, with only a modest reduction in the number of samples compared
- to a batch size of 8.
- With a batch size of 5, 26 batches of biopsy samples and 46 batches of
- blood samples were available.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/M-BX-violin.pdf
- lyxscale 40
- height 80theight%
- groupId m-violin
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:m-bx-violin"
- \end_inset
- \series bold
- Violin plot of log ratios between normalizations for 20 biopsy samples.
-
- \series default
- Each of 20 randomly selected biopsy samples was normalized with RMA and
- with 5 different sets of fRMA vectors.
- This shows the distribution of log ratios between normalized expression
- values, aggregated across all 20 arrays.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Since fRMA training requires equal-size batches, larger batches are downsampled
- randomly.
- This introduces a nondeterministic step in the generation of normalization
- vectors.
- To show that this randomness does not substantially change the outcome,
- the random downsampling and subsequent vector learning was repeated 5 times,
- with a different random seed each time.
- 20 samples were selected at random as a test set and normalized with each
- of the 5 sets of fRMA normalization vectors as well as ordinary RMA, and
- the normalized expression values were compared across normalizations.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows a summary of these comparisons for biopsy samples.
- Comparing RMA to each of the 5 fRMA normalizations, the distribution of
- log ratios is somewhat wide, indicating that the normalizations disagree
- on the expression values of a fair number of probe sets.
- In contrast, in comparisons of fRMA against fRMA, the vast majority of probe
- sets have very small log ratios, indicating a very high agreement between
- the normalized values generated by the two normalizations.
- This shows that the fRMA normalization's behavior is not very sensitive
- to the random downsampling of larger batches during training.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-BX-RMA.fRMA.pdf
- lyxscale 50
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ma-bx-rma-frma"
- \end_inset
- \series bold
- Representative MA plot comparing RMA against fRMA for 20 biopsy samples.
-
- \series default
- Averages and log ratios were computed for every probe in each of 20 biopsy
- samples between RMA normalization and fRMA.
- Density of points is represented by darkness of shading, and individual
- outlier points are plotted.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-BX-fRMA.fRMA.pdf
- lyxscale 50
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:ma-bx-frma-frma"
- \end_inset
- \series bold
- Representative MA plot comparing different fRMA vectors for 20 biopsy samples.
-
- \series default
- Averages and log ratios were computed for every probe in each of 20 biopsy
- samples between fRMA normalizations using vectors from two different batch
- samplings.
- Density of points is represented by darkness of shading, and individual
- outlier points are plotted.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ma-bx-rma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows an MA plot of the RMA-normalized values against the fRMA-normalized
- values for the same probe sets and arrays, corresponding to the first row
- of Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- This MA plot shows that not only is there a wide distribution of M-values,
- but the trend of M-values is dependent on the average normalized intensity.
- This is expected, since the overall trend represents the differences in
- the quantile normalization step.
- When running RMA, only the quantiles for these specific 20 arrays are used,
- while for fRMA the quantile distribution is taken from all arrays used
- in training.
- Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:ma-bx-frma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- shows a similar MA plot comparing 2 different fRMA normalizations, correspondin
- g to the 6th row of Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-bx-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The MA plot is very tightly centered around zero with no visible trend.
- Figures
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:m-pax-violin"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ,
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-PAX-rma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-PAX-frma-frma"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- show exactly the same information for the blood samples, once again comparing
- the normalized expression values between normalizations for all probe sets
- across 20 randomly selected test arrays.
- Once again, there is a wider distribution of log ratios between RMA-normalized
- values and fRMA-normalized values, and a much tighter distribution when comparing
- different fRMA normalizations to each other, indicating that the fRMA training
- process is robust to random batch downsampling for the blood samples as
- well.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/M-PAX-violin.pdf
- lyxscale 40
- height 80theight%
- groupId m-violin
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:m-pax-violin"
- \end_inset
- \series bold
- Violin plot of log ratios between normalizations for 20 blood samples.
-
- \series default
- Each of 20 randomly selected blood samples was normalized with RMA and with
- 5 different sets of fRMA vectors.
- This shows the distribution of log ratios between normalized expression
- values, aggregated across all 20 arrays.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-PAX-RMA.fRMA.pdf
- lyxscale 50
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-PAX-rma-frma"
- \end_inset
- \series bold
- Representative MA plot comparing RMA against fRMA for 20 blood samples.
-
- \series default
- Averages and log ratios were computed for every probe in each of 20 blood
- samples between RMA normalization and fRMA.
- Density of points is represented by darkness of shading, and individual
- outlier points are plotted.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/frma-pax-bx/MA-PAX-fRMA.fRMA.pdf
- lyxscale 50
- groupId ma-frma
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-PAX-frma-frma"
- \end_inset
- \series bold
- Representative MA plot comparing different fRMA vectors for 20 blood samples.
-
- \series default
- Averages and log ratios were computed for every probe in each of 20 blood
- samples between fRMA normalizations using vectors from two different batch
- samplings.
- Density of points is represented by darkness of shading, and individual
- outlier points are plotted.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Subsection
- Adapting voom to methylation array data improves model fit
- \end_layout
- \begin_layout Itemize
- voom, precision weights, and sva improved model fit
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Also increased sensitivity for detecting differential methylation
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Figure showing (a) heteroskedasticity without voom, (b) voom-modeled mean-variance
- trend, and (c) homoskedastic mean-variance trend after running voom
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Write figure legends
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupCor/meanvar-trends-PAGE1-RASTER.png
- lyxscale 15
- groupId raster-600ppi
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-basic"
- \end_inset
- Mean-variance trend with no SVA or weights
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupcor.sva.aw/meanvar-trends-PAGE1-RASTER.png
- lyxscale 15
- groupId raster-600ppi
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-sva-aw"
- \end_inset
- Mean-variance trend with SVA and sample quality weights.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupCor.sva.voomaw/meanvar-trends-PAGE1-RASTER.png
- lyxscale 15
- groupId raster-600ppi
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:voom-sva-voomaw"
- \end_inset
- Mean-variance trend modelled by voom, with SVA and sample weights.
-
- \series default
- The y-axis is the square root of the standard deviation for each probe,
- because this is the scale on which voom fits its lowess curve.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Graphics
- filename graphics/methylvoom/unadj.dupCor.sva.voomaw/meanvar-trends-PAGE2-RASTER.png
- lyxscale 15
- groupId raster-600ppi
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:meanvar-sva-voomaw"
- \end_inset
- Residual mean-variance trend after modeling with SVA, sample weights, and
- voom.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="2">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Covariate
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- p-value
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Transplant Status
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.404
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Diabetes Diagnosis
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.00106
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Sex
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.148
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Age
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0.212
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:weight-covariate-tests"
- \end_inset
- Association of sample weights with clinical covariates.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Contrast
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs AR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 25
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 22
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs ADNR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 7
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 338
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 369
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs CAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 231
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 278
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:methyl-num-signif"
- \end_inset
- \series bold
- Number of probes significant at 10% FDR for each contrast in each analysis.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Cite the pi0 estimation method from propTrueNull
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="4">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Analysis
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- Contrast
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- A
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- B
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- C
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs AR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 10,063
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 11,225
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs ADNR
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 27
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 12,674
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 13,086
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- TX vs CAN
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 966
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 20,039
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- 20,955
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:methyl-est-nonnull"
- \end_inset
- \series bold
- Estimated number of non-null tests for each contrast in each analysis.
- \end_layout
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Re-generate p-value histograms for all relevant contrasts in a single figure.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Subsection
- fRMA achieves clinically applicable normalization without sacrificing classifica
- tion performance
- \end_layout
- \begin_layout Standard
- As shown in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Classifier-probabilities-RMA"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , improper normalization, particularly separate normalization of training
- and test samples, leads to unwanted biases in classification.
- In a controlled experimental context, it is always possible to correct
- this issue by normalizing all experimental samples together.
- However, because it is not feasible to normalize all samples together in
- a clinical context, a single-channel normalization is required.
-
- \end_layout
- \begin_layout Standard
- The major concern in using a single-channel normalization is that non-single-cha
- nnel methods can share information between arrays to improve the normalization,
- and single-channel methods risk sacrificing the gains in normalization
- accuracy that come from this information sharing.
- In the case of RMA, this information sharing is accomplished through quantile
- normalization and median polish steps.
- The need for information sharing in quantile normalization can easily be
- removed by learning a fixed set of quantiles from external data and normalizing
- each array to these fixed quantiles, instead of the quantiles of the data
- itself.
- As long as the fixed quantiles are reasonable, the result will be similar
- to standard RMA.
- However, there is no analogous way to eliminate cross-array information
- sharing in the median polish step, so fRMA replaces this with a weighted
- average of probes on each array, with the weights learned from external
- data.
- This step of fRMA has the greatest potential to diverge from RMA in undesirable
- ways.
- \end_layout
- \begin_layout Standard
- However, when run on real data, fRMA performed at least as well as RMA in
- both the internal validation and external validation tests.
- This shows that fRMA can be used to normalize individual clinical samples
- in a class prediction context without sacrificing the classifier performance
- that would be obtained by using the more well-established RMA for normalization.
- The other single-channel normalization method considered, SCAN, showed
- some loss of AUC in the external validation test.
- Based on these results, fRMA is the preferred normalization for clinical
- samples in a class prediction context.
- \end_layout
- \begin_layout Subsection
- Robust fRMA vectors can be generated for new array platforms
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Look up the exact numbers, do a find & replace for
- \begin_inset Quotes eld
- \end_inset
- 850
- \begin_inset Quotes erd
- \end_inset
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- The published fRMA normalization vectors for the hgu133plus2 platform were
- generated from a set of about 850 samples chosen from a wide range of tissues,
- which the authors determined was sufficient to generate a robust set of
- normalization vectors that could be applied across all tissues
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCall2010"
- literal "false"
- \end_inset
- .
- Since we only had hthgu133pluspm samples for 2 tissues of interest, our needs were
- more modest.
- Even using only 130 samples in 26 batches of 5 samples each for kidney
- biopsies, we were able to train a robust set of fRMA normalization vectors
- that were not meaningfully affected by the random selection of 5 samples
- from each batch.
- As expected, the training process was just as robust for the blood samples
- with 230 samples in 46 batches of 5 samples each.
- Because these vectors were each generated using training samples from a
- single tissue, they are not suitable for general use, unlike the vectors
- provided with fRMA itself.
- They are purpose-built for normalizing a specific type of sample on a specific
- platform.
- \end_layout
- \begin_layout Subsection
- voom
- \end_layout
- \begin_layout Itemize
- Methods like voom designed for RNA-seq can also help with array analysis
- \end_layout
- \begin_layout Itemize
- Extracting and modeling confounders common to many features improves model
- correspondence to known biology
- \end_layout
- \begin_layout Chapter
- Globin-blocking for more effective blood RNA-seq analysis in primate animal
- model
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Choose between above and the paper title: Optimizing yield of deep RNA sequencin
- g for gene expression profiling by globin reduction of peripheral blood
- samples from cynomolgus monkeys (Macaca fascicularis).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Chapter author list: https://tex.stackexchange.com/questions/156862/displaying-aut
- hor-for-each-chapter-in-book Every chapter gets an author list, which may
- or may not be part of a citation to a published/preprinted paper.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Preprint then cite the paper
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section*
- Abstract
- \end_layout
- \begin_layout Paragraph
- Background
- \end_layout
- \begin_layout Standard
- Primate blood contains high concentrations of globin messenger RNA.
- Globin reduction is a standard technique used to improve the expression
- results obtained by DNA microarrays on RNA from blood samples.
- However, with whole transcriptome RNA-sequencing (RNA-seq) quickly replacing
- microarrays for many applications, the impact of globin reduction for RNA-seq
- has not been previously studied.
- Moreover, no off-the-shelf kits are available for globin reduction in nonhuman
- primates.
-
- \end_layout
- \begin_layout Paragraph
- Results
- \end_layout
- \begin_layout Standard
- Here we report a protocol for RNA-seq in primate blood samples that uses
- complementary oligonucleotides to block reverse transcription of the alpha
- and beta globin genes.
- In test samples from cynomolgus monkeys (Macaca fascicularis), this globin
- blocking protocol approximately doubles the yield of informative (non-globin)
- reads by greatly reducing the fraction of globin reads, while also improving
- the consistency in sequencing depth between samples.
- The increased yield enables detection of about 2000 more genes, significantly
- increases the correlation in measured gene expression levels between samples,
- and increases the sensitivity of differential gene expression tests.
- \end_layout
- \begin_layout Paragraph
- Conclusions
- \end_layout
- \begin_layout Standard
- These results show that globin blocking significantly improves the cost-effectiv
- eness of mRNA sequencing in primate blood samples by doubling the yield
- of useful reads, allowing detection of more genes, and improving the precision
- of gene expression measurements.
- Based on these results, a globin reducing or blocking protocol is recommended
- for all RNA-seq studies of primate blood samples.
- \end_layout
- \begin_layout Section
- Approach
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Plain Layout
- Consider putting some of this in the Intro chapter
- \end_layout
- \begin_layout Itemize
- Cynomolgus monkeys as a model organism
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Highly related to humans
- \end_layout
- \begin_layout Itemize
- Small size and short life cycle - good research animal
- \end_layout
- \begin_layout Itemize
- Genomics resources still in development
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Inadequacy of existing blood RNA-seq protocols
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- Existing protocols use a separate globin pulldown step, slowing down processing
- \end_layout
- \end_deeper
- \end_inset
- \end_layout
- \begin_layout Standard
- Increasingly, researchers are turning to high-throughput mRNA sequencing
- technologies (RNA-seq) in preference to expression microarrays for analysis
- of gene expression
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mutz2012"
- literal "false"
- \end_inset
- .
- The advantages are even greater for study of model organisms with no well-estab
- lished array platforms available, such as the cynomolgus monkey (Macaca
- fascicularis).
- High fractions of globin mRNA are naturally present in mammalian peripheral
- blood samples (up to 70% of total mRNA) and these are known to interfere
- with the results of array-based expression profiling
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Winn2010"
- literal "false"
- \end_inset
- .
- The importance of globin reduction for RNA-seq of blood has only been evaluated
- for a deepSAGE protocol on human samples
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- In the present report, we evaluated globin reduction using custom blocking
- oligonucleotides for deep RNA-seq of peripheral blood samples from a nonhuman
- primate, cynomolgus monkey, using the Illumina technology platform.
- We demonstrate that globin reduction significantly improves the cost-effectiven
- ess of RNA-seq in blood samples.
- Thus, our protocol offers a significant advantage to any investigator planning
- to use RNA-seq for gene expression profiling of nonhuman primate blood
- samples.
- Our method can be generally applied to any species by designing complementary
- oligonucleotide blocking probes to the globin gene sequences of that species.
- Indeed, any highly expressed but biologically uninformative transcripts
- can also be blocked to further increase sequencing efficiency and value
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Arnaud2016"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Section
- Methods
- \end_layout
- \begin_layout Subsection*
- Sample collection
- \end_layout
- \begin_layout Standard
- All research reported here was done under IACUC-approved protocols at the
- University of Miami and complied with all applicable federal and state
- regulations and ethical principles for nonhuman primate research.
- Blood draws occurred between 16 April 2012 and 18 June 2015.
- The experimental system involved intrahepatic pancreatic islet transplantation
- into Cynomolgus monkeys with induced diabetes mellitus with or without
- concomitant infusion of mesenchymal stem cells.
- Blood was collected at serial time points before and after transplantation
- into PAXgene Blood RNA tubes (PreAnalytiX/Qiagen, Valencia, CA) at the
- precise volume:volume ratio of 2.5 ml whole blood into 6.9 ml of PAXgene
- additive.
- \end_layout
- \begin_layout Subsection*
- Globin Blocking
- \end_layout
- \begin_layout Standard
- Four oligonucleotides were designed to hybridize to the 3’ end of the transcript
- s for Cynomolgus HBA1, HBA2 and HBB, with two hybridization sites for HBB
- and 2 sites for HBA (the chosen sites were identical in both HBA genes).
- All oligos were purchased from Sigma and were entirely composed of 2’O-Me
- bases with a C3 spacer positioned at the 3’ ends to prevent any polymerase
- mediated primer extension.
- \end_layout
- \begin_layout Quote
- HBA1/2 site 1: GCCCACUCAGACUUUAUUCAAAG-C3spacer
- \end_layout
- \begin_layout Quote
- HBA1/2 site 2: GGUGCAAGGAGGGGAGGAG-C3spacer
- \end_layout
- \begin_layout Quote
- HBB site 1: AAUGAAAAUAAAUGUUUUUUAUUAG-C3spacer
- \end_layout
- \begin_layout Quote
- HBB site 2: CUCAAGGCCCUUCAUAAUAUCCC-C3spacer
- \end_layout
- \begin_layout Subsection*
- RNA-seq Library Preparation
- \end_layout
- \begin_layout Standard
- Sequencing libraries were prepared with 200ng total RNA from each sample.
- Polyadenylated mRNA was selected from 200 ng aliquots of cynomolgus blood-deri
- ved total RNA using Ambion Dynabeads Oligo(dT)25 beads (Invitrogen) following
- manufacturer’s recommended protocol.
- PolyA selected RNA was then combined with 8 pmol of HBA1/2 (site 1), 8
- pmol of HBA1/2 (site 2), 12 pmol of HBB (site 1) and 12 pmol of HBB (site
- 2) oligonucleotides.
- In addition, 20 pmol of RT primer containing a portion of the Illumina
- adapter sequence (B-oligo-dTV: GAGTTCCTTGGCACCCGAGAATTCCATTTTTTTTTTTTTTTTTTTV)
- and 4 µL of 5X First Strand buffer (250 mM Tris-HCl pH 8.3, 375 mM KCl,
- 15mM MgCl2) were added in a total volume of 15 µL.
- The RNA was fragmented by heating this cocktail for 3 minutes at 95°C and
- then placed on ice.
- This was followed by the addition of 2 µL 0.1 M DTT, 1 µL RNaseOUT, 1 µL
- 10mM dNTPs 10% biotin-16 aminoallyl-2’- dUTP and 10% biotin-16 aminoallyl-2’-
- dCTP (TriLink Biotech, San Diego, CA), 1 µL Superscript II (200U/ µL, Thermo-Fi
- sher).
- A second “unblocked” library was prepared in the same way for each sample
- but replacing the blocking oligos with an equivalent volume of water.
- The reaction was carried out at 25°C for 15 minutes and 42°C for 40 minutes,
- followed by incubation at 75°C for 10 minutes to inactivate the reverse
- transcriptase.
- \end_layout
- \begin_layout Standard
- The cDNA/RNA hybrid molecules were purified using 1.8X Ampure XP beads (Agencourt
- ) following supplier’s recommended protocol.
- The cDNA/RNA hybrid was eluted in 25 µL of 10 mM Tris-HCl pH 8.0, and then
- bound to 25 µL of M280 Magnetic Streptavidin beads washed per recommended
- protocol (Thermo-Fisher).
- After 30 minutes of binding, beads were washed one time in 100 µL 0.1N NaOH
- to denature and remove the bound RNA, followed by two 100 µL washes with
- 1X TE buffer.
- \end_layout
- \begin_layout Standard
- Subsequent attachment of the 5-prime Illumina A adapter was performed by
- on-bead random primer extension of the following sequence (A-N8 primer:
- TTCAGAGTTCTACAGTCCGACGATCNNNNNNNN).
- Briefly, beads were resuspended in a 20 µL reaction containing 5 µM A-N8
- primer, 40mM Tris-HCl pH 7.5, 20mM MgCl2, 50mM NaCl, 0.325U/µL Sequenase
- 2.0 (Affymetrix, Santa Clara, CA), 0.0025U/µL inorganic pyrophosphatase (Affymetr
- ix) and 300 µM each dNTP.
- Reaction was incubated at 22°C for 30 minutes, then beads were washed 2
- times with 1X TE buffer (200µL).
- \end_layout
- \begin_layout Standard
- The magnetic streptavidin beads were resuspended in 34 µL nuclease-free
- water and added directly to a PCR tube.
- The two Illumina protocol-specified PCR primers were added at 0.53 µM (Illumina
- TruSeq Universal Primer 1 and Illumina TruSeq barcoded PCR primer 2), along
- with 40 µL 2X KAPA HiFi Hotstart ReadyMix (KAPA, Wilmington, MA) and thermocycl
- ed as follows: starting with 98°C (2 min-hold); 15 cycles of 98°C, 20sec;
- 60°C, 30sec; 72°C, 30sec; and finished with a 72°C (2 min-hold).
- \end_layout
- \begin_layout Standard
- PCR products were purified with 1X Ampure Beads following manufacturer’s
- recommended protocol.
- Libraries were then analyzed using the Agilent TapeStation and quantitation
- of desired size range was performed by “smear analysis”.
- Samples were pooled in equimolar batches of 16 samples.
- Pooled libraries were size selected on 2% agarose gels (E-Gel EX Agarose
- Gels; Thermo-Fisher).
- Products were cut between 250 and 350 bp (corresponding to insert sizes
- of 130 to 230 bp).
- Finished library pools were then sequenced on the Illumina NextSeq500 instrumen
- t with 75 base read lengths.
-
- \end_layout
- \begin_layout Subsection*
- Read alignment and counting
- \end_layout
- \begin_layout Standard
- Reads were aligned to the cynomolgus genome using STAR
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Dobin2013,Wilson2013"
- literal "false"
- \end_inset
- .
- Counts of uniquely mapped reads were obtained for every gene in each sample
- with the “featureCounts” function from the Rsubread package, using each
- of the three possibilities for the “strandSpecific” option: sense, antisense,
- and unstranded
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Liao2014"
- literal "false"
- \end_inset
- .
- A few artifacts in the cynomolgus genome annotation complicated read counting.
- First, no ortholog is annotated for alpha globin in the cynomolgus genome,
- presumably because the human genome has two alpha globin genes with nearly
- identical sequences, making the orthology relationship ambiguous.
- However, two loci in the cynomolgus genome are annotated as “hemoglobin subunit alpha-lik
- e” (LOC102136192 and LOC102136846).
- LOC102136192 is annotated as a pseudogene while LOC102136846 is annotated
- as protein-coding.
- Our globin reduction protocol was designed to include blocking of these
- two genes.
- Indeed, these two genes have almost the same read counts in each library
- as the properly-annotated HBB gene and much larger counts than any other
- gene in the unblocked libraries, giving confidence that reads derived from
- the real alpha globin are mapping to both genes.
- Thus, reads from both of these loci were counted as alpha globin reads
- in all further analyses.
- The second artifact is a small, uncharacterized non-coding RNA gene (LOC1021365
- 91), which overlaps the HBA-like gene (LOC102136192) on the opposite strand.
- If counting is not performed in stranded mode (or if a non-strand-specific
- sequencing protocol is used), many reads mapping to the globin gene will
- be discarded as ambiguous due to their overlap with this ncRNA gene, resulting
- in significant undercounting of globin reads.
- Therefore, stranded sense counts were used for all further analysis in
- the present study to ensure that we accurately accounted for globin transcript
- reduction.
- However, we note that stranded reads are not necessary for RNA-seq using
- our protocol in standard practice.
-
- \end_layout
- \begin_layout Subsection*
- Normalization and Exploratory Data Analysis
- \end_layout
- \begin_layout Standard
- Libraries were normalized by computing scaling factors using the edgeR package’s
- Trimmed Mean of M-values method
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Robinson2010"
- literal "false"
- \end_inset
- .
- Log2 counts per million values (logCPM) were calculated using the cpm function
- in edgeR for individual samples and aveLogCPM function for averages across
- groups of samples, using those functions’ default prior count values to
- avoid taking the logarithm of 0.
- Genes were considered “present” if their average normalized logCPM values
- across all libraries were at least -1.
- Normalizing for gene length was unnecessary because the sequencing protocol
- is 3’-biased and hence the expected read count for each gene is related
- to the transcript’s copy number but not its length.
- \end_layout
- \begin_layout Standard
- In order to assess the effect of blocking on reproducibility, Pearson and
- Spearman correlation coefficients were computed between the logCPM values
- for every pair of libraries within the globin-blocked (GB) and unblocked
- (non-GB) groups, and edgeR's “estimateDisp” function was used to compute
- negative binomial dispersions separately for the two groups
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Chen2014"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Subsection*
- Differential Expression Analysis
- \end_layout
- \begin_layout Standard
- All tests for differential gene expression were performed using edgeR, by
- first fitting a negative binomial generalized linear model to the counts
- and normalization factors and then performing a quasi-likelihood F-test
- with robust estimation of outlier gene dispersions
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Lund2012,Phipson2016"
- literal "false"
- \end_inset
- .
- To investigate the effects of globin blocking on each gene, an additive
- model was fit to the full data with coefficients for globin blocking and
- SampleID.
- To test the effect of globin blocking on detection of differentially expressed
- genes, the GB samples and non-GB samples were each analyzed independently
- as follows: for each animal with both a pre-transplant and a post-transplant
- time point in the data set, the pre-transplant sample and the earliest
- post-transplant sample were selected, and all others were excluded, yielding
- a pre-/post-transplant pair of samples for each animal (N=7 animals with
- paired samples).
- These samples were analyzed for pre-transplant vs.
- post-transplant differential gene expression while controlling for inter-animal
- variation using an additive model with coefficients for transplant and
- animal ID.
- In all analyses, p-values were adjusted using the Benjamini-Hochberg procedure
- for FDR correction
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Benjamini1995"
- literal "false"
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- \begin_inset Note Note
- status open
- \begin_layout Itemize
- New blood RNA-seq protocol to block reverse transcription of globin genes
- \end_layout
- \begin_layout Itemize
- Blood RNA-seq time course after transplants with/without MSC infusion
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Section
- Results
- \end_layout
- \begin_layout Subsection*
- Globin blocking yields a larger and more consistent fraction of useful reads
- \end_layout
- \begin_layout Standard
- The objective of the present study was to validate a new protocol for deep
- RNA-seq of whole blood drawn into PaxGene tubes from cynomolgus monkeys
- undergoing islet transplantation, with particular focus on minimizing the
- loss of useful sequencing space to uninformative globin reads.
- The details of the analysis with respect to transplant outcomes and the
- impact of mesenchymal stem cell treatment will be reported in a separate
- manuscript (in preparation).
- To focus on the efficacy of our globin blocking protocol, 37 blood samples,
- 16 from pre-transplant and 21 from post-transplant time points, were each
- prepped once with and once without globin blocking oligos, and were then
- sequenced on an Illumina NextSeq500 instrument.
- The number of reads aligning to each gene in the cynomolgus genome was
- counted.
- Table 1 summarizes the distribution of read fractions among the GB and
- non-GB libraries.
- In the libraries with no globin blocking, globin reads made up an average
- of 44.6% of total input reads, while reads assigned to all other genes made
- up an average of 26.3%.
- The remaining reads either aligned to intergenic regions (that include
- long non-coding RNAs) or did not align with any annotated transcripts in
- the current build of the cynomolgus genome.
- In the GB libraries, globin reads made up only 3.48% and reads assigned
- to all other genes increased to 50.4%.
- Thus, globin blocking resulted in a 92.2% reduction in globin reads and
- a 91.6% increase in yield of useful non-globin reads.
- \end_layout
- \begin_layout Standard
- This reduction is not quite as efficient as the previous analysis showed
- for human samples by DeepSAGE (<0.4% globin reads after globin reduction)
-
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- Nonetheless, this degree of globin reduction is sufficient to nearly double
- the yield of useful reads.
- Thus, globin blocking cuts the required sequencing effort (and costs) to
- achieve a target coverage depth by almost 50%.
- Consistent with this near doubling of yield, the average difference in
- un-normalized logCPM across all genes between the GB libraries and non-GB
- libraries is approximately 1 (mean = 1.01, median = 1.08), an overall 2-fold
- increase.
- Un-normalized values are used here because the TMM normalization correctly
- identifies this 2-fold difference as biologically irrelevant and removes
- it.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure1 - globin-fractions.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Fraction of genic reads in each sample aligned to non-globin genes, with
- and without globin blocking (GB).
-
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Fraction-of-genic-reads"
- \end_inset
- Fraction of genic reads in each sample aligned to non-globin genes, with
- and without globin blocking (GB).
- \series default
- All reads in each sequencing library were aligned to the cyno genome, and
- the number of reads uniquely aligning to each gene was counted.
- For each sample, counts were summed separately for all globin genes and
- for the remainder of the genes (non-globin genes), and the fraction of
- genic reads aligned to non-globin genes was computed.
- Each point represents an individual sample.
- Gray + signs indicate the means for globin-blocked libraries and unblocked
- libraries.
- The overall distribution for each group is represented as a notched box
- plot.
- Points are randomly spread vertically to avoid excessive overlapping.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- placement p
- wide false
- sideways true
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="4" columns="7">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Percent of Total Reads
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Percent of Genic Reads
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- GB
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Non-globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- All Genic Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- All Aligned Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Non-globin Reads
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Globin Reads
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- Yes
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 50.4% ± 6.82
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 3.48% ± 2.94
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 53.9% ± 6.81
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 89.7% ± 2.40
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 93.5% ± 5.25
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 6.49% ± 5.25
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- No
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 26.3% ± 8.95
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 44.6% ± 16.6
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 70.1% ± 9.38
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 90.7% ± 5.16
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 38.8% ± 17.1
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 61.2% ± 17.1
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Fractions of reads mapping to genomic features in GB and non-GB samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Fractions-of-reads"
- \end_inset
- Fractions of reads mapping to genomic features in GB and non-GB samples.
-
- \series default
- All values are given as mean ± standard deviation.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Another important aspect is that the standard deviations in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Fractions-of-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- are uniformly smaller in the GB samples than the non-GB ones, indicating
- much greater consistency of yield.
- This is best seen in the percentage of non-globin reads as a fraction of
- total reads aligned to annotated genes (genic reads).
- For the non-GB samples, this measure ranges from 10.9% to 80.9%, while for
- the GB samples it ranges from 81.9% to 99.9% (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Fraction-of-genic-reads"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- This means that for applications where it is critical that each sample
- achieve a specified minimum coverage in order to provide useful information,
- it would be necessary to budget up to 10 times the sequencing depth per
- sample without globin blocking, even though the average yield improvement
- for globin blocking is only 2-fold, because every sample has a chance of
- being 90% globin and 10% useful reads.
- Hence, the more consistent behavior of GB samples makes planning an experiment
- easier and more efficient because it eliminates the need to over-sequence
- every sample in order to guard against the worst case of a high-globin
- fraction.
- \end_layout
- \begin_layout Subsection*
- Globin blocking lowers the noise floor and allows detection of about 2000
- more genes
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Remove redundant titles from figures
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure2 - aveLogCPM-colored.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Distributions of average group gene abundances when normalized separately
- or together.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:logcpm-dists"
- \end_inset
- Distributions of average group gene abundances when normalized separately
- or together.
- \series default
- All reads in each sequencing library were aligned to the cyno genome, and
- the number of reads uniquely aligning to each gene was counted.
- Genes with zero counts in all libraries were discarded.
- Libraries were normalized using the TMM method.
- Libraries were split into globin-blocked (GB) and non-GB groups and the
- average abundance for each gene in both groups, measured in log2 counts
- per million reads counted, was computed using the aveLogCPM function.
- The distribution of average gene logCPM values was plotted for both groups
- using a kernel density plot to approximate a continuous distribution.
- The logCPM GB distributions are marked in red, non-GB in blue.
- The black vertical line denotes the chosen detection threshold of -1.
- Top panel: Libraries were split into GB and non-GB groups first and normalized
- separately.
- Bottom panel: Libraries were all normalized together first and then split
- into groups.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Since globin blocking yields more usable sequencing depth, it should also
- allow detection of more genes at any given threshold.
- When we looked at the distribution of average normalized logCPM values
- across all libraries for genes with at least one read assigned to them,
- we observed the expected bimodal distribution, with a high-abundance "signal"
- peak representing detected genes and a low-abundance "noise" peak representing
- genes whose read count did not rise above the noise floor (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Consistent with the 2-fold increase in raw counts assigned to non-globin
- genes, the signal peak for GB samples is shifted to the right relative
- to the non-GB signal peak.
- When all the samples are normalized together, this difference is normalized
- out, lining up the signal peaks, and this reveals that, as expected, the
- noise floor for the GB samples is about 2-fold lower.
- This greater separation between signal and noise peaks in the GB samples
- means that low-expression genes should be more easily detected and more
- precisely quantified than in the non-GB samples.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure3 - detection.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Gene detections as a function of abundance thresholds in globin-blocked
- (GB) and non-GB samples.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:Gene-detections"
- \end_inset
- Gene detections as a function of abundance thresholds in globin-blocked
- (GB) and non-GB samples.
- \series default
- Average abundance (logCPM,
- \begin_inset Formula $\log_{2}$
- \end_inset
- counts per million reads counted) was computed by separate group normalization
- as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- for both the GB and non-GB groups, as well as for all samples considered
- as one large group.
- For every integer threshold from -2 to 3, the number of genes detected
- at or above that logCPM threshold was plotted for each group.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- Based on these distributions, we selected a detection threshold of -1, which
- is approximately the leftmost edge of the trough between the signal and
- noise peaks.
- This represents the most liberal possible detection threshold that doesn't
- call substantial numbers of noise genes as detected.
- Among the full dataset, 13429 genes were detected at this threshold, and
- 22276 were not.
- When considering the GB libraries and non-GB libraries separately and re-comput
- ing normalization factors independently within each group, 14535 genes were
- detected in the GB libraries while only 12460 were detected in the non-GB
- libraries.
- Thus, GB allowed the detection of approximately 2000 extra genes that were buried under
- the noise floor without GB.
- This pattern of at least 2000 additional genes detected with GB was also
- consistent across a wide range of possible detection thresholds, from -2
- to 3 (see Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:Gene-detections"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- \end_layout
- \begin_layout Subsection*
- Globin blocking does not add significant additional noise or decrease sample
- quality
- \end_layout
- \begin_layout Standard
- One potential worry is that the globin blocking protocol could perturb the
- levels of non-globin genes.
- There are two kinds of possible perturbations: systematic and random.
- The former is not a major concern for detection of differential expression,
- since a 2-fold change in every sample has no effect on the relative fold
- change between samples.
- In contrast, random perturbations would increase the noise and obscure
- the signal in the dataset, reducing the capacity to detect differential
- expression.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure4 - maplot-colored.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- MA plot showing effects of globin blocking on each gene's abundance.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:MA-plot"
- \end_inset
- \series bold
- MA plot showing effects of globin blocking on each gene's abundance.
-
- \series default
- All libraries were normalized together as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and genes with an average logCPM below -1 were filtered out.
- Each remaining gene was tested for differential abundance with respect
- to globin blocking (GB) using edgeR’s quasi-likelihood F-test, fitting a
- negative binomial generalized linear model to the table of read counts in each
- library.
- For each gene, edgeR reported average abundance (logCPM),
- \begin_inset Formula $\log_{2}$
- \end_inset
- fold change (logFC), p-value, and Benjamini-Hochberg adjusted false discovery
- rate (FDR).
- Each gene's logFC was plotted against its logCPM, colored by FDR.
- Red points are significant at ≤10% FDR, and blue are not significant at
- that threshold.
- The alpha and beta globin genes targeted for blocking are marked with large
- triangles, while all other genes are represented as small points.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Standardize on
- \begin_inset Quotes eld
- \end_inset
- log2
- \begin_inset Quotes erd
- \end_inset
- notation
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- The data do indeed show small systematic perturbations in gene levels (Figure
-
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:MA-plot"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- Other than the 3 designated alpha and beta globin genes, two other genes
- stand out as having especially large negative log fold changes: HBD and
- LOC1021365.
- HBD, delta globin, is most likely targeted by the blocking oligos due to
- high sequence homology with the other globin genes.
- LOC1021365 is the aforementioned ncRNA that is reverse-complementary to
- one of the alpha-like genes and that would be expected to be removed during
- the globin blocking step.
- All other genes appear in a cluster centered vertically at 0, and the vast
- majority of genes in this cluster show an absolute log2(FC) of 0.5 or less.
- Nevertheless, many of these small perturbations are still statistically
- significant, indicating that the globin blocking oligos likely cause very
- small but non-zero systematic perturbations in measured gene expression
- levels.
- \end_layout
- \begin_layout Standard
- \begin_inset Float figure
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Graphics
- filename graphics/Globin Paper/figure5 - corrplot.pdf
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status collapsed
- \begin_layout Plain Layout
- Comparison of inter-sample gene abundance correlations with and without
- globin blocking.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "fig:gene-abundance-correlations"
- \end_inset
- Comparison of inter-sample gene abundance correlations with and without
- globin blocking (GB).
- \series default
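- All libraries were normalized together as described in Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:logcpm-dists"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- , and genes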
- with an average abundance (logCPM, log2 counts per million reads counted)
- less than -1 were filtered out.
- Each gene’s logCPM was computed in each library using the edgeR cpm function.
- For each pair of biological samples, the Pearson correlation between those
- samples' GB libraries was plotted against the correlation between the same
- samples’ non-GB libraries.
- Each point represents a unique pair of samples.
- The solid gray line shows a quantile-quantile plot of the distribution of GB
- correlations vs.
- that of non-GB correlations.
- The thin dashed line is the identity line, provided for reference.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To evaluate the possibility of globin blocking causing random perturbations
- and reducing sample quality, we computed the Pearson correlation between
- logCPM values for every pair of samples with and without GB and plotted
- them against each other (Figure
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "fig:gene-abundance-correlations"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- ).
- The plot indicated that the GB libraries have higher sample-to-sample correlati
- ons than the non-GB libraries.
- Parametric and nonparametric tests for differences between the correlations
- with and without GB both confirmed that this difference was highly significant
- (2-sided paired t-test: t = 37.2, df = 665, P ≪ 2.2e-16; 2-sided Wilcoxon
- sign-rank test: V = 2195, P ≪ 2.2e-16).
- Performing the same tests on the Spearman correlations gave the same conclusion
- (t-test: t = 26.8, df = 665, P ≪ 2.2e-16; sign-rank test: V = 8781, P ≪ 2.2e-16).
- We used the edgeR package to compute the overall biological coefficient
- of variation (BCV) for GB and non-GB libraries, and found that globin blocking
- resulted in a negligible increase in the BCV (0.417 with GB vs.
- 0.400 without).
- The near equality of the BCVs for both sets indicates that the higher correlati
- ons in the GB libraries are most likely a result of the increased yield
- of useful reads, which reduces the contribution of Poisson counting uncertainty
- to the overall variance of the logCPM values
- \begin_inset CommandInset citation
- LatexCommand cite
- key "McCarthy2012"
- literal "false"
- \end_inset
- .
- This improves the precision of expression measurements and more than offsets
- the negligible increase in BCV.
- \end_layout
- \begin_layout Subsection*
- More differentially expressed genes are detected with globin blocking
- \end_layout
- \begin_layout Standard
- \begin_inset Float table
- wide false
- sideways false
- status open
- \begin_layout Plain Layout
- \align center
- \begin_inset Tabular
- <lyxtabular version="3" rows="5" columns="5">
- <features tabularvalignment="middle">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <column alignment="center" valignment="top">
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- No Globin Blocking
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Up
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- NS
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Down
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="3" alignment="center" valignment="middle" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Globin-Blocking
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Up
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 231
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 515
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 2
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="4" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- NS
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 160
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 11235
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 136
- \end_layout
- \end_inset
- </cell>
- </row>
- <row>
- <cell multirow="4" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \series bold
- Down
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 0
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 548
- \end_layout
- \end_inset
- </cell>
- <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
- \begin_inset Text
- \begin_layout Plain Layout
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \xout off
- \uuline off
- \uwave off
- \noun off
- \color none
- 127
- \end_layout
- \end_inset
- </cell>
- </row>
- </lyxtabular>
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \begin_inset Caption Standard
- \begin_layout Plain Layout
- \series bold
- \begin_inset Argument 1
- status open
- \begin_layout Plain Layout
- Comparison of significantly differentially expressed genes with and without
- globin blocking.
- \end_layout
- \end_inset
- \begin_inset CommandInset label
- LatexCommand label
- name "tab:Comparison-of-significant"
- \end_inset
- Comparison of significantly differentially expressed genes with and without
- globin blocking.
- \series default
- Up, Down: Genes significantly up/down-regulated in post-transplant samples
- relative to pre-transplant samples, with a false discovery rate of 10%
- or less.
- NS: Non-significant genes (false discovery rate greater than 10%).
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Plain Layout
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- To compare performance on differential gene expression tests, we took subsets
- of both the GB and non-GB libraries with exactly one pre-transplant and
- one post-transplant sample for each animal that had paired samples available
- for analysis (N=7 animals, N=14 samples in each subset).
- The same test for pre- vs.
- post-transplant differential gene expression was performed on the same
- 7 pairs of samples from GB libraries and non-GB libraries, in each case
- using an FDR of 10% as the threshold of significance.
- Out of 12954 genes that passed the detection threshold in both subsets,
- 358 were called significantly differentially expressed in the same direction
- in both sets; 1063 were differentially expressed in the GB set only; 296
- were differentially expressed in the non-GB set only; 2 genes were called
- significantly up in the GB set but significantly down in the non-GB set;
- and the remaining 11235 were not called differentially expressed in either
- set.
- These data are summarized in Table
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "tab:Comparison-of-significant"
- plural "false"
- caps "false"
- noprefix "false"
- \end_inset
- .
- The differences in BCV calculated by edgeR for these subsets of samples
- were negligible (BCV = 0.302 for GB and 0.297 for non-GB).
- \end_layout
- \begin_layout Standard
- The key point is that the GB data result in substantially more differential
- expression calls than the non-GB data.
- Since there is no gold standard for this dataset, it is impossible to be
- certain whether this is due to under-calling of differential expression
- in the non-GB samples or over-calling in the GB samples.
- However, given that both datasets are derived from the same biological
- samples and have nearly equal BCVs, it is more likely that the larger number
- of DE calls in the GB samples are genuine detections that were enabled
- by the higher sequencing depth and measurement precision of the GB samples.
- Note that the same set of genes was considered in both subsets, so the
- larger number of differentially expressed gene calls in the GB dataset
- reflects a greater sensitivity to detect significant differential gene
- expression and not simply the larger total number of detected genes in
- GB samples described earlier.
- \end_layout
- \begin_layout Section
- Discussion
- \end_layout
- \begin_layout Standard
- The original experience with whole blood gene expression profiling on DNA
- microarrays demonstrated that the high concentration of globin transcripts
- impaired the detection of genes with relatively low expression
- levels, in effect significantly reducing the sensitivity.
- To address this limitation, commercial protocols for globin reduction were
- developed based on strategies to block globin transcript amplification
- during labeling or physically removing globin transcripts by affinity bead
- methods
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Winn2010"
- literal "false"
- \end_inset
- .
- More recently, using the latest generation of labeling protocols and arrays,
- it was determined that globin reduction was no longer necessary to obtain
- sufficient sensitivity to detect differential transcript expression
- \begin_inset CommandInset citation
- LatexCommand cite
- key "NuGEN2010"
- literal "false"
- \end_inset
- .
- However, we are not aware of any publications using these currently available
- protocols with the latest generation of microarrays that actually compare
- the detection sensitivity with and without globin reduction.
- Nevertheless, in practice the omission of globin reduction has now been
- generally adopted, primarily driven by concerns for cost control.
- The main objective of our work was to directly test the impact of globin
- gene transcripts and a new globin blocking protocol for application to
- the newest generation of differential gene expression profiling determined
- using next generation sequencing.
-
- \end_layout
- \begin_layout Standard
- The challenge of doing global gene expression profiling in cynomolgus monkeys
- is that the currently available arrays were never designed to comprehensively
- cover this genome and have not been updated since the first assemblies
- of the cynomolgus genome were published.
- Therefore, we determined that the best strategy for peripheral blood profiling
- was to do deep RNA-seq and inform the workflow using the latest available
- genome assembly and annotation
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Wilson2013"
- literal "false"
- \end_inset
- .
- However, it was not immediately clear whether globin reduction was necessary
- for RNA-seq or how much improvement in efficiency or sensitivity to detect
- differential gene expression would be achieved for the added cost and work.
-
- \end_layout
- \begin_layout Standard
- We only found one report that demonstrated that globin reduction significantly
- improved the effective read yields for sequencing of human peripheral blood
- cell RNA using a DeepSAGE protocol
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Mastrokolias2012"
- literal "false"
- \end_inset
- .
- The approach to DeepSAGE involves two different restriction enzymes that
- purify and then tag small fragments of transcripts at specific locations
- and thus significantly reduces the complexity of the transcriptome.
- Therefore, we could not determine how DeepSAGE results would translate
- to the common strategy in the field for assaying the entire transcript
- population by whole-transcriptome 3’-end RNA-seq.
- Furthermore, if globin reduction was necessary, we also needed a globin
- reduction method specific to cynomolgus globin sequences that would work
- in an organism for which no kit is available off the shelf.
- \end_layout
- \begin_layout Standard
- As mentioned above, the addition of globin blocking oligos has a very small
- impact on measured gene expression levels.
- However, this is a non-issue for the purposes of differential expression
- testing, since a systematic change in a gene in all samples does not affect
- relative expression levels between samples.
- Nevertheless, we must acknowledge that simple comparisons of gene expression
- data obtained by GB and non-GB protocols are not possible without additional
- normalization.
-
- \end_layout
- \begin_layout Standard
- More importantly, globin blocking not only nearly doubles the yield of usable
- reads, it also increases inter-sample correlation and sensitivity to detect
- differential gene expression relative to the same set of samples profiled
- without blocking.
- In addition, globin blocking does not add a significant amount of random
- noise to the data.
- Globin blocking thus represents a cost-effective way to squeeze more data
- and statistical power out of the same blood samples and the same amount
- of sequencing.
- In conclusion, globin reduction greatly increases the yield of useful RNA-seq
- reads mapping to the rest of the genome, with minimal perturbations in
- the relative levels of non-globin genes.
- Based on these results, globin transcript reduction using sequence-specific,
- complementary blocking oligonucleotides is recommended for all deep RNA-seq
- of cynomolgus and other nonhuman primate blood samples.
- \end_layout
- \begin_layout Chapter
- Future Directions
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Consider per-chapter future directions.
- Check instructions.
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Itemize
- Study other epigenetic marks in more contexts
- \end_layout
- \begin_deeper
- \begin_layout Itemize
- DNA methylation, histone marks, chromatin accessibility & conformation in
- CD4 T-cells
- \end_layout
- \begin_layout Itemize
- Also look at other types of lymphocytes: CD8 T-cells, B-cells, NK cells
- \end_layout
- \end_deeper
- \begin_layout Itemize
- Use CV or bootstrap to better evaluate classifiers
- \end_layout
- \begin_layout Standard
- \begin_inset ERT
- status open
- \begin_layout Plain Layout
- % Use "References" instead of "Bibliography"
- \end_layout
- \begin_layout Plain Layout
- \backslash
- renewcommand{
- \backslash
- bibname}{References}
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Flex TODO Note (inline)
- status open
- \begin_layout Plain Layout
- Check bib entry formatting & sort order
- \end_layout
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset bibtex
- LatexCommand bibtex
- btprint "btPrintCited"
- bibfiles "refs,code-refs"
- options "bibtotoc,unsrt"
- \end_inset
- \end_layout
- \end_body
- \end_document
|